Diffstat (limited to 'target/arm')
-rw-r--r-- | target/arm/Kconfig | 5
-rw-r--r-- | target/arm/arch_dump.c | 9
-rw-r--r-- | target/arm/arm-powerctl.c | 77
-rw-r--r-- | target/arm/arm-qmp-cmds.c (renamed from target/arm/monitor.c) | 46
-rw-r--r-- | target/arm/common-semi-target.h | 60
-rw-r--r-- | target/arm/cortex-regs.c | 76
-rw-r--r-- | target/arm/cpregs.h | 1137
-rw-r--r-- | target/arm/cpu-features.h | 1021
-rw-r--r-- | target/arm/cpu-param.h | 15
-rw-r--r-- | target/arm/cpu-qom.h | 86
-rw-r--r-- | target/arm/cpu.c | 1039
-rw-r--r-- | target/arm/cpu.h | 2305
-rw-r--r-- | target/arm/cpu64.c | 1017
-rw-r--r-- | target/arm/debug_helper.c | 979
-rw-r--r-- | target/arm/gdbstub.c | 600
-rw-r--r-- | target/arm/gdbstub64.c | 314
-rw-r--r-- | target/arm/gtimer.h | 21
-rw-r--r-- | target/arm/helper-a64.c | 1136
-rw-r--r-- | target/arm/helper.c | 8354
-rw-r--r-- | target/arm/helper.h | 50
-rw-r--r-- | target/arm/hvf/hvf.c | 1128
-rw-r--r-- | target/arm/hvf/meson.build | 2
-rw-r--r-- | target/arm/hvf/trace-events | 2
-rw-r--r-- | target/arm/hvf_arm.h | 9
-rw-r--r-- | target/arm/hyp_gdbstub.c | 253
-rw-r--r-- | target/arm/internals.h | 879
-rw-r--r-- | target/arm/kvm-consts.h | 31
-rw-r--r-- | target/arm/kvm-stub.c | 4
-rw-r--r-- | target/arm/kvm.c | 1520
-rw-r--r-- | target/arm/kvm64.c | 1606
-rw-r--r-- | target/arm/kvm_arm.h | 271
-rw-r--r-- | target/arm/machine.c | 140
-rw-r--r-- | target/arm/meson.build | 59
-rw-r--r-- | target/arm/multiprocessing.h | 16
-rw-r--r-- | target/arm/ptw.c | 3650
-rw-r--r-- | target/arm/syndrome.h | 96
-rw-r--r-- | target/arm/tcg-stubs.c | 27
-rw-r--r-- | target/arm/tcg/a32-uncond.decode (renamed from target/arm/a32-uncond.decode) | 0
-rw-r--r-- | target/arm/tcg/a32.decode (renamed from target/arm/a32.decode) | 16
-rw-r--r-- | target/arm/tcg/a64.decode | 591
-rw-r--r-- | target/arm/tcg/arm_ldst.h (renamed from target/arm/arm_ldst.h) | 0
-rw-r--r-- | target/arm/tcg/cpu-v7m.c | 290
-rw-r--r-- | target/arm/tcg/cpu32.c (renamed from target/arm/cpu_tcg.c) | 614
-rw-r--r-- | target/arm/tcg/cpu64.c | 1295
-rw-r--r-- | target/arm/tcg/crypto_helper.c (renamed from target/arm/crypto_helper.c) | 297
-rw-r--r-- | target/arm/tcg/helper-a64.c | 1857
-rw-r--r-- | target/arm/tcg/helper-a64.h (renamed from target/arm/helper-a64.h) | 29
-rw-r--r-- | target/arm/tcg/helper-mve.h (renamed from target/arm/helper-mve.h) | 0
-rw-r--r-- | target/arm/tcg/helper-sme.h | 146
-rw-r--r-- | target/arm/tcg/helper-sve.h (renamed from target/arm/helper-sve.h) | 4
-rw-r--r-- | target/arm/tcg/hflags.c | 485
-rw-r--r-- | target/arm/tcg/iwmmxt_helper.c (renamed from target/arm/iwmmxt_helper.c) | 0
-rw-r--r-- | target/arm/tcg/m-nocp.decode (renamed from target/arm/m-nocp.decode) | 0
-rw-r--r-- | target/arm/tcg/m_helper.c (renamed from target/arm/m_helper.c) | 329
-rw-r--r-- | target/arm/tcg/meson.build | 60
-rw-r--r-- | target/arm/tcg/mte_helper.c (renamed from target/arm/mte_helper.c) | 484
-rw-r--r-- | target/arm/tcg/mve.decode (renamed from target/arm/mve.decode) | 0
-rw-r--r-- | target/arm/tcg/mve_helper.c (renamed from target/arm/mve_helper.c) | 38
-rw-r--r-- | target/arm/tcg/neon-dp.decode (renamed from target/arm/neon-dp.decode) | 0
-rw-r--r-- | target/arm/tcg/neon-ls.decode (renamed from target/arm/neon-ls.decode) | 0
-rw-r--r-- | target/arm/tcg/neon-shared.decode (renamed from target/arm/neon-shared.decode) | 0
-rw-r--r-- | target/arm/tcg/neon_helper.c (renamed from target/arm/neon_helper.c) | 2
-rw-r--r-- | target/arm/tcg/op_helper.c (renamed from target/arm/op_helper.c) | 423
-rw-r--r-- | target/arm/tcg/pauth_helper.c (renamed from target/arm/pauth_helper.c) | 201
-rw-r--r-- | target/arm/tcg/psci.c (renamed from target/arm/psci.c) | 75
-rw-r--r-- | target/arm/tcg/sme-fa64.decode | 60
-rw-r--r-- | target/arm/tcg/sme.decode | 88
-rw-r--r-- | target/arm/tcg/sme_helper.c | 1179
-rw-r--r-- | target/arm/tcg/sve.decode (renamed from target/arm/sve.decode) | 107
-rw-r--r-- | target/arm/tcg/sve_helper.c (renamed from target/arm/sve_helper.c) | 368
-rw-r--r-- | target/arm/tcg/sve_ldst_internal.h | 222
-rw-r--r-- | target/arm/tcg/t16.decode (renamed from target/arm/t16.decode) | 0
-rw-r--r-- | target/arm/tcg/t32.decode (renamed from target/arm/t32.decode) | 18
-rw-r--r-- | target/arm/tcg/tlb_helper.c | 398
-rw-r--r-- | target/arm/tcg/translate-a32.h (renamed from target/arm/translate-a32.h) | 48
-rw-r--r-- | target/arm/tcg/translate-a64.c (renamed from target/arm/translate-a64.c) | 6663
-rw-r--r-- | target/arm/tcg/translate-a64.h (renamed from target/arm/translate-a64.h) | 107
-rw-r--r-- | target/arm/tcg/translate-m-nocp.c (renamed from target/arm/translate-m-nocp.c) | 77
-rw-r--r-- | target/arm/tcg/translate-mve.c (renamed from target/arm/translate-mve.c) | 161
-rw-r--r-- | target/arm/tcg/translate-neon.c (renamed from target/arm/translate-neon.c) | 265
-rw-r--r-- | target/arm/tcg/translate-sme.c | 343
-rw-r--r-- | target/arm/tcg/translate-sve.c (renamed from target/arm/translate-sve.c) | 6479
-rw-r--r-- | target/arm/tcg/translate-vfp.c (renamed from target/arm/translate-vfp.c) | 400
-rw-r--r-- | target/arm/tcg/translate.c (renamed from target/arm/translate.c) | 1907
-rw-r--r-- | target/arm/tcg/translate.h (renamed from target/arm/translate.h) | 312
-rw-r--r-- | target/arm/tcg/vec_helper.c (renamed from target/arm/vec_helper.c) | 156
-rw-r--r-- | target/arm/tcg/vec_internal.h (renamed from target/arm/vec_internal.h) | 43
-rw-r--r-- | target/arm/tcg/vfp-uncond.decode (renamed from target/arm/vfp-uncond.decode) | 0
-rw-r--r-- | target/arm/tcg/vfp.decode (renamed from target/arm/vfp.decode) | 0
-rw-r--r-- | target/arm/tlb_helper.c | 214
-rw-r--r-- | target/arm/trace-events | 8
-rw-r--r-- | target/arm/vfp_helper.c | 111
92 files changed, 32768 insertions, 22242 deletions
diff --git a/target/arm/Kconfig b/target/arm/Kconfig
index 3f3394a22b..bf57d739cd 100644
--- a/target/arm/Kconfig
+++ b/target/arm/Kconfig
@@ -1,5 +1,10 @@
 config ARM
     bool
+    select ARM_COMPATIBLE_SEMIHOSTING if TCG
+
+    # We need to select this until we move m_helper.c and the
+    # translate.c v7m helpers under ARM_V7M.
+    select ARM_V7M if TCG
 
 config AARCH64
     bool
diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c
index 0184845310..06cdf4ba28 100644
--- a/target/arm/arch_dump.c
+++ b/target/arm/arch_dump.c
@@ -22,6 +22,7 @@
 #include "cpu.h"
 #include "elf.h"
 #include "sysemu/dump.h"
+#include "cpu-features.h"
 
 /* struct user_pt_regs from arch/arm64/include/uapi/asm/ptrace.h */
 struct aarch64_user_regs {
@@ -166,7 +167,7 @@ static off_t sve_fpcr_offset(uint32_t vq)
 
 static uint32_t sve_current_vq(CPUARMState *env)
 {
-    return sve_zcr_len_for_el(env, arm_current_el(env)) + 1;
+    return sve_vqm1_for_el(env, arm_current_el(env)) + 1;
 }
 
 static size_t sve_size_vq(uint32_t vq)
@@ -232,12 +233,11 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f,
 #endif
 
 int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
-                             int cpuid, void *opaque)
+                             int cpuid, DumpState *s)
 {
     struct aarch64_note note;
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
-    DumpState *s = opaque;
     uint64_t pstate, sp;
     int ret, i;
 
@@ -360,12 +360,11 @@ static int arm_write_elf32_vfp(WriteCoreDumpFunction f, CPUARMState *env,
 }
 
 int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
-                             int cpuid, void *opaque)
+                             int cpuid, DumpState *s)
 {
     struct arm_note note;
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
-    DumpState *s = opaque;
    int ret, i;
     bool fpvalid = cpu_isar_feature(aa32_vfp_simd, cpu);
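
For readers following the diff: sve_vqm1_for_el() reports the vector length
in the "vq minus 1" form used by the ZCR_ELx.LEN encoding (one vector
quantum = 128 bits), hence the "+ 1" in sve_current_vq(). A minimal sketch
of the arithmetic, assuming those definitions:

    uint32_t vq = sve_vqm1_for_el(env, arm_current_el(env)) + 1;
    size_t vl_bytes = vq * 16;    /* one 128-bit quantum = 16 bytes */
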
diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
index b75f813b40..2b2055c6ac 100644
--- a/target/arm/arm-powerctl.c
+++ b/target/arm/arm-powerctl.c
@@ -15,6 +15,8 @@
 #include "arm-powerctl.h"
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
+#include "sysemu/tcg.h"
+#include "target/arm/multiprocessing.h"
 
 #ifndef DEBUG_ARM_POWERCTL
 #define DEBUG_ARM_POWERCTL 0
@@ -36,7 +38,7 @@ CPUState *arm_get_cpu_by_id(uint64_t id)
     CPU_FOREACH(cpu) {
         ARMCPU *armcpu = ARM_CPU(cpu);
 
-        if (armcpu->mp_affinity == id) {
+        if (arm_cpu_mp_affinity(armcpu) == id) {
             return cpu;
         }
     }
@@ -64,60 +66,9 @@ static void arm_set_cpu_on_async_work(CPUState *target_cpu_state,
 
     /* Initialize the cpu we are turning on */
     cpu_reset(target_cpu_state);
+    arm_emulate_firmware_reset(target_cpu_state, info->target_el);
     target_cpu_state->halted = 0;
 
-    if (info->target_aa64) {
-        if ((info->target_el < 3) && arm_feature(&target_cpu->env,
-                                                 ARM_FEATURE_EL3)) {
-            /*
-             * As target mode is AArch64, we need to set lower
-             * exception level (the requested level 2) to AArch64
-             */
-            target_cpu->env.cp15.scr_el3 |= SCR_RW;
-        }
-
-        if ((info->target_el < 2) && arm_feature(&target_cpu->env,
-                                                 ARM_FEATURE_EL2)) {
-            /*
-             * As target mode is AArch64, we need to set lower
-             * exception level (the requested level 1) to AArch64
-             */
-            target_cpu->env.cp15.hcr_el2 |= HCR_RW;
-        }
-
-        target_cpu->env.pstate = aarch64_pstate_mode(info->target_el, true);
-    } else {
-        /* We are requested to boot in AArch32 mode */
-        static const uint32_t mode_for_el[] = { 0,
-                                                ARM_CPU_MODE_SVC,
-                                                ARM_CPU_MODE_HYP,
-                                                ARM_CPU_MODE_SVC };
-
-        cpsr_write(&target_cpu->env, mode_for_el[info->target_el], CPSR_M,
-                   CPSRWriteRaw);
-    }
-
-    if (info->target_el == 3) {
-        /* Processor is in secure mode */
-        target_cpu->env.cp15.scr_el3 &= ~SCR_NS;
-    } else {
-        /* Processor is not in secure mode */
-        target_cpu->env.cp15.scr_el3 |= SCR_NS;
-
-        /* Set NSACR.{CP11,CP10} so NS can access the FPU */
-        target_cpu->env.cp15.nsacr |= 3 << 10;
-
-        /*
-         * If QEMU is providing the equivalent of EL3 firmware, then we need
-         * to make sure a CPU targeting EL2 comes out of reset with a
-         * functional HVC insn.
-         */
-        if (arm_feature(&target_cpu->env, ARM_FEATURE_EL3)
-            && info->target_el == 2) {
-            target_cpu->env.cp15.scr_el3 |= SCR_HCE;
-        }
-    }
-
     /* We check if the started CPU is now at the correct level */
     assert(info->target_el == arm_current_el(&target_cpu->env));
 
@@ -127,8 +78,10 @@ static void arm_set_cpu_on_async_work(CPUState *target_cpu_state,
         target_cpu->env.regs[0] = info->context_id;
     }
 
-    /* CP15 update requires rebuilding hflags */
-    arm_rebuild_hflags(&target_cpu->env);
+    if (tcg_enabled()) {
+        /* CP15 update requires rebuilding hflags */
+        arm_rebuild_hflags(&target_cpu->env);
+    }
 
     /* Start the new CPU at the requested address */
     cpu_set_pc(target_cpu_state, info->entry);
@@ -136,7 +89,7 @@ static void arm_set_cpu_on_async_work(CPUState *target_cpu_state,
     g_free(info);
 
     /* Finally set the power status */
-    assert(qemu_mutex_iothread_locked());
+    assert(bql_locked());
     target_cpu->power_state = PSCI_ON;
 }
 
@@ -147,7 +100,7 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id,
     ARMCPU *target_cpu;
     struct CpuOnInfo *info;
 
-    assert(qemu_mutex_iothread_locked());
+    assert(bql_locked());
 
     DPRINTF("cpu %" PRId64 " (EL %d, %s) @ 0x%" PRIx64 " with R0 = 0x%" PRIx64
             "\n", cpuid, target_el, target_aa64 ? "aarch64" : "aarch32", entry,
@@ -244,7 +197,7 @@ static void arm_set_cpu_on_and_reset_async_work(CPUState *target_cpu_state,
     target_cpu_state->halted = 0;
 
     /* Finally set the power status */
-    assert(qemu_mutex_iothread_locked());
+    assert(bql_locked());
     target_cpu->power_state = PSCI_ON;
 }
 
@@ -253,7 +206,7 @@ int arm_set_cpu_on_and_reset(uint64_t cpuid)
     CPUState *target_cpu_state;
     ARMCPU *target_cpu;
 
-    assert(qemu_mutex_iothread_locked());
+    assert(bql_locked());
 
     /* Retrieve the cpu we are powering up */
     target_cpu_state = arm_get_cpu_by_id(cpuid);
@@ -295,7 +248,7 @@ static void arm_set_cpu_off_async_work(CPUState *target_cpu_state,
 {
     ARMCPU *target_cpu = ARM_CPU(target_cpu_state);
 
-    assert(qemu_mutex_iothread_locked());
+    assert(bql_locked());
     target_cpu->power_state = PSCI_OFF;
     target_cpu_state->halted = 1;
     target_cpu_state->exception_index = EXCP_HLT;
@@ -306,7 +259,7 @@ int arm_set_cpu_off(uint64_t cpuid)
     CPUState *target_cpu_state;
     ARMCPU *target_cpu;
 
-    assert(qemu_mutex_iothread_locked());
+    assert(bql_locked());
 
     DPRINTF("cpu %" PRId64 "\n", cpuid);
 
@@ -342,7 +295,7 @@ int arm_reset_cpu(uint64_t cpuid)
     CPUState *target_cpu_state;
     ARMCPU *target_cpu;
 
-    assert(qemu_mutex_iothread_locked());
+    assert(bql_locked());
 
     DPRINTF("cpu %" PRId64 "\n", cpuid);
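
For context, a minimal sketch of how a PSCI CPU_ON handler might drive this
API (the call site below is illustrative, not part of the patch); all of
these entry points must run with the Big QEMU Lock held, which is what the
bql_locked() assertions above enforce:

    /* Power on the CPU with MPIDR affinity 1, entering AArch64 EL1. */
    assert(bql_locked());
    if (arm_set_cpu_on(1, entry, context_id, 1, true)
        != QEMU_ARM_POWERCTL_RET_SUCCESS) {
        /* the CPU was missing, already on, or mid-transition */
    }
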
diff --git a/target/arm/monitor.c b/target/arm/arm-qmp-cmds.c
index 80c64fa355..3cc8cc738b 100644
--- a/target/arm/monitor.c
+++ b/target/arm/arm-qmp-cmds.c
@@ -28,7 +28,6 @@
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qapi-commands-machine-target.h"
 #include "qapi/qapi-commands-misc-target.h"
-#include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qdict.h"
 #include "qom/qom-qobject.h"
 
@@ -95,7 +94,7 @@ static const char *cpu_model_advertised_features[] = {
     "sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280",
     "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048",
     "kvm-no-adjvtime", "kvm-steal-time",
-    "pauth", "pauth-impdef",
+    "pauth", "pauth-impdef", "pauth-qarma3",
     NULL
 };
 
@@ -104,7 +103,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type,
                                                      Error **errp)
 {
     CpuModelExpansionInfo *expansion_info;
-    const QDict *qdict_in = NULL;
+    const QDict *qdict_in;
     QDict *qdict_out;
     ObjectClass *oc;
     Object *obj;
@@ -151,27 +150,20 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type,
         }
     }
 
-    if (model->props) {
-        qdict_in = qobject_to(QDict, model->props);
-        if (!qdict_in) {
-            error_setg(errp, QERR_INVALID_PARAMETER_TYPE, "props", "dict");
-            return NULL;
-        }
-    }
-
     obj = object_new(object_class_get_name(oc));
 
-    if (qdict_in) {
+    if (model->props) {
         Visitor *visitor;
         Error *err = NULL;
 
         visitor = qobject_input_visitor_new(model->props);
-        if (!visit_start_struct(visitor, NULL, NULL, 0, errp)) {
+        if (!visit_start_struct(visitor, "model.props", NULL, 0, errp)) {
             visit_free(visitor);
             object_unref(obj);
             return NULL;
         }
 
+        qdict_in = qobject_to(QDict, model->props);
         i = 0;
         while ((name = cpu_model_advertised_features[i++]) != NULL) {
             if (qdict_get(qdict_in, name)) {
@@ -221,10 +213,36 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type,
         qobject_unref(qdict_out);
     } else {
         expansion_info->model->props = QOBJECT(qdict_out);
-        expansion_info->model->has_props = true;
     }
 
     object_unref(obj);
 
     return expansion_info;
 }
+
+static void arm_cpu_add_definition(gpointer data, gpointer user_data)
+{
+    ObjectClass *oc = data;
+    CpuDefinitionInfoList **cpu_list = user_data;
+    CpuDefinitionInfo *info;
+    const char *typename;
+
+    typename = object_class_get_name(oc);
+    info = g_malloc0(sizeof(*info));
+    info->name = cpu_model_from_type(typename);
+    info->q_typename = g_strdup(typename);
+
+    QAPI_LIST_PREPEND(*cpu_list, info);
+}
+
+CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp)
+{
+    CpuDefinitionInfoList *cpu_list = NULL;
+    GSList *list;
+
+    list = object_class_get_list(TYPE_ARM_CPU, false);
+    g_slist_foreach(list, arm_cpu_add_definition, &cpu_list);
+    g_slist_free(list);
+
+    return cpu_list;
+}
diff --git a/target/arm/common-semi-target.h b/target/arm/common-semi-target.h
new file mode 100644
index 0000000000..da51f2d7f5
--- /dev/null
+++ b/target/arm/common-semi-target.h
@@ -0,0 +1,60 @@
+/*
+ * Target-specific parts of semihosting/arm-compat-semi.c.
+ *
+ * Copyright (c) 2005, 2007 CodeSourcery.
+ * Copyright (c) 2019, 2022 Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef TARGET_ARM_COMMON_SEMI_TARGET_H
+#define TARGET_ARM_COMMON_SEMI_TARGET_H
+
+#include "target/arm/cpu-qom.h"
+
+static inline target_ulong common_semi_arg(CPUState *cs, int argno)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+    if (is_a64(env)) {
+        return env->xregs[argno];
+    } else {
+        return env->regs[argno];
+    }
+}
+
+static inline void common_semi_set_ret(CPUState *cs, target_ulong ret)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+    if (is_a64(env)) {
+        env->xregs[0] = ret;
+    } else {
+        env->regs[0] = ret;
+    }
+}
+
+static inline bool common_semi_sys_exit_extended(CPUState *cs, int nr)
+{
+    return nr == TARGET_SYS_EXIT_EXTENDED || is_a64(cpu_env(cs));
+}
+
+static inline bool is_64bit_semihosting(CPUArchState *env)
+{
+    return is_a64(env);
+}
+
+static inline target_ulong common_semi_stack_bottom(CPUState *cs)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+    return is_a64(env) ? env->xregs[31] : env->regs[13];
+}
+
+static inline bool common_semi_has_synccache(CPUArchState *env)
+{
+    /* Ok for A64, invalid for A32/T32 */
+    return is_a64(env);
+}
+
+#endif
diff --git a/target/arm/cortex-regs.c b/target/arm/cortex-regs.c
new file mode 100644
index 0000000000..ae817b08dd
--- /dev/null
+++ b/target/arm/cortex-regs.c
@@ -0,0 +1,76 @@
+/*
+ * ARM Cortex-A registers
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "cpregs.h"
+
+
+static uint64_t l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    ARMCPU *cpu = env_archcpu(env);
+
+    /*
+     * Number of cores is in [25:24]; otherwise we RAZ.
+     * If the board didn't configure the CPUs into clusters,
+     * we default to "all CPUs in one cluster", which might be
+     * more than the 4 that the hardware permits and which is
+     * all you can report in this two-bit field. Saturate to
+     * 0b11 (== 4 CPUs) rather than overflowing the field.
+     */
+    return MIN(cpu->core_count - 1, 3) << 24;
+}
+
+static const ARMCPRegInfo cortex_a72_a57_a53_cp_reginfo[] = {
+    { .name = "L2CTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 2,
+      .access = PL1_RW, .readfn = l2ctlr_read,
+      .writefn = arm_cp_write_ignore },
+    { .name = "L2CTLR",
+      .cp = 15, .opc1 = 1, .crn = 9, .crm = 0, .opc2 = 2,
+      .access = PL1_RW, .readfn = l2ctlr_read,
+      .writefn = arm_cp_write_ignore },
+    { .name = "L2ECTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 3,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "L2ECTLR",
+      .cp = 15, .opc1 = 1, .crn = 9, .crm = 0, .opc2 = 3,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "L2ACTLR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 0, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUACTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUACTLR",
+      .cp = 15, .opc1 = 0, .crm = 15,
+      .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "CPUECTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 1,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUECTLR",
+      .cp = 15, .opc1 = 1, .crm = 15,
+      .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "CPUMERRSR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 2,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUMERRSR",
+      .cp = 15, .opc1 = 2, .crm = 15,
+      .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "L2MERRSR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 3,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "L2MERRSR",
+      .cp = 15, .opc1 = 3, .crm = 15,
+      .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+};
+
+void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu)
+{
+    define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
+}
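
Worked example of the saturation in l2ctlr_read() above (editorial, just
evaluating the formula for a few core counts):

    core_count = 2  ->  MIN(1, 3) << 24 = 0x01000000   /* two cores */
    core_count = 4  ->  MIN(3, 3) << 24 = 0x03000000   /* four cores */
    core_count = 6  ->  MIN(5, 3) << 24 = 0x03000000   /* saturates at 0b11 */
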
diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
new file mode 100644
index 0000000000..cc7c54378f
--- /dev/null
+++ b/target/arm/cpregs.h
@@ -0,0 +1,1137 @@
+/*
+ * QEMU ARM CP Register access and descriptions
+ *
+ * Copyright (c) 2022 Linaro Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see
+ * <http://www.gnu.org/licenses/gpl-2.0.html>
+ */
+
+#ifndef TARGET_ARM_CPREGS_H
+#define TARGET_ARM_CPREGS_H
+
+#include "hw/registerfields.h"
+#include "target/arm/kvm-consts.h"
+
+/*
+ * ARMCPRegInfo type field bits:
+ */
+enum {
+    /*
+     * Register must be handled specially during translation.
+     * The method is one of the values below:
+     */
+    ARM_CP_SPECIAL_MASK = 0x000f,
+    /* Special: no change to PE state: writes ignored, reads ignored. */
+    ARM_CP_NOP = 0x0001,
+    /* Special: sysreg is WFI, for v5 and v6. */
+    ARM_CP_WFI = 0x0002,
+    /* Special: sysreg is NZCV. */
+    ARM_CP_NZCV = 0x0003,
+    /* Special: sysreg is CURRENTEL. */
+    ARM_CP_CURRENTEL = 0x0004,
+    /* Special: sysreg is DC ZVA or similar. */
+    ARM_CP_DC_ZVA = 0x0005,
+    ARM_CP_DC_GVA = 0x0006,
+    ARM_CP_DC_GZVA = 0x0007,
+
+    /* Flag: reads produce resetvalue; writes ignored. */
+    ARM_CP_CONST = 1 << 4,
+    /* Flag: For ARM_CP_STATE_AA32, sysreg is 64-bit. */
+    ARM_CP_64BIT = 1 << 5,
+    /*
+     * Flag: TB should not be ended after a write to this register
+     * (the default is that the TB ends after cp writes).
+     */
+    ARM_CP_SUPPRESS_TB_END = 1 << 6,
+    /*
+     * Flag: Permit a register definition to override a previous definition
+     * for the same (cp, is64, crn, crm, opc1, opc2) tuple: either the new
+     * or the old must have the ARM_CP_OVERRIDE bit set.
+     */
+    ARM_CP_OVERRIDE = 1 << 7,
+    /*
+     * Flag: Register is an alias view of some underlying state which is also
+     * visible via another register, and that the other register is handling
+     * migration and reset; registers marked ARM_CP_ALIAS will not be migrated
+     * but may have their state set by syncing of register state from KVM.
+     */
+    ARM_CP_ALIAS = 1 << 8,
+    /*
+     * Flag: Register does I/O and therefore its accesses need to be marked
+     * with translator_io_start() and also end the TB. In particular,
+     * registers which implement clocks or timers require this.
+     */
+    ARM_CP_IO = 1 << 9,
+    /*
+     * Flag: Register has no underlying state and does not support raw access
+     * for state saving/loading; it will not be used for either migration or
+     * KVM state synchronization. Typically this is for "registers" which are
+     * actually used as instructions for cache maintenance and so on.
+     */
+    ARM_CP_NO_RAW = 1 << 10,
+    /*
+     * Flag: The read or write hook might raise an exception; the generated
+     * code will synchronize the CPU state before calling the hook so that it
+     * is safe for the hook to call raise_exception().
+     */
+    ARM_CP_RAISES_EXC = 1 << 11,
+    /*
+     * Flag: Writes to the sysreg might change the exception level - typically
+     * on older ARM chips. For those cases we need to re-read the new el when
+     * recomputing the translation flags.
+     */
+    ARM_CP_NEWEL = 1 << 12,
+    /*
+     * Flag: Access check for this sysreg is identical to accessing FPU state
+     * from an instruction: use translation fp_access_check().
+     */
+    ARM_CP_FPU = 1 << 13,
+    /*
+     * Flag: Access check for this sysreg is identical to accessing SVE state
+     * from an instruction: use translation sve_access_check().
+     */
+    ARM_CP_SVE = 1 << 14,
+    /* Flag: Do not expose in gdb sysreg xml. */
+    ARM_CP_NO_GDB = 1 << 15,
+    /*
+     * Flags: If EL3 but not EL2...
+     *  - UNDEF: discard the cpreg,
+     *  - KEEP: retain the cpreg as is,
+     *  - C_NZ: set const on the cpreg, but retain resetvalue,
+     *  - else: set const on the cpreg, zero resetvalue, aka RES0.
+     * See rule RJFFP in section D1.1.3 of DDI0487H.a.
+     */
+    ARM_CP_EL3_NO_EL2_UNDEF = 1 << 16,
+    ARM_CP_EL3_NO_EL2_KEEP = 1 << 17,
+    ARM_CP_EL3_NO_EL2_C_NZ = 1 << 18,
+    /*
+     * Flag: Access check for this sysreg is constrained by the
+     * ARM pseudocode function CheckSMEAccess().
+     */
+    ARM_CP_SME = 1 << 19,
+    /*
+     * Flag: one of the four EL2 registers which redirect to the
+     * equivalent EL1 register when FEAT_NV2 is enabled.
+     */
+    ARM_CP_NV2_REDIRECT = 1 << 20,
+};
+
+/*
+ * Interface for defining coprocessor registers.
+ * Registers are defined in tables of arm_cp_reginfo structs
+ * which are passed to define_arm_cp_regs().
+ */
+
+/*
+ * When looking up a coprocessor register we look for it
+ * via an integer which encodes all of:
+ *  coprocessor number
+ *  Crn, Crm, opc1, opc2 fields
+ *  32 or 64 bit register (ie is it accessed via MRC/MCR
+ *    or via MRRC/MCRR?)
+ *  non-secure/secure bank (AArch32 only)
+ * We allow 4 bits for opc1 because MRRC/MCRR have a 4 bit field.
+ * (In this case crn and opc2 should be zero.)
+ * For AArch64, there is no 32/64 bit size distinction;
+ * instead all registers have a 2 bit op0, 3 bit op1 and op2,
+ * and 4 bit CRn and CRm. The encoding patterns are chosen
+ * to be easy to convert to and from the KVM encodings, and also
+ * so that the hashtable can contain both AArch32 and AArch64
+ * registers (to allow for interprocessing where we might run
+ * 32 bit code on a 64 bit core).
+ */
+/*
+ * This bit is private to our hashtable cpreg; in KVM register
+ * IDs the AArch64/32 distinction is the KVM_REG_ARM/ARM64
+ * in the upper bits of the 64 bit ID.
+ */
+#define CP_REG_AA64_SHIFT 28
+#define CP_REG_AA64_MASK (1 << CP_REG_AA64_SHIFT)
+
+/*
+ * To enable banking of coprocessor registers depending on ns-bit we
+ * add a bit to distinguish between secure and non-secure cpregs in the
+ * hashtable.
+ */
+#define CP_REG_NS_SHIFT 29
+#define CP_REG_NS_MASK (1 << CP_REG_NS_SHIFT)
+
+#define ENCODE_CP_REG(cp, is64, ns, crn, crm, opc1, opc2)   \
+    ((ns) << CP_REG_NS_SHIFT | ((cp) << 16) | ((is64) << 15) |   \
+     ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2))
+
+#define ENCODE_AA64_CP_REG(cp, crn, crm, op0, op1, op2) \
+    (CP_REG_AA64_MASK |                                 \
+     ((cp) << CP_REG_ARM_COPROC_SHIFT) |                \
+     ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) |         \
+     ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) |         \
+     ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) |         \
+     ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) |         \
+     ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT))
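
A worked example of the AArch32 key (editorial, not part of the patch):
SCTLR is accessed as MRC/MCR p15, 0, <Rt>, c1, c0, 0, so its non-secure
32-bit hash key is

    ENCODE_CP_REG(15, 0, 1, 1, 0, 0, 0)
        = (1 << 29) | (15 << 16) | (1 << 11)
        = 0x200f0800
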
+
+/*
+ * Convert a full 64 bit KVM register ID to the truncated 32 bit
+ * version used as a key for the coprocessor register hashtable
+ */
+static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid)
+{
+    uint32_t cpregid = kvmid;
+    if ((kvmid & CP_REG_ARCH_MASK) == CP_REG_ARM64) {
+        cpregid |= CP_REG_AA64_MASK;
+    } else {
+        if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) {
+            cpregid |= (1 << 15);
+        }
+
+        /*
+         * KVM is always non-secure so add the NS flag on AArch32 register
+         * entries.
+         */
+        cpregid |= 1 << CP_REG_NS_SHIFT;
+    }
+    return cpregid;
+}
+
+/*
+ * Convert a truncated 32 bit hashtable key into the full
+ * 64 bit KVM register ID.
+ */
+static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
+{
+    uint64_t kvmid;
+
+    if (cpregid & CP_REG_AA64_MASK) {
+        kvmid = cpregid & ~CP_REG_AA64_MASK;
+        kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM64;
+    } else {
+        kvmid = cpregid & ~(1 << 15);
+        if (cpregid & (1 << 15)) {
+            kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM;
+        } else {
+            kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM;
+        }
+    }
+    return kvmid;
+}
+
+/*
+ * Valid values for ARMCPRegInfo state field, indicating which of
+ * the AArch32 and AArch64 execution states this register is visible in.
+ * If the reginfo doesn't explicitly specify then it is AArch32 only.
+ * If the reginfo is declared to be visible in both states then a second
+ * reginfo is synthesised for the AArch32 view of the AArch64 register,
+ * such that the AArch32 view is the lower 32 bits of the AArch64 one.
+ * Note that we rely on the values of these enums as we iterate through
+ * the various states in some places.
+ */
+typedef enum {
+    ARM_CP_STATE_AA32 = 0,
+    ARM_CP_STATE_AA64 = 1,
+    ARM_CP_STATE_BOTH = 2,
+} CPState;
+
+/*
+ * ARM CP register secure state flags. These flags identify security state
+ * attributes for a given CP register entry.
+ * The existence of both or neither secure and non-secure flags indicates that
+ * the register has both a secure and non-secure hash entry. A single one of
+ * these flags causes the register to only be hashed for the specified
+ * security state.
+ * Although definitions may have any combination of the S/NS bits, each
+ * registered entry will only have one to identify whether the entry is secure
+ * or non-secure.
+ */
+typedef enum {
+    ARM_CP_SECSTATE_BOTH = 0,       /* define one cpreg for each secstate */
+    ARM_CP_SECSTATE_S = (1 << 0),   /* bit[0]: Secure state register */
+    ARM_CP_SECSTATE_NS = (1 << 1),  /* bit[1]: Non-secure state register */
+} CPSecureState;
+
+/*
+ * Access rights:
+ * We define bits for Read and Write access for what rev C of the v7-AR ARM ARM
+ * defines as PL0 (user), PL1 (fiq/irq/svc/abt/und/sys, ie privileged), and
+ * PL2 (hyp). The other level which has Read and Write bits is Secure PL1
+ * (ie any of the privileged modes in Secure state, or Monitor mode).
+ * If a register is accessible in one privilege level it's always accessible
+ * in higher privilege levels too. Since "Secure PL1" also follows this rule
+ * (ie anything visible in PL2 is visible in S-PL1, some things are only
+ * visible in S-PL1) but "Secure PL1" is a bit of a mouthful, we bend the
+ * terminology a little and call this PL3.
+ * In AArch64 things are somewhat simpler as the PLx bits line up exactly
+ * with the ELx exception levels.
+ *
+ * If access permissions for a register are more complex than can be
+ * described with these bits, then use a laxer set of restrictions, and
+ * do the more restrictive/complex check inside a helper function.
+ */
+typedef enum {
+    PL3_R = 0x80,
+    PL3_W = 0x40,
+    PL2_R = 0x20 | PL3_R,
+    PL2_W = 0x10 | PL3_W,
+    PL1_R = 0x08 | PL2_R,
+    PL1_W = 0x04 | PL2_W,
+    PL0_R = 0x02 | PL1_R,
+    PL0_W = 0x01 | PL1_W,
+
+    /*
+     * For user-mode some registers are accessible to EL0 via a kernel
+     * trap-and-emulate ABI. In this case we define the read permissions
+     * as actually being PL0_R. However some bits of any given register
+     * may still be masked.
+     */
+#ifdef CONFIG_USER_ONLY
+    PL0U_R = PL0_R,
+#else
+    PL0U_R = PL1_R,
+#endif
+
+    PL3_RW = PL3_R | PL3_W,
+    PL2_RW = PL2_R | PL2_W,
+    PL1_RW = PL1_R | PL1_W,
+    PL0_RW = PL0_R | PL0_W,
+} CPAccessRights;
+
+typedef enum CPAccessResult {
+    /* Access is permitted */
+    CP_ACCESS_OK = 0,
+
+    /*
+     * Combined with one of the following, the low 2 bits indicate the
+     * target exception level. If 0, the exception is taken to the usual
+     * target EL (EL1 or PL1 if in EL0, otherwise to the current EL).
+     */
+    CP_ACCESS_EL_MASK = 3,
+
+    /*
+     * Access fails due to a configurable trap or enable which would
+     * result in a categorized exception syndrome giving information about
+     * the failing instruction (ie syndrome category 0x3, 0x4, 0x5, 0x6,
+     * 0xc or 0x18).
+     */
+    CP_ACCESS_TRAP = (1 << 2),
+    CP_ACCESS_TRAP_EL2 = CP_ACCESS_TRAP | 2,
+    CP_ACCESS_TRAP_EL3 = CP_ACCESS_TRAP | 3,
+
+    /*
+     * Access fails and results in an exception syndrome 0x0 ("uncategorized").
+     * Note that this is not a catch-all case -- the set of cases which may
+     * result in this failure is specifically defined by the architecture.
+     * This trap is always to the usual target EL, never directly to a
+     * specified target EL.
+     */
+    CP_ACCESS_TRAP_UNCATEGORIZED = (2 << 2),
+} CPAccessResult;
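
Expanding the definitions above gives PL3_RW = 0xc0, PL2_RW = 0xf0,
PL1_RW = 0xfc and PL0_RW = 0xff; the cp_access_ok() helper declared later
in this header simply tests bit (current_el * 2 + isread). A worked
example, assuming access = PL1_RW:

    (0xfc >> (0 * 2 + 1)) & 1    /* EL0 read: bit 1 = 0 -> denied  */
    (0xfc >> (1 * 2 + 1)) & 1    /* EL1 read: bit 3 = 1 -> allowed */
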
+
+/* Indexes into fgt_read[] */
+#define FGTREG_HFGRTR 0
+#define FGTREG_HDFGRTR 1
+/* Indexes into fgt_write[] */
+#define FGTREG_HFGWTR 0
+#define FGTREG_HDFGWTR 1
+/* Indexes into fgt_exec[] */
+#define FGTREG_HFGITR 0
+
+FIELD(HFGRTR_EL2, AFSR0_EL1, 0, 1)
+FIELD(HFGRTR_EL2, AFSR1_EL1, 1, 1)
+FIELD(HFGRTR_EL2, AIDR_EL1, 2, 1)
+FIELD(HFGRTR_EL2, AMAIR_EL1, 3, 1)
+FIELD(HFGRTR_EL2, APDAKEY, 4, 1)
+FIELD(HFGRTR_EL2, APDBKEY, 5, 1)
+FIELD(HFGRTR_EL2, APGAKEY, 6, 1)
+FIELD(HFGRTR_EL2, APIAKEY, 7, 1)
+FIELD(HFGRTR_EL2, APIBKEY, 8, 1)
+FIELD(HFGRTR_EL2, CCSIDR_EL1, 9, 1)
+FIELD(HFGRTR_EL2, CLIDR_EL1, 10, 1)
+FIELD(HFGRTR_EL2, CONTEXTIDR_EL1, 11, 1)
+FIELD(HFGRTR_EL2, CPACR_EL1, 12, 1)
+FIELD(HFGRTR_EL2, CSSELR_EL1, 13, 1)
+FIELD(HFGRTR_EL2, CTR_EL0, 14, 1)
+FIELD(HFGRTR_EL2, DCZID_EL0, 15, 1)
+FIELD(HFGRTR_EL2, ESR_EL1, 16, 1)
+FIELD(HFGRTR_EL2, FAR_EL1, 17, 1)
+FIELD(HFGRTR_EL2, ISR_EL1, 18, 1)
+FIELD(HFGRTR_EL2, LORC_EL1, 19, 1)
+FIELD(HFGRTR_EL2, LOREA_EL1, 20, 1)
+FIELD(HFGRTR_EL2, LORID_EL1, 21, 1)
+FIELD(HFGRTR_EL2, LORN_EL1, 22, 1)
+FIELD(HFGRTR_EL2, LORSA_EL1, 23, 1)
+FIELD(HFGRTR_EL2, MAIR_EL1, 24, 1)
+FIELD(HFGRTR_EL2, MIDR_EL1, 25, 1)
+FIELD(HFGRTR_EL2, MPIDR_EL1, 26, 1)
+FIELD(HFGRTR_EL2, PAR_EL1, 27, 1)
+FIELD(HFGRTR_EL2, REVIDR_EL1, 28, 1)
+FIELD(HFGRTR_EL2, SCTLR_EL1, 29, 1)
+FIELD(HFGRTR_EL2, SCXTNUM_EL1, 30, 1)
+FIELD(HFGRTR_EL2, SCXTNUM_EL0, 31, 1)
+FIELD(HFGRTR_EL2, TCR_EL1, 32, 1)
+FIELD(HFGRTR_EL2, TPIDR_EL1, 33, 1)
+FIELD(HFGRTR_EL2, TPIDRRO_EL0, 34, 1)
+FIELD(HFGRTR_EL2, TPIDR_EL0, 35, 1)
+FIELD(HFGRTR_EL2, TTBR0_EL1, 36, 1)
+FIELD(HFGRTR_EL2, TTBR1_EL1, 37, 1)
+FIELD(HFGRTR_EL2, VBAR_EL1, 38, 1)
+FIELD(HFGRTR_EL2, ICC_IGRPENN_EL1, 39, 1)
+FIELD(HFGRTR_EL2, ERRIDR_EL1, 40, 1)
+FIELD(HFGRTR_EL2, ERRSELR_EL1, 41, 1)
+FIELD(HFGRTR_EL2, ERXFR_EL1, 42, 1)
+FIELD(HFGRTR_EL2, ERXCTLR_EL1, 43, 1)
+FIELD(HFGRTR_EL2, ERXSTATUS_EL1, 44, 1)
+FIELD(HFGRTR_EL2, ERXMISCN_EL1, 45, 1)
+FIELD(HFGRTR_EL2, ERXPFGF_EL1, 46, 1)
+FIELD(HFGRTR_EL2, ERXPFGCTL_EL1, 47, 1)
+FIELD(HFGRTR_EL2, ERXPFGCDN_EL1, 48, 1)
+FIELD(HFGRTR_EL2, ERXADDR_EL1, 49, 1)
+FIELD(HFGRTR_EL2, NACCDATA_EL1, 50, 1)
+/* 51-53: RES0 */
+FIELD(HFGRTR_EL2, NSMPRI_EL1, 54, 1)
+FIELD(HFGRTR_EL2, NTPIDR2_EL0, 55, 1)
+/* 56-63: RES0 */
+
+/* These match HFGRTR but bits for RO registers are RES0 */
+FIELD(HFGWTR_EL2, AFSR0_EL1, 0, 1)
+FIELD(HFGWTR_EL2, AFSR1_EL1, 1, 1)
+FIELD(HFGWTR_EL2, AMAIR_EL1, 3, 1)
+FIELD(HFGWTR_EL2, APDAKEY, 4, 1)
+FIELD(HFGWTR_EL2, APDBKEY, 5, 1)
+FIELD(HFGWTR_EL2, APGAKEY, 6, 1)
+FIELD(HFGWTR_EL2, APIAKEY, 7, 1)
+FIELD(HFGWTR_EL2, APIBKEY, 8, 1)
+FIELD(HFGWTR_EL2, CONTEXTIDR_EL1, 11, 1)
+FIELD(HFGWTR_EL2, CPACR_EL1, 12, 1)
+FIELD(HFGWTR_EL2, CSSELR_EL1, 13, 1)
+FIELD(HFGWTR_EL2, ESR_EL1, 16, 1)
+FIELD(HFGWTR_EL2, FAR_EL1, 17, 1)
+FIELD(HFGWTR_EL2, LORC_EL1, 19, 1)
+FIELD(HFGWTR_EL2, LOREA_EL1, 20, 1)
+FIELD(HFGWTR_EL2, LORN_EL1, 22, 1)
+FIELD(HFGWTR_EL2, LORSA_EL1, 23, 1)
+FIELD(HFGWTR_EL2, MAIR_EL1, 24, 1)
+FIELD(HFGWTR_EL2, PAR_EL1, 27, 1)
+FIELD(HFGWTR_EL2, SCTLR_EL1, 29, 1)
+FIELD(HFGWTR_EL2, SCXTNUM_EL1, 30, 1)
+FIELD(HFGWTR_EL2, SCXTNUM_EL0, 31, 1)
+FIELD(HFGWTR_EL2, TCR_EL1, 32, 1)
+FIELD(HFGWTR_EL2, TPIDR_EL1, 33, 1)
+FIELD(HFGWTR_EL2, TPIDRRO_EL0, 34, 1)
+FIELD(HFGWTR_EL2, TPIDR_EL0, 35, 1)
+FIELD(HFGWTR_EL2, TTBR0_EL1, 36, 1)
+FIELD(HFGWTR_EL2, TTBR1_EL1, 37, 1)
+FIELD(HFGWTR_EL2, VBAR_EL1, 38, 1)
+FIELD(HFGWTR_EL2, ICC_IGRPENN_EL1, 39, 1)
+FIELD(HFGWTR_EL2, ERRSELR_EL1, 41, 1)
+FIELD(HFGWTR_EL2, ERXCTLR_EL1, 43, 1)
+FIELD(HFGWTR_EL2, ERXSTATUS_EL1, 44, 1)
+FIELD(HFGWTR_EL2, ERXMISCN_EL1, 45, 1)
+FIELD(HFGWTR_EL2, ERXPFGCTL_EL1, 47, 1)
+FIELD(HFGWTR_EL2, ERXPFGCDN_EL1, 48, 1)
+FIELD(HFGWTR_EL2, ERXADDR_EL1, 49, 1)
+FIELD(HFGWTR_EL2, NACCDATA_EL1, 50, 1)
+FIELD(HFGWTR_EL2, NSMPRI_EL1, 54, 1)
+FIELD(HFGWTR_EL2, NTPIDR2_EL0, 55, 1)
+
+FIELD(HFGITR_EL2, ICIALLUIS, 0, 1)
+FIELD(HFGITR_EL2, ICIALLU, 1, 1)
+FIELD(HFGITR_EL2, ICIVAU, 2, 1)
+FIELD(HFGITR_EL2, DCIVAC, 3, 1)
+FIELD(HFGITR_EL2, DCISW, 4, 1)
+FIELD(HFGITR_EL2, DCCSW, 5, 1)
+FIELD(HFGITR_EL2, DCCISW, 6, 1)
+FIELD(HFGITR_EL2, DCCVAU, 7, 1)
+FIELD(HFGITR_EL2, DCCVAP, 8, 1)
+FIELD(HFGITR_EL2, DCCVADP, 9, 1)
+FIELD(HFGITR_EL2, DCCIVAC, 10, 1)
+FIELD(HFGITR_EL2, DCZVA, 11, 1)
+FIELD(HFGITR_EL2, ATS1E1R, 12, 1)
+FIELD(HFGITR_EL2, ATS1E1W, 13, 1)
+FIELD(HFGITR_EL2, ATS1E0R, 14, 1)
+FIELD(HFGITR_EL2, ATS1E0W, 15, 1)
+FIELD(HFGITR_EL2, ATS1E1RP, 16, 1)
+FIELD(HFGITR_EL2, ATS1E1WP, 17, 1)
+FIELD(HFGITR_EL2, TLBIVMALLE1OS, 18, 1)
+FIELD(HFGITR_EL2, TLBIVAE1OS, 19, 1)
+FIELD(HFGITR_EL2, TLBIASIDE1OS, 20, 1)
+FIELD(HFGITR_EL2, TLBIVAAE1OS, 21, 1)
+FIELD(HFGITR_EL2, TLBIVALE1OS, 22, 1)
+FIELD(HFGITR_EL2, TLBIVAALE1OS, 23, 1)
+FIELD(HFGITR_EL2, TLBIRVAE1OS, 24, 1)
+FIELD(HFGITR_EL2, TLBIRVAAE1OS, 25, 1)
+FIELD(HFGITR_EL2, TLBIRVALE1OS, 26, 1)
+FIELD(HFGITR_EL2, TLBIRVAALE1OS, 27, 1)
+FIELD(HFGITR_EL2, TLBIVMALLE1IS, 28, 1)
+FIELD(HFGITR_EL2, TLBIVAE1IS, 29, 1)
+FIELD(HFGITR_EL2, TLBIASIDE1IS, 30, 1)
+FIELD(HFGITR_EL2, TLBIVAAE1IS, 31, 1)
+FIELD(HFGITR_EL2, TLBIVALE1IS, 32, 1)
+FIELD(HFGITR_EL2, TLBIVAALE1IS, 33, 1)
+FIELD(HFGITR_EL2, TLBIRVAE1IS, 34, 1)
+FIELD(HFGITR_EL2, TLBIRVAAE1IS, 35, 1)
+FIELD(HFGITR_EL2, TLBIRVALE1IS, 36, 1)
+FIELD(HFGITR_EL2, TLBIRVAALE1IS, 37, 1)
+FIELD(HFGITR_EL2, TLBIRVAE1, 38, 1)
+FIELD(HFGITR_EL2, TLBIRVAAE1, 39, 1)
+FIELD(HFGITR_EL2, TLBIRVALE1, 40, 1)
+FIELD(HFGITR_EL2, TLBIRVAALE1, 41, 1)
+FIELD(HFGITR_EL2, TLBIVMALLE1, 42, 1)
+FIELD(HFGITR_EL2, TLBIVAE1, 43, 1)
+FIELD(HFGITR_EL2, TLBIASIDE1, 44, 1)
+FIELD(HFGITR_EL2, TLBIVAAE1, 45, 1)
+FIELD(HFGITR_EL2, TLBIVALE1, 46, 1)
+FIELD(HFGITR_EL2, TLBIVAALE1, 47, 1)
+FIELD(HFGITR_EL2, CFPRCTX, 48, 1)
+FIELD(HFGITR_EL2, DVPRCTX, 49, 1)
+FIELD(HFGITR_EL2, CPPRCTX, 50, 1)
+FIELD(HFGITR_EL2, ERET, 51, 1)
+FIELD(HFGITR_EL2, SVC_EL0, 52, 1)
+FIELD(HFGITR_EL2, SVC_EL1, 53, 1)
+FIELD(HFGITR_EL2, DCCVAC, 54, 1)
+FIELD(HFGITR_EL2, NBRBINJ, 55, 1)
+FIELD(HFGITR_EL2, NBRBIALL, 56, 1)
+
+FIELD(HDFGRTR_EL2, DBGBCRN_EL1, 0, 1)
+FIELD(HDFGRTR_EL2, DBGBVRN_EL1, 1, 1)
+FIELD(HDFGRTR_EL2, DBGWCRN_EL1, 2, 1)
+FIELD(HDFGRTR_EL2, DBGWVRN_EL1, 3, 1)
+FIELD(HDFGRTR_EL2, MDSCR_EL1, 4, 1)
+FIELD(HDFGRTR_EL2, DBGCLAIM, 5, 1)
+FIELD(HDFGRTR_EL2, DBGAUTHSTATUS_EL1, 6, 1)
+FIELD(HDFGRTR_EL2, DBGPRCR_EL1, 7, 1)
+/* 8: RES0: OSLAR_EL1 is WO */
+FIELD(HDFGRTR_EL2, OSLSR_EL1, 9, 1)
+FIELD(HDFGRTR_EL2, OSECCR_EL1, 10, 1)
+FIELD(HDFGRTR_EL2, OSDLR_EL1, 11, 1)
+FIELD(HDFGRTR_EL2, PMEVCNTRN_EL0, 12, 1)
+FIELD(HDFGRTR_EL2, PMEVTYPERN_EL0, 13, 1)
+FIELD(HDFGRTR_EL2, PMCCFILTR_EL0, 14, 1)
+FIELD(HDFGRTR_EL2, PMCCNTR_EL0, 15, 1)
+FIELD(HDFGRTR_EL2, PMCNTEN, 16, 1)
+FIELD(HDFGRTR_EL2, PMINTEN, 17, 1)
+FIELD(HDFGRTR_EL2, PMOVS, 18, 1)
+FIELD(HDFGRTR_EL2, PMSELR_EL0, 19, 1)
+/* 20: RES0: PMSWINC_EL0 is WO */
+/* 21: RES0: PMCR_EL0 is WO */
+FIELD(HDFGRTR_EL2, PMMIR_EL1, 22, 1)
+FIELD(HDFGRTR_EL2, PMBLIMITR_EL1, 23, 1)
+FIELD(HDFGRTR_EL2, PMBPTR_EL1, 24, 1)
+FIELD(HDFGRTR_EL2, PMBSR_EL1, 25, 1)
+FIELD(HDFGRTR_EL2, PMSCR_EL1, 26, 1)
+FIELD(HDFGRTR_EL2, PMSEVFR_EL1, 27, 1)
+FIELD(HDFGRTR_EL2, PMSFCR_EL1, 28, 1)
+FIELD(HDFGRTR_EL2, PMSICR_EL1, 29, 1)
+FIELD(HDFGRTR_EL2, PMSIDR_EL1, 30, 1)
+FIELD(HDFGRTR_EL2, PMSIRR_EL1, 31, 1)
+FIELD(HDFGRTR_EL2, PMSLATFR_EL1, 32, 1)
+FIELD(HDFGRTR_EL2, TRC, 33, 1)
+FIELD(HDFGRTR_EL2, TRCAUTHSTATUS, 34, 1)
+FIELD(HDFGRTR_EL2, TRCAUXCTLR, 35, 1)
+FIELD(HDFGRTR_EL2, TRCCLAIM, 36, 1)
+FIELD(HDFGRTR_EL2, TRCCNTVRn, 37, 1)
+/* 38, 39: RES0 */
+FIELD(HDFGRTR_EL2, TRCID, 40, 1)
+FIELD(HDFGRTR_EL2, TRCIMSPECN, 41, 1)
+/* 42: RES0: TRCOSLAR is WO */
+FIELD(HDFGRTR_EL2, TRCOSLSR, 43, 1)
+FIELD(HDFGRTR_EL2, TRCPRGCTLR, 44, 1)
+FIELD(HDFGRTR_EL2, TRCSEQSTR, 45, 1)
+FIELD(HDFGRTR_EL2, TRCSSCSRN, 46, 1)
+FIELD(HDFGRTR_EL2, TRCSTATR, 47, 1)
+FIELD(HDFGRTR_EL2, TRCVICTLR, 48, 1)
+/* 49: RES0: TRFCR_EL1 is WO */
+FIELD(HDFGRTR_EL2, TRBBASER_EL1, 50, 1)
+FIELD(HDFGRTR_EL2, TRBIDR_EL1, 51, 1)
+FIELD(HDFGRTR_EL2, TRBLIMITR_EL1, 52, 1)
+FIELD(HDFGRTR_EL2, TRBMAR_EL1, 53, 1)
+FIELD(HDFGRTR_EL2, TRBPTR_EL1, 54, 1)
+FIELD(HDFGRTR_EL2, TRBSR_EL1, 55, 1)
+FIELD(HDFGRTR_EL2, TRBTRG_EL1, 56, 1)
+FIELD(HDFGRTR_EL2, PMUSERENR_EL0, 57, 1)
+FIELD(HDFGRTR_EL2, PMCEIDN_EL0, 58, 1)
+FIELD(HDFGRTR_EL2, NBRBIDR, 59, 1)
+FIELD(HDFGRTR_EL2, NBRBCTL, 60, 1)
+FIELD(HDFGRTR_EL2, NBRBDATA, 61, 1)
+FIELD(HDFGRTR_EL2, NPMSNEVFR_EL1, 62, 1)
+FIELD(HDFGRTR_EL2, PMBIDR_EL1, 63, 1)
+
+/*
+ * These match HDFGRTR_EL2, but bits for RO registers are RES0.
+ * A few bits are for WO registers, where the HDFGRTR_EL2 bit is RES0.
+ */
+FIELD(HDFGWTR_EL2, DBGBCRN_EL1, 0, 1)
+FIELD(HDFGWTR_EL2, DBGBVRN_EL1, 1, 1)
+FIELD(HDFGWTR_EL2, DBGWCRN_EL1, 2, 1)
+FIELD(HDFGWTR_EL2, DBGWVRN_EL1, 3, 1)
+FIELD(HDFGWTR_EL2, MDSCR_EL1, 4, 1)
+FIELD(HDFGWTR_EL2, DBGCLAIM, 5, 1)
+FIELD(HDFGWTR_EL2, DBGPRCR_EL1, 7, 1)
+FIELD(HDFGWTR_EL2, OSLAR_EL1, 8, 1)
+FIELD(HDFGWTR_EL2, OSLSR_EL1, 9, 1)
+FIELD(HDFGWTR_EL2, OSECCR_EL1, 10, 1)
+FIELD(HDFGWTR_EL2, OSDLR_EL1, 11, 1)
+FIELD(HDFGWTR_EL2, PMEVCNTRN_EL0, 12, 1)
+FIELD(HDFGWTR_EL2, PMEVTYPERN_EL0, 13, 1)
+FIELD(HDFGWTR_EL2, PMCCFILTR_EL0, 14, 1)
+FIELD(HDFGWTR_EL2, PMCCNTR_EL0, 15, 1)
+FIELD(HDFGWTR_EL2, PMCNTEN, 16, 1)
+FIELD(HDFGWTR_EL2, PMINTEN, 17, 1)
+FIELD(HDFGWTR_EL2, PMOVS, 18, 1)
+FIELD(HDFGWTR_EL2, PMSELR_EL0, 19, 1)
+FIELD(HDFGWTR_EL2, PMSWINC_EL0, 20, 1)
+FIELD(HDFGWTR_EL2, PMCR_EL0, 21, 1)
+FIELD(HDFGWTR_EL2, PMBLIMITR_EL1, 23, 1)
+FIELD(HDFGWTR_EL2, PMBPTR_EL1, 24, 1)
+FIELD(HDFGWTR_EL2, PMBSR_EL1, 25, 1)
+FIELD(HDFGWTR_EL2, PMSCR_EL1, 26, 1)
+FIELD(HDFGWTR_EL2, PMSEVFR_EL1, 27, 1)
+FIELD(HDFGWTR_EL2, PMSFCR_EL1, 28, 1)
+FIELD(HDFGWTR_EL2, PMSICR_EL1, 29, 1)
+FIELD(HDFGWTR_EL2, PMSIRR_EL1, 31, 1)
+FIELD(HDFGWTR_EL2, PMSLATFR_EL1, 32, 1)
+FIELD(HDFGWTR_EL2, TRC, 33, 1)
+FIELD(HDFGWTR_EL2, TRCAUXCTLR, 35, 1)
+FIELD(HDFGWTR_EL2, TRCCLAIM, 36, 1)
+FIELD(HDFGWTR_EL2, TRCCNTVRn, 37, 1)
+FIELD(HDFGWTR_EL2, TRCIMSPECN, 41, 1)
+FIELD(HDFGWTR_EL2, TRCOSLAR, 42, 1)
+FIELD(HDFGWTR_EL2, TRCPRGCTLR, 44, 1)
+FIELD(HDFGWTR_EL2, TRCSEQSTR, 45, 1)
+FIELD(HDFGWTR_EL2, TRCSSCSRN, 46, 1)
+FIELD(HDFGWTR_EL2, TRCVICTLR, 48, 1)
+FIELD(HDFGWTR_EL2, TRFCR_EL1, 49, 1)
+FIELD(HDFGWTR_EL2, TRBBASER_EL1, 50, 1)
+FIELD(HDFGWTR_EL2, TRBLIMITR_EL1, 52, 1)
+FIELD(HDFGWTR_EL2, TRBMAR_EL1, 53, 1)
+FIELD(HDFGWTR_EL2, TRBPTR_EL1, 54, 1)
+FIELD(HDFGWTR_EL2, TRBSR_EL1, 55, 1)
+FIELD(HDFGWTR_EL2, TRBTRG_EL1, 56, 1)
+FIELD(HDFGWTR_EL2, PMUSERENR_EL0, 57, 1)
+FIELD(HDFGWTR_EL2, NBRBCTL, 60, 1)
+FIELD(HDFGWTR_EL2, NBRBDATA, 61, 1)
+FIELD(HDFGWTR_EL2, NPMSNEVFR_EL1, 62, 1)
+
+/* Which fine-grained trap bit register to check, if any */
+FIELD(FGT, TYPE, 10, 3)
+FIELD(FGT, REV, 9, 1)   /* Is bit sense reversed? */
+FIELD(FGT, IDX, 6, 3)   /* Index within a uint64_t[] array */
+FIELD(FGT, BITPOS, 0, 6) /* Bit position within the uint64_t */
+
+/*
+ * Macros to define FGT_##bitname enum constants to use in ARMCPRegInfo::fgt
+ * fields. We assume for brevity's sake that there are no duplicated
+ * bit names across the various FGT registers.
+ */
+#define DO_BIT(REG, BITNAME) \
+    FGT_##BITNAME = FGT_##REG | R_##REG##_EL2_##BITNAME##_SHIFT
+
+/* Some bits have reversed sense, so 0 means trap and 1 means not */
+#define DO_REV_BIT(REG, BITNAME) \
+    FGT_##BITNAME = FGT_##REG | FGT_REV | R_##REG##_EL2_##BITNAME##_SHIFT
+
+typedef enum FGTBit {
+    /*
+     * These bits tell us which register arrays to use:
+     * if FGT_R is set then reads are checked against fgt_read[];
+     * if FGT_W is set then writes are checked against fgt_write[];
+     * if FGT_EXEC is set then all accesses are checked against fgt_exec[].
+     *
+     * For almost all bits in the R/W register pairs, the bit exists in
+     * both registers for a RW register, in HFGRTR/HDFGRTR for a RO register
+     * with the corresponding HFGWTR/HDFGTWTR bit being RES0, and vice-versa
+     * for a WO register. There are unfortunately a couple of exceptions
+     * (PMCR_EL0, TRFCR_EL1) where the register being trapped is RW but
+     * the FGT system only allows trapping of writes, not reads.
+     *
+     * Note that we arrange these bits so that a 0 FGTBit means "no trap".
+     */
+    FGT_R = 1 << R_FGT_TYPE_SHIFT,
+    FGT_W = 2 << R_FGT_TYPE_SHIFT,
+    FGT_EXEC = 4 << R_FGT_TYPE_SHIFT,
+    FGT_RW = FGT_R | FGT_W,
+    /* Bit to identify whether trap bit is reversed sense */
+    FGT_REV = R_FGT_REV_MASK,
+
+    /*
+     * If a bit exists in HFGRTR/HDFGRTR then either the register being
+     * trapped is RO or the bit also exists in HFGWTR/HDFGWTR, so we either
+     * want to trap for both reads and writes or else it's harmless to mark
+     * it as trap-on-writes.
+     * If a bit exists only in HFGWTR/HDFGWTR then either the register being
+     * trapped is WO, or else it is one of the two oddball special cases
+     * which are RW but have only a write trap. We mark these as only
+     * FGT_W so we get the right behaviour for those special cases.
+     * (If a bit was added in future that provided only a read trap for an
+     * RW register we'd need to do something special to get the FGT_R bit
+     * only. But this seems unlikely to happen.)
+     *
+     * So for the DO_BIT/DO_REV_BIT macros: use FGT_HFGRTR/FGT_HDFGRTR if
+     * the bit exists in that register. Otherwise use FGT_HFGWTR/FGT_HDFGWTR.
+     */
+    FGT_HFGRTR = FGT_RW | (FGTREG_HFGRTR << R_FGT_IDX_SHIFT),
+    FGT_HFGWTR = FGT_W | (FGTREG_HFGWTR << R_FGT_IDX_SHIFT),
+    FGT_HDFGRTR = FGT_RW | (FGTREG_HDFGRTR << R_FGT_IDX_SHIFT),
+    FGT_HDFGWTR = FGT_W | (FGTREG_HDFGWTR << R_FGT_IDX_SHIFT),
+    FGT_HFGITR = FGT_EXEC | (FGTREG_HFGITR << R_FGT_IDX_SHIFT),
+
+    /* Trap bits in HFGRTR_EL2 / HFGWTR_EL2, starting from bit 0. */
+    DO_BIT(HFGRTR, AFSR0_EL1),
+    DO_BIT(HFGRTR, AFSR1_EL1),
+    DO_BIT(HFGRTR, AIDR_EL1),
+    DO_BIT(HFGRTR, AMAIR_EL1),
+    DO_BIT(HFGRTR, APDAKEY),
+    DO_BIT(HFGRTR, APDBKEY),
+    DO_BIT(HFGRTR, APGAKEY),
+    DO_BIT(HFGRTR, APIAKEY),
+    DO_BIT(HFGRTR, APIBKEY),
+    DO_BIT(HFGRTR, CCSIDR_EL1),
+    DO_BIT(HFGRTR, CLIDR_EL1),
+    DO_BIT(HFGRTR, CONTEXTIDR_EL1),
+    DO_BIT(HFGRTR, CPACR_EL1),
+    DO_BIT(HFGRTR, CSSELR_EL1),
+    DO_BIT(HFGRTR, CTR_EL0),
+    DO_BIT(HFGRTR, DCZID_EL0),
+    DO_BIT(HFGRTR, ESR_EL1),
+    DO_BIT(HFGRTR, FAR_EL1),
+    DO_BIT(HFGRTR, ISR_EL1),
+    DO_BIT(HFGRTR, LORC_EL1),
+    DO_BIT(HFGRTR, LOREA_EL1),
+    DO_BIT(HFGRTR, LORID_EL1),
+    DO_BIT(HFGRTR, LORN_EL1),
+    DO_BIT(HFGRTR, LORSA_EL1),
+    DO_BIT(HFGRTR, MAIR_EL1),
+    DO_BIT(HFGRTR, MIDR_EL1),
+    DO_BIT(HFGRTR, MPIDR_EL1),
+    DO_BIT(HFGRTR, PAR_EL1),
+    DO_BIT(HFGRTR, REVIDR_EL1),
+    DO_BIT(HFGRTR, SCTLR_EL1),
+    DO_BIT(HFGRTR, SCXTNUM_EL1),
+    DO_BIT(HFGRTR, SCXTNUM_EL0),
+    DO_BIT(HFGRTR, TCR_EL1),
+    DO_BIT(HFGRTR, TPIDR_EL1),
+    DO_BIT(HFGRTR, TPIDRRO_EL0),
+    DO_BIT(HFGRTR, TPIDR_EL0),
+    DO_BIT(HFGRTR, TTBR0_EL1),
+    DO_BIT(HFGRTR, TTBR1_EL1),
+    DO_BIT(HFGRTR, VBAR_EL1),
+    DO_BIT(HFGRTR, ICC_IGRPENN_EL1),
+    DO_BIT(HFGRTR, ERRIDR_EL1),
+    DO_REV_BIT(HFGRTR, NSMPRI_EL1),
+    DO_REV_BIT(HFGRTR, NTPIDR2_EL0),
+
+    /* Trap bits in HDFGRTR_EL2 / HDFGWTR_EL2, starting from bit 0. */
+    DO_BIT(HDFGRTR, DBGBCRN_EL1),
+    DO_BIT(HDFGRTR, DBGBVRN_EL1),
+    DO_BIT(HDFGRTR, DBGWCRN_EL1),
+    DO_BIT(HDFGRTR, DBGWVRN_EL1),
+    DO_BIT(HDFGRTR, MDSCR_EL1),
+    DO_BIT(HDFGRTR, DBGCLAIM),
+    DO_BIT(HDFGWTR, OSLAR_EL1),
+    DO_BIT(HDFGRTR, OSLSR_EL1),
+    DO_BIT(HDFGRTR, OSECCR_EL1),
+    DO_BIT(HDFGRTR, OSDLR_EL1),
+    DO_BIT(HDFGRTR, PMEVCNTRN_EL0),
+    DO_BIT(HDFGRTR, PMEVTYPERN_EL0),
+    DO_BIT(HDFGRTR, PMCCFILTR_EL0),
+    DO_BIT(HDFGRTR, PMCCNTR_EL0),
+    DO_BIT(HDFGRTR, PMCNTEN),
+    DO_BIT(HDFGRTR, PMINTEN),
+    DO_BIT(HDFGRTR, PMOVS),
+    DO_BIT(HDFGRTR, PMSELR_EL0),
+    DO_BIT(HDFGWTR, PMSWINC_EL0),
+    DO_BIT(HDFGWTR, PMCR_EL0),
+    DO_BIT(HDFGRTR, PMMIR_EL1),
+    DO_BIT(HDFGRTR, PMCEIDN_EL0),
+
+    /* Trap bits in HFGITR_EL2, starting from bit 0 */
+    DO_BIT(HFGITR, ICIALLUIS),
+    DO_BIT(HFGITR, ICIALLU),
+    DO_BIT(HFGITR, ICIVAU),
+    DO_BIT(HFGITR, DCIVAC),
+    DO_BIT(HFGITR, DCISW),
+    DO_BIT(HFGITR, DCCSW),
+    DO_BIT(HFGITR, DCCISW),
+    DO_BIT(HFGITR, DCCVAU),
+    DO_BIT(HFGITR, DCCVAP),
+    DO_BIT(HFGITR, DCCVADP),
+    DO_BIT(HFGITR, DCCIVAC),
+    DO_BIT(HFGITR, DCZVA),
+    DO_BIT(HFGITR, ATS1E1R),
+    DO_BIT(HFGITR, ATS1E1W),
+    DO_BIT(HFGITR, ATS1E0R),
+    DO_BIT(HFGITR, ATS1E0W),
+    DO_BIT(HFGITR, ATS1E1RP),
+    DO_BIT(HFGITR, ATS1E1WP),
+    DO_BIT(HFGITR, TLBIVMALLE1OS),
+    DO_BIT(HFGITR, TLBIVAE1OS),
+    DO_BIT(HFGITR, TLBIASIDE1OS),
+    DO_BIT(HFGITR, TLBIVAAE1OS),
+    DO_BIT(HFGITR, TLBIVALE1OS),
+    DO_BIT(HFGITR, TLBIVAALE1OS),
+    DO_BIT(HFGITR, TLBIRVAE1OS),
+    DO_BIT(HFGITR, TLBIRVAAE1OS),
+    DO_BIT(HFGITR, TLBIRVALE1OS),
+    DO_BIT(HFGITR, TLBIRVAALE1OS),
+    DO_BIT(HFGITR, TLBIVMALLE1IS),
+    DO_BIT(HFGITR, TLBIVAE1IS),
+    DO_BIT(HFGITR, TLBIASIDE1IS),
+    DO_BIT(HFGITR, TLBIVAAE1IS),
+    DO_BIT(HFGITR, TLBIVALE1IS),
+    DO_BIT(HFGITR, TLBIVAALE1IS),
+    DO_BIT(HFGITR, TLBIRVAE1IS),
+    DO_BIT(HFGITR, TLBIRVAAE1IS),
+    DO_BIT(HFGITR, TLBIRVALE1IS),
+    DO_BIT(HFGITR, TLBIRVAALE1IS),
+    DO_BIT(HFGITR, TLBIRVAE1),
+    DO_BIT(HFGITR, TLBIRVAAE1),
+    DO_BIT(HFGITR, TLBIRVALE1),
+    DO_BIT(HFGITR, TLBIRVAALE1),
+    DO_BIT(HFGITR, TLBIVMALLE1),
+    DO_BIT(HFGITR, TLBIVAE1),
+    DO_BIT(HFGITR, TLBIASIDE1),
+    DO_BIT(HFGITR, TLBIVAAE1),
+    DO_BIT(HFGITR, TLBIVALE1),
+    DO_BIT(HFGITR, TLBIVAALE1),
+    DO_BIT(HFGITR, CFPRCTX),
+    DO_BIT(HFGITR, DVPRCTX),
+    DO_BIT(HFGITR, CPPRCTX),
+    DO_BIT(HFGITR, DCCVAC),
+} FGTBit;
+
+#undef DO_BIT
+#undef DO_REV_BIT
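
One mechanical expansion of the macro, for reference (editorial):
DO_BIT(HFGRTR, TTBR0_EL1) defines

    FGT_TTBR0_EL1 = FGT_HFGRTR | R_HFGRTR_EL2_TTBR0_EL1_SHIFT
                  = FGT_RW | (FGTREG_HFGRTR << R_FGT_IDX_SHIFT) | 36

so an access check knows to consult fgt_read[FGTREG_HFGRTR] /
fgt_write[FGTREG_HFGWTR] and test bit 36, matching
FIELD(HFGRTR_EL2, TTBR0_EL1, 36, 1) above.
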
+
+typedef struct ARMCPRegInfo ARMCPRegInfo;
+
+/*
+ * Access functions for coprocessor registers. These cannot fail and
+ * may not raise exceptions.
+ */
+typedef uint64_t CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque);
+typedef void CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque,
+                       uint64_t value);
+/* Access permission check functions for coprocessor registers. */
+typedef CPAccessResult CPAccessFn(CPUARMState *env,
+                                  const ARMCPRegInfo *opaque,
+                                  bool isread);
+/* Hook function for register reset */
+typedef void CPResetFn(CPUARMState *env, const ARMCPRegInfo *opaque);
+
+#define CP_ANY 0xff
+
+/* Flags in the high bits of nv2_redirect_offset */
+#define NV2_REDIR_NV1 0x4000    /* Only redirect when HCR_EL2.NV1 == 1 */
+#define NV2_REDIR_NO_NV1 0x8000 /* Only redirect when HCR_EL2.NV1 == 0 */
+#define NV2_REDIR_FLAG_MASK 0xc000
+
+/* Definition of an ARM coprocessor register */
+struct ARMCPRegInfo {
+    /* Name of register (useful mainly for debugging, need not be unique) */
+    const char *name;
+    /*
+     * Location of register: coprocessor number and (crn,crm,opc1,opc2)
+     * tuple. Any of crm, opc1 and opc2 may be CP_ANY to indicate a
+     * 'wildcard' field -- any value of that field in the MRC/MCR insn
+     * will be decoded to this register. The register read and write
+     * callbacks will be passed an ARMCPRegInfo with the crn/crm/opc1/opc2
+     * used by the program, so it is possible to register a wildcard and
+     * then behave differently on read/write if necessary.
+     * For 64 bit registers, only crm and opc1 are relevant; crn and opc2
+     * must both be zero.
+     * For AArch64-visible registers, opc0 is also used.
+     * Since there are no "coprocessors" in AArch64, cp is purely used as a
+     * way to distinguish (for KVM's benefit) guest-visible system registers
+     * from demuxed ones provided to preserve the "no side effects on
+     * KVM register read/write from QEMU" semantics. cp==0x13 is guest
+     * visible (to match KVM's encoding); cp==0 will be converted to
+     * cp==0x13 when the ARMCPRegInfo is registered, for convenience.
+     */
+    uint8_t cp;
+    uint8_t crn;
+    uint8_t crm;
+    uint8_t opc0;
+    uint8_t opc1;
+    uint8_t opc2;
+    /* Execution state in which this register is visible: ARM_CP_STATE_* */
+    CPState state;
+    /* Register type: ARM_CP_* bits/values */
+    int type;
+    /* Access rights: PL*_[RW] */
+    CPAccessRights access;
+    /* Security state: ARM_CP_SECSTATE_* bits/values */
+    CPSecureState secure;
+    /*
+     * Which fine-grained trap register bit to check, if any. This
+     * value encodes both the trap register and bit within it.
+     */
+    FGTBit fgt;
+
+    /*
+     * Offset from VNCR_EL2 when FEAT_NV2 redirects access to memory;
+     * may include an NV2_REDIR_* flag.
+     */
+    uint32_t nv2_redirect_offset;
+
+    /*
+     * The opaque pointer passed to define_arm_cp_regs_with_opaque() when
+     * this register was defined: can be used to hand data through to the
+     * register read/write functions, since they are passed the ARMCPRegInfo*.
+     */
+    void *opaque;
+    /*
+     * Value of this register, if it is ARM_CP_CONST. Otherwise, if
+     * fieldoffset is non-zero, the reset value of the register.
+     */
+    uint64_t resetvalue;
+    /*
+     * Offset of the field in CPUARMState for this register.
+     * This is not needed if either:
+     *  1. type is ARM_CP_CONST or one of the ARM_CP_SPECIALs
+     *  2. both readfn and writefn are specified
+     */
+    ptrdiff_t fieldoffset; /* offsetof(CPUARMState, field) */
+
+    /*
+     * Offsets of the secure and non-secure fields in CPUARMState for the
+     * register if it is banked. These fields are only used during the static
+     * registration of a register. During hashing the bank associated
+     * with a given security state is copied to fieldoffset which is used from
+     * there on out.
+     *
+     * It is expected that register definitions use either fieldoffset or
+     * bank_fieldoffsets in the definition but not both. It is also expected
+     * that both bank offsets are set when defining a banked register. This
+     * use indicates that a register is banked.
+     */
+    ptrdiff_t bank_fieldoffsets[2];
+
+    /*
+     * Function for making any access checks for this register in addition to
+     * those specified by the 'access' permissions bits. If NULL, no extra
+     * checks required. The access check is performed at runtime, not at
+     * translate time.
+     */
+    CPAccessFn *accessfn;
+    /*
+     * Function for handling reads of this register. If NULL, then reads
+     * will be done by loading from the offset into CPUARMState specified
+     * by fieldoffset.
+     */
+    CPReadFn *readfn;
+    /*
+     * Function for handling writes of this register. If NULL, then writes
+     * will be done by writing to the offset into CPUARMState specified
+     * by fieldoffset.
+     */
+    CPWriteFn *writefn;
+    /*
+     * Function for doing a "raw" read; used when we need to copy
+     * coprocessor state to the kernel for KVM or out for
+     * migration. This only needs to be provided if there is also a
+     * readfn and it has side effects (for instance clear-on-read bits).
+     */
+    CPReadFn *raw_readfn;
+    /*
+     * Function for doing a "raw" write; used when we need to copy KVM
+     * kernel coprocessor state into userspace, or for inbound
+     * migration. This only needs to be provided if there is also a
+     * writefn and it masks out "unwritable" bits or has write-one-to-clear
+     * or similar behaviour.
+     */
+    CPWriteFn *raw_writefn;
+    /*
+     * Function for resetting the register. If NULL, then reset will be done
+     * by writing resetvalue to the field specified in fieldoffset. If
+     * fieldoffset is 0 then no reset will be done.
+     */
+    CPResetFn *resetfn;
+
+    /*
+     * "Original" readfn, writefn, accessfn.
+     * For ARMv8.1-VHE register aliases, we overwrite the read/write
+     * accessor functions of various EL1/EL0 to perform the runtime
+     * check for which sysreg should actually be modified, and then
+     * forwards the operation. Before overwriting the accessors,
+     * the original function is copied here, so that accesses that
+     * really do go to the EL1/EL0 version proceed normally.
+     * (The corresponding EL2 register is linked via opaque.)
+     */
+    CPReadFn *orig_readfn;
+    CPWriteFn *orig_writefn;
+    CPAccessFn *orig_accessfn;
+};
+
+/*
+ * Macros which are lvalues for the field in CPUARMState for the
+ * ARMCPRegInfo *ri.
+ */
+#define CPREG_FIELD32(env, ri) \
+    (*(uint32_t *)((char *)(env) + (ri)->fieldoffset))
+#define CPREG_FIELD64(env, ri) \
+    (*(uint64_t *)((char *)(env) + (ri)->fieldoffset))
+
+void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, const ARMCPRegInfo *reg,
+                                       void *opaque);
+
+static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs)
+{
+    define_one_arm_cp_reg_with_opaque(cpu, regs, NULL);
+}
+
+void define_arm_cp_regs_with_opaque_len(ARMCPU *cpu, const ARMCPRegInfo *regs,
+                                        void *opaque, size_t len);
+
+#define define_arm_cp_regs_with_opaque(CPU, REGS, OPAQUE)               \
+    do {                                                                \
+        QEMU_BUILD_BUG_ON(ARRAY_SIZE(REGS) == 0);                       \
+        define_arm_cp_regs_with_opaque_len(CPU, REGS, OPAQUE,           \
+                                           ARRAY_SIZE(REGS));           \
+    } while (0)
+
+#define define_arm_cp_regs(CPU, REGS) \
+    define_arm_cp_regs_with_opaque(CPU, REGS, NULL)
+
+const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp);
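
A minimal usage sketch, following the pattern of cortex-regs.c earlier in
this patch (the register below is hypothetical, purely illustrative):

    static const ARMCPRegInfo demo_reginfo[] = {
        /* Hypothetical: constant register, reads-as-zero, writes ignored */
        { .name = "DEMO_EL1", .state = ARM_CP_STATE_AA64,
          .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 0, .opc2 = 0,
          .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
    };

    define_arm_cp_regs(cpu, demo_reginfo);
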
+
+/*
+ * Definition of an ARM co-processor register as viewed from
+ * userspace. This is used for presenting sanitised versions of
+ * registers to userspace when emulating the Linux AArch64 CPU
+ * ID/feature ABI (advertised as HWCAP_CPUID).
+ */
+typedef struct ARMCPRegUserSpaceInfo {
+    /* Name of register */
+    const char *name;
+
+    /* Is the name actually a glob pattern */
+    bool is_glob;
+
+    /* Only some bits are exported to user space */
+    uint64_t exported_bits;
+
+    /* Fixed bits are applied after the mask */
+    uint64_t fixed_bits;
+} ARMCPRegUserSpaceInfo;
+
+void modify_arm_cp_regs_with_len(ARMCPRegInfo *regs, size_t regs_len,
+                                 const ARMCPRegUserSpaceInfo *mods,
+                                 size_t mods_len);
+
+#define modify_arm_cp_regs(REGS, MODS)                                  \
+    do {                                                                \
+        QEMU_BUILD_BUG_ON(ARRAY_SIZE(REGS) == 0);                       \
+        QEMU_BUILD_BUG_ON(ARRAY_SIZE(MODS) == 0);                       \
+        modify_arm_cp_regs_with_len(REGS, ARRAY_SIZE(REGS),             \
+                                    MODS, ARRAY_SIZE(MODS));            \
+    } while (0)
+
+/* CPWriteFn that can be used to implement writes-ignored behaviour */
+void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
+                         uint64_t value);
+/* CPReadFn that can be used for read-as-zero behaviour */
+uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri);
+
+/* CPWriteFn that just writes the value to ri->fieldoffset */
+void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value);
+
+/*
+ * CPResetFn that does nothing, for use if no reset is required even
+ * if fieldoffset is non zero.
+ */
+void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque);
+
+/*
+ * Return true if this reginfo struct's field in the cpu state struct
+ * is 64 bits wide.
+ */
+static inline bool cpreg_field_is_64bit(const ARMCPRegInfo *ri)
+{
+    return (ri->state == ARM_CP_STATE_AA64) || (ri->type & ARM_CP_64BIT);
+}
+
+static inline bool cp_access_ok(int current_el,
+                                const ARMCPRegInfo *ri, int isread)
+{
+    return (ri->access >> ((current_el * 2) + isread)) & 1;
+}
+
+/* Raw read of a coprocessor register (as needed for migration, etc) */
+uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri);
+
+/*
+ * Return true if the cp register encoding is in the "feature ID space" as
+ * defined by FEAT_IDST (and thus should be reported with ESR_ELx.EC
+ * as EC_SYSTEMREGISTERTRAP rather than EC_UNCATEGORIZED).
+ */
+static inline bool arm_cpreg_encoding_in_idspace(uint8_t opc0, uint8_t opc1,
+                                                 uint8_t opc2,
+                                                 uint8_t crn, uint8_t crm)
+{
+    return opc0 == 3 && (opc1 == 0 || opc1 == 1 || opc1 == 3) &&
+        crn == 0 && crm < 8;
+}
+
+/*
+ * As arm_cpreg_encoding_in_idspace(), but take the encoding from an
+ * ARMCPRegInfo.
+ */
+static inline bool arm_cpreg_in_idspace(const ARMCPRegInfo *ri)
+{
+    return ri->state == ARM_CP_STATE_AA64 &&
+        arm_cpreg_encoding_in_idspace(ri->opc0, ri->opc1, ri->opc2,
+                                      ri->crn, ri->crm);
+}
+
+#ifdef CONFIG_USER_ONLY
+static inline void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) { }
+#else
+void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu);
+#endif
+
+CPAccessResult access_tvm_trvm(CPUARMState *, const ARMCPRegInfo *, bool);
+
+/**
+ * arm_cpreg_traps_in_nv: Return true if cpreg traps in nested virtualization
+ *
+ * Return true if this cpreg is one which should be trapped to EL2 if
+ * it is executed at EL1 when nested virtualization is enabled via HCR_EL2.NV.
+ */
+static inline bool arm_cpreg_traps_in_nv(const ARMCPRegInfo *ri)
+{
+    /*
+     * The Arm ARM defines the registers to be trapped in terms of
+     * their names (I_TZTZL). However the underlying principle is "if
+     * it would UNDEF at EL1 but work at EL2 then it should trap", and
+     * the way the encoding of sysregs and system instructions is done
+     * means that the right set of registers is exactly those where
+     * the opc1 field is 4 or 5.
+
+/**
+ * arm_cpreg_traps_in_nv: Return true if cpreg traps in nested virtualization
+ *
+ * Return true if this cpreg is one which should be trapped to EL2 if
+ * it is executed at EL1 when nested virtualization is enabled via HCR_EL2.NV.
+ */
+static inline bool arm_cpreg_traps_in_nv(const ARMCPRegInfo *ri)
+{
+    /*
+     * The Arm ARM defines the registers to be trapped in terms of
+     * their names (I_TZTZL). However the underlying principle is "if
+     * it would UNDEF at EL1 but work at EL2 then it should trap", and
+     * the way the encoding of sysregs and system instructions is done
+     * means that the right set of registers is exactly those where
+     * the opc1 field is 4 or 5. (You can see this also in the assert
+     * we do that the opc1 field and the permissions mask line up in
+     * define_one_arm_cp_reg_with_opaque().)
+     * Checking the opc1 field is easier for us and avoids the problem
+     * that we do not consistently use the right architectural names
+     * for all sysregs, since we treat the name field as largely for debug.
+     *
+     * Whichever way we do this check, it is going to be at least
+     * potentially fragile in the face of future new sysregs, but this
+     * approach seems the least likely to break.
+     *
+     * In particular, note that the released sysreg XML defines that
+     * the FEAT_MEC sysregs and instructions do not follow this FEAT_NV
+     * trapping rule, so we will need to add an ARM_CP_* flag to indicate
+     * "register does not trap on NV" to handle those if/when we implement
+     * FEAT_MEC.
+     */
+    return ri->opc1 == 4 || ri->opc1 == 5;
+}
+
+#endif /* TARGET_ARM_CPREGS_H */
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
new file mode 100644
index 0000000000..e5758d9fbc
--- /dev/null
+++ b/target/arm/cpu-features.h
@@ -0,0 +1,1021 @@
+/*
+ * QEMU Arm CPU -- feature test functions
+ *
+ * Copyright (c) 2023 Linaro Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TARGET_ARM_FEATURES_H
+#define TARGET_ARM_FEATURES_H
+
+#include "hw/registerfields.h"
+
+/*
+ * Naming convention for isar_feature functions:
+ * Functions which test 32-bit ID registers should have _aa32_ in
+ * their name. Functions which test 64-bit ID registers should have
+ * _aa64_ in their name. These must only be used in code where we
+ * know for certain that the CPU has AArch32 or AArch64 respectively
+ * or where the correct answer for a CPU which doesn't implement that
+ * CPU state is "false" (e.g. when generating A32 or A64 code, if adding
+ * system registers that are specific to that CPU state, for "should
+ * we let this system register bit be set" tests where the 32-bit
+ * flavour of the register doesn't have the bit, and so on).
+ * Functions which simply ask "does this feature exist at all" have
+ * _any_ in their name, and always return the logical OR of the _aa64_
+ * and the _aa32_ function.
+ */
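[note: these predicates are rarely called directly; most of the tree goes
through the cpu_isar_feature() macro defined at the end of this header, which
takes an ARMCPU pointer and appends the feature name. A minimal sketch of a
caller -- demo_has_thumb_div() is illustrative, not part of this patch:

    static bool demo_has_thumb_div(ARMCPU *cpu)
    {
        /* Expands to isar_feature_aa32_thumb_div(&cpu->isar) */
        return cpu_isar_feature(aa32_thumb_div, cpu);
    }
]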
+
+/*
+ * 32-bit feature tests via id registers.
+ */
+static inline bool isar_feature_aa32_thumb_div(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0;
+}
+
+static inline bool isar_feature_aa32_arm_div(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1;
+}
+
+static inline bool isar_feature_aa32_lob(const ARMISARegisters *id)
+{
+    /* (M-profile) low-overhead loops and branch future */
+    return FIELD_EX32(id->id_isar0, ID_ISAR0, CMPBRANCH) >= 3;
+}
+
+static inline bool isar_feature_aa32_jazelle(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0;
+}
+
+static inline bool isar_feature_aa32_aes(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) != 0;
+}
+
+static inline bool isar_feature_aa32_pmull(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) > 1;
+}
+
+static inline bool isar_feature_aa32_sha1(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA1) != 0;
+}
+
+static inline bool isar_feature_aa32_sha2(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA2) != 0;
+}
+
+static inline bool isar_feature_aa32_crc32(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, CRC32) != 0;
+}
+
+static inline bool isar_feature_aa32_rdm(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, RDM) != 0;
+}
+
+static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0;
+}
+
+static inline bool isar_feature_aa32_jscvt(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar6, ID_ISAR6, JSCVT) != 0;
+}
+
+static inline bool isar_feature_aa32_dp(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0;
+}
+
+static inline bool isar_feature_aa32_fhm(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar6, ID_ISAR6, FHM) != 0;
+}
+
+static inline bool isar_feature_aa32_sb(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar6, ID_ISAR6, SB) != 0;
+}
+
+static inline bool isar_feature_aa32_predinv(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar6, ID_ISAR6, SPECRES) != 0;
+}
+
+static inline bool isar_feature_aa32_bf16(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar6, ID_ISAR6, BF16) != 0;
+}
+
+static inline bool isar_feature_aa32_i8mm(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_isar6, ID_ISAR6, I8MM) != 0;
+}
+
+static inline bool isar_feature_aa32_ras(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_pfr0, ID_PFR0, RAS) != 0;
+}
+
+static inline bool isar_feature_aa32_mprofile(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->id_pfr1, ID_PFR1, MPROGMOD) != 0;
+}
+
+static inline bool isar_feature_aa32_m_sec_state(const ARMISARegisters *id)
+{
+    /*
+     * Return true if M-profile state handling insns
+     * (VSCCLRM, CLRM, FPCTX access insns) are implemented
+     */
+    return FIELD_EX32(id->id_pfr1, ID_PFR1, SECURITY) >= 3;
+}
+
+static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id)
+{
+    /* Sadly this is encoded differently for A-profile and M-profile */
+    if (isar_feature_aa32_mprofile(id)) {
+        return FIELD_EX32(id->mvfr1, MVFR1, FP16) > 0;
+    } else {
+        return FIELD_EX32(id->mvfr1, MVFR1, FPHP) >= 3;
+    }
+}
+
+static inline bool isar_feature_aa32_mve(const ARMISARegisters *id)
+{
+    /*
+     * Return true if MVE is supported (either integer or floating point).
+     * We must check for M-profile as the MVFR1 field means something
+     * else for A-profile.
+     */
+    return isar_feature_aa32_mprofile(id) &&
+        FIELD_EX32(id->mvfr1, MVFR1, MVE) > 0;
+}
+
+static inline bool isar_feature_aa32_mve_fp(const ARMISARegisters *id)
+{
+    /*
+     * Return true if floating-point MVE is supported.
+     * We must check for M-profile as the MVFR1 field means something
+     * else for A-profile.
+     */
+    return isar_feature_aa32_mprofile(id) &&
+        FIELD_EX32(id->mvfr1, MVFR1, MVE) >= 2;
+}
+
+static inline bool isar_feature_aa32_vfp_simd(const ARMISARegisters *id)
+{
+    /*
+     * Return true if either VFP or SIMD is implemented.
+     * In this case, a minimum of VFP with D0-D15.
+     */
+    return FIELD_EX32(id->mvfr0, MVFR0, SIMDREG) > 0;
+}
+
+static inline bool isar_feature_aa32_simd_r32(const ARMISARegisters *id)
+{
+    /* Return true if D16-D31 are implemented */
+    return FIELD_EX32(id->mvfr0, MVFR0, SIMDREG) >= 2;
+}
+
+static inline bool isar_feature_aa32_fpshvec(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->mvfr0, MVFR0, FPSHVEC) > 0;
+}
+
+static inline bool isar_feature_aa32_fpsp_v2(const ARMISARegisters *id)
+{
+    /* Return true if CPU supports single precision floating point, VFPv2 */
+    return FIELD_EX32(id->mvfr0, MVFR0, FPSP) > 0;
+}
+
+static inline bool isar_feature_aa32_fpsp_v3(const ARMISARegisters *id)
+{
+    /* Return true if CPU supports single precision floating point, VFPv3 */
+    return FIELD_EX32(id->mvfr0, MVFR0, FPSP) >= 2;
+}
+
+static inline bool isar_feature_aa32_fpdp_v2(const ARMISARegisters *id)
+{
+    /* Return true if CPU supports double precision floating point, VFPv2 */
+    return FIELD_EX32(id->mvfr0, MVFR0, FPDP) > 0;
+}
+
+static inline bool isar_feature_aa32_fpdp_v3(const ARMISARegisters *id)
+{
+    /* Return true if CPU supports double precision floating point, VFPv3 */
+    return FIELD_EX32(id->mvfr0, MVFR0, FPDP) >= 2;
+}
+
+static inline bool isar_feature_aa32_vfp(const ARMISARegisters *id)
+{
+    return isar_feature_aa32_fpsp_v2(id) || isar_feature_aa32_fpdp_v2(id);
+}
+
+/*
+ * We always set the FP and SIMD FP16 fields to indicate identical
+ * levels of support (assuming SIMD is implemented at all), so
+ * we only need one set of accessors.
+ */
+static inline bool isar_feature_aa32_fp16_spconv(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 0;
+}
+
+static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id)
+{
+    return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1;
+}
+
+/*
+ * Note that this ID register field covers both VFP and Neon FMAC,
+ * so should usually be tested in combination with some other
+ * check that confirms the presence of whichever of VFP or Neon is
+ * relevant, to avoid accidentally enabling a Neon feature on
+ * a VFP-no-Neon core or vice-versa.
+ */ +static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id) +{ + return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0; +} + +static inline bool isar_feature_aa32_vsel(const ARMISARegisters *id) +{ + return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 1; +} + +static inline bool isar_feature_aa32_vcvt_dr(const ARMISARegisters *id) +{ + return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 2; +} + +static inline bool isar_feature_aa32_vrint(const ARMISARegisters *id) +{ + return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 3; +} + +static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id) +{ + return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 4; +} + +static inline bool isar_feature_aa32_pxn(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr0, ID_MMFR0, VMSA) >= 4; +} + +static inline bool isar_feature_aa32_pan(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) != 0; +} + +static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) >= 2; +} + +static inline bool isar_feature_aa32_pmuv3p1(const ARMISARegisters *id) +{ + /* 0xf means "non-standard IMPDEF PMU" */ + return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 4 && + FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; +} + +static inline bool isar_feature_aa32_pmuv3p4(const ARMISARegisters *id) +{ + /* 0xf means "non-standard IMPDEF PMU" */ + return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 5 && + FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; +} + +static inline bool isar_feature_aa32_pmuv3p5(const ARMISARegisters *id) +{ + /* 0xf means "non-standard IMPDEF PMU" */ + return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 6 && + FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; +} + +static inline bool isar_feature_aa32_hpd(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr4, ID_MMFR4, HPDS) != 0; +} + +static inline bool isar_feature_aa32_ac2(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr4, ID_MMFR4, AC2) != 0; +} + +static inline bool isar_feature_aa32_ccidx(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr4, ID_MMFR4, CCIDX) != 0; +} + +static inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0; +} + +static inline bool isar_feature_aa32_half_evt(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr4, ID_MMFR4, EVT) >= 1; +} + +static inline bool isar_feature_aa32_evt(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr4, ID_MMFR4, EVT) >= 2; +} + +static inline bool isar_feature_aa32_dit(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_pfr0, ID_PFR0, DIT) != 0; +} + +static inline bool isar_feature_aa32_ssbs(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_pfr2, ID_PFR2, SSBS) != 0; +} + +static inline bool isar_feature_aa32_debugv7p1(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_dfr0, ID_DFR0, COPDBG) >= 5; +} + +static inline bool isar_feature_aa32_debugv8p2(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_dfr0, ID_DFR0, COPDBG) >= 8; +} + +static inline bool isar_feature_aa32_doublelock(const ARMISARegisters *id) +{ + return FIELD_EX32(id->dbgdevid, DBGDEVID, DOUBLELOCK) > 0; +} + +/* + * 64-bit feature tests via id registers. 
+ */ +static inline bool isar_feature_aa64_aes(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) != 0; +} + +static inline bool isar_feature_aa64_pmull(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) > 1; +} + +static inline bool isar_feature_aa64_sha1(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA1) != 0; +} + +static inline bool isar_feature_aa64_sha256(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) != 0; +} + +static inline bool isar_feature_aa64_sha512(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) > 1; +} + +static inline bool isar_feature_aa64_crc32(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, CRC32) != 0; +} + +static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, ATOMIC) != 0; +} + +static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RDM) != 0; +} + +static inline bool isar_feature_aa64_sha3(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA3) != 0; +} + +static inline bool isar_feature_aa64_sm3(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM3) != 0; +} + +static inline bool isar_feature_aa64_sm4(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM4) != 0; +} + +static inline bool isar_feature_aa64_dp(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0; +} + +static inline bool isar_feature_aa64_fhm(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, FHM) != 0; +} + +static inline bool isar_feature_aa64_condm_4(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) != 0; +} + +static inline bool isar_feature_aa64_condm_5(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) >= 2; +} + +static inline bool isar_feature_aa64_rndr(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RNDR) != 0; +} + +static inline bool isar_feature_aa64_tlbirange(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) == 2; +} + +static inline bool isar_feature_aa64_tlbios(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) != 0; +} + +static inline bool isar_feature_aa64_jscvt(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, JSCVT) != 0; +} + +static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0; +} + +/* + * These are the values from APA/API/APA3. + * In general these must be compared '>=', per the normal Arm ARM + * treatment of fields in ID registers. + */ +typedef enum { + PauthFeat_None = 0, + PauthFeat_1 = 1, + PauthFeat_EPAC = 2, + PauthFeat_2 = 3, + PauthFeat_FPAC = 4, + PauthFeat_FPACCOMBINED = 5, +} ARMPauthFeature; + +static inline ARMPauthFeature +isar_feature_pauth_feature(const ARMISARegisters *id) +{ + /* + * Architecturally, only one of {APA,API,APA3} may be active (non-zero) + * and the other two must be zero. Thus we may avoid conditionals. 
+ */ + return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) | + FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, API) | + FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3)); +} + +static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id) +{ + /* + * Return true if any form of pauth is enabled, as this + * predicate controls migration of the 128-bit keys. + */ + return isar_feature_pauth_feature(id) != PauthFeat_None; +} + +static inline bool isar_feature_aa64_pauth_qarma5(const ARMISARegisters *id) +{ + /* + * Return true if pauth is enabled with the architected QARMA5 algorithm. + * QEMU will always enable or disable both APA and GPA. + */ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) != 0; +} + +static inline bool isar_feature_aa64_pauth_qarma3(const ARMISARegisters *id) +{ + /* + * Return true if pauth is enabled with the architected QARMA3 algorithm. + * QEMU will always enable or disable both APA3 and GPA3. + */ + return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3) != 0; +} + +static inline bool isar_feature_aa64_sb(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SB) != 0; +} + +static inline bool isar_feature_aa64_predinv(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SPECRES) != 0; +} + +static inline bool isar_feature_aa64_frint(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FRINTTS) != 0; +} + +static inline bool isar_feature_aa64_dcpop(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) != 0; +} + +static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >= 2; +} + +static inline bool isar_feature_aa64_bf16(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, BF16) != 0; +} + +static inline bool isar_feature_aa64_rcpc_8_3(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) != 0; +} + +static inline bool isar_feature_aa64_rcpc_8_4(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) >= 2; +} + +static inline bool isar_feature_aa64_i8mm(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, I8MM) != 0; +} + +static inline bool isar_feature_aa64_hbc(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, BC) != 0; +} + +static inline bool isar_feature_aa64_mops(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS); +} + +static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id) +{ + /* We always set the AdvSIMD and FP fields identically. */ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) != 0xf; +} + +static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id) +{ + /* We always set the AdvSIMD and FP fields identically wrt FP16. 
*/ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; +} + +static inline bool isar_feature_aa64_aa32(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL0) >= 2; +} + +static inline bool isar_feature_aa64_aa32_el1(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL1) >= 2; +} + +static inline bool isar_feature_aa64_aa32_el2(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL2) >= 2; +} + +static inline bool isar_feature_aa64_ras(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) != 0; +} + +static inline bool isar_feature_aa64_doublefault(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) >= 2; +} + +static inline bool isar_feature_aa64_sve(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0; +} + +static inline bool isar_feature_aa64_sel2(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SEL2) != 0; +} + +static inline bool isar_feature_aa64_rme(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RME) != 0; +} + +static inline bool isar_feature_aa64_dit(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, DIT) != 0; +} + +static inline bool isar_feature_aa64_scxtnum(const ARMISARegisters *id) +{ + int key = FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, CSV2); + if (key >= 2) { + return true; /* FEAT_CSV2_2 */ + } + if (key == 1) { + key = FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, CSV2_FRAC); + return key >= 2; /* FEAT_CSV2_1p2 */ + } + return false; +} + +static inline bool isar_feature_aa64_ssbs(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SSBS) != 0; +} + +static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; +} + +static inline bool isar_feature_aa64_mte_insn_reg(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) != 0; +} + +static inline bool isar_feature_aa64_mte(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2; +} + +static inline bool isar_feature_aa64_mte3(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 3; +} + +static inline bool isar_feature_aa64_sme(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SME) != 0; +} + +static inline bool isar_feature_aa64_tgran4_lpa2(const ARMISARegisters *id) +{ + return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 1; +} + +static inline bool isar_feature_aa64_tgran4_2_lpa2(const ARMISARegisters *id) +{ + unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4_2); + return t >= 3 || (t == 0 && isar_feature_aa64_tgran4_lpa2(id)); +} + +static inline bool isar_feature_aa64_tgran16_lpa2(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 2; +} + +static inline bool isar_feature_aa64_tgran16_2_lpa2(const ARMISARegisters *id) +{ + unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16_2); + return t >= 3 || (t == 0 && isar_feature_aa64_tgran16_lpa2(id)); +} + +static inline bool isar_feature_aa64_tgran4(const ARMISARegisters *id) +{ + return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 0; +} + +static inline bool isar_feature_aa64_tgran16(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 1; +} + 
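[note: the TGRAN4/TGRAN64 tests above read their fields with FIELD_SEX64
because those ID fields are signed: 0xf sign-extends to -1 ("granule not
implemented"), 0 means implemented, and 1 adds the 52-bit LPA2 sizes. A
self-contained sketch of that extraction, with hand-rolled helpers standing
in for the FIELD_EX64/FIELD_SEX64 macros from hw/registerfields.h and bit
positions following the architected ID_AA64MMFR0_EL1 layout:

    #include <stdint.h>
    #include <stdio.h>

    /* Unsigned field extract, as FIELD_EX64 would do it. */
    static uint64_t demo_extract(uint64_t reg, int start, int len)
    {
        return (reg >> start) & ((UINT64_C(1) << len) - 1);
    }

    /* Signed field extract, as FIELD_SEX64 would do it. */
    static int64_t demo_sextract(uint64_t reg, int start, int len)
    {
        int64_t v = demo_extract(reg, start, len);
        return v - ((v & (INT64_C(1) << (len - 1))) << 1);
    }

    int main(void)
    {
        /* Hypothetical register value: TGRAN4 (bits [31:28]) == 0xf */
        uint64_t mmfr0 = 0xf0000000u;
        /* 0xf sign-extends to -1, i.e. "4K granule not implemented" */
        printf("TGRAN4 unsigned=%llu signed=%lld\n",
               (unsigned long long)demo_extract(mmfr0, 28, 4),
               (long long)demo_sextract(mmfr0, 28, 4));
        return 0;
    }
]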
+static inline bool isar_feature_aa64_tgran64(const ARMISARegisters *id) +{ + return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN64) >= 0; +} + +static inline bool isar_feature_aa64_tgran4_2(const ARMISARegisters *id) +{ + unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4_2); + return t >= 2 || (t == 0 && isar_feature_aa64_tgran4(id)); +} + +static inline bool isar_feature_aa64_tgran16_2(const ARMISARegisters *id) +{ + unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16_2); + return t >= 2 || (t == 0 && isar_feature_aa64_tgran16(id)); +} + +static inline bool isar_feature_aa64_tgran64_2(const ARMISARegisters *id) +{ + unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN64_2); + return t >= 2 || (t == 0 && isar_feature_aa64_tgran64(id)); +} + +static inline bool isar_feature_aa64_fgt(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, FGT) != 0; +} + +static inline bool isar_feature_aa64_ecv_traps(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 0; +} + +static inline bool isar_feature_aa64_ecv(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 1; +} + +static inline bool isar_feature_aa64_vh(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, VH) != 0; +} + +static inline bool isar_feature_aa64_lor(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, LO) != 0; +} + +static inline bool isar_feature_aa64_pan(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) != 0; +} + +static inline bool isar_feature_aa64_ats1e1(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 2; +} + +static inline bool isar_feature_aa64_pan3(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 3; +} + +static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0; +} + +static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; +} + +static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) != 0; +} + +static inline bool isar_feature_aa64_hdbs(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) >= 2; +} + +static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0; +} + +static inline bool isar_feature_aa64_uao(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, UAO) != 0; +} + +static inline bool isar_feature_aa64_st(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, ST) != 0; +} + +static inline bool isar_feature_aa64_lse2(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, AT) != 0; +} + +static inline bool isar_feature_aa64_fwb(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, FWB) != 0; +} + +static inline bool isar_feature_aa64_ids(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, IDS) != 0; +} + +static inline bool isar_feature_aa64_half_evt(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, EVT) >= 1; +} + +static inline bool isar_feature_aa64_evt(const ARMISARegisters *id) +{ + 
return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, EVT) >= 2; +} + +static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0; +} + +static inline bool isar_feature_aa64_lva(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, VARANGE) != 0; +} + +static inline bool isar_feature_aa64_e0pd(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, E0PD) != 0; +} + +static inline bool isar_feature_aa64_nv(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) != 0; +} + +static inline bool isar_feature_aa64_nv2(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) >= 2; +} + +static inline bool isar_feature_aa64_pmuv3p1(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && + FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; +} + +static inline bool isar_feature_aa64_pmuv3p4(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 5 && + FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; +} + +static inline bool isar_feature_aa64_pmuv3p5(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 6 && + FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; +} + +static inline bool isar_feature_aa64_debugv8p2(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, DEBUGVER) >= 8; +} + +static inline bool isar_feature_aa64_doublelock(const ARMISARegisters *id) +{ + return FIELD_SEX64(id->id_aa64dfr0, ID_AA64DFR0, DOUBLELOCK) >= 0; +} + +static inline bool isar_feature_aa64_sve2(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SVEVER) != 0; +} + +static inline bool isar_feature_aa64_sve2_aes(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, AES) != 0; +} + +static inline bool isar_feature_aa64_sve2_pmull128(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, AES) >= 2; +} + +static inline bool isar_feature_aa64_sve2_bitperm(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BITPERM) != 0; +} + +static inline bool isar_feature_aa64_sve_bf16(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BFLOAT16) != 0; +} + +static inline bool isar_feature_aa64_sve2_sha3(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SHA3) != 0; +} + +static inline bool isar_feature_aa64_sve2_sm4(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SM4) != 0; +} + +static inline bool isar_feature_aa64_sve_i8mm(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, I8MM) != 0; +} + +static inline bool isar_feature_aa64_sve_f32mm(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F32MM) != 0; +} + +static inline bool isar_feature_aa64_sve_f64mm(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F64MM) != 0; +} + +static inline bool isar_feature_aa64_sme_f64f64(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, F64F64); +} + +static inline bool isar_feature_aa64_sme_i16i64(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, I16I64) == 0xf; +} + +static inline bool isar_feature_aa64_sme_fa64(const ARMISARegisters *id) +{ + return 
FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, FA64); +} + +/* + * Feature tests for "does this exist in either 32-bit or 64-bit?" + */ +static inline bool isar_feature_any_fp16(const ARMISARegisters *id) +{ + return isar_feature_aa64_fp16(id) || isar_feature_aa32_fp16_arith(id); +} + +static inline bool isar_feature_any_predinv(const ARMISARegisters *id) +{ + return isar_feature_aa64_predinv(id) || isar_feature_aa32_predinv(id); +} + +static inline bool isar_feature_any_pmuv3p1(const ARMISARegisters *id) +{ + return isar_feature_aa64_pmuv3p1(id) || isar_feature_aa32_pmuv3p1(id); +} + +static inline bool isar_feature_any_pmuv3p4(const ARMISARegisters *id) +{ + return isar_feature_aa64_pmuv3p4(id) || isar_feature_aa32_pmuv3p4(id); +} + +static inline bool isar_feature_any_pmuv3p5(const ARMISARegisters *id) +{ + return isar_feature_aa64_pmuv3p5(id) || isar_feature_aa32_pmuv3p5(id); +} + +static inline bool isar_feature_any_ccidx(const ARMISARegisters *id) +{ + return isar_feature_aa64_ccidx(id) || isar_feature_aa32_ccidx(id); +} + +static inline bool isar_feature_any_tts2uxn(const ARMISARegisters *id) +{ + return isar_feature_aa64_tts2uxn(id) || isar_feature_aa32_tts2uxn(id); +} + +static inline bool isar_feature_any_debugv8p2(const ARMISARegisters *id) +{ + return isar_feature_aa64_debugv8p2(id) || isar_feature_aa32_debugv8p2(id); +} + +static inline bool isar_feature_any_ras(const ARMISARegisters *id) +{ + return isar_feature_aa64_ras(id) || isar_feature_aa32_ras(id); +} + +static inline bool isar_feature_any_half_evt(const ARMISARegisters *id) +{ + return isar_feature_aa64_half_evt(id) || isar_feature_aa32_half_evt(id); +} + +static inline bool isar_feature_any_evt(const ARMISARegisters *id) +{ + return isar_feature_aa64_evt(id) || isar_feature_aa32_evt(id); +} + +/* + * Forward to the above feature tests given an ARMCPU pointer. 
+ */ +#define cpu_isar_feature(name, cpu) \ + ({ ARMCPU *cpu_ = (cpu); isar_feature_##name(&cpu_->isar); }) + +#endif diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h index 7f38d33b8e..da3243ab21 100644 --- a/target/arm/cpu-param.h +++ b/target/arm/cpu-param.h @@ -6,12 +6,12 @@ */ #ifndef ARM_CPU_PARAM_H -#define ARM_CPU_PARAM_H 1 +#define ARM_CPU_PARAM_H #ifdef TARGET_AARCH64 # define TARGET_LONG_BITS 64 -# define TARGET_PHYS_ADDR_SPACE_BITS 48 -# define TARGET_VIRT_ADDR_SPACE_BITS 48 +# define TARGET_PHYS_ADDR_SPACE_BITS 52 +# define TARGET_VIRT_ADDR_SPACE_BITS 52 #else # define TARGET_LONG_BITS 32 # define TARGET_PHYS_ADDR_SPACE_BITS 40 @@ -19,9 +19,13 @@ #endif #ifdef CONFIG_USER_ONLY -#define TARGET_PAGE_BITS 12 # ifdef TARGET_AARCH64 # define TARGET_TAGGED_ADDRESSES +/* Allow user-only to vary page size from 4k */ +# define TARGET_PAGE_BITS_VARY +# define TARGET_PAGE_BITS_MIN 12 +# else +# define TARGET_PAGE_BITS 12 # endif #else /* @@ -30,8 +34,7 @@ */ # define TARGET_PAGE_BITS_VARY # define TARGET_PAGE_BITS_MIN 10 -#endif -#define NB_MMU_MODES 15 +#endif #endif diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h index a22bd506d0..8e032691db 100644 --- a/target/arm/cpu-qom.h +++ b/target/arm/cpu-qom.h @@ -1,5 +1,5 @@ /* - * QEMU ARM CPU + * QEMU ARM CPU QOM header (target agnostic) * * Copyright (c) 2012 SUSE LINUX Products GmbH * @@ -21,78 +21,40 @@ #define QEMU_ARM_CPU_QOM_H #include "hw/core/cpu.h" -#include "qom/object.h" - -struct arm_boot_info; #define TYPE_ARM_CPU "arm-cpu" -OBJECT_DECLARE_TYPE(ARMCPU, ARMCPUClass, - ARM_CPU) +OBJECT_DECLARE_CPU_TYPE(ARMCPU, ARMCPUClass, ARM_CPU) #define TYPE_ARM_MAX_CPU "max-" TYPE_ARM_CPU -typedef struct ARMCPUInfo { - const char *name; - void (*initfn)(Object *obj); - void (*class_init)(ObjectClass *oc, void *data); -} ARMCPUInfo; - -void arm_cpu_register(const ARMCPUInfo *info); -void aarch64_cpu_register(const ARMCPUInfo *info); - -/** - * ARMCPUClass: - * @parent_realize: The parent class' realize handler. - * @parent_reset: The parent class' reset handler. - * - * An ARM CPU model. - */ -struct ARMCPUClass { - /*< private >*/ - CPUClass parent_class; - /*< public >*/ - - const ARMCPUInfo *info; - DeviceRealize parent_realize; - DeviceReset parent_reset; -}; - - #define TYPE_AARCH64_CPU "aarch64-cpu" typedef struct AArch64CPUClass AArch64CPUClass; DECLARE_CLASS_CHECKERS(AArch64CPUClass, AARCH64_CPU, TYPE_AARCH64_CPU) -struct AArch64CPUClass { - /*< private >*/ - ARMCPUClass parent_class; - /*< public >*/ +#define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU +#define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX) + +/* Meanings of the ARMCPU object's four inbound GPIO lines */ +#define ARM_CPU_IRQ 0 +#define ARM_CPU_FIQ 1 +#define ARM_CPU_VIRQ 2 +#define ARM_CPU_VFIQ 3 + +/* For M profile, some registers are banked secure vs non-secure; + * these are represented as a 2-element array where the first element + * is the non-secure copy and the second is the secure copy. + * When the CPU does not have implement the security extension then + * only the first element is used. + * This means that the copy for the current security state can be + * accessed via env->registerfield[env->v7m.secure] (whether the security + * extension is implemented or not). + */ +enum { + M_REG_NS = 0, + M_REG_S = 1, + M_REG_NUM_BANKS = 2, }; -void register_cp_regs_for_features(ARMCPU *cpu); -void init_cpreg_list(ARMCPU *cpu); - -/* Callback functions for the generic timer's timers. 
*/ -void arm_gt_ptimer_cb(void *opaque); -void arm_gt_vtimer_cb(void *opaque); -void arm_gt_htimer_cb(void *opaque); -void arm_gt_stimer_cb(void *opaque); -void arm_gt_hvtimer_cb(void *opaque); - -#define ARM_AFF0_SHIFT 0 -#define ARM_AFF0_MASK (0xFFULL << ARM_AFF0_SHIFT) -#define ARM_AFF1_SHIFT 8 -#define ARM_AFF1_MASK (0xFFULL << ARM_AFF1_SHIFT) -#define ARM_AFF2_SHIFT 16 -#define ARM_AFF2_MASK (0xFFULL << ARM_AFF2_SHIFT) -#define ARM_AFF3_SHIFT 32 -#define ARM_AFF3_MASK (0xFFULL << ARM_AFF3_SHIFT) -#define ARM_DEFAULT_CPUS_PER_CLUSTER 8 - -#define ARM32_AFFINITY_MASK (ARM_AFF0_MASK|ARM_AFF1_MASK|ARM_AFF2_MASK) -#define ARM64_AFFINITY_MASK \ - (ARM_AFF0_MASK|ARM_AFF1_MASK|ARM_AFF2_MASK|ARM_AFF3_MASK) -#define ARM64_AFFINITY_INVALID (~ARM64_AFFINITY_MASK) - #endif diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 641a8c2d3d..ab8d007a86 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -20,28 +20,36 @@ #include "qemu/osdep.h" #include "qemu/qemu-print.h" -#include "qemu-common.h" +#include "qemu/timer.h" +#include "qemu/log.h" +#include "exec/page-vary.h" #include "target/arm/idau.h" #include "qemu/module.h" #include "qapi/error.h" -#include "qapi/visitor.h" #include "cpu.h" #ifdef CONFIG_TCG #include "hw/core/tcg-cpu-ops.h" #endif /* CONFIG_TCG */ #include "internals.h" +#include "cpu-features.h" #include "exec/exec-all.h" #include "hw/qdev-properties.h" #if !defined(CONFIG_USER_ONLY) #include "hw/loader.h" #include "hw/boards.h" -#endif +#ifdef CONFIG_TCG +#include "hw/intc/armv7m_nvic.h" +#endif /* CONFIG_TCG */ +#endif /* !CONFIG_USER_ONLY */ #include "sysemu/tcg.h" +#include "sysemu/qtest.h" #include "sysemu/hw_accel.h" #include "kvm_arm.h" -#include "hvf_arm.h" #include "disas/capstone.h" #include "fpu/softfloat.h" +#include "cpregs.h" +#include "target/arm/cpu-qom.h" +#include "target/arm/gtimer.h" static void arm_cpu_set_pc(CPUState *cs, vaddr value) { @@ -50,28 +58,66 @@ static void arm_cpu_set_pc(CPUState *cs, vaddr value) if (is_a64(env)) { env->pc = value; - env->thumb = 0; + env->thumb = false; } else { env->regs[15] = value & ~1; env->thumb = value & 1; } } +static vaddr arm_cpu_get_pc(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + if (is_a64(env)) { + return env->pc; + } else { + return env->regs[15]; + } +} + #ifdef CONFIG_TCG void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { - ARMCPU *cpu = ARM_CPU(cs); - CPUARMState *env = &cpu->env; + /* The program counter is always up to date with CF_PCREL. */ + if (!(tb_cflags(tb) & CF_PCREL)) { + CPUARMState *env = cpu_env(cs); + /* + * It's OK to look at env for the current mode here, because it's + * never possible for an AArch64 TB to chain to an AArch32 TB. + */ + if (is_a64(env)) { + env->pc = tb->pc; + } else { + env->regs[15] = tb->pc; + } + } +} + +void arm_restore_state_to_opc(CPUState *cs, + const TranslationBlock *tb, + const uint64_t *data) +{ + CPUARMState *env = cpu_env(cs); - /* - * It's OK to look at env for the current mode here, because it's - * never possible for an AArch64 TB to chain to an AArch32 TB. 
- */ if (is_a64(env)) { - env->pc = tb->pc; + if (tb_cflags(tb) & CF_PCREL) { + env->pc = (env->pc & TARGET_PAGE_MASK) | data[0]; + } else { + env->pc = data[0]; + } + env->condexec_bits = 0; + env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT; } else { - env->regs[15] = tb->pc; + if (tb_cflags(tb) & CF_PCREL) { + env->regs[15] = (env->regs[15] & TARGET_PAGE_MASK) | data[0]; + } else { + env->regs[15] = data[0]; + } + env->condexec_bits = data[1]; + env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT; } } #endif /* CONFIG_TCG */ @@ -83,10 +129,15 @@ static bool arm_cpu_has_work(CPUState *cs) return (cpu->power_state != PSCI_OFF) && cs->interrupt_request & (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD - | CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ + | CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ | CPU_INTERRUPT_VSERR | CPU_INTERRUPT_EXITTB); } +static int arm_cpu_mmu_index(CPUState *cs, bool ifetch) +{ + return arm_env_mmu_index(cpu_env(cs)); +} + void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, void *opaque) { @@ -115,7 +166,7 @@ static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) ARMCPRegInfo *ri = value; ARMCPU *cpu = opaque; - if (ri->type & (ARM_CP_SPECIAL | ARM_CP_ALIAS)) { + if (ri->type & (ARM_CP_SPECIAL_MASK | ARM_CP_ALIAS)) { return; } @@ -151,7 +202,7 @@ static void cp_reg_check_reset(gpointer key, gpointer value, gpointer opaque) ARMCPU *cpu = opaque; uint64_t oldvalue, newvalue; - if (ri->type & (ARM_CP_SPECIAL | ARM_CP_ALIAS | ARM_CP_NO_RAW)) { + if (ri->type & (ARM_CP_SPECIAL_MASK | ARM_CP_ALIAS | ARM_CP_NO_RAW)) { return; } @@ -161,14 +212,16 @@ static void cp_reg_check_reset(gpointer key, gpointer value, gpointer opaque) assert(oldvalue == newvalue); } -static void arm_cpu_reset(DeviceState *dev) +static void arm_cpu_reset_hold(Object *obj) { - CPUState *s = CPU(dev); - ARMCPU *cpu = ARM_CPU(s); - ARMCPUClass *acc = ARM_CPU_GET_CLASS(cpu); + CPUState *cs = CPU(obj); + ARMCPU *cpu = ARM_CPU(cs); + ARMCPUClass *acc = ARM_CPU_GET_CLASS(obj); CPUARMState *env = &cpu->env; - acc->parent_reset(dev); + if (acc->parent_phases.hold) { + acc->parent_phases.hold(obj); + } memset(env, 0, offsetof(CPUARMState, end_reset_fields)); @@ -180,7 +233,7 @@ static void arm_cpu_reset(DeviceState *dev) env->vfp.xregs[ARM_VFP_MVFR1] = cpu->isar.mvfr1; env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.mvfr2; - cpu->power_state = s->start_powered_off ? PSCI_OFF : PSCI_ON; + cpu->power_state = cs->start_powered_off ? PSCI_OFF : PSCI_ON; if (arm_feature(env, ARM_FEATURE_IWMMXT)) { env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q'; @@ -188,7 +241,7 @@ static void arm_cpu_reset(DeviceState *dev) if (arm_feature(env, ARM_FEATURE_AARCH64)) { /* 64 bit CPUs always start in 64 bit mode */ - env->aarch64 = 1; + env->aarch64 = true; #if defined(CONFIG_USER_ONLY) env->pstate = PSTATE_MODE_EL0t; /* Userspace expects access to DC ZVA, CTL_EL0 and the cache ops */ @@ -196,20 +249,38 @@ static void arm_cpu_reset(DeviceState *dev) /* Enable all PAC keys. */ env->cp15.sctlr_el[1] |= (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB); + /* Trap on btype=3 for PACIxSP. */ + env->cp15.sctlr_el[1] |= SCTLR_BT0; + /* Trap on implementation defined registers. 
*/ + if (cpu_isar_feature(aa64_tidcp1, cpu)) { + env->cp15.sctlr_el[1] |= SCTLR_TIDCP; + } /* and to the FP/Neon instructions */ - env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 20, 2, 3); - /* and to the SVE instructions */ - env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 16, 2, 3); - /* with reasonable vector length */ + env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1, + CPACR_EL1, FPEN, 3); + /* and to the SVE instructions, with default vector length */ if (cpu_isar_feature(aa64_sve, cpu)) { - env->vfp.zcr_el[1] = - aarch64_sve_zcr_get_valid_len(cpu, cpu->sve_default_vq - 1); + env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1, + CPACR_EL1, ZEN, 3); + env->vfp.zcr_el[1] = cpu->sve_default_vq - 1; + } + /* and for SME instructions, with default vector length, and TPIDR2 */ + if (cpu_isar_feature(aa64_sme, cpu)) { + env->cp15.sctlr_el[1] |= SCTLR_EnTP2; + env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1, + CPACR_EL1, SMEN, 3); + env->vfp.smcr_el[1] = cpu->sme_default_vq - 1; + if (cpu_isar_feature(aa64_sme_fa64, cpu)) { + env->vfp.smcr_el[1] = FIELD_DP64(env->vfp.smcr_el[1], + SMCR, FA64, 1); + } } /* + * Enable 48-bit address space (TODO: take reserved_va into account). * Enable TBI0 but not TBI1. * Note that this must match useronly_clean_ptr. */ - env->cp15.tcr_el[1].raw_tcr = (1ULL << 37); + env->cp15.tcr_el[1] = 5 | (1ULL << 37); /* Enable MTE */ if (cpu_isar_feature(aa64_mte, cpu)) { @@ -225,6 +296,15 @@ static void arm_cpu_reset(DeviceState *dev) */ env->cp15.gcr_el1 = 0x1ffff; } + /* + * Disable access to SCXTNUM_EL0 from CSV2_1p2. + * This is not yet exposed from the Linux kernel in any way. + */ + env->cp15.sctlr_el[1] |= SCTLR_TSCXT; + /* Disable access to Debug Communication Channel (DCC). */ + env->cp15.mdscr_el1 |= 1 << 12; + /* Enable FEAT_MOPS */ + env->cp15.sctlr_el[1] |= SCTLR_MSCEN; #else /* Reset into the highest available EL */ if (arm_feature(env, ARM_FEATURE_EL3)) { @@ -234,13 +314,23 @@ static void arm_cpu_reset(DeviceState *dev) } else { env->pstate = PSTATE_MODE_EL1h; } - env->pc = cpu->rvbar; + + /* Sample rvbar at reset. */ + env->cp15.rvbar = cpu->rvbar_prop; + env->pc = env->cp15.rvbar; #endif } else { #if defined(CONFIG_USER_ONLY) /* Userspace expects access to cp10 and cp11 for FP/Neon */ - env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 20, 4, 0xf); + env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1, + CPACR, CP10, 3); + env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1, + CPACR, CP11, 3); #endif + if (arm_feature(env, ARM_FEATURE_V8)) { + env->cp15.rvbar = cpu->rvbar_prop; + env->regs[15] = cpu->rvbar_prop; + } } #if defined(CONFIG_USER_ONLY) @@ -348,7 +438,7 @@ static void arm_cpu_reset(DeviceState *dev) /* Load the initial SP and PC from offset 0 and 4 in the vector table */ vecbase = env->v7m.vecbase[env->v7m.secure]; - rom = rom_ptr_for_as(s->as, vecbase, 8); + rom = rom_ptr_for_as(cs->as, vecbase, 8); if (rom) { /* Address zero is covered by ROM which hasn't yet been * copied into physical memory. @@ -361,10 +451,14 @@ static void arm_cpu_reset(DeviceState *dev) * it got copied into memory. In the latter case, rom_ptr * will return a NULL pointer and we should use ldl_phys instead. 
*/ - initial_msp = ldl_phys(s->as, vecbase); - initial_pc = ldl_phys(s->as, vecbase + 4); + initial_msp = ldl_phys(cs->as, vecbase); + initial_pc = ldl_phys(cs->as, vecbase + 4); } + qemu_log_mask(CPU_LOG_INT, + "Loaded reset SP 0x%x PC 0x%x from vector table\n", + initial_msp, initial_pc); + env->regs[13] = initial_msp & 0xFFFFFFFC; env->regs[15] = initial_pc & ~1; env->thumb = initial_pc & 1; @@ -415,6 +509,14 @@ static void arm_cpu_reset(DeviceState *dev) sizeof(*env->pmsav7.dracr) * cpu->pmsav7_dregion); } } + + if (cpu->pmsav8r_hdregion > 0) { + memset(env->pmsav8.hprbar, 0, + sizeof(*env->pmsav8.hprbar) * cpu->pmsav8r_hdregion); + memset(env->pmsav8.hprlar, 0, + sizeof(*env->pmsav8.hprlar) * cpu->pmsav8r_hdregion); + } + env->pmsav7.rnr[M_REG_NS] = 0; env->pmsav7.rnr[M_REG_S] = 0; env->pmsav8.mair0[M_REG_NS] = 0; @@ -453,19 +555,117 @@ static void arm_cpu_reset(DeviceState *dev) } #endif - hw_breakpoint_update_all(cpu); - hw_watchpoint_update_all(cpu); - arm_rebuild_hflags(env); + if (tcg_enabled()) { + hw_breakpoint_update_all(cpu); + hw_watchpoint_update_all(cpu); + + arm_rebuild_hflags(env); + } } -#ifndef CONFIG_USER_ONLY +void arm_emulate_firmware_reset(CPUState *cpustate, int target_el) +{ + ARMCPU *cpu = ARM_CPU(cpustate); + CPUARMState *env = &cpu->env; + bool have_el3 = arm_feature(env, ARM_FEATURE_EL3); + bool have_el2 = arm_feature(env, ARM_FEATURE_EL2); + + /* + * Check we have the EL we're aiming for. If that is the + * highest implemented EL, then cpu_reset has already done + * all the work. + */ + switch (target_el) { + case 3: + assert(have_el3); + return; + case 2: + assert(have_el2); + if (!have_el3) { + return; + } + break; + case 1: + if (!have_el3 && !have_el2) { + return; + } + break; + default: + g_assert_not_reached(); + } + + if (have_el3) { + /* + * Set the EL3 state so code can run at EL2. This should match + * the requirements set by Linux in its booting spec. + */ + if (env->aarch64) { + env->cp15.scr_el3 |= SCR_RW; + if (cpu_isar_feature(aa64_pauth, cpu)) { + env->cp15.scr_el3 |= SCR_API | SCR_APK; + } + if (cpu_isar_feature(aa64_mte, cpu)) { + env->cp15.scr_el3 |= SCR_ATA; + } + if (cpu_isar_feature(aa64_sve, cpu)) { + env->cp15.cptr_el[3] |= R_CPTR_EL3_EZ_MASK; + env->vfp.zcr_el[3] = 0xf; + } + if (cpu_isar_feature(aa64_sme, cpu)) { + env->cp15.cptr_el[3] |= R_CPTR_EL3_ESM_MASK; + env->cp15.scr_el3 |= SCR_ENTP2; + env->vfp.smcr_el[3] = 0xf; + } + if (cpu_isar_feature(aa64_hcx, cpu)) { + env->cp15.scr_el3 |= SCR_HXEN; + } + if (cpu_isar_feature(aa64_fgt, cpu)) { + env->cp15.scr_el3 |= SCR_FGTEN; + } + } + + if (target_el == 2) { + /* If the guest is at EL2 then Linux expects the HVC insn to work */ + env->cp15.scr_el3 |= SCR_HCE; + } + + /* Put CPU into non-secure state */ + env->cp15.scr_el3 |= SCR_NS; + /* Set NSACR.{CP11,CP10} so NS can access the FPU */ + env->cp15.nsacr |= 3 << 10; + } + + if (have_el2 && target_el < 2) { + /* Set EL2 state so code can run at EL1. 
*/ + if (env->aarch64) { + env->cp15.hcr_el2 |= HCR_RW; + } + } + + /* Set the CPU to the desired state */ + if (env->aarch64) { + env->pstate = aarch64_pstate_mode(target_el, true); + } else { + static const uint32_t mode_for_el[] = { + 0, + ARM_CPU_MODE_SVC, + ARM_CPU_MODE_HYP, + ARM_CPU_MODE_SVC, + }; + + cpsr_write(env, mode_for_el[target_el], CPSR_M, CPSRWriteRaw); + } +} + + +#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, unsigned int target_el, unsigned int cur_el, bool secure, uint64_t hcr_el2) { - CPUARMState *env = cs->env_ptr; + CPUARMState *env = cpu_env(cs); bool pstate_unmasked; bool unmasked = false; @@ -499,6 +699,12 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, return false; } return !(env->daif & PSTATE_I); + case EXCP_VSERR: + if (!(hcr_el2 & HCR_AMO) || (hcr_el2 & HCR_TGE)) { + /* VIRQs are only taken when hypervized. */ + return false; + } + return !(env->daif & PSTATE_A); default: g_assert_not_reached(); } @@ -511,14 +717,24 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, if ((target_el > cur_el) && (target_el != 1)) { /* Exceptions targeting a higher EL may not be maskable */ if (arm_feature(env, ARM_FEATURE_AARCH64)) { - /* - * 64-bit masking rules are simple: exceptions to EL3 - * can't be masked, and exceptions to EL2 can only be - * masked from Secure state. The HCR and SCR settings - * don't affect the masking logic, only the interrupt routing. - */ - if (target_el == 3 || !secure || (env->cp15.scr_el3 & SCR_EEL2)) { + switch (target_el) { + case 2: + /* + * According to ARM DDI 0487H.a, an interrupt can be masked + * when HCR_E2H and HCR_TGE are both set regardless of the + * current Security state. Note that we need to revisit this + * part again once we need to support NMI. + */ + if ((hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { + unmasked = true; + } + break; + case 3: + /* Interrupt cannot be masked when the target EL is 3 */ unmasked = true; + break; + default: + g_assert_not_reached(); } } else { /* @@ -570,7 +786,7 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, } /* - * The PSTATE bits only mask the interrupt if we have not overriden the + * The PSTATE bits only mask the interrupt if we have not overridden the * ability above. 
*/ return unmasked || pstate_unmasked; @@ -579,7 +795,7 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, static bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { CPUClass *cc = CPU_GET_CLASS(cs); - CPUARMState *env = cs->env_ptr; + CPUARMState *env = cpu_env(cs); uint32_t cur_el = arm_current_el(env); bool secure = arm_is_secure(env); uint64_t hcr_el2 = arm_hcr_el2_eff(env); @@ -620,6 +836,17 @@ static bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request) goto found; } } + if (interrupt_request & CPU_INTERRUPT_VSERR) { + excp_idx = EXCP_VSERR; + target_el = 1; + if (arm_excp_unmasked(cs, excp_idx, target_el, + cur_el, secure, hcr_el2)) { + /* Taking a virtual abort clears HCR_EL2.VSE */ + env->cp15.hcr_el2 &= ~HCR_VSE; + cpu_reset_interrupt(cs, CPU_INTERRUPT_VSERR); + goto found; + } + } return false; found: @@ -628,7 +855,8 @@ static bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request) cc->tcg_ops->do_interrupt(cs); return true; } -#endif /* !CONFIG_USER_ONLY */ + +#endif /* CONFIG_TCG && !CONFIG_USER_ONLY */ void arm_cpu_update_virq(ARMCPU *cpu) { @@ -672,6 +900,25 @@ void arm_cpu_update_vfiq(ARMCPU *cpu) } } +void arm_cpu_update_vserr(ARMCPU *cpu) +{ + /* + * Update the interrupt level for VSERR, which is the HCR_EL2.VSE bit. + */ + CPUARMState *env = &cpu->env; + CPUState *cs = CPU(cpu); + + bool new_state = env->cp15.hcr_el2 & HCR_VSE; + + if (new_state != ((cs->interrupt_request & CPU_INTERRUPT_VSERR) != 0)) { + if (new_state) { + cpu_interrupt(cs, CPU_INTERRUPT_VSERR); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_VSERR); + } + } +} + #ifndef CONFIG_USER_ONLY static void arm_cpu_set_irq(void *opaque, int irq, int level) { @@ -685,6 +932,16 @@ static void arm_cpu_set_irq(void *opaque, int irq, int level) [ARM_CPU_VFIQ] = CPU_INTERRUPT_VFIQ }; + if (!arm_feature(env, ARM_FEATURE_EL2) && + (irq == ARM_CPU_VIRQ || irq == ARM_CPU_VFIQ)) { + /* + * The GIC might tell us about VIRQ and VFIQ state, but if we don't + * have EL2 support we don't care. (Unless the guest is doing something + * silly this will only be calls saying "level is still 0".) + */ + return; + } + if (level) { env->irq_line_state |= mask[irq]; } else { @@ -693,11 +950,9 @@ static void arm_cpu_set_irq(void *opaque, int irq, int level) switch (irq) { case ARM_CPU_VIRQ: - assert(arm_feature(env, ARM_FEATURE_EL2)); arm_cpu_update_virq(cpu); break; case ARM_CPU_VFIQ: - assert(arm_feature(env, ARM_FEATURE_EL2)); arm_cpu_update_vfiq(cpu); break; case ARM_CPU_IRQ: @@ -755,12 +1010,6 @@ static bool arm_cpu_virtio_is_big_endian(CPUState *cs) #endif -static int -print_insn_thumb1(bfd_vma pc, disassemble_info *info) -{ - return print_insn_arm(pc | 1, info); -} - static void arm_disas_set_info(CPUState *cpu, disassemble_info *info) { ARMCPU *ac = ARM_CPU(cpu); @@ -768,25 +1017,16 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info) bool sctlr_b; if (is_a64(env)) { - /* We might not be compiled with the A64 disassembler - * because it needs a C++ compiler. Leave print_insn - * unset in this case to use the caller default behaviour. 
- */ -#if defined(CONFIG_ARM_A64_DIS) - info->print_insn = print_insn_arm_a64; -#endif info->cap_arch = CS_ARCH_ARM64; info->cap_insn_unit = 4; info->cap_insn_split = 4; } else { int cap_mode; if (env->thumb) { - info->print_insn = print_insn_thumb1; info->cap_insn_unit = 2; info->cap_insn_split = 4; cap_mode = CS_MODE_THUMB; } else { - info->print_insn = print_insn_arm; info->cap_insn_unit = 4; info->cap_insn_split = 4; cap_mode = CS_MODE_ARM; @@ -803,7 +1043,7 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info) sctlr_b = arm_sctlr_b(env); if (bswap_code(sctlr_b)) { -#ifdef TARGET_WORDS_BIGENDIAN +#if TARGET_BIG_ENDIAN info->endian = BFD_ENDIAN_LITTLE; #else info->endian = BFD_ENDIAN_BIG; @@ -824,9 +1064,11 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; uint32_t psr = pstate_read(env); - int i; + int i, j; int el = arm_current_el(env); + uint64_t hcr = arm_hcr_el2_eff(env); const char *ns_status; + bool sve; qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc); for (i = 0; i < 32; i++) { @@ -853,9 +1095,19 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) el, psr & PSTATE_SP ? 'h' : 't'); + if (cpu_isar_feature(aa64_sme, cpu)) { + qemu_fprintf(f, " SVCR=%08" PRIx64 " %c%c", + env->svcr, + (FIELD_EX64(env->svcr, SVCR, ZA) ? 'Z' : '-'), + (FIELD_EX64(env->svcr, SVCR, SM) ? 'S' : '-')); + } if (cpu_isar_feature(aa64_bti, cpu)) { qemu_fprintf(f, " BTYPE=%d", (psr & PSTATE_BTYPE) >> 10); } + qemu_fprintf(f, "%s%s%s", + (hcr & HCR_NV) ? " NV" : "", + (hcr & HCR_NV1) ? " NV1" : "", + (hcr & HCR_NV2) ? " NV2" : ""); if (!(flags & CPU_DUMP_FPU)) { qemu_fprintf(f, "\n"); return; @@ -867,8 +1119,16 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) qemu_fprintf(f, " FPCR=%08x FPSR=%08x\n", vfp_get_fpcr(env), vfp_get_fpsr(env)); - if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) { - int j, zcr_len = sve_zcr_len_for_el(env, el); + if (cpu_isar_feature(aa64_sme, cpu) && FIELD_EX64(env->svcr, SVCR, SM)) { + sve = sme_exception_el(env, el) == 0; + } else if (cpu_isar_feature(aa64_sve, cpu)) { + sve = sve_exception_el(env, el) == 0; + } else { + sve = false; + } + + if (sve) { + int zcr_len = sve_vqm1_for_el(env, el); for (i = 0; i <= FFR_PRED_NUM; i++) { bool eol; @@ -908,32 +1168,24 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) } } - for (i = 0; i < 32; i++) { - if (zcr_len == 0) { + if (zcr_len == 0) { + /* + * With vl=16, there are only 37 columns per register, + * so output two registers per line. + */ + for (i = 0; i < 32; i++) { qemu_fprintf(f, "Z%02d=%016" PRIx64 ":%016" PRIx64 "%s", i, env->vfp.zregs[i].d[1], env->vfp.zregs[i].d[0], i & 1 ? "\n" : " "); - } else if (zcr_len == 1) { - qemu_fprintf(f, "Z%02d=%016" PRIx64 ":%016" PRIx64 - ":%016" PRIx64 ":%016" PRIx64 "\n", - i, env->vfp.zregs[i].d[3], env->vfp.zregs[i].d[2], - env->vfp.zregs[i].d[1], env->vfp.zregs[i].d[0]); - } else { + } + } else { + for (i = 0; i < 32; i++) { + qemu_fprintf(f, "Z%02d=", i); for (j = zcr_len; j >= 0; j--) { - bool odd = (zcr_len - j) % 2 != 0; - if (j == zcr_len) { - qemu_fprintf(f, "Z%02d[%x-%x]=", i, j, j - 1); - } else if (!odd) { - if (j > 0) { - qemu_fprintf(f, " [%x-%x]=", j, j - 1); - } else { - qemu_fprintf(f, " [%x]=", j); - } - } qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%s", env->vfp.zregs[i].d[j * 2 + 1], - env->vfp.zregs[i].d[j * 2], - odd || j == 0 ? "\n" : ":"); + env->vfp.zregs[i].d[j * 2 + 0], + j ? 
":" : "\n"); } } } @@ -944,6 +1196,24 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags) i, q[1], q[0], (i & 1 ? "\n" : " ")); } } + + if (cpu_isar_feature(aa64_sme, cpu) && + FIELD_EX64(env->svcr, SVCR, ZA) && + sme_exception_el(env, el) == 0) { + int zcr_len = sve_vqm1_for_el_sm(env, el, true); + int svl = (zcr_len + 1) * 16; + int svl_lg10 = svl < 100 ? 2 : 3; + + for (i = 0; i < svl; i++) { + qemu_fprintf(f, "ZA[%0*d]=", svl_lg10, i); + for (j = zcr_len; j >= 0; --j) { + qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%c", + env->zarray[i].d[2 * j + 1], + env->zarray[i].d[2 * j], + j ? ':' : '\n'); + } + } + } } #else @@ -1044,34 +1314,24 @@ static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags) } } -uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz) +uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz) { uint32_t Aff1 = idx / clustersz; uint32_t Aff0 = idx % clustersz; return (Aff1 << ARM_AFF1_SHIFT) | Aff0; } -static void cpreg_hashtable_data_destroy(gpointer data) +uint64_t arm_cpu_mp_affinity(ARMCPU *cpu) { - /* - * Destroy function for cpu->cp_regs hashtable data entries. - * We must free the name string because it was g_strdup()ed in - * add_cpreg_to_hashtable(). It's OK to cast away the 'const' - * from r->name because we know we definitely allocated it. - */ - ARMCPRegInfo *r = data; - - g_free((void *)r->name); - g_free(r); + return cpu->mp_affinity; } static void arm_cpu_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); - cpu_set_cpustate_pointers(cpu); - cpu->cp_regs = g_hash_table_new_full(g_int_hash, g_int_equal, - g_free, cpreg_hashtable_data_destroy); + cpu->cp_regs = g_hash_table_new_full(g_direct_hash, g_direct_equal, + NULL, g_free); QLIST_INIT(&cpu->pre_el_change_hooks); QLIST_INIT(&cpu->el_change_hooks); @@ -1079,11 +1339,13 @@ static void arm_cpu_initfn(Object *obj) #ifdef CONFIG_USER_ONLY # ifdef TARGET_AARCH64 /* - * The linux kernel defaults to 512-bit vectors, when sve is supported. - * See documentation for /proc/sys/abi/sve_default_vector_length, and - * our corresponding sve-default-vector-length cpu property. + * The linux kernel defaults to 512-bit for SVE, and 256-bit for SME. + * These values were chosen to fit within the default signal frame. + * See documentation for /proc/sys/abi/{sve,sme}_default_vector_length, + * and our corresponding cpu property. */ cpu->sve_default_vq = 4; + cpu->sme_default_vq = 2; # endif #else /* Our inbound IRQ and FIQ lines */ @@ -1110,11 +1372,12 @@ static void arm_cpu_initfn(Object *obj) * picky DTB consumer will also provide a helpful error message. 

 static void arm_cpu_initfn(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);

-    cpu_set_cpustate_pointers(cpu);
-    cpu->cp_regs = g_hash_table_new_full(g_int_hash, g_int_equal,
-                                         g_free, cpreg_hashtable_data_destroy);
+    cpu->cp_regs = g_hash_table_new_full(g_direct_hash, g_direct_equal,
+                                         NULL, g_free);

     QLIST_INIT(&cpu->pre_el_change_hooks);
     QLIST_INIT(&cpu->el_change_hooks);

@@ -1079,11 +1339,13 @@ static void arm_cpu_initfn(Object *obj)
 #ifdef CONFIG_USER_ONLY
 # ifdef TARGET_AARCH64
     /*
-     * The linux kernel defaults to 512-bit vectors, when sve is supported.
-     * See documentation for /proc/sys/abi/sve_default_vector_length, and
-     * our corresponding sve-default-vector-length cpu property.
+     * The linux kernel defaults to 512-bit for SVE, and 256-bit for SME.
+     * These values were chosen to fit within the default signal frame.
+     * See documentation for /proc/sys/abi/{sve,sme}_default_vector_length,
+     * and our corresponding cpu property.
      */
     cpu->sve_default_vq = 4;
+    cpu->sme_default_vq = 2;
 # endif
 #else
     /* Our inbound IRQ and FIQ lines */
@@ -1110,11 +1372,12 @@ static void arm_cpu_initfn(Object *obj)
      * picky DTB consumer will also provide a helpful error message.
      */
     cpu->dtb_compatible = "qemu,unknown";
-    cpu->psci_version = 1; /* By default assume PSCI v0.1 */
+    cpu->psci_version = QEMU_PSCI_VERSION_0_1; /* By default assume PSCI v0.1 */
     cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE;

     if (tcg_enabled() || hvf_enabled()) {
-        cpu->psci_version = 2; /* TCG and HVF implement PSCI 0.2 */
+        /* TCG and HVF implement PSCI 1.1 */
+        cpu->psci_version = QEMU_PSCI_VERSION_1_1;
     }
 }

@@ -1128,9 +1391,6 @@ static Property arm_cpu_reset_cbar_property =
 static Property arm_cpu_reset_hivecs_property =
             DEFINE_PROP_BOOL("reset-hivecs", ARMCPU, reset_hivecs, false);

-static Property arm_cpu_rvbar_property =
-            DEFINE_PROP_UINT64("rvbar", ARMCPU, rvbar, 0);
-
 #ifndef CONFIG_USER_ONLY
 static Property arm_cpu_has_el2_property =
     DEFINE_PROP_BOOL("has_el2", ARMCPU, has_el2, true);

@@ -1145,6 +1405,9 @@ static Property arm_cpu_cfgend_property =
 static Property arm_cpu_has_vfp_property =
     DEFINE_PROP_BOOL("vfp", ARMCPU, has_vfp, true);

+static Property arm_cpu_has_vfp_d32_property =
+    DEFINE_PROP_BOOL("vfp-d32", ARMCPU, has_vfp_d32, true);
+
 static Property arm_cpu_has_neon_property =
     DEFINE_PROP_BOOL("neon", ARMCPU, has_neon, true);

@@ -1211,17 +1474,108 @@ unsigned int gt_cntfrq_period_ns(ARMCPU *cpu)
         NANOSECONDS_PER_SECOND / cpu->gt_cntfrq_hz : 1;
 }

+static void arm_cpu_propagate_feature_implications(ARMCPU *cpu)
+{
+    CPUARMState *env = &cpu->env;
+    bool no_aa32 = false;
+
+    /*
+     * Some features automatically imply others: set the feature
+     * bits explicitly for these cases.
+     */
+
+    if (arm_feature(env, ARM_FEATURE_M)) {
+        set_feature(env, ARM_FEATURE_PMSA);
+    }
+
+    if (arm_feature(env, ARM_FEATURE_V8)) {
+        if (arm_feature(env, ARM_FEATURE_M)) {
+            set_feature(env, ARM_FEATURE_V7);
+        } else {
+            set_feature(env, ARM_FEATURE_V7VE);
+        }
+    }
+
+    /*
+     * There exist AArch64 cpus without AArch32 support.  When KVM
+     * queries ID_ISAR0_EL1 on such a host, the value is UNKNOWN.
+     * Similarly, we cannot check ID_AA64PFR0 without AArch64 support.
+     * As a general principle, we also do not make ID register
+     * consistency checks anywhere unless using TCG, because only
+     * for TCG would a consistency-check failure be a QEMU bug.
+     */
+    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+        no_aa32 = !cpu_isar_feature(aa64_aa32, cpu);
+    }
+
+    if (arm_feature(env, ARM_FEATURE_V7VE)) {
+        /*
+         * v7 Virtualization Extensions. In real hardware this implies
+         * EL2 and also the presence of the Security Extensions.
+         * For QEMU, for backwards-compatibility we implement some
+         * CPUs or CPU configs which have no actual EL2 or EL3 but do
+         * include the various other features that V7VE implies.
+         * Presence of EL2 itself is ARM_FEATURE_EL2, and of the
+         * Security Extensions is ARM_FEATURE_EL3.
+         */
+        assert(!tcg_enabled() || no_aa32 ||
+               cpu_isar_feature(aa32_arm_div, cpu));
+        set_feature(env, ARM_FEATURE_LPAE);
+        set_feature(env, ARM_FEATURE_V7);
+    }
+    if (arm_feature(env, ARM_FEATURE_V7)) {
+        set_feature(env, ARM_FEATURE_VAPA);
+        set_feature(env, ARM_FEATURE_THUMB2);
+        set_feature(env, ARM_FEATURE_MPIDR);
+        if (!arm_feature(env, ARM_FEATURE_M)) {
+            set_feature(env, ARM_FEATURE_V6K);
+        } else {
+            set_feature(env, ARM_FEATURE_V6);
+        }
+
+        /*
+         * Always define VBAR for V7 CPUs even if it doesn't exist in
+         * non-EL3 configs. This is needed by some legacy boards.
+         */
+        set_feature(env, ARM_FEATURE_VBAR);
+    }
+    if (arm_feature(env, ARM_FEATURE_V6K)) {
+        set_feature(env, ARM_FEATURE_V6);
+        set_feature(env, ARM_FEATURE_MVFR);
+    }
+    if (arm_feature(env, ARM_FEATURE_V6)) {
+        set_feature(env, ARM_FEATURE_V5);
+        if (!arm_feature(env, ARM_FEATURE_M)) {
+            assert(!tcg_enabled() || no_aa32 ||
+                   cpu_isar_feature(aa32_jazelle, cpu));
+            set_feature(env, ARM_FEATURE_AUXCR);
+        }
+    }
+    if (arm_feature(env, ARM_FEATURE_V5)) {
+        set_feature(env, ARM_FEATURE_V4T);
+    }
+    if (arm_feature(env, ARM_FEATURE_LPAE)) {
+        set_feature(env, ARM_FEATURE_V7MP);
+    }
+    if (arm_feature(env, ARM_FEATURE_CBAR_RO)) {
+        set_feature(env, ARM_FEATURE_CBAR);
+    }
+    if (arm_feature(env, ARM_FEATURE_THUMB2) &&
+        !arm_feature(env, ARM_FEATURE_M)) {
+        set_feature(env, ARM_FEATURE_THUMB_DSP);
+    }
+}
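The implication chains in arm_cpu_propagate_feature_implications() above are transitive: one ARM_FEATURE_V8 input switches on V7VE, V7, V6K, V6, V5 and V4T in turn, because each set_feature() call feeds the next if-block. A minimal standalone sketch of that cascade with a toy bitmask (names illustrative; the M-profile special cases are omitted):

#include <assert.h>
#include <stdint.h>

enum { F_V4T, F_V5, F_V6, F_V6K, F_V7, F_V7VE, F_V8 };
#define BIT(f) (1u << (f))

/* Toy version of the cascade: each test sees bits set by earlier tests. */
static uint32_t propagate(uint32_t features)
{
    if (features & BIT(F_V8))   features |= BIT(F_V7VE);
    if (features & BIT(F_V7VE)) features |= BIT(F_V7);
    if (features & BIT(F_V7))   features |= BIT(F_V6K);
    if (features & BIT(F_V6K))  features |= BIT(F_V6);
    if (features & BIT(F_V6))   features |= BIT(F_V5);
    if (features & BIT(F_V5))   features |= BIT(F_V4T);
    return features;
}

int main(void)
{
    /* One v8 input implies the whole legacy chain down to v4T. */
    assert(propagate(BIT(F_V8)) ==
           (BIT(F_V8) | BIT(F_V7VE) | BIT(F_V7) | BIT(F_V6K) |
            BIT(F_V6) | BIT(F_V5) | BIT(F_V4T)));
    return 0;
}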

 void arm_cpu_post_init(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);

-    /* M profile implies PMSA. We have to do this here rather than
-     * in realize with the other feature-implication checks because
-     * we look at the PMSA bit to see if we should add some properties.
+    /*
+     * Some features imply others. Figure this out now, because we
+     * are going to look at the feature bits in deciding which
+     * properties to add.
      */
-    if (arm_feature(&cpu->env, ARM_FEATURE_M)) {
-        set_feature(&cpu->env, ARM_FEATURE_PMSA);
-    }
+    arm_cpu_propagate_feature_implications(cpu);

     if (arm_feature(&cpu->env, ARM_FEATURE_CBAR) ||
         arm_feature(&cpu->env, ARM_FEATURE_CBAR_RO)) {
@@ -1232,8 +1586,10 @@ void arm_cpu_post_init(Object *obj)
         qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_hivecs_property);
     }

-    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
-        qdev_property_add_static(DEVICE(obj), &arm_cpu_rvbar_property);
+    if (arm_feature(&cpu->env, ARM_FEATURE_V8)) {
+        object_property_add_uint64_ptr(obj, "rvbar",
+                                       &cpu->rvbar_prop,
+                                       OBJ_PROP_FLAG_READWRITE);
     }

 #ifndef CONFIG_USER_ONLY
@@ -1265,12 +1621,34 @@ void arm_cpu_post_init(Object *obj)
      * KVM does not currently allow us to lie to the guest about its
      * ID/feature registers, so the guest always sees what the host has.
      */
-    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)
-        ? cpu_isar_feature(aa64_fp_simd, cpu)
-        : cpu_isar_feature(aa32_vfp, cpu)) {
+    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+        if (cpu_isar_feature(aa64_fp_simd, cpu)) {
+            cpu->has_vfp = true;
+            cpu->has_vfp_d32 = true;
+            if (tcg_enabled() || qtest_enabled()) {
+                qdev_property_add_static(DEVICE(obj),
+                                         &arm_cpu_has_vfp_property);
+            }
+        }
+    } else if (cpu_isar_feature(aa32_vfp, cpu)) {
         cpu->has_vfp = true;
-        if (!kvm_enabled()) {
-            qdev_property_add_static(DEVICE(obj), &arm_cpu_has_vfp_property);
+        if (tcg_enabled() || qtest_enabled()) {
+            qdev_property_add_static(DEVICE(obj),
+                                     &arm_cpu_has_vfp_property);
+        }
+        if (cpu_isar_feature(aa32_simd_r32, cpu)) {
+            cpu->has_vfp_d32 = true;
+            /*
+             * The permitted values of the SIMDReg bits [3:0] on
+             * Armv8-A are either 0b0000 or 0b0010. On such CPUs,
+             * make sure that has_vfp_d32 cannot be set to false.
+             */
+            if ((tcg_enabled() || qtest_enabled())
+                && !(arm_feature(&cpu->env, ARM_FEATURE_V8)
+                     && !arm_feature(&cpu->env, ARM_FEATURE_M))) {
+                qdev_property_add_static(DEVICE(obj),
+                                         &arm_cpu_has_vfp_d32_property);
+            }
         }
     }

@@ -1317,6 +1695,11 @@ void arm_cpu_post_init(Object *obj)
                                  OBJ_PROP_FLAG_READWRITE);
     }

+    /* Not DEFINE_PROP_UINT32: we want this to be settable after realize */
+    object_property_add_uint32_ptr(obj, "psci-conduit",
+                                   &cpu->psci_conduit,
+                                   OBJ_PROP_FLAG_READWRITE);
+
     qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property);

     if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) {
@@ -1324,7 +1707,7 @@ void arm_cpu_post_init(Object *obj)
     }

     if (kvm_enabled()) {
-        kvm_arm_add_vcpu_properties(obj);
+        kvm_arm_add_vcpu_properties(cpu);
     }

 #ifndef CONFIG_USER_ONLY
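For reference on the vfp-d32 property registered above: MVFR0.SIMDReg is a 4-bit field where 0b0001 advertises 16 double-precision registers and 0b0010 advertises 32, and A-profile Armv8 permits only 0b0000 or 0b0010, which is why the property is withheld there. A small standalone sketch of the field arithmetic (plain shift/mask stand-ins for QEMU's FIELD_EX32/FIELD_DP32):

#include <assert.h>
#include <stdint.h>

/* MVFR0.SIMDReg lives in bits [3:0]. */
static uint32_t mvfr0_simdreg(uint32_t mvfr0) { return mvfr0 & 0xf; }
static uint32_t mvfr0_set_simdreg(uint32_t mvfr0, uint32_t v)
{
    return (mvfr0 & ~0xfu) | (v & 0xf);
}

int main(void)
{
    uint32_t mvfr0 = 0x10110222;          /* example value: SIMDReg = 2 */
    assert(mvfr0_simdreg(mvfr0) == 2);    /* 32 D registers */
    mvfr0 = mvfr0_set_simdreg(mvfr0, 1);  /* vfp-d32=off: 16 registers */
    assert(mvfr0_simdreg(mvfr0) == 1);
    return 0;
}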
@@ -1373,6 +1756,7 @@ void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp)
 {
     Error *local_err = NULL;

+#ifdef TARGET_AARCH64
     if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
         arm_cpu_sve_finalize(cpu, &local_err);
         if (local_err != NULL) {
@@ -1381,18 +1765,34 @@ void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp)
         }

         /*
-         * KVM does not support modifications to this feature.
-         * We have not registered the cpu properties when KVM
-         * is in use, so the user will not be able to set them.
+         * FEAT_SME is not architecturally dependent on FEAT_SVE (unless
+         * FEAT_SME_FA64 is present). However our implementation currently
+         * assumes it, so if the user asked for sve=off then turn off SME
+         * also. (KVM doesn't currently support SME at all.)
          */
-        if (!kvm_enabled()) {
-            arm_cpu_pauth_finalize(cpu, &local_err);
-            if (local_err != NULL) {
-                error_propagate(errp, local_err);
-                return;
-            }
+        if (cpu_isar_feature(aa64_sme, cpu) && !cpu_isar_feature(aa64_sve, cpu)) {
+            object_property_set_bool(OBJECT(cpu), "sme", false, &error_abort);
+        }
+
+        arm_cpu_sme_finalize(cpu, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            return;
+        }
+
+        arm_cpu_pauth_finalize(cpu, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            return;
+        }
+
+        arm_cpu_lpa2_finalize(cpu, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            return;
         }
     }
+#endif

     if (kvm_enabled()) {
         kvm_arm_steal_time_finalize(cpu, &local_err);
@@ -1409,9 +1809,12 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     ARMCPU *cpu = ARM_CPU(dev);
     ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev);
     CPUARMState *env = &cpu->env;
-    int pagebits;
     Error *local_err = NULL;
-    bool no_aa32 = false;
+
+#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY)
+    /* Use pc-relative instructions in system-mode */
+    cs->tcg_cflags |= CF_PCREL;
+#endif

     /* If we needed to query the host kernel for the CPU features
      * then it's possible that might have failed in the initfn, but
@@ -1443,25 +1846,32 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         }
     }

-    if (kvm_enabled()) {
+    if (!tcg_enabled() && !qtest_enabled()) {
         /*
+         * We assume that no accelerator except TCG (and the "not really an
+         * accelerator" qtest) can handle these features, because Arm hardware
+         * virtualization can't virtualize them.
+         *
          * Catch all the cases which might cause us to create more than one
          * address space for the CPU (otherwise we will assert() later in
          * cpu_address_space_init()).
         */
         if (arm_feature(env, ARM_FEATURE_M)) {
             error_setg(errp,
-                       "Cannot enable KVM when using an M-profile guest CPU");
+                       "Cannot enable %s when using an M-profile guest CPU",
+                       current_accel_name());
             return;
         }
         if (cpu->has_el3) {
             error_setg(errp,
-                       "Cannot enable KVM when guest CPU has EL3 enabled");
+                       "Cannot enable %s when guest CPU has EL3 enabled",
+                       current_accel_name());
             return;
         }
         if (cpu->tag_memory) {
             error_setg(errp,
-                       "Cannot enable KVM when guest CPUs has MTE enabled");
+                       "Cannot enable %s when the guest CPU has MTE enabled",
+                       current_accel_name());
             return;
         }
     }

@@ -1505,6 +1915,17 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         return;
     }

+#ifdef CONFIG_USER_ONLY
+    /*
+     * User mode relies on IC IVAU instructions to catch modification of
+     * dual-mapped code.
+     *
+     * Clear CTR_EL0.DIC to ensure that software that honors these flags
+     * uses IC IVAU even if the emulated processor does not normally
+     * require it.
+     */
+    cpu->ctr = FIELD_DP64(cpu->ctr, CTR_EL0, DIC, 0);
+#endif
+
     if (arm_feature(env, ARM_FEATURE_AARCH64) &&
         cpu->has_vfp != cpu->has_neon) {
         /*
@@ -1516,6 +1937,19 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         return;
     }

+    if (cpu->has_vfp_d32 != cpu->has_neon) {
+        error_setg(errp, "ARM CPUs must have both VFP-D32 and Neon or neither");
+        return;
+    }
+
+    if (!cpu->has_vfp_d32) {
+        uint32_t u;
+
+        u = cpu->isar.mvfr0;
+        u = FIELD_DP32(u, MVFR0, SIMDREG, 1); /* 16 registers */
+        cpu->isar.mvfr0 = u;
+    }
+
     if (!cpu->has_vfp) {
         uint64_t t;
         uint32_t u;
@@ -1566,6 +2000,12 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         unset_feature(env, ARM_FEATURE_NEON);

         t = cpu->isar.id_aa64isar0;
+        t = FIELD_DP64(t, ID_AA64ISAR0, AES, 0);
+        t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 0);
+        t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 0);
+        t = FIELD_DP64(t, ID_AA64ISAR0, SHA3, 0);
+        t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 0);
+        t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 0);
         t = FIELD_DP64(t, ID_AA64ISAR0, DP, 0);
         cpu->isar.id_aa64isar0 = t;

@@ -1580,6 +2020,9 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         cpu->isar.id_aa64pfr0 = t;

         u = cpu->isar.id_isar5;
+        u = FIELD_DP32(u, ID_ISAR5, AES, 0);
+        u = FIELD_DP32(u, ID_ISAR5, SHA1, 0);
+        u = FIELD_DP32(u, ID_ISAR5, SHA2, 0);
         u = FIELD_DP32(u, ID_ISAR5, RDM, 0);
         u = FIELD_DP32(u, ID_ISAR5, VCMA, 0);
         cpu->isar.id_isar5 = u;
@@ -1647,112 +2090,45 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         cpu->isar.id_isar3 = u;
     }

-    /* Some features automatically imply others: */
-    if (arm_feature(env, ARM_FEATURE_V8)) {
-        if (arm_feature(env, ARM_FEATURE_M)) {
-            set_feature(env, ARM_FEATURE_V7);
-        } else {
-            set_feature(env, ARM_FEATURE_V7VE);
-        }
-    }
-
-    /*
-     * There exist AArch64 cpus without AArch32 support.  When KVM
-     * queries ID_ISAR0_EL1 on such a host, the value is UNKNOWN.
-     * Similarly, we cannot check ID_AA64PFR0 without AArch64 support.
-     * As a general principle, we also do not make ID register
-     * consistency checks anywhere unless using TCG, because only
-     * for TCG would a consistency-check failure be a QEMU bug.
-     */
-    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
-        no_aa32 = !cpu_isar_feature(aa64_aa32, cpu);
-    }
-
-    if (arm_feature(env, ARM_FEATURE_V7VE)) {
-        /* v7 Virtualization Extensions. In real hardware this implies
-         * EL2 and also the presence of the Security Extensions.
-         * For QEMU, for backwards-compatibility we implement some
-         * CPUs or CPU configs which have no actual EL2 or EL3 but do
-         * include the various other features that V7VE implies.
-         * Presence of EL2 itself is ARM_FEATURE_EL2, and of the
-         * Security Extensions is ARM_FEATURE_EL3.
-         */
-        assert(!tcg_enabled() || no_aa32 ||
-               cpu_isar_feature(aa32_arm_div, cpu));
-        set_feature(env, ARM_FEATURE_LPAE);
-        set_feature(env, ARM_FEATURE_V7);
-    }
-    if (arm_feature(env, ARM_FEATURE_V7)) {
-        set_feature(env, ARM_FEATURE_VAPA);
-        set_feature(env, ARM_FEATURE_THUMB2);
-        set_feature(env, ARM_FEATURE_MPIDR);
-        if (!arm_feature(env, ARM_FEATURE_M)) {
-            set_feature(env, ARM_FEATURE_V6K);
-        } else {
-            set_feature(env, ARM_FEATURE_V6);
-        }
-
-        /* Always define VBAR for V7 CPUs even if it doesn't exist in
-         * non-EL3 configs. This is needed by some legacy boards.
-         */
-        set_feature(env, ARM_FEATURE_VBAR);
-    }
-    if (arm_feature(env, ARM_FEATURE_V6K)) {
-        set_feature(env, ARM_FEATURE_V6);
-        set_feature(env, ARM_FEATURE_MVFR);
-    }
-    if (arm_feature(env, ARM_FEATURE_V6)) {
-        set_feature(env, ARM_FEATURE_V5);
-        if (!arm_feature(env, ARM_FEATURE_M)) {
-            assert(!tcg_enabled() || no_aa32 ||
-                   cpu_isar_feature(aa32_jazelle, cpu));
-            set_feature(env, ARM_FEATURE_AUXCR);
-        }
-    }
-    if (arm_feature(env, ARM_FEATURE_V5)) {
-        set_feature(env, ARM_FEATURE_V4T);
-    }
-    if (arm_feature(env, ARM_FEATURE_LPAE)) {
-        set_feature(env, ARM_FEATURE_V7MP);
-    }
-    if (arm_feature(env, ARM_FEATURE_CBAR_RO)) {
-        set_feature(env, ARM_FEATURE_CBAR);
-    }
-    if (arm_feature(env, ARM_FEATURE_THUMB2) &&
-        !arm_feature(env, ARM_FEATURE_M)) {
-        set_feature(env, ARM_FEATURE_THUMB_DSP);
-    }

     /*
      * We rely on no XScale CPU having VFP so we can use the same bits in the
      * TB flags field for VECSTRIDE and XSCALE_CPAR.
      */
-    assert(arm_feature(&cpu->env, ARM_FEATURE_AARCH64) ||
+    assert(arm_feature(env, ARM_FEATURE_AARCH64) ||
            !cpu_isar_feature(aa32_vfp_simd, cpu) ||
            !arm_feature(env, ARM_FEATURE_XSCALE));

-    if (arm_feature(env, ARM_FEATURE_V7) &&
-        !arm_feature(env, ARM_FEATURE_M) &&
-        !arm_feature(env, ARM_FEATURE_PMSA)) {
-        /* v7VMSA drops support for the old ARMv5 tiny pages, so we
-         * can use 4K pages.
-         */
-        pagebits = 12;
-    } else {
-        /* For CPUs which might have tiny 1K pages, or which have an
-         * MPU and might have small region sizes, stick with 1K pages.
-         */
-        pagebits = 10;
-    }
-    if (!set_preferred_target_page_bits(pagebits)) {
-        /* This can only ever happen for hotplugging a CPU, or if
-         * the board code incorrectly creates a CPU which it has
-         * promised via minimum_page_size that it will not.
-         */
-        error_setg(errp, "This CPU requires a smaller page size than the "
-                   "system is using");
-        return;
+#ifndef CONFIG_USER_ONLY
+    {
+        int pagebits;
+        if (arm_feature(env, ARM_FEATURE_V7) &&
+            !arm_feature(env, ARM_FEATURE_M) &&
+            !arm_feature(env, ARM_FEATURE_PMSA)) {
+            /*
+             * v7VMSA drops support for the old ARMv5 tiny pages,
+             * so we can use 4K pages.
+             */
+            pagebits = 12;
+        } else {
+            /*
+             * For CPUs which might have tiny 1K pages, or which have an
+             * MPU and might have small region sizes, stick with 1K pages.
+             */
+            pagebits = 10;
+        }
+        if (!set_preferred_target_page_bits(pagebits)) {
+            /*
+             * This can only ever happen for hotplugging a CPU, or if
+             * the board code incorrectly creates a CPU which it has
+             * promised via minimum_page_size that it will not.
+             */
+            error_setg(errp, "This CPU requires a smaller page size "
                        "than the system is using");
+            return;
+        }
     }
+#endif
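The pagebits values chosen above translate directly into the softmmu page size: 12 bits give 4 KiB pages for v7VMSA CPUs, while 10 bits keep 1 KiB pages for CPUs that may use ARMv5 tiny pages or small MPU regions. A trivial worked check of that arithmetic:

#include <assert.h>

int main(void)
{
    assert((1 << 12) == 4096);  /* v7 VMSA and later: 4K target pages */
    assert((1 << 10) == 1024);  /* ARMv5 tiny pages / small MPU regions */
    return 0;
}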
     /* This cpu-id-to-MPIDR affinity is used only for TCG; KVM will override it.
      * We don't support setting cluster ID ([16..23]) (known as Aff2
@@ -1760,8 +2136,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
      * so these bits always RAZ.
      */
     if (cpu->mp_affinity == ARM64_AFFINITY_INVALID) {
-        cpu->mp_affinity = arm_cpu_mp_affinity(cs->cpu_index,
-                                               ARM_DEFAULT_CPUS_PER_CLUSTER);
+        cpu->mp_affinity = arm_build_mp_affinity(cs->cpu_index,
+                                                 ARM_DEFAULT_CPUS_PER_CLUSTER);
     }

     if (cpu->reset_hivecs) {
@@ -1769,7 +2145,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     }

     if (cpu->cfgend) {
-        if (arm_feature(&cpu->env, ARM_FEATURE_V7)) {
+        if (arm_feature(env, ARM_FEATURE_V7)) {
             cpu->reset_sctlr |= SCTLR_EE;
         } else {
             cpu->reset_sctlr |= SCTLR_B;
@@ -1782,11 +2158,18 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
          */
         unset_feature(env, ARM_FEATURE_EL3);

-        /* Disable the security extension feature bits in the processor feature
-         * registers as well. These are id_pfr1[7:4] and id_aa64pfr0[15:12].
+        /*
+         * Disable the security extension feature bits in the processor
+         * feature registers as well.
          */
-        cpu->isar.id_pfr1 &= ~0xf0;
-        cpu->isar.id_aa64pfr0 &= ~0xf000;
+        cpu->isar.id_pfr1 = FIELD_DP32(cpu->isar.id_pfr1, ID_PFR1, SECURITY, 0);
+        cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPSDBG, 0);
+        cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0,
+                                           ID_AA64PFR0, EL3, 0);
+
+        /* Disable the realm management extension, which requires EL3. */
+        cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0,
+                                           ID_AA64PFR0, RME, 0);
     }

     if (!cpu->has_el2) {
@@ -1817,33 +2200,81 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     }

     if (!arm_feature(env, ARM_FEATURE_EL2)) {
-        /* Disable the hypervisor feature bits in the processor feature
-         * registers if we don't have EL2. These are id_pfr1[15:12] and
-         * id_aa64pfr0_el1[11:8].
+        /*
+         * Disable the hypervisor feature bits in the processor feature
+         * registers if we don't have EL2.
          */
-        cpu->isar.id_aa64pfr0 &= ~0xf00;
-        cpu->isar.id_pfr1 &= ~0xf000;
+        cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0,
+                                           ID_AA64PFR0, EL2, 0);
+        cpu->isar.id_pfr1 = FIELD_DP32(cpu->isar.id_pfr1,
+                                       ID_PFR1, VIRTUALIZATION, 0);
     }

+    if (cpu_isar_feature(aa64_mte, cpu)) {
+        /*
+         * The architectural range of GM blocksize is 2-6, however qemu
+         * doesn't support blocksize of 2 (see HELPER(ldgm)).
+         */
+        if (tcg_enabled()) {
+            assert(cpu->gm_blocksize >= 3 && cpu->gm_blocksize <= 6);
+        }
+
 #ifndef CONFIG_USER_ONLY
-    if (cpu->tag_memory == NULL && cpu_isar_feature(aa64_mte, cpu)) {
         /*
-         * Disable the MTE feature bits if we do not have tag-memory
-         * provided by the machine.
+         * If we do not have tag-memory provided by the machine,
+         * reduce MTE support to instructions enabled at EL0.
+         * This matches Cortex-A710 BROADCASTMTE input being LOW.
          */
-        cpu->isar.id_aa64pfr1 =
-            FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
-    }
+        if (cpu->tag_memory == NULL) {
+            cpu->isar.id_aa64pfr1 =
+                FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
+        }
 #endif
+    }
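For context on the gm_blocksize assertion above: the GMID_EL0.BS scale is log2 of the block size in 32-bit words (the same scale QEMU uses for dcz_blocksize), so the supported range 3-6 corresponds to LDGM/STGM blocks of 32 to 256 bytes, i.e. 2 to 16 MTE tag granules. A hedged sketch of that conversion, assuming this reading of the scale:

#include <assert.h>

/* Convert a GMID_EL0.BS-style value (log2 of words) to bytes and to
 * 16-byte MTE tag granules; assumes the log2(words) scale noted above. */
static int gm_block_bytes(int bs)    { return (1 << bs) * 4; }
static int gm_block_granules(int bs) { return gm_block_bytes(bs) / 16; }

int main(void)
{
    assert(gm_block_bytes(3) == 32  && gm_block_granules(3) == 2);
    assert(gm_block_bytes(6) == 256 && gm_block_granules(6) == 16);
    return 0;
}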

+    if (tcg_enabled()) {
+        /*
+         * Don't report some architectural features in the ID registers
+         * where TCG does not yet implement them (not even a minimal
+         * stub version). This avoids guests falling over when they
+         * try to access the non-existent system registers for them.
+         */
+        /* FEAT_SPE (Statistical Profiling Extension) */
+        cpu->isar.id_aa64dfr0 =
+            FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMSVER, 0);
+        /* FEAT_TRBE (Trace Buffer Extension) */
+        cpu->isar.id_aa64dfr0 =
+            FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEBUFFER, 0);
+        /* FEAT_TRF (Self-hosted Trace Extension) */
+        cpu->isar.id_aa64dfr0 =
+            FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEFILT, 0);
+        cpu->isar.id_dfr0 =
+            FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, TRACEFILT, 0);
+        /* Trace Macrocell system register access */
+        cpu->isar.id_aa64dfr0 =
+            FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEVER, 0);
+        cpu->isar.id_dfr0 =
+            FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPTRC, 0);
+        /* Memory mapped trace */
+        cpu->isar.id_dfr0 =
+            FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, MMAPTRC, 0);
+        /* FEAT_AMU (Activity Monitors Extension) */
+        cpu->isar.id_aa64pfr0 =
+            FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, AMU, 0);
+        cpu->isar.id_pfr0 =
+            FIELD_DP32(cpu->isar.id_pfr0, ID_PFR0, AMU, 0);
+        /* FEAT_MPAM (Memory Partitioning and Monitoring Extension) */
+        cpu->isar.id_aa64pfr0 =
+            FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, MPAM, 0);
+    }

     /* MPU can be configured out of a PMSA CPU either by setting has-mpu
      * to false or by setting pmsav7-dregion to 0.
      */
-    if (!cpu->has_mpu) {
-        cpu->pmsav7_dregion = 0;
-    }
-    if (cpu->pmsav7_dregion == 0) {
+    if (!cpu->has_mpu || cpu->pmsav7_dregion == 0) {
         cpu->has_mpu = false;
+        cpu->pmsav7_dregion = 0;
+        cpu->pmsav8r_hdregion = 0;
     }

     if (arm_feature(env, ARM_FEATURE_PMSA) &&
@@ -1870,6 +2301,19 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
             env->pmsav7.dracr = g_new0(uint32_t, nr);
         }
     }
+
+    if (cpu->pmsav8r_hdregion > 0xff) {
+        error_setg(errp, "PMSAv8 MPU EL2 #regions invalid %" PRIu32,
+                   cpu->pmsav8r_hdregion);
+        return;
+    }
+
+    if (cpu->pmsav8r_hdregion) {
+        env->pmsav8.hprbar = g_new0(uint32_t,
+                                    cpu->pmsav8r_hdregion);
+        env->pmsav8.hprlar = g_new0(uint32_t,
+                                    cpu->pmsav8r_hdregion);
+    }

     if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
@@ -1890,6 +2334,12 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         set_feature(env, ARM_FEATURE_VBAR);
     }

+#ifndef CONFIG_USER_ONLY
+    if (tcg_enabled() && cpu_isar_feature(aa64_rme, cpu)) {
+        arm_register_el_change_hook(cpu, &gt_rme_post_el_change, 0);
+    }
+#endif
+
     register_cp_regs_for_features(cpu);
     arm_cpu_register_gdb_regs_for_features(cpu);

@@ -1986,15 +2436,11 @@ static ObjectClass *arm_cpu_class_by_name(const char *cpu_model)
     oc = object_class_by_name(typename);
     g_strfreev(cpuname);
     g_free(typename);
-    if (!oc || !object_class_dynamic_cast(oc, TYPE_ARM_CPU) ||
-        object_class_is_abstract(oc)) {
-        return NULL;
-    }
+
     return oc;
 }

 static Property arm_cpu_properties[] = {
-    DEFINE_PROP_UINT32("psci-conduit", ARMCPU, psci_conduit, 0),
     DEFINE_PROP_UINT64("midr", ARMCPU, midr, 0),
     DEFINE_PROP_UINT64("mp-affinity", ARMCPU,
                         mp_affinity, ARM64_AFFINITY_INVALID),
@@ -2003,15 +2449,15 @@ static Property arm_cpu_properties[] = {
     DEFINE_PROP_END_OF_LIST()
 };

-static gchar *arm_gdb_arch_name(CPUState *cs)
+static const gchar *arm_gdb_arch_name(CPUState *cs)
 {
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;

     if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
-        return g_strdup("iwmmxt");
+        return "iwmmxt";
     }
-    return g_strdup("arm");
+    return "arm";
 }

 #ifndef CONFIG_USER_ONLY
@@ -2028,13 +2474,17 @@ static const struct SysemuCPUOps arm_sysemu_ops = {
 #endif

 #ifdef CONFIG_TCG
-static const struct TCGCPUOps arm_tcg_ops = {
+static const TCGCPUOps arm_tcg_ops = {
     .initialize = arm_translate_init,
     .synchronize_from_tb = arm_cpu_synchronize_from_tb,
-    .tlb_fill = arm_cpu_tlb_fill,
     .debug_excp_handler = arm_debug_excp_handler,
+    .restore_state_to_opc = arm_restore_state_to_opc,

-#if !defined(CONFIG_USER_ONLY)
+#ifdef CONFIG_USER_ONLY
+    .record_sigsegv = arm_cpu_record_sigsegv,
+    .record_sigbus = arm_cpu_record_sigbus,
+#else
+    .tlb_fill = arm_cpu_tlb_fill,
     .cpu_exec_interrupt = arm_cpu_exec_interrupt,
     .do_interrupt = arm_cpu_do_interrupt,
     .do_transaction_failed = arm_cpu_do_transaction_failed,
@@ -2051,26 +2501,28 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
     ARMCPUClass *acc = ARM_CPU_CLASS(oc);
     CPUClass *cc = CPU_CLASS(acc);
     DeviceClass *dc = DEVICE_CLASS(oc);
+    ResettableClass *rc = RESETTABLE_CLASS(oc);

     device_class_set_parent_realize(dc, arm_cpu_realizefn,
                                     &acc->parent_realize);
     device_class_set_props(dc, arm_cpu_properties);
-    device_class_set_parent_reset(dc, arm_cpu_reset, &acc->parent_reset);
+
+    resettable_class_set_parent_phases(rc, NULL, arm_cpu_reset_hold, NULL,
+                                       &acc->parent_phases);

     cc->class_by_name = arm_cpu_class_by_name;
     cc->has_work = arm_cpu_has_work;
+    cc->mmu_index = arm_cpu_mmu_index;
     cc->dump_state = arm_cpu_dump_state;
     cc->set_pc = arm_cpu_set_pc;
+    cc->get_pc = arm_cpu_get_pc;
     cc->gdb_read_register = arm_cpu_gdb_read_register;
     cc->gdb_write_register = arm_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
     cc->sysemu_ops = &arm_sysemu_ops;
 #endif
-    cc->gdb_num_core_regs = 26;
-    cc->gdb_core_xml_file = "arm-core.xml";
     cc->gdb_arch_name = arm_gdb_arch_name;
-    cc->gdb_get_dynamic_xml = arm_gdb_get_dynamic_xml;
     cc->gdb_stop_before_watchpoint = true;
     cc->disas_set_info = arm_disas_set_info;

@@ -2079,30 +2531,6 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
 #endif /* CONFIG_TCG */
 }

-#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
-static void arm_host_initfn(Object *obj)
-{
-    ARMCPU *cpu = ARM_CPU(obj);
-
-#ifdef CONFIG_KVM
-    kvm_arm_set_cpu_features_from_host(cpu);
-    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
-        aarch64_add_sve_properties(obj);
-    }
-#else
-    hvf_arm_set_cpu_features_from_host(cpu);
-#endif
-    arm_cpu_post_init(obj);
-}
-
-static const TypeInfo host_arm_cpu_type_info = {
-    .name = TYPE_ARM_HOST_CPU,
-    .parent = TYPE_AARCH64_CPU,
-    .instance_init = arm_host_initfn,
-};
-
-#endif
-
 static void arm_cpu_instance_init(Object *obj)
 {
     ARMCPUClass *acc = ARM_CPU_GET_CLASS(obj);
@@ -2114,18 +2542,17 @@ static void arm_cpu_instance_init(Object *obj)
 static void cpu_register_class_init(ObjectClass *oc, void *data)
 {
     ARMCPUClass *acc = ARM_CPU_CLASS(oc);
+    CPUClass *cc = CPU_CLASS(acc);

     acc->info = data;
+    cc->gdb_core_xml_file = "arm-core.xml";
 }

 void arm_cpu_register(const ARMCPUInfo *info)
 {
     TypeInfo type_info = {
         .parent = TYPE_ARM_CPU,
-        .instance_size = sizeof(ARMCPU),
-        .instance_align = __alignof__(ARMCPU),
         .instance_init = arm_cpu_instance_init,
-        .class_size = sizeof(ARMCPUClass),
         .class_init = info->class_init ?: cpu_register_class_init,
         .class_data = (void *)info,
     };
@@ -2150,10 +2577,6 @@ static const TypeInfo arm_cpu_type_info = {
 static void arm_cpu_register_types(void)
 {
     type_register_static(&arm_cpu_type_info);
-
-#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
-    type_register_static(&host_arm_cpu_type_info);
-#endif
 }

 type_init(arm_cpu_register_types)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index e33f37b70a..bc0c84873f 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -21,10 +21,14 @@
 #define ARM_CPU_H

 #include "kvm-consts.h"
+#include "qemu/cpu-float.h"
 #include "hw/registerfields.h"
 #include "cpu-qom.h"
 #include "exec/cpu-defs.h"
+#include "exec/gdbstub.h"
 #include "qapi/qapi-types-common.h"
+#include "target/arm/multiprocessing.h"
+#include "target/arm/gtimer.h"

 /* ARM processors have a weak memory model */
 #define TCG_GUEST_DEFAULT_MO      (0)

@@ -55,6 +59,8 @@
 #define EXCP_LSERR          21   /* v8M LSERR SecureFault */
 #define EXCP_UNALIGNED      22   /* v7M UNALIGNED UsageFault */
 #define EXCP_DIVBYZERO      23   /* v7M DIVBYZERO UsageFault */
+#define EXCP_VSERR          24
+#define EXCP_GPC            25   /* v9 Granule Protection Check Fault */
 /* NB: add new EXCP_ defines to the array in arm_log_exception() too */

 #define ARMV7M_EXCP_RESET   1
@@ -69,25 +75,11 @@
 #define ARMV7M_EXCP_PENDSV  14
 #define ARMV7M_EXCP_SYSTICK 15

-/* For M profile, some registers are banked secure vs non-secure;
- * these are represented as a 2-element array where the first element
- * is the non-secure copy and the second is the secure copy.
- * When the CPU does not have implement the security extension then
- * only the first element is used.
- * This means that the copy for the current security state can be
- * accessed via env->registerfield[env->v7m.secure] (whether the security
- * extension is implemented or not).
- */
-enum {
-    M_REG_NS = 0,
-    M_REG_S = 1,
-    M_REG_NUM_BANKS = 2,
-};
-
 /* ARM-specific interrupt pending bits.  */
 #define CPU_INTERRUPT_FIQ   CPU_INTERRUPT_TGT_EXT_1
 #define CPU_INTERRUPT_VIRQ  CPU_INTERRUPT_TGT_EXT_2
 #define CPU_INTERRUPT_VFIQ  CPU_INTERRUPT_TGT_EXT_3
+#define CPU_INTERRUPT_VSERR CPU_INTERRUPT_TGT_INT_0

 /* The usual mapping for an AArch64 system register to its AArch32
  * counterpart is for the 32 bit world to have access to the lower
@@ -95,7 +87,7 @@
  * therefore useful to be able to pass TCG the offset of the least
  * significant half of a uint64_t struct member.
  */
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
 #define offsetoflow32(S, M) (offsetof(S, M) + sizeof(uint32_t))
 #define offsetofhigh32(S, M) offsetof(S, M)
 #else
@@ -103,12 +95,6 @@
 #define offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t))
 #endif

-/* Meanings of the ARMCPU object's four inbound GPIO lines */
-#define ARM_CPU_IRQ 0
-#define ARM_CPU_FIQ 1
-#define ARM_CPU_VIRQ 2
-#define ARM_CPU_VFIQ 3
-
 /* ARM-specific extra insn start words:
  * 1: Conditional execution bits
  * 2: Partial exception syndrome for data aborts
  */
 #define TARGET_INSN_START_EXTRA_WORDS 2

 /* The 2nd extra word holding syndrome info for data aborts does not use
- * the upper 6 bits nor the lower 14 bits. We mask and shift it down to
+ * the upper 6 bits nor the lower 13 bits. We mask and shift it down to
  * help the sleb128 encoder do a better job.
  * When restoring the CPU state, we shift it back up.
  */
 #define ARM_INSN_START_WORD2_MASK  ((1 << 26) - 1)
-#define ARM_INSN_START_WORD2_SHIFT 14
+#define ARM_INSN_START_WORD2_SHIFT 13
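A worked example of the offsetoflow32()/offsetofhigh32() pair above: on a little-endian host the low 32 bits of a uint64_t member sit at the member's own offset, while the high half follows 4 bytes later (the big-endian branch swaps the two). This standalone sketch checks the little-endian variants:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct demo { uint32_t pad; uint64_t reg; };

/* Little-endian variants of the macros above. */
#define offsetoflow32(S, M)  offsetof(S, M)
#define offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t))

int main(void)
{
    /* The AArch32 view of the 64-bit 'reg' starts at the member offset... */
    assert(offsetoflow32(struct demo, reg) == offsetof(struct demo, reg));
    /* ...and the high half follows 4 bytes later. */
    assert(offsetofhigh32(struct demo, reg) ==
           offsetof(struct demo, reg) + 4);
    return 0;
}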

 /* We currently assume float and double are IEEE single and double
    precision respectively.
@@ -132,23 +118,21 @@ enum {
  */

 /**
- * DynamicGDBXMLInfo:
- * @desc: Contains the XML descriptions.
- * @num: Number of the registers in this XML seen by GDB.
+ * DynamicGDBFeatureInfo:
+ * @desc: Contains the feature descriptions.
  * @data: A union with data specific to the set of registers
  *    @cpregs_keys: Array that contains the corresponding Key of
  *                  a given cpreg with the same order of the cpreg
  *                  in the XML description.
  */
-typedef struct DynamicGDBXMLInfo {
-    char *desc;
-    int num;
+typedef struct DynamicGDBFeatureInfo {
+    GDBFeature desc;
     union {
         struct {
             uint32_t *keys;
         } cpregs;
     } data;
-} DynamicGDBXMLInfo;
+} DynamicGDBFeatureInfo;

 /* CPU state for each instance of a generic timer (in cp15 c14) */
 typedef struct ARMGenericTimer {
     uint64_t cval; /* Timer CompareValue register */
     uint64_t ctl; /* Timer Control register */
 } ARMGenericTimer;

-#define GTIMER_PHYS     0
-#define GTIMER_VIRT     1
-#define GTIMER_HYP      2
-#define GTIMER_SEC      3
-#define GTIMER_HYPVIRT  4
-#define NUM_GTIMERS     5
-
-typedef struct {
-    uint64_t raw_tcr;
-    uint32_t mask;
-    uint32_t base_mask;
-} TCR;
-
-#define VTCR_NSW (1u << 29)
-#define VTCR_NSA (1u << 30)
-#define VSTCR_SW VTCR_NSW
-#define VSTCR_SA VTCR_NSA
-
 /* Define a maximum sized vector register.
  * For 32-bit, this is a 128-bit NEON/AdvSIMD register.
  * For 64-bit, this is a 2048-bit SVE register.
@@ -202,12 +168,8 @@ typedef struct {

 #ifdef TARGET_AARCH64
 # define ARM_MAX_VQ    16
-void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp);
-void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp);
 #else
 # define ARM_MAX_VQ    1
-static inline void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) { }
-static inline void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) { }
 #endif

 typedef struct ARMVectorReg {
@@ -232,7 +194,11 @@ typedef struct CPUARMTBFlags {
     target_ulong flags2;
 } CPUARMTBFlags;

-typedef struct CPUARMState {
+typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
+
+typedef struct NVICState NVICState;
+
+typedef struct CPUArchState {
     /* Regs for current mode.  */
     uint32_t regs[16];

@@ -253,10 +219,12 @@ typedef struct CPUARMState {
      * nRW (also known as M[4]) is kept, inverted, in env->aarch64
      * DAIF (exception masks) are kept in env->daif
      * BTYPE is kept in env->btype
+     * SM and ZA are kept in env->svcr
      * all other bits are stored in their correct places in env->pstate
      */
     uint32_t pstate;
-    uint32_t aarch64; /* 1 if CPU is in aarch64 state; inverse of PSTATE.nRW */
+    bool aarch64; /* True if CPU is in aarch64 state; inverse of PSTATE.nRW */
+    bool thumb;   /* True if CPU is in thumb mode; cpsr[5] */

     /* Cached TBFLAGS state.  See below for which bits are included. */
     CPUARMTBFlags hflags;
@@ -283,10 +251,10 @@ typedef struct CPUARMState {
     uint32_t ZF; /* Z set if zero.  */
     uint32_t QF; /* 0 or 1 */
     uint32_t GE; /* cpsr[19:16] */
-    uint32_t thumb; /* cpsr[5]. 0 = arm mode, 1 = thumb mode. */
     uint32_t condexec_bits; /* IT bits.  cpsr[15:10,26:25].  */
     uint32_t btype;  /* BTI branch type.  spsr[11:10].  */
     uint64_t daif; /* exception masks, in the bits they are in PSTATE */
+    uint64_t svcr; /* PSTATE.{SM,ZA} in the bits they are in SVCR */

     uint64_t elr_el[4]; /* AArch64 exception link regs  */
     uint64_t sp_el[4]; /* AArch64 banked stack pointers */
@@ -312,6 +280,7 @@ typedef struct CPUARMState {
         };
         uint64_t sctlr_el[4];
     };
+    uint64_t vsctlr; /* Virtualization System control register. */
     uint64_t cpacr_el1; /* Architectural feature access control register */
     uint64_t cptr_el[4];  /* ARMv8 feature trap registers */
     uint32_t c1_xscaleauxcr; /* XScale auxiliary control register.  */
@@ -338,9 +307,9 @@ typedef struct CPUARMState {
         uint64_t vttbr_el2; /* Virtualization Translation Table Base.  */
         uint64_t vsttbr_el2; /* Secure Virtualization Translation Table. */
         /* MMU translation table base control. */
-        TCR tcr_el[4];
-        TCR vtcr_el2; /* Virtualization Translation Control. */
-        TCR vstcr_el2; /* Secure Virtualization Translation Control. */
+        uint64_t tcr_el[4];
+        uint64_t vtcr_el2; /* Virtualization Translation Control.  */
+        uint64_t vstcr_el2; /* Secure Virtualization Translation Control. */
         uint32_t c2_data; /* MPU data cacheable bits.  */
         uint32_t c2_insn; /* MPU instruction cacheable bits.  */
         union { /* MMU domain access control register
@@ -357,6 +326,7 @@ typedef struct CPUARMState {
         uint32_t pmsav5_data_ap; /* PMSAv5 MPU data access permissions */
         uint32_t pmsav5_insn_ap; /* PMSAv5 MPU insn access permissions */
         uint64_t hcr_el2; /* Hypervisor configuration register */
+        uint64_t hcrx_el2; /* Extended Hypervisor configuration register */
         uint64_t scr_el3; /* Secure configuration register.  */
         union { /* Fault status registers.  */
             struct {
@@ -380,7 +350,7 @@ typedef struct CPUARMState {
         union { /* Fault address registers. */
             struct {
                 uint64_t _unused_far0;
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
                 uint32_t ifar_ns;
                 uint32_t dfar_ns;
                 uint32_t ifar_s;
@@ -417,7 +387,7 @@ typedef struct CPUARMState {
         uint64_t c9_pminten; /* perf monitor interrupt enables */
         union { /* Memory attribute redirection */
             struct {
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
                 uint64_t _unused_mair_0;
                 uint32_t mair1_ns;
                 uint32_t mair0_ns;
@@ -445,6 +415,7 @@ typedef struct CPUARMState {
             uint64_t vbar_el[4];
         };
         uint32_t mvbar; /* (monitor) vector base address register */
+        uint64_t rvbar; /* rvbar sampled from rvbar property at reset */
         struct { /* FCSE PID. */
             uint32_t fcseidr_ns;
             uint32_t fcseidr_s;
@@ -467,6 +438,7 @@ typedef struct CPUARMState {
             };
             uint64_t tpidr_el[4];
         };
+        uint64_t tpidr2_el0;
         /* The secure banks of these registers don't map anywhere */
         uint64_t tpidrurw_s;
         uint64_t tpidrprw_s;
@@ -478,8 +450,9 @@ typedef struct CPUARMState {
         };
         uint64_t c14_cntfrq; /* Counter Frequency register */
         uint64_t c14_cntkctl; /* Timer Control register */
-        uint32_t cnthctl_el2; /* Counter/Timer Hyp Control register */
+        uint64_t cnthctl_el2; /* Counter/Timer Hyp Control register */
         uint64_t cntvoff_el2; /* Counter Virtual Offset register */
+        uint64_t cntpoff_el2; /* Counter Physical Offset register */
         ARMGenericTimer c14_timer[NUM_GTIMERS];
         uint32_t c15_cpar; /* XScale Coprocessor Access Register */
         uint32_t c15_ticonfig; /* TI925T configuration byte.  */
@@ -494,8 +467,10 @@ typedef struct CPUARMState {
         uint64_t dbgbcr[16]; /* breakpoint control registers */
         uint64_t dbgwvr[16]; /* watchpoint value registers */
         uint64_t dbgwcr[16]; /* watchpoint control registers */
+        uint64_t dbgclaim;   /* DBGCLAIM bits */
         uint64_t mdscr_el1;
         uint64_t oslsr_el1; /* OS Lock Status */
+        uint64_t osdlr_el1; /* OS DoubleLock status */
         uint64_t mdcr_el2;
         uint64_t mdcr_el3;
         /* Stores the architectural value of the counter *the last time it was
@@ -521,6 +496,29 @@ typedef struct CPUARMState {
         uint64_t tfsr_el[4]; /* tfsre0_el1 is index 0.  */
         uint64_t gcr_el1;
         uint64_t rgsr_el1;
+
+        /* Minimal RAS registers */
+        uint64_t disr_el1;
+        uint64_t vdisr_el2;
+        uint64_t vsesr_el2;
+
+        /*
+         * Fine-Grained Trap registers. We store these as arrays so the
+         * access checking code doesn't have to manually select
+         * HFGRTR_EL2 vs HDFGRTR_EL2 etc when looking up the bit to test.
+         * FEAT_FGT2 will add more elements to these arrays.
+         */
+        uint64_t fgt_read[2];  /* HFGRTR, HDFGRTR */
+        uint64_t fgt_write[2]; /* HFGWTR, HDFGWTR */
+        uint64_t fgt_exec[1];  /* HFGITR */
+
+        /* RME registers */
+        uint64_t gpccr_el3;
+        uint64_t gptbr_el3;
+        uint64_t mfar_el3;
+
+        /* NV2 register */
+        uint64_t vncr_el2;
     } cp15;

     struct {
@@ -654,11 +652,19 @@ typedef struct CPUARMState {
         float_status standard_fp_status;
         float_status standard_fp_status_f16;

-        /* ZCR_EL[1-3] */
-        uint64_t zcr_el[4];
+        uint64_t zcr_el[4];   /* ZCR_EL[1-3] */
+        uint64_t smcr_el[4];  /* SMCR_EL[1-3] */
     } vfp;
+
     uint64_t exclusive_addr;
     uint64_t exclusive_val;
+    /*
+     * Contains the 'val' for the second 64-bit register of LDXP, which comes
+     * from the higher address, not the high part of a complete 128-bit value.
+     * In some ways it might be more convenient to record the exclusive value
+     * as the low and high halves of a 128 bit data value, but the current
+     * semantics of these fields are baked into the migration format.
+     */
     uint64_t exclusive_high;

     /* iwMMXt coprocessor state.  */
@@ -677,16 +683,38 @@ typedef struct CPUARMState {
         ARMPACKey apdb;
         ARMPACKey apga;
     } keys;
-#endif

-#if defined(CONFIG_USER_ONLY)
-    /* For usermode syscall translation.  */
-    int eabi;
+    uint64_t scxtnum_el[4];
+
+    /*
+     * SME ZA storage -- 256 x 256 byte array, with bytes in host word order,
+     * as we do with vfp.zregs[].  This corresponds to the architectural ZA
+     * array, where ZA[N] is in the least-significant bytes of env->zarray[N].
+     *
+     * When SVL is less than the architectural maximum, the accessible
+     * storage is restricted, such that if the SVL is X bytes the guest can
+     * see only the bottom X elements of zarray[], and only the least
+     * significant X bytes of each element of the array. (In other words,
+     * the observable part is always square.)
+     *
+     * The ZA storage can also be considered as a set of square tiles of
+     * elements of different sizes. The mapping from tiles to the ZA array
+     * is architecturally defined, such that for tiles of elements of esz
+     * bytes, the Nth row (or "horizontal slice") of tile T is in
+     * ZA[T + N * esz]. Note that this means that each tile is not contiguous
+     * in the ZA storage, because its rows are striped through the ZA array.
+     *
+     * Because this is so large, keep this toward the end of the reset area,
+     * to keep the offsets into the rest of the structure smaller.
+     */
+    ARMVectorReg zarray[ARM_MAX_VQ * 16];
 #endif
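To make the tile striping described in the zarray[] comment concrete: for 4-byte (esz = 4) elements, row N of tile T occupies zarray[T + N * 4], so tile 1's rows land at indices 1, 5, 9, ... rather than in one contiguous block. A small standalone sketch of the index mapping:

#include <assert.h>

/* Index of the Nth horizontal slice of tile T, for esz-byte elements,
 * per the architectural mapping described in the comment above. */
static int za_tile_row_index(int tile, int row, int esz)
{
    return tile + row * esz;
}

int main(void)
{
    assert(za_tile_row_index(1, 0, 4) == 1);   /* ZA1.S row 0 */
    assert(za_tile_row_index(1, 1, 4) == 5);   /* ZA1.S row 1: striped */
    assert(za_tile_row_index(0, 3, 8) == 24);  /* ZA0.D row 3 */
    return 0;
}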

     struct CPUBreakpoint *cpu_breakpoint[16];
     struct CPUWatchpoint *cpu_watchpoint[16];

+    /* Optional fault info across tlb lookup. */
+    ARMMMUFaultInfo *tlb_fi;
+
     /* Fields up to this point are cleared by a CPU reset */
     struct {} end_reset_fields;

@@ -712,8 +740,11 @@ typedef struct CPUARMState {
          */
         uint32_t *rbar[M_REG_NUM_BANKS];
         uint32_t *rlar[M_REG_NUM_BANKS];
+        uint32_t *hprbar;
+        uint32_t *hprlar;
         uint32_t mair0[M_REG_NUM_BANKS];
         uint32_t mair1[M_REG_NUM_BANKS];
+        uint32_t hprselr;
     } pmsav8;

     /* v8M SAU */
@@ -724,10 +755,15 @@ typedef struct CPUARMState {
         uint32_t ctrl;
     } sau;

-    void *nvic;
+#if !defined(CONFIG_USER_ONLY)
+    NVICState *nvic;
     const struct arm_boot_info *boot_info;
     /* Store GICv3CPUState to access from this struct */
     void *gicv3state;
+#else /* CONFIG_USER_ONLY */
+    /* For usermode syscall translation.  */
+    bool eabi;
+#endif /* CONFIG_USER_ONLY */

 #ifdef TARGET_TAGGED_ADDRESSES
     /* Linux syscall tagged address support */
@@ -768,18 +804,28 @@ typedef enum ARMPSCIState {

 typedef struct ARMISARegisters ARMISARegisters;

+/*
+ * In map, each set bit is a supported vector length of (bit-number + 1) * 16
+ * bytes, i.e. each bit number + 1 is the vector length in quadwords.
+ *
+ * While processing properties during initialization, corresponding init bits
+ * are set for bits in sve_vq_map that have been set by properties.
+ *
+ * Bits set in supported represent valid vector lengths for the CPU type.
+ */
+typedef struct {
+    uint32_t map, init, supported;
+} ARMVQMap;
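Reading the ARMVQMap encoding above: bit number B set in a field means the vector length (B + 1) * 16 bytes is valid, so a CPU offering 128-, 256- and 512-bit SVE vectors would carry supported = 0xb. A standalone sketch of the bit arithmetic:

#include <assert.h>
#include <stdint.h>

/* Bit B set => vector length of (B + 1) * 16 bytes is valid. */
static int vq_bit_to_bytes(int bit_number) { return (bit_number + 1) * 16; }
static uint32_t vq_bit(int quadwords)      { return 1u << (quadwords - 1); }

int main(void)
{
    uint32_t supported = vq_bit(1) | vq_bit(2) | vq_bit(4); /* 128/256/512 */
    assert(supported == 0xb);
    assert(vq_bit_to_bytes(3) == 64);    /* bit 3 => VQ 4 => 512 bits */
    assert(supported & vq_bit(4));
    assert(!(supported & vq_bit(3)));    /* 384-bit vectors not offered */
    return 0;
}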

 /**
  * ARMCPU:
  * @env: #CPUARMState
  *
  * An ARM CPU core.
  */
-struct ARMCPU {
-    /*< private >*/
+struct ArchCPU {
     CPUState parent_obj;
-    /*< public >*/

-    CPUNegativeOffsetState neg;
     CPUARMState env;

     /* Coprocessor information */
@@ -804,8 +850,10 @@ struct ARMCPU {
     uint64_t *cpreg_vmstate_values;
     int32_t cpreg_vmstate_array_len;

-    DynamicGDBXMLInfo dyn_sysreg_xml;
-    DynamicGDBXMLInfo dyn_svereg_xml;
+    DynamicGDBFeatureInfo dyn_sysreg_feature;
+    DynamicGDBFeatureInfo dyn_svereg_feature;
+    DynamicGDBFeatureInfo dyn_m_systemreg_feature;
+    DynamicGDBFeatureInfo dyn_m_secextreg_feature;

     /* Timers used by the generic (architected) timer */
     QEMUTimer *gt_timer[NUM_GTIMERS];
@@ -851,6 +899,8 @@ struct ARMCPU {
     bool has_pmu;
     /* CPU has VFP */
     bool has_vfp;
+    /* CPU has 32 VFP registers */
+    bool has_vfp_d32;
     /* CPU has Neon */
     bool has_neon;
     /* CPU has M-profile DSP extension */
@@ -860,6 +910,8 @@ struct ARMCPU {
     bool has_mpu;
     /* PMSAv7 MPU number of supported regions */
     uint32_t pmsav7_dregion;
+    /* PMSAv8 MPU number of supported hyp regions */
+    uint32_t pmsav8r_hdregion;
     /* v8M SAU number of supported regions */
     uint32_t sau_sregion;

@@ -878,6 +930,7 @@ struct ARMCPU {
      */
     uint32_t kvm_target;

+#ifdef CONFIG_KVM
     /* KVM init features for this CPU */
     uint32_t kvm_init_features[7];

@@ -890,6 +943,7 @@ struct ARMCPU {

     /* KVM steal time */
     OnOffAuto kvm_steal_time;
+#endif /* CONFIG_KVM */

     /* Uniprocessor system with MP extensions */
     bool mp_is_up;

@@ -934,6 +988,7 @@ struct ARMCPU {
         uint32_t id_mmfr2;
         uint32_t id_mmfr3;
         uint32_t id_mmfr4;
+        uint32_t id_mmfr5;
         uint32_t id_pfr0;
         uint32_t id_pfr1;
         uint32_t id_pfr2;
@@ -941,9 +996,13 @@ struct ARMCPU {
         uint32_t mvfr1;
         uint32_t mvfr2;
         uint32_t id_dfr0;
+        uint32_t id_dfr1;
         uint32_t dbgdidr;
+        uint32_t dbgdevid;
+        uint32_t dbgdevid1;
         uint64_t id_aa64isar0;
         uint64_t id_aa64isar1;
+        uint64_t id_aa64isar2;
         uint64_t id_aa64pfr0;
         uint64_t id_aa64pfr1;
         uint64_t id_aa64mmfr0;
@@ -952,6 +1011,8 @@ struct ARMCPU {
         uint64_t id_aa64dfr0;
         uint64_t id_aa64dfr1;
         uint64_t id_aa64zfr0;
+        uint64_t id_aa64smfr0;
+        uint64_t reset_pmcr_el0;
     } isar;
     uint64_t midr;
     uint32_t revidr;
@@ -972,22 +1033,29 @@ struct ARMCPU {
     uint64_t reset_cbar;
     uint32_t reset_auxcr;
     bool reset_hivecs;
+    uint8_t reset_l0gptsz;

     /*
      * Intermediate values used during property parsing.
-     * Once finalized, the values should be read from ID_AA64ISAR1.
+     * Once finalized, the values should be read from ID_AA64*.
      */
     bool prop_pauth;
     bool prop_pauth_impdef;
+    bool prop_pauth_qarma3;
+    bool prop_lpa2;

     /* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */
-    uint32_t dcz_blocksize;
-    uint64_t rvbar;
+    uint8_t dcz_blocksize;
+    /* GM blocksize, in log_2(words), ie low 4 bits of GMID_EL0 */
+    uint8_t gm_blocksize;
+
+    uint64_t rvbar_prop; /* Property/input signals.  */

     /* Configurable aspects of GIC cpu interface (which is part of the CPU) */
     int gic_num_lrs; /* number of list registers */
     int gic_vpribits; /* number of virtual priority bits */
     int gic_vprebits; /* number of virtual preemption bits */
+    int gic_pribits; /* number of physical priority bits */

     /* Whether the cfgend input is high (i.e. this CPU should reset into
      * big-endian mode).  This setting isn't used directly: instead it modifies
@@ -1010,64 +1078,109 @@ struct ARMCPU {
 #ifdef CONFIG_USER_ONLY
     /* Used to set the default vector length at process start. */
     uint32_t sve_default_vq;
+    uint32_t sme_default_vq;
 #endif

-    /*
-     * In sve_vq_map each set bit is a supported vector length of
-     * (bit-number + 1) * 16 bytes, i.e. each bit number + 1 is the vector
-     * length in quadwords.
-     *
-     * While processing properties during initialization, corresponding
-     * sve_vq_init bits are set for bits in sve_vq_map that have been
-     * set by properties.
-     *
-     * Bits set in sve_vq_supported represent valid vector lengths for
-     * the CPU type.
-     */
-    DECLARE_BITMAP(sve_vq_map, ARM_MAX_VQ);
-    DECLARE_BITMAP(sve_vq_init, ARM_MAX_VQ);
-    DECLARE_BITMAP(sve_vq_supported, ARM_MAX_VQ);
+    ARMVQMap sve_vq;
+    ARMVQMap sme_vq;

     /* Generic timer counter frequency, in Hz */
     uint64_t gt_cntfrq_hz;
 };

+typedef struct ARMCPUInfo {
+    const char *name;
+    void (*initfn)(Object *obj);
+    void (*class_init)(ObjectClass *oc, void *data);
+} ARMCPUInfo;
+
+/**
+ * ARMCPUClass:
+ * @parent_realize: The parent class' realize handler.
+ * @parent_phases: The parent class' reset phase handlers.
+ *
+ * An ARM CPU model.
+ */
+struct ARMCPUClass {
+    CPUClass parent_class;
+
+    const ARMCPUInfo *info;
+    DeviceRealize parent_realize;
+    ResettablePhases parent_phases;
+};
+
+struct AArch64CPUClass {
+    ARMCPUClass parent_class;
+};
+
+/* Callback functions for the generic timer's timers. */
+void arm_gt_ptimer_cb(void *opaque);
+void arm_gt_vtimer_cb(void *opaque);
+void arm_gt_htimer_cb(void *opaque);
+void arm_gt_stimer_cb(void *opaque);
+void arm_gt_hvtimer_cb(void *opaque);
+
 unsigned int gt_cntfrq_period_ns(ARMCPU *cpu);
+void gt_rme_post_el_change(ARMCPU *cpu, void *opaque);

 void arm_cpu_post_init(Object *obj);

-uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz);
+#define ARM_AFF0_SHIFT 0
+#define ARM_AFF0_MASK  (0xFFULL << ARM_AFF0_SHIFT)
+#define ARM_AFF1_SHIFT 8
+#define ARM_AFF1_MASK  (0xFFULL << ARM_AFF1_SHIFT)
+#define ARM_AFF2_SHIFT 16
+#define ARM_AFF2_MASK  (0xFFULL << ARM_AFF2_SHIFT)
+#define ARM_AFF3_SHIFT 32
+#define ARM_AFF3_MASK  (0xFFULL << ARM_AFF3_SHIFT)
+#define ARM_DEFAULT_CPUS_PER_CLUSTER 8
+
+#define ARM32_AFFINITY_MASK (ARM_AFF0_MASK | ARM_AFF1_MASK | ARM_AFF2_MASK)
+#define ARM64_AFFINITY_MASK \
+    (ARM_AFF0_MASK | ARM_AFF1_MASK | ARM_AFF2_MASK | ARM_AFF3_MASK)
+#define ARM64_AFFINITY_INVALID (~ARM64_AFFINITY_MASK)
+
+uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz);
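A worked example of arm_build_mp_affinity() (defined in cpu.c earlier in this patch) using the ARM_AFF*_SHIFT constants above, with the default cluster size of 8: cpu_index 10 yields Aff1 = 1 and Aff0 = 2, i.e. the MPIDR affinity value 0x102. A self-contained check:

#include <assert.h>
#include <stdint.h>

#define ARM_AFF1_SHIFT 8   /* matches the definition above */

static uint64_t build_mp_affinity(int idx, uint8_t clustersz)
{
    uint32_t Aff1 = idx / clustersz;
    uint32_t Aff0 = idx % clustersz;
    return (Aff1 << ARM_AFF1_SHIFT) | Aff0;
}

int main(void)
{
    assert(build_mp_affinity(10, 8) == 0x102);  /* cluster 1, core 2 */
    assert(build_mp_affinity(7, 8) == 0x007);   /* cluster 0, core 7 */
    return 0;
}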

 #ifndef CONFIG_USER_ONLY
 extern const VMStateDescription vmstate_arm_cpu;

 void arm_cpu_do_interrupt(CPUState *cpu);
 void arm_v7m_cpu_do_interrupt(CPUState *cpu);
-#endif /* !CONFIG_USER_ONLY */

 hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr,
                                          MemTxAttrs *attrs);
+#endif /* !CONFIG_USER_ONLY */

 int arm_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
 int arm_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);

-/*
- * Helpers to dynamically generates XML descriptions of the sysregs
- * and SVE registers. Returns the number of registers in each set.
- */
-int arm_gen_dynamic_sysreg_xml(CPUState *cpu, int base_reg);
-int arm_gen_dynamic_svereg_xml(CPUState *cpu, int base_reg);
-
-/* Returns the dynamically generated XML for the gdb stub.
- * Returns a pointer to the XML contents for the specified XML file or NULL
- * if the XML name doesn't match the predefined one.
- */
-const char *arm_gdb_get_dynamic_xml(CPUState *cpu, const char *xmlname);
-
 int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
-                             int cpuid, void *opaque);
+                             int cpuid, DumpState *s);
 int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
-                             int cpuid, void *opaque);
+                             int cpuid, DumpState *s);
+
+/**
+ * arm_emulate_firmware_reset: Emulate firmware CPU reset handling
+ * @cpu: CPU (which must have been freshly reset)
+ * @target_el: exception level to put the CPU into
+ * @secure: whether to put the CPU in secure state
+ *
+ * When QEMU is directly running a guest kernel at a lower level than
+ * EL3 it implicitly emulates some aspects of the guest firmware.
+ * This includes that on reset we need to configure the parts of the
+ * CPU corresponding to EL3 so that the real guest code can run at its
+ * lower exception level. This function does that post-reset CPU setup,
+ * for when we do direct boot of a guest kernel, and for when we
+ * emulate PSCI and similar firmware interfaces starting a CPU at a
+ * lower exception level.
+ *
+ * @target_el must be an EL implemented by the CPU between 1 and 3.
+ * We do not support dropping into a Secure EL other than 3.
+ *
+ * It is the responsibility of the caller to call arm_rebuild_hflags().
+ */
+void arm_emulate_firmware_reset(CPUState *cpustate, int target_el);

 #ifdef TARGET_AARCH64
 int aarch64_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
@@ -1075,7 +1188,7 @@ int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
 void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
 void aarch64_sve_change_el(CPUARMState *env, int old_el,
                            int new_el, bool el0_a64);
-void aarch64_add_sve_properties(Object *obj);
+void aarch64_set_svcr(CPUARMState *env, uint64_t new, uint64_t mask);

 /*
  * SVE registers are encoded in KVM's memory in an endianness-invariant format.
@@ -1088,7 +1201,7 @@ void aarch64_add_sve_properties(Object *obj);
  */
 static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
 {
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     int i;

     for (i = 0; i < nr; ++i) {
@@ -1106,7 +1219,6 @@ static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
 static inline void aarch64_sve_change_el(CPUARMState *env, int o,
                                          int n, bool a)
 { }
-static inline void aarch64_add_sve_properties(Object *obj) { }
 #endif

 void aarch64_sync_32_to_64(CPUARMState *env);
@@ -1114,7 +1226,22 @@ void aarch64_sync_64_to_32(CPUARMState *env);

 int fp_exception_el(CPUARMState *env, int cur_el);
 int sve_exception_el(CPUARMState *env, int cur_el);
-uint32_t sve_zcr_len_for_el(CPUARMState *env, int el);
+int sme_exception_el(CPUARMState *env, int cur_el);
+
+/**
+ * sve_vqm1_for_el_sm:
+ * @env: CPUARMState
+ * @el: exception level
+ * @sm: streaming mode
+ *
+ * Compute the current vector length for @el & @sm, in units of
+ * Quadwords Minus 1 -- the same scale used for ZCR_ELx.LEN.
+ * If @sm, compute for SVL, otherwise NVL.
+ */
+uint32_t sve_vqm1_for_el_sm(CPUARMState *env, int el, bool sm);

+/* Likewise, but using @sm = PSTATE.SM. */
+uint32_t sve_vqm1_for_el(CPUARMState *env, int el);
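The "Quadwords Minus 1" scale documented above (also used by the register-dump code in cpu.c earlier in this patch) means a return value of 0 denotes the minimum 16-byte vector. A standalone check of the conversion (helper name is illustrative, not part of the patch):

#include <assert.h>

/* ZCR_ELx.LEN-style encoding: VL = (vqm1 + 1) quadwords of 16 bytes. */
static int vl_bytes_from_vqm1(int vqm1)
{
    return (vqm1 + 1) * 16;
}

int main(void)
{
    assert(vl_bytes_from_vqm1(0) == 16);    /* minimum VL, 128 bits */
    assert(vl_bytes_from_vqm1(3) == 64);    /* 512-bit vectors */
    assert(vl_bytes_from_vqm1(15) == 256);  /* ARM_MAX_VQ, 2048 bits */
    return 0;
}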

 static inline bool is_a64(CPUARMState *env)
 {
@@ -1170,7 +1297,7 @@ void pmu_init(ARMCPU *cpu);
 #define SCTLR_D       (1U << 5) /* up to v5; RAO in v6 */
 #define SCTLR_CP15BEN (1U << 5) /* v7 onward */
 #define SCTLR_L       (1U << 6) /* up to v5; RAO in v6 and v7; RAZ in v8 */
-#define SCTLR_nAA     (1U << 6) /* when v8.4-LSE is implemented */
+#define SCTLR_nAA     (1U << 6) /* when FEAT_LSE2 is implemented */
 #define SCTLR_B       (1U << 7) /* up to v6; RAZ in v7 */
 #define SCTLR_ITD     (1U << 7) /* v8 onward */
 #define SCTLR_S       (1U << 8) /* up to v6; RAZ in v7 */
@@ -1198,6 +1325,7 @@ void pmu_init(ARMCPU *cpu);
 #define SCTLR_WXN     (1U << 19)
 #define SCTLR_ST      (1U << 20) /* up to ??, RAZ in v6 */
 #define SCTLR_UWXN    (1U << 20) /* v7 onward, AArch32 only */
+#define SCTLR_TSCXT   (1U << 20) /* FEAT_CSV2_1p2, AArch64 only */
 #define SCTLR_FI      (1U << 21) /* up to v7, v8 RES0 */
 #define SCTLR_IESB    (1U << 21) /* v8.2-IESB, AArch64 only */
 #define SCTLR_U       (1U << 22) /* up to v6, RAO in v7 */
@@ -1219,6 +1347,7 @@ void pmu_init(ARMCPU *cpu);
 #define SCTLR_EnIB    (1U << 30) /* v8.3, AArch64 only */
 #define SCTLR_EnIA    (1U << 31) /* v8.3, AArch64 only */
 #define SCTLR_DSSBS_32 (1U << 31) /* v8.5, AArch32 only */
+#define SCTLR_MSCEN   (1ULL << 33) /* FEAT_MOPS */
 #define SCTLR_BT0     (1ULL << 35) /* v8.5-BTI */
 #define SCTLR_BT1     (1ULL << 36) /* v8.5-BTI */
 #define SCTLR_ITFSB   (1ULL << 37) /* v8.5-MemTag */
@@ -1227,30 +1356,20 @@ void pmu_init(ARMCPU *cpu);
 #define SCTLR_ATA0    (1ULL << 42) /* v8.5-MemTag */
 #define SCTLR_ATA     (1ULL << 43) /* v8.5-MemTag */
 #define SCTLR_DSSBS_64 (1ULL << 44) /* v8.5, AArch64 only */
-
-#define CPTR_TCPAC    (1U << 31)
-#define CPTR_TTA      (1U << 20)
-#define CPTR_TFP      (1U << 10)
-#define CPTR_TZ       (1U << 8)   /* CPTR_EL2 */
-#define CPTR_EZ       (1U << 8)   /* CPTR_EL3 */
-
-#define MDCR_EPMAD    (1U << 21)
-#define MDCR_EDAD     (1U << 20)
-#define MDCR_SPME     (1U << 17)  /* MDCR_EL3 */
-#define MDCR_HPMD     (1U << 17)  /* MDCR_EL2 */
-#define MDCR_SDD      (1U << 16)
-#define MDCR_SPD      (3U << 14)
-#define MDCR_TDRA     (1U << 11)
-#define MDCR_TDOSA    (1U << 10)
-#define MDCR_TDA      (1U << 9)
-#define MDCR_TDE      (1U << 8)
-#define MDCR_HPME     (1U << 7)
-#define MDCR_TPM      (1U << 6)
-#define MDCR_TPMCR    (1U << 5)
-#define MDCR_HPMN     (0x1fU)
-
-/* Not all of the MDCR_EL3 bits are present in the 32-bit SDCR */
-#define SDCR_VALID_MASK (MDCR_EPMAD | MDCR_EDAD | MDCR_SPME | MDCR_SPD)
+#define SCTLR_TWEDEn  (1ULL << 45) /* FEAT_TWED */
+#define SCTLR_TWEDEL  MAKE_64BIT_MASK(46, 4) /* FEAT_TWED */
+#define SCTLR_TMT0    (1ULL << 50) /* FEAT_TME */
+#define SCTLR_TMT     (1ULL << 51) /* FEAT_TME */
+#define SCTLR_TME0    (1ULL << 52) /* FEAT_TME */
+#define SCTLR_TME     (1ULL << 53) /* FEAT_TME */
+#define SCTLR_EnASR   (1ULL << 54) /* FEAT_LS64_V */
+#define SCTLR_EnAS0   (1ULL << 55) /* FEAT_LS64_ACCDATA */
+#define SCTLR_EnALS   (1ULL << 56) /* FEAT_LS64 */
+#define SCTLR_EPAN    (1ULL << 57) /* FEAT_PAN3 */
+#define SCTLR_EnTP2   (1ULL << 60) /* FEAT_SME */
+#define SCTLR_NMI     (1ULL << 61) /* FEAT_NMI */
+#define SCTLR_SPINTMASK (1ULL << 62) /* FEAT_NMI */
+#define SCTLR_TIDCP   (1ULL << 63) /* FEAT_TIDCP1 */

 #define CPSR_M (0x1fU)
 #define CPSR_T (1U << 5)
@@ -1298,22 +1417,6 @@ void pmu_init(ARMCPU *cpu);
 #define XPSR_NZCV CPSR_NZCV
 #define XPSR_IT CPSR_IT

-#define TTBCR_N      (7U << 0) /* TTBCR.EAE==0 */
-#define TTBCR_T0SZ   (7U << 0) /* TTBCR.EAE==1 */
-#define TTBCR_PD0    (1U << 4)
-#define TTBCR_PD1    (1U << 5)
-#define TTBCR_EPD0   (1U << 7)
-#define TTBCR_IRGN0  (3U << 8)
-#define TTBCR_ORGN0  (3U << 10)
-#define TTBCR_SH0    (3U << 12)
-#define TTBCR_T1SZ   (3U << 16)
-#define TTBCR_A1     (1U << 22)
-#define TTBCR_EPD1   (1U << 23)
-#define TTBCR_IRGN1  (3U << 24)
-#define TTBCR_ORGN1  (3U << 26)
-#define TTBCR_SH1    (1U << 28)
-#define TTBCR_EAE    (1U << 31)
-
 /* Bit definitions for ARMv8 SPSR (PSTATE) format.
  * Only these are valid when in AArch64 mode; in
  * AArch32 mode SPSRs are basically CPSR-format.
  */
@@ -1349,6 +1452,14 @@ void pmu_init(ARMCPU *cpu);
 #define PSTATE_MODE_EL1t 4
 #define PSTATE_MODE_EL0t 0

+/* PSTATE bits that are accessed via SVCR and not stored in SPSR_ELx. */
+FIELD(SVCR, SM, 0, 1)
+FIELD(SVCR, ZA, 1, 1)
+
+/* Fields for SMCR_ELx. */
+FIELD(SMCR, LEN, 0, 4)
+FIELD(SMCR, FA64, 31, 1)
+
 /* Write a new value to v7m.exception, thus transitioning into or out
  * of Handler mode; this may result in a change of active stack pointer.
  */
@@ -1490,7 +1601,7 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
 #define HCR_TERR      (1ULL << 36)
 #define HCR_TEA       (1ULL << 37)
 #define HCR_MIOCNCE   (1ULL << 38)
-/* RES0 bit 39 */
+#define HCR_TME       (1ULL << 39)
 #define HCR_APK       (1ULL << 40)
 #define HCR_API       (1ULL << 41)
 #define HCR_NV        (1ULL << 42)
@@ -1499,7 +1610,7 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
 #define HCR_NV2       (1ULL << 45)
 #define HCR_FWB       (1ULL << 46)
 #define HCR_FIEN      (1ULL << 47)
-/* RES0 bit 48 */
+#define HCR_GPF       (1ULL << 48)
 #define HCR_TID4      (1ULL << 49)
 #define HCR_TICAB     (1ULL << 50)
 #define HCR_AMVOFFEN  (1ULL << 51)
@@ -1513,35 +1624,43 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
 #define HCR_TWEDEN    (1ULL << 59)
 #define HCR_TWEDEL    MAKE_64BIT_MASK(60, 4)

-#define HPFAR_NS      (1ULL << 63)
-
-#define SCR_NS                (1U << 0)
-#define SCR_IRQ               (1U << 1)
-#define SCR_FIQ               (1U << 2)
-#define SCR_EA                (1U << 3)
-#define SCR_FW                (1U << 4)
-#define SCR_AW                (1U << 5)
-#define SCR_NET               (1U << 6)
-#define SCR_SMD               (1U << 7)
-#define SCR_HCE               (1U << 8)
-#define SCR_SIF               (1U << 9)
-#define SCR_RW                (1U << 10)
-#define SCR_ST                (1U << 11)
-#define SCR_TWI               (1U << 12)
-#define SCR_TWE               (1U << 13)
-#define SCR_TLOR              (1U << 14)
-#define SCR_TERR              (1U << 15)
-#define SCR_APK               (1U << 16)
-#define SCR_API               (1U << 17)
-#define SCR_EEL2              (1U << 18)
-#define SCR_EASE              (1U << 19)
-#define SCR_NMEA              (1U << 20)
-#define SCR_FIEN              (1U << 21)
-#define SCR_ENSCXT            (1U << 25)
-#define SCR_ATA               (1U << 26)
-
-#define HSTR_TTEE (1 << 16)
-#define HSTR_TJDBX (1 << 17)
+#define SCR_NS                (1ULL << 0)
+#define SCR_IRQ               (1ULL << 1)
+#define SCR_FIQ               (1ULL << 2)
+#define SCR_EA                (1ULL << 3)
+#define SCR_FW                (1ULL << 4)
+#define SCR_AW                (1ULL << 5)
+#define SCR_NET               (1ULL << 6)
+#define SCR_SMD               (1ULL << 7)
+#define SCR_HCE               (1ULL << 8)
+#define SCR_SIF               (1ULL << 9)
+#define SCR_RW                (1ULL << 10)
+#define SCR_ST                (1ULL << 11)
+#define SCR_TWI               (1ULL << 12)
+#define SCR_TWE               (1ULL << 13)
+#define SCR_TLOR              (1ULL << 14)
+#define SCR_TERR              (1ULL << 15)
+#define SCR_APK               (1ULL << 16)
+#define SCR_API               (1ULL << 17)
+#define SCR_EEL2              (1ULL << 18)
+#define SCR_EASE              (1ULL << 19)
+#define SCR_NMEA              (1ULL << 20)
+#define SCR_FIEN              (1ULL << 21)
+#define SCR_ENSCXT            (1ULL << 25)
+#define SCR_ATA               (1ULL << 26)
+#define SCR_FGTEN             (1ULL << 27)
+#define SCR_ECVEN             (1ULL << 28)
+#define SCR_TWEDEN            (1ULL << 29)
+#define SCR_TWEDEL            MAKE_64BIT_MASK(30, 4)
+#define SCR_TME               (1ULL << 34)
+#define SCR_AMVOFFEN          (1ULL << 35)
+#define SCR_ENAS0             (1ULL << 36)
+#define SCR_ADEN              (1ULL << 37)
+#define SCR_HXEN              (1ULL << 38)
+#define SCR_TRNDR             (1ULL << 40)
+#define SCR_ENTP2             (1ULL << 41)
+#define SCR_GPF               (1ULL << 48)
+#define SCR_NSE               (1ULL << 62)
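The SCR_* constants above widen from 1U to 1ULL because SCR_EL3 now has architected bits above bit 31 (e.g. SCR_NSE at bit 62); shifting a 32-bit 1U by 62 would be undefined behaviour in C. A minimal demonstration:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* (1U << 62) would be undefined: the shift exceeds the width of
     * unsigned int.  Using an unsigned long long constant makes the
     * high SCR_EL3 bits representable. */
    uint64_t scr_nse = 1ULL << 62;
    assert(scr_nse == 0x4000000000000000ULL);
    assert((scr_nse >> 62) == 1);
    return 0;
}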
current FPSCR value. */ uint32_t vfp_get_fpscr(CPUARMState *env); @@ -1928,6 +2047,7 @@ FIELD(ID_MMFR4, CCIDX, 24, 4) FIELD(ID_MMFR4, EVT, 28, 4) FIELD(ID_MMFR5, ETS, 0, 4) +FIELD(ID_MMFR5, NTLBPA, 4, 4) FIELD(ID_PFR0, STATE0, 0, 4) FIELD(ID_PFR0, STATE1, 4, 4) @@ -1956,6 +2076,7 @@ FIELD(ID_AA64ISAR0, SHA1, 8, 4) FIELD(ID_AA64ISAR0, SHA2, 12, 4) FIELD(ID_AA64ISAR0, CRC32, 16, 4) FIELD(ID_AA64ISAR0, ATOMIC, 20, 4) +FIELD(ID_AA64ISAR0, TME, 24, 4) FIELD(ID_AA64ISAR0, RDM, 28, 4) FIELD(ID_AA64ISAR0, SHA3, 32, 4) FIELD(ID_AA64ISAR0, SM3, 36, 4) @@ -1980,6 +2101,23 @@ FIELD(ID_AA64ISAR1, SPECRES, 40, 4) FIELD(ID_AA64ISAR1, BF16, 44, 4) FIELD(ID_AA64ISAR1, DGH, 48, 4) FIELD(ID_AA64ISAR1, I8MM, 52, 4) +FIELD(ID_AA64ISAR1, XS, 56, 4) +FIELD(ID_AA64ISAR1, LS64, 60, 4) + +FIELD(ID_AA64ISAR2, WFXT, 0, 4) +FIELD(ID_AA64ISAR2, RPRES, 4, 4) +FIELD(ID_AA64ISAR2, GPA3, 8, 4) +FIELD(ID_AA64ISAR2, APA3, 12, 4) +FIELD(ID_AA64ISAR2, MOPS, 16, 4) +FIELD(ID_AA64ISAR2, BC, 20, 4) +FIELD(ID_AA64ISAR2, PAC_FRAC, 24, 4) +FIELD(ID_AA64ISAR2, CLRBHB, 28, 4) +FIELD(ID_AA64ISAR2, SYSREG_128, 32, 4) +FIELD(ID_AA64ISAR2, SYSINSTR_128, 36, 4) +FIELD(ID_AA64ISAR2, PRFMSLC, 40, 4) +FIELD(ID_AA64ISAR2, RPRFM, 48, 4) +FIELD(ID_AA64ISAR2, CSSC, 52, 4) +FIELD(ID_AA64ISAR2, ATS1A, 60, 4) FIELD(ID_AA64PFR0, EL0, 0, 4) FIELD(ID_AA64PFR0, EL1, 4, 4) @@ -1994,6 +2132,7 @@ FIELD(ID_AA64PFR0, SEL2, 36, 4) FIELD(ID_AA64PFR0, MPAM, 40, 4) FIELD(ID_AA64PFR0, AMU, 44, 4) FIELD(ID_AA64PFR0, DIT, 48, 4) +FIELD(ID_AA64PFR0, RME, 52, 4) FIELD(ID_AA64PFR0, CSV2, 56, 4) FIELD(ID_AA64PFR0, CSV3, 60, 4) @@ -2002,6 +2141,16 @@ FIELD(ID_AA64PFR1, SSBS, 4, 4) FIELD(ID_AA64PFR1, MTE, 8, 4) FIELD(ID_AA64PFR1, RAS_FRAC, 12, 4) FIELD(ID_AA64PFR1, MPAM_FRAC, 16, 4) +FIELD(ID_AA64PFR1, SME, 24, 4) +FIELD(ID_AA64PFR1, RNDR_TRAP, 28, 4) +FIELD(ID_AA64PFR1, CSV2_FRAC, 32, 4) +FIELD(ID_AA64PFR1, NMI, 36, 4) +FIELD(ID_AA64PFR1, MTE_FRAC, 40, 4) +FIELD(ID_AA64PFR1, GCS, 44, 4) +FIELD(ID_AA64PFR1, THE, 48, 4) +FIELD(ID_AA64PFR1, MTEX, 52, 4) +FIELD(ID_AA64PFR1, DF2, 56, 4) +FIELD(ID_AA64PFR1, PFAR, 60, 4) FIELD(ID_AA64MMFR0, PARANGE, 0, 4) FIELD(ID_AA64MMFR0, ASIDBITS, 4, 4) @@ -2028,6 +2177,12 @@ FIELD(ID_AA64MMFR1, SPECSEI, 24, 4) FIELD(ID_AA64MMFR1, XNX, 28, 4) FIELD(ID_AA64MMFR1, TWED, 32, 4) FIELD(ID_AA64MMFR1, ETS, 36, 4) +FIELD(ID_AA64MMFR1, HCX, 40, 4) +FIELD(ID_AA64MMFR1, AFP, 44, 4) +FIELD(ID_AA64MMFR1, NTLBPA, 48, 4) +FIELD(ID_AA64MMFR1, TIDCP1, 52, 4) +FIELD(ID_AA64MMFR1, CMOW, 56, 4) +FIELD(ID_AA64MMFR1, ECBHB, 60, 4) FIELD(ID_AA64MMFR2, CNP, 0, 4) FIELD(ID_AA64MMFR2, UAO, 4, 4) @@ -2049,23 +2204,43 @@ FIELD(ID_AA64DFR0, DEBUGVER, 0, 4) FIELD(ID_AA64DFR0, TRACEVER, 4, 4) FIELD(ID_AA64DFR0, PMUVER, 8, 4) FIELD(ID_AA64DFR0, BRPS, 12, 4) +FIELD(ID_AA64DFR0, PMSS, 16, 4) FIELD(ID_AA64DFR0, WRPS, 20, 4) +FIELD(ID_AA64DFR0, SEBEP, 24, 4) FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4) FIELD(ID_AA64DFR0, PMSVER, 32, 4) FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4) FIELD(ID_AA64DFR0, TRACEFILT, 40, 4) +FIELD(ID_AA64DFR0, TRACEBUFFER, 44, 4) FIELD(ID_AA64DFR0, MTPMU, 48, 4) +FIELD(ID_AA64DFR0, BRBE, 52, 4) +FIELD(ID_AA64DFR0, EXTTRCBUFF, 56, 4) +FIELD(ID_AA64DFR0, HPMN0, 60, 4) FIELD(ID_AA64ZFR0, SVEVER, 0, 4) FIELD(ID_AA64ZFR0, AES, 4, 4) FIELD(ID_AA64ZFR0, BITPERM, 16, 4) FIELD(ID_AA64ZFR0, BFLOAT16, 20, 4) +FIELD(ID_AA64ZFR0, B16B16, 24, 4) FIELD(ID_AA64ZFR0, SHA3, 32, 4) FIELD(ID_AA64ZFR0, SM4, 40, 4) FIELD(ID_AA64ZFR0, I8MM, 44, 4) FIELD(ID_AA64ZFR0, F32MM, 52, 4) FIELD(ID_AA64ZFR0, F64MM, 56, 4) +FIELD(ID_AA64SMFR0, F32F32, 32, 1) +FIELD(ID_AA64SMFR0, BI32I32, 33, 1) 
+FIELD(ID_AA64SMFR0, B16F32, 34, 1) +FIELD(ID_AA64SMFR0, F16F32, 35, 1) +FIELD(ID_AA64SMFR0, I8I32, 36, 4) +FIELD(ID_AA64SMFR0, F16F16, 42, 1) +FIELD(ID_AA64SMFR0, B16B16, 43, 1) +FIELD(ID_AA64SMFR0, I16I32, 44, 4) +FIELD(ID_AA64SMFR0, F64F64, 48, 1) +FIELD(ID_AA64SMFR0, I16I64, 52, 4) +FIELD(ID_AA64SMFR0, SMEVER, 56, 4) +FIELD(ID_AA64SMFR0, FA64, 63, 1) + FIELD(ID_DFR0, COPDBG, 0, 4) FIELD(ID_DFR0, COPSDBG, 4, 4) FIELD(ID_DFR0, MMAPDBG, 8, 4) @@ -2076,6 +2251,7 @@ FIELD(ID_DFR0, PERFMON, 24, 4) FIELD(ID_DFR0, TRACEFILT, 28, 4) FIELD(ID_DFR1, MTPMU, 0, 4) +FIELD(ID_DFR1, HPMN0, 4, 4) FIELD(DBGDIDR, SE_IMP, 12, 1) FIELD(DBGDIDR, NSUHD_IMP, 14, 1) @@ -2084,6 +2260,15 @@ FIELD(DBGDIDR, CTX_CMPS, 20, 4) FIELD(DBGDIDR, BRPS, 24, 4) FIELD(DBGDIDR, WRPS, 28, 4) +FIELD(DBGDEVID, PCSAMPLE, 0, 4) +FIELD(DBGDEVID, WPADDRMASK, 4, 4) +FIELD(DBGDEVID, BPADDRMASK, 8, 4) +FIELD(DBGDEVID, VECTORCATCH, 12, 4) +FIELD(DBGDEVID, VIRTEXTNS, 16, 4) +FIELD(DBGDEVID, DOUBLELOCK, 20, 4) +FIELD(DBGDEVID, AUXREGS, 24, 4) +FIELD(DBGDEVID, CIDMASK, 28, 4) + FIELD(MVFR0, SIMDREG, 0, 4) FIELD(MVFR0, FPSP, 4, 4) FIELD(MVFR0, FPDP, 8, 4) @@ -2107,6 +2292,19 @@ FIELD(MVFR1, SIMDFMAC, 28, 4) FIELD(MVFR2, SIMDMISC, 0, 4) FIELD(MVFR2, FPMISC, 4, 4) +FIELD(GPCCR, PPS, 0, 3) +FIELD(GPCCR, IRGN, 8, 2) +FIELD(GPCCR, ORGN, 10, 2) +FIELD(GPCCR, SH, 12, 2) +FIELD(GPCCR, PGS, 14, 2) +FIELD(GPCCR, GPC, 16, 1) +FIELD(GPCCR, GPCP, 17, 1) +FIELD(GPCCR, L0GPTSZ, 20, 4) + +FIELD(MFAR, FPA, 12, 40) +FIELD(MFAR, NSE, 62, 1) +FIELD(MFAR, NS, 63, 1) + QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK); /* If adding a feature bit which corresponds to a Linux ELF @@ -2161,28 +2359,59 @@ static inline int arm_feature(CPUARMState *env, int feature) void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp); +/* + * ARM v9 security states. + * The ordering of the enumeration corresponds to the low 2 bits + * of the GPI value, and (except for Root) the concat of NSE:NS. + */ + +typedef enum ARMSecuritySpace { + ARMSS_Secure = 0, + ARMSS_NonSecure = 1, + ARMSS_Root = 2, + ARMSS_Realm = 3, +} ARMSecuritySpace; + +/* Return true if @space is secure, in the pre-v9 sense. */ +static inline bool arm_space_is_secure(ARMSecuritySpace space) +{ + return space == ARMSS_Secure || space == ARMSS_Root; +} + +/* Return the ARMSecuritySpace for @secure, assuming !RME or EL[0-2]. */ +static inline ARMSecuritySpace arm_secure_to_space(bool secure) +{ + return secure ? ARMSS_Secure : ARMSS_NonSecure; +} + #if !defined(CONFIG_USER_ONLY) -/* Return true if exception levels below EL3 are in secure state, - * or would be following an exception return to that level. - * Unlike arm_is_secure() (which is always a question about the - * _current_ state of the CPU) this doesn't care about the current - * EL or mode. +/** + * arm_security_space_below_el3: + * @env: cpu context + * + * Return the security space of exception levels below EL3, following + * an exception return to those levels. Unlike arm_security_space, + * this doesn't care about the current EL. + */ +ARMSecuritySpace arm_security_space_below_el3(CPUARMState *env); + +/** + * arm_is_secure_below_el3: + * @env: cpu context + * + * Return true if exception levels below EL3 are in secure state, + * or would be following an exception return to those levels. 
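+ *
+ * Illustrative sketch (editorial note, not part of this patch): a caller
+ * that wants the full v9 security space rather than a boolean can use
+ *     ARMSecuritySpace ss = arm_security_space_below_el3(env);
+ *     bool s = arm_space_is_secure(ss);
+ * Root never occurs below EL3, so here only ARMSS_Secure yields true,
+ * which is exactly what the inline wrapper below computes.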
*/ static inline bool arm_is_secure_below_el3(CPUARMState *env) { - if (arm_feature(env, ARM_FEATURE_EL3)) { - return !(env->cp15.scr_el3 & SCR_NS); - } else { - /* If EL3 is not supported then the secure state is implementation - * defined, in which case QEMU defaults to non-secure. - */ - return false; - } + ARMSecuritySpace ss = arm_security_space_below_el3(env); + return ss == ARMSS_Secure; } /* Return true if the CPU is AArch64 EL3 or AArch32 Mon */ static inline bool arm_is_el3_or_mon(CPUARMState *env) { + assert(!arm_feature(env, ARM_FEATURE_M)); if (arm_feature(env, ARM_FEATURE_EL3)) { if (is_a64(env) && extract32(env->pstate, 2, 2) == 3) { /* CPU currently in AArch64 state and EL3 */ @@ -2196,41 +2425,69 @@ static inline bool arm_is_el3_or_mon(CPUARMState *env) return false; } -/* Return true if the processor is in secure state */ +/** + * arm_security_space: + * @env: cpu context + * + * Return the current security space of the cpu. + */ +ARMSecuritySpace arm_security_space(CPUARMState *env); + +/** + * arm_is_secure: + * @env: cpu context + * + * Return true if the processor is in secure state. + */ static inline bool arm_is_secure(CPUARMState *env) { - if (arm_is_el3_or_mon(env)) { - return true; - } - return arm_is_secure_below_el3(env); + return arm_space_is_secure(arm_security_space(env)); } /* * Return true if the current security state has AArch64 EL2 or AArch32 Hyp. - * This corresponds to the pseudocode EL2Enabled() + * This corresponds to the pseudocode EL2Enabled(). */ +static inline bool arm_is_el2_enabled_secstate(CPUARMState *env, + ARMSecuritySpace space) +{ + assert(space != ARMSS_Root); + return arm_feature(env, ARM_FEATURE_EL2) + && (space != ARMSS_Secure || (env->cp15.scr_el3 & SCR_EEL2)); +} + static inline bool arm_is_el2_enabled(CPUARMState *env) { - if (arm_feature(env, ARM_FEATURE_EL2)) { - if (arm_is_secure_below_el3(env)) { - return (env->cp15.scr_el3 & SCR_EEL2) != 0; - } - return true; - } - return false; + return arm_is_el2_enabled_secstate(env, arm_security_space_below_el3(env)); } #else +static inline ARMSecuritySpace arm_security_space_below_el3(CPUARMState *env) +{ + return ARMSS_NonSecure; +} + static inline bool arm_is_secure_below_el3(CPUARMState *env) { return false; } +static inline ARMSecuritySpace arm_security_space(CPUARMState *env) +{ + return ARMSS_NonSecure; +} + static inline bool arm_is_secure(CPUARMState *env) { return false; } +static inline bool arm_is_el2_enabled_secstate(CPUARMState *env, + ARMSecuritySpace space) +{ + return false; +} + static inline bool arm_is_el2_enabled(CPUARMState *env) { return false; @@ -2243,7 +2500,9 @@ static inline bool arm_is_el2_enabled(CPUARMState *env) * "for all purposes other than a direct read or write access of HCR_EL2." * Not included here is HCR_RW. */ +uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, ARMSecuritySpace space); uint64_t arm_hcr_el2_eff(CPUARMState *env); +uint64_t arm_hcrx_el2_eff(CPUARMState *env); /* Return true if the specified exception level is running in AArch64 state. */ static inline bool arm_el_is_aa64(CPUARMState *env, int el) @@ -2278,7 +2537,7 @@ static inline bool arm_el_is_aa64(CPUARMState *env, int el) return aa64; } -/* Function for determing whether guest cp register reads and writes should +/* Function for determining whether guest cp register reads and writes should * access the secure or non-secure bank of a cp register. When EL3 is * operating in AArch32 state, the NS-bit determines whether the secure * instance of a cp register should be used. 
When EL3 is AArch64 (or if @@ -2321,362 +2580,9 @@ static inline bool access_secure_reg(CPUARMState *env) (arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)), \ (_val)) -void arm_cpu_list(void); uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx, uint32_t cur_el, bool secure); -/* Interface between CPU and Interrupt controller. */ -#ifndef CONFIG_USER_ONLY -bool armv7m_nvic_can_take_pending_exception(void *opaque); -#else -static inline bool armv7m_nvic_can_take_pending_exception(void *opaque) -{ - return true; -} -#endif -/** - * armv7m_nvic_set_pending: mark the specified exception as pending - * @opaque: the NVIC - * @irq: the exception number to mark pending - * @secure: false for non-banked exceptions or for the nonsecure - * version of a banked exception, true for the secure version of a banked - * exception. - * - * Marks the specified exception as pending. Note that we will assert() - * if @secure is true and @irq does not specify one of the fixed set - * of architecturally banked exceptions. - */ -void armv7m_nvic_set_pending(void *opaque, int irq, bool secure); -/** - * armv7m_nvic_set_pending_derived: mark this derived exception as pending - * @opaque: the NVIC - * @irq: the exception number to mark pending - * @secure: false for non-banked exceptions or for the nonsecure - * version of a banked exception, true for the secure version of a banked - * exception. - * - * Similar to armv7m_nvic_set_pending(), but specifically for derived - * exceptions (exceptions generated in the course of trying to take - * a different exception). - */ -void armv7m_nvic_set_pending_derived(void *opaque, int irq, bool secure); -/** - * armv7m_nvic_set_pending_lazyfp: mark this lazy FP exception as pending - * @opaque: the NVIC - * @irq: the exception number to mark pending - * @secure: false for non-banked exceptions or for the nonsecure - * version of a banked exception, true for the secure version of a banked - * exception. - * - * Similar to armv7m_nvic_set_pending(), but specifically for exceptions - * generated in the course of lazy stacking of FP registers. - */ -void armv7m_nvic_set_pending_lazyfp(void *opaque, int irq, bool secure); -/** - * armv7m_nvic_get_pending_irq_info: return highest priority pending - * exception, and whether it targets Secure state - * @opaque: the NVIC - * @pirq: set to pending exception number - * @ptargets_secure: set to whether pending exception targets Secure - * - * This function writes the number of the highest priority pending - * exception (the one which would be made active by - * armv7m_nvic_acknowledge_irq()) to @pirq, and sets @ptargets_secure - * to true if the current highest priority pending exception should - * be taken to Secure state, false for NS. - */ -void armv7m_nvic_get_pending_irq_info(void *opaque, int *pirq, - bool *ptargets_secure); -/** - * armv7m_nvic_acknowledge_irq: make highest priority pending exception active - * @opaque: the NVIC - * - * Move the current highest priority pending exception from the pending - * state to the active state, and update v7m.exception to indicate that - * it is the exception currently being handled. 
- */ -void armv7m_nvic_acknowledge_irq(void *opaque); -/** - * armv7m_nvic_complete_irq: complete specified interrupt or exception - * @opaque: the NVIC - * @irq: the exception number to complete - * @secure: true if this exception was secure - * - * Returns: -1 if the irq was not active - * 1 if completing this irq brought us back to base (no active irqs) - * 0 if there is still an irq active after this one was completed - * (Ignoring -1, this is the same as the RETTOBASE value before completion.) - */ -int armv7m_nvic_complete_irq(void *opaque, int irq, bool secure); -/** - * armv7m_nvic_get_ready_status(void *opaque, int irq, bool secure) - * @opaque: the NVIC - * @irq: the exception number to mark pending - * @secure: false for non-banked exceptions or for the nonsecure - * version of a banked exception, true for the secure version of a banked - * exception. - * - * Return whether an exception is "ready", i.e. whether the exception is - * enabled and is configured at a priority which would allow it to - * interrupt the current execution priority. This controls whether the - * RDY bit for it in the FPCCR is set. - */ -bool armv7m_nvic_get_ready_status(void *opaque, int irq, bool secure); -/** - * armv7m_nvic_raw_execution_priority: return the raw execution priority - * @opaque: the NVIC - * - * Returns: the raw execution priority as defined by the v8M architecture. - * This is the execution priority minus the effects of AIRCR.PRIS, - * and minus any PRIMASK/FAULTMASK/BASEPRI priority boosting. - * (v8M ARM ARM I_PKLD.) - */ -int armv7m_nvic_raw_execution_priority(void *opaque); -/** - * armv7m_nvic_neg_prio_requested: return true if the requested execution - * priority is negative for the specified security state. - * @opaque: the NVIC - * @secure: the security state to test - * This corresponds to the pseudocode IsReqExecPriNeg(). - */ -#ifndef CONFIG_USER_ONLY -bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure); -#else -static inline bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure) -{ - return false; -} -#endif - -/* Interface for defining coprocessor registers. - * Registers are defined in tables of arm_cp_reginfo structs - * which are passed to define_arm_cp_regs(). - */ - -/* When looking up a coprocessor register we look for it - * via an integer which encodes all of: - * coprocessor number - * Crn, Crm, opc1, opc2 fields - * 32 or 64 bit register (ie is it accessed via MRC/MCR - * or via MRRC/MCRR?) - * non-secure/secure bank (AArch32 only) - * We allow 4 bits for opc1 because MRRC/MCRR have a 4 bit field. - * (In this case crn and opc2 should be zero.) - * For AArch64, there is no 32/64 bit size distinction; - * instead all registers have a 2 bit op0, 3 bit op1 and op2, - * and 4 bit CRn and CRm. The encoding patterns are chosen - * to be easy to convert to and from the KVM encodings, and also - * so that the hashtable can contain both AArch32 and AArch64 - * registers (to allow for interprocessing where we might run - * 32 bit code on a 64 bit core). - */ -/* This bit is private to our hashtable cpreg; in KVM register - * IDs the AArch64/32 distinction is the KVM_REG_ARM/ARM64 - * in the upper bits of the 64 bit ID. - */ -#define CP_REG_AA64_SHIFT 28 -#define CP_REG_AA64_MASK (1 << CP_REG_AA64_SHIFT) - -/* To enable banking of coprocessor registers depending on ns-bit we - * add a bit to distinguish between secure and non-secure cpregs in the - * hashtable. 
- */ -#define CP_REG_NS_SHIFT 29 -#define CP_REG_NS_MASK (1 << CP_REG_NS_SHIFT) - -#define ENCODE_CP_REG(cp, is64, ns, crn, crm, opc1, opc2) \ - ((ns) << CP_REG_NS_SHIFT | ((cp) << 16) | ((is64) << 15) | \ - ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2)) - -#define ENCODE_AA64_CP_REG(cp, crn, crm, op0, op1, op2) \ - (CP_REG_AA64_MASK | \ - ((cp) << CP_REG_ARM_COPROC_SHIFT) | \ - ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \ - ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \ - ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \ - ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) | \ - ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT)) - -/* Convert a full 64 bit KVM register ID to the truncated 32 bit - * version used as a key for the coprocessor register hashtable - */ -static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid) -{ - uint32_t cpregid = kvmid; - if ((kvmid & CP_REG_ARCH_MASK) == CP_REG_ARM64) { - cpregid |= CP_REG_AA64_MASK; - } else { - if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) { - cpregid |= (1 << 15); - } - - /* KVM is always non-secure so add the NS flag on AArch32 register - * entries. - */ - cpregid |= 1 << CP_REG_NS_SHIFT; - } - return cpregid; -} - -/* Convert a truncated 32 bit hashtable key into the full - * 64 bit KVM register ID. - */ -static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid) -{ - uint64_t kvmid; - - if (cpregid & CP_REG_AA64_MASK) { - kvmid = cpregid & ~CP_REG_AA64_MASK; - kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM64; - } else { - kvmid = cpregid & ~(1 << 15); - if (cpregid & (1 << 15)) { - kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM; - } else { - kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM; - } - } - return kvmid; -} - -/* ARMCPRegInfo type field bits. If the SPECIAL bit is set this is a - * special-behaviour cp reg and bits [11..8] indicate what behaviour - * it has. Otherwise it is a simple cp reg, where CONST indicates that - * TCG can assume the value to be constant (ie load at translate time) - * and 64BIT indicates a 64 bit wide coprocessor register. SUPPRESS_TB_END - * indicates that the TB should not be ended after a write to this register - * (the default is that the TB ends after cp writes). OVERRIDE permits - * a register definition to override a previous definition for the - * same (cp, is64, crn, crm, opc1, opc2) tuple: either the new or the - * old must have the OVERRIDE bit set. - * ALIAS indicates that this register is an alias view of some underlying - * state which is also visible via another register, and that the other - * register is handling migration and reset; registers marked ALIAS will not be - * migrated but may have their state set by syncing of register state from KVM. - * NO_RAW indicates that this register has no underlying state and does not - * support raw access for state saving/loading; it will not be used for either - * migration or KVM state synchronization. (Typically this is for "registers" - * which are actually used as instructions for cache maintenance and so on.) - * IO indicates that this register does I/O and therefore its accesses - * need to be marked with gen_io_start() and also end the TB. In particular, - * registers which implement clocks or timers require this. - * RAISES_EXC is for when the read or write hook might raise an exception; - * the generated code will synchronize the CPU state before calling the hook - * so that it is safe for the hook to call raise_exception(). - * NEWEL is for writes to registers that might change the exception - * level - typically on older ARM chips. 
For those cases we need to - * re-read the new el when recomputing the translation flags. - */ -#define ARM_CP_SPECIAL 0x0001 -#define ARM_CP_CONST 0x0002 -#define ARM_CP_64BIT 0x0004 -#define ARM_CP_SUPPRESS_TB_END 0x0008 -#define ARM_CP_OVERRIDE 0x0010 -#define ARM_CP_ALIAS 0x0020 -#define ARM_CP_IO 0x0040 -#define ARM_CP_NO_RAW 0x0080 -#define ARM_CP_NOP (ARM_CP_SPECIAL | 0x0100) -#define ARM_CP_WFI (ARM_CP_SPECIAL | 0x0200) -#define ARM_CP_NZCV (ARM_CP_SPECIAL | 0x0300) -#define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | 0x0400) -#define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | 0x0500) -#define ARM_CP_DC_GVA (ARM_CP_SPECIAL | 0x0600) -#define ARM_CP_DC_GZVA (ARM_CP_SPECIAL | 0x0700) -#define ARM_LAST_SPECIAL ARM_CP_DC_GZVA -#define ARM_CP_FPU 0x1000 -#define ARM_CP_SVE 0x2000 -#define ARM_CP_NO_GDB 0x4000 -#define ARM_CP_RAISES_EXC 0x8000 -#define ARM_CP_NEWEL 0x10000 -/* Used only as a terminator for ARMCPRegInfo lists */ -#define ARM_CP_SENTINEL 0xfffff -/* Mask of only the flag bits in a type field */ -#define ARM_CP_FLAG_MASK 0x1f0ff - -/* Valid values for ARMCPRegInfo state field, indicating which of - * the AArch32 and AArch64 execution states this register is visible in. - * If the reginfo doesn't explicitly specify then it is AArch32 only. - * If the reginfo is declared to be visible in both states then a second - * reginfo is synthesised for the AArch32 view of the AArch64 register, - * such that the AArch32 view is the lower 32 bits of the AArch64 one. - * Note that we rely on the values of these enums as we iterate through - * the various states in some places. - */ -enum { - ARM_CP_STATE_AA32 = 0, - ARM_CP_STATE_AA64 = 1, - ARM_CP_STATE_BOTH = 2, -}; - -/* ARM CP register secure state flags. These flags identify security state - * attributes for a given CP register entry. - * The existence of both or neither secure and non-secure flags indicates that - * the register has both a secure and non-secure hash entry. A single one of - * these flags causes the register to only be hashed for the specified - * security state. - * Although definitions may have any combination of the S/NS bits, each - * registered entry will only have one to identify whether the entry is secure - * or non-secure. - */ -enum { - ARM_CP_SECSTATE_S = (1 << 0), /* bit[0]: Secure state register */ - ARM_CP_SECSTATE_NS = (1 << 1), /* bit[1]: Non-secure state register */ -}; - -/* Return true if cptype is a valid type field. This is used to try to - * catch errors where the sentinel has been accidentally left off the end - * of a list of registers. - */ -static inline bool cptype_valid(int cptype) -{ - return ((cptype & ~ARM_CP_FLAG_MASK) == 0) - || ((cptype & ARM_CP_SPECIAL) && - ((cptype & ~ARM_CP_FLAG_MASK) <= ARM_LAST_SPECIAL)); -} - -/* Access rights: - * We define bits for Read and Write access for what rev C of the v7-AR ARM ARM - * defines as PL0 (user), PL1 (fiq/irq/svc/abt/und/sys, ie privileged), and - * PL2 (hyp). The other level which has Read and Write bits is Secure PL1 - * (ie any of the privileged modes in Secure state, or Monitor mode). - * If a register is accessible in one privilege level it's always accessible - * in higher privilege levels too. Since "Secure PL1" also follows this rule - * (ie anything visible in PL2 is visible in S-PL1, some things are only - * visible in S-PL1) but "Secure PL1" is a bit of a mouthful, we bend the - * terminology a little and call this PL3. - * In AArch64 things are somewhat simpler as the PLx bits line up exactly - * with the ELx exception levels. 
- * - * If access permissions for a register are more complex than can be - * described with these bits, then use a laxer set of restrictions, and - * do the more restrictive/complex check inside a helper function. - */ -#define PL3_R 0x80 -#define PL3_W 0x40 -#define PL2_R (0x20 | PL3_R) -#define PL2_W (0x10 | PL3_W) -#define PL1_R (0x08 | PL2_R) -#define PL1_W (0x04 | PL2_W) -#define PL0_R (0x02 | PL1_R) -#define PL0_W (0x01 | PL1_W) - -/* - * For user-mode some registers are accessible to EL0 via a kernel - * trap-and-emulate ABI. In this case we define the read permissions - * as actually being PL0_R. However some bits of any given register - * may still be masked. - */ -#ifdef CONFIG_USER_ONLY -#define PL0U_R PL0_R -#else -#define PL0U_R PL1_R -#endif - -#define PL3_RW (PL3_R | PL3_W) -#define PL2_RW (PL2_R | PL2_W) -#define PL1_RW (PL1_R | PL1_W) -#define PL0_RW (PL0_R | PL0_W) - /* Return the highest implemented Exception Level */ static inline int arm_highest_el(CPUARMState *env) { @@ -2728,241 +2634,6 @@ static inline int arm_current_el(CPUARMState *env) } } -typedef struct ARMCPRegInfo ARMCPRegInfo; - -typedef enum CPAccessResult { - /* Access is permitted */ - CP_ACCESS_OK = 0, - /* Access fails due to a configurable trap or enable which would - * result in a categorized exception syndrome giving information about - * the failing instruction (ie syndrome category 0x3, 0x4, 0x5, 0x6, - * 0xc or 0x18). The exception is taken to the usual target EL (EL1 or - * PL1 if in EL0, otherwise to the current EL). - */ - CP_ACCESS_TRAP = 1, - /* Access fails and results in an exception syndrome 0x0 ("uncategorized"). - * Note that this is not a catch-all case -- the set of cases which may - * result in this failure is specifically defined by the architecture. - */ - CP_ACCESS_TRAP_UNCATEGORIZED = 2, - /* As CP_ACCESS_TRAP, but for traps directly to EL2 or EL3 */ - CP_ACCESS_TRAP_EL2 = 3, - CP_ACCESS_TRAP_EL3 = 4, - /* As CP_ACCESS_UNCATEGORIZED, but for traps directly to EL2 or EL3 */ - CP_ACCESS_TRAP_UNCATEGORIZED_EL2 = 5, - CP_ACCESS_TRAP_UNCATEGORIZED_EL3 = 6, - /* Access fails and results in an exception syndrome for an FP access, - * trapped directly to EL2 or EL3 - */ - CP_ACCESS_TRAP_FP_EL2 = 7, - CP_ACCESS_TRAP_FP_EL3 = 8, -} CPAccessResult; - -/* Access functions for coprocessor registers. These cannot fail and - * may not raise exceptions. - */ -typedef uint64_t CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque); -typedef void CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque, - uint64_t value); -/* Access permission check functions for coprocessor registers. */ -typedef CPAccessResult CPAccessFn(CPUARMState *env, - const ARMCPRegInfo *opaque, - bool isread); -/* Hook function for register reset */ -typedef void CPResetFn(CPUARMState *env, const ARMCPRegInfo *opaque); - -#define CP_ANY 0xff - -/* Definition of an ARM coprocessor register */ -struct ARMCPRegInfo { - /* Name of register (useful mainly for debugging, need not be unique) */ - const char *name; - /* Location of register: coprocessor number and (crn,crm,opc1,opc2) - * tuple. Any of crm, opc1 and opc2 may be CP_ANY to indicate a - * 'wildcard' field -- any value of that field in the MRC/MCR insn - * will be decoded to this register. The register read and write - * callbacks will be passed an ARMCPRegInfo with the crn/crm/opc1/opc2 - * used by the program, so it is possible to register a wildcard and - * then behave differently on read/write if necessary. 
- * For 64 bit registers, only crm and opc1 are relevant; crn and opc2 - * must both be zero. - * For AArch64-visible registers, opc0 is also used. - * Since there are no "coprocessors" in AArch64, cp is purely used as a - * way to distinguish (for KVM's benefit) guest-visible system registers - * from demuxed ones provided to preserve the "no side effects on - * KVM register read/write from QEMU" semantics. cp==0x13 is guest - * visible (to match KVM's encoding); cp==0 will be converted to - * cp==0x13 when the ARMCPRegInfo is registered, for convenience. - */ - uint8_t cp; - uint8_t crn; - uint8_t crm; - uint8_t opc0; - uint8_t opc1; - uint8_t opc2; - /* Execution state in which this register is visible: ARM_CP_STATE_* */ - int state; - /* Register type: ARM_CP_* bits/values */ - int type; - /* Access rights: PL*_[RW] */ - int access; - /* Security state: ARM_CP_SECSTATE_* bits/values */ - int secure; - /* The opaque pointer passed to define_arm_cp_regs_with_opaque() when - * this register was defined: can be used to hand data through to the - * register read/write functions, since they are passed the ARMCPRegInfo*. - */ - void *opaque; - /* Value of this register, if it is ARM_CP_CONST. Otherwise, if - * fieldoffset is non-zero, the reset value of the register. - */ - uint64_t resetvalue; - /* Offset of the field in CPUARMState for this register. - * - * This is not needed if either: - * 1. type is ARM_CP_CONST or one of the ARM_CP_SPECIALs - * 2. both readfn and writefn are specified - */ - ptrdiff_t fieldoffset; /* offsetof(CPUARMState, field) */ - - /* Offsets of the secure and non-secure fields in CPUARMState for the - * register if it is banked. These fields are only used during the static - * registration of a register. During hashing the bank associated - * with a given security state is copied to fieldoffset which is used from - * there on out. - * - * It is expected that register definitions use either fieldoffset or - * bank_fieldoffsets in the definition but not both. It is also expected - * that both bank offsets are set when defining a banked register. This - * use indicates that a register is banked. - */ - ptrdiff_t bank_fieldoffsets[2]; - - /* Function for making any access checks for this register in addition to - * those specified by the 'access' permissions bits. If NULL, no extra - * checks required. The access check is performed at runtime, not at - * translate time. - */ - CPAccessFn *accessfn; - /* Function for handling reads of this register. If NULL, then reads - * will be done by loading from the offset into CPUARMState specified - * by fieldoffset. - */ - CPReadFn *readfn; - /* Function for handling writes of this register. If NULL, then writes - * will be done by writing to the offset into CPUARMState specified - * by fieldoffset. - */ - CPWriteFn *writefn; - /* Function for doing a "raw" read; used when we need to copy - * coprocessor state to the kernel for KVM or out for - * migration. This only needs to be provided if there is also a - * readfn and it has side effects (for instance clear-on-read bits). - */ - CPReadFn *raw_readfn; - /* Function for doing a "raw" write; used when we need to copy KVM - * kernel coprocessor state into userspace, or for inbound - * migration. This only needs to be provided if there is also a - * writefn and it masks out "unwritable" bits or has write-one-to-clear - * or similar behaviour. - */ - CPWriteFn *raw_writefn; - /* Function for resetting the register. 
If NULL, then reset will be done - * by writing resetvalue to the field specified in fieldoffset. If - * fieldoffset is 0 then no reset will be done. - */ - CPResetFn *resetfn; - - /* - * "Original" writefn and readfn. - * For ARMv8.1-VHE register aliases, we overwrite the read/write - * accessor functions of various EL1/EL0 to perform the runtime - * check for which sysreg should actually be modified, and then - * forwards the operation. Before overwriting the accessors, - * the original function is copied here, so that accesses that - * really do go to the EL1/EL0 version proceed normally. - * (The corresponding EL2 register is linked via opaque.) - */ - CPReadFn *orig_readfn; - CPWriteFn *orig_writefn; -}; - -/* Macros which are lvalues for the field in CPUARMState for the - * ARMCPRegInfo *ri. - */ -#define CPREG_FIELD32(env, ri) \ - (*(uint32_t *)((char *)(env) + (ri)->fieldoffset)) -#define CPREG_FIELD64(env, ri) \ - (*(uint64_t *)((char *)(env) + (ri)->fieldoffset)) - -#define REGINFO_SENTINEL { .type = ARM_CP_SENTINEL } - -void define_arm_cp_regs_with_opaque(ARMCPU *cpu, - const ARMCPRegInfo *regs, void *opaque); -void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, - const ARMCPRegInfo *regs, void *opaque); -static inline void define_arm_cp_regs(ARMCPU *cpu, const ARMCPRegInfo *regs) -{ - define_arm_cp_regs_with_opaque(cpu, regs, 0); -} -static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs) -{ - define_one_arm_cp_reg_with_opaque(cpu, regs, 0); -} -const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp); - -/* - * Definition of an ARM co-processor register as viewed from - * userspace. This is used for presenting sanitised versions of - * registers to userspace when emulating the Linux AArch64 CPU - * ID/feature ABI (advertised as HWCAP_CPUID). - */ -typedef struct ARMCPRegUserSpaceInfo { - /* Name of register */ - const char *name; - - /* Is the name actually a glob pattern */ - bool is_glob; - - /* Only some bits are exported to user space */ - uint64_t exported_bits; - - /* Fixed bits are applied after the mask */ - uint64_t fixed_bits; -} ARMCPRegUserSpaceInfo; - -#define REGUSERINFO_SENTINEL { .name = NULL } - -void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods); - -/* CPWriteFn that can be used to implement writes-ignored behaviour */ -void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value); -/* CPReadFn that can be used for read-as-zero behaviour */ -uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri); - -/* CPResetFn that does nothing, for use if no reset is required even - * if fieldoffset is non zero. - */ -void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque); - -/* Return true if this reginfo struct's field in the cpu state struct - * is 64 bits wide. 
- */ -static inline bool cpreg_field_is_64bit(const ARMCPRegInfo *ri) -{ - return (ri->state == ARM_CP_STATE_AA64) || (ri->type & ARM_CP_64BIT); -} - -static inline bool cp_access_ok(int current_el, - const ARMCPRegInfo *ri, int isread) -{ - return (ri->access >> ((current_el * 2) + isread)) & 1; -} - -/* Raw read of a coprocessor register (as needed for migration, etc) */ -uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri); - /** * write_list_to_cpustate * @cpu: ARMCPU @@ -3005,14 +2676,10 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); #define ARM_CPUID_TI915T 0x54029152 #define ARM_CPUID_TI925T 0x54029252 -#define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU -#define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX) #define CPU_RESOLVING_TYPE TYPE_ARM_CPU #define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU -#define cpu_list arm_cpu_list - /* ARM has the following "translation regimes" (as the ARM ARM calls them): * * If EL3 is 64-bit: @@ -3053,26 +2720,29 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * table over and over. * 6. we need separate EL1/EL2 mmu_idx for handling the Privileged Access * Never (PAN) bit within PSTATE. + * 7. we fold together the secure and non-secure regimes for A-profile, + * because there are no banked system registers for aarch64, so the + * process of switching between secure and non-secure is + * already heavyweight. * * This gives us the following list of cases: * - * NS EL0 EL1&0 stage 1+2 (aka NS PL0) - * NS EL1 EL1&0 stage 1+2 (aka NS PL1) - * NS EL1 EL1&0 stage 1+2 +PAN - * NS EL0 EL2&0 - * NS EL2 EL2&0 - * NS EL2 EL2&0 +PAN - * NS EL2 (aka NS PL2) - * S EL0 EL1&0 (aka S PL0) - * S EL1 EL1&0 (not used if EL3 is 32 bit) - * S EL1 EL1&0 +PAN - * S EL3 (aka S PL1) + * EL0 EL1&0 stage 1+2 (aka NS PL0) + * EL1 EL1&0 stage 1+2 (aka NS PL1) + * EL1 EL1&0 stage 1+2 +PAN + * EL0 EL2&0 + * EL2 EL2&0 + * EL2 EL2&0 +PAN + * EL2 (aka NS PL2) + * EL3 (aka S PL1) + * Physical (NS & S) + * Stage2 (NS & S) * - * for a total of 11 different mmu_idx. + * for a total of 12 different mmu_idx. * * R profile CPUs have an MPU, but can use the same set of MMU indexes - * as A profile. They only need to distinguish NS EL0 and NS EL1 (and - * NS EL2 if we ever model a Cortex-R52). + * as A profile. They only need to distinguish EL0 and EL1 (and + * EL2 if we ever model a Cortex-R52). * * M profile CPUs are rather different as they do not have a true MMU. * They have the following different MMU indexes: @@ -3111,9 +2781,6 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); #define ARM_MMU_IDX_NOTLB 0x20 /* does not have a TLB */ #define ARM_MMU_IDX_M 0x40 /* M profile */ -/* Meanings of the bits for A profile mmu idx values */ -#define ARM_MMU_IDX_A_NS 0x8 - /* Meanings of the bits for M profile mmu idx values */ #define ARM_MMU_IDX_M_PRIV 0x1 #define ARM_MMU_IDX_M_NEGPRI 0x2 @@ -3127,22 +2794,29 @@ typedef enum ARMMMUIdx { /* * A-profile. 
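+ * The enumerators immediately below all carry ARM_MMU_IDX_A in their
+ * encoding; the low bits distinguish the individual translation
+ * regimes (editorial note, not part of this patch).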
*/ - ARMMMUIdx_SE10_0 = 0 | ARM_MMU_IDX_A, - ARMMMUIdx_SE20_0 = 1 | ARM_MMU_IDX_A, - ARMMMUIdx_SE10_1 = 2 | ARM_MMU_IDX_A, - ARMMMUIdx_SE20_2 = 3 | ARM_MMU_IDX_A, - ARMMMUIdx_SE10_1_PAN = 4 | ARM_MMU_IDX_A, - ARMMMUIdx_SE20_2_PAN = 5 | ARM_MMU_IDX_A, - ARMMMUIdx_SE2 = 6 | ARM_MMU_IDX_A, - ARMMMUIdx_SE3 = 7 | ARM_MMU_IDX_A, - - ARMMMUIdx_E10_0 = ARMMMUIdx_SE10_0 | ARM_MMU_IDX_A_NS, - ARMMMUIdx_E20_0 = ARMMMUIdx_SE20_0 | ARM_MMU_IDX_A_NS, - ARMMMUIdx_E10_1 = ARMMMUIdx_SE10_1 | ARM_MMU_IDX_A_NS, - ARMMMUIdx_E20_2 = ARMMMUIdx_SE20_2 | ARM_MMU_IDX_A_NS, - ARMMMUIdx_E10_1_PAN = ARMMMUIdx_SE10_1_PAN | ARM_MMU_IDX_A_NS, - ARMMMUIdx_E20_2_PAN = ARMMMUIdx_SE20_2_PAN | ARM_MMU_IDX_A_NS, - ARMMMUIdx_E2 = ARMMMUIdx_SE2 | ARM_MMU_IDX_A_NS, + ARMMMUIdx_E10_0 = 0 | ARM_MMU_IDX_A, + ARMMMUIdx_E20_0 = 1 | ARM_MMU_IDX_A, + ARMMMUIdx_E10_1 = 2 | ARM_MMU_IDX_A, + ARMMMUIdx_E20_2 = 3 | ARM_MMU_IDX_A, + ARMMMUIdx_E10_1_PAN = 4 | ARM_MMU_IDX_A, + ARMMMUIdx_E20_2_PAN = 5 | ARM_MMU_IDX_A, + ARMMMUIdx_E2 = 6 | ARM_MMU_IDX_A, + ARMMMUIdx_E3 = 7 | ARM_MMU_IDX_A, + + /* + * Used for second stage of an S12 page table walk, or for descriptor + * loads during first stage of an S1 page table walk. Note that both + * are in use simultaneously for SecureEL2: the security state for + * the S2 ptw is selected by the NS bit from the S1 ptw. + */ + ARMMMUIdx_Stage2_S = 8 | ARM_MMU_IDX_A, + ARMMMUIdx_Stage2 = 9 | ARM_MMU_IDX_A, + + /* TLBs with 1-1 mapping to the physical address spaces. */ + ARMMMUIdx_Phys_S = 10 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_NS = 11 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_Root = 12 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_Realm = 13 | ARM_MMU_IDX_A, /* * These are not allocated TLBs and are used only for AT system @@ -3151,18 +2825,6 @@ typedef enum ARMMMUIdx { ARMMMUIdx_Stage1_E0 = 0 | ARM_MMU_IDX_NOTLB, ARMMMUIdx_Stage1_E1 = 1 | ARM_MMU_IDX_NOTLB, ARMMMUIdx_Stage1_E1_PAN = 2 | ARM_MMU_IDX_NOTLB, - ARMMMUIdx_Stage1_SE0 = 3 | ARM_MMU_IDX_NOTLB, - ARMMMUIdx_Stage1_SE1 = 4 | ARM_MMU_IDX_NOTLB, - ARMMMUIdx_Stage1_SE1_PAN = 5 | ARM_MMU_IDX_NOTLB, - /* - * Not allocated a TLB: used only for second stage of an S12 page - * table walk, or for descriptor loads during first stage of an S1 - * page table walk. Note that if we ever want to have a TLB for this - * then various TLB flush insns which currently are no-ops or flush - * only stage 1 MMU indexes will need to change to flush stage 2. - */ - ARMMMUIdx_Stage2 = 6 | ARM_MMU_IDX_NOTLB, - ARMMMUIdx_Stage2_S = 7 | ARM_MMU_IDX_NOTLB, /* * M-profile. @@ -3192,14 +2854,9 @@ typedef enum ARMMMUIdxBit { TO_CORE_BIT(E2), TO_CORE_BIT(E20_2), TO_CORE_BIT(E20_2_PAN), - TO_CORE_BIT(SE10_0), - TO_CORE_BIT(SE20_0), - TO_CORE_BIT(SE10_1), - TO_CORE_BIT(SE20_2), - TO_CORE_BIT(SE10_1_PAN), - TO_CORE_BIT(SE20_2_PAN), - TO_CORE_BIT(SE2), - TO_CORE_BIT(SE3), + TO_CORE_BIT(E3), + TO_CORE_BIT(Stage2), + TO_CORE_BIT(Stage2_S), TO_CORE_BIT(MUser), TO_CORE_BIT(MPriv), @@ -3223,25 +2880,21 @@ typedef enum ARMASIdx { ARMASIdx_TagS = 3, } ARMASIdx; -/* Return the Exception Level targeted by debug exceptions. */ -static inline int arm_debug_target_el(CPUARMState *env) +static inline ARMMMUIdx arm_space_to_phys(ARMSecuritySpace space) { - bool secure = arm_is_secure(env); - bool route_to_el2 = false; + /* Assert the relative order of the physical mmu indexes. 
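+ * With ARMSS_Secure == 0, these build-time asserts guarantee e.g.
+ * ARMMMUIdx_Phys_S + ARMSS_Realm == ARMMMUIdx_Phys_Realm, so the
+ * plain addition in the return statement below is a valid
+ * space-to-index translation (editorial note, not part of this patch).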
*/ + QEMU_BUILD_BUG_ON(ARMSS_Secure != 0); + QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_NS != ARMMMUIdx_Phys_S + ARMSS_NonSecure); + QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_Root != ARMMMUIdx_Phys_S + ARMSS_Root); + QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_Realm != ARMMMUIdx_Phys_S + ARMSS_Realm); - if (arm_is_el2_enabled(env)) { - route_to_el2 = env->cp15.hcr_el2 & HCR_TGE || - env->cp15.mdcr_el2 & MDCR_TDE; - } + return ARMMMUIdx_Phys_S + space; +} - if (route_to_el2) { - return 2; - } else if (arm_feature(env, ARM_FEATURE_EL3) && - !arm_el_is_aa64(env, 3) && secure) { - return 3; - } else { - return 1; - } +static inline ARMSecuritySpace arm_phys_to_space(ARMMMUIdx idx) +{ + assert(idx >= ARMMMUIdx_Phys_S && idx <= ARMMMUIdx_Phys_Realm); + return idx - ARMMMUIdx_Phys_S; } static inline bool arm_v7m_csselr_razwi(ARMCPU *cpu) @@ -3252,107 +2905,6 @@ static inline bool arm_v7m_csselr_razwi(ARMCPU *cpu) return (cpu->clidr & R_V7M_CLIDR_CTYPE_ALL_MASK) != 0; } -/* See AArch64.GenerateDebugExceptionsFrom() in ARM ARM pseudocode */ -static inline bool aa64_generate_debug_exceptions(CPUARMState *env) -{ - int cur_el = arm_current_el(env); - int debug_el; - - if (cur_el == 3) { - return false; - } - - /* MDCR_EL3.SDD disables debug events from Secure state */ - if (arm_is_secure_below_el3(env) - && extract32(env->cp15.mdcr_el3, 16, 1)) { - return false; - } - - /* - * Same EL to same EL debug exceptions need MDSCR_KDE enabled - * while not masking the (D)ebug bit in DAIF. - */ - debug_el = arm_debug_target_el(env); - - if (cur_el == debug_el) { - return extract32(env->cp15.mdscr_el1, 13, 1) - && !(env->daif & PSTATE_D); - } - - /* Otherwise the debug target needs to be a higher EL */ - return debug_el > cur_el; -} - -static inline bool aa32_generate_debug_exceptions(CPUARMState *env) -{ - int el = arm_current_el(env); - - if (el == 0 && arm_el_is_aa64(env, 1)) { - return aa64_generate_debug_exceptions(env); - } - - if (arm_is_secure(env)) { - int spd; - - if (el == 0 && (env->cp15.sder & 1)) { - /* SDER.SUIDEN means debug exceptions from Secure EL0 - * are always enabled. Otherwise they are controlled by - * SDCR.SPD like those from other Secure ELs. - */ - return true; - } - - spd = extract32(env->cp15.mdcr_el3, 14, 2); - switch (spd) { - case 1: - /* SPD == 0b01 is reserved, but behaves as 0b00. */ - case 0: - /* For 0b00 we return true if external secure invasive debug - * is enabled. On real hardware this is controlled by external - * signals to the core. QEMU always permits debug, and behaves - * as if DBGEN, SPIDEN, NIDEN and SPNIDEN are all tied high. - */ - return true; - case 2: - return false; - case 3: - return true; - } - } - - return el != 2; -} - -/* Return true if debugging exceptions are currently enabled. - * This corresponds to what in ARM ARM pseudocode would be - * if UsingAArch32() then - * return AArch32.GenerateDebugExceptions() - * else - * return AArch64.GenerateDebugExceptions() - * We choose to push the if() down into this function for clarity, - * since the pseudocode has it at all callsites except for the one in - * CheckSoftwareStep(), where it is elided because both branches would - * always return the same value. - */ -static inline bool arm_generate_debug_exceptions(CPUARMState *env) -{ - if (env->aarch64) { - return aa64_generate_debug_exceptions(env); - } else { - return aa32_generate_debug_exceptions(env); - } -} - -/* Is single-stepping active? (Note that the "is EL_D AArch64?" check - * implicitly means this always returns false in pre-v8 CPUs.) 
- */ -static inline bool arm_singlestep_active(CPUARMState *env) -{ - return extract32(env->cp15.mdscr_el1, 0, 1) - && arm_el_is_aa64(env, arm_debug_target_el(env)) - && arm_generate_debug_exceptions(env); -} - static inline bool arm_sctlr_b(CPUARMState *env) { return @@ -3409,9 +2961,6 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env) } } -typedef CPUARMState CPUArchState; -typedef ARMCPU ArchCPU; - #include "exec/cpu-all.h" /* @@ -3445,11 +2994,11 @@ FIELD(TBFLAG_ANY, BE_DATA, 3, 1) FIELD(TBFLAG_ANY, MMUIDX, 4, 4) /* Target EL if we take a floating-point-disabled exception */ FIELD(TBFLAG_ANY, FPEXC_EL, 8, 2) -/* For A-profile only, target EL for debug exceptions. */ -FIELD(TBFLAG_ANY, DEBUG_TARGET_EL, 10, 2) /* Memory operations require alignment: SCTLR_ELx.A or CCR.UNALIGN_TRP */ -FIELD(TBFLAG_ANY, ALIGN_MEM, 12, 1) -FIELD(TBFLAG_ANY, PSTATE__IL, 13, 1) +FIELD(TBFLAG_ANY, ALIGN_MEM, 10, 1) +FIELD(TBFLAG_ANY, PSTATE__IL, 11, 1) +FIELD(TBFLAG_ANY, FGT_ACTIVE, 12, 1) +FIELD(TBFLAG_ANY, FGT_SVC, 13, 1) /* * Bit usage when in AArch32 state, both A- and M-profile. @@ -3478,6 +3027,11 @@ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1) * the same thing as the current security state of the processor! */ FIELD(TBFLAG_A32, NS, 10, 1) +/* + * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. + * This requires an SME trap from AArch32 mode when using NEON. + */ +FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1) /* * Bit usage when in AArch32 state, for M-profile only. @@ -3494,13 +3048,16 @@ FIELD(TBFLAG_M32, NEW_FP_CTXT_NEEDED, 3, 1) /* Not cached. */ FIELD(TBFLAG_M32, FPCCR_S_WRONG, 4, 1) /* Not cached. */ /* Set if MVE insns are definitely not predicated by VPR or LTPSIZE */ FIELD(TBFLAG_M32, MVE_NO_PRED, 5, 1) /* Not cached. */ +/* Set if in secure mode */ +FIELD(TBFLAG_M32, SECURE, 6, 1) /* * Bit usage when in AArch64 state */ FIELD(TBFLAG_A64, TBII, 0, 2) FIELD(TBFLAG_A64, SVEEXC_EL, 2, 2) -FIELD(TBFLAG_A64, ZCR_LEN, 4, 4) +/* The current vector length, either NVL or SVL. */ +FIELD(TBFLAG_A64, VL, 4, 4) FIELD(TBFLAG_A64, PAUTH_ACTIVE, 8, 1) FIELD(TBFLAG_A64, BT, 9, 1) FIELD(TBFLAG_A64, BTYPE, 10, 2) /* Not cached. */ @@ -3510,14 +3067,32 @@ FIELD(TBFLAG_A64, ATA, 15, 1) FIELD(TBFLAG_A64, TCMA, 16, 2) FIELD(TBFLAG_A64, MTE_ACTIVE, 18, 1) FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1) +FIELD(TBFLAG_A64, SMEEXC_EL, 20, 2) +FIELD(TBFLAG_A64, PSTATE_SM, 22, 1) +FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1) +FIELD(TBFLAG_A64, SVL, 24, 4) +/* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */ +FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1) +FIELD(TBFLAG_A64, TRAP_ERET, 29, 1) +FIELD(TBFLAG_A64, NAA, 30, 1) +FIELD(TBFLAG_A64, ATA0, 31, 1) +FIELD(TBFLAG_A64, NV, 32, 1) +FIELD(TBFLAG_A64, NV1, 33, 1) +FIELD(TBFLAG_A64, NV2, 34, 1) +/* Set if FEAT_NV2 RAM accesses use the EL2&0 translation regime */ +FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1) +/* Set if FEAT_NV2 RAM accesses are big-endian */ +FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1) /* - * Helpers for using the above. + * Helpers for using the above. Note that only the A64 accessors use + * FIELD_DP64() and FIELD_EX64(), because in the other cases the flags + * word either is or might be 32 bits only. 
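+ *
+ * Illustrative use (hypothetical caller, editorial addition):
+ *     DP_TBFLAG_A64(env->hflags, SVL, svl_qwords - 1);
+ *     svl_qwords = EX_TBFLAG_A64(env->hflags, SVL) + 1;
+ * This is the store/load pattern that the sve_vq() and sme_vq()
+ * helpers below rely on.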
*/ #define DP_TBFLAG_ANY(DST, WHICH, VAL) \ (DST.flags = FIELD_DP32(DST.flags, TBFLAG_ANY, WHICH, VAL)) #define DP_TBFLAG_A64(DST, WHICH, VAL) \ - (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_A64, WHICH, VAL)) + (DST.flags2 = FIELD_DP64(DST.flags2, TBFLAG_A64, WHICH, VAL)) #define DP_TBFLAG_A32(DST, WHICH, VAL) \ (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_A32, WHICH, VAL)) #define DP_TBFLAG_M32(DST, WHICH, VAL) \ @@ -3526,36 +3101,41 @@ FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1) (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_AM32, WHICH, VAL)) #define EX_TBFLAG_ANY(IN, WHICH) FIELD_EX32(IN.flags, TBFLAG_ANY, WHICH) -#define EX_TBFLAG_A64(IN, WHICH) FIELD_EX32(IN.flags2, TBFLAG_A64, WHICH) +#define EX_TBFLAG_A64(IN, WHICH) FIELD_EX64(IN.flags2, TBFLAG_A64, WHICH) #define EX_TBFLAG_A32(IN, WHICH) FIELD_EX32(IN.flags2, TBFLAG_A32, WHICH) #define EX_TBFLAG_M32(IN, WHICH) FIELD_EX32(IN.flags2, TBFLAG_M32, WHICH) #define EX_TBFLAG_AM32(IN, WHICH) FIELD_EX32(IN.flags2, TBFLAG_AM32, WHICH) /** - * cpu_mmu_index: - * @env: The cpu environment - * @ifetch: True for code access, false for data access. + * sve_vq + * @env: the cpu context + * + * Return the VL cached within env->hflags, in units of quadwords. + */ +static inline int sve_vq(CPUARMState *env) +{ + return EX_TBFLAG_A64(env->hflags, VL) + 1; +} + +/** + * sme_vq + * @env: the cpu context * - * Return the core mmu index for the current translation regime. - * This function is used by generic TCG code paths. + * Return the SVL cached within env->hflags, in units of quadwords. */ -static inline int cpu_mmu_index(CPUARMState *env, bool ifetch) +static inline int sme_vq(CPUARMState *env) { - return EX_TBFLAG_ANY(env->hflags, MMUIDX); + return EX_TBFLAG_A64(env->hflags, SVL) + 1; } static inline bool bswap_code(bool sctlr_b) { #ifdef CONFIG_USER_ONLY - /* BE8 (SCTLR.B = 0, TARGET_WORDS_BIGENDIAN = 1) is mixed endian. - * The invalid combination SCTLR.B=1/CPSR.E=1/TARGET_WORDS_BIGENDIAN=0 + /* BE8 (SCTLR.B = 0, TARGET_BIG_ENDIAN = 1) is mixed endian. + * The invalid combination SCTLR.B=1/CPSR.E=1/TARGET_BIG_ENDIAN=0 * would also end up as a mixed-endian mode with BE code, LE data. */ - return -#ifdef TARGET_WORDS_BIGENDIAN - 1 ^ -#endif - sctlr_b; + return TARGET_BIG_ENDIAN ^ sctlr_b; #else /* All code access in ARM is little endian, and there are no loaders * doing swaps that need to be reversed @@ -3567,16 +3147,12 @@ static inline bool bswap_code(bool sctlr_b) #ifdef CONFIG_USER_ONLY static inline bool arm_cpu_bswap_data(CPUARMState *env) { - return -#ifdef TARGET_WORDS_BIGENDIAN - 1 ^ -#endif - arm_cpu_data_is_big_endian(env); + return TARGET_BIG_ENDIAN ^ arm_cpu_data_is_big_endian(env); } #endif -void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags); +void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags); enum { QEMU_PSCI_CONDUIT_DISABLED = 0, @@ -3660,27 +3236,24 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno) } /* Shared between translate-sve.c and sve_helper.c. */ -extern const uint64_t pred_esz_masks[4]; - -/* Helper for the macros below, validating the argument type. */ -static inline MemTxAttrs *typecheck_memtxattrs(MemTxAttrs *x) -{ - return x; -} - -/* - * Lvalue macros for ARM TLB bits that we must cache in the TCG TLB. - * Using these should be a bit more self-documenting than using the - * generic target bits directly. 
- */ -#define arm_tlb_bti_gp(x) (typecheck_memtxattrs(x)->target_tlb_bit0) -#define arm_tlb_mte_tagged(x) (typecheck_memtxattrs(x)->target_tlb_bit1) +extern const uint64_t pred_esz_masks[5]; /* * AArch64 usage of the PAGE_TARGET_* bits for linux-user. + * Note that with the Linux kernel, PROT_MTE may not be cleared by + * mprotect, but PROT_BTI may be cleared. C.f. the kernel's VM_ARCH_CLEAR. */ -#define PAGE_BTI PAGE_TARGET_1 -#define PAGE_MTE PAGE_TARGET_2 +#define PAGE_BTI PAGE_TARGET_1 +#define PAGE_MTE PAGE_TARGET_2 +#define PAGE_TARGET_STICKY PAGE_MTE + +/* We associate one allocation tag per 16 bytes, the minimum. */ +#define LOG2_TAG_GRANULE 4 +#define TAG_GRANULE (1 << LOG2_TAG_GRANULE) + +#ifdef CONFIG_USER_ONLY +#define TARGET_PAGE_DATA_SIZE (TARGET_PAGE_SIZE >> (LOG2_TAG_GRANULE + 1)) +#endif #ifdef TARGET_TAGGED_ADDRESSES /** @@ -3708,688 +3281,4 @@ static inline target_ulong cpu_untagged_addr(CPUState *cs, target_ulong x) } #endif -/* - * Naming convention for isar_feature functions: - * Functions which test 32-bit ID registers should have _aa32_ in - * their name. Functions which test 64-bit ID registers should have - * _aa64_ in their name. These must only be used in code where we - * know for certain that the CPU has AArch32 or AArch64 respectively - * or where the correct answer for a CPU which doesn't implement that - * CPU state is "false" (eg when generating A32 or A64 code, if adding - * system registers that are specific to that CPU state, for "should - * we let this system register bit be set" tests where the 32-bit - * flavour of the register doesn't have the bit, and so on). - * Functions which simply ask "does this feature exist at all" have - * _any_ in their name, and always return the logical OR of the _aa64_ - * and the _aa32_ function. - */ - -/* - * 32-bit feature tests via id registers. 
- */ -static inline bool isar_feature_aa32_thumb_div(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0; -} - -static inline bool isar_feature_aa32_arm_div(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1; -} - -static inline bool isar_feature_aa32_lob(const ARMISARegisters *id) -{ - /* (M-profile) low-overhead loops and branch future */ - return FIELD_EX32(id->id_isar0, ID_ISAR0, CMPBRANCH) >= 3; -} - -static inline bool isar_feature_aa32_jazelle(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0; -} - -static inline bool isar_feature_aa32_aes(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) != 0; -} - -static inline bool isar_feature_aa32_pmull(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) > 1; -} - -static inline bool isar_feature_aa32_sha1(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA1) != 0; -} - -static inline bool isar_feature_aa32_sha2(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA2) != 0; -} - -static inline bool isar_feature_aa32_crc32(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar5, ID_ISAR5, CRC32) != 0; -} - -static inline bool isar_feature_aa32_rdm(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar5, ID_ISAR5, RDM) != 0; -} - -static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0; -} - -static inline bool isar_feature_aa32_jscvt(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar6, ID_ISAR6, JSCVT) != 0; -} - -static inline bool isar_feature_aa32_dp(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0; -} - -static inline bool isar_feature_aa32_fhm(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar6, ID_ISAR6, FHM) != 0; -} - -static inline bool isar_feature_aa32_sb(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar6, ID_ISAR6, SB) != 0; -} - -static inline bool isar_feature_aa32_predinv(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar6, ID_ISAR6, SPECRES) != 0; -} - -static inline bool isar_feature_aa32_bf16(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar6, ID_ISAR6, BF16) != 0; -} - -static inline bool isar_feature_aa32_i8mm(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_isar6, ID_ISAR6, I8MM) != 0; -} - -static inline bool isar_feature_aa32_ras(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_pfr0, ID_PFR0, RAS) != 0; -} - -static inline bool isar_feature_aa32_mprofile(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_pfr1, ID_PFR1, MPROGMOD) != 0; -} - -static inline bool isar_feature_aa32_m_sec_state(const ARMISARegisters *id) -{ - /* - * Return true if M-profile state handling insns - * (VSCCLRM, CLRM, FPCTX access insns) are implemented - */ - return FIELD_EX32(id->id_pfr1, ID_PFR1, SECURITY) >= 3; -} - -static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) -{ - /* Sadly this is encoded differently for A-profile and M-profile */ - if (isar_feature_aa32_mprofile(id)) { - return FIELD_EX32(id->mvfr1, MVFR1, FP16) > 0; - } else { - return FIELD_EX32(id->mvfr1, MVFR1, FPHP) >= 3; - } -} - -static inline bool isar_feature_aa32_mve(const ARMISARegisters *id) -{ - /* - * Return true if MVE is supported (either integer or floating point). 
- * We must check for M-profile as the MVFR1 field means something - * else for A-profile. - */ - return isar_feature_aa32_mprofile(id) && - FIELD_EX32(id->mvfr1, MVFR1, MVE) > 0; -} - -static inline bool isar_feature_aa32_mve_fp(const ARMISARegisters *id) -{ - /* - * Return true if MVE is supported (either integer or floating point). - * We must check for M-profile as the MVFR1 field means something - * else for A-profile. - */ - return isar_feature_aa32_mprofile(id) && - FIELD_EX32(id->mvfr1, MVFR1, MVE) >= 2; -} - -static inline bool isar_feature_aa32_vfp_simd(const ARMISARegisters *id) -{ - /* - * Return true if either VFP or SIMD is implemented. - * In this case, a minimum of VFP w/ D0-D15. - */ - return FIELD_EX32(id->mvfr0, MVFR0, SIMDREG) > 0; -} - -static inline bool isar_feature_aa32_simd_r32(const ARMISARegisters *id) -{ - /* Return true if D16-D31 are implemented */ - return FIELD_EX32(id->mvfr0, MVFR0, SIMDREG) >= 2; -} - -static inline bool isar_feature_aa32_fpshvec(const ARMISARegisters *id) -{ - return FIELD_EX32(id->mvfr0, MVFR0, FPSHVEC) > 0; -} - -static inline bool isar_feature_aa32_fpsp_v2(const ARMISARegisters *id) -{ - /* Return true if CPU supports single precision floating point, VFPv2 */ - return FIELD_EX32(id->mvfr0, MVFR0, FPSP) > 0; -} - -static inline bool isar_feature_aa32_fpsp_v3(const ARMISARegisters *id) -{ - /* Return true if CPU supports single precision floating point, VFPv3 */ - return FIELD_EX32(id->mvfr0, MVFR0, FPSP) >= 2; -} - -static inline bool isar_feature_aa32_fpdp_v2(const ARMISARegisters *id) -{ - /* Return true if CPU supports double precision floating point, VFPv2 */ - return FIELD_EX32(id->mvfr0, MVFR0, FPDP) > 0; -} - -static inline bool isar_feature_aa32_fpdp_v3(const ARMISARegisters *id) -{ - /* Return true if CPU supports double precision floating point, VFPv3 */ - return FIELD_EX32(id->mvfr0, MVFR0, FPDP) >= 2; -} - -static inline bool isar_feature_aa32_vfp(const ARMISARegisters *id) -{ - return isar_feature_aa32_fpsp_v2(id) || isar_feature_aa32_fpdp_v2(id); -} - -/* - * We always set the FP and SIMD FP16 fields to indicate identical - * levels of support (assuming SIMD is implemented at all), so - * we only need one set of accessors. - */ -static inline bool isar_feature_aa32_fp16_spconv(const ARMISARegisters *id) -{ - return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 0; -} - -static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id) -{ - return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1; -} - -/* - * Note that this ID register field covers both VFP and Neon FMAC, - * so should usually be tested in combination with some other - * check that confirms the presence of whichever of VFP or Neon is - * relevant, to avoid accidentally enabling a Neon feature on - * a VFP-no-Neon core or vice-versa. 
- */ -static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id) -{ - return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0; -} - -static inline bool isar_feature_aa32_vsel(const ARMISARegisters *id) -{ - return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 1; -} - -static inline bool isar_feature_aa32_vcvt_dr(const ARMISARegisters *id) -{ - return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 2; -} - -static inline bool isar_feature_aa32_vrint(const ARMISARegisters *id) -{ - return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 3; -} - -static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id) -{ - return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 4; -} - -static inline bool isar_feature_aa32_pxn(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_mmfr0, ID_MMFR0, VMSA) >= 4; -} - -static inline bool isar_feature_aa32_pan(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) != 0; -} - -static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) >= 2; -} - -static inline bool isar_feature_aa32_pmu_8_1(const ARMISARegisters *id) -{ - /* 0xf means "non-standard IMPDEF PMU" */ - return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 4 && - FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; -} - -static inline bool isar_feature_aa32_pmu_8_4(const ARMISARegisters *id) -{ - /* 0xf means "non-standard IMPDEF PMU" */ - return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 5 && - FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; -} - -static inline bool isar_feature_aa32_hpd(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, HPDS) != 0; -} - -static inline bool isar_feature_aa32_ac2(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, AC2) != 0; -} - -static inline bool isar_feature_aa32_ccidx(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, CCIDX) != 0; -} - -static inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0; -} - -static inline bool isar_feature_aa32_dit(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_pfr0, ID_PFR0, DIT) != 0; -} - -static inline bool isar_feature_aa32_ssbs(const ARMISARegisters *id) -{ - return FIELD_EX32(id->id_pfr2, ID_PFR2, SSBS) != 0; -} - -/* - * 64-bit feature tests via id registers. 
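
The PMU predicates above are the one place where a plain threshold test is not enough: as the comments note, the PERFMON field uses 0xf to mean "non-standard IMPDEF PMU", which would otherwise satisfy any >= comparison. A sketch of the guard, assuming the 4-bit field value has already been extracted:

#include <stdbool.h>
#include <stdint.h>

/* 0xf is "non-standard IMPDEF PMU" and must not pass a >= test. */
static inline bool pmu_at_least(uint32_t perfmon, uint32_t min_level)
{
    return perfmon >= min_level && perfmon != 0xf;
}

/* pmu_at_least(f, 4) matches isar_feature_aa32_pmu_8_1 above;
 * pmu_at_least(f, 5) matches isar_feature_aa32_pmu_8_4. */
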
- */ -static inline bool isar_feature_aa64_aes(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) != 0; -} - -static inline bool isar_feature_aa64_pmull(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) > 1; -} - -static inline bool isar_feature_aa64_sha1(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA1) != 0; -} - -static inline bool isar_feature_aa64_sha256(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) != 0; -} - -static inline bool isar_feature_aa64_sha512(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) > 1; -} - -static inline bool isar_feature_aa64_crc32(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, CRC32) != 0; -} - -static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, ATOMIC) != 0; -} - -static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RDM) != 0; -} - -static inline bool isar_feature_aa64_sha3(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA3) != 0; -} - -static inline bool isar_feature_aa64_sm3(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM3) != 0; -} - -static inline bool isar_feature_aa64_sm4(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM4) != 0; -} - -static inline bool isar_feature_aa64_dp(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0; -} - -static inline bool isar_feature_aa64_fhm(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, FHM) != 0; -} - -static inline bool isar_feature_aa64_condm_4(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) != 0; -} - -static inline bool isar_feature_aa64_condm_5(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) >= 2; -} - -static inline bool isar_feature_aa64_rndr(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RNDR) != 0; -} - -static inline bool isar_feature_aa64_jscvt(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, JSCVT) != 0; -} - -static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0; -} - -static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id) -{ - /* - * Return true if any form of pauth is enabled, as this - * predicate controls migration of the 128-bit keys. - */ - return (id->id_aa64isar1 & - (FIELD_DP64(0, ID_AA64ISAR1, APA, 0xf) | - FIELD_DP64(0, ID_AA64ISAR1, API, 0xf) | - FIELD_DP64(0, ID_AA64ISAR1, GPA, 0xf) | - FIELD_DP64(0, ID_AA64ISAR1, GPI, 0xf))) != 0; -} - -static inline bool isar_feature_aa64_pauth_arch(const ARMISARegisters *id) -{ - /* - * Return true if pauth is enabled with the architected QARMA algorithm. - * QEMU will always set APA+GPA to the same value. 
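
isar_feature_aa64_pauth() above is unusual in this file: rather than extracting one field, it builds an OR of mask constants for four 4-bit fields and tests the register against the combined mask. An equivalent standalone sketch, assuming the ID_AA64ISAR1 positions APA [7:4], API [11:8], GPA [27:24], GPI [31:28]:

#include <stdbool.h>
#include <stdint.h>

static inline bool sketch_any_pauth(uint64_t id_aa64isar1)
{
    const uint64_t mask = (0xfull << 4)     /* APA: architected algorithm */
                        | (0xfull << 8)     /* API: IMPDEF algorithm */
                        | (0xfull << 24)    /* GPA */
                        | (0xfull << 28);   /* GPI */
    return (id_aa64isar1 & mask) != 0;
}

Any nonzero nibble means some form of pointer authentication is present, which is exactly the question the 128-bit key migration code needs answered.
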
- */ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) != 0; -} - -static inline bool isar_feature_aa64_tlbirange(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) == 2; -} - -static inline bool isar_feature_aa64_tlbios(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) != 0; -} - -static inline bool isar_feature_aa64_sb(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SB) != 0; -} - -static inline bool isar_feature_aa64_predinv(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SPECRES) != 0; -} - -static inline bool isar_feature_aa64_frint(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FRINTTS) != 0; -} - -static inline bool isar_feature_aa64_dcpop(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) != 0; -} - -static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >= 2; -} - -static inline bool isar_feature_aa64_bf16(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, BF16) != 0; -} - -static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id) -{ - /* We always set the AdvSIMD and FP fields identically. */ - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) != 0xf; -} - -static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id) -{ - /* We always set the AdvSIMD and FP fields identically wrt FP16. */ - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; -} - -static inline bool isar_feature_aa64_aa32(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL0) >= 2; -} - -static inline bool isar_feature_aa64_aa32_el1(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL1) >= 2; -} - -static inline bool isar_feature_aa64_sve(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0; -} - -static inline bool isar_feature_aa64_sel2(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SEL2) != 0; -} - -static inline bool isar_feature_aa64_vh(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, VH) != 0; -} - -static inline bool isar_feature_aa64_lor(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, LO) != 0; -} - -static inline bool isar_feature_aa64_pan(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) != 0; -} - -static inline bool isar_feature_aa64_ats1e1(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 2; -} - -static inline bool isar_feature_aa64_uao(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, UAO) != 0; -} - -static inline bool isar_feature_aa64_st(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, ST) != 0; -} - -static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; -} - -static inline bool isar_feature_aa64_mte_insn_reg(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) != 0; -} - -static inline bool isar_feature_aa64_mte(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2; -} - -static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id) -{ - return 
FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && - FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; -} - -static inline bool isar_feature_aa64_pmu_8_4(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 5 && - FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; -} - -static inline bool isar_feature_aa64_rcpc_8_3(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) != 0; -} - -static inline bool isar_feature_aa64_rcpc_8_4(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) >= 2; -} - -static inline bool isar_feature_aa64_i8mm(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, I8MM) != 0; -} - -static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0; -} - -static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0; -} - -static inline bool isar_feature_aa64_dit(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, DIT) != 0; -} - -static inline bool isar_feature_aa64_ssbs(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SSBS) != 0; -} - -static inline bool isar_feature_aa64_sve2(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SVEVER) != 0; -} - -static inline bool isar_feature_aa64_sve2_aes(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, AES) != 0; -} - -static inline bool isar_feature_aa64_sve2_pmull128(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, AES) >= 2; -} - -static inline bool isar_feature_aa64_sve2_bitperm(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BITPERM) != 0; -} - -static inline bool isar_feature_aa64_sve_bf16(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BFLOAT16) != 0; -} - -static inline bool isar_feature_aa64_sve2_sha3(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SHA3) != 0; -} - -static inline bool isar_feature_aa64_sve2_sm4(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SM4) != 0; -} - -static inline bool isar_feature_aa64_sve_i8mm(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, I8MM) != 0; -} - -static inline bool isar_feature_aa64_sve_f32mm(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F32MM) != 0; -} - -static inline bool isar_feature_aa64_sve_f64mm(const ARMISARegisters *id) -{ - return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F64MM) != 0; -} - -/* - * Feature tests for "does this exist in either 32-bit or 64-bit?" 
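
All of these predicates are reached through the cpu_isar_feature() convenience macro that closes this block a little further down. A self-contained sketch of how the token-pasting expansion works, with the structures trimmed to one field and the ID_AA64PFR0.SVE position at bits [35:32] assumed (note the macro relies on the GNU statement-expression extension):

#include <stdbool.h>
#include <stdint.h>

typedef struct { uint64_t id_aa64pfr0; } ARMISARegisters;
typedef struct { ARMISARegisters isar; } ARMCPU;

static inline bool isar_feature_aa64_sve(const ARMISARegisters *id)
{
    return ((id->id_aa64pfr0 >> 32) & 0xf) != 0;   /* SVE, bits [35:32] */
}

/*
 * cpu_isar_feature(aa64_sve, cpu) expands to
 *   ({ ARMCPU *cpu_ = (cpu); isar_feature_aa64_sve(&cpu_->isar); })
 * evaluating the cpu argument exactly once.
 */
#define cpu_isar_feature(name, cpu) \
    ({ ARMCPU *cpu_ = (cpu); isar_feature_##name(&cpu_->isar); })
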
- */ -static inline bool isar_feature_any_fp16(const ARMISARegisters *id) -{ - return isar_feature_aa64_fp16(id) || isar_feature_aa32_fp16_arith(id); -} - -static inline bool isar_feature_any_predinv(const ARMISARegisters *id) -{ - return isar_feature_aa64_predinv(id) || isar_feature_aa32_predinv(id); -} - -static inline bool isar_feature_any_pmu_8_1(const ARMISARegisters *id) -{ - return isar_feature_aa64_pmu_8_1(id) || isar_feature_aa32_pmu_8_1(id); -} - -static inline bool isar_feature_any_pmu_8_4(const ARMISARegisters *id) -{ - return isar_feature_aa64_pmu_8_4(id) || isar_feature_aa32_pmu_8_4(id); -} - -static inline bool isar_feature_any_ccidx(const ARMISARegisters *id) -{ - return isar_feature_aa64_ccidx(id) || isar_feature_aa32_ccidx(id); -} - -static inline bool isar_feature_any_tts2uxn(const ARMISARegisters *id) -{ - return isar_feature_aa64_tts2uxn(id) || isar_feature_aa32_tts2uxn(id); -} - -/* - * Forward to the above feature tests given an ARMCPU pointer. - */ -#define cpu_isar_feature(name, cpu) \ - ({ ARMCPU *cpu_ = (cpu); isar_feature_##name(&cpu_->isar); }) - #endif diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 15245a60a8..985b1efe16 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -21,232 +21,19 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "cpu.h" -#ifdef CONFIG_TCG -#include "hw/core/tcg-cpu-ops.h" -#endif /* CONFIG_TCG */ +#include "cpregs.h" #include "qemu/module.h" -#if !defined(CONFIG_USER_ONLY) -#include "hw/loader.h" -#endif #include "sysemu/kvm.h" +#include "sysemu/hvf.h" +#include "sysemu/qtest.h" +#include "sysemu/tcg.h" #include "kvm_arm.h" +#include "hvf_arm.h" #include "qapi/visitor.h" #include "hw/qdev-properties.h" - - -#ifndef CONFIG_USER_ONLY -static uint64_t a57_a53_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -{ - ARMCPU *cpu = env_archcpu(env); - - /* Number of cores is in [25:24]; otherwise we RAZ */ - return (cpu->core_count - 1) << 24; -} -#endif - -static const ARMCPRegInfo cortex_a72_a57_a53_cp_reginfo[] = { -#ifndef CONFIG_USER_ONLY - { .name = "L2CTLR_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 2, - .access = PL1_RW, .readfn = a57_a53_l2ctlr_read, - .writefn = arm_cp_write_ignore }, - { .name = "L2CTLR", - .cp = 15, .opc1 = 1, .crn = 9, .crm = 0, .opc2 = 2, - .access = PL1_RW, .readfn = a57_a53_l2ctlr_read, - .writefn = arm_cp_write_ignore }, -#endif - { .name = "L2ECTLR_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 3, - .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "L2ECTLR", - .cp = 15, .opc1 = 1, .crn = 9, .crm = 0, .opc2 = 3, - .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "L2ACTLR", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 0, .opc2 = 0, - .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "CPUACTLR_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 0, - .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "CPUACTLR", - .cp = 15, .opc1 = 0, .crm = 15, - .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 }, - { .name = "CPUECTLR_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 1, - .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "CPUECTLR", - .cp = 15, .opc1 = 1, .crm = 15, - .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 }, - { .name = "CPUMERRSR_EL1", .state 
= ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 2, - .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "CPUMERRSR", - .cp = 15, .opc1 = 2, .crm = 15, - .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 }, - { .name = "L2MERRSR_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 3, - .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "L2MERRSR", - .cp = 15, .opc1 = 3, .crm = 15, - .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 }, - REGINFO_SENTINEL -}; - -static void aarch64_a57_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "arm,cortex-a57"; - set_feature(&cpu->env, ARM_FEATURE_V8); - set_feature(&cpu->env, ARM_FEATURE_NEON); - set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); - set_feature(&cpu->env, ARM_FEATURE_AARCH64); - set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); - set_feature(&cpu->env, ARM_FEATURE_EL2); - set_feature(&cpu->env, ARM_FEATURE_EL3); - set_feature(&cpu->env, ARM_FEATURE_PMU); - cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A57; - cpu->midr = 0x411fd070; - cpu->revidr = 0x00000000; - cpu->reset_fpsid = 0x41034070; - cpu->isar.mvfr0 = 0x10110222; - cpu->isar.mvfr1 = 0x12111111; - cpu->isar.mvfr2 = 0x00000043; - cpu->ctr = 0x8444c004; - cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10101105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_isar6 = 0; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001124; - cpu->isar.dbgdidr = 0x3516d000; - cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ - cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */ - cpu->dcz_blocksize = 4; /* 64 bytes */ - cpu->gic_num_lrs = 4; - cpu->gic_vpribits = 5; - cpu->gic_vprebits = 5; - define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); -} - -static void aarch64_a53_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "arm,cortex-a53"; - set_feature(&cpu->env, ARM_FEATURE_V8); - set_feature(&cpu->env, ARM_FEATURE_NEON); - set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); - set_feature(&cpu->env, ARM_FEATURE_AARCH64); - set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); - set_feature(&cpu->env, ARM_FEATURE_EL2); - set_feature(&cpu->env, ARM_FEATURE_EL3); - set_feature(&cpu->env, ARM_FEATURE_PMU); - cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A53; - cpu->midr = 0x410fd034; - cpu->revidr = 0x00000000; - cpu->reset_fpsid = 0x41034070; - cpu->isar.mvfr0 = 0x10110222; - cpu->isar.mvfr1 = 0x12111111; - cpu->isar.mvfr2 = 0x00000043; - cpu->ctr = 0x84448004; /* L1Ip = VIPT */ - cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10101105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - 
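
The raw ID register values in these init functions encode real properties; for instance the low nibble of id_aa64mmfr0 is PARange, which is why the A53 value a few lines below carries the "40 bit physical addr" comment. A decoding sketch, using what I believe is the architectural encoding table:

#include <stdint.h>

/* ID_AA64MMFR0.PARANGE (bits [3:0]) -> physical address width in bits. */
static unsigned parange_bits(uint64_t id_aa64mmfr0)
{
    static const unsigned bits[] = { 32, 36, 40, 42, 44, 48, 52 };
    unsigned f = id_aa64mmfr0 & 0xf;
    return f < 7 ? bits[f] : 0;   /* 0: reserved/unrecognized encoding */
}

/* A57: 0x00001124 -> 4 -> 44 bits.  A53: 0x00001122 -> 2 -> 40 bits. */
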
cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_isar6 = 0; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ - cpu->isar.dbgdidr = 0x3516d000; - cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */ - cpu->ccsidr[2] = 0x707fe07a; /* 1024KB L2 cache */ - cpu->dcz_blocksize = 4; /* 64 bytes */ - cpu->gic_num_lrs = 4; - cpu->gic_vpribits = 5; - cpu->gic_vprebits = 5; - define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); -} - -static void aarch64_a72_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - cpu->dtb_compatible = "arm,cortex-a72"; - set_feature(&cpu->env, ARM_FEATURE_V8); - set_feature(&cpu->env, ARM_FEATURE_NEON); - set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); - set_feature(&cpu->env, ARM_FEATURE_AARCH64); - set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); - set_feature(&cpu->env, ARM_FEATURE_EL2); - set_feature(&cpu->env, ARM_FEATURE_EL3); - set_feature(&cpu->env, ARM_FEATURE_PMU); - cpu->midr = 0x410fd083; - cpu->revidr = 0x00000000; - cpu->reset_fpsid = 0x41034080; - cpu->isar.mvfr0 = 0x10110222; - cpu->isar.mvfr1 = 0x12111111; - cpu->isar.mvfr2 = 0x00000043; - cpu->ctr = 0x8444c004; - cpu->reset_sctlr = 0x00c50838; - cpu->isar.id_pfr0 = 0x00000131; - cpu->isar.id_pfr1 = 0x00011011; - cpu->isar.id_dfr0 = 0x03010066; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x10201105; - cpu->isar.id_mmfr1 = 0x40000000; - cpu->isar.id_mmfr2 = 0x01260000; - cpu->isar.id_mmfr3 = 0x02102211; - cpu->isar.id_isar0 = 0x02101110; - cpu->isar.id_isar1 = 0x13112111; - cpu->isar.id_isar2 = 0x21232042; - cpu->isar.id_isar3 = 0x01112131; - cpu->isar.id_isar4 = 0x00011142; - cpu->isar.id_isar5 = 0x00011121; - cpu->isar.id_aa64pfr0 = 0x00002222; - cpu->isar.id_aa64dfr0 = 0x10305106; - cpu->isar.id_aa64isar0 = 0x00011120; - cpu->isar.id_aa64mmfr0 = 0x00001124; - cpu->isar.dbgdidr = 0x3516d000; - cpu->clidr = 0x0a200023; - cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ - cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ - cpu->ccsidr[2] = 0x707fe07a; /* 1MB L2 cache */ - cpu->dcz_blocksize = 4; /* 64 bytes */ - cpu->gic_num_lrs = 4; - cpu->gic_vpribits = 5; - cpu->gic_vprebits = 5; - define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); -} +#include "internals.h" +#include "cpu-features.h" +#include "cpregs.h" void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) { @@ -265,8 +52,11 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) * any of the above. Finally, if SVE is not disabled, then at least one * vector length must be enabled. */ - DECLARE_BITMAP(tmp, ARM_MAX_VQ); - uint32_t vq, max_vq = 0; + uint32_t vq_map = cpu->sve_vq.map; + uint32_t vq_init = cpu->sve_vq.init; + uint32_t vq_supported; + uint32_t vq_mask = 0; + uint32_t tmp, vq, max_vq = 0; /* * CPU models specify a set of supported vector lengths which are @@ -274,10 +64,16 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) * in the supported bitmap results in an error. When KVM is enabled we * fetch the supported bitmap from the host. 
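
The arm_cpu_sve_finalize() rewrite that follows replaces DECLARE_BITMAP/find_last_bit/test_bit with plain uint32_t maps, where bit N-1 set means vector length N*128 bits is enabled. The two idioms the new code leans on, sketched with compiler builtins (clz32() and MAKE_64BIT_MASK() are the real QEMU helpers; __builtin_clz is undefined for 0, which the real code avoids by testing the map before use):

#include <stdint.h>

/* Highest enabled vq: the uint32_t analogue of find_last_bit() + 1. */
static inline uint32_t highest_vq(uint32_t vq_map)
{
    /* caller guarantees vq_map != 0 */
    return 32 - __builtin_clz(vq_map);
}

/* Mask covering vq 1..max_vq, i.e. MAKE_64BIT_MASK(0, max_vq). */
static inline uint32_t vq_mask_upto(uint32_t max_vq)
{
    return (max_vq >= 32) ? UINT32_MAX : (1u << max_vq) - 1;
}
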
*/ - if (kvm_enabled() && kvm_arm_sve_supported()) { - kvm_arm_sve_get_vls(CPU(cpu), cpu->sve_vq_supported); - } else if (kvm_enabled()) { - assert(!cpu_isar_feature(aa64_sve, cpu)); + if (kvm_enabled()) { + if (kvm_arm_sve_supported()) { + cpu->sve_vq.supported = kvm_arm_sve_get_vls(cpu); + vq_supported = cpu->sve_vq.supported; + } else { + assert(!cpu_isar_feature(aa64_sve, cpu)); + vq_supported = 0; + } + } else { + vq_supported = cpu->sve_vq.supported; } /* @@ -285,8 +81,9 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) * From the properties, sve_vq_map<N> implies sve_vq_init<N>. * Check first for any sve<N> enabled. */ - if (!bitmap_empty(cpu->sve_vq_map, ARM_MAX_VQ)) { - max_vq = find_last_bit(cpu->sve_vq_map, ARM_MAX_VQ) + 1; + if (vq_map != 0) { + max_vq = 32 - clz32(vq_map); + vq_mask = MAKE_64BIT_MASK(0, max_vq); if (cpu->sve_max_vq && max_vq > cpu->sve_max_vq) { error_setg(errp, "cannot enable sve%d", max_vq * 128); @@ -299,18 +96,13 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) if (kvm_enabled()) { /* - * For KVM we have to automatically enable all supported unitialized + * For KVM we have to automatically enable all supported uninitialized * lengths, even when the smaller lengths are not all powers-of-two. */ - bitmap_andnot(tmp, cpu->sve_vq_supported, cpu->sve_vq_init, max_vq); - bitmap_or(cpu->sve_vq_map, cpu->sve_vq_map, tmp, max_vq); + vq_map |= vq_supported & ~vq_init & vq_mask; } else { /* Propagate enabled bits down through required powers-of-two. */ - for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) { - if (!test_bit(vq - 1, cpu->sve_vq_init)) { - set_bit(vq - 1, cpu->sve_vq_map); - } - } + vq_map |= SVE_VQ_POW2_MAP & ~vq_init & vq_mask; } } else if (cpu->sve_max_vq == 0) { /* @@ -323,25 +115,18 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) if (kvm_enabled()) { /* Disabling a supported length disables all larger lengths. */ - for (vq = 1; vq <= ARM_MAX_VQ; ++vq) { - if (test_bit(vq - 1, cpu->sve_vq_init) && - test_bit(vq - 1, cpu->sve_vq_supported)) { - break; - } - } + tmp = vq_init & vq_supported; } else { /* Disabling a power-of-two disables all larger lengths. */ - for (vq = 1; vq <= ARM_MAX_VQ; vq <<= 1) { - if (test_bit(vq - 1, cpu->sve_vq_init)) { - break; - } - } + tmp = vq_init & SVE_VQ_POW2_MAP; } + vq = ctz32(tmp) + 1; max_vq = vq <= ARM_MAX_VQ ? vq - 1 : ARM_MAX_VQ; - bitmap_andnot(cpu->sve_vq_map, cpu->sve_vq_supported, - cpu->sve_vq_init, max_vq); - if (max_vq == 0 || bitmap_empty(cpu->sve_vq_map, max_vq)) { + vq_mask = max_vq > 0 ? 
MAKE_64BIT_MASK(0, max_vq) : 0; + vq_map = vq_supported & ~vq_init & vq_mask; + + if (vq_map == 0) { error_setg(errp, "cannot disable sve%d", vq * 128); error_append_hint(errp, "Disabling sve%d results in all " "vector lengths being disabled.\n", @@ -351,7 +136,8 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) return; } - max_vq = find_last_bit(cpu->sve_vq_map, max_vq) + 1; + max_vq = 32 - clz32(vq_map); + vq_mask = MAKE_64BIT_MASK(0, max_vq); } /* @@ -361,9 +147,9 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) */ if (cpu->sve_max_vq != 0) { max_vq = cpu->sve_max_vq; + vq_mask = MAKE_64BIT_MASK(0, max_vq); - if (!test_bit(max_vq - 1, cpu->sve_vq_map) && - test_bit(max_vq - 1, cpu->sve_vq_init)) { + if (vq_init & ~vq_map & (1 << (max_vq - 1))) { error_setg(errp, "cannot disable sve%d", max_vq * 128); error_append_hint(errp, "The maximum vector length must be " "enabled, sve-max-vq=%d (%d bits)\n", @@ -372,8 +158,7 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) } /* Set all bits not explicitly set within sve-max-vq. */ - bitmap_complement(tmp, cpu->sve_vq_init, max_vq); - bitmap_or(cpu->sve_vq_map, cpu->sve_vq_map, tmp, max_vq); + vq_map |= ~vq_init & vq_mask; } /* @@ -382,13 +167,14 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) * are clear, just in case anybody looks. */ assert(max_vq != 0); - bitmap_clear(cpu->sve_vq_map, max_vq, ARM_MAX_VQ - max_vq); + assert(vq_mask != 0); + vq_map &= vq_mask; /* Ensure the set of lengths matches what is supported. */ - bitmap_xor(tmp, cpu->sve_vq_map, cpu->sve_vq_supported, max_vq); - if (!bitmap_empty(tmp, max_vq)) { - vq = find_last_bit(tmp, max_vq) + 1; - if (test_bit(vq - 1, cpu->sve_vq_map)) { + tmp = vq_map ^ (vq_supported & vq_mask); + if (tmp) { + vq = 32 - clz32(tmp); + if (vq_map & (1 << (vq - 1))) { if (cpu->sve_max_vq) { error_setg(errp, "cannot set sve-max-vq=%d", cpu->sve_max_vq); error_append_hint(errp, "This CPU does not support " @@ -398,8 +184,13 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) "using only sve<N> properties.\n"); } else { error_setg(errp, "cannot enable sve%d", vq * 128); - error_append_hint(errp, "This CPU does not support " - "the vector length %d-bits.\n", vq * 128); + if (vq_supported) { + error_append_hint(errp, "This CPU does not support " + "the vector length %d-bits.\n", vq * 128); + } else { + error_append_hint(errp, "SVE not supported by KVM " + "on this host\n"); + } } return; } else { @@ -412,15 +203,15 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) return; } else { /* Ensure all required powers-of-two are enabled. */ - for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) { - if (!test_bit(vq - 1, cpu->sve_vq_map)) { - error_setg(errp, "cannot disable sve%d", vq * 128); - error_append_hint(errp, "sve%d is required as it " - "is a power-of-two length smaller " - "than the maximum, sve%d\n", - vq * 128, max_vq * 128); - return; - } + tmp = SVE_VQ_POW2_MAP & vq_mask & ~vq_map; + if (tmp) { + vq = 32 - clz32(tmp); + error_setg(errp, "cannot disable sve%d", vq * 128); + error_append_hint(errp, "sve%d is required as it " + "is a power-of-two length smaller " + "than the maximum, sve%d\n", + vq * 128, max_vq * 128); + return; } } } @@ -440,75 +231,38 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) /* From now on sve_max_vq is the actual maximum supported length. 
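
SVE_VQ_POW2_MAP, used above for the "required powers-of-two" rule, is the map with a bit set for each power-of-two vector quanta. A sketch of how such a constant can be derived, assuming ARM_MAX_VQ is 16 (SVE's architectural maximum of 2048 bits):

#include <stdint.h>

/* Bits for vq = 1, 2, 4, 8, 16, i.e. bits 0, 1, 3, 7, 15 -> 0x808b. */
static uint32_t sve_vq_pow2_map(void)
{
    uint32_t map = 0;
    for (unsigned vq = 1; vq <= 16; vq <<= 1) {
        map |= 1u << (vq - 1);
    }
    return map;
}

With that constant precomputed, "propagate enabled bits down through required powers-of-two" collapses to the single OR seen in the hunk above.
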
*/ cpu->sve_max_vq = max_vq; -} - -static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - ARMCPU *cpu = ARM_CPU(obj); - uint32_t value; - - /* All vector lengths are disabled when SVE is off. */ - if (!cpu_isar_feature(aa64_sve, cpu)) { - value = 0; - } else { - value = cpu->sve_max_vq; - } - visit_type_uint32(v, name, &value, errp); -} - -static void cpu_max_set_sve_max_vq(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - ARMCPU *cpu = ARM_CPU(obj); - uint32_t max_vq; - - if (!visit_type_uint32(v, name, &max_vq, errp)) { - return; - } - - if (kvm_enabled() && !kvm_arm_sve_supported()) { - error_setg(errp, "cannot set sve-max-vq"); - error_append_hint(errp, "SVE not supported by KVM on this host\n"); - return; - } - - if (max_vq == 0 || max_vq > ARM_MAX_VQ) { - error_setg(errp, "unsupported SVE vector length"); - error_append_hint(errp, "Valid sve-max-vq in range [1-%d]\n", - ARM_MAX_VQ); - return; - } - - cpu->sve_max_vq = max_vq; + cpu->sve_vq.map = vq_map; } /* - * Note that cpu_arm_get/set_sve_vq cannot use the simpler - * object_property_add_bool interface because they make use - * of the contents of "name" to determine which bit on which - * to operate. + * Note that cpu_arm_{get,set}_vq cannot use the simpler + * object_property_add_bool interface because they make use of the + * contents of "name" to determine which bit on which to operate. */ -static void cpu_arm_get_sve_vq(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) +static void cpu_arm_get_vq(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) { ARMCPU *cpu = ARM_CPU(obj); + ARMVQMap *vq_map = opaque; uint32_t vq = atoi(&name[3]) / 128; + bool sve = vq_map == &cpu->sve_vq; bool value; - /* All vector lengths are disabled when SVE is off. */ - if (!cpu_isar_feature(aa64_sve, cpu)) { + /* All vector lengths are disabled when feature is off. */ + if (sve + ? !cpu_isar_feature(aa64_sve, cpu) + : !cpu_isar_feature(aa64_sme, cpu)) { value = false; } else { - value = test_bit(vq - 1, cpu->sve_vq_map); + value = extract32(vq_map->map, vq - 1, 1); } visit_type_bool(v, name, &value, errp); } -static void cpu_arm_set_sve_vq(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) +static void cpu_arm_set_vq(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) { - ARMCPU *cpu = ARM_CPU(obj); + ARMVQMap *vq_map = opaque; uint32_t vq = atoi(&name[3]) / 128; bool value; @@ -516,18 +270,8 @@ static void cpu_arm_set_sve_vq(Object *obj, Visitor *v, const char *name, return; } - if (value && kvm_enabled() && !kvm_arm_sve_supported()) { - error_setg(errp, "cannot enable %s", name); - error_append_hint(errp, "SVE not supported by KVM on this host\n"); - return; - } - - if (value) { - set_bit(vq - 1, cpu->sve_vq_map); - } else { - clear_bit(vq - 1, cpu->sve_vq_map); - } - set_bit(vq - 1, cpu->sve_vq_init); + vq_map->map = deposit32(vq_map->map, vq - 1, 1, value); + vq_map->init |= 1 << (vq - 1); } static bool cpu_arm_get_sve(Object *obj, Error **errp) @@ -551,13 +295,85 @@ static void cpu_arm_set_sve(Object *obj, bool value, Error **errp) cpu->isar.id_aa64pfr0 = t; } -#ifdef CONFIG_USER_ONLY -/* Mirror linux /proc/sys/abi/sve_default_vector_length. 
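
The generalized cpu_arm_{get,set}_vq() above extract the vector length from the property name itself and flip a single bit via an opaque ARMVQMap pointer, so one pair of callbacks serves both sve<N> and sme<N>. The arithmetic, sketched (deposit32() is the real helper; the open-coded form below is equivalent for a 1-bit field, and the struct name is illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct { uint32_t map, init; } VQMapSketch;

static void set_vq_bit(VQMapSketch *m, const char *name, bool value)
{
    /* "sve256" or "sme256": skip the 3-char prefix, parse, scale to vq. */
    uint32_t vq = atoi(&name[3]) / 128;          /* "sve256" -> vq 2 */
    uint32_t bit = 1u << (vq - 1);

    m->map = value ? (m->map | bit) : (m->map & ~bit);
    m->init |= bit;   /* remember the user touched this length */
}
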
*/ -static void cpu_arm_set_sve_default_vec_len(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) +void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp) +{ + uint32_t vq_map = cpu->sme_vq.map; + uint32_t vq_init = cpu->sme_vq.init; + uint32_t vq_supported = cpu->sme_vq.supported; + uint32_t vq; + + if (vq_map == 0) { + if (!cpu_isar_feature(aa64_sme, cpu)) { + cpu->isar.id_aa64smfr0 = 0; + return; + } + + /* TODO: KVM will require limitations via SMCR_EL2. */ + vq_map = vq_supported & ~vq_init; + + if (vq_map == 0) { + vq = ctz32(vq_supported) + 1; + error_setg(errp, "cannot disable sme%d", vq * 128); + error_append_hint(errp, "All SME vector lengths are disabled.\n"); + error_append_hint(errp, "With SME enabled, at least one " + "vector length must be enabled.\n"); + return; + } + } else { + if (!cpu_isar_feature(aa64_sme, cpu)) { + vq = 32 - clz32(vq_map); + error_setg(errp, "cannot enable sme%d", vq * 128); + error_append_hint(errp, "SME must be enabled to enable " + "vector lengths.\n"); + error_append_hint(errp, "Add sme=on to the CPU property list.\n"); + return; + } + /* TODO: KVM will require limitations via SMCR_EL2. */ + } + + cpu->sme_vq.map = vq_map; +} + +static bool cpu_arm_get_sme(Object *obj, Error **errp) { ARMCPU *cpu = ARM_CPU(obj); + return cpu_isar_feature(aa64_sme, cpu); +} + +static void cpu_arm_set_sme(Object *obj, bool value, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + uint64_t t; + + t = cpu->isar.id_aa64pfr1; + t = FIELD_DP64(t, ID_AA64PFR1, SME, value); + cpu->isar.id_aa64pfr1 = t; +} + +static bool cpu_arm_get_sme_fa64(Object *obj, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + return cpu_isar_feature(aa64_sme, cpu) && + cpu_isar_feature(aa64_sme_fa64, cpu); +} + +static void cpu_arm_set_sme_fa64(Object *obj, bool value, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + uint64_t t; + + t = cpu->isar.id_aa64smfr0; + t = FIELD_DP64(t, ID_AA64SMFR0, FA64, value); + cpu->isar.id_aa64smfr0 = t; +} + +#ifdef CONFIG_USER_ONLY +/* Mirror linux /proc/sys/abi/{sve,sme}_default_vector_length. */ +static void cpu_arm_set_default_vec_len(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + uint32_t *ptr_default_vq = opaque; int32_t default_len, default_vq, remainder; if (!visit_type_int32(v, name, &default_len, errp)) { @@ -566,7 +382,7 @@ static void cpu_arm_set_sve_default_vec_len(Object *obj, Visitor *v, /* Undocumented, but the kernel allows -1 to indicate "maximum". */ if (default_len == -1) { - cpu->sve_default_vq = ARM_MAX_VQ; + *ptr_default_vq = ARM_MAX_VQ; return; } @@ -578,7 +394,11 @@ static void cpu_arm_set_sve_default_vec_len(Object *obj, Visitor *v, * and is the maximum architectural width of ZCR_ELx.LEN. */ if (remainder || default_vq < 1 || default_vq > 512) { - error_setg(errp, "cannot set sve-default-vector-length"); + ARMCPU *cpu = ARM_CPU(obj); + const char *which = + (ptr_default_vq == &cpu->sve_default_vq ? 
"sve" : "sme"); + + error_setg(errp, "cannot set %s-default-vector-length", which); if (remainder) { error_append_hint(errp, "Vector length not a multiple of 16\n"); } else if (default_vq < 1) { @@ -590,15 +410,15 @@ static void cpu_arm_set_sve_default_vec_len(Object *obj, Visitor *v, return; } - cpu->sve_default_vq = default_vq; + *ptr_default_vq = default_vq; } -static void cpu_arm_get_sve_default_vec_len(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) +static void cpu_arm_get_default_vec_len(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) { - ARMCPU *cpu = ARM_CPU(obj); - int32_t value = cpu->sve_default_vq * 16; + uint32_t *ptr_default_vq = opaque; + int32_t value = *ptr_default_vq * 16; visit_type_int32(v, name, &value, errp); } @@ -606,6 +426,7 @@ static void cpu_arm_get_sve_default_vec_len(Object *obj, Visitor *v, void aarch64_add_sve_properties(Object *obj) { + ARMCPU *cpu = ARM_CPU(obj); uint32_t vq; object_property_add_bool(obj, "sve", cpu_arm_get_sve, cpu_arm_set_sve); @@ -613,287 +434,320 @@ void aarch64_add_sve_properties(Object *obj) for (vq = 1; vq <= ARM_MAX_VQ; ++vq) { char name[8]; sprintf(name, "sve%d", vq * 128); - object_property_add(obj, name, "bool", cpu_arm_get_sve_vq, - cpu_arm_set_sve_vq, NULL, NULL); + object_property_add(obj, name, "bool", cpu_arm_get_vq, + cpu_arm_set_vq, NULL, &cpu->sve_vq); } #ifdef CONFIG_USER_ONLY /* Mirror linux /proc/sys/abi/sve_default_vector_length. */ object_property_add(obj, "sve-default-vector-length", "int32", - cpu_arm_get_sve_default_vec_len, - cpu_arm_set_sve_default_vec_len, NULL, NULL); + cpu_arm_get_default_vec_len, + cpu_arm_set_default_vec_len, NULL, + &cpu->sve_default_vq); +#endif +} + +void aarch64_add_sme_properties(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + uint32_t vq; + + object_property_add_bool(obj, "sme", cpu_arm_get_sme, cpu_arm_set_sme); + object_property_add_bool(obj, "sme_fa64", cpu_arm_get_sme_fa64, + cpu_arm_set_sme_fa64); + + for (vq = 1; vq <= ARM_MAX_VQ; vq <<= 1) { + char name[8]; + sprintf(name, "sme%d", vq * 128); + object_property_add(obj, name, "bool", cpu_arm_get_vq, + cpu_arm_set_vq, NULL, &cpu->sme_vq); + } + +#ifdef CONFIG_USER_ONLY + /* Mirror linux /proc/sys/abi/sme_default_vector_length. */ + object_property_add(obj, "sme-default-vector-length", "int32", + cpu_arm_get_default_vec_len, + cpu_arm_set_default_vec_len, NULL, + &cpu->sme_default_vq); #endif } void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) { - int arch_val = 0, impdef_val = 0; - uint64_t t; + ARMPauthFeature features = cpu_isar_feature(pauth_feature, cpu); + uint64_t isar1, isar2; - /* TODO: Handle HaveEnhancedPAC, HaveEnhancedPAC2, HaveFPAC. */ - if (cpu->prop_pauth) { - if (cpu->prop_pauth_impdef) { - impdef_val = 1; - } else { - arch_val = 1; + /* + * These properties enable or disable Pauth as a whole, or change + * the pauth algorithm, but do not change the set of features that + * are present. We have saved a copy of those features above and + * will now place it into the field that chooses the algorithm. + * + * Begin by disabling all fields. 
+ */ + isar1 = cpu->isar.id_aa64isar1; + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, 0); + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 0); + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, 0); + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 0); + + isar2 = cpu->isar.id_aa64isar2; + isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, 0); + isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 0); + + if (kvm_enabled() || hvf_enabled()) { + /* + * Exit early if PAuth is enabled and fall through to disable it. + * The algorithm selection properties are not present. + */ + if (cpu->prop_pauth) { + if (features == 0) { + error_setg(errp, "'pauth' feature not supported by " + "%s on this host", current_accel_name()); + } + return; + } + } else { + /* Pauth properties are only present when the model supports it. */ + if (features == 0) { + assert(!cpu->prop_pauth); + return; + } + + if (cpu->prop_pauth) { + if (cpu->prop_pauth_impdef && cpu->prop_pauth_qarma3) { + error_setg(errp, + "cannot enable both pauth-impdef and pauth-qarma3"); + return; + } + + if (cpu->prop_pauth_impdef) { + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, features); + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 1); + } else if (cpu->prop_pauth_qarma3) { + isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, features); + isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 1); + } else { + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features); + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1); + } + } else if (cpu->prop_pauth_impdef || cpu->prop_pauth_qarma3) { + error_setg(errp, "cannot enable pauth-impdef or " + "pauth-qarma3 without pauth"); + error_append_hint(errp, "Add pauth=on to the CPU property list.\n"); } - } else if (cpu->prop_pauth_impdef) { - error_setg(errp, "cannot enable pauth-impdef without pauth"); - error_append_hint(errp, "Add pauth=on to the CPU property list.\n"); } - t = cpu->isar.id_aa64isar1; - t = FIELD_DP64(t, ID_AA64ISAR1, APA, arch_val); - t = FIELD_DP64(t, ID_AA64ISAR1, GPA, arch_val); - t = FIELD_DP64(t, ID_AA64ISAR1, API, impdef_val); - t = FIELD_DP64(t, ID_AA64ISAR1, GPI, impdef_val); - cpu->isar.id_aa64isar1 = t; + cpu->isar.id_aa64isar1 = isar1; + cpu->isar.id_aa64isar2 = isar2; } static Property arm_cpu_pauth_property = DEFINE_PROP_BOOL("pauth", ARMCPU, prop_pauth, true); static Property arm_cpu_pauth_impdef_property = DEFINE_PROP_BOOL("pauth-impdef", ARMCPU, prop_pauth_impdef, false); +static Property arm_cpu_pauth_qarma3_property = + DEFINE_PROP_BOOL("pauth-qarma3", ARMCPU, prop_pauth_qarma3, false); -/* -cpu max: if KVM is enabled, like -cpu host (best possible with this host); - * otherwise, a CPU with as many features enabled as our emulation supports. - * The version of '-cpu max' for qemu-system-arm is defined in cpu.c; - * this only needs to handle 64 bits. - */ -static void aarch64_max_initfn(Object *obj) +void aarch64_add_pauth_properties(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); - if (kvm_enabled()) { - kvm_arm_set_cpu_features_from_host(cpu); - } else { - uint64_t t; - uint32_t u; - aarch64_a57_initfn(obj); - - /* - * Reset MIDR so the guest doesn't mistake our 'max' CPU type for a real - * one and try to apply errata workarounds or use impdef features we - * don't provide. 
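
arm_cpu_pauth_finalize() now clears every algorithm field and then re-deposits the saved feature level into exactly one of them. FIELD_DP64() is the real deposit macro; a standalone sketch of the operation, with the ID_AA64ISAR1.APA position at bits [7:4] assumed:

#include <stdint.h>

/* Clear a field, then OR in a new value: the FIELD_DP64() pattern. */
static inline uint64_t dp64(uint64_t reg, unsigned shift, unsigned len,
                            uint64_t val)
{
    uint64_t mask = ((1ull << len) - 1) << shift;
    return (reg & ~mask) | ((val << shift) & mask);
}

/* e.g. isar1 = dp64(isar1, 4, 4, 0);         clear APA
 *      isar1 = dp64(isar1, 4, 4, features);  select QARMA5 at that level */
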
- * An IMPLEMENTER field of 0 means "reserved for software use"; - * ARCHITECTURE must be 0xf indicating "v7 or later, check ID registers - * to see which features are present"; - * the VARIANT, PARTNUM and REVISION fields are all implementation - * defined and we choose to define PARTNUM just in case guest - * code needs to distinguish this QEMU CPU from other software - * implementations, though this shouldn't be needed. - */ - t = FIELD_DP64(0, MIDR_EL1, IMPLEMENTER, 0); - t = FIELD_DP64(t, MIDR_EL1, ARCHITECTURE, 0xf); - t = FIELD_DP64(t, MIDR_EL1, PARTNUM, 'Q'); - t = FIELD_DP64(t, MIDR_EL1, VARIANT, 0); - t = FIELD_DP64(t, MIDR_EL1, REVISION, 0); - cpu->midr = t; - - t = cpu->isar.id_aa64isar0; - t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2); /* AES + PMULL */ - t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1); - t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 2); /* SHA512 */ - t = FIELD_DP64(t, ID_AA64ISAR0, CRC32, 1); - t = FIELD_DP64(t, ID_AA64ISAR0, ATOMIC, 2); - t = FIELD_DP64(t, ID_AA64ISAR0, RDM, 1); - t = FIELD_DP64(t, ID_AA64ISAR0, SHA3, 1); - t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 1); - t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 1); - t = FIELD_DP64(t, ID_AA64ISAR0, DP, 1); - t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 1); - t = FIELD_DP64(t, ID_AA64ISAR0, TS, 2); /* v8.5-CondM */ - t = FIELD_DP64(t, ID_AA64ISAR0, TLB, 2); /* FEAT_TLBIRANGE */ - t = FIELD_DP64(t, ID_AA64ISAR0, RNDR, 1); - cpu->isar.id_aa64isar0 = t; - - t = cpu->isar.id_aa64isar1; - t = FIELD_DP64(t, ID_AA64ISAR1, DPB, 2); - t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 1); - t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1); - t = FIELD_DP64(t, ID_AA64ISAR1, SB, 1); - t = FIELD_DP64(t, ID_AA64ISAR1, SPECRES, 1); - t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 1); - t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 1); - t = FIELD_DP64(t, ID_AA64ISAR1, LRCPC, 2); /* ARMv8.4-RCPC */ - t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 1); - cpu->isar.id_aa64isar1 = t; - - t = cpu->isar.id_aa64pfr0; - t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1); - t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); - t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); - t = FIELD_DP64(t, ID_AA64PFR0, SEL2, 1); - t = FIELD_DP64(t, ID_AA64PFR0, DIT, 1); - cpu->isar.id_aa64pfr0 = t; - - t = cpu->isar.id_aa64pfr1; - t = FIELD_DP64(t, ID_AA64PFR1, BT, 1); - t = FIELD_DP64(t, ID_AA64PFR1, SSBS, 2); + /* Default to PAUTH on, with the architected algorithm on TCG. */ + qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_property); + if (kvm_enabled() || hvf_enabled()) { /* - * Begin with full support for MTE. This will be downgraded to MTE=0 - * during realize if the board provides no tag memory, much like - * we do for EL2 with the virtualization=on property. + * Mirror PAuth support from the probed sysregs back into the + * property for KVM or hvf. Is it just a bit backward? Yes it is! + * Note that prop_pauth is true whether the host CPU supports the + * architected QARMA5 algorithm or the IMPDEF one. We don't + * provide the separate pauth-impdef property for KVM or hvf, + * only for TCG. 
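
For the MIDR composed a few lines above: with IMPLEMENTER = 0, ARCHITECTURE = 0xf, PARTNUM = 'Q' and VARIANT/REVISION zero, the FIELD_DP64 chain reduces to a single constant. A worked sketch, assuming the usual MIDR layout (IMPLEMENTER [31:24], VARIANT [23:20], ARCHITECTURE [19:16], PARTNUM [15:4], REVISION [3:0]):

#include <stdint.h>

static uint32_t sketch_max_midr(void)
{
    uint32_t midr = 0;
    midr |= 0u   << 24;           /* IMPLEMENTER: reserved for software */
    midr |= 0xfu << 16;           /* ARCHITECTURE: check ID registers */
    midr |= (uint32_t)'Q' << 4;   /* PARTNUM = 0x051 */
    return midr;                  /* 0x000f0510 */
}
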
*/ - t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); - cpu->isar.id_aa64pfr1 = t; - - t = cpu->isar.id_aa64mmfr0; - t = FIELD_DP64(t, ID_AA64MMFR0, PARANGE, 5); /* PARange: 48 bits */ - cpu->isar.id_aa64mmfr0 = t; - - t = cpu->isar.id_aa64mmfr1; - t = FIELD_DP64(t, ID_AA64MMFR1, HPDS, 1); /* HPD */ - t = FIELD_DP64(t, ID_AA64MMFR1, LO, 1); - t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1); - t = FIELD_DP64(t, ID_AA64MMFR1, PAN, 2); /* ATS1E1 */ - t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* VMID16 */ - t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* TTS2UXN */ - cpu->isar.id_aa64mmfr1 = t; - - t = cpu->isar.id_aa64mmfr2; - t = FIELD_DP64(t, ID_AA64MMFR2, UAO, 1); - t = FIELD_DP64(t, ID_AA64MMFR2, CNP, 1); /* TTCNP */ - t = FIELD_DP64(t, ID_AA64MMFR2, ST, 1); /* TTST */ - cpu->isar.id_aa64mmfr2 = t; - - t = cpu->isar.id_aa64zfr0; - t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 1); - t = FIELD_DP64(t, ID_AA64ZFR0, AES, 2); /* PMULL */ - t = FIELD_DP64(t, ID_AA64ZFR0, BITPERM, 1); - t = FIELD_DP64(t, ID_AA64ZFR0, BFLOAT16, 1); - t = FIELD_DP64(t, ID_AA64ZFR0, SHA3, 1); - t = FIELD_DP64(t, ID_AA64ZFR0, SM4, 1); - t = FIELD_DP64(t, ID_AA64ZFR0, I8MM, 1); - t = FIELD_DP64(t, ID_AA64ZFR0, F32MM, 1); - t = FIELD_DP64(t, ID_AA64ZFR0, F64MM, 1); - cpu->isar.id_aa64zfr0 = t; - - /* Replicate the same data to the 32-bit id registers. */ - u = cpu->isar.id_isar5; - u = FIELD_DP32(u, ID_ISAR5, AES, 2); /* AES + PMULL */ - u = FIELD_DP32(u, ID_ISAR5, SHA1, 1); - u = FIELD_DP32(u, ID_ISAR5, SHA2, 1); - u = FIELD_DP32(u, ID_ISAR5, CRC32, 1); - u = FIELD_DP32(u, ID_ISAR5, RDM, 1); - u = FIELD_DP32(u, ID_ISAR5, VCMA, 1); - cpu->isar.id_isar5 = u; - - u = cpu->isar.id_isar6; - u = FIELD_DP32(u, ID_ISAR6, JSCVT, 1); - u = FIELD_DP32(u, ID_ISAR6, DP, 1); - u = FIELD_DP32(u, ID_ISAR6, FHM, 1); - u = FIELD_DP32(u, ID_ISAR6, SB, 1); - u = FIELD_DP32(u, ID_ISAR6, SPECRES, 1); - u = FIELD_DP32(u, ID_ISAR6, BF16, 1); - u = FIELD_DP32(u, ID_ISAR6, I8MM, 1); - cpu->isar.id_isar6 = u; - - u = cpu->isar.id_pfr0; - u = FIELD_DP32(u, ID_PFR0, DIT, 1); - cpu->isar.id_pfr0 = u; - - u = cpu->isar.id_pfr2; - u = FIELD_DP32(u, ID_PFR2, SSBS, 1); - cpu->isar.id_pfr2 = u; - - u = cpu->isar.id_mmfr3; - u = FIELD_DP32(u, ID_MMFR3, PAN, 2); /* ATS1E1 */ - cpu->isar.id_mmfr3 = u; - - u = cpu->isar.id_mmfr4; - u = FIELD_DP32(u, ID_MMFR4, HPDS, 1); /* AA32HPD */ - u = FIELD_DP32(u, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */ - u = FIELD_DP32(u, ID_MMFR4, CNP, 1); /* TTCNP */ - u = FIELD_DP32(u, ID_MMFR4, XNX, 1); /* TTS2UXN */ - cpu->isar.id_mmfr4 = u; - - t = cpu->isar.id_aa64dfr0; - t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5); /* v8.4-PMU */ - cpu->isar.id_aa64dfr0 = t; - - u = cpu->isar.id_dfr0; - u = FIELD_DP32(u, ID_DFR0, PERFMON, 5); /* v8.4-PMU */ - cpu->isar.id_dfr0 = u; - - u = cpu->isar.mvfr1; - u = FIELD_DP32(u, MVFR1, FPHP, 3); /* v8.2-FP16 */ - u = FIELD_DP32(u, MVFR1, SIMDHP, 2); /* v8.2-FP16 */ - cpu->isar.mvfr1 = u; - -#ifdef CONFIG_USER_ONLY - /* For usermode -cpu max we can use a larger and more efficient DCZ - * blocksize since we don't have to follow what the hardware does. - */ - cpu->ctr = 0x80038003; /* 32 byte I and D cacheline size, VIPT icache */ - cpu->dcz_blocksize = 7; /* 512 bytes */ -#endif - - /* Default to PAUTH on, with the architected algorithm. 
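
The dcz_blocksize values scattered through these init functions (4 -> 64 bytes, 6 -> 256, 7 -> 512 per the comments) are DCZID_EL0.BS encodings: log2 of the DC ZVA block size in 4-byte words. The conversion, sketched:

#include <stdint.h>

/* DCZID_EL0.BS is log2(block size in words); a word is 4 bytes. */
static inline unsigned dcz_bytes(unsigned dcz_blocksize)
{
    return 4u << dcz_blocksize;
}

/* dcz_bytes(4) == 64, dcz_bytes(6) == 256, dcz_bytes(7) == 512 */
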
*/ - qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_property); + cpu->prop_pauth = cpu_isar_feature(aa64_pauth, cpu); + } else { qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_impdef_property); + qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_qarma3_property); + } +} + +void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp) +{ + uint64_t t; - bitmap_fill(cpu->sve_vq_supported, ARM_MAX_VQ); + /* + * We only install the property for tcg -cpu max; this is the + * only situation in which the cpu field can be true. + */ + if (!cpu->prop_lpa2) { + return; } - aarch64_add_sve_properties(obj); - object_property_add(obj, "sve-max-vq", "uint32", cpu_max_get_sve_max_vq, - cpu_max_set_sve_max_vq, NULL, NULL); + t = cpu->isar.id_aa64mmfr0; + t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 2); /* 16k pages w/ LPA2 */ + t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4, 1); /* 4k pages w/ LPA2 */ + t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16_2, 3); /* 16k stage2 w/ LPA2 */ + t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 3); /* 4k stage2 w/ LPA2 */ + cpu->isar.id_aa64mmfr0 = t; } -static void aarch64_a64fx_initfn(Object *obj) +static void aarch64_a57_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); - cpu->dtb_compatible = "arm,a64fx"; + cpu->dtb_compatible = "arm,cortex-a57"; set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_EL2); set_feature(&cpu->env, ARM_FEATURE_EL3); set_feature(&cpu->env, ARM_FEATURE_PMU); - cpu->midr = 0x461f0010; + cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A57; + cpu->midr = 0x411fd070; cpu->revidr = 0x00000000; - cpu->ctr = 0x86668006; - cpu->reset_sctlr = 0x30000180; - cpu->isar.id_aa64pfr0 = 0x0000000101111111; /* No RAS Extensions */ - cpu->isar.id_aa64pfr1 = 0x0000000000000000; - cpu->isar.id_aa64dfr0 = 0x0000000010305408; - cpu->isar.id_aa64dfr1 = 0x0000000000000000; - cpu->id_aa64afr0 = 0x0000000000000000; - cpu->id_aa64afr1 = 0x0000000000000000; - cpu->isar.id_aa64mmfr0 = 0x0000000000001122; - cpu->isar.id_aa64mmfr1 = 0x0000000011212100; - cpu->isar.id_aa64mmfr2 = 0x0000000000001011; - cpu->isar.id_aa64isar0 = 0x0000000010211120; - cpu->isar.id_aa64isar1 = 0x0000000000010001; - cpu->isar.id_aa64zfr0 = 0x0000000000000000; - cpu->clidr = 0x0000000080000023; - cpu->ccsidr[0] = 0x7007e01c; /* 64KB L1 dcache */ - cpu->ccsidr[1] = 0x2007e01c; /* 64KB L1 icache */ - cpu->ccsidr[2] = 0x70ffe07c; /* 8MB L2 cache */ - cpu->dcz_blocksize = 6; /* 256 bytes */ + cpu->reset_fpsid = 0x41034070; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; + cpu->ctr = 0x8444c004; + cpu->reset_sctlr = 0x00c50838; + cpu->isar.id_pfr0 = 0x00000131; + cpu->isar.id_pfr1 = 0x00011011; + cpu->isar.id_dfr0 = 0x03010066; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x10101105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02102211; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_isar6 = 0; + cpu->isar.id_aa64pfr0 = 0x00002222; + cpu->isar.id_aa64dfr0 = 0x10305106; + cpu->isar.id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64mmfr0 = 0x00001124; + cpu->isar.dbgdidr = 0x3516d000; + cpu->isar.dbgdevid = 0x01110f13; + cpu->isar.dbgdevid1 = 
0x2; + cpu->isar.reset_pmcr_el0 = 0x41013000; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ + cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */ + cpu->dcz_blocksize = 4; /* 64 bytes */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + cpu->gic_pribits = 5; + define_cortex_a72_a57_a53_cp_reginfo(cpu); +} + +static void aarch64_a53_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a53"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A53; + cpu->midr = 0x410fd034; + cpu->revidr = 0x00000100; + cpu->reset_fpsid = 0x41034070; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; + cpu->ctr = 0x84448004; /* L1Ip = VIPT */ + cpu->reset_sctlr = 0x00c50838; + cpu->isar.id_pfr0 = 0x00000131; + cpu->isar.id_pfr1 = 0x00011011; + cpu->isar.id_dfr0 = 0x03010066; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x10101105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02102211; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_isar6 = 0; + cpu->isar.id_aa64pfr0 = 0x00002222; + cpu->isar.id_aa64dfr0 = 0x10305106; + cpu->isar.id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ + cpu->isar.dbgdidr = 0x3516d000; + cpu->isar.dbgdevid = 0x00110f13; + cpu->isar.dbgdevid1 = 0x1; + cpu->isar.reset_pmcr_el0 = 0x41033000; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */ + cpu->ccsidr[2] = 0x707fe07a; /* 1024KB L2 cache */ + cpu->dcz_blocksize = 4; /* 64 bytes */ cpu->gic_num_lrs = 4; cpu->gic_vpribits = 5; cpu->gic_vprebits = 5; + cpu->gic_pribits = 5; + define_cortex_a72_a57_a53_cp_reginfo(cpu); +} - /* Suppport of A64FX's vector length are 128,256 and 512bit only */ - aarch64_add_sve_properties(obj); - bitmap_zero(cpu->sve_vq_supported, ARM_MAX_VQ); - set_bit(0, cpu->sve_vq_supported); /* 128bit */ - set_bit(1, cpu->sve_vq_supported); /* 256bit */ - set_bit(3, cpu->sve_vq_supported); /* 512bit */ +static void aarch64_host_initfn(Object *obj) +{ +#if defined(CONFIG_KVM) + ARMCPU *cpu = ARM_CPU(obj); + kvm_arm_set_cpu_features_from_host(cpu); + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + aarch64_add_sve_properties(obj); + aarch64_add_pauth_properties(obj); + } +#elif defined(CONFIG_HVF) + ARMCPU *cpu = ARM_CPU(obj); + hvf_arm_set_cpu_features_from_host(cpu); + aarch64_add_pauth_properties(obj); +#else + g_assert_not_reached(); +#endif +} + +static void aarch64_max_initfn(Object *obj) +{ + if (kvm_enabled() || hvf_enabled()) { + /* With KVM or HVF, '-cpu max' is identical to '-cpu host' */ + aarch64_host_initfn(obj); + return; + } + + if (tcg_enabled() || qtest_enabled()) { + aarch64_a57_initfn(obj); + } - /* TODO: Add A64FX specific HPC extension registers */ + /* '-cpu max' for TCG: we currently do this as "A57 with 
extra things" */ + if (tcg_enabled()) { + aarch64_max_tcg_initfn(obj); + } } static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, - { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, - { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, { .name = "max", .initfn = aarch64_max_initfn }, +#if defined(CONFIG_KVM) || defined(CONFIG_HVF) + { .name = "host", .initfn = aarch64_host_initfn }, +#endif }; static bool aarch64_cpu_get_aarch64(Object *obj, Error **errp) @@ -928,9 +782,9 @@ static void aarch64_cpu_finalizefn(Object *obj) { } -static gchar *aarch64_gdb_arch_name(CPUState *cs) +static const gchar *aarch64_gdb_arch_name(CPUState *cs) { - return g_strdup("aarch64"); + return "aarch64"; } static void aarch64_cpu_class_init(ObjectClass *oc, void *data) @@ -939,7 +793,6 @@ static void aarch64_cpu_class_init(ObjectClass *oc, void *data) cc->gdb_read_register = aarch64_cpu_gdb_read_register; cc->gdb_write_register = aarch64_cpu_gdb_write_register; - cc->gdb_num_core_regs = 34; cc->gdb_core_xml_file = "aarch64-core.xml"; cc->gdb_arch_name = aarch64_gdb_arch_name; @@ -969,9 +822,7 @@ void aarch64_cpu_register(const ARMCPUInfo *info) { TypeInfo type_info = { .parent = TYPE_AARCH64_CPU, - .instance_size = sizeof(ARMCPU), .instance_init = aarch64_cpu_instance_init, - .class_size = sizeof(ARMCPUClass), .class_init = info->class_init ?: cpu_register_class_init, .class_data = (void *)info, }; @@ -984,10 +835,8 @@ void aarch64_cpu_register(const ARMCPUInfo *info) static const TypeInfo aarch64_cpu_type_info = { .name = TYPE_AARCH64_CPU, .parent = TYPE_ARM_CPU, - .instance_size = sizeof(ARMCPU), .instance_finalize = aarch64_cpu_finalizefn, .abstract = true, - .class_size = sizeof(AArch64CPUClass), .class_init = aarch64_cpu_class_init, }; diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c index 2983e36dd3..7d856acddf 100644 --- a/target/arm/debug_helper.c +++ b/target/arm/debug_helper.c @@ -6,10 +6,168 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "cpu.h" #include "internals.h" +#include "cpu-features.h" +#include "cpregs.h" #include "exec/exec-all.h" #include "exec/helper-proto.h" +#include "sysemu/tcg.h" + +#ifdef CONFIG_TCG +/* Return the Exception Level targeted by debug exceptions. */ +static int arm_debug_target_el(CPUARMState *env) +{ + bool secure = arm_is_secure(env); + bool route_to_el2 = false; + + if (arm_feature(env, ARM_FEATURE_M)) { + return 1; + } + + if (arm_is_el2_enabled(env)) { + route_to_el2 = env->cp15.hcr_el2 & HCR_TGE || + env->cp15.mdcr_el2 & MDCR_TDE; + } + + if (route_to_el2) { + return 2; + } else if (arm_feature(env, ARM_FEATURE_EL3) && + !arm_el_is_aa64(env, 3) && secure) { + return 3; + } else { + return 1; + } +} + +/* + * Raise an exception to the debug target el. + * Modify syndrome to indicate when origin and target EL are the same. + */ +G_NORETURN static void +raise_exception_debug(CPUARMState *env, uint32_t excp, uint32_t syndrome) +{ + int debug_el = arm_debug_target_el(env); + int cur_el = arm_current_el(env); + + /* + * If singlestep is targeting a lower EL than the current one, then + * DisasContext.ss_active must be false and we can never get here. + * Similarly for watchpoint and breakpoint matches. 
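
raise_exception_debug() folds "did this originate at the target EL?" into the syndrome by OR-ing one bit at ARM_EL_EC_SHIFT: the same-EL exception-class codes are, as far as I know, exactly the lower-EL codes with the low EC bit set (watchpoint 0x34 -> 0x35, breakpoint 0x30 -> 0x31). A sketch of that adjustment, with the EC position assumed:

#include <stdint.h>

#define EL_EC_SHIFT 26   /* EC field lives in syndrome bits [31:26] */

static uint32_t adjust_syndrome(uint32_t syndrome, int cur_el, int debug_el)
{
    /* Lower-EL debug EC codes are even; same-EL codes are odd (EC | 1). */
    return syndrome | ((uint32_t)(debug_el == cur_el) << EL_EC_SHIFT);
}

/* e.g. a watchpoint syndrome built with EC 0x34 becomes EC 0x35
 * when the debug exception targets the current EL. */
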
+ */ + assert(debug_el >= cur_el); + syndrome |= (debug_el == cur_el) << ARM_EL_EC_SHIFT; + raise_exception(env, excp, syndrome, debug_el); +} + +/* See AArch64.GenerateDebugExceptionsFrom() in ARM ARM pseudocode */ +static bool aa64_generate_debug_exceptions(CPUARMState *env) +{ + int cur_el = arm_current_el(env); + int debug_el; + + if (cur_el == 3) { + return false; + } + + /* MDCR_EL3.SDD disables debug events from Secure state */ + if (arm_is_secure_below_el3(env) + && extract32(env->cp15.mdcr_el3, 16, 1)) { + return false; + } + + /* + * Same EL to same EL debug exceptions need MDSCR_KDE enabled + * while not masking the (D)ebug bit in DAIF. + */ + debug_el = arm_debug_target_el(env); + + if (cur_el == debug_el) { + return extract32(env->cp15.mdscr_el1, 13, 1) + && !(env->daif & PSTATE_D); + } + + /* Otherwise the debug target needs to be a higher EL */ + return debug_el > cur_el; +} + +static bool aa32_generate_debug_exceptions(CPUARMState *env) +{ + int el = arm_current_el(env); + + if (el == 0 && arm_el_is_aa64(env, 1)) { + return aa64_generate_debug_exceptions(env); + } + + if (arm_is_secure(env)) { + int spd; + + if (el == 0 && (env->cp15.sder & 1)) { + /* + * SDER.SUIDEN means debug exceptions from Secure EL0 + * are always enabled. Otherwise they are controlled by + * SDCR.SPD like those from other Secure ELs. + */ + return true; + } + + spd = extract32(env->cp15.mdcr_el3, 14, 2); + switch (spd) { + case 1: + /* SPD == 0b01 is reserved, but behaves as 0b00. */ + case 0: + /* + * For 0b00 we return true if external secure invasive debug + * is enabled. On real hardware this is controlled by external + * signals to the core. QEMU always permits debug, and behaves + * as if DBGEN, SPIDEN, NIDEN and SPNIDEN are all tied high. + */ + return true; + case 2: + return false; + case 3: + return true; + } + } + + return el != 2; +} + +/* + * Return true if debugging exceptions are currently enabled. + * This corresponds to what in ARM ARM pseudocode would be + * if UsingAArch32() then + * return AArch32.GenerateDebugExceptions() + * else + * return AArch64.GenerateDebugExceptions() + * We choose to push the if() down into this function for clarity, + * since the pseudocode has it at all callsites except for the one in + * CheckSoftwareStep(), where it is elided because both branches would + * always return the same value. + */ +bool arm_generate_debug_exceptions(CPUARMState *env) +{ + if ((env->cp15.oslsr_el1 & 1) || (env->cp15.osdlr_el1 & 1)) { + return false; + } + if (is_a64(env)) { + return aa64_generate_debug_exceptions(env); + } else { + return aa32_generate_debug_exceptions(env); + } +} + +/* + * Is single-stepping active? (Note that the "is EL_D AArch64?" check + * implicitly means this always returns false in pre-v8 CPUs.) + */ +bool arm_singlestep_active(CPUARMState *env) +{ + return extract32(env->cp15.mdscr_el1, 0, 1) + && arm_el_is_aa64(env, arm_debug_target_el(env)) + && arm_generate_debug_exceptions(env); +} /* Return true if the linked breakpoint entry lbn passes its checks */ static bool linked_bp_matches(ARMCPU *cpu, int lbn) @@ -143,9 +301,9 @@ static bool bp_wp_matches(ARMCPU *cpu, int n, bool is_wp) * Non-Secure to simplify the code slightly compared to the full * table in the ARM ARM. 
*/ - pac = extract64(cr, 1, 2); - hmc = extract64(cr, 13, 1); - ssc = extract64(cr, 14, 2); + pac = FIELD_EX64(cr, DBGWCR, PAC); + hmc = FIELD_EX64(cr, DBGWCR, HMC); + ssc = FIELD_EX64(cr, DBGWCR, SSC); switch (ssc) { case 0: @@ -184,8 +342,8 @@ static bool bp_wp_matches(ARMCPU *cpu, int n, bool is_wp) g_assert_not_reached(); } - wt = extract64(cr, 20, 1); - lbn = extract64(cr, 16, 4); + wt = FIELD_EX64(cr, DBGWCR, WT); + lbn = FIELD_EX64(cr, DBGWCR, LBN); if (wt && !linked_bp_matches(cpu, lbn)) { return false; @@ -220,6 +378,7 @@ bool arm_debug_check_breakpoint(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; + target_ulong pc; int n; /* @@ -231,6 +390,28 @@ bool arm_debug_check_breakpoint(CPUState *cs) return false; } + /* + * Single-step exceptions have priority over breakpoint exceptions. + * If single-step state is active-pending, suppress the bp. + */ + if (arm_singlestep_active(env) && !(env->pstate & PSTATE_SS)) { + return false; + } + + /* + * PC alignment faults have priority over breakpoint exceptions. + */ + pc = is_a64(env) ? env->pc : env->regs[15]; + if ((is_a64(env) || !env->thumb) && (pc & 3) != 0) { + return false; + } + + /* + * Instruction aborts have priority over breakpoint exceptions. + * TODO: We would need to look up the page for PC and verify that + * it is present and executable. + */ + for (n = 0; n < ARRAY_SIZE(env->cpu_breakpoint); n++) { if (bp_wp_matches(cpu, n, false)) { return true; @@ -250,6 +431,37 @@ bool arm_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) return check_watchpoints(cpu); } +/* + * Return the FSR value for a debug exception (watchpoint, hardware + * breakpoint or BKPT insn) targeting the specified exception level. + */ +static uint32_t arm_debug_exception_fsr(CPUARMState *env) +{ + ARMMMUFaultInfo fi = { .type = ARMFault_Debug }; + int target_el = arm_debug_target_el(env); + bool using_lpae; + + if (arm_feature(env, ARM_FEATURE_M)) { + using_lpae = false; + } else if (target_el == 2 || arm_el_is_aa64(env, target_el)) { + using_lpae = true; + } else if (arm_feature(env, ARM_FEATURE_PMSA) && + arm_feature(env, ARM_FEATURE_V8)) { + using_lpae = true; + } else if (arm_feature(env, ARM_FEATURE_LPAE) && + (env->cp15.tcr_el[target_el] & TTBCR_EAE)) { + using_lpae = true; + } else { + using_lpae = false; + } + + if (using_lpae) { + return arm_fi_to_lfsc(&fi); + } else { + return arm_fi_to_sfsc(&fi); + } +} + void arm_debug_excp_handler(CPUState *cs) { /* @@ -263,19 +475,16 @@ void arm_debug_excp_handler(CPUState *cs) if (wp_hit) { if (wp_hit->flags & BP_CPU) { bool wnr = (wp_hit->flags & BP_WATCHPOINT_HIT_WRITE) != 0; - bool same_el = arm_debug_target_el(env) == arm_current_el(env); cs->watchpoint_hit = NULL; env->exception.fsr = arm_debug_exception_fsr(env); env->exception.vaddress = wp_hit->hitaddr; - raise_exception(env, EXCP_DATA_ABORT, - syn_watchpoint(same_el, 0, wnr), - arm_debug_target_el(env)); + raise_exception_debug(env, EXCP_DATA_ABORT, + syn_watchpoint(0, 0, wnr)); } } else { uint64_t pc = is_a64(env) ? env->pc : env->regs[15]; - bool same_el = (arm_debug_target_el(env) == arm_current_el(env)); /* * (1) GDB breakpoints should be handled first. @@ -295,9 +504,252 @@ void arm_debug_excp_handler(CPUState *cs) * exception/security level. 
*/ env->exception.vaddress = 0; - raise_exception(env, EXCP_PREFETCH_ABORT, - syn_breakpoint(same_el), - arm_debug_target_el(env)); + raise_exception_debug(env, EXCP_PREFETCH_ABORT, syn_breakpoint(0)); + } +} + +/* + * Raise an EXCP_BKPT with the specified syndrome register value, + * targeting the correct exception level for debug exceptions. + */ +void HELPER(exception_bkpt_insn)(CPUARMState *env, uint32_t syndrome) +{ + int debug_el = arm_debug_target_el(env); + int cur_el = arm_current_el(env); + + /* FSR will only be used if the debug target EL is AArch32. */ + env->exception.fsr = arm_debug_exception_fsr(env); + /* + * FAR is UNKNOWN: clear vaddress to avoid potentially exposing + * values to the guest that it shouldn't be able to see at its + * exception/security level. + */ + env->exception.vaddress = 0; + /* + * Other kinds of architectural debug exception are ignored if + * they target an exception level below the current one (in QEMU + * this is checked by arm_generate_debug_exceptions()). Breakpoint + * instructions are special because they always generate an exception + * to somewhere: if they can't go to the configured debug exception + * level they are taken to the current exception level. + */ + if (debug_el < cur_el) { + debug_el = cur_el; + } + raise_exception(env, EXCP_BKPT, syndrome, debug_el); +} + +void HELPER(exception_swstep)(CPUARMState *env, uint32_t syndrome) +{ + raise_exception_debug(env, EXCP_UDEF, syndrome); +} + +void hw_watchpoint_update(ARMCPU *cpu, int n) +{ + CPUARMState *env = &cpu->env; + vaddr len = 0; + vaddr wvr = env->cp15.dbgwvr[n]; + uint64_t wcr = env->cp15.dbgwcr[n]; + int mask; + int flags = BP_CPU | BP_STOP_BEFORE_ACCESS; + + if (env->cpu_watchpoint[n]) { + cpu_watchpoint_remove_by_ref(CPU(cpu), env->cpu_watchpoint[n]); + env->cpu_watchpoint[n] = NULL; + } + + if (!FIELD_EX64(wcr, DBGWCR, E)) { + /* E bit clear : watchpoint disabled */ + return; + } + + switch (FIELD_EX64(wcr, DBGWCR, LSC)) { + case 0: + /* LSC 00 is reserved and must behave as if the wp is disabled */ + return; + case 1: + flags |= BP_MEM_READ; + break; + case 2: + flags |= BP_MEM_WRITE; + break; + case 3: + flags |= BP_MEM_ACCESS; + break; + } + + /* + * Attempts to use both MASK and BAS fields simultaneously are + * CONSTRAINED UNPREDICTABLE; we opt to ignore BAS in this case, + * thus generating a watchpoint for every byte in the masked region. + */ + mask = FIELD_EX64(wcr, DBGWCR, MASK); + if (mask == 1 || mask == 2) { + /* + * Reserved values of MASK; we must act as if the mask value was + * some non-reserved value, or as if the watchpoint were disabled. + * We choose the latter. + */ + return; + } else if (mask) { + /* Watchpoint covers an aligned area up to 2GB in size */ + len = 1ULL << mask; + /* + * If masked bits in WVR are not zero it's CONSTRAINED UNPREDICTABLE + * whether the watchpoint fires when the unmasked bits match; we opt + * to generate the exceptions. + */ + wvr &= ~(len - 1); + } else { + /* Watchpoint covers bytes defined by the byte address select bits */ + int bas = FIELD_EX64(wcr, DBGWCR, BAS); + int basstart; + + if (extract64(wvr, 2, 1)) { + /* + * Deprecated case of an only 4-aligned address. BAS[7:4] are + * ignored, and BAS[3:0] define which bytes to watch. + */ + bas &= 0xf; + } + + if (bas == 0) { + /* This must act as if the watchpoint is disabled */ + return; + } + + /* + * The BAS bits are supposed to be programmed to indicate a contiguous + * range of bytes. 
Otherwise it is CONSTRAINED UNPREDICTABLE whether
+     * we fire for each byte in the word/doubleword addressed by the WVR.
+     * We choose to ignore any non-zero bits after the first range of 1s.
+     */
+        basstart = ctz32(bas);
+        len = cto32(bas >> basstart);
+        wvr += basstart;
+    }
+
+    cpu_watchpoint_insert(CPU(cpu), wvr, len, flags,
+                          &env->cpu_watchpoint[n]);
+}
+
+void hw_watchpoint_update_all(ARMCPU *cpu)
+{
+    int i;
+    CPUARMState *env = &cpu->env;
+
+    /*
+     * Completely clear out existing QEMU watchpoints and our array, to
+     * avoid possible stale entries following migration load.
+     */
+    cpu_watchpoint_remove_all(CPU(cpu), BP_CPU);
+    memset(env->cpu_watchpoint, 0, sizeof(env->cpu_watchpoint));
+
+    for (i = 0; i < ARRAY_SIZE(cpu->env.cpu_watchpoint); i++) {
+        hw_watchpoint_update(cpu, i);
+    }
+}
+
+void hw_breakpoint_update(ARMCPU *cpu, int n)
+{
+    CPUARMState *env = &cpu->env;
+    uint64_t bvr = env->cp15.dbgbvr[n];
+    uint64_t bcr = env->cp15.dbgbcr[n];
+    vaddr addr;
+    int bt;
+    int flags = BP_CPU;
+
+    if (env->cpu_breakpoint[n]) {
+        cpu_breakpoint_remove_by_ref(CPU(cpu), env->cpu_breakpoint[n]);
+        env->cpu_breakpoint[n] = NULL;
+    }
+
+    if (!extract64(bcr, 0, 1)) {
+        /* E bit clear : breakpoint disabled */
+        return;
+    }
+
+    bt = extract64(bcr, 20, 4);
+
+    switch (bt) {
+    case 4: /* unlinked address mismatch (reserved if AArch64) */
+    case 5: /* linked address mismatch (reserved if AArch64) */
+        qemu_log_mask(LOG_UNIMP,
+                      "arm: address mismatch breakpoint types not implemented\n");
+        return;
+    case 0: /* unlinked address match */
+    case 1: /* linked address match */
+    {
+        /*
+         * Bits [1:0] are RES0.
+         *
+         * It is IMPLEMENTATION DEFINED whether bits [63:49]
+         * ([63:53] for FEAT_LVA) are hardwired to a copy of the sign bit
+         * of the VA field ([48] or [52] for FEAT_LVA), or whether the
+         * value is read as written. It is CONSTRAINED UNPREDICTABLE
+         * whether the RESS bits are ignored when comparing an address.
+         * Therefore we are allowed to compare the entire register, which
+         * lets us avoid considering whether FEAT_LVA is actually enabled.
+         *
+         * The BAS field is used to allow setting breakpoints on 16-bit
+         * wide instructions; it is CONSTRAINED UNPREDICTABLE whether
+         * a bp will fire if the addresses covered by the bp and the addresses
+         * covered by the insn overlap but the insn doesn't start at the
+         * start of the bp address range. We choose to require the insn and
+         * the bp to have the same address. The constraints on writing to
+         * BAS enforced in dbgbcr_write mean we have only four cases:
+         *  0b0000 => no breakpoint
+         *  0b0011 => breakpoint on addr
+         *  0b1100 => breakpoint on addr + 2
+         *  0b1111 => breakpoint on addr
+         * See also figure D2-3 in the v8 ARM ARM (DDI0487A.c).
+         */
+        int bas = extract64(bcr, 5, 4);
+        addr = bvr & ~3ULL;
+        if (bas == 0) {
+            return;
+        }
+        if (bas == 0xc) {
+            addr += 2;
+        }
+        break;
+    }
+    case 2: /* unlinked context ID match */
+    case 8: /* unlinked VMID match (reserved if no EL2) */
+    case 10: /* unlinked context ID and VMID match (reserved if no EL2) */
+        qemu_log_mask(LOG_UNIMP,
+                      "arm: unlinked context breakpoint types not implemented\n");
+        return;
+    case 9: /* linked VMID match (reserved if no EL2) */
+    case 11: /* linked context ID and VMID match (reserved if no EL2) */
+    case 3: /* linked context ID match */
+    default:
+        /*
+         * We must generate no events for Linked context matches (unless
+         * they are linked to by some other bp/wp, which is handled in
+         * updates for the linking bp/wp). We choose to also generate no events
+         * for reserved values.
+         */
+        return;
+    }
+
+    cpu_breakpoint_insert(CPU(cpu), addr, flags, &env->cpu_breakpoint[n]);
+}
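As a worked example of the BT=0/1 BAS decode above (a standalone sketch, not part of the patch; the register values are invented for illustration):

    #include <assert.h>
    #include <stdint.h>

    /* Mirrors the address-match BAS handling in hw_breakpoint_update(). */
    static uint64_t bas_breakpoint_addr(uint64_t bvr, uint64_t bcr)
    {
        uint64_t addr = bvr & ~3ULL;   /* bits [1:0] are RES0 */
        int bas = (bcr >> 5) & 0xf;    /* extract64(bcr, 5, 4) */

        /* BAS == 0 (no breakpoint) is an early return in the real code */
        assert(bas == 0x3 || bas == 0xc || bas == 0xf);
        return bas == 0xc ? addr + 2 : addr;
    }

    int main(void)
    {
        assert(bas_breakpoint_addr(0x8000, 0xc << 5) == 0x8002); /* 0b1100 */
        assert(bas_breakpoint_addr(0x8000, 0x3 << 5) == 0x8000); /* 0b0011 */
        assert(bas_breakpoint_addr(0x8000, 0xf << 5) == 0x8000); /* 0b1111 */
        return 0;
    }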
+
+void hw_breakpoint_update_all(ARMCPU *cpu)
+{
+    int i;
+    CPUARMState *env = &cpu->env;
+
+    /*
+     * Completely clear out existing QEMU breakpoints and our array, to
+     * avoid possible stale entries following migration load.
+     */
+    cpu_breakpoint_remove_all(CPU(cpu), BP_CPU);
+    memset(env->cpu_breakpoint, 0, sizeof(env->cpu_breakpoint));
+
+    for (i = 0; i < ARRAY_SIZE(cpu->env.cpu_breakpoint); i++) {
+        hw_breakpoint_update(cpu, i);
     }
 }
@@ -326,4 +778,503 @@ vaddr arm_adjust_watchpoint_address(CPUState *cs, vaddr addr, int len)
     return addr;
 }
 
-#endif
+#endif /* !CONFIG_USER_ONLY */
+#endif /* CONFIG_TCG */
+
+/*
+ * Check for traps to "powerdown debug" registers, which are controlled
+ * by MDCR.TDOSA
+ */
+static CPAccessResult access_tdosa(CPUARMState *env, const ARMCPRegInfo *ri,
+                                   bool isread)
+{
+    int el = arm_current_el(env);
+    uint64_t mdcr_el2 = arm_mdcr_el2_eff(env);
+    bool mdcr_el2_tdosa = (mdcr_el2 & MDCR_TDOSA) || (mdcr_el2 & MDCR_TDE) ||
+        (arm_hcr_el2_eff(env) & HCR_TGE);
+
+    if (el < 2 && mdcr_el2_tdosa) {
+        return CP_ACCESS_TRAP_EL2;
+    }
+    if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDOSA)) {
+        return CP_ACCESS_TRAP_EL3;
+    }
+    return CP_ACCESS_OK;
+}
+
+/*
+ * Check for traps to "debug ROM" registers, which are controlled
+ * by MDCR_EL2.TDRA for EL2 but by the more general MDCR_EL3.TDA for EL3.
+ */
+static CPAccessResult access_tdra(CPUARMState *env, const ARMCPRegInfo *ri,
+                                  bool isread)
+{
+    int el = arm_current_el(env);
+    uint64_t mdcr_el2 = arm_mdcr_el2_eff(env);
+    bool mdcr_el2_tdra = (mdcr_el2 & MDCR_TDRA) || (mdcr_el2 & MDCR_TDE) ||
+        (arm_hcr_el2_eff(env) & HCR_TGE);
+
+    if (el < 2 && mdcr_el2_tdra) {
+        return CP_ACCESS_TRAP_EL2;
+    }
+    if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDA)) {
+        return CP_ACCESS_TRAP_EL3;
+    }
+    return CP_ACCESS_OK;
+}
+
+/*
+ * Check for traps to general debug registers, which are controlled
+ * by MDCR_EL2.TDA for EL2 and MDCR_EL3.TDA for EL3.
+ */
+static CPAccessResult access_tda(CPUARMState *env, const ARMCPRegInfo *ri,
+                                 bool isread)
+{
+    int el = arm_current_el(env);
+    uint64_t mdcr_el2 = arm_mdcr_el2_eff(env);
+    bool mdcr_el2_tda = (mdcr_el2 & MDCR_TDA) || (mdcr_el2 & MDCR_TDE) ||
+        (arm_hcr_el2_eff(env) & HCR_TGE);
+
+    if (el < 2 && mdcr_el2_tda) {
+        return CP_ACCESS_TRAP_EL2;
+    }
+    if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDA)) {
+        return CP_ACCESS_TRAP_EL3;
+    }
+    return CP_ACCESS_OK;
+}
+
+static CPAccessResult access_dbgvcr32(CPUARMState *env, const ARMCPRegInfo *ri,
+                                      bool isread)
+{
+    /* MDCR_EL3.TDA doesn't apply for FEAT_NV traps */
+    if (arm_current_el(env) == 2 && (env->cp15.mdcr_el3 & MDCR_TDA)) {
+        return CP_ACCESS_TRAP_EL3;
+    }
+    return CP_ACCESS_OK;
+}
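The three MDCR accessors above share one shape: the EL2 controls (the specific MDCR_EL2 bit, MDCR_EL2.TDE, or HCR_EL2.TGE) are tested before the MDCR_EL3 control, so an access from a lower EL traps to EL2 in preference to EL3. A sketch of how such an accessor is wired into a register definition (the register below is hypothetical and its encoding invented; it is not one added by this patch):

    /* Hypothetical RAZ/WI debug register guarded by the general TDA trap. */
    static const ARMCPRegInfo example_dbg_reginfo = {
        .name = "DBGEXAMPLE_EL1", .state = ARM_CP_STATE_AA64,
        .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 9, .opc2 = 0,
        .access = PL1_RW,
        .accessfn = access_tda,   /* EL2 trap checked before EL3, as above */
        .type = ARM_CP_CONST, .resetvalue = 0,
    };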
+
+/*
+ * Check for traps to Debug Comms Channel registers. If FEAT_FGT
+ * is implemented then these are controlled by MDCR_EL2.TDCC for
+ * EL2 and MDCR_EL3.TDCC for EL3. They are also controlled by
+ * the general debug access trap bits MDCR_EL2.TDA and MDCR_EL3.TDA.
+ * For EL0, they are also controlled by MDSCR_EL1.TDCC.
+ */
+static CPAccessResult access_tdcc(CPUARMState *env, const ARMCPRegInfo *ri,
+                                  bool isread)
+{
+    int el = arm_current_el(env);
+    uint64_t mdcr_el2 = arm_mdcr_el2_eff(env);
+    bool mdscr_el1_tdcc = extract32(env->cp15.mdscr_el1, 12, 1);
+    bool mdcr_el2_tda = (mdcr_el2 & MDCR_TDA) || (mdcr_el2 & MDCR_TDE) ||
+        (arm_hcr_el2_eff(env) & HCR_TGE);
+    bool mdcr_el2_tdcc = cpu_isar_feature(aa64_fgt, env_archcpu(env)) &&
+        (mdcr_el2 & MDCR_TDCC);
+    bool mdcr_el3_tdcc = cpu_isar_feature(aa64_fgt, env_archcpu(env)) &&
+        (env->cp15.mdcr_el3 & MDCR_TDCC);
+
+    if (el < 1 && mdscr_el1_tdcc) {
+        return CP_ACCESS_TRAP;
+    }
+    if (el < 2 && (mdcr_el2_tda || mdcr_el2_tdcc)) {
+        return CP_ACCESS_TRAP_EL2;
+    }
+    if (el < 3 && ((env->cp15.mdcr_el3 & MDCR_TDA) || mdcr_el3_tdcc)) {
+        return CP_ACCESS_TRAP_EL3;
+    }
+    return CP_ACCESS_OK;
+}
+
+static void oslar_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                        uint64_t value)
+{
+    /*
+     * Writes to OSLAR_EL1 may update the OS lock status, which can be
+     * read via a bit in OSLSR_EL1.
+     */
+    int oslock;
+
+    if (ri->state == ARM_CP_STATE_AA32) {
+        oslock = (value == 0xC5ACCE55);
+    } else {
+        oslock = value & 1;
+    }
+
+    env->cp15.oslsr_el1 = deposit32(env->cp15.oslsr_el1, 1, 1, oslock);
+}
+
+static void osdlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                        uint64_t value)
+{
+    ARMCPU *cpu = env_archcpu(env);
+    /*
+     * Only defined bit is bit 0 (DLK); if FEAT_DoubleLock is not
+     * implemented this is RAZ/WI.
+     */
+    if (arm_feature(env, ARM_FEATURE_AARCH64)
+        ? cpu_isar_feature(aa64_doublelock, cpu)
+        : cpu_isar_feature(aa32_doublelock, cpu)) {
+        env->cp15.osdlr_el1 = value & 1;
+    }
+}
+
+static void dbgclaimset_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                              uint64_t value)
+{
+    env->cp15.dbgclaim |= (value & 0xFF);
+}
+
+static uint64_t dbgclaimset_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /* CLAIM bits are RAO */
+    return 0xFF;
+}
+
+static void dbgclaimclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                              uint64_t value)
+{
+    env->cp15.dbgclaim &= ~(value & 0xFF);
+}
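Together these implement the usual claim-tag protocol: software sets tags through DBGCLAIMSET, reads the currently claimed tags back through DBGCLAIMCLR, and releases them by writing those bits to DBGCLAIMCLR. A minimal standalone model of the guest-visible behaviour (a sketch, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    static uint8_t dbgclaim;   /* models env->cp15.dbgclaim */

    static void claimset_write(uint8_t v) { dbgclaim |= v; }
    static uint8_t claimset_read(void) { return 0xFF; }      /* RAO */
    static void claimclr_write(uint8_t v) { dbgclaim &= ~v; }
    static uint8_t claimclr_read(void) { return dbgclaim; }

    int main(void)
    {
        claimset_write(0x05);               /* claim tags 0 and 2 */
        assert(claimclr_read() == 0x05);
        claimclr_write(0x01);               /* release tag 0 */
        assert(claimclr_read() == 0x04);
        assert(claimset_read() == 0xFF);    /* SET register reads as-ones */
        return 0;
    }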
+
+static const ARMCPRegInfo debug_cp_reginfo[] = {
+    /*
+     * DBGDRAR, DBGDSAR: always RAZ since we don't implement memory mapped
+     * debug components. The AArch64 version of DBGDRAR is named MDRAR_EL1;
+     * unlike DBGDRAR it is never accessible from EL0.
+     * DBGDSAR is deprecated and must RAZ from v8 anyway, so it has no AArch64
+     * accessor.
+     */
+    { .name = "DBGDRAR", .cp = 14, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
+      .access = PL0_R, .accessfn = access_tdra,
+      .type = ARM_CP_CONST | ARM_CP_NO_GDB, .resetvalue = 0 },
+    { .name = "MDRAR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 0,
+      .access = PL1_R, .accessfn = access_tdra,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "DBGDSAR", .cp = 14, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
+      .access = PL0_R, .accessfn = access_tdra,
+      .type = ARM_CP_CONST | ARM_CP_NO_GDB, .resetvalue = 0 },
+    /* Monitor debug system control register; the 32-bit alias is DBGDSCRext. */
+    { .name = "MDSCR_EL1", .state = ARM_CP_STATE_BOTH,
+      .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
+      .access = PL1_RW, .accessfn = access_tda,
+      .fgt = FGT_MDSCR_EL1,
+      .nv2_redirect_offset = 0x158,
+      .fieldoffset = offsetof(CPUARMState, cp15.mdscr_el1),
+      .resetvalue = 0 },
+    /*
+     * MDCCSR_EL0[30:29] map to EDSCR[30:29]. Simply RAZ as the external
+     * Debug Communication Channel is not implemented.
+     */
+    { .name = "MDCCSR_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 2, .opc1 = 3, .crn = 0, .crm = 1, .opc2 = 0,
+      .access = PL0_R, .accessfn = access_tdcc,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
+    /*
+     * These registers belong to the Debug Communications Channel,
+     * which is not implemented. However we implement RAZ/WI behaviour
+     * with trapping to prevent spurious SIGILLs if the guest OS does
+     * access them as the support cannot be probed for.
+     */
+    { .name = "OSDTRRX_EL1", .state = ARM_CP_STATE_BOTH, .cp = 14,
+      .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 2,
+      .access = PL1_RW, .accessfn = access_tdcc,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "OSDTRTX_EL1", .state = ARM_CP_STATE_BOTH, .cp = 14,
+      .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2,
+      .access = PL1_RW, .accessfn = access_tdcc,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
+    /* DBGDTRTX_EL0/DBGDTRRX_EL0 depend on direction */
+    { .name = "DBGDTR_EL0", .state = ARM_CP_STATE_BOTH, .cp = 14,
+      .opc0 = 2, .opc1 = 3, .crn = 0, .crm = 5, .opc2 = 0,
+      .access = PL0_RW, .accessfn = access_tdcc,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
+    /*
+     * OSECCR_EL1 provides a mechanism for an operating system
+     * to access the contents of EDECCR. EDECCR is not implemented though,
+     * nor is the rest of the external device mechanism.
+     */
+    { .name = "OSECCR_EL1", .state = ARM_CP_STATE_BOTH, .cp = 14,
+      .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2,
+      .access = PL1_RW, .accessfn = access_tda,
+      .fgt = FGT_OSECCR_EL1,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
+    /*
+     * DBGDSCRint[15,12,5:2] map to MDSCR_EL1[15,12,5:2]. Map all bits as
+     * it is unlikely a guest will care.
+     * We don't implement the configurable EL0 access.
+     */
+    { .name = "DBGDSCRint", .state = ARM_CP_STATE_AA32,
+      .cp = 14, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0,
+      .type = ARM_CP_ALIAS,
+      .access = PL1_R, .accessfn = access_tda,
+      .fieldoffset = offsetof(CPUARMState, cp15.mdscr_el1), },
+    { .name = "OSLAR_EL1", .state = ARM_CP_STATE_BOTH,
+      .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 4,
+      .access = PL1_W, .type = ARM_CP_NO_RAW,
+      .accessfn = access_tdosa,
+      .fgt = FGT_OSLAR_EL1,
+      .writefn = oslar_write },
+    { .name = "OSLSR_EL1", .state = ARM_CP_STATE_BOTH,
+      .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 4,
+      .access = PL1_R, .resetvalue = 10,
+      .accessfn = access_tdosa,
+      .fgt = FGT_OSLSR_EL1,
+      .fieldoffset = offsetof(CPUARMState, cp15.oslsr_el1) },
+    /* Dummy OSDLR_EL1: 32-bit Linux will read this */
+    { .name = "OSDLR_EL1", .state = ARM_CP_STATE_BOTH,
+      .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 3, .opc2 = 4,
+      .access = PL1_RW, .accessfn = access_tdosa,
+      .fgt = FGT_OSDLR_EL1,
+      .writefn = osdlr_write,
+      .fieldoffset = offsetof(CPUARMState, cp15.osdlr_el1) },
+    /*
+     * Dummy DBGVCR: Linux wants to clear this on startup, but we don't
+     * implement vector catch debug events yet.
+     */
+    { .name = "DBGVCR",
+      .cp = 14, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0,
+      .access = PL1_RW, .accessfn = access_tda,
+      .type = ARM_CP_NOP },
+    /*
+     * Dummy MDCCINT_EL1, since we don't implement the Debug Communications
+     * Channel but Linux may try to access this register. The 32-bit
+     * alias is DBGDCCINT.
+     */
+    { .name = "MDCCINT_EL1", .state = ARM_CP_STATE_BOTH,
+      .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0,
+      .access = PL1_RW, .accessfn = access_tdcc,
+      .type = ARM_CP_NOP },
+    /*
+     * Dummy DBGCLAIM registers. 
+ * "The architecture does not define any functionality for the CLAIM tag bits.", + * so we only keep the raw bits + */ + { .name = "DBGCLAIMSET_EL1", .state = ARM_CP_STATE_BOTH, + .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 6, + .type = ARM_CP_ALIAS, + .access = PL1_RW, .accessfn = access_tda, + .fgt = FGT_DBGCLAIM, + .writefn = dbgclaimset_write, .readfn = dbgclaimset_read }, + { .name = "DBGCLAIMCLR_EL1", .state = ARM_CP_STATE_BOTH, + .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 6, + .access = PL1_RW, .accessfn = access_tda, + .fgt = FGT_DBGCLAIM, + .writefn = dbgclaimclr_write, .raw_writefn = raw_write, + .fieldoffset = offsetof(CPUARMState, cp15.dbgclaim) }, +}; + +/* These are present only when EL1 supports AArch32 */ +static const ARMCPRegInfo debug_aa32_el1_reginfo[] = { + /* + * Dummy DBGVCR32_EL2 (which is only for a 64-bit hypervisor + * to save and restore a 32-bit guest's DBGVCR) + */ + { .name = "DBGVCR32_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 2, .opc1 = 4, .crn = 0, .crm = 7, .opc2 = 0, + .access = PL2_RW, .accessfn = access_dbgvcr32, + .type = ARM_CP_NOP | ARM_CP_EL3_NO_EL2_KEEP }, +}; + +static const ARMCPRegInfo debug_lpae_cp_reginfo[] = { + /* 64 bit access versions of the (dummy) debug registers */ + { .name = "DBGDRAR", .cp = 14, .crm = 1, .opc1 = 0, + .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_64BIT | ARM_CP_NO_GDB, + .resetvalue = 0 }, + { .name = "DBGDSAR", .cp = 14, .crm = 2, .opc1 = 0, + .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_64BIT | ARM_CP_NO_GDB, + .resetvalue = 0 }, +}; + +static void dbgwvr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + int i = ri->crm; + + /* + * Bits [1:0] are RES0. + * + * It is IMPLEMENTATION DEFINED whether [63:49] ([63:53] with FEAT_LVA) + * are hardwired to the value of bit [48] ([52] with FEAT_LVA), or if + * they contain the value written. It is CONSTRAINED UNPREDICTABLE + * whether the RESS bits are ignored when comparing an address. + * + * Therefore we are allowed to compare the entire register, which lets + * us avoid considering whether or not FEAT_LVA is actually enabled. + */ + value &= ~3ULL; + + raw_write(env, ri, value); + if (tcg_enabled()) { + hw_watchpoint_update(cpu, i); + } +} + +static void dbgwcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + int i = ri->crm; + + raw_write(env, ri, value); + if (tcg_enabled()) { + hw_watchpoint_update(cpu, i); + } +} + +static void dbgbvr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + int i = ri->crm; + + raw_write(env, ri, value); + if (tcg_enabled()) { + hw_breakpoint_update(cpu, i); + } +} + +static void dbgbcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + int i = ri->crm; + + /* + * BAS[3] is a read-only copy of BAS[2], and BAS[1] a read-only + * copy of BAS[0]. + */ + value = deposit64(value, 6, 1, extract64(value, 5, 1)); + value = deposit64(value, 8, 1, extract64(value, 7, 1)); + + raw_write(env, ri, value); + if (tcg_enabled()) { + hw_breakpoint_update(cpu, i); + } +} + +void define_debug_regs(ARMCPU *cpu) +{ + /* + * Define v7 and v8 architectural debug registers. + * These are just dummy implementations for now. + */ + int i; + int wrps, brps, ctx_cmps; + + /* + * The Arm ARM says DBGDIDR is optional and deprecated if EL1 cannot + * use AArch32. 
Given that bit 15 is RES1, if the value is 0 then
+     * the register must not exist for this cpu.
+     */
+    if (cpu->isar.dbgdidr != 0) {
+        ARMCPRegInfo dbgdidr = {
+            .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0,
+            .opc1 = 0, .opc2 = 0,
+            .access = PL0_R, .accessfn = access_tda,
+            .type = ARM_CP_CONST, .resetvalue = cpu->isar.dbgdidr,
+        };
+        define_one_arm_cp_reg(cpu, &dbgdidr);
+    }
+
+    /*
+     * DBGDEVID is present in the v7 debug architecture if
+     * DBGDIDR.DEVID_imp is 1 (bit 15); from v7.1 and on it is
+     * mandatory (and bit 15 is RES1). DBGDEVID1 and DBGDEVID2 exist
+     * from v7.1 of the debug architecture. Because no fields have yet
+     * been defined in DBGDEVID2 (and quite possibly none will ever
+     * be) we don't define an ARMISARegisters field for it.
+     * These registers exist only if EL1 can use AArch32, but that
+     * happens naturally because they are only PL1 accessible anyway.
+     */
+    if (extract32(cpu->isar.dbgdidr, 15, 1)) {
+        ARMCPRegInfo dbgdevid = {
+            .name = "DBGDEVID",
+            .cp = 14, .opc1 = 0, .crn = 7, .crm = 2, .opc2 = 7,
+            .access = PL1_R, .accessfn = access_tda,
+            .type = ARM_CP_CONST, .resetvalue = cpu->isar.dbgdevid,
+        };
+        define_one_arm_cp_reg(cpu, &dbgdevid);
+    }
+    if (cpu_isar_feature(aa32_debugv7p1, cpu)) {
+        ARMCPRegInfo dbgdevid12[] = {
+            {
+                .name = "DBGDEVID1",
+                .cp = 14, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 7,
+                .access = PL1_R, .accessfn = access_tda,
+                .type = ARM_CP_CONST, .resetvalue = cpu->isar.dbgdevid1,
+            }, {
+                .name = "DBGDEVID2",
+                .cp = 14, .opc1 = 0, .crn = 7, .crm = 0, .opc2 = 7,
+                .access = PL1_R, .accessfn = access_tda,
+                .type = ARM_CP_CONST, .resetvalue = 0,
+            },
+        };
+        define_arm_cp_regs(cpu, dbgdevid12);
+    }
+
+    brps = arm_num_brps(cpu);
+    wrps = arm_num_wrps(cpu);
+    ctx_cmps = arm_num_ctx_cmps(cpu);
+
+    assert(ctx_cmps <= brps);
+
+    define_arm_cp_regs(cpu, debug_cp_reginfo);
+    if (cpu_isar_feature(aa64_aa32_el1, cpu)) {
+        define_arm_cp_regs(cpu, debug_aa32_el1_reginfo);
+    }
+
+    if (arm_feature(&cpu->env, ARM_FEATURE_LPAE)) {
+        define_arm_cp_regs(cpu, debug_lpae_cp_reginfo);
+    }
+
+    for (i = 0; i < brps; i++) {
+        char *dbgbvr_el1_name = g_strdup_printf("DBGBVR%d_EL1", i);
+        char *dbgbcr_el1_name = g_strdup_printf("DBGBCR%d_EL1", i);
+        ARMCPRegInfo dbgregs[] = {
+            { .name = dbgbvr_el1_name, .state = ARM_CP_STATE_BOTH,
+              .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4,
+              .access = PL1_RW, .accessfn = access_tda,
+              .fgt = FGT_DBGBVRN_EL1,
+              .fieldoffset = offsetof(CPUARMState, cp15.dbgbvr[i]),
+              .writefn = dbgbvr_write, .raw_writefn = raw_write
+            },
+            { .name = dbgbcr_el1_name, .state = ARM_CP_STATE_BOTH,
+              .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 5,
+              .access = PL1_RW, .accessfn = access_tda,
+              .fgt = FGT_DBGBCRN_EL1,
+              .fieldoffset = offsetof(CPUARMState, cp15.dbgbcr[i]),
+              .writefn = dbgbcr_write, .raw_writefn = raw_write
+            },
+        };
+        define_arm_cp_regs(cpu, dbgregs);
+        g_free(dbgbvr_el1_name);
+        g_free(dbgbcr_el1_name);
+    }
+
+    for (i = 0; i < wrps; i++) {
+        char *dbgwvr_el1_name = g_strdup_printf("DBGWVR%d_EL1", i);
+        char *dbgwcr_el1_name = g_strdup_printf("DBGWCR%d_EL1", i);
+        ARMCPRegInfo dbgregs[] = {
+            { .name = dbgwvr_el1_name, .state = ARM_CP_STATE_BOTH,
+              .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6,
+              .access = PL1_RW, .accessfn = access_tda,
+              .fgt = FGT_DBGWVRN_EL1,
+              .fieldoffset = offsetof(CPUARMState, cp15.dbgwvr[i]),
+              .writefn = dbgwvr_write, .raw_writefn = raw_write
+            },
+            { .name = dbgwcr_el1_name, .state = ARM_CP_STATE_BOTH,
+              .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 
i, .opc2 = 7, + .access = PL1_RW, .accessfn = access_tda, + .fgt = FGT_DBGWCRN_EL1, + .fieldoffset = offsetof(CPUARMState, cp15.dbgwcr[i]), + .writefn = dbgwcr_write, .raw_writefn = raw_write + }, + }; + define_arm_cp_regs(cpu, dbgregs); + g_free(dbgwvr_el1_name); + g_free(dbgwcr_el1_name); + } +} diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c index 826601b341..a3bb73cfa7 100644 --- a/target/arm/gdbstub.c +++ b/target/arm/gdbstub.c @@ -20,12 +20,17 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/gdbstub.h" +#include "gdbstub/helpers.h" +#include "sysemu/tcg.h" +#include "internals.h" +#include "cpu-features.h" +#include "cpregs.h" -typedef struct RegisterSysregXmlParam { +typedef struct RegisterSysregFeatureParam { CPUState *cs; - GString *s; + GDBFeatureBuilder builder; int n; -} RegisterSysregXmlParam; +} RegisterSysregFeatureParam; /* Old gdb always expect FPA registers. Newer (xml-aware) gdb only expect whatever the target description contains. Due to a historical mishap @@ -42,21 +47,7 @@ int arm_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) /* Core integer register. */ return gdb_get_reg32(mem_buf, env->regs[n]); } - if (n < 24) { - /* FPA registers. */ - if (gdb_has_xml) { - return 0; - } - return gdb_get_zeroes(mem_buf, 12); - } - switch (n) { - case 24: - /* FPA status register. */ - if (gdb_has_xml) { - return 0; - } - return gdb_get_reg32(mem_buf, 0); - case 25: + if (n == 25) { /* CPSR, or XPSR for M-profile */ if (arm_feature(env, ARM_FEATURE_M)) { return gdb_get_reg32(mem_buf, xpsr_read(env)); @@ -76,8 +67,13 @@ int arm_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) tmp = ldl_p(mem_buf); - /* Mask out low bit of PC to workaround gdb bugs. This will probably - cause problems if we ever implement the Jazelle DBX extensions. */ + /* + * Mask out low bits of PC to workaround gdb bugs. + * This avoids an assert in thumb_tr_translate_insn, because it is + * architecturally impossible to misalign the pc. + * This will probably cause problems if we ever implement the + * Jazelle DBX extensions. + */ if (n == 15) { tmp &= ~1; } @@ -91,27 +87,13 @@ int arm_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) env->regs[n] = tmp; return 4; } - if (n < 24) { /* 16-23 */ - /* FPA registers (ignored). */ - if (gdb_has_xml) { - return 0; - } - return 12; - } - switch (n) { - case 24: - /* FPA status register (ignored). */ - if (gdb_has_xml) { - return 0; - } - return 4; - case 25: + if (n == 25) { /* CPSR, or XPSR for M-profile */ if (arm_feature(env, ARM_FEATURE_M)) { /* * Don't allow writing to XPSR.Exception as it can cause * a transition into or out of handler mode (it's not - * writeable via the MSR insn so this is a reasonable + * writable via the MSR insn so this is a reasonable * restriction). Other fields are safe to update. */ xpsr_write(env, tmp, ~XPSR_EXCP); @@ -124,34 +106,174 @@ int arm_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) return 0; } -static void arm_gen_one_xml_sysreg_tag(GString *s, DynamicGDBXMLInfo *dyn_xml, +static int vfp_gdb_get_reg(CPUState *cs, GByteArray *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + int nregs = cpu_isar_feature(aa32_simd_r32, cpu) ? 32 : 16; + + /* VFP data registers are always little-endian. */ + if (reg < nregs) { + return gdb_get_reg64(buf, *aa32_vfp_dreg(env, reg)); + } + if (arm_feature(env, ARM_FEATURE_NEON)) { + /* Aliases for Q regs. 
*/ + nregs += 16; + if (reg < nregs) { + uint64_t *q = aa32_vfp_qreg(env, reg - 32); + return gdb_get_reg128(buf, q[0], q[1]); + } + } + switch (reg - nregs) { + case 0: + return gdb_get_reg32(buf, vfp_get_fpscr(env)); + } + return 0; +} + +static int vfp_gdb_set_reg(CPUState *cs, uint8_t *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + int nregs = cpu_isar_feature(aa32_simd_r32, cpu) ? 32 : 16; + + if (reg < nregs) { + *aa32_vfp_dreg(env, reg) = ldq_le_p(buf); + return 8; + } + if (arm_feature(env, ARM_FEATURE_NEON)) { + nregs += 16; + if (reg < nregs) { + uint64_t *q = aa32_vfp_qreg(env, reg - 32); + q[0] = ldq_le_p(buf); + q[1] = ldq_le_p(buf + 8); + return 16; + } + } + switch (reg - nregs) { + case 0: + vfp_set_fpscr(env, ldl_p(buf)); + return 4; + } + return 0; +} + +static int vfp_gdb_get_sysreg(CPUState *cs, GByteArray *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + switch (reg) { + case 0: + return gdb_get_reg32(buf, env->vfp.xregs[ARM_VFP_FPSID]); + case 1: + return gdb_get_reg32(buf, env->vfp.xregs[ARM_VFP_FPEXC]); + } + return 0; +} + +static int vfp_gdb_set_sysreg(CPUState *cs, uint8_t *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + switch (reg) { + case 0: + env->vfp.xregs[ARM_VFP_FPSID] = ldl_p(buf); + return 4; + case 1: + env->vfp.xregs[ARM_VFP_FPEXC] = ldl_p(buf) & (1 << 30); + return 4; + } + return 0; +} + +static int mve_gdb_get_reg(CPUState *cs, GByteArray *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + switch (reg) { + case 0: + return gdb_get_reg32(buf, env->v7m.vpr); + default: + return 0; + } +} + +static int mve_gdb_set_reg(CPUState *cs, uint8_t *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + switch (reg) { + case 0: + env->v7m.vpr = ldl_p(buf); + return 4; + default: + return 0; + } +} + +/** + * arm_get/set_gdb_*: get/set a gdb register + * @env: the CPU state + * @buf: a buffer to copy to/from + * @reg: register number (offset from start of group) + * + * We return the number of bytes copied + */ + +static int arm_gdb_get_sysreg(CPUState *cs, GByteArray *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + const ARMCPRegInfo *ri; + uint32_t key; + + key = cpu->dyn_sysreg_feature.data.cpregs.keys[reg]; + ri = get_arm_cp_reginfo(cpu->cp_regs, key); + if (ri) { + if (cpreg_field_is_64bit(ri)) { + return gdb_get_reg64(buf, (uint64_t)read_raw_cp_reg(env, ri)); + } else { + return gdb_get_reg32(buf, (uint32_t)read_raw_cp_reg(env, ri)); + } + } + return 0; +} + +static int arm_gdb_set_sysreg(CPUState *cs, uint8_t *buf, int reg) +{ + return 0; +} + +static void arm_gen_one_feature_sysreg(GDBFeatureBuilder *builder, + DynamicGDBFeatureInfo *dyn_feature, ARMCPRegInfo *ri, uint32_t ri_key, - int bitsize, int regnum) + int bitsize, int n) { - g_string_append_printf(s, "<reg name=\"%s\"", ri->name); - g_string_append_printf(s, " bitsize=\"%d\"", bitsize); - g_string_append_printf(s, " regnum=\"%d\"", regnum); - g_string_append_printf(s, " group=\"cp_regs\"/>"); - dyn_xml->data.cpregs.keys[dyn_xml->num] = ri_key; - dyn_xml->num++; + gdb_feature_builder_append_reg(builder, ri->name, bitsize, n, + "int", "cp_regs"); + + dyn_feature->data.cpregs.keys[n] = ri_key; } -static void arm_register_sysreg_for_xml(gpointer key, gpointer value, - gpointer p) +static void arm_register_sysreg_for_feature(gpointer key, gpointer value, + gpointer p) { - uint32_t ri_key = *(uint32_t *)key; + 
uint32_t ri_key = (uintptr_t)key; ARMCPRegInfo *ri = value; - RegisterSysregXmlParam *param = (RegisterSysregXmlParam *)p; - GString *s = param->s; + RegisterSysregFeatureParam *param = p; ARMCPU *cpu = ARM_CPU(param->cs); CPUARMState *env = &cpu->env; - DynamicGDBXMLInfo *dyn_xml = &cpu->dyn_sysreg_xml; + DynamicGDBFeatureInfo *dyn_feature = &cpu->dyn_sysreg_feature; if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_NO_GDB))) { if (arm_feature(env, ARM_FEATURE_AARCH64)) { if (ri->state == ARM_CP_STATE_AA64) { - arm_gen_one_xml_sysreg_tag(s , dyn_xml, ri, ri_key, 64, - param->n++); + arm_gen_one_feature_sysreg(¶m->builder, dyn_feature, + ri, ri_key, 64, param->n++); } } else { if (ri->state == ARM_CP_STATE_AA32) { @@ -160,162 +282,276 @@ static void arm_register_sysreg_for_xml(gpointer key, gpointer value, return; } if (ri->type & ARM_CP_64BIT) { - arm_gen_one_xml_sysreg_tag(s , dyn_xml, ri, ri_key, 64, - param->n++); + arm_gen_one_feature_sysreg(¶m->builder, dyn_feature, + ri, ri_key, 64, param->n++); } else { - arm_gen_one_xml_sysreg_tag(s , dyn_xml, ri, ri_key, 32, - param->n++); + arm_gen_one_feature_sysreg(¶m->builder, dyn_feature, + ri, ri_key, 32, param->n++); } } } } } -int arm_gen_dynamic_sysreg_xml(CPUState *cs, int base_reg) +static GDBFeature *arm_gen_dynamic_sysreg_feature(CPUState *cs, int base_reg) { ARMCPU *cpu = ARM_CPU(cs); - GString *s = g_string_new(NULL); - RegisterSysregXmlParam param = {cs, s, base_reg}; - - cpu->dyn_sysreg_xml.num = 0; - cpu->dyn_sysreg_xml.data.cpregs.keys = g_new(uint32_t, g_hash_table_size(cpu->cp_regs)); - g_string_printf(s, "<?xml version=\"1.0\"?>"); - g_string_append_printf(s, "<!DOCTYPE target SYSTEM \"gdb-target.dtd\">"); - g_string_append_printf(s, "<feature name=\"org.qemu.gdb.arm.sys.regs\">"); - g_hash_table_foreach(cpu->cp_regs, arm_register_sysreg_for_xml, ¶m); - g_string_append_printf(s, "</feature>"); - cpu->dyn_sysreg_xml.desc = g_string_free(s, false); - return cpu->dyn_sysreg_xml.num; + RegisterSysregFeatureParam param = {cs}; + gsize num_regs = g_hash_table_size(cpu->cp_regs); + + gdb_feature_builder_init(¶m.builder, + &cpu->dyn_sysreg_feature.desc, + "org.qemu.gdb.arm.sys.regs", + "system-registers.xml", + base_reg); + cpu->dyn_sysreg_feature.data.cpregs.keys = g_new(uint32_t, num_regs); + g_hash_table_foreach(cpu->cp_regs, arm_register_sysreg_for_feature, ¶m); + gdb_feature_builder_end(¶m.builder); + return &cpu->dyn_sysreg_feature.desc; } -struct TypeSize { - const char *gdb_type; - int size; - const char sz, suffix; -}; +#ifdef CONFIG_TCG +typedef enum { + M_SYSREG_MSP, + M_SYSREG_PSP, + M_SYSREG_PRIMASK, + M_SYSREG_CONTROL, + M_SYSREG_BASEPRI, + M_SYSREG_FAULTMASK, + M_SYSREG_MSPLIM, + M_SYSREG_PSPLIM, +} MProfileSysreg; -static const struct TypeSize vec_lanes[] = { - /* quads */ - { "uint128", 128, 'q', 'u' }, - { "int128", 128, 'q', 's' }, - /* 64 bit */ - { "ieee_double", 64, 'd', 'f' }, - { "uint64", 64, 'd', 'u' }, - { "int64", 64, 'd', 's' }, - /* 32 bit */ - { "ieee_single", 32, 's', 'f' }, - { "uint32", 32, 's', 'u' }, - { "int32", 32, 's', 's' }, - /* 16 bit */ - { "ieee_half", 16, 'h', 'f' }, - { "uint16", 16, 'h', 'u' }, - { "int16", 16, 'h', 's' }, - /* bytes */ - { "uint8", 8, 'b', 'u' }, - { "int8", 8, 'b', 's' }, +static const struct { + const char *name; + int feature; +} m_sysreg_def[] = { + [M_SYSREG_MSP] = { "msp", ARM_FEATURE_M }, + [M_SYSREG_PSP] = { "psp", ARM_FEATURE_M }, + [M_SYSREG_PRIMASK] = { "primask", ARM_FEATURE_M }, + [M_SYSREG_CONTROL] = { "control", ARM_FEATURE_M }, + [M_SYSREG_BASEPRI] = { 
"basepri", ARM_FEATURE_M_MAIN }, + [M_SYSREG_FAULTMASK] = { "faultmask", ARM_FEATURE_M_MAIN }, + [M_SYSREG_MSPLIM] = { "msplim", ARM_FEATURE_V8 }, + [M_SYSREG_PSPLIM] = { "psplim", ARM_FEATURE_V8 }, }; +static uint32_t *m_sysreg_ptr(CPUARMState *env, MProfileSysreg reg, bool sec) +{ + uint32_t *ptr; -int arm_gen_dynamic_svereg_xml(CPUState *cs, int base_reg) + switch (reg) { + case M_SYSREG_MSP: + ptr = arm_v7m_get_sp_ptr(env, sec, false, true); + break; + case M_SYSREG_PSP: + ptr = arm_v7m_get_sp_ptr(env, sec, true, true); + break; + case M_SYSREG_MSPLIM: + ptr = &env->v7m.msplim[sec]; + break; + case M_SYSREG_PSPLIM: + ptr = &env->v7m.psplim[sec]; + break; + case M_SYSREG_PRIMASK: + ptr = &env->v7m.primask[sec]; + break; + case M_SYSREG_BASEPRI: + ptr = &env->v7m.basepri[sec]; + break; + case M_SYSREG_FAULTMASK: + ptr = &env->v7m.faultmask[sec]; + break; + case M_SYSREG_CONTROL: + ptr = &env->v7m.control[sec]; + break; + default: + return NULL; + } + return arm_feature(env, m_sysreg_def[reg].feature) ? ptr : NULL; +} + +static int m_sysreg_get(CPUARMState *env, GByteArray *buf, + MProfileSysreg reg, bool secure) { - ARMCPU *cpu = ARM_CPU(cs); - GString *s = g_string_new(NULL); - DynamicGDBXMLInfo *info = &cpu->dyn_svereg_xml; - g_autoptr(GString) ts = g_string_new(""); - int i, j, bits, reg_width = (cpu->sve_max_vq * 128); - info->num = 0; - g_string_printf(s, "<?xml version=\"1.0\"?>"); - g_string_append_printf(s, "<!DOCTYPE target SYSTEM \"gdb-target.dtd\">"); - g_string_append_printf(s, "<feature name=\"org.gnu.gdb.aarch64.sve\">"); - - /* First define types and totals in a whole VL */ - for (i = 0; i < ARRAY_SIZE(vec_lanes); i++) { - int count = reg_width / vec_lanes[i].size; - g_string_printf(ts, "svev%c%c", vec_lanes[i].sz, vec_lanes[i].suffix); - g_string_append_printf(s, - "<vector id=\"%s\" type=\"%s\" count=\"%d\"/>", - ts->str, vec_lanes[i].gdb_type, count); + uint32_t *ptr = m_sysreg_ptr(env, reg, secure); + + if (ptr == NULL) { + return 0; } + return gdb_get_reg32(buf, *ptr); +} + +static int arm_gdb_get_m_systemreg(CPUState *cs, GByteArray *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + /* - * Now define a union for each size group containing unsigned and - * signed and potentially float versions of each size from 128 to - * 8 bits. + * Here, we emulate MRS instruction, where CONTROL has a mix of + * banked and non-banked bits. 
*/ - for (bits = 128, i = 0; bits >= 8; bits /= 2, i++) { - const char suf[] = { 'q', 'd', 's', 'h', 'b' }; - g_string_append_printf(s, "<union id=\"svevn%c\">", suf[i]); - for (j = 0; j < ARRAY_SIZE(vec_lanes); j++) { - if (vec_lanes[j].size == bits) { - g_string_append_printf(s, "<field name=\"%c\" type=\"svev%c%c\"/>", - vec_lanes[j].suffix, - vec_lanes[j].sz, vec_lanes[j].suffix); - } - } - g_string_append(s, "</union>"); + if (reg == M_SYSREG_CONTROL) { + return gdb_get_reg32(buf, arm_v7m_mrs_control(env, env->v7m.secure)); } - /* And now the final union of unions */ - g_string_append(s, "<union id=\"svev\">"); - for (bits = 128, i = 0; bits >= 8; bits /= 2, i++) { - const char suf[] = { 'q', 'd', 's', 'h', 'b' }; - g_string_append_printf(s, "<field name=\"%c\" type=\"svevn%c\"/>", - suf[i], suf[i]); - } - g_string_append(s, "</union>"); - - /* Finally the sve prefix type */ - g_string_append_printf(s, - "<vector id=\"svep\" type=\"uint8\" count=\"%d\"/>", - reg_width / 8); - - /* Then define each register in parts for each vq */ - for (i = 0; i < 32; i++) { - g_string_append_printf(s, - "<reg name=\"z%d\" bitsize=\"%d\"" - " regnum=\"%d\" type=\"svev\"/>", - i, reg_width, base_reg++); - info->num++; - } - /* fpscr & status registers */ - g_string_append_printf(s, "<reg name=\"fpsr\" bitsize=\"32\"" - " regnum=\"%d\" group=\"float\"" - " type=\"int\"/>", base_reg++); - g_string_append_printf(s, "<reg name=\"fpcr\" bitsize=\"32\"" - " regnum=\"%d\" group=\"float\"" - " type=\"int\"/>", base_reg++); - info->num += 2; - - for (i = 0; i < 16; i++) { - g_string_append_printf(s, - "<reg name=\"p%d\" bitsize=\"%d\"" - " regnum=\"%d\" type=\"svep\"/>", - i, cpu->sve_max_vq * 16, base_reg++); - info->num++; + return m_sysreg_get(env, buf, reg, env->v7m.secure); +} + +static int arm_gdb_set_m_systemreg(CPUState *cs, uint8_t *buf, int reg) +{ + return 0; /* TODO */ +} + +static GDBFeature *arm_gen_dynamic_m_systemreg_feature(CPUState *cs, + int base_reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + GDBFeatureBuilder builder; + int reg = 0; + int i; + + gdb_feature_builder_init(&builder, &cpu->dyn_m_systemreg_feature.desc, + "org.gnu.gdb.arm.m-system", "arm-m-system.xml", + base_reg); + + for (i = 0; i < ARRAY_SIZE(m_sysreg_def); i++) { + if (arm_feature(env, m_sysreg_def[i].feature)) { + gdb_feature_builder_append_reg(&builder, m_sysreg_def[i].name, 32, + reg++, "int", NULL); + } } - g_string_append_printf(s, - "<reg name=\"ffr\" bitsize=\"%d\"" - " regnum=\"%d\" group=\"vector\"" - " type=\"svep\"/>", - cpu->sve_max_vq * 16, base_reg++); - g_string_append_printf(s, - "<reg name=\"vg\" bitsize=\"64\"" - " regnum=\"%d\" type=\"int\"/>", - base_reg++); - info->num += 2; - g_string_append_printf(s, "</feature>"); - cpu->dyn_svereg_xml.desc = g_string_free(s, false); - - return cpu->dyn_svereg_xml.num; + + gdb_feature_builder_end(&builder); + + return &cpu->dyn_m_systemreg_feature.desc; +} + +#ifndef CONFIG_USER_ONLY +/* + * For user-only, we see the non-secure registers via m_systemreg above. + * For secext, encode the non-secure view as even and secure view as odd. 
+ */ +static int arm_gdb_get_m_secextreg(CPUState *cs, GByteArray *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + return m_sysreg_get(env, buf, reg >> 1, reg & 1); } +static int arm_gdb_set_m_secextreg(CPUState *cs, uint8_t *buf, int reg) +{ + return 0; /* TODO */ +} -const char *arm_gdb_get_dynamic_xml(CPUState *cs, const char *xmlname) +static GDBFeature *arm_gen_dynamic_m_secextreg_feature(CPUState *cs, + int base_reg) { ARMCPU *cpu = ARM_CPU(cs); + GDBFeatureBuilder builder; + char *name; + int reg = 0; + int i; + + gdb_feature_builder_init(&builder, &cpu->dyn_m_secextreg_feature.desc, + "org.gnu.gdb.arm.secext", "arm-m-secext.xml", + base_reg); + + for (i = 0; i < ARRAY_SIZE(m_sysreg_def); i++) { + name = g_strconcat(m_sysreg_def[i].name, "_ns", NULL); + gdb_feature_builder_append_reg(&builder, name, 32, reg++, + "int", NULL); + name = g_strconcat(m_sysreg_def[i].name, "_s", NULL); + gdb_feature_builder_append_reg(&builder, name, 32, reg++, + "int", NULL); + } + + gdb_feature_builder_end(&builder); + + return &cpu->dyn_m_secextreg_feature.desc; +} +#endif +#endif /* CONFIG_TCG */ + +void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) +{ + CPUState *cs = CPU(cpu); + CPUARMState *env = &cpu->env; + + if (arm_feature(env, ARM_FEATURE_AARCH64)) { + /* + * The lower part of each SVE register aliases to the FPU + * registers so we don't need to include both. + */ +#ifdef TARGET_AARCH64 + if (isar_feature_aa64_sve(&cpu->isar)) { + GDBFeature *feature = arm_gen_dynamic_svereg_feature(cs, cs->gdb_num_regs); + gdb_register_coprocessor(cs, aarch64_gdb_get_sve_reg, + aarch64_gdb_set_sve_reg, feature, 0); + } else { + gdb_register_coprocessor(cs, aarch64_gdb_get_fpu_reg, + aarch64_gdb_set_fpu_reg, + gdb_find_static_feature("aarch64-fpu.xml"), + 0); + } + /* + * Note that we report pauth information via the feature name + * org.gnu.gdb.aarch64.pauth_v2, not org.gnu.gdb.aarch64.pauth. + * GDB versions 9 through 12 have a bug where they will crash + * if they see the latter XML from QEMU. + */ + if (isar_feature_aa64_pauth(&cpu->isar)) { + gdb_register_coprocessor(cs, aarch64_gdb_get_pauth_reg, + aarch64_gdb_set_pauth_reg, + gdb_find_static_feature("aarch64-pauth.xml"), + 0); + } +#endif + } else { + if (arm_feature(env, ARM_FEATURE_NEON)) { + gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg, + gdb_find_static_feature("arm-neon.xml"), + 0); + } else if (cpu_isar_feature(aa32_simd_r32, cpu)) { + gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg, + gdb_find_static_feature("arm-vfp3.xml"), + 0); + } else if (cpu_isar_feature(aa32_vfp_simd, cpu)) { + gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg, + gdb_find_static_feature("arm-vfp.xml"), 0); + } + if (!arm_feature(env, ARM_FEATURE_M)) { + /* + * A and R profile have FP sysregs FPEXC and FPSID that we + * expose to gdb. 
+ */ + gdb_register_coprocessor(cs, vfp_gdb_get_sysreg, vfp_gdb_set_sysreg, + gdb_find_static_feature("arm-vfp-sysregs.xml"), + 0); + } + } + if (cpu_isar_feature(aa32_mve, cpu) && tcg_enabled()) { + gdb_register_coprocessor(cs, mve_gdb_get_reg, mve_gdb_set_reg, + gdb_find_static_feature("arm-m-profile-mve.xml"), + 0); + } + gdb_register_coprocessor(cs, arm_gdb_get_sysreg, arm_gdb_set_sysreg, + arm_gen_dynamic_sysreg_feature(cs, cs->gdb_num_regs), + 0); - if (strcmp(xmlname, "system-registers.xml") == 0) { - return cpu->dyn_sysreg_xml.desc; - } else if (strcmp(xmlname, "sve-registers.xml") == 0) { - return cpu->dyn_svereg_xml.desc; +#ifdef CONFIG_TCG + if (arm_feature(env, ARM_FEATURE_M) && tcg_enabled()) { + gdb_register_coprocessor(cs, + arm_gdb_get_m_systemreg, arm_gdb_set_m_systemreg, + arm_gen_dynamic_m_systemreg_feature(cs, cs->gdb_num_regs), 0); +#ifndef CONFIG_USER_ONLY + if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { + gdb_register_coprocessor(cs, + arm_gdb_get_m_secextreg, arm_gdb_set_m_secextreg, + arm_gen_dynamic_m_secextreg_feature(cs, cs->gdb_num_regs), 0); + } +#endif } - return NULL; +#endif /* CONFIG_TCG */ } diff --git a/target/arm/gdbstub64.c b/target/arm/gdbstub64.c index 251539ef79..caa31ff3fa 100644 --- a/target/arm/gdbstub64.c +++ b/target/arm/gdbstub64.c @@ -17,8 +17,10 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "cpu.h" -#include "exec/gdbstub.h" +#include "internals.h" +#include "gdbstub/helpers.h" int aarch64_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) { @@ -69,3 +71,313 @@ int aarch64_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) /* Unknown register. */ return 0; } + +int aarch64_gdb_get_fpu_reg(CPUState *cs, GByteArray *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + switch (reg) { + case 0 ... 31: + { + /* 128 bit FP register - quads are in LE order */ + uint64_t *q = aa64_vfp_qreg(env, reg); + return gdb_get_reg128(buf, q[1], q[0]); + } + case 32: + /* FPSR */ + return gdb_get_reg32(buf, vfp_get_fpsr(env)); + case 33: + /* FPCR */ + return gdb_get_reg32(buf, vfp_get_fpcr(env)); + default: + return 0; + } +} + +int aarch64_gdb_set_fpu_reg(CPUState *cs, uint8_t *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + switch (reg) { + case 0 ... 31: + /* 128 bit FP register */ + { + uint64_t *q = aa64_vfp_qreg(env, reg); + q[0] = ldq_le_p(buf); + q[1] = ldq_le_p(buf + 8); + return 16; + } + case 32: + /* FPSR */ + vfp_set_fpsr(env, ldl_p(buf)); + return 4; + case 33: + /* FPCR */ + vfp_set_fpcr(env, ldl_p(buf)); + return 4; + default: + return 0; + } +} + +int aarch64_gdb_get_sve_reg(CPUState *cs, GByteArray *buf, int reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + switch (reg) { + /* The first 32 registers are the zregs */ + case 0 ... 31: + { + int vq, len = 0; + for (vq = 0; vq < cpu->sve_max_vq; vq++) { + len += gdb_get_reg128(buf, + env->vfp.zregs[reg].d[vq * 2 + 1], + env->vfp.zregs[reg].d[vq * 2]); + } + return len; + } + case 32: + return gdb_get_reg32(buf, vfp_get_fpsr(env)); + case 33: + return gdb_get_reg32(buf, vfp_get_fpcr(env)); + /* then 16 predicates and the ffr */ + case 34 ... 
50:
+    {
+        int preg = reg - 34;
+        int vq, len = 0;
+        for (vq = 0; vq < cpu->sve_max_vq; vq = vq + 4) {
+            len += gdb_get_reg64(buf, env->vfp.pregs[preg].p[vq / 4]);
+        }
+        return len;
+    }
+    case 51:
+    {
+        /*
+         * We report in Vector Granules (VG) which is 64bit in a Z reg
+         * while the ZCR works in Vector Quads (VQ) which is 128bit chunks.
+         */
+        int vq = sve_vqm1_for_el(env, arm_current_el(env)) + 1;
+        return gdb_get_reg64(buf, vq * 2);
+    }
+    default:
+        /* gdbstub asked for something out of our range */
+        qemu_log_mask(LOG_UNIMP, "%s: out of range register %d\n", __func__, reg);
+        break;
+    }
+
+    return 0;
+}
+
+int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+
+    switch (reg) {
+    /* The first 32 registers are the zregs */
+    case 0 ... 31:
+    {
+        int vq, len = 0;
+        uint64_t *p = (uint64_t *) buf;
+        for (vq = 0; vq < cpu->sve_max_vq; vq++) {
+            env->vfp.zregs[reg].d[vq * 2 + 1] = *p++;
+            env->vfp.zregs[reg].d[vq * 2] = *p++;
+            len += 16;
+        }
+        return len;
+    }
+    case 32:
+        vfp_set_fpsr(env, *(uint32_t *)buf);
+        return 4;
+    case 33:
+        vfp_set_fpcr(env, *(uint32_t *)buf);
+        return 4;
+    case 34 ... 50:
+    {
+        int preg = reg - 34;
+        int vq, len = 0;
+        uint64_t *p = (uint64_t *) buf;
+        for (vq = 0; vq < cpu->sve_max_vq; vq = vq + 4) {
+            env->vfp.pregs[preg].p[vq / 4] = *p++;
+            len += 8;
+        }
+        return len;
+    }
+    case 51:
+        /* cannot set vg via gdbstub */
+        return 0;
+    default:
+        /* gdbstub asked for something out of our range */
+        break;
+    }
+
+    return 0;
+}
+
+int aarch64_gdb_get_pauth_reg(CPUState *cs, GByteArray *buf, int reg)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+
+    switch (reg) {
+    case 0: /* pauth_dmask */
+    case 1: /* pauth_cmask */
+    case 2: /* pauth_dmask_high */
+    case 3: /* pauth_cmask_high */
+        /*
+         * Note that older versions of this feature only contained
+         * pauth_{d,c}mask, for use with Linux user processes, and
+         * thus exclusively in the low half of the address space.
+         *
+         * To support system mode, and to debug kernels, two new regs
+         * were added to cover the high half of the address space.
+         * For the purpose of pauth_ptr_mask, we can use any well-formed
+         * address within the address space half -- here, 0 and -1.
+         */
+        {
+            bool is_data = !(reg & 1);
+            bool is_high = reg & 2;
+            ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env);
+            ARMVAParameters param;
+
+            param = aa64_va_parameters(env, -is_high, mmu_idx, is_data, false);
+            return gdb_get_reg64(buf, pauth_ptr_mask(param));
+        }
+    default:
+        return 0;
+    }
+}
+
+int aarch64_gdb_set_pauth_reg(CPUState *cs, uint8_t *buf, int reg)
+{
+    /* All pseudo registers are read-only. 
*/ + return 0; +} + +static void output_vector_union_type(GDBFeatureBuilder *builder, int reg_width, + const char *name) +{ + struct TypeSize { + const char *gdb_type; + short size; + char sz, suffix; + }; + + static const struct TypeSize vec_lanes[] = { + /* quads */ + { "uint128", 128, 'q', 'u' }, + { "int128", 128, 'q', 's' }, + /* 64 bit */ + { "ieee_double", 64, 'd', 'f' }, + { "uint64", 64, 'd', 'u' }, + { "int64", 64, 'd', 's' }, + /* 32 bit */ + { "ieee_single", 32, 's', 'f' }, + { "uint32", 32, 's', 'u' }, + { "int32", 32, 's', 's' }, + /* 16 bit */ + { "ieee_half", 16, 'h', 'f' }, + { "uint16", 16, 'h', 'u' }, + { "int16", 16, 'h', 's' }, + /* bytes */ + { "uint8", 8, 'b', 'u' }, + { "int8", 8, 'b', 's' }, + }; + + static const char suf[] = { 'b', 'h', 's', 'd', 'q' }; + int i, j; + + /* First define types and totals in a whole VL */ + for (i = 0; i < ARRAY_SIZE(vec_lanes); i++) { + gdb_feature_builder_append_tag( + builder, "<vector id=\"%s%c%c\" type=\"%s\" count=\"%d\"/>", + name, vec_lanes[i].sz, vec_lanes[i].suffix, + vec_lanes[i].gdb_type, reg_width / vec_lanes[i].size); + } + + /* + * Now define a union for each size group containing unsigned and + * signed and potentially float versions of each size from 128 to + * 8 bits. + */ + for (i = 0; i < ARRAY_SIZE(suf); i++) { + int bits = 8 << i; + + gdb_feature_builder_append_tag(builder, "<union id=\"%sn%c\">", + name, suf[i]); + for (j = 0; j < ARRAY_SIZE(vec_lanes); j++) { + if (vec_lanes[j].size == bits) { + gdb_feature_builder_append_tag( + builder, "<field name=\"%c\" type=\"%s%c%c\"/>", + vec_lanes[j].suffix, name, + vec_lanes[j].sz, vec_lanes[j].suffix); + } + } + gdb_feature_builder_append_tag(builder, "</union>"); + } + + /* And now the final union of unions */ + gdb_feature_builder_append_tag(builder, "<union id=\"%s\">", name); + for (i = ARRAY_SIZE(suf) - 1; i >= 0; i--) { + gdb_feature_builder_append_tag(builder, + "<field name=\"%c\" type=\"%sn%c\"/>", + suf[i], name, suf[i]); + } + gdb_feature_builder_append_tag(builder, "</union>"); +} + +GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cs, int base_reg) +{ + ARMCPU *cpu = ARM_CPU(cs); + int reg_width = cpu->sve_max_vq * 128; + int pred_width = cpu->sve_max_vq * 16; + GDBFeatureBuilder builder; + char *name; + int reg = 0; + int i; + + gdb_feature_builder_init(&builder, &cpu->dyn_svereg_feature.desc, + "org.gnu.gdb.aarch64.sve", "sve-registers.xml", + base_reg); + + /* Create the vector union type. */ + output_vector_union_type(&builder, reg_width, "svev"); + + /* Create the predicate vector type. */ + gdb_feature_builder_append_tag( + &builder, "<vector id=\"svep\" type=\"uint8\" count=\"%d\"/>", + pred_width / 8); + + /* Define the vector registers. */ + for (i = 0; i < 32; i++) { + name = g_strdup_printf("z%d", i); + gdb_feature_builder_append_reg(&builder, name, reg_width, reg++, + "svev", NULL); + } + + /* fpscr & status registers */ + gdb_feature_builder_append_reg(&builder, "fpsr", 32, reg++, + "int", "float"); + gdb_feature_builder_append_reg(&builder, "fpcr", 32, reg++, + "int", "float"); + + /* Define the predicate registers. */ + for (i = 0; i < 16; i++) { + name = g_strdup_printf("p%d", i); + gdb_feature_builder_append_reg(&builder, name, pred_width, reg++, + "svep", NULL); + } + gdb_feature_builder_append_reg(&builder, "ffr", pred_width, reg++, + "svep", "vector"); + + /* Define the vector length pseudo-register. 
*/ + gdb_feature_builder_append_reg(&builder, "vg", 64, reg++, "int", NULL); + + gdb_feature_builder_end(&builder); + + return &cpu->dyn_svereg_feature.desc; +} diff --git a/target/arm/gtimer.h b/target/arm/gtimer.h new file mode 100644 index 0000000000..b992941bef --- /dev/null +++ b/target/arm/gtimer.h @@ -0,0 +1,21 @@ +/* + * ARM generic timer definitions for Arm A-class CPU + * + * Copyright (c) 2003 Fabrice Bellard + * + * SPDX-License-Identifier: LGPL-2.1-or-later + */ + +#ifndef TARGET_ARM_GTIMER_H +#define TARGET_ARM_GTIMER_H + +enum { + GTIMER_PHYS = 0, + GTIMER_VIRT = 1, + GTIMER_HYP = 2, + GTIMER_SEC = 3, + GTIMER_HYPVIRT = 4, +#define NUM_GTIMERS 5 +}; + +#endif diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c deleted file mode 100644 index 19445b3c94..0000000000 --- a/target/arm/helper-a64.c +++ /dev/null @@ -1,1136 +0,0 @@ -/* - * AArch64 specific helpers - * - * Copyright (c) 2013 Alexander Graf <agraf@suse.de> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ - -#include "qemu/osdep.h" -#include "qemu/units.h" -#include "cpu.h" -#include "exec/gdbstub.h" -#include "exec/helper-proto.h" -#include "qemu/host-utils.h" -#include "qemu/log.h" -#include "qemu/main-loop.h" -#include "qemu/bitops.h" -#include "internals.h" -#include "qemu/crc32c.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" -#include "qemu/int128.h" -#include "qemu/atomic128.h" -#include "tcg/tcg.h" -#include "fpu/softfloat.h" -#include <zlib.h> /* For crc32 */ - -/* C2.4.7 Multiply and divide */ -/* special cases for 0 and LLONG_MIN are mandated by the standard */ -uint64_t HELPER(udiv64)(uint64_t num, uint64_t den) -{ - if (den == 0) { - return 0; - } - return num / den; -} - -int64_t HELPER(sdiv64)(int64_t num, int64_t den) -{ - if (den == 0) { - return 0; - } - if (num == LLONG_MIN && den == -1) { - return LLONG_MIN; - } - return num / den; -} - -uint64_t HELPER(rbit64)(uint64_t x) -{ - return revbit64(x); -} - -void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm) -{ - update_spsel(env, imm); -} - -static void daif_check(CPUARMState *env, uint32_t op, - uint32_t imm, uintptr_t ra) -{ - /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */ - if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) { - raise_exception_ra(env, EXCP_UDEF, - syn_aa64_sysregtrap(0, extract32(op, 0, 3), - extract32(op, 3, 3), 4, - imm, 0x1f, 0), - exception_target_el(env), ra); - } -} - -void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm) -{ - daif_check(env, 0x1e, imm, GETPC()); - env->daif |= (imm << 6) & PSTATE_DAIF; -} - -void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm) -{ - daif_check(env, 0x1f, imm, GETPC()); - env->daif &= ~((imm << 6) & PSTATE_DAIF); -} - -/* Convert a softfloat float_relation_ (as returned by - * the float*_compare functions) to the correct ARM - * NZCV flag state. 
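The mapping follows the Arm ARM encoding for FP compare results; restated as an illustrative lookup table (softfloat's float_relation_* constants are -1/0/+1/2, shifted here to index from zero):

static const uint32_t fcmp_nzcv[4] = {
    PSTATE_N,            /* less      -> N  */
    PSTATE_Z | PSTATE_C, /* equal     -> ZC */
    PSTATE_C,            /* greater   -> C  */
    PSTATE_C | PSTATE_V, /* unordered -> CV */
};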
- */ -static inline uint32_t float_rel_to_flags(int res) -{ - uint64_t flags; - switch (res) { - case float_relation_equal: - flags = PSTATE_Z | PSTATE_C; - break; - case float_relation_less: - flags = PSTATE_N; - break; - case float_relation_greater: - flags = PSTATE_C; - break; - case float_relation_unordered: - default: - flags = PSTATE_C | PSTATE_V; - break; - } - return flags; -} - -uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status) -{ - return float_rel_to_flags(float16_compare_quiet(x, y, fp_status)); -} - -uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status) -{ - return float_rel_to_flags(float16_compare(x, y, fp_status)); -} - -uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status) -{ - return float_rel_to_flags(float32_compare_quiet(x, y, fp_status)); -} - -uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status) -{ - return float_rel_to_flags(float32_compare(x, y, fp_status)); -} - -uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status) -{ - return float_rel_to_flags(float64_compare_quiet(x, y, fp_status)); -} - -uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status) -{ - return float_rel_to_flags(float64_compare(x, y, fp_status)); -} - -float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float32_squash_input_denormal(a, fpst); - b = float32_squash_input_denormal(b, fpst); - - if ((float32_is_zero(a) && float32_is_infinity(b)) || - (float32_is_infinity(a) && float32_is_zero(b))) { - /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ - return make_float32((1U << 30) | - ((float32_val(a) ^ float32_val(b)) & (1U << 31))); - } - return float32_mul(a, b, fpst); -} - -float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float64_squash_input_denormal(a, fpst); - b = float64_squash_input_denormal(b, fpst); - - if ((float64_is_zero(a) && float64_is_infinity(b)) || - (float64_is_infinity(a) && float64_is_zero(b))) { - /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ - return make_float64((1ULL << 62) | - ((float64_val(a) ^ float64_val(b)) & (1ULL << 63))); - } - return float64_mul(a, b, fpst); -} - -/* 64bit/double versions of the neon float compare functions */ -uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp) -{ - float_status *fpst = fpstp; - return -float64_eq_quiet(a, b, fpst); -} - -uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp) -{ - float_status *fpst = fpstp; - return -float64_le(b, a, fpst); -} - -uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp) -{ - float_status *fpst = fpstp; - return -float64_lt(b, a, fpst); -} - -/* Reciprocal step and sqrt step. Note that unlike the A32/T32 - * versions, these do a fully fused multiply-add or - * multiply-add-and-halve. 
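Numerically these compute the correction term of one Newton-Raphson iteration; unfused reference versions of the arithmetic (the real helpers negate one operand and use float*_muladd, so there is no intermediate rounding):

/* Illustrative only: recps yields 2 - a*b, rsqrts yields (3 - a*b) / 2. */
static double recps_ref(double a, double b)  { return 2.0 - a * b; }
static double rsqrts_ref(double a, double b) { return (3.0 - a * b) / 2.0; }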
- */ - -uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float16_squash_input_denormal(a, fpst); - b = float16_squash_input_denormal(b, fpst); - - a = float16_chs(a); - if ((float16_is_infinity(a) && float16_is_zero(b)) || - (float16_is_infinity(b) && float16_is_zero(a))) { - return float16_two; - } - return float16_muladd(a, b, float16_two, 0, fpst); -} - -float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float32_squash_input_denormal(a, fpst); - b = float32_squash_input_denormal(b, fpst); - - a = float32_chs(a); - if ((float32_is_infinity(a) && float32_is_zero(b)) || - (float32_is_infinity(b) && float32_is_zero(a))) { - return float32_two; - } - return float32_muladd(a, b, float32_two, 0, fpst); -} - -float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float64_squash_input_denormal(a, fpst); - b = float64_squash_input_denormal(b, fpst); - - a = float64_chs(a); - if ((float64_is_infinity(a) && float64_is_zero(b)) || - (float64_is_infinity(b) && float64_is_zero(a))) { - return float64_two; - } - return float64_muladd(a, b, float64_two, 0, fpst); -} - -uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float16_squash_input_denormal(a, fpst); - b = float16_squash_input_denormal(b, fpst); - - a = float16_chs(a); - if ((float16_is_infinity(a) && float16_is_zero(b)) || - (float16_is_infinity(b) && float16_is_zero(a))) { - return float16_one_point_five; - } - return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst); -} - -float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float32_squash_input_denormal(a, fpst); - b = float32_squash_input_denormal(b, fpst); - - a = float32_chs(a); - if ((float32_is_infinity(a) && float32_is_zero(b)) || - (float32_is_infinity(b) && float32_is_zero(a))) { - return float32_one_point_five; - } - return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst); -} - -float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float64_squash_input_denormal(a, fpst); - b = float64_squash_input_denormal(b, fpst); - - a = float64_chs(a); - if ((float64_is_infinity(a) && float64_is_zero(b)) || - (float64_is_infinity(b) && float64_is_zero(a))) { - return float64_one_point_five; - } - return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst); -} - -/* Pairwise long add: add pairs of adjacent elements into - * double-width elements in the result (eg _s8 is an 8x8->16 op) - */ -uint64_t HELPER(neon_addlp_s8)(uint64_t a) -{ - uint64_t nsignmask = 0x0080008000800080ULL; - uint64_t wsignmask = 0x8000800080008000ULL; - uint64_t elementmask = 0x00ff00ff00ff00ffULL; - uint64_t tmp1, tmp2; - uint64_t res, signres; - - /* Extract odd elements, sign extend each to a 16 bit field */ - tmp1 = a & elementmask; - tmp1 ^= nsignmask; - tmp1 |= wsignmask; - tmp1 = (tmp1 - nsignmask) ^ wsignmask; - /* Ditto for the even elements */ - tmp2 = (a >> 8) & elementmask; - tmp2 ^= nsignmask; - tmp2 |= wsignmask; - tmp2 = (tmp2 - nsignmask) ^ wsignmask; - - /* calculate the result by summing bits 0..14, 16..22, etc, - * and then adjusting the sign bits 15, 23, etc manually. - * This ensures the addition can't overflow the 16 bit field. 
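The mask dance here is the classic XOR-bias sign extension, performed in 16-bit lanes so no lane borrows from its neighbour. A single-lane check of the underlying identity (standalone sketch, not QEMU code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* (x ^ 0x80) - 0x80 sign-extends a byte without shifting */
    for (unsigned x = 0; x < 256; x++) {
        assert((int16_t)(((x ^ 0x80) - 0x80) & 0xffff) == (int8_t)x);
    }
    return 0;
}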
- */ - signres = (tmp1 ^ tmp2) & wsignmask; - res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask); - res ^= signres; - - return res; -} - -uint64_t HELPER(neon_addlp_u8)(uint64_t a) -{ - uint64_t tmp; - - tmp = a & 0x00ff00ff00ff00ffULL; - tmp += (a >> 8) & 0x00ff00ff00ff00ffULL; - return tmp; -} - -uint64_t HELPER(neon_addlp_s16)(uint64_t a) -{ - int32_t reslo, reshi; - - reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16); - reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48); - - return (uint32_t)reslo | (((uint64_t)reshi) << 32); -} - -uint64_t HELPER(neon_addlp_u16)(uint64_t a) -{ - uint64_t tmp; - - tmp = a & 0x0000ffff0000ffffULL; - tmp += (a >> 16) & 0x0000ffff0000ffffULL; - return tmp; -} - -/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ -uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp) -{ - float_status *fpst = fpstp; - uint16_t val16, sbit; - int16_t exp; - - if (float16_is_any_nan(a)) { - float16 nan = a; - if (float16_is_signaling_nan(a, fpst)) { - float_raise(float_flag_invalid, fpst); - if (!fpst->default_nan_mode) { - nan = float16_silence_nan(a, fpst); - } - } - if (fpst->default_nan_mode) { - nan = float16_default_nan(fpst); - } - return nan; - } - - a = float16_squash_input_denormal(a, fpst); - - val16 = float16_val(a); - sbit = 0x8000 & val16; - exp = extract32(val16, 10, 5); - - if (exp == 0) { - return make_float16(deposit32(sbit, 10, 5, 0x1e)); - } else { - return make_float16(deposit32(sbit, 10, 5, ~exp)); - } -} - -float32 HELPER(frecpx_f32)(float32 a, void *fpstp) -{ - float_status *fpst = fpstp; - uint32_t val32, sbit; - int32_t exp; - - if (float32_is_any_nan(a)) { - float32 nan = a; - if (float32_is_signaling_nan(a, fpst)) { - float_raise(float_flag_invalid, fpst); - if (!fpst->default_nan_mode) { - nan = float32_silence_nan(a, fpst); - } - } - if (fpst->default_nan_mode) { - nan = float32_default_nan(fpst); - } - return nan; - } - - a = float32_squash_input_denormal(a, fpst); - - val32 = float32_val(a); - sbit = 0x80000000ULL & val32; - exp = extract32(val32, 23, 8); - - if (exp == 0) { - return make_float32(sbit | (0xfe << 23)); - } else { - return make_float32(sbit | (~exp & 0xff) << 23); - } -} - -float64 HELPER(frecpx_f64)(float64 a, void *fpstp) -{ - float_status *fpst = fpstp; - uint64_t val64, sbit; - int64_t exp; - - if (float64_is_any_nan(a)) { - float64 nan = a; - if (float64_is_signaling_nan(a, fpst)) { - float_raise(float_flag_invalid, fpst); - if (!fpst->default_nan_mode) { - nan = float64_silence_nan(a, fpst); - } - } - if (fpst->default_nan_mode) { - nan = float64_default_nan(fpst); - } - return nan; - } - - a = float64_squash_input_denormal(a, fpst); - - val64 = float64_val(a); - sbit = 0x8000000000000000ULL & val64; - exp = extract64(float64_val(a), 52, 11); - - if (exp == 0) { - return make_float64(sbit | (0x7feULL << 52)); - } else { - return make_float64(sbit | (~exp & 0x7ffULL) << 52); - } -} - -float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env) -{ - /* Von Neumann rounding is implemented by using round-to-zero - * and then setting the LSB of the result if Inexact was raised. 
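Round-to-odd in integer form, for intuition: truncate, then jam the low bit whenever anything was discarded, so a later narrowing step cannot be fooled by a halfway pattern (this is what makes two-step f64->f32->f16 conversion safe from double rounding). A standalone sketch, not the helper itself:

#include <stdint.h>

static uint32_t shift_round_to_odd(uint64_t x, unsigned n) /* 0 < n < 64 */
{
    uint32_t r = (uint32_t)(x >> n);
    if (x & ((1ULL << n) - 1)) {
        r |= 1;        /* inexact: jam the LSB */
    }
    return r;
}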
- */ - float32 r; - float_status *fpst = &env->vfp.fp_status; - float_status tstat = *fpst; - int exflags; - - set_float_rounding_mode(float_round_to_zero, &tstat); - set_float_exception_flags(0, &tstat); - r = float64_to_float32(a, &tstat); - exflags = get_float_exception_flags(&tstat); - if (exflags & float_flag_inexact) { - r = make_float32(float32_val(r) | 1); - } - exflags |= get_float_exception_flags(fpst); - set_float_exception_flags(exflags, fpst); - return r; -} - -/* 64-bit versions of the CRC helpers. Note that although the operation - * (and the prototypes of crc32c() and crc32() mean that only the bottom - * 32 bits of the accumulator and result are used, we pass and return - * uint64_t for convenience of the generated code. Unlike the 32-bit - * instruction set versions, val may genuinely have 64 bits of data in it. - * The upper bytes of val (above the number specified by 'bytes') must have - * been zeroed out by the caller. - */ -uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes) -{ - uint8_t buf[8]; - - stq_le_p(buf, val); - - /* zlib crc32 converts the accumulator and output to one's complement. */ - return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; -} - -uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) -{ - uint8_t buf[8]; - - stq_le_p(buf, val); - - /* Linux crc32c converts the output to one's complement. */ - return crc32c(acc, buf, bytes) ^ 0xffffffff; -} - -uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) -{ - Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); - Int128 newv = int128_make128(new_lo, new_hi); - Int128 oldv; - uintptr_t ra = GETPC(); - uint64_t o0, o1; - bool success; - -#ifdef CONFIG_USER_ONLY - /* ??? Enforce alignment. 
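The complement-in/complement-out wrapping of the CRC helpers has a convenient consequence: applying the helper once per instruction chains correctly, because the complements cancel between steps. An illustrative check against plain zlib (link with -lz; not QEMU code):

#include <assert.h>
#include <stdint.h>
#include <zlib.h>

static uint32_t crc32_step(uint32_t acc, const uint8_t *buf, unsigned len)
{
    return crc32(acc ^ 0xffffffff, buf, len) ^ 0xffffffff;
}

int main(void)
{
    uint8_t d[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    /* two 4-byte steps give the same result as one 8-byte step */
    assert(crc32_step(crc32_step(0, d, 4), d + 4, 4) == crc32_step(0, d, 8));
    return 0;
}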
*/ - uint64_t *haddr = g2h(env_cpu(env), addr); - - set_helper_retaddr(ra); - o0 = ldq_le_p(haddr + 0); - o1 = ldq_le_p(haddr + 1); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - stq_le_p(haddr + 0, int128_getlo(newv)); - stq_le_p(haddr + 1, int128_gethi(newv)); - } - clear_helper_retaddr(); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); - TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); - - o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); - o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); - helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); - } -#endif - - return !success; -} - -uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) -{ - Int128 oldv, cmpv, newv; - uintptr_t ra = GETPC(); - bool success; - int mem_idx; - TCGMemOpIdx oi; - - assert(HAVE_CMPXCHG128); - - mem_idx = cpu_mmu_index(env, false); - oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); - - cmpv = int128_make128(env->exclusive_val, env->exclusive_high); - newv = int128_make128(new_lo, new_hi); - oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); - - success = int128_eq(oldv, cmpv); - return !success; -} - -uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) -{ - /* - * High and low need to be switched here because this is not actually a - * 128bit store but two doublewords stored consecutively - */ - Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val); - Int128 newv = int128_make128(new_hi, new_lo); - Int128 oldv; - uintptr_t ra = GETPC(); - uint64_t o0, o1; - bool success; - -#ifdef CONFIG_USER_ONLY - /* ??? Enforce alignment. 
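On hosts with a native 128-bit compare-and-swap, the _parallel variants reduce to a single atomic operation. A minimal sketch using the GCC/Clang builtin; whether this is lock-free depends on the host (x86-64 needs -mcx16, otherwise libatomic supplies it), and it is not QEMU's actual wrapper:

#include <stdbool.h>

typedef unsigned __int128 u128;

static bool cas128(u128 *addr, u128 *expected, u128 desired)
{
    return __atomic_compare_exchange_n(addr, expected, desired, false,
                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}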
*/ - uint64_t *haddr = g2h(env_cpu(env), addr); - - set_helper_retaddr(ra); - o1 = ldq_be_p(haddr + 0); - o0 = ldq_be_p(haddr + 1); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - stq_be_p(haddr + 0, int128_gethi(newv)); - stq_be_p(haddr + 1, int128_getlo(newv)); - } - clear_helper_retaddr(); -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); - TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); - - o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); - o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); - helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); - } -#endif - - return !success; -} - -uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) -{ - Int128 oldv, cmpv, newv; - uintptr_t ra = GETPC(); - bool success; - int mem_idx; - TCGMemOpIdx oi; - - assert(HAVE_CMPXCHG128); - - mem_idx = cpu_mmu_index(env, false); - oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); - - /* - * High and low need to be switched here because this is not actually a - * 128bit store but two doublewords stored consecutively - */ - cmpv = int128_make128(env->exclusive_high, env->exclusive_val); - newv = int128_make128(new_hi, new_lo); - oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); - - success = int128_eq(oldv, cmpv); - return !success; -} - -/* Writes back the old data into Rs. */ -void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) -{ - Int128 oldv, cmpv, newv; - uintptr_t ra = GETPC(); - int mem_idx; - TCGMemOpIdx oi; - - assert(HAVE_CMPXCHG128); - - mem_idx = cpu_mmu_index(env, false); - oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); - - cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]); - newv = int128_make128(new_lo, new_hi); - oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); - - env->xregs[rs] = int128_getlo(oldv); - env->xregs[rs + 1] = int128_gethi(oldv); -} - -void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr, - uint64_t new_hi, uint64_t new_lo) -{ - Int128 oldv, cmpv, newv; - uintptr_t ra = GETPC(); - int mem_idx; - TCGMemOpIdx oi; - - assert(HAVE_CMPXCHG128); - - mem_idx = cpu_mmu_index(env, false); - oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); - - cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]); - newv = int128_make128(new_lo, new_hi); - oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); - - env->xregs[rs + 1] = int128_getlo(oldv); - env->xregs[rs] = int128_gethi(oldv); -} - -/* - * AdvSIMD half-precision - */ - -#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix)) - -#define ADVSIMD_HALFOP(name) \ -uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \ -{ \ - float_status *fpst = fpstp; \ - return float16_ ## name(a, b, fpst); \ -} - -ADVSIMD_HALFOP(add) -ADVSIMD_HALFOP(sub) -ADVSIMD_HALFOP(mul) -ADVSIMD_HALFOP(div) -ADVSIMD_HALFOP(min) -ADVSIMD_HALFOP(max) -ADVSIMD_HALFOP(minnum) -ADVSIMD_HALFOP(maxnum) - -#define ADVSIMD_TWOHALFOP(name) \ -uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \ -{ \ - float16 a1, a2, b1, b2; \ - uint32_t r1, r2; \ - float_status *fpst = fpstp; \ - a1 = extract32(two_a, 0, 16); \ - a2 = extract32(two_a, 16, 
16); \ - b1 = extract32(two_b, 0, 16); \ - b2 = extract32(two_b, 16, 16); \ - r1 = float16_ ## name(a1, b1, fpst); \ - r2 = float16_ ## name(a2, b2, fpst); \ - return deposit32(r1, 16, 16, r2); \ -} - -ADVSIMD_TWOHALFOP(add) -ADVSIMD_TWOHALFOP(sub) -ADVSIMD_TWOHALFOP(mul) -ADVSIMD_TWOHALFOP(div) -ADVSIMD_TWOHALFOP(min) -ADVSIMD_TWOHALFOP(max) -ADVSIMD_TWOHALFOP(minnum) -ADVSIMD_TWOHALFOP(maxnum) - -/* Data processing - scalar floating-point and advanced SIMD */ -static float16 float16_mulx(float16 a, float16 b, void *fpstp) -{ - float_status *fpst = fpstp; - - a = float16_squash_input_denormal(a, fpst); - b = float16_squash_input_denormal(b, fpst); - - if ((float16_is_zero(a) && float16_is_infinity(b)) || - (float16_is_infinity(a) && float16_is_zero(b))) { - /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ - return make_float16((1U << 14) | - ((float16_val(a) ^ float16_val(b)) & (1U << 15))); - } - return float16_mul(a, b, fpst); -} - -ADVSIMD_HALFOP(mulx) -ADVSIMD_TWOHALFOP(mulx) - -/* fused multiply-accumulate */ -uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c, - void *fpstp) -{ - float_status *fpst = fpstp; - return float16_muladd(a, b, c, 0, fpst); -} - -uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b, - uint32_t two_c, void *fpstp) -{ - float_status *fpst = fpstp; - float16 a1, a2, b1, b2, c1, c2; - uint32_t r1, r2; - a1 = extract32(two_a, 0, 16); - a2 = extract32(two_a, 16, 16); - b1 = extract32(two_b, 0, 16); - b2 = extract32(two_b, 16, 16); - c1 = extract32(two_c, 0, 16); - c2 = extract32(two_c, 16, 16); - r1 = float16_muladd(a1, b1, c1, 0, fpst); - r2 = float16_muladd(a2, b2, c2, 0, fpst); - return deposit32(r1, 16, 16, r2); -} - -/* - * Floating point comparisons produce an integer result. Softfloat - * routines return float_relation types which we convert to the 0/-1 - * Neon requires. - */ - -#define ADVSIMD_CMPRES(test) (test) ? 
0xffff : 0 - -uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp) -{ - float_status *fpst = fpstp; - int compare = float16_compare_quiet(a, b, fpst); - return ADVSIMD_CMPRES(compare == float_relation_equal); -} - -uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp) -{ - float_status *fpst = fpstp; - int compare = float16_compare(a, b, fpst); - return ADVSIMD_CMPRES(compare == float_relation_greater || - compare == float_relation_equal); -} - -uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp) -{ - float_status *fpst = fpstp; - int compare = float16_compare(a, b, fpst); - return ADVSIMD_CMPRES(compare == float_relation_greater); -} - -uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp) -{ - float_status *fpst = fpstp; - float16 f0 = float16_abs(a); - float16 f1 = float16_abs(b); - int compare = float16_compare(f0, f1, fpst); - return ADVSIMD_CMPRES(compare == float_relation_greater || - compare == float_relation_equal); -} - -uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp) -{ - float_status *fpst = fpstp; - float16 f0 = float16_abs(a); - float16 f1 = float16_abs(b); - int compare = float16_compare(f0, f1, fpst); - return ADVSIMD_CMPRES(compare == float_relation_greater); -} - -/* round to integral */ -uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status) -{ - return float16_round_to_int(x, fp_status); -} - -uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status) -{ - int old_flags = get_float_exception_flags(fp_status), new_flags; - float16 ret; - - ret = float16_round_to_int(x, fp_status); - - /* Suppress any inexact exceptions the conversion produced */ - if (!(old_flags & float_flag_inexact)) { - new_flags = get_float_exception_flags(fp_status); - set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status); - } - - return ret; -} - -/* - * Half-precision floating point conversion functions - * - * There are a multitude of conversion functions with various - * different rounding modes. This is dealt with by the calling code - * setting the mode appropriately before calling the helper. - */ - -uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp) -{ - float_status *fpst = fpstp; - - /* Invalid if we are passed a NaN */ - if (float16_is_any_nan(a)) { - float_raise(float_flag_invalid, fpst); - return 0; - } - return float16_to_int16(a, fpst); -} - -uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp) -{ - float_status *fpst = fpstp; - - /* Invalid if we are passed a NaN */ - if (float16_is_any_nan(a)) { - float_raise(float_flag_invalid, fpst); - return 0; - } - return float16_to_uint16(a, fpst); -} - -static int el_from_spsr(uint32_t spsr) -{ - /* Return the exception level that this SPSR is requesting a return to, - * or -1 if it is invalid (an illegal return) - */ - if (spsr & PSTATE_nRW) { - switch (spsr & CPSR_M) { - case ARM_CPU_MODE_USR: - return 0; - case ARM_CPU_MODE_HYP: - return 2; - case ARM_CPU_MODE_FIQ: - case ARM_CPU_MODE_IRQ: - case ARM_CPU_MODE_SVC: - case ARM_CPU_MODE_ABT: - case ARM_CPU_MODE_UND: - case ARM_CPU_MODE_SYS: - return 1; - case ARM_CPU_MODE_MON: - /* Returning to Mon from AArch64 is never possible, - * so this is an illegal return. 
- */ - default: - return -1; - } - } else { - if (extract32(spsr, 1, 1)) { - /* Return with reserved M[1] bit set */ - return -1; - } - if (extract32(spsr, 0, 4) == 1) { - /* return to EL0 with M[0] bit set */ - return -1; - } - return extract32(spsr, 2, 2); - } -} - -static void cpsr_write_from_spsr_elx(CPUARMState *env, - uint32_t val) -{ - uint32_t mask; - - /* Save SPSR_ELx.SS into PSTATE. */ - env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS); - val &= ~PSTATE_SS; - - /* Move DIT to the correct location for CPSR */ - if (val & PSTATE_DIT) { - val &= ~PSTATE_DIT; - val |= CPSR_DIT; - } - - mask = aarch32_cpsr_valid_mask(env->features, \ - &env_archcpu(env)->isar); - cpsr_write(env, val, mask, CPSRWriteRaw); -} - -void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) -{ - int cur_el = arm_current_el(env); - unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el); - uint32_t spsr = env->banked_spsr[spsr_idx]; - int new_el; - bool return_to_aa64 = (spsr & PSTATE_nRW) == 0; - - aarch64_save_sp(env, cur_el); - - arm_clear_exclusive(env); - - /* We must squash the PSTATE.SS bit to zero unless both of the - * following hold: - * 1. debug exceptions are currently disabled - * 2. singlestep will be active in the EL we return to - * We check 1 here and 2 after we've done the pstate/cpsr write() to - * transition to the EL we're going to. - */ - if (arm_generate_debug_exceptions(env)) { - spsr &= ~PSTATE_SS; - } - - new_el = el_from_spsr(spsr); - if (new_el == -1) { - goto illegal_return; - } - if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) { - /* Disallow return to an EL which is unimplemented or higher - * than the current one. - */ - goto illegal_return; - } - - if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) { - /* Return to an EL which is configured for a different register width */ - goto illegal_return; - } - - if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) { - goto illegal_return; - } - - qemu_mutex_lock_iothread(); - arm_call_pre_el_change_hook(env_archcpu(env)); - qemu_mutex_unlock_iothread(); - - if (!return_to_aa64) { - env->aarch64 = 0; - /* We do a raw CPSR write because aarch64_sync_64_to_32() - * will sort the register banks out for us, and we've already - * caught all the bad-mode cases in el_from_spsr(). - */ - cpsr_write_from_spsr_elx(env, spsr); - if (!arm_singlestep_active(env)) { - env->pstate &= ~PSTATE_SS; - } - aarch64_sync_64_to_32(env); - - if (spsr & CPSR_T) { - env->regs[15] = new_pc & ~0x1; - } else { - env->regs[15] = new_pc & ~0x3; - } - helper_rebuild_hflags_a32(env, new_el); - qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to " - "AArch32 EL%d PC 0x%" PRIx32 "\n", - cur_el, new_el, env->regs[15]); - } else { - int tbii; - - env->aarch64 = 1; - spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar); - pstate_write(env, spsr); - if (!arm_singlestep_active(env)) { - env->pstate &= ~PSTATE_SS; - } - aarch64_restore_sp(env, new_el); - helper_rebuild_hflags_a64(env, new_el); - - /* - * Apply TBI to the exception return address. We had to delay this - * until after we selected the new EL, so that we could select the - * correct TBI+TBID bits. This is made easier by waiting until after - * the hflags rebuild, since we can pull the composite TBII field - * from there. - */ - tbii = EX_TBFLAG_A64(env->hflags, TBII); - if ((tbii >> extract64(new_pc, 55, 1)) & 1) { - /* TBI is enabled. 
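In isolation, the TBI strip keeps 56 address bits and, for a two-range regime, propagates bit 55 through the ignored top byte so high-half (kernel) return addresses stay canonical. A standalone sketch:

#include <stdbool.h>
#include <stdint.h>

static uint64_t strip_top_byte(uint64_t pc, bool two_ranges)
{
    if (two_ranges) {
        return (uint64_t)((int64_t)(pc << 8) >> 8); /* sextract64(pc, 0, 56) */
    }
    return pc & 0x00ffffffffffffffULL;              /* extract64(pc, 0, 56)  */
}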
*/ - int core_mmu_idx = cpu_mmu_index(env, false); - if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) { - new_pc = sextract64(new_pc, 0, 56); - } else { - new_pc = extract64(new_pc, 0, 56); - } - } - env->pc = new_pc; - - qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to " - "AArch64 EL%d PC 0x%" PRIx64 "\n", - cur_el, new_el, env->pc); - } - - /* - * Note that cur_el can never be 0. If new_el is 0, then - * el0_a64 is return_to_aa64, else el0_a64 is ignored. - */ - aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64); - - qemu_mutex_lock_iothread(); - arm_call_el_change_hook(env_archcpu(env)); - qemu_mutex_unlock_iothread(); - - return; - -illegal_return: - /* Illegal return events of various kinds have architecturally - * mandated behaviour: - * restore NZCV and DAIF from SPSR_ELx - * set PSTATE.IL - * restore PC from ELR_ELx - * no change to exception level, execution state or stack pointer - */ - env->pstate |= PSTATE_IL; - env->pc = new_pc; - spsr &= PSTATE_NZCV | PSTATE_DAIF; - spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF); - pstate_write(env, spsr); - if (!arm_singlestep_active(env)) { - env->pstate &= ~PSTATE_SS; - } - helper_rebuild_hflags_a64(env, cur_el); - qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: " - "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc); -} - -/* - * Square Root and Reciprocal square root - */ - -uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp) -{ - float_status *s = fpstp; - - return float16_sqrt(a, s); -} - -void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) -{ - /* - * Implement DC ZVA, which zeroes a fixed-length block of memory. - * Note that we do not implement the (architecturally mandated) - * alignment fault for attempts to use this on Device memory - * (which matches the usual QEMU behaviour of not implementing either - * alignment faults or any memory attribute handling). - */ - int blocklen = 4 << env_archcpu(env)->dcz_blocksize; - uint64_t vaddr = vaddr_in & ~(blocklen - 1); - int mmu_idx = cpu_mmu_index(env, false); - void *mem; - - /* - * Trapless lookup. In addition to actual invalid page, may - * return NULL for I/O, watchpoints, clean pages, etc. - */ - mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx); - -#ifndef CONFIG_USER_ONLY - if (unlikely(!mem)) { - uintptr_t ra = GETPC(); - - /* - * Trap if accessing an invalid page. DC_ZVA requires that we supply - * the original pointer for an invalid page. But watchpoints require - * that we probe the actual space. So do both. - */ - (void) probe_write(env, vaddr_in, 1, mmu_idx, ra); - mem = probe_write(env, vaddr, blocklen, mmu_idx, ra); - - if (unlikely(!mem)) { - /* - * The only remaining reason for mem == NULL is I/O. - * Just do a series of byte writes as the architecture demands. 
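The DC ZVA block geometry, restated: dcz_blocksize is a log2 count of 4-byte words, so the byte length is 4 << dcz_blocksize and the requested address is aligned down to the block before zeroing. An illustrative sketch (a typical value of 4 gives 64-byte blocks):

#include <stdint.h>

static uint64_t dc_zva_block_base(uint64_t vaddr_in, unsigned dcz_blocksize)
{
    unsigned blocklen = 4u << dcz_blocksize;      /* 4 -> 64 bytes */
    return vaddr_in & ~(uint64_t)(blocklen - 1);  /* align down    */
}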
- */ - for (int i = 0; i < blocklen; i++) { - cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra); - } - return; - } - } -#endif - - memset(mem, 0, blocklen); -} diff --git a/target/arm/helper.c b/target/arm/helper.c index 6274221447..a620481d7c 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -7,147 +7,34 @@ */ #include "qemu/osdep.h" -#include "qemu/units.h" -#include "target/arm/idau.h" +#include "qemu/log.h" #include "trace.h" #include "cpu.h" #include "internals.h" -#include "exec/gdbstub.h" +#include "cpu-features.h" #include "exec/helper-proto.h" -#include "qemu/host-utils.h" #include "qemu/main-loop.h" +#include "qemu/timer.h" #include "qemu/bitops.h" #include "qemu/crc32c.h" #include "qemu/qemu-print.h" #include "exec/exec-all.h" #include <zlib.h> /* For crc32 */ #include "hw/irq.h" -#include "semihosting/semihost.h" -#include "sysemu/cpus.h" #include "sysemu/cpu-timers.h" #include "sysemu/kvm.h" #include "sysemu/tcg.h" -#include "qemu/range.h" -#include "qapi/qapi-commands-machine-target.h" #include "qapi/error.h" #include "qemu/guest-random.h" #ifdef CONFIG_TCG -#include "arm_ldst.h" -#include "exec/cpu_ldst.h" #include "semihosting/common-semi.h" #endif +#include "cpregs.h" +#include "target/arm/gtimer.h" #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */ -#define PMCR_NUM_COUNTERS 4 /* QEMU IMPDEF choice */ - -#ifndef CONFIG_USER_ONLY - -static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - bool s1_is_el0, - hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, - target_ulong *page_size_ptr, - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) - __attribute__((nonnull)); -#endif static void switch_mode(CPUARMState *env, int mode); -static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx); - -static int vfp_gdb_get_reg(CPUARMState *env, GByteArray *buf, int reg) -{ - ARMCPU *cpu = env_archcpu(env); - int nregs = cpu_isar_feature(aa32_simd_r32, cpu) ? 32 : 16; - - /* VFP data registers are always little-endian. */ - if (reg < nregs) { - return gdb_get_reg64(buf, *aa32_vfp_dreg(env, reg)); - } - if (arm_feature(env, ARM_FEATURE_NEON)) { - /* Aliases for Q regs. */ - nregs += 16; - if (reg < nregs) { - uint64_t *q = aa32_vfp_qreg(env, reg - 32); - return gdb_get_reg128(buf, q[0], q[1]); - } - } - switch (reg - nregs) { - case 0: return gdb_get_reg32(buf, env->vfp.xregs[ARM_VFP_FPSID]); break; - case 1: return gdb_get_reg32(buf, vfp_get_fpscr(env)); break; - case 2: return gdb_get_reg32(buf, env->vfp.xregs[ARM_VFP_FPEXC]); break; - } - return 0; -} - -static int vfp_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg) -{ - ARMCPU *cpu = env_archcpu(env); - int nregs = cpu_isar_feature(aa32_simd_r32, cpu) ? 32 : 16; - - if (reg < nregs) { - *aa32_vfp_dreg(env, reg) = ldq_le_p(buf); - return 8; - } - if (arm_feature(env, ARM_FEATURE_NEON)) { - nregs += 16; - if (reg < nregs) { - uint64_t *q = aa32_vfp_qreg(env, reg - 32); - q[0] = ldq_le_p(buf); - q[1] = ldq_le_p(buf + 8); - return 16; - } - } - switch (reg - nregs) { - case 0: env->vfp.xregs[ARM_VFP_FPSID] = ldl_p(buf); return 4; - case 1: vfp_set_fpscr(env, ldl_p(buf)); return 4; - case 2: env->vfp.xregs[ARM_VFP_FPEXC] = ldl_p(buf) & (1 << 30); return 4; - } - return 0; -} - -static int aarch64_fpu_gdb_get_reg(CPUARMState *env, GByteArray *buf, int reg) -{ - switch (reg) { - case 0 ... 
31: - { - /* 128 bit FP register - quads are in LE order */ - uint64_t *q = aa64_vfp_qreg(env, reg); - return gdb_get_reg128(buf, q[1], q[0]); - } - case 32: - /* FPSR */ - return gdb_get_reg32(buf, vfp_get_fpsr(env)); - case 33: - /* FPCR */ - return gdb_get_reg32(buf,vfp_get_fpcr(env)); - default: - return 0; - } -} - -static int aarch64_fpu_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg) -{ - switch (reg) { - case 0 ... 31: - /* 128 bit FP register */ - { - uint64_t *q = aa64_vfp_qreg(env, reg); - q[0] = ldq_le_p(buf); - q[1] = ldq_le_p(buf + 8); - return 16; - } - case 32: - /* FPSR */ - vfp_set_fpsr(env, ldl_p(buf)); - return 4; - case 33: - /* FPCR */ - vfp_set_fpcr(env, ldl_p(buf)); - return 4; - default: - return 0; - } -} static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri) { @@ -159,8 +46,7 @@ static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri) } } -static void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { assert(ri->fieldoffset); if (cpreg_field_is_64bit(ri)) { @@ -192,7 +78,8 @@ uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri) static void write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t v) { - /* Raw write of a coprocessor register (as needed for migration, etc). + /* + * Raw write of a coprocessor register (as needed for migration, etc). * Note that constant registers are treated as write-ignored; the * caller should check for success by whether a readback gives the * value written. @@ -208,137 +95,10 @@ static void write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri, } } -/** - * arm_get/set_gdb_*: get/set a gdb register - * @env: the CPU state - * @buf: a buffer to copy to/from - * @reg: register number (offset from start of group) - * - * We return the number of bytes copied - */ - -static int arm_gdb_get_sysreg(CPUARMState *env, GByteArray *buf, int reg) -{ - ARMCPU *cpu = env_archcpu(env); - const ARMCPRegInfo *ri; - uint32_t key; - - key = cpu->dyn_sysreg_xml.data.cpregs.keys[reg]; - ri = get_arm_cp_reginfo(cpu->cp_regs, key); - if (ri) { - if (cpreg_field_is_64bit(ri)) { - return gdb_get_reg64(buf, (uint64_t)read_raw_cp_reg(env, ri)); - } else { - return gdb_get_reg32(buf, (uint32_t)read_raw_cp_reg(env, ri)); - } - } - return 0; -} - -static int arm_gdb_set_sysreg(CPUARMState *env, uint8_t *buf, int reg) -{ - return 0; -} - -#ifdef TARGET_AARCH64 -static int arm_gdb_get_svereg(CPUARMState *env, GByteArray *buf, int reg) -{ - ARMCPU *cpu = env_archcpu(env); - - switch (reg) { - /* The first 32 registers are the zregs */ - case 0 ... 31: - { - int vq, len = 0; - for (vq = 0; vq < cpu->sve_max_vq; vq++) { - len += gdb_get_reg128(buf, - env->vfp.zregs[reg].d[vq * 2 + 1], - env->vfp.zregs[reg].d[vq * 2]); - } - return len; - } - case 32: - return gdb_get_reg32(buf, vfp_get_fpsr(env)); - case 33: - return gdb_get_reg32(buf, vfp_get_fpcr(env)); - /* then 16 predicates and the ffr */ - case 34 ... 50: - { - int preg = reg - 34; - int vq, len = 0; - for (vq = 0; vq < cpu->sve_max_vq; vq = vq + 4) { - len += gdb_get_reg64(buf, env->vfp.pregs[preg].p[vq / 4]); - } - return len; - } - case 51: - { - /* - * We report in Vector Granules (VG) which is 64bit in a Z reg - * while the ZCR works in Vector Quads (VQ) which is 128bit chunks. 
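The unit conversion at issue: ZCR expresses the vector length in 128-bit quads (VQ) while gdb's vg counts 64-bit granules, hence the final multiply by two. Restated (sketch):

/* vg = 2 * vq; e.g. a 512-bit vector length gives vq = 4, vg = 8 */
static unsigned vg_from_vq(unsigned vq)      { return 2 * vq; }
static unsigned vq_from_vl_bits(unsigned vl) { return vl / 128; }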
- */ - int vq = sve_zcr_len_for_el(env, arm_current_el(env)) + 1; - return gdb_get_reg64(buf, vq * 2); - } - default: - /* gdbstub asked for something out our range */ - qemu_log_mask(LOG_UNIMP, "%s: out of range register %d", __func__, reg); - break; - } - - return 0; -} - -static int arm_gdb_set_svereg(CPUARMState *env, uint8_t *buf, int reg) -{ - ARMCPU *cpu = env_archcpu(env); - - /* The first 32 registers are the zregs */ - switch (reg) { - /* The first 32 registers are the zregs */ - case 0 ... 31: - { - int vq, len = 0; - uint64_t *p = (uint64_t *) buf; - for (vq = 0; vq < cpu->sve_max_vq; vq++) { - env->vfp.zregs[reg].d[vq * 2 + 1] = *p++; - env->vfp.zregs[reg].d[vq * 2] = *p++; - len += 16; - } - return len; - } - case 32: - vfp_set_fpsr(env, *(uint32_t *)buf); - return 4; - case 33: - vfp_set_fpcr(env, *(uint32_t *)buf); - return 4; - case 34 ... 50: - { - int preg = reg - 34; - int vq, len = 0; - uint64_t *p = (uint64_t *) buf; - for (vq = 0; vq < cpu->sve_max_vq; vq = vq + 4) { - env->vfp.pregs[preg].p[vq / 4] = *p++; - len += 8; - } - return len; - } - case 51: - /* cannot set vg via gdbstub */ - return 0; - default: - /* gdbstub asked for something out our range */ - break; - } - - return 0; -} -#endif /* TARGET_AARCH64 */ - static bool raw_accessors_invalid(const ARMCPRegInfo *ri) { - /* Return true if the regdef would cause an assertion if you called + /* + * Return true if the regdef would cause an assertion if you called * read_raw_cp_reg() or write_raw_cp_reg() on it (ie if it is a * program bug for it not to have the NO_RAW flag). * NB that returning false here doesn't necessarily mean that calling @@ -421,7 +181,8 @@ bool write_list_to_cpustate(ARMCPU *cpu) if (ri->type & ARM_CP_NO_RAW) { continue; } - /* Write value and confirm it reads back as written + /* + * Write value and confirm it reads back as written * (to catch read-only registers and partially read-only * registers where the incoming migration value doesn't match) */ @@ -436,13 +197,10 @@ bool write_list_to_cpustate(ARMCPU *cpu) static void add_cpreg_to_list(gpointer key, gpointer opaque) { ARMCPU *cpu = opaque; - uint64_t regidx; - const ARMCPRegInfo *ri; - - regidx = *(uint32_t *)key; - ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); + uint32_t regidx = (uintptr_t)key; + const ARMCPRegInfo *ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); - if (!(ri->type & (ARM_CP_NO_RAW|ARM_CP_ALIAS))) { + if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_ALIAS))) { cpu->cpreg_indexes[cpu->cpreg_array_len] = cpreg_to_kvm_id(regidx); /* The value array need not be initialized at this point */ cpu->cpreg_array_len++; @@ -452,21 +210,19 @@ static void add_cpreg_to_list(gpointer key, gpointer opaque) static void count_cpreg(gpointer key, gpointer opaque) { ARMCPU *cpu = opaque; - uint64_t regidx; const ARMCPRegInfo *ri; - regidx = *(uint32_t *)key; - ri = get_arm_cp_reginfo(cpu->cp_regs, regidx); + ri = g_hash_table_lookup(cpu->cp_regs, key); - if (!(ri->type & (ARM_CP_NO_RAW|ARM_CP_ALIAS))) { + if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_ALIAS))) { cpu->cpreg_array_len++; } } static gint cpreg_key_compare(gconstpointer a, gconstpointer b) { - uint64_t aidx = cpreg_to_kvm_id(*(uint32_t *)a); - uint64_t bidx = cpreg_to_kvm_id(*(uint32_t *)b); + uint64_t aidx = cpreg_to_kvm_id((uintptr_t)a); + uint64_t bidx = cpreg_to_kvm_id((uintptr_t)b); if (aidx > bidx) { return 1; @@ -479,7 +235,8 @@ static gint cpreg_key_compare(gconstpointer a, gconstpointer b) void init_cpreg_list(ARMCPU *cpu) { - /* Initialise the cpreg_tuples[] array based on the 
cp_regs hash. + /* + * Initialise the cpreg_tuples[] array based on the cp_regs hash. * Note that we require cpreg_tuples[] to be sorted by key ID. */ GList *keys; @@ -507,6 +264,18 @@ void init_cpreg_list(ARMCPU *cpu) g_list_free(keys); } +static bool arm_pan_enabled(CPUARMState *env) +{ + if (is_a64(env)) { + if ((arm_hcr_el2_eff(env) & (HCR_NV | HCR_NV1)) == (HCR_NV | HCR_NV1)) { + return false; + } + return env->pstate & PSTATE_PAN; + } else { + return env->uncached_cpsr & CPSR_PAN; + } +} + /* * Some registers are not accessible from AArch32 EL3 if SCR.NS == 0. */ @@ -521,7 +290,8 @@ static CPAccessResult access_el3_aa32ns(CPUARMState *env, return CP_ACCESS_OK; } -/* Some secure-only AArch32 registers trap to EL3 if used from +/* + * Some secure-only AArch32 registers trap to EL3 if used from * Secure EL1 (but are just ordinary UNDEF in other non-EL3 contexts). * Note that an access from Secure EL1 can only happen if EL3 is AArch64. * We assume that the .access field is set to PL1_RW. @@ -543,72 +313,8 @@ static CPAccessResult access_trap_aa32s_el1(CPUARMState *env, return CP_ACCESS_TRAP_UNCATEGORIZED; } -static uint64_t arm_mdcr_el2_eff(CPUARMState *env) -{ - return arm_is_el2_enabled(env) ? env->cp15.mdcr_el2 : 0; -} - -/* Check for traps to "powerdown debug" registers, which are controlled - * by MDCR.TDOSA - */ -static CPAccessResult access_tdosa(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - int el = arm_current_el(env); - uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); - bool mdcr_el2_tdosa = (mdcr_el2 & MDCR_TDOSA) || (mdcr_el2 & MDCR_TDE) || - (arm_hcr_el2_eff(env) & HCR_TGE); - - if (el < 2 && mdcr_el2_tdosa) { - return CP_ACCESS_TRAP_EL2; - } - if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDOSA)) { - return CP_ACCESS_TRAP_EL3; - } - return CP_ACCESS_OK; -} - -/* Check for traps to "debug ROM" registers, which are controlled - * by MDCR_EL2.TDRA for EL2 but by the more general MDCR_EL3.TDA for EL3. - */ -static CPAccessResult access_tdra(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - int el = arm_current_el(env); - uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); - bool mdcr_el2_tdra = (mdcr_el2 & MDCR_TDRA) || (mdcr_el2 & MDCR_TDE) || - (arm_hcr_el2_eff(env) & HCR_TGE); - - if (el < 2 && mdcr_el2_tdra) { - return CP_ACCESS_TRAP_EL2; - } - if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDA)) { - return CP_ACCESS_TRAP_EL3; - } - return CP_ACCESS_OK; -} - -/* Check for traps to general debug registers, which are controlled - * by MDCR_EL2.TDA for EL2 and MDCR_EL3.TDA for EL3. - */ -static CPAccessResult access_tda(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) -{ - int el = arm_current_el(env); - uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); - bool mdcr_el2_tda = (mdcr_el2 & MDCR_TDA) || (mdcr_el2 & MDCR_TDE) || - (arm_hcr_el2_eff(env) & HCR_TGE); - - if (el < 2 && mdcr_el2_tda) { - return CP_ACCESS_TRAP_EL2; - } - if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDA)) { - return CP_ACCESS_TRAP_EL3; - } - return CP_ACCESS_OK; -} - -/* Check for traps to performance monitor registers, which are controlled +/* + * Check for traps to performance monitor registers, which are controlled * by MDCR_EL2.TPM for EL2 and MDCR_EL3.TPM for EL3. */ static CPAccessResult access_tpm(CPUARMState *env, const ARMCPRegInfo *ri, @@ -627,8 +333,8 @@ static CPAccessResult access_tpm(CPUARMState *env, const ARMCPRegInfo *ri, } /* Check for traps from EL1 due to HCR_EL2.TVM and HCR_EL2.TRVM. 
*/ -static CPAccessResult access_tvm_trvm(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) +CPAccessResult access_tvm_trvm(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) { if (arm_current_el(env) == 1) { uint64_t trap = isread ? HCR_TRVM : HCR_TVM; @@ -669,6 +375,30 @@ static CPAccessResult access_ttlb(CPUARMState *env, const ARMCPRegInfo *ri, return CP_ACCESS_OK; } +/* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBIS. */ +static CPAccessResult access_ttlbis(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1 && + (arm_hcr_el2_eff(env) & (HCR_TTLB | HCR_TTLBIS))) { + return CP_ACCESS_TRAP_EL2; + } + return CP_ACCESS_OK; +} + +#ifdef TARGET_AARCH64 +/* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBOS. */ +static CPAccessResult access_ttlbos(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1 && + (arm_hcr_el2_eff(env) & (HCR_TTLB | HCR_TTLBOS))) { + return CP_ACCESS_TRAP_EL2; + } + return CP_ACCESS_OK; +} +#endif + static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { ARMCPU *cpu = env_archcpu(env); @@ -682,7 +412,8 @@ static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) ARMCPU *cpu = env_archcpu(env); if (raw_read(env, ri) != value) { - /* Unlike real hardware the qemu TLB uses virtual addresses, + /* + * Unlike real hardware the qemu TLB uses virtual addresses, * not modified virtual addresses, so this causes a TLB flush. */ tlb_flush(CPU(cpu)); @@ -697,7 +428,8 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, if (raw_read(env, ri) != value && !arm_feature(env, ARM_FEATURE_PMSA) && !extended_addresses_enabled(env)) { - /* For VMSA (when not using the LPAE long descriptor page table + /* + * For VMSA (when not using the LPAE long descriptor page table * format) this register includes the ASID, so do a TLB flush. * For PMSA it is purely a process ID and no action is needed. */ @@ -706,6 +438,21 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri, raw_write(env, ri, value); } +static int alle1_tlbmask(CPUARMState *env) +{ + /* + * Note that the 'ALL' scope must invalidate both stage 1 and + * stage 2 translations, whereas most other scopes only invalidate + * stage 1 translations. 
+ */ + return (ARMMMUIdxBit_E10_1 | + ARMMMUIdxBit_E10_1_PAN | + ARMMMUIdxBit_E10_0 | + ARMMMUIdxBit_Stage2 | + ARMMMUIdxBit_Stage2_S); +} + + /* IS variants of TLB operations must affect all cores */ static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) @@ -808,10 +555,7 @@ static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); - tlb_flush_by_mmuidx(cs, - ARMMMUIdxBit_E10_1 | - ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0); + tlb_flush_by_mmuidx(cs, alle1_tlbmask(env)); } static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -819,10 +563,7 @@ static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); - tlb_flush_by_mmuidx_all_cpus_synced(cs, - ARMMMUIdxBit_E10_1 | - ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0); + tlb_flush_by_mmuidx_all_cpus_synced(cs, alle1_tlbmask(env)); } @@ -861,8 +602,27 @@ static void tlbimva_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, ARMMMUIdxBit_E2); } +static void tlbiipas2_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + uint64_t pageaddr = (value & MAKE_64BIT_MASK(0, 28)) << 12; + + tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2); +} + +static void tlbiipas2is_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + uint64_t pageaddr = (value & MAKE_64BIT_MASK(0, 28)) << 12; + + tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, ARMMMUIdxBit_Stage2); +} + static const ARMCPRegInfo cp_reginfo[] = { - /* Define the secure and non-secure FCSE identifier CP registers + /* + * Define the secure and non-secure FCSE identifier CP registers * separately because there is no secure bank in V8 (no _EL3). This allows * the secure register to be properly reset and migrated. There is also no * v8 EL1 version of the register so the non-secure instance stands alone. @@ -877,7 +637,8 @@ static const ARMCPRegInfo cp_reginfo[] = { .access = PL1_RW, .secure = ARM_CP_SECSTATE_S, .fieldoffset = offsetof(CPUARMState, cp15.fcseidr_s), .resetvalue = 0, .writefn = fcse_write, .raw_writefn = raw_write, }, - /* Define the secure and non-secure context identifier CP registers + /* + * Define the secure and non-secure context identifier CP registers * separately because there is no secure bank in V8 (no _EL3). This allows * the secure register to be properly reset and migrated. 
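On the TLBI IPAS2 writefns above: the register payload carries bits [39:12] of the intermediate physical address in its low 28 bits, so the page address is recovered with a 28-bit mask and a 12-bit shift. Standalone restatement (sketch):

#include <stdint.h>

static uint64_t ipas2_page_address(uint64_t value)
{
    return (value & ((1ULL << 28) - 1)) << 12;  /* IPA[39:12] << 12 */
}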
In the * non-secure case, the 32-bit register will have reset and migration @@ -886,6 +647,8 @@ static const ARMCPRegInfo cp_reginfo[] = { { .name = "CONTEXTIDR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 1, .access = PL1_RW, .accessfn = access_tvm_trvm, + .fgt = FGT_CONTEXTIDR_EL1, + .nv2_redirect_offset = 0x108 | NV2_REDIR_NV1, .secure = ARM_CP_SECSTATE_NS, .fieldoffset = offsetof(CPUARMState, cp15.contextidr_el[1]), .resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, }, @@ -895,11 +658,11 @@ static const ARMCPRegInfo cp_reginfo[] = { .secure = ARM_CP_SECSTATE_S, .fieldoffset = offsetof(CPUARMState, cp15.contextidr_s), .resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, }, - REGINFO_SENTINEL }; static const ARMCPRegInfo not_v8_cp_reginfo[] = { - /* NB: Some of these registers exist in v8 but with more precise + /* + * NB: Some of these registers exist in v8 but with more precise * definitions that don't use CP_ANY wildcards (mostly in v8_cp_reginfo[]). */ /* MMU Domain access control / MPU write buffer control */ @@ -909,7 +672,8 @@ static const ARMCPRegInfo not_v8_cp_reginfo[] = { .writefn = dacr_write, .raw_writefn = raw_write, .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.dacr_s), offsetoflow32(CPUARMState, cp15.dacr_ns) } }, - /* ARMv7 allocates a range of implementation defined TLB LOCKDOWN regs. + /* + * ARMv7 allocates a range of implementation defined TLB LOCKDOWN regs. * For v6 and v5, these mappings are overly broad. */ { .name = "TLB_LOCKDOWN", .cp = 15, .crn = 10, .crm = 0, @@ -924,25 +688,26 @@ static const ARMCPRegInfo not_v8_cp_reginfo[] = { { .name = "CACHEMAINT", .cp = 15, .crn = 7, .crm = CP_ANY, .opc1 = 0, .opc2 = CP_ANY, .access = PL1_W, .type = ARM_CP_NOP | ARM_CP_OVERRIDE }, - REGINFO_SENTINEL }; static const ARMCPRegInfo not_v6_cp_reginfo[] = { - /* Not all pre-v6 cores implemented this WFI, so this is slightly + /* + * Not all pre-v6 cores implemented this WFI, so this is slightly * over-broad. */ { .name = "WFI_v5", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = 2, .access = PL1_W, .type = ARM_CP_WFI }, - REGINFO_SENTINEL }; static const ARMCPRegInfo not_v7_cp_reginfo[] = { - /* Standard v6 WFI (also used in some pre-v6 cores); not in v7 (which + /* + * Standard v6 WFI (also used in some pre-v6 cores); not in v7 (which * is UNPREDICTABLE; we choose to NOP as most implementations do). */ { .name = "WFI_v6", .cp = 15, .crn = 7, .crm = 0, .opc1 = 0, .opc2 = 4, .access = PL1_W, .type = ARM_CP_WFI }, - /* L1 cache lockdown. Not architectural in v6 and earlier but in practice + /* + * L1 cache lockdown. Not architectural in v6 and earlier but in practice * implemented in 926, 946, 1026, 1136, 1176 and 11MPCore. StrongARM and * OMAPCP will override this space. */ @@ -956,14 +721,16 @@ static const ARMCPRegInfo not_v7_cp_reginfo[] = { { .name = "DUMMY", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = CP_ANY, .access = PL1_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, .resetvalue = 0 }, - /* We don't implement pre-v7 debug but most CPUs had at least a DBGDIDR; + /* + * We don't implement pre-v7 debug but most CPUs had at least a DBGDIDR; * implementing it as RAZ means the "debug architecture version" bits * will read as a reserved value, which should cause Linux to not try * to use the debug hardware. */ { .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0, .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 }, - /* MMU TLB control. 
Note that the wildcarding means we cover not just + /* + * MMU TLB control. Note that the wildcarding means we cover not just * the unified TLB ops but also the dside/iside/inner-shareable variants. */ { .name = "TLBIALL", .cp = 15, .crn = 8, .crm = CP_ANY, @@ -982,7 +749,6 @@ static const ARMCPRegInfo not_v7_cp_reginfo[] = { .opc1 = 0, .opc2 = 0, .access = PL1_RW, .type = ARM_CP_NOP }, { .name = "NMRR", .cp = 15, .crn = 10, .crm = 2, .opc1 = 0, .opc2 = 1, .access = PL1_RW, .type = ARM_CP_NOP }, - REGINFO_SENTINEL }; static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -992,25 +758,30 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri, /* In ARMv8 most bits of CPACR_EL1 are RES0. */ if (!arm_feature(env, ARM_FEATURE_V8)) { - /* ARMv7 defines bits for unimplemented coprocessors as RAZ/WI. + /* + * ARMv7 defines bits for unimplemented coprocessors as RAZ/WI. * ASEDIS [31] and D32DIS [30] are both UNK/SBZP without VFP. * TRCDIS [28] is RAZ/WI since we do not implement a trace macrocell. */ if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))) { /* VFP coprocessor: cp10 & cp11 [23:20] */ - mask |= (1 << 31) | (1 << 30) | (0xf << 20); + mask |= R_CPACR_ASEDIS_MASK | + R_CPACR_D32DIS_MASK | + R_CPACR_CP11_MASK | + R_CPACR_CP10_MASK; if (!arm_feature(env, ARM_FEATURE_NEON)) { /* ASEDIS [31] bit is RAO/WI */ - value |= (1 << 31); + value |= R_CPACR_ASEDIS_MASK; } - /* VFPv3 and upwards with NEON implement 32 double precision + /* + * VFPv3 and upwards with NEON implement 32 double precision * registers (D0-D31). */ if (!cpu_isar_feature(aa32_simd_r32, env_archcpu(env))) { /* D32DIS [30] is RAO/WI if D16-31 are not implemented. */ - value |= (1 << 30); + value |= R_CPACR_D32DIS_MASK; } } value &= mask; @@ -1022,8 +793,8 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri, */ if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) && !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) { - value &= ~(0xf << 20); - value |= env->cp15.cpacr_el1 & (0xf << 20); + mask = R_CPACR_CP11_MASK | R_CPACR_CP10_MASK; + value = (value & ~mask) | (env->cp15.cpacr_el1 & mask); } env->cp15.cpacr_el1 = value; @@ -1039,7 +810,7 @@ static uint64_t cpacr_read(CPUARMState *env, const ARMCPRegInfo *ri) if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) && !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) { - value &= ~(0xf << 20); + value = ~(R_CPACR_CP11_MASK | R_CPACR_CP10_MASK); } return value; } @@ -1047,7 +818,8 @@ static uint64_t cpacr_read(CPUARMState *env, const ARMCPRegInfo *ri) static void cpacr_reset(CPUARMState *env, const ARMCPRegInfo *ri) { - /* Call cpacr_write() so that we reset with the correct RAO bits set + /* + * Call cpacr_write() so that we reset with the correct RAO bits set * for our CPU features. 
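For orientation in cpacr_write()/cpacr_read() above: the R_CPACR_* names are FIELD()-generated masks; the raw layout those masks replace, restated as plain shifts (illustrative, not the generated definitions):

#define CPACR_CP10_MASK    (0x3u << 20)  /* FP/SIMD access control   */
#define CPACR_CP11_MASK    (0x3u << 22)  /* paired with cp10 for VFP */
#define CPACR_TRCDIS_MASK  (1u << 28)    /* trace disable            */
#define CPACR_D32DIS_MASK  (1u << 30)    /* RAO/WI without D16-D31   */
#define CPACR_ASEDIS_MASK  (1u << 31)    /* RAO/WI without Neon      */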
*/ cpacr_write(env, ri, 0); @@ -1059,11 +831,11 @@ static CPAccessResult cpacr_access(CPUARMState *env, const ARMCPRegInfo *ri, if (arm_feature(env, ARM_FEATURE_V8)) { /* Check if CPACR accesses are to be trapped to EL2 */ if (arm_current_el(env) == 1 && arm_is_el2_enabled(env) && - (env->cp15.cptr_el[2] & CPTR_TCPAC)) { + FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TCPAC)) { return CP_ACCESS_TRAP_EL2; /* Check if CPACR accesses are to be trapped to EL3 */ } else if (arm_current_el(env) < 3 && - (env->cp15.cptr_el[3] & CPTR_TCPAC)) { + FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, TCPAC)) { return CP_ACCESS_TRAP_EL3; } } @@ -1075,7 +847,8 @@ static CPAccessResult cptr_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { /* Check if CPTR accesses are set to trap to EL3 */ - if (arm_current_el(env) == 2 && (env->cp15.cptr_el[3] & CPTR_TCPAC)) { + if (arm_current_el(env) == 2 && + FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, TCPAC)) { return CP_ACCESS_TRAP_EL3; } @@ -1087,7 +860,8 @@ static const ARMCPRegInfo v6_cp_reginfo[] = { { .name = "MVA_prefetch", .cp = 15, .crn = 7, .crm = 13, .opc1 = 0, .opc2 = 1, .access = PL1_W, .type = ARM_CP_NOP }, - /* We need to break the TB after ISB to execute self-modifying code + /* + * We need to break the TB after ISB to execute self-modifying code * correctly and also to take any pending interrupts immediately. * So use arm_cp_write_ignore() function instead of ARM_CP_NOP flag. */ @@ -1102,16 +876,18 @@ static const ARMCPRegInfo v6_cp_reginfo[] = { .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ifar_s), offsetof(CPUARMState, cp15.ifar_ns) }, .resetvalue = 0, }, - /* Watchpoint Fault Address Register : should actually only be present + /* + * Watchpoint Fault Address Register : should actually only be present * for 1136, 1176, 11MPCore. 
*/ { .name = "WFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1, .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, }, { .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2, .accessfn = cpacr_access, + .fgt = FGT_CPACR_EL1, + .nv2_redirect_offset = 0x100 | NV2_REDIR_NV1, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.cpacr_el1), .resetfn = cpacr_reset, .writefn = cpacr_write, .readfn = cpacr_read }, - REGINFO_SENTINEL }; typedef struct pm_event { @@ -1173,30 +949,33 @@ static int64_t cycles_ns_per(uint64_t cycles) static bool instructions_supported(CPUARMState *env) { - return icount_enabled() == 1; /* Precise instruction counting */ + /* Precise instruction counting */ + return icount_enabled() == ICOUNT_PRECISE; } static uint64_t instructions_get_count(CPUARMState *env) { + assert(icount_enabled() == ICOUNT_PRECISE); return (uint64_t)icount_get_raw(); } static int64_t instructions_ns_per(uint64_t icount) { + assert(icount_enabled() == ICOUNT_PRECISE); return icount_to_ns((int64_t)icount); } #endif -static bool pmu_8_1_events_supported(CPUARMState *env) +static bool pmuv3p1_events_supported(CPUARMState *env) { /* For events which are supported in any v8.1 PMU */ - return cpu_isar_feature(any_pmu_8_1, env_archcpu(env)); + return cpu_isar_feature(any_pmuv3p1, env_archcpu(env)); } -static bool pmu_8_4_events_supported(CPUARMState *env) +static bool pmuv3p4_events_supported(CPUARMState *env) { /* For events which are supported in any v8.4 PMU */ - return cpu_isar_feature(any_pmu_8_4, env_archcpu(env)); + return cpu_isar_feature(any_pmuv3p4, env_archcpu(env)); } static uint64_t zero_event_get_count(CPUARMState *env) @@ -1230,17 +1009,17 @@ static const pm_event pm_events[] = { }, #endif { .number = 0x023, /* STALL_FRONTEND */ - .supported = pmu_8_1_events_supported, + .supported = pmuv3p1_events_supported, .get_count = zero_event_get_count, .ns_per_count = zero_event_ns_per, }, { .number = 0x024, /* STALL_BACKEND */ - .supported = pmu_8_1_events_supported, + .supported = pmuv3p1_events_supported, .get_count = zero_event_get_count, .ns_per_count = zero_event_ns_per, }, { .number = 0x03c, /* STALL */ - .supported = pmu_8_4_events_supported, + .supported = pmuv3p4_events_supported, .get_count = zero_event_get_count, .ns_per_count = zero_event_ns_per, }, @@ -1308,7 +1087,8 @@ static bool event_supported(uint16_t number) static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { - /* Performance monitor registers user accessibility is controlled + /* + * Performance monitor registers user accessibility is controlled * by PMUSERENR. MDCR_EL2.TPM and MDCR_EL3.TPM allow configurable * trapping to EL2 or EL3 for other accesses. */ @@ -1387,23 +1167,41 @@ static CPAccessResult pmreg_access_ccntr(CPUARMState *env, return pmreg_access(env, ri, isread); } -/* Returns true if the counter (pass 31 for PMCCNTR) should count events using +/* + * Bits in MDCR_EL2 and MDCR_EL3 which pmu_counter_enabled() looks at. + * We use these to decide whether we need to wrap a write to MDCR_EL2 + * or MDCR_EL3 in pmu_op_start()/pmu_op_finish() calls. + */ +#define MDCR_EL2_PMU_ENABLE_BITS \ + (MDCR_HPME | MDCR_HPMD | MDCR_HPMN | MDCR_HCCD | MDCR_HLP) +#define MDCR_EL3_PMU_ENABLE_BITS (MDCR_SPME | MDCR_SCCD) + +/* + * Returns true if the counter (pass 31 for PMCCNTR) should count events using * the current EL, security state, and register configuration.
*/ static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter) { uint64_t filter; bool e, p, u, nsk, nsu, nsh, m; - bool enabled, prohibited, filtered; + bool enabled, prohibited = false, filtered; bool secure = arm_is_secure(env); int el = arm_current_el(env); - uint64_t mdcr_el2 = arm_mdcr_el2_eff(env); - uint8_t hpmn = mdcr_el2 & MDCR_HPMN; + uint64_t mdcr_el2; + uint8_t hpmn; + /* + * We might be called for M-profile cores where MDCR_EL2 doesn't + * exist and arm_mdcr_el2_eff() will assert, so this early-exit check + * must be before we read that value. + */ if (!arm_feature(env, ARM_FEATURE_PMU)) { return false; } + mdcr_el2 = arm_mdcr_el2_eff(env); + hpmn = mdcr_el2 & MDCR_HPMN; + if (!arm_feature(env, ARM_FEATURE_EL2) || (counter < hpmn || counter == 31)) { e = env->cp15.c9_pmcr & PMCRE; @@ -1412,19 +1210,29 @@ static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter) } enabled = e && (env->cp15.c9_pmcnten & (1 << counter)); - if (!secure) { - if (el == 2 && (counter < hpmn || counter == 31)) { - prohibited = mdcr_el2 & MDCR_HPMD; - } else { - prohibited = false; - } - } else { - prohibited = arm_feature(env, ARM_FEATURE_EL3) && - !(env->cp15.mdcr_el3 & MDCR_SPME); + /* Is event counting prohibited? */ + if (el == 2 && (counter < hpmn || counter == 31)) { + prohibited = mdcr_el2 & MDCR_HPMD; + } + if (secure) { + prohibited = prohibited || !(env->cp15.mdcr_el3 & MDCR_SPME); } - if (prohibited && counter == 31) { - prohibited = env->cp15.c9_pmcr & PMCRDP; + if (counter == 31) { + /* + * The cycle counter defaults to running. PMCR.DP says "disable + * the cycle counter when event counting is prohibited". + * Some MDCR bits disable the cycle counter specifically. + */ + prohibited = prohibited && env->cp15.c9_pmcr & PMCRDP; + if (cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { + if (secure) { + prohibited = prohibited || (env->cp15.mdcr_el3 & MDCR_SCCD); + } + if (el == 2) { + prohibited = prohibited || (mdcr_el2 & MDCR_HCCD); + } + } } if (counter == 31) { @@ -1472,6 +1280,43 @@ static void pmu_update_irq(CPUARMState *env) (env->cp15.c9_pminten & env->cp15.c9_pmovsr)); } +static bool pmccntr_clockdiv_enabled(CPUARMState *env) +{ + /* + * Return true if the clock divider is enabled and the cycle counter + * is supposed to tick only once every 64 clock cycles. This is + * controlled by PMCR.D, but if PMCR.LC is set to enable the long + * (64-bit) cycle counter PMCR.D has no effect. + */ + return (env->cp15.c9_pmcr & (PMCRD | PMCRLC)) == PMCRD; +} + +static bool pmevcntr_is_64_bit(CPUARMState *env, int counter) +{ + /* Return true if the specified event counter is configured to be 64 bit */ + + /* This isn't intended to be used with the cycle counter */ + assert(counter < 31); + + if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { + return false; + } + + if (arm_feature(env, ARM_FEATURE_EL2)) { + /* + * MDCR_EL2.HLP still applies even when EL2 is disabled in the + * current security state, so we don't use arm_mdcr_el2_eff() here. 
+ */ + bool hlp = env->cp15.mdcr_el2 & MDCR_HLP; + int hpmn = env->cp15.mdcr_el2 & MDCR_HPMN; + + if (counter >= hpmn) { + return hlp; + } + } + return env->cp15.c9_pmcr & PMCRLP; +} + /* * Ensure c15_ccnt is the guest-visible count so that operations such as * enabling/disabling the counter or filtering, modifying the count itself, @@ -1484,8 +1329,7 @@ static void pmccntr_op_start(CPUARMState *env) if (pmu_counter_enabled(env, 31)) { uint64_t eff_cycles = cycles; - if (env->cp15.c9_pmcr & PMCRD) { - /* Increment once every 64 processor clock cycles */ + if (pmccntr_clockdiv_enabled(env)) { eff_cycles /= 64; } @@ -1494,7 +1338,7 @@ static void pmccntr_op_start(CPUARMState *env) uint64_t overflow_mask = env->cp15.c9_pmcr & PMCRLC ? \ 1ull << 63 : 1ull << 31; if (env->cp15.c15_ccnt & ~new_pmccntr & overflow_mask) { - env->cp15.c9_pmovsr |= (1 << 31); + env->cp15.c9_pmovsr |= (1ULL << 31); pmu_update_irq(env); } @@ -1520,16 +1364,18 @@ static void pmccntr_op_finish(CPUARMState *env) int64_t overflow_in = cycles_ns_per(remaining_cycles); if (overflow_in > 0) { - int64_t overflow_at = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + - overflow_in; - ARMCPU *cpu = env_archcpu(env); - timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at); + int64_t overflow_at; + + if (!sadd64_overflow(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + overflow_in, &overflow_at)) { + ARMCPU *cpu = env_archcpu(env); + timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at); + } } #endif uint64_t prev_cycles = env->cp15.c15_ccnt_delta; - if (env->cp15.c9_pmcr & PMCRD) { - /* Increment once every 64 processor clock cycles */ + if (pmccntr_clockdiv_enabled(env)) { prev_cycles /= 64; } env->cp15.c15_ccnt_delta = prev_cycles - env->cp15.c15_ccnt; @@ -1547,9 +1393,11 @@ static void pmevcntr_op_start(CPUARMState *env, uint8_t counter) } if (pmu_counter_enabled(env, counter)) { - uint32_t new_pmevcntr = count - env->cp15.c14_pmevcntr_delta[counter]; + uint64_t new_pmevcntr = count - env->cp15.c14_pmevcntr_delta[counter]; + uint64_t overflow_mask = pmevcntr_is_64_bit(env, counter) ? 
+ 1ULL << 63 : 1ULL << 31; - if (env->cp15.c14_pmevcntr[counter] & ~new_pmevcntr & INT32_MIN) { + if (env->cp15.c14_pmevcntr[counter] & ~new_pmevcntr & overflow_mask) { env->cp15.c9_pmovsr |= (1 << counter); pmu_update_irq(env); } @@ -1564,15 +1412,22 @@ static void pmevcntr_op_finish(CPUARMState *env, uint8_t counter) #ifndef CONFIG_USER_ONLY uint16_t event = env->cp15.c14_pmevtyper[counter] & PMXEVTYPER_EVTCOUNT; uint16_t event_idx = supported_event_map[event]; - uint64_t delta = UINT32_MAX - - (uint32_t)env->cp15.c14_pmevcntr[counter] + 1; - int64_t overflow_in = pm_events[event_idx].ns_per_count(delta); + uint64_t delta = -(env->cp15.c14_pmevcntr[counter] + 1); + int64_t overflow_in; + + if (!pmevcntr_is_64_bit(env, counter)) { + delta = (uint32_t)delta; + } + overflow_in = pm_events[event_idx].ns_per_count(delta); if (overflow_in > 0) { - int64_t overflow_at = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + - overflow_in; - ARMCPU *cpu = env_archcpu(env); - timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at); + int64_t overflow_at; + + if (!sadd64_overflow(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + overflow_in, &overflow_at)) { + ARMCPU *cpu = env_archcpu(env); + timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at); + } } #endif @@ -1640,16 +1495,34 @@ static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri, } } - env->cp15.c9_pmcr &= ~PMCR_WRITEABLE_MASK; - env->cp15.c9_pmcr |= (value & PMCR_WRITEABLE_MASK); + env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK; + env->cp15.c9_pmcr |= (value & PMCR_WRITABLE_MASK); pmu_op_finish(env); } +static uint64_t pmcr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint64_t pmcr = env->cp15.c9_pmcr; + + /* + * If EL2 is implemented and enabled for the current security state, reads + * of PMCR.N from EL1 or EL0 return the value of MDCR_EL2.HPMN or HDCR.HPMN. + */ + if (arm_current_el(env) <= 1 && arm_is_el2_enabled(env)) { + pmcr &= ~PMCRN_MASK; + pmcr |= (env->cp15.mdcr_el2 & MDCR_HPMN) << PMCRN_SHIFT; + } + + return pmcr; +} + static void pmswinc_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { unsigned int i; + uint64_t overflow_mask, new_pmswinc; + for (i = 0; i < pmu_num_counters(env); i++) { /* Increment a counter's count iff: */ if ((value & (1 << i)) && /* counter's bit is set */ @@ -1663,9 +1536,12 @@ static void pmswinc_write(CPUARMState *env, const ARMCPRegInfo *ri, * Detect if this write causes an overflow since we can't predict * PMSWINC overflows like we can for other events */ - uint32_t new_pmswinc = env->cp15.c14_pmevcntr[i] + 1; + new_pmswinc = env->cp15.c14_pmevcntr[i] + 1; - if (env->cp15.c14_pmevcntr[i] & ~new_pmswinc & INT32_MIN) { + overflow_mask = pmevcntr_is_64_bit(env, i) ? + 1ULL << 63 : 1ULL << 31; + + if (env->cp15.c14_pmevcntr[i] & ~new_pmswinc & overflow_mask) { env->cp15.c9_pmovsr |= (1 << i); pmu_update_irq(env); } @@ -1689,7 +1565,8 @@ static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri) static void pmselr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* The value of PMSELR.SEL affects the behavior of PMXEVTYPER and + /* + * The value of PMSELR.SEL affects the behavior of PMXEVTYPER and * PMXEVCNTR. We allow [0..31] to be written to PMSELR here; in the * meanwhile, we check PMSELR.SEL when PMXEVTYPER and PMXEVCNTR are * accessed. 
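The overflow checks in the hunks above all share one idiom: a counter has overflowed across an update exactly when its top bit went from 1 to 0, i.e. when old & ~new & mask is non-zero, with the mask selecting bit 63 for a 64-bit event counter and bit 31 otherwise. A minimal standalone sketch of that idiom (plain C, not QEMU code; overflow_mask() and counter_overflowed() are invented names, and the bool parameter stands in for pmevcntr_is_64_bit()):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Pick the overflow bit: bit 63 for a 64-bit event counter, bit 31 otherwise. */
static uint64_t overflow_mask(bool is_64bit)
{
    return is_64bit ? 1ULL << 63 : 1ULL << 31;
}

/* True if stepping the counter from oldval to newval wrapped past the top bit. */
static bool counter_overflowed(uint64_t oldval, uint64_t newval, bool is_64bit)
{
    return (oldval & ~newval & overflow_mask(is_64bit)) != 0;
}

int main(void)
{
    /* A 32-bit counter stepping from 0xffffffff to 0: overflow. */
    printf("%d\n", counter_overflowed(0xffffffffULL, 0, false));             /* 1 */
    /* The same step seen by a 64-bit counter: no overflow yet. */
    printf("%d\n", counter_overflowed(0xffffffffULL, 0x100000000ULL, true)); /* 0 */
    return 0;
}

This is why widening new_pmevcntr and new_pmswinc to uint64_t and switching from INT32_MIN to a computed mask is sufficient: the same expression then covers both counter widths.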
@@ -1740,15 +1617,19 @@ static uint64_t pmccfiltr_read_a32(CPUARMState *env, const ARMCPRegInfo *ri) static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { + pmu_op_start(env); value &= pmu_counter_mask(env); env->cp15.c9_pmcnten |= value; + pmu_op_finish(env); } static void pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { + pmu_op_start(env); value &= pmu_counter_mask(env); env->cp15.c9_pmcnten &= ~value; + pmu_op_finish(env); } static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -1796,7 +1677,8 @@ static void pmevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri, env->cp15.c14_pmevtyper[counter] = value & PMXEVTYPER_MASK; pmevcntr_op_finish(env, counter); } - /* Attempts to access PMXEVTYPER are CONSTRAINED UNPREDICTABLE when + /* + * Attempts to access PMXEVTYPER are CONSTRAINED UNPREDICTABLE when * PMSELR value is equal to or greater than the number of implemented * counters, but not equal to 0x1f. We opt to behave as a RAZ/WI. */ @@ -1835,7 +1717,7 @@ static void pmevtyper_rawwrite(CPUARMState *env, const ARMCPRegInfo *ri, * pmevtyper_rawwrite is called between a pair of pmu_op_start and * pmu_op_finish calls when loading saved state for a migration. Because * we're potentially updating the type of event here, the value written to - * c14_pmevcntr_delta by the preceeding pmu_op_start call may be for a + * c14_pmevcntr_delta by the preceding pmu_op_start call may be for a * different counter type. Therefore, we need to set this value to the * current count for the counter type we're writing so that pmu_op_finish * has the correct count for its calculation. @@ -1868,6 +1750,10 @@ static uint64_t pmxevtyper_read(CPUARMState *env, const ARMCPRegInfo *ri) static void pmevcntr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value, uint8_t counter) { + if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { + /* Before FEAT_PMUv3p5, top 32 bits of event counters are RES0 */ + value &= MAKE_64BIT_MASK(0, 32); + } if (counter < pmu_num_counters(env)) { pmevcntr_op_start(env, counter); env->cp15.c14_pmevcntr[counter] = value; @@ -1887,10 +1773,16 @@ static uint64_t pmevcntr_read(CPUARMState *env, const ARMCPRegInfo *ri, pmevcntr_op_start(env, counter); ret = env->cp15.c14_pmevcntr[counter]; pmevcntr_op_finish(env, counter); + if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) { + /* Before FEAT_PMUv3p5, top 32 bits of event counters are RES0 */ + ret &= MAKE_64BIT_MASK(0, 32); + } return ret; } else { - /* We opt to behave as a RAZ/WI when attempts to access PM[X]EVCNTR - * are CONSTRAINED UNPREDICTABLE. */ + /* + * We opt to behave as a RAZ/WI when attempts to access PM[X]EVCNTR + * are CONSTRAINED UNPREDICTABLE. + */ return 0; } } @@ -1965,7 +1857,8 @@ static void pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri, static void vbar_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* Note that even though the AArch64 view of this register has bits + /* + * Note that even though the AArch64 view of this register has bits * [10:0] all RES0 we can only mask the bottom 5, to comply with the * architectural requirements for bits which are RES0 only in some * contexts. (ARMv8 would permit us to do no masking at all, but ARMv7 @@ -1977,16 +1870,26 @@ static void vbar_write(CPUARMState *env, const ARMCPRegInfo *ri, static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { /* Begin with base v8.0 state. 
*/ - uint32_t valid_mask = 0x3fff; + uint64_t valid_mask = 0x3fff; ARMCPU *cpu = env_archcpu(env); + uint64_t changed; - if (ri->state == ARM_CP_STATE_AA64) { - if (arm_feature(env, ARM_FEATURE_AARCH64) && - !cpu_isar_feature(aa64_aa32_el1, cpu)) { - value |= SCR_FW | SCR_AW; /* these two bits are RES1. */ - } - valid_mask &= ~SCR_NET; + /* + * Because SCR_EL3 is the "real" cpreg and SCR is the alias, reset always + * passes the reginfo for SCR_EL3, which has type ARM_CP_STATE_AA64. + * Instead, choose the format based on the mode of EL3. + */ + if (arm_el_is_aa64(env, 3)) { + value |= SCR_FW | SCR_AW; /* RES1 */ + valid_mask &= ~SCR_NET; /* RES0 */ + if (!cpu_isar_feature(aa64_aa32_el1, cpu) && + !cpu_isar_feature(aa64_aa32_el2, cpu)) { + value |= SCR_RW; /* RAO/WI */ + } + if (cpu_isar_feature(aa64_ras, cpu)) { + valid_mask |= SCR_TERR; + } if (cpu_isar_feature(aa64_lor, cpu)) { valid_mask |= SCR_TLOR; } @@ -1995,18 +1898,46 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) } if (cpu_isar_feature(aa64_sel2, cpu)) { valid_mask |= SCR_EEL2; + } else if (cpu_isar_feature(aa64_rme, cpu)) { + /* With RME and without SEL2, NS is RES1 (R_GSWWH, I_DJJQJ). */ + value |= SCR_NS; } if (cpu_isar_feature(aa64_mte, cpu)) { valid_mask |= SCR_ATA; } + if (cpu_isar_feature(aa64_scxtnum, cpu)) { + valid_mask |= SCR_ENSCXT; + } + if (cpu_isar_feature(aa64_doublefault, cpu)) { + valid_mask |= SCR_EASE | SCR_NMEA; + } + if (cpu_isar_feature(aa64_sme, cpu)) { + valid_mask |= SCR_ENTP2; + } + if (cpu_isar_feature(aa64_hcx, cpu)) { + valid_mask |= SCR_HXEN; + } + if (cpu_isar_feature(aa64_fgt, cpu)) { + valid_mask |= SCR_FGTEN; + } + if (cpu_isar_feature(aa64_rme, cpu)) { + valid_mask |= SCR_NSE | SCR_GPF; + } + if (cpu_isar_feature(aa64_ecv, cpu)) { + valid_mask |= SCR_ECVEN; + } } else { valid_mask &= ~(SCR_RW | SCR_ST); + if (cpu_isar_feature(aa32_ras, cpu)) { + valid_mask |= SCR_TERR; + } } if (!arm_feature(env, ARM_FEATURE_EL2)) { valid_mask &= ~SCR_HCE; - /* On ARMv7, SMD (or SCD as it is called in v7) is only + /* + * On ARMv7, SMD (or SCD as it is called in v7) is only * supported if EL2 exists. The bit is UNK/SBZP when * EL2 is unavailable. In QEMU ARMv7, we force it to always zero * when EL2 is unavailable. @@ -2020,7 +1951,22 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) /* Clear all-context RES0 bits. */ value &= valid_mask; - raw_write(env, ri, value); + changed = env->cp15.scr_el3 ^ value; + env->cp15.scr_el3 = value; + + /* + * If SCR_EL3.{NS,NSE} changes, i.e. change of security state, + * we must invalidate all TLBs below EL3. 
+ */ + if (changed & (SCR_NS | SCR_NSE)) { + tlb_flush_by_mmuidx(env_cpu(env), (ARMMMUIdxBit_E10_0 | + ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E10_1 | + ARMMMUIdxBit_E20_2 | + ARMMMUIdxBit_E10_1_PAN | + ARMMMUIdxBit_E20_2_PAN | + ARMMMUIdxBit_E2)); + } } static void scr_reset(CPUARMState *env, const ARMCPRegInfo *ri) @@ -2032,11 +1978,12 @@ static void scr_reset(CPUARMState *env, const ARMCPRegInfo *ri) scr_write(env, ri, 0); } -static CPAccessResult access_aa64_tid2(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) +static CPAccessResult access_tid4(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) { - if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_TID2)) { + if (arm_current_el(env) == 1 && + (arm_hcr_el2_eff(env) & (HCR_TID2 | HCR_TID4))) { return CP_ACCESS_TRAP_EL2; } @@ -2047,7 +1994,8 @@ static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = env_archcpu(env); - /* Acquire the CSSELR index from the bank corresponding to the CCSIDR + /* + * Acquire the CSSELR index from the bank corresponding to the CCSIDR * bank */ uint32_t index = A32_BANKED_REG_GET(env, csselr, @@ -2089,7 +2037,12 @@ static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri) } } - /* External aborts are not possible in QEMU so A bit is always clear */ + if (hcr_el2 & HCR_AMO) { + if (cs->interrupt_request & CPU_INTERRUPT_VSERR) { + ret |= CPSR_A; + } + } + return ret; } @@ -2117,7 +2070,8 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { /* the old v6 WFI, UNPREDICTABLE in v7 but we choose to NOP */ { .name = "NOP", .cp = 15, .crn = 7, .crm = 0, .opc1 = 0, .opc2 = 4, .access = PL1_W, .type = ARM_CP_NOP }, - /* Performance monitors are implementation defined in v7, + /* + * Performance monitors are implementation defined in v7, * but with an ARM recommended set of registers, which we * follow. * @@ -2129,67 +2083,79 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { * or PL0_RO as appropriate and then check PMUSERENR in the helper fn. 
*/ { .name = "PMCNTENSET", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 1, - .access = PL0_RW, .type = ARM_CP_ALIAS, + .access = PL0_RW, .type = ARM_CP_ALIAS | ARM_CP_IO, .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), .writefn = pmcntenset_write, .accessfn = pmreg_access, + .fgt = FGT_PMCNTEN, .raw_writefn = raw_write }, - { .name = "PMCNTENSET_EL0", .state = ARM_CP_STATE_AA64, + { .name = "PMCNTENSET_EL0", .state = ARM_CP_STATE_AA64, .type = ARM_CP_IO, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 1, .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMCNTEN, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), .resetvalue = 0, .writefn = pmcntenset_write, .raw_writefn = raw_write }, { .name = "PMCNTENCLR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 2, .access = PL0_RW, .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), .accessfn = pmreg_access, + .fgt = FGT_PMCNTEN, .writefn = pmcntenclr_write, - .type = ARM_CP_ALIAS }, + .type = ARM_CP_ALIAS | ARM_CP_IO }, { .name = "PMCNTENCLR_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 2, .access = PL0_RW, .accessfn = pmreg_access, - .type = ARM_CP_ALIAS, + .fgt = FGT_PMCNTEN, + .type = ARM_CP_ALIAS | ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), .writefn = pmcntenclr_write }, { .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3, .access = PL0_RW, .type = ARM_CP_IO, .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr), .accessfn = pmreg_access, + .fgt = FGT_PMOVS, .writefn = pmovsr_write, .raw_writefn = raw_write }, { .name = "PMOVSCLR_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 3, .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMOVS, .type = ARM_CP_ALIAS | ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr), .writefn = pmovsr_write, .raw_writefn = raw_write }, { .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4, .access = PL0_W, .accessfn = pmreg_access_swinc, + .fgt = FGT_PMSWINC_EL0, .type = ARM_CP_NO_RAW | ARM_CP_IO, .writefn = pmswinc_write }, { .name = "PMSWINC_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 4, .access = PL0_W, .accessfn = pmreg_access_swinc, + .fgt = FGT_PMSWINC_EL0, .type = ARM_CP_NO_RAW | ARM_CP_IO, .writefn = pmswinc_write }, { .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5, .access = PL0_RW, .type = ARM_CP_ALIAS, + .fgt = FGT_PMSELR_EL0, .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmselr), .accessfn = pmreg_access_selr, .writefn = pmselr_write, .raw_writefn = raw_write}, { .name = "PMSELR_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 5, .access = PL0_RW, .accessfn = pmreg_access_selr, + .fgt = FGT_PMSELR_EL0, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmselr), .writefn = pmselr_write, .raw_writefn = raw_write, }, { .name = "PMCCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0, .access = PL0_RW, .resetvalue = 0, .type = ARM_CP_ALIAS | ARM_CP_IO, + .fgt = FGT_PMCCNTR_EL0, .readfn = pmccntr_read, .writefn = pmccntr_write32, .accessfn = pmreg_access_ccntr }, { .name = "PMCCNTR_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 0, .access = PL0_RW, .accessfn = pmreg_access_ccntr, + .fgt = FGT_PMCCNTR_EL0, .type = ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c15_ccnt), .readfn = pmccntr_read, .writefn = pmccntr_write, @@ -2197,32 +2163,38 @@ static const 
ARMCPRegInfo v7_cp_reginfo[] = { { .name = "PMCCFILTR", .cp = 15, .opc1 = 0, .crn = 14, .crm = 15, .opc2 = 7, .writefn = pmccfiltr_write_a32, .readfn = pmccfiltr_read_a32, .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMCCFILTR_EL0, .type = ARM_CP_ALIAS | ARM_CP_IO, .resetvalue = 0, }, { .name = "PMCCFILTR_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 15, .opc2 = 7, .writefn = pmccfiltr_write, .raw_writefn = raw_write, .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMCCFILTR_EL0, .type = ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.pmccfiltr_el0), .resetvalue = 0, }, { .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1, .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, .accessfn = pmreg_access, + .fgt = FGT_PMEVTYPERN_EL0, .writefn = pmxevtyper_write, .readfn = pmxevtyper_read }, { .name = "PMXEVTYPER_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 1, .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, .accessfn = pmreg_access, + .fgt = FGT_PMEVTYPERN_EL0, .writefn = pmxevtyper_write, .readfn = pmxevtyper_read }, { .name = "PMXEVCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 2, .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, .accessfn = pmreg_access_xevcntr, + .fgt = FGT_PMEVCNTRN_EL0, .writefn = pmxevcntr_write, .readfn = pmxevcntr_read }, { .name = "PMXEVCNTR_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 2, .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO, .accessfn = pmreg_access_xevcntr, + .fgt = FGT_PMEVCNTRN_EL0, .writefn = pmxevcntr_write, .readfn = pmxevcntr_read }, { .name = "PMUSERENR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 0, .access = PL0_R | PL1_RW, .accessfn = access_tpm, @@ -2237,6 +2209,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .writefn = pmuserenr_write, .raw_writefn = raw_write }, { .name = "PMINTENSET", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 1, .access = PL1_RW, .accessfn = access_tpm, + .fgt = FGT_PMINTEN, .type = ARM_CP_ALIAS | ARM_CP_IO, .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pminten), .resetvalue = 0, @@ -2244,68 +2217,85 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { { .name = "PMINTENSET_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 1, .access = PL1_RW, .accessfn = access_tpm, + .fgt = FGT_PMINTEN, .type = ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), .writefn = pmintenset_write, .raw_writefn = raw_write, .resetvalue = 0x0 }, { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2, .access = PL1_RW, .accessfn = access_tpm, + .fgt = FGT_PMINTEN, .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), .writefn = pmintenclr_write, }, { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2, .access = PL1_RW, .accessfn = access_tpm, + .fgt = FGT_PMINTEN, .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), .writefn = pmintenclr_write }, { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0, .access = PL1_R, - .accessfn = access_aa64_tid2, + .accessfn = access_tid4, + .fgt = FGT_CCSIDR_EL1, .readfn = ccsidr_read, .type = ARM_CP_NO_RAW }, { .name = "CSSELR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 2, .opc2 = 0, .access = PL1_RW, - .accessfn = 
access_aa64_tid2, + .accessfn = access_tid4, + .fgt = FGT_CSSELR_EL1, .writefn = csselr_write, .resetvalue = 0, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.csselr_s), offsetof(CPUARMState, cp15.csselr_ns) } }, - /* Auxiliary ID register: this actually has an IMPDEF value but for now + /* + * Auxiliary ID register: this actually has an IMPDEF value but for now * just RAZ for all cores: */ { .name = "AIDR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid1, + .fgt = FGT_AIDR_EL1, .resetvalue = 0 }, - /* Auxiliary fault status registers: these also are IMPDEF, and we + /* + * Auxiliary fault status registers: these also are IMPDEF, and we * choose to RAZ/WI for all cores. */ { .name = "AFSR0_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, + .fgt = FGT_AFSR0_EL1, + .nv2_redirect_offset = 0x128 | NV2_REDIR_NV1, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "AFSR1_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 1, .access = PL1_RW, .accessfn = access_tvm_trvm, + .fgt = FGT_AFSR1_EL1, + .nv2_redirect_offset = 0x130 | NV2_REDIR_NV1, .type = ARM_CP_CONST, .resetvalue = 0 }, - /* MAIR can just read-as-written because we don't implement caches + /* + * MAIR can just read-as-written because we don't implement caches * and so don't need to care about memory attributes. */ { .name = "MAIR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, + .fgt = FGT_MAIR_EL1, + .nv2_redirect_offset = 0x140 | NV2_REDIR_NV1, .fieldoffset = offsetof(CPUARMState, cp15.mair_el[1]), .resetvalue = 0 }, { .name = "MAIR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 10, .crm = 2, .opc2 = 0, .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el[3]), .resetvalue = 0 }, - /* For non-long-descriptor page tables these are PRRR and NMRR; + /* + * For non-long-descriptor page tables these are PRRR and NMRR; * regardless they still act as reads-as-written for QEMU. */ - /* MAIR0/1 are defined separately from their 64-bit counterpart which + /* + * MAIR0/1 are defined separately from their 64-bit counterpart which * allows them to assign the correct fieldoffset based on the endianness * handled in the field definitions. 
*/ @@ -2323,6 +2313,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .resetfn = arm_cp_reset_ignore }, { .name = "ISR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 1, .opc2 = 0, + .fgt = FGT_ISR_EL1, .type = ARM_CP_NO_RAW, .access = PL1_R, .readfn = isr_read }, /* 32 bit ITLB invalidates */ { .name = "ITLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 0, @@ -2357,30 +2348,29 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { { .name = "TLBIMVAA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3, .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, .writefn = tlbimvaa_write }, - REGINFO_SENTINEL }; static const ARMCPRegInfo v7mp_cp_reginfo[] = { /* 32 bit TLB invalidates, Inner Shareable */ { .name = "TLBIALLIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, .writefn = tlbiall_is_write }, { .name = "TLBIMVAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, .writefn = tlbimva_is_write }, { .name = "TLBIASIDIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, .writefn = tlbiasid_is_write }, { .name = "TLBIMVAAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, .writefn = tlbimvaa_is_write }, - REGINFO_SENTINEL }; static const ARMCPRegInfo pmovsset_cp_reginfo[] = { /* PMOVSSET is not implemented in v7 before v7ve */ { .name = "PMOVSSET", .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 3, .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMOVS, .type = ARM_CP_ALIAS | ARM_CP_IO, .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr), .writefn = pmovsset_write, @@ -2388,11 +2378,11 @@ static const ARMCPRegInfo pmovsset_cp_reginfo[] = { { .name = "PMOVSSET_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 14, .opc2 = 3, .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMOVS, .type = ARM_CP_ALIAS | ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr), .writefn = pmovsset_write, .raw_writefn = raw_write }, - REGINFO_SENTINEL }; static void teecr_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -2433,39 +2423,42 @@ static const ARMCPRegInfo t2ee_cp_reginfo[] = { { .name = "TEEHBR", .cp = 14, .crn = 1, .crm = 0, .opc1 = 6, .opc2 = 0, .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, teehbr), .accessfn = teehbr_access, .resetvalue = 0 }, - REGINFO_SENTINEL }; static const ARMCPRegInfo v6k_cp_reginfo[] = { { .name = "TPIDR_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .opc2 = 2, .crn = 13, .crm = 0, .access = PL0_RW, + .fgt = FGT_TPIDR_EL0, .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[0]), .resetvalue = 0 }, { .name = "TPIDRURW", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 2, .access = PL0_RW, + .fgt = FGT_TPIDR_EL0, .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tpidrurw_s), offsetoflow32(CPUARMState, cp15.tpidrurw_ns) }, .resetfn = arm_cp_reset_ignore }, { .name = "TPIDRRO_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .opc2 = 3, .crn = 13, .crm = 0, - .access = PL0_R|PL1_W, + .access = PL0_R | 
PL1_W, + .fgt = FGT_TPIDRRO_EL0, .fieldoffset = offsetof(CPUARMState, cp15.tpidrro_el[0]), .resetvalue = 0}, { .name = "TPIDRURO", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 3, - .access = PL0_R|PL1_W, + .access = PL0_R | PL1_W, + .fgt = FGT_TPIDRRO_EL0, .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tpidruro_s), offsetoflow32(CPUARMState, cp15.tpidruro_ns) }, .resetfn = arm_cp_reset_ignore }, { .name = "TPIDR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .opc2 = 4, .crn = 13, .crm = 0, .access = PL1_RW, + .fgt = FGT_TPIDR_EL1, .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[1]), .resetvalue = 0 }, { .name = "TPIDRPRW", .opc1 = 0, .cp = 15, .crn = 13, .crm = 0, .opc2 = 4, .access = PL1_RW, .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tpidrprw_s), offsetoflow32(CPUARMState, cp15.tpidrprw_ns) }, .resetvalue = 0 }, - REGINFO_SENTINEL }; #ifndef CONFIG_USER_ONLY @@ -2473,7 +2466,8 @@ static const ARMCPRegInfo v6k_cp_reginfo[] = { static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { - /* CNTFRQ: not visible from PL0 if both PL0PCTEN and PL0VCTEN are zero. + /* + * CNTFRQ: not visible from PL0 if both PL0PCTEN and PL0VCTEN are zero. * Writable only at the highest implemented exception level. */ int el = arm_current_el(env); @@ -2530,22 +2524,7 @@ static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx, if (!extract32(env->cp15.c14_cntkctl, timeridx, 1)) { return CP_ACCESS_TRAP; } - - /* If HCR_EL2.<E2H,TGE> == '10': check CNTHCTL_EL2.EL1PCTEN. */ - if (hcr & HCR_E2H) { - if (timeridx == GTIMER_PHYS && - !extract32(env->cp15.cnthctl_el2, 10, 1)) { - return CP_ACCESS_TRAP_EL2; - } - } else { - /* If HCR_EL2.<E2H> == 0: check CNTHCTL_EL2.EL1PCEN. */ - if (has_el2 && timeridx == GTIMER_PHYS && - !extract32(env->cp15.cnthctl_el2, 1, 1)) { - return CP_ACCESS_TRAP_EL2; - } - } - break; - + /* fall through */ case 1: /* Check CNTHCTL_EL2.EL1PCTEN, which changes location based on E2H. */ if (has_el2 && timeridx == GTIMER_PHYS && @@ -2554,6 +2533,11 @@ static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx, : !extract32(env->cp15.cnthctl_el2, 0, 1))) { return CP_ACCESS_TRAP_EL2; } + if (has_el2 && timeridx == GTIMER_VIRT) { + if (FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, EL1TVCT)) { + return CP_ACCESS_TRAP_EL2; + } + } break; } return CP_ACCESS_OK; @@ -2597,6 +2581,11 @@ static CPAccessResult gt_timer_access(CPUARMState *env, int timeridx, } } } + if (has_el2 && timeridx == GTIMER_VIRT) { + if (FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, EL1TVT)) { + return CP_ACCESS_TRAP_EL2; + } + } break; } return CP_ACCESS_OK; @@ -2632,7 +2621,8 @@ static CPAccessResult gt_stimer_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { - /* The AArch64 register view of the secure physical timer is + /* + * The AArch64 register view of the secure physical timer is * always accessible from EL3, and configurably accessible from * Secure EL1. */ @@ -2662,35 +2652,102 @@ static uint64_t gt_get_countervalue(CPUARMState *env) return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / gt_cntfrq_period_ns(cpu); } +static void gt_update_irq(ARMCPU *cpu, int timeridx) +{ + CPUARMState *env = &cpu->env; + uint64_t cnthctl = env->cp15.cnthctl_el2; + ARMSecuritySpace ss = arm_security_space(env); + /* ISTATUS && !IMASK */ + int irqstate = (env->cp15.c14_timer[timeridx].ctl & 6) == 4; + + /* + * If bit CNTHCTL_EL2.CNT[VP]MASK is set, it overrides IMASK. + * It is RES0 in Secure and NonSecure state. 
+ */ + if ((ss == ARMSS_Root || ss == ARMSS_Realm) && + ((timeridx == GTIMER_VIRT && (cnthctl & R_CNTHCTL_CNTVMASK_MASK)) || + (timeridx == GTIMER_PHYS && (cnthctl & R_CNTHCTL_CNTPMASK_MASK)))) { + irqstate = 0; + } + + qemu_set_irq(cpu->gt_timer_outputs[timeridx], irqstate); + trace_arm_gt_update_irq(timeridx, irqstate); +} + +void gt_rme_post_el_change(ARMCPU *cpu, void *ignored) +{ + /* + * Changing security state between Root and Secure/NonSecure, which may + * happen when switching EL, can change the effective value of CNTHCTL_EL2 + * mask bits. Update the IRQ state accordingly. + */ + gt_update_irq(cpu, GTIMER_VIRT); + gt_update_irq(cpu, GTIMER_PHYS); +} + +static uint64_t gt_phys_raw_cnt_offset(CPUARMState *env) +{ + if ((env->cp15.scr_el3 & SCR_ECVEN) && + FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, ECV) && + arm_is_el2_enabled(env) && + (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { + return env->cp15.cntpoff_el2; + } + return 0; +} + +static uint64_t gt_phys_cnt_offset(CPUARMState *env) +{ + if (arm_current_el(env) >= 2) { + return 0; + } + return gt_phys_raw_cnt_offset(env); +} + static void gt_recalc_timer(ARMCPU *cpu, int timeridx) { ARMGenericTimer *gt = &cpu->env.cp15.c14_timer[timeridx]; if (gt->ctl & 1) { - /* Timer enabled: calculate and set current ISTATUS, irq, and + /* + * Timer enabled: calculate and set current ISTATUS, irq, and * reset timer to when ISTATUS next has to change */ uint64_t offset = timeridx == GTIMER_VIRT ? - cpu->env.cp15.cntvoff_el2 : 0; + cpu->env.cp15.cntvoff_el2 : gt_phys_raw_cnt_offset(&cpu->env); uint64_t count = gt_get_countervalue(&cpu->env); /* Note that this must be unsigned 64 bit arithmetic: */ int istatus = count - offset >= gt->cval; uint64_t nexttick; - int irqstate; gt->ctl = deposit32(gt->ctl, 2, 1, istatus); - irqstate = (istatus && !(gt->ctl & 2)); - qemu_set_irq(cpu->gt_timer_outputs[timeridx], irqstate); - if (istatus) { - /* Next transition is when count rolls back over to zero */ - nexttick = UINT64_MAX; + /* + * Next transition is when (count - offset) rolls back over to 0. + * If offset > count then this is when count == offset; + * if offset <= count then this is when count == offset + 2^64 + * For the latter case we set nexttick to an "as far in future + * as possible" value and let the code below handle it. + */ + if (offset > count) { + nexttick = offset; + } else { + nexttick = UINT64_MAX; + } } else { - /* Next transition is when we hit cval */ - nexttick = gt->cval + offset; + /* + * Next transition is when (count - offset) == cval, i.e. + * when count == (cval + offset). + * If that would overflow, then again we set up the next interrupt + * for "as far in the future as possible" for the code below. + */ + if (uadd64_overflow(gt->cval, offset, &nexttick)) { + nexttick = UINT64_MAX; + } } - /* Note that the desired next expiry time might be beyond the + /* + * Note that the desired next expiry time might be beyond the * signed-64-bit range of a QEMUTimer -- in this case we just * set the timer for as far in the future as possible. When the * timer expires we will reset the timer for any remaining period. 
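All of the nexttick arithmetic in gt_recalc_timer() above is modulo 2^64, which is why the two cases reduce to simple comparisons. A standalone sketch of how the next transition point is chosen (plain C; next_tick() is an invented name and add_overflows() is a local stand-in for QEMU's uadd64_overflow()):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-in for QEMU's uadd64_overflow(): true on wraparound. */
static bool add_overflows(uint64_t a, uint64_t b, uint64_t *res)
{
    *res = a + b;
    return *res < a;
}

/*
 * Absolute counter value at which ISTATUS next changes, following the
 * same two cases as gt_recalc_timer() above.
 */
static uint64_t next_tick(uint64_t count, uint64_t offset, uint64_t cval,
                          bool istatus)
{
    uint64_t nexttick;

    if (istatus) {
        /* (count - offset) next rolls over to 0 when count == offset. */
        nexttick = (offset > count) ? offset : UINT64_MAX;
    } else {
        /* (count - offset) next reaches cval when count == cval + offset. */
        if (add_overflows(cval, offset, &nexttick)) {
            nexttick = UINT64_MAX;
        }
    }
    return nexttick;
}

int main(void)
{
    /* Timer armed but not yet fired: expires at cval + offset. */
    printf("%llu\n", (unsigned long long)next_tick(100, 50, 200, false)); /* 250 */
    /* Already fired, offset ahead of count: ISTATUS drops when count == offset. */
    printf("%llu\n", (unsigned long long)next_tick(100, 150, 0, true));   /* 150 */
    return 0;
}

When the true expiry lies beyond what the QEMUTimer can represent, the guest-visible behaviour is still correct: the timer fires early and is simply re-armed for the remaining period, as the comment at the end of the hunk describes.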
@@ -2700,14 +2757,14 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx) } else { timer_mod(cpu->gt_timer[timeridx], nexttick); } - trace_arm_gt_recalc(timeridx, irqstate, nexttick); + trace_arm_gt_recalc(timeridx, nexttick); } else { /* Timer disabled: ISTATUS and timer output always clear */ gt->ctl &= ~4; - qemu_set_irq(cpu->gt_timer_outputs[timeridx], 0); timer_del(cpu->gt_timer[timeridx]); trace_arm_gt_recalc_disabled(timeridx); } + gt_update_irq(cpu, timeridx); } static void gt_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri, @@ -2720,7 +2777,7 @@ static void gt_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri, static uint64_t gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri) { - return gt_get_countervalue(env); + return gt_get_countervalue(env) - gt_phys_cnt_offset(env); } static uint64_t gt_virt_cnt_offset(CPUARMState *env) @@ -2769,6 +2826,9 @@ static uint64_t gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri, case GTIMER_HYPVIRT: offset = gt_virt_cnt_offset(env); break; + case GTIMER_PHYS: + offset = gt_phys_cnt_offset(env); + break; } return (uint32_t)(env->cp15.c14_timer[timeridx].cval - @@ -2786,6 +2846,9 @@ static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri, case GTIMER_HYPVIRT: offset = gt_virt_cnt_offset(env); break; + case GTIMER_PHYS: + offset = gt_phys_cnt_offset(env); + break; } trace_arm_gt_tval_write(timeridx, value); @@ -2807,13 +2870,12 @@ static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, /* Enable toggled */ gt_recalc_timer(cpu, timeridx); } else if ((oldval ^ value) & 2) { - /* IMASK toggled: don't need to recalculate, + /* + * IMASK toggled: don't need to recalculate, * just set the interrupt line based on ISTATUS */ - int irqstate = (oldval & 4) && !(value & 2); - - trace_arm_gt_imask_toggle(timeridx, irqstate); - qemu_set_irq(cpu->gt_timer_outputs[timeridx], irqstate); + trace_arm_gt_imask_toggle(timeridx); + gt_update_irq(cpu, timeridx); } } @@ -2851,9 +2913,6 @@ static int gt_phys_redir_timeridx(CPUARMState *env) case ARMMMUIdx_E20_0: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_SE20_0: - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: return GTIMER_HYP; default: return GTIMER_PHYS; @@ -2866,9 +2925,6 @@ static int gt_virt_redir_timeridx(CPUARMState *env) case ARMMMUIdx_E20_0: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_SE20_0: - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: return GTIMER_HYPVIRT; default: return GTIMER_VIRT; @@ -2945,6 +3001,49 @@ static void gt_virt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, gt_ctl_write(env, ri, GTIMER_VIRT, value); } +static void gt_cnthctl_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + uint32_t oldval = env->cp15.cnthctl_el2; + uint32_t valid_mask = + R_CNTHCTL_EL0PCTEN_E2H1_MASK | + R_CNTHCTL_EL0VCTEN_E2H1_MASK | + R_CNTHCTL_EVNTEN_MASK | + R_CNTHCTL_EVNTDIR_MASK | + R_CNTHCTL_EVNTI_MASK | + R_CNTHCTL_EL0VTEN_MASK | + R_CNTHCTL_EL0PTEN_MASK | + R_CNTHCTL_EL1PCTEN_E2H1_MASK | + R_CNTHCTL_EL1PTEN_MASK; + + if (cpu_isar_feature(aa64_rme, cpu)) { + valid_mask |= R_CNTHCTL_CNTVMASK_MASK | R_CNTHCTL_CNTPMASK_MASK; + } + if (cpu_isar_feature(aa64_ecv_traps, cpu)) { + valid_mask |= + R_CNTHCTL_EL1TVT_MASK | + R_CNTHCTL_EL1TVCT_MASK | + R_CNTHCTL_EL1NVPCT_MASK | + R_CNTHCTL_EL1NVVCT_MASK | + R_CNTHCTL_EVNTIS_MASK; + } + if (cpu_isar_feature(aa64_ecv, cpu)) { + valid_mask |= R_CNTHCTL_ECV_MASK; + } + + /* Clear RES0 bits */ + value &= valid_mask; + + 
raw_write(env, ri, value); + + if ((oldval ^ value) & R_CNTHCTL_CNTVMASK_MASK) { + gt_update_irq(cpu, GTIMER_VIRT); + } else if ((oldval ^ value) & R_CNTHCTL_CNTPMASK_MASK) { + gt_update_irq(cpu, GTIMER_PHYS); + } +} + static void gt_cntvoff_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -3124,7 +3223,8 @@ static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *opaque) } static const ARMCPRegInfo generic_timer_cp_reginfo[] = { - /* Note that CNTFRQ is purely reads-as-written for the benefit + /* + * Note that CNTFRQ is purely reads-as-written for the benefit * of software; writing it doesn't actually change the timer frequency. * Our reset value matches the fixed frequency we implement the timer at. */ @@ -3169,6 +3269,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 1, .type = ARM_CP_IO, .access = PL0_RW, .accessfn = gt_ptimer_access, + .nv2_redirect_offset = 0x180 | NV2_REDIR_NV1, .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].ctl), .resetvalue = 0, .readfn = gt_phys_redir_ctl_read, .raw_readfn = raw_read, @@ -3186,6 +3287,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 1, .type = ARM_CP_IO, .access = PL0_RW, .accessfn = gt_vtimer_access, + .nv2_redirect_offset = 0x170 | NV2_REDIR_NV1, .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].ctl), .resetvalue = 0, .readfn = gt_virt_redir_ctl_read, .raw_readfn = raw_read, @@ -3265,6 +3367,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 2, .access = PL0_RW, .type = ARM_CP_IO, + .nv2_redirect_offset = 0x178 | NV2_REDIR_NV1, .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval), .resetvalue = 0, .accessfn = gt_ptimer_access, .readfn = gt_phys_redir_cval_read, .raw_readfn = raw_read, @@ -3282,12 +3385,14 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 2, .access = PL0_RW, .type = ARM_CP_IO, + .nv2_redirect_offset = 0x168 | NV2_REDIR_NV1, .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval), .resetvalue = 0, .accessfn = gt_vtimer_access, .readfn = gt_virt_redir_cval_read, .raw_readfn = raw_read, .writefn = gt_virt_redir_cval_write, .raw_writefn = raw_write, }, - /* Secure timer -- this is actually restricted to only EL3 + /* + * Secure timer -- this is actually restricted to only EL3 * and configurably Secure-EL1 via the accessfn. */ { .name = "CNTPS_TVAL_EL1", .state = ARM_CP_STATE_AA64, @@ -3313,21 +3418,69 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_SEC].cval), .writefn = gt_sec_cval_write, .raw_writefn = raw_write, }, - REGINFO_SENTINEL }; -static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) +/* + * FEAT_ECV adds extra views of CNTVCT_EL0 and CNTPCT_EL0 which + * are "self-synchronizing". For QEMU all sysregs are self-synchronizing, + * so our implementations here are identical to the normal registers. 
+ */ +static const ARMCPRegInfo gen_timer_ecv_cp_reginfo[] = { + { .name = "CNTVCTSS", .cp = 15, .crm = 14, .opc1 = 9, + .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = gt_vct_access, + .readfn = gt_virt_cnt_read, .resetfn = arm_cp_reset_ignore, + }, + { .name = "CNTVCTSS_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 6, + .access = PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = gt_vct_access, .readfn = gt_virt_cnt_read, + }, + { .name = "CNTPCTSS", .cp = 15, .crm = 14, .opc1 = 8, + .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = gt_pct_access, + .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore, + }, + { .name = "CNTPCTSS_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 5, + .access = PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO, + .accessfn = gt_pct_access, .readfn = gt_cnt_read, + }, +}; + +static CPAccessResult gt_cntpoff_access(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) { - if (!(arm_hcr_el2_eff(env) & HCR_E2H)) { - return CP_ACCESS_TRAP; + if (arm_current_el(env) == 2 && arm_feature(env, ARM_FEATURE_EL3) && + !(env->cp15.scr_el3 & SCR_ECVEN)) { + return CP_ACCESS_TRAP_EL3; } return CP_ACCESS_OK; } +static void gt_cntpoff_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + + trace_arm_gt_cntpoff_write(value); + raw_write(env, ri, value); + gt_recalc_timer(cpu, GTIMER_PHYS); +} + +static const ARMCPRegInfo gen_timer_cntpoff_reginfo = { + .name = "CNTPOFF_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 0, .opc2 = 6, + .access = PL2_RW, .type = ARM_CP_IO, .resetvalue = 0, + .accessfn = gt_cntpoff_access, .writefn = gt_cntpoff_write, + .nv2_redirect_offset = 0x1a8, + .fieldoffset = offsetof(CPUARMState, cp15.cntpoff_el2), +}; #else -/* In user-mode most of the generic timer registers are inaccessible +/* + * In user-mode most of the generic timer registers are inaccessible * however modern kernels (4.12+) allow access to cntvct_el0 */ @@ -3335,7 +3488,8 @@ static uint64_t gt_virt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = env_archcpu(env); - /* Currently we have no support for QEMUTimer in linux-user so we + /* + * Currently we have no support for QEMUTimer in linux-user so we * can't call gt_get_countervalue(env), instead we directly * call the lower level functions. */ @@ -3354,7 +3508,18 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { .access = PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO, .readfn = gt_virt_cnt_read, }, - REGINFO_SENTINEL +}; + +/* + * CNTVCTSS_EL0 has the same trap conditions as CNTVCT_EL0, so it also + * is exposed to userspace by Linux. + */ +static const ARMCPRegInfo gen_timer_ecv_cp_reginfo[] = { + { .name = "CNTVCTSS_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 6, + .access = PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO, + .readfn = gt_virt_cnt_read, + }, }; #endif @@ -3377,7 +3542,8 @@ static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { if (ri->opc2 & 4) { - /* The ATS12NSO* operations must trap to EL3 or EL2 if executed in + /* + * The ATS12NSO* operations must trap to EL3 or EL2 if executed in * Secure EL1 (which can only happen if EL3 is AArch64). * They are simply UNDEF if executed from NS EL1. * They function normally from EL2 or EL3. 
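gt_cntpoff_access() above is typical of the .accessfn hooks in this file: inspect the current EL plus one enabling control bit, and return either success or a trap target. A toy reduction of that shape (standalone C; the enum mirrors the shape, not the values, of QEMU's CPAccessResult, and reg_access() with its parameters is invented for illustration):

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the shape (not the values) of QEMU's CPAccessResult. */
typedef enum {
    ACCESS_OK,
    ACCESS_TRAP_EL2,
    ACCESS_TRAP_EL3,
} AccessResult;

/*
 * Toy accessfn in the style of gt_cntpoff_access(): an EL2 access traps
 * to EL3 when EL3 is present and the enabling bit is clear.
 */
static AccessResult reg_access(int current_el, bool el3_present, bool enable_bit)
{
    if (current_el == 2 && el3_present && !enable_bit) {
        return ACCESS_TRAP_EL3;
    }
    return ACCESS_OK;
}

int main(void)
{
    printf("%d\n", reg_access(2, true, false)); /* 2: trap to EL3 */
    printf("%d\n", reg_access(2, true, true));  /* 0: access OK */
    return 0;
}

Keeping each check in an accessfn rather than in the read/write handlers lets the register's trap behaviour be declared once in the ARMCPRegInfo entry and shared by both the AArch32 and AArch64 views.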
@@ -3385,9 +3551,9 @@ static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri, if (arm_current_el(env) == 1) { if (arm_is_secure_below_el3(env)) { if (env->cp15.scr_el3 & SCR_EEL2) { - return CP_ACCESS_TRAP_UNCATEGORIZED_EL2; + return CP_ACCESS_TRAP_EL2; } - return CP_ACCESS_TRAP_UNCATEGORIZED_EL3; + return CP_ACCESS_TRAP_EL3; } return CP_ACCESS_TRAP_UNCATEGORIZED; } @@ -3396,21 +3562,41 @@ static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri, } #ifdef CONFIG_TCG +static int par_el1_shareability(GetPhysAddrResult *res) +{ + /* + * The PAR_EL1.SH field must be 0b10 for Device or Normal-NC + * memory -- see pseudocode PAREncodeShareability(). + */ + if (((res->cacheattrs.attrs & 0xf0) == 0) || + res->cacheattrs.attrs == 0x44 || res->cacheattrs.attrs == 0x40) { + return 2; + } + return res->cacheattrs.shareability; +} + static uint64_t do_ats_write(CPUARMState *env, uint64_t value, - MMUAccessType access_type, ARMMMUIdx mmu_idx) + MMUAccessType access_type, ARMMMUIdx mmu_idx, + ARMSecuritySpace ss) { - hwaddr phys_addr; - target_ulong page_size; - int prot; bool ret; uint64_t par64; bool format64 = false; - MemTxAttrs attrs = {}; ARMMMUFaultInfo fi = {}; - ARMCacheAttrs cacheattrs = {}; + GetPhysAddrResult res = {}; - ret = get_phys_addr(env, value, access_type, mmu_idx, &phys_addr, &attrs, - &prot, &page_size, &fi, &cacheattrs); + /* + * I_MXTJT: Granule protection checks are not performed on the final address + * of a successful translation. + */ + ret = get_phys_addr_with_space_nogpc(env, value, access_type, mmu_idx, ss, + &res, &fi); + + /* + * ATS operations only do S1 or S1+S2 translations, so we never + * have to deal with the ARMCacheAttrs format for S2 only. + */ + assert(!res.cacheattrs.is_s2_format); if (ret) { /* @@ -3516,12 +3702,12 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, /* Create a 64-bit PAR */ par64 = (1 << 11); /* LPAE bit always set */ if (!ret) { - par64 |= phys_addr & ~0xfffULL; - if (!attrs.secure) { + par64 |= res.f.phys_addr & ~0xfffULL; + if (!res.f.attrs.secure) { par64 |= (1 << 9); /* NS */ } - par64 |= (uint64_t)cacheattrs.attrs << 56; /* ATTR */ - par64 |= cacheattrs.shareability << 7; /* SH */ + par64 |= (uint64_t)res.cacheattrs.attrs << 56; /* ATTR */ + par64 |= par_el1_shareability(&res) << 7; /* SH */ } else { uint32_t fsr = arm_fi_to_lfsc(&fi); @@ -3535,19 +3721,20 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, } } } else { - /* fsr is a DFSR/IFSR value for the short descriptor + /* + * fsr is a DFSR/IFSR value for the short descriptor * translation table format (with WnR always clear). * Convert it to a 32-bit PAR. 
*/ if (!ret) { /* We do not set any attribute bits in the PAR */ - if (page_size == (1 << 24) + if (res.f.lg_page_size == 24 && arm_feature(env, ARM_FEATURE_V7)) { - par64 = (phys_addr & 0xff000000) | (1 << 1); + par64 = (res.f.phys_addr & 0xff000000) | (1 << 1); } else { - par64 = phys_addr & 0xfffff000; + par64 = res.f.phys_addr & 0xfffff000; } - if (!attrs.secure) { + if (!res.f.attrs.secure) { par64 |= (1 << 9); /* NS */ } } else { @@ -3568,24 +3755,23 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) uint64_t par64; ARMMMUIdx mmu_idx; int el = arm_current_el(env); - bool secure = arm_is_secure_below_el3(env); + ARMSecuritySpace ss = arm_security_space(env); switch (ri->opc2 & 6) { case 0: /* stage 1 current state PL1: ATS1CPR, ATS1CPW, ATS1CPRP, ATS1CPWP */ switch (el) { case 3: - mmu_idx = ARMMMUIdx_SE3; + mmu_idx = ARMMMUIdx_E3; break; case 2: - g_assert(!secure); /* ARMv8.4-SecEL2 is 64-bit only */ + g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ /* fall through */ case 1: - if (ri->crm == 9 && (env->uncached_cpsr & CPSR_PAN)) { - mmu_idx = (secure ? ARMMMUIdx_Stage1_SE1_PAN - : ARMMMUIdx_Stage1_E1_PAN); + if (ri->crm == 9 && arm_pan_enabled(env)) { + mmu_idx = ARMMMUIdx_Stage1_E1_PAN; } else { - mmu_idx = secure ? ARMMMUIdx_Stage1_SE1 : ARMMMUIdx_Stage1_E1; + mmu_idx = ARMMMUIdx_Stage1_E1; } break; default: @@ -3596,14 +3782,14 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) /* stage 1 current state PL0: ATS1CUR, ATS1CUW */ switch (el) { case 3: - mmu_idx = ARMMMUIdx_SE10_0; + mmu_idx = ARMMMUIdx_E10_0; break; case 2: - g_assert(!secure); /* ARMv8.4-SecEL2 is 64-bit only */ + g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ mmu_idx = ARMMMUIdx_Stage1_E0; break; case 1: - mmu_idx = secure ? ARMMMUIdx_Stage1_SE0 : ARMMMUIdx_Stage1_E0; + mmu_idx = ARMMMUIdx_Stage1_E0; break; default: g_assert_not_reached(); @@ -3612,16 +3798,18 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) case 4: /* stage 1+2 NonSecure PL1: ATS12NSOPR, ATS12NSOPW */ mmu_idx = ARMMMUIdx_E10_1; + ss = ARMSS_NonSecure; break; case 6: /* stage 1+2 NonSecure PL0: ATS12NSOUR, ATS12NSOUW */ mmu_idx = ARMMMUIdx_E10_0; + ss = ARMSS_NonSecure; break; default: g_assert_not_reached(); } - par64 = do_ats_write(env, value, access_type, mmu_idx); + par64 = do_ats_write(env, value, access_type, mmu_idx, ss); A32_BANKED_CURRENT_REG_SET(env, par, par64); #else @@ -3637,7 +3825,9 @@ static void ats1h_write(CPUARMState *env, const ARMCPRegInfo *ri, MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD; uint64_t par64; - par64 = do_ats_write(env, value, access_type, ARMMMUIdx_E2); + /* There is no SecureEL2 for AArch32. */ + par64 = do_ats_write(env, value, access_type, ARMMMUIdx_E2, + ARMSS_NonSecure); A32_BANKED_CURRENT_REG_SET(env, par, par64); #else @@ -3646,6 +3836,22 @@ static void ats1h_write(CPUARMState *env, const ARMCPRegInfo *ri, #endif /* CONFIG_TCG */ } +static CPAccessResult at_e012_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + /* + * R_NYXTL: instruction is UNDEFINED if it applies to an Exception level + * lower than EL3 and the combination SCR_EL3.{NSE,NS} is reserved. This can + * only happen when executing at EL3 because that combination also causes an + * illegal exception return. We don't need to check FEAT_RME either, because + * scr_write() ensures that the NSE bit is not set otherwise. 
+ */ + if ((env->cp15.scr_el3 & (SCR_NSE | SCR_NS)) == SCR_NSE) { + return CP_ACCESS_TRAP; + } + return CP_ACCESS_OK; +} + static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { @@ -3653,7 +3859,16 @@ static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri, !(env->cp15.scr_el3 & (SCR_NS | SCR_EEL2))) { return CP_ACCESS_TRAP; } - return CP_ACCESS_OK; + return at_e012_access(env, ri, isread); +} + +static CPAccessResult at_s1e01_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_AT)) { + return CP_ACCESS_TRAP_EL2; + } + return at_e012_access(env, ri, isread); } static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, @@ -3662,43 +3877,48 @@ static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, #ifdef CONFIG_TCG MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD; ARMMMUIdx mmu_idx; - int secure = arm_is_secure_below_el3(env); + uint64_t hcr_el2 = arm_hcr_el2_eff(env); + bool regime_e20 = (hcr_el2 & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE); + bool for_el3 = false; + ARMSecuritySpace ss; switch (ri->opc2 & 6) { case 0: switch (ri->opc1) { case 0: /* AT S1E1R, AT S1E1W, AT S1E1RP, AT S1E1WP */ - if (ri->crm == 9 && (env->pstate & PSTATE_PAN)) { - mmu_idx = (secure ? ARMMMUIdx_Stage1_SE1_PAN - : ARMMMUIdx_Stage1_E1_PAN); + if (ri->crm == 9 && arm_pan_enabled(env)) { + mmu_idx = regime_e20 ? + ARMMMUIdx_E20_2_PAN : ARMMMUIdx_Stage1_E1_PAN; } else { - mmu_idx = secure ? ARMMMUIdx_Stage1_SE1 : ARMMMUIdx_Stage1_E1; + mmu_idx = regime_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_Stage1_E1; } break; case 4: /* AT S1E2R, AT S1E2W */ - mmu_idx = secure ? ARMMMUIdx_SE2 : ARMMMUIdx_E2; + mmu_idx = hcr_el2 & HCR_E2H ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2; break; case 6: /* AT S1E3R, AT S1E3W */ - mmu_idx = ARMMMUIdx_SE3; + mmu_idx = ARMMMUIdx_E3; + for_el3 = true; break; default: g_assert_not_reached(); } break; case 2: /* AT S1E0R, AT S1E0W */ - mmu_idx = secure ? ARMMMUIdx_Stage1_SE0 : ARMMMUIdx_Stage1_E0; + mmu_idx = regime_e20 ? ARMMMUIdx_E20_0 : ARMMMUIdx_Stage1_E0; break; case 4: /* AT S12E1R, AT S12E1W */ - mmu_idx = secure ? ARMMMUIdx_SE10_1 : ARMMMUIdx_E10_1; + mmu_idx = regime_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E10_1; break; case 6: /* AT S12E0R, AT S12E0W */ - mmu_idx = secure ? ARMMMUIdx_SE10_0 : ARMMMUIdx_E10_0; + mmu_idx = regime_e20 ? ARMMMUIdx_E20_0 : ARMMMUIdx_E10_0; break; default: g_assert_not_reached(); } - env->cp15.par_el[1] = do_ats_write(env, value, access_type, mmu_idx); + ss = for_el3 ? arm_security_space(env) : arm_security_space_below_el3(env); + env->cp15.par_el[1] = do_ats_write(env, value, access_type, mmu_idx, ss); #else /* Handled by hardware accelerator. */ g_assert_not_reached(); @@ -3706,21 +3926,6 @@ static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri, } #endif -static const ARMCPRegInfo vapa_cp_reginfo[] = { - { .name = "PAR", .cp = 15, .crn = 7, .crm = 4, .opc1 = 0, .opc2 = 0, - .access = PL1_RW, .resetvalue = 0, - .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.par_s), - offsetoflow32(CPUARMState, cp15.par_ns) }, - .writefn = par_write }, -#ifndef CONFIG_USER_ONLY - /* This underdecoding is safe because the reginfo is NO_RAW. 
*/ - { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY, - .access = PL1_W, .accessfn = ats_access, - .writefn = ats_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, -#endif - REGINFO_SENTINEL -}; - /* Return basic MPU access permission bits. */ static uint32_t simple_mpu_ap_bits(uint32_t val) { @@ -3816,8 +4021,225 @@ static void pmsav7_rgnr_write(CPUARMState *env, const ARMCPRegInfo *ri, raw_write(env, ri, value); } +static void prbar_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + + tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */ + env->pmsav8.rbar[M_REG_NS][env->pmsav7.rnr[M_REG_NS]] = value; +} + +static uint64_t prbar_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return env->pmsav8.rbar[M_REG_NS][env->pmsav7.rnr[M_REG_NS]]; +} + +static void prlar_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + + tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */ + env->pmsav8.rlar[M_REG_NS][env->pmsav7.rnr[M_REG_NS]] = value; +} + +static uint64_t prlar_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return env->pmsav8.rlar[M_REG_NS][env->pmsav7.rnr[M_REG_NS]]; +} + +static void prselr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + + /* + * Ignore writes that would select not implemented region. + * This is architecturally UNPREDICTABLE. + */ + if (value >= cpu->pmsav7_dregion) { + return; + } + + env->pmsav7.rnr[M_REG_NS] = value; +} + +static void hprbar_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + + tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */ + env->pmsav8.hprbar[env->pmsav8.hprselr] = value; +} + +static uint64_t hprbar_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return env->pmsav8.hprbar[env->pmsav8.hprselr]; +} + +static void hprlar_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + + tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */ + env->pmsav8.hprlar[env->pmsav8.hprselr] = value; +} + +static uint64_t hprlar_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return env->pmsav8.hprlar[env->pmsav8.hprselr]; +} + +static void hprenr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint32_t n; + uint32_t bit; + ARMCPU *cpu = env_archcpu(env); + + /* Ignore writes to unimplemented regions */ + int rmax = MIN(cpu->pmsav8r_hdregion, 32); + value &= MAKE_64BIT_MASK(0, rmax); + + tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */ + + /* Register alias is only valid for first 32 indexes */ + for (n = 0; n < rmax; ++n) { + bit = extract32(value, n, 1); + env->pmsav8.hprlar[n] = deposit32( + env->pmsav8.hprlar[n], 0, 1, bit); + } +} + +static uint64_t hprenr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + uint32_t n; + uint32_t result = 0x0; + ARMCPU *cpu = env_archcpu(env); + + /* Register alias is only valid for first 32 indexes */ + for (n = 0; n < MIN(cpu->pmsav8r_hdregion, 32); ++n) { + if (env->pmsav8.hprlar[n] & 0x1) { + result |= (0x1 << n); + } + } + return result; +} + +static void hprselr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + + /* + * Ignore writes that would select not implemented region. + * This is architecturally UNPREDICTABLE. 
+ */ + if (value >= cpu->pmsav8r_hdregion) { + return; + } + + env->pmsav8.hprselr = value; +} + +static void pmsav8r_regn_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + ARMCPU *cpu = env_archcpu(env); + uint8_t index = (extract32(ri->opc0, 0, 1) << 4) | + (extract32(ri->crm, 0, 3) << 1) | extract32(ri->opc2, 2, 1); + + tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */ + + if (ri->opc1 & 4) { + if (index >= cpu->pmsav8r_hdregion) { + return; + } + if (ri->opc2 & 0x1) { + env->pmsav8.hprlar[index] = value; + } else { + env->pmsav8.hprbar[index] = value; + } + } else { + if (index >= cpu->pmsav7_dregion) { + return; + } + if (ri->opc2 & 0x1) { + env->pmsav8.rlar[M_REG_NS][index] = value; + } else { + env->pmsav8.rbar[M_REG_NS][index] = value; + } + } +} + +static uint64_t pmsav8r_regn_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + ARMCPU *cpu = env_archcpu(env); + uint8_t index = (extract32(ri->opc0, 0, 1) << 4) | + (extract32(ri->crm, 0, 3) << 1) | extract32(ri->opc2, 2, 1); + + if (ri->opc1 & 4) { + if (index >= cpu->pmsav8r_hdregion) { + return 0x0; + } + if (ri->opc2 & 0x1) { + return env->pmsav8.hprlar[index]; + } else { + return env->pmsav8.hprbar[index]; + } + } else { + if (index >= cpu->pmsav7_dregion) { + return 0x0; + } + if (ri->opc2 & 0x1) { + return env->pmsav8.rlar[M_REG_NS][index]; + } else { + return env->pmsav8.rbar[M_REG_NS][index]; + } + } +} + +static const ARMCPRegInfo pmsav8r_cp_reginfo[] = { + { .name = "PRBAR", + .cp = 15, .opc1 = 0, .crn = 6, .crm = 3, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_NO_RAW, + .accessfn = access_tvm_trvm, + .readfn = prbar_read, .writefn = prbar_write }, + { .name = "PRLAR", + .cp = 15, .opc1 = 0, .crn = 6, .crm = 3, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_NO_RAW, + .accessfn = access_tvm_trvm, + .readfn = prlar_read, .writefn = prlar_write }, + { .name = "PRSELR", .resetvalue = 0, + .cp = 15, .opc1 = 0, .crn = 6, .crm = 2, .opc2 = 1, + .access = PL1_RW, .accessfn = access_tvm_trvm, + .writefn = prselr_write, + .fieldoffset = offsetof(CPUARMState, pmsav7.rnr[M_REG_NS]) }, + { .name = "HPRBAR", .resetvalue = 0, + .cp = 15, .opc1 = 4, .crn = 6, .crm = 3, .opc2 = 0, + .access = PL2_RW, .type = ARM_CP_NO_RAW, + .readfn = hprbar_read, .writefn = hprbar_write }, + { .name = "HPRLAR", + .cp = 15, .opc1 = 4, .crn = 6, .crm = 3, .opc2 = 1, + .access = PL2_RW, .type = ARM_CP_NO_RAW, + .readfn = hprlar_read, .writefn = hprlar_write }, + { .name = "HPRSELR", .resetvalue = 0, + .cp = 15, .opc1 = 4, .crn = 6, .crm = 2, .opc2 = 1, + .access = PL2_RW, + .writefn = hprselr_write, + .fieldoffset = offsetof(CPUARMState, pmsav8.hprselr) }, + { .name = "HPRENR", + .cp = 15, .opc1 = 4, .crn = 6, .crm = 1, .opc2 = 1, + .access = PL2_RW, .type = ARM_CP_NO_RAW, + .readfn = hprenr_read, .writefn = hprenr_write }, +}; + static const ARMCPRegInfo pmsav7_cp_reginfo[] = { - /* Reset for all these registers is handled in arm_cpu_reset(), + /* + * Reset for all these registers is handled in arm_cpu_reset(), * because the PMSAv7 is also used by M-profile CPUs, which do * not register cpregs but still need the state to be reset. 
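 * (M-profile reaches this state through the memory-mapped MPU_*
 * registers in the System Control Space rather than through cp15,
 * which is why the reset cannot be driven from cpregs here.)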
*/ @@ -3841,7 +4263,6 @@ static const ARMCPRegInfo pmsav7_cp_reginfo[] = { .fieldoffset = offsetof(CPUARMState, pmsav7.rnr[M_REG_NS]), .writefn = pmsav7_rgnr_write, .resetfn = arm_cp_reset_ignore }, - REGINFO_SENTINEL }; static const ARMCPRegInfo pmsav5_cp_reginfo[] = { @@ -3892,22 +4313,23 @@ static const ARMCPRegInfo pmsav5_cp_reginfo[] = { { .name = "946_PRBS7", .cp = 15, .crn = 6, .crm = 7, .opc1 = 0, .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, .fieldoffset = offsetof(CPUARMState, cp15.c6_region[7]) }, - REGINFO_SENTINEL }; -static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { - TCR *tcr = raw_ptr(env, ri); - int maskshift = extract32(value, 0, 3); + ARMCPU *cpu = env_archcpu(env); if (!arm_feature(env, ARM_FEATURE_V8)) { if (arm_feature(env, ARM_FEATURE_LPAE) && (value & TTBCR_EAE)) { - /* Pre ARMv8 bits [21:19], [15:14] and [6:3] are UNK/SBZP when - * using Long-desciptor translation table format */ + /* + * Pre ARMv8 bits [21:19], [15:14] and [6:3] are UNK/SBZP when + * using Long-descriptor translation table format + */ value &= ~((7 << 19) | (3 << 14) | (0xf << 3)); } else if (arm_feature(env, ARM_FEATURE_EL3)) { - /* In an implementation that includes the Security Extensions + /* + * In an implementation that includes the Security Extensions * TTBCR has additional fields PD0 [4] and PD1 [5] for * Short-descriptor translation table format. */ @@ -3917,55 +4339,24 @@ static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri, } } - /* Update the masks corresponding to the TCR bank being written - * Note that we always calculate mask and base_mask, but - * they are only used for short-descriptor tables (ie if EAE is 0); - * for long-descriptor tables the TCR fields are used differently - * and the mask and base_mask values are meaningless. - */ - tcr->raw_tcr = value; - tcr->mask = ~(((uint32_t)0xffffffffu) >> maskshift); - tcr->base_mask = ~((uint32_t)0x3fffu >> maskshift); -} - -static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) -{ - ARMCPU *cpu = env_archcpu(env); - TCR *tcr = raw_ptr(env, ri); - if (arm_feature(env, ARM_FEATURE_LPAE)) { - /* With LPAE the TTBCR could result in a change of ASID + /* + * With LPAE the TTBCR could result in a change of ASID * via the TTBCR.A1 bit, so do a TLB flush. */ tlb_flush(CPU(cpu)); } - /* Preserve the high half of TCR_EL1, set via TTBCR2. */ - value = deposit64(tcr->raw_tcr, 0, 32, value); - vmsa_ttbcr_raw_write(env, ri, value); -} - -static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri) -{ - TCR *tcr = raw_ptr(env, ri); - - /* Reset both the TCR as well as the masks corresponding to the bank of - * the TCR being reset. - */ - tcr->raw_tcr = 0; - tcr->mask = 0; - tcr->base_mask = 0xffffc000u; + raw_write(env, ri, value); } static void vmsa_tcr_el12_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { ARMCPU *cpu = env_archcpu(env); - TCR *tcr = raw_ptr(env, ri); /* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. 
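 * (TCR_EL1.A1 selects whether the current ASID comes from TTBR0_EL1 or
 * TTBR1_EL1, so flipping it can switch the live ASID without any write
 * to the TTBRs themselves.)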
*/ tlb_flush(CPU(cpu)); - tcr->raw_tcr = value; + raw_write(env, ri, value); } static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -3994,11 +4385,6 @@ static void vmsa_tcr_ttbr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, uint16_t mask = ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E20_2_PAN | ARMMMUIdxBit_E20_0; - - if (arm_is_secure_below_el3(env)) { - mask >>= ARM_MMU_IDX_A_NS; - } - tlb_flush_by_mmuidx(env_cpu(env), mask); } raw_write(env, ri, value); @@ -4012,20 +4398,12 @@ static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, /* * A change in VMID to the stage2 page table (Stage2) invalidates - * the combined stage 1&2 tlbs (EL10_1 and EL10_0). + * the stage2 and combined stage 1&2 tlbs (EL10_1 and EL10_0). */ - if (raw_read(env, ri) != value) { - uint16_t mask = ARMMMUIdxBit_E10_1 | - ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0; - - if (arm_is_secure_below_el3(env)) { - mask >>= ARM_MMU_IDX_A_NS; - } - - tlb_flush_by_mmuidx(cs, mask); - raw_write(env, ri, value); + if (extract64(raw_read(env, ri) ^ value, 48, 16) != 0) { + tlb_flush_by_mmuidx(cs, alle1_tlbmask(env)); } + raw_write(env, ri, value); } static const ARMCPRegInfo vmsa_pmsa_cp_reginfo[] = { @@ -4044,45 +4422,54 @@ static const ARMCPRegInfo vmsa_pmsa_cp_reginfo[] = { { .name = "FAR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, + .fgt = FGT_FAR_EL1, + .nv2_redirect_offset = 0x220 | NV2_REDIR_NV1, .fieldoffset = offsetof(CPUARMState, cp15.far_el[1]), .resetvalue = 0, }, - REGINFO_SENTINEL }; static const ARMCPRegInfo vmsa_cp_reginfo[] = { { .name = "ESR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .crn = 5, .crm = 2, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, + .fgt = FGT_ESR_EL1, + .nv2_redirect_offset = 0x138 | NV2_REDIR_NV1, .fieldoffset = offsetof(CPUARMState, cp15.esr_el[1]), .resetvalue = 0, }, { .name = "TTBR0_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, - .writefn = vmsa_ttbr_write, .resetvalue = 0, + .fgt = FGT_TTBR0_EL1, + .nv2_redirect_offset = 0x200 | NV2_REDIR_NV1, + .writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s), offsetof(CPUARMState, cp15.ttbr0_ns) } }, { .name = "TTBR1_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 1, .access = PL1_RW, .accessfn = access_tvm_trvm, - .writefn = vmsa_ttbr_write, .resetvalue = 0, + .fgt = FGT_TTBR1_EL1, + .nv2_redirect_offset = 0x210 | NV2_REDIR_NV1, + .writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s), offsetof(CPUARMState, cp15.ttbr1_ns) } }, { .name = "TCR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2, .access = PL1_RW, .accessfn = access_tvm_trvm, + .fgt = FGT_TCR_EL1, + .nv2_redirect_offset = 0x120 | NV2_REDIR_NV1, .writefn = vmsa_tcr_el12_write, - .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write, + .raw_writefn = raw_write, + .resetvalue = 0, .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[1]) }, { .name = "TTBCR", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2, .access = PL1_RW, .accessfn = access_tvm_trvm, .type = ARM_CP_ALIAS, .writefn = vmsa_ttbcr_write, - .raw_writefn = vmsa_ttbcr_raw_write, - /* No offsetoflow32 -- pass the entire TCR to writefn/raw_writefn. 
*/ - .bank_fieldoffsets = { offsetof(CPUARMState, cp15.tcr_el[3]), - offsetof(CPUARMState, cp15.tcr_el[1])} }, - REGINFO_SENTINEL + .raw_writefn = raw_write, + .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tcr_el[3]), + offsetoflow32(CPUARMState, cp15.tcr_el[1])} }, }; -/* Note that unlike TTBCR, writing to TTBCR2 does not require flushing +/* + * Note that unlike TTBCR, writing to TTBCR2 does not require flushing * qemu tlbs nor adjusting cached masks. */ static const ARMCPRegInfo ttbcr2_reginfo = { @@ -4090,8 +4477,8 @@ static const ARMCPRegInfo ttbcr2_reginfo = { .access = PL1_RW, .accessfn = access_tvm_trvm, .type = ARM_CP_ALIAS, .bank_fieldoffsets = { - offsetofhigh32(CPUARMState, cp15.tcr_el[3].raw_tcr), - offsetofhigh32(CPUARMState, cp15.tcr_el[1].raw_tcr), + offsetofhigh32(CPUARMState, cp15.tcr_el[3]), + offsetofhigh32(CPUARMState, cp15.tcr_el[1]), }, }; @@ -4120,7 +4507,8 @@ static void omap_wfi_write(CPUARMState *env, const ARMCPRegInfo *ri, static void omap_cachemaint_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* On OMAP there are registers indicating the max/min index of dcache lines + /* + * On OMAP there are registers indicating the max/min index of dcache lines * containing a dirty line; cache flush operations have to reset these. */ env->cp15.c15_i_max = 0x000; @@ -4152,7 +4540,8 @@ static const ARMCPRegInfo omap_cp_reginfo[] = { .crm = 8, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .type = ARM_CP_NO_RAW, .readfn = arm_cp_read_zero, .writefn = omap_wfi_write, }, - /* TODO: Peripheral port remap register: + /* + * TODO: Peripheral port remap register: * On OMAP2 mcr p15, 0, rn, c15, c2, 4 sets up the interrupt controller * base address at $rn & ~0xfff and map size of 0x200 << ($rn & 0xfff), * when MMU is off. @@ -4164,7 +4553,6 @@ static const ARMCPRegInfo omap_cp_reginfo[] = { { .name = "C9", .cp = 15, .crn = 9, .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_OVERRIDE, .resetvalue = 0 }, - REGINFO_SENTINEL }; static void xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4182,7 +4570,8 @@ static const ARMCPRegInfo xscale_cp_reginfo[] = { .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 1, .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_xscaleauxcr), .resetvalue = 0, }, - /* XScale specific cache-lockdown: since we have no cache we NOP these + /* + * XScale specific cache-lockdown: since we have no cache we NOP these * and hope the guest does not really rely on cache behaviour. */ { .name = "XSCALE_LOCK_ICACHE_LINE", @@ -4197,11 +4586,11 @@ static const ARMCPRegInfo xscale_cp_reginfo[] = { { .name = "XSCALE_UNLOCK_DCACHE", .cp = 15, .opc1 = 0, .crn = 9, .crm = 2, .opc2 = 1, .access = PL1_W, .type = ARM_CP_NOP }, - REGINFO_SENTINEL }; static const ARMCPRegInfo dummy_c15_cp_reginfo[] = { - /* RAZ/WI the whole crn=15 space, when we don't have a more specific + /* + * RAZ/WI the whole crn=15 space, when we don't have a more specific * implementation of this implementation-defined space. * Ideally this should eventually disappear in favour of actually * implementing the correct behaviour for all cores. 
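 * (crn == 15 is IMPLEMENTATION DEFINED territory, so presenting it as
 * RAZ/WI at least keeps guests that prod vendor-specific registers
 * running instead of faulting.)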
@@ -4211,7 +4600,6 @@ static const ARMCPRegInfo dummy_c15_cp_reginfo[] = { .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_NO_RAW | ARM_CP_OVERRIDE, .resetvalue = 0 }, - REGINFO_SENTINEL }; static const ARMCPRegInfo cache_dirty_status_cp_reginfo[] = { @@ -4219,32 +4607,31 @@ static const ARMCPRegInfo cache_dirty_status_cp_reginfo[] = { { .name = "CDSR", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, .resetvalue = 0 }, - REGINFO_SENTINEL }; static const ARMCPRegInfo cache_block_ops_cp_reginfo[] = { - /* We never have a a block transfer operation in progress */ + /* We never have a block transfer operation in progress */ { .name = "BXSR", .cp = 15, .crn = 7, .crm = 12, .opc1 = 0, .opc2 = 4, .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, .resetvalue = 0 }, /* The cache ops themselves: these all NOP for QEMU */ { .name = "IICR", .cp = 15, .crm = 5, .opc1 = 0, - .access = PL1_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + .access = PL1_W, .type = ARM_CP_NOP | ARM_CP_64BIT }, { .name = "IDCR", .cp = 15, .crm = 6, .opc1 = 0, - .access = PL1_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + .access = PL1_W, .type = ARM_CP_NOP | ARM_CP_64BIT }, { .name = "CDCR", .cp = 15, .crm = 12, .opc1 = 0, - .access = PL0_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + .access = PL0_W, .type = ARM_CP_NOP | ARM_CP_64BIT }, { .name = "PIR", .cp = 15, .crm = 12, .opc1 = 1, - .access = PL0_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + .access = PL0_W, .type = ARM_CP_NOP | ARM_CP_64BIT }, { .name = "PDR", .cp = 15, .crm = 12, .opc1 = 2, - .access = PL0_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, + .access = PL0_W, .type = ARM_CP_NOP | ARM_CP_64BIT }, { .name = "CIDCR", .cp = 15, .crm = 14, .opc1 = 0, - .access = PL1_W, .type = ARM_CP_NOP|ARM_CP_64BIT }, - REGINFO_SENTINEL + .access = PL1_W, .type = ARM_CP_NOP | ARM_CP_64BIT }, }; static const ARMCPRegInfo cache_test_clean_cp_reginfo[] = { - /* The cache test-and-clean instructions always return (1 << 30) + /* + * The cache test-and-clean instructions always return (1 << 30) * to indicate that there are no dirty cache lines. */ { .name = "TC_DCACHE", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 3, @@ -4253,7 +4640,6 @@ static const ARMCPRegInfo cache_test_clean_cp_reginfo[] = { { .name = "TCI_DCACHE", .cp = 15, .crn = 7, .crm = 14, .opc1 = 0, .opc2 = 3, .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW, .resetvalue = (1 << 30) }, - REGINFO_SENTINEL }; static const ARMCPRegInfo strongarm_cp_reginfo[] = { @@ -4262,7 +4648,6 @@ static const ARMCPRegInfo strongarm_cp_reginfo[] = { .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST | ARM_CP_OVERRIDE | ARM_CP_NO_RAW }, - REGINFO_SENTINEL }; static uint64_t midr_read(CPUARMState *env, const ARMCPRegInfo *ri) @@ -4282,7 +4667,8 @@ static uint64_t mpidr_read_val(CPUARMState *env) if (arm_feature(env, ARM_FEATURE_V7MP)) { mpidr |= (1U << 31); - /* Cores which are uniprocessor (non-coherent) + /* + * Cores which are uniprocessor (non-coherent) * but still implement the MP extensions set * bit 30. (For instance, Cortex-R5). 
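+ * Such a core therefore reads MPIDR with both bit 31 (the format bit
+ * set below) and bit 30 (U, the uniprocessor bit) set.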
*/ @@ -4308,6 +4694,8 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = { { .name = "AMAIR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, + .fgt = FGT_AMAIR_EL1, + .nv2_redirect_offset = 0x148 | NV2_REDIR_NV1, .type = ARM_CP_CONST, .resetvalue = 0 }, /* AMAIR1 is mapped to AMAIR_EL1[63:32] */ { .name = "AMAIR1", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 1, @@ -4322,14 +4710,13 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = { .type = ARM_CP_64BIT | ARM_CP_ALIAS, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s), offsetof(CPUARMState, cp15.ttbr0_ns) }, - .writefn = vmsa_ttbr_write, }, + .writefn = vmsa_ttbr_write, .raw_writefn = raw_write }, { .name = "TTBR1", .cp = 15, .crm = 2, .opc1 = 1, .access = PL1_RW, .accessfn = access_tvm_trvm, .type = ARM_CP_64BIT | ARM_CP_ALIAS, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s), offsetof(CPUARMState, cp15.ttbr1_ns) }, - .writefn = vmsa_ttbr_write, }, - REGINFO_SENTINEL + .writefn = vmsa_ttbr_write, .raw_writefn = raw_write }, }; static uint64_t aa64_fpcr_read(CPUARMState *env, const ARMCPRegInfo *ri) @@ -4463,9 +4850,7 @@ static CPAccessResult aa64_cacheop_poc_access(CPUARMState *env, return CP_ACCESS_OK; } -static CPAccessResult aa64_cacheop_pou_access(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) +static CPAccessResult do_cacheop_pou_access(CPUARMState *env, uint64_t hcrflags) { /* Cache invalidate/clean to Point of Unification... */ switch (arm_current_el(env)) { @@ -4476,8 +4861,8 @@ static CPAccessResult aa64_cacheop_pou_access(CPUARMState *env, } /* fall through */ case 1: - /* ... EL1 must trap to EL2 if HCR_EL2.TPU is set. */ - if (arm_hcr_el2_eff(env) & HCR_TPU) { + /* ... EL1 must trap to EL2 if relevant HCR_EL2 flags are set. 
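+ * (With FEAT_EVT the old single TPU trap gains finer-grained
+ * companions: the access_ticab() and access_tocu() wrappers below pass
+ * HCR_TICAB or HCR_TOCU respectively, each still ORed with HCR_TPU, so
+ * pre-EVT behaviour is unchanged.)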
*/ + if (arm_hcr_el2_eff(env) & hcrflags) { return CP_ACCESS_TRAP_EL2; } break; @@ -4485,7 +4870,20 @@ static CPAccessResult aa64_cacheop_pou_access(CPUARMState *env, return CP_ACCESS_OK; } -/* See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions +static CPAccessResult access_ticab(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + return do_cacheop_pou_access(env, HCR_TICAB | HCR_TPU); +} + +static CPAccessResult access_tocu(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + return do_cacheop_pou_access(env, HCR_TOCU | HCR_TPU); +} + +/* + * See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions * Page D4-1736 (DDI0487A.b) */ @@ -4503,11 +4901,21 @@ static int vae1_tlbmask(CPUARMState *env) ARMMMUIdxBit_E10_1_PAN | ARMMMUIdxBit_E10_0; } + return mask; +} - if (arm_is_secure_below_el3(env)) { - mask >>= ARM_MMU_IDX_A_NS; - } +static int vae2_tlbmask(CPUARMState *env) +{ + uint64_t hcr = arm_hcr_el2_eff(env); + uint16_t mask; + if (hcr & HCR_E2H) { + mask = ARMMMUIdxBit_E20_2 | + ARMMMUIdxBit_E20_2_PAN | + ARMMMUIdxBit_E20_0; + } else { + mask = ARMMMUIdxBit_E2; + } return mask; } @@ -4515,7 +4923,7 @@ static int vae1_tlbmask(CPUARMState *env) static int tlbbits_for_regime(CPUARMState *env, ARMMMUIdx mmu_idx, uint64_t addr) { - uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr; + uint64_t tcr = regime_tcr(env, mmu_idx); int tbi = aa64_va_parameter_tbi(tcr, mmu_idx); int select = extract64(addr, 55, 1); @@ -4534,8 +4942,23 @@ static int vae1_tlbbits(CPUARMState *env, uint64_t addr) mmu_idx = ARMMMUIdx_E10_0; } - if (arm_is_secure_below_el3(env)) { - mmu_idx &= ~ARM_MMU_IDX_A_NS; + return tlbbits_for_regime(env, mmu_idx, addr); +} + +static int vae2_tlbbits(CPUARMState *env, uint64_t addr) +{ + uint64_t hcr = arm_hcr_el2_eff(env); + ARMMMUIdx mmu_idx; + + /* + * Only the regime of the mmu_idx below is significant. + * Regime EL2&0 has two ranges with separate TBI configuration, while EL2 + * only has one. + */ + if (hcr & HCR_E2H) { + mmu_idx = ARMMMUIdx_E20_2; + } else { + mmu_idx = ARMMMUIdx_E2; } return tlbbits_for_regime(env, mmu_idx, addr); @@ -4563,37 +4986,12 @@ static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri, } } -static int alle1_tlbmask(CPUARMState *env) -{ - /* - * Note that the 'ALL' scope must invalidate both stage 1 and - * stage 2 translations, whereas most other scopes only invalidate - * stage 1 translations. 
- */ - if (arm_is_secure_below_el3(env)) { - return ARMMMUIdxBit_SE10_1 | - ARMMMUIdxBit_SE10_1_PAN | - ARMMMUIdxBit_SE10_0; - } else { - return ARMMMUIdxBit_E10_1 | - ARMMMUIdxBit_E10_1_PAN | - ARMMMUIdxBit_E10_0; - } -} - static int e2_tlbmask(CPUARMState *env) { - if (arm_is_secure_below_el3(env)) { - return ARMMMUIdxBit_SE20_0 | - ARMMMUIdxBit_SE20_2 | - ARMMMUIdxBit_SE20_2_PAN | - ARMMMUIdxBit_SE2; - } else { - return ARMMMUIdxBit_E20_0 | - ARMMMUIdxBit_E20_2 | - ARMMMUIdxBit_E20_2_PAN | - ARMMMUIdxBit_E2; - } + return (ARMMMUIdxBit_E20_0 | + ARMMMUIdxBit_E20_2 | + ARMMMUIdxBit_E20_2_PAN | + ARMMMUIdxBit_E2); } static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4620,7 +5018,7 @@ static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri, ARMCPU *cpu = env_archcpu(env); CPUState *cs = CPU(cpu); - tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_SE3); + tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E3); } static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4646,27 +5044,30 @@ static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); - tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_SE3); + tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E3); } static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* Invalidate by VA, EL2 + /* + * Invalidate by VA, EL2 * Currently handles both VAE2 and VALE2, since we don't support * flush-last-level-only. */ CPUState *cs = env_cpu(env); - int mask = e2_tlbmask(env); + int mask = vae2_tlbmask(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); + int bits = vae2_tlbbits(env, pageaddr); - tlb_flush_page_by_mmuidx(cs, pageaddr, mask); + tlb_flush_page_bits_by_mmuidx(cs, pageaddr, mask, bits); } static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* Invalidate by VA, EL3 + /* + * Invalidate by VA, EL3 * Currently handles both VAE3 and VALE3, since we don't support * flush-last-level-only. */ @@ -4674,7 +5075,7 @@ static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *cs = CPU(cpu); uint64_t pageaddr = sextract64(value << 12, 0, 56); - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_SE3); + tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E3); } static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -4691,7 +5092,8 @@ static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - /* Invalidate by VA, EL1&0 (AArch64 version). + /* + * Invalidate by VA, EL1&0 (AArch64 version). * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1, * since we don't support flush-for-specific-ASID-only or * flush-last-level-only. @@ -4712,11 +5114,9 @@ static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { CPUState *cs = env_cpu(env); + int mask = vae2_tlbmask(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); - bool secure = arm_is_secure_below_el3(env); - int mask = secure ? ARMMMUIdxBit_SE2 : ARMMMUIdxBit_E2; - int bits = tlbbits_for_regime(env, secure ? 
ARMMMUIdx_SE2 : ARMMMUIdx_E2, - pageaddr); + int bits = vae2_tlbbits(env, pageaddr); tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits); } @@ -4726,77 +5126,137 @@ static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = env_cpu(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); - int bits = tlbbits_for_regime(env, ARMMMUIdx_SE3, pageaddr); + int bits = tlbbits_for_regime(env, ARMMMUIdx_E3, pageaddr); tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, - ARMMMUIdxBit_SE3, bits); + ARMMMUIdxBit_E3, bits); +} + +static int ipas2e1_tlbmask(CPUARMState *env, int64_t value) +{ + /* + * The MSB of value is the NS field, which only applies if SEL2 + * is implemented and SCR_EL3.NS is not set (i.e. in secure mode). + */ + return (value >= 0 + && cpu_isar_feature(aa64_sel2, env_archcpu(env)) + && arm_is_secure_below_el3(env) + ? ARMMMUIdxBit_Stage2_S + : ARMMMUIdxBit_Stage2); +} + +static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + int mask = ipas2e1_tlbmask(env, value); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + + if (tlb_force_broadcast(env)) { + tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask); + } else { + tlb_flush_page_by_mmuidx(cs, pageaddr, mask); + } +} + +static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + CPUState *cs = env_cpu(env); + int mask = ipas2e1_tlbmask(env, value); + uint64_t pageaddr = sextract64(value << 12, 0, 56); + + tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask); } #ifdef TARGET_AARCH64 -static uint64_t tlbi_aa64_range_get_length(CPUARMState *env, - uint64_t value) -{ - unsigned int page_shift; - unsigned int page_size_granule; - uint64_t num; - uint64_t scale; - uint64_t exponent; +typedef struct { + uint64_t base; uint64_t length; +} TLBIRange; - num = extract64(value, 39, 5); - scale = extract64(value, 44, 2); - page_size_granule = extract64(value, 46, 2); +static ARMGranuleSize tlbi_range_tg_to_gran_size(int tg) +{ + /* + * Note that the TLBI range TG field encoding differs from both + * TG0 and TG1 encodings. + */ + switch (tg) { + case 1: + return Gran4K; + case 2: + return Gran16K; + case 3: + return Gran64K; + default: + return GranInvalid; + } +} + +static TLBIRange tlbi_aa64_get_range(CPUARMState *env, ARMMMUIdx mmuidx, + uint64_t value) +{ + unsigned int page_size_granule, page_shift, num, scale, exponent; + /* Extract one bit to represent the va selector in use. */ + uint64_t select = sextract64(value, 36, 1); + ARMVAParameters param = aa64_va_parameters(env, select, mmuidx, true, false); + TLBIRange ret = { }; + ARMGranuleSize gran; - page_shift = page_size_granule * 2 + 12; + page_size_granule = extract64(value, 46, 2); + gran = tlbi_range_tg_to_gran_size(page_size_granule); - if (page_size_granule == 0) { - qemu_log_mask(LOG_GUEST_ERROR, "Invalid page size granule %d\n", + /* The granule encoded in value must match the granule in use. 
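+ *
+ * As a worked example of the decode that follows: with a 4K granule
+ * in use, page_shift == 12, so NUM == 0, SCALE == 0 gives
+ * exponent == 1 and a length of 1 << 13 (two pages), while NUM == 31,
+ * SCALE == 3 gives exponent == 16 and a length of 32 << 28, an 8GiB
+ * invalidation range.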
*/ + if (gran != param.gran) { + qemu_log_mask(LOG_GUEST_ERROR, "Invalid tlbi page size granule %d\n", page_size_granule); - return 0; + return ret; } + page_shift = arm_granule_bits(gran); + num = extract64(value, 39, 5); + scale = extract64(value, 44, 2); exponent = (5 * scale) + 1; - length = (num + 1) << (exponent + page_shift); - return length; -} - -static uint64_t tlbi_aa64_range_get_base(CPUARMState *env, uint64_t value, - bool two_ranges) -{ - /* TODO: ARMv8.7 FEAT_LPA2 */ - uint64_t pageaddr; + ret.length = (num + 1) << (exponent + page_shift); - if (two_ranges) { - pageaddr = sextract64(value, 0, 37) << TARGET_PAGE_BITS; + if (param.select) { + ret.base = sextract64(value, 0, 37); } else { - pageaddr = extract64(value, 0, 37) << TARGET_PAGE_BITS; + ret.base = extract64(value, 0, 37); + } + if (param.ds) { + /* + * With DS=1, BaseADDR is always shifted 16 so that it is able + * to address all 52 va bits. The input address is perforce + * aligned on a 64k boundary regardless of translation granule. + */ + page_shift = 16; } + ret.base <<= page_shift; - return pageaddr; + return ret; } static void do_rvae_write(CPUARMState *env, uint64_t value, int idxmap, bool synced) { ARMMMUIdx one_idx = ARM_MMU_IDX_A | ctz32(idxmap); - bool two_ranges = regime_has_2_ranges(one_idx); - uint64_t baseaddr, length; + TLBIRange range; int bits; - baseaddr = tlbi_aa64_range_get_base(env, value, two_ranges); - length = tlbi_aa64_range_get_length(env, value); - bits = tlbbits_for_regime(env, one_idx, baseaddr); + range = tlbi_aa64_get_range(env, one_idx, value); + bits = tlbbits_for_regime(env, one_idx, range.base); if (synced) { tlb_flush_range_by_mmuidx_all_cpus_synced(env_cpu(env), - baseaddr, - length, + range.base, + range.length, idxmap, bits); } else { - tlb_flush_range_by_mmuidx(env_cpu(env), baseaddr, - length, idxmap, bits); + tlb_flush_range_by_mmuidx(env_cpu(env), range.base, + range.length, idxmap, bits); } } @@ -4830,12 +5290,6 @@ static void tlbi_aa64_rvae1is_write(CPUARMState *env, do_rvae_write(env, value, vae1_tlbmask(env), true); } -static int vae2_tlbmask(CPUARMState *env) -{ - return (arm_is_secure_below_el3(env) - ? ARMMMUIdxBit_SE2 : ARMMMUIdxBit_E2); -} - static void tlbi_aa64_rvae2_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) @@ -4879,8 +5333,7 @@ static void tlbi_aa64_rvae3_write(CPUARMState *env, * flush-last-level-only. */ - do_rvae_write(env, value, ARMMMUIdxBit_SE3, - tlb_force_broadcast(env)); + do_rvae_write(env, value, ARMMMUIdxBit_E3, tlb_force_broadcast(env)); } static void tlbi_aa64_rvae3is_write(CPUARMState *env, @@ -4894,7 +5347,21 @@ static void tlbi_aa64_rvae3is_write(CPUARMState *env, * flush-last-level-only or inner/outer specific flushes. */ - do_rvae_write(env, value, ARMMMUIdxBit_SE3, true); + do_rvae_write(env, value, ARMMMUIdxBit_E3, true); +} + +static void tlbi_aa64_ripas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + do_rvae_write(env, value, ipas2e1_tlbmask(env, value), + tlb_force_broadcast(env)); +} + +static void tlbi_aa64_ripas2e1is_write(CPUARMState *env, + const ARMCPRegInfo *ri, + uint64_t value) +{ + do_rvae_write(env, value, ipas2e1_tlbmask(env, value), true); } #endif @@ -4942,7 +5409,8 @@ static CPAccessResult sp_el0_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { if (!(env->pstate & PSTATE_SP)) { - /* Access to SP_EL0 is undefined if it's being used as + /* + * Access to SP_EL0 is undefined if it's being used as * the stack pointer. 
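 * (When PSTATE.SP is 0, SP_EL0 is the stack pointer in active use and
 * the architecture makes explicit MRS/MSR accesses to it UNDEFINED,
 * hence the uncategorized trap below.)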
*/ return CP_ACCESS_TRAP_UNCATEGORIZED; @@ -4982,7 +5450,8 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, } if (raw_read(env, ri) == value) { - /* Skip the TLB flush if nothing actually changed; Linux likes + /* + * Skip the TLB flush if nothing actually changed; Linux likes * to do a lot of pointless SCTLR writes. */ return; @@ -4993,7 +5462,7 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, /* This may enable/disable the MMU, so do a TLB flush. */ tlb_flush(CPU(cpu)); - if (ri->type & ARM_CP_SUPPRESS_TB_END) { + if (tcg_enabled() && ri->type & ARM_CP_SUPPRESS_TB_END) { /* * Normally we would always end the TB on an SCTLR write; see the * comment in ARMCPRegInfo sctlr initialization below for why Xscale @@ -5004,26 +5473,97 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri, } } -static CPAccessResult fpexc32_access(CPUARMState *env, const ARMCPRegInfo *ri, - bool isread) +static void mdcr_el3_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { - if ((env->cp15.cptr_el[2] & CPTR_TFP) && arm_current_el(env) == 2) { - return CP_ACCESS_TRAP_FP_EL2; + /* + * Some MDCR_EL3 bits affect whether PMU counters are running: + * if we are trying to change any of those then we must + * bracket this update with PMU start/finish calls. + */ + bool pmu_op = (env->cp15.mdcr_el3 ^ value) & MDCR_EL3_PMU_ENABLE_BITS; + + if (pmu_op) { + pmu_op_start(env); } - if (env->cp15.cptr_el[3] & CPTR_TFP) { - return CP_ACCESS_TRAP_FP_EL3; + env->cp15.mdcr_el3 = value; + if (pmu_op) { + pmu_op_finish(env); } - return CP_ACCESS_OK; } static void sdcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - env->cp15.mdcr_el3 = value & SDCR_VALID_MASK; + /* Not all bits defined for MDCR_EL3 exist in the AArch32 SDCR */ + mdcr_el3_write(env, ri, value & SDCR_VALID_MASK); +} + +static void mdcr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * Some MDCR_EL2 bits affect whether PMU counters are running: + * if we are trying to change any of those then we must + * bracket this update with PMU start/finish calls. + */ + bool pmu_op = (env->cp15.mdcr_el2 ^ value) & MDCR_EL2_PMU_ENABLE_BITS; + + if (pmu_op) { + pmu_op_start(env); + } + env->cp15.mdcr_el2 = value; + if (pmu_op) { + pmu_op_finish(env); + } +} + +static CPAccessResult access_nv1(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1) { + uint64_t hcr_nv = arm_hcr_el2_eff(env) & (HCR_NV | HCR_NV1 | HCR_NV2); + + if (hcr_nv == (HCR_NV | HCR_NV1)) { + return CP_ACCESS_TRAP_EL2; + } + } + return CP_ACCESS_OK; +} + +#ifdef CONFIG_USER_ONLY +/* + * `IC IVAU` is handled to improve compatibility with JITs that dual-map their + * code to get around W^X restrictions, where one region is writable and the + * other is executable. + * + * Since the executable region is never written to we cannot detect code + * changes when running in user mode, and rely on the emulated JIT telling us + * that the code has changed by executing this instruction. 
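+ *
+ * For example, CTR_EL0.IminLine == 4 (log2 of the minimum I-cache line
+ * size in words) means a 64-byte line: icache_line_mask below becomes
+ * 63, and [start_address, end_address] is the written VA rounded down
+ * and up to the bounds of its cache line.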
+ */ +static void ic_ivau_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint64_t icache_line_mask, start_address, end_address; + const ARMCPU *cpu; + + cpu = env_archcpu(env); + + icache_line_mask = (4 << extract32(cpu->ctr, 0, 4)) - 1; + start_address = value & ~icache_line_mask; + end_address = value | icache_line_mask; + + mmap_lock(); + + tb_invalidate_phys_range(start_address, end_address); + + mmap_unlock(); } +#endif static const ARMCPRegInfo v8_cp_reginfo[] = { - /* Minimal set of EL0-visible registers. This will need to be expanded + /* + * Minimal set of EL0-visible registers. This will need to be expanded * significantly for system emulation of AArch64 CPUs. */ { .name = "NZCV", .state = ARM_CP_STATE_AA64, @@ -5046,6 +5586,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = "DCZID_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0, .access = PL0_R, .type = ARM_CP_NO_RAW, + .fgt = FGT_DCZID_EL0, .readfn = aa64_dczid_read }, { .name = "DC_ZVA", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 1, @@ -5053,104 +5594,140 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { #ifndef CONFIG_USER_ONLY /* Avoid overhead of an access check that always passes in user-mode */ .accessfn = aa64_zva_access, + .fgt = FGT_DCZVA, #endif }, { .name = "CURRENTEL", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 4, .crm = 2, .access = PL1_R, .type = ARM_CP_CURRENTEL }, - /* Cache ops: all NOPs since we don't emulate caches */ + /* + * Instruction cache ops. All of these except `IC IVAU` NOP because we + * don't emulate caches. + */ { .name = "IC_IALLUIS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, .access = PL1_W, .type = ARM_CP_NOP, - .accessfn = aa64_cacheop_pou_access }, + .fgt = FGT_ICIALLUIS, + .accessfn = access_ticab }, { .name = "IC_IALLU", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 0, .access = PL1_W, .type = ARM_CP_NOP, - .accessfn = aa64_cacheop_pou_access }, + .fgt = FGT_ICIALLU, + .accessfn = access_tocu }, { .name = "IC_IVAU", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 5, .opc2 = 1, - .access = PL0_W, .type = ARM_CP_NOP, - .accessfn = aa64_cacheop_pou_access }, + .access = PL0_W, + .fgt = FGT_ICIVAU, + .accessfn = access_tocu, +#ifdef CONFIG_USER_ONLY + .type = ARM_CP_NO_RAW, + .writefn = ic_ivau_write +#else + .type = ARM_CP_NOP +#endif + }, + /* Cache ops: all NOPs since we don't emulate caches */ { .name = "DC_IVAC", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1, .access = PL1_W, .accessfn = aa64_cacheop_poc_access, + .fgt = FGT_DCIVAC, .type = ARM_CP_NOP }, { .name = "DC_ISW", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 2, + .fgt = FGT_DCISW, .access = PL1_W, .accessfn = access_tsw, .type = ARM_CP_NOP }, { .name = "DC_CVAC", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 1, .access = PL0_W, .type = ARM_CP_NOP, + .fgt = FGT_DCCVAC, .accessfn = aa64_cacheop_poc_access }, { .name = "DC_CSW", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2, + .fgt = FGT_DCCSW, .access = PL1_W, .accessfn = access_tsw, .type = ARM_CP_NOP }, { .name = "DC_CVAU", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 11, .opc2 = 1, .access = PL0_W, .type = ARM_CP_NOP, - .accessfn = aa64_cacheop_pou_access }, + .fgt = FGT_DCCVAU, + .accessfn = access_tocu }, 
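+ /*
+ * The .fgt annotations through here tie each op to its FEAT_FGT
+ * fine-grained trap bit (HFGITR_EL2 for these instructions); an op
+ * with no annotation simply has no corresponding FGT trap.
+ */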
{ .name = "DC_CIVAC", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 1, .access = PL0_W, .type = ARM_CP_NOP, + .fgt = FGT_DCCIVAC, .accessfn = aa64_cacheop_poc_access }, { .name = "DC_CISW", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2, + .fgt = FGT_DCCISW, .access = PL1_W, .accessfn = access_tsw, .type = ARM_CP_NOP }, /* TLBI operations */ { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIVMALLE1IS, .writefn = tlbi_aa64_vmalle1is_write }, { .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIVAE1IS, .writefn = tlbi_aa64_vae1is_write }, { .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIASIDE1IS, .writefn = tlbi_aa64_vmalle1is_write }, { .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIVAAE1IS, .writefn = tlbi_aa64_vae1is_write }, { .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIVALE1IS, .writefn = tlbi_aa64_vae1is_write }, { .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIVAALE1IS, .writefn = tlbi_aa64_vae1is_write }, { .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0, .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIVMALLE1, .writefn = tlbi_aa64_vmalle1_write }, { .name = "TLBI_VAE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1, .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIVAE1, .writefn = tlbi_aa64_vae1_write }, { .name = "TLBI_ASIDE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2, .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIASIDE1, .writefn = tlbi_aa64_vmalle1_write }, { .name = "TLBI_VAAE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3, .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIVAAE1, .writefn = tlbi_aa64_vae1_write }, { .name = "TLBI_VALE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5, .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIVALE1, .writefn = tlbi_aa64_vae1_write }, { .name = "TLBI_VAALE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7, .access = PL1_W, .accessfn = 
access_ttlb, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIVAALE1, .writefn = tlbi_aa64_vae1_write }, { .name = "TLBI_IPAS2E1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NOP }, + .access = PL2_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_ipas2e1is_write }, { .name = "TLBI_IPAS2LE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NOP }, + .access = PL2_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_ipas2e1is_write }, { .name = "TLBI_ALLE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 4, .access = PL2_W, .type = ARM_CP_NO_RAW, @@ -5161,10 +5738,12 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbi_aa64_alle1is_write }, { .name = "TLBI_IPAS2E1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NOP }, + .access = PL2_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_ipas2e1_write }, { .name = "TLBI_IPAS2LE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NOP }, + .access = PL2_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_ipas2e1_write }, { .name = "TLBI_ALLE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 4, .access = PL2_W, .type = ARM_CP_NO_RAW, @@ -5178,35 +5757,39 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = "AT_S1E1R", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 0, .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write64 }, + .fgt = FGT_ATS1E1R, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, { .name = "AT_S1E1W", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 1, .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write64 }, + .fgt = FGT_ATS1E1W, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, { .name = "AT_S1E0R", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 2, .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write64 }, + .fgt = FGT_ATS1E0R, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, { .name = "AT_S1E0W", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 3, .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write64 }, + .fgt = FGT_ATS1E0W, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, { .name = "AT_S12E1R", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 4, .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write64 }, + .accessfn = at_e012_access, .writefn = ats_write64 }, { .name = "AT_S12E1W", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 5, .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write64 }, + .accessfn = at_e012_access, .writefn = ats_write64 }, { .name = "AT_S12E0R", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 6, .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write64 }, + .accessfn = at_e012_access, .writefn = ats_write64 }, { .name = "AT_S12E0W", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 7, .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write64 }, + .accessfn = at_e012_access, .writefn = 
ats_write64 }, /* AT S1E2* are elsewhere as they UNDEF from EL3 if EL2 is not present */ { .name = "AT_S1E3R", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 8, .opc2 = 0, @@ -5220,15 +5803,16 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 0, .crn = 7, .crm = 4, .opc2 = 0, .access = PL1_RW, .resetvalue = 0, + .fgt = FGT_PAR_EL1, .fieldoffset = offsetof(CPUARMState, cp15.par_el[1]), .writefn = par_write }, #endif /* TLB invalidate last level of translation table walk */ { .name = "TLBIMVALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, .writefn = tlbimva_is_write }, { .name = "TLBIMVAALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, .writefn = tlbimvaa_is_write }, { .name = "TLBIMVAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5, .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, @@ -5245,25 +5829,29 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbimva_hyp_is_write }, { .name = "TLBIIPAS2", .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1, - .type = ARM_CP_NOP, .access = PL2_W }, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbiipas2_hyp_write }, { .name = "TLBIIPAS2IS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1, - .type = ARM_CP_NOP, .access = PL2_W }, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbiipas2is_hyp_write }, { .name = "TLBIIPAS2L", .cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5, - .type = ARM_CP_NOP, .access = PL2_W }, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbiipas2_hyp_write }, { .name = "TLBIIPAS2LIS", .cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5, - .type = ARM_CP_NOP, .access = PL2_W }, + .type = ARM_CP_NO_RAW, .access = PL2_W, + .writefn = tlbiipas2is_hyp_write }, /* 32 bit cache operations */ { .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, - .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access }, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_ticab }, { .name = "BPIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 6, .type = ARM_CP_NOP, .access = PL1_W }, { .name = "ICIALLU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 0, - .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access }, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tocu }, { .name = "ICIMVAU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 1, - .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access }, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tocu }, { .name = "BPIALL", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 6, .type = ARM_CP_NOP, .access = PL1_W }, { .name = "BPIMVA", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 7, @@ -5277,7 +5865,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = "DCCSW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, { .name = "DCCMVAU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 11, .opc2 = 1, - .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access }, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tocu }, { .name = "DCCIMVAC", .cp = 15, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 1, .type = ARM_CP_NOP, 
.access = PL1_W, .accessfn = aa64_cacheop_poc_access }, { .name = "DCCISW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2, @@ -5291,14 +5879,17 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = "ELR_EL1", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 1, - .access = PL1_RW, + .access = PL1_RW, .accessfn = access_nv1, + .nv2_redirect_offset = 0x230 | NV2_REDIR_NV1, .fieldoffset = offsetof(CPUARMState, elr_el[1]) }, { .name = "SPSR_EL1", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0, - .access = PL1_RW, + .access = PL1_RW, .accessfn = access_nv1, + .nv2_redirect_offset = 0x160 | NV2_REDIR_NV1, .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_SVC]) }, - /* We rely on the access checks not allowing the guest to write to the + /* + * We rely on the access checks not allowing the guest to write to the * state field when SPSel indicates that it's being used as the stack * pointer. */ @@ -5309,26 +5900,13 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .fieldoffset = offsetof(CPUARMState, sp_el[0]) }, { .name = "SP_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 1, .opc2 = 0, - .access = PL2_RW, .type = ARM_CP_ALIAS, + .nv2_redirect_offset = 0x240, + .access = PL2_RW, .type = ARM_CP_ALIAS | ARM_CP_EL3_NO_EL2_KEEP, .fieldoffset = offsetof(CPUARMState, sp_el[1]) }, { .name = "SPSel", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 2, .opc2 = 0, .type = ARM_CP_NO_RAW, .access = PL1_RW, .readfn = spsel_read, .writefn = spsel_write }, - { .name = "FPEXC32_EL2", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 3, .opc2 = 0, - .type = ARM_CP_ALIAS, - .fieldoffset = offsetof(CPUARMState, vfp.xregs[ARM_VFP_FPEXC]), - .access = PL2_RW, .accessfn = fpexc32_access }, - { .name = "DACR32_EL2", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 4, .crn = 3, .crm = 0, .opc2 = 0, - .access = PL2_RW, .resetvalue = 0, - .writefn = dacr_write, .raw_writefn = raw_write, - .fieldoffset = offsetof(CPUARMState, cp15.dacr32_el2) }, - { .name = "IFSR32_EL2", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 0, .opc2 = 1, - .access = PL2_RW, .resetvalue = 0, - .fieldoffset = offsetof(CPUARMState, cp15.ifsr32_el2) }, { .name = "SPSR_IRQ", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 0, @@ -5350,135 +5928,35 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_FIQ]) }, { .name = "MDCR_EL3", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_IO, .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 3, .opc2 = 1, .resetvalue = 0, - .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.mdcr_el3) }, - { .name = "SDCR", .type = ARM_CP_ALIAS, + .access = PL3_RW, + .writefn = mdcr_el3_write, + .fieldoffset = offsetof(CPUARMState, cp15.mdcr_el3) }, + { .name = "SDCR", .type = ARM_CP_ALIAS | ARM_CP_IO, .cp = 15, .opc1 = 0, .crn = 1, .crm = 3, .opc2 = 1, .access = PL1_RW, .accessfn = access_trap_aa32s_el1, .writefn = sdcr_write, .fieldoffset = offsetoflow32(CPUARMState, cp15.mdcr_el3) }, - REGINFO_SENTINEL -}; - -/* Used to describe the behaviour of EL2 regs when EL2 does not exist. 
*/ -static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = { - { .name = "VBAR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 0, - .access = PL2_RW, - .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore }, - { .name = "HCR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, - .access = PL2_RW, - .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "HACR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 7, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0, - .access = PL2_RW, - .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "CPTR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 2, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "MAIR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 0, - .access = PL2_RW, .type = ARM_CP_CONST, - .resetvalue = 0 }, - { .name = "HMAIR1", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 1, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "AMAIR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 10, .crm = 3, .opc2 = 0, - .access = PL2_RW, .type = ARM_CP_CONST, - .resetvalue = 0 }, - { .name = "HAMAIR1", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 4, .crn = 10, .crm = 3, .opc2 = 1, - .access = PL2_RW, .type = ARM_CP_CONST, - .resetvalue = 0 }, - { .name = "AFSR0_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 1, .opc2 = 0, - .access = PL2_RW, .type = ARM_CP_CONST, - .resetvalue = 0 }, - { .name = "AFSR1_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 1, .opc2 = 1, - .access = PL2_RW, .type = ARM_CP_CONST, - .resetvalue = 0 }, - { .name = "TCR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 2, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "VTCR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2, - .access = PL2_RW, .accessfn = access_el3_aa32ns, - .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "VTTBR", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 6, .crm = 2, - .access = PL2_RW, .accessfn = access_el3_aa32ns, - .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 }, - { .name = "VTTBR_EL2", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 0, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "TPIDR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 2, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "TTBR0_EL2", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 0, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "HTTBR", .cp = 15, .opc1 = 4, .crm = 2, - .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_CONST, - .resetvalue = 0 }, - { .name = "CNTHCTL_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 1, .opc2 = 0, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "CNTVOFF_EL2", .state = ARM_CP_STATE_AA64, - .opc0 = 3, 
.opc1 = 4, .crn = 14, .crm = 0, .opc2 = 3, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "CNTVOFF", .cp = 15, .opc1 = 4, .crm = 14, - .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_CONST, - .resetvalue = 0 }, - { .name = "CNTHP_CVAL_EL2", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 2, .opc2 = 2, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "CNTHP_CVAL", .cp = 15, .opc1 = 6, .crm = 14, - .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_CONST, - .resetvalue = 0 }, - { .name = "CNTHP_TVAL_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 2, .opc2 = 0, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "CNTHP_CTL_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 2, .opc2 = 1, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "MDCR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 1, - .access = PL2_RW, .accessfn = access_tda, - .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "HPFAR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4, - .access = PL2_RW, .accessfn = access_el3_aa32ns, - .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "HSTR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 3, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "FAR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 0, - .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "HIFAR", .state = ARM_CP_STATE_AA32, - .type = ARM_CP_CONST, - .cp = 15, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 2, - .access = PL2_RW, .resetvalue = 0 }, - REGINFO_SENTINEL }; -/* Ditto, but for registers which exist in ARMv8 but not v7 */ -static const ARMCPRegInfo el3_no_el2_v8_cp_reginfo[] = { - { .name = "HCR2", .state = ARM_CP_STATE_AA32, - .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 4, +/* These are present only when EL1 supports AArch32 */ +static const ARMCPRegInfo v8_aa32_el1_reginfo[] = { + { .name = "FPEXC32_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 3, .opc2 = 0, .access = PL2_RW, - .type = ARM_CP_CONST, .resetvalue = 0 }, - REGINFO_SENTINEL + .type = ARM_CP_ALIAS | ARM_CP_FPU | ARM_CP_EL3_NO_EL2_KEEP, + .fieldoffset = offsetof(CPUARMState, vfp.xregs[ARM_VFP_FPEXC]) }, + { .name = "DACR32_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 3, .crm = 0, .opc2 = 0, + .access = PL2_RW, .resetvalue = 0, .type = ARM_CP_EL3_NO_EL2_KEEP, + .writefn = dacr_write, .raw_writefn = raw_write, + .fieldoffset = offsetof(CPUARMState, cp15.dacr32_el2) }, + { .name = "IFSR32_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 0, .opc2 = 1, + .access = PL2_RW, .resetvalue = 0, .type = ARM_CP_EL3_NO_EL2_KEEP, + .fieldoffset = offsetof(CPUARMState, cp15.ifsr32_el2) }, }; static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) @@ -5494,7 +5972,8 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) if (arm_feature(env, ARM_FEATURE_EL3)) { valid_mask &= ~HCR_HCD; } else if (cpu->psci_conduit != QEMU_PSCI_CONDUIT_SMC) { - /* Architecturally HCR.TSC is RES0 if EL3 is not implemented. + /* + * Architecturally HCR.TSC is RES0 if EL3 is not implemented. 
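The v8_aa32_el1_reginfo group above exists so that FPEXC32_EL2, DACR32_EL2 and IFSR32_EL2, which only have meaning when EL1 can run AArch32, can be dropped from the unconditional v8 list. Registration is presumably gated on the ID registers at realize time; a minimal sketch of that gating, where the exact predicate name is an assumption rather than something this hunk shows:

    /* Only define the AArch32-at-EL1 batch when EL1 supports AArch32. */
    if (cpu_isar_feature(aa64_aa32_el1, cpu)) {     /* assumed predicate */
        define_arm_cp_regs(cpu, v8_aa32_el1_reginfo);
    }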
* However, if we're using the SMC PSCI conduit then QEMU is * effectively acting like EL3 firmware and so the guest at * EL2 should retain the ability to prevent EL1 from being @@ -5508,6 +5987,9 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) if (cpu_isar_feature(aa64_vh, cpu)) { valid_mask |= HCR_E2H; } + if (cpu_isar_feature(aa64_ras, cpu)) { + valid_mask |= HCR_TERR | HCR_TEA; + } if (cpu_isar_feature(aa64_lor, cpu)) { valid_mask |= HCR_TLOR; } @@ -5517,6 +5999,27 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) if (cpu_isar_feature(aa64_mte, cpu)) { valid_mask |= HCR_ATA | HCR_DCT | HCR_TID5; } + if (cpu_isar_feature(aa64_scxtnum, cpu)) { + valid_mask |= HCR_ENSCXT; + } + if (cpu_isar_feature(aa64_fwb, cpu)) { + valid_mask |= HCR_FWB; + } + if (cpu_isar_feature(aa64_rme, cpu)) { + valid_mask |= HCR_GPF; + } + if (cpu_isar_feature(aa64_nv, cpu)) { + valid_mask |= HCR_NV | HCR_NV1 | HCR_AT; + } + if (cpu_isar_feature(aa64_nv2, cpu)) { + valid_mask |= HCR_NV2; + } + } + + if (cpu_isar_feature(any_evt, cpu)) { + valid_mask |= HCR_TTLBIS | HCR_TTLBOS | HCR_TICAB | HCR_TOCU | HCR_TID4; + } else if (cpu_isar_feature(any_half_evt, cpu)) { + valid_mask |= HCR_TICAB | HCR_TOCU | HCR_TID4; } /* Clear RES0 bits. */ @@ -5528,8 +6031,11 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) * HCR_PTW forbids certain page-table setups * HCR_DC disables stage1 and enables stage2 translation * HCR_DCT enables tagging on (disabled) stage1 translation + * HCR_FWB changes the interpretation of stage2 descriptor bits + * HCR_NV and HCR_NV1 affect interpretation of descriptor bits */ - if ((env->cp15.hcr_el2 ^ value) & (HCR_VM | HCR_PTW | HCR_DC | HCR_DCT)) { + if ((env->cp15.hcr_el2 ^ value) & + (HCR_VM | HCR_PTW | HCR_DC | HCR_DCT | HCR_FWB | HCR_NV | HCR_NV1)) { tlb_flush(CPU(cpu)); } env->cp15.hcr_el2 = value; @@ -5538,16 +6044,17 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) * Updates to VI and VF require us to update the status of * virtual interrupts, which are the logical OR of these bits * and the state of the input lines from the GIC. (This requires - * that we have the iothread lock, which is done by marking the + * that we have the BQL, which is done by marking the * reginfo structs as ARM_CP_IO.) * Note that if a write to HCR pends a VIRQ or VFIQ it is never * possible for it to be taken immediately, because VIRQ and * VFIQ are masked unless running at EL0 or EL1, and HCR * can only be written at EL2. */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); arm_cpu_update_virq(cpu); arm_cpu_update_vfiq(cpu); + arm_cpu_update_vserr(cpu); } static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) @@ -5572,15 +6079,17 @@ static void hcr_writelow(CPUARMState *env, const ARMCPRegInfo *ri, } /* - * Return the effective value of HCR_EL2. + * Return the effective value of HCR_EL2, at the given security state. * Bits that are not included here: * RW (read from SCR_EL3.RW as needed) */ -uint64_t arm_hcr_el2_eff(CPUARMState *env) +uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, ARMSecuritySpace space) { uint64_t ret = env->cp15.hcr_el2; - if (!arm_is_el2_enabled(env)) { + assert(space != ARMSS_Root); + + if (!arm_is_el2_enabled_secstate(env, space)) { /* * "This register has no effect if EL2 is not enabled in the * current Security state". 
This is ARMv8.4-SecEL2 speak for @@ -5639,6 +6148,103 @@ uint64_t arm_hcr_el2_eff(CPUARMState *env) return ret; } +uint64_t arm_hcr_el2_eff(CPUARMState *env) +{ + if (arm_feature(env, ARM_FEATURE_M)) { + return 0; + } + return arm_hcr_el2_eff_secstate(env, arm_security_space_below_el3(env)); +} + +/* + * Corresponds to ARM pseudocode function ELIsInHost(). + */ +bool el_is_in_host(CPUARMState *env, int el) +{ + uint64_t mask; + + /* + * Since we only care about E2H and TGE, we can skip arm_hcr_el2_eff(). + * Perform the simplest bit tests first, and validate EL2 afterward. + */ + if (el & 1) { + return false; /* EL1 or EL3 */ + } + + /* + * Note that hcr_write() checks isar_feature_aa64_vh(), + * aka HaveVirtHostExt(), in allowing HCR_E2H to be set. + */ + mask = el ? HCR_E2H : HCR_E2H | HCR_TGE; + if ((env->cp15.hcr_el2 & mask) != mask) { + return false; + } + + /* TGE and/or E2H set: double check those bits are currently legal. */ + return arm_is_el2_enabled(env) && arm_el_is_aa64(env, 2); +} + +static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + uint64_t valid_mask = 0; + + /* FEAT_MOPS adds MSCEn and MCE2 */ + if (cpu_isar_feature(aa64_mops, env_archcpu(env))) { + valid_mask |= HCRX_MSCEN | HCRX_MCE2; + } + + /* Clear RES0 bits. */ + env->cp15.hcrx_el2 = value & valid_mask; +} + +static CPAccessResult access_hxen(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 2 + && arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_HXEN)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static const ARMCPRegInfo hcrx_el2_reginfo = { + .name = "HCRX_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 2, + .access = PL2_RW, .writefn = hcrx_write, .accessfn = access_hxen, + .nv2_redirect_offset = 0xa0, + .fieldoffset = offsetof(CPUARMState, cp15.hcrx_el2), +}; + +/* Return the effective value of HCRX_EL2. */ +uint64_t arm_hcrx_el2_eff(CPUARMState *env) +{ + /* + * The bits in this register behave as 0 for all purposes other than + * direct reads of the register if SCR_EL3.HXEn is 0. + * If EL2 is not enabled in the current security state, then the + * bit may behave as if 0, or as if 1, depending on the bit. + * For the moment, we treat the EL2-disabled case as taking + * priority over the HXEn-disabled case. This is true for the only + * bit for a feature which we implement where the answer is different + * for the two cases (MSCEn for FEAT_MOPS). + * This may need to be revisited for future bits. 
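el_is_in_host() above boils the ELIsInHost() pseudocode down to two mask tests before validating EL2. The core decision, extracted into a standalone compilable sketch (bit positions copied from the architecture: E2H is HCR_EL2 bit 34, TGE bit 27; the helper name is invented):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define HCR_TGE (1ULL << 27)
    #define HCR_E2H (1ULL << 34)

    /* EL0 is "in host" only with E2H+TGE; EL2 needs only E2H. */
    static bool e2h_tge_allows_host(uint64_t hcr, int el)
    {
        if (el & 1) {
            return false;   /* EL1 and EL3 are never in the host regime */
        }
        uint64_t mask = el ? HCR_E2H : HCR_E2H | HCR_TGE;
        return (hcr & mask) == mask;
    }

    int main(void)
    {
        assert(!e2h_tge_allows_host(HCR_E2H, 0));           /* EL0 needs TGE too */
        assert(e2h_tge_allows_host(HCR_E2H | HCR_TGE, 0));
        assert(e2h_tge_allows_host(HCR_E2H, 2));            /* EL2 needs only E2H */
        return 0;
    }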
+ */ + if (!arm_is_el2_enabled(env)) { + uint64_t hcrx = 0; + if (cpu_isar_feature(aa64_mops, env_archcpu(env))) { + /* MSCEn behaves as 1 if EL2 is not enabled */ + hcrx |= HCRX_MSCEN; + } + return hcrx; + } + if (arm_feature(env, ARM_FEATURE_EL3) && !(env->cp15.scr_el3 & SCR_HXEN)) { + return 0; + } + return env->cp15.hcrx_el2; +} + static void cptr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { @@ -5648,8 +6254,8 @@ static void cptr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri, */ if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) && !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) { - value &= ~(0x3 << 10); - value |= env->cp15.cptr_el[2] & (0x3 << 10); + uint64_t mask = R_HCPTR_TCP11_MASK | R_HCPTR_TCP10_MASK; + value = (value & ~mask) | (env->cp15.cptr_el[2] & mask); } env->cp15.cptr_el[2] = value; } @@ -5664,7 +6270,7 @@ static uint64_t cptr_el2_read(CPUARMState *env, const ARMCPRegInfo *ri) if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) && !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) { - value |= 0x3 << 10; + value |= R_HCPTR_TCP11_MASK | R_HCPTR_TCP10_MASK; } return value; } @@ -5674,7 +6280,8 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .type = ARM_CP_IO, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2), - .writefn = hcr_write }, + .nv2_redirect_offset = 0x78, + .writefn = hcr_write, .raw_writefn = raw_write }, { .name = "HCR", .state = ARM_CP_STATE_AA32, .type = ARM_CP_ALIAS | ARM_CP_IO, .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0, @@ -5684,14 +6291,16 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 7, .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64, - .type = ARM_CP_ALIAS, + .type = ARM_CP_ALIAS | ARM_CP_NV2_REDIRECT, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, elr_el[2]) }, { .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH, + .type = ARM_CP_NV2_REDIRECT, .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.esr_el[2]) }, { .name = "FAR_EL2", .state = ARM_CP_STATE_BOTH, + .type = ARM_CP_NV2_REDIRECT, .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el[2]) }, { .name = "HIFAR", .state = ARM_CP_STATE_AA32, @@ -5700,7 +6309,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .access = PL2_RW, .fieldoffset = offsetofhigh32(CPUARMState, cp15.far_el[2]) }, { .name = "SPSR_EL2", .state = ARM_CP_STATE_AA64, - .type = ARM_CP_ALIAS, + .type = ARM_CP_ALIAS | ARM_CP_NV2_REDIRECT, .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 0, .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_HYP]) }, @@ -5746,29 +6355,29 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "TCR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 2, .access = PL2_RW, .writefn = vmsa_tcr_el12_write, - /* no .raw_writefn or .resetfn needed as we never use mask/base_mask */ + .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[2]) }, { .name = "VTCR", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2, .type = ARM_CP_ALIAS, .access = PL2_RW, .accessfn = access_el3_aa32ns, - .fieldoffset = offsetof(CPUARMState, cp15.vtcr_el2) }, + .fieldoffset = 
offsetoflow32(CPUARMState, cp15.vtcr_el2) }, { .name = "VTCR_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2, .access = PL2_RW, - /* no .writefn needed as this can't cause an ASID change; - * no .raw_writefn or .resetfn needed as we never use mask/base_mask - */ + .nv2_redirect_offset = 0x40, + /* no .writefn needed as this can't cause an ASID change */ .fieldoffset = offsetof(CPUARMState, cp15.vtcr_el2) }, { .name = "VTTBR", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 6, .crm = 2, .type = ARM_CP_64BIT | ARM_CP_ALIAS, .access = PL2_RW, .accessfn = access_el3_aa32ns, .fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2), - .writefn = vttbr_write }, + .writefn = vttbr_write, .raw_writefn = raw_write }, { .name = "VTTBR_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 0, - .access = PL2_RW, .writefn = vttbr_write, + .access = PL2_RW, .writefn = vttbr_write, .raw_writefn = raw_write, + .nv2_redirect_offset = 0x20, .fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2) }, { .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0, @@ -5777,10 +6386,12 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "TPIDR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 2, .access = PL2_RW, .resetvalue = 0, + .nv2_redirect_offset = 0x90, .fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[2]) }, { .name = "TTBR0_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 0, - .access = PL2_RW, .resetvalue = 0, .writefn = vmsa_tcr_ttbr_el2_write, + .access = PL2_RW, .resetvalue = 0, + .writefn = vmsa_tcr_ttbr_el2_write, .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[2]) }, { .name = "HTTBR", .cp = 15, .opc1 = 4, .crm = 2, .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS, @@ -5807,42 +6418,46 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .writefn = tlbimva_hyp_is_write }, { .name = "TLBI_ALLE2", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 0, - .type = ARM_CP_NO_RAW, .access = PL2_W, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = tlbi_aa64_alle2_write }, { .name = "TLBI_VAE2", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL2_W, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = tlbi_aa64_vae2_write }, { .name = "TLBI_VALE2", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = tlbi_aa64_vae2_write }, { .name = "TLBI_ALLE2IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 0, - .access = PL2_W, .type = ARM_CP_NO_RAW, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = tlbi_aa64_alle2is_write }, { .name = "TLBI_VAE2IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL2_W, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = tlbi_aa64_vae2is_write }, { .name = "TLBI_VALE2IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = tlbi_aa64_vae2is_write }, #ifndef CONFIG_USER_ONLY - /* Unlike 
the other EL2-related AT operations, these must + /* + * Unlike the other EL2-related AT operations, these must * UNDEF from EL3 if EL2 is not implemented, which is why we * define them here rather than with the rest of the AT ops. */ { .name = "AT_S1E2R", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 0, .access = PL2_W, .accessfn = at_s1e2_access, - .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, .writefn = ats_write64 }, + .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = ats_write64 }, { .name = "AT_S1E2W", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 1, .access = PL2_W, .accessfn = at_s1e2_access, - .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, .writefn = ats_write64 }, - /* The AArch32 ATS1H* operations are CONSTRAINED UNPREDICTABLE + .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = ats_write64 }, + /* + * The AArch32 ATS1H* operations are CONSTRAINED UNPREDICTABLE * if EL2 is not implemented; we choose to UNDEF. Behaviour at EL3 * with SCR.NS == 0 outside Monitor mode is UNPREDICTABLE; we choose * to behave as if SCR.NS was 1. @@ -5855,16 +6470,19 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .writefn = ats1h_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC }, { .name = "CNTHCTL_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 1, .opc2 = 0, - /* ARMv7 requires bit 0 and 1 to reset to 1. ARMv8 defines the + /* + * ARMv7 requires bit 0 and 1 to reset to 1. ARMv8 defines the * reset values as IMPDEF. We choose to reset to 3 to comply with * both ARMv7 and ARMv8. */ - .access = PL2_RW, .resetvalue = 3, + .access = PL2_RW, .type = ARM_CP_IO, .resetvalue = 3, + .writefn = gt_cnthctl_write, .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.cnthctl_el2) }, { .name = "CNTVOFF_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 0, .opc2 = 3, .access = PL2_RW, .type = ARM_CP_IO, .resetvalue = 0, .writefn = gt_cntvoff_write, + .nv2_redirect_offset = 0x60, .fieldoffset = offsetof(CPUARMState, cp15.cntvoff_el2) }, { .name = "CNTVOFF", .cp = 15, .opc1 = 4, .crm = 14, .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS | ARM_CP_IO, @@ -5892,13 +6510,6 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { .resetvalue = 0, .writefn = gt_hyp_ctl_write, .raw_writefn = raw_write }, #endif - /* The only field of MDCR_EL2 that has a defined architectural reset value - * is MDCR_EL2.HPMN which should reset to the value of PMCR_EL0.N. 
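The ARM_CP_EL3_NO_EL2_UNDEF and ARM_CP_EL3_NO_EL2_KEEP decorations added throughout this hunk are what allow the old el3_no_el2_cp_reginfo dummy table (removed earlier) to go away: the normal definitions now carry enough information for the "EL3 without EL2" case. Roughly, the registration path can act on them like this; a self-contained sketch of the dispatch, with illustrative flag values, not the literal registration code:

    #include <assert.h>
    #include <stdint.h>

    enum {
        ARM_CP_EL3_NO_EL2_UNDEF = 1u << 16,   /* bit positions illustrative */
        ARM_CP_EL3_NO_EL2_KEEP  = 1u << 17,
    };

    /* Fate of an EL2 register on a CPU with EL3 but no EL2. */
    typedef enum { REG_UNDEF, REG_RES0_CONST, REG_KEEP } Fate;

    static Fate el2_reg_fate(uint32_t type)
    {
        if (type & ARM_CP_EL3_NO_EL2_UNDEF) {
            return REG_UNDEF;        /* not registered: accesses UNDEF */
        }
        if (type & ARM_CP_EL3_NO_EL2_KEEP) {
            return REG_KEEP;         /* keep the full behaviour */
        }
        return REG_RES0_CONST;       /* default: behaves as RES0 */
    }

    int main(void)
    {
        assert(el2_reg_fate(ARM_CP_EL3_NO_EL2_UNDEF) == REG_UNDEF);
        assert(el2_reg_fate(0) == REG_RES0_CONST);
        return 0;
    }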
- */ - { .name = "MDCR_EL2", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 1, - .access = PL2_RW, .resetvalue = PMCR_NUM_COUNTERS, - .fieldoffset = offsetof(CPUARMState, cp15.mdcr_el2), }, { .name = "HPFAR", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4, .access = PL2_RW, .accessfn = access_el3_aa32ns, @@ -5910,8 +6521,8 @@ static const ARMCPRegInfo el2_cp_reginfo[] = { { .name = "HSTR_EL2", .state = ARM_CP_STATE_BOTH, .cp = 15, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 3, .access = PL2_RW, + .nv2_redirect_offset = 0x80, .fieldoffset = offsetof(CPUARMState, cp15.hstr_el2) }, - REGINFO_SENTINEL }; static const ARMCPRegInfo el2_v8_cp_reginfo[] = { @@ -5921,7 +6532,6 @@ static const ARMCPRegInfo el2_v8_cp_reginfo[] = { .access = PL2_RW, .fieldoffset = offsetofhigh32(CPUARMState, cp15.hcr_el2), .writefn = hcr_writehigh }, - REGINFO_SENTINEL }; static CPAccessResult sel2_access(CPUARMState *env, const ARMCPRegInfo *ri, @@ -5937,18 +6547,20 @@ static const ARMCPRegInfo el2_sec_cp_reginfo[] = { { .name = "VSTTBR_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 6, .opc2 = 0, .access = PL2_RW, .accessfn = sel2_access, + .nv2_redirect_offset = 0x30, .fieldoffset = offsetof(CPUARMState, cp15.vsttbr_el2) }, { .name = "VSTCR_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 6, .opc2 = 2, .access = PL2_RW, .accessfn = sel2_access, + .nv2_redirect_offset = 0x48, .fieldoffset = offsetof(CPUARMState, cp15.vstcr_el2) }, - REGINFO_SENTINEL }; static CPAccessResult nsacr_access(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { - /* The NSACR is RW at EL3, and RO for NS EL1 and NS EL2. + /* + * The NSACR is RW at EL3, and RO for NS EL1 and NS EL2. * At Secure EL1 it traps to EL3 or EL2. */ if (arm_current_el(env) == 3) { @@ -5971,12 +6583,12 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { { .name = "SCR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 0, .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.scr_el3), - .resetfn = scr_reset, .writefn = scr_write }, + .resetfn = scr_reset, .writefn = scr_write, .raw_writefn = raw_write }, { .name = "SCR", .type = ARM_CP_ALIAS | ARM_CP_NEWEL, .cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 0, .access = PL1_RW, .accessfn = access_trap_aa32s_el1, .fieldoffset = offsetoflow32(CPUARMState, cp15.scr_el3), - .writefn = scr_write }, + .writefn = scr_write, .raw_writefn = raw_write }, { .name = "SDER32_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 1, .access = PL3_RW, .resetvalue = 0, @@ -5996,12 +6608,8 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { { .name = "TCR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 2, .access = PL3_RW, - /* no .writefn needed as this can't cause an ASID change; - * we must provide a .raw_writefn and .resetfn because we handle - * reset and migration for the AArch32 TTBCR(S), which might be - * using mask and base_mask. 
- */ - .resetfn = vmsa_ttbcr_reset, .raw_writefn = vmsa_ttbcr_raw_write, + /* no .writefn needed as this can't cause an ASID change */ + .resetvalue = 0, .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[3]) }, { .name = "ELR_EL3", .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, @@ -6068,10 +6676,47 @@ static const ARMCPRegInfo el3_cp_reginfo[] = { .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 5, .access = PL3_W, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vae3_write }, - REGINFO_SENTINEL }; #ifndef CONFIG_USER_ONLY + +static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1) { + /* This must be a FEAT_NV access */ + return CP_ACCESS_OK; + } + if (!(arm_hcr_el2_eff(env) & HCR_E2H)) { + return CP_ACCESS_TRAP_UNCATEGORIZED; + } + return CP_ACCESS_OK; +} + +static CPAccessResult access_el1nvpct(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1) { + /* This must be a FEAT_NV access with NVx == 101 */ + if (FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, EL1NVPCT)) { + return CP_ACCESS_TRAP_EL2; + } + } + return e2h_access(env, ri, isread); +} + +static CPAccessResult access_el1nvvct(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1) { + /* This must be a FEAT_NV access with NVx == 101 */ + if (FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, EL1NVVCT)) { + return CP_ACCESS_TRAP_EL2; + } + } + return e2h_access(env, ri, isread); +} + /* Test if system register redirection is to occur in the current state. */ static bool redirect_for_e2h(CPUARMState *env) { @@ -6113,6 +6758,42 @@ static void el2_e2h_write(CPUARMState *env, const ARMCPRegInfo *ri, writefn(env, ri, value); } +static uint64_t el2_e2h_e12_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + /* Pass the EL1 register accessor its ri, not the EL12 alias ri */ + return ri->orig_readfn(env, ri->opaque); +} + +static void el2_e2h_e12_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* Pass the EL1 register accessor its ri, not the EL12 alias ri */ + return ri->orig_writefn(env, ri->opaque, value); +} + +static CPAccessResult el2_e2h_e12_access(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1) { + /* + * This must be a FEAT_NV access (will either trap or redirect + * to memory). None of the registers with _EL12 aliases want to + * apply their trap controls for this kind of access, so don't + * call the orig_accessfn or do the "UNDEF when E2H is 0" check. 
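The el2_e2h_e12_read/write wrappers above work because each FOO_EL12 alias stores the EL1 register's ARMCPRegInfo in its .opaque field and the EL1 accessors in orig_readfn/orig_writefn; forwarding with the original reginfo keeps field offsets and side effects resolving against the EL1 definition. The same indirection in miniature (types and names invented for illustration):

    #include <stdint.h>

    struct reg {
        uint64_t (*read)(const struct reg *r);
        const struct reg *target;    /* the register we alias, cf. .opaque */
    };

    /* Forward to the aliased register, handing over *its* descriptor. */
    static uint64_t alias_read(const struct reg *r)
    {
        return r->target->read(r->target);
    }

    static uint64_t real_read(const struct reg *r) { (void)r; return 42; }

    int main(void)
    {
        struct reg el1  = { real_read, 0 };
        struct reg el12 = { alias_read, &el1 };
        return el12.read(&el12) == 42 ? 0 : 1;   /* reads the EL1 state */
    }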
+ */ + return CP_ACCESS_OK; + } + /* FOO_EL12 aliases only exist when E2H is 1; otherwise they UNDEF */ + if (!(arm_hcr_el2_eff(env) & HCR_E2H)) { + return CP_ACCESS_TRAP_UNCATEGORIZED; + } + if (ri->orig_accessfn) { + return ri->orig_accessfn(env, ri->opaque, isread); + } + return CP_ACCESS_OK; +} + static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu) { struct E2HAlias { @@ -6165,10 +6846,16 @@ static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu) */ { K(3, 0, 1, 2, 0), K(3, 4, 1, 2, 0), K(3, 5, 1, 2, 0), "ZCR_EL1", "ZCR_EL2", "ZCR_EL12", isar_feature_aa64_sve }, + { K(3, 0, 1, 2, 6), K(3, 4, 1, 2, 6), K(3, 5, 1, 2, 6), + "SMCR_EL1", "SMCR_EL2", "SMCR_EL12", isar_feature_aa64_sme }, { K(3, 0, 5, 6, 0), K(3, 4, 5, 6, 0), K(3, 5, 5, 6, 0), "TFSR_EL1", "TFSR_EL2", "TFSR_EL12", isar_feature_aa64_mte }, + { K(3, 0, 13, 0, 7), K(3, 4, 13, 0, 7), K(3, 5, 13, 0, 7), + "SCXTNUM_EL1", "SCXTNUM_EL2", "SCXTNUM_EL12", + isar_feature_aa64_scxtnum }, + /* TODO: ARMv8.2-SPE -- PMSCR_EL2 */ /* TODO: ARMv8.4-Trace -- TRFCR_EL2 */ }; @@ -6178,14 +6865,17 @@ static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu) for (i = 0; i < ARRAY_SIZE(aliases); i++) { const struct E2HAlias *a = &aliases[i]; - ARMCPRegInfo *src_reg, *dst_reg; + ARMCPRegInfo *src_reg, *dst_reg, *new_reg; + bool ok; if (a->feature && !a->feature(&cpu->isar)) { continue; } - src_reg = g_hash_table_lookup(cpu->cp_regs, &a->src_key); - dst_reg = g_hash_table_lookup(cpu->cp_regs, &a->dst_key); + src_reg = g_hash_table_lookup(cpu->cp_regs, + (gpointer)(uintptr_t)a->src_key); + dst_reg = g_hash_table_lookup(cpu->cp_regs, + (gpointer)(uintptr_t)a->dst_key); g_assert(src_reg != NULL); g_assert(dst_reg != NULL); @@ -6197,20 +6887,52 @@ static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu) g_assert(src_reg->opaque == NULL); /* Create alias before redirection so we dup the right data. */ - if (a->new_key) { - ARMCPRegInfo *new_reg = g_memdup(src_reg, sizeof(ARMCPRegInfo)); - uint32_t *new_key = g_memdup(&a->new_key, sizeof(uint32_t)); - bool ok; - - new_reg->name = a->new_name; - new_reg->type |= ARM_CP_ALIAS; - /* Remove PL1/PL0 access, leaving PL2/PL3 R/W in place. */ - new_reg->access &= PL2_RW | PL3_RW; + new_reg = g_memdup(src_reg, sizeof(ARMCPRegInfo)); + + new_reg->name = a->new_name; + new_reg->type |= ARM_CP_ALIAS; + /* Remove PL1/PL0 access, leaving PL2/PL3 R/W in place. 
*/ + new_reg->access &= PL2_RW | PL3_RW; + /* The new_reg op fields are as per new_key, not the target reg */ + new_reg->crn = (a->new_key & CP_REG_ARM64_SYSREG_CRN_MASK) + >> CP_REG_ARM64_SYSREG_CRN_SHIFT; + new_reg->crm = (a->new_key & CP_REG_ARM64_SYSREG_CRM_MASK) + >> CP_REG_ARM64_SYSREG_CRM_SHIFT; + new_reg->opc0 = (a->new_key & CP_REG_ARM64_SYSREG_OP0_MASK) + >> CP_REG_ARM64_SYSREG_OP0_SHIFT; + new_reg->opc1 = (a->new_key & CP_REG_ARM64_SYSREG_OP1_MASK) + >> CP_REG_ARM64_SYSREG_OP1_SHIFT; + new_reg->opc2 = (a->new_key & CP_REG_ARM64_SYSREG_OP2_MASK) + >> CP_REG_ARM64_SYSREG_OP2_SHIFT; + new_reg->opaque = src_reg; + new_reg->orig_readfn = src_reg->readfn ?: raw_read; + new_reg->orig_writefn = src_reg->writefn ?: raw_write; + new_reg->orig_accessfn = src_reg->accessfn; + if (!new_reg->raw_readfn) { + new_reg->raw_readfn = raw_read; + } + if (!new_reg->raw_writefn) { + new_reg->raw_writefn = raw_write; + } + new_reg->readfn = el2_e2h_e12_read; + new_reg->writefn = el2_e2h_e12_write; + new_reg->accessfn = el2_e2h_e12_access; - ok = g_hash_table_insert(cpu->cp_regs, new_key, new_reg); - g_assert(ok); + /* + * If the _EL1 register is redirected to memory by FEAT_NV2, + * then it shares the offset with the _EL12 register, + * and which one is redirected depends on HCR_EL2.NV1. + */ + if (new_reg->nv2_redirect_offset) { + assert(new_reg->nv2_redirect_offset & NV2_REDIR_NV1); + new_reg->nv2_redirect_offset &= ~NV2_REDIR_NV1; + new_reg->nv2_redirect_offset |= NV2_REDIR_NO_NV1; } + ok = g_hash_table_insert(cpu->cp_regs, + (gpointer)(uintptr_t)a->new_key, new_reg); + g_assert(ok); + src_reg->opaque = dst_reg; src_reg->orig_readfn = src_reg->readfn ?: raw_read; src_reg->orig_writefn = src_reg->writefn ?: raw_write; @@ -6259,214 +6981,243 @@ static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri, return CP_ACCESS_OK; } -static void oslar_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +/* + * Check for traps to RAS registers, which are controlled + * by HCR_EL2.TERR and SCR_EL3.TERR. + */ +static CPAccessResult access_terr(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) { - /* Writes to OSLAR_EL1 may update the OS lock status, which can be - * read via a bit in OSLSR_EL1. - */ - int oslock; + int el = arm_current_el(env); - if (ri->state == ARM_CP_STATE_AA32) { - oslock = (value == 0xC5ACCE55); - } else { - oslock = value & 1; + if (el < 2 && (arm_hcr_el2_eff(env) & HCR_TERR)) { + return CP_ACCESS_TRAP_EL2; + } + if (el < 3 && (env->cp15.scr_el3 & SCR_TERR)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static uint64_t disr_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + int el = arm_current_el(env); + + if (el < 2 && (arm_hcr_el2_eff(env) & HCR_AMO)) { + return env->cp15.vdisr_el2; } + if (el < 3 && (env->cp15.scr_el3 & SCR_EA)) { + return 0; /* RAZ/WI */ + } + return env->cp15.disr_el1; +} - env->cp15.oslsr_el1 = deposit32(env->cp15.oslsr_el1, 1, 1, oslock); +static void disr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val) +{ + int el = arm_current_el(env); + + if (el < 2 && (arm_hcr_el2_eff(env) & HCR_AMO)) { + env->cp15.vdisr_el2 = val; + return; + } + if (el < 3 && (env->cp15.scr_el3 & SCR_EA)) { + return; /* RAZ/WI */ + } + env->cp15.disr_el1 = val; } -static const ARMCPRegInfo debug_cp_reginfo[] = { - /* DBGDRAR, DBGDSAR: always RAZ since we don't implement memory mapped - * debug components. The AArch64 version of DBGDRAR is named MDRAR_EL1; - * unlike DBGDRAR it is never accessible from EL0. 
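disr_read()/disr_write() above pick one of three behaviours by priority: HCR_EL2.AMO routes an EL1 access to VDISR_EL2, otherwise SCR_EL3.EA makes it RAZ/WI, and only then does the real DISR_EL1 show through. The same selection as a standalone model of an EL1 read (names invented, values made up):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static uint64_t disr_value(bool hcr_amo, bool scr_ea,
                               uint64_t vdisr_el2, uint64_t disr_el1)
    {
        if (hcr_amo) {
            return vdisr_el2;   /* EL2 owns (virtual) SErrors */
        }
        if (scr_ea) {
            return 0;           /* RAZ/WI: EL3 owns SErrors */
        }
        return disr_el1;
    }

    int main(void)
    {
        assert(disr_value(true,  false, 7, 9) == 7);   /* AMO wins */
        assert(disr_value(false, true,  7, 9) == 0);   /* EA: RAZ */
        assert(disr_value(false, false, 7, 9) == 9);
        return 0;
    }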
- * DBGDSAR is deprecated and must RAZ from v8 anyway, so it has no AArch64 - * accessor. - */ - { .name = "DBGDRAR", .cp = 14, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0, - .access = PL0_R, .accessfn = access_tdra, - .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "MDRAR_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 0, - .access = PL1_R, .accessfn = access_tdra, - .type = ARM_CP_CONST, .resetvalue = 0 }, - { .name = "DBGDSAR", .cp = 14, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0, - .access = PL0_R, .accessfn = access_tdra, - .type = ARM_CP_CONST, .resetvalue = 0 }, - /* Monitor debug system control register; the 32-bit alias is DBGDSCRext. */ - { .name = "MDSCR_EL1", .state = ARM_CP_STATE_BOTH, - .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2, - .access = PL1_RW, .accessfn = access_tda, - .fieldoffset = offsetof(CPUARMState, cp15.mdscr_el1), - .resetvalue = 0 }, - /* - * MDCCSR_EL0[30:29] map to EDSCR[30:29]. Simply RAZ as the external - * Debug Communication Channel is not implemented. - */ - { .name = "MDCCSR_EL0", .state = ARM_CP_STATE_AA64, - .opc0 = 2, .opc1 = 3, .crn = 0, .crm = 1, .opc2 = 0, - .access = PL0_R, .accessfn = access_tda, +/* + * Minimal RAS implementation with no Error Records. + * Which means that all of the Error Record registers: + * ERXADDR_EL1 + * ERXCTLR_EL1 + * ERXFR_EL1 + * ERXMISC0_EL1 + * ERXMISC1_EL1 + * ERXMISC2_EL1 + * ERXMISC3_EL1 + * ERXPFGCDN_EL1 (RASv1p1) + * ERXPFGCTL_EL1 (RASv1p1) + * ERXPFGF_EL1 (RASv1p1) + * ERXSTATUS_EL1 + * and + * ERRSELR_EL1 + * may generate UNDEFINED, which is the effect we get by not + * listing them at all. + * + * These registers have fine-grained trap bits, but UNDEF-to-EL1 + * is higher priority than FGT-to-EL2 so we do not need to list them + * in order to check for an FGT. + */ +static const ARMCPRegInfo minimal_ras_reginfo[] = { + { .name = "DISR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 1, .opc2 = 1, + .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.disr_el1), + .readfn = disr_read, .writefn = disr_write, .raw_writefn = raw_write }, + { .name = "ERRIDR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 3, .opc2 = 0, + .access = PL1_R, .accessfn = access_terr, + .fgt = FGT_ERRIDR_EL1, .type = ARM_CP_CONST, .resetvalue = 0 }, - /* - * DBGDSCRint[15,12,5:2] map to MDSCR_EL1[15,12,5:2]. Map all bits as - * it is unlikely a guest will care. - * We don't implement the configurable EL0 access. 
- */ - { .name = "DBGDSCRint", .state = ARM_CP_STATE_AA32, - .cp = 14, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0, - .type = ARM_CP_ALIAS, - .access = PL1_R, .accessfn = access_tda, - .fieldoffset = offsetof(CPUARMState, cp15.mdscr_el1), }, - { .name = "OSLAR_EL1", .state = ARM_CP_STATE_BOTH, - .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 4, - .access = PL1_W, .type = ARM_CP_NO_RAW, - .accessfn = access_tdosa, - .writefn = oslar_write }, - { .name = "OSLSR_EL1", .state = ARM_CP_STATE_BOTH, - .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 4, - .access = PL1_R, .resetvalue = 10, - .accessfn = access_tdosa, - .fieldoffset = offsetof(CPUARMState, cp15.oslsr_el1) }, - /* Dummy OSDLR_EL1: 32-bit Linux will read this */ - { .name = "OSDLR_EL1", .state = ARM_CP_STATE_BOTH, - .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 3, .opc2 = 4, - .access = PL1_RW, .accessfn = access_tdosa, - .type = ARM_CP_NOP }, - /* Dummy DBGVCR: Linux wants to clear this on startup, but we don't - * implement vector catch debug events yet. - */ - { .name = "DBGVCR", - .cp = 14, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0, - .access = PL1_RW, .accessfn = access_tda, - .type = ARM_CP_NOP }, - /* Dummy DBGVCR32_EL2 (which is only for a 64-bit hypervisor - * to save and restore a 32-bit guest's DBGVCR) - */ - { .name = "DBGVCR32_EL2", .state = ARM_CP_STATE_AA64, - .opc0 = 2, .opc1 = 4, .crn = 0, .crm = 7, .opc2 = 0, - .access = PL2_RW, .accessfn = access_tda, - .type = ARM_CP_NOP }, - /* Dummy MDCCINT_EL1, since we don't implement the Debug Communications - * Channel but Linux may try to access this register. The 32-bit - * alias is DBGDCCINT. - */ - { .name = "MDCCINT_EL1", .state = ARM_CP_STATE_BOTH, - .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, - .access = PL1_RW, .accessfn = access_tda, - .type = ARM_CP_NOP }, - REGINFO_SENTINEL -}; - -static const ARMCPRegInfo debug_lpae_cp_reginfo[] = { - /* 64 bit access versions of the (dummy) debug registers */ - { .name = "DBGDRAR", .cp = 14, .crm = 1, .opc1 = 0, - .access = PL0_R, .type = ARM_CP_CONST|ARM_CP_64BIT, .resetvalue = 0 }, - { .name = "DBGDSAR", .cp = 14, .crm = 2, .opc1 = 0, - .access = PL0_R, .type = ARM_CP_CONST|ARM_CP_64BIT, .resetvalue = 0 }, - REGINFO_SENTINEL + { .name = "VDISR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 1, .opc2 = 1, + .nv2_redirect_offset = 0x500, + .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.vdisr_el2) }, + { .name = "VSESR_EL2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 3, + .nv2_redirect_offset = 0x508, + .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.vsesr_el2) }, }; -/* Return the exception level to which exceptions should be taken - * via SVEAccessTrap. If an exception should be routed through - * AArch64.AdvSIMDFPAccessTrap, return 0; fp_exception_el should - * take care of raising that exception. - * C.f. the ARM pseudocode function CheckSVEEnabled. +/* + * Return the exception level to which exceptions should be taken + * via SVEAccessTrap. This excludes the check for whether the exception + * should be routed through AArch64.AdvSIMDFPAccessTrap. That can easily + * be found by testing 0 < fp_exception_el < sve_exception_el. + * + * C.f. the ARM pseudocode function CheckSVEEnabled. Note that the + * pseudocode does *not* separate out the FP trap checks, but has them + * all in one function. 
*/ int sve_exception_el(CPUARMState *env, int el) { #ifndef CONFIG_USER_ONLY - uint64_t hcr_el2 = arm_hcr_el2_eff(env); - - if (el <= 1 && (hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { - bool disabled = false; - - /* The CPACR.ZEN controls traps to EL1: - * 0, 2 : trap EL0 and EL1 accesses - * 1 : trap only EL0 accesses - * 3 : trap no accesses - */ - if (!extract32(env->cp15.cpacr_el1, 16, 1)) { - disabled = true; - } else if (!extract32(env->cp15.cpacr_el1, 17, 1)) { - disabled = el == 0; - } - if (disabled) { - /* route_to_el2 */ - return hcr_el2 & HCR_TGE ? 2 : 1; - } - - /* Check CPACR.FPEN. */ - if (!extract32(env->cp15.cpacr_el1, 20, 1)) { - disabled = true; - } else if (!extract32(env->cp15.cpacr_el1, 21, 1)) { - disabled = el == 0; - } - if (disabled) { - return 0; + if (el <= 1 && !el_is_in_host(env, el)) { + switch (FIELD_EX64(env->cp15.cpacr_el1, CPACR_EL1, ZEN)) { + case 1: + if (el != 0) { + break; + } + /* fall through */ + case 0: + case 2: + return 1; } } - /* CPTR_EL2. Since TZ and TFP are positive, - * they will be zero when EL2 is not present. - */ if (el <= 2 && arm_is_el2_enabled(env)) { - if (env->cp15.cptr_el[2] & CPTR_TZ) { - return 2; - } - if (env->cp15.cptr_el[2] & CPTR_TFP) { - return 0; + /* CPTR_EL2 changes format with HCR_EL2.E2H (regardless of TGE). */ + if (env->cp15.hcr_el2 & HCR_E2H) { + switch (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, ZEN)) { + case 1: + if (el != 0 || !(env->cp15.hcr_el2 & HCR_TGE)) { + break; + } + /* fall through */ + case 0: + case 2: + return 2; + } + } else { + if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TZ)) { + return 2; + } } } /* CPTR_EL3. Since EZ is negative we must check for EL3. */ if (arm_feature(env, ARM_FEATURE_EL3) - && !(env->cp15.cptr_el[3] & CPTR_EZ)) { + && !FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, EZ)) { return 3; } #endif return 0; } -uint32_t aarch64_sve_zcr_get_valid_len(ARMCPU *cpu, uint32_t start_len) +/* + * Return the exception level to which exceptions should be taken for SME. + * C.f. the ARM pseudocode function CheckSMEAccess. + */ +int sme_exception_el(CPUARMState *env, int el) { - uint32_t end_len; +#ifndef CONFIG_USER_ONLY + if (el <= 1 && !el_is_in_host(env, el)) { + switch (FIELD_EX64(env->cp15.cpacr_el1, CPACR_EL1, SMEN)) { + case 1: + if (el != 0) { + break; + } + /* fall through */ + case 0: + case 2: + return 1; + } + } - start_len = MIN(start_len, ARM_MAX_VQ - 1); - end_len = start_len; + if (el <= 2 && arm_is_el2_enabled(env)) { + /* CPTR_EL2 changes format with HCR_EL2.E2H (regardless of TGE). */ + if (env->cp15.hcr_el2 & HCR_E2H) { + switch (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, SMEN)) { + case 1: + if (el != 0 || !(env->cp15.hcr_el2 & HCR_TGE)) { + break; + } + /* fall through */ + case 0: + case 2: + return 2; + } + } else { + if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TSM)) { + return 2; + } + } + } - if (!test_bit(start_len, cpu->sve_vq_map)) { - end_len = find_last_bit(cpu->sve_vq_map, start_len); - assert(end_len < start_len); + /* CPTR_EL3. Since ESM is negative we must check for EL3. */ + if (arm_feature(env, ARM_FEATURE_EL3) + && !FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, ESM)) { + return 3; } - return end_len; +#endif + return 0; } /* * Given that SVE is enabled, return the vector length for EL. 
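sve_exception_el() and sme_exception_el() above decode the same 2-bit enable encoding used by CPACR_EL1.ZEN/SMEN and CPTR_EL2.ZEN/SMEN: 0b00 and 0b10 trap EL0 and EL1, 0b01 traps only EL0, 0b11 traps nothing; the switch fall-through is a compact spelling of that table. Written out directly (hypothetical helper):

    #include <assert.h>
    #include <stdbool.h>

    /* Does a 2-bit CPACR-style enable field trap execution at this EL? */
    static bool cpacr_field_traps(int field, int el)
    {
        switch (field) {
        case 0:
        case 2:
            return true;        /* trap EL0 and EL1 */
        case 1:
            return el == 0;     /* trap only EL0 */
        default:
            return false;       /* 3: fully enabled */
        }
    }

    int main(void)
    {
        assert(cpacr_field_traps(0, 1));    /* 0b00 traps EL1 */
        assert(cpacr_field_traps(1, 0));    /* 0b01 traps EL0 */
        assert(!cpacr_field_traps(1, 1));   /* ...but not EL1 */
        assert(!cpacr_field_traps(3, 0));   /* 0b11 traps nothing */
        return 0;
    }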
*/ -uint32_t sve_zcr_len_for_el(CPUARMState *env, int el) +uint32_t sve_vqm1_for_el_sm(CPUARMState *env, int el, bool sm) { ARMCPU *cpu = env_archcpu(env); - uint32_t zcr_len = cpu->sve_max_vq - 1; + uint64_t *cr = env->vfp.zcr_el; + uint32_t map = cpu->sve_vq.map; + uint32_t len = ARM_MAX_VQ - 1; - if (el <= 1) { - zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]); + if (sm) { + cr = env->vfp.smcr_el; + map = cpu->sme_vq.map; + } + + if (el <= 1 && !el_is_in_host(env, el)) { + len = MIN(len, 0xf & (uint32_t)cr[1]); } if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) { - zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]); + len = MIN(len, 0xf & (uint32_t)cr[2]); } if (arm_feature(env, ARM_FEATURE_EL3)) { - zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]); + len = MIN(len, 0xf & (uint32_t)cr[3]); + } + + map &= MAKE_64BIT_MASK(0, len + 1); + if (map != 0) { + return 31 - clz32(map); } - return aarch64_sve_zcr_get_valid_len(cpu, zcr_len); + /* Bit 0 is always set for Normal SVE -- not so for Streaming SVE. */ + assert(sm); + return ctz32(cpu->sme_vq.map); +} + +uint32_t sve_vqm1_for_el(CPUARMState *env, int el) +{ + return sve_vqm1_for_el_sm(env, el, FIELD_EX64(env->svcr, SVCR, SM)); } static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { int cur_el = arm_current_el(env); - int old_len = sve_zcr_len_for_el(env, cur_el); + int old_len = sve_vqm1_for_el(env, cur_el); int new_len; /* Bits other than [3:0] are RAZ/WI. */ @@ -6477,361 +7228,273 @@ static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri, * Because we arrived here, we know both FP and SVE are enabled; * otherwise we would have trapped access to the ZCR_ELn register. */ - new_len = sve_zcr_len_for_el(env, cur_el); + new_len = sve_vqm1_for_el(env, cur_el); if (new_len < old_len) { aarch64_sve_narrow_vq(env, new_len + 1); } } -static const ARMCPRegInfo zcr_el1_reginfo = { - .name = "ZCR_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 0, - .access = PL1_RW, .type = ARM_CP_SVE, - .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[1]), - .writefn = zcr_write, .raw_writefn = raw_write -}; - -static const ARMCPRegInfo zcr_el2_reginfo = { - .name = "ZCR_EL2", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 0, - .access = PL2_RW, .type = ARM_CP_SVE, - .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[2]), - .writefn = zcr_write, .raw_writefn = raw_write -}; - -static const ARMCPRegInfo zcr_no_el2_reginfo = { - .name = "ZCR_EL2", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 0, - .access = PL2_RW, .type = ARM_CP_SVE, - .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore -}; - -static const ARMCPRegInfo zcr_el3_reginfo = { - .name = "ZCR_EL3", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 2, .opc2 = 0, - .access = PL3_RW, .type = ARM_CP_SVE, - .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[3]), - .writefn = zcr_write, .raw_writefn = raw_write +static const ARMCPRegInfo zcr_reginfo[] = { + { .name = "ZCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 0, + .nv2_redirect_offset = 0x1e0 | NV2_REDIR_NV1, + .access = PL1_RW, .type = ARM_CP_SVE, + .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[1]), + .writefn = zcr_write, .raw_writefn = raw_write }, + { .name = "ZCR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 0, + .access = PL2_RW, .type = ARM_CP_SVE, + .fieldoffset = 
offsetof(CPUARMState, vfp.zcr_el[2]), + .writefn = zcr_write, .raw_writefn = raw_write }, + { .name = "ZCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 2, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_SVE, + .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[3]), + .writefn = zcr_write, .raw_writefn = raw_write }, }; -void hw_watchpoint_update(ARMCPU *cpu, int n) +#ifdef TARGET_AARCH64 +static CPAccessResult access_tpidr2(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) { - CPUARMState *env = &cpu->env; - vaddr len = 0; - vaddr wvr = env->cp15.dbgwvr[n]; - uint64_t wcr = env->cp15.dbgwcr[n]; - int mask; - int flags = BP_CPU | BP_STOP_BEFORE_ACCESS; - - if (env->cpu_watchpoint[n]) { - cpu_watchpoint_remove_by_ref(CPU(cpu), env->cpu_watchpoint[n]); - env->cpu_watchpoint[n] = NULL; - } - - if (!extract64(wcr, 0, 1)) { - /* E bit clear : watchpoint disabled */ - return; - } - - switch (extract64(wcr, 3, 2)) { - case 0: - /* LSC 00 is reserved and must behave as if the wp is disabled */ - return; - case 1: - flags |= BP_MEM_READ; - break; - case 2: - flags |= BP_MEM_WRITE; - break; - case 3: - flags |= BP_MEM_ACCESS; - break; - } - - /* Attempts to use both MASK and BAS fields simultaneously are - * CONSTRAINED UNPREDICTABLE; we opt to ignore BAS in this case, - * thus generating a watchpoint for every byte in the masked region. - */ - mask = extract64(wcr, 24, 4); - if (mask == 1 || mask == 2) { - /* Reserved values of MASK; we must act as if the mask value was - * some non-reserved value, or as if the watchpoint were disabled. - * We choose the latter. - */ - return; - } else if (mask) { - /* Watchpoint covers an aligned area up to 2GB in size */ - len = 1ULL << mask; - /* If masked bits in WVR are not zero it's CONSTRAINED UNPREDICTABLE - * whether the watchpoint fires when the unmasked bits match; we opt - * to generate the exceptions. - */ - wvr &= ~(len - 1); - } else { - /* Watchpoint covers bytes defined by the byte address select bits */ - int bas = extract64(wcr, 5, 8); - int basstart; - - if (extract64(wvr, 2, 1)) { - /* Deprecated case of an only 4-aligned address. BAS[7:4] are - * ignored, and BAS[3:0] define which bytes to watch. - */ - bas &= 0xf; - } + int el = arm_current_el(env); - if (bas == 0) { - /* This must act as if the watchpoint is disabled */ - return; + if (el == 0) { + uint64_t sctlr = arm_sctlr(env, el); + if (!(sctlr & SCTLR_EnTP2)) { + return CP_ACCESS_TRAP; } - - /* The BAS bits are supposed to be programmed to indicate a contiguous - * range of bytes. Otherwise it is CONSTRAINED UNPREDICTABLE whether - * we fire for each byte in the word/doubleword addressed by the WVR. - * We choose to ignore any non-zero bits after the first range of 1s. - */ - basstart = ctz32(bas); - len = cto32(bas >> basstart); - wvr += basstart; } - - cpu_watchpoint_insert(CPU(cpu), wvr, len, flags, - &env->cpu_watchpoint[n]); + /* TODO: FEAT_FGT */ + if (el < 3 + && arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_ENTP2)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; } -void hw_watchpoint_update_all(ARMCPU *cpu) +static CPAccessResult access_smprimap(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) { - int i; - CPUARMState *env = &cpu->env; - - /* Completely clear out existing QEMU watchpoints and our array, to - * avoid possible stale entries following migration load. 
- */ - cpu_watchpoint_remove_all(CPU(cpu), BP_CPU); - memset(env->cpu_watchpoint, 0, sizeof(env->cpu_watchpoint)); - - for (i = 0; i < ARRAY_SIZE(cpu->env.cpu_watchpoint); i++) { - hw_watchpoint_update(cpu, i); + /* If EL1 this is a FEAT_NV access and CPTR_EL3.ESM doesn't apply */ + if (arm_current_el(env) == 2 + && arm_feature(env, ARM_FEATURE_EL3) + && !FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, ESM)) { + return CP_ACCESS_TRAP_EL3; } + return CP_ACCESS_OK; } -static void dbgwvr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static CPAccessResult access_smpri(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) { - ARMCPU *cpu = env_archcpu(env); - int i = ri->crm; - - /* Bits [63:49] are hardwired to the value of bit [48]; that is, the - * register reads and behaves as if values written are sign extended. - * Bits [1:0] are RES0. - */ - value = sextract64(value, 0, 49) & ~3ULL; - - raw_write(env, ri, value); - hw_watchpoint_update(cpu, i); + if (arm_current_el(env) < 3 + && arm_feature(env, ARM_FEATURE_EL3) + && !FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, ESM)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; } -static void dbgwcr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +/* ResetSVEState */ +static void arm_reset_sve_state(CPUARMState *env) { - ARMCPU *cpu = env_archcpu(env); - int i = ri->crm; - - raw_write(env, ri, value); - hw_watchpoint_update(cpu, i); + memset(env->vfp.zregs, 0, sizeof(env->vfp.zregs)); + /* Recall that FFR is stored as pregs[16]. */ + memset(env->vfp.pregs, 0, sizeof(env->vfp.pregs)); + vfp_set_fpsr(env, 0x0800009f); /* ResetSVEState sets FPSR, not FPCR */ } -void hw_breakpoint_update(ARMCPU *cpu, int n) +void aarch64_set_svcr(CPUARMState *env, uint64_t new, uint64_t mask) { - CPUARMState *env = &cpu->env; - uint64_t bvr = env->cp15.dbgbvr[n]; - uint64_t bcr = env->cp15.dbgbcr[n]; - vaddr addr; - int bt; - int flags = BP_CPU; + uint64_t change = (env->svcr ^ new) & mask; - if (env->cpu_breakpoint[n]) { - cpu_breakpoint_remove_by_ref(CPU(cpu), env->cpu_breakpoint[n]); - env->cpu_breakpoint[n] = NULL; - } - - if (!extract64(bcr, 0, 1)) { - /* E bit clear : watchpoint disabled */ + if (change == 0) { return; } + env->svcr ^= change; - bt = extract64(bcr, 20, 4); + if (change & R_SVCR_SM_MASK) { + arm_reset_sve_state(env); + } - switch (bt) { - case 4: /* unlinked address mismatch (reserved if AArch64) */ - case 5: /* linked address mismatch (reserved if AArch64) */ - qemu_log_mask(LOG_UNIMP, - "arm: address mismatch breakpoint types not implemented\n"); - return; - case 0: /* unlinked address match */ - case 1: /* linked address match */ - { - /* Bits [63:49] are hardwired to the value of bit [48]; that is, - * we behave as if the register was sign extended. Bits [1:0] are - * RES0. The BAS field is used to allow setting breakpoints on 16 - * bit wide instructions; it is CONSTRAINED UNPREDICTABLE whether - * a bp will fire if the addresses covered by the bp and the addresses - * covered by the insn overlap but the insn doesn't start at the - * start of the bp address range. We choose to require the insn and - * the bp to have the same address. The constraints on writing to - * BAS enforced in dbgbcr_write mean we have only four cases: - * 0b0000 => no breakpoint - * 0b0011 => breakpoint on addr - * 0b1100 => breakpoint on addr + 2 - * 0b1111 => breakpoint on addr - * See also figure D2-3 in the v8 ARM ARM (DDI0487A.c).
- */ - int bas = extract64(bcr, 5, 4); - addr = sextract64(bvr, 0, 49) & ~3ULL; - if (bas == 0) { - return; - } - if (bas == 0xc) { - addr += 2; - } - break; + /* + * ResetSMEState. + * + * SetPSTATE_ZA zeros on enable and disable. We can zero this only + * on enable: while disabled, the storage is inaccessible and the + * value does not matter. We're not saving the storage in vmstate + * when disabled either. + */ + if (change & new & R_SVCR_ZA_MASK) { + memset(env->zarray, 0, sizeof(env->zarray)); } - case 2: /* unlinked context ID match */ - case 8: /* unlinked VMID match (reserved if no EL2) */ - case 10: /* unlinked context ID and VMID match (reserved if no EL2) */ - qemu_log_mask(LOG_UNIMP, - "arm: unlinked context breakpoint types not implemented\n"); - return; - case 9: /* linked VMID match (reserved if no EL2) */ - case 11: /* linked context ID and VMID match (reserved if no EL2) */ - case 3: /* linked context ID match */ - default: - /* We must generate no events for Linked context matches (unless - * they are linked to by some other bp/wp, which is handled in - * updates for the linking bp/wp). We choose to also generate no events - * for reserved values. - */ - return; + + if (tcg_enabled()) { + arm_rebuild_hflags(env); } +} - cpu_breakpoint_insert(CPU(cpu), addr, flags, &env->cpu_breakpoint[n]); +static void svcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + aarch64_set_svcr(env, value, -1); } -void hw_breakpoint_update_all(ARMCPU *cpu) +static void smcr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { - int i; - CPUARMState *env = &cpu->env; + int cur_el = arm_current_el(env); + int old_len = sve_vqm1_for_el(env, cur_el); + int new_len; - /* Completely clear out existing QEMU breakpoints and our array, to - * avoid possible stale entries following migration load. - */ - cpu_breakpoint_remove_all(CPU(cpu), BP_CPU); - memset(env->cpu_breakpoint, 0, sizeof(env->cpu_breakpoint)); + QEMU_BUILD_BUG_ON(ARM_MAX_VQ > R_SMCR_LEN_MASK + 1); + value &= R_SMCR_LEN_MASK | R_SMCR_FA64_MASK; + raw_write(env, ri, value); - for (i = 0; i < ARRAY_SIZE(cpu->env.cpu_breakpoint); i++) { - hw_breakpoint_update(cpu, i); + /* + * Note that it is CONSTRAINED UNPREDICTABLE what happens to ZA storage + * when SVL is widened (old values kept, or zeros). Choose to keep the + * current values for simplicity. But for QEMU internals, we must still + * apply the narrower SVL to the Zregs and Pregs -- see the comment + * above aarch64_sve_narrow_vq. 
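aarch64_set_svcr() above folds "which bits are being written" and "which bits actually flipped" into one expression: change = (old ^ new) & mask is 1 exactly where a masked bit differs, so the SM and ZA side effects run only on real transitions. The idiom in isolation (SM is SVCR bit 0, ZA bit 1; macro names invented):

    #include <assert.h>
    #include <stdint.h>

    #define SVCR_SM 0x1ULL   /* cf. R_SVCR_SM_MASK */
    #define SVCR_ZA 0x2ULL   /* cf. R_SVCR_ZA_MASK */

    int main(void)
    {
        uint64_t svcr  = SVCR_SM;             /* SM set, ZA clear */
        uint64_t value = SVCR_SM | SVCR_ZA;   /* write requests both */
        uint64_t mask  = SVCR_SM | SVCR_ZA;
        uint64_t change = (svcr ^ value) & mask;

        assert(change == SVCR_ZA);            /* only ZA actually flips */
        svcr ^= change;                       /* apply the transition */
        assert(svcr == (SVCR_SM | SVCR_ZA));
        return 0;
    }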
+ */ + new_len = sve_vqm1_for_el(env, cur_el); + if (new_len < old_len) { + aarch64_sve_narrow_vq(env, new_len + 1); } } -static void dbgbvr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static const ARMCPRegInfo sme_reginfo[] = { + { .name = "TPIDR2_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 13, .crm = 0, .opc2 = 5, + .access = PL0_RW, .accessfn = access_tpidr2, + .fgt = FGT_NTPIDR2_EL0, + .fieldoffset = offsetof(CPUARMState, cp15.tpidr2_el0) }, + { .name = "SVCR", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 2, + .access = PL0_RW, .type = ARM_CP_SME, + .fieldoffset = offsetof(CPUARMState, svcr), + .writefn = svcr_write, .raw_writefn = raw_write }, + { .name = "SMCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 6, + .nv2_redirect_offset = 0x1f0 | NV2_REDIR_NV1, + .access = PL1_RW, .type = ARM_CP_SME, + .fieldoffset = offsetof(CPUARMState, vfp.smcr_el[1]), + .writefn = smcr_write, .raw_writefn = raw_write }, + { .name = "SMCR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 6, + .access = PL2_RW, .type = ARM_CP_SME, + .fieldoffset = offsetof(CPUARMState, vfp.smcr_el[2]), + .writefn = smcr_write, .raw_writefn = raw_write }, + { .name = "SMCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 2, .opc2 = 6, + .access = PL3_RW, .type = ARM_CP_SME, + .fieldoffset = offsetof(CPUARMState, vfp.smcr_el[3]), + .writefn = smcr_write, .raw_writefn = raw_write }, + { .name = "SMIDR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 6, + .access = PL1_R, .accessfn = access_aa64_tid1, + /* + * IMPLEMENTOR = 0 (software) + * REVISION = 0 (implementation defined) + * SMPS = 0 (no streaming execution priority in QEMU) + * AFFINITY = 0 (streaming sve mode not shared with other PEs) + */ + .type = ARM_CP_CONST, .resetvalue = 0, }, + /* + * Because SMIDR_EL1.SMPS is 0, SMPRI_EL1 and SMPRIMAP_EL2 are RES 0. + */ + { .name = "SMPRI_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 4, + .access = PL1_RW, .accessfn = access_smpri, + .fgt = FGT_NSMPRI_EL1, + .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "SMPRIMAP_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 5, + .nv2_redirect_offset = 0x1f8, + .access = PL2_RW, .accessfn = access_smprimap, + .type = ARM_CP_CONST, .resetvalue = 0 }, +}; + +static void tlbi_aa64_paall_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { - ARMCPU *cpu = env_archcpu(env); - int i = ri->crm; + CPUState *cs = env_cpu(env); - raw_write(env, ri, value); - hw_breakpoint_update(cpu, i); + tlb_flush(cs); } -static void dbgbcr_write(CPUARMState *env, const ARMCPRegInfo *ri, - uint64_t value) +static void gpccr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { - ARMCPU *cpu = env_archcpu(env); - int i = ri->crm; + /* L0GPTSZ is RO; other bits not mentioned are RES0. */ + uint64_t rw_mask = R_GPCCR_PPS_MASK | R_GPCCR_IRGN_MASK | + R_GPCCR_ORGN_MASK | R_GPCCR_SH_MASK | R_GPCCR_PGS_MASK | + R_GPCCR_GPC_MASK | R_GPCCR_GPCP_MASK; - /* BAS[3] is a read-only copy of BAS[2], and BAS[1] a read-only - * copy of BAS[0]. 
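gpccr_write() above is the usual pattern for a register with read-only fields: take the guest value under rw_mask and keep the old contents, here L0GPTSZ, under its complement. Generically (a sketch with invented values):

    #include <assert.h>
    #include <stdint.h>

    /* Merge a guest write into a register that has read-only fields. */
    static uint64_t masked_write(uint64_t old, uint64_t val, uint64_t rw_mask)
    {
        return (val & rw_mask) | (old & ~rw_mask);
    }

    int main(void)
    {
        /* Low byte writable; high bits (think L0GPTSZ) read-only. */
        assert(masked_write(0xA011, 0xFFFF, 0x00FF) == 0xA0FF);
        return 0;
    }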
- */ - value = deposit64(value, 6, 1, extract64(value, 5, 1)); - value = deposit64(value, 8, 1, extract64(value, 7, 1)); + env->cp15.gpccr_el3 = (value & rw_mask) | (env->cp15.gpccr_el3 & ~rw_mask); +} - raw_write(env, ri, value); - hw_breakpoint_update(cpu, i); +static void gpccr_reset(CPUARMState *env, const ARMCPRegInfo *ri) +{ + env->cp15.gpccr_el3 = FIELD_DP64(0, GPCCR, L0GPTSZ, + env_archcpu(env)->reset_l0gptsz); } -static void define_debug_regs(ARMCPU *cpu) +static void tlbi_aa64_paallos_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) { - /* Define v7 and v8 architectural debug registers. - * These are just dummy implementations for now. - */ - int i; - int wrps, brps, ctx_cmps; + CPUState *cs = env_cpu(env); + tlb_flush_all_cpus_synced(cs); +} + +static const ARMCPRegInfo rme_reginfo[] = { + { .name = "GPCCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 1, .opc2 = 6, + .access = PL3_RW, .writefn = gpccr_write, .resetfn = gpccr_reset, + .fieldoffset = offsetof(CPUARMState, cp15.gpccr_el3) }, + { .name = "GPTBR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 1, .opc2 = 4, + .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.gptbr_el3) }, + { .name = "MFAR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 6, .crm = 0, .opc2 = 5, + .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.mfar_el3) }, + { .name = "TLBI_PAALL", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 4, + .access = PL3_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_paall_write }, + { .name = "TLBI_PAALLOS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 4, + .access = PL3_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_paallos_write }, /* - * The Arm ARM says DBGDIDR is optional and deprecated if EL1 cannot - * use AArch32. Given that bit 15 is RES1, if the value is 0 then - * the register must not exist for this cpu. + * QEMU does not have a way to invalidate by physical address, thus + * invalidating a range of physical addresses is accomplished by + * flushing all tlb entries in the outer shareable domain, + * just like PAALLOS. */ - if (cpu->isar.dbgdidr != 0) { - ARMCPRegInfo dbgdidr = { - .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, - .opc1 = 0, .opc2 = 0, - .access = PL0_R, .accessfn = access_tda, - .type = ARM_CP_CONST, .resetvalue = cpu->isar.dbgdidr, - }; - define_one_arm_cp_reg(cpu, &dbgdidr); - } - - /* Note that all these register fields hold "number of Xs minus 1". 
*/ - brps = arm_num_brps(cpu); - wrps = arm_num_wrps(cpu); - ctx_cmps = arm_num_ctx_cmps(cpu); - - assert(ctx_cmps <= brps); - - define_arm_cp_regs(cpu, debug_cp_reginfo); - - if (arm_feature(&cpu->env, ARM_FEATURE_LPAE)) { - define_arm_cp_regs(cpu, debug_lpae_cp_reginfo); - } - - for (i = 0; i < brps; i++) { - ARMCPRegInfo dbgregs[] = { - { .name = "DBGBVR", .state = ARM_CP_STATE_BOTH, - .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4, - .access = PL1_RW, .accessfn = access_tda, - .fieldoffset = offsetof(CPUARMState, cp15.dbgbvr[i]), - .writefn = dbgbvr_write, .raw_writefn = raw_write - }, - { .name = "DBGBCR", .state = ARM_CP_STATE_BOTH, - .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 5, - .access = PL1_RW, .accessfn = access_tda, - .fieldoffset = offsetof(CPUARMState, cp15.dbgbcr[i]), - .writefn = dbgbcr_write, .raw_writefn = raw_write - }, - REGINFO_SENTINEL - }; - define_arm_cp_regs(cpu, dbgregs); - } + { .name = "TLBI_RPALOS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 4, .opc2 = 7, + .access = PL3_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_paallos_write }, + { .name = "TLBI_RPAOS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 4, .opc2 = 3, + .access = PL3_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_paallos_write }, + { .name = "DC_CIPAPA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 14, .opc2 = 1, + .access = PL3_W, .type = ARM_CP_NOP }, +}; - for (i = 0; i < wrps; i++) { - ARMCPRegInfo dbgregs[] = { - { .name = "DBGWVR", .state = ARM_CP_STATE_BOTH, - .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6, - .access = PL1_RW, .accessfn = access_tda, - .fieldoffset = offsetof(CPUARMState, cp15.dbgwvr[i]), - .writefn = dbgwvr_write, .raw_writefn = raw_write - }, - { .name = "DBGWCR", .state = ARM_CP_STATE_BOTH, - .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 7, - .access = PL1_RW, .accessfn = access_tda, - .fieldoffset = offsetof(CPUARMState, cp15.dbgwcr[i]), - .writefn = dbgwcr_write, .raw_writefn = raw_write - }, - REGINFO_SENTINEL - }; - define_arm_cp_regs(cpu, dbgregs); - } -} +static const ARMCPRegInfo rme_mte_reginfo[] = { + { .name = "DC_CIGDPAPA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 14, .opc2 = 5, + .access = PL3_W, .type = ARM_CP_NOP }, +}; +#endif /* TARGET_AARCH64 */ static void define_pmu_regs(ARMCPU *cpu) { @@ -6840,25 +7503,29 @@ static void define_pmu_regs(ARMCPU *cpu) * field as main ID register, and we implement four counters in * addition to the cycle count register. 
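The PMCR reset value now comes from cpu->isar.reset_pmcr_el0, and the counter loop asks pmu_num_counters(), which reads the N field back out of that value. A self-contained sketch of the round trip, using the architectural PMCR_EL0.N field at bits [15:11]:

    #include <stdint.h>
    #include <assert.h>

    #define PMCRN_SHIFT 11
    #define PMCRN_MASK  (0x1full << PMCRN_SHIFT)

    /* Encode a counter count into PMCR_EL0.N, then read it back. */
    int main(void)
    {
        uint64_t pmcr = 0;
        unsigned n = 4;                      /* four event counters */

        pmcr |= (uint64_t)n << PMCRN_SHIFT;  /* as when building reset_pmcr_el0 */
        assert(((pmcr & PMCRN_MASK) >> PMCRN_SHIFT) == 4);  /* pmu_num_counters() analogue */
        return 0;
    }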
*/ - unsigned int i, pmcrn = PMCR_NUM_COUNTERS; + unsigned int i, pmcrn = pmu_num_counters(&cpu->env); ARMCPRegInfo pmcr = { .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0, .access = PL0_RW, + .fgt = FGT_PMCR_EL0, .type = ARM_CP_IO | ARM_CP_ALIAS, .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr), - .accessfn = pmreg_access, .writefn = pmcr_write, - .raw_writefn = raw_write, + .accessfn = pmreg_access, + .readfn = pmcr_read, .raw_readfn = raw_read, + .writefn = pmcr_write, .raw_writefn = raw_write, }; ARMCPRegInfo pmcr64 = { .name = "PMCR_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 0, .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMCR_EL0, .type = ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr), - .resetvalue = (cpu->midr & 0xff000000) | (pmcrn << PMCRN_SHIFT) | - PMCRLC, + .resetvalue = cpu->isar.reset_pmcr_el0, + .readfn = pmcr_read, .raw_readfn = raw_read, .writefn = pmcr_write, .raw_writefn = raw_write, }; + define_one_arm_cp_reg(cpu, &pmcr); define_one_arm_cp_reg(cpu, &pmcr64); for (i = 0; i < pmcrn; i++) { @@ -6870,27 +7537,30 @@ static void define_pmu_regs(ARMCPU *cpu) { .name = pmevcntr_name, .cp = 15, .crn = 14, .crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, + .fgt = FGT_PMEVCNTRN_EL0, .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, - .accessfn = pmreg_access }, + .accessfn = pmreg_access_xevcntr }, { .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)), - .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access, + .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access_xevcntr, .type = ARM_CP_IO, + .fgt = FGT_PMEVCNTRN_EL0, .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn, .raw_readfn = pmevcntr_rawread, .raw_writefn = pmevcntr_rawwrite }, { .name = pmevtyper_name, .cp = 15, .crn = 14, .crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7, .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS, + .fgt = FGT_PMEVTYPERN_EL0, .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, .accessfn = pmreg_access }, { .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)), .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access, + .fgt = FGT_PMEVTYPERN_EL0, .type = ARM_CP_IO, .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn, .raw_writefn = pmevtyper_rawwrite }, - REGINFO_SENTINEL }; define_arm_cp_regs(cpu, pmev_regs); g_free(pmevcntr_name); @@ -6898,32 +7568,36 @@ static void define_pmu_regs(ARMCPU *cpu) g_free(pmevtyper_name); g_free(pmevtyper_el0_name); } - if (cpu_isar_feature(aa32_pmu_8_1, cpu)) { + if (cpu_isar_feature(aa32_pmuv3p1, cpu)) { ARMCPRegInfo v81_pmu_regs[] = { { .name = "PMCEID2", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 4, .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, .resetvalue = extract64(cpu->pmceid0, 32, 32) }, { .name = "PMCEID3", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 5, .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, .resetvalue = extract64(cpu->pmceid1, 32, 32) }, - REGINFO_SENTINEL }; define_arm_cp_regs(cpu, v81_pmu_regs); } - if (cpu_isar_feature(any_pmu_8_4, cpu)) { + if (cpu_isar_feature(any_pmuv3p4, cpu)) { static const ARMCPRegInfo v84_pmmir = { .name = "PMMIR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, 
.opc1 = 0, .crn = 9, .crm = 14, .opc2 = 6, .access = PL1_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMMIR_EL1, .resetvalue = 0 }; define_one_arm_cp_reg(cpu, &v84_pmmir); } } -/* We don't know until after realize whether there's a GICv3 +#ifndef CONFIG_USER_ONLY +/* + * We don't know until after realize whether there's a GICv3 * attached, and that is what registers the gicv3 sysregs. * So we have to fill in the GIC fields in ID_PFR/ID_PFR1_EL1/ID_AA64PFR0_EL1 * at runtime. @@ -6939,7 +7613,6 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri) return pfr1; } -#ifndef CONFIG_USER_ONLY static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri) { ARMCPU *cpu = env_archcpu(env); @@ -6952,7 +7625,8 @@ static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri) } #endif -/* Shared logic between LORID and the rest of the LOR* registers. +/* + * Shared logic between LORID and the rest of the LOR* registers. * Secure state exclusion has already been dealt with. */ static CPAccessResult access_lor_ns(CPUARMState *env, @@ -6988,24 +7662,28 @@ static const ARMCPRegInfo lor_reginfo[] = { { .name = "LORSA_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 0, .access = PL1_RW, .accessfn = access_lor_other, + .fgt = FGT_LORSA_EL1, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "LOREA_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 1, .access = PL1_RW, .accessfn = access_lor_other, + .fgt = FGT_LOREA_EL1, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "LORN_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 2, .access = PL1_RW, .accessfn = access_lor_other, + .fgt = FGT_LORN_EL1, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "LORC_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 3, .access = PL1_RW, .accessfn = access_lor_other, + .fgt = FGT_LORC_EL1, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "LORID_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 7, .access = PL1_R, .accessfn = access_lor_ns, + .fgt = FGT_LORID_EL1, .type = ARM_CP_CONST, .resetvalue = 0 }, - REGINFO_SENTINEL }; #ifdef TARGET_AARCH64 @@ -7015,7 +7693,7 @@ static CPAccessResult access_pauth(CPUARMState *env, const ARMCPRegInfo *ri, int el = arm_current_el(env); if (el < 2 && - arm_feature(env, ARM_FEATURE_EL2) && + arm_is_el2_enabled(env) && !(arm_hcr_el2_eff(env) & HCR_APK)) { return CP_ACCESS_TRAP_EL2; } @@ -7031,130 +7709,155 @@ static const ARMCPRegInfo pauth_reginfo[] = { { .name = "APDAKEYLO_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 2, .opc2 = 0, .access = PL1_RW, .accessfn = access_pauth, + .fgt = FGT_APDAKEY, .fieldoffset = offsetof(CPUARMState, keys.apda.lo) }, { .name = "APDAKEYHI_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 2, .opc2 = 1, .access = PL1_RW, .accessfn = access_pauth, + .fgt = FGT_APDAKEY, .fieldoffset = offsetof(CPUARMState, keys.apda.hi) }, { .name = "APDBKEYLO_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 2, .opc2 = 2, .access = PL1_RW, .accessfn = access_pauth, + .fgt = FGT_APDBKEY, .fieldoffset = offsetof(CPUARMState, keys.apdb.lo) }, { .name = "APDBKEYHI_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 2, .opc2 = 3, .access = PL1_RW, .accessfn = access_pauth, + .fgt = FGT_APDBKEY, .fieldoffset = offsetof(CPUARMState, keys.apdb.hi) }, { 
.name = "APGAKEYLO_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 3, .opc2 = 0, .access = PL1_RW, .accessfn = access_pauth, + .fgt = FGT_APGAKEY, .fieldoffset = offsetof(CPUARMState, keys.apga.lo) }, { .name = "APGAKEYHI_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 3, .opc2 = 1, .access = PL1_RW, .accessfn = access_pauth, + .fgt = FGT_APGAKEY, .fieldoffset = offsetof(CPUARMState, keys.apga.hi) }, { .name = "APIAKEYLO_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 1, .opc2 = 0, .access = PL1_RW, .accessfn = access_pauth, + .fgt = FGT_APIAKEY, .fieldoffset = offsetof(CPUARMState, keys.apia.lo) }, { .name = "APIAKEYHI_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 1, .opc2 = 1, .access = PL1_RW, .accessfn = access_pauth, + .fgt = FGT_APIAKEY, .fieldoffset = offsetof(CPUARMState, keys.apia.hi) }, { .name = "APIBKEYLO_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 1, .opc2 = 2, .access = PL1_RW, .accessfn = access_pauth, + .fgt = FGT_APIBKEY, .fieldoffset = offsetof(CPUARMState, keys.apib.lo) }, { .name = "APIBKEYHI_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 1, .opc2 = 3, .access = PL1_RW, .accessfn = access_pauth, + .fgt = FGT_APIBKEY, .fieldoffset = offsetof(CPUARMState, keys.apib.hi) }, - REGINFO_SENTINEL }; static const ARMCPRegInfo tlbirange_reginfo[] = { { .name = "TLBI_RVAE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIRVAE1IS, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVAAE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 3, - .access = PL1_W, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIRVAAE1IS, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVALE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 5, - .access = PL1_W, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIRVALE1IS, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVAALE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 7, - .access = PL1_W, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIRVAALE1IS, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVAE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIRVAE1OS, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVAAE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 3, - .access = PL1_W, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIRVAAE1OS, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVALE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 5, - .access = PL1_W, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIRVALE1OS, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVAALE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 7, - .access = PL1_W, 
.type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIRVAALE1OS, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVAE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1, - .access = PL1_W, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIRVAE1, .writefn = tlbi_aa64_rvae1_write }, { .name = "TLBI_RVAAE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 3, - .access = PL1_W, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIRVAAE1, .writefn = tlbi_aa64_rvae1_write }, { .name = "TLBI_RVALE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 5, - .access = PL1_W, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIRVALE1, .writefn = tlbi_aa64_rvae1_write }, { .name = "TLBI_RVAALE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 7, - .access = PL1_W, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIRVAALE1, .writefn = tlbi_aa64_rvae1_write }, { .name = "TLBI_RIPAS2E1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 2, - .access = PL2_W, .type = ARM_CP_NOP }, + .access = PL2_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_ripas2e1is_write }, { .name = "TLBI_RIPAS2LE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 6, - .access = PL2_W, .type = ARM_CP_NOP }, + .access = PL2_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_ripas2e1is_write }, { .name = "TLBI_RVAE2IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 2, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = tlbi_aa64_rvae2is_write }, { .name = "TLBI_RVALE2IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 2, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = tlbi_aa64_rvae2is_write }, { .name = "TLBI_RIPAS2E1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 2, - .access = PL2_W, .type = ARM_CP_NOP }, - { .name = "TLBI_RIPAS2LE1", .state = ARM_CP_STATE_AA64, + .access = PL2_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_ripas2e1_write }, + { .name = "TLBI_RIPAS2LE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 6, - .access = PL2_W, .type = ARM_CP_NOP }, + .access = PL2_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_ripas2e1_write }, { .name = "TLBI_RVAE2OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 5, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = tlbi_aa64_rvae2is_write }, { .name = "TLBI_RVALE2OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 5, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = tlbi_aa64_rvae2is_write }, { .name = "TLBI_RVAE2", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 6, .opc2 = 1, - .access = PL2_W, .type = ARM_CP_NO_RAW, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = tlbi_aa64_rvae2_write }, { .name = 
"TLBI_RVALE2", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 6, .opc2 = 5, - .access = PL2_W, .type = ARM_CP_NO_RAW, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = tlbi_aa64_rvae2_write }, { .name = "TLBI_RVAE3IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 2, .opc2 = 1, @@ -7180,26 +7883,55 @@ static const ARMCPRegInfo tlbirange_reginfo[] = { .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 6, .opc2 = 5, .access = PL3_W, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_rvae3_write }, - REGINFO_SENTINEL }; static const ARMCPRegInfo tlbios_reginfo[] = { { .name = "TLBI_VMALLE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 0, - .access = PL1_W, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIVMALLE1OS, .writefn = tlbi_aa64_vmalle1is_write }, + { .name = "TLBI_VAE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 1, + .fgt = FGT_TLBIVAE1OS, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_vae1is_write }, { .name = "TLBI_ASIDE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 2, - .access = PL1_W, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIASIDE1OS, .writefn = tlbi_aa64_vmalle1is_write }, + { .name = "TLBI_VAAE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 3, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIVAAE1OS, + .writefn = tlbi_aa64_vae1is_write }, + { .name = "TLBI_VALE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 5, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIVALE1OS, + .writefn = tlbi_aa64_vae1is_write }, + { .name = "TLBI_VAALE1OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 7, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, + .fgt = FGT_TLBIVAALE1OS, + .writefn = tlbi_aa64_vae1is_write }, { .name = "TLBI_ALLE2OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 0, - .access = PL2_W, .type = ARM_CP_NO_RAW, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, .writefn = tlbi_aa64_alle2is_write }, + { .name = "TLBI_VAE2OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 1, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_vae2is_write }, { .name = "TLBI_ALLE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 4, .access = PL2_W, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_alle1is_write }, + { .name = "TLBI_VALE2OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 5, + .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF, + .writefn = tlbi_aa64_vae2is_write }, { .name = "TLBI_VMALLS12E1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 6, .access = PL2_W, .type = ARM_CP_NO_RAW, @@ -7220,7 +7952,14 @@ static const ARMCPRegInfo tlbios_reginfo[] = { .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 0, .access = PL3_W, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_alle3is_write }, - REGINFO_SENTINEL + { .name = "TLBI_VAE3OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 1, + .access = PL3_W, 
.type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_vae3is_write }, + { .name = "TLBI_VALE3OS", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 5, + .access = PL3_W, .type = ARM_CP_NO_RAW, + .writefn = tlbi_aa64_vae3is_write }, }; static uint64_t rndr_readfn(CPUARMState *env, const ARMCPRegInfo *ri) @@ -7259,24 +7998,24 @@ static const ARMCPRegInfo rndr_reginfo[] = { .type = ARM_CP_NO_RAW | ARM_CP_SUPPRESS_TB_END | ARM_CP_IO, .opc0 = 3, .opc1 = 3, .crn = 2, .crm = 4, .opc2 = 1, .access = PL0_R, .readfn = rndr_readfn }, - REGINFO_SENTINEL }; -#ifndef CONFIG_USER_ONLY static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *opaque, uint64_t value) { +#ifdef CONFIG_TCG ARMCPU *cpu = env_archcpu(env); /* CTR_EL0 System register -> DminLine, bits [19:16] */ uint64_t dline_size = 4 << ((cpu->ctr >> 16) & 0xF); uint64_t vaddr_in = (uint64_t) value; uint64_t vaddr = vaddr_in & ~(dline_size - 1); void *haddr; - int mem_idx = cpu_mmu_index(env, false); + int mem_idx = arm_env_mmu_index(env); /* This won't be crossing page boundaries */ haddr = probe_read(env, vaddr, dline_size, mem_idx, GETPC()); if (haddr) { +#ifndef CONFIG_USER_ONLY ram_addr_t offset; MemoryRegion *mr; @@ -7287,25 +8026,29 @@ static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *opaque, if (mr) { memory_region_writeback(mr, offset, dline_size); } +#endif /*CONFIG_USER_ONLY*/ } +#else + /* Handled by hardware accelerator. */ + g_assert_not_reached(); +#endif /* CONFIG_TCG */ } static const ARMCPRegInfo dcpop_reg[] = { { .name = "DC_CVAP", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 1, .access = PL0_W, .type = ARM_CP_NO_RAW | ARM_CP_SUPPRESS_TB_END, + .fgt = FGT_DCCVAP, .accessfn = aa64_cacheop_poc_access, .writefn = dccvap_writefn }, - REGINFO_SENTINEL }; static const ARMCPRegInfo dcpodp_reg[] = { { .name = "DC_CVADP", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 1, .access = PL0_W, .type = ARM_CP_NO_RAW | ARM_CP_SUPPRESS_TB_END, + .fgt = FGT_DCCVADP, .accessfn = aa64_cacheop_poc_access, .writefn = dccvap_writefn }, - REGINFO_SENTINEL }; -#endif /*CONFIG_USER_ONLY*/ static CPAccessResult access_aa64_tid5(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) @@ -7321,8 +8064,47 @@ static CPAccessResult access_mte(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) { int el = arm_current_el(env); + if (el < 2 && arm_is_el2_enabled(env)) { + uint64_t hcr = arm_hcr_el2_eff(env); + if (!(hcr & HCR_ATA) && (!(hcr & HCR_E2H) || !(hcr & HCR_TGE))) { + return CP_ACCESS_TRAP_EL2; + } + } + if (el < 3 && + arm_feature(env, ARM_FEATURE_EL3) && + !(env->cp15.scr_el3 & SCR_ATA)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static CPAccessResult access_tfsr_el1(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + CPAccessResult nv1 = access_nv1(env, ri, isread); - if (el < 2 && arm_feature(env, ARM_FEATURE_EL2)) { + if (nv1 != CP_ACCESS_OK) { + return nv1; + } + return access_mte(env, ri, isread); +} + +static CPAccessResult access_tfsr_el2(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + /* + * TFSR_EL2: similar to generic access_mte(), but we need to + * account for FEAT_NV. At EL1 this must be a FEAT_NV access; + * if NV2 is enabled then we will redirect this to TFSR_EL1 + * after doing the HCR and SCR ATA traps; otherwise this will + * be a trap to EL2 and the HCR/SCR traps do not apply. 
+ */ + int el = arm_current_el(env); + + if (el == 1 && (arm_hcr_el2_eff(env) & HCR_NV2)) { + return CP_ACCESS_OK; + } + if (el < 2 && arm_is_el2_enabled(env)) { uint64_t hcr = arm_hcr_el2_eff(env); if (!(hcr & HCR_ATA) && (!(hcr & HCR_E2H) || !(hcr & HCR_TGE))) { return CP_ACCESS_TRAP_EL2; @@ -7353,11 +8135,13 @@ static const ARMCPRegInfo mte_reginfo[] = { .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[0]) }, { .name = "TFSR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 0, - .access = PL1_RW, .accessfn = access_mte, + .access = PL1_RW, .accessfn = access_tfsr_el1, + .nv2_redirect_offset = 0x190 | NV2_REDIR_NV1, .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) }, { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64, + .type = ARM_CP_NV2_REDIRECT, .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 6, .opc2 = 0, - .access = PL2_RW, .accessfn = access_mte, + .access = PL2_RW, .accessfn = access_tfsr_el2, .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[2]) }, { .name = "TFSR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 5, .crm = 6, .opc2 = 0, @@ -7371,10 +8155,6 @@ static const ARMCPRegInfo mte_reginfo[] = { .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 6, .access = PL1_RW, .accessfn = access_mte, .fieldoffset = offsetof(CPUARMState, cp15.gcr_el1) }, - { .name = "GMID_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 4, - .access = PL1_R, .accessfn = access_aa64_tid5, - .type = ARM_CP_CONST, .resetvalue = GMID_EL1_BS }, { .name = "TCO", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7, .type = ARM_CP_NO_RAW, @@ -7382,71 +8162,85 @@ static const ARMCPRegInfo mte_reginfo[] = { { .name = "DC_IGVAC", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 3, .type = ARM_CP_NOP, .access = PL1_W, + .fgt = FGT_DCIVAC, .accessfn = aa64_cacheop_poc_access }, { .name = "DC_IGSW", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 4, + .fgt = FGT_DCISW, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, { .name = "DC_IGDVAC", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 5, .type = ARM_CP_NOP, .access = PL1_W, + .fgt = FGT_DCIVAC, .accessfn = aa64_cacheop_poc_access }, { .name = "DC_IGDSW", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 6, + .fgt = FGT_DCISW, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, { .name = "DC_CGSW", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 4, + .fgt = FGT_DCCSW, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, { .name = "DC_CGDSW", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 6, + .fgt = FGT_DCCSW, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, { .name = "DC_CIGSW", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 4, + .fgt = FGT_DCCISW, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, { .name = "DC_CIGDSW", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 6, + .fgt = FGT_DCCISW, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, - REGINFO_SENTINEL }; static const ARMCPRegInfo mte_tco_ro_reginfo[] = { { .name = "TCO", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7, .type = ARM_CP_CONST, .access = PL0_RW, }, - REGINFO_SENTINEL }; static const ARMCPRegInfo mte_el0_cacheop_reginfo[] = { { .name = "DC_CGVAC", 
.state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 3, .type = ARM_CP_NOP, .access = PL0_W, + .fgt = FGT_DCCVAC, .accessfn = aa64_cacheop_poc_access }, { .name = "DC_CGDVAC", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 5, .type = ARM_CP_NOP, .access = PL0_W, + .fgt = FGT_DCCVAC, .accessfn = aa64_cacheop_poc_access }, { .name = "DC_CGVAP", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 3, .type = ARM_CP_NOP, .access = PL0_W, + .fgt = FGT_DCCVAP, .accessfn = aa64_cacheop_poc_access }, { .name = "DC_CGDVAP", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 5, .type = ARM_CP_NOP, .access = PL0_W, + .fgt = FGT_DCCVAP, .accessfn = aa64_cacheop_poc_access }, { .name = "DC_CGVADP", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 3, .type = ARM_CP_NOP, .access = PL0_W, + .fgt = FGT_DCCVADP, .accessfn = aa64_cacheop_poc_access }, { .name = "DC_CGDVADP", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 5, .type = ARM_CP_NOP, .access = PL0_W, + .fgt = FGT_DCCVADP, .accessfn = aa64_cacheop_poc_access }, { .name = "DC_CIGVAC", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 3, .type = ARM_CP_NOP, .access = PL0_W, + .fgt = FGT_DCCIVAC, .accessfn = aa64_cacheop_poc_access }, { .name = "DC_CIGDVAC", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 5, .type = ARM_CP_NOP, .access = PL0_W, + .fgt = FGT_DCCIVAC, .accessfn = aa64_cacheop_poc_access }, { .name = "DC_GVA", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 3, @@ -7454,6 +8248,7 @@ static const ARMCPRegInfo mte_el0_cacheop_reginfo[] = { #ifndef CONFIG_USER_ONLY /* Avoid overhead of an access check that always passes in user-mode */ .accessfn = aa64_zva_access, + .fgt = FGT_DCZVA, #endif }, { .name = "DC_GZVA", .state = ARM_CP_STATE_AA64, @@ -7462,12 +8257,133 @@ static const ARMCPRegInfo mte_el0_cacheop_reginfo[] = { #ifndef CONFIG_USER_ONLY /* Avoid overhead of an access check that always passes in user-mode */ .accessfn = aa64_zva_access, + .fgt = FGT_DCZVA, #endif }, - REGINFO_SENTINEL }; -#endif +static CPAccessResult access_scxtnum(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + uint64_t hcr = arm_hcr_el2_eff(env); + int el = arm_current_el(env); + + if (el == 0 && !((hcr & HCR_E2H) && (hcr & HCR_TGE))) { + if (env->cp15.sctlr_el[1] & SCTLR_TSCXT) { + if (hcr & HCR_TGE) { + return CP_ACCESS_TRAP_EL2; + } + return CP_ACCESS_TRAP; + } + } else if (el < 2 && (env->cp15.sctlr_el[2] & SCTLR_TSCXT)) { + return CP_ACCESS_TRAP_EL2; + } + if (el < 2 && arm_is_el2_enabled(env) && !(hcr & HCR_ENSCXT)) { + return CP_ACCESS_TRAP_EL2; + } + if (el < 3 + && arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_ENSCXT)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static CPAccessResult access_scxtnum_el1(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) +{ + CPAccessResult nv1 = access_nv1(env, ri, isread); + + if (nv1 != CP_ACCESS_OK) { + return nv1; + } + return access_scxtnum(env, ri, isread); +} + +static const ARMCPRegInfo scxtnum_reginfo[] = { + { .name = "SCXTNUM_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 13, .crm = 0, .opc2 = 7, + .access = PL0_RW, .accessfn = access_scxtnum, + .fgt = FGT_SCXTNUM_EL0, + .fieldoffset = offsetof(CPUARMState, scxtnum_el[0]) }, + { .name = "SCXTNUM_EL1", .state = 
ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 7, + .access = PL1_RW, .accessfn = access_scxtnum_el1, + .fgt = FGT_SCXTNUM_EL1, + .nv2_redirect_offset = 0x188 | NV2_REDIR_NV1, + .fieldoffset = offsetof(CPUARMState, scxtnum_el[1]) }, + { .name = "SCXTNUM_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 7, + .access = PL2_RW, .accessfn = access_scxtnum, + .fieldoffset = offsetof(CPUARMState, scxtnum_el[2]) }, + { .name = "SCXTNUM_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 13, .crm = 0, .opc2 = 7, + .access = PL3_RW, + .fieldoffset = offsetof(CPUARMState, scxtnum_el[3]) }, +}; + +static CPAccessResult access_fgt(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 2 && + arm_feature(env, ARM_FEATURE_EL3) && !(env->cp15.scr_el3 & SCR_FGTEN)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static const ARMCPRegInfo fgt_reginfo[] = { + { .name = "HFGRTR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 4, + .nv2_redirect_offset = 0x1b8, + .access = PL2_RW, .accessfn = access_fgt, + .fieldoffset = offsetof(CPUARMState, cp15.fgt_read[FGTREG_HFGRTR]) }, + { .name = "HFGWTR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 5, + .nv2_redirect_offset = 0x1c0, + .access = PL2_RW, .accessfn = access_fgt, + .fieldoffset = offsetof(CPUARMState, cp15.fgt_write[FGTREG_HFGWTR]) }, + { .name = "HDFGRTR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 3, .crm = 1, .opc2 = 4, + .nv2_redirect_offset = 0x1d0, + .access = PL2_RW, .accessfn = access_fgt, + .fieldoffset = offsetof(CPUARMState, cp15.fgt_read[FGTREG_HDFGRTR]) }, + { .name = "HDFGWTR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 3, .crm = 1, .opc2 = 5, + .nv2_redirect_offset = 0x1d8, + .access = PL2_RW, .accessfn = access_fgt, + .fieldoffset = offsetof(CPUARMState, cp15.fgt_write[FGTREG_HDFGWTR]) }, + { .name = "HFGITR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 6, + .nv2_redirect_offset = 0x1c8, + .access = PL2_RW, .accessfn = access_fgt, + .fieldoffset = offsetof(CPUARMState, cp15.fgt_exec[FGTREG_HFGITR]) }, +}; + +static void vncr_write(CPUARMState *env, const ARMCPRegInfo *ri, + uint64_t value) +{ + /* + * Clear the RES0 bottom 12 bits; this means at runtime we can guarantee + * that VNCR_EL2 + offset is 64-bit aligned. We don't need to do anything + * about the RESS bits at the top -- we choose the "generate an EL2 + * translation abort on use" CONSTRAINED UNPREDICTABLE option (i.e. let + * the ptw.c code detect the resulting invalid address). 
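vncr_write() clears the twelve RES0 bits at the bottom of VNCR_EL2 so that later VNCR_EL2 + offset arithmetic is guaranteed 64-bit aligned: every nv2_redirect_offset is a multiple of 8 within the 4K page. A minimal demonstration of that invariant:

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
        uint64_t written = 0x40001234ull;     /* guest writes an unaligned value */
        uint64_t vncr = written & ~0xfffULL;  /* low 12 bits are RES0 */
        uint64_t redirect_off = 0x188;        /* e.g. the SCXTNUM_EL1 slot above */

        assert(vncr == 0x40000000ull);
        assert(((vncr + redirect_off) & 7) == 0);  /* always 8-byte aligned */
        return 0;
    }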
+ */ + env->cp15.vncr_el2 = value & ~0xfffULL; +} + +static const ARMCPRegInfo nv2_reginfo[] = { + { .name = "VNCR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 2, .opc2 = 0, + .access = PL2_RW, + .writefn = vncr_write, + .nv2_redirect_offset = 0xb0, + .fieldoffset = offsetof(CPUARMState, cp15.vncr_el2) }, +}; + +#endif /* TARGET_AARCH64 */ static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri, bool isread) @@ -7491,26 +8407,31 @@ static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri, static const ARMCPRegInfo predinv_reginfo[] = { { .name = "CFP_RCTX", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 3, .opc2 = 4, + .fgt = FGT_CFPRCTX, .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv }, { .name = "DVP_RCTX", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 3, .opc2 = 5, + .fgt = FGT_DVPRCTX, .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv }, { .name = "CPP_RCTX", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 3, .opc2 = 7, + .fgt = FGT_CPPRCTX, .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv }, /* * Note the AArch32 opcodes have a different OPC1. */ { .name = "CFPRCTX", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 0, .crn = 7, .crm = 3, .opc2 = 4, + .fgt = FGT_CFPRCTX, .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv }, { .name = "DVPRCTX", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 0, .crn = 7, .crm = 3, .opc2 = 5, + .fgt = FGT_DVPRCTX, .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv }, { .name = "CPPRCTX", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 0, .crn = 7, .crm = 3, .opc2 = 7, + .fgt = FGT_CPPRCTX, .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv }, - REGINFO_SENTINEL }; static uint64_t ccsidr2_read(CPUARMState *env, const ARMCPRegInfo *ri) @@ -7523,9 +8444,8 @@ static const ARMCPRegInfo ccsidr2_reginfo[] = { { .name = "CCSIDR2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 2, .access = PL1_R, - .accessfn = access_aa64_tid2, + .accessfn = access_tid4, .readfn = ccsidr2_read, .type = ARM_CP_NO_RAW }, - REGINFO_SENTINEL }; static CPAccessResult access_aa64_tid3(CPUARMState *env, const ARMCPRegInfo *ri, @@ -7586,17 +8506,20 @@ static const ARMCPRegInfo jazelle_regs[] = { .cp = 14, .crn = 2, .crm = 0, .opc1 = 7, .opc2 = 0, .accessfn = access_joscr_jmcr, .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - REGINFO_SENTINEL +}; + +static const ARMCPRegInfo contextidr_el2 = { + .name = "CONTEXTIDR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 1, + .access = PL2_RW, + .fieldoffset = offsetof(CPUARMState, cp15.contextidr_el[2]) }; static const ARMCPRegInfo vhe_reginfo[] = { - { .name = "CONTEXTIDR_EL2", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 1, - .access = PL2_RW, - .fieldoffset = offsetof(CPUARMState, cp15.contextidr_el[2]) }, { .name = "TTBR1_EL2", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 1, .access = PL2_RW, .writefn = vmsa_tcr_ttbr_el2_write, + .raw_writefn = raw_write, .fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el[2]) }, #ifndef CONFIG_USER_ONLY { .name = "CNTHV_CVAL_EL2", .state = ARM_CP_STATE_AA64, @@ -7619,13 +8542,15 @@ static const ARMCPRegInfo vhe_reginfo[] = { { .name = "CNTP_CTL_EL02", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 5, .crn = 14, .crm = 2, .opc2 = 1, .type = ARM_CP_IO | 
ARM_CP_ALIAS, - .access = PL2_RW, .accessfn = e2h_access, + .access = PL2_RW, .accessfn = access_el1nvpct, + .nv2_redirect_offset = 0x180 | NV2_REDIR_NO_NV1, .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].ctl), .writefn = gt_phys_ctl_write, .raw_writefn = raw_write }, { .name = "CNTV_CTL_EL02", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 5, .crn = 14, .crm = 3, .opc2 = 1, .type = ARM_CP_IO | ARM_CP_ALIAS, - .access = PL2_RW, .accessfn = e2h_access, + .access = PL2_RW, .accessfn = access_el1nvvct, + .nv2_redirect_offset = 0x170 | NV2_REDIR_NO_NV1, .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].ctl), .writefn = gt_virt_ctl_write, .raw_writefn = raw_write }, { .name = "CNTP_TVAL_EL02", .state = ARM_CP_STATE_AA64, @@ -7642,29 +8567,31 @@ static const ARMCPRegInfo vhe_reginfo[] = { .opc0 = 3, .opc1 = 5, .crn = 14, .crm = 2, .opc2 = 2, .type = ARM_CP_IO | ARM_CP_ALIAS, .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval), - .access = PL2_RW, .accessfn = e2h_access, + .nv2_redirect_offset = 0x178 | NV2_REDIR_NO_NV1, + .access = PL2_RW, .accessfn = access_el1nvpct, .writefn = gt_phys_cval_write, .raw_writefn = raw_write }, { .name = "CNTV_CVAL_EL02", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 5, .crn = 14, .crm = 3, .opc2 = 2, .type = ARM_CP_IO | ARM_CP_ALIAS, + .nv2_redirect_offset = 0x168 | NV2_REDIR_NO_NV1, .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval), - .access = PL2_RW, .accessfn = e2h_access, + .access = PL2_RW, .accessfn = access_el1nvvct, .writefn = gt_virt_cval_write, .raw_writefn = raw_write }, #endif - REGINFO_SENTINEL }; #ifndef CONFIG_USER_ONLY static const ARMCPRegInfo ats1e1_reginfo[] = { - { .name = "AT_S1E1R", .state = ARM_CP_STATE_AA64, + { .name = "AT_S1E1RP", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 0, .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write64 }, - { .name = "AT_S1E1W", .state = ARM_CP_STATE_AA64, + .fgt = FGT_ATS1E1RP, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, + { .name = "AT_S1E1WP", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 1, .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, - .writefn = ats_write64 }, - REGINFO_SENTINEL + .fgt = FGT_ATS1E1WP, + .accessfn = at_s1e01_access, .writefn = ats_write64 }, }; static const ARMCPRegInfo ats1cp_reginfo[] = { @@ -7676,7 +8603,6 @@ static const ARMCPRegInfo ats1cp_reginfo[] = { .cp = 15, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 1, .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC, .writefn = ats_write }, - REGINFO_SENTINEL }; #endif @@ -7698,7 +8624,6 @@ static const ARMCPRegInfo actlr2_hactlr2_reginfo[] = { .cp = 15, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 3, .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - REGINFO_SENTINEL }; void register_cp_regs_for_features(ARMCPU *cpu) @@ -7712,7 +8637,8 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_arm_cp_regs(cpu, cp_reginfo); if (!arm_feature(env, ARM_FEATURE_V8)) { - /* Must go early as it is full of wildcards that may be + /* + * Must go early as it is full of wildcards that may be * overridden by later definitions. 
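not_v8_cp_reginfo must be registered early precisely because its wildcard (CP_ANY) entries expand to many concrete encodings, and later specific definitions are allowed to override them. A rough standalone model of that expand-then-override behaviour (the table and names are hypothetical, mirroring what define_one_arm_cp_reg() does internally):

    #include <stdio.h>

    #define CP_ANY 0xff

    static const char *regs[8];   /* toy table indexed by opc2 only */

    /* A later call with the same key replaces the earlier one, which is
     * why wildcard patterns must be defined first. */
    static void define_reg(unsigned opc2, const char *name)
    {
        if (opc2 == CP_ANY) {
            for (unsigned i = 0; i < 8; i++) {
                regs[i] = name;   /* wildcard expands to every opc2 */
            }
        } else {
            regs[opc2] = name;    /* specific entry overrides the wildcard */
        }
    }

    int main(void)
    {
        define_reg(CP_ANY, "DUMMY_WI");       /* blanket writes-ignored pattern */
        define_reg(4, "MIDR_ALIAS");          /* later, a real register wins */
        printf("%s %s\n", regs[0], regs[4]);  /* DUMMY_WI MIDR_ALIAS */
        return 0;
    }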
*/ define_arm_cp_regs(cpu, not_v8_cp_reginfo); @@ -7726,15 +8652,24 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, .resetvalue = cpu->isar.id_pfr0 }, - /* ID_PFR1 is not a plain ARM_CP_CONST because we don't know + /* + * ID_PFR1 is not a plain ARM_CP_CONST because we don't know * the value of the GIC field until after we define these regs. */ { .name = "ID_PFR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 1, .access = PL1_R, .type = ARM_CP_NO_RAW, .accessfn = access_aa32_tid3, +#ifdef CONFIG_USER_ONLY + .type = ARM_CP_CONST, + .resetvalue = cpu->isar.id_pfr1, +#else + .type = ARM_CP_NO_RAW, + .accessfn = access_aa32_tid3, .readfn = id_pfr1_read, - .writefn = arm_cp_write_ignore }, + .writefn = arm_cp_write_ignore +#endif + }, { .name = "ID_DFR0", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, @@ -7805,7 +8740,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa32_tid3, .resetvalue = cpu->isar.id_isar6 }, - REGINFO_SENTINEL }; define_arm_cp_regs(cpu, v6_idregs); define_arm_cp_regs(cpu, v6_cp_reginfo); @@ -7827,7 +8761,8 @@ void register_cp_regs_for_features(ARMCPU *cpu) .name = "CLIDR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .accessfn = access_aa64_tid2, + .accessfn = access_tid4, + .fgt = FGT_CLIDR_EL1, .resetvalue = cpu->clidr }; define_one_arm_cp_reg(cpu, &clidr); @@ -7838,11 +8773,16 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_arm_cp_regs(cpu, not_v7_cp_reginfo); } if (arm_feature(env, ARM_FEATURE_V8)) { - /* AArch64 ID registers, which all have impdef reset values. + /* + * v8 ID registers, which all have impdef reset values. * Note that within the ID register ranges the unused slots * must all RAZ, not UNDEF; future architecture versions may * define new registers here. + * ID registers which are AArch64 views of the AArch32 ID registers + * which already existed in v6 and v7 are handled elsewhere, + * in v6_idregs[]. 
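The rule that unused v8 ID slots must RAZ rather than UNDEF matters from the guest side: software can probe a field and treat zero as "feature absent" without risking an undefined-instruction fault on older implementations. A small sketch of that probing idiom (the field position is hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    /* ID registers use 4-bit fields; 0 conventionally means "not implemented". */
    static unsigned id_field(uint64_t idreg, unsigned shift)
    {
        return (idreg >> shift) & 0xf;
    }

    int main(void)
    {
        uint64_t id_aa64isar2 = 0;  /* a RAZ slot on an older implementation */
        if (id_field(id_aa64isar2, 0) == 0) {  /* hypothetical field at [3:0] */
            printf("feature absent, fall back\n");
        }
        return 0;
    }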
*/ + int i; ARMCPRegInfo v8_idregs[] = { /* * ID_AA64PFR0_EL1 is not a plain ARM_CP_CONST in system @@ -7882,11 +8822,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, .resetvalue = cpu->isar.id_aa64zfr0 }, - { .name = "ID_AA64PFR5_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + { .name = "ID_AA64SMFR0_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = 0 }, + .resetvalue = cpu->isar.id_aa64smfr0 }, { .name = "ID_AA64PFR6_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, @@ -7947,11 +8887,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, .resetvalue = cpu->isar.id_aa64isar1 }, - { .name = "ID_AA64ISAR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + { .name = "ID_AA64ISAR2_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = 0 }, + .resetvalue = cpu->isar.id_aa64isar2 }, { .name = "ID_AA64ISAR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, @@ -8032,7 +8972,34 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, .resetvalue = cpu->isar.mvfr2 }, - { .name = "MVFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + /* + * "0, c0, c3, {0,1,2}" are the encodings corresponding to + * AArch64 MVFR[012]_EL1. Define the STATE_AA32 encoding + * as RAZ, since it is in the "reserved for future ID + * registers, RAZ" part of the AArch32 encoding space. + */ + { .name = "RES_0_C0_C3_0", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, + .accessfn = access_aa64_tid3, + .resetvalue = 0 }, + { .name = "RES_0_C0_C3_1", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, + .accessfn = access_aa64_tid3, + .resetvalue = 0 }, + { .name = "RES_0_C0_C3_2", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, + .accessfn = access_aa64_tid3, + .resetvalue = 0 }, + /* + * Other encodings in "0, c0, c3, ..." are STATE_BOTH because + * they're also RAZ for AArch64, and in v8 are gradually + * being filled with AArch64-view-of-AArch32-ID-register + * for new ID registers. 
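The "0, c0, c3, {0,1,2}" notation above is the AArch32 (opc1, CRn, CRm, opc2) coprocessor encoding; the same architectural slot appears in AArch64 as op0=3, op1=0, CRn=0, CRm=3. One way to see how a single register table can hold both views is to pack each tuple into a lookup key; the packing scheme below is illustrative, not QEMU's actual one:

    #include <stdint.h>
    #include <stdio.h>

    /* Pack (cp, opc0, opc1, crn, crm, opc2) into one key. */
    static uint32_t encode_key(unsigned cp, unsigned opc0, unsigned opc1,
                               unsigned crn, unsigned crm, unsigned opc2)
    {
        return (cp << 20) | (opc0 << 16) | (opc1 << 12) |
               (crn << 8) | (crm << 4) | opc2;
    }

    int main(void)
    {
        /* AArch32 view of the MVFR0 slot: cp15, opc1=0, c0, c3, opc2=0 */
        printf("aa32 key: 0x%x\n", encode_key(15, 0, 0, 0, 3, 0));
        /* AArch64 MVFR0_EL1: op0=3, op1=0, CRn=0, CRm=3, op2=0 */
        printf("aa64 key: 0x%x\n", encode_key(0, 3, 0, 0, 3, 0));
        return 0;
    }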
+ */ + { .name = "RES_0_C0_C3_3", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, @@ -8042,17 +9009,17 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, .resetvalue = cpu->isar.id_pfr2 }, - { .name = "MVFR5_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + { .name = "ID_DFR1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = 0 }, - { .name = "MVFR6_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + .resetvalue = cpu->isar.id_dfr1 }, + { .name = "ID_MMFR5", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = 0 }, - { .name = "MVFR7_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + .resetvalue = cpu->isar.id_mmfr5 }, + { .name = "RES_0_C0_C3_7", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, @@ -8060,90 +9027,222 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "PMCEID0", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 0, .crn = 9, .crm = 12, .opc2 = 6, .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, .resetvalue = extract64(cpu->pmceid0, 0, 32) }, { .name = "PMCEID0_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 6, .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, .resetvalue = cpu->pmceid0 }, { .name = "PMCEID1", .state = ARM_CP_STATE_AA32, .cp = 15, .opc1 = 0, .crn = 9, .crm = 12, .opc2 = 7, .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, .resetvalue = extract64(cpu->pmceid1, 0, 32) }, { .name = "PMCEID1_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 7, .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST, + .fgt = FGT_PMCEIDN_EL0, .resetvalue = cpu->pmceid1 }, - REGINFO_SENTINEL }; #ifdef CONFIG_USER_ONLY - ARMCPRegUserSpaceInfo v8_user_idregs[] = { + static const ARMCPRegUserSpaceInfo v8_user_idregs[] = { { .name = "ID_AA64PFR0_EL1", - .exported_bits = 0x000f000f00ff0000, - .fixed_bits = 0x0000000000000011 }, + .exported_bits = R_ID_AA64PFR0_FP_MASK | + R_ID_AA64PFR0_ADVSIMD_MASK | + R_ID_AA64PFR0_SVE_MASK | + R_ID_AA64PFR0_DIT_MASK, + .fixed_bits = (0x1u << R_ID_AA64PFR0_EL0_SHIFT) | + (0x1u << R_ID_AA64PFR0_EL1_SHIFT) }, { .name = "ID_AA64PFR1_EL1", - .exported_bits = 0x00000000000000f0 }, + .exported_bits = R_ID_AA64PFR1_BT_MASK | + R_ID_AA64PFR1_SSBS_MASK | + R_ID_AA64PFR1_MTE_MASK | + R_ID_AA64PFR1_SME_MASK }, { .name = "ID_AA64PFR*_EL1_RESERVED", - .is_glob = true }, - { .name = "ID_AA64ZFR0_EL1" }, + .is_glob = true }, + { .name = "ID_AA64ZFR0_EL1", + .exported_bits = R_ID_AA64ZFR0_SVEVER_MASK | + R_ID_AA64ZFR0_AES_MASK | + R_ID_AA64ZFR0_BITPERM_MASK | + R_ID_AA64ZFR0_BFLOAT16_MASK | + R_ID_AA64ZFR0_B16B16_MASK | + R_ID_AA64ZFR0_SHA3_MASK | + R_ID_AA64ZFR0_SM4_MASK | + R_ID_AA64ZFR0_I8MM_MASK | + R_ID_AA64ZFR0_F32MM_MASK | + R_ID_AA64ZFR0_F64MM_MASK }, + { .name = "ID_AA64SMFR0_EL1", + .exported_bits = R_ID_AA64SMFR0_F32F32_MASK | + R_ID_AA64SMFR0_BI32I32_MASK | + R_ID_AA64SMFR0_B16F32_MASK | + R_ID_AA64SMFR0_F16F32_MASK | + R_ID_AA64SMFR0_I8I32_MASK | + R_ID_AA64SMFR0_F16F16_MASK 
| + R_ID_AA64SMFR0_B16B16_MASK | + R_ID_AA64SMFR0_I16I32_MASK | + R_ID_AA64SMFR0_F64F64_MASK | + R_ID_AA64SMFR0_I16I64_MASK | + R_ID_AA64SMFR0_SMEVER_MASK | + R_ID_AA64SMFR0_FA64_MASK }, { .name = "ID_AA64MMFR0_EL1", - .fixed_bits = 0x00000000ff000000 }, - { .name = "ID_AA64MMFR1_EL1" }, + .exported_bits = R_ID_AA64MMFR0_ECV_MASK, + .fixed_bits = (0xfu << R_ID_AA64MMFR0_TGRAN64_SHIFT) | + (0xfu << R_ID_AA64MMFR0_TGRAN4_SHIFT) }, + { .name = "ID_AA64MMFR1_EL1", + .exported_bits = R_ID_AA64MMFR1_AFP_MASK }, + { .name = "ID_AA64MMFR2_EL1", + .exported_bits = R_ID_AA64MMFR2_AT_MASK }, { .name = "ID_AA64MMFR*_EL1_RESERVED", - .is_glob = true }, + .is_glob = true }, { .name = "ID_AA64DFR0_EL1", - .fixed_bits = 0x0000000000000006 }, - { .name = "ID_AA64DFR1_EL1" }, + .fixed_bits = (0x6u << R_ID_AA64DFR0_DEBUGVER_SHIFT) }, + { .name = "ID_AA64DFR1_EL1" }, { .name = "ID_AA64DFR*_EL1_RESERVED", - .is_glob = true }, + .is_glob = true }, { .name = "ID_AA64AFR*", - .is_glob = true }, + .is_glob = true }, { .name = "ID_AA64ISAR0_EL1", - .exported_bits = 0x00fffffff0fffff0 }, + .exported_bits = R_ID_AA64ISAR0_AES_MASK | + R_ID_AA64ISAR0_SHA1_MASK | + R_ID_AA64ISAR0_SHA2_MASK | + R_ID_AA64ISAR0_CRC32_MASK | + R_ID_AA64ISAR0_ATOMIC_MASK | + R_ID_AA64ISAR0_RDM_MASK | + R_ID_AA64ISAR0_SHA3_MASK | + R_ID_AA64ISAR0_SM3_MASK | + R_ID_AA64ISAR0_SM4_MASK | + R_ID_AA64ISAR0_DP_MASK | + R_ID_AA64ISAR0_FHM_MASK | + R_ID_AA64ISAR0_TS_MASK | + R_ID_AA64ISAR0_RNDR_MASK }, { .name = "ID_AA64ISAR1_EL1", - .exported_bits = 0x000000f0ffffffff }, + .exported_bits = R_ID_AA64ISAR1_DPB_MASK | + R_ID_AA64ISAR1_APA_MASK | + R_ID_AA64ISAR1_API_MASK | + R_ID_AA64ISAR1_JSCVT_MASK | + R_ID_AA64ISAR1_FCMA_MASK | + R_ID_AA64ISAR1_LRCPC_MASK | + R_ID_AA64ISAR1_GPA_MASK | + R_ID_AA64ISAR1_GPI_MASK | + R_ID_AA64ISAR1_FRINTTS_MASK | + R_ID_AA64ISAR1_SB_MASK | + R_ID_AA64ISAR1_BF16_MASK | + R_ID_AA64ISAR1_DGH_MASK | + R_ID_AA64ISAR1_I8MM_MASK }, + { .name = "ID_AA64ISAR2_EL1", + .exported_bits = R_ID_AA64ISAR2_WFXT_MASK | + R_ID_AA64ISAR2_RPRES_MASK | + R_ID_AA64ISAR2_GPA3_MASK | + R_ID_AA64ISAR2_APA3_MASK | + R_ID_AA64ISAR2_MOPS_MASK | + R_ID_AA64ISAR2_BC_MASK | + R_ID_AA64ISAR2_RPRFM_MASK | + R_ID_AA64ISAR2_CSSC_MASK }, { .name = "ID_AA64ISAR*_EL1_RESERVED", - .is_glob = true }, - REGUSERINFO_SENTINEL + .is_glob = true }, }; modify_arm_cp_regs(v8_idregs, v8_user_idregs); #endif - /* RVBAR_EL1 is only implemented if EL1 is the highest EL */ + /* + * RVBAR_EL1 and RMR_EL1 only implemented if EL1 is the highest EL. + * TODO: For RMR, a write with bit 1 set should do something with + * cpu_reset(). In the meantime, "the bit is strictly a request", + * so we are in spec just ignoring writes. 
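The TODO above notes that a fully faithful RMR would act on a write with bit 1 (RR, "request reset") set. Under that assumption, a writefn might look roughly like the sketch below; the reset plumbing is the hypothetical part, and ignoring the request, as the code does today, is also within spec since RR is only a request:

    #include <stdint.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define RMR_AA64 (1u << 0)   /* exception level uses AArch64 after reset */
    #define RMR_RR   (1u << 1)   /* request a warm reset */

    /* Hypothetical writefn body: not what QEMU implements (it treats RMR as
     * a constant), just what honouring the request could look like. */
    static void rmr_write(uint64_t *rmr_state, uint64_t value, bool aa64_capable)
    {
        *rmr_state = value & (RMR_AA64 | RMR_RR);
        if (value & RMR_RR) {
            printf("warm reset requested (AArch64=%d)\n",
                   !!(value & RMR_AA64 && aa64_capable));
            /* a real implementation would schedule cpu_reset() here */
        }
    }

    int main(void)
    {
        uint64_t rmr = RMR_AA64;
        rmr_write(&rmr, RMR_AA64 | RMR_RR, true);
        return 0;
    }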
+ */ if (!arm_feature(env, ARM_FEATURE_EL3) && !arm_feature(env, ARM_FEATURE_EL2)) { - ARMCPRegInfo rvbar = { - .name = "RVBAR_EL1", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1, - .type = ARM_CP_CONST, .access = PL1_R, .resetvalue = cpu->rvbar + ARMCPRegInfo el1_reset_regs[] = { + { .name = "RVBAR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1, + .access = PL1_R, + .fieldoffset = offsetof(CPUARMState, cp15.rvbar) }, + { .name = "RMR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_CONST, + .resetvalue = arm_feature(env, ARM_FEATURE_AARCH64) } }; - define_one_arm_cp_reg(cpu, &rvbar); + define_arm_cp_regs(cpu, el1_reset_regs); } define_arm_cp_regs(cpu, v8_idregs); define_arm_cp_regs(cpu, v8_cp_reginfo); + if (cpu_isar_feature(aa64_aa32_el1, cpu)) { + define_arm_cp_regs(cpu, v8_aa32_el1_reginfo); + } + + for (i = 4; i < 16; i++) { + /* + * Encodings in "0, c0, {c4-c7}, {0-7}" are RAZ for AArch32. + * For pre-v8 cores there are RAZ patterns for these in + * id_pre_v8_midr_cp_reginfo[]; for v8 we do that here. + * v8 extends the "must RAZ" part of the ID register space + * to also cover c0, 0, c{8-15}, {0-7}. + * These are STATE_AA32 because in the AArch64 sysreg space + * c4-c7 is where the AArch64 ID registers live (and we've + * already defined those in v8_idregs[]), and c8-c15 are not + * "must RAZ" for AArch64. + */ + g_autofree char *name = g_strdup_printf("RES_0_C0_C%d_X", i); + ARMCPRegInfo v8_aa32_raz_idregs = { + .name = name, + .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 0, .crm = i, .opc2 = CP_ANY, + .access = PL1_R, .type = ARM_CP_CONST, + .accessfn = access_aa64_tid3, + .resetvalue = 0 }; + define_one_arm_cp_reg(cpu, &v8_aa32_raz_idregs); + } } - if (arm_feature(env, ARM_FEATURE_EL2)) { + + /* + * Register the base EL2 cpregs. + * Pre v8, these registers are implemented only as part of the + * Virtualization Extensions (EL2 present). Beginning with v8, + * if EL2 is missing but EL3 is enabled, mostly these become + * RES0 from EL3, with some specific exceptions. 
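With this change the base EL2 registers are registered whenever v8 EL3 is present even without EL2, and individual entries that must then read as constant zero or UNDEF carry ARM_CP_EL3_NO_EL2_C_NZ / ARM_CP_EL3_NO_EL2_UNDEF flags instead of living in a parallel el3_no_el2 table. A condensed model of just the gating decision:

    #include <stdbool.h>
    #include <stdio.h>

    /* Decide whether to register the base EL2 register block. */
    static bool want_el2_regs(bool have_el2, bool have_el3, bool is_v8)
    {
        /* Pre-v8: only with the Virtualization Extensions (EL2 present).
         * v8: also when EL3 alone is present, so the regs can read as RES0. */
        return have_el2 || (have_el3 && is_v8);
    }

    int main(void)
    {
        printf("%d %d %d\n",
               want_el2_regs(true,  false, false),   /* 1: EL2 present */
               want_el2_regs(false, true,  true),    /* 1: v8, EL3-only */
               want_el2_regs(false, true,  false));  /* 0: pre-v8, EL3-only */
        return 0;
    }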
+     */
+    if (arm_feature(env, ARM_FEATURE_EL2)
+        || (arm_feature(env, ARM_FEATURE_EL3)
+            && arm_feature(env, ARM_FEATURE_V8))) {
         uint64_t vmpidr_def = mpidr_read_val(env);
         ARMCPRegInfo vpidr_regs[] = {
             { .name = "VPIDR", .state = ARM_CP_STATE_AA32,
               .cp = 15, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
               .access = PL2_RW, .accessfn = access_el3_aa32ns,
-              .resetvalue = cpu->midr, .type = ARM_CP_ALIAS,
+              .resetvalue = cpu->midr,
+              .type = ARM_CP_ALIAS | ARM_CP_EL3_NO_EL2_C_NZ,
               .fieldoffset = offsetoflow32(CPUARMState, cp15.vpidr_el2) },
             { .name = "VPIDR_EL2", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
               .access = PL2_RW, .resetvalue = cpu->midr,
+              .type = ARM_CP_EL3_NO_EL2_C_NZ,
+              .nv2_redirect_offset = 0x88,
               .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) },
             { .name = "VMPIDR", .state = ARM_CP_STATE_AA32,
               .cp = 15, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
               .access = PL2_RW, .accessfn = access_el3_aa32ns,
-              .resetvalue = vmpidr_def, .type = ARM_CP_ALIAS,
+              .resetvalue = vmpidr_def,
+              .type = ARM_CP_ALIAS | ARM_CP_EL3_NO_EL2_C_NZ,
               .fieldoffset = offsetoflow32(CPUARMState, cp15.vmpidr_el2) },
             { .name = "VMPIDR_EL2", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
-              .access = PL2_RW,
-              .resetvalue = vmpidr_def,
+              .access = PL2_RW, .resetvalue = vmpidr_def,
+              .type = ARM_CP_EL3_NO_EL2_C_NZ,
+              .nv2_redirect_offset = 0x50,
               .fieldoffset = offsetof(CPUARMState, cp15.vmpidr_el2) },
-            REGINFO_SENTINEL
         };
+        /*
+         * The only field of MDCR_EL2 that has a defined architectural reset
+         * value is MDCR_EL2.HPMN which should reset to the value of PMCR_EL0.N.
+         */
+        ARMCPRegInfo mdcr_el2 = {
+            .name = "MDCR_EL2", .state = ARM_CP_STATE_BOTH, .type = ARM_CP_IO,
+            .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 1,
+            .writefn = mdcr_el2_write,
+            .access = PL2_RW, .resetvalue = pmu_num_counters(env),
+            .fieldoffset = offsetof(CPUARMState, cp15.mdcr_el2),
+        };
+        define_one_arm_cp_reg(cpu, &mdcr_el2);
         define_arm_cp_regs(cpu, vpidr_regs);
         define_arm_cp_regs(cpu, el2_cp_reginfo);
         if (arm_feature(env, ARM_FEATURE_V8)) {
@@ -8152,61 +9251,55 @@ void register_cp_regs_for_features(ARMCPU *cpu)
         if (cpu_isar_feature(aa64_sel2, cpu)) {
             define_arm_cp_regs(cpu, el2_sec_cp_reginfo);
         }
-        /* RVBAR_EL2 is only implemented if EL2 is the highest EL */
-        if (!arm_feature(env, ARM_FEATURE_EL3)) {
-            ARMCPRegInfo rvbar = {
-                .name = "RVBAR_EL2", .state = ARM_CP_STATE_AA64,
-                .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 1,
-                .type = ARM_CP_CONST, .access = PL2_R, .resetvalue = cpu->rvbar
-            };
-            define_one_arm_cp_reg(cpu, &rvbar);
-        }
-    } else {
-        /* If EL2 is missing but higher ELs are enabled, we need to
-         * register the no_el2 reginfos.
+        /*
+         * RVBAR_EL2 and RMR_EL2 only implemented if EL2 is the highest EL.
+         * See commentary near RMR_EL1.
          */
-        if (arm_feature(env, ARM_FEATURE_EL3)) {
-            /* When EL3 exists but not EL2, VPIDR and VMPIDR take the value
-             * of MIDR_EL1 and MPIDR_EL1.
-             */
-            ARMCPRegInfo vpidr_regs[] = {
-                { .name = "VPIDR_EL2", .state = ARM_CP_STATE_BOTH,
-                  .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
-                  .access = PL2_RW, .accessfn = access_el3_aa32ns,
-                  .type = ARM_CP_CONST, .resetvalue = cpu->midr,
-                  .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) },
-                { .name = "VMPIDR_EL2", .state = ARM_CP_STATE_BOTH,
-                  .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
-                  .access = PL2_RW, .accessfn = access_el3_aa32ns,
-                  .type = ARM_CP_NO_RAW,
-                  .writefn = arm_cp_write_ignore, .readfn = mpidr_read },
-                REGINFO_SENTINEL
+        if (!arm_feature(env, ARM_FEATURE_EL3)) {
+            static const ARMCPRegInfo el2_reset_regs[] = {
+                { .name = "RVBAR_EL2", .state = ARM_CP_STATE_AA64,
+                  .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 1,
+                  .access = PL2_R,
+                  .fieldoffset = offsetof(CPUARMState, cp15.rvbar) },
+                { .name = "RVBAR", .type = ARM_CP_ALIAS,
+                  .cp = 15, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1,
+                  .access = PL2_R,
+                  .fieldoffset = offsetof(CPUARMState, cp15.rvbar) },
+                { .name = "RMR_EL2", .state = ARM_CP_STATE_AA64,
+                  .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 2,
+                  .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 1 },
             };
-            define_arm_cp_regs(cpu, vpidr_regs);
-            define_arm_cp_regs(cpu, el3_no_el2_cp_reginfo);
-            if (arm_feature(env, ARM_FEATURE_V8)) {
-                define_arm_cp_regs(cpu, el3_no_el2_v8_cp_reginfo);
-            }
+            define_arm_cp_regs(cpu, el2_reset_regs);
         }
     }
+
+    /* Register the base EL3 cpregs. */
     if (arm_feature(env, ARM_FEATURE_EL3)) {
         define_arm_cp_regs(cpu, el3_cp_reginfo);
         ARMCPRegInfo el3_regs[] = {
             { .name = "RVBAR_EL3", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 6, .crn = 12, .crm = 0, .opc2 = 1,
-              .type = ARM_CP_CONST, .access = PL3_R, .resetvalue = cpu->rvbar },
+              .access = PL3_R,
+              .fieldoffset = offsetof(CPUARMState, cp15.rvbar), },
+            { .name = "RMR_EL3", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 6, .crn = 12, .crm = 0, .opc2 = 2,
+              .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 1 },
+            { .name = "RMR", .state = ARM_CP_STATE_AA32,
+              .cp = 15, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 2,
+              .access = PL3_RW, .type = ARM_CP_CONST,
+              .resetvalue = arm_feature(env, ARM_FEATURE_AARCH64) },
             { .name = "SCTLR_EL3", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 0, .opc2 = 0,
               .access = PL3_RW,
               .raw_writefn = raw_write, .writefn = sctlr_write,
               .fieldoffset = offsetof(CPUARMState, cp15.sctlr_el[3]),
               .resetvalue = cpu->reset_sctlr },
-            REGINFO_SENTINEL
         };
         define_arm_cp_regs(cpu, el3_regs);
     }
-    /* The behaviour of NSACR is sufficiently various that we don't
+    /*
+     * The behaviour of NSACR is sufficiently various that we don't
      * try to describe it in a single reginfo:
      *  if EL3 is 64 bit, then trap to EL3 from S EL1,
      *     reads as constant 0xc00 from NS EL1 and NS EL2
@@ -8216,7 +9309,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
      */
     if (arm_feature(env, ARM_FEATURE_EL3)) {
         if (arm_feature(env, ARM_FEATURE_AARCH64)) {
-            ARMCPRegInfo nsacr = {
+            static const ARMCPRegInfo nsacr = {
                 .name = "NSACR", .type = ARM_CP_CONST,
                 .cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 2,
                 .access = PL1_RW, .accessfn = nsacr_access,
@@ -8224,7 +9317,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
             };
             define_one_arm_cp_reg(cpu, &nsacr);
         } else {
-            ARMCPRegInfo nsacr = {
+            static const ARMCPRegInfo nsacr = {
                 .name = "NSACR",
                 .cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 2,
                 .access = PL3_RW | PL1_R,
@@ -8235,7 +9328,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
         }
     } else {
         if (arm_feature(env, ARM_FEATURE_V8)) {
-            ARMCPRegInfo nsacr = {
+            static const ARMCPRegInfo nsacr = {
                 .name = "NSACR", .type = ARM_CP_CONST,
                 .cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 2,
                 .access = PL1_R,
@@ -8268,7 +9361,36 @@ void register_cp_regs_for_features(ARMCPU *cpu)
     if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) {
         define_arm_cp_regs(cpu, generic_timer_cp_reginfo);
     }
+    if (cpu_isar_feature(aa64_ecv_traps, cpu)) {
+        define_arm_cp_regs(cpu, gen_timer_ecv_cp_reginfo);
+    }
+#ifndef CONFIG_USER_ONLY
+    if (cpu_isar_feature(aa64_ecv, cpu)) {
+        define_one_arm_cp_reg(cpu, &gen_timer_cntpoff_reginfo);
+    }
+#endif
     if (arm_feature(env, ARM_FEATURE_VAPA)) {
+        ARMCPRegInfo vapa_cp_reginfo[] = {
+            { .name = "PAR", .cp = 15, .crn = 7, .crm = 4, .opc1 = 0, .opc2 = 0,
+              .access = PL1_RW, .resetvalue = 0,
+              .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.par_s),
+                                     offsetoflow32(CPUARMState, cp15.par_ns) },
+              .writefn = par_write},
+#ifndef CONFIG_USER_ONLY
+            /* This underdecoding is safe because the reginfo is NO_RAW. */
+            { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY,
+              .access = PL1_W, .accessfn = ats_access,
+              .writefn = ats_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC },
+#endif
+        };
+
+        /*
+         * When LPAE exists this 32-bit PAR register is an alias of the
+         * 64-bit AArch32 PAR register defined in lpae_cp_reginfo[]
+         */
+        if (arm_feature(env, ARM_FEATURE_LPAE)) {
+            vapa_cp_reginfo[0].type = ARM_CP_ALIAS | ARM_CP_NO_GDB;
+        }
         define_arm_cp_regs(cpu, vapa_cp_reginfo);
     }
     if (arm_feature(env, ARM_FEATURE_CACHE_TEST_CLEAN)) {
@@ -8298,13 +9420,15 @@ void register_cp_regs_for_features(ARMCPU *cpu)
     if (cpu_isar_feature(aa32_jazelle, cpu)) {
         define_arm_cp_regs(cpu, jazelle_regs);
     }
-    /* Slightly awkwardly, the OMAP and StrongARM cores need all of
+    /*
+     * Slightly awkwardly, the OMAP and StrongARM cores need all of
      * cp15 crn=0 to be writes-ignored, whereas for other cores they should
      * be read-only (ie write causes UNDEF exception).
      */
     {
         ARMCPRegInfo id_pre_v8_midr_cp_reginfo[] = {
-            /* Pre-v8 MIDR space.
+            /*
+             * Pre-v8 MIDR space.
              * Note that the MIDR isn't a simple constant register because
              * of the TI925 behaviour where writes to another register can
              * cause the MIDR value to change.
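
A pattern worth noting before the next hunk: throughout this patch the REGINFO_SENTINEL terminators are deleted, because the list-registration entry points now take an explicit element count (the new prototype, void define_arm_cp_regs_with_opaque_len(ARMCPU *cpu, const ARMCPRegInfo *regs, void *opaque, size_t len), appears later in this diff). The old call sites keep working because the short names become wrappers. A minimal sketch of that wrapper shape, assuming the usual ARRAY_SIZE idiom; the real macros live in target/arm/cpregs.h and may differ in detail:

/*
 * Sketch only: sentinel-free registration behind the existing names.
 * The *_with_opaque_len prototype matches the one defined in this diff;
 * the macro bodies here are an assumption.
 */
void define_arm_cp_regs_with_opaque_len(ARMCPU *cpu, const ARMCPRegInfo *regs,
                                        void *opaque, size_t len);

#define define_arm_cp_regs_with_opaque(CPU, REGS, OPAQUE) \
    define_arm_cp_regs_with_opaque_len(CPU, REGS, OPAQUE, ARRAY_SIZE(REGS))

#define define_arm_cp_regs(CPU, REGS) \
    define_arm_cp_regs_with_opaque(CPU, REGS, NULL)

Because ARRAY_SIZE only works on true arrays, the reginfo lists must stay visible as arrays at the call site; dropping the sentinel is also what lets many of them become static const in this patch.
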
@@ -8336,18 +9460,15 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "DUMMY", .cp = 15, .crn = 0, .crm = 7, .opc1 = 0, .opc2 = CP_ANY, .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, - REGINFO_SENTINEL }; ARMCPRegInfo id_v8_midr_cp_reginfo[] = { { .name = "MIDR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 0, .access = PL1_R, .type = ARM_CP_NO_RAW, .resetvalue = cpu->midr, + .fgt = FGT_MIDR_EL1, .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid), .readfn = midr_read }, - /* crn = 0 op1 = 0 crm = 0 op2 = 4,7 : AArch32 aliases of MIDR */ - { .name = "MIDR", .type = ARM_CP_ALIAS | ARM_CP_CONST, - .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 4, - .access = PL1_R, .resetvalue = cpu->midr }, + /* crn = 0 op1 = 0 crm = 0 op2 = 7 : AArch32 aliases of MIDR */ { .name = "MIDR", .type = ARM_CP_ALIAS | ARM_CP_CONST, .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 7, .access = PL1_R, .resetvalue = cpu->midr }, @@ -8355,8 +9476,13 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 6, .access = PL1_R, .accessfn = access_aa64_tid1, + .fgt = FGT_REVIDR_EL1, .type = ARM_CP_CONST, .resetvalue = cpu->revidr }, - REGINFO_SENTINEL + }; + ARMCPRegInfo id_v8_midr_alias_cp_reginfo = { + .name = "MIDR", .type = ARM_CP_ALIAS | ARM_CP_CONST | ARM_CP_NO_GDB, + .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 4, + .access = PL1_R, .resetvalue = cpu->midr }; ARMCPRegInfo id_cp_reginfo[] = { /* These are common to v8 and pre-v8 */ @@ -8367,6 +9493,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0, .access = PL0_R, .accessfn = ctr_el0_access, + .fgt = FGT_CTR_EL0, .type = ARM_CP_CONST, .resetvalue = cpu->ctr }, /* TCMTR and TLBTR exist in v8 but have no 64-bit versions */ { .name = "TCMTR", @@ -8374,7 +9501,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .accessfn = access_aa32_tid1, .type = ARM_CP_CONST, .resetvalue = 0 }, - REGINFO_SENTINEL }; /* TLBTR is specific to VMSA */ ARMCPRegInfo id_tlbtr_reginfo = { @@ -8391,47 +9517,128 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->pmsav7_dregion << 8 }; - ARMCPRegInfo crn0_wi_reginfo = { + /* HMPUIR is specific to PMSA V8 */ + ARMCPRegInfo id_hmpuir_reginfo = { + .name = "HMPUIR", + .cp = 15, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 4, + .access = PL2_R, .type = ARM_CP_CONST, + .resetvalue = cpu->pmsav8r_hdregion + }; + static const ARMCPRegInfo crn0_wi_reginfo = { .name = "CRN0_WI", .cp = 15, .crn = 0, .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_W, .type = ARM_CP_NOP | ARM_CP_OVERRIDE }; #ifdef CONFIG_USER_ONLY - ARMCPRegUserSpaceInfo id_v8_user_midr_cp_reginfo[] = { + static const ARMCPRegUserSpaceInfo id_v8_user_midr_cp_reginfo[] = { { .name = "MIDR_EL1", - .exported_bits = 0x00000000ffffffff }, - { .name = "REVIDR_EL1" }, - REGUSERINFO_SENTINEL + .exported_bits = R_MIDR_EL1_REVISION_MASK | + R_MIDR_EL1_PARTNUM_MASK | + R_MIDR_EL1_ARCHITECTURE_MASK | + R_MIDR_EL1_VARIANT_MASK | + R_MIDR_EL1_IMPLEMENTER_MASK }, + { .name = "REVIDR_EL1" }, }; modify_arm_cp_regs(id_v8_midr_cp_reginfo, id_v8_user_midr_cp_reginfo); #endif if (arm_feature(env, ARM_FEATURE_OMAPCP) || arm_feature(env, ARM_FEATURE_STRONGARM)) { - ARMCPRegInfo *r; - /* Register the blanket "writes ignored" value first to cover the + size_t i; + /* + * Register the blanket "writes ignored" value first 
to cover the * whole space. Then update the specific ID registers to allow write * access, so that they ignore writes rather than causing them to * UNDEF. */ define_one_arm_cp_reg(cpu, &crn0_wi_reginfo); - for (r = id_pre_v8_midr_cp_reginfo; - r->type != ARM_CP_SENTINEL; r++) { - r->access = PL1_RW; + for (i = 0; i < ARRAY_SIZE(id_pre_v8_midr_cp_reginfo); ++i) { + id_pre_v8_midr_cp_reginfo[i].access = PL1_RW; } - for (r = id_cp_reginfo; r->type != ARM_CP_SENTINEL; r++) { - r->access = PL1_RW; + for (i = 0; i < ARRAY_SIZE(id_cp_reginfo); ++i) { + id_cp_reginfo[i].access = PL1_RW; } id_mpuir_reginfo.access = PL1_RW; id_tlbtr_reginfo.access = PL1_RW; } if (arm_feature(env, ARM_FEATURE_V8)) { define_arm_cp_regs(cpu, id_v8_midr_cp_reginfo); + if (!arm_feature(env, ARM_FEATURE_PMSA)) { + define_one_arm_cp_reg(cpu, &id_v8_midr_alias_cp_reginfo); + } } else { define_arm_cp_regs(cpu, id_pre_v8_midr_cp_reginfo); } define_arm_cp_regs(cpu, id_cp_reginfo); if (!arm_feature(env, ARM_FEATURE_PMSA)) { define_one_arm_cp_reg(cpu, &id_tlbtr_reginfo); + } else if (arm_feature(env, ARM_FEATURE_PMSA) && + arm_feature(env, ARM_FEATURE_V8)) { + uint32_t i = 0; + char *tmp_string; + + define_one_arm_cp_reg(cpu, &id_mpuir_reginfo); + define_one_arm_cp_reg(cpu, &id_hmpuir_reginfo); + define_arm_cp_regs(cpu, pmsav8r_cp_reginfo); + + /* Register alias is only valid for first 32 indexes */ + for (i = 0; i < MIN(cpu->pmsav7_dregion, 32); ++i) { + uint8_t crm = 0b1000 | extract32(i, 1, 3); + uint8_t opc1 = extract32(i, 4, 1); + uint8_t opc2 = extract32(i, 0, 1) << 2; + + tmp_string = g_strdup_printf("PRBAR%u", i); + ARMCPRegInfo tmp_prbarn_reginfo = { + .name = tmp_string, .type = ARM_CP_ALIAS | ARM_CP_NO_RAW, + .cp = 15, .opc1 = opc1, .crn = 6, .crm = crm, .opc2 = opc2, + .access = PL1_RW, .resetvalue = 0, + .accessfn = access_tvm_trvm, + .writefn = pmsav8r_regn_write, .readfn = pmsav8r_regn_read + }; + define_one_arm_cp_reg(cpu, &tmp_prbarn_reginfo); + g_free(tmp_string); + + opc2 = extract32(i, 0, 1) << 2 | 0x1; + tmp_string = g_strdup_printf("PRLAR%u", i); + ARMCPRegInfo tmp_prlarn_reginfo = { + .name = tmp_string, .type = ARM_CP_ALIAS | ARM_CP_NO_RAW, + .cp = 15, .opc1 = opc1, .crn = 6, .crm = crm, .opc2 = opc2, + .access = PL1_RW, .resetvalue = 0, + .accessfn = access_tvm_trvm, + .writefn = pmsav8r_regn_write, .readfn = pmsav8r_regn_read + }; + define_one_arm_cp_reg(cpu, &tmp_prlarn_reginfo); + g_free(tmp_string); + } + + /* Register alias is only valid for first 32 indexes */ + for (i = 0; i < MIN(cpu->pmsav8r_hdregion, 32); ++i) { + uint8_t crm = 0b1000 | extract32(i, 1, 3); + uint8_t opc1 = 0b100 | extract32(i, 4, 1); + uint8_t opc2 = extract32(i, 0, 1) << 2; + + tmp_string = g_strdup_printf("HPRBAR%u", i); + ARMCPRegInfo tmp_hprbarn_reginfo = { + .name = tmp_string, + .type = ARM_CP_NO_RAW, + .cp = 15, .opc1 = opc1, .crn = 6, .crm = crm, .opc2 = opc2, + .access = PL2_RW, .resetvalue = 0, + .writefn = pmsav8r_regn_write, .readfn = pmsav8r_regn_read + }; + define_one_arm_cp_reg(cpu, &tmp_hprbarn_reginfo); + g_free(tmp_string); + + opc2 = extract32(i, 0, 1) << 2 | 0x1; + tmp_string = g_strdup_printf("HPRLAR%u", i); + ARMCPRegInfo tmp_hprlarn_reginfo = { + .name = tmp_string, + .type = ARM_CP_NO_RAW, + .cp = 15, .opc1 = opc1, .crn = 6, .crm = crm, .opc2 = opc2, + .access = PL2_RW, .resetvalue = 0, + .writefn = pmsav8r_regn_write, .readfn = pmsav8r_regn_read + }; + define_one_arm_cp_reg(cpu, &tmp_hprlarn_reginfo); + g_free(tmp_string); + } } else if (arm_feature(env, ARM_FEATURE_V7)) { define_one_arm_cp_reg(cpu, 
&id_mpuir_reginfo); } @@ -8441,14 +9648,13 @@ void register_cp_regs_for_features(ARMCPU *cpu) ARMCPRegInfo mpidr_cp_reginfo[] = { { .name = "MPIDR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5, + .fgt = FGT_MPIDR_EL1, .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW }, - REGINFO_SENTINEL }; #ifdef CONFIG_USER_ONLY - ARMCPRegUserSpaceInfo mpidr_user_cp_reginfo[] = { + static const ARMCPRegUserSpaceInfo mpidr_user_cp_reginfo[] = { { .name = "MPIDR_EL1", .fixed_bits = 0x0000000080000000 }, - REGUSERINFO_SENTINEL }; modify_arm_cp_regs(mpidr_cp_reginfo, mpidr_user_cp_reginfo); #endif @@ -8460,6 +9666,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "ACTLR_EL1", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 1, .access = PL1_RW, .accessfn = access_tacr, + .nv2_redirect_offset = 0x118, .type = ARM_CP_CONST, .resetvalue = cpu->reset_auxcr }, { .name = "ACTLR_EL2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 1, @@ -8469,7 +9676,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 0, .opc2 = 1, .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - REGINFO_SENTINEL }; define_arm_cp_regs(cpu, auxcr_reginfo); if (cpu_isar_feature(aa32_ac2, cpu)) { @@ -8491,7 +9697,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) * AArch64 cores we might need to add a specific feature flag * to indicate cores with "flavour 2" CBAR. */ - if (arm_feature(env, ARM_FEATURE_AARCH64)) { + if (arm_feature(env, ARM_FEATURE_V8)) { /* 32 bit view is [31:18] 0...0 [43:32]. */ uint32_t cbar32 = (extract64(cpu->reset_cbar, 18, 14) << 18) | extract64(cpu->reset_cbar, 32, 12); @@ -8504,7 +9710,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) .type = ARM_CP_CONST, .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 3, .opc2 = 0, .access = PL1_R, .resetvalue = cpu->reset_cbar }, - REGINFO_SENTINEL }; /* We don't implement a r/w 64 bit CBAR currently */ assert(arm_feature(env, ARM_FEATURE_CBAR_RO)); @@ -8513,7 +9718,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) ARMCPRegInfo cbar = { .name = "CBAR", .cp = 15, .crn = 15, .crm = 0, .opc1 = 4, .opc2 = 0, - .access = PL1_R|PL3_W, .resetvalue = cpu->reset_cbar, + .access = PL1_R | PL3_W, .resetvalue = cpu->reset_cbar, .fieldoffset = offsetof(CPUARMState, cp15.c15_config_base_address) }; @@ -8527,14 +9732,16 @@ void register_cp_regs_for_features(ARMCPU *cpu) } if (arm_feature(env, ARM_FEATURE_VBAR)) { - ARMCPRegInfo vbar_cp_reginfo[] = { + static const ARMCPRegInfo vbar_cp_reginfo[] = { { .name = "VBAR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0, .access = PL1_RW, .writefn = vbar_write, + .accessfn = access_nv1, + .fgt = FGT_VBAR_EL1, + .nv2_redirect_offset = 0x250 | NV2_REDIR_NV1, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.vbar_s), offsetof(CPUARMState, cp15.vbar_ns) }, .resetvalue = 0 }, - REGINFO_SENTINEL }; define_arm_cp_regs(cpu, vbar_cp_reginfo); } @@ -8545,19 +9752,33 @@ void register_cp_regs_for_features(ARMCPU *cpu) .name = "SCTLR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 0, .access = PL1_RW, .accessfn = access_tvm_trvm, + .fgt = FGT_SCTLR_EL1, + .nv2_redirect_offset = 0x110 | NV2_REDIR_NV1, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.sctlr_s), offsetof(CPUARMState, cp15.sctlr_ns) }, .writefn = sctlr_write, .resetvalue = cpu->reset_sctlr, .raw_writefn = raw_write, }; if (arm_feature(env, ARM_FEATURE_XSCALE)) { - /* 
Normally we would always end the TB on an SCTLR write, but Linux + /* + * Normally we would always end the TB on an SCTLR write, but Linux * arch/arm/mach-pxa/sleep.S expects two instructions following * an MMU enable to execute from cache. Imitate this behaviour. */ sctlr.type |= ARM_CP_SUPPRESS_TB_END; } define_one_arm_cp_reg(cpu, &sctlr); + + if (arm_feature(env, ARM_FEATURE_PMSA) && + arm_feature(env, ARM_FEATURE_V8)) { + ARMCPRegInfo vsctlr = { + .name = "VSCTLR", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 0, + .access = PL2_RW, .resetvalue = 0x0, + .fieldoffset = offsetoflow32(CPUARMState, cp15.vsctlr), + }; + define_one_arm_cp_reg(cpu, &vsctlr); + } } if (cpu_isar_feature(aa64_lor, cpu)) { @@ -8584,24 +9805,30 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (cpu_isar_feature(aa64_ssbs, cpu)) { define_one_arm_cp_reg(cpu, &ssbs_reginfo); } + if (cpu_isar_feature(any_ras, cpu)) { + define_arm_cp_regs(cpu, minimal_ras_reginfo); + } + if (cpu_isar_feature(aa64_vh, cpu) || + cpu_isar_feature(aa64_debugv8p2, cpu)) { + define_one_arm_cp_reg(cpu, &contextidr_el2); + } if (arm_feature(env, ARM_FEATURE_EL2) && cpu_isar_feature(aa64_vh, cpu)) { define_arm_cp_regs(cpu, vhe_reginfo); } if (cpu_isar_feature(aa64_sve, cpu)) { - define_one_arm_cp_reg(cpu, &zcr_el1_reginfo); - if (arm_feature(env, ARM_FEATURE_EL2)) { - define_one_arm_cp_reg(cpu, &zcr_el2_reginfo); - } else { - define_one_arm_cp_reg(cpu, &zcr_no_el2_reginfo); - } - if (arm_feature(env, ARM_FEATURE_EL3)) { - define_one_arm_cp_reg(cpu, &zcr_el3_reginfo); - } + define_arm_cp_regs(cpu, zcr_reginfo); + } + + if (cpu_isar_feature(aa64_hcx, cpu)) { + define_one_arm_cp_reg(cpu, &hcrx_el2_reginfo); } #ifdef TARGET_AARCH64 + if (cpu_isar_feature(aa64_sme, cpu)) { + define_arm_cp_regs(cpu, sme_reginfo); + } if (cpu_isar_feature(aa64_pauth, cpu)) { define_arm_cp_regs(cpu, pauth_reginfo); } @@ -8614,7 +9841,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (cpu_isar_feature(aa64_tlbios, cpu)) { define_arm_cp_regs(cpu, tlbios_reginfo); } -#ifndef CONFIG_USER_ONLY /* Data Cache clean instructions up to PoP */ if (cpu_isar_feature(aa64_dcpop, cpu)) { define_one_arm_cp_reg(cpu, dcpop_reg); @@ -8623,7 +9849,6 @@ void register_cp_regs_for_features(ARMCPU *cpu) define_one_arm_cp_reg(cpu, dcpodp_reg); } } -#endif /*CONFIG_USER_ONLY*/ /* * If full MTE is enabled, add all of the system registers. @@ -8631,12 +9856,38 @@ void register_cp_regs_for_features(ARMCPU *cpu) * then define only a RAZ/WI version of PSTATE.TCO. 
*/ if (cpu_isar_feature(aa64_mte, cpu)) { + ARMCPRegInfo gmid_reginfo = { + .name = "GMID_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 4, + .access = PL1_R, .accessfn = access_aa64_tid5, + .type = ARM_CP_CONST, .resetvalue = cpu->gm_blocksize, + }; + define_one_arm_cp_reg(cpu, &gmid_reginfo); define_arm_cp_regs(cpu, mte_reginfo); define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo); } else if (cpu_isar_feature(aa64_mte_insn_reg, cpu)) { define_arm_cp_regs(cpu, mte_tco_ro_reginfo); define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo); } + + if (cpu_isar_feature(aa64_scxtnum, cpu)) { + define_arm_cp_regs(cpu, scxtnum_reginfo); + } + + if (cpu_isar_feature(aa64_fgt, cpu)) { + define_arm_cp_regs(cpu, fgt_reginfo); + } + + if (cpu_isar_feature(aa64_rme, cpu)) { + define_arm_cp_regs(cpu, rme_reginfo); + if (cpu_isar_feature(aa64_mte, cpu)) { + define_arm_cp_regs(cpu, rme_mte_reginfo); + } + } + + if (cpu_isar_feature(aa64_nv2, cpu)) { + define_arm_cp_regs(cpu, nv2_reginfo); + } #endif if (cpu_isar_feature(any_predinv, cpu)) { @@ -8658,213 +9909,186 @@ void register_cp_regs_for_features(ARMCPU *cpu) #endif } -void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) +/* + * Private utility function for define_one_arm_cp_reg_with_opaque(): + * add a single reginfo struct to the hash table. + */ +static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, + void *opaque, CPState state, + CPSecureState secstate, + int crm, int opc1, int opc2, + const char *name) { - CPUState *cs = CPU(cpu); CPUARMState *env = &cpu->env; - - if (arm_feature(env, ARM_FEATURE_AARCH64)) { + uint32_t key; + ARMCPRegInfo *r2; + bool is64 = r->type & ARM_CP_64BIT; + bool ns = secstate & ARM_CP_SECSTATE_NS; + int cp = r->cp; + size_t name_len; + bool make_const; + + switch (state) { + case ARM_CP_STATE_AA32: + /* We assume it is a cp15 register if the .cp field is left unset. */ + if (cp == 0 && r->state == ARM_CP_STATE_BOTH) { + cp = 15; + } + key = ENCODE_CP_REG(cp, is64, ns, r->crn, crm, opc1, opc2); + break; + case ARM_CP_STATE_AA64: /* - * The lower part of each SVE register aliases to the FPU - * registers so we don't need to include both. + * To allow abbreviation of ARMCPRegInfo definitions, we treat + * cp == 0 as equivalent to the value for "standard guest-visible + * sysreg". STATE_BOTH definitions are also always "standard sysreg" + * in their AArch64 view (the .cp value may be non-zero for the + * benefit of the AArch32 view). 
*/ -#ifdef TARGET_AARCH64 - if (isar_feature_aa64_sve(&cpu->isar)) { - gdb_register_coprocessor(cs, arm_gdb_get_svereg, arm_gdb_set_svereg, - arm_gen_dynamic_svereg_xml(cs, cs->gdb_num_regs), - "sve-registers.xml", 0); - } else -#endif - { - gdb_register_coprocessor(cs, aarch64_fpu_gdb_get_reg, - aarch64_fpu_gdb_set_reg, - 34, "aarch64-fpu.xml", 0); + if (cp == 0 || r->state == ARM_CP_STATE_BOTH) { + cp = CP_REG_ARM64_SYSREG_CP; } - } else if (arm_feature(env, ARM_FEATURE_NEON)) { - gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg, - 51, "arm-neon.xml", 0); - } else if (cpu_isar_feature(aa32_simd_r32, cpu)) { - gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg, - 35, "arm-vfp3.xml", 0); - } else if (cpu_isar_feature(aa32_vfp_simd, cpu)) { - gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg, - 19, "arm-vfp.xml", 0); + key = ENCODE_AA64_CP_REG(cp, r->crn, crm, r->opc0, opc1, opc2); + break; + default: + g_assert_not_reached(); } - gdb_register_coprocessor(cs, arm_gdb_get_sysreg, arm_gdb_set_sysreg, - arm_gen_dynamic_sysreg_xml(cs, cs->gdb_num_regs), - "system-registers.xml", 0); - -} -/* Sort alphabetically by type name, except for "any". */ -static gint arm_cpu_list_compare(gconstpointer a, gconstpointer b) -{ - ObjectClass *class_a = (ObjectClass *)a; - ObjectClass *class_b = (ObjectClass *)b; - const char *name_a, *name_b; + /* Overriding of an existing definition must be explicitly requested. */ + if (!(r->type & ARM_CP_OVERRIDE)) { + const ARMCPRegInfo *oldreg = get_arm_cp_reginfo(cpu->cp_regs, key); + if (oldreg) { + assert(oldreg->type & ARM_CP_OVERRIDE); + } + } - name_a = object_class_get_name(class_a); - name_b = object_class_get_name(class_b); - if (strcmp(name_a, "any-" TYPE_ARM_CPU) == 0) { - return 1; - } else if (strcmp(name_b, "any-" TYPE_ARM_CPU) == 0) { - return -1; + /* + * Eliminate registers that are not present because the EL is missing. + * Doing this here makes it easier to put all registers for a given + * feature into the same ARMCPRegInfo array and define them all at once. + */ + make_const = false; + if (arm_feature(env, ARM_FEATURE_EL3)) { + /* + * An EL2 register without EL2 but with EL3 is (usually) RES0. + * See rule RJFFP in section D1.1.3 of DDI0487H.a. + */ + int min_el = ctz32(r->access) / 2; + if (min_el == 2 && !arm_feature(env, ARM_FEATURE_EL2)) { + if (r->type & ARM_CP_EL3_NO_EL2_UNDEF) { + return; + } + make_const = !(r->type & ARM_CP_EL3_NO_EL2_KEEP); + } } else { - return strcmp(name_a, name_b); + CPAccessRights max_el = (arm_feature(env, ARM_FEATURE_EL2) + ? 
PL2_RW : PL1_RW); + if ((r->access & max_el) == 0) { + return; + } } -} - -static void arm_cpu_list_entry(gpointer data, gpointer user_data) -{ - ObjectClass *oc = data; - const char *typename; - char *name; - - typename = object_class_get_name(oc); - name = g_strndup(typename, strlen(typename) - strlen("-" TYPE_ARM_CPU)); - qemu_printf(" %s\n", name); - g_free(name); -} - -void arm_cpu_list(void) -{ - GSList *list; - - list = object_class_get_list(TYPE_ARM_CPU, false); - list = g_slist_sort(list, arm_cpu_list_compare); - qemu_printf("Available CPUs:\n"); - g_slist_foreach(list, arm_cpu_list_entry, NULL); - g_slist_free(list); -} -static void arm_cpu_add_definition(gpointer data, gpointer user_data) -{ - ObjectClass *oc = data; - CpuDefinitionInfoList **cpu_list = user_data; - CpuDefinitionInfo *info; - const char *typename; - - typename = object_class_get_name(oc); - info = g_malloc0(sizeof(*info)); - info->name = g_strndup(typename, - strlen(typename) - strlen("-" TYPE_ARM_CPU)); - info->q_typename = g_strdup(typename); - - QAPI_LIST_PREPEND(*cpu_list, info); -} - -CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) -{ - CpuDefinitionInfoList *cpu_list = NULL; - GSList *list; - - list = object_class_get_list(TYPE_ARM_CPU, false); - g_slist_foreach(list, arm_cpu_add_definition, &cpu_list); - g_slist_free(list); - - return cpu_list; -} - -static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, - void *opaque, int state, int secstate, - int crm, int opc1, int opc2, - const char *name) -{ - /* Private utility function for define_one_arm_cp_reg_with_opaque(): - * add a single reginfo struct to the hash table. - */ - uint32_t *key = g_new(uint32_t, 1); - ARMCPRegInfo *r2 = g_memdup(r, sizeof(ARMCPRegInfo)); - int is64 = (r->type & ARM_CP_64BIT) ? 1 : 0; - int ns = (secstate & ARM_CP_SECSTATE_NS) ? 1 : 0; + /* Combine cpreg and name into one allocation. */ + name_len = strlen(name) + 1; + r2 = g_malloc(sizeof(*r2) + name_len); + *r2 = *r; + r2->name = memcpy(r2 + 1, name, name_len); - r2->name = g_strdup(name); - /* Reset the secure state to the specific incoming state. This is - * necessary as the register may have been defined with both states. + /* + * Update fields to match the instantiation, overwiting wildcards + * such as CP_ANY, ARM_CP_STATE_BOTH, or ARM_CP_SECSTATE_BOTH. */ + r2->cp = cp; + r2->crm = crm; + r2->opc1 = opc1; + r2->opc2 = opc2; + r2->state = state; r2->secure = secstate; - - if (r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1]) { - /* Register is banked (using both entries in array). - * Overwriting fieldoffset as the array is only used to define - * banked registers but later only fieldoffset is used. - */ - r2->fieldoffset = r->bank_fieldoffsets[ns]; + if (opaque) { + r2->opaque = opaque; } - if (state == ARM_CP_STATE_AA32) { - if (r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1]) { - /* If the register is banked then we don't need to migrate or - * reset the 32-bit instance in certain cases: - * - * 1) If the register has both 32-bit and 64-bit instances then we - * can count on the 64-bit instance taking care of the - * non-secure bank. - * 2) If ARMv8 is enabled then we can count on a 64-bit version - * taking care of the secure bank. This requires that separate - * 32 and 64-bit definitions are provided. 
- */ - if ((r->state == ARM_CP_STATE_BOTH && ns) || - (arm_feature(&cpu->env, ARM_FEATURE_V8) && !ns)) { - r2->type |= ARM_CP_ALIAS; - } - } else if ((secstate != r->secure) && !ns) { - /* The register is not banked so we only want to allow migration of - * the non-secure instance. - */ - r2->type |= ARM_CP_ALIAS; + if (make_const) { + /* This should not have been a very special register to begin. */ + int old_special = r2->type & ARM_CP_SPECIAL_MASK; + assert(old_special == 0 || old_special == ARM_CP_NOP); + /* + * Set the special function to CONST, retaining the other flags. + * This is important for e.g. ARM_CP_SVE so that we still + * take the SVE trap if CPTR_EL3.EZ == 0. + */ + r2->type = (r2->type & ~ARM_CP_SPECIAL_MASK) | ARM_CP_CONST; + /* + * Usually, these registers become RES0, but there are a few + * special cases like VPIDR_EL2 which have a constant non-zero + * value with writes ignored. + */ + if (!(r->type & ARM_CP_EL3_NO_EL2_C_NZ)) { + r2->resetvalue = 0; } + /* + * ARM_CP_CONST has precedence, so removing the callbacks and + * offsets are not strictly necessary, but it is potentially + * less confusing to debug later. + */ + r2->readfn = NULL; + r2->writefn = NULL; + r2->raw_readfn = NULL; + r2->raw_writefn = NULL; + r2->resetfn = NULL; + r2->fieldoffset = 0; + r2->bank_fieldoffsets[0] = 0; + r2->bank_fieldoffsets[1] = 0; + } else { + bool isbanked = r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1]; - if (r->state == ARM_CP_STATE_BOTH) { - /* We assume it is a cp15 register if the .cp field is left unset. + if (isbanked) { + /* + * Register is banked (using both entries in array). + * Overwriting fieldoffset as the array is only used to define + * banked registers but later only fieldoffset is used. */ - if (r2->cp == 0) { - r2->cp = 15; + r2->fieldoffset = r->bank_fieldoffsets[ns]; + } + if (state == ARM_CP_STATE_AA32) { + if (isbanked) { + /* + * If the register is banked then we don't need to migrate or + * reset the 32-bit instance in certain cases: + * + * 1) If the register has both 32-bit and 64-bit instances + * then we can count on the 64-bit instance taking care + * of the non-secure bank. + * 2) If ARMv8 is enabled then we can count on a 64-bit + * version taking care of the secure bank. This requires + * that separate 32 and 64-bit definitions are provided. + */ + if ((r->state == ARM_CP_STATE_BOTH && ns) || + (arm_feature(env, ARM_FEATURE_V8) && !ns)) { + r2->type |= ARM_CP_ALIAS; + } + } else if ((secstate != r->secure) && !ns) { + /* + * The register is not banked so we only want to allow + * migration of the non-secure instance. + */ + r2->type |= ARM_CP_ALIAS; } -#ifdef HOST_WORDS_BIGENDIAN - if (r2->fieldoffset) { + if (HOST_BIG_ENDIAN && + r->state == ARM_CP_STATE_BOTH && r2->fieldoffset) { r2->fieldoffset += sizeof(uint32_t); } -#endif - } - } - if (state == ARM_CP_STATE_AA64) { - /* To allow abbreviation of ARMCPRegInfo - * definitions, we treat cp == 0 as equivalent to - * the value for "standard guest-visible sysreg". - * STATE_BOTH definitions are also always "standard - * sysreg" in their AArch64 view (the .cp value may - * be non-zero for the benefit of the AArch32 view). 
- */ - if (r->cp == 0 || r->state == ARM_CP_STATE_BOTH) { - r2->cp = CP_REG_ARM64_SYSREG_CP; } - *key = ENCODE_AA64_CP_REG(r2->cp, r2->crn, crm, - r2->opc0, opc1, opc2); - } else { - *key = ENCODE_CP_REG(r2->cp, is64, ns, r2->crn, crm, opc1, opc2); } - if (opaque) { - r2->opaque = opaque; - } - /* reginfo passed to helpers is correct for the actual access, - * and is never ARM_CP_STATE_BOTH: - */ - r2->state = state; - /* Make sure reginfo passed to helpers for wildcarded regs - * has the correct crm/opc1/opc2 for this reg, not CP_ANY: - */ - r2->crm = crm; - r2->opc1 = opc1; - r2->opc2 = opc2; - /* By convention, for wildcarded registers only the first + + /* + * By convention, for wildcarded registers only the first * entry is used for migration; the others are marked as * ALIAS so we don't try to transfer the register * multiple times. Special registers (ie NOP/WFI) are * never migratable and not even raw-accessible. */ - if ((r->type & ARM_CP_SPECIAL)) { + if (r2->type & ARM_CP_SPECIAL_MASK) { r2->type |= ARM_CP_NO_RAW; } if (((r->crm == CP_ANY) && crm != 0) || @@ -8873,7 +10097,8 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, r2->type |= ARM_CP_ALIAS | ARM_CP_NO_GDB; } - /* Check that raw accesses are either forbidden or handled. Note that + /* + * Check that raw accesses are either forbidden or handled. Note that * we can't assert this earlier because the setup of fieldoffset for * banked registers has to be done first. */ @@ -8881,29 +10106,15 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r, assert(!raw_accessors_invalid(r2)); } - /* Overriding of an existing definition must be explicitly - * requested. - */ - if (!(r->type & ARM_CP_OVERRIDE)) { - ARMCPRegInfo *oldreg; - oldreg = g_hash_table_lookup(cpu->cp_regs, key); - if (oldreg && !(oldreg->type & ARM_CP_OVERRIDE)) { - fprintf(stderr, "Register redefined: cp=%d %d bit " - "crn=%d crm=%d opc1=%d opc2=%d, " - "was %s, now %s\n", r2->cp, 32 + 32 * is64, - r2->crn, r2->crm, r2->opc1, r2->opc2, - oldreg->name, r2->name); - g_assert_not_reached(); - } - } - g_hash_table_insert(cpu->cp_regs, key, r2); + g_hash_table_insert(cpu->cp_regs, (gpointer)(uintptr_t)key, r2); } void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, const ARMCPRegInfo *r, void *opaque) { - /* Define implementations of coprocessor registers. + /* + * Define implementations of coprocessor registers. * We store these in a hashtable because typically * there are less than 150 registers in a space which * is 16*16*16*8*8 = 262144 in size. @@ -8926,13 +10137,15 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, * bits; the ARM_CP_64BIT* flag applies only to the AArch32 view of * the register, if any. */ - int crm, opc1, opc2, state; + int crm, opc1, opc2; int crmmin = (r->crm == CP_ANY) ? 0 : r->crm; int crmmax = (r->crm == CP_ANY) ? 15 : r->crm; int opc1min = (r->opc1 == CP_ANY) ? 0 : r->opc1; int opc1max = (r->opc1 == CP_ANY) ? 7 : r->opc1; int opc2min = (r->opc2 == CP_ANY) ? 0 : r->opc2; int opc2max = (r->opc2 == CP_ANY) ? 7 : r->opc2; + CPState state; + /* 64 bit registers have only CRm and Opc1 fields */ assert(!((r->type & ARM_CP_64BIT) && (r->opc2 || r->crn))); /* op0 only exists in the AArch64 encodings */ @@ -8968,14 +10181,15 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, default: g_assert_not_reached(); } - /* The AArch64 pseudocode CheckSystemAccess() specifies that op1 + /* + * The AArch64 pseudocode CheckSystemAccess() specifies that op1 * encodes a minimum access level for the register. 
We roll this * runtime check into our general permission check code, so check * here that the reginfo's specified permissions are strict enough * to encompass the generic architectural permission check. */ if (r->state != ARM_CP_STATE_AA32) { - int mask = 0; + CPAccessRights mask; switch (r->opc1) { case 0: /* min_EL EL1, but some accessible to EL0 via kernel ABI */ @@ -9004,17 +10218,17 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, break; default: /* broken reginfo with out-of-range opc1 */ - assert(false); - break; + g_assert_not_reached(); } /* assert our permissions are not too lax (stricter is fine) */ assert((r->access & ~mask) == 0); } - /* Check that the register definition has enough info to handle + /* + * Check that the register definition has enough info to handle * reads and writes if they are permitted. */ - if (!(r->type & (ARM_CP_SPECIAL|ARM_CP_CONST))) { + if (!(r->type & (ARM_CP_SPECIAL_MASK | ARM_CP_CONST))) { if (r->access & PL3_R) { assert((r->fieldoffset || (r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1])) || @@ -9026,8 +10240,7 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, r->writefn); } } - /* Bad type field probably means missing sentinel at end of reg list */ - assert(cptype_valid(r->type)); + for (crm = crmmin; crm <= crmmax; crm++) { for (opc1 = opc1min; opc1 <= opc1max; opc1++) { for (opc2 = opc2min; opc2 <= opc2max; opc2++) { @@ -9037,7 +10250,8 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, continue; } if (state == ARM_CP_STATE_AA32) { - /* Under AArch32 CP registers can be common + /* + * Under AArch32 CP registers can be common * (same for secure and non-secure world) or banked. */ char *name; @@ -9049,7 +10263,7 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, r->secure, crm, opc1, opc2, r->name); break; - default: + case ARM_CP_SECSTATE_BOTH: name = g_strdup_printf("%s_S", r->name); add_cpreg_to_hashtable(cpu, r, opaque, state, ARM_CP_SECSTATE_S, @@ -9059,10 +10273,14 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, ARM_CP_SECSTATE_NS, crm, opc1, opc2, r->name); break; + default: + g_assert_not_reached(); } } else { - /* AArch64 registers get mapped to non-secure instance - * of AArch32 */ + /* + * AArch64 registers get mapped to non-secure instance + * of AArch32 + */ add_cpreg_to_hashtable(cpu, r, opaque, state, ARM_CP_SECSTATE_NS, crm, opc1, opc2, r->name); @@ -9073,13 +10291,13 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, } } -void define_arm_cp_regs_with_opaque(ARMCPU *cpu, - const ARMCPRegInfo *regs, void *opaque) +/* Define a whole list of registers */ +void define_arm_cp_regs_with_opaque_len(ARMCPU *cpu, const ARMCPRegInfo *regs, + void *opaque, size_t len) { - /* Define a whole list of registers */ - const ARMCPRegInfo *r; - for (r = regs; r->type != ARM_CP_SENTINEL; r++) { - define_one_arm_cp_reg_with_opaque(cpu, r, opaque); + size_t i; + for (i = 0; i < len; ++i) { + define_one_arm_cp_reg_with_opaque(cpu, regs + i, opaque); } } @@ -9091,17 +10309,20 @@ void define_arm_cp_regs_with_opaque(ARMCPU *cpu, * user-space cannot alter any values and dynamic values pertaining to * execution state are hidden from user space view anyway. 
*/ -void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods) +void modify_arm_cp_regs_with_len(ARMCPRegInfo *regs, size_t regs_len, + const ARMCPRegUserSpaceInfo *mods, + size_t mods_len) { - const ARMCPRegUserSpaceInfo *m; - ARMCPRegInfo *r; - - for (m = mods; m->name; m++) { + for (size_t mi = 0; mi < mods_len; ++mi) { + const ARMCPRegUserSpaceInfo *m = mods + mi; GPatternSpec *pat = NULL; + if (m->is_glob) { pat = g_pattern_spec_new(m->name); } - for (r = regs; r->type != ARM_CP_SENTINEL; r++) { + for (size_t ri = 0; ri < regs_len; ++ri) { + ARMCPRegInfo *r = regs + ri; + if (pat && g_pattern_match_string(pat, r->name)) { r->type = ARM_CP_CONST; r->access = PL0U_R; @@ -9123,7 +10344,7 @@ void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods) const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp) { - return g_hash_table_lookup(cpregs, &encoded_cp); + return g_hash_table_lookup(cpregs, (gpointer)(uintptr_t)encoded_cp); } void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri, @@ -9145,7 +10366,8 @@ void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque) static int bad_mode_switch(CPUARMState *env, int mode, CPSRWriteType write_type) { - /* Return true if it is not valid for us to switch to + /* + * Return true if it is not valid for us to switch to * this CPU mode (ie all the UNPREDICTABLE cases in * the ARM ARM CPSRWriteByInstr pseudocode). */ @@ -9166,10 +10388,12 @@ static int bad_mode_switch(CPUARMState *env, int mode, CPSRWriteType write_type) case ARM_CPU_MODE_UND: case ARM_CPU_MODE_IRQ: case ARM_CPU_MODE_FIQ: - /* Note that we don't implement the IMPDEF NSACR.RFR which in v7 + /* + * Note that we don't implement the IMPDEF NSACR.RFR which in v7 * allows FIQ mode to be Secure-only. (In v8 this doesn't exist.) */ - /* If HCR.TGE is set then changes from Monitor to NS PL1 via MSR + /* + * If HCR.TGE is set then changes from Monitor to NS PL1 via MSR * and CPS are treated as illegal mode changes. */ if (write_type == CPSRWriteByInstr && @@ -9211,10 +10435,12 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, env->CF = (val >> 29) & 1; env->VF = (val << 3) & 0x80000000; } - if (mask & CPSR_Q) + if (mask & CPSR_Q) { env->QF = ((val & CPSR_Q) != 0); - if (mask & CPSR_T) + } + if (mask & CPSR_T) { env->thumb = ((val & CPSR_T) != 0); + } if (mask & CPSR_IT_0_1) { env->condexec_bits &= ~3; env->condexec_bits |= (val >> 25) & 3; @@ -9227,7 +10453,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, env->GE = (val >> 16) & 0xf; } - /* In a V7 implementation that includes the security extensions but does + /* + * In a V7 implementation that includes the security extensions but does * not include Virtualization Extensions the SCR.FW and SCR.AW bits control * whether non-secure software is allowed to change the CPSR_F and CPSR_A * bits respectively. @@ -9243,7 +10470,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, changed_daif = (env->daif ^ val) & mask; if (changed_daif & CPSR_A) { - /* Check to see if we are allowed to change the masking of async + /* + * Check to see if we are allowed to change the masking of async * abort exceptions from a non-secure state. 
*/ if (!(env->cp15.scr_el3 & SCR_AW)) { @@ -9255,7 +10483,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, } if (changed_daif & CPSR_F) { - /* Check to see if we are allowed to change the masking of FIQ + /* + * Check to see if we are allowed to change the masking of FIQ * exceptions from a non-secure state. */ if (!(env->cp15.scr_el3 & SCR_FW)) { @@ -9265,7 +10494,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, mask &= ~CPSR_F; } - /* Check whether non-maskable FIQ (NMFI) support is enabled. + /* + * Check whether non-maskable FIQ (NMFI) support is enabled. * If this bit is set software is not allowed to mask * FIQs, but is allowed to set CPSR_F to 0. */ @@ -9285,7 +10515,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, if (write_type != CPSRWriteRaw && ((env->uncached_cpsr ^ val) & mask & CPSR_M)) { if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) { - /* Note that we can only get here in USR mode if this is a + /* + * Note that we can only get here in USR mode if this is a * gdb stub write; for this case we follow the architectural * behaviour for guest writes in USR mode of ignoring an attempt * to switch mode. (Those are caught by translate.c for writes @@ -9293,7 +10524,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, */ mask &= ~CPSR_M; } else if (bad_mode_switch(env, val & CPSR_M, write_type)) { - /* Attempt to switch to an invalid mode: this is UNPREDICTABLE in + /* + * Attempt to switch to an invalid mode: this is UNPREDICTABLE in * v7, and has defined behaviour in v8: * + leave CPSR.M untouched * + allow changes to the other CPSR fields @@ -9323,66 +10555,11 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask, } mask &= ~CACHED_CPSR_BITS; env->uncached_cpsr = (env->uncached_cpsr & ~mask) | (val & mask); - if (rebuild_hflags) { + if (tcg_enabled() && rebuild_hflags) { arm_rebuild_hflags(env); } } -/* Sign/zero extend */ -uint32_t HELPER(sxtb16)(uint32_t x) -{ - uint32_t res; - res = (uint16_t)(int8_t)x; - res |= (uint32_t)(int8_t)(x >> 16) << 16; - return res; -} - -static void handle_possible_div0_trap(CPUARMState *env, uintptr_t ra) -{ - /* - * Take a division-by-zero exception if necessary; otherwise return - * to get the usual non-trapping division behaviour (result of 0) - */ - if (arm_feature(env, ARM_FEATURE_M) - && (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_DIV_0_TRP_MASK)) { - raise_exception_ra(env, EXCP_DIVBYZERO, 0, 1, ra); - } -} - -uint32_t HELPER(uxtb16)(uint32_t x) -{ - uint32_t res; - res = (uint16_t)(uint8_t)x; - res |= (uint32_t)(uint8_t)(x >> 16) << 16; - return res; -} - -int32_t HELPER(sdiv)(CPUARMState *env, int32_t num, int32_t den) -{ - if (den == 0) { - handle_possible_div0_trap(env, GETPC()); - return 0; - } - if (num == INT_MIN && den == -1) { - return INT_MIN; - } - return num / den; -} - -uint32_t HELPER(udiv)(CPUARMState *env, uint32_t num, uint32_t den) -{ - if (den == 0) { - handle_possible_div0_trap(env, GETPC()); - return 0; - } - return num / den; -} - -uint32_t HELPER(rbit)(uint32_t x) -{ - return revbit32(x); -} - #ifdef CONFIG_USER_ONLY static void switch_mode(CPUARMState *env, int mode) @@ -9413,15 +10590,16 @@ static void switch_mode(CPUARMState *env, int mode) int i; old_mode = env->uncached_cpsr & CPSR_M; - if (mode == old_mode) + if (mode == old_mode) { return; + } if (old_mode == ARM_CPU_MODE_FIQ) { - memcpy (env->fiq_regs, env->regs + 8, 5 * sizeof(uint32_t)); - memcpy (env->regs + 8, env->usr_regs, 5 * sizeof(uint32_t)); + 
memcpy(env->fiq_regs, env->regs + 8, 5 * sizeof(uint32_t)); + memcpy(env->regs + 8, env->usr_regs, 5 * sizeof(uint32_t)); } else if (mode == ARM_CPU_MODE_FIQ) { - memcpy (env->usr_regs, env->regs + 8, 5 * sizeof(uint32_t)); - memcpy (env->regs + 8, env->fiq_regs, 5 * sizeof(uint32_t)); + memcpy(env->usr_regs, env->regs + 8, 5 * sizeof(uint32_t)); + memcpy(env->regs + 8, env->fiq_regs, 5 * sizeof(uint32_t)); } i = bank_number(old_mode); @@ -9436,7 +10614,8 @@ static void switch_mode(CPUARMState *env, int mode) env->regs[14] = env->banked_r14[r14_bank_number(mode)]; } -/* Physical Interrupt Target EL Lookup Table +/* + * Physical Interrupt Target EL Lookup Table * * [ From ARM ARM section G1.13.4 (Table G1-15) ] * @@ -9498,7 +10677,7 @@ static const int8_t target_el_table[2][2][2][2][2][4] = { uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx, uint32_t cur_el, bool secure) { - CPUARMState *env = cs->env_ptr; + CPUARMState *env = cpu_env(cs); bool rw; bool scr; bool hcr; @@ -9510,7 +10689,8 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx, if (arm_feature(env, ARM_FEATURE_EL3)) { rw = ((env->cp15.scr_el3 & SCR_RW) == SCR_RW); } else { - /* Either EL2 is the highest EL (and so the EL2 register width + /* + * Either EL2 is the highest EL (and so the EL2 register width * is given by is64); or there is no EL2 or EL3, in which case * the value of 'rw' does not affect the table lookup anyway. */ @@ -9547,8 +10727,10 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx, return target_el; } -void arm_log_exception(int idx) +void arm_log_exception(CPUState *cs) { + int idx = cs->exception_index; + if (qemu_loglevel_mask(CPU_LOG_INT)) { const char *exc = NULL; static const char * const excnames[] = { @@ -9574,6 +10756,8 @@ void arm_log_exception(int idx) [EXCP_LSERR] = "v8M LSERR UsageFault", [EXCP_UNALIGNED] = "v7M UNALIGNED UsageFault", [EXCP_DIVBYZERO] = "v7M DIVBYZERO UsageFault", + [EXCP_VSERR] = "Virtual SERR", + [EXCP_GPC] = "Granule Protection Check", }; if (idx >= 0 && idx < ARRAY_SIZE(excnames)) { @@ -9582,7 +10766,8 @@ void arm_log_exception(int idx) if (!exc) { exc = "unknown"; } - qemu_log_mask(CPU_LOG_INT, "Taking exception %d [%s]\n", idx, exc); + qemu_log_mask(CPU_LOG_INT, "Taking exception %d [%s] on CPU %d\n", + idx, exc, cs->cpu_index); } } @@ -9781,7 +10966,8 @@ void aarch64_sync_64_to_32(CPUARMState *env) env->banked_r13[bank_number(ARM_CPU_MODE_UND)] = env->xregs[23]; } - /* Registers x24-x30 are mapped to r8-r14 in FIQ mode. If we are in FIQ + /* + * Registers x24-x30 are mapped to r8-r14 in FIQ mode. If we are in FIQ * mode, then we can copy to r8-r14. Otherwise, we copy to the * FIQ bank for r8-r14. */ @@ -9874,7 +11060,10 @@ static void take_aarch32_exception(CPUARMState *env, int new_mode, env->regs[14] = env->regs[15] + offset; } env->regs[15] = newpc; - arm_rebuild_hflags(env); + + if (tcg_enabled()) { + arm_rebuild_hflags(env); + } } static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs) @@ -9885,7 +11074,7 @@ static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs) * separately here. * * The vector table entry used is always the 0x14 Hyp mode entry point, - * unless this is an UNDEF/HVC/abort taken from Hyp to Hyp. + * unless this is an UNDEF/SVC/HVC/abort taken from Hyp to Hyp. * The offset applied to the preferred return address is always zero * (see DDI0487C.a section G1.12.3). * PSTATE A/I/F masks are set based only on the SCR.EA/IRQ/FIQ values. 
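
The first change in the hunk below is a bug fix, not a style cleanup: an SVC executed in Hyp mode must vector to the Supervisor Call entry at offset 0x08, which it shares with HVC, while 0x14 is the Hyp Trap entry used for trapped accesses. A sketch of the mapping the corrected code implements (offsets are the architectural AArch32 Hyp vector table; the EXCP_* names are QEMU's, and the full function also handles BKPT and sets up the abort FAR/FSR state):

switch (cs->exception_index) {
case EXCP_UDEF:           addr = 0x04; break; /* Undefined Instruction  */
case EXCP_SWI:                                /* SVC taken from Hyp...  */
case EXCP_HVC:            addr = 0x08; break; /* ...shares the HVC slot */
case EXCP_PREFETCH_ABORT: addr = 0x0c; break;
case EXCP_DATA_ABORT:     addr = 0x10; break;
case EXCP_HYP_TRAP:       addr = 0x14; break; /* trapped to Hyp mode    */
case EXCP_IRQ:            addr = 0x18; break;
case EXCP_FIQ:            addr = 0x1c; break;
}
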
@@ -9899,7 +11088,7 @@ static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs) addr = 0x04; break; case EXCP_SWI: - addr = 0x14; + addr = 0x08; break; case EXCP_BKPT: /* Fall through to prefetch abort. */ @@ -10004,6 +11193,24 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs) } if (env->exception.target_el == 2) { + /* Debug exceptions are reported differently on AArch32 */ + switch (syn_get_ec(env->exception.syndrome)) { + case EC_BREAKPOINT: + case EC_BREAKPOINT_SAME_EL: + case EC_AA32_BKPT: + case EC_VECTORCATCH: + env->exception.syndrome = syn_insn_abort(arm_current_el(env) == 2, + 0, 0, 0x22); + break; + case EC_WATCHPOINT: + env->exception.syndrome = syn_set_ec(env->exception.syndrome, + EC_DATAABORT); + break; + case EC_WATCHPOINT_SAME_EL: + env->exception.syndrome = syn_set_ec(env->exception.syndrome, + EC_DATAABORT_SAME_EL); + break; + } arm_cpu_do_interrupt_aarch32_hyp(cs); return; } @@ -10013,10 +11220,11 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs) new_mode = ARM_CPU_MODE_UND; addr = 0x04; mask = CPSR_I; - if (env->thumb) + if (env->thumb) { offset = 2; - else + } else { offset = 4; + } break; case EXCP_SWI: new_mode = ARM_CPU_MODE_SVC; @@ -10085,6 +11293,31 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs) mask = CPSR_A | CPSR_I | CPSR_F; offset = 4; break; + case EXCP_VSERR: + { + /* + * Note that this is reported as a data abort, but the DFAR + * has an UNKNOWN value. Construct the SError syndrome from + * AET and ExT fields. + */ + ARMMMUFaultInfo fi = { .type = ARMFault_AsyncExternal, }; + + if (extended_addresses_enabled(env)) { + env->exception.fsr = arm_fi_to_lfsc(&fi); + } else { + env->exception.fsr = arm_fi_to_sfsc(&fi); + } + env->exception.fsr |= env->cp15.vsesr_el2 & 0xd000; + A32_BANKED_CURRENT_REG_SET(env, dfsr, env->exception.fsr); + qemu_log_mask(CPU_LOG_INT, "...with IFSR 0x%x\n", + env->exception.fsr); + + new_mode = ARM_CPU_MODE_ABT; + addr = 0x10; + mask = CPSR_A | CPSR_I; + offset = 8; + } + break; case EXCP_SMC: new_mode = ARM_CPU_MODE_MON; addr = 0x08; @@ -10102,7 +11335,8 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs) /* High vectors. When enabled, base address cannot be remapped. */ addr += 0xffff0000; } else { - /* ARM v7 architectures provide a vector base address register to remap + /* + * ARM v7 architectures provide a vector base address register to remap * the interrupt vector table. * This register is only followed in non-monitor mode, and is banked. * Note: only bits 31:5 are valid. @@ -10192,6 +11426,31 @@ static uint32_t cpsr_read_for_spsr_elx(CPUARMState *env) return ret; } +static bool syndrome_is_sync_extabt(uint32_t syndrome) +{ + /* Return true if this syndrome value is a synchronous external abort */ + switch (syn_get_ec(syndrome)) { + case EC_INSNABORT: + case EC_INSNABORT_SAME_EL: + case EC_DATAABORT: + case EC_DATAABORT_SAME_EL: + /* Look at fault status code for all the synchronous ext abort cases */ + switch (syndrome & 0x3f) { + case 0x10: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + return true; + default: + return false; + } + default: + return false; + } +} + /* Handle exception entry to a target EL which is using AArch64 */ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) { @@ -10204,14 +11463,17 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) unsigned int cur_el = arm_current_el(env); int rt; - /* - * Note that new_el can never be 0. If cur_el is 0, then - * el0_a64 is is_a64(), else el0_a64 is ignored. 
- */ - aarch64_sve_change_el(env, cur_el, new_el, is_a64(env)); + if (tcg_enabled()) { + /* + * Note that new_el can never be 0. If cur_el is 0, then + * el0_a64 is is_a64(), else el0_a64 is ignored. + */ + aarch64_sve_change_el(env, cur_el, new_el, is_a64(env)); + } if (cur_el < new_el) { - /* Entry vector offset depends on whether the implemented EL + /* + * Entry vector offset depends on whether the implemented EL * immediately lower than the target level is using AArch32 or AArch64 */ bool is_aa64; @@ -10245,8 +11507,20 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) } switch (cs->exception_index) { + case EXCP_GPC: + qemu_log_mask(CPU_LOG_INT, "...with MFAR 0x%" PRIx64 "\n", + env->cp15.mfar_el3); + /* fall through */ case EXCP_PREFETCH_ABORT: case EXCP_DATA_ABORT: + /* + * FEAT_DoubleFault allows synchronous external aborts taken to EL3 + * to be taken to the SError vector entrypoint. + */ + if (new_el == 3 && (env->cp15.scr_el3 & SCR_EASE) && + syndrome_is_sync_extabt(env->exception.syndrome)) { + addr += 0x180; + } env->cp15.far_el[new_el] = env->exception.vaddress; qemu_log_mask(CPU_LOG_INT, "...with FAR 0x%" PRIx64 "\n", env->cp15.far_el[new_el]); @@ -10305,6 +11579,12 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) case EXCP_VFIQ: addr += 0x100; break; + case EXCP_VSERR: + addr += 0x180; + /* Construct the SError syndrome from IDS and ISS fields. */ + env->exception.syndrome = syn_serror(env->cp15.vsesr_el2 & 0x1ffffff); + env->cp15.esr_el[new_el] = env->exception.syndrome; + break; default: cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index); } @@ -10313,6 +11593,20 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) old_mode = pstate_read(env); aarch64_save_sp(env, arm_current_el(env)); env->elr_el[new_el] = env->pc; + + if (cur_el == 1 && new_el == 1) { + uint64_t hcr = arm_hcr_el2_eff(env); + if ((hcr & (HCR_NV | HCR_NV1 | HCR_NV2)) == HCR_NV || + (hcr & (HCR_NV | HCR_NV2)) == (HCR_NV | HCR_NV2)) { + /* + * FEAT_NV, FEAT_NV2 may need to report EL2 in the SPSR + * by setting M[3:2] to 0b10. + * If NV2 is disabled, change SPSR when NV,NV1 == 1,0 (I_ZJRNN) + * If NV2 is enabled, change SPSR when NV is 1 (I_DBTLM) + */ + old_mode = deposit32(old_mode, 2, 2, 2); + } + } } else { old_mode = cpsr_read_for_spsr_elx(env); env->elr_el[new_el] = env->regs[15]; @@ -10323,6 +11617,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) } env->banked_spsr[aarch64_banked_spsr_index(new_el)] = old_mode; + qemu_log_mask(CPU_LOG_INT, "...with SPSR 0x%x\n", old_mode); qemu_log_mask(CPU_LOG_INT, "...with ELR 0x%" PRIx64 "\n", env->elr_el[new_el]); @@ -10359,9 +11654,12 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) } pstate_write(env, PSTATE_DAIF | new_mode); - env->aarch64 = 1; + env->aarch64 = true; aarch64_restore_sp(env, new_el); - helper_rebuild_hflags_a64(env, new_el); + + if (tcg_enabled()) { + helper_rebuild_hflags_a64(env, new_el); + } env->pc = addr; @@ -10377,7 +11675,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) * trapped to the hypervisor in KVM. 
*/ #ifdef CONFIG_TCG -static void handle_semihosting(CPUState *cs) +static void tcg_handle_semihosting(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; @@ -10386,19 +11684,20 @@ static void handle_semihosting(CPUState *cs) qemu_log_mask(CPU_LOG_INT, "...handling as semihosting call 0x%" PRIx64 "\n", env->xregs[0]); - env->xregs[0] = do_common_semihosting(cs); + do_common_semihosting(cs); env->pc += 4; } else { qemu_log_mask(CPU_LOG_INT, "...handling as semihosting call 0x%x\n", env->regs[0]); - env->regs[0] = do_common_semihosting(cs); + do_common_semihosting(cs); env->regs[15] += env->thumb ? 2 : 4; } } #endif -/* Handle a CPU exception for A and R profile CPUs. +/* + * Handle a CPU exception for A and R profile CPUs. * Do any appropriate logging, handle PSCI calls, and then hand off * to the AArch64-entry or AArch32-entry function depending on the * target exception level's register width. @@ -10415,7 +11714,7 @@ void arm_cpu_do_interrupt(CPUState *cs) assert(!arm_feature(env, ARM_FEATURE_M)); - arm_log_exception(cs->exception_index); + arm_log_exception(cs); qemu_log_mask(CPU_LOG_INT, "...from EL%d to EL%d\n", arm_current_el(env), new_el); if (qemu_loglevel_mask(CPU_LOG_INT) @@ -10425,7 +11724,7 @@ void arm_cpu_do_interrupt(CPUState *cs) env->exception.syndrome); } - if (arm_is_psci_call(cpu, cs->exception_index)) { + if (tcg_enabled() && arm_is_psci_call(cpu, cs->exception_index)) { arm_handle_psci_call(cpu); qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n"); return; @@ -10438,16 +11737,17 @@ void arm_cpu_do_interrupt(CPUState *cs) */ #ifdef CONFIG_TCG if (cs->exception_index == EXCP_SEMIHOST) { - handle_semihosting(cs); + tcg_handle_semihosting(cs); return; } #endif - /* Hooks may change global state so BQL should be held, also the + /* + * Hooks may change global state so BQL should be held, also the * BQL needs to be held for any modification of * cs->interrupt_request. */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); arm_call_pre_el_change_hook(cpu); @@ -10471,941 +11771,159 @@ uint64_t arm_sctlr(CPUARMState *env, int el) /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */ if (el == 0) { ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0); - el = (mmu_idx == ARMMMUIdx_E20_0 || mmu_idx == ARMMMUIdx_SE20_0) - ? 2 : 1; + el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1; } return env->cp15.sctlr_el[el]; } -/* Return the SCTLR value which controls this address translation regime */ -static inline uint64_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - return env->cp15.sctlr_el[regime_el(env, mmu_idx)]; -} - -#ifndef CONFIG_USER_ONLY - -/* Return true if the specified stage of address translation is disabled */ -static inline bool regime_translation_disabled(CPUARMState *env, - ARMMMUIdx mmu_idx) -{ - uint64_t hcr_el2; - - if (arm_feature(env, ARM_FEATURE_M)) { - switch (env->v7m.mpu_ctrl[regime_is_secure(env, mmu_idx)] & - (R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK)) { - case R_V7M_MPU_CTRL_ENABLE_MASK: - /* Enabled, but not for HardFault and NMI */ - return mmu_idx & ARM_MMU_IDX_M_NEGPRI; - case R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK: - /* Enabled for all cases */ - return false; - case 0: - default: - /* HFNMIENA set and ENABLE clear is UNPREDICTABLE, but - * we warned about that in armv7m_nvic.c when the guest set it. 
- */ - return true; - } - } - - hcr_el2 = arm_hcr_el2_eff(env); - - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { - /* HCR.DC means HCR.VM behaves as 1 */ - return (hcr_el2 & (HCR_DC | HCR_VM)) == 0; - } - - if (hcr_el2 & HCR_TGE) { - /* TGE means that NS EL0/1 act as if SCTLR_EL1.M is zero */ - if (!regime_is_secure(env, mmu_idx) && regime_el(env, mmu_idx) == 1) { - return true; - } - } - - if ((hcr_el2 & HCR_DC) && arm_mmu_idx_is_stage1_of_2(mmu_idx)) { - /* HCR.DC means SCTLR_EL1.M behaves as 0 */ - return true; - } - - return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0; -} - -static inline bool regime_translation_big_endian(CPUARMState *env, - ARMMMUIdx mmu_idx) -{ - return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0; -} - -/* Return the TTBR associated with this translation regime */ -static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, - int ttbrn) +int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx) { - if (mmu_idx == ARMMMUIdx_Stage2) { - return env->cp15.vttbr_el2; - } - if (mmu_idx == ARMMMUIdx_Stage2_S) { - return env->cp15.vsttbr_el2; - } - if (ttbrn == 0) { - return env->cp15.ttbr0_el[regime_el(env, mmu_idx)]; + if (regime_has_2_ranges(mmu_idx)) { + return extract64(tcr, 37, 2); + } else if (regime_is_stage2(mmu_idx)) { + return 0; /* VTCR_EL2 */ } else { - return env->cp15.ttbr1_el[regime_el(env, mmu_idx)]; - } -} - -#endif /* !CONFIG_USER_ONLY */ - -/* Convert a possible stage1+2 MMU index into the appropriate - * stage 1 MMU index - */ -static inline ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_SE10_0: - return ARMMMUIdx_Stage1_SE0; - case ARMMMUIdx_SE10_1: - return ARMMMUIdx_Stage1_SE1; - case ARMMMUIdx_SE10_1_PAN: - return ARMMMUIdx_Stage1_SE1_PAN; - case ARMMMUIdx_E10_0: - return ARMMMUIdx_Stage1_E0; - case ARMMMUIdx_E10_1: - return ARMMMUIdx_Stage1_E1; - case ARMMMUIdx_E10_1_PAN: - return ARMMMUIdx_Stage1_E1_PAN; - default: - return mmu_idx; + /* Replicate the single TBI bit so we always have 2 bits. */ + return extract32(tcr, 20, 1) * 3; } } -/* Return true if the translation regime is using LPAE format page tables */ -static inline bool regime_using_lpae_format(CPUARMState *env, - ARMMMUIdx mmu_idx) +int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx) { - int el = regime_el(env, mmu_idx); - if (el == 2 || arm_el_is_aa64(env, el)) { - return true; - } - if (arm_feature(env, ARM_FEATURE_LPAE) - && (regime_tcr(env, mmu_idx)->raw_tcr & TTBCR_EAE)) { - return true; + if (regime_has_2_ranges(mmu_idx)) { + return extract64(tcr, 51, 2); + } else if (regime_is_stage2(mmu_idx)) { + return 0; /* VTCR_EL2 */ + } else { + /* Replicate the single TBID bit so we always have 2 bits. */ + return extract32(tcr, 29, 1) * 3; } - return false; } -/* Returns true if the stage 1 translation regime is using LPAE format page - * tables. Used when raising alignment exceptions, whose FSR changes depending - * on whether the long or short descriptor format is in use. 
*/ -bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) +int aa64_va_parameter_tcma(uint64_t tcr, ARMMMUIdx mmu_idx) { - mmu_idx = stage_1_mmu_idx(mmu_idx); - - return regime_using_lpae_format(env, mmu_idx); -} - -#ifndef CONFIG_USER_ONLY -static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_SE10_0: - case ARMMMUIdx_E20_0: - case ARMMMUIdx_SE20_0: - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_Stage1_SE0: - case ARMMMUIdx_MUser: - case ARMMMUIdx_MSUser: - case ARMMMUIdx_MUserNegPri: - case ARMMMUIdx_MSUserNegPri: - return true; - default: - return false; - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - g_assert_not_reached(); + if (regime_has_2_ranges(mmu_idx)) { + return extract64(tcr, 57, 2); + } else { + /* Replicate the single TCMA bit so we always have 2 bits. */ + return extract32(tcr, 30, 1) * 3; } } -/* Translate section/page access permissions to page - * R/W protection flags - * - * @env: CPUARMState - * @mmu_idx: MMU index indicating required translation regime - * @ap: The 3-bit access permissions (AP[2:0]) - * @domain_prot: The 2-bit domain access permissions - */ -static inline int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, - int ap, int domain_prot) +static ARMGranuleSize tg0_to_gran_size(int tg) { - bool is_user = regime_is_user(env, mmu_idx); - - if (domain_prot == 3) { - return PAGE_READ | PAGE_WRITE; - } - - switch (ap) { + switch (tg) { case 0: - if (arm_feature(env, ARM_FEATURE_V7)) { - return 0; - } - switch (regime_sctlr(env, mmu_idx) & (SCTLR_S | SCTLR_R)) { - case SCTLR_S: - return is_user ? 0 : PAGE_READ; - case SCTLR_R: - return PAGE_READ; - default: - return 0; - } + return Gran4K; case 1: - return is_user ? 0 : PAGE_READ | PAGE_WRITE; + return Gran64K; case 2: - if (is_user) { - return PAGE_READ; - } else { - return PAGE_READ | PAGE_WRITE; - } - case 3: - return PAGE_READ | PAGE_WRITE; - case 4: /* Reserved. */ - return 0; - case 5: - return is_user ? 0 : PAGE_READ; - case 6: - return PAGE_READ; - case 7: - if (!arm_feature(env, ARM_FEATURE_V6K)) { - return 0; - } - return PAGE_READ; + return Gran16K; default: - g_assert_not_reached(); + return GranInvalid; } } -/* Translate section/page access permissions to page - * R/W protection flags. - * - * @ap: The 2-bit simple AP (AP[2:1]) - * @is_user: TRUE if accessing from PL0 - */ -static inline int simple_ap_to_rw_prot_is_user(int ap, bool is_user) +static ARMGranuleSize tg1_to_gran_size(int tg) { - switch (ap) { - case 0: - return is_user ? 0 : PAGE_READ | PAGE_WRITE; + switch (tg) { case 1: - return PAGE_READ | PAGE_WRITE; + return Gran16K; case 2: - return is_user ? 
0 : PAGE_READ; + return Gran4K; case 3: - return PAGE_READ; + return Gran64K; default: - g_assert_not_reached(); - } -} - -static inline int -simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) -{ - return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); -} - -/* Translate S2 section/page access permissions to protection flags - * - * @env: CPUARMState - * @s2ap: The 2-bit stage2 access permissions (S2AP) - * @xn: XN (execute-never) bits - * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0 - */ -static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) -{ - int prot = 0; - - if (s2ap & 1) { - prot |= PAGE_READ; - } - if (s2ap & 2) { - prot |= PAGE_WRITE; - } - - if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) { - switch (xn) { - case 0: - prot |= PAGE_EXEC; - break; - case 1: - if (s1_is_el0) { - prot |= PAGE_EXEC; - } - break; - case 2: - break; - case 3: - if (!s1_is_el0) { - prot |= PAGE_EXEC; - } - break; - default: - g_assert_not_reached(); - } - } else { - if (!extract32(xn, 1, 1)) { - if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) { - prot |= PAGE_EXEC; - } - } - } - return prot; -} - -/* Translate section/page access permissions to protection flags - * - * @env: CPUARMState - * @mmu_idx: MMU index indicating required translation regime - * @is_aa64: TRUE if AArch64 - * @ap: The 2-bit simple AP (AP[2:1]) - * @ns: NS (non-secure) bit - * @xn: XN (execute-never) bit - * @pxn: PXN (privileged execute-never) bit - */ -static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, - int ap, int ns, int xn, int pxn) -{ - bool is_user = regime_is_user(env, mmu_idx); - int prot_rw, user_rw; - bool have_wxn; - int wxn = 0; - - assert(mmu_idx != ARMMMUIdx_Stage2); - assert(mmu_idx != ARMMMUIdx_Stage2_S); - - user_rw = simple_ap_to_rw_prot_is_user(ap, true); - if (is_user) { - prot_rw = user_rw; - } else { - if (user_rw && regime_is_pan(env, mmu_idx)) { - /* PAN forbids data accesses but doesn't affect insn fetch */ - prot_rw = 0; - } else { - prot_rw = simple_ap_to_rw_prot_is_user(ap, false); - } - } - - if (ns && arm_is_secure(env) && (env->cp15.scr_el3 & SCR_SIF)) { - return prot_rw; - } - - /* TODO have_wxn should be replaced with - * ARM_FEATURE_V8 || (ARM_FEATURE_V7 && ARM_FEATURE_EL2) - * when ARM_FEATURE_EL2 starts getting set. For now we assume all LPAE - * compatible processors have EL2, which is required for [U]WXN. 
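- * As a reminder of the semantics implemented below: WXN makes any
- * writable region execute-never, and UWXN makes any region that is
- * writable at EL0 execute-never at PL1.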
- */ - have_wxn = arm_feature(env, ARM_FEATURE_LPAE); - - if (have_wxn) { - wxn = regime_sctlr(env, mmu_idx) & SCTLR_WXN; - } - - if (is_aa64) { - if (regime_has_2_ranges(mmu_idx) && !is_user) { - xn = pxn || (user_rw & PAGE_WRITE); - } - } else if (arm_feature(env, ARM_FEATURE_V7)) { - switch (regime_el(env, mmu_idx)) { - case 1: - case 3: - if (is_user) { - xn = xn || !(user_rw & PAGE_READ); - } else { - int uwxn = 0; - if (have_wxn) { - uwxn = regime_sctlr(env, mmu_idx) & SCTLR_UWXN; - } - xn = xn || !(prot_rw & PAGE_READ) || pxn || - (uwxn && (user_rw & PAGE_WRITE)); - } - break; - case 2: - break; - } - } else { - xn = wxn = 0; - } - - if (xn || (wxn && (prot_rw & PAGE_WRITE))) { - return prot_rw; - } - return prot_rw | PAGE_EXEC; -} - -static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, - uint32_t *table, uint32_t address) -{ - /* Note that we can only get here for an AArch32 PL0/PL1 lookup */ - TCR *tcr = regime_tcr(env, mmu_idx); - - if (address & tcr->mask) { - if (tcr->raw_tcr & TTBCR_PD1) { - /* Translation table walk disabled for TTBR1 */ - return false; - } - *table = regime_ttbr(env, mmu_idx, 1) & 0xffffc000; - } else { - if (tcr->raw_tcr & TTBCR_PD0) { - /* Translation table walk disabled for TTBR0 */ - return false; - } - *table = regime_ttbr(env, mmu_idx, 0) & tcr->base_mask; - } - *table |= (address >> 18) & 0x3ffc; - return true; -} - -/* Translate a S1 pagetable walk through S2 if needed. */ -static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx, - hwaddr addr, bool *is_secure, - ARMMMUFaultInfo *fi) -{ - if (arm_mmu_idx_is_stage1_of_2(mmu_idx) && - !regime_translation_disabled(env, ARMMMUIdx_Stage2)) { - target_ulong s2size; - hwaddr s2pa; - int s2prot; - int ret; - ARMMMUIdx s2_mmu_idx = *is_secure ? ARMMMUIdx_Stage2_S - : ARMMMUIdx_Stage2; - ARMCacheAttrs cacheattrs = {}; - MemTxAttrs txattrs = {}; - - ret = get_phys_addr_lpae(env, addr, MMU_DATA_LOAD, s2_mmu_idx, false, - &s2pa, &txattrs, &s2prot, &s2size, fi, - &cacheattrs); - if (ret) { - assert(fi->type != ARMFault_None); - fi->s2addr = addr; - fi->stage2 = true; - fi->s1ptw = true; - fi->s1ns = !*is_secure; - return ~0; - } - if ((arm_hcr_el2_eff(env) & HCR_PTW) && - (cacheattrs.attrs & 0xf0) == 0) { - /* - * PTW set and S1 walk touched S2 Device memory: - * generate Permission fault. - */ - fi->type = ARMFault_Permission; - fi->s2addr = addr; - fi->stage2 = true; - fi->s1ptw = true; - fi->s1ns = !*is_secure; - return ~0; - } - - if (arm_is_secure_below_el3(env)) { - /* Check if page table walk is to secure or non-secure PA space. */ - if (*is_secure) { - *is_secure = !(env->cp15.vstcr_el2.raw_tcr & VSTCR_SW); - } else { - *is_secure = !(env->cp15.vtcr_el2.raw_tcr & VTCR_NSW); - } - } else { - assert(!*is_secure); - } - - addr = s2pa; + return GranInvalid; } - return addr; } -/* All loads done in the course of a page table walk go through here. 
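- * On success the descriptor word is returned; if the stage 2
- * translation of the walk address faults (fi->s1ptw) or the load
- * aborts (ARMFault_SyncExternalOnWalk), 0 is returned with *fi
- * filled in, so callers must check fi->type rather than the
- * returned value.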
*/ -static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure, - ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi) +static inline bool have4k(ARMCPU *cpu, bool stage2) { - ARMCPU *cpu = ARM_CPU(cs); - CPUARMState *env = &cpu->env; - MemTxAttrs attrs = {}; - MemTxResult result = MEMTX_OK; - AddressSpace *as; - uint32_t data; - - addr = S1_ptw_translate(env, mmu_idx, addr, &is_secure, fi); - attrs.secure = is_secure; - as = arm_addressspace(cs, attrs); - if (fi->s1ptw) { - return 0; - } - if (regime_translation_big_endian(env, mmu_idx)) { - data = address_space_ldl_be(as, addr, attrs, &result); - } else { - data = address_space_ldl_le(as, addr, attrs, &result); - } - if (result == MEMTX_OK) { - return data; - } - fi->type = ARMFault_SyncExternalOnWalk; - fi->ea = arm_extabort_type(result); - return 0; + return stage2 ? cpu_isar_feature(aa64_tgran4_2, cpu) + : cpu_isar_feature(aa64_tgran4, cpu); } -static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure, - ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi) +static inline bool have16k(ARMCPU *cpu, bool stage2) { - ARMCPU *cpu = ARM_CPU(cs); - CPUARMState *env = &cpu->env; - MemTxAttrs attrs = {}; - MemTxResult result = MEMTX_OK; - AddressSpace *as; - uint64_t data; - - addr = S1_ptw_translate(env, mmu_idx, addr, &is_secure, fi); - attrs.secure = is_secure; - as = arm_addressspace(cs, attrs); - if (fi->s1ptw) { - return 0; - } - if (regime_translation_big_endian(env, mmu_idx)) { - data = address_space_ldq_be(as, addr, attrs, &result); - } else { - data = address_space_ldq_le(as, addr, attrs, &result); - } - if (result == MEMTX_OK) { - return data; - } - fi->type = ARMFault_SyncExternalOnWalk; - fi->ea = arm_extabort_type(result); - return 0; + return stage2 ? cpu_isar_feature(aa64_tgran16_2, cpu) + : cpu_isar_feature(aa64_tgran16, cpu); } -static bool get_phys_addr_v5(CPUARMState *env, uint32_t address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - hwaddr *phys_ptr, int *prot, - target_ulong *page_size, - ARMMMUFaultInfo *fi) +static inline bool have64k(ARMCPU *cpu, bool stage2) { - CPUState *cs = env_cpu(env); - int level = 1; - uint32_t table; - uint32_t desc; - int type; - int ap; - int domain = 0; - int domain_prot; - hwaddr phys_addr; - uint32_t dacr; - - /* Pagetable walk. */ - /* Lookup l1 descriptor. */ - if (!get_level1_table_address(env, mmu_idx, &table, address)) { - /* Section translation fault if page walk is disabled by PD0 or PD1 */ - fi->type = ARMFault_Translation; - goto do_fault; - } - desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), - mmu_idx, fi); - if (fi->type != ARMFault_None) { - goto do_fault; - } - type = (desc & 3); - domain = (desc >> 5) & 0x0f; - if (regime_el(env, mmu_idx) == 1) { - dacr = env->cp15.dacr_ns; - } else { - dacr = env->cp15.dacr_s; - } - domain_prot = (dacr >> (domain * 2)) & 3; - if (type == 0) { - /* Section translation fault. */ - fi->type = ARMFault_Translation; - goto do_fault; - } - if (type != 2) { - level = 2; - } - if (domain_prot == 0 || domain_prot == 2) { - fi->type = ARMFault_Domain; - goto do_fault; - } - if (type == 2) { - /* 1Mb section. */ - phys_addr = (desc & 0xfff00000) | (address & 0x000fffff); - ap = (desc >> 10) & 3; - *page_size = 1024 * 1024; - } else { - /* Lookup l2 entry. */ - if (type == 1) { - /* Coarse pagetable. */ - table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); - } else { - /* Fine pagetable. 
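- * (a 4KB table of 1024 word-sized entries, one per 1KB of the
- * 1MB section; indexed below by VA bits [19:10])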
*/ - table = (desc & 0xfffff000) | ((address >> 8) & 0xffc); - } - desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), - mmu_idx, fi); - if (fi->type != ARMFault_None) { - goto do_fault; - } - switch (desc & 3) { - case 0: /* Page translation fault. */ - fi->type = ARMFault_Translation; - goto do_fault; - case 1: /* 64k page. */ - phys_addr = (desc & 0xffff0000) | (address & 0xffff); - ap = (desc >> (4 + ((address >> 13) & 6))) & 3; - *page_size = 0x10000; - break; - case 2: /* 4k page. */ - phys_addr = (desc & 0xfffff000) | (address & 0xfff); - ap = (desc >> (4 + ((address >> 9) & 6))) & 3; - *page_size = 0x1000; - break; - case 3: /* 1k page, or ARMv6/XScale "extended small (4k) page" */ - if (type == 1) { - /* ARMv6/XScale extended small page format */ - if (arm_feature(env, ARM_FEATURE_XSCALE) - || arm_feature(env, ARM_FEATURE_V6)) { - phys_addr = (desc & 0xfffff000) | (address & 0xfff); - *page_size = 0x1000; - } else { - /* UNPREDICTABLE in ARMv5; we choose to take a - * page translation fault. - */ - fi->type = ARMFault_Translation; - goto do_fault; - } - } else { - phys_addr = (desc & 0xfffffc00) | (address & 0x3ff); - *page_size = 0x400; - } - ap = (desc >> 4) & 3; - break; - default: - /* Never happens, but compiler isn't smart enough to tell. */ - abort(); - } - } - *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); - *prot |= *prot ? PAGE_EXEC : 0; - if (!(*prot & (1 << access_type))) { - /* Access permission fault. */ - fi->type = ARMFault_Permission; - goto do_fault; - } - *phys_ptr = phys_addr; - return false; -do_fault: - fi->domain = domain; - fi->level = level; - return true; + return stage2 ? cpu_isar_feature(aa64_tgran64_2, cpu) + : cpu_isar_feature(aa64_tgran64, cpu); } -static bool get_phys_addr_v6(CPUARMState *env, uint32_t address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, - target_ulong *page_size, ARMMMUFaultInfo *fi) +static ARMGranuleSize sanitize_gran_size(ARMCPU *cpu, ARMGranuleSize gran, + bool stage2) { - CPUState *cs = env_cpu(env); - ARMCPU *cpu = env_archcpu(env); - int level = 1; - uint32_t table; - uint32_t desc; - uint32_t xn; - uint32_t pxn = 0; - int type; - int ap; - int domain = 0; - int domain_prot; - hwaddr phys_addr; - uint32_t dacr; - bool ns; - - /* Pagetable walk. */ - /* Lookup l1 descriptor. */ - if (!get_level1_table_address(env, mmu_idx, &table, address)) { - /* Section translation fault if page walk is disabled by PD0 or PD1 */ - fi->type = ARMFault_Translation; - goto do_fault; - } - desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), - mmu_idx, fi); - if (fi->type != ARMFault_None) { - goto do_fault; - } - type = (desc & 3); - if (type == 0 || (type == 3 && !cpu_isar_feature(aa32_pxn, cpu))) { - /* Section translation fault, or attempt to use the encoding - * which is Reserved on implementations without PXN. - */ - fi->type = ARMFault_Translation; - goto do_fault; - } - if ((type == 1) || !(desc & (1 << 18))) { - /* Page or Section. */ - domain = (desc >> 5) & 0x0f; - } - if (regime_el(env, mmu_idx) == 1) { - dacr = env->cp15.dacr_ns; - } else { - dacr = env->cp15.dacr_s; - } - if (type == 1) { - level = 2; - } - domain_prot = (dacr >> (domain * 2)) & 3; - if (domain_prot == 0 || domain_prot == 2) { - /* Section or Page domain fault */ - fi->type = ARMFault_Domain; - goto do_fault; - } - if (type != 1) { - if (desc & (1 << 18)) { - /* Supersection. 
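- * (a 16MB mapping: PA[31:24] come from the descriptor, with the
- * extended address bits PA[35:32] taken from desc[23:20] and
- * PA[39:36] from desc[8:5], as decoded below)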
*/ - phys_addr = (desc & 0xff000000) | (address & 0x00ffffff); - phys_addr |= (uint64_t)extract32(desc, 20, 4) << 32; - phys_addr |= (uint64_t)extract32(desc, 5, 4) << 36; - *page_size = 0x1000000; - } else { - /* Section. */ - phys_addr = (desc & 0xfff00000) | (address & 0x000fffff); - *page_size = 0x100000; - } - ap = ((desc >> 10) & 3) | ((desc >> 13) & 4); - xn = desc & (1 << 4); - pxn = desc & 1; - ns = extract32(desc, 19, 1); - } else { - if (cpu_isar_feature(aa32_pxn, cpu)) { - pxn = (desc >> 2) & 1; - } - ns = extract32(desc, 3, 1); - /* Lookup l2 entry. */ - table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); - desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx), - mmu_idx, fi); - if (fi->type != ARMFault_None) { - goto do_fault; - } - ap = ((desc >> 4) & 3) | ((desc >> 7) & 4); - switch (desc & 3) { - case 0: /* Page translation fault. */ - fi->type = ARMFault_Translation; - goto do_fault; - case 1: /* 64k page. */ - phys_addr = (desc & 0xffff0000) | (address & 0xffff); - xn = desc & (1 << 15); - *page_size = 0x10000; - break; - case 2: case 3: /* 4k page. */ - phys_addr = (desc & 0xfffff000) | (address & 0xfff); - xn = desc & 1; - *page_size = 0x1000; - break; - default: - /* Never happens, but compiler isn't smart enough to tell. */ - abort(); - } - } - if (domain_prot == 3) { - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - } else { - if (pxn && !regime_is_user(env, mmu_idx)) { - xn = 1; - } - if (xn && access_type == MMU_INST_FETCH) { - fi->type = ARMFault_Permission; - goto do_fault; - } - - if (arm_feature(env, ARM_FEATURE_V6K) && - (regime_sctlr(env, mmu_idx) & SCTLR_AFE)) { - /* The simplified model uses AP[0] as an access control bit. */ - if ((ap & 1) == 0) { - /* Access flag fault. */ - fi->type = ARMFault_AccessFlag; - goto do_fault; - } - *prot = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1); - } else { - *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); - } - if (*prot && !xn) { - *prot |= PAGE_EXEC; - } - if (!(*prot & (1 << access_type))) { - /* Access permission fault. */ - fi->type = ARMFault_Permission; - goto do_fault; - } - } - if (ns) { - /* The NS bit will (as required by the architecture) have no effect if - * the CPU doesn't support TZ or this is a non-secure translation - * regime, because the attribute will already be non-secure. - */ - attrs->secure = false; - } - *phys_ptr = phys_addr; - return false; -do_fault: - fi->domain = domain; - fi->level = level; - return true; -} - -/* - * check_s2_mmu_setup - * @cpu: ARMCPU - * @is_aa64: True if the translation regime is in AArch64 state - * @startlevel: Suggested starting level - * @inputsize: Bitsize of IPAs - * @stride: Page-table stride (See the ARM ARM) - * - * Returns true if the suggested S2 translation parameters are OK and - * false otherwise. - */ -static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level, - int inputsize, int stride) -{ - const int grainsize = stride + 3; - int startsizecheck; - - /* Negative levels are never allowed. */ - if (level < 0) { - return false; - } - - startsizecheck = inputsize - ((3 - level) * stride + grainsize); - if (startsizecheck < 1 || startsizecheck > stride + 4) { - return false; - } - - if (is_aa64) { - CPUARMState *env = &cpu->env; - unsigned int pamax = arm_pamax(cpu); - - switch (stride) { - case 13: /* 64KB Pages. */ - if (level == 0 || (level == 1 && pamax <= 42)) { - return false; - } - break; - case 11: /* 16KB Pages. */ - if (level == 0 || (level == 1 && pamax <= 40)) { - return false; - } - break; - case 9: /* 4KB Pages. 
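- * (a start level of 0 is only valid here when pamax
- * exceeds 42 bits)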
*/ - if (level == 0 && pamax <= 42) { - return false; - } - break; - default: - g_assert_not_reached(); + switch (gran) { + case Gran4K: + if (have4k(cpu, stage2)) { + return gran; } - - /* Inputsize checks. */ - if (inputsize > pamax && - (arm_el_is_aa64(env, 1) || inputsize > 40)) { - /* This is CONSTRAINED UNPREDICTABLE and we choose to fault. */ - return false; - } - } else { - /* AArch32 only supports 4KB pages. Assert on that. */ - assert(stride == 9); - - if (level == 0) { - return false; + break; + case Gran16K: + if (have16k(cpu, stage2)) { + return gran; } - } - return true; -} - -/* Translate from the 4-bit stage 2 representation of - * memory attributes (without cache-allocation hints) to - * the 8-bit representation of the stage 1 MAIR registers - * (which includes allocation hints). - * - * ref: shared/translation/attrs/S2AttrDecode() - * .../S2ConvertAttrsHints() - */ -static uint8_t convert_stage2_attrs(CPUARMState *env, uint8_t s2attrs) -{ - uint8_t hiattr = extract32(s2attrs, 2, 2); - uint8_t loattr = extract32(s2attrs, 0, 2); - uint8_t hihint = 0, lohint = 0; - - if (hiattr != 0) { /* normal memory */ - if (arm_hcr_el2_eff(env) & HCR_CD) { /* cache disabled */ - hiattr = loattr = 1; /* non-cacheable */ - } else { - if (hiattr != 1) { /* Write-through or write-back */ - hihint = 3; /* RW allocate */ - } - if (loattr != 1) { /* Write-through or write-back */ - lohint = 3; /* RW allocate */ - } + break; + case Gran64K: + if (have64k(cpu, stage2)) { + return gran; } + break; + case GranInvalid: + break; } - - return (hiattr << 6) | (hihint << 4) | (loattr << 2) | lohint; -} -#endif /* !CONFIG_USER_ONLY */ - -static int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx) -{ - if (regime_has_2_ranges(mmu_idx)) { - return extract64(tcr, 37, 2); - } else if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { - return 0; /* VTCR_EL2 */ - } else { - /* Replicate the single TBI bit so we always have 2 bits. */ - return extract32(tcr, 20, 1) * 3; + /* + * If the guest selects a granule size that isn't implemented, + * the architecture requires that we behave as if it selected one + * that is (with an IMPDEF choice of which one to pick). We choose + * to implement the smallest supported granule size. + */ + if (have4k(cpu, stage2)) { + return Gran4K; } -} - -static int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx) -{ - if (regime_has_2_ranges(mmu_idx)) { - return extract64(tcr, 51, 2); - } else if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { - return 0; /* VTCR_EL2 */ - } else { - /* Replicate the single TBID bit so we always have 2 bits. */ - return extract32(tcr, 29, 1) * 3; - } -} - -static int aa64_va_parameter_tcma(uint64_t tcr, ARMMMUIdx mmu_idx) -{ - if (regime_has_2_ranges(mmu_idx)) { - return extract64(tcr, 57, 2); - } else { - /* Replicate the single TCMA bit so we always have 2 bits. 
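- * (multiplying the bit by 3 maps 0 -> 0b00 and 1 -> 0b11,
- * mirroring the TBI and TBID cases above)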
*/ - return extract32(tcr, 30, 1) * 3; + if (have16k(cpu, stage2)) { + return Gran16K; } + assert(have64k(cpu, stage2)); + return Gran64K; } ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, - ARMMMUIdx mmu_idx, bool data) + ARMMMUIdx mmu_idx, bool data, + bool el1_is_aa32) { - uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr; - bool epd, hpd, using16k, using64k; - int select, tsz, tbi, max_tsz; + uint64_t tcr = regime_tcr(env, mmu_idx); + bool epd, hpd, tsz_oob, ds, ha, hd; + int select, tsz, tbi, max_tsz, min_tsz, ps, sh; + ARMGranuleSize gran; + ARMCPU *cpu = env_archcpu(env); + bool stage2 = regime_is_stage2(mmu_idx); if (!regime_has_2_ranges(mmu_idx)) { select = 0; tsz = extract32(tcr, 0, 6); - using64k = extract32(tcr, 14, 1); - using16k = extract32(tcr, 15, 1); - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { + gran = tg0_to_gran_size(extract32(tcr, 14, 2)); + if (stage2) { /* VTCR_EL2 */ hpd = false; } else { hpd = extract32(tcr, 24, 1); } epd = false; + sh = extract32(tcr, 12, 2); + ps = extract32(tcr, 16, 3); + ha = extract32(tcr, 21, 1) && cpu_isar_feature(aa64_hafs, cpu); + hd = extract32(tcr, 22, 1) && cpu_isar_feature(aa64_hdbs, cpu); + ds = extract64(tcr, 32, 1); } else { + bool e0pd; + /* * Bit 55 is always between the two regions, and is canonical for * determining if address tagging is enabled. @@ -11413,1462 +11931,115 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, select = extract64(va, 55, 1); if (!select) { tsz = extract32(tcr, 0, 6); + gran = tg0_to_gran_size(extract32(tcr, 14, 2)); epd = extract32(tcr, 7, 1); - using64k = extract32(tcr, 14, 1); - using16k = extract32(tcr, 15, 1); + sh = extract32(tcr, 12, 2); hpd = extract64(tcr, 41, 1); + e0pd = extract64(tcr, 55, 1); } else { - int tg = extract32(tcr, 30, 2); - using16k = tg == 1; - using64k = tg == 3; tsz = extract32(tcr, 16, 6); + gran = tg1_to_gran_size(extract32(tcr, 30, 2)); epd = extract32(tcr, 23, 1); + sh = extract32(tcr, 28, 2); hpd = extract64(tcr, 42, 1); + e0pd = extract64(tcr, 56, 1); } - } - - if (cpu_isar_feature(aa64_st, env_archcpu(env))) { - max_tsz = 48 - using64k; - } else { - max_tsz = 39; - } - - tsz = MIN(tsz, max_tsz); - tsz = MAX(tsz, 16); /* TODO: ARMv8.2-LVA */ - - /* Present TBI as a composite with TBID. */ - tbi = aa64_va_parameter_tbi(tcr, mmu_idx); - if (!data) { - tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx); - } - tbi = (tbi >> select) & 1; - - return (ARMVAParameters) { - .tsz = tsz, - .select = select, - .tbi = tbi, - .epd = epd, - .hpd = hpd, - .using16k = using16k, - .using64k = using64k, - }; -} - -#ifndef CONFIG_USER_ONLY -static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, - ARMMMUIdx mmu_idx) -{ - uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr; - uint32_t el = regime_el(env, mmu_idx); - int select, tsz; - bool epd, hpd; - - assert(mmu_idx != ARMMMUIdx_Stage2_S); - - if (mmu_idx == ARMMMUIdx_Stage2) { - /* VTCR */ - bool sext = extract32(tcr, 4, 1); - bool sign = extract32(tcr, 3, 1); - - /* - * If the sign-extend bit is not the same as t0sz[3], the result - * is unpredictable. Flag this as a guest error. 
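- * For example, T0SZ=0b1000 sign-extends to -8, so tsz below
- * becomes -8 + 8 = 0 (a 40-bit input range) and requires S=1
- * to match.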
- */ - if (sign != sext) { - qemu_log_mask(LOG_GUEST_ERROR, - "AArch32: VTCR.S / VTCR.T0SZ[3] mismatch\n"); - } - tsz = sextract32(tcr, 0, 4) + 8; - select = 0; - hpd = false; - epd = false; - } else if (el == 2) { - /* HTCR */ - tsz = extract32(tcr, 0, 3); - select = 0; - hpd = extract64(tcr, 24, 1); - epd = false; - } else { - int t0sz = extract32(tcr, 0, 3); - int t1sz = extract32(tcr, 16, 3); + ps = extract64(tcr, 32, 3); + ha = extract64(tcr, 39, 1) && cpu_isar_feature(aa64_hafs, cpu); + hd = extract64(tcr, 40, 1) && cpu_isar_feature(aa64_hdbs, cpu); + ds = extract64(tcr, 59, 1); - if (t1sz == 0) { - select = va > (0xffffffffu >> t0sz); - } else { - /* Note that we will detect errors later. */ - select = va >= ~(0xffffffffu >> t1sz); - } - if (!select) { - tsz = t0sz; - epd = extract32(tcr, 7, 1); - hpd = extract64(tcr, 41, 1); - } else { - tsz = t1sz; - epd = extract32(tcr, 23, 1); - hpd = extract64(tcr, 42, 1); + if (e0pd && cpu_isar_feature(aa64_e0pd, cpu) && + regime_is_user(env, mmu_idx)) { + epd = true; } - /* For aarch32, hpd0 is not enabled without t2e as well. */ - hpd &= extract32(tcr, 6, 1); } - return (ARMVAParameters) { - .tsz = tsz, - .select = select, - .epd = epd, - .hpd = hpd, - }; -} + gran = sanitize_gran_size(cpu, gran, stage2); -/** - * get_phys_addr_lpae: perform one stage of page table walk, LPAE format - * - * Returns false if the translation was successful. Otherwise, phys_ptr, attrs, - * prot and page_size may not be filled in, and the populated fsr value provides - * information on why the translation aborted, in the format of a long-format - * DFSR/IFSR fault register, with the following caveats: - * * the WnR bit is never set (the caller must do this). - * - * @env: CPUARMState - * @address: virtual address to get physical address for - * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH - * @mmu_idx: MMU index indicating required translation regime - * @s1_is_el0: if @mmu_idx is ARMMMUIdx_Stage2 (so this is a stage 2 page table - * walk), must be true if this is stage 2 of a stage 1+2 walk for an - * EL0 access). If @mmu_idx is anything else, @s1_is_el0 is ignored. - * @phys_ptr: set to the physical address corresponding to the virtual address - * @attrs: set to the memory transaction attributes to use - * @prot: set to the permissions for the page containing phys_ptr - * @page_size_ptr: set to the size of the page containing phys_ptr - * @fi: set to fault info if the translation fails - * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes - */ -static bool get_phys_addr_lpae(CPUARMState *env, uint64_t address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - bool s1_is_el0, - hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot, - target_ulong *page_size_ptr, - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) -{ - ARMCPU *cpu = env_archcpu(env); - CPUState *cs = CPU(cpu); - /* Read an LPAE long-descriptor translation table. */ - ARMFaultType fault_type = ARMFault_Translation; - uint32_t level; - ARMVAParameters param; - uint64_t ttbr; - hwaddr descaddr, indexmask, indexmask_grainsize; - uint32_t tableattrs; - target_ulong page_size; - uint32_t attrs; - int32_t stride; - int addrsize, inputsize; - TCR *tcr = regime_tcr(env, mmu_idx); - int ap, ns, xn, pxn; - uint32_t el = regime_el(env, mmu_idx); - uint64_t descaddrmask; - bool aarch64 = arm_el_is_aa64(env, el); - bool guarded = false; - - /* TODO: This code does not support shareability levels. 
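- * As a worked example of the sizing logic below: a 48-bit VA space
- * (tsz = 16) with a 4KB granule (stride 9) gives inputsize = 48 and
- * a starting level of 4 - (48 - 4) / 9 = 0.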
*/ - if (aarch64) { - param = aa64_va_parameters(env, address, mmu_idx, - access_type != MMU_INST_FETCH); - level = 0; - addrsize = 64 - 8 * param.tbi; - inputsize = 64 - param.tsz; + if (cpu_isar_feature(aa64_st, cpu)) { + max_tsz = 48 - (gran == Gran64K); } else { - param = aa32_va_parameters(env, address, mmu_idx); - level = 1; - addrsize = (mmu_idx == ARMMMUIdx_Stage2 ? 40 : 32); - inputsize = addrsize - param.tsz; + max_tsz = 39; } /* - * We determined the region when collecting the parameters, but we - * have not yet validated that the address is valid for the region. - * Extract the top bits and verify that they all match select. - * - * For aa32, if inputsize == addrsize, then we have selected the - * region by exclusion in aa32_va_parameters and there is no more - * validation to do here. - */ - if (inputsize < addrsize) { - target_ulong top_bits = sextract64(address, inputsize, - addrsize - inputsize); - if (-top_bits != param.select) { - /* The gap between the two regions is a Translation fault */ - fault_type = ARMFault_Translation; - goto do_fault; - } - } - - if (param.using64k) { - stride = 13; - } else if (param.using16k) { - stride = 11; - } else { - stride = 9; - } - - /* Note that QEMU ignores shareability and cacheability attributes, - * so we don't need to do anything with the SH, ORGN, IRGN fields - * in the TTBCR. Similarly, TTBCR:A1 selects whether we get the - * ASID from TTBR0 or TTBR1, but QEMU's TLB doesn't currently - * implement any ASID-like capability so we can ignore it (instead - * we will always flush the TLB any time the ASID is changed). - */ - ttbr = regime_ttbr(env, mmu_idx, param.select); - - /* Here we should have set up all the parameters for the translation: - * inputsize, ttbr, epd, stride, tbi - */ - - if (param.epd) { - /* Translation table walk disabled => Translation fault on TLB miss - * Note: This is always 0 on 64-bit EL2 and EL3. - */ - goto do_fault; - } - - if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) { - /* The starting level depends on the virtual address size (which can - * be up to 48 bits) and the translation granule size. It indicates - * the number of strides (stride bits at a time) needed to - * consume the bits of the input address. In the pseudocode this is: - * level = 4 - RoundUp((inputsize - grainsize) / stride) - * where their 'inputsize' is our 'inputsize', 'grainsize' is - * our 'stride + 3' and 'stride' is our 'stride'. - * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying: - * = 4 - (inputsize - stride - 3 + stride - 1) / stride - * = 4 - (inputsize - 4) / stride; - */ - level = 4 - (inputsize - 4) / stride; - } else { - /* For stage 2 translations the starting level is specified by the - * VTCR_EL2.SL0 field (whose interpretation depends on the page size) - */ - uint32_t sl0 = extract32(tcr->raw_tcr, 6, 2); - uint32_t startlevel; - bool ok; - - if (!aarch64 || stride == 9) { - /* AArch32 or 4KB pages */ - startlevel = 2 - sl0; - - if (cpu_isar_feature(aa64_st, cpu)) { - startlevel &= 3; + * DS is RES0 unless FEAT_LPA2 is supported for the given page size; + * adjust the effective value of DS, as documented. 
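+ * In outline: 64KB granules ignore DS (FEAT_LVA alone lowers
+ * min_tsz to 12); for 4KB and 16KB granules DS survives only if
+ * the matching FEAT_LPA2 tgran feature is present, and an
+ * effective DS=1 lowers min_tsz to 12.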
+ */ + min_tsz = 16; + if (gran == Gran64K) { + if (cpu_isar_feature(aa64_lva, cpu)) { + min_tsz = 12; + } + ds = false; + } else if (ds) { + if (regime_is_stage2(mmu_idx)) { + if (gran == Gran16K) { + ds = cpu_isar_feature(aa64_tgran16_2_lpa2, cpu); + } else { + ds = cpu_isar_feature(aa64_tgran4_2_lpa2, cpu); } } else { - /* 16KB or 64KB pages */ - startlevel = 3 - sl0; - } - - /* Check that the starting level is valid. */ - ok = check_s2_mmu_setup(cpu, aarch64, startlevel, - inputsize, stride); - if (!ok) { - fault_type = ARMFault_Translation; - goto do_fault; - } - level = startlevel; - } - - indexmask_grainsize = (1ULL << (stride + 3)) - 1; - indexmask = (1ULL << (inputsize - (stride * (4 - level)))) - 1; - - /* Now we can extract the actual base address from the TTBR */ - descaddr = extract64(ttbr, 0, 48); - /* - * We rely on this masking to clear the RES0 bits at the bottom of the TTBR - * and also to mask out CnP (bit 0) which could validly be non-zero. - */ - descaddr &= ~indexmask; - - /* The address field in the descriptor goes up to bit 39 for ARMv7 - * but up to bit 47 for ARMv8, but we use the descaddrmask - * up to bit 39 for AArch32, because we don't need other bits in that case - * to construct next descriptor address (anyway they should be all zeroes). - */ - descaddrmask = ((1ull << (aarch64 ? 48 : 40)) - 1) & - ~indexmask_grainsize; - - /* Secure accesses start with the page table in secure memory and - * can be downgraded to non-secure at any step. Non-secure accesses - * remain non-secure. We implement this by just ORing in the NSTable/NS - * bits at each step. - */ - tableattrs = regime_is_secure(env, mmu_idx) ? 0 : (1 << 4); - for (;;) { - uint64_t descriptor; - bool nstable; - - descaddr |= (address >> (stride * (4 - level))) & indexmask; - descaddr &= ~7ULL; - nstable = extract32(tableattrs, 4, 1); - descriptor = arm_ldq_ptw(cs, descaddr, !nstable, mmu_idx, fi); - if (fi->type != ARMFault_None) { - goto do_fault; - } - - if (!(descriptor & 1) || - (!(descriptor & 2) && (level == 3))) { - /* Invalid, or the Reserved level 3 encoding */ - goto do_fault; - } - descaddr = descriptor & descaddrmask; - - if ((descriptor & 2) && (level < 3)) { - /* Table entry. The top five bits are attributes which may - * propagate down through lower levels of the table (and - * which are all arranged so that 0 means "no effect", so - * we can gather them up by ORing in the bits at each level). - */ - tableattrs |= extract64(descriptor, 59, 5); - level++; - indexmask = indexmask_grainsize; - continue; - } - /* Block entry at level 1 or 2, or page entry at level 3. - * These are basically the same thing, although the number - * of bits we pull in from the vaddr varies. - */ - page_size = (1ULL << ((stride * (4 - level)) + 3)); - descaddr |= (address & (page_size - 1)); - /* Extract attributes from the descriptor */ - attrs = extract64(descriptor, 2, 10) - | (extract64(descriptor, 52, 12) << 10); - - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { - /* Stage 2 table descriptors do not include any attribute fields */ - break; - } - /* Merge in attributes from table descriptors */ - attrs |= nstable << 3; /* NS */ - guarded = extract64(descriptor, 50, 1); /* GP */ - if (param.hpd) { - /* HPD disables all the table attributes except NSTable. */ - break; - } - attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */ - /* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 - * means "force PL1 access only", which means forcing AP[1] to 0. 
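- * APTable[1] has the normal sense: when set it forces read-only,
- * so it is ORed into AP[2] below.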
- */ - attrs &= ~(extract32(tableattrs, 2, 1) << 4); /* !APT[0] => AP[1] */ - attrs |= extract32(tableattrs, 3, 1) << 5; /* APT[1] => AP[2] */ - break; - } - /* Here descaddr is the final physical address, and attributes - * are all in attrs. - */ - fault_type = ARMFault_AccessFlag; - if ((attrs & (1 << 8)) == 0) { - /* Access flag */ - goto do_fault; - } - - ap = extract32(attrs, 4, 2); - - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { - ns = mmu_idx == ARMMMUIdx_Stage2; - xn = extract32(attrs, 11, 2); - *prot = get_S2prot(env, ap, xn, s1_is_el0); - } else { - ns = extract32(attrs, 3, 1); - xn = extract32(attrs, 12, 1); - pxn = extract32(attrs, 11, 1); - *prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn); - } - - fault_type = ARMFault_Permission; - if (!(*prot & (1 << access_type))) { - goto do_fault; - } - - if (ns) { - /* The NS bit will (as required by the architecture) have no effect if - * the CPU doesn't support TZ or this is a non-secure translation - * regime, because the attribute will already be non-secure. - */ - txattrs->secure = false; - } - /* When in aarch64 mode, and BTI is enabled, remember GP in the IOTLB. */ - if (aarch64 && guarded && cpu_isar_feature(aa64_bti, cpu)) { - arm_tlb_bti_gp(txattrs) = true; - } - - if (mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S) { - cacheattrs->attrs = convert_stage2_attrs(env, extract32(attrs, 0, 4)); - } else { - /* Index into MAIR registers for cache attributes */ - uint8_t attrindx = extract32(attrs, 0, 3); - uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; - assert(attrindx <= 7); - cacheattrs->attrs = extract64(mair, attrindx * 8, 8); - } - cacheattrs->shareability = extract32(attrs, 6, 2); - - *phys_ptr = descaddr; - *page_size_ptr = page_size; - return false; - -do_fault: - fi->type = fault_type; - fi->level = level; - /* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2. */ - fi->stage2 = fi->s1ptw || (mmu_idx == ARMMMUIdx_Stage2 || - mmu_idx == ARMMMUIdx_Stage2_S); - fi->s1ns = mmu_idx == ARMMMUIdx_Stage2; - return true; -} - -static inline void get_phys_addr_pmsav7_default(CPUARMState *env, - ARMMMUIdx mmu_idx, - int32_t address, int *prot) -{ - if (!arm_feature(env, ARM_FEATURE_M)) { - *prot = PAGE_READ | PAGE_WRITE; - switch (address) { - case 0xF0000000 ... 0xFFFFFFFF: - if (regime_sctlr(env, mmu_idx) & SCTLR_V) { - /* hivecs execing is ok */ - *prot |= PAGE_EXEC; - } - break; - case 0x00000000 ... 0x7FFFFFFF: - *prot |= PAGE_EXEC; - break; - } - } else { - /* Default system address map for M profile cores. - * The architecture specifies which regions are execute-never; - * at the MPU level no other checks are defined. - */ - switch (address) { - case 0x00000000 ... 0x1fffffff: /* ROM */ - case 0x20000000 ... 0x3fffffff: /* SRAM */ - case 0x60000000 ... 0x7fffffff: /* RAM */ - case 0x80000000 ... 0x9fffffff: /* RAM */ - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - break; - case 0x40000000 ... 0x5fffffff: /* Peripheral */ - case 0xa0000000 ... 0xbfffffff: /* Device */ - case 0xc0000000 ... 0xdfffffff: /* Device */ - case 0xe0000000 ... 0xffffffff: /* System */ - *prot = PAGE_READ | PAGE_WRITE; - break; - default: - g_assert_not_reached(); - } - } -} - -static bool pmsav7_use_background_region(ARMCPU *cpu, - ARMMMUIdx mmu_idx, bool is_user) -{ - /* Return true if we should use the default memory map as a - * "background" region if there are no hits against any MPU regions. 
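- * In short: never for unprivileged accesses; otherwise this is
- * governed by MPU_CTRL.PRIVDEFENA on M profile and SCTLR.BR on
- * R profile.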
- */ - CPUARMState *env = &cpu->env; - - if (is_user) { - return false; - } - - if (arm_feature(env, ARM_FEATURE_M)) { - return env->v7m.mpu_ctrl[regime_is_secure(env, mmu_idx)] - & R_V7M_MPU_CTRL_PRIVDEFENA_MASK; - } else { - return regime_sctlr(env, mmu_idx) & SCTLR_BR; - } -} - -static inline bool m_is_ppb_region(CPUARMState *env, uint32_t address) -{ - /* True if address is in the M profile PPB region 0xe0000000 - 0xe00fffff */ - return arm_feature(env, ARM_FEATURE_M) && - extract32(address, 20, 12) == 0xe00; -} - -static inline bool m_is_system_region(CPUARMState *env, uint32_t address) -{ - /* True if address is in the M profile system region - * 0xe0000000 - 0xffffffff - */ - return arm_feature(env, ARM_FEATURE_M) && extract32(address, 29, 3) == 0x7; -} - -static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - hwaddr *phys_ptr, int *prot, - target_ulong *page_size, - ARMMMUFaultInfo *fi) -{ - ARMCPU *cpu = env_archcpu(env); - int n; - bool is_user = regime_is_user(env, mmu_idx); - - *phys_ptr = address; - *page_size = TARGET_PAGE_SIZE; - *prot = 0; - - if (regime_translation_disabled(env, mmu_idx) || - m_is_ppb_region(env, address)) { - /* MPU disabled or M profile PPB access: use default memory map. - * The other case which uses the default memory map in the - * v7M ARM ARM pseudocode is exception vector reads from the vector - * table. In QEMU those accesses are done in arm_v7m_load_vector(), - * which always does a direct read using address_space_ldl(), rather - * than going via this function, so we don't need to check that here. - */ - get_phys_addr_pmsav7_default(env, mmu_idx, address, prot); - } else { /* MPU enabled */ - for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) { - /* region search */ - uint32_t base = env->pmsav7.drbar[n]; - uint32_t rsize = extract32(env->pmsav7.drsr[n], 1, 5); - uint32_t rmask; - bool srdis = false; - - if (!(env->pmsav7.drsr[n] & 0x1)) { - continue; - } - - if (!rsize) { - qemu_log_mask(LOG_GUEST_ERROR, - "DRSR[%d]: Rsize field cannot be 0\n", n); - continue; - } - rsize++; - rmask = (1ull << rsize) - 1; - - if (base & rmask) { - qemu_log_mask(LOG_GUEST_ERROR, - "DRBAR[%d]: 0x%" PRIx32 " misaligned " - "to DRSR region size, mask = 0x%" PRIx32 "\n", - n, base, rmask); - continue; - } - - if (address < base || address > base + rmask) { - /* - * Address not in this region. We must check whether the - * region covers addresses in the same page as our address. - * In that case we must not report a size that covers the - * whole page for a subsequent hit against a different MPU - * region or the background region, because it would result in - * incorrect TLB hits for subsequent accesses to addresses that - * are in this MPU region. - */ - if (ranges_overlap(base, rmask, - address & TARGET_PAGE_MASK, - TARGET_PAGE_SIZE)) { - *page_size = 1; - } - continue; - } - - /* Region matched */ - - if (rsize >= 8) { /* no subregions for regions < 256 bytes */ - int i, snd; - uint32_t srdis_mask; - - rsize -= 3; /* sub region size (power of 2) */ - snd = ((address - base) >> rsize) & 0x7; - srdis = extract32(env->pmsav7.drsr[n], snd + 8, 1); - - srdis_mask = srdis ? 0x3 : 0x0; - for (i = 2; i <= 8 && rsize < TARGET_PAGE_BITS; i *= 2) { - /* This will check in groups of 2, 4 and then 8, whether - * the subregion bits are consistent. rsize is incremented - * back up to give the region size, considering consistent - * adjacent subregions as one region. 
Stop testing if rsize - * is already big enough for an entire QEMU page. - */ - int snd_rounded = snd & ~(i - 1); - uint32_t srdis_multi = extract32(env->pmsav7.drsr[n], - snd_rounded + 8, i); - if (srdis_mask ^ srdis_multi) { - break; - } - srdis_mask = (srdis_mask << i) | srdis_mask; - rsize++; - } - } - if (srdis) { - continue; - } - if (rsize < TARGET_PAGE_BITS) { - *page_size = 1 << rsize; - } - break; - } - - if (n == -1) { /* no hits */ - if (!pmsav7_use_background_region(cpu, mmu_idx, is_user)) { - /* background fault */ - fi->type = ARMFault_Background; - return true; - } - get_phys_addr_pmsav7_default(env, mmu_idx, address, prot); - } else { /* a MPU hit! */ - uint32_t ap = extract32(env->pmsav7.dracr[n], 8, 3); - uint32_t xn = extract32(env->pmsav7.dracr[n], 12, 1); - - if (m_is_system_region(env, address)) { - /* System space is always execute never */ - xn = 1; - } - - if (is_user) { /* User mode AP bit decoding */ - switch (ap) { - case 0: - case 1: - case 5: - break; /* no access */ - case 3: - *prot |= PAGE_WRITE; - /* fall through */ - case 2: - case 6: - *prot |= PAGE_READ | PAGE_EXEC; - break; - case 7: - /* for v7M, same as 6; for R profile a reserved value */ - if (arm_feature(env, ARM_FEATURE_M)) { - *prot |= PAGE_READ | PAGE_EXEC; - break; - } - /* fall through */ - default: - qemu_log_mask(LOG_GUEST_ERROR, - "DRACR[%d]: Bad value for AP bits: 0x%" - PRIx32 "\n", n, ap); - } - } else { /* Priv. mode AP bits decoding */ - switch (ap) { - case 0: - break; /* no access */ - case 1: - case 2: - case 3: - *prot |= PAGE_WRITE; - /* fall through */ - case 5: - case 6: - *prot |= PAGE_READ | PAGE_EXEC; - break; - case 7: - /* for v7M, same as 6; for R profile a reserved value */ - if (arm_feature(env, ARM_FEATURE_M)) { - *prot |= PAGE_READ | PAGE_EXEC; - break; - } - /* fall through */ - default: - qemu_log_mask(LOG_GUEST_ERROR, - "DRACR[%d]: Bad value for AP bits: 0x%" - PRIx32 "\n", n, ap); - } - } - - /* execute never */ - if (xn) { - *prot &= ~PAGE_EXEC; - } - } - } - - fi->type = ARMFault_Permission; - fi->level = 1; - return !(*prot & (1 << access_type)); -} - -static bool v8m_is_sau_exempt(CPUARMState *env, - uint32_t address, MMUAccessType access_type) -{ - /* The architecture specifies that certain address ranges are - * exempt from v8M SAU/IDAU checks. - */ - return - (access_type == MMU_INST_FETCH && m_is_system_region(env, address)) || - (address >= 0xe0000000 && address <= 0xe0002fff) || - (address >= 0xe000e000 && address <= 0xe000efff) || - (address >= 0xe002e000 && address <= 0xe002efff) || - (address >= 0xe0040000 && address <= 0xe0041fff) || - (address >= 0xe00ff000 && address <= 0xe00fffff); -} - -void v8m_security_lookup(CPUARMState *env, uint32_t address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - V8M_SAttributes *sattrs) -{ - /* Look up the security attributes for this address. Compare the - * pseudocode SecurityCheck() function. - * We assume the caller has zero-initialized *sattrs. 
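- * (so the all-zeroes default already means Secure, not NS-Callable,
- * with no valid IDAU or SAU region number)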
- */ - ARMCPU *cpu = env_archcpu(env); - int r; - bool idau_exempt = false, idau_ns = true, idau_nsc = true; - int idau_region = IREGION_NOTVALID; - uint32_t addr_page_base = address & TARGET_PAGE_MASK; - uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1); - - if (cpu->idau) { - IDAUInterfaceClass *iic = IDAU_INTERFACE_GET_CLASS(cpu->idau); - IDAUInterface *ii = IDAU_INTERFACE(cpu->idau); - - iic->check(ii, address, &idau_region, &idau_exempt, &idau_ns, - &idau_nsc); - } - - if (access_type == MMU_INST_FETCH && extract32(address, 28, 4) == 0xf) { - /* 0xf0000000..0xffffffff is always S for insn fetches */ - return; - } - - if (idau_exempt || v8m_is_sau_exempt(env, address, access_type)) { - sattrs->ns = !regime_is_secure(env, mmu_idx); - return; - } - - if (idau_region != IREGION_NOTVALID) { - sattrs->irvalid = true; - sattrs->iregion = idau_region; - } - - switch (env->sau.ctrl & 3) { - case 0: /* SAU.ENABLE == 0, SAU.ALLNS == 0 */ - break; - case 2: /* SAU.ENABLE == 0, SAU.ALLNS == 1 */ - sattrs->ns = true; - break; - default: /* SAU.ENABLE == 1 */ - for (r = 0; r < cpu->sau_sregion; r++) { - if (env->sau.rlar[r] & 1) { - uint32_t base = env->sau.rbar[r] & ~0x1f; - uint32_t limit = env->sau.rlar[r] | 0x1f; - - if (base <= address && limit >= address) { - if (base > addr_page_base || limit < addr_page_limit) { - sattrs->subpage = true; - } - if (sattrs->srvalid) { - /* If we hit in more than one region then we must report - * as Secure, not NS-Callable, with no valid region - * number info. - */ - sattrs->ns = false; - sattrs->nsc = false; - sattrs->sregion = 0; - sattrs->srvalid = false; - break; - } else { - if (env->sau.rlar[r] & 2) { - sattrs->nsc = true; - } else { - sattrs->ns = true; - } - sattrs->srvalid = true; - sattrs->sregion = r; - } - } else { - /* - * Address not in this region. We must check whether the - * region covers addresses in the same page as our address. - * In that case we must not report a size that covers the - * whole page for a subsequent hit against a different MPU - * region or the background region, because it would result - * in incorrect TLB hits for subsequent accesses to - * addresses that are in this MPU region. - */ - if (limit >= base && - ranges_overlap(base, limit - base + 1, - addr_page_base, - TARGET_PAGE_SIZE)) { - sattrs->subpage = true; - } - } + if (gran == Gran16K) { + ds = cpu_isar_feature(aa64_tgran16_lpa2, cpu); + } else { + ds = cpu_isar_feature(aa64_tgran4_lpa2, cpu); } } - break; - } - - /* - * The IDAU will override the SAU lookup results if it specifies - * higher security than the SAU does. - */ - if (!idau_ns) { - if (sattrs->ns || (!idau_nsc && sattrs->nsc)) { - sattrs->ns = false; - sattrs->nsc = idau_nsc; + if (ds) { + min_tsz = 12; } } -} - -bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - hwaddr *phys_ptr, MemTxAttrs *txattrs, - int *prot, bool *is_subpage, - ARMMMUFaultInfo *fi, uint32_t *mregion) -{ - /* Perform a PMSAv8 MPU lookup (without also doing the SAU check - * that a full phys-to-virt translation does). - * mregion is (if not NULL) set to the region number which matched, - * or -1 if no region number is returned (MPU off, address did not - * hit a region, address hit in multiple regions). - * We set is_subpage to true if the region hit doesn't cover the - * entire TARGET_PAGE the address is within. 
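- * Callers use that to clamp the resulting TLB entry, typically as:
- *   *page_size = is_subpage ? 1 : TARGET_PAGE_SIZE;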
- */ - ARMCPU *cpu = env_archcpu(env); - bool is_user = regime_is_user(env, mmu_idx); - uint32_t secure = regime_is_secure(env, mmu_idx); - int n; - int matchregion = -1; - bool hit = false; - uint32_t addr_page_base = address & TARGET_PAGE_MASK; - uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1); - - *is_subpage = false; - *phys_ptr = address; - *prot = 0; - if (mregion) { - *mregion = -1; - } - - /* Unlike the ARM ARM pseudocode, we don't need to check whether this - * was an exception vector read from the vector table (which is always - * done using the default system address map), because those accesses - * are done in arm_v7m_load_vector(), which always does a direct - * read using address_space_ldl(), rather than going via this function. - */ - if (regime_translation_disabled(env, mmu_idx)) { /* MPU disabled */ - hit = true; - } else if (m_is_ppb_region(env, address)) { - hit = true; - } else { - if (pmsav7_use_background_region(cpu, mmu_idx, is_user)) { - hit = true; - } - for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) { - /* region search */ - /* Note that the base address is bits [31:5] from the register - * with bits [4:0] all zeroes, but the limit address is bits - * [31:5] from the register with bits [4:0] all ones. - */ - uint32_t base = env->pmsav8.rbar[secure][n] & ~0x1f; - uint32_t limit = env->pmsav8.rlar[secure][n] | 0x1f; - - if (!(env->pmsav8.rlar[secure][n] & 0x1)) { - /* Region disabled */ - continue; - } - - if (address < base || address > limit) { - /* - * Address not in this region. We must check whether the - * region covers addresses in the same page as our address. - * In that case we must not report a size that covers the - * whole page for a subsequent hit against a different MPU - * region or the background region, because it would result in - * incorrect TLB hits for subsequent accesses to addresses that - * are in this MPU region. - */ - if (limit >= base && - ranges_overlap(base, limit - base + 1, - addr_page_base, - TARGET_PAGE_SIZE)) { - *is_subpage = true; - } - continue; - } - - if (base > addr_page_base || limit < addr_page_limit) { - *is_subpage = true; - } - - if (matchregion != -1) { - /* Multiple regions match -- always a failure (unlike - * PMSAv7 where highest-numbered-region wins) - */ - fi->type = ARMFault_Permission; - fi->level = 1; - return true; - } - - matchregion = n; - hit = true; - } - } - - if (!hit) { - /* background fault */ - fi->type = ARMFault_Background; - return true; - } - - if (matchregion == -1) { - /* hit using the background region */ - get_phys_addr_pmsav7_default(env, mmu_idx, address, prot); - } else { - uint32_t ap = extract32(env->pmsav8.rbar[secure][matchregion], 1, 2); - uint32_t xn = extract32(env->pmsav8.rbar[secure][matchregion], 0, 1); - bool pxn = false; - - if (arm_feature(env, ARM_FEATURE_V8_1M)) { - pxn = extract32(env->pmsav8.rlar[secure][matchregion], 4, 1); - } - - if (m_is_system_region(env, address)) { - /* System space is always execute never */ - xn = 1; - } - - *prot = simple_ap_to_rw_prot(env, mmu_idx, ap); - if (*prot && !xn && !(pxn && !is_user)) { - *prot |= PAGE_EXEC; - } - /* We don't need to look the attribute up in the MAIR0/MAIR1 - * registers because that only tells us about cacheability. + if (stage2 && el1_is_aa32) { + /* + * For AArch32 EL1 the min txsz (and thus max IPA size) requirements + * are loosened: a configured IPA of 40 bits is permitted even if + * the implemented PA is less than that (and so a 40 bit IPA would + * fault for an AArch64 EL1). See R_DTLMN. 
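+ * (min_tsz = 24 corresponds to a maximum IPA size of
+ * 64 - 24 = 40 bits)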
*/ - if (mregion) { - *mregion = matchregion; - } - } - - fi->type = ARMFault_Permission; - fi->level = 1; - return !(*prot & (1 << access_type)); -} - - -static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - hwaddr *phys_ptr, MemTxAttrs *txattrs, - int *prot, target_ulong *page_size, - ARMMMUFaultInfo *fi) -{ - uint32_t secure = regime_is_secure(env, mmu_idx); - V8M_SAttributes sattrs = {}; - bool ret; - bool mpu_is_subpage; - - if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { - v8m_security_lookup(env, address, access_type, mmu_idx, &sattrs); - if (access_type == MMU_INST_FETCH) { - /* Instruction fetches always use the MMU bank and the - * transaction attribute determined by the fetch address, - * regardless of CPU state. This is painful for QEMU - * to handle, because it would mean we need to encode - * into the mmu_idx not just the (user, negpri) information - * for the current security state but also that for the - * other security state, which would balloon the number - * of mmu_idx values needed alarmingly. - * Fortunately we can avoid this because it's not actually - * possible to arbitrarily execute code from memory with - * the wrong security attribute: it will always generate - * an exception of some kind or another, apart from the - * special case of an NS CPU executing an SG instruction - * in S&NSC memory. So we always just fail the translation - * here and sort things out in the exception handler - * (including possibly emulating an SG instruction). - */ - if (sattrs.ns != !secure) { - if (sattrs.nsc) { - fi->type = ARMFault_QEMU_NSCExec; - } else { - fi->type = ARMFault_QEMU_SFault; - } - *page_size = sattrs.subpage ? 1 : TARGET_PAGE_SIZE; - *phys_ptr = address; - *prot = 0; - return true; - } - } else { - /* For data accesses we always use the MMU bank indicated - * by the current CPU state, but the security attributes - * might downgrade a secure access to nonsecure. - */ - if (sattrs.ns) { - txattrs->secure = false; - } else if (!secure) { - /* NS access to S memory must fault. - * Architecturally we should first check whether the - * MPU information for this address indicates that we - * are doing an unaligned access to Device memory, which - * should generate a UsageFault instead. QEMU does not - * currently check for that kind of unaligned access though. - * If we added it we would need to do so as a special case - * for M_FAKE_FSR_SFAULT in arm_v7m_cpu_do_interrupt(). - */ - fi->type = ARMFault_QEMU_SFault; - *page_size = sattrs.subpage ? 1 : TARGET_PAGE_SIZE; - *phys_ptr = address; - *prot = 0; - return true; - } - } + min_tsz = MIN(min_tsz, 24); } - ret = pmsav8_mpu_lookup(env, address, access_type, mmu_idx, phys_ptr, - txattrs, prot, &mpu_is_subpage, fi, NULL); - *page_size = sattrs.subpage || mpu_is_subpage ? 1 : TARGET_PAGE_SIZE; - return ret; -} - -static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - hwaddr *phys_ptr, int *prot, - ARMMMUFaultInfo *fi) -{ - int n; - uint32_t mask; - uint32_t base; - bool is_user = regime_is_user(env, mmu_idx); - - if (regime_translation_disabled(env, mmu_idx)) { - /* MPU disabled. 
*/ - *phys_ptr = address; - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - return false; - } - - *phys_ptr = address; - for (n = 7; n >= 0; n--) { - base = env->cp15.c6_region[n]; - if ((base & 1) == 0) { - continue; - } - mask = 1 << ((base >> 1) & 0x1f); - /* Keep this shift separate from the above to avoid an - (undefined) << 32. */ - mask = (mask << 1) - 1; - if (((base ^ address) & ~mask) == 0) { - break; - } - } - if (n < 0) { - fi->type = ARMFault_Background; - return true; - } - - if (access_type == MMU_INST_FETCH) { - mask = env->cp15.pmsav5_insn_ap; + if (tsz > max_tsz) { + tsz = max_tsz; + tsz_oob = true; + } else if (tsz < min_tsz) { + tsz = min_tsz; + tsz_oob = true; } else { - mask = env->cp15.pmsav5_data_ap; + tsz_oob = false; } - mask = (mask >> (n * 4)) & 0xf; - switch (mask) { - case 0: - fi->type = ARMFault_Permission; - fi->level = 1; - return true; - case 1: - if (is_user) { - fi->type = ARMFault_Permission; - fi->level = 1; - return true; - } - *prot = PAGE_READ | PAGE_WRITE; - break; - case 2: - *prot = PAGE_READ; - if (!is_user) { - *prot |= PAGE_WRITE; - } - break; - case 3: - *prot = PAGE_READ | PAGE_WRITE; - break; - case 5: - if (is_user) { - fi->type = ARMFault_Permission; - fi->level = 1; - return true; - } - *prot = PAGE_READ; - break; - case 6: - *prot = PAGE_READ; - break; - default: - /* Bad permission. */ - fi->type = ARMFault_Permission; - fi->level = 1; - return true; - } - *prot |= PAGE_EXEC; - return false; -} - -/* Combine either inner or outer cacheability attributes for normal - * memory, according to table D4-42 and pseudocode procedure - * CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM). - * - * NB: only stage 1 includes allocation hints (RW bits), leading to - * some asymmetry. - */ -static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2) -{ - if (s1 == 4 || s2 == 4) { - /* non-cacheable has precedence */ - return 4; - } else if (extract32(s1, 2, 2) == 0 || extract32(s1, 2, 2) == 2) { - /* stage 1 write-through takes precedence */ - return s1; - } else if (extract32(s2, 2, 2) == 2) { - /* stage 2 write-through takes precedence, but the allocation hint - * is still taken from stage 1 - */ - return (2 << 2) | extract32(s1, 0, 2); - } else { /* write-back */ - return s1; - } -} - -/* Combine S1 and S2 cacheability/shareability attributes, per D4.5.4 - * and CombineS1S2Desc() - * - * @s1: Attributes from stage 1 walk - * @s2: Attributes from stage 2 walk - */ -static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2) -{ - uint8_t s1lo, s2lo, s1hi, s2hi; - ARMCacheAttrs ret; - bool tagged = false; - - if (s1.attrs == 0xf0) { - tagged = true; - s1.attrs = 0xff; - } - - s1lo = extract32(s1.attrs, 0, 4); - s2lo = extract32(s2.attrs, 0, 4); - s1hi = extract32(s1.attrs, 4, 4); - s2hi = extract32(s2.attrs, 4, 4); - /* Combine shareability attributes (table D4-43) */ - if (s1.shareability == 2 || s2.shareability == 2) { - /* if either are outer-shareable, the result is outer-shareable */ - ret.shareability = 2; - } else if (s1.shareability == 3 || s2.shareability == 3) { - /* if either are inner-shareable, the result is inner-shareable */ - ret.shareability = 3; - } else { - /* both non-shareable */ - ret.shareability = 0; - } - - /* Combine memory type and cacheability attributes */ - if (s1hi == 0 || s2hi == 0) { - /* Device has precedence over normal */ - if (s1lo == 0 || s2lo == 0) { - /* nGnRnE has precedence over anything */ - ret.attrs = 0; - } else if (s1lo == 4 || s2lo == 4) { - /* non-Reordering has precedence 
over Reordering */ - ret.attrs = 4; /* nGnRE */ - } else if (s1lo == 8 || s2lo == 8) { - /* non-Gathering has precedence over Gathering */ - ret.attrs = 8; /* nGRE */ - } else { - ret.attrs = 0xc; /* GRE */ - } - - /* Any location for which the resultant memory type is any - * type of Device memory is always treated as Outer Shareable. - */ - ret.shareability = 2; - } else { /* Normal memory */ - /* Outer/inner cacheability combine independently */ - ret.attrs = combine_cacheattr_nibble(s1hi, s2hi) << 4 - | combine_cacheattr_nibble(s1lo, s2lo); - - if (ret.attrs == 0x44) { - /* Any location for which the resultant memory type is Normal - * Inner Non-cacheable, Outer Non-cacheable is always treated - * as Outer Shareable. - */ - ret.shareability = 2; - } - } - - /* TODO: CombineS1S2Desc does not consider transient, only WB, RWA. */ - if (tagged && ret.attrs == 0xff) { - ret.attrs = 0xf0; + /* Present TBI as a composite with TBID. */ + tbi = aa64_va_parameter_tbi(tcr, mmu_idx); + if (!data) { + tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx); } + tbi = (tbi >> select) & 1; - return ret; + return (ARMVAParameters) { + .tsz = tsz, + .ps = ps, + .sh = sh, + .select = select, + .tbi = tbi, + .epd = epd, + .hpd = hpd, + .tsz_oob = tsz_oob, + .ds = ds, + .ha = ha, + .hd = ha && hd, + .gran = gran, + }; } - -/* get_phys_addr - get the physical address for this virtual address - * - * Find the physical address corresponding to the given virtual address, - * by doing a translation table walk on MMU based systems or using the - * MPU state on MPU based systems. - * - * Returns false if the translation was successful. Otherwise, phys_ptr, attrs, - * prot and page_size may not be filled in, and the populated fsr value provides - * information on why the translation aborted, in the format of a - * DFSR/IFSR fault register, with the following caveats: - * * we honour the short vs long DFSR format differences. - * * the WnR bit is never set (the caller must do this). - * * for PSMAv5 based systems we don't bother to return a full FSR format - * value. - * - * @env: CPUARMState - * @address: virtual address to get physical address for - * @access_type: 0 for read, 1 for write, 2 for execute - * @mmu_idx: MMU index indicating required translation regime - * @phys_ptr: set to the physical address corresponding to the virtual address - * @attrs: set to the memory transaction attributes to use - * @prot: set to the permissions for the page containing phys_ptr - * @page_size: set to the size of the page containing phys_ptr - * @fi: set to fault info if the translation fails - * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes +/* + * Note that signed overflow is undefined in C. The following routines are + * careful to use unsigned types where modulo arithmetic is required. + * Failure to do so _will_ break on newer gcc. */ -bool get_phys_addr(CPUARMState *env, target_ulong address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, - target_ulong *page_size, - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) -{ - ARMMMUIdx s1_mmu_idx = stage_1_mmu_idx(mmu_idx); - - if (mmu_idx != s1_mmu_idx) { - /* Call ourselves recursively to do the stage 1 and then stage 2 - * translations if mmu_idx is a two-stage regime. 
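The composite TBI computation in the added lines reads as: mask the data-only (TBID) bits off for instruction fetches, then pick the bit for the selected half of the address space. A small model, with plain bitmasks standing in for the aa64_va_parameter_tbi/tbid helpers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * TBI and TBID each provide one bit per address-space half
 * (bit 0 for TTBR0, bit 1 for TTBR1). TBID restricts TBI to
 * data accesses, so fetches see TBI with the TBID bits cleared.
 */
static bool effective_tbi(uint8_t tbi_bits, uint8_t tbid_bits,
                          bool is_data, int select)
{
    if (!is_data) {
        tbi_bits &= ~tbid_bits;
    }
    return (tbi_bits >> select) & 1;
}

int main(void)
{
    /* TBI1=1, TBID1=1: data ignores the top byte, fetches do not. */
    printf("data: %d\n", effective_tbi(0x2, 0x2, true, 1));  /* 1 */
    printf("insn: %d\n", effective_tbi(0x2, 0x2, false, 1)); /* 0 */
    return 0;
}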
- */ - if (arm_feature(env, ARM_FEATURE_EL2)) { - hwaddr ipa; - int s2_prot; - int ret; - ARMCacheAttrs cacheattrs2 = {}; - ARMMMUIdx s2_mmu_idx; - bool is_el0; - - ret = get_phys_addr(env, address, access_type, s1_mmu_idx, &ipa, - attrs, prot, page_size, fi, cacheattrs); - - /* If S1 fails or S2 is disabled, return early. */ - if (ret || regime_translation_disabled(env, ARMMMUIdx_Stage2)) { - *phys_ptr = ipa; - return ret; - } - - s2_mmu_idx = attrs->secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; - is_el0 = mmu_idx == ARMMMUIdx_E10_0 || mmu_idx == ARMMMUIdx_SE10_0; - - /* S1 is done. Now do S2 translation. */ - ret = get_phys_addr_lpae(env, ipa, access_type, s2_mmu_idx, is_el0, - phys_ptr, attrs, &s2_prot, - page_size, fi, &cacheattrs2); - fi->s2addr = ipa; - /* Combine the S1 and S2 perms. */ - *prot &= s2_prot; - - /* If S2 fails, return early. */ - if (ret) { - return ret; - } - - /* Combine the S1 and S2 cache attributes. */ - if (arm_hcr_el2_eff(env) & HCR_DC) { - /* - * HCR.DC forces the first stage attributes to - * Normal Non-Shareable, - * Inner Write-Back Read-Allocate Write-Allocate, - * Outer Write-Back Read-Allocate Write-Allocate. - * Do not overwrite Tagged within attrs. - */ - if (cacheattrs->attrs != 0xf0) { - cacheattrs->attrs = 0xff; - } - cacheattrs->shareability = 0; - } - *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); - - /* Check if IPA translates to secure or non-secure PA space. */ - if (arm_is_secure_below_el3(env)) { - if (attrs->secure) { - attrs->secure = - !(env->cp15.vstcr_el2.raw_tcr & (VSTCR_SA | VSTCR_SW)); - } else { - attrs->secure = - !((env->cp15.vtcr_el2.raw_tcr & (VTCR_NSA | VTCR_NSW)) - || (env->cp15.vstcr_el2.raw_tcr & VSTCR_SA)); - } - } - return 0; - } else { - /* - * For non-EL2 CPUs a stage1+stage2 translation is just stage 1. - */ - mmu_idx = stage_1_mmu_idx(mmu_idx); - } - } - - /* The page table entries may downgrade secure to non-secure, but - * cannot upgrade an non-secure translation regime's attributes - * to secure. - */ - attrs->secure = regime_is_secure(env, mmu_idx); - attrs->user = regime_is_user(env, mmu_idx); - - /* Fast Context Switch Extension. This doesn't exist at all in v8. - * In v7 and earlier it affects all stage 1 translations. - */ - if (address < 0x02000000 && mmu_idx != ARMMMUIdx_Stage2 - && !arm_feature(env, ARM_FEATURE_V8)) { - if (regime_el(env, mmu_idx) == 3) { - address += env->cp15.fcseidr_s; - } else { - address += env->cp15.fcseidr_ns; - } - } - - if (arm_feature(env, ARM_FEATURE_PMSA)) { - bool ret; - *page_size = TARGET_PAGE_SIZE; - - if (arm_feature(env, ARM_FEATURE_V8)) { - /* PMSAv8 */ - ret = get_phys_addr_pmsav8(env, address, access_type, mmu_idx, - phys_ptr, attrs, prot, page_size, fi); - } else if (arm_feature(env, ARM_FEATURE_V7)) { - /* PMSAv7 */ - ret = get_phys_addr_pmsav7(env, address, access_type, mmu_idx, - phys_ptr, prot, page_size, fi); - } else { - /* Pre-v7 MPU */ - ret = get_phys_addr_pmsav5(env, address, access_type, mmu_idx, - phys_ptr, prot, fi); - } - qemu_log_mask(CPU_LOG_MMU, "PMSA MPU lookup for %s at 0x%08" PRIx32 - " mmu_idx %u -> %s (prot %c%c%c)\n", - access_type == MMU_DATA_LOAD ? "reading" : - (access_type == MMU_DATA_STORE ? "writing" : "execute"), - (uint32_t)address, mmu_idx, - ret ? "Miss" : "Hit", - *prot & PAGE_READ ? 'r' : '-', - *prot & PAGE_WRITE ? 'w' : '-', - *prot & PAGE_EXEC ? 
'x' : '-'); - - return ret; - } - - /* Definitely a real MMU, not an MPU */ - - if (regime_translation_disabled(env, mmu_idx)) { - uint64_t hcr; - uint8_t memattr; - - /* - * MMU disabled. S1 addresses within aa64 translation regimes are - * still checked for bounds -- see AArch64.TranslateAddressS1Off. - */ - if (mmu_idx != ARMMMUIdx_Stage2 && mmu_idx != ARMMMUIdx_Stage2_S) { - int r_el = regime_el(env, mmu_idx); - if (arm_el_is_aa64(env, r_el)) { - int pamax = arm_pamax(env_archcpu(env)); - uint64_t tcr = env->cp15.tcr_el[r_el].raw_tcr; - int addrtop, tbi; - - tbi = aa64_va_parameter_tbi(tcr, mmu_idx); - if (access_type == MMU_INST_FETCH) { - tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx); - } - tbi = (tbi >> extract64(address, 55, 1)) & 1; - addrtop = (tbi ? 55 : 63); - - if (extract64(address, pamax, addrtop - pamax + 1) != 0) { - fi->type = ARMFault_AddressSize; - fi->level = 0; - fi->stage2 = false; - return 1; - } - - /* - * When TBI is disabled, we've just validated that all of the - * bits above PAMax are zero, so logically we only need to - * clear the top byte for TBI. But it's clearer to follow - * the pseudocode set of addrdesc.paddress. - */ - address = extract64(address, 0, 52); - } - } - *phys_ptr = address; - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - *page_size = TARGET_PAGE_SIZE; - - /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */ - hcr = arm_hcr_el2_eff(env); - cacheattrs->shareability = 0; - if (hcr & HCR_DC) { - if (hcr & HCR_DCT) { - memattr = 0xf0; /* Tagged, Normal, WB, RWA */ - } else { - memattr = 0xff; /* Normal, WB, RWA */ - } - } else if (access_type == MMU_INST_FETCH) { - if (regime_sctlr(env, mmu_idx) & SCTLR_I) { - memattr = 0xee; /* Normal, WT, RA, NT */ - } else { - memattr = 0x44; /* Normal, NC, No */ - } - cacheattrs->shareability = 2; /* outer sharable */ - } else { - memattr = 0x00; /* Device, nGnRnE */ - } - cacheattrs->attrs = memattr; - return 0; - } - - if (regime_using_lpae_format(env, mmu_idx)) { - return get_phys_addr_lpae(env, address, access_type, mmu_idx, false, - phys_ptr, attrs, prot, page_size, - fi, cacheattrs); - } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) { - return get_phys_addr_v6(env, address, access_type, mmu_idx, - phys_ptr, attrs, prot, page_size, fi); - } else { - return get_phys_addr_v5(env, address, access_type, mmu_idx, - phys_ptr, prot, page_size, fi); - } -} - -hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, - MemTxAttrs *attrs) -{ - ARMCPU *cpu = ARM_CPU(cs); - CPUARMState *env = &cpu->env; - hwaddr phys_addr; - target_ulong page_size; - int prot; - bool ret; - ARMMMUFaultInfo fi = {}; - ARMMMUIdx mmu_idx = arm_mmu_idx(env); - ARMCacheAttrs cacheattrs = {}; - - *attrs = (MemTxAttrs) {}; - - ret = get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &phys_addr, - attrs, &prot, &page_size, &fi, &cacheattrs); - - if (ret) { - return -1; - } - return phys_addr; -} - -#endif - -/* Note that signed overflow is undefined in C. The following routines are - careful to use unsigned types where modulo arithmetic is required. - Failure to do so _will_ break on newer gcc. */ /* Signed saturating arithmetic. 
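The MMU-off bounds check in the removed block (per the diffstat, this logic presumably moves to ptw.c) verifies that every address bit between PAMax and the effective address top -- 55 with TBI, else 63 -- is zero, else an AddressSize fault is raised. A standalone sketch with a local extract64:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t extract64(uint64_t value, int start, int length)
{
    return (value >> start) & (~0ULL >> (64 - length));
}

/* AArch64.TranslateAddressS1Off-style bounds check, MMU disabled. */
static bool addr_size_fault(uint64_t address, int pamax, bool tbi)
{
    int addrtop = tbi ? 55 : 63;

    return extract64(address, pamax, addrtop - pamax + 1) != 0;
}

int main(void)
{
    /* Bit 48 set with a 48-bit PA range: faults. */
    printf("%d\n", addr_size_fault(1ULL << 48, 48, false));          /* 1 */
    /* Only tag bits (56..63) set, TBI on: no fault. */
    printf("%d\n", addr_size_fault(0xFF00000000001000ULL, 48, true)); /* 0 */
    return 0;
}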
*/ @@ -12879,10 +12050,11 @@ static inline uint16_t add16_sat(uint16_t a, uint16_t b) res = a + b; if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) { - if (a & 0x8000) + if (a & 0x8000) { res = 0x8000; - else + } else { res = 0x7fff; + } } return res; } @@ -12894,10 +12066,11 @@ static inline uint8_t add8_sat(uint8_t a, uint8_t b) res = a + b; if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) { - if (a & 0x80) + if (a & 0x80) { res = 0x80; - else + } else { res = 0x7f; + } } return res; } @@ -12909,10 +12082,11 @@ static inline uint16_t sub16_sat(uint16_t a, uint16_t b) res = a - b; if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) { - if (a & 0x8000) + if (a & 0x8000) { res = 0x8000; - else + } else { res = 0x7fff; + } } return res; } @@ -12924,10 +12098,11 @@ static inline uint8_t sub8_sat(uint8_t a, uint8_t b) res = a - b; if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) { - if (a & 0x80) + if (a & 0x80) { res = 0x80; - else + } else { res = 0x7f; + } } return res; } @@ -12945,34 +12120,38 @@ static inline uint16_t add16_usat(uint16_t a, uint16_t b) { uint16_t res; res = a + b; - if (res < a) + if (res < a) { res = 0xffff; + } return res; } static inline uint16_t sub16_usat(uint16_t a, uint16_t b) { - if (a > b) + if (a > b) { return a - b; - else + } else { return 0; + } } static inline uint8_t add8_usat(uint8_t a, uint8_t b) { uint8_t res; res = a + b; - if (res < a) + if (res < a) { res = 0xff; + } return res; } static inline uint8_t sub8_usat(uint8_t a, uint8_t b) { - if (a > b) + if (a > b) { return a - b; - else + } else { return 0; + } } #define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16); @@ -12990,7 +12169,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) RESULT(sum, n, 16); \ if (sum >= 0) \ ge |= 3 << (n * 2); \ - } while(0) + } while (0) #define SARITH8(a, b, n, op) do { \ int32_t sum; \ @@ -12998,7 +12177,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) RESULT(sum, n, 8); \ if (sum >= 0) \ ge |= 1 << n; \ - } while(0) + } while (0) #define ADD16(a, b, n) SARITH16(a, b, n, +) @@ -13017,7 +12196,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) RESULT(sum, n, 16); \ if ((sum >> 16) == 1) \ ge |= 3 << (n * 2); \ - } while(0) + } while (0) #define ADD8(a, b, n) do { \ uint32_t sum; \ @@ -13025,7 +12204,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) RESULT(sum, n, 8); \ if ((sum >> 8) == 1) \ ge |= 1 << n; \ - } while(0) + } while (0) #define SUB16(a, b, n) do { \ uint32_t sum; \ @@ -13033,7 +12212,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) RESULT(sum, n, 16); \ if ((sum >> 16) == 0) \ ge |= 3 << (n * 2); \ - } while(0) + } while (0) #define SUB8(a, b, n) do { \ uint32_t sum; \ @@ -13041,7 +12220,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) RESULT(sum, n, 8); \ if ((sum >> 8) == 0) \ ge |= 1 << n; \ - } while(0) + } while (0) #define PFX u #define ARITH_GE @@ -13076,10 +12255,11 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) static inline uint8_t do_usad(uint8_t a, uint8_t b) { - if (a > b) + if (a > b) { return a - b; - else + } else { return b - a; + } } /* Unsigned sum of absolute byte differences. */ @@ -13099,18 +12279,23 @@ uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b) uint32_t mask; mask = 0; - if (flags & 1) + if (flags & 1) { mask |= 0xff; - if (flags & 2) + } + if (flags & 2) { mask |= 0xff00; - if (flags & 4) + } + if (flags & 4) { mask |= 0xff0000; - if (flags & 8) + } + if (flags & 8) { mask |= 0xff000000; + } return (a & mask) | (b & ~mask); } -/* CRC helpers. 
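All of these helpers use the same unsigned-overflow idiom: signed overflow occurred iff the operands share a sign and the result's sign differs, and doing the arithmetic on unsigned types keeps the wraparound well defined, as the comment above insists. A compilable distillation of add16_sat:

#include <stdint.h>
#include <stdio.h>

static uint16_t add16_sat_demo(uint16_t a, uint16_t b)
{
    uint16_t res = a + b;

    /* Overflow: result sign differs from a, while a and b agree. */
    if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) {
        res = (a & 0x8000) ? 0x8000 : 0x7fff;
    }
    return res;
}

int main(void)
{
    printf("%#x\n", add16_sat_demo(0x7fff, 1));      /* 0x7fff: clamps high */
    printf("%#x\n", add16_sat_demo(0x8000, 0xffff)); /* 0x8000: clamps low */
    return 0;
}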
+/* + * CRC helpers. * The upper bytes of val (above the number specified by 'bytes') must have * been zeroed out by the caller. */ @@ -13134,13 +12319,17 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) return crc32c(acc, buf, bytes) ^ 0xffffffff; } -/* Return the exception level to which FP-disabled exceptions should +/* + * Return the exception level to which FP-disabled exceptions should * be taken, or 0 if FP is enabled. */ int fp_exception_el(CPUARMState *env, int cur_el) { #ifndef CONFIG_USER_ONLY - /* CPACR and the CPTR registers don't exist before v6, so FP is + uint64_t hcr_el2; + + /* + * CPACR and the CPTR registers don't exist before v6, so FP is * always accessible */ if (!arm_feature(env, ARM_FEATURE_V6)) { @@ -13163,37 +12352,35 @@ int fp_exception_el(CPUARMState *env, int cur_el) return 0; } - /* The CPACR controls traps to EL1, or PL1 if we're 32 bit: + hcr_el2 = arm_hcr_el2_eff(env); + + /* + * The CPACR controls traps to EL1, or PL1 if we're 32 bit: * 0, 2 : trap EL0 and EL1/PL1 accesses * 1 : trap only EL0 accesses * 3 : trap no accesses * This register is ignored if E2H+TGE are both set. */ - if ((arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { - int fpen = extract32(env->cp15.cpacr_el1, 20, 2); + if ((hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { + int fpen = FIELD_EX64(env->cp15.cpacr_el1, CPACR_EL1, FPEN); switch (fpen) { + case 1: + if (cur_el != 0) { + break; + } + /* fall through */ case 0: case 2: - if (cur_el == 0 || cur_el == 1) { - /* Trap to PL1, which might be EL1 or EL3 */ - if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) { - return 3; - } - return 1; - } - if (cur_el == 3 && !is_a64(env)) { - /* Secure PL1 running at EL3 */ + /* Trap from Secure PL0 or PL1 to Secure PL1. */ + if (!arm_el_is_aa64(env, 3) + && (cur_el == 3 || arm_is_secure_below_el3(env))) { return 3; } - break; - case 1: - if (cur_el == 0) { + if (cur_el <= 1) { return 1; } break; - case 3: - break; } } @@ -13210,19 +12397,31 @@ int fp_exception_el(CPUARMState *env, int cur_el) } } - /* For the CPTR registers we don't need to guard with an ARM_FEATURE - * check because zero bits in the registers mean "don't trap". + /* + * CPTR_EL2 is present in v7VE or v8, and changes format + * with HCR_EL2.E2H (regardless of TGE). 
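The restructured FPEN switch is easier to see in isolation: FPEN=1 traps only EL0, FPEN=0/2 trap both EL0 and EL1, FPEN=3 traps nothing. A simplified model that deliberately ignores the Secure/AArch32-EL3 special case handled in the hunk above:

#include <stdio.h>

/* Returns the EL that a CPACR_EL1.FPEN trap targets, or 0 for none. */
static int cpacr_fp_trap_el(int fpen, int cur_el)
{
    switch (fpen) {
    case 1:
        if (cur_el != 0) {
            break;
        }
        /* fall through */
    case 0:
    case 2:
        if (cur_el <= 1) {
            return 1;   /* trap to EL1 */
        }
        break;
    }
    return 0;           /* no trap from CPACR */
}

int main(void)
{
    printf("%d\n", cpacr_fp_trap_el(1, 0)); /* 1: EL0 trapped */
    printf("%d\n", cpacr_fp_trap_el(1, 1)); /* 0: EL1 allowed */
    printf("%d\n", cpacr_fp_trap_el(3, 0)); /* 0: FP enabled */
    return 0;
}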
*/ - - /* CPTR_EL2 : present in v7VE or v8 */ - if (cur_el <= 2 && extract32(env->cp15.cptr_el[2], 10, 1) - && arm_is_el2_enabled(env)) { - /* Trap FP ops at EL2, NS-EL1 or NS-EL0 to EL2 */ - return 2; + if (cur_el <= 2) { + if (hcr_el2 & HCR_E2H) { + switch (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, FPEN)) { + case 1: + if (cur_el != 0 || !(hcr_el2 & HCR_TGE)) { + break; + } + /* fall through */ + case 0: + case 2: + return 2; + } + } else if (arm_is_el2_enabled(env)) { + if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TFP)) { + return 2; + } + } } /* CPTR_EL3 : present in v8 */ - if (extract32(env->cp15.cptr_el[3], 10, 1)) { + if (FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, TFP)) { /* Trap all FP ops to EL3 */ return 3; } @@ -13240,22 +12439,15 @@ int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx) switch (mmu_idx) { case ARMMMUIdx_E10_0: case ARMMMUIdx_E20_0: - case ARMMMUIdx_SE10_0: - case ARMMMUIdx_SE20_0: return 0; case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: return 1; case ARMMMUIdx_E2: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_SE2: - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: return 2; - case ARMMMUIdx_SE3: + case ARMMMUIdx_E3: return 3; default: g_assert_not_reached(); @@ -13289,7 +12481,7 @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el) } break; case 1: - if (env->pstate & PSTATE_PAN) { + if (arm_pan_enabled(env)) { idx = ARMMMUIdx_E10_1_PAN; } else { idx = ARMMMUIdx_E10_1; @@ -13298,7 +12490,7 @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el) case 2: /* Note that TGE does not apply at EL2. */ if (arm_hcr_el2_eff(env) & HCR_E2H) { - if (env->pstate & PSTATE_PAN) { + if (arm_pan_enabled(env)) { idx = ARMMMUIdx_E20_2_PAN; } else { idx = ARMMMUIdx_E20_2; @@ -13308,15 +12500,11 @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el) } break; case 3: - return ARMMMUIdx_SE3; + return ARMMMUIdx_E3; default: g_assert_not_reached(); } - if (arm_is_secure_below_el3(env)) { - idx &= ~ARM_MMU_IDX_A_NS; - } - return idx; } @@ -13325,318 +12513,6 @@ ARMMMUIdx arm_mmu_idx(CPUARMState *env) return arm_mmu_idx_el(env, arm_current_el(env)); } -#ifndef CONFIG_USER_ONLY -ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env) -{ - return stage_1_mmu_idx(arm_mmu_idx(env)); -} -#endif - -static CPUARMTBFlags rebuild_hflags_common(CPUARMState *env, int fp_el, - ARMMMUIdx mmu_idx, - CPUARMTBFlags flags) -{ - DP_TBFLAG_ANY(flags, FPEXC_EL, fp_el); - DP_TBFLAG_ANY(flags, MMUIDX, arm_to_core_mmu_idx(mmu_idx)); - - if (arm_singlestep_active(env)) { - DP_TBFLAG_ANY(flags, SS_ACTIVE, 1); - } - return flags; -} - -static CPUARMTBFlags rebuild_hflags_common_32(CPUARMState *env, int fp_el, - ARMMMUIdx mmu_idx, - CPUARMTBFlags flags) -{ - bool sctlr_b = arm_sctlr_b(env); - - if (sctlr_b) { - DP_TBFLAG_A32(flags, SCTLR__B, 1); - } - if (arm_cpu_data_is_big_endian_a32(env, sctlr_b)) { - DP_TBFLAG_ANY(flags, BE_DATA, 1); - } - DP_TBFLAG_A32(flags, NS, !access_secure_reg(env)); - - return rebuild_hflags_common(env, fp_el, mmu_idx, flags); -} - -static CPUARMTBFlags rebuild_hflags_m32(CPUARMState *env, int fp_el, - ARMMMUIdx mmu_idx) -{ - CPUARMTBFlags flags = {}; - uint32_t ccr = env->v7m.ccr[env->v7m.secure]; - - /* Without HaveMainExt, CCR.UNALIGN_TRP is RES1. 
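The CPTR_EL2 rework keys on HCR_EL2.E2H choosing between a CPACR-style FPEN field and the legacy TFP trap bit. A toy dispatch mirroring the hunk; the booleans are stand-ins for the FIELD_EX64 extractions and the arm_is_el2_enabled() check:

#include <stdbool.h>
#include <stdio.h>

static bool cptr_el2_traps_fp(bool e2h, bool tge, int fpen, bool tfp,
                              int cur_el, bool el2_enabled)
{
    if (cur_el > 2) {
        return false;
    }
    if (e2h) {
        /* E2H format: CPACR-style FPEN field. */
        switch (fpen) {
        case 1:
            if (cur_el != 0 || !tge) {
                break;
            }
            /* fall through */
        case 0:
        case 2:
            return true;
        }
        return false;
    }
    /* Legacy format: single TFP bit, only when EL2 is enabled. */
    return el2_enabled && tfp;
}

int main(void)
{
    printf("%d\n", cptr_el2_traps_fp(true, true, 1, false, 0, true));  /* 1 */
    printf("%d\n", cptr_el2_traps_fp(false, false, 0, true, 1, true)); /* 1 */
    return 0;
}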
*/ - if (ccr & R_V7M_CCR_UNALIGN_TRP_MASK) { - DP_TBFLAG_ANY(flags, ALIGN_MEM, 1); - } - - if (arm_v7m_is_handler_mode(env)) { - DP_TBFLAG_M32(flags, HANDLER, 1); - } - - /* - * v8M always applies stack limit checks unless CCR.STKOFHFNMIGN - * is suppressing them because the requested execution priority - * is less than 0. - */ - if (arm_feature(env, ARM_FEATURE_V8) && - !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) && - (ccr & R_V7M_CCR_STKOFHFNMIGN_MASK))) { - DP_TBFLAG_M32(flags, STACKCHECK, 1); - } - - return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags); -} - -static CPUARMTBFlags rebuild_hflags_aprofile(CPUARMState *env) -{ - CPUARMTBFlags flags = {}; - - DP_TBFLAG_ANY(flags, DEBUG_TARGET_EL, arm_debug_target_el(env)); - return flags; -} - -static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el, - ARMMMUIdx mmu_idx) -{ - CPUARMTBFlags flags = rebuild_hflags_aprofile(env); - int el = arm_current_el(env); - - if (arm_sctlr(env, el) & SCTLR_A) { - DP_TBFLAG_ANY(flags, ALIGN_MEM, 1); - } - - if (arm_el_is_aa64(env, 1)) { - DP_TBFLAG_A32(flags, VFPEN, 1); - } - - if (el < 2 && env->cp15.hstr_el2 && - (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { - DP_TBFLAG_A32(flags, HSTR_ACTIVE, 1); - } - - if (env->uncached_cpsr & CPSR_IL) { - DP_TBFLAG_ANY(flags, PSTATE__IL, 1); - } - - return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags); -} - -static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, - ARMMMUIdx mmu_idx) -{ - CPUARMTBFlags flags = rebuild_hflags_aprofile(env); - ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx); - uint64_t tcr = regime_tcr(env, mmu_idx)->raw_tcr; - uint64_t sctlr; - int tbii, tbid; - - DP_TBFLAG_ANY(flags, AARCH64_STATE, 1); - - /* Get control bits for tagged addresses. */ - tbid = aa64_va_parameter_tbi(tcr, mmu_idx); - tbii = tbid & ~aa64_va_parameter_tbid(tcr, mmu_idx); - - DP_TBFLAG_A64(flags, TBII, tbii); - DP_TBFLAG_A64(flags, TBID, tbid); - - if (cpu_isar_feature(aa64_sve, env_archcpu(env))) { - int sve_el = sve_exception_el(env, el); - uint32_t zcr_len; - - /* - * If SVE is disabled, but FP is enabled, - * then the effective len is 0. - */ - if (sve_el != 0 && fp_el == 0) { - zcr_len = 0; - } else { - zcr_len = sve_zcr_len_for_el(env, el); - } - DP_TBFLAG_A64(flags, SVEEXC_EL, sve_el); - DP_TBFLAG_A64(flags, ZCR_LEN, zcr_len); - } - - sctlr = regime_sctlr(env, stage1); - - if (sctlr & SCTLR_A) { - DP_TBFLAG_ANY(flags, ALIGN_MEM, 1); - } - - if (arm_cpu_data_is_big_endian_a64(el, sctlr)) { - DP_TBFLAG_ANY(flags, BE_DATA, 1); - } - - if (cpu_isar_feature(aa64_pauth, env_archcpu(env))) { - /* - * In order to save space in flags, we record only whether - * pauth is "inactive", meaning all insns are implemented as - * a nop, or "active" when some action must be performed. - * The decision of which action to take is left to a helper. - */ - if (sctlr & (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB)) { - DP_TBFLAG_A64(flags, PAUTH_ACTIVE, 1); - } - } - - if (cpu_isar_feature(aa64_bti, env_archcpu(env))) { - /* Note that SCTLR_EL[23].BT == SCTLR_BT1. */ - if (sctlr & (el == 0 ? SCTLR_BT0 : SCTLR_BT1)) { - DP_TBFLAG_A64(flags, BT, 1); - } - } - - /* Compute the condition for using AccType_UNPRIV for LDTR et al. 
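The TBII flag cached in the removed rebuild_hflags_a64() is just TBI restricted to instruction fetches. A small model with plain bitmasks in place of the aa64_va_parameter_* helpers; note that the removed code's local named 'tbid' actually holds the TBI bits:

#include <stdint.h>
#include <stdio.h>

/* Derive the data-side (TBID flag) and insn-side (TBII flag) masks. */
static void tb_flags_tbi(uint8_t tbi, uint8_t tbid,
                         uint8_t *tbii_out, uint8_t *tbid_out)
{
    *tbid_out = tbi;          /* all accesses honour TBI */
    *tbii_out = tbi & ~tbid;  /* fetches only where TBID is clear */
}

int main(void)
{
    uint8_t tbii, tbid_flags;

    /* TBI0=TBI1=1, TBID0=1: only TTBR1 fetches ignore the top byte. */
    tb_flags_tbi(0x3, 0x1, &tbii, &tbid_flags);
    printf("tbii=%#x tbid=%#x\n", tbii, tbid_flags); /* tbii=0x2 tbid=0x3 */
    return 0;
}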
*/ - if (!(env->pstate & PSTATE_UAO)) { - switch (mmu_idx) { - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: - /* TODO: ARMv8.3-NV */ - DP_TBFLAG_A64(flags, UNPRIV, 1); - break; - case ARMMMUIdx_E20_2: - case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: - /* - * Note that EL20_2 is gated by HCR_EL2.E2H == 1, but EL20_0 is - * gated by HCR_EL2.<E2H,TGE> == '11', and so is LDTR. - */ - if (env->cp15.hcr_el2 & HCR_TGE) { - DP_TBFLAG_A64(flags, UNPRIV, 1); - } - break; - default: - break; - } - } - - if (env->pstate & PSTATE_IL) { - DP_TBFLAG_ANY(flags, PSTATE__IL, 1); - } - - if (cpu_isar_feature(aa64_mte, env_archcpu(env))) { - /* - * Set MTE_ACTIVE if any access may be Checked, and leave clear - * if all accesses must be Unchecked: - * 1) If no TBI, then there are no tags in the address to check, - * 2) If Tag Check Override, then all accesses are Unchecked, - * 3) If Tag Check Fail == 0, then Checked access have no effect, - * 4) If no Allocation Tag Access, then all accesses are Unchecked. - */ - if (allocation_tag_access_enabled(env, el, sctlr)) { - DP_TBFLAG_A64(flags, ATA, 1); - if (tbid - && !(env->pstate & PSTATE_TCO) - && (sctlr & (el == 0 ? SCTLR_TCF0 : SCTLR_TCF))) { - DP_TBFLAG_A64(flags, MTE_ACTIVE, 1); - } - } - /* And again for unprivileged accesses, if required. */ - if (EX_TBFLAG_A64(flags, UNPRIV) - && tbid - && !(env->pstate & PSTATE_TCO) - && (sctlr & SCTLR_TCF0) - && allocation_tag_access_enabled(env, 0, sctlr)) { - DP_TBFLAG_A64(flags, MTE0_ACTIVE, 1); - } - /* Cache TCMA as well as TBI. */ - DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx)); - } - - return rebuild_hflags_common(env, fp_el, mmu_idx, flags); -} - -static CPUARMTBFlags rebuild_hflags_internal(CPUARMState *env) -{ - int el = arm_current_el(env); - int fp_el = fp_exception_el(env, el); - ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el); - - if (is_a64(env)) { - return rebuild_hflags_a64(env, el, fp_el, mmu_idx); - } else if (arm_feature(env, ARM_FEATURE_M)) { - return rebuild_hflags_m32(env, fp_el, mmu_idx); - } else { - return rebuild_hflags_a32(env, fp_el, mmu_idx); - } -} - -void arm_rebuild_hflags(CPUARMState *env) -{ - env->hflags = rebuild_hflags_internal(env); -} - -/* - * If we have triggered a EL state change we can't rely on the - * translator having passed it to us, we need to recompute. - */ -void HELPER(rebuild_hflags_m32_newel)(CPUARMState *env) -{ - int el = arm_current_el(env); - int fp_el = fp_exception_el(env, el); - ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el); - - env->hflags = rebuild_hflags_m32(env, fp_el, mmu_idx); -} - -void HELPER(rebuild_hflags_m32)(CPUARMState *env, int el) -{ - int fp_el = fp_exception_el(env, el); - ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el); - - env->hflags = rebuild_hflags_m32(env, fp_el, mmu_idx); -} - -/* - * If we have triggered a EL state change we can't rely on the - * translator having passed it to us, we need to recompute. 
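The four numbered conditions in the MTE comment fold into a single predicate; a sketch, where each flag is a stand-in for the corresponding TBI / PSTATE.TCO / SCTLR.TCF / allocation-tag-access test in the removed code:

#include <stdbool.h>
#include <stdio.h>

/*
 * Accesses can only be Checked if TBI provides tag bits, Tag Check
 * Override is clear, a tag-check-fault mode is configured, and
 * allocation tag access is enabled.
 */
static bool mte_active(bool tbi, bool tco, bool tcf_set, bool ata)
{
    return ata && tbi && !tco && tcf_set;
}

int main(void)
{
    printf("%d\n", mte_active(true, false, true, true)); /* 1: checked */
    printf("%d\n", mte_active(true, true, true, true));  /* 0: TCO set */
    return 0;
}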
- */ -void HELPER(rebuild_hflags_a32_newel)(CPUARMState *env) -{ - int el = arm_current_el(env); - int fp_el = fp_exception_el(env, el); - ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el); - env->hflags = rebuild_hflags_a32(env, fp_el, mmu_idx); -} - -void HELPER(rebuild_hflags_a32)(CPUARMState *env, int el) -{ - int fp_el = fp_exception_el(env, el); - ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el); - - env->hflags = rebuild_hflags_a32(env, fp_el, mmu_idx); -} - -void HELPER(rebuild_hflags_a64)(CPUARMState *env, int el) -{ - int fp_el = fp_exception_el(env, el); - ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el); - - env->hflags = rebuild_hflags_a64(env, el, fp_el, mmu_idx); -} - -static inline void assert_hflags_rebuild_correctly(CPUARMState *env) -{ -#ifdef CONFIG_DEBUG_TCG - CPUARMTBFlags c = env->hflags; - CPUARMTBFlags r = rebuild_hflags_internal(env); - - if (unlikely(c.flags != r.flags || c.flags2 != r.flags2)) { - fprintf(stderr, "TCG hflags mismatch " - "(current:(0x%08x,0x" TARGET_FMT_lx ")" - " rebuilt:(0x%08x,0x" TARGET_FMT_lx ")\n", - c.flags, c.flags2, r.flags, r.flags2); - abort(); - } -#endif -} - static bool mve_no_pred(CPUARMState *env) { /* @@ -13666,8 +12542,8 @@ static bool mve_no_pred(CPUARMState *env) return true; } -void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *pflags) +void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *pflags) { CPUARMTBFlags flags; @@ -13787,6 +12663,21 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) } } +static uint32_t sve_vqm1_for_el_sm_ena(CPUARMState *env, int el, bool sm) +{ + int exc_el; + + if (sm) { + exc_el = sme_exception_el(env, el); + } else { + exc_el = sve_exception_el(env, el); + } + if (exc_el) { + return 0; /* disabled */ + } + return sve_vqm1_for_el_sm(env, el, sm); +} + /* * Notice a change in SVE vector size when changing EL. */ @@ -13795,7 +12686,7 @@ void aarch64_sve_change_el(CPUARMState *env, int old_el, { ARMCPU *cpu = env_archcpu(env); int old_len, new_len; - bool old_a64, new_a64; + bool old_a64, new_a64, sm; /* Nothing to do if no SVE. */ if (!cpu_isar_feature(aa64_sve, cpu)) { @@ -13807,6 +12698,20 @@ void aarch64_sve_change_el(CPUARMState *env, int old_el, return; } + old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64; + new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64; + + /* + * Both AArch64.TakeException and AArch64.ExceptionReturn + * invoke ResetSVEState when taking an exception from, or + * returning to, AArch32 state when PSTATE.SM is enabled. + */ + sm = FIELD_EX64(env->svcr, SVCR, SM); + if (old_a64 != new_a64 && sm) { + arm_reset_sve_state(env); + return; + } + /* * DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped * at ELx, or not available because the EL is in AArch32 state, then @@ -13819,12 +12724,13 @@ void aarch64_sve_change_el(CPUARMState *env, int old_el, * we already have the correct register contents when encountering the * vq0->vq0 transition between EL0->EL1. */ - old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64; - old_len = (old_a64 && !sve_exception_el(env, old_el) - ? sve_zcr_len_for_el(env, old_el) : 0); - new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64; - new_len = (new_a64 && !sve_exception_el(env, new_el) - ? 
sve_zcr_len_for_el(env, new_el) : 0); + old_len = new_len = 0; + if (old_a64) { + old_len = sve_vqm1_for_el_sm_ena(env, old_el, sm); + } + if (new_a64) { + new_len = sve_vqm1_for_el_sm_ena(env, new_el, sm); + } /* When changing vector length, clear inaccessible state. */ if (new_len < old_len) { @@ -13832,3 +12738,63 @@ void aarch64_sve_change_el(CPUARMState *env, int old_el, } } #endif + +#ifndef CONFIG_USER_ONLY +ARMSecuritySpace arm_security_space(CPUARMState *env) +{ + if (arm_feature(env, ARM_FEATURE_M)) { + return arm_secure_to_space(env->v7m.secure); + } + + /* + * If EL3 is not supported then the secure state is implementation + * defined, in which case QEMU defaults to non-secure. + */ + if (!arm_feature(env, ARM_FEATURE_EL3)) { + return ARMSS_NonSecure; + } + + /* Check for AArch64 EL3 or AArch32 Mon. */ + if (is_a64(env)) { + if (extract32(env->pstate, 2, 2) == 3) { + if (cpu_isar_feature(aa64_rme, env_archcpu(env))) { + return ARMSS_Root; + } else { + return ARMSS_Secure; + } + } + } else { + if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_MON) { + return ARMSS_Secure; + } + } + + return arm_security_space_below_el3(env); +} + +ARMSecuritySpace arm_security_space_below_el3(CPUARMState *env) +{ + assert(!arm_feature(env, ARM_FEATURE_M)); + + /* + * If EL3 is not supported then the secure state is implementation + * defined, in which case QEMU defaults to non-secure. + */ + if (!arm_feature(env, ARM_FEATURE_EL3)) { + return ARMSS_NonSecure; + } + + /* + * Note NSE cannot be set without RME, and NSE & !NS is Reserved. + * Ignoring NSE when !NS retains consistency without having to + * modify other predicates. + */ + if (!(env->cp15.scr_el3 & SCR_NS)) { + return ARMSS_Secure; + } else if (env->cp15.scr_el3 & SCR_NSE) { + return ARMSS_Realm; + } else { + return ARMSS_NonSecure; + } +} +#endif /* !CONFIG_USER_ONLY */ diff --git a/target/arm/helper.h b/target/arm/helper.h index 448a86edfd..2b02733305 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -44,15 +44,19 @@ DEF_HELPER_FLAGS_2(usad8, TCG_CALL_NO_RWG_SE, i32, i32, i32) DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) -DEF_HELPER_2(exception_internal, void, env, i32) -DEF_HELPER_4(exception_with_syndrome, void, env, i32, i32, i32) -DEF_HELPER_2(exception_bkpt_insn, void, env, i32) +DEF_HELPER_2(exception_internal, noreturn, env, i32) +DEF_HELPER_3(exception_with_syndrome, noreturn, env, i32, i32) +DEF_HELPER_4(exception_with_syndrome_el, noreturn, env, i32, i32, i32) +DEF_HELPER_2(exception_bkpt_insn, noreturn, env, i32) +DEF_HELPER_2(exception_swstep, noreturn, env, i32) +DEF_HELPER_2(exception_pc_alignment, noreturn, env, tl) DEF_HELPER_1(setend, void, env) DEF_HELPER_2(wfi, void, env, i32) DEF_HELPER_1(wfe, void, env) DEF_HELPER_1(yield, void, env) DEF_HELPER_1(pre_hvc, void, env) DEF_HELPER_2(pre_smc, void, env, i32) +DEF_HELPER_1(vesb, void, env) DEF_HELPER_3(cpsr_write, void, env, i32, i32) DEF_HELPER_2(cpsr_write_eret, void, env, i32) @@ -75,11 +79,14 @@ DEF_HELPER_2(v8m_stackcheck, void, env, i32) DEF_HELPER_FLAGS_2(check_bxj_trap, TCG_CALL_NO_WG, void, env, i32) -DEF_HELPER_4(access_check_cp_reg, void, env, ptr, i32, i32) -DEF_HELPER_3(set_cp_reg, void, env, ptr, i32) -DEF_HELPER_2(get_cp_reg, i32, env, ptr) -DEF_HELPER_3(set_cp_reg64, void, env, ptr, i64) -DEF_HELPER_2(get_cp_reg64, i64, env, ptr) +DEF_HELPER_4(access_check_cp_reg, cptr, env, i32, i32, i32) +DEF_HELPER_FLAGS_2(lookup_cp_reg, TCG_CALL_NO_RWG_SE, cptr, env, i32) +DEF_HELPER_FLAGS_2(tidcp_el0, TCG_CALL_NO_WG, void, env, 
i32) +DEF_HELPER_FLAGS_2(tidcp_el1, TCG_CALL_NO_WG, void, env, i32) +DEF_HELPER_3(set_cp_reg, void, env, cptr, i32) +DEF_HELPER_2(get_cp_reg, i32, env, cptr) +DEF_HELPER_3(set_cp_reg64, void, env, cptr, i64) +DEF_HELPER_2(get_cp_reg64, i64, env, cptr) DEF_HELPER_2(get_r13_banked, i32, env, i32) DEF_HELPER_3(set_r13_banked, void, env, i32, i32) @@ -547,7 +554,9 @@ DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_aesd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_aesimc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -1015,9 +1024,28 @@ DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 -#include "helper-a64.h" -#include "helper-sve.h" +#include "tcg/helper-a64.h" +#include "tcg/helper-sve.h" +#include "tcg/helper-sme.h" #endif -#include "helper-mve.h" +#include "tcg/helper-mve.h" diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index bff3e0cde7..65a5601804 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -10,7 +10,6 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "qemu/error-report.h" #include "sysemu/runstate.h" @@ -18,6 +17,7 @@ #include "sysemu/hvf_int.h" #include "sysemu/hw_accel.h" #include "hvf_arm.h" +#include "cpregs.h" #include <mach/mach_time.h> @@ -28,16 +28,158 @@ #include "arm-powerctl.h" #include "target/arm/cpu.h" #include "target/arm/internals.h" +#include "target/arm/multiprocessing.h" +#include "target/arm/gtimer.h" #include "trace/trace-target_arm_hvf.h" #include "migration/vmstate.h" +#include "exec/gdbstub.h" + +#define MDSCR_EL1_SS_SHIFT 0 +#define MDSCR_EL1_MDE_SHIFT 15 + +static const uint16_t dbgbcr_regs[] = { + HV_SYS_REG_DBGBCR0_EL1, + HV_SYS_REG_DBGBCR1_EL1, + HV_SYS_REG_DBGBCR2_EL1, + HV_SYS_REG_DBGBCR3_EL1, + HV_SYS_REG_DBGBCR4_EL1, + HV_SYS_REG_DBGBCR5_EL1, + HV_SYS_REG_DBGBCR6_EL1, + HV_SYS_REG_DBGBCR7_EL1, + HV_SYS_REG_DBGBCR8_EL1, + HV_SYS_REG_DBGBCR9_EL1, + HV_SYS_REG_DBGBCR10_EL1, + HV_SYS_REG_DBGBCR11_EL1, + HV_SYS_REG_DBGBCR12_EL1, + HV_SYS_REG_DBGBCR13_EL1, + HV_SYS_REG_DBGBCR14_EL1, + HV_SYS_REG_DBGBCR15_EL1, +}; + +static const uint16_t dbgbvr_regs[] = { + HV_SYS_REG_DBGBVR0_EL1, + HV_SYS_REG_DBGBVR1_EL1, + HV_SYS_REG_DBGBVR2_EL1, + HV_SYS_REG_DBGBVR3_EL1, + HV_SYS_REG_DBGBVR4_EL1, + HV_SYS_REG_DBGBVR5_EL1, + HV_SYS_REG_DBGBVR6_EL1, + HV_SYS_REG_DBGBVR7_EL1, + HV_SYS_REG_DBGBVR8_EL1, + HV_SYS_REG_DBGBVR9_EL1, + 
HV_SYS_REG_DBGBVR10_EL1, + HV_SYS_REG_DBGBVR11_EL1, + HV_SYS_REG_DBGBVR12_EL1, + HV_SYS_REG_DBGBVR13_EL1, + HV_SYS_REG_DBGBVR14_EL1, + HV_SYS_REG_DBGBVR15_EL1, +}; + +static const uint16_t dbgwcr_regs[] = { + HV_SYS_REG_DBGWCR0_EL1, + HV_SYS_REG_DBGWCR1_EL1, + HV_SYS_REG_DBGWCR2_EL1, + HV_SYS_REG_DBGWCR3_EL1, + HV_SYS_REG_DBGWCR4_EL1, + HV_SYS_REG_DBGWCR5_EL1, + HV_SYS_REG_DBGWCR6_EL1, + HV_SYS_REG_DBGWCR7_EL1, + HV_SYS_REG_DBGWCR8_EL1, + HV_SYS_REG_DBGWCR9_EL1, + HV_SYS_REG_DBGWCR10_EL1, + HV_SYS_REG_DBGWCR11_EL1, + HV_SYS_REG_DBGWCR12_EL1, + HV_SYS_REG_DBGWCR13_EL1, + HV_SYS_REG_DBGWCR14_EL1, + HV_SYS_REG_DBGWCR15_EL1, +}; + +static const uint16_t dbgwvr_regs[] = { + HV_SYS_REG_DBGWVR0_EL1, + HV_SYS_REG_DBGWVR1_EL1, + HV_SYS_REG_DBGWVR2_EL1, + HV_SYS_REG_DBGWVR3_EL1, + HV_SYS_REG_DBGWVR4_EL1, + HV_SYS_REG_DBGWVR5_EL1, + HV_SYS_REG_DBGWVR6_EL1, + HV_SYS_REG_DBGWVR7_EL1, + HV_SYS_REG_DBGWVR8_EL1, + HV_SYS_REG_DBGWVR9_EL1, + HV_SYS_REG_DBGWVR10_EL1, + HV_SYS_REG_DBGWVR11_EL1, + HV_SYS_REG_DBGWVR12_EL1, + HV_SYS_REG_DBGWVR13_EL1, + HV_SYS_REG_DBGWVR14_EL1, + HV_SYS_REG_DBGWVR15_EL1, +}; + +static inline int hvf_arm_num_brps(hv_vcpu_config_t config) +{ + uint64_t val; + hv_return_t ret; + ret = hv_vcpu_config_get_feature_reg(config, HV_FEATURE_REG_ID_AA64DFR0_EL1, + &val); + assert_hvf_ok(ret); + return FIELD_EX64(val, ID_AA64DFR0, BRPS) + 1; +} + +static inline int hvf_arm_num_wrps(hv_vcpu_config_t config) +{ + uint64_t val; + hv_return_t ret; + ret = hv_vcpu_config_get_feature_reg(config, HV_FEATURE_REG_ID_AA64DFR0_EL1, + &val); + assert_hvf_ok(ret); + return FIELD_EX64(val, ID_AA64DFR0, WRPS) + 1; +} + +void hvf_arm_init_debug(void) +{ + hv_vcpu_config_t config; + config = hv_vcpu_config_create(); + + max_hw_bps = hvf_arm_num_brps(config); + hw_breakpoints = + g_array_sized_new(true, true, sizeof(HWBreakpoint), max_hw_bps); + + max_hw_wps = hvf_arm_num_wrps(config); + hw_watchpoints = + g_array_sized_new(true, true, sizeof(HWWatchpoint), max_hw_wps); +} + #define HVF_SYSREG(crn, crm, op0, op1, op2) \ ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2) #define PL1_WRITE_MASK 0x4 +#define SYSREG_OP0_SHIFT 20 +#define SYSREG_OP0_MASK 0x3 +#define SYSREG_OP0(sysreg) ((sysreg >> SYSREG_OP0_SHIFT) & SYSREG_OP0_MASK) +#define SYSREG_OP1_SHIFT 14 +#define SYSREG_OP1_MASK 0x7 +#define SYSREG_OP1(sysreg) ((sysreg >> SYSREG_OP1_SHIFT) & SYSREG_OP1_MASK) +#define SYSREG_CRN_SHIFT 10 +#define SYSREG_CRN_MASK 0xf +#define SYSREG_CRN(sysreg) ((sysreg >> SYSREG_CRN_SHIFT) & SYSREG_CRN_MASK) +#define SYSREG_CRM_SHIFT 1 +#define SYSREG_CRM_MASK 0xf +#define SYSREG_CRM(sysreg) ((sysreg >> SYSREG_CRM_SHIFT) & SYSREG_CRM_MASK) +#define SYSREG_OP2_SHIFT 17 +#define SYSREG_OP2_MASK 0x7 +#define SYSREG_OP2(sysreg) ((sysreg >> SYSREG_OP2_SHIFT) & SYSREG_OP2_MASK) + #define SYSREG(op0, op1, crn, crm, op2) \ - ((op0 << 20) | (op2 << 17) | (op1 << 14) | (crn << 10) | (crm << 1)) -#define SYSREG_MASK SYSREG(0x3, 0x7, 0xf, 0xf, 0x7) + ((op0 << SYSREG_OP0_SHIFT) | \ + (op1 << SYSREG_OP1_SHIFT) | \ + (crn << SYSREG_CRN_SHIFT) | \ + (crm << SYSREG_CRM_SHIFT) | \ + (op2 << SYSREG_OP2_SHIFT)) +#define SYSREG_MASK \ + SYSREG(SYSREG_OP0_MASK, \ + SYSREG_OP1_MASK, \ + SYSREG_CRN_MASK, \ + SYSREG_CRM_MASK, \ + SYSREG_OP2_MASK) #define SYSREG_OSLAR_EL1 SYSREG(2, 0, 1, 0, 4) #define SYSREG_OSLSR_EL1 SYSREG(2, 0, 1, 1, 4) #define SYSREG_OSDLR_EL1 SYSREG(2, 0, 1, 3, 4) @@ -55,6 +197,99 @@ #define SYSREG_PMCCNTR_EL0 SYSREG(3, 3, 9, 13, 0) #define SYSREG_PMCCFILTR_EL0 SYSREG(3, 3, 14, 15, 7) +#define 
SYSREG_ICC_AP0R0_EL1 SYSREG(3, 0, 12, 8, 4) +#define SYSREG_ICC_AP0R1_EL1 SYSREG(3, 0, 12, 8, 5) +#define SYSREG_ICC_AP0R2_EL1 SYSREG(3, 0, 12, 8, 6) +#define SYSREG_ICC_AP0R3_EL1 SYSREG(3, 0, 12, 8, 7) +#define SYSREG_ICC_AP1R0_EL1 SYSREG(3, 0, 12, 9, 0) +#define SYSREG_ICC_AP1R1_EL1 SYSREG(3, 0, 12, 9, 1) +#define SYSREG_ICC_AP1R2_EL1 SYSREG(3, 0, 12, 9, 2) +#define SYSREG_ICC_AP1R3_EL1 SYSREG(3, 0, 12, 9, 3) +#define SYSREG_ICC_ASGI1R_EL1 SYSREG(3, 0, 12, 11, 6) +#define SYSREG_ICC_BPR0_EL1 SYSREG(3, 0, 12, 8, 3) +#define SYSREG_ICC_BPR1_EL1 SYSREG(3, 0, 12, 12, 3) +#define SYSREG_ICC_CTLR_EL1 SYSREG(3, 0, 12, 12, 4) +#define SYSREG_ICC_DIR_EL1 SYSREG(3, 0, 12, 11, 1) +#define SYSREG_ICC_EOIR0_EL1 SYSREG(3, 0, 12, 8, 1) +#define SYSREG_ICC_EOIR1_EL1 SYSREG(3, 0, 12, 12, 1) +#define SYSREG_ICC_HPPIR0_EL1 SYSREG(3, 0, 12, 8, 2) +#define SYSREG_ICC_HPPIR1_EL1 SYSREG(3, 0, 12, 12, 2) +#define SYSREG_ICC_IAR0_EL1 SYSREG(3, 0, 12, 8, 0) +#define SYSREG_ICC_IAR1_EL1 SYSREG(3, 0, 12, 12, 0) +#define SYSREG_ICC_IGRPEN0_EL1 SYSREG(3, 0, 12, 12, 6) +#define SYSREG_ICC_IGRPEN1_EL1 SYSREG(3, 0, 12, 12, 7) +#define SYSREG_ICC_PMR_EL1 SYSREG(3, 0, 4, 6, 0) +#define SYSREG_ICC_RPR_EL1 SYSREG(3, 0, 12, 11, 3) +#define SYSREG_ICC_SGI0R_EL1 SYSREG(3, 0, 12, 11, 7) +#define SYSREG_ICC_SGI1R_EL1 SYSREG(3, 0, 12, 11, 5) +#define SYSREG_ICC_SRE_EL1 SYSREG(3, 0, 12, 12, 5) + +#define SYSREG_MDSCR_EL1 SYSREG(2, 0, 0, 2, 2) +#define SYSREG_DBGBVR0_EL1 SYSREG(2, 0, 0, 0, 4) +#define SYSREG_DBGBCR0_EL1 SYSREG(2, 0, 0, 0, 5) +#define SYSREG_DBGWVR0_EL1 SYSREG(2, 0, 0, 0, 6) +#define SYSREG_DBGWCR0_EL1 SYSREG(2, 0, 0, 0, 7) +#define SYSREG_DBGBVR1_EL1 SYSREG(2, 0, 0, 1, 4) +#define SYSREG_DBGBCR1_EL1 SYSREG(2, 0, 0, 1, 5) +#define SYSREG_DBGWVR1_EL1 SYSREG(2, 0, 0, 1, 6) +#define SYSREG_DBGWCR1_EL1 SYSREG(2, 0, 0, 1, 7) +#define SYSREG_DBGBVR2_EL1 SYSREG(2, 0, 0, 2, 4) +#define SYSREG_DBGBCR2_EL1 SYSREG(2, 0, 0, 2, 5) +#define SYSREG_DBGWVR2_EL1 SYSREG(2, 0, 0, 2, 6) +#define SYSREG_DBGWCR2_EL1 SYSREG(2, 0, 0, 2, 7) +#define SYSREG_DBGBVR3_EL1 SYSREG(2, 0, 0, 3, 4) +#define SYSREG_DBGBCR3_EL1 SYSREG(2, 0, 0, 3, 5) +#define SYSREG_DBGWVR3_EL1 SYSREG(2, 0, 0, 3, 6) +#define SYSREG_DBGWCR3_EL1 SYSREG(2, 0, 0, 3, 7) +#define SYSREG_DBGBVR4_EL1 SYSREG(2, 0, 0, 4, 4) +#define SYSREG_DBGBCR4_EL1 SYSREG(2, 0, 0, 4, 5) +#define SYSREG_DBGWVR4_EL1 SYSREG(2, 0, 0, 4, 6) +#define SYSREG_DBGWCR4_EL1 SYSREG(2, 0, 0, 4, 7) +#define SYSREG_DBGBVR5_EL1 SYSREG(2, 0, 0, 5, 4) +#define SYSREG_DBGBCR5_EL1 SYSREG(2, 0, 0, 5, 5) +#define SYSREG_DBGWVR5_EL1 SYSREG(2, 0, 0, 5, 6) +#define SYSREG_DBGWCR5_EL1 SYSREG(2, 0, 0, 5, 7) +#define SYSREG_DBGBVR6_EL1 SYSREG(2, 0, 0, 6, 4) +#define SYSREG_DBGBCR6_EL1 SYSREG(2, 0, 0, 6, 5) +#define SYSREG_DBGWVR6_EL1 SYSREG(2, 0, 0, 6, 6) +#define SYSREG_DBGWCR6_EL1 SYSREG(2, 0, 0, 6, 7) +#define SYSREG_DBGBVR7_EL1 SYSREG(2, 0, 0, 7, 4) +#define SYSREG_DBGBCR7_EL1 SYSREG(2, 0, 0, 7, 5) +#define SYSREG_DBGWVR7_EL1 SYSREG(2, 0, 0, 7, 6) +#define SYSREG_DBGWCR7_EL1 SYSREG(2, 0, 0, 7, 7) +#define SYSREG_DBGBVR8_EL1 SYSREG(2, 0, 0, 8, 4) +#define SYSREG_DBGBCR8_EL1 SYSREG(2, 0, 0, 8, 5) +#define SYSREG_DBGWVR8_EL1 SYSREG(2, 0, 0, 8, 6) +#define SYSREG_DBGWCR8_EL1 SYSREG(2, 0, 0, 8, 7) +#define SYSREG_DBGBVR9_EL1 SYSREG(2, 0, 0, 9, 4) +#define SYSREG_DBGBCR9_EL1 SYSREG(2, 0, 0, 9, 5) +#define SYSREG_DBGWVR9_EL1 SYSREG(2, 0, 0, 9, 6) +#define SYSREG_DBGWCR9_EL1 SYSREG(2, 0, 0, 9, 7) +#define SYSREG_DBGBVR10_EL1 SYSREG(2, 0, 0, 10, 4) +#define SYSREG_DBGBCR10_EL1 SYSREG(2, 0, 0, 10, 5) +#define 
SYSREG_DBGWVR10_EL1 SYSREG(2, 0, 0, 10, 6) +#define SYSREG_DBGWCR10_EL1 SYSREG(2, 0, 0, 10, 7) +#define SYSREG_DBGBVR11_EL1 SYSREG(2, 0, 0, 11, 4) +#define SYSREG_DBGBCR11_EL1 SYSREG(2, 0, 0, 11, 5) +#define SYSREG_DBGWVR11_EL1 SYSREG(2, 0, 0, 11, 6) +#define SYSREG_DBGWCR11_EL1 SYSREG(2, 0, 0, 11, 7) +#define SYSREG_DBGBVR12_EL1 SYSREG(2, 0, 0, 12, 4) +#define SYSREG_DBGBCR12_EL1 SYSREG(2, 0, 0, 12, 5) +#define SYSREG_DBGWVR12_EL1 SYSREG(2, 0, 0, 12, 6) +#define SYSREG_DBGWCR12_EL1 SYSREG(2, 0, 0, 12, 7) +#define SYSREG_DBGBVR13_EL1 SYSREG(2, 0, 0, 13, 4) +#define SYSREG_DBGBCR13_EL1 SYSREG(2, 0, 0, 13, 5) +#define SYSREG_DBGWVR13_EL1 SYSREG(2, 0, 0, 13, 6) +#define SYSREG_DBGWCR13_EL1 SYSREG(2, 0, 0, 13, 7) +#define SYSREG_DBGBVR14_EL1 SYSREG(2, 0, 0, 14, 4) +#define SYSREG_DBGBCR14_EL1 SYSREG(2, 0, 0, 14, 5) +#define SYSREG_DBGWVR14_EL1 SYSREG(2, 0, 0, 14, 6) +#define SYSREG_DBGWCR14_EL1 SYSREG(2, 0, 0, 14, 7) +#define SYSREG_DBGBVR15_EL1 SYSREG(2, 0, 0, 15, 4) +#define SYSREG_DBGBCR15_EL1 SYSREG(2, 0, 0, 15, 5) +#define SYSREG_DBGWVR15_EL1 SYSREG(2, 0, 0, 15, 6) +#define SYSREG_DBGWCR15_EL1 SYSREG(2, 0, 0, 15, 7) + #define WFX_IS_WFE (1 << 0) #define TMR_CTL_ENABLE (1 << 0) @@ -314,29 +549,29 @@ int hvf_get_registers(CPUState *cpu) int i; for (i = 0; i < ARRAY_SIZE(hvf_reg_match); i++) { - ret = hv_vcpu_get_reg(cpu->hvf->fd, hvf_reg_match[i].reg, &val); + ret = hv_vcpu_get_reg(cpu->accel->fd, hvf_reg_match[i].reg, &val); *(uint64_t *)((void *)env + hvf_reg_match[i].offset) = val; assert_hvf_ok(ret); } for (i = 0; i < ARRAY_SIZE(hvf_fpreg_match); i++) { - ret = hv_vcpu_get_simd_fp_reg(cpu->hvf->fd, hvf_fpreg_match[i].reg, + ret = hv_vcpu_get_simd_fp_reg(cpu->accel->fd, hvf_fpreg_match[i].reg, &fpval); memcpy((void *)env + hvf_fpreg_match[i].offset, &fpval, sizeof(fpval)); assert_hvf_ok(ret); } val = 0; - ret = hv_vcpu_get_reg(cpu->hvf->fd, HV_REG_FPCR, &val); + ret = hv_vcpu_get_reg(cpu->accel->fd, HV_REG_FPCR, &val); assert_hvf_ok(ret); vfp_set_fpcr(env, val); val = 0; - ret = hv_vcpu_get_reg(cpu->hvf->fd, HV_REG_FPSR, &val); + ret = hv_vcpu_get_reg(cpu->accel->fd, HV_REG_FPSR, &val); assert_hvf_ok(ret); vfp_set_fpsr(env, val); - ret = hv_vcpu_get_reg(cpu->hvf->fd, HV_REG_CPSR, &val); + ret = hv_vcpu_get_reg(cpu->accel->fd, HV_REG_CPSR, &val); assert_hvf_ok(ret); pstate_write(env, val); @@ -345,7 +580,93 @@ int hvf_get_registers(CPUState *cpu) continue; } - ret = hv_vcpu_get_sys_reg(cpu->hvf->fd, hvf_sreg_match[i].reg, &val); + if (cpu->accel->guest_debug_enabled) { + /* Handle debug registers */ + switch (hvf_sreg_match[i].reg) { + case HV_SYS_REG_DBGBVR0_EL1: + case HV_SYS_REG_DBGBCR0_EL1: + case HV_SYS_REG_DBGWVR0_EL1: + case HV_SYS_REG_DBGWCR0_EL1: + case HV_SYS_REG_DBGBVR1_EL1: + case HV_SYS_REG_DBGBCR1_EL1: + case HV_SYS_REG_DBGWVR1_EL1: + case HV_SYS_REG_DBGWCR1_EL1: + case HV_SYS_REG_DBGBVR2_EL1: + case HV_SYS_REG_DBGBCR2_EL1: + case HV_SYS_REG_DBGWVR2_EL1: + case HV_SYS_REG_DBGWCR2_EL1: + case HV_SYS_REG_DBGBVR3_EL1: + case HV_SYS_REG_DBGBCR3_EL1: + case HV_SYS_REG_DBGWVR3_EL1: + case HV_SYS_REG_DBGWCR3_EL1: + case HV_SYS_REG_DBGBVR4_EL1: + case HV_SYS_REG_DBGBCR4_EL1: + case HV_SYS_REG_DBGWVR4_EL1: + case HV_SYS_REG_DBGWCR4_EL1: + case HV_SYS_REG_DBGBVR5_EL1: + case HV_SYS_REG_DBGBCR5_EL1: + case HV_SYS_REG_DBGWVR5_EL1: + case HV_SYS_REG_DBGWCR5_EL1: + case HV_SYS_REG_DBGBVR6_EL1: + case HV_SYS_REG_DBGBCR6_EL1: + case HV_SYS_REG_DBGWVR6_EL1: + case HV_SYS_REG_DBGWCR6_EL1: + case HV_SYS_REG_DBGBVR7_EL1: + case HV_SYS_REG_DBGBCR7_EL1: + case HV_SYS_REG_DBGWVR7_EL1: + case 
HV_SYS_REG_DBGWCR7_EL1: + case HV_SYS_REG_DBGBVR8_EL1: + case HV_SYS_REG_DBGBCR8_EL1: + case HV_SYS_REG_DBGWVR8_EL1: + case HV_SYS_REG_DBGWCR8_EL1: + case HV_SYS_REG_DBGBVR9_EL1: + case HV_SYS_REG_DBGBCR9_EL1: + case HV_SYS_REG_DBGWVR9_EL1: + case HV_SYS_REG_DBGWCR9_EL1: + case HV_SYS_REG_DBGBVR10_EL1: + case HV_SYS_REG_DBGBCR10_EL1: + case HV_SYS_REG_DBGWVR10_EL1: + case HV_SYS_REG_DBGWCR10_EL1: + case HV_SYS_REG_DBGBVR11_EL1: + case HV_SYS_REG_DBGBCR11_EL1: + case HV_SYS_REG_DBGWVR11_EL1: + case HV_SYS_REG_DBGWCR11_EL1: + case HV_SYS_REG_DBGBVR12_EL1: + case HV_SYS_REG_DBGBCR12_EL1: + case HV_SYS_REG_DBGWVR12_EL1: + case HV_SYS_REG_DBGWCR12_EL1: + case HV_SYS_REG_DBGBVR13_EL1: + case HV_SYS_REG_DBGBCR13_EL1: + case HV_SYS_REG_DBGWVR13_EL1: + case HV_SYS_REG_DBGWCR13_EL1: + case HV_SYS_REG_DBGBVR14_EL1: + case HV_SYS_REG_DBGBCR14_EL1: + case HV_SYS_REG_DBGWVR14_EL1: + case HV_SYS_REG_DBGWCR14_EL1: + case HV_SYS_REG_DBGBVR15_EL1: + case HV_SYS_REG_DBGBCR15_EL1: + case HV_SYS_REG_DBGWVR15_EL1: + case HV_SYS_REG_DBGWCR15_EL1: { + /* + * If the guest is being debugged, the vCPU's debug registers + * are holding the gdbstub's view of the registers (set in + * hvf_arch_update_guest_debug()). + * Since the environment is used to store only the guest's view + * of the registers, don't update it with the values from the + * vCPU but simply keep the values from the previous + * environment. + */ + const ARMCPRegInfo *ri; + ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_sreg_match[i].key); + val = read_raw_cp_reg(env, ri); + + arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx] = val; + continue; + } + } + } + + ret = hv_vcpu_get_sys_reg(cpu->accel->fd, hvf_sreg_match[i].reg, &val); assert_hvf_ok(ret); arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx] = val; @@ -368,24 +689,24 @@ int hvf_put_registers(CPUState *cpu) for (i = 0; i < ARRAY_SIZE(hvf_reg_match); i++) { val = *(uint64_t *)((void *)env + hvf_reg_match[i].offset); - ret = hv_vcpu_set_reg(cpu->hvf->fd, hvf_reg_match[i].reg, val); + ret = hv_vcpu_set_reg(cpu->accel->fd, hvf_reg_match[i].reg, val); assert_hvf_ok(ret); } for (i = 0; i < ARRAY_SIZE(hvf_fpreg_match); i++) { memcpy(&fpval, (void *)env + hvf_fpreg_match[i].offset, sizeof(fpval)); - ret = hv_vcpu_set_simd_fp_reg(cpu->hvf->fd, hvf_fpreg_match[i].reg, + ret = hv_vcpu_set_simd_fp_reg(cpu->accel->fd, hvf_fpreg_match[i].reg, fpval); assert_hvf_ok(ret); } - ret = hv_vcpu_set_reg(cpu->hvf->fd, HV_REG_FPCR, vfp_get_fpcr(env)); + ret = hv_vcpu_set_reg(cpu->accel->fd, HV_REG_FPCR, vfp_get_fpcr(env)); assert_hvf_ok(ret); - ret = hv_vcpu_set_reg(cpu->hvf->fd, HV_REG_FPSR, vfp_get_fpsr(env)); + ret = hv_vcpu_set_reg(cpu->accel->fd, HV_REG_FPSR, vfp_get_fpsr(env)); assert_hvf_ok(ret); - ret = hv_vcpu_set_reg(cpu->hvf->fd, HV_REG_CPSR, pstate_read(env)); + ret = hv_vcpu_set_reg(cpu->accel->fd, HV_REG_CPSR, pstate_read(env)); assert_hvf_ok(ret); aarch64_save_sp(env, arm_current_el(env)); @@ -396,12 +717,88 @@ int hvf_put_registers(CPUState *cpu) continue; } + if (cpu->accel->guest_debug_enabled) { + /* Handle debug registers */ + switch (hvf_sreg_match[i].reg) { + case HV_SYS_REG_DBGBVR0_EL1: + case HV_SYS_REG_DBGBCR0_EL1: + case HV_SYS_REG_DBGWVR0_EL1: + case HV_SYS_REG_DBGWCR0_EL1: + case HV_SYS_REG_DBGBVR1_EL1: + case HV_SYS_REG_DBGBCR1_EL1: + case HV_SYS_REG_DBGWVR1_EL1: + case HV_SYS_REG_DBGWCR1_EL1: + case HV_SYS_REG_DBGBVR2_EL1: + case HV_SYS_REG_DBGBCR2_EL1: + case HV_SYS_REG_DBGWVR2_EL1: + case HV_SYS_REG_DBGWCR2_EL1: + case HV_SYS_REG_DBGBVR3_EL1: + case HV_SYS_REG_DBGBCR3_EL1: + case 
HV_SYS_REG_DBGWVR3_EL1: + case HV_SYS_REG_DBGWCR3_EL1: + case HV_SYS_REG_DBGBVR4_EL1: + case HV_SYS_REG_DBGBCR4_EL1: + case HV_SYS_REG_DBGWVR4_EL1: + case HV_SYS_REG_DBGWCR4_EL1: + case HV_SYS_REG_DBGBVR5_EL1: + case HV_SYS_REG_DBGBCR5_EL1: + case HV_SYS_REG_DBGWVR5_EL1: + case HV_SYS_REG_DBGWCR5_EL1: + case HV_SYS_REG_DBGBVR6_EL1: + case HV_SYS_REG_DBGBCR6_EL1: + case HV_SYS_REG_DBGWVR6_EL1: + case HV_SYS_REG_DBGWCR6_EL1: + case HV_SYS_REG_DBGBVR7_EL1: + case HV_SYS_REG_DBGBCR7_EL1: + case HV_SYS_REG_DBGWVR7_EL1: + case HV_SYS_REG_DBGWCR7_EL1: + case HV_SYS_REG_DBGBVR8_EL1: + case HV_SYS_REG_DBGBCR8_EL1: + case HV_SYS_REG_DBGWVR8_EL1: + case HV_SYS_REG_DBGWCR8_EL1: + case HV_SYS_REG_DBGBVR9_EL1: + case HV_SYS_REG_DBGBCR9_EL1: + case HV_SYS_REG_DBGWVR9_EL1: + case HV_SYS_REG_DBGWCR9_EL1: + case HV_SYS_REG_DBGBVR10_EL1: + case HV_SYS_REG_DBGBCR10_EL1: + case HV_SYS_REG_DBGWVR10_EL1: + case HV_SYS_REG_DBGWCR10_EL1: + case HV_SYS_REG_DBGBVR11_EL1: + case HV_SYS_REG_DBGBCR11_EL1: + case HV_SYS_REG_DBGWVR11_EL1: + case HV_SYS_REG_DBGWCR11_EL1: + case HV_SYS_REG_DBGBVR12_EL1: + case HV_SYS_REG_DBGBCR12_EL1: + case HV_SYS_REG_DBGWVR12_EL1: + case HV_SYS_REG_DBGWCR12_EL1: + case HV_SYS_REG_DBGBVR13_EL1: + case HV_SYS_REG_DBGBCR13_EL1: + case HV_SYS_REG_DBGWVR13_EL1: + case HV_SYS_REG_DBGWCR13_EL1: + case HV_SYS_REG_DBGBVR14_EL1: + case HV_SYS_REG_DBGBCR14_EL1: + case HV_SYS_REG_DBGWVR14_EL1: + case HV_SYS_REG_DBGWCR14_EL1: + case HV_SYS_REG_DBGBVR15_EL1: + case HV_SYS_REG_DBGBCR15_EL1: + case HV_SYS_REG_DBGWVR15_EL1: + case HV_SYS_REG_DBGWCR15_EL1: + /* + * If the guest is being debugged, the vCPU's debug registers + * are already holding the gdbstub's view of the registers (set + * in hvf_arch_update_guest_debug()). + */ + continue; + } + } + val = arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx]; - ret = hv_vcpu_set_sys_reg(cpu->hvf->fd, hvf_sreg_match[i].reg, val); + ret = hv_vcpu_set_sys_reg(cpu->accel->fd, hvf_sreg_match[i].reg, val); assert_hvf_ok(ret); } - ret = hv_vcpu_set_vtimer_offset(cpu->hvf->fd, hvf_state->vtimer_offset); + ret = hv_vcpu_set_vtimer_offset(cpu->accel->fd, hvf_state->vtimer_offset); assert_hvf_ok(ret); return 0; @@ -422,7 +819,7 @@ static void hvf_set_reg(CPUState *cpu, int rt, uint64_t val) flush_cpu_state(cpu); if (rt < 31) { - r = hv_vcpu_set_reg(cpu->hvf->fd, HV_REG_X0 + rt, val); + r = hv_vcpu_set_reg(cpu->accel->fd, HV_REG_X0 + rt, val); assert_hvf_ok(r); } } @@ -435,7 +832,7 @@ static uint64_t hvf_get_reg(CPUState *cpu, int rt) flush_cpu_state(cpu); if (rt < 31) { - r = hv_vcpu_get_reg(cpu->hvf->fd, HV_REG_X0 + rt, &val); + r = hv_vcpu_get_reg(cpu->accel->fd, HV_REG_X0 + rt, &val); assert_hvf_ok(r); } @@ -455,6 +852,7 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) { HV_SYS_REG_ID_AA64DFR1_EL1, &host_isar.id_aa64dfr1 }, { HV_SYS_REG_ID_AA64ISAR0_EL1, &host_isar.id_aa64isar0 }, { HV_SYS_REG_ID_AA64ISAR1_EL1, &host_isar.id_aa64isar1 }, + /* Add ID_AA64ISAR2_EL1 here when HVF supports it */ { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.id_aa64mmfr0 }, { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.id_aa64mmfr1 }, { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.id_aa64mmfr2 }, @@ -540,7 +938,7 @@ int hvf_arch_init_vcpu(CPUState *cpu) hv_return_t ret; int i; - env->aarch64 = 1; + env->aarch64 = true; asm volatile("mrs %0, cntfrq_el0" : "=r"(arm_cpu->gt_cntfrq_hz)); /* Allocate enough space for our sysreg sync */ @@ -577,22 +975,22 @@ int hvf_arch_init_vcpu(CPUState *cpu) assert(write_cpustate_to_list(arm_cpu, false)); /* Set CP_NO_RAW system registers on init 
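Both register-sync loops skip the debug registers with a 64-label switch. Since the file already defines the dbg{b,w}{c,v}r_regs arrays, the same test could in principle be table-driven; a sketch under that assumption, using placeholder IDs rather than the real HV_SYS_REG_* values:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder register IDs; the real code has four 16-entry tables. */
static const uint16_t dbg_regs_demo[] = { 0x8005, 0x8006, 0x8007 };

static bool is_debug_reg(uint16_t reg, const uint16_t *table, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        if (table[i] == reg) {
            return true;
        }
    }
    return false;
}

int main(void)
{
    size_t n = sizeof(dbg_regs_demo) / sizeof(dbg_regs_demo[0]);

    printf("%d\n", is_debug_reg(0x8006, dbg_regs_demo, n)); /* 1: skip it */
    printf("%d\n", is_debug_reg(0x1234, dbg_regs_demo, n)); /* 0: sync it */
    return 0;
}

Whether the table lookup is preferable to the switch is a judgment call; the switch makes the skipped set explicit at the call site, at the cost of the repetition visible above.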
*/ - ret = hv_vcpu_set_sys_reg(cpu->hvf->fd, HV_SYS_REG_MIDR_EL1, + ret = hv_vcpu_set_sys_reg(cpu->accel->fd, HV_SYS_REG_MIDR_EL1, arm_cpu->midr); assert_hvf_ok(ret); - ret = hv_vcpu_set_sys_reg(cpu->hvf->fd, HV_SYS_REG_MPIDR_EL1, + ret = hv_vcpu_set_sys_reg(cpu->accel->fd, HV_SYS_REG_MPIDR_EL1, arm_cpu->mp_affinity); assert_hvf_ok(ret); - ret = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_ID_AA64PFR0_EL1, &pfr); + ret = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_ID_AA64PFR0_EL1, &pfr); assert_hvf_ok(ret); pfr |= env->gicv3state ? (1 << 24) : 0; - ret = hv_vcpu_set_sys_reg(cpu->hvf->fd, HV_SYS_REG_ID_AA64PFR0_EL1, pfr); + ret = hv_vcpu_set_sys_reg(cpu->accel->fd, HV_SYS_REG_ID_AA64PFR0_EL1, pfr); assert_hvf_ok(ret); /* We're limited to underlying hardware caps, override internal versions */ - ret = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_ID_AA64MMFR0_EL1, + ret = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_ID_AA64MMFR0_EL1, &arm_cpu->isar.id_aa64mmfr0); assert_hvf_ok(ret); @@ -602,7 +1000,7 @@ int hvf_arch_init_vcpu(CPUState *cpu) void hvf_kick_vcpu_thread(CPUState *cpu) { cpus_kick_thread(cpu); - hv_vcpus_exit(&cpu->hvf->fd, 1); + hv_vcpus_exit(&cpu->accel->fd, 1); } static void hvf_raise_exception(CPUState *cpu, uint32_t excp, @@ -620,7 +1018,7 @@ static void hvf_raise_exception(CPUState *cpu, uint32_t excp, static void hvf_psci_cpu_off(ARMCPU *arm_cpu) { - int32_t ret = arm_set_cpu_off(arm_cpu->mp_affinity); + int32_t ret = arm_set_cpu_off(arm_cpu_mp_affinity(arm_cpu)); assert(ret == QEMU_ARM_POWERCTL_RET_SUCCESS); } @@ -649,11 +1047,11 @@ static bool hvf_handle_psci_call(CPUState *cpu) int32_t ret = 0; trace_hvf_psci_call(param[0], param[1], param[2], param[3], - arm_cpu->mp_affinity); + arm_cpu_mp_affinity(arm_cpu)); switch (param[0]) { case QEMU_PSCI_0_2_FN_PSCI_VERSION: - ret = QEMU_PSCI_0_2_RET_VERSION_0_2; + ret = QEMU_PSCI_VERSION_1_1; break; case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE: ret = QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED; /* No trusted OS */ @@ -721,6 +1119,31 @@ static bool hvf_handle_psci_call(CPUState *cpu) case QEMU_PSCI_0_2_FN_MIGRATE: ret = QEMU_PSCI_RET_NOT_SUPPORTED; break; + case QEMU_PSCI_1_0_FN_PSCI_FEATURES: + switch (param[1]) { + case QEMU_PSCI_0_2_FN_PSCI_VERSION: + case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE: + case QEMU_PSCI_0_2_FN_AFFINITY_INFO: + case QEMU_PSCI_0_2_FN64_AFFINITY_INFO: + case QEMU_PSCI_0_2_FN_SYSTEM_RESET: + case QEMU_PSCI_0_2_FN_SYSTEM_OFF: + case QEMU_PSCI_0_1_FN_CPU_ON: + case QEMU_PSCI_0_2_FN_CPU_ON: + case QEMU_PSCI_0_2_FN64_CPU_ON: + case QEMU_PSCI_0_1_FN_CPU_OFF: + case QEMU_PSCI_0_2_FN_CPU_OFF: + case QEMU_PSCI_0_1_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN64_CPU_SUSPEND: + case QEMU_PSCI_1_0_FN_PSCI_FEATURES: + ret = 0; + break; + case QEMU_PSCI_0_1_FN_MIGRATE: + case QEMU_PSCI_0_2_FN_MIGRATE: + default: + ret = QEMU_PSCI_RET_NOT_SUPPORTED; + } + break; default: return false; } @@ -729,6 +1152,52 @@ static bool hvf_handle_psci_call(CPUState *cpu) return true; } +static bool is_id_sysreg(uint32_t reg) +{ + return SYSREG_OP0(reg) == 3 && + SYSREG_OP1(reg) == 0 && + SYSREG_CRN(reg) == 0 && + SYSREG_CRM(reg) >= 1 && + SYSREG_CRM(reg) < 8; +} + +static uint32_t hvf_reg2cp_reg(uint32_t reg) +{ + return ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, + (reg >> SYSREG_CRN_SHIFT) & SYSREG_CRN_MASK, + (reg >> SYSREG_CRM_SHIFT) & SYSREG_CRM_MASK, + (reg >> SYSREG_OP0_SHIFT) & SYSREG_OP0_MASK, + (reg >> SYSREG_OP1_SHIFT) & SYSREG_OP1_MASK, + (reg >> SYSREG_OP2_SHIFT) & SYSREG_OP2_MASK); +} + +static 
bool hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg, uint64_t *val) +{ + ARMCPU *arm_cpu = ARM_CPU(cpu); + CPUARMState *env = &arm_cpu->env; + const ARMCPRegInfo *ri; + + ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg)); + if (ri) { + if (ri->accessfn) { + if (ri->accessfn(env, ri, true) != CP_ACCESS_OK) { + return false; + } + } + if (ri->type & ARM_CP_CONST) { + *val = ri->resetvalue; + } else if (ri->readfn) { + *val = ri->readfn(env, ri); + } else { + *val = CPREG_FIELD64(env, ri); + } + trace_hvf_vgic_read(ri->name, *val); + return true; + } + + return false; +} + static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt) { ARMCPU *arm_cpu = ARM_CPU(cpu); @@ -780,24 +1249,131 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt) case SYSREG_OSDLR_EL1: /* Dummy register */ break; + case SYSREG_ICC_AP0R0_EL1: + case SYSREG_ICC_AP0R1_EL1: + case SYSREG_ICC_AP0R2_EL1: + case SYSREG_ICC_AP0R3_EL1: + case SYSREG_ICC_AP1R0_EL1: + case SYSREG_ICC_AP1R1_EL1: + case SYSREG_ICC_AP1R2_EL1: + case SYSREG_ICC_AP1R3_EL1: + case SYSREG_ICC_ASGI1R_EL1: + case SYSREG_ICC_BPR0_EL1: + case SYSREG_ICC_BPR1_EL1: + case SYSREG_ICC_DIR_EL1: + case SYSREG_ICC_EOIR0_EL1: + case SYSREG_ICC_EOIR1_EL1: + case SYSREG_ICC_HPPIR0_EL1: + case SYSREG_ICC_HPPIR1_EL1: + case SYSREG_ICC_IAR0_EL1: + case SYSREG_ICC_IAR1_EL1: + case SYSREG_ICC_IGRPEN0_EL1: + case SYSREG_ICC_IGRPEN1_EL1: + case SYSREG_ICC_PMR_EL1: + case SYSREG_ICC_SGI0R_EL1: + case SYSREG_ICC_SGI1R_EL1: + case SYSREG_ICC_SRE_EL1: + case SYSREG_ICC_CTLR_EL1: + /* Call the TCG sysreg handler. This is only safe for GICv3 regs. */ + if (!hvf_sysreg_read_cp(cpu, reg, &val)) { + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + } + break; + case SYSREG_DBGBVR0_EL1: + case SYSREG_DBGBVR1_EL1: + case SYSREG_DBGBVR2_EL1: + case SYSREG_DBGBVR3_EL1: + case SYSREG_DBGBVR4_EL1: + case SYSREG_DBGBVR5_EL1: + case SYSREG_DBGBVR6_EL1: + case SYSREG_DBGBVR7_EL1: + case SYSREG_DBGBVR8_EL1: + case SYSREG_DBGBVR9_EL1: + case SYSREG_DBGBVR10_EL1: + case SYSREG_DBGBVR11_EL1: + case SYSREG_DBGBVR12_EL1: + case SYSREG_DBGBVR13_EL1: + case SYSREG_DBGBVR14_EL1: + case SYSREG_DBGBVR15_EL1: + val = env->cp15.dbgbvr[SYSREG_CRM(reg)]; + break; + case SYSREG_DBGBCR0_EL1: + case SYSREG_DBGBCR1_EL1: + case SYSREG_DBGBCR2_EL1: + case SYSREG_DBGBCR3_EL1: + case SYSREG_DBGBCR4_EL1: + case SYSREG_DBGBCR5_EL1: + case SYSREG_DBGBCR6_EL1: + case SYSREG_DBGBCR7_EL1: + case SYSREG_DBGBCR8_EL1: + case SYSREG_DBGBCR9_EL1: + case SYSREG_DBGBCR10_EL1: + case SYSREG_DBGBCR11_EL1: + case SYSREG_DBGBCR12_EL1: + case SYSREG_DBGBCR13_EL1: + case SYSREG_DBGBCR14_EL1: + case SYSREG_DBGBCR15_EL1: + val = env->cp15.dbgbcr[SYSREG_CRM(reg)]; + break; + case SYSREG_DBGWVR0_EL1: + case SYSREG_DBGWVR1_EL1: + case SYSREG_DBGWVR2_EL1: + case SYSREG_DBGWVR3_EL1: + case SYSREG_DBGWVR4_EL1: + case SYSREG_DBGWVR5_EL1: + case SYSREG_DBGWVR6_EL1: + case SYSREG_DBGWVR7_EL1: + case SYSREG_DBGWVR8_EL1: + case SYSREG_DBGWVR9_EL1: + case SYSREG_DBGWVR10_EL1: + case SYSREG_DBGWVR11_EL1: + case SYSREG_DBGWVR12_EL1: + case SYSREG_DBGWVR13_EL1: + case SYSREG_DBGWVR14_EL1: + case SYSREG_DBGWVR15_EL1: + val = env->cp15.dbgwvr[SYSREG_CRM(reg)]; + break; + case SYSREG_DBGWCR0_EL1: + case SYSREG_DBGWCR1_EL1: + case SYSREG_DBGWCR2_EL1: + case SYSREG_DBGWCR3_EL1: + case SYSREG_DBGWCR4_EL1: + case SYSREG_DBGWCR5_EL1: + case SYSREG_DBGWCR6_EL1: + case SYSREG_DBGWCR7_EL1: + case SYSREG_DBGWCR8_EL1: + case SYSREG_DBGWCR9_EL1: + case SYSREG_DBGWCR10_EL1: + case SYSREG_DBGWCR11_EL1: + case 
SYSREG_DBGWCR12_EL1: + case SYSREG_DBGWCR13_EL1: + case SYSREG_DBGWCR14_EL1: + case SYSREG_DBGWCR15_EL1: + val = env->cp15.dbgwcr[SYSREG_CRM(reg)]; + break; default: + if (is_id_sysreg(reg)) { + /* ID system registers read as RES0 */ + val = 0; + break; + } cpu_synchronize_state(cpu); trace_hvf_unhandled_sysreg_read(env->pc, reg, - (reg >> 20) & 0x3, - (reg >> 14) & 0x7, - (reg >> 10) & 0xf, - (reg >> 1) & 0xf, - (reg >> 17) & 0x7); + SYSREG_OP0(reg), + SYSREG_OP1(reg), + SYSREG_CRN(reg), + SYSREG_CRM(reg), + SYSREG_OP2(reg)); hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); return 1; } trace_hvf_sysreg_read(reg, - (reg >> 20) & 0x3, - (reg >> 14) & 0x7, - (reg >> 10) & 0xf, - (reg >> 1) & 0xf, - (reg >> 17) & 0x7, + SYSREG_OP0(reg), + SYSREG_OP1(reg), + SYSREG_CRN(reg), + SYSREG_CRM(reg), + SYSREG_OP2(reg), val); hvf_set_reg(cpu, rt, val); @@ -880,17 +1456,44 @@ static void pmswinc_write(CPUARMState *env, uint64_t value) } } +static bool hvf_sysreg_write_cp(CPUState *cpu, uint32_t reg, uint64_t val) +{ + ARMCPU *arm_cpu = ARM_CPU(cpu); + CPUARMState *env = &arm_cpu->env; + const ARMCPRegInfo *ri; + + ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg)); + + if (ri) { + if (ri->accessfn) { + if (ri->accessfn(env, ri, false) != CP_ACCESS_OK) { + return false; + } + } + if (ri->writefn) { + ri->writefn(env, ri, val); + } else { + CPREG_FIELD64(env, ri) = val; + } + + trace_hvf_vgic_write(ri->name, val); + return true; + } + + return false; +} + static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) { ARMCPU *arm_cpu = ARM_CPU(cpu); CPUARMState *env = &arm_cpu->env; trace_hvf_sysreg_write(reg, - (reg >> 20) & 0x3, - (reg >> 14) & 0x7, - (reg >> 10) & 0xf, - (reg >> 1) & 0xf, - (reg >> 17) & 0x7, + SYSREG_OP0(reg), + SYSREG_OP1(reg), + SYSREG_CRN(reg), + SYSREG_CRM(reg), + SYSREG_OP2(reg), val); switch (reg) { @@ -914,8 +1517,8 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) } } - env->cp15.c9_pmcr &= ~PMCR_WRITEABLE_MASK; - env->cp15.c9_pmcr |= (val & PMCR_WRITEABLE_MASK); + env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK; + env->cp15.c9_pmcr |= (val & PMCR_WRITABLE_MASK); pmu_op_finish(env); break; @@ -957,14 +1560,119 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) case SYSREG_OSDLR_EL1: /* Dummy register */ break; + case SYSREG_ICC_AP0R0_EL1: + case SYSREG_ICC_AP0R1_EL1: + case SYSREG_ICC_AP0R2_EL1: + case SYSREG_ICC_AP0R3_EL1: + case SYSREG_ICC_AP1R0_EL1: + case SYSREG_ICC_AP1R1_EL1: + case SYSREG_ICC_AP1R2_EL1: + case SYSREG_ICC_AP1R3_EL1: + case SYSREG_ICC_ASGI1R_EL1: + case SYSREG_ICC_BPR0_EL1: + case SYSREG_ICC_BPR1_EL1: + case SYSREG_ICC_CTLR_EL1: + case SYSREG_ICC_DIR_EL1: + case SYSREG_ICC_EOIR0_EL1: + case SYSREG_ICC_EOIR1_EL1: + case SYSREG_ICC_HPPIR0_EL1: + case SYSREG_ICC_HPPIR1_EL1: + case SYSREG_ICC_IAR0_EL1: + case SYSREG_ICC_IAR1_EL1: + case SYSREG_ICC_IGRPEN0_EL1: + case SYSREG_ICC_IGRPEN1_EL1: + case SYSREG_ICC_PMR_EL1: + case SYSREG_ICC_SGI0R_EL1: + case SYSREG_ICC_SGI1R_EL1: + case SYSREG_ICC_SRE_EL1: + /* Call the TCG sysreg handler. This is only safe for GICv3 regs. 
*/ + if (!hvf_sysreg_write_cp(cpu, reg, val)) { + hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + } + break; + case SYSREG_MDSCR_EL1: + env->cp15.mdscr_el1 = val; + break; + case SYSREG_DBGBVR0_EL1: + case SYSREG_DBGBVR1_EL1: + case SYSREG_DBGBVR2_EL1: + case SYSREG_DBGBVR3_EL1: + case SYSREG_DBGBVR4_EL1: + case SYSREG_DBGBVR5_EL1: + case SYSREG_DBGBVR6_EL1: + case SYSREG_DBGBVR7_EL1: + case SYSREG_DBGBVR8_EL1: + case SYSREG_DBGBVR9_EL1: + case SYSREG_DBGBVR10_EL1: + case SYSREG_DBGBVR11_EL1: + case SYSREG_DBGBVR12_EL1: + case SYSREG_DBGBVR13_EL1: + case SYSREG_DBGBVR14_EL1: + case SYSREG_DBGBVR15_EL1: + env->cp15.dbgbvr[SYSREG_CRM(reg)] = val; + break; + case SYSREG_DBGBCR0_EL1: + case SYSREG_DBGBCR1_EL1: + case SYSREG_DBGBCR2_EL1: + case SYSREG_DBGBCR3_EL1: + case SYSREG_DBGBCR4_EL1: + case SYSREG_DBGBCR5_EL1: + case SYSREG_DBGBCR6_EL1: + case SYSREG_DBGBCR7_EL1: + case SYSREG_DBGBCR8_EL1: + case SYSREG_DBGBCR9_EL1: + case SYSREG_DBGBCR10_EL1: + case SYSREG_DBGBCR11_EL1: + case SYSREG_DBGBCR12_EL1: + case SYSREG_DBGBCR13_EL1: + case SYSREG_DBGBCR14_EL1: + case SYSREG_DBGBCR15_EL1: + env->cp15.dbgbcr[SYSREG_CRM(reg)] = val; + break; + case SYSREG_DBGWVR0_EL1: + case SYSREG_DBGWVR1_EL1: + case SYSREG_DBGWVR2_EL1: + case SYSREG_DBGWVR3_EL1: + case SYSREG_DBGWVR4_EL1: + case SYSREG_DBGWVR5_EL1: + case SYSREG_DBGWVR6_EL1: + case SYSREG_DBGWVR7_EL1: + case SYSREG_DBGWVR8_EL1: + case SYSREG_DBGWVR9_EL1: + case SYSREG_DBGWVR10_EL1: + case SYSREG_DBGWVR11_EL1: + case SYSREG_DBGWVR12_EL1: + case SYSREG_DBGWVR13_EL1: + case SYSREG_DBGWVR14_EL1: + case SYSREG_DBGWVR15_EL1: + env->cp15.dbgwvr[SYSREG_CRM(reg)] = val; + break; + case SYSREG_DBGWCR0_EL1: + case SYSREG_DBGWCR1_EL1: + case SYSREG_DBGWCR2_EL1: + case SYSREG_DBGWCR3_EL1: + case SYSREG_DBGWCR4_EL1: + case SYSREG_DBGWCR5_EL1: + case SYSREG_DBGWCR6_EL1: + case SYSREG_DBGWCR7_EL1: + case SYSREG_DBGWCR8_EL1: + case SYSREG_DBGWCR9_EL1: + case SYSREG_DBGWCR10_EL1: + case SYSREG_DBGWCR11_EL1: + case SYSREG_DBGWCR12_EL1: + case SYSREG_DBGWCR13_EL1: + case SYSREG_DBGWCR14_EL1: + case SYSREG_DBGWCR15_EL1: + env->cp15.dbgwcr[SYSREG_CRM(reg)] = val; + break; default: cpu_synchronize_state(cpu); trace_hvf_unhandled_sysreg_write(env->pc, reg, - (reg >> 20) & 0x3, - (reg >> 14) & 0x7, - (reg >> 10) & 0xf, - (reg >> 1) & 0xf, - (reg >> 17) & 0x7); + SYSREG_OP0(reg), + SYSREG_OP1(reg), + SYSREG_CRN(reg), + SYSREG_CRM(reg), + SYSREG_OP2(reg)); hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); return 1; } @@ -976,13 +1684,13 @@ static int hvf_inject_interrupts(CPUState *cpu) { if (cpu->interrupt_request & CPU_INTERRUPT_FIQ) { trace_hvf_inject_fiq(); - hv_vcpu_set_pending_interrupt(cpu->hvf->fd, HV_INTERRUPT_TYPE_FIQ, + hv_vcpu_set_pending_interrupt(cpu->accel->fd, HV_INTERRUPT_TYPE_FIQ, true); } if (cpu->interrupt_request & CPU_INTERRUPT_HARD) { trace_hvf_inject_irq(); - hv_vcpu_set_pending_interrupt(cpu->hvf->fd, HV_INTERRUPT_TYPE_IRQ, + hv_vcpu_set_pending_interrupt(cpu->accel->fd, HV_INTERRUPT_TYPE_IRQ, true); } @@ -1014,10 +1722,10 @@ static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts) * Use pselect to sleep so that other threads can IPI us while we're * sleeping. 
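/*
 * Illustrative sketch, not part of the patch: the lost-wakeup-free sleep
 * pattern hvf_wait_for_ipi() relies on. The kicking thread delivers a
 * signal that is normally blocked; pselect() atomically unblocks it only
 * for the duration of the sleep, so a kick arriving just before the call
 * still interrupts it. The signal number parameter is an assumption for
 * this example (QEMU uses its internal SIG_IPI).
 */
#include <pthread.h>
#include <signal.h>
#include <sys/select.h>

static void sleep_until_kicked(const struct timespec *ts, int sig_ipi)
{
    sigset_t unblock_ipi;

    /* Start from the current signal mask, minus the IPI signal */
    pthread_sigmask(SIG_SETMASK, NULL, &unblock_ipi);
    sigdelset(&unblock_ipi, sig_ipi);

    /* Returns early with EINTR if the IPI signal arrives while asleep */
    pselect(0, NULL, NULL, NULL, ts, &unblock_ipi);
}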
*/ - qatomic_mb_set(&cpu->thread_kicked, false); - qemu_mutex_unlock_iothread(); - pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask); - qemu_mutex_lock_iothread(); + qatomic_set_mb(&cpu->thread_kicked, false); + bql_unlock(); + pselect(0, 0, 0, 0, ts, &cpu->accel->unblock_ipi_mask); + bql_lock(); } static void hvf_wfi(CPUState *cpu) @@ -1037,7 +1745,7 @@ static void hvf_wfi(CPUState *cpu) return; } - r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl); + r = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl); assert_hvf_ok(r); if (!(ctl & 1) || (ctl & 2)) { @@ -1046,7 +1754,7 @@ static void hvf_wfi(CPUState *cpu) return; } - r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval); + r = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval); assert_hvf_ok(r); ticks_to_sleep = cval - hvf_vtimer_val(); @@ -1079,12 +1787,12 @@ static void hvf_sync_vtimer(CPUState *cpu) uint64_t ctl; bool irq_state; - if (!cpu->hvf->vtimer_masked) { + if (!cpu->accel->vtimer_masked) { /* We will get notified on vtimer changes by hvf, nothing to do */ return; } - r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl); + r = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl); assert_hvf_ok(r); irq_state = (ctl & (TMR_CTL_ENABLE | TMR_CTL_IMASK | TMR_CTL_ISTATUS)) == @@ -1093,8 +1801,8 @@ static void hvf_sync_vtimer(CPUState *cpu) if (!irq_state) { /* Timer no longer asserting, we can unmask it */ - hv_vcpu_set_vtimer_mask(cpu->hvf->fd, false); - cpu->hvf->vtimer_masked = false; + hv_vcpu_set_vtimer_mask(cpu->accel->fd, false); + cpu->accel->vtimer_masked = false; } } @@ -1102,11 +1810,13 @@ int hvf_vcpu_exec(CPUState *cpu) { ARMCPU *arm_cpu = ARM_CPU(cpu); CPUARMState *env = &arm_cpu->env; - hv_vcpu_exit_t *hvf_exit = cpu->hvf->exit; + int ret; + hv_vcpu_exit_t *hvf_exit = cpu->accel->exit; hv_return_t r; bool advance_pc = false; - if (hvf_inject_interrupts(cpu)) { + if (!(cpu->singlestep_enabled & SSTEP_NOIRQ) && + hvf_inject_interrupts(cpu)) { return EXCP_INTERRUPT; } @@ -1116,33 +1826,77 @@ int hvf_vcpu_exec(CPUState *cpu) flush_cpu_state(cpu); - qemu_mutex_unlock_iothread(); - assert_hvf_ok(hv_vcpu_run(cpu->hvf->fd)); + bql_unlock(); + assert_hvf_ok(hv_vcpu_run(cpu->accel->fd)); /* handle VMEXIT */ uint64_t exit_reason = hvf_exit->reason; uint64_t syndrome = hvf_exit->exception.syndrome; uint32_t ec = syn_get_ec(syndrome); - qemu_mutex_lock_iothread(); + ret = 0; + bql_lock(); switch (exit_reason) { case HV_EXIT_REASON_EXCEPTION: /* This is the main one, handle below. 
*/ break; case HV_EXIT_REASON_VTIMER_ACTIVATED: qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1); - cpu->hvf->vtimer_masked = true; + cpu->accel->vtimer_masked = true; return 0; case HV_EXIT_REASON_CANCELED: /* we got kicked, no exit to process */ return 0; default: - assert(0); + g_assert_not_reached(); } hvf_sync_vtimer(cpu); switch (ec) { + case EC_SOFTWARESTEP: { + ret = EXCP_DEBUG; + + if (!cpu->singlestep_enabled) { + error_report("EC_SOFTWARESTEP but single-stepping not enabled"); + } + break; + } + case EC_AA64_BKPT: { + ret = EXCP_DEBUG; + + cpu_synchronize_state(cpu); + + if (!hvf_find_sw_breakpoint(cpu, env->pc)) { + /* Re-inject into the guest */ + ret = 0; + hvf_raise_exception(cpu, EXCP_BKPT, syn_aa64_bkpt(0)); + } + break; + } + case EC_BREAKPOINT: { + ret = EXCP_DEBUG; + + cpu_synchronize_state(cpu); + + if (!find_hw_breakpoint(cpu, env->pc)) { + error_report("EC_BREAKPOINT but unknown hw breakpoint"); + } + break; + } + case EC_WATCHPOINT: { + ret = EXCP_DEBUG; + + cpu_synchronize_state(cpu); + + CPUWatchpoint *wp = + find_hw_watchpoint(cpu, hvf_exit->exception.virtual_address); + if (!wp) { + error_report("EXCP_DEBUG but unknown hw watchpoint"); + } + cpu->watchpoint_hit = wp; + break; + } case EC_DATAABORT: { bool isv = syndrome & ARM_EL_ISV; bool iswrite = (syndrome >> 6) & 1; @@ -1150,12 +1904,19 @@ int hvf_vcpu_exec(CPUState *cpu) uint32_t sas = (syndrome >> 22) & 3; uint32_t len = 1 << sas; uint32_t srt = (syndrome >> 16) & 0x1f; + uint32_t cm = (syndrome >> 8) & 0x1; uint64_t val = 0; trace_hvf_data_abort(env->pc, hvf_exit->exception.virtual_address, hvf_exit->exception.physical_address, isv, iswrite, s1ptw, len, srt); + if (cm) { + /* We don't cache MMIO regions */ + advance_pc = true; + break; + } + assert(isv); if (iswrite) { @@ -1178,16 +1939,16 @@ int hvf_vcpu_exec(CPUState *cpu) uint32_t rt = (syndrome >> 5) & 0x1f; uint32_t reg = syndrome & SYSREG_MASK; uint64_t val; - int ret = 0; + int sysreg_ret = 0; if (isread) { - ret = hvf_sysreg_read(cpu, reg, rt); + sysreg_ret = hvf_sysreg_read(cpu, reg, rt); } else { val = hvf_get_reg(cpu, rt); - ret = hvf_sysreg_write(cpu, reg, val); + sysreg_ret = hvf_sysreg_write(cpu, reg, val); } - advance_pc = !ret; + advance_pc = !sysreg_ret; break; } case EC_WFX_TRAP: @@ -1235,21 +1996,26 @@ int hvf_vcpu_exec(CPUState *cpu) flush_cpu_state(cpu); - r = hv_vcpu_get_reg(cpu->hvf->fd, HV_REG_PC, &pc); + r = hv_vcpu_get_reg(cpu->accel->fd, HV_REG_PC, &pc); assert_hvf_ok(r); pc += 4; - r = hv_vcpu_set_reg(cpu->hvf->fd, HV_REG_PC, pc); + r = hv_vcpu_set_reg(cpu->accel->fd, HV_REG_PC, pc); assert_hvf_ok(r); + + /* Handle single-stepping over instructions which trigger a VM exit */ + if (cpu->singlestep_enabled) { + ret = EXCP_DEBUG; + } } - return 0; + return ret; } static const VMStateDescription vmstate_hvf_vtimer = { .name = "hvf-vtimer", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(vtimer_val, HVFVTimer), VMSTATE_END_OF_LIST() }, @@ -1274,5 +2040,213 @@ int hvf_arch_init(void) hvf_state->vtimer_offset = mach_absolute_time(); vmstate_register(NULL, 0, &vmstate_hvf_vtimer, &vtimer); qemu_add_vm_change_state_handler(hvf_vm_state_change, &vtimer); + + hvf_arm_init_debug(); + + return 0; +} + +static const uint32_t brk_insn = 0xd4200000; + +int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp) +{ + if (cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) || + cpu_memory_rw_debug(cpu, bp->pc, (uint8_t 
*)&brk_insn, 4, 1)) { + return -EINVAL; + } + return 0; +} + +int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp) +{ + static uint32_t brk; + + if (cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&brk, 4, 0) || + brk != brk_insn || + cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) { + return -EINVAL; + } return 0; } + +int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type) +{ + switch (type) { + case GDB_BREAKPOINT_HW: + return insert_hw_breakpoint(addr); + case GDB_WATCHPOINT_READ: + case GDB_WATCHPOINT_WRITE: + case GDB_WATCHPOINT_ACCESS: + return insert_hw_watchpoint(addr, len, type); + default: + return -ENOSYS; + } +} + +int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type) +{ + switch (type) { + case GDB_BREAKPOINT_HW: + return delete_hw_breakpoint(addr); + case GDB_WATCHPOINT_READ: + case GDB_WATCHPOINT_WRITE: + case GDB_WATCHPOINT_ACCESS: + return delete_hw_watchpoint(addr, len, type); + default: + return -ENOSYS; + } +} + +void hvf_arch_remove_all_hw_breakpoints(void) +{ + if (cur_hw_wps > 0) { + g_array_remove_range(hw_watchpoints, 0, cur_hw_wps); + } + if (cur_hw_bps > 0) { + g_array_remove_range(hw_breakpoints, 0, cur_hw_bps); + } +} + +/* + * Update the vCPU with the gdbstub's view of debug registers. This view + * consists of all hardware breakpoints and watchpoints inserted so far while + * debugging the guest. + */ +static void hvf_put_gdbstub_debug_registers(CPUState *cpu) +{ + hv_return_t r = HV_SUCCESS; + int i; + + for (i = 0; i < cur_hw_bps; i++) { + HWBreakpoint *bp = get_hw_bp(i); + r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgbcr_regs[i], bp->bcr); + assert_hvf_ok(r); + r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgbvr_regs[i], bp->bvr); + assert_hvf_ok(r); + } + for (i = cur_hw_bps; i < max_hw_bps; i++) { + r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgbcr_regs[i], 0); + assert_hvf_ok(r); + r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgbvr_regs[i], 0); + assert_hvf_ok(r); + } + + for (i = 0; i < cur_hw_wps; i++) { + HWWatchpoint *wp = get_hw_wp(i); + r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgwcr_regs[i], wp->wcr); + assert_hvf_ok(r); + r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgwvr_regs[i], wp->wvr); + assert_hvf_ok(r); + } + for (i = cur_hw_wps; i < max_hw_wps; i++) { + r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgwcr_regs[i], 0); + assert_hvf_ok(r); + r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgwvr_regs[i], 0); + assert_hvf_ok(r); + } +} + +/* + * Update the vCPU with the guest's view of debug registers. This view is kept + * in the environment at all times. 
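/*
 * Side note on the brk_insn constant used by the software breakpoint
 * helpers above: the AArch64 BRK instruction is encoded as 0xd4200000
 * with a 16-bit immediate in bits [20:5], so 0xd4200000 is "BRK #0".
 * A hypothetical encoder, for illustration only:
 */
static inline uint32_t a64_brk_encode(uint16_t imm16)
{
    return 0xd4200000u | ((uint32_t)imm16 << 5);
}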
+ */ +static void hvf_put_guest_debug_registers(CPUState *cpu) +{ + ARMCPU *arm_cpu = ARM_CPU(cpu); + CPUARMState *env = &arm_cpu->env; + hv_return_t r = HV_SUCCESS; + int i; + + for (i = 0; i < max_hw_bps; i++) { + r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgbcr_regs[i], + env->cp15.dbgbcr[i]); + assert_hvf_ok(r); + r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgbvr_regs[i], + env->cp15.dbgbvr[i]); + assert_hvf_ok(r); + } + + for (i = 0; i < max_hw_wps; i++) { + r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgwcr_regs[i], + env->cp15.dbgwcr[i]); + assert_hvf_ok(r); + r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgwvr_regs[i], + env->cp15.dbgwvr[i]); + assert_hvf_ok(r); + } +} + +static inline bool hvf_arm_hw_debug_active(CPUState *cpu) +{ + return ((cur_hw_wps > 0) || (cur_hw_bps > 0)); +} + +static void hvf_arch_set_traps(void) +{ + CPUState *cpu; + bool should_enable_traps = false; + hv_return_t r = HV_SUCCESS; + + /* Check whether guest debugging is enabled for at least one vCPU; if it + * is, enable exiting the guest on all vCPUs */ + CPU_FOREACH(cpu) { + should_enable_traps |= cpu->accel->guest_debug_enabled; + } + CPU_FOREACH(cpu) { + /* Set whether debug exceptions exit the guest */ + r = hv_vcpu_set_trap_debug_exceptions(cpu->accel->fd, + should_enable_traps); + assert_hvf_ok(r); + + /* Set whether accesses to debug registers exit the guest */ + r = hv_vcpu_set_trap_debug_reg_accesses(cpu->accel->fd, + should_enable_traps); + assert_hvf_ok(r); + } +} + +void hvf_arch_update_guest_debug(CPUState *cpu) +{ + ARMCPU *arm_cpu = ARM_CPU(cpu); + CPUARMState *env = &arm_cpu->env; + + /* Check whether guest debugging is enabled */ + cpu->accel->guest_debug_enabled = cpu->singlestep_enabled || + hvf_sw_breakpoints_active(cpu) || + hvf_arm_hw_debug_active(cpu); + + /* Update debug registers */ + if (cpu->accel->guest_debug_enabled) { + hvf_put_gdbstub_debug_registers(cpu); + } else { + hvf_put_guest_debug_registers(cpu); + } + + cpu_synchronize_state(cpu); + + /* Enable/disable single-stepping */ + if (cpu->singlestep_enabled) { + env->cp15.mdscr_el1 = + deposit64(env->cp15.mdscr_el1, MDSCR_EL1_SS_SHIFT, 1, 1); + pstate_write(env, pstate_read(env) | PSTATE_SS); + } else { + env->cp15.mdscr_el1 = + deposit64(env->cp15.mdscr_el1, MDSCR_EL1_SS_SHIFT, 1, 0); + } + + /* Enable/disable Breakpoint exceptions */ + if (hvf_arm_hw_debug_active(cpu)) { + env->cp15.mdscr_el1 = + deposit64(env->cp15.mdscr_el1, MDSCR_EL1_MDE_SHIFT, 1, 1); + } else { + env->cp15.mdscr_el1 = + deposit64(env->cp15.mdscr_el1, MDSCR_EL1_MDE_SHIFT, 1, 0); + } + + hvf_arch_set_traps(); +} + +bool hvf_arch_supports_guest_debug(void) +{ + return true; +} diff --git a/target/arm/hvf/meson.build b/target/arm/hvf/meson.build index 855e6cce5a..afc509a470 100644 --- a/target/arm/hvf/meson.build +++ b/target/arm/hvf/meson.build @@ -1,3 +1,3 @@ -arm_softmmu_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: files( +arm_system_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: files( 'hvf.c', )) diff --git a/target/arm/hvf/trace-events b/target/arm/hvf/trace-events index 820e8e0297..4fbbe4b45e 100644 --- a/target/arm/hvf/trace-events +++ b/target/arm/hvf/trace-events @@ -9,3 +9,5 @@ hvf_unknown_hvc(uint64_t x0) "unknown HVC! 0x%016"PRIx64 hvf_unknown_smc(uint64_t x0) "unknown SMC! 
0x%016"PRIx64 hvf_exit(uint64_t syndrome, uint32_t ec, uint64_t pc) "exit: 0x%"PRIx64" [ec=0x%x pc=0x%"PRIx64"]" hvf_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64" x3=0x%016"PRIx64" cpu=0x%x" +hvf_vgic_write(const char *name, uint64_t val) "vgic write to %s [val=0x%016"PRIx64"]" +hvf_vgic_read(const char *name, uint64_t val) "vgic read from %s [val=0x%016"PRIx64"]" diff --git a/target/arm/hvf_arm.h b/target/arm/hvf_arm.h index ea238cff83..e848c1d27d 100644 --- a/target/arm/hvf_arm.h +++ b/target/arm/hvf_arm.h @@ -13,6 +13,13 @@ #include "cpu.h" -void hvf_arm_set_cpu_features_from_host(struct ARMCPU *cpu); +/** + * hvf_arm_init_debug() - initialize guest debug capabilities + * + * Should be called only once before using guest debug capabilities. + */ +void hvf_arm_init_debug(void); + +void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu); #endif diff --git a/target/arm/hyp_gdbstub.c b/target/arm/hyp_gdbstub.c new file mode 100644 index 0000000000..ebde2899cd --- /dev/null +++ b/target/arm/hyp_gdbstub.c @@ -0,0 +1,253 @@ +/* + * ARM implementation of KVM and HVF hooks, 64 bit specific code + * + * Copyright Mian-M. Hamayun 2013, Virtual Open Systems + * Copyright Alex Bennée 2014, Linaro + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" +#include "exec/gdbstub.h" + +/* Maximum and current break/watch point counts */ +int max_hw_bps, max_hw_wps; +GArray *hw_breakpoints, *hw_watchpoints; + +/** + * insert_hw_breakpoint() + * @addr: address of breakpoint + * + * See ARM ARM D2.9.1 for details but here we are only going to create + * simple un-linked breakpoints (i.e. we don't chain breakpoints + * together to match address and context or vmid). The hardware is + * capable of fancier matching but that will require exposing that + * fanciness to GDB's interface + * + * DBGBCR<n>_EL1, Debug Breakpoint Control Registers + * + * 31 24 23 20 19 16 15 14 13 12 9 8 5 4 3 2 1 0 + * +------+------+-------+-----+----+------+-----+------+-----+---+ + * | RES0 | BT | LBN | SSC | HMC| RES0 | BAS | RES0 | PMC | E | + * +------+------+-------+-----+----+------+-----+------+-----+---+ + * + * BT: Breakpoint type (0 = unlinked address match) + * LBN: Linked BP number (0 = unused) + * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12) + * BAS: Byte Address Select (RES1 for AArch64) + * E: Enable bit + * + * DBGBVR<n>_EL1, Debug Breakpoint Value Registers + * + * 63 53 52 49 48 2 1 0 + * +------+-----------+----------+-----+ + * | RESS | VA[52:49] | VA[48:2] | 0 0 | + * +------+-----------+----------+-----+ + * + * Depending on the addressing mode bits the top bits of the register + * are a sign extension of the highest applicable VA bit. Some + * versions of GDB don't do it correctly so we ensure they are correct + * here so future PC comparisons will work properly. 
+ */ + +int insert_hw_breakpoint(target_ulong addr) +{ + HWBreakpoint brk = { + .bcr = 0x1, /* BCR E=1, enable */ + .bvr = sextract64(addr, 0, 53) + }; + + if (cur_hw_bps >= max_hw_bps) { + return -ENOBUFS; + } + + brk.bcr = deposit32(brk.bcr, 1, 2, 0x3); /* PMC = 11 */ + brk.bcr = deposit32(brk.bcr, 5, 4, 0xf); /* BAS = RES1 */ + + g_array_append_val(hw_breakpoints, brk); + + return 0; +} + +/** + * delete_hw_breakpoint() + * @pc: address of breakpoint + * + * Delete a breakpoint and shuffle any above down + */ + +int delete_hw_breakpoint(target_ulong pc) +{ + int i; + for (i = 0; i < hw_breakpoints->len; i++) { + HWBreakpoint *brk = get_hw_bp(i); + if (brk->bvr == pc) { + g_array_remove_index(hw_breakpoints, i); + return 0; + } + } + return -ENOENT; +} + +/** + * insert_hw_watchpoint() + * @addr: address of watch point + * @len: size of area + * @type: type of watch point + * + * See ARM ARM D2.10. As with the breakpoints we can do some advanced + * stuff if we want to. The watch points can be linked with the break + * points above to make them context aware. However for simplicity + * currently we only deal with simple read/write watch points. + * + * D7.3.11 DBGWCR<n>_EL1, Debug Watchpoint Control Registers + * + * 31 29 28 24 23 21 20 19 16 15 14 13 12 5 4 3 2 1 0 + * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+ + * | RES0 | MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E | + * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+ + * + * MASK: num bits addr mask (0=none,01/10=res,11=3 bits (8 bytes)) + * WT: 0 - unlinked, 1 - linked (not currently used) + * LBN: Linked BP number (not currently used) + * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11) + * BAS: Byte Address Select + * LSC: Load/Store control (01: load, 10: store, 11: both) + * E: Enable + * + * The bottom 2 bits of the value register are masked. Therefore to + * break on any sizes smaller than an unaligned word you need to set + * MASK=0, BAS=bit per byte in question. 
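/*
 * Worked example for the small-region case, with illustrative numbers:
 * a 2-byte watchpoint at address 0x1003. DBGWVR holds the doubleword
 * base 0x1000; the watched bytes sit at offsets 3 and 4 within it, so
 * BAS = 0b00011000 and MASK = 0 - exactly what the BAS computation in
 * insert_hw_watchpoint() below produces.
 */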
For larger regions (^2) you + * need to ensure you mask the address as required and set BAS=0xff + */ + +int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type) +{ + HWWatchpoint wp = { + .wcr = R_DBGWCR_E_MASK, /* E=1, enable */ + .wvr = addr & (~0x7ULL), + .details = { .vaddr = addr, .len = len } + }; + + if (cur_hw_wps >= max_hw_wps) { + return -ENOBUFS; + } + + /* + * HMC=0 SSC=0 PAC=3 will hit EL0 or EL1, any security state, + * valid whether EL3 is implemented or not + */ + wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, PAC, 3); + + switch (type) { + case GDB_WATCHPOINT_READ: + wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 1); + wp.details.flags = BP_MEM_READ; + break; + case GDB_WATCHPOINT_WRITE: + wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 2); + wp.details.flags = BP_MEM_WRITE; + break; + case GDB_WATCHPOINT_ACCESS: + wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 3); + wp.details.flags = BP_MEM_ACCESS; + break; + default: + g_assert_not_reached(); + break; + } + if (len <= 8) { + /* we align the address and set the bits in BAS */ + int off = addr & 0x7; + int bas = (1 << len) - 1; + + wp.wcr = deposit32(wp.wcr, 5 + off, 8 - off, bas); + } else { + /* For ranges above 8 bytes we need to be a power of 2 */ + if (is_power_of_2(len)) { + int bits = ctz64(len); + + wp.wvr &= ~((1 << bits) - 1); + wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, MASK, bits); + wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, BAS, 0xff); + } else { + return -ENOBUFS; + } + } + + g_array_append_val(hw_watchpoints, wp); + return 0; +} + +bool check_watchpoint_in_range(int i, target_ulong addr) +{ + HWWatchpoint *wp = get_hw_wp(i); + uint64_t addr_top, addr_bottom = wp->wvr; + int bas = extract32(wp->wcr, 5, 8); + int mask = extract32(wp->wcr, 24, 4); + + if (mask) { + addr_top = addr_bottom + (1 << mask); + } else { + /* + * BAS must be contiguous but can offset against the base + * address in DBGWVR + */ + addr_bottom = addr_bottom + ctz32(bas); + addr_top = addr_bottom + clo32(bas); + } + + if (addr >= addr_bottom && addr <= addr_top) { + return true; + } + + return false; +} + +/** + * delete_hw_watchpoint() + * @addr: address of breakpoint + * + * Delete a breakpoint and shuffle any above down + */ + +int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type) +{ + int i; + for (i = 0; i < cur_hw_wps; i++) { + if (check_watchpoint_in_range(i, addr)) { + g_array_remove_index(hw_watchpoints, i); + return 0; + } + } + return -ENOENT; +} + +bool find_hw_breakpoint(CPUState *cpu, target_ulong pc) +{ + int i; + + for (i = 0; i < cur_hw_bps; i++) { + HWBreakpoint *bp = get_hw_bp(i); + if (bp->bvr == pc) { + return true; + } + } + return false; +} + +CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr) +{ + int i; + + for (i = 0; i < cur_hw_wps; i++) { + if (check_watchpoint_in_range(i, addr)) { + return &get_hw_wp(i)->details; + } + } + return NULL; +} diff --git a/target/arm/internals.h b/target/arm/internals.h index 9fbb364968..dd3da211a3 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -28,6 +28,7 @@ #include "hw/registerfields.h" #include "tcg/tcg-gvec-desc.h" #include "syndrome.h" +#include "cpu-features.h" /* register banks for CPU modes */ #define BANK_USRSYS 0 @@ -39,6 +40,11 @@ #define BANK_HYP 6 #define BANK_MON 7 +static inline int arm_env_mmu_index(CPUARMState *env) +{ + return EX_TBFLAG_ANY(env->hflags, MMUIDX); +} + static inline bool excp_is_internal(int excp) { /* Return true if this exception number represents a QEMU-internal @@ -81,6 +87,169 @@ FIELD(V7M_EXCRET, RES1, 7, 25) /* 
including the must-be-1 prefix */ */ #define FNC_RETURN_MIN_MAGIC 0xfefffffe +/* Bit definitions for DBGWCRn and DBGWCRn_EL1 */ +FIELD(DBGWCR, E, 0, 1) +FIELD(DBGWCR, PAC, 1, 2) +FIELD(DBGWCR, LSC, 3, 2) +FIELD(DBGWCR, BAS, 5, 8) +FIELD(DBGWCR, HMC, 13, 1) +FIELD(DBGWCR, SSC, 14, 2) +FIELD(DBGWCR, LBN, 16, 4) +FIELD(DBGWCR, WT, 20, 1) +FIELD(DBGWCR, MASK, 24, 5) +FIELD(DBGWCR, SSCE, 29, 1) + +#define VTCR_NSW (1u << 29) +#define VTCR_NSA (1u << 30) +#define VSTCR_SW VTCR_NSW +#define VSTCR_SA VTCR_NSA + +/* Bit definitions for CPACR (AArch32 only) */ +FIELD(CPACR, CP10, 20, 2) +FIELD(CPACR, CP11, 22, 2) +FIELD(CPACR, TRCDIS, 28, 1) /* matches CPACR_EL1.TTA */ +FIELD(CPACR, D32DIS, 30, 1) /* up to v7; RAZ in v8 */ +FIELD(CPACR, ASEDIS, 31, 1) + +/* Bit definitions for CPACR_EL1 (AArch64 only) */ +FIELD(CPACR_EL1, ZEN, 16, 2) +FIELD(CPACR_EL1, FPEN, 20, 2) +FIELD(CPACR_EL1, SMEN, 24, 2) +FIELD(CPACR_EL1, TTA, 28, 1) /* matches CPACR.TRCDIS */ + +/* Bit definitions for HCPTR (AArch32 only) */ +FIELD(HCPTR, TCP10, 10, 1) +FIELD(HCPTR, TCP11, 11, 1) +FIELD(HCPTR, TASE, 15, 1) +FIELD(HCPTR, TTA, 20, 1) +FIELD(HCPTR, TAM, 30, 1) /* matches CPTR_EL2.TAM */ +FIELD(HCPTR, TCPAC, 31, 1) /* matches CPTR_EL2.TCPAC */ + +/* Bit definitions for CPTR_EL2 (AArch64 only) */ +FIELD(CPTR_EL2, TZ, 8, 1) /* !E2H */ +FIELD(CPTR_EL2, TFP, 10, 1) /* !E2H, matches HCPTR.TCP10 */ +FIELD(CPTR_EL2, TSM, 12, 1) /* !E2H */ +FIELD(CPTR_EL2, ZEN, 16, 2) /* E2H */ +FIELD(CPTR_EL2, FPEN, 20, 2) /* E2H */ +FIELD(CPTR_EL2, SMEN, 24, 2) /* E2H */ +FIELD(CPTR_EL2, TTA, 28, 1) +FIELD(CPTR_EL2, TAM, 30, 1) /* matches HCPTR.TAM */ +FIELD(CPTR_EL2, TCPAC, 31, 1) /* matches HCPTR.TCPAC */ + +/* Bit definitions for CPTR_EL3 (AArch64 only) */ +FIELD(CPTR_EL3, EZ, 8, 1) +FIELD(CPTR_EL3, TFP, 10, 1) +FIELD(CPTR_EL3, ESM, 12, 1) +FIELD(CPTR_EL3, TTA, 20, 1) +FIELD(CPTR_EL3, TAM, 30, 1) +FIELD(CPTR_EL3, TCPAC, 31, 1) + +#define MDCR_MTPME (1U << 28) +#define MDCR_TDCC (1U << 27) +#define MDCR_HLP (1U << 26) /* MDCR_EL2 */ +#define MDCR_SCCD (1U << 23) /* MDCR_EL3 */ +#define MDCR_HCCD (1U << 23) /* MDCR_EL2 */ +#define MDCR_EPMAD (1U << 21) +#define MDCR_EDAD (1U << 20) +#define MDCR_TTRF (1U << 19) +#define MDCR_STE (1U << 18) /* MDCR_EL3 */ +#define MDCR_SPME (1U << 17) /* MDCR_EL3 */ +#define MDCR_HPMD (1U << 17) /* MDCR_EL2 */ +#define MDCR_SDD (1U << 16) +#define MDCR_SPD (3U << 14) +#define MDCR_TDRA (1U << 11) +#define MDCR_TDOSA (1U << 10) +#define MDCR_TDA (1U << 9) +#define MDCR_TDE (1U << 8) +#define MDCR_HPME (1U << 7) +#define MDCR_TPM (1U << 6) +#define MDCR_TPMCR (1U << 5) +#define MDCR_HPMN (0x1fU) + +/* Not all of the MDCR_EL3 bits are present in the 32-bit SDCR */ +#define SDCR_VALID_MASK (MDCR_MTPME | MDCR_TDCC | MDCR_SCCD | \ + MDCR_EPMAD | MDCR_EDAD | MDCR_TTRF | \ + MDCR_STE | MDCR_SPME | MDCR_SPD) + +#define TTBCR_N (7U << 0) /* TTBCR.EAE==0 */ +#define TTBCR_T0SZ (7U << 0) /* TTBCR.EAE==1 */ +#define TTBCR_PD0 (1U << 4) +#define TTBCR_PD1 (1U << 5) +#define TTBCR_EPD0 (1U << 7) +#define TTBCR_IRGN0 (3U << 8) +#define TTBCR_ORGN0 (3U << 10) +#define TTBCR_SH0 (3U << 12) +#define TTBCR_T1SZ (3U << 16) +#define TTBCR_A1 (1U << 22) +#define TTBCR_EPD1 (1U << 23) +#define TTBCR_IRGN1 (3U << 24) +#define TTBCR_ORGN1 (3U << 26) +#define TTBCR_SH1 (1U << 28) +#define TTBCR_EAE (1U << 31) + +FIELD(VTCR, T0SZ, 0, 6) +FIELD(VTCR, SL0, 6, 2) +FIELD(VTCR, IRGN0, 8, 2) +FIELD(VTCR, ORGN0, 10, 2) +FIELD(VTCR, SH0, 12, 2) +FIELD(VTCR, TG0, 14, 2) +FIELD(VTCR, PS, 16, 3) +FIELD(VTCR, VS, 19, 1) +FIELD(VTCR, HA, 21, 1) +FIELD(VTCR, 
HD, 22, 1) +FIELD(VTCR, HWU59, 25, 1) +FIELD(VTCR, HWU60, 26, 1) +FIELD(VTCR, HWU61, 27, 1) +FIELD(VTCR, HWU62, 28, 1) +FIELD(VTCR, NSW, 29, 1) +FIELD(VTCR, NSA, 30, 1) +FIELD(VTCR, DS, 32, 1) +FIELD(VTCR, SL2, 33, 1) + +#define HCRX_ENAS0 (1ULL << 0) +#define HCRX_ENALS (1ULL << 1) +#define HCRX_ENASR (1ULL << 2) +#define HCRX_FNXS (1ULL << 3) +#define HCRX_FGTNXS (1ULL << 4) +#define HCRX_SMPME (1ULL << 5) +#define HCRX_TALLINT (1ULL << 6) +#define HCRX_VINMI (1ULL << 7) +#define HCRX_VFNMI (1ULL << 8) +#define HCRX_CMOW (1ULL << 9) +#define HCRX_MCE2 (1ULL << 10) +#define HCRX_MSCEN (1ULL << 11) + +#define HPFAR_NS (1ULL << 63) + +#define HSTR_TTEE (1 << 16) +#define HSTR_TJDBX (1 << 17) + +/* + * Depending on the value of HCR_EL2.E2H, bits 0 and 1 + * have different bit definitions, and EL1PCTEN might be + * bit 0 or bit 10. We use _E2H1 and _E2H0 suffixes to + * disambiguate if necessary. + */ +FIELD(CNTHCTL, EL0PCTEN_E2H1, 0, 1) +FIELD(CNTHCTL, EL0VCTEN_E2H1, 1, 1) +FIELD(CNTHCTL, EL1PCTEN_E2H0, 0, 1) +FIELD(CNTHCTL, EL1PCEN_E2H0, 1, 1) +FIELD(CNTHCTL, EVNTEN, 2, 1) +FIELD(CNTHCTL, EVNTDIR, 3, 1) +FIELD(CNTHCTL, EVNTI, 4, 4) +FIELD(CNTHCTL, EL0VTEN, 8, 1) +FIELD(CNTHCTL, EL0PTEN, 9, 1) +FIELD(CNTHCTL, EL1PCTEN_E2H1, 10, 1) +FIELD(CNTHCTL, EL1PTEN, 11, 1) +FIELD(CNTHCTL, ECV, 12, 1) +FIELD(CNTHCTL, EL1TVT, 13, 1) +FIELD(CNTHCTL, EL1TVCT, 14, 1) +FIELD(CNTHCTL, EL1NVPCT, 15, 1) +FIELD(CNTHCTL, EL1NVVCT, 16, 1) +FIELD(CNTHCTL, EVNTIS, 17, 1) +FIELD(CNTHCTL, CNTVMASK, 18, 1) +FIELD(CNTHCTL, CNTPMASK, 19, 1) + /* We use a few fake FSR values for internal purposes in M profile. * M profile cores don't have A/R format FSRs, but currently our * get_phys_addr() code assumes A/R profile and reports failures via @@ -102,13 +271,13 @@ FIELD(V7M_EXCRET, RES1, 7, 25) /* including the must-be-1 prefix */ * and target exception level. This should be called from helper functions, * and never returns because we will longjump back up to the CPU main loop. */ -void QEMU_NORETURN raise_exception(CPUARMState *env, uint32_t excp, - uint32_t syndrome, uint32_t target_el); +G_NORETURN void raise_exception(CPUARMState *env, uint32_t excp, + uint32_t syndrome, uint32_t target_el); /* * Similarly, but also use unwinding to restore cpu state. */ -void QEMU_NORETURN raise_exception_ra(CPUARMState *env, uint32_t excp, +G_NORETURN void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome, uint32_t target_el, uintptr_t ra); @@ -170,34 +339,39 @@ static inline int r14_bank_number(int mode) return (mode == ARM_CPU_MODE_HYP) ? BANK_USRSYS : bank_number(mode); } +void arm_cpu_register(const ARMCPUInfo *info); +void aarch64_cpu_register(const ARMCPUInfo *info); + +void register_cp_regs_for_features(ARMCPU *cpu); +void init_cpreg_list(ARMCPU *cpu); + void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu); void arm_translate_init(void); +void arm_restore_state_to_opc(CPUState *cs, + const TranslationBlock *tb, + const uint64_t *data); + #ifdef CONFIG_TCG void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb); #endif /* CONFIG_TCG */ -/** - * aarch64_sve_zcr_get_valid_len: - * @cpu: cpu context - * @start_len: maximum len to consider - * - * Return the maximum supported sve vector length <= @start_len. - * Note that both @start_len and the return value are in units - * of ZCR_ELx.LEN, so the vector bit length is (x + 1) * 128. 
- */ -uint32_t aarch64_sve_zcr_get_valid_len(ARMCPU *cpu, uint32_t start_len); - -enum arm_fprounding { +typedef enum ARMFPRounding { FPROUNDING_TIEEVEN, FPROUNDING_POSINF, FPROUNDING_NEGINF, FPROUNDING_ZERO, FPROUNDING_TIEAWAY, FPROUNDING_ODD -}; +} ARMFPRounding; + +extern const FloatRoundMode arm_rmode_to_sf_map[6]; -int arm_rmode_to_sf(int rmode); +static inline FloatRoundMode arm_rmode_to_sf(ARMFPRounding rmode) +{ + assert((unsigned)rmode < ARRAY_SIZE(arm_rmode_to_sf_map)); + return arm_rmode_to_sf_map[rmode]; +} static inline void aarch64_save_sp(CPUARMState *env, int el) { @@ -243,24 +417,7 @@ static inline void update_spsel(CPUARMState *env, uint32_t imm) * Returns the implementation defined bit-width of physical addresses. * The ARMv8 reference manuals refer to this as PAMax(). */ -static inline unsigned int arm_pamax(ARMCPU *cpu) -{ - static const unsigned int pamax_map[] = { - [0] = 32, - [1] = 36, - [2] = 40, - [3] = 42, - [4] = 44, - [5] = 48, - }; - unsigned int parange = - FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); - - /* id_aa64mmfr0 is a read-only register so values outside of the - * supported mappings can be considered an implementation error. */ - assert(parange < ARRAY_SIZE(pamax_map)); - return pamax_map[parange]; -} +unsigned int arm_pamax(ARMCPU *cpu); /* Return true if extended addresses are enabled. * This is always the case if our translation regime is 64 bit, @@ -268,9 +425,13 @@ static inline unsigned int arm_pamax(ARMCPU *cpu) */ static inline bool extended_addresses_enabled(CPUARMState *env) { - TCR *tcr = &env->cp15.tcr_el[arm_is_secure(env) ? 3 : 1]; + uint64_t tcr = env->cp15.tcr_el[arm_is_secure(env) ? 3 : 1]; + if (arm_feature(env, ARM_FEATURE_PMSA) && + arm_feature(env, ARM_FEATURE_V8)) { + return true; + } return arm_el_is_aa64(env, 1) || - (arm_feature(env, ARM_FEATURE_LPAE) && (tcr->raw_tcr & TTBCR_EAE)); + (arm_feature(env, ARM_FEATURE_LPAE) && (tcr & TTBCR_EAE)); } /* Update a QEMU watchpoint based on the information the guest has set in the @@ -354,19 +515,33 @@ typedef enum ARMFaultType { ARMFault_AsyncExternal, ARMFault_Debug, ARMFault_TLBConflict, + ARMFault_UnsuppAtomicUpdate, ARMFault_Lockdown, ARMFault_Exclusive, ARMFault_ICacheMaint, ARMFault_QEMU_NSCExec, /* v8M: NS executing in S&NSC memory */ ARMFault_QEMU_SFault, /* v8M: SecureFault INVTRAN, INVEP or AUVIOL */ + ARMFault_GPCFOnWalk, + ARMFault_GPCFOnOutput, } ARMFaultType; +typedef enum ARMGPCF { + GPCF_None, + GPCF_AddressSize, + GPCF_Walk, + GPCF_EABT, + GPCF_Fail, +} ARMGPCF; + /** * ARMMMUFaultInfo: Information describing an ARM MMU Fault * @type: Type of fault + * @gpcf: Subtype of ARMFault_GPCFOn{Walk,Output}. 
* @level: Table walk level (for translation, access flag and permission faults) * @domain: Domain of the fault address (for non-LPAE CPUs only) * @s2addr: Address that caused a fault at stage 2 + * @paddr: physical address that caused a fault for gpc + * @paddr_space: physical address space that caused a fault for gpc * @stage2: True if we faulted at stage 2 * @s1ptw: True if we faulted at stage 2 while doing a stage 1 page-table walk * @s1ns: True if we faulted on a non-secure IPA while in secure state @@ -375,7 +550,10 @@ typedef enum ARMFaultType { typedef struct ARMMMUFaultInfo ARMMMUFaultInfo; struct ARMMMUFaultInfo { ARMFaultType type; + ARMGPCF gpcf; target_ulong s2addr; + target_ulong paddr; + ARMSecuritySpace paddr_space; int level; int domain; bool stage2; @@ -479,28 +657,51 @@ static inline uint32_t arm_fi_to_lfsc(ARMMMUFaultInfo *fi) case ARMFault_None: return 0; case ARMFault_AddressSize: - fsc = fi->level & 3; + assert(fi->level >= -1 && fi->level <= 3); + if (fi->level < 0) { + fsc = 0b101001; + } else { + fsc = fi->level; + } break; case ARMFault_AccessFlag: - fsc = (fi->level & 3) | (0x2 << 2); + assert(fi->level >= 0 && fi->level <= 3); + fsc = 0b001000 | fi->level; break; case ARMFault_Permission: - fsc = (fi->level & 3) | (0x3 << 2); + assert(fi->level >= 0 && fi->level <= 3); + fsc = 0b001100 | fi->level; break; case ARMFault_Translation: - fsc = (fi->level & 3) | (0x1 << 2); + assert(fi->level >= -1 && fi->level <= 3); + if (fi->level < 0) { + fsc = 0b101011; + } else { + fsc = 0b000100 | fi->level; + } break; case ARMFault_SyncExternal: fsc = 0x10 | (fi->ea << 12); break; case ARMFault_SyncExternalOnWalk: - fsc = (fi->level & 3) | (0x5 << 2) | (fi->ea << 12); + assert(fi->level >= -1 && fi->level <= 3); + if (fi->level < 0) { + fsc = 0b010011; + } else { + fsc = 0b010100 | fi->level; + } + fsc |= fi->ea << 12; break; case ARMFault_SyncParity: fsc = 0x18; break; case ARMFault_SyncParityOnWalk: - fsc = (fi->level & 3) | (0x7 << 2); + assert(fi->level >= -1 && fi->level <= 3); + if (fi->level < 0) { + fsc = 0b011011; + } else { + fsc = 0b011100 | fi->level; + } break; case ARMFault_AsyncParity: fsc = 0x19; @@ -517,12 +718,26 @@ static inline uint32_t arm_fi_to_lfsc(ARMMMUFaultInfo *fi) case ARMFault_TLBConflict: fsc = 0x30; break; + case ARMFault_UnsuppAtomicUpdate: + fsc = 0x31; + break; case ARMFault_Lockdown: fsc = 0x34; break; case ARMFault_Exclusive: fsc = 0x35; break; + case ARMFault_GPCFOnWalk: + assert(fi->level >= -1 && fi->level <= 3); + if (fi->level < 0) { + fsc = 0b100011; + } else { + fsc = 0b100100 | fi->level; + } + break; + case ARMFault_GPCFOnOutput: + fsc = 0b101000; + break; default: /* Other faults can't occur in a context that requires a * long-format status code. @@ -544,9 +759,17 @@ static inline bool arm_extabort_type(MemTxResult result) return result != MEMTX_DECODE_ERROR; } +#ifdef CONFIG_USER_ONLY +void arm_cpu_record_sigsegv(CPUState *cpu, vaddr addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra); +void arm_cpu_record_sigbus(CPUState *cpu, vaddr addr, + MMUAccessType access_type, uintptr_t ra); +#else bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr); +#endif static inline int arm_to_core_mmu_idx(ARMMMUIdx mmu_idx) { @@ -570,32 +793,21 @@ static inline ARMMMUIdx core_to_aa64_mmu_idx(int mmu_idx) int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx); -/* - * Return the MMU index for a v7M CPU with all relevant information - * manually specified. 
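/*
 * Quick check of the long-descriptor FSC encodings used in
 * arm_fi_to_lfsc() above: a level-2 translation fault encodes as
 * 0b000100 | 2 = 0x06, while a level -1 translation fault (possible
 * with FEAT_LPA2) takes the dedicated code 0b101011.
 */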
- */ -ARMMMUIdx arm_v7m_mmu_idx_all(CPUARMState *env, - bool secstate, bool priv, bool negpri); - -/* - * Return the MMU index for a v7M CPU in the specified security and - * privilege state. - */ -ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env, - bool secstate, bool priv); - /* Return the MMU index for a v7M CPU in the specified security state */ ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate); -/* Return true if the stage 1 translation regime is using LPAE format page - * tables */ +/* + * Return true if the stage 1 translation regime is using LPAE + * format page tables + */ bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx); /* Raise a data fault alignment exception for the specified virtual address */ -void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, - MMUAccessType access_type, - int mmu_idx, uintptr_t retaddr) QEMU_NORETURN; +G_NORETURN void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr); +#ifndef CONFIG_USER_ONLY /* arm_cpu_do_transaction_failed: handle a memory system error response * (eg "no device/memory present at address") by raising an external abort * exception @@ -605,6 +817,7 @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, MMUAccessType access_type, int mmu_idx, MemTxAttrs attrs, MemTxResult response, uintptr_t retaddr); +#endif /* Call any registered EL change hooks */ static inline void arm_call_pre_el_change_hook(ARMCPU *cpu) @@ -629,112 +842,53 @@ static inline bool regime_has_2_ranges(ARMMMUIdx mmu_idx) case ARMMMUIdx_Stage1_E0: case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_Stage1_SE0: - case ARMMMUIdx_Stage1_SE1: - case ARMMMUIdx_Stage1_SE1_PAN: case ARMMMUIdx_E10_0: case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: case ARMMMUIdx_E20_0: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_SE10_0: - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: - case ARMMMUIdx_SE20_0: - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: return true; default: return false; } } -/* Return true if this address translation regime is secure */ -static inline bool regime_is_secure(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E20_2: - case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_Stage1_E1: - case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_E2: - case ARMMMUIdx_Stage2: - case ARMMMUIdx_MPrivNegPri: - case ARMMMUIdx_MUserNegPri: - case ARMMMUIdx_MPriv: - case ARMMMUIdx_MUser: - return false; - case ARMMMUIdx_SE3: - case ARMMMUIdx_SE10_0: - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: - case ARMMMUIdx_SE20_0: - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: - case ARMMMUIdx_Stage1_SE0: - case ARMMMUIdx_Stage1_SE1: - case ARMMMUIdx_Stage1_SE1_PAN: - case ARMMMUIdx_SE2: - case ARMMMUIdx_Stage2_S: - case ARMMMUIdx_MSPrivNegPri: - case ARMMMUIdx_MSUserNegPri: - case ARMMMUIdx_MSPriv: - case ARMMMUIdx_MSUser: - return true; - default: - g_assert_not_reached(); - } -} - static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) { switch (mmu_idx) { case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_Stage1_SE1_PAN: case ARMMMUIdx_E10_1_PAN: case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_SE10_1_PAN: - case ARMMMUIdx_SE20_2_PAN: return true; default: return false; } } +static inline bool regime_is_stage2(ARMMMUIdx mmu_idx) 
+{ + return mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S; +} + /* Return the exception level which controls this address translation regime */ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) { switch (mmu_idx) { - case ARMMMUIdx_SE20_0: - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: case ARMMMUIdx_E20_0: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: case ARMMMUIdx_Stage2: case ARMMMUIdx_Stage2_S: - case ARMMMUIdx_SE2: case ARMMMUIdx_E2: return 2; - case ARMMMUIdx_SE3: + case ARMMMUIdx_E3: return 3; - case ARMMMUIdx_SE10_0: - case ARMMMUIdx_Stage1_SE0: - return arm_el_is_aa64(env, 3) ? 1 : 3; - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: + case ARMMMUIdx_E10_0: case ARMMMUIdx_Stage1_E0: + return arm_el_is_aa64(env, 3) || !arm_is_secure_below_el3(env) ? 1 : 3; case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_Stage1_SE1: - case ARMMMUIdx_Stage1_SE1_PAN: - case ARMMMUIdx_E10_0: case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: case ARMMMUIdx_MPrivNegPri: @@ -751,45 +905,79 @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) } } -/* Return the TCR controlling this translation regime */ -static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) +static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + switch (mmu_idx) { + case ARMMMUIdx_E20_0: + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_MUser: + case ARMMMUIdx_MSUser: + case ARMMMUIdx_MUserNegPri: + case ARMMMUIdx_MSUserNegPri: + return true; + default: + return false; + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E10_1: + case ARMMMUIdx_E10_1_PAN: + g_assert_not_reached(); + } +} + +/* Return the SCTLR value which controls this address translation regime */ +static inline uint64_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + return env->cp15.sctlr_el[regime_el(env, mmu_idx)]; +} + +/* + * These are the fields in VTCR_EL2 which affect both the Secure stage 2 + * and the Non-Secure stage 2 translation regimes (and hence which are + * not present in VSTCR_EL2). + */ +#define VTCR_SHARED_FIELD_MASK \ + (R_VTCR_IRGN0_MASK | R_VTCR_ORGN0_MASK | R_VTCR_SH0_MASK | \ + R_VTCR_PS_MASK | R_VTCR_VS_MASK | R_VTCR_HA_MASK | R_VTCR_HD_MASK | \ + R_VTCR_DS_MASK) + +/* Return the value of the TCR controlling this translation regime */ +static inline uint64_t regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) { if (mmu_idx == ARMMMUIdx_Stage2) { - return &env->cp15.vtcr_el2; + return env->cp15.vtcr_el2; } if (mmu_idx == ARMMMUIdx_Stage2_S) { /* - * Note: Secure stage 2 nominally shares fields from VTCR_EL2, but - * those are not currently used by QEMU, so just return VSTCR_EL2. + * Secure stage 2 shares fields from VTCR_EL2. We merge those + * in with the VSTCR_EL2 value to synthesize a single VTCR_EL2 format + * value so the callers don't need to special case this. + * + * If a future architecture change defines bits in VSTCR_EL2 that + * overlap with these VTCR_EL2 fields we may need to revisit this. */ - return &env->cp15.vstcr_el2; + uint64_t v = env->cp15.vstcr_el2 & ~VTCR_SHARED_FIELD_MASK; + v |= env->cp15.vtcr_el2 & VTCR_SHARED_FIELD_MASK; + return v; } - return &env->cp15.tcr_el[regime_el(env, mmu_idx)]; + return env->cp15.tcr_el[regime_el(env, mmu_idx)]; } -/* Return the FSR value for a debug exception (watchpoint, hardware - * breakpoint or BKPT insn) targeting the specified exception level. 
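/*
 * Minimal sketch of the Secure stage 2 merge performed in regime_tcr()
 * above, factored out for illustration (the helper name is hypothetical):
 * the shared fields (IRGN0/ORGN0/SH0/PS/VS/HA/HD/DS) come from VTCR_EL2,
 * everything else from VSTCR_EL2.
 */
static uint64_t s2_effective_tcr(uint64_t vstcr_el2, uint64_t vtcr_el2)
{
    return (vstcr_el2 & ~VTCR_SHARED_FIELD_MASK) |
           (vtcr_el2 & VTCR_SHARED_FIELD_MASK);
}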
- */ -static inline uint32_t arm_debug_exception_fsr(CPUARMState *env) +/* Return true if the translation regime is using LPAE format page tables */ +static inline bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) { - ARMMMUFaultInfo fi = { .type = ARMFault_Debug }; - int target_el = arm_debug_target_el(env); - bool using_lpae = false; - - if (target_el == 2 || arm_el_is_aa64(env, target_el)) { - using_lpae = true; - } else { - if (arm_feature(env, ARM_FEATURE_LPAE) && - (env->cp15.tcr_el[target_el].raw_tcr & TTBCR_EAE)) { - using_lpae = true; - } + int el = regime_el(env, mmu_idx); + if (el == 2 || arm_el_is_aa64(env, el)) { + return true; } - - if (using_lpae) { - return arm_fi_to_lfsc(&fi); - } else { - return arm_fi_to_sfsc(&fi); + if (arm_feature(env, ARM_FEATURE_PMSA) && + arm_feature(env, ARM_FEATURE_V8)) { + return true; + } + if (arm_feature(env, ARM_FEATURE_LPAE) + && (regime_tcr(env, mmu_idx) & TTBCR_EAE)) { + return true; } + return false; } /** @@ -908,7 +1096,7 @@ static inline const char *aarch32_mode_name(uint32_t psr) * * Update the CPU_INTERRUPT_VIRQ bit in cs->interrupt_request, following * a change to either the input VIRQ line from the GIC or the HCR_EL2.VI bit. - * Must be called with the iothread lock held. + * Must be called with the BQL held. */ void arm_cpu_update_virq(ARMCPU *cpu); @@ -917,11 +1105,19 @@ void arm_cpu_update_virq(ARMCPU *cpu); * * Update the CPU_INTERRUPT_VFIQ bit in cs->interrupt_request, following * a change to either the input VFIQ line from the GIC or the HCR_EL2.VF bit. - * Must be called with the iothread lock held. + * Must be called with the BQL held. */ void arm_cpu_update_vfiq(ARMCPU *cpu); /** + * arm_cpu_update_vserr: Update CPU_INTERRUPT_VSERR bit + * + * Update the CPU_INTERRUPT_VSERR bit in cs->interrupt_request, + * following a change to the HCR_EL2.VSE bit. + */ +void arm_cpu_update_vserr(ARMCPU *cpu); + +/** * arm_mmu_idx_el: * @env: The cpu environment * @el: The EL to use. @@ -945,11 +1141,16 @@ ARMMMUIdx arm_mmu_idx(CPUARMState *env); * Return the ARMMMUIdx for the stage1 traversal for the current regime. */ #ifdef CONFIG_USER_ONLY +static inline ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) +{ + return ARMMMUIdx_Stage1_E0; +} static inline ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env) { return ARMMMUIdx_Stage1_E0; } #else +ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx); ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env); #endif @@ -966,9 +1167,6 @@ static inline bool arm_mmu_idx_is_stage1_of_2(ARMMMUIdx mmu_idx) case ARMMMUIdx_Stage1_E0: case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_Stage1_SE0: - case ARMMMUIdx_Stage1_SE1: - case ARMMMUIdx_Stage1_SE1_PAN: return true; default: return false; @@ -1035,37 +1233,70 @@ static inline uint32_t aarch64_pstate_valid_mask(const ARMISARegisters *id) return valid; } +/* Granule size (i.e. page size) */ +typedef enum ARMGranuleSize { + /* Same order as TG0 encoding */ + Gran4K, + Gran64K, + Gran16K, + GranInvalid, +} ARMGranuleSize; + +/** + * arm_granule_bits: Return address size of the granule in bits + * + * Return the address size of the granule in bits. This corresponds + * to the pseudocode TGxGranuleBits(). + */ +static inline int arm_granule_bits(ARMGranuleSize gran) +{ + switch (gran) { + case Gran64K: + return 16; + case Gran16K: + return 14; + case Gran4K: + return 12; + default: + g_assert_not_reached(); + } +} + /* * Parameters of a given virtual address, as extracted from the * translation control register (TCR) for a given regime. 
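/*
 * For reference, the granule-to-page-size mapping implied by
 * arm_granule_bits() above:
 *
 *   1 << arm_granule_bits(Gran4K)  == 4096
 *   1 << arm_granule_bits(Gran16K) == 16384
 *   1 << arm_granule_bits(Gran64K) == 65536
 */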
*/ typedef struct ARMVAParameters { unsigned tsz : 8; + unsigned ps : 3; + unsigned sh : 2; unsigned select : 1; bool tbi : 1; bool epd : 1; bool hpd : 1; - bool using16k : 1; - bool using64k : 1; + bool tsz_oob : 1; /* tsz has been clamped to legal range */ + bool ds : 1; + bool ha : 1; + bool hd : 1; + ARMGranuleSize gran : 2; } ARMVAParameters; +/** + * aa64_va_parameters: Return parameters for an AArch64 virtual address + * @env: CPU + * @va: virtual address to look up + * @mmu_idx: determines translation regime to use + * @data: true if this is a data access + * @el1_is_aa32: true if we are asking about stage 2 when EL1 is AArch32 + * (ignored if @mmu_idx is for a stage 1 regime; only affects tsz/tsz_oob) + */ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, - ARMMMUIdx mmu_idx, bool data); + ARMMMUIdx mmu_idx, bool data, + bool el1_is_aa32); -static inline int exception_target_el(CPUARMState *env) -{ - int target_el = MAX(1, arm_current_el(env)); - - /* - * No such thing as secure EL1 if EL3 is aarch32, - * so update the target EL to EL3 in this case. - */ - if (arm_is_secure(env) && !arm_el_is_aa64(env, 3) && target_el == 1) { - target_el = 3; - } - - return target_el; -} +int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx); +int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx); +int aa64_va_parameter_tcma(uint64_t tcr, ARMMMUIdx mmu_idx); /* Determine if allocation tags are available. */ static inline bool allocation_tag_access_enabled(CPUARMState *env, int el, @@ -1076,7 +1307,7 @@ static inline bool allocation_tag_access_enabled(CPUARMState *env, int el, && !(env->cp15.scr_el3 & SCR_ATA)) { return false; } - if (el < 2 && arm_feature(env, ARM_FEATURE_EL2)) { + if (el < 2 && arm_is_el2_enabled(env)) { uint64_t hcr = arm_hcr_el2_eff(env); if (!(hcr & HCR_ATA) && (!(hcr & HCR_E2H) || !(hcr & HCR_TGE))) { return false; @@ -1101,40 +1332,81 @@ typedef struct V8M_SAttributes { void v8m_security_lookup(CPUARMState *env, uint32_t address, MMUAccessType access_type, ARMMMUIdx mmu_idx, - V8M_SAttributes *sattrs); - -bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, - MMUAccessType access_type, ARMMMUIdx mmu_idx, - hwaddr *phys_ptr, MemTxAttrs *txattrs, - int *prot, bool *is_subpage, - ARMMMUFaultInfo *fi, uint32_t *mregion); + bool secure, V8M_SAttributes *sattrs); /* Cacheability and shareability attributes for a memory access */ typedef struct ARMCacheAttrs { - unsigned int attrs:8; /* as in the MAIR register encoding */ + /* + * If is_s2_format is true, attrs is the S2 descriptor bits [5:2] + * Otherwise, attrs is the same as the MAIR_EL1 8-bit format + */ + unsigned int attrs:8; unsigned int shareability:2; /* as in the SH field of the VMSAv8-64 PTEs */ + bool is_s2_format:1; } ARMCacheAttrs; +/* Fields that are valid upon success. */ +typedef struct GetPhysAddrResult { + CPUTLBEntryFull f; + ARMCacheAttrs cacheattrs; +} GetPhysAddrResult; + +/** + * get_phys_addr: get the physical address for a virtual address + * @env: CPUARMState + * @address: virtual address to get physical address for + * @access_type: 0 for read, 1 for write, 2 for execute + * @mmu_idx: MMU index indicating required translation regime + * @result: set on translation success. + * @fi: set to fault info if the translation fails + * + * Find the physical address corresponding to the given virtual address, + * by doing a translation table walk on MMU based systems or using the + * MPU state on MPU based systems. + * + * Returns false if the translation was successful. 
Otherwise, phys_ptr, attrs, + prot and page_size may not be filled in, and the populated fsr value provides + * information on why the translation aborted, in the format of a + * DFSR/IFSR fault register, with the following caveats: + * * we honour the short vs long DFSR format differences. + * * the WnR bit is never set (the caller must do this). + * * for PMSAv5 based systems we don't bother to return a full FSR format + * value. + */ bool get_phys_addr(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, - hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, - target_ulong *page_size, - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) + GetPhysAddrResult *result, ARMMMUFaultInfo *fi) __attribute__((nonnull)); -void arm_log_exception(int idx); +/** + * get_phys_addr_with_space_nogpc: get the physical address for a virtual + * address + * @env: CPUARMState + * @address: virtual address to get physical address for + * @access_type: 0 for read, 1 for write, 2 for execute + * @mmu_idx: MMU index indicating required translation regime + * @space: security space for the access + * @result: set on translation success. + * @fi: set to fault info if the translation fails + * + * Similar to get_phys_addr, but use the given security space and don't perform + * a Granule Protection Check on the resulting address. + */ +bool get_phys_addr_with_space_nogpc(CPUARMState *env, target_ulong address, + MMUAccessType access_type, + ARMMMUIdx mmu_idx, ARMSecuritySpace space, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi) + __attribute__((nonnull)); -#endif /* !CONFIG_USER_ONLY */ +bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, + MMUAccessType access_type, ARMMMUIdx mmu_idx, + bool is_secure, GetPhysAddrResult *result, + ARMMMUFaultInfo *fi, uint32_t *mregion); -/* - * The log2 of the words in the tag block, for GMID_EL1.BS. - * The is the maximum, 256 bytes, which manipulates 64-bits of tags. - */ -#define GMID_EL1_BS 6 +void arm_log_exception(CPUState *cs); -/* We associate one allocation tag per 16 bytes, the minimum. */ -#define LOG2_TAG_GRANULE 4 -#define TAG_GRANULE (1 << LOG2_TAG_GRANULE) +#endif /* !CONFIG_USER_ONLY */ /* * SVE predicates are 1/8 the size of SVE vectors, and cannot use @@ -1156,11 +1428,67 @@ FIELD(MTEDESC, MIDX, 0, 4) FIELD(MTEDESC, TBI, 4, 2) FIELD(MTEDESC, TCMA, 6, 2) FIELD(MTEDESC, WRITE, 8, 1) -FIELD(MTEDESC, SIZEM1, 9, SIMD_DATA_BITS - 9) /* size - 1 */ +FIELD(MTEDESC, ALIGN, 9, 3) +FIELD(MTEDESC, SIZEM1, 12, SIMD_DATA_BITS - SVE_MTEDESC_SHIFT - 12) /* size - 1 */ bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr); uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra); +/** + * mte_mops_probe: Check where the next MTE failure is for a FEAT_MOPS operation + * @env: CPU env + * @ptr: start address of memory region (dirty pointer) + * @size: length of region (guaranteed not to cross a page boundary) + * @desc: MTEDESC descriptor word (0 means no MTE checks) + * Returns: the size of the region that can be copied without hitting + * an MTE tag failure + * + * Note that we assume that the caller has already checked the TBI + * and TCMA bits with mte_checks_needed() and an MTE check is definitely + * required. 
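+ *
+ * Illustrative sketch (not part of this patch) of the intended calling
+ * pattern in a forward FEAT_MOPS copy helper:
+ *
+ *     uint64_t safe = mte_mops_probe(env, ptr, size, desc);
+ *     if (safe < size) {
+ *         copy only 'safe' bytes, then report the failure
+ *         via mte_check_fail(env, desc, ptr + safe, ra);
+ *     }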
+ */ +uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size, + uint32_t desc); + +/** + * mte_mops_probe_rev: Check where the next MTE failure is for a FEAT_MOPS + * operation going in the reverse direction + * @env: CPU env + * @ptr: *end* address of memory region (dirty pointer) + * @size: length of region (guaranteed not to cross a page boundary) + * @desc: MTEDESC descriptor word (0 means no MTE checks) + * Returns: the size of the region that can be copied without hitting + * an MTE tag failure + * + * Note that we assume that the caller has already checked the TBI + * and TCMA bits with mte_checks_needed() and an MTE check is definitely + * required. + */ +uint64_t mte_mops_probe_rev(CPUARMState *env, uint64_t ptr, uint64_t size, + uint32_t desc); + +/** + * mte_check_fail: Record an MTE tag check failure + * @env: CPU env + * @desc: MTEDESC descriptor word + * @dirty_ptr: Failing dirty address + * @ra: TCG retaddr + * + * This may never return (if the MTE tag checks are configured to fault). + */ +void mte_check_fail(CPUARMState *env, uint32_t desc, + uint64_t dirty_ptr, uintptr_t ra); + +/** + * mte_mops_set_tags: Set MTE tags for a portion of a FEAT_MOPS operation + * @env: CPU env + * @dirty_ptr: Start address of memory region (dirty pointer) + * @size: length of region (guaranteed not to cross page boundary) + * @desc: MTEDESC descriptor word + */ +void mte_mops_set_tags(CPUARMState *env, uint64_t dirty_ptr, uint64_t size, + uint32_t desc); + static inline int allocation_tag_from_addr(uint64_t ptr) { return extract64(ptr, 56, 4); @@ -1229,6 +1557,7 @@ enum MVEECIState { /* Definitions for the PMU registers */ #define PMCRN_MASK 0xf800 #define PMCRN_SHIFT 11 +#define PMCRLP 0x80 #define PMCRLC 0x40 #define PMCRDP 0x20 #define PMCRX 0x10 @@ -1237,10 +1566,10 @@ enum MVEECIState { #define PMCRP 0x2 #define PMCRE 0x1 /* - * Mask of PMCR bits writeable by guest (not including WO bits like C, P, + * Mask of PMCR bits writable by guest (not including WO bits like C, P, * which can be written as 1 to trigger behaviour but which stay RAZ). 
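 *
 * Illustrative use (not taken from this patch): a PMCR write handler is
 * expected to mask the incoming value in the usual way, e.g.
 *
 *     env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK;
 *     env->cp15.c9_pmcr |= value & PMCR_WRITABLE_MASK;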
*/ -#define PMCR_WRITEABLE_MASK (PMCRLC | PMCRDP | PMCRX | PMCRD | PMCRE) +#define PMCR_WRITABLE_MASK (PMCRLP | PMCRLC | PMCRDP | PMCRX | PMCRD | PMCRE) #define PMXEVTYPER_P 0x80000000 #define PMXEVTYPER_U 0x40000000 @@ -1261,13 +1590,153 @@ enum MVEECIState { static inline uint32_t pmu_num_counters(CPUARMState *env) { - return (env->cp15.c9_pmcr & PMCRN_MASK) >> PMCRN_SHIFT; + ARMCPU *cpu = env_archcpu(env); + + return (cpu->isar.reset_pmcr_el0 & PMCRN_MASK) >> PMCRN_SHIFT; } /* Bits allowed to be set/cleared for PMCNTEN* and PMINTEN* */ static inline uint64_t pmu_counter_mask(CPUARMState *env) { - return (1 << 31) | ((1 << pmu_num_counters(env)) - 1); + return (1ULL << 31) | ((1ULL << pmu_num_counters(env)) - 1); +} + +#ifdef TARGET_AARCH64 +GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cpu, int base_reg); +int aarch64_gdb_get_sve_reg(CPUState *cs, GByteArray *buf, int reg); +int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg); +int aarch64_gdb_get_fpu_reg(CPUState *cs, GByteArray *buf, int reg); +int aarch64_gdb_set_fpu_reg(CPUState *cs, uint8_t *buf, int reg); +int aarch64_gdb_get_pauth_reg(CPUState *cs, GByteArray *buf, int reg); +int aarch64_gdb_set_pauth_reg(CPUState *cs, uint8_t *buf, int reg); +void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp); +void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp); +void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp); +void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp); +void aarch64_max_tcg_initfn(Object *obj); +void aarch64_add_pauth_properties(Object *obj); +void aarch64_add_sve_properties(Object *obj); +void aarch64_add_sme_properties(Object *obj); +#endif + +/* Read the CONTROL register as the MRS instruction would. */ +uint32_t arm_v7m_mrs_control(CPUARMState *env, uint32_t secure); + +/* + * Return a pointer to the location where we currently store the + * stack pointer for the requested security state and thread mode. + * This pointer will become invalid if the CPU state is updated + * such that the stack pointers are switched around (eg changing + * the SPSEL control bit). + */ +uint32_t *arm_v7m_get_sp_ptr(CPUARMState *env, bool secure, + bool threadmode, bool spsel); + +bool el_is_in_host(CPUARMState *env, int el); + +void aa32_max_features(ARMCPU *cpu); +int exception_target_el(CPUARMState *env); +bool arm_singlestep_active(CPUARMState *env); +bool arm_generate_debug_exceptions(CPUARMState *env); + +/** + * pauth_ptr_mask: + * @param: parameters defining the MMU setup + * + * Return a mask of the address bits that contain the authentication code, + * given the MMU config defined by @param. + */ +static inline uint64_t pauth_ptr_mask(ARMVAParameters param) +{ + int bot_pac_bit = 64 - param.tsz; + int top_pac_bit = 64 - 8 * param.tbi; + + return MAKE_64BIT_MASK(bot_pac_bit, top_pac_bit - bot_pac_bit); +} + +/* Add the cpreg definitions for debug related system registers */ +void define_debug_regs(ARMCPU *cpu); + +/* Effective value of MDCR_EL2 */ +static inline uint64_t arm_mdcr_el2_eff(CPUARMState *env) +{ + return arm_is_el2_enabled(env) ? env->cp15.mdcr_el2 : 0; } +/* Powers of 2 for sve_vq_map et al. */ +#define SVE_VQ_POW2_MAP \ + ((1 << (1 - 1)) | (1 << (2 - 1)) | \ + (1 << (4 - 1)) | (1 << (8 - 1)) | (1 << (16 - 1))) + +/* + * Return true if it is possible to take a fine-grained-trap to EL2. 
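+ *
+ * Illustrative sketch (not part of this patch): access-check code is
+ * expected to gate its per-register trap-bit lookup on this predicate,
+ * where fgt_trap_bit_set() stands in for the real lookup:
+ *
+ *     if (arm_fgt_active(env, arm_current_el(env)) &&
+ *         fgt_trap_bit_set(env)) {
+ *         return CP_ACCESS_TRAP_EL2;
+ *     }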
+ */ +static inline bool arm_fgt_active(CPUARMState *env, int el) +{ + /* + * The Arm ARM only requires the "{E2H,TGE} != {1,1}" test for traps + * that can affect EL0, but it is harmless to do the test also for + * traps on registers that are only accessible at EL1 because if the test + * returns true then we can't be executing at EL1 anyway. + * FGT traps only happen when EL2 is enabled and EL1 is AArch64; + * traps from AArch32 only happen for the EL0 is AArch32 case. + */ + return cpu_isar_feature(aa64_fgt, env_archcpu(env)) && + el < 2 && arm_is_el2_enabled(env) && + arm_el_is_aa64(env, 1) && + (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) && + (!arm_feature(env, ARM_FEATURE_EL3) || (env->cp15.scr_el3 & SCR_FGTEN)); +} + +void assert_hflags_rebuild_correctly(CPUARMState *env); + +/* + * Although the ARM implementation of hardware assisted debugging + * allows for different breakpoints per-core, the current GDB + * interface treats them as a global pool of registers (which seems to + * be the case for x86, ppc and s390). As a result we store one copy + * of registers which is used for all active cores. + * + * Write access is serialised by virtue of the GDB protocol which + * updates things. Read access (i.e. when the values are copied to the + * vCPU) is also gated by GDB's run control. + * + * This is not unreasonable as most of the time debugging kernels you + * never know which core will eventually execute your function. + */ + +typedef struct { + uint64_t bcr; + uint64_t bvr; +} HWBreakpoint; + +/* + * The watchpoint registers can cover more area than the requested + * watchpoint so we need to store the additional information + * somewhere. We also need to supply a CPUWatchpoint to the GDB stub + * when the watchpoint is hit. 
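+ *
+ * Illustrative sketch (not part of this patch): on a watchpoint debug
+ * exit the stub hands the embedded CPUWatchpoint back to the core code:
+ *
+ *     CPUWatchpoint *wp = find_hw_watchpoint(cs, addr);
+ *     if (wp) {
+ *         cs->watchpoint_hit = wp;
+ *     }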
+ */ +typedef struct { + uint64_t wcr; + uint64_t wvr; + CPUWatchpoint details; +} HWWatchpoint; + +/* Maximum and current break/watch point counts */ +extern int max_hw_bps, max_hw_wps; +extern GArray *hw_breakpoints, *hw_watchpoints; + +#define cur_hw_wps (hw_watchpoints->len) +#define cur_hw_bps (hw_breakpoints->len) +#define get_hw_bp(i) (&g_array_index(hw_breakpoints, HWBreakpoint, i)) +#define get_hw_wp(i) (&g_array_index(hw_watchpoints, HWWatchpoint, i)) + +bool find_hw_breakpoint(CPUState *cpu, target_ulong pc); +int insert_hw_breakpoint(target_ulong pc); +int delete_hw_breakpoint(target_ulong pc); + +bool check_watchpoint_in_range(int i, target_ulong addr); +CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr); +int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type); +int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type); #endif diff --git a/target/arm/kvm-consts.h b/target/arm/kvm-consts.h index 580f1c1fee..7c6adc14f6 100644 --- a/target/arm/kvm-consts.h +++ b/target/arm/kvm-consts.h @@ -14,16 +14,16 @@ #ifndef ARM_KVM_CONSTS_H #define ARM_KVM_CONSTS_H +#ifdef NEED_CPU_H #ifdef CONFIG_KVM #include <linux/kvm.h> #include <linux/psci.h> - #define MISMATCH_CHECK(X, Y) QEMU_BUILD_BUG_ON(X != Y) +#endif +#endif -#else - +#ifndef MISMATCH_CHECK #define MISMATCH_CHECK(X, Y) QEMU_BUILD_BUG_ON(0) - #endif #define CP_REG_SIZE_SHIFT 52 @@ -77,6 +77,8 @@ MISMATCH_CHECK(QEMU_PSCI_0_1_FN_MIGRATE, KVM_PSCI_FN_MIGRATE); #define QEMU_PSCI_0_2_FN64_AFFINITY_INFO QEMU_PSCI_0_2_FN64(4) #define QEMU_PSCI_0_2_FN64_MIGRATE QEMU_PSCI_0_2_FN64(5) +#define QEMU_PSCI_1_0_FN_PSCI_FEATURES QEMU_PSCI_0_2_FN(10) + MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_SUSPEND, PSCI_0_2_FN_CPU_SUSPEND); MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_OFF, PSCI_0_2_FN_CPU_OFF); MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_ON, PSCI_0_2_FN_CPU_ON); @@ -84,18 +86,22 @@ MISMATCH_CHECK(QEMU_PSCI_0_2_FN_MIGRATE, PSCI_0_2_FN_MIGRATE); MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_CPU_SUSPEND, PSCI_0_2_FN64_CPU_SUSPEND); MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_CPU_ON, PSCI_0_2_FN64_CPU_ON); MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_MIGRATE, PSCI_0_2_FN64_MIGRATE); +MISMATCH_CHECK(QEMU_PSCI_1_0_FN_PSCI_FEATURES, PSCI_1_0_FN_PSCI_FEATURES); /* PSCI v0.2 return values used by TCG emulation of PSCI */ /* No Trusted OS migration to worry about when offlining CPUs */ #define QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED 2 -/* We implement version 0.2 only */ -#define QEMU_PSCI_0_2_RET_VERSION_0_2 2 +#define QEMU_PSCI_VERSION_0_1 0x00001 +#define QEMU_PSCI_VERSION_0_2 0x00002 +#define QEMU_PSCI_VERSION_1_0 0x10000 +#define QEMU_PSCI_VERSION_1_1 0x10001 MISMATCH_CHECK(QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED, PSCI_0_2_TOS_MP); -MISMATCH_CHECK(QEMU_PSCI_0_2_RET_VERSION_0_2, - (PSCI_VERSION_MAJOR(0) | PSCI_VERSION_MINOR(2))); +/* We don't bother to check every possible version value */ +MISMATCH_CHECK(QEMU_PSCI_VERSION_0_2, PSCI_VERSION(0, 2)); +MISMATCH_CHECK(QEMU_PSCI_VERSION_1_1, PSCI_VERSION(1, 1)); /* PSCI return values (inclusive of all PSCI versions) */ #define QEMU_PSCI_RET_SUCCESS 0 @@ -118,13 +124,10 @@ MISMATCH_CHECK(QEMU_PSCI_RET_INTERNAL_FAILURE, PSCI_RET_INTERNAL_FAILURE); MISMATCH_CHECK(QEMU_PSCI_RET_NOT_PRESENT, PSCI_RET_NOT_PRESENT); MISMATCH_CHECK(QEMU_PSCI_RET_DISABLED, PSCI_RET_DISABLED); -/* Note that KVM uses overlapping values for AArch32 and AArch64 - * target CPU numbers. AArch32 targets: +/* + * Note that KVM uses overlapping values for AArch32 and AArch64 + * target CPU numbers. 
AArch64 targets: */ -#define QEMU_KVM_ARM_TARGET_CORTEX_A15 0 -#define QEMU_KVM_ARM_TARGET_CORTEX_A7 1 - -/* AArch64 targets: */ #define QEMU_KVM_ARM_TARGET_AEM_V8 0 #define QEMU_KVM_ARM_TARGET_FOUNDATION_V8 1 #define QEMU_KVM_ARM_TARGET_CORTEX_A57 2 diff --git a/target/arm/kvm-stub.c b/target/arm/kvm-stub.c index 56a7099e6b..965a486b32 100644 --- a/target/arm/kvm-stub.c +++ b/target/arm/kvm-stub.c @@ -15,10 +15,10 @@ bool write_kvmstate_to_list(ARMCPU *cpu) { - abort(); + g_assert_not_reached(); } bool write_list_to_kvmstate(ARMCPU *cpu, int level) { - abort(); + g_assert_not_reached(); } diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 94b970bbf9..21ebbf3b8f 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -2,6 +2,8 @@ * ARM implementation of KVM hooks * * Copyright Christoffer Dall 2009-2010 + * Copyright Mian-M. Hamayun 2013, Virtual Open Systems + * Copyright Alex Bennée 2014, Linaro * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. @@ -13,13 +15,13 @@ #include <linux/kvm.h> -#include "qemu-common.h" #include "qemu/timer.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "qom/object.h" #include "qapi/error.h" #include "sysemu/sysemu.h" +#include "sysemu/runstate.h" #include "sysemu/kvm.h" #include "sysemu/kvm_int.h" #include "kvm_arm.h" @@ -29,9 +31,14 @@ #include "hw/pci/pci.h" #include "exec/memattrs.h" #include "exec/address-spaces.h" +#include "exec/gdbstub.h" #include "hw/boards.h" #include "hw/irq.h" +#include "qapi/visitor.h" #include "qemu/log.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/ghes.h" +#include "target/arm/gtimer.h" const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_LAST_INFO @@ -41,28 +48,54 @@ static bool cap_has_mp_state; static bool cap_has_inject_serror_esr; static bool cap_has_inject_ext_dabt; +/** + * ARMHostCPUFeatures: information about the host CPU (identified + * by asking the host kernel) + */ +typedef struct ARMHostCPUFeatures { + ARMISARegisters isar; + uint64_t features; + uint32_t target; + const char *dtb_compatible; +} ARMHostCPUFeatures; + static ARMHostCPUFeatures arm_host_cpu_features; -int kvm_arm_vcpu_init(CPUState *cs) +/** + * kvm_arm_vcpu_init: + * @cpu: ARMCPU + * + * Initialize (or reinitialize) the VCPU by invoking the + * KVM_ARM_VCPU_INIT ioctl with the CPU type and feature + * bitmask specified in the CPUState. + * + * Returns: 0 if success else < 0 error code + */ +static int kvm_arm_vcpu_init(ARMCPU *cpu) { - ARMCPU *cpu = ARM_CPU(cs); struct kvm_vcpu_init init; init.target = cpu->kvm_target; memcpy(init.features, cpu->kvm_init_features, sizeof(init.features)); - return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init); + return kvm_vcpu_ioctl(CPU(cpu), KVM_ARM_VCPU_INIT, &init); } -int kvm_arm_vcpu_finalize(CPUState *cs, int feature) -{ - return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_FINALIZE, &feature); -} - -void kvm_arm_init_serror_injection(CPUState *cs) +/** + * kvm_arm_vcpu_finalize: + * @cpu: ARMCPU + * @feature: feature to finalize + * + * Finalizes the configuration of the specified VCPU feature by + * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring + * this are documented in the "KVM_ARM_VCPU_FINALIZE" section of + * KVM's API documentation. 
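+ *
+ * For example, the SVE path later in this file finalizes with
+ *
+ *     ret = kvm_arm_vcpu_finalize(cpu, KVM_ARM_VCPU_SVE);
+ *
+ * once the vector lengths have been configured.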
+ * + * Returns: 0 if success else < 0 error code + */ +static int kvm_arm_vcpu_finalize(ARMCPU *cpu, int feature) { - cap_has_inject_serror_esr = kvm_check_extension(cs->kvm_state, - KVM_CAP_ARM_INJECT_SERROR_ESR); + return kvm_vcpu_ioctl(CPU(cpu), KVM_ARM_VCPU_FINALIZE, &feature); } bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, @@ -80,7 +113,9 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, if (max_vm_pa_size < 0) { max_vm_pa_size = 0; } - vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); + do { + vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); + } while (vmfd == -1 && errno == EINTR); if (vmfd < 0) { goto err; } @@ -165,6 +200,260 @@ void kvm_arm_destroy_scratch_host_vcpu(int *fdarray) } } +static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id) +{ + uint64_t ret; + struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret }; + int err; + + assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64); + err = ioctl(fd, KVM_GET_ONE_REG, &idreg); + if (err < 0) { + return -1; + } + *pret = ret; + return 0; +} + +static int read_sys_reg64(int fd, uint64_t *pret, uint64_t id) +{ + struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret }; + + assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64); + return ioctl(fd, KVM_GET_ONE_REG, &idreg); +} + +static bool kvm_arm_pauth_supported(void) +{ + return (kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_ADDRESS) && + kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_GENERIC)); +} + +static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) +{ + /* Identify the feature bits corresponding to the host CPU, and + * fill out the ARMHostCPUClass fields accordingly. To do this + * we have to create a scratch VM, create a single CPU inside it, + * and then query that CPU for the relevant ID registers. + */ + int fdarray[3]; + bool sve_supported; + bool pmu_supported = false; + uint64_t features = 0; + int err; + + /* Old kernels may not know about the PREFERRED_TARGET ioctl: however + * we know these will only support creating one kind of guest CPU, + * which is its preferred CPU type. Fortunately these old kernels + * support only a very limited number of CPUs. + */ + static const uint32_t cpus_to_try[] = { + KVM_ARM_TARGET_AEM_V8, + KVM_ARM_TARGET_FOUNDATION_V8, + KVM_ARM_TARGET_CORTEX_A57, + QEMU_KVM_ARM_TARGET_NONE + }; + /* + * target = -1 informs kvm_arm_create_scratch_host_vcpu() + * to use the preferred target + */ + struct kvm_vcpu_init init = { .target = -1, }; + + /* + * Ask for SVE if supported, so that we can query ID_AA64ZFR0, + * which is otherwise RAZ. + */ + sve_supported = kvm_arm_sve_supported(); + if (sve_supported) { + init.features[0] |= 1 << KVM_ARM_VCPU_SVE; + } + + /* + * Ask for Pointer Authentication if supported, so that we get + * the unsanitized field values for AA64ISAR1_EL1. + */ + if (kvm_arm_pauth_supported()) { + init.features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS | + 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC); + } + + if (kvm_arm_pmu_supported()) { + init.features[0] |= 1 << KVM_ARM_VCPU_PMU_V3; + pmu_supported = true; + } + + if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) { + return false; + } + + ahcf->target = init.target; + ahcf->dtb_compatible = "arm,arm-v8"; + + err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0, + ARM64_SYS_REG(3, 0, 0, 4, 0)); + if (unlikely(err < 0)) { + /* + * Before v4.15, the kernel only exposed a limited number of system + * registers, not including any of the interesting AArch64 ID regs. 
+ * For the most part we could leave these fields as zero with minimal + * effect, since this does not affect the values seen by the guest. + * + * However, it could cause problems down the line for QEMU, + * so provide a minimal v8.0 default. + * + * ??? Could read MIDR and use knowledge from cpu64.c. + * ??? Could map a page of memory into our temp guest and + * run the tiniest of hand-crafted kernels to extract + * the values seen by the guest. + * ??? Either of these sounds like too much effort just + * to work around running a modern host kernel. + */ + ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */ + err = 0; + } else { + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1, + ARM64_SYS_REG(3, 0, 0, 4, 1)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0, + ARM64_SYS_REG(3, 0, 0, 4, 5)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, + ARM64_SYS_REG(3, 0, 0, 5, 0)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, + ARM64_SYS_REG(3, 0, 0, 5, 1)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0, + ARM64_SYS_REG(3, 0, 0, 6, 0)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1, + ARM64_SYS_REG(3, 0, 0, 6, 1)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar2, + ARM64_SYS_REG(3, 0, 0, 6, 2)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0, + ARM64_SYS_REG(3, 0, 0, 7, 0)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1, + ARM64_SYS_REG(3, 0, 0, 7, 1)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2, + ARM64_SYS_REG(3, 0, 0, 7, 2)); + + /* + * Note that if AArch32 support is not present in the host, + * the AArch32 sysregs are present to be read, but will + * return UNKNOWN values. This is neither better nor worse + * than skipping the reads and leaving 0, as we must avoid + * considering the values in every case. 
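+ *
+ * Consumers therefore gate on the AArch64 view first, as the DBGDIDR
+ * synthesis further down does (illustrative shape):
+ *
+ *     if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) {
+ *         the AArch32 ID register values may be trusted here
+ *     }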
+ */ + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr0, + ARM64_SYS_REG(3, 0, 0, 1, 0)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr1, + ARM64_SYS_REG(3, 0, 0, 1, 1)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, + ARM64_SYS_REG(3, 0, 0, 1, 2)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, + ARM64_SYS_REG(3, 0, 0, 1, 4)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, + ARM64_SYS_REG(3, 0, 0, 1, 5)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, + ARM64_SYS_REG(3, 0, 0, 1, 6)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, + ARM64_SYS_REG(3, 0, 0, 1, 7)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0, + ARM64_SYS_REG(3, 0, 0, 2, 0)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1, + ARM64_SYS_REG(3, 0, 0, 2, 1)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2, + ARM64_SYS_REG(3, 0, 0, 2, 2)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3, + ARM64_SYS_REG(3, 0, 0, 2, 3)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4, + ARM64_SYS_REG(3, 0, 0, 2, 4)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5, + ARM64_SYS_REG(3, 0, 0, 2, 5)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, + ARM64_SYS_REG(3, 0, 0, 2, 6)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6, + ARM64_SYS_REG(3, 0, 0, 2, 7)); + + err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0, + ARM64_SYS_REG(3, 0, 0, 3, 0)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1, + ARM64_SYS_REG(3, 0, 0, 3, 1)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2, + ARM64_SYS_REG(3, 0, 0, 3, 2)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr2, + ARM64_SYS_REG(3, 0, 0, 3, 4)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr1, + ARM64_SYS_REG(3, 0, 0, 3, 5)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr5, + ARM64_SYS_REG(3, 0, 0, 3, 6)); + + /* + * DBGDIDR is a bit complicated because the kernel doesn't + * provide an accessor for it in 64-bit mode, which is what this + * scratch VM is in, and there's no architected "64-bit sysreg + * which reads the same as the 32-bit register" the way there is + * for other ID registers. Instead we synthesize a value from the + * AArch64 ID_AA64DFR0, the same way the kernel code in + * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does. + * We only do this if the CPU supports AArch32 at EL1. 
+ */ + if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) { + int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS); + int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS); + int ctx_cmps = + FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS); + int version = 6; /* ARMv8 debug architecture */ + bool has_el3 = + !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3); + uint32_t dbgdidr = 0; + + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps); + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, BRPS, brps); + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, CTX_CMPS, ctx_cmps); + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, VERSION, version); + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3); + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3); + dbgdidr |= (1 << 15); /* RES1 bit */ + ahcf->isar.dbgdidr = dbgdidr; + } + + if (pmu_supported) { + /* PMCR_EL0 is only accessible if the vCPU has feature PMU_V3 */ + err |= read_sys_reg64(fdarray[2], &ahcf->isar.reset_pmcr_el0, + ARM64_SYS_REG(3, 3, 9, 12, 0)); + } + + if (sve_supported) { + /* + * There is a range of kernels between kernel commit 73433762fcae + * and f81cb2c3ad41 which have a bug where the kernel doesn't + * expose SYS_ID_AA64ZFR0_EL1 via the ONE_REG API unless the VM has + * enabled SVE support, which resulted in an error rather than RAZ. + * So only read the register if we set KVM_ARM_VCPU_SVE above. + */ + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64zfr0, + ARM64_SYS_REG(3, 0, 0, 4, 4)); + } + } + + kvm_arm_destroy_scratch_host_vcpu(fdarray); + + if (err < 0) { + return false; + } + + /* + * We can assume any KVM supporting CPU is at least a v8 + * with VFPv4+Neon; this in turn implies most of the other + * feature bits. + */ + features |= 1ULL << ARM_FEATURE_V8; + features |= 1ULL << ARM_FEATURE_NEON; + features |= 1ULL << ARM_FEATURE_AARCH64; + features |= 1ULL << ARM_FEATURE_PMU; + features |= 1ULL << ARM_FEATURE_GENERIC_TIMER; + + ahcf->features = features; + + return true; +} + void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) { CPUARMState *env = &cpu->env; @@ -208,10 +497,10 @@ static void kvm_steal_time_set(Object *obj, bool value, Error **errp) } /* KVM VCPU properties should be prefixed with "kvm-". */ -void kvm_arm_add_vcpu_properties(Object *obj) +void kvm_arm_add_vcpu_properties(ARMCPU *cpu) { - ARMCPU *cpu = ARM_CPU(obj); CPUARMState *env = &cpu->env; + Object *obj = OBJECT(cpu); if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) { cpu->kvm_adjvtime = true; @@ -246,6 +535,13 @@ int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa) return ret > 0 ? ret : 40; } +int kvm_arch_get_default_type(MachineState *ms) +{ + bool fixed_ipa; + int size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa); + return fixed_ipa ? 
0 : size; +} + int kvm_arch_init(MachineState *ms, KVMState *s) { int ret = 0; @@ -262,6 +558,10 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); + /* Check whether user space can specify guest syndrome value */ + cap_has_inject_serror_esr = + kvm_check_extension(s, KVM_CAP_ARM_INJECT_SERROR_ESR); + if (ms->smp.cpus > 256 && !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) { error_report("Using more than 256 vcpus requires a host kernel " @@ -279,6 +579,34 @@ int kvm_arch_init(MachineState *ms, KVMState *s) } } + if (s->kvm_eager_split_size) { + uint32_t sizes; + + sizes = kvm_vm_check_extension(s, KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES); + if (!sizes) { + s->kvm_eager_split_size = 0; + warn_report("Eager Page Split support not available"); + } else if (!(s->kvm_eager_split_size & sizes)) { + error_report("Eager Page Split requested chunk size not valid"); + ret = -EINVAL; + } else { + ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE, 0, + s->kvm_eager_split_size); + if (ret < 0) { + error_report("Enabling of Eager Page Split failed: %s", + strerror(-ret)); + } + } + } + + max_hw_wps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS); + hw_watchpoints = g_array_sized_new(true, true, + sizeof(HWWatchpoint), max_hw_wps); + + max_hw_bps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS); + hw_breakpoints = g_array_sized_new(true, true, + sizeof(HWBreakpoint), max_hw_bps); + return ret; } @@ -335,8 +663,10 @@ static void kvm_arm_devlistener_del(MemoryListener *listener, } static MemoryListener devlistener = { + .name = "kvm-arm", .region_add = kvm_arm_devlistener_add, .region_del = kvm_arm_devlistener_del, + .priority = MEMORY_LISTENER_PRIORITY_MIN, }; static void kvm_arm_set_device_addr(KVMDevice *kd) @@ -436,11 +766,36 @@ static uint64_t *kvm_arm_get_cpreg_ptr(ARMCPU *cpu, uint64_t regidx) return &cpu->cpreg_values[res - cpu->cpreg_indexes]; } -/* Initialize the ARMCPU cpreg list according to the kernel's +/** + * kvm_arm_reg_syncs_via_cpreg_list: + * @regidx: KVM register index + * + * Return true if this KVM register should be synchronized via the + * cpreg list of arbitrary system registers, false if it is synchronized + * by hand using code in kvm_arch_get/put_registers(). + */ +static bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx) +{ + switch (regidx & KVM_REG_ARM_COPROC_MASK) { + case KVM_REG_ARM_CORE: + case KVM_REG_ARM64_SVE: + return false; + default: + return true; + } +} + +/** + * kvm_arm_init_cpreg_list: + * @cpu: ARMCPU + * + * Initialize the ARMCPU cpreg list according to the kernel's * definition of what CPU registers it knows about (and throw away * the previous TCG-created cpreg list). + * + * Returns: 0 if success, else < 0 error code */ -int kvm_arm_init_cpreg_list(ARMCPU *cpu) +static int kvm_arm_init_cpreg_list(ARMCPU *cpu) { struct kvm_reg_list rl; struct kvm_reg_list *rlp; @@ -513,6 +868,28 @@ out: return ret; } +/** + * kvm_arm_cpreg_level: + * @regidx: KVM register index + * + * Return the level of this coprocessor/system register. Return value is + * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE. + */ +static int kvm_arm_cpreg_level(uint64_t regidx) +{ + /* + * All system registers are assumed to be level KVM_PUT_RUNTIME_STATE. + * If a register should be written less often, you must add it here + * with a state of either KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE. 
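+ *
+ * The counter registers below are the current examples: they are
+ * KVM_PUT_FULL_STATE so the guest counters are written only on a full
+ * state restore (e.g. incoming migration) rather than being perturbed
+ * on every runtime sync.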
+ */ + switch (regidx) { + case KVM_REG_ARM_TIMER_CNT: + case KVM_REG_ARM_PTIMER_CNT: + return KVM_PUT_FULL_STATE; + } + return KVM_PUT_RUNTIME_STATE; +} + bool write_kvmstate_to_list(ARMCPU *cpu) { CPUState *cs = CPU(cpu); @@ -520,27 +897,22 @@ bool write_kvmstate_to_list(ARMCPU *cpu) bool ok = true; for (i = 0; i < cpu->cpreg_array_len; i++) { - struct kvm_one_reg r; uint64_t regidx = cpu->cpreg_indexes[i]; uint32_t v32; int ret; - r.id = regidx; - switch (regidx & KVM_REG_SIZE_MASK) { case KVM_REG_SIZE_U32: - r.addr = (uintptr_t)&v32; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); + ret = kvm_get_one_reg(cs, regidx, &v32); if (!ret) { cpu->cpreg_values[i] = v32; } break; case KVM_REG_SIZE_U64: - r.addr = (uintptr_t)(cpu->cpreg_values + i); - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); + ret = kvm_get_one_reg(cs, regidx, cpu->cpreg_values + i); break; default: - abort(); + g_assert_not_reached(); } if (ret) { ok = false; @@ -556,7 +928,6 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) bool ok = true; for (i = 0; i < cpu->cpreg_array_len; i++) { - struct kvm_one_reg r; uint64_t regidx = cpu->cpreg_indexes[i]; uint32_t v32; int ret; @@ -565,19 +936,17 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) continue; } - r.id = regidx; switch (regidx & KVM_REG_SIZE_MASK) { case KVM_REG_SIZE_U32: v32 = cpu->cpreg_values[i]; - r.addr = (uintptr_t)&v32; + ret = kvm_set_one_reg(cs, regidx, &v32); break; case KVM_REG_SIZE_U64: - r.addr = (uintptr_t)(cpu->cpreg_values + i); + ret = kvm_set_one_reg(cs, regidx, cpu->cpreg_values + i); break; default: - abort(); + g_assert_not_reached(); } - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r); if (ret) { /* We might fail for "unknown register" and also for * "you tried to set a register which is constant with @@ -613,7 +982,7 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu) /* Re-init VCPU so that all registers are set to * their respective reset values. */ - ret = kvm_arm_vcpu_init(CPU(cpu)); + ret = kvm_arm_vcpu_init(cpu); if (ret < 0) { fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret)); abort(); @@ -635,58 +1004,50 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu) /* * Update KVM's MP_STATE based on what QEMU thinks it is */ -int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu) +static int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu) { if (cap_has_mp_state) { struct kvm_mp_state mp_state = { .mp_state = (cpu->power_state == PSCI_OFF) ? KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE }; - int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state); - if (ret) { - fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n", - __func__, ret, strerror(-ret)); - return -1; - } + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state); } - return 0; } /* * Sync the KVM MP_STATE into QEMU */ -int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) +static int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) { if (cap_has_mp_state) { struct kvm_mp_state mp_state; int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state); if (ret) { - fprintf(stderr, "%s: failed to get MP_STATE %d/%s\n", - __func__, ret, strerror(-ret)); - abort(); + return ret; } cpu->power_state = (mp_state.mp_state == KVM_MP_STATE_STOPPED) ? PSCI_OFF : PSCI_ON; } - return 0; } -void kvm_arm_get_virtual_time(CPUState *cs) +/** + * kvm_arm_get_virtual_time: + * @cpu: ARMCPU + * + * Gets the VCPU's virtual counter and stores it in the KVM CPU state. 
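+ *
+ * Illustrative call site (see kvm_arm_vm_state_change() below): the
+ * counter is captured when the VM stops so that kvm-adjvtime can put
+ * it back on resume:
+ *
+ *     if (!running && cpu->kvm_adjvtime) {
+ *         kvm_arm_get_virtual_time(cpu);
+ *     }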
+ */ +static void kvm_arm_get_virtual_time(ARMCPU *cpu) { - ARMCPU *cpu = ARM_CPU(cs); - struct kvm_one_reg reg = { - .id = KVM_REG_ARM_TIMER_CNT, - .addr = (uintptr_t)&cpu->kvm_vtime, - }; int ret; if (cpu->kvm_vtime_dirty) { return; } - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(CPU(cpu), KVM_REG_ARM_TIMER_CNT, &cpu->kvm_vtime); if (ret) { error_report("Failed to get KVM_REG_ARM_TIMER_CNT"); abort(); @@ -695,20 +1056,21 @@ void kvm_arm_get_virtual_time(CPUState *cs) cpu->kvm_vtime_dirty = true; } -void kvm_arm_put_virtual_time(CPUState *cs) +/** + * kvm_arm_put_virtual_time: + * @cpu: ARMCPU + * + * Sets the VCPU's virtual counter to the value stored in the KVM CPU state. + */ +static void kvm_arm_put_virtual_time(ARMCPU *cpu) { - ARMCPU *cpu = ARM_CPU(cs); - struct kvm_one_reg reg = { - .id = KVM_REG_ARM_TIMER_CNT, - .addr = (uintptr_t)&cpu->kvm_vtime, - }; int ret; if (!cpu->kvm_vtime_dirty) { return; } - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(CPU(cpu), KVM_REG_ARM_TIMER_CNT, &cpu->kvm_vtime); if (ret) { error_report("Failed to set KVM_REG_ARM_TIMER_CNT"); abort(); @@ -717,7 +1079,15 @@ void kvm_arm_put_virtual_time(CPUState *cs) cpu->kvm_vtime_dirty = false; } -int kvm_put_vcpu_events(ARMCPU *cpu) +/** + * kvm_put_vcpu_events: + * @cpu: ARMCPU + * + * Put VCPU related state to kvm. + * + * Returns: 0 if success else < 0 error code + */ +static int kvm_put_vcpu_events(ARMCPU *cpu) { CPUARMState *env = &cpu->env; struct kvm_vcpu_events events; @@ -746,7 +1116,15 @@ int kvm_put_vcpu_events(ARMCPU *cpu) return ret; } -int kvm_get_vcpu_events(ARMCPU *cpu) +/** + * kvm_get_vcpu_events: + * @cpu: ARMCPU + * + * Get VCPU related state from kvm. + * + * Returns: 0 if success else < 0 error code + */ +static int kvm_get_vcpu_events(ARMCPU *cpu) { CPUARMState *env = &cpu->env; struct kvm_vcpu_events events; @@ -770,6 +1148,63 @@ int kvm_get_vcpu_events(ARMCPU *cpu) return 0; } +#define ARM64_REG_ESR_EL1 ARM64_SYS_REG(3, 0, 5, 2, 0) +#define ARM64_REG_TCR_EL1 ARM64_SYS_REG(3, 0, 2, 0, 2) + +/* + * ESR_EL1 + * ISS encoding + * AARCH64: DFSC, bits [5:0] + * AARCH32: + * TTBCR.EAE == 0 + * FS[4] - DFSR[10] + * FS[3:0] - DFSR[3:0] + * TTBCR.EAE == 1 + * FS, bits [5:0] + */ +#define ESR_DFSC(aarch64, lpae, v) \ + ((aarch64 || (lpae)) ? ((v) & 0x3F) \ + : (((v) >> 6) | ((v) & 0x1F))) + +#define ESR_DFSC_EXTABT(aarch64, lpae) \ + ((aarch64) ? 0x10 : (lpae) ? 
0x10 : 0x8) + +/** + * kvm_arm_verify_ext_dabt_pending: + * @cpu: ARMCPU + * + * Verify the fault status code wrt the Ext DABT injection + * + * Returns: true if the fault status code is as expected, false otherwise + */ +static bool kvm_arm_verify_ext_dabt_pending(ARMCPU *cpu) +{ + CPUState *cs = CPU(cpu); + uint64_t dfsr_val; + + if (!kvm_get_one_reg(cs, ARM64_REG_ESR_EL1, &dfsr_val)) { + CPUARMState *env = &cpu->env; + int aarch64_mode = arm_feature(env, ARM_FEATURE_AARCH64); + int lpae = 0; + + if (!aarch64_mode) { + uint64_t ttbcr; + + if (!kvm_get_one_reg(cs, ARM64_REG_TCR_EL1, &ttbcr)) { + lpae = arm_feature(env, ARM_FEATURE_LPAE) + && (ttbcr & TTBCR_EAE); + } + } + /* + * The verification here is based on the DFSC bits + * of the ESR_EL1 reg only + */ + return (ESR_DFSC(aarch64_mode, lpae, dfsr_val) == + ESR_DFSC_EXTABT(aarch64_mode, lpae)); + } + return false; +} + void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) { ARMCPU *cpu = ARM_CPU(cs); @@ -784,7 +1219,7 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) * an IMPLEMENTATION DEFINED exception (for 32-bit EL1) */ if (!arm_feature(env, ARM_FEATURE_AARCH64) && - unlikely(!kvm_arm_verify_ext_dabt_pending(cs))) { + unlikely(!kvm_arm_verify_ext_dabt_pending(cpu))) { error_report("Data abort exception with no valid ISS generated by " "guest memory access. KVM unable to emulate faulting " @@ -816,7 +1251,7 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) if (run->s.regs.device_irq_level != cpu->device_irq_level) { switched_level = cpu->device_irq_level ^ run->s.regs.device_irq_level; - qemu_mutex_lock_iothread(); + bql_lock(); if (switched_level & KVM_ARM_DEV_EL1_VTIMER) { qemu_set_irq(cpu->gt_timer_outputs[GTIMER_VIRT], @@ -845,41 +1280,39 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) /* We also mark unknown levels as processed to not waste cycles */ cpu->device_irq_level = run->s.regs.device_irq_level; - qemu_mutex_unlock_iothread(); + bql_unlock(); } return MEMTXATTRS_UNSPECIFIED; } -void kvm_arm_vm_state_change(void *opaque, bool running, RunState state) +static void kvm_arm_vm_state_change(void *opaque, bool running, RunState state) { - CPUState *cs = opaque; - ARMCPU *cpu = ARM_CPU(cs); + ARMCPU *cpu = opaque; if (running) { if (cpu->kvm_adjvtime) { - kvm_arm_put_virtual_time(cs); + kvm_arm_put_virtual_time(cpu); } } else { if (cpu->kvm_adjvtime) { - kvm_arm_get_virtual_time(cs); + kvm_arm_get_virtual_time(cpu); } } } /** * kvm_arm_handle_dabt_nisv: - * @cs: CPUState + * @cpu: ARMCPU * @esr_iss: ISS encoding (limited) for the exception from Data Abort * ISV bit set to '0b0' -> no valid instruction syndrome * @fault_ipa: faulting address for the synchronous data abort * * Returns: 0 if the exception has been handled, < 0 otherwise */ -static int kvm_arm_handle_dabt_nisv(CPUState *cs, uint64_t esr_iss, +static int kvm_arm_handle_dabt_nisv(ARMCPU *cpu, uint64_t esr_iss, uint64_t fault_ipa) { - ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; /* * Request KVM to inject the external data abort into the guest @@ -895,7 +1328,7 @@ static int kvm_arm_handle_dabt_nisv(CPUState *cs, uint64_t esr_iss, */ events.exception.ext_dabt_pending = 1; /* KVM_CAP_ARM_INJECT_EXT_DABT implies KVM_CAP_VCPU_EVENTS */ - if (!kvm_vcpu_ioctl(cs, KVM_SET_VCPU_EVENTS, &events)) { + if (!kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events)) { env->ext_dabt_raised = 1; return 0; } @@ -908,19 +1341,97 @@ static int kvm_arm_handle_dabt_nisv(CPUState *cs, uint64_t esr_iss, return -1; } +/** + * 
kvm_arm_handle_debug: + * @cpu: ARMCPU + * @debug_exit: debug part of the KVM exit structure + * + * Returns: TRUE if the debug exception was handled. + * + * See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register + * + * To minimise translating between kernel and user-space the kernel + * ABI just provides user-space with the full exception syndrome + * register value to be decoded in QEMU. + */ +static bool kvm_arm_handle_debug(ARMCPU *cpu, + struct kvm_debug_exit_arch *debug_exit) +{ + int hsr_ec = syn_get_ec(debug_exit->hsr); + CPUState *cs = CPU(cpu); + CPUARMState *env = &cpu->env; + + /* Ensure PC is synchronised */ + kvm_cpu_synchronize_state(cs); + + switch (hsr_ec) { + case EC_SOFTWARESTEP: + if (cs->singlestep_enabled) { + return true; + } else { + /* + * The kernel should have suppressed the guest's ability to + * single step at this point so something has gone wrong. + */ + error_report("%s: guest single-step while debugging unsupported" + " (%"PRIx64", %"PRIx32")", + __func__, env->pc, debug_exit->hsr); + return false; + } + break; + case EC_AA64_BKPT: + if (kvm_find_sw_breakpoint(cs, env->pc)) { + return true; + } + break; + case EC_BREAKPOINT: + if (find_hw_breakpoint(cs, env->pc)) { + return true; + } + break; + case EC_WATCHPOINT: + { + CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far); + if (wp) { + cs->watchpoint_hit = wp; + return true; + } + break; + } + default: + error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")", + __func__, debug_exit->hsr, env->pc); + } + + /* If we are not handling the debug exception it must belong to + * the guest. Let's re-use the existing TCG interrupt code to set + * everything up properly. + */ + cs->exception_index = EXCP_BKPT; + env->exception.syndrome = debug_exit->hsr; + env->exception.vaddress = debug_exit->far; + env->exception.target_el = 1; + bql_lock(); + arm_cpu_do_interrupt(cs); + bql_unlock(); + + return false; +} + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) { + ARMCPU *cpu = ARM_CPU(cs); int ret = 0; switch (run->exit_reason) { case KVM_EXIT_DEBUG: - if (kvm_arm_handle_debug(cs, &run->debug.arch)) { + if (kvm_arm_handle_debug(cpu, &run->debug.arch)) { ret = EXCP_DEBUG; } /* otherwise return to guest */ break; case KVM_EXIT_ARM_NISV: /* External DABT with no valid iss to decode */ - ret = kvm_arm_handle_dabt_nisv(cs, run->arm_nisv.esr_iss, + ret = kvm_arm_handle_dabt_nisv(cpu, run->arm_nisv.esr_iss, run->arm_nisv.fault_ipa); break; default: @@ -941,12 +1452,47 @@ int kvm_arch_process_async_events(CPUState *cs) return 0; } +/** + * kvm_arm_hw_debug_active: + * @cpu: ARMCPU + * + * Return: TRUE if any hardware breakpoints in use. + */ +static bool kvm_arm_hw_debug_active(ARMCPU *cpu) +{ + return ((cur_hw_wps > 0) || (cur_hw_bps > 0)); +} + +/** + * kvm_arm_copy_hw_debug_data: + * @ptr: kvm_guest_debug_arch structure + * + * Copy the architecture specific debug registers into the + * kvm_guest_debug ioctl structure. 
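+ *
+ * Used from kvm_arch_update_guest_debug() below, roughly:
+ *
+ *     dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW;
+ *     kvm_arm_copy_hw_debug_data(&dbg->arch);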
+ */ +static void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr) +{ + int i; + memset(ptr, 0, sizeof(struct kvm_guest_debug_arch)); + + for (i = 0; i < max_hw_wps; i++) { + HWWatchpoint *wp = get_hw_wp(i); + ptr->dbg_wcr[i] = wp->wcr; + ptr->dbg_wvr[i] = wp->wvr; + } + for (i = 0; i < max_hw_bps; i++) { + HWBreakpoint *bp = get_hw_bp(i); + ptr->dbg_bcr[i] = bp->bcr; + ptr->dbg_bvr[i] = bp->bvr; + } +} + void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg) { if (kvm_sw_breakpoints_active(cs)) { dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; } - if (kvm_arm_hw_debug_active(cs)) { + if (kvm_arm_hw_debug_active(ARM_CPU(cs))) { dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW; kvm_arm_copy_hw_debug_data(&dbg->arch); } @@ -959,7 +1505,7 @@ void kvm_arch_init_irq_routing(KVMState *s) int kvm_arch_irqchip_create(KVMState *s) { if (kvm_kernel_irqchip_split()) { - perror("-machine kernel_irqchip=split is not supported on ARM."); + error_report("-machine kernel_irqchip=split is not supported on ARM."); exit(1); } @@ -1052,7 +1598,825 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) return (data - 32) & 0xffff; } -bool kvm_arch_cpu_check_are_resettable(void) +static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) { + KVMState *s = KVM_STATE(obj); + uint64_t value = s->kvm_eager_split_size; + + visit_type_size(v, name, &value, errp); +} + +static void kvm_arch_set_eager_split_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + uint64_t value; + + if (s->fd != -1) { + error_setg(errp, "Unable to set early-split-size after KVM has been initialized"); + return; + } + + if (!visit_type_size(v, name, &value, errp)) { + return; + } + + if (value && !is_power_of_2(value)) { + error_setg(errp, "early-split-size must be a power of two"); + return; + } + + s->kvm_eager_split_size = value; +} + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ + object_class_property_add(oc, "eager-split-size", "size", + kvm_arch_get_eager_split_size, + kvm_arch_set_eager_split_size, NULL, NULL); + + object_class_property_set_description(oc, "eager-split-size", + "Eager Page Split chunk size for hugepages. 
(default: 0, disabled)"); +} + +int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type) +{ + switch (type) { + case GDB_BREAKPOINT_HW: + return insert_hw_breakpoint(addr); + break; + case GDB_WATCHPOINT_READ: + case GDB_WATCHPOINT_WRITE: + case GDB_WATCHPOINT_ACCESS: + return insert_hw_watchpoint(addr, len, type); + default: + return -ENOSYS; + } +} + +int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type) +{ + switch (type) { + case GDB_BREAKPOINT_HW: + return delete_hw_breakpoint(addr); + case GDB_WATCHPOINT_READ: + case GDB_WATCHPOINT_WRITE: + case GDB_WATCHPOINT_ACCESS: + return delete_hw_watchpoint(addr, len, type); + default: + return -ENOSYS; + } +} + +void kvm_arch_remove_all_hw_breakpoints(void) +{ + if (cur_hw_wps > 0) { + g_array_remove_range(hw_watchpoints, 0, cur_hw_wps); + } + if (cur_hw_bps > 0) { + g_array_remove_range(hw_breakpoints, 0, cur_hw_bps); + } +} + +static bool kvm_arm_set_device_attr(ARMCPU *cpu, struct kvm_device_attr *attr, + const char *name) +{ + int err; + + err = kvm_vcpu_ioctl(CPU(cpu), KVM_HAS_DEVICE_ATTR, attr); + if (err != 0) { + error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err)); + return false; + } + + err = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEVICE_ATTR, attr); + if (err != 0) { + error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err)); + return false; + } + return true; } + +void kvm_arm_pmu_init(ARMCPU *cpu) +{ + struct kvm_device_attr attr = { + .group = KVM_ARM_VCPU_PMU_V3_CTRL, + .attr = KVM_ARM_VCPU_PMU_V3_INIT, + }; + + if (!cpu->has_pmu) { + return; + } + if (!kvm_arm_set_device_attr(cpu, &attr, "PMU")) { + error_report("failed to init PMU"); + abort(); + } +} + +void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq) +{ + struct kvm_device_attr attr = { + .group = KVM_ARM_VCPU_PMU_V3_CTRL, + .addr = (intptr_t)&irq, + .attr = KVM_ARM_VCPU_PMU_V3_IRQ, + }; + + if (!cpu->has_pmu) { + return; + } + if (!kvm_arm_set_device_attr(cpu, &attr, "PMU")) { + error_report("failed to set irq for PMU"); + abort(); + } +} + +void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa) +{ + struct kvm_device_attr attr = { + .group = KVM_ARM_VCPU_PVTIME_CTRL, + .attr = KVM_ARM_VCPU_PVTIME_IPA, + .addr = (uint64_t)&ipa, + }; + + if (cpu->kvm_steal_time == ON_OFF_AUTO_OFF) { + return; + } + if (!kvm_arm_set_device_attr(cpu, &attr, "PVTIME IPA")) { + error_report("failed to init PVTIME IPA"); + abort(); + } +} + +void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp) +{ + bool has_steal_time = kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME); + + if (cpu->kvm_steal_time == ON_OFF_AUTO_AUTO) { + if (!has_steal_time || !arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + cpu->kvm_steal_time = ON_OFF_AUTO_OFF; + } else { + cpu->kvm_steal_time = ON_OFF_AUTO_ON; + } + } else if (cpu->kvm_steal_time == ON_OFF_AUTO_ON) { + if (!has_steal_time) { + error_setg(errp, "'kvm-steal-time' cannot be enabled " + "on this host"); + return; + } else if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + /* + * DEN0057A chapter 2 says "This specification only covers + * systems in which the Execution state of the hypervisor + * as well as EL1 of virtual machines is AArch64.". And, + * to ensure that, the smc/hvc calls are only specified as + * smc64/hvc64. 
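+ *
+ * Hence the error below: a 32-bit guest has no architected way to
+ * issue the 64-bit PV calls, so enabling kvm-steal-time for it would
+ * only provide a broken interface.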
+ */ + error_setg(errp, "'kvm-steal-time' cannot be enabled " + "for AArch32 guests"); + return; + } + } +} + +bool kvm_arm_aarch32_supported(void) +{ + return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL1_32BIT); +} + +bool kvm_arm_sve_supported(void) +{ + return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE); +} + +QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1); + +uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu) +{ + /* Only call this function if kvm_arm_sve_supported() returns true. */ + static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS]; + static bool probed; + uint32_t vq = 0; + int i; + + /* + * KVM ensures all host CPUs support the same set of vector lengths. + * So we only need to create the scratch VCPUs once and then cache + * the results. + */ + if (!probed) { + struct kvm_vcpu_init init = { + .target = -1, + .features[0] = (1 << KVM_ARM_VCPU_SVE), + }; + struct kvm_one_reg reg = { + .id = KVM_REG_ARM64_SVE_VLS, + .addr = (uint64_t)&vls[0], + }; + int fdarray[3], ret; + + probed = true; + + if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) { + error_report("failed to create scratch VCPU with SVE enabled"); + abort(); + } + ret = ioctl(fdarray[2], KVM_GET_ONE_REG, ®); + kvm_arm_destroy_scratch_host_vcpu(fdarray); + if (ret) { + error_report("failed to get KVM_REG_ARM64_SVE_VLS: %s", + strerror(errno)); + abort(); + } + + for (i = KVM_ARM64_SVE_VLS_WORDS - 1; i >= 0; --i) { + if (vls[i]) { + vq = 64 - clz64(vls[i]) + i * 64; + break; + } + } + if (vq > ARM_MAX_VQ) { + warn_report("KVM supports vector lengths larger than " + "QEMU can enable"); + vls[0] &= MAKE_64BIT_MASK(0, ARM_MAX_VQ); + } + } + + return vls[0]; +} + +static int kvm_arm_sve_set_vls(ARMCPU *cpu) +{ + uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = { cpu->sve_vq.map }; + + assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX); + + return kvm_set_one_reg(CPU(cpu), KVM_REG_ARM64_SVE_VLS, &vls[0]); +} + +#define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5 + +int kvm_arch_init_vcpu(CPUState *cs) +{ + int ret; + uint64_t mpidr; + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + uint64_t psciver; + + if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE || + !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) { + error_report("KVM is not supported for this guest CPU type"); + return -EINVAL; + } + + qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cpu); + + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); + if (cs->start_powered_off) { + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF; + } + if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) { + cpu->psci_version = QEMU_PSCI_VERSION_0_2; + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2; + } + if (!arm_feature(env, ARM_FEATURE_AARCH64)) { + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT; + } + if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) { + cpu->has_pmu = false; + } + if (cpu->has_pmu) { + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3; + } else { + env->features &= ~(1ULL << ARM_FEATURE_PMU); + } + if (cpu_isar_feature(aa64_sve, cpu)) { + assert(kvm_arm_sve_supported()); + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE; + } + if (cpu_isar_feature(aa64_pauth, cpu)) { + cpu->kvm_init_features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS | + 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC); + } + + /* Do KVM_ARM_VCPU_INIT ioctl */ + ret = kvm_arm_vcpu_init(cpu); + if (ret) { + return ret; + } + + if (cpu_isar_feature(aa64_sve, cpu)) { + ret = kvm_arm_sve_set_vls(cpu); + if 
(ret) { + return ret; + } + ret = kvm_arm_vcpu_finalize(cpu, KVM_ARM_VCPU_SVE); + if (ret) { + return ret; + } + } + + /* + * KVM reports the exact PSCI version it is implementing via a + * special sysreg. If it is present, use its contents to determine + * what to report to the guest in the dtb (it is the PSCI version, + * in the same 15-bits major 16-bits minor format that PSCI_VERSION + * returns). + */ + if (!kvm_get_one_reg(cs, KVM_REG_ARM_PSCI_VERSION, &psciver)) { + cpu->psci_version = psciver; + } + + /* + * When KVM is in use, PSCI is emulated in-kernel and not by qemu. + * Currently KVM has its own idea about MPIDR assignment, so we + * override our defaults with what we get from KVM. + */ + ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr); + if (ret) { + return ret; + } + cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK; + + return kvm_arm_init_cpreg_list(cpu); +} + +int kvm_arch_destroy_vcpu(CPUState *cs) +{ + return 0; +} + +/* Callers must hold the iothread mutex lock */ +static void kvm_inject_arm_sea(CPUState *c) +{ + ARMCPU *cpu = ARM_CPU(c); + CPUARMState *env = &cpu->env; + uint32_t esr; + bool same_el; + + c->exception_index = EXCP_DATA_ABORT; + env->exception.target_el = 1; + + /* + * Set the DFSC to synchronous external abort and set FnV to not valid, + * this will tell guest the FAR_ELx is UNKNOWN for this abort. + */ + same_el = arm_current_el(env) == env->exception.target_el; + esr = syn_data_abort_no_iss(same_el, 1, 0, 0, 0, 0, 0x10); + + env->exception.syndrome = esr; + + arm_cpu_do_interrupt(c); +} + +#define AARCH64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) + +#define AARCH64_SIMD_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \ + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) + +#define AARCH64_SIMD_CTRL_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \ + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) + +static int kvm_arch_put_fpsimd(CPUState *cs) +{ + CPUARMState *env = &ARM_CPU(cs)->env; + int i, ret; + + for (i = 0; i < 32; i++) { + uint64_t *q = aa64_vfp_qreg(env, i); +#if HOST_BIG_ENDIAN + uint64_t fp_val[2] = { q[1], q[0] }; + ret = kvm_set_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), + fp_val); +#else + ret = kvm_set_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), q); +#endif + if (ret) { + return ret; + } + } + + return 0; +} + +/* + * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits + * and PREGS and the FFR have a slice size of 256 bits. However we simply hard + * code the slice index to zero for now as it's unlikely we'll need more than + * one slice for quite some time. 
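+ *
+ * Illustrative arithmetic: a single 2048-bit ZREG slice holds
+ * 2048 / 128 = 16 quadwords, i.e. it already covers ARM_MAX_VQ (16),
+ * so slice index 0 is sufficient here.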
+ */ +static int kvm_arch_put_sve(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + uint64_t tmp[ARM_MAX_VQ * 2]; + uint64_t *r; + int n, ret; + + for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) { + r = sve_bswap64(tmp, &env->vfp.zregs[n].d[0], cpu->sve_max_vq * 2); + ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_ZREG(n, 0), r); + if (ret) { + return ret; + } + } + + for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) { + r = sve_bswap64(tmp, r = &env->vfp.pregs[n].p[0], + DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); + ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_PREG(n, 0), r); + if (ret) { + return ret; + } + } + + r = sve_bswap64(tmp, &env->vfp.pregs[FFR_PRED_NUM].p[0], + DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); + ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_FFR(0), r); + if (ret) { + return ret; + } + + return 0; +} + +int kvm_arch_put_registers(CPUState *cs, int level) +{ + uint64_t val; + uint32_t fpr; + int i, ret; + unsigned int el; + + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + /* If we are in AArch32 mode then we need to copy the AArch32 regs to the + * AArch64 registers before pushing them out to 64-bit KVM. + */ + if (!is_a64(env)) { + aarch64_sync_32_to_64(env); + } + + for (i = 0; i < 31; i++) { + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), + &env->xregs[i]); + if (ret) { + return ret; + } + } + + /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the + * QEMU side we keep the current SP in xregs[31] as well. + */ + aarch64_save_sp(env, 1); + + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.sp), &env->sp_el[0]); + if (ret) { + return ret; + } + + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(sp_el1), &env->sp_el[1]); + if (ret) { + return ret; + } + + /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */ + if (is_a64(env)) { + val = pstate_read(env); + } else { + val = cpsr_read(env); + } + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pstate), &val); + if (ret) { + return ret; + } + + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc); + if (ret) { + return ret; + } + + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(elr_el1), &env->elr_el[1]); + if (ret) { + return ret; + } + + /* Saved Program State Registers + * + * Before we restore from the banked_spsr[] array we need to + * ensure that any modifications to env->spsr are correctly + * reflected in the banks. 
+ */ + el = arm_current_el(env); + if (el > 0 && !is_a64(env)) { + i = bank_number(env->uncached_cpsr & CPSR_M); + env->banked_spsr[i] = env->spsr; + } + + /* KVM 0-4 map to QEMU banks 1-5 */ + for (i = 0; i < KVM_NR_SPSR; i++) { + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(spsr[i]), + &env->banked_spsr[i + 1]); + if (ret) { + return ret; + } + } + + if (cpu_isar_feature(aa64_sve, cpu)) { + ret = kvm_arch_put_sve(cs); + } else { + ret = kvm_arch_put_fpsimd(cs); + } + if (ret) { + return ret; + } + + fpr = vfp_get_fpsr(env); + ret = kvm_set_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpsr), &fpr); + if (ret) { + return ret; + } + + fpr = vfp_get_fpcr(env); + ret = kvm_set_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpcr), &fpr); + if (ret) { + return ret; + } + + write_cpustate_to_list(cpu, true); + + if (!write_list_to_kvmstate(cpu, level)) { + return -EINVAL; + } + + /* + * Setting VCPU events should be triggered after syncing the registers + * to avoid overwriting potential changes made by KVM upon calling + * KVM_SET_VCPU_EVENTS ioctl + */ + ret = kvm_put_vcpu_events(cpu); + if (ret) { + return ret; + } + + return kvm_arm_sync_mpstate_to_kvm(cpu); +} + +static int kvm_arch_get_fpsimd(CPUState *cs) +{ + CPUARMState *env = &ARM_CPU(cs)->env; + int i, ret; + + for (i = 0; i < 32; i++) { + uint64_t *q = aa64_vfp_qreg(env, i); + ret = kvm_get_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), q); + if (ret) { + return ret; + } else { +#if HOST_BIG_ENDIAN + uint64_t t; + t = q[0], q[0] = q[1], q[1] = t; +#endif + } + } + + return 0; +} + +/* + * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits + * and PREGS and the FFR have a slice size of 256 bits. However we simply hard + * code the slice index to zero for now as it's unlikely we'll need more than + * one slice for quite some time. + */ +static int kvm_arch_get_sve(CPUState *cs) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + uint64_t *r; + int n, ret; + + for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) { + r = &env->vfp.zregs[n].d[0]; + ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_ZREG(n, 0), r); + if (ret) { + return ret; + } + sve_bswap64(r, r, cpu->sve_max_vq * 2); + } + + for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) { + r = &env->vfp.pregs[n].p[0]; + ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_PREG(n, 0), r); + if (ret) { + return ret; + } + sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); + } + + r = &env->vfp.pregs[FFR_PRED_NUM].p[0]; + ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_FFR(0), r); + if (ret) { + return ret; + } + sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); + + return 0; +} + +int kvm_arch_get_registers(CPUState *cs) +{ + uint64_t val; + unsigned int el; + uint32_t fpr; + int i, ret; + + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + + for (i = 0; i < 31; i++) { + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), + &env->xregs[i]); + if (ret) { + return ret; + } + } + + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.sp), &env->sp_el[0]); + if (ret) { + return ret; + } + + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(sp_el1), &env->sp_el[1]); + if (ret) { + return ret; + } + + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.pstate), &val); + if (ret) { + return ret; + } + + env->aarch64 = ((val & PSTATE_nRW) == 0); + if (is_a64(env)) { + pstate_write(env, val); + } else { + cpsr_write(env, val, 0xffffffff, CPSRWriteRaw); + } + + /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. 
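+     * (A rough sketch of what aarch64_restore_sp() is assumed to do
+     * with that layout, where el is the current exception level:
+     *     env->xregs[31] = (env->pstate & PSTATE_SP) && el
+     *                      ? env->sp_el[el] : env->sp_el[0];
+     * i.e. pick the handler stack or the EL0 stack depending on SPSel.)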
On the + * QEMU side we keep the current SP in xregs[31] as well. + */ + aarch64_restore_sp(env, 1); + + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc); + if (ret) { + return ret; + } + + /* If we are in AArch32 mode then we need to sync the AArch32 regs with the + * incoming AArch64 regs received from 64-bit KVM. + * We must perform this after all of the registers have been acquired from + * the kernel. + */ + if (!is_a64(env)) { + aarch64_sync_64_to_32(env); + } + + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(elr_el1), &env->elr_el[1]); + if (ret) { + return ret; + } + + /* Fetch the SPSR registers + * + * KVM SPSRs 0-4 map to QEMU banks 1-5 + */ + for (i = 0; i < KVM_NR_SPSR; i++) { + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(spsr[i]), + &env->banked_spsr[i + 1]); + if (ret) { + return ret; + } + } + + el = arm_current_el(env); + if (el > 0 && !is_a64(env)) { + i = bank_number(env->uncached_cpsr & CPSR_M); + env->spsr = env->banked_spsr[i]; + } + + if (cpu_isar_feature(aa64_sve, cpu)) { + ret = kvm_arch_get_sve(cs); + } else { + ret = kvm_arch_get_fpsimd(cs); + } + if (ret) { + return ret; + } + + ret = kvm_get_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpsr), &fpr); + if (ret) { + return ret; + } + vfp_set_fpsr(env, fpr); + + ret = kvm_get_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpcr), &fpr); + if (ret) { + return ret; + } + vfp_set_fpcr(env, fpr); + + ret = kvm_get_vcpu_events(cpu); + if (ret) { + return ret; + } + + if (!write_kvmstate_to_list(cpu)) { + return -EINVAL; + } + /* Note that it's OK to have registers which aren't in CPUState, + * so we can ignore a failure return here. + */ + write_list_to_cpustate(cpu); + + ret = kvm_arm_sync_mpstate_to_qemu(cpu); + + /* TODO: other registers */ + return ret; +} + +void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) +{ + ram_addr_t ram_addr; + hwaddr paddr; + + assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); + + if (acpi_ghes_present() && addr) { + ram_addr = qemu_ram_addr_from_host(addr); + if (ram_addr != RAM_ADDR_INVALID && + kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { + kvm_hwpoison_page_add(ram_addr); + /* + * If this is a BUS_MCEERR_AR, we know we have been called + * synchronously from the vCPU thread, so we can easily + * synchronize the state and inject an error. + * + * TODO: we currently don't tell the guest at all about + * BUS_MCEERR_AO. In that case we might either be being + * called synchronously from the vCPU thread, or a bit + * later from the main thread, so doing the injection of + * the error would be more complicated. 
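+     *
+     * (Summarising the handling below, for orientation only:
+     *     BUS_MCEERR_AR, GHES record OK -> inject a SEA into the guest
+     *     BUS_MCEERR_AR, record failed  -> report the failure and abort()
+     *     BUS_MCEERR_AO, guest memory   -> ignored for now, as above
+     *     BUS_MCEERR_AO, QEMU memory    -> report it and carry on)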
+ */ + if (code == BUS_MCEERR_AR) { + kvm_cpu_synchronize_state(c); + if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { + kvm_inject_arm_sea(c); + } else { + error_report("failed to record the error"); + abort(); + } + } + return; + } + if (code == BUS_MCEERR_AO) { + error_report("Hardware memory error at addr %p for memory used by " + "QEMU itself instead of guest system!", addr); + } + } + + if (code == BUS_MCEERR_AR) { + error_report("Hardware memory error!"); + exit(1); + } +} + +/* C6.6.29 BRK instruction */ +static const uint32_t brk_insn = 0xd4200000; + +int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) +{ + if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) || + cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) { + return -EINVAL; + } + return 0; +} + +int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) +{ + static uint32_t brk; + + if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) || + brk != brk_insn || + cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) { + return -EINVAL; + } + return 0; +} diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c deleted file mode 100644 index e790d6c9a5..0000000000 --- a/target/arm/kvm64.c +++ /dev/null @@ -1,1606 +0,0 @@ -/* - * ARM implementation of KVM hooks, 64 bit specific code - * - * Copyright Mian-M. Hamayun 2013, Virtual Open Systems - * Copyright Alex Bennée 2014, Linaro - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include <sys/ioctl.h> -#include <sys/ptrace.h> - -#include <linux/elf.h> -#include <linux/kvm.h> - -#include "qemu-common.h" -#include "qapi/error.h" -#include "cpu.h" -#include "qemu/timer.h" -#include "qemu/error-report.h" -#include "qemu/host-utils.h" -#include "qemu/main-loop.h" -#include "exec/gdbstub.h" -#include "sysemu/runstate.h" -#include "sysemu/kvm.h" -#include "sysemu/kvm_int.h" -#include "kvm_arm.h" -#include "internals.h" -#include "hw/acpi/acpi.h" -#include "hw/acpi/ghes.h" -#include "hw/arm/virt.h" - -static bool have_guest_debug; - -/* - * Although the ARM implementation of hardware assisted debugging - * allows for different breakpoints per-core, the current GDB - * interface treats them as a global pool of registers (which seems to - * be the case for x86, ppc and s390). As a result we store one copy - * of registers which is used for all active cores. - * - * Write access is serialised by virtue of the GDB protocol which - * updates things. Read access (i.e. when the values are copied to the - * vCPU) is also gated by GDB's run control. - * - * This is not unreasonable as most of the time debugging kernels you - * never know which core will eventually execute your function. - */ - -typedef struct { - uint64_t bcr; - uint64_t bvr; -} HWBreakpoint; - -/* The watchpoint registers can cover more area than the requested - * watchpoint so we need to store the additional information - * somewhere. We also need to supply a CPUWatchpoint to the GDB stub - * when the watchpoint is hit. 
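- *
- * (For example, with illustrative values only: a 1-byte watchpoint on
- * address 0x1003 ends up stored by insert_hw_watchpoint() below as
- *     wp.wvr = 0x1000;            address rounded down to 8 bytes
- *     BAS    = 0x08;              byte 3 of the covered doubleword
- *     wp.details.vaddr = 0x1003;  exact address handed back to GDB
- * which is why the extra CPUWatchpoint copy has to be kept.)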
- */ -typedef struct { - uint64_t wcr; - uint64_t wvr; - CPUWatchpoint details; -} HWWatchpoint; - -/* Maximum and current break/watch point counts */ -int max_hw_bps, max_hw_wps; -GArray *hw_breakpoints, *hw_watchpoints; - -#define cur_hw_wps (hw_watchpoints->len) -#define cur_hw_bps (hw_breakpoints->len) -#define get_hw_bp(i) (&g_array_index(hw_breakpoints, HWBreakpoint, i)) -#define get_hw_wp(i) (&g_array_index(hw_watchpoints, HWWatchpoint, i)) - -/** - * kvm_arm_init_debug() - check for guest debug capabilities - * @cs: CPUState - * - * kvm_check_extension returns the number of debug registers we have - * or 0 if we have none. - * - */ -static void kvm_arm_init_debug(CPUState *cs) -{ - have_guest_debug = kvm_check_extension(cs->kvm_state, - KVM_CAP_SET_GUEST_DEBUG); - - max_hw_wps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_WPS); - hw_watchpoints = g_array_sized_new(true, true, - sizeof(HWWatchpoint), max_hw_wps); - - max_hw_bps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_BPS); - hw_breakpoints = g_array_sized_new(true, true, - sizeof(HWBreakpoint), max_hw_bps); - return; -} - -/** - * insert_hw_breakpoint() - * @addr: address of breakpoint - * - * See ARM ARM D2.9.1 for details but here we are only going to create - * simple un-linked breakpoints (i.e. we don't chain breakpoints - * together to match address and context or vmid). The hardware is - * capable of fancier matching but that will require exposing that - * fanciness to GDB's interface - * - * DBGBCR<n>_EL1, Debug Breakpoint Control Registers - * - * 31 24 23 20 19 16 15 14 13 12 9 8 5 4 3 2 1 0 - * +------+------+-------+-----+----+------+-----+------+-----+---+ - * | RES0 | BT | LBN | SSC | HMC| RES0 | BAS | RES0 | PMC | E | - * +------+------+-------+-----+----+------+-----+------+-----+---+ - * - * BT: Breakpoint type (0 = unlinked address match) - * LBN: Linked BP number (0 = unused) - * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12) - * BAS: Byte Address Select (RES1 for AArch64) - * E: Enable bit - * - * DBGBVR<n>_EL1, Debug Breakpoint Value Registers - * - * 63 53 52 49 48 2 1 0 - * +------+-----------+----------+-----+ - * | RESS | VA[52:49] | VA[48:2] | 0 0 | - * +------+-----------+----------+-----+ - * - * Depending on the addressing mode bits the top bits of the register - * are a sign extension of the highest applicable VA bit. Some - * versions of GDB don't do it correctly so we ensure they are correct - * here so future PC comparisons will work properly. - */ - -static int insert_hw_breakpoint(target_ulong addr) -{ - HWBreakpoint brk = { - .bcr = 0x1, /* BCR E=1, enable */ - .bvr = sextract64(addr, 0, 53) - }; - - if (cur_hw_bps >= max_hw_bps) { - return -ENOBUFS; - } - - brk.bcr = deposit32(brk.bcr, 1, 2, 0x3); /* PMC = 11 */ - brk.bcr = deposit32(brk.bcr, 5, 4, 0xf); /* BAS = RES1 */ - - g_array_append_val(hw_breakpoints, brk); - - return 0; -} - -/** - * delete_hw_breakpoint() - * @pc: address of breakpoint - * - * Delete a breakpoint and shuffle any above down - */ - -static int delete_hw_breakpoint(target_ulong pc) -{ - int i; - for (i = 0; i < hw_breakpoints->len; i++) { - HWBreakpoint *brk = get_hw_bp(i); - if (brk->bvr == pc) { - g_array_remove_index(hw_breakpoints, i); - return 0; - } - } - return -ENOENT; -} - -/** - * insert_hw_watchpoint() - * @addr: address of watch point - * @len: size of area - * @type: type of watch point - * - * See ARM ARM D2.10. As with the breakpoints we can do some advanced - * stuff if we want to. 
The watch points can be linked with the break - * points above to make them context aware. However for simplicity - * currently we only deal with simple read/write watch points. - * - * D7.3.11 DBGWCR<n>_EL1, Debug Watchpoint Control Registers - * - * 31 29 28 24 23 21 20 19 16 15 14 13 12 5 4 3 2 1 0 - * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+ - * | RES0 | MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E | - * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+ - * - * MASK: num bits addr mask (0=none,01/10=res,11=3 bits (8 bytes)) - * WT: 0 - unlinked, 1 - linked (not currently used) - * LBN: Linked BP number (not currently used) - * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11) - * BAS: Byte Address Select - * LSC: Load/Store control (01: load, 10: store, 11: both) - * E: Enable - * - * The bottom 2 bits of the value register are masked. Therefore to - * break on any sizes smaller than an unaligned word you need to set - * MASK=0, BAS=bit per byte in question. For larger regions (^2) you - * need to ensure you mask the address as required and set BAS=0xff - */ - -static int insert_hw_watchpoint(target_ulong addr, - target_ulong len, int type) -{ - HWWatchpoint wp = { - .wcr = 1, /* E=1, enable */ - .wvr = addr & (~0x7ULL), - .details = { .vaddr = addr, .len = len } - }; - - if (cur_hw_wps >= max_hw_wps) { - return -ENOBUFS; - } - - /* - * HMC=0 SSC=0 PAC=3 will hit EL0 or EL1, any security state, - * valid whether EL3 is implemented or not - */ - wp.wcr = deposit32(wp.wcr, 1, 2, 3); - - switch (type) { - case GDB_WATCHPOINT_READ: - wp.wcr = deposit32(wp.wcr, 3, 2, 1); - wp.details.flags = BP_MEM_READ; - break; - case GDB_WATCHPOINT_WRITE: - wp.wcr = deposit32(wp.wcr, 3, 2, 2); - wp.details.flags = BP_MEM_WRITE; - break; - case GDB_WATCHPOINT_ACCESS: - wp.wcr = deposit32(wp.wcr, 3, 2, 3); - wp.details.flags = BP_MEM_ACCESS; - break; - default: - g_assert_not_reached(); - break; - } - if (len <= 8) { - /* we align the address and set the bits in BAS */ - int off = addr & 0x7; - int bas = (1 << len) - 1; - - wp.wcr = deposit32(wp.wcr, 5 + off, 8 - off, bas); - } else { - /* For ranges above 8 bytes we need to be a power of 2 */ - if (is_power_of_2(len)) { - int bits = ctz64(len); - - wp.wvr &= ~((1 << bits) - 1); - wp.wcr = deposit32(wp.wcr, 24, 4, bits); - wp.wcr = deposit32(wp.wcr, 5, 8, 0xff); - } else { - return -ENOBUFS; - } - } - - g_array_append_val(hw_watchpoints, wp); - return 0; -} - - -static bool check_watchpoint_in_range(int i, target_ulong addr) -{ - HWWatchpoint *wp = get_hw_wp(i); - uint64_t addr_top, addr_bottom = wp->wvr; - int bas = extract32(wp->wcr, 5, 8); - int mask = extract32(wp->wcr, 24, 4); - - if (mask) { - addr_top = addr_bottom + (1 << mask); - } else { - /* BAS must be contiguous but can offset against the base - * address in DBGWVR */ - addr_bottom = addr_bottom + ctz32(bas); - addr_top = addr_bottom + clo32(bas); - } - - if (addr >= addr_bottom && addr <= addr_top) { - return true; - } - - return false; -} - -/** - * delete_hw_watchpoint() - * @addr: address of breakpoint - * - * Delete a breakpoint and shuffle any above down - */ - -static int delete_hw_watchpoint(target_ulong addr, - target_ulong len, int type) -{ - int i; - for (i = 0; i < cur_hw_wps; i++) { - if (check_watchpoint_in_range(i, addr)) { - g_array_remove_index(hw_watchpoints, i); - return 0; - } - } - return -ENOENT; -} - - -int kvm_arch_insert_hw_breakpoint(target_ulong addr, - target_ulong len, int type) -{ - 
switch (type) { - case GDB_BREAKPOINT_HW: - return insert_hw_breakpoint(addr); - break; - case GDB_WATCHPOINT_READ: - case GDB_WATCHPOINT_WRITE: - case GDB_WATCHPOINT_ACCESS: - return insert_hw_watchpoint(addr, len, type); - default: - return -ENOSYS; - } -} - -int kvm_arch_remove_hw_breakpoint(target_ulong addr, - target_ulong len, int type) -{ - switch (type) { - case GDB_BREAKPOINT_HW: - return delete_hw_breakpoint(addr); - case GDB_WATCHPOINT_READ: - case GDB_WATCHPOINT_WRITE: - case GDB_WATCHPOINT_ACCESS: - return delete_hw_watchpoint(addr, len, type); - default: - return -ENOSYS; - } -} - - -void kvm_arch_remove_all_hw_breakpoints(void) -{ - if (cur_hw_wps > 0) { - g_array_remove_range(hw_watchpoints, 0, cur_hw_wps); - } - if (cur_hw_bps > 0) { - g_array_remove_range(hw_breakpoints, 0, cur_hw_bps); - } -} - -void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr) -{ - int i; - memset(ptr, 0, sizeof(struct kvm_guest_debug_arch)); - - for (i = 0; i < max_hw_wps; i++) { - HWWatchpoint *wp = get_hw_wp(i); - ptr->dbg_wcr[i] = wp->wcr; - ptr->dbg_wvr[i] = wp->wvr; - } - for (i = 0; i < max_hw_bps; i++) { - HWBreakpoint *bp = get_hw_bp(i); - ptr->dbg_bcr[i] = bp->bcr; - ptr->dbg_bvr[i] = bp->bvr; - } -} - -bool kvm_arm_hw_debug_active(CPUState *cs) -{ - return ((cur_hw_wps > 0) || (cur_hw_bps > 0)); -} - -static bool find_hw_breakpoint(CPUState *cpu, target_ulong pc) -{ - int i; - - for (i = 0; i < cur_hw_bps; i++) { - HWBreakpoint *bp = get_hw_bp(i); - if (bp->bvr == pc) { - return true; - } - } - return false; -} - -static CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr) -{ - int i; - - for (i = 0; i < cur_hw_wps; i++) { - if (check_watchpoint_in_range(i, addr)) { - return &get_hw_wp(i)->details; - } - } - return NULL; -} - -static bool kvm_arm_set_device_attr(CPUState *cs, struct kvm_device_attr *attr, - const char *name) -{ - int err; - - err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr); - if (err != 0) { - error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err)); - return false; - } - - err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, attr); - if (err != 0) { - error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err)); - return false; - } - - return true; -} - -void kvm_arm_pmu_init(CPUState *cs) -{ - struct kvm_device_attr attr = { - .group = KVM_ARM_VCPU_PMU_V3_CTRL, - .attr = KVM_ARM_VCPU_PMU_V3_INIT, - }; - - if (!ARM_CPU(cs)->has_pmu) { - return; - } - if (!kvm_arm_set_device_attr(cs, &attr, "PMU")) { - error_report("failed to init PMU"); - abort(); - } -} - -void kvm_arm_pmu_set_irq(CPUState *cs, int irq) -{ - struct kvm_device_attr attr = { - .group = KVM_ARM_VCPU_PMU_V3_CTRL, - .addr = (intptr_t)&irq, - .attr = KVM_ARM_VCPU_PMU_V3_IRQ, - }; - - if (!ARM_CPU(cs)->has_pmu) { - return; - } - if (!kvm_arm_set_device_attr(cs, &attr, "PMU")) { - error_report("failed to set irq for PMU"); - abort(); - } -} - -void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa) -{ - struct kvm_device_attr attr = { - .group = KVM_ARM_VCPU_PVTIME_CTRL, - .attr = KVM_ARM_VCPU_PVTIME_IPA, - .addr = (uint64_t)&ipa, - }; - - if (ARM_CPU(cs)->kvm_steal_time == ON_OFF_AUTO_OFF) { - return; - } - if (!kvm_arm_set_device_attr(cs, &attr, "PVTIME IPA")) { - error_report("failed to init PVTIME IPA"); - abort(); - } -} - -static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id) -{ - uint64_t ret; - struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret }; - int err; - - assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64); - err = ioctl(fd, 
KVM_GET_ONE_REG, &idreg); - if (err < 0) { - return -1; - } - *pret = ret; - return 0; -} - -static int read_sys_reg64(int fd, uint64_t *pret, uint64_t id) -{ - struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret }; - - assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64); - return ioctl(fd, KVM_GET_ONE_REG, &idreg); -} - -bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) -{ - /* Identify the feature bits corresponding to the host CPU, and - * fill out the ARMHostCPUClass fields accordingly. To do this - * we have to create a scratch VM, create a single CPU inside it, - * and then query that CPU for the relevant ID registers. - */ - int fdarray[3]; - bool sve_supported; - uint64_t features = 0; - uint64_t t; - int err; - - /* Old kernels may not know about the PREFERRED_TARGET ioctl: however - * we know these will only support creating one kind of guest CPU, - * which is its preferred CPU type. Fortunately these old kernels - * support only a very limited number of CPUs. - */ - static const uint32_t cpus_to_try[] = { - KVM_ARM_TARGET_AEM_V8, - KVM_ARM_TARGET_FOUNDATION_V8, - KVM_ARM_TARGET_CORTEX_A57, - QEMU_KVM_ARM_TARGET_NONE - }; - /* - * target = -1 informs kvm_arm_create_scratch_host_vcpu() - * to use the preferred target - */ - struct kvm_vcpu_init init = { .target = -1, }; - - if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) { - return false; - } - - ahcf->target = init.target; - ahcf->dtb_compatible = "arm,arm-v8"; - - err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0, - ARM64_SYS_REG(3, 0, 0, 4, 0)); - if (unlikely(err < 0)) { - /* - * Before v4.15, the kernel only exposed a limited number of system - * registers, not including any of the interesting AArch64 ID regs. - * For the most part we could leave these fields as zero with minimal - * effect, since this does not affect the values seen by the guest. - * - * However, it could cause problems down the line for QEMU, - * so provide a minimal v8.0 default. - * - * ??? Could read MIDR and use knowledge from cpu64.c. - * ??? Could map a page of memory into our temp guest and - * run the tiniest of hand-crafted kernels to extract - * the values seen by the guest. - * ??? Either of these sounds like too much effort just - * to work around running a modern host kernel. - */ - ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */ - err = 0; - } else { - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1, - ARM64_SYS_REG(3, 0, 0, 4, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, - ARM64_SYS_REG(3, 0, 0, 5, 0)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, - ARM64_SYS_REG(3, 0, 0, 5, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0, - ARM64_SYS_REG(3, 0, 0, 6, 0)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1, - ARM64_SYS_REG(3, 0, 0, 6, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0, - ARM64_SYS_REG(3, 0, 0, 7, 0)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1, - ARM64_SYS_REG(3, 0, 0, 7, 1)); - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2, - ARM64_SYS_REG(3, 0, 0, 7, 2)); - - /* - * Note that if AArch32 support is not present in the host, - * the AArch32 sysregs are present to be read, but will - * return UNKNOWN values. This is neither better nor worse - * than skipping the reads and leaving 0, as we must avoid - * considering the values in every case. 
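- *
- * (The kind of gate meant here is visible further down for DBGDIDR:
- *     if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2)
- * since an EL1 field value of 2 or more means AArch32 is actually
- * implemented at EL1, and only then are the AArch32 views meaningful.)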
- */ - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr0, - ARM64_SYS_REG(3, 0, 0, 1, 0)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr1, - ARM64_SYS_REG(3, 0, 0, 1, 1)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr2, - ARM64_SYS_REG(3, 0, 0, 3, 4)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, - ARM64_SYS_REG(3, 0, 0, 1, 2)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, - ARM64_SYS_REG(3, 0, 0, 1, 4)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, - ARM64_SYS_REG(3, 0, 0, 1, 5)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, - ARM64_SYS_REG(3, 0, 0, 1, 6)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, - ARM64_SYS_REG(3, 0, 0, 1, 7)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0, - ARM64_SYS_REG(3, 0, 0, 2, 0)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1, - ARM64_SYS_REG(3, 0, 0, 2, 1)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2, - ARM64_SYS_REG(3, 0, 0, 2, 2)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3, - ARM64_SYS_REG(3, 0, 0, 2, 3)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4, - ARM64_SYS_REG(3, 0, 0, 2, 4)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5, - ARM64_SYS_REG(3, 0, 0, 2, 5)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, - ARM64_SYS_REG(3, 0, 0, 2, 6)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6, - ARM64_SYS_REG(3, 0, 0, 2, 7)); - - err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0, - ARM64_SYS_REG(3, 0, 0, 3, 0)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1, - ARM64_SYS_REG(3, 0, 0, 3, 1)); - err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2, - ARM64_SYS_REG(3, 0, 0, 3, 2)); - - /* - * DBGDIDR is a bit complicated because the kernel doesn't - * provide an accessor for it in 64-bit mode, which is what this - * scratch VM is in, and there's no architected "64-bit sysreg - * which reads the same as the 32-bit register" the way there is - * for other ID registers. Instead we synthesize a value from the - * AArch64 ID_AA64DFR0, the same way the kernel code in - * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does. - * We only do this if the CPU supports AArch32 at EL1. - */ - if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) { - int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS); - int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS); - int ctx_cmps = - FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS); - int version = 6; /* ARMv8 debug architecture */ - bool has_el3 = - !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3); - uint32_t dbgdidr = 0; - - dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps); - dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, BRPS, brps); - dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, CTX_CMPS, ctx_cmps); - dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, VERSION, version); - dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3); - dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3); - dbgdidr |= (1 << 15); /* RES1 bit */ - ahcf->isar.dbgdidr = dbgdidr; - } - } - - sve_supported = ioctl(fdarray[0], KVM_CHECK_EXTENSION, KVM_CAP_ARM_SVE) > 0; - - /* Add feature bits that can't appear until after VCPU init. */ - if (sve_supported) { - t = ahcf->isar.id_aa64pfr0; - t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1); - ahcf->isar.id_aa64pfr0 = t; - - /* - * Before v5.1, KVM did not support SVE and did not expose - * ID_AA64ZFR0_EL1 even as RAZ. 
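- *
- * (Note that the read below, like the ID reads above, only ORs its
- * result into err; the single check
- *     if (err < 0) { return false; }
- * after the scratch vCPU is destroyed handles all of them at once.)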
After v5.1, KVM still does - * not expose the register to "user" requests like this - * unless the host supports SVE. - */ - err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64zfr0, - ARM64_SYS_REG(3, 0, 0, 4, 4)); - } - - kvm_arm_destroy_scratch_host_vcpu(fdarray); - - if (err < 0) { - return false; - } - - /* - * We can assume any KVM supporting CPU is at least a v8 - * with VFPv4+Neon; this in turn implies most of the other - * feature bits. - */ - features |= 1ULL << ARM_FEATURE_V8; - features |= 1ULL << ARM_FEATURE_NEON; - features |= 1ULL << ARM_FEATURE_AARCH64; - features |= 1ULL << ARM_FEATURE_PMU; - features |= 1ULL << ARM_FEATURE_GENERIC_TIMER; - - ahcf->features = features; - - return true; -} - -void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp) -{ - bool has_steal_time = kvm_arm_steal_time_supported(); - - if (cpu->kvm_steal_time == ON_OFF_AUTO_AUTO) { - if (!has_steal_time || !arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - cpu->kvm_steal_time = ON_OFF_AUTO_OFF; - } else { - cpu->kvm_steal_time = ON_OFF_AUTO_ON; - } - } else if (cpu->kvm_steal_time == ON_OFF_AUTO_ON) { - if (!has_steal_time) { - error_setg(errp, "'kvm-steal-time' cannot be enabled " - "on this host"); - return; - } else if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - /* - * DEN0057A chapter 2 says "This specification only covers - * systems in which the Execution state of the hypervisor - * as well as EL1 of virtual machines is AArch64.". And, - * to ensure that, the smc/hvc calls are only specified as - * smc64/hvc64. - */ - error_setg(errp, "'kvm-steal-time' cannot be enabled " - "for AArch32 guests"); - return; - } - } -} - -bool kvm_arm_aarch32_supported(void) -{ - return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL1_32BIT); -} - -bool kvm_arm_sve_supported(void) -{ - return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE); -} - -bool kvm_arm_steal_time_supported(void) -{ - return kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME); -} - -QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1); - -void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map) -{ - /* Only call this function if kvm_arm_sve_supported() returns true. */ - static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS]; - static bool probed; - uint32_t vq = 0; - int i, j; - - bitmap_zero(map, ARM_MAX_VQ); - - /* - * KVM ensures all host CPUs support the same set of vector lengths. - * So we only need to create the scratch VCPUs once and then cache - * the results. 
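- *
- * (To illustrate the encoding with made-up host support: bit vq-1 of
- * vls[] set means vector length vq * 128 bits is available, so a host
- * offering vq = 1..4 (128 to 512 bits) reports vls[0] = 0xf, and the
- * scan below finds the maximum as
- *     vq = 64 - clz64(0xf) + 0 * 64;          which evaluates to 4
- * before the per-bit loop fills in the map.)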
- */
-    if (!probed) {
-        struct kvm_vcpu_init init = {
-            .target = -1,
-            .features[0] = (1 << KVM_ARM_VCPU_SVE),
-        };
-        struct kvm_one_reg reg = {
-            .id = KVM_REG_ARM64_SVE_VLS,
-            .addr = (uint64_t)&vls[0],
-        };
-        int fdarray[3], ret;
-
-        probed = true;
-
-        if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) {
-            error_report("failed to create scratch VCPU with SVE enabled");
-            abort();
-        }
-        ret = ioctl(fdarray[2], KVM_GET_ONE_REG, &reg);
-        kvm_arm_destroy_scratch_host_vcpu(fdarray);
-        if (ret) {
-            error_report("failed to get KVM_REG_ARM64_SVE_VLS: %s",
-                         strerror(errno));
-            abort();
-        }
-
-        for (i = KVM_ARM64_SVE_VLS_WORDS - 1; i >= 0; --i) {
-            if (vls[i]) {
-                vq = 64 - clz64(vls[i]) + i * 64;
-                break;
-            }
-        }
-        if (vq > ARM_MAX_VQ) {
-            warn_report("KVM supports vector lengths larger than "
-                        "QEMU can enable");
-        }
-    }
-
-    for (i = 0; i < KVM_ARM64_SVE_VLS_WORDS; ++i) {
-        if (!vls[i]) {
-            continue;
-        }
-        for (j = 1; j <= 64; ++j) {
-            vq = j + i * 64;
-            if (vq > ARM_MAX_VQ) {
-                return;
-            }
-            if (vls[i] & (1UL << (j - 1))) {
-                set_bit(vq - 1, map);
-            }
-        }
-    }
-}
-
-static int kvm_arm_sve_set_vls(CPUState *cs)
-{
-    uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = {0};
-    struct kvm_one_reg reg = {
-        .id = KVM_REG_ARM64_SVE_VLS,
-        .addr = (uint64_t)&vls[0],
-    };
-    ARMCPU *cpu = ARM_CPU(cs);
-    uint32_t vq;
-    int i, j;
-
-    assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX);
-
-    for (vq = 1; vq <= cpu->sve_max_vq; ++vq) {
-        if (test_bit(vq - 1, cpu->sve_vq_map)) {
-            i = (vq - 1) / 64;
-            j = (vq - 1) % 64;
-            vls[i] |= 1UL << j;
-        }
-    }
-
-    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-}
-
-#define ARM_CPU_ID_MPIDR       3, 0, 0, 0, 5
-
-int kvm_arch_init_vcpu(CPUState *cs)
-{
-    int ret;
-    uint64_t mpidr;
-    ARMCPU *cpu = ARM_CPU(cs);
-    CPUARMState *env = &cpu->env;
-
-    if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
-        !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
-        error_report("KVM is not supported for this guest CPU type");
-        return -EINVAL;
-    }
-
-    qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs);
-
-    /* Determine init features for this CPU */
-    memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
-    if (cs->start_powered_off) {
-        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
-    }
-    if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
-        cpu->psci_version = 2;
-        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
-    }
-    if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
-        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
-    }
-    if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
-        cpu->has_pmu = false;
-    }
-    if (cpu->has_pmu) {
-        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
-    } else {
-        env->features &= ~(1ULL << ARM_FEATURE_PMU);
-    }
-    if (cpu_isar_feature(aa64_sve, cpu)) {
-        assert(kvm_arm_sve_supported());
-        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE;
-    }
-
-    /* Do KVM_ARM_VCPU_INIT ioctl */
-    ret = kvm_arm_vcpu_init(cs);
-    if (ret) {
-        return ret;
-    }
-
-    if (cpu_isar_feature(aa64_sve, cpu)) {
-        ret = kvm_arm_sve_set_vls(cs);
-        if (ret) {
-            return ret;
-        }
-        ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_SVE);
-        if (ret) {
-            return ret;
-        }
-    }
-
-    /*
-     * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
-     * Currently KVM has its own idea about MPIDR assignment, so we
-     * override our defaults with what we get from KVM.
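-     *
-     * (For reference: ARM_CPU_ID_MPIDR above is the op0,op1,CRn,CRm,op2
-     * tuple 3, 0, 0, 0, 5, i.e. MPIDR_EL1, so the read below amounts to
-     *     kvm_get_one_reg(cs, ARM64_SYS_REG(3, 0, 0, 0, 5), &mpidr);
-     * with ARM64_AFFINITY_MASK then keeping only the Aff3..Aff0 fields.)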
- */
-    ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr);
-    if (ret) {
-        return ret;
-    }
-    cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;
-
-    kvm_arm_init_debug(cs);
-
-    /* Check whether user space can specify guest syndrome value */
-    kvm_arm_init_serror_injection(cs);
-
-    return kvm_arm_init_cpreg_list(cpu);
-}
-
-int kvm_arch_destroy_vcpu(CPUState *cs)
-{
-    return 0;
-}
-
-bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
-{
-    /* Return true if the regidx is a register we should synchronize
-     * via the cpreg_tuples array (ie is not a core or sve reg that
-     * we sync by hand in kvm_arch_get/put_registers())
-     */
-    switch (regidx & KVM_REG_ARM_COPROC_MASK) {
-    case KVM_REG_ARM_CORE:
-    case KVM_REG_ARM64_SVE:
-        return false;
-    default:
-        return true;
-    }
-}
-
-typedef struct CPRegStateLevel {
-    uint64_t regidx;
-    int level;
-} CPRegStateLevel;
-
-/* All system registers not listed in the following table are assumed to be
- * of the level KVM_PUT_RUNTIME_STATE. If a register should be written less
- * often, you must add it to this table with a state of either
- * KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE.
- */
-static const CPRegStateLevel non_runtime_cpregs[] = {
-    { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
-};
-
-int kvm_arm_cpreg_level(uint64_t regidx)
-{
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) {
-        const CPRegStateLevel *l = &non_runtime_cpregs[i];
-        if (l->regidx == regidx) {
-            return l->level;
-        }
-    }
-
-    return KVM_PUT_RUNTIME_STATE;
-}
-
-/* Callers must hold the iothread mutex lock */
-static void kvm_inject_arm_sea(CPUState *c)
-{
-    ARMCPU *cpu = ARM_CPU(c);
-    CPUARMState *env = &cpu->env;
-    uint32_t esr;
-    bool same_el;
-
-    c->exception_index = EXCP_DATA_ABORT;
-    env->exception.target_el = 1;
-
-    /*
-     * Set the DFSC to synchronous external abort and set FnV to not valid,
-     * this will tell guest the FAR_ELx is UNKNOWN for this abort.
-     */
-    same_el = arm_current_el(env) == env->exception.target_el;
-    esr = syn_data_abort_no_iss(same_el, 1, 0, 0, 0, 0, 0x10);
-
-    env->exception.syndrome = esr;
-
-    arm_cpu_do_interrupt(c);
-}
-
-#define AARCH64_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
-                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-
-#define AARCH64_SIMD_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \
-                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-
-#define AARCH64_SIMD_CTRL_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
-                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-
-static int kvm_arch_put_fpsimd(CPUState *cs)
-{
-    CPUARMState *env = &ARM_CPU(cs)->env;
-    struct kvm_one_reg reg;
-    int i, ret;
-
-    for (i = 0; i < 32; i++) {
-        uint64_t *q = aa64_vfp_qreg(env, i);
-#ifdef HOST_WORDS_BIGENDIAN
-        uint64_t fp_val[2] = { q[1], q[0] };
-        reg.addr = (uintptr_t)fp_val;
-#else
-        reg.addr = (uintptr_t)q;
-#endif
-        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
-        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-        if (ret) {
-            return ret;
-        }
-    }
-
-    return 0;
-}
-
-/*
- * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
- * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
- * code the slice index to zero for now as it's unlikely we'll need more than
- * one slice for quite some time.
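- *
- * (As a sanity check on the single-slice assumption: with QEMU's
- * current ARM_MAX_VQ of 16 a Z register holds at most 16 * 128 = 2048
- * bits, so
- *     slices = DIV_ROUND_UP(vq * 128, 2048);  which is 1 for vq <= 16
- * and one 256-bit slice likewise covers every P register and the FFR.)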
- */
-static int kvm_arch_put_sve(CPUState *cs)
-{
-    ARMCPU *cpu = ARM_CPU(cs);
-    CPUARMState *env = &cpu->env;
-    uint64_t tmp[ARM_MAX_VQ * 2];
-    uint64_t *r;
-    struct kvm_one_reg reg;
-    int n, ret;
-
-    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
-        r = sve_bswap64(tmp, &env->vfp.zregs[n].d[0], cpu->sve_max_vq * 2);
-        reg.addr = (uintptr_t)r;
-        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
-        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-        if (ret) {
-            return ret;
-        }
-    }
-
-    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
-        r = sve_bswap64(tmp, r = &env->vfp.pregs[n].p[0],
-                        DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
-        reg.addr = (uintptr_t)r;
-        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
-        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-        if (ret) {
-            return ret;
-        }
-    }
-
-    r = sve_bswap64(tmp, &env->vfp.pregs[FFR_PRED_NUM].p[0],
-                    DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
-    reg.addr = (uintptr_t)r;
-    reg.id = KVM_REG_ARM64_SVE_FFR(0);
-    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-
-    return 0;
-}
-
-int kvm_arch_put_registers(CPUState *cs, int level)
-{
-    struct kvm_one_reg reg;
-    uint64_t val;
-    uint32_t fpr;
-    int i, ret;
-    unsigned int el;
-
-    ARMCPU *cpu = ARM_CPU(cs);
-    CPUARMState *env = &cpu->env;
-
-    /* If we are in AArch32 mode then we need to copy the AArch32 regs to the
-     * AArch64 registers before pushing them out to 64-bit KVM.
-     */
-    if (!is_a64(env)) {
-        aarch64_sync_32_to_64(env);
-    }
-
-    for (i = 0; i < 31; i++) {
-        reg.id = AARCH64_CORE_REG(regs.regs[i]);
-        reg.addr = (uintptr_t) &env->xregs[i];
-        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-        if (ret) {
-            return ret;
-        }
-    }
-
-    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
-     * QEMU side we keep the current SP in xregs[31] as well.
-     */
-    aarch64_save_sp(env, 1);
-
-    reg.id = AARCH64_CORE_REG(regs.sp);
-    reg.addr = (uintptr_t) &env->sp_el[0];
-    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-
-    reg.id = AARCH64_CORE_REG(sp_el1);
-    reg.addr = (uintptr_t) &env->sp_el[1];
-    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-
-    /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */
-    if (is_a64(env)) {
-        val = pstate_read(env);
-    } else {
-        val = cpsr_read(env);
-    }
-    reg.id = AARCH64_CORE_REG(regs.pstate);
-    reg.addr = (uintptr_t) &val;
-    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-
-    reg.id = AARCH64_CORE_REG(regs.pc);
-    reg.addr = (uintptr_t) &env->pc;
-    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-
-    reg.id = AARCH64_CORE_REG(elr_el1);
-    reg.addr = (uintptr_t) &env->elr_el[1];
-    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-
-    /* Saved Program State Registers
-     *
-     * Before we restore from the banked_spsr[] array we need to
-     * ensure that any modifications to env->spsr are correctly
-     * reflected in the banks.
- */
-    el = arm_current_el(env);
-    if (el > 0 && !is_a64(env)) {
-        i = bank_number(env->uncached_cpsr & CPSR_M);
-        env->banked_spsr[i] = env->spsr;
-    }
-
-    /* KVM 0-4 map to QEMU banks 1-5 */
-    for (i = 0; i < KVM_NR_SPSR; i++) {
-        reg.id = AARCH64_CORE_REG(spsr[i]);
-        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
-        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-        if (ret) {
-            return ret;
-        }
-    }
-
-    if (cpu_isar_feature(aa64_sve, cpu)) {
-        ret = kvm_arch_put_sve(cs);
-    } else {
-        ret = kvm_arch_put_fpsimd(cs);
-    }
-    if (ret) {
-        return ret;
-    }
-
-    reg.addr = (uintptr_t)(&fpr);
-    fpr = vfp_get_fpsr(env);
-    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
-    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-
-    reg.addr = (uintptr_t)(&fpr);
-    fpr = vfp_get_fpcr(env);
-    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
-    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-
-    write_cpustate_to_list(cpu, true);
-
-    if (!write_list_to_kvmstate(cpu, level)) {
-        return -EINVAL;
-    }
-
-    /*
-     * Setting VCPU events should be triggered after syncing the registers
-     * to avoid overwriting potential changes made by KVM upon calling
-     * KVM_SET_VCPU_EVENTS ioctl
-     */
-    ret = kvm_put_vcpu_events(cpu);
-    if (ret) {
-        return ret;
-    }
-
-    kvm_arm_sync_mpstate_to_kvm(cpu);
-
-    return ret;
-}
-
-static int kvm_arch_get_fpsimd(CPUState *cs)
-{
-    CPUARMState *env = &ARM_CPU(cs)->env;
-    struct kvm_one_reg reg;
-    int i, ret;
-
-    for (i = 0; i < 32; i++) {
-        uint64_t *q = aa64_vfp_qreg(env, i);
-        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
-        reg.addr = (uintptr_t)q;
-        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-        if (ret) {
-            return ret;
-        } else {
-#ifdef HOST_WORDS_BIGENDIAN
-            uint64_t t;
-            t = q[0], q[0] = q[1], q[1] = t;
-#endif
-        }
-    }
-
-    return 0;
-}
-
-/*
- * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
- * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
- * code the slice index to zero for now as it's unlikely we'll need more than
- * one slice for quite some time.
- */
-static int kvm_arch_get_sve(CPUState *cs)
-{
-    ARMCPU *cpu = ARM_CPU(cs);
-    CPUARMState *env = &cpu->env;
-    struct kvm_one_reg reg;
-    uint64_t *r;
-    int n, ret;
-
-    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
-        r = &env->vfp.zregs[n].d[0];
-        reg.addr = (uintptr_t)r;
-        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
-        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-        if (ret) {
-            return ret;
-        }
-        sve_bswap64(r, r, cpu->sve_max_vq * 2);
-    }
-
-    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
-        r = &env->vfp.pregs[n].p[0];
-        reg.addr = (uintptr_t)r;
-        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
-        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-        if (ret) {
-            return ret;
-        }
-        sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
-    }
-
-    r = &env->vfp.pregs[FFR_PRED_NUM].p[0];
-    reg.addr = (uintptr_t)r;
-    reg.id = KVM_REG_ARM64_SVE_FFR(0);
-    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-    sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
-
-    return 0;
-}
-
-int kvm_arch_get_registers(CPUState *cs)
-{
-    struct kvm_one_reg reg;
-    uint64_t val;
-    unsigned int el;
-    uint32_t fpr;
-    int i, ret;
-
-    ARMCPU *cpu = ARM_CPU(cs);
-    CPUARMState *env = &cpu->env;
-
-    for (i = 0; i < 31; i++) {
-        reg.id = AARCH64_CORE_REG(regs.regs[i]);
-        reg.addr = (uintptr_t) &env->xregs[i];
-        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-        if (ret) {
-            return ret;
-        }
-    }
-
-    reg.id = AARCH64_CORE_REG(regs.sp);
-    reg.addr = (uintptr_t) &env->sp_el[0];
-    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-
-    reg.id = AARCH64_CORE_REG(sp_el1);
-    reg.addr = (uintptr_t) &env->sp_el[1];
-    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-
-    reg.id = AARCH64_CORE_REG(regs.pstate);
-    reg.addr = (uintptr_t) &val;
-    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-
-    env->aarch64 = ((val & PSTATE_nRW) == 0);
-    if (is_a64(env)) {
-        pstate_write(env, val);
-    } else {
-        cpsr_write(env, val, 0xffffffff, CPSRWriteRaw);
-    }
-
-    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
-     * QEMU side we keep the current SP in xregs[31] as well.
-     */
-    aarch64_restore_sp(env, 1);
-
-    reg.id = AARCH64_CORE_REG(regs.pc);
-    reg.addr = (uintptr_t) &env->pc;
-    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-
-    /* If we are in AArch32 mode then we need to sync the AArch32 regs with the
-     * incoming AArch64 regs received from 64-bit KVM.
-     * We must perform this after all of the registers have been acquired from
-     * the kernel.
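-     *
-     * (The ordering matters because aarch64_sync_64_to_32() is assumed
-     * to rebuild the whole AArch32 view from xregs[], roughly
-     *     env->regs[i] = env->xregs[i];   for the low registers,
-     * plus the banked SP/LR copies derived from the higher x-registers,
-     * so any xregs[] entry not yet fetched would leave stale state.)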
- */
-    if (!is_a64(env)) {
-        aarch64_sync_64_to_32(env);
-    }
-
-    reg.id = AARCH64_CORE_REG(elr_el1);
-    reg.addr = (uintptr_t) &env->elr_el[1];
-    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-
-    /* Fetch the SPSR registers
-     *
-     * KVM SPSRs 0-4 map to QEMU banks 1-5
-     */
-    for (i = 0; i < KVM_NR_SPSR; i++) {
-        reg.id = AARCH64_CORE_REG(spsr[i]);
-        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
-        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-        if (ret) {
-            return ret;
-        }
-    }
-
-    el = arm_current_el(env);
-    if (el > 0 && !is_a64(env)) {
-        i = bank_number(env->uncached_cpsr & CPSR_M);
-        env->spsr = env->banked_spsr[i];
-    }
-
-    if (cpu_isar_feature(aa64_sve, cpu)) {
-        ret = kvm_arch_get_sve(cs);
-    } else {
-        ret = kvm_arch_get_fpsimd(cs);
-    }
-    if (ret) {
-        return ret;
-    }
-
-    reg.addr = (uintptr_t)(&fpr);
-    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
-    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-    vfp_set_fpsr(env, fpr);
-
-    reg.addr = (uintptr_t)(&fpr);
-    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
-    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-    if (ret) {
-        return ret;
-    }
-    vfp_set_fpcr(env, fpr);
-
-    ret = kvm_get_vcpu_events(cpu);
-    if (ret) {
-        return ret;
-    }
-
-    if (!write_kvmstate_to_list(cpu)) {
-        return -EINVAL;
-    }
-    /* Note that it's OK to have registers which aren't in CPUState,
-     * so we can ignore a failure return here.
-     */
-    write_list_to_cpustate(cpu);
-
-    kvm_arm_sync_mpstate_to_qemu(cpu);
-
-    /* TODO: other registers */
-    return ret;
-}
-
-void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
-{
-    ram_addr_t ram_addr;
-    hwaddr paddr;
-
-    assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
-
-    if (acpi_ghes_present() && addr) {
-        ram_addr = qemu_ram_addr_from_host(addr);
-        if (ram_addr != RAM_ADDR_INVALID &&
-            kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
-            kvm_hwpoison_page_add(ram_addr);
-            /*
-             * If this is a BUS_MCEERR_AR, we know we have been called
-             * synchronously from the vCPU thread, so we can easily
-             * synchronize the state and inject an error.
-             *
-             * TODO: we currently don't tell the guest at all about
-             * BUS_MCEERR_AO. In that case we might either be being
-             * called synchronously from the vCPU thread, or a bit
-             * later from the main thread, so doing the injection of
-             * the error would be more complicated.
- */ - if (code == BUS_MCEERR_AR) { - kvm_cpu_synchronize_state(c); - if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { - kvm_inject_arm_sea(c); - } else { - error_report("failed to record the error"); - abort(); - } - } - return; - } - if (code == BUS_MCEERR_AO) { - error_report("Hardware memory error at addr %p for memory used by " - "QEMU itself instead of guest system!", addr); - } - } - - if (code == BUS_MCEERR_AR) { - error_report("Hardware memory error!"); - exit(1); - } -} - -/* C6.6.29 BRK instruction */ -static const uint32_t brk_insn = 0xd4200000; - -int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) -{ - if (have_guest_debug) { - if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) || - cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) { - return -EINVAL; - } - return 0; - } else { - error_report("guest debug not supported on this kernel"); - return -EINVAL; - } -} - -int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) -{ - static uint32_t brk; - - if (have_guest_debug) { - if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) || - brk != brk_insn || - cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) { - return -EINVAL; - } - return 0; - } else { - error_report("guest debug not supported on this kernel"); - return -EINVAL; - } -} - -/* See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register - * - * To minimise translating between kernel and user-space the kernel - * ABI just provides user-space with the full exception syndrome - * register value to be decoded in QEMU. - */ - -bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit) -{ - int hsr_ec = syn_get_ec(debug_exit->hsr); - ARMCPU *cpu = ARM_CPU(cs); - CPUARMState *env = &cpu->env; - - /* Ensure PC is synchronised */ - kvm_cpu_synchronize_state(cs); - - switch (hsr_ec) { - case EC_SOFTWARESTEP: - if (cs->singlestep_enabled) { - return true; - } else { - /* - * The kernel should have suppressed the guest's ability to - * single step at this point so something has gone wrong. - */ - error_report("%s: guest single-step while debugging unsupported" - " (%"PRIx64", %"PRIx32")", - __func__, env->pc, debug_exit->hsr); - return false; - } - break; - case EC_AA64_BKPT: - if (kvm_find_sw_breakpoint(cs, env->pc)) { - return true; - } - break; - case EC_BREAKPOINT: - if (find_hw_breakpoint(cs, env->pc)) { - return true; - } - break; - case EC_WATCHPOINT: - { - CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far); - if (wp) { - cs->watchpoint_hit = wp; - return true; - } - break; - } - default: - error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")", - __func__, debug_exit->hsr, env->pc); - } - - /* If we are not handling the debug exception it must belong to - * the guest. Let's re-use the existing TCG interrupt code to set - * everything up properly. - */ - cs->exception_index = EXCP_BKPT; - env->exception.syndrome = debug_exit->hsr; - env->exception.vaddress = debug_exit->far; - env->exception.target_el = 1; - qemu_mutex_lock_iothread(); - arm_cpu_do_interrupt(cs); - qemu_mutex_unlock_iothread(); - - return false; -} - -#define ARM64_REG_ESR_EL1 ARM64_SYS_REG(3, 0, 5, 2, 0) -#define ARM64_REG_TCR_EL1 ARM64_SYS_REG(3, 0, 2, 0, 2) - -/* - * ESR_EL1 - * ISS encoding - * AARCH64: DFSC, bits [5:0] - * AARCH32: - * TTBCR.EAE == 0 - * FS[4] - DFSR[10] - * FS[3:0] - DFSR[3:0] - * TTBCR.EAE == 1 - * FS, bits [5:0] - */ -#define ESR_DFSC(aarch64, lpae, v) \ - ((aarch64 || (lpae)) ? 
((v) & 0x3F) \ - : (((v) >> 6) | ((v) & 0x1F))) - -#define ESR_DFSC_EXTABT(aarch64, lpae) \ - ((aarch64) ? 0x10 : (lpae) ? 0x10 : 0x8) - -bool kvm_arm_verify_ext_dabt_pending(CPUState *cs) -{ - uint64_t dfsr_val; - - if (!kvm_get_one_reg(cs, ARM64_REG_ESR_EL1, &dfsr_val)) { - ARMCPU *cpu = ARM_CPU(cs); - CPUARMState *env = &cpu->env; - int aarch64_mode = arm_feature(env, ARM_FEATURE_AARCH64); - int lpae = 0; - - if (!aarch64_mode) { - uint64_t ttbcr; - - if (!kvm_get_one_reg(cs, ARM64_REG_TCR_EL1, &ttbcr)) { - lpae = arm_feature(env, ARM_FEATURE_LPAE) - && (ttbcr & TTBCR_EAE); - } - } - /* - * The verification here is based on the DFSC bits - * of the ESR_EL1 reg only - */ - return (ESR_DFSC(aarch64_mode, lpae, dfsr_val) == - ESR_DFSC_EXTABT(aarch64_mode, lpae)); - } - return false; -} diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index b7f78b5215..cfaa0d9bc7 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -12,39 +12,11 @@ #define QEMU_KVM_ARM_H #include "sysemu/kvm.h" -#include "exec/memory.h" -#include "qemu/error-report.h" #define KVM_ARM_VGIC_V2 (1 << 0) #define KVM_ARM_VGIC_V3 (1 << 1) /** - * kvm_arm_vcpu_init: - * @cs: CPUState - * - * Initialize (or reinitialize) the VCPU by invoking the - * KVM_ARM_VCPU_INIT ioctl with the CPU type and feature - * bitmask specified in the CPUState. - * - * Returns: 0 if success else < 0 error code - */ -int kvm_arm_vcpu_init(CPUState *cs); - -/** - * kvm_arm_vcpu_finalize: - * @cs: CPUState - * @feature: feature to finalize - * - * Finalizes the configuration of the specified VCPU feature by - * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring - * this are documented in the "KVM_ARM_VCPU_FINALIZE" section of - * KVM's API documentation. - * - * Returns: 0 if success else < 0 error code - */ -int kvm_arm_vcpu_finalize(CPUState *cs, int feature); - -/** * kvm_arm_register_device: * @mr: memory region for this device * @devid: the KVM device ID @@ -66,37 +38,6 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, uint64_t attr, int dev_fd, uint64_t addr_ormask); /** - * kvm_arm_init_cpreg_list: - * @cpu: ARMCPU - * - * Initialize the ARMCPU cpreg list according to the kernel's - * definition of what CPU registers it knows about (and throw away - * the previous TCG-created cpreg list). - * - * Returns: 0 if success, else < 0 error code - */ -int kvm_arm_init_cpreg_list(ARMCPU *cpu); - -/** - * kvm_arm_reg_syncs_via_cpreg_list: - * @regidx: KVM register index - * - * Return true if this KVM register should be synchronized via the - * cpreg list of arbitrary system registers, false if it is synchronized - * by hand using code in kvm_arch_get/put_registers(). - */ -bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx); - -/** - * kvm_arm_cpreg_level: - * @regidx: KVM register index - * - * Return the level of this coprocessor/system register. Return value is - * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE. - */ -int kvm_arm_cpreg_level(uint64_t regidx); - -/** * write_list_to_kvmstate: * @cpu: ARMCPU * @level: the state level to sync @@ -155,34 +96,6 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu); */ void kvm_arm_reset_vcpu(ARMCPU *cpu); -/** - * kvm_arm_init_serror_injection: - * @cs: CPUState - * - * Check whether KVM can set guest SError syndrome. - */ -void kvm_arm_init_serror_injection(CPUState *cs); - -/** - * kvm_get_vcpu_events: - * @cpu: ARMCPU - * - * Get VCPU related state from kvm. 
- * - * Returns: 0 if success else < 0 error code - */ -int kvm_get_vcpu_events(ARMCPU *cpu); - -/** - * kvm_put_vcpu_events: - * @cpu: ARMCPU - * - * Put VCPU related state to kvm. - * - * Returns: 0 if success else < 0 error code - */ -int kvm_put_vcpu_events(ARMCPU *cpu); - #ifdef CONFIG_KVM /** * kvm_arm_create_scratch_host_vcpu: @@ -215,37 +128,14 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, void kvm_arm_destroy_scratch_host_vcpu(int *fdarray); /** - * ARMHostCPUFeatures: information about the host CPU (identified - * by asking the host kernel) - */ -typedef struct ARMHostCPUFeatures { - ARMISARegisters isar; - uint64_t features; - uint32_t target; - const char *dtb_compatible; -} ARMHostCPUFeatures; - -/** - * kvm_arm_get_host_cpu_features: - * @ahcf: ARMHostCPUClass to fill in - * - * Probe the capabilities of the host kernel's preferred CPU and fill - * in the ARMHostCPUClass struct accordingly. - * - * Returns true on success and false otherwise. - */ -bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf); - -/** * kvm_arm_sve_get_vls: - * @cs: CPUState - * @map: bitmap to fill in + * @cpu: ARMCPU * * Get all the SVE vector lengths supported by the KVM host, setting * the bits corresponding to their length in quadwords minus one - * (vq - 1) in @map up to ARM_MAX_VQ. + * (vq - 1) up to ARM_MAX_VQ. Return the resulting map. */ -void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map); +uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu); /** * kvm_arm_set_cpu_features_from_host: @@ -258,12 +148,12 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); /** * kvm_arm_add_vcpu_properties: - * @obj: The CPU object to add the properties to + * @cpu: The CPU object to add the properties to * * Add all KVM specific CPU properties to the CPU object. These * are the CPU properties with "kvm-" prefixed names. */ -void kvm_arm_add_vcpu_properties(Object *obj); +void kvm_arm_add_vcpu_properties(ARMCPU *cpu); /** * kvm_arm_steal_time_finalize: @@ -276,14 +166,6 @@ void kvm_arm_add_vcpu_properties(Object *obj); void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp); /** - * kvm_arm_steal_time_supported: - * - * Returns: true if KVM can enable steal time reporting - * and false otherwise. - */ -bool kvm_arm_steal_time_supported(void); - -/** * kvm_arm_aarch32_supported: * * Returns: true if KVM can enable AArch32 mode @@ -316,57 +198,19 @@ bool kvm_arm_sve_supported(void); */ int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa); -/** - * kvm_arm_sync_mpstate_to_kvm: - * @cpu: ARMCPU - * - * If supported set the KVM MP_STATE based on QEMU's model. - * - * Returns 0 on success and -1 on failure. - */ -int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); - -/** - * kvm_arm_sync_mpstate_to_qemu: - * @cpu: ARMCPU - * - * If supported get the MP_STATE from KVM and store in QEMU's model. - * - * Returns 0 on success and aborts on failure. - */ -int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); - -/** - * kvm_arm_get_virtual_time: - * @cs: CPUState - * - * Gets the VCPU's virtual counter and stores it in the KVM CPU state. - */ -void kvm_arm_get_virtual_time(CPUState *cs); - -/** - * kvm_arm_put_virtual_time: - * @cs: CPUState - * - * Sets the VCPU's virtual counter to the value stored in the KVM CPU state. 
- */ -void kvm_arm_put_virtual_time(CPUState *cs); - -void kvm_arm_vm_state_change(void *opaque, bool running, RunState state); - int kvm_arm_vgic_probe(void); -void kvm_arm_pmu_set_irq(CPUState *cs, int irq); -void kvm_arm_pmu_init(CPUState *cs); +void kvm_arm_pmu_init(ARMCPU *cpu); +void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq); /** * kvm_arm_pvtime_init: - * @cs: CPUState + * @cpu: ARMCPU * @ipa: Per-vcpu guest physical base address of the pvtime structures * * Initializes PVTIME for the VCPU, setting the PVTIME IPA to @ipa. */ -void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa); +void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa); int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); @@ -391,11 +235,6 @@ static inline bool kvm_arm_sve_supported(void) return false; } -static inline bool kvm_arm_steal_time_supported(void) -{ - return false; -} - /* * These functions should never actually be called without KVM support. */ @@ -404,7 +243,7 @@ static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) g_assert_not_reached(); } -static inline void kvm_arm_add_vcpu_properties(Object *obj) +static inline void kvm_arm_add_vcpu_properties(ARMCPU *cpu) { g_assert_not_reached(); } @@ -419,17 +258,17 @@ static inline int kvm_arm_vgic_probe(void) g_assert_not_reached(); } -static inline void kvm_arm_pmu_set_irq(CPUState *cs, int irq) +static inline void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq) { g_assert_not_reached(); } -static inline void kvm_arm_pmu_init(CPUState *cs) +static inline void kvm_arm_pmu_init(ARMCPU *cpu) { g_assert_not_reached(); } -static inline void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa) +static inline void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa) { g_assert_not_reached(); } @@ -439,93 +278,11 @@ static inline void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp) g_assert_not_reached(); } -static inline void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map) +static inline uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu) { g_assert_not_reached(); } #endif -static inline const char *gic_class_name(void) -{ - return kvm_irqchip_in_kernel() ? "kvm-arm-gic" : "arm_gic"; -} - -/** - * gicv3_class_name - * - * Return name of GICv3 class to use depending on whether KVM acceleration is - * in use. May throw an error if the chosen implementation is not available. - * - * Returns: class name to use - */ -static inline const char *gicv3_class_name(void) -{ - if (kvm_irqchip_in_kernel()) { - return "kvm-arm-gicv3"; - } else { - if (kvm_enabled()) { - error_report("Userspace GICv3 is not supported with KVM"); - exit(1); - } - return "arm-gicv3"; - } -} - -/** - * kvm_arm_handle_debug: - * @cs: CPUState - * @debug_exit: debug part of the KVM exit structure - * - * Returns: TRUE if the debug exception was handled. - */ -bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit); - -/** - * kvm_arm_hw_debug_active: - * @cs: CPU State - * - * Return: TRUE if any hardware breakpoints in use. - */ -bool kvm_arm_hw_debug_active(CPUState *cs); - -/** - * kvm_arm_copy_hw_debug_data: - * @ptr: kvm_guest_debug_arch structure - * - * Copy the architecture specific debug registers into the - * kvm_guest_debug ioctl structure. 
- */ -struct kvm_guest_debug_arch; -void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr); - -/** - * kvm_arm_verify_ext_dabt_pending: - * @cs: CPUState - * - * Verify the fault status code wrt the Ext DABT injection - * - * Returns: true if the fault status code is as expected, false otherwise - */ -bool kvm_arm_verify_ext_dabt_pending(CPUState *cs); - -/** - * its_class_name: - * - * Return the ITS class name to use depending on whether KVM acceleration - * and KVM CAP_SIGNAL_MSI are supported - * - * Returns: class name to use or NULL - */ -static inline const char *its_class_name(void) -{ - if (kvm_irqchip_in_kernel()) { - /* KVM implementation requires this capability */ - return kvm_direct_msi_enabled() ? "arm-its-kvm" : NULL; - } else { - /* Software emulation based model */ - return "arm-gicv3-its"; - } -} - #endif diff --git a/target/arm/machine.c b/target/arm/machine.c index c74d8c3f4b..b2b39b2475 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -2,9 +2,12 @@ #include "cpu.h" #include "qemu/error-report.h" #include "sysemu/kvm.h" +#include "sysemu/tcg.h" #include "kvm_arm.h" #include "internals.h" +#include "cpu-features.h" #include "migration/cpu.h" +#include "target/arm/gtimer.h" static bool vfp_needed(void *opaque) { @@ -47,7 +50,7 @@ static const VMStateDescription vmstate_vfp = { .version_id = 3, .minimum_version_id = 3, .needed = vfp_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { /* For compatibility, store Qn out of Zn here. */ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[0].d, ARMCPU, 0, 2), VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[1].d, ARMCPU, 0, 2), @@ -113,7 +116,7 @@ static const VMStateDescription vmstate_iwmmxt = { .version_id = 1, .minimum_version_id = 1, .needed = iwmmxt_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64_ARRAY(env.iwmmxt.regs, ARMCPU, 16), VMSTATE_UINT32_ARRAY(env.iwmmxt.cregs, ARMCPU, 16), VMSTATE_END_OF_LIST() @@ -138,7 +141,7 @@ static const VMStateDescription vmstate_zreg_hi_reg = { .name = "cpu/sve/zreg_hi", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64_SUB_ARRAY(d, ARMVectorReg, 2, ARM_MAX_VQ - 2), VMSTATE_END_OF_LIST() } @@ -148,7 +151,7 @@ static const VMStateDescription vmstate_preg_reg = { .name = "cpu/sve/preg", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64_ARRAY(p, ARMPredicateReg, 2 * ARM_MAX_VQ / 8), VMSTATE_END_OF_LIST() } @@ -159,7 +162,7 @@ static const VMStateDescription vmstate_sve = { .version_id = 1, .minimum_version_id = 1, .needed = sve_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_STRUCT_ARRAY(env.vfp.zregs, ARMCPU, 32, 0, vmstate_zreg_hi_reg, ARMVectorReg), VMSTATE_STRUCT_ARRAY(env.vfp.pregs, ARMCPU, 17, 0, @@ -167,6 +170,39 @@ static const VMStateDescription vmstate_sve = { VMSTATE_END_OF_LIST() } }; + +static const VMStateDescription vmstate_vreg = { + .name = "vreg", + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT64_ARRAY(d, ARMVectorReg, ARM_MAX_VQ * 2), + VMSTATE_END_OF_LIST() + } +}; + +static bool za_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + + /* + * When ZA storage is disabled, its contents are discarded. + * It will be zeroed when ZA storage is re-enabled. 
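The hook completed just below keys the new "cpu/sme" subsection off SVCR.ZA, so ZA contents are migrated only while the storage is architecturally live. For reference, a minimal sketch of the general .needed pattern, with a hypothetical device standing in for the CPU:

    static bool widget_needed(void *opaque)
    {
        MyDevice *s = opaque;        /* hypothetical state structure */
        return s->widget_enabled;    /* subsection omitted when false */
    }

    static const VMStateDescription vmstate_widget = {
        .name = "mydev/widget",
        .version_id = 1,
        .minimum_version_id = 1,
        .needed = widget_needed,     /* consulted at save time */
        .fields = (const VMStateField[]) {
            VMSTATE_UINT32(widget_state, MyDevice),
            VMSTATE_END_OF_LIST()
        }
    };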
+ */ + return FIELD_EX64(cpu->env.svcr, SVCR, ZA); +} + +static const VMStateDescription vmstate_za = { + .name = "cpu/sme", + .version_id = 1, + .minimum_version_id = 1, + .needed = za_needed, + .fields = (const VMStateField[]) { + VMSTATE_STRUCT_ARRAY(env.zarray, ARMCPU, ARM_MAX_VQ * 16, 0, + vmstate_vreg, ARMVectorReg), + VMSTATE_END_OF_LIST() + } +}; #endif /* AARCH64 */ static bool serror_needed(void *opaque) @@ -182,7 +218,7 @@ static const VMStateDescription vmstate_serror = { .version_id = 1, .minimum_version_id = 1, .needed = serror_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT8(env.serror.pending, ARMCPU), VMSTATE_UINT8(env.serror.has_esr, ARMCPU), VMSTATE_UINT64(env.serror.esr, ARMCPU), @@ -200,7 +236,7 @@ static const VMStateDescription vmstate_irq_line_state = { .version_id = 1, .minimum_version_id = 1, .needed = irq_line_state_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32(env.irq_line_state, ARMCPU), VMSTATE_END_OF_LIST() } @@ -219,7 +255,7 @@ static const VMStateDescription vmstate_m_faultmask_primask = { .version_id = 1, .minimum_version_id = 1, .needed = m_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32(env.v7m.faultmask[M_REG_NS], ARMCPU), VMSTATE_UINT32(env.v7m.primask[M_REG_NS], ARMCPU), VMSTATE_END_OF_LIST() @@ -254,7 +290,7 @@ static const VMStateDescription vmstate_m_csselr = { .version_id = 1, .minimum_version_id = 1, .needed = m_csselr_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32_ARRAY(env.v7m.csselr, ARMCPU, M_REG_NUM_BANKS), VMSTATE_VALIDATE("CSSELR is valid", csselr_vmstate_validate), VMSTATE_END_OF_LIST() @@ -266,7 +302,7 @@ static const VMStateDescription vmstate_m_scr = { .version_id = 1, .minimum_version_id = 1, .needed = m_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32(env.v7m.scr[M_REG_NS], ARMCPU), VMSTATE_END_OF_LIST() } @@ -277,7 +313,7 @@ static const VMStateDescription vmstate_m_other_sp = { .version_id = 1, .minimum_version_id = 1, .needed = m_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32(env.v7m.other_sp, ARMCPU), VMSTATE_END_OF_LIST() } @@ -296,7 +332,7 @@ static const VMStateDescription vmstate_m_v8m = { .version_id = 1, .minimum_version_id = 1, .needed = m_v8m_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32_ARRAY(env.v7m.msplim, ARMCPU, M_REG_NUM_BANKS), VMSTATE_UINT32_ARRAY(env.v7m.psplim, ARMCPU, M_REG_NUM_BANKS), VMSTATE_END_OF_LIST() @@ -308,7 +344,7 @@ static const VMStateDescription vmstate_m_fp = { .version_id = 1, .minimum_version_id = 1, .needed = vfp_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32_ARRAY(env.v7m.fpcar, ARMCPU, M_REG_NUM_BANKS), VMSTATE_UINT32_ARRAY(env.v7m.fpccr, ARMCPU, M_REG_NUM_BANKS), VMSTATE_UINT32_ARRAY(env.v7m.fpdscr, ARMCPU, M_REG_NUM_BANKS), @@ -330,7 +366,7 @@ static const VMStateDescription vmstate_m_mve = { .version_id = 1, .minimum_version_id = 1, .needed = mve_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32(env.v7m.vpr, ARMCPU), VMSTATE_UINT32(env.v7m.ltpsize, ARMCPU), VMSTATE_END_OF_LIST() @@ -342,7 +378,7 @@ static const VMStateDescription vmstate_m = { .version_id = 4, .minimum_version_id = 4, .needed = m_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { 
VMSTATE_UINT32(env.v7m.vecbase[M_REG_NS], ARMCPU), VMSTATE_UINT32(env.v7m.basepri[M_REG_NS], ARMCPU), VMSTATE_UINT32(env.v7m.control[M_REG_NS], ARMCPU), @@ -356,7 +392,7 @@ static const VMStateDescription vmstate_m = { VMSTATE_INT32(env.v7m.exception, ARMCPU), VMSTATE_END_OF_LIST() }, - .subsections = (const VMStateDescription*[]) { + .subsections = (const VMStateDescription * const []) { &vmstate_m_faultmask_primask, &vmstate_m_csselr, &vmstate_m_scr, @@ -381,7 +417,7 @@ static const VMStateDescription vmstate_thumb2ee = { .version_id = 1, .minimum_version_id = 1, .needed = thumb2ee_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32(env.teecr, ARMCPU), VMSTATE_UINT32(env.teehbr, ARMCPU), VMSTATE_END_OF_LIST() @@ -410,7 +446,7 @@ static const VMStateDescription vmstate_pmsav7 = { .version_id = 1, .minimum_version_id = 1, .needed = pmsav7_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_VARRAY_UINT32(env.pmsav7.drbar, ARMCPU, pmsav7_dregion, 0, vmstate_info_uint32, uint32_t), VMSTATE_VARRAY_UINT32(env.pmsav7.drsr, ARMCPU, pmsav7_dregion, 0, @@ -439,7 +475,7 @@ static const VMStateDescription vmstate_pmsav7_rnr = { .version_id = 1, .minimum_version_id = 1, .needed = pmsav7_rnr_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32(env.pmsav7.rnr[M_REG_NS], ARMCPU), VMSTATE_END_OF_LIST() } @@ -454,12 +490,36 @@ static bool pmsav8_needed(void *opaque) arm_feature(env, ARM_FEATURE_V8); } +static bool pmsav8r_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUARMState *env = &cpu->env; + + return arm_feature(env, ARM_FEATURE_PMSA) && + arm_feature(env, ARM_FEATURE_V8) && + !arm_feature(env, ARM_FEATURE_M); +} + +static const VMStateDescription vmstate_pmsav8r = { + .name = "cpu/pmsav8/pmsav8r", + .version_id = 1, + .minimum_version_id = 1, + .needed = pmsav8r_needed, + .fields = (const VMStateField[]) { + VMSTATE_VARRAY_UINT32(env.pmsav8.hprbar, ARMCPU, + pmsav8r_hdregion, 0, vmstate_info_uint32, uint32_t), + VMSTATE_VARRAY_UINT32(env.pmsav8.hprlar, ARMCPU, + pmsav8r_hdregion, 0, vmstate_info_uint32, uint32_t), + VMSTATE_END_OF_LIST() + }, +}; + static const VMStateDescription vmstate_pmsav8 = { .name = "cpu/pmsav8", .version_id = 1, .minimum_version_id = 1, .needed = pmsav8_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_VARRAY_UINT32(env.pmsav8.rbar[M_REG_NS], ARMCPU, pmsav7_dregion, 0, vmstate_info_uint32, uint32_t), VMSTATE_VARRAY_UINT32(env.pmsav8.rlar[M_REG_NS], ARMCPU, pmsav7_dregion, @@ -467,6 +527,10 @@ static const VMStateDescription vmstate_pmsav8 = { VMSTATE_UINT32(env.pmsav8.mair0[M_REG_NS], ARMCPU), VMSTATE_UINT32(env.pmsav8.mair1[M_REG_NS], ARMCPU), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * const []) { + &vmstate_pmsav8r, + NULL } }; @@ -497,7 +561,7 @@ static const VMStateDescription vmstate_m_security = { .version_id = 1, .minimum_version_id = 1, .needed = m_security_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32(env.v7m.secure, ARMCPU), VMSTATE_UINT32(env.v7m.other_ss_msp, ARMCPU), VMSTATE_UINT32(env.v7m.other_ss_psp, ARMCPU), @@ -661,7 +725,7 @@ static int cpu_pre_save(void *opaque) if (kvm_enabled()) { if (!write_kvmstate_to_list(cpu)) { /* This should never fail */ - abort(); + g_assert_not_reached(); } /* @@ -672,7 +736,7 @@ static int cpu_pre_save(void *opaque) } else { if (!write_cpustate_to_list(cpu, false)) { /* This should never fail. 
*/ - abort(); + g_assert_not_reached(); } } @@ -709,7 +773,7 @@ static int cpu_pre_load(void *opaque) env->irq_line_state = UINT32_MAX; if (!kvm_enabled()) { - pmu_op_start(&cpu->env); + pmu_op_start(env); } return 0; @@ -778,8 +842,19 @@ static int cpu_post_load(void *opaque, int version_id) } } - hw_breakpoint_update_all(cpu); - hw_watchpoint_update_all(cpu); + /* + * Misaligned thumb pc is architecturally impossible. Fail the + * incoming migration. For TCG it would trigger the assert in + * thumb_tr_translate_insn(). + */ + if (!is_a64(env) && env->thumb && (env->regs[15] & 1)) { + return -1; + } + + if (tcg_enabled()) { + hw_breakpoint_update_all(cpu); + hw_watchpoint_update_all(cpu); + } /* * TCG gen_update_fp_context() relies on the invariant that @@ -794,10 +869,14 @@ static int cpu_post_load(void *opaque, int version_id) return -1; } } + if (!kvm_enabled()) { - pmu_op_finish(&cpu->env); + pmu_op_finish(env); + } + + if (tcg_enabled()) { + arm_rebuild_hflags(env); } - arm_rebuild_hflags(&cpu->env); return 0; } @@ -810,7 +889,7 @@ const VMStateDescription vmstate_arm_cpu = { .post_save = cpu_post_save, .pre_load = cpu_pre_load, .post_load = cpu_post_load, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32_ARRAY(env.regs, ARMCPU, 16), VMSTATE_UINT64_ARRAY(env.xregs, ARMCPU, 32), VMSTATE_UINT64(env.pc, ARMCPU), @@ -859,7 +938,7 @@ const VMStateDescription vmstate_arm_cpu = { }, VMSTATE_END_OF_LIST() }, - .subsections = (const VMStateDescription*[]) { + .subsections = (const VMStateDescription * const []) { &vmstate_vfp, &vmstate_iwmmxt, &vmstate_m, @@ -874,6 +953,7 @@ const VMStateDescription vmstate_arm_cpu = { &vmstate_m_security, #ifdef TARGET_AARCH64 &vmstate_sve, + &vmstate_za, #endif &vmstate_serror, &vmstate_irq_line_state, diff --git a/target/arm/meson.build b/target/arm/meson.build index 50f152214a..2e10464dbb 100644 --- a/target/arm/meson.build +++ b/target/arm/meson.build @@ -1,66 +1,41 @@ -gen = [ - decodetree.process('sve.decode', extra_args: '--decode=disas_sve'), - decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'), - decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'), - decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'), - decodetree.process('vfp.decode', extra_args: '--decode=disas_vfp'), - decodetree.process('vfp-uncond.decode', extra_args: '--decode=disas_vfp_uncond'), - decodetree.process('m-nocp.decode', extra_args: '--decode=disas_m_nocp'), - decodetree.process('mve.decode', extra_args: '--decode=disas_mve'), - decodetree.process('a32.decode', extra_args: '--static-decode=disas_a32'), - decodetree.process('a32-uncond.decode', extra_args: '--static-decode=disas_a32_uncond'), - decodetree.process('t32.decode', extra_args: '--static-decode=disas_t32'), - decodetree.process('t16.decode', extra_args: ['-w', '16', '--static-decode=disas_t16']), -] - arm_ss = ss.source_set() -arm_ss.add(gen) arm_ss.add(files( 'cpu.c', - 'crypto_helper.c', 'debug_helper.c', 'gdbstub.c', 'helper.c', - 'iwmmxt_helper.c', - 'm_helper.c', - 'mve_helper.c', - 'neon_helper.c', - 'op_helper.c', - 'tlb_helper.c', - 'translate.c', - 'translate-m-nocp.c', - 'translate-mve.c', - 'translate-neon.c', - 'translate-vfp.c', - 'vec_helper.c', 'vfp_helper.c', - 'cpu_tcg.c', )) arm_ss.add(zlib) -arm_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c', 'kvm64.c'), if_false: files('kvm-stub.c')) +arm_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c'), if_false: files('kvm-stub.c')) 
+arm_ss.add(when: 'CONFIG_HVF', if_true: files('hyp_gdbstub.c')) arm_ss.add(when: 'TARGET_AARCH64', if_true: files( 'cpu64.c', 'gdbstub64.c', - 'helper-a64.c', - 'mte_helper.c', - 'pauth_helper.c', - 'sve_helper.c', - 'translate-a64.c', - 'translate-sve.c', )) -arm_softmmu_ss = ss.source_set() -arm_softmmu_ss.add(files( +arm_system_ss = ss.source_set() +arm_system_ss.add(files( 'arch_dump.c', 'arm-powerctl.c', + 'arm-qmp-cmds.c', + 'cortex-regs.c', 'machine.c', - 'monitor.c', - 'psci.c', + 'ptw.c', )) +arm_user_ss = ss.source_set() + subdir('hvf') +if 'CONFIG_TCG' in config_all_accel + subdir('tcg') +else + arm_ss.add(files('tcg-stubs.c')) +endif + target_arch += {'arm': arm_ss} -target_softmmu_arch += {'arm': arm_softmmu_ss} +target_system_arch += {'arm': arm_system_ss} +target_user_arch += {'arm': arm_user_ss} diff --git a/target/arm/multiprocessing.h b/target/arm/multiprocessing.h new file mode 100644 index 0000000000..81715d345c --- /dev/null +++ b/target/arm/multiprocessing.h @@ -0,0 +1,16 @@ +/* + * ARM multiprocessor CPU helpers + * + * Copyright (c) 2003 Fabrice Bellard + * + * SPDX-License-Identifier: LGPL-2.1-or-later + */ + +#ifndef TARGET_ARM_MULTIPROCESSING_H +#define TARGET_ARM_MULTIPROCESSING_H + +#include "target/arm/cpu-qom.h" + +uint64_t arm_cpu_mp_affinity(ARMCPU *cpu); + +#endif diff --git a/target/arm/ptw.c b/target/arm/ptw.c new file mode 100644 index 0000000000..31ae43f60e --- /dev/null +++ b/target/arm/ptw.c @@ -0,0 +1,3650 @@ +/* + * ARM page table walking. + * + * This code is licensed under the GNU GPL v2 or later. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/range.h" +#include "qemu/main-loop.h" +#include "exec/exec-all.h" +#include "cpu.h" +#include "internals.h" +#include "cpu-features.h" +#include "idau.h" +#ifdef CONFIG_TCG +# include "tcg/oversized-guest.h" +#endif + +typedef struct S1Translate { + /* + * in_mmu_idx : specifies which TTBR, TCR, etc to use for the walk. + * Together with in_space, specifies the architectural translation regime. + */ + ARMMMUIdx in_mmu_idx; + /* + * in_ptw_idx: specifies which mmuidx to use for the actual + * page table descriptor load operations. This will be one of the + * ARMMMUIdx_Stage2* or one of the ARMMMUIdx_Phys_* indexes. + * If a Secure ptw is "downgraded" to NonSecure by an NSTable bit, + * this field is updated accordingly. + */ + ARMMMUIdx in_ptw_idx; + /* + * in_space: the security space for this walk. This plus + * the in_mmu_idx specify the architectural translation regime. + * If a Secure ptw is "downgraded" to NonSecure by an NSTable bit, + * this field is updated accordingly. + * + * Note that the security space for the in_ptw_idx may be different + * from that for the in_mmu_idx. We do not need to explicitly track + * the in_ptw_idx security space because: + * - if the in_ptw_idx is an ARMMMUIdx_Phys_* then the mmuidx + * itself specifies the security space + * - if the in_ptw_idx is an ARMMMUIdx_Stage2* then the security + * space used for ptw reads is the same as that of the security + * space of the stage 1 translation for all cases except where + * stage 1 is Secure; in that case the only possibilities for + * the ptw read are Secure and NonSecure, and the in_ptw_idx + * value being Stage2 vs Stage2_S distinguishes those. + */ + ARMSecuritySpace in_space; + /* + * in_debug: is this a QEMU debug access (gdbstub, etc)? 
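Taken together, the in_* fields below fully describe one walk. A sketch, not part of the patch, of how a side-effect-free debug lookup might be assembled once the struct is complete; the wrapper name and the use of arm_security_space() are assumptions, and get_phys_addr_nogpc() is the prototype declared later in this file:

    static bool debug_probe_stage1(CPUARMState *env, target_ulong va,
                                   GetPhysAddrResult *res, ARMMMUFaultInfo *fi)
    {
        S1Translate ptw = {
            .in_mmu_idx = arm_mmu_idx(env),       /* current regime */
            .in_space = arm_security_space(env),  /* current space */
            .in_debug = true,    /* no AF/dirty updates, no TLB fills */
        };

        /* returns true on fault, per the convention used in this file */
        return get_phys_addr_nogpc(env, &ptw, va, MMU_DATA_LOAD, res, fi);
    }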
Debug + * accesses will not update the guest page table access flags + * and will not change the state of the softmmu TLBs. + */ + bool in_debug; + /* + * If this is stage 2 of a stage 1+2 page table walk, then this must + * be true if stage 1 is an EL0 access; otherwise this is ignored. + * Stage 2 is indicated by in_mmu_idx set to ARMMMUIdx_Stage2{,_S}. + */ + bool in_s1_is_el0; + bool out_rw; + bool out_be; + ARMSecuritySpace out_space; + hwaddr out_virt; + hwaddr out_phys; + void *out_host; +} S1Translate; + +static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, + target_ulong address, + MMUAccessType access_type, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi); + +static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw, + target_ulong address, + MMUAccessType access_type, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi); + +/* This mapping is common between ID_AA64MMFR0.PARANGE and TCR_ELx.{I}PS. */ +static const uint8_t pamax_map[] = { + [0] = 32, + [1] = 36, + [2] = 40, + [3] = 42, + [4] = 44, + [5] = 48, + [6] = 52, +}; + +/* + * The cpu-specific constant value of PAMax; also used by hw/arm/virt. + * Note that machvirt_init calls this on a CPU that is inited but not realized! + */ +unsigned int arm_pamax(ARMCPU *cpu) +{ + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + unsigned int parange = + FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); + + /* + * id_aa64mmfr0 is a read-only register so values outside of the + * supported mappings can be considered an implementation error. + */ + assert(parange < ARRAY_SIZE(pamax_map)); + return pamax_map[parange]; + } + + if (arm_feature(&cpu->env, ARM_FEATURE_LPAE)) { + /* v7 or v8 with LPAE */ + return 40; + } + /* Anything else */ + return 32; +} + +/* + * Convert a possible stage1+2 MMU index into the appropriate stage 1 MMU index + */ +ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx) +{ + switch (mmu_idx) { + case ARMMMUIdx_E10_0: + return ARMMMUIdx_Stage1_E0; + case ARMMMUIdx_E10_1: + return ARMMMUIdx_Stage1_E1; + case ARMMMUIdx_E10_1_PAN: + return ARMMMUIdx_Stage1_E1_PAN; + default: + return mmu_idx; + } +} + +ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env) +{ + return stage_1_mmu_idx(arm_mmu_idx(env)); +} + +/* + * Return where we should do ptw loads from for a stage 2 walk. + * This depends on whether the address we are looking up is a + * Secure IPA or a NonSecure IPA, which we know from whether this is + * Stage2 or Stage2_S. + * If this is the Secure EL1&0 regime we need to check the NSW and SW bits. + */ +static ARMMMUIdx ptw_idx_for_stage_2(CPUARMState *env, ARMMMUIdx stage2idx) +{ + bool s2walk_secure; + + /* + * We're OK to check the current state of the CPU here because + * (1) we always invalidate all TLBs when the SCR_EL3.NS or SCR_EL3.NSE bit + * changes. + * (2) there's no way to do a lookup that cares about Stage 2 for a + * different security state to the current one for AArch64, and AArch32 + * never has a secure EL2. (AArch32 ATS12NSO[UP][RW] allow EL3 to do + * an NS stage 1+2 lookup while the NS bit is 0.) + */ + if (!arm_el_is_aa64(env, 3)) { + return ARMMMUIdx_Phys_NS; + } + + switch (arm_security_space_below_el3(env)) { + case ARMSS_NonSecure: + return ARMMMUIdx_Phys_NS; + case ARMSS_Realm: + return ARMMMUIdx_Phys_Realm; + case ARMSS_Secure: + if (stage2idx == ARMMMUIdx_Stage2_S) { + s2walk_secure = !(env->cp15.vstcr_el2 & VSTCR_SW); + } else { + s2walk_secure = !(env->cp15.vtcr_el2 & VTCR_NSW); + } + return s2walk_secure ? 
ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS; + default: + g_assert_not_reached(); + } +} + +static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0; +} + +/* Return the TTBR associated with this translation regime */ +static uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn) +{ + if (mmu_idx == ARMMMUIdx_Stage2) { + return env->cp15.vttbr_el2; + } + if (mmu_idx == ARMMMUIdx_Stage2_S) { + return env->cp15.vsttbr_el2; + } + if (ttbrn == 0) { + return env->cp15.ttbr0_el[regime_el(env, mmu_idx)]; + } else { + return env->cp15.ttbr1_el[regime_el(env, mmu_idx)]; + } +} + +/* Return true if the specified stage of address translation is disabled */ +static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, + ARMSecuritySpace space) +{ + uint64_t hcr_el2; + + if (arm_feature(env, ARM_FEATURE_M)) { + bool is_secure = arm_space_is_secure(space); + switch (env->v7m.mpu_ctrl[is_secure] & + (R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK)) { + case R_V7M_MPU_CTRL_ENABLE_MASK: + /* Enabled, but not for HardFault and NMI */ + return mmu_idx & ARM_MMU_IDX_M_NEGPRI; + case R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK: + /* Enabled for all cases */ + return false; + case 0: + default: + /* + * HFNMIENA set and ENABLE clear is UNPREDICTABLE, but + * we warned about that in armv7m_nvic.c when the guest set it. + */ + return true; + } + } + + + switch (mmu_idx) { + case ARMMMUIdx_Stage2: + case ARMMMUIdx_Stage2_S: + /* HCR.DC means HCR.VM behaves as 1 */ + hcr_el2 = arm_hcr_el2_eff_secstate(env, space); + return (hcr_el2 & (HCR_DC | HCR_VM)) == 0; + + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E10_1: + case ARMMMUIdx_E10_1_PAN: + /* TGE means that EL0/1 act as if SCTLR_EL1.M is zero */ + hcr_el2 = arm_hcr_el2_eff_secstate(env, space); + if (hcr_el2 & HCR_TGE) { + return true; + } + break; + + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E1: + case ARMMMUIdx_Stage1_E1_PAN: + /* HCR.DC means SCTLR_EL1.M behaves as 0 */ + hcr_el2 = arm_hcr_el2_eff_secstate(env, space); + if (hcr_el2 & HCR_DC) { + return true; + } + break; + + case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_E2: + case ARMMMUIdx_E3: + break; + + case ARMMMUIdx_Phys_S: + case ARMMMUIdx_Phys_NS: + case ARMMMUIdx_Phys_Root: + case ARMMMUIdx_Phys_Realm: + /* No translation for physical address spaces. */ + return true; + + default: + g_assert_not_reached(); + } + + return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0; +} + +static bool granule_protection_check(CPUARMState *env, uint64_t paddress, + ARMSecuritySpace pspace, + ARMMMUFaultInfo *fi) +{ + MemTxAttrs attrs = { + .secure = true, + .space = ARMSS_Root, + }; + ARMCPU *cpu = env_archcpu(env); + uint64_t gpccr = env->cp15.gpccr_el3; + unsigned pps, pgs, l0gptsz, level = 0; + uint64_t tableaddr, pps_mask, align, entry, index; + AddressSpace *as; + MemTxResult result; + int gpi; + + if (!FIELD_EX64(gpccr, GPCCR, GPC)) { + return true; + } + + /* + * GPC Priority 1 (R_GMGRR): + * R_JWCSM: If the configuration of GPCCR_EL3 is invalid, + * the access fails as GPT walk fault at level 0. + */ + + /* + * Configuration of PPS to a value exceeding the implemented + * physical address size is invalid. 
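Before the field-by-field decode that follows, a worked example of the resulting GPT geometry (illustrative values, not from the patch): PPS = 0b101 selects a 48-bit PA space, and L0GPTSZ = 0 gives l0gptsz = 30, so one 8-byte level 0 entry covers each 1GB of PA space and the index is PA[47:30]:

    uint64_t paddress = 0x8040000000ull;                /* example PA */
    uint64_t index = extract64(paddress, 30, 48 - 30);  /* = 0x201 */
    /* table_base is a stand-in for the aligned GPTBR_EL3 << 12 */
    uint64_t entryaddr = table_base + index * 8;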
+ */ + pps = FIELD_EX64(gpccr, GPCCR, PPS); + if (pps > FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE)) { + goto fault_walk; + } + pps = pamax_map[pps]; + pps_mask = MAKE_64BIT_MASK(0, pps); + + switch (FIELD_EX64(gpccr, GPCCR, SH)) { + case 0b10: /* outer shareable */ + break; + case 0b00: /* non-shareable */ + case 0b11: /* inner shareable */ + /* Inner and Outer non-cacheable requires Outer shareable. */ + if (FIELD_EX64(gpccr, GPCCR, ORGN) == 0 && + FIELD_EX64(gpccr, GPCCR, IRGN) == 0) { + goto fault_walk; + } + break; + default: /* reserved */ + goto fault_walk; + } + + switch (FIELD_EX64(gpccr, GPCCR, PGS)) { + case 0b00: /* 4KB */ + pgs = 12; + break; + case 0b01: /* 64KB */ + pgs = 16; + break; + case 0b10: /* 16KB */ + pgs = 14; + break; + default: /* reserved */ + goto fault_walk; + } + + /* Note this field is read-only and fixed at reset. */ + l0gptsz = 30 + FIELD_EX64(gpccr, GPCCR, L0GPTSZ); + + /* + * GPC Priority 2: Secure, Realm or Root address exceeds PPS. + * R_CPDSB: A NonSecure physical address input exceeding PPS + * does not experience any fault. + */ + if (paddress & ~pps_mask) { + if (pspace == ARMSS_NonSecure) { + return true; + } + goto fault_size; + } + + /* GPC Priority 3: the base address of GPTBR_EL3 exceeds PPS. */ + tableaddr = env->cp15.gptbr_el3 << 12; + if (tableaddr & ~pps_mask) { + goto fault_size; + } + + /* + * BADDR is aligned per a function of PPS and L0GPTSZ. + * These bits of GPTBR_EL3 are RES0, but are not a configuration error, + * unlike the RES0 bits of the GPT entries (R_XNKFZ). + */ + align = MAX(pps - l0gptsz + 3, 12); + align = MAKE_64BIT_MASK(0, align); + tableaddr &= ~align; + + as = arm_addressspace(env_cpu(env), attrs); + + /* Level 0 lookup. */ + index = extract64(paddress, l0gptsz, pps - l0gptsz); + tableaddr += index * 8; + entry = address_space_ldq_le(as, tableaddr, attrs, &result); + if (result != MEMTX_OK) { + goto fault_eabt; + } + + switch (extract32(entry, 0, 4)) { + case 1: /* block descriptor */ + if (entry >> 8) { + goto fault_walk; /* RES0 bits not 0 */ + } + gpi = extract32(entry, 4, 4); + goto found; + case 3: /* table descriptor */ + tableaddr = entry & ~0xf; + align = MAX(l0gptsz - pgs - 1, 12); + align = MAKE_64BIT_MASK(0, align); + if (tableaddr & (~pps_mask | align)) { + goto fault_walk; /* RES0 bits not 0 */ + } + break; + default: /* invalid */ + goto fault_walk; + } + + /* Level 1 lookup */ + level = 1; + index = extract64(paddress, pgs + 4, l0gptsz - pgs - 4); + tableaddr += index * 8; + entry = address_space_ldq_le(as, tableaddr, attrs, &result); + if (result != MEMTX_OK) { + goto fault_eabt; + } + + switch (extract32(entry, 0, 4)) { + case 1: /* contiguous descriptor */ + if (entry >> 10) { + goto fault_walk; /* RES0 bits not 0 */ + } + /* + * Because the softmmu tlb only works on units of TARGET_PAGE_SIZE, + * and because we cannot invalidate by pa, and thus will always + * flush entire tlbs, we don't actually care about the range here + * and can simply extract the GPI as the result. 
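The GPI nibble is decoded a few lines further on; as a compressed sketch of that decode (mirroring the code that follows, except that the real walk reports reserved encodings as a fault rather than merely denying access):

    static bool gpi_permits(int gpi, ARMSecuritySpace pspace)
    {
        switch (gpi) {
        case 0b1111:                   /* all access */
            return true;
        case 0b1000: case 0b1001:      /* single-space grants: */
        case 0b1010: case 0b1011:      /* gpi & 3 names the ARMSS_* space */
            return pspace == (gpi & 3);
        default:                       /* no access, or reserved */
            return false;
        }
    }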
+ */ + if (extract32(entry, 8, 2) == 0) { + goto fault_walk; /* reserved contig */ + } + gpi = extract32(entry, 4, 4); + break; + default: + index = extract64(paddress, pgs, 4); + gpi = extract64(entry, index * 4, 4); + break; + } + + found: + switch (gpi) { + case 0b0000: /* no access */ + break; + case 0b1111: /* all access */ + return true; + case 0b1000: + case 0b1001: + case 0b1010: + case 0b1011: + if (pspace == (gpi & 3)) { + return true; + } + break; + default: + goto fault_walk; /* reserved */ + } + + fi->gpcf = GPCF_Fail; + goto fault_common; + fault_eabt: + fi->gpcf = GPCF_EABT; + goto fault_common; + fault_size: + fi->gpcf = GPCF_AddressSize; + goto fault_common; + fault_walk: + fi->gpcf = GPCF_Walk; + fault_common: + fi->level = level; + fi->paddr = paddress; + fi->paddr_space = pspace; + return false; +} + +static bool S1_attrs_are_device(uint8_t attrs) +{ + /* + * This slightly under-decodes the MAIR_ELx field: + * 0b0000dd01 is Device with FEAT_XS, otherwise UNPREDICTABLE; + * 0b0000dd1x is UNPREDICTABLE. + */ + return (attrs & 0xf0) == 0; +} + +static bool S2_attrs_are_device(uint64_t hcr, uint8_t attrs) +{ + /* + * For an S1 page table walk, the stage 1 attributes are always + * some form of "this is Normal memory". The combined S1+S2 + * attributes are therefore only Device if stage 2 specifies Device. + * With HCR_EL2.FWB == 0 this is when descriptor bits [5:4] are 0b00, + * ie when cacheattrs.attrs bits [3:2] are 0b00. + * With HCR_EL2.FWB == 1 this is when descriptor bit [4] is 0, ie + * when cacheattrs.attrs bit [2] is 0. + */ + if (hcr & HCR_FWB) { + return (attrs & 0x4) == 0; + } else { + return (attrs & 0xc) == 0; + } +} + +static ARMSecuritySpace S2_security_space(ARMSecuritySpace s1_space, + ARMMMUIdx s2_mmu_idx) +{ + /* + * Return the security space to use for stage 2 when doing + * the S1 page table descriptor load. + */ + if (regime_is_stage2(s2_mmu_idx)) { + /* + * The security space for ptw reads is almost always the same + * as that of the security space of the stage 1 translation. + * The only exception is when stage 1 is Secure; in that case + * the ptw read might be to the Secure or the NonSecure space + * (but never Realm or Root), and the s2_mmu_idx tells us which. + * Root translations are always single-stage. + */ + if (s1_space == ARMSS_Secure) { + return arm_secure_to_space(s2_mmu_idx == ARMMMUIdx_Stage2_S); + } else { + assert(s2_mmu_idx != ARMMMUIdx_Stage2_S); + assert(s1_space != ARMSS_Root); + return s1_space; + } + } else { + /* ptw loads are from phys: the mmu idx itself says which space */ + return arm_phys_to_space(s2_mmu_idx); + } +} + +static bool fault_s1ns(ARMSecuritySpace space, ARMMMUIdx s2_mmu_idx) +{ + /* + * For stage 2 faults in Secure EL22, S1NS indicates + * whether the faulting IPA is in the Secure or NonSecure + * IPA space. For all other kinds of fault, it is false. + */ + return space == ARMSS_Secure && regime_is_stage2(s2_mmu_idx) + && s2_mmu_idx == ARMMMUIdx_Stage2_S; +} + +/* Translate a S1 pagetable walk through S2 if needed. */ +static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw, + hwaddr addr, ARMMMUFaultInfo *fi) +{ + ARMMMUIdx mmu_idx = ptw->in_mmu_idx; + ARMMMUIdx s2_mmu_idx = ptw->in_ptw_idx; + uint8_t pte_attrs; + + ptw->out_virt = addr; + + if (unlikely(ptw->in_debug)) { + /* + * From gdbstub, do not use softmmu so that we don't modify the + * state of the cpu at all, including softmmu tlb contents. 
+ */ + ARMSecuritySpace s2_space = S2_security_space(ptw->in_space, s2_mmu_idx); + S1Translate s2ptw = { + .in_mmu_idx = s2_mmu_idx, + .in_ptw_idx = ptw_idx_for_stage_2(env, s2_mmu_idx), + .in_space = s2_space, + .in_debug = true, + }; + GetPhysAddrResult s2 = { }; + + if (get_phys_addr_gpc(env, &s2ptw, addr, MMU_DATA_LOAD, &s2, fi)) { + goto fail; + } + + ptw->out_phys = s2.f.phys_addr; + pte_attrs = s2.cacheattrs.attrs; + ptw->out_host = NULL; + ptw->out_rw = false; + ptw->out_space = s2.f.attrs.space; + } else { +#ifdef CONFIG_TCG + CPUTLBEntryFull *full; + int flags; + + env->tlb_fi = fi; + flags = probe_access_full_mmu(env, addr, 0, MMU_DATA_LOAD, + arm_to_core_mmu_idx(s2_mmu_idx), + &ptw->out_host, &full); + env->tlb_fi = NULL; + + if (unlikely(flags & TLB_INVALID_MASK)) { + goto fail; + } + ptw->out_phys = full->phys_addr | (addr & ~TARGET_PAGE_MASK); + ptw->out_rw = full->prot & PAGE_WRITE; + pte_attrs = full->extra.arm.pte_attrs; + ptw->out_space = full->attrs.space; +#else + g_assert_not_reached(); +#endif + } + + if (regime_is_stage2(s2_mmu_idx)) { + uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space); + + if ((hcr & HCR_PTW) && S2_attrs_are_device(hcr, pte_attrs)) { + /* + * PTW set and S1 walk touched S2 Device memory: + * generate Permission fault. + */ + fi->type = ARMFault_Permission; + fi->s2addr = addr; + fi->stage2 = true; + fi->s1ptw = true; + fi->s1ns = fault_s1ns(ptw->in_space, s2_mmu_idx); + return false; + } + } + + ptw->out_be = regime_translation_big_endian(env, mmu_idx); + return true; + + fail: + assert(fi->type != ARMFault_None); + if (fi->type == ARMFault_GPCFOnOutput) { + fi->type = ARMFault_GPCFOnWalk; + } + fi->s2addr = addr; + fi->stage2 = regime_is_stage2(s2_mmu_idx); + fi->s1ptw = fi->stage2; + fi->s1ns = fault_s1ns(ptw->in_space, s2_mmu_idx); + return false; +} + +/* All loads done in the course of a page table walk go through here. */ +static uint32_t arm_ldl_ptw(CPUARMState *env, S1Translate *ptw, + ARMMMUFaultInfo *fi) +{ + CPUState *cs = env_cpu(env); + void *host = ptw->out_host; + uint32_t data; + + if (likely(host)) { + /* Page tables are in RAM, and we have the host address. */ + data = qatomic_read((uint32_t *)host); + if (ptw->out_be) { + data = be32_to_cpu(data); + } else { + data = le32_to_cpu(data); + } + } else { + /* Page tables are in MMIO. */ + MemTxAttrs attrs = { + .space = ptw->out_space, + .secure = arm_space_is_secure(ptw->out_space), + }; + AddressSpace *as = arm_addressspace(cs, attrs); + MemTxResult result = MEMTX_OK; + + if (ptw->out_be) { + data = address_space_ldl_be(as, ptw->out_phys, attrs, &result); + } else { + data = address_space_ldl_le(as, ptw->out_phys, attrs, &result); + } + if (unlikely(result != MEMTX_OK)) { + fi->type = ARMFault_SyncExternalOnWalk; + fi->ea = arm_extabort_type(result); + return 0; + } + } + return data; +} + +static uint64_t arm_ldq_ptw(CPUARMState *env, S1Translate *ptw, + ARMMMUFaultInfo *fi) +{ + CPUState *cs = env_cpu(env); + void *host = ptw->out_host; + uint64_t data; + + if (likely(host)) { + /* Page tables are in RAM, and we have the host address. */ +#ifdef CONFIG_ATOMIC64 + data = qatomic_read__nocheck((uint64_t *)host); + if (ptw->out_be) { + data = be64_to_cpu(data); + } else { + data = le64_to_cpu(data); + } +#else + if (ptw->out_be) { + data = ldq_be_p(host); + } else { + data = ldq_le_p(host); + } +#endif + } else { + /* Page tables are in MMIO. 
*/ + MemTxAttrs attrs = { + .space = ptw->out_space, + .secure = arm_space_is_secure(ptw->out_space), + }; + AddressSpace *as = arm_addressspace(cs, attrs); + MemTxResult result = MEMTX_OK; + + if (ptw->out_be) { + data = address_space_ldq_be(as, ptw->out_phys, attrs, &result); + } else { + data = address_space_ldq_le(as, ptw->out_phys, attrs, &result); + } + if (unlikely(result != MEMTX_OK)) { + fi->type = ARMFault_SyncExternalOnWalk; + fi->ea = arm_extabort_type(result); + return 0; + } + } + return data; +} + +static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, + uint64_t new_val, S1Translate *ptw, + ARMMMUFaultInfo *fi) +{ +#if defined(TARGET_AARCH64) && defined(CONFIG_TCG) + uint64_t cur_val; + void *host = ptw->out_host; + + if (unlikely(!host)) { + /* Page table in MMIO Memory Region */ + CPUState *cs = env_cpu(env); + MemTxAttrs attrs = { + .space = ptw->out_space, + .secure = arm_space_is_secure(ptw->out_space), + }; + AddressSpace *as = arm_addressspace(cs, attrs); + MemTxResult result = MEMTX_OK; + bool need_lock = !bql_locked(); + + if (need_lock) { + bql_lock(); + } + if (ptw->out_be) { + cur_val = address_space_ldq_be(as, ptw->out_phys, attrs, &result); + if (unlikely(result != MEMTX_OK)) { + fi->type = ARMFault_SyncExternalOnWalk; + fi->ea = arm_extabort_type(result); + if (need_lock) { + bql_unlock(); + } + return old_val; + } + if (cur_val == old_val) { + address_space_stq_be(as, ptw->out_phys, new_val, attrs, &result); + if (unlikely(result != MEMTX_OK)) { + fi->type = ARMFault_SyncExternalOnWalk; + fi->ea = arm_extabort_type(result); + if (need_lock) { + bql_unlock(); + } + return old_val; + } + cur_val = new_val; + } + } else { + cur_val = address_space_ldq_le(as, ptw->out_phys, attrs, &result); + if (unlikely(result != MEMTX_OK)) { + fi->type = ARMFault_SyncExternalOnWalk; + fi->ea = arm_extabort_type(result); + if (need_lock) { + bql_unlock(); + } + return old_val; + } + if (cur_val == old_val) { + address_space_stq_le(as, ptw->out_phys, new_val, attrs, &result); + if (unlikely(result != MEMTX_OK)) { + fi->type = ARMFault_SyncExternalOnWalk; + fi->ea = arm_extabort_type(result); + if (need_lock) { + bql_unlock(); + } + return old_val; + } + cur_val = new_val; + } + } + if (need_lock) { + bql_unlock(); + } + return cur_val; + } + + /* + * Raising a stage2 Protection fault for an atomic update to a read-only + * page is delayed until it is certain that there is a change to make. + */ + if (unlikely(!ptw->out_rw)) { + int flags; + + env->tlb_fi = fi; + flags = probe_access_full_mmu(env, ptw->out_virt, 0, + MMU_DATA_STORE, + arm_to_core_mmu_idx(ptw->in_ptw_idx), + NULL, NULL); + env->tlb_fi = NULL; + + if (unlikely(flags & TLB_INVALID_MASK)) { + /* + * We know this must be a stage 2 fault because the granule + * protection table does not separately track read and write + * permission, so all GPC faults are caught in S1_ptw_translate(): + * we only get here for "readable but not writeable". + */ + assert(fi->type != ARMFault_None); + fi->s2addr = ptw->out_virt; + fi->stage2 = true; + fi->s1ptw = true; + fi->s1ns = fault_s1ns(ptw->in_space, ptw->in_ptw_idx); + return 0; + } + + /* In case CAS mismatches and we loop, remember writability. 
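A sketch of the caller-side retry loop arm_casq_ptw() is designed for (the helper is hypothetical; the access flag is descriptor bit [10] in the LPAE format):

    static uint64_t set_access_flag(CPUARMState *env, uint64_t desc,
                                    S1Translate *ptw, ARMMMUFaultInfo *fi)
    {
        for (;;) {
            uint64_t new_desc = desc | (1ull << 10);    /* set AF */
            uint64_t cur = arm_casq_ptw(env, desc, new_desc, ptw, fi);

            if (fi->type != ARMFault_None) {
                return desc;         /* fault already recorded in *fi */
            }
            if (cur == desc) {
                return new_desc;     /* our update landed */
            }
            desc = cur;              /* lost a race; retry on fresh value */
        }
    }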
*/ + ptw->out_rw = true; + } + +#ifdef CONFIG_ATOMIC64 + if (ptw->out_be) { + old_val = cpu_to_be64(old_val); + new_val = cpu_to_be64(new_val); + cur_val = qatomic_cmpxchg__nocheck((uint64_t *)host, old_val, new_val); + cur_val = be64_to_cpu(cur_val); + } else { + old_val = cpu_to_le64(old_val); + new_val = cpu_to_le64(new_val); + cur_val = qatomic_cmpxchg__nocheck((uint64_t *)host, old_val, new_val); + cur_val = le64_to_cpu(cur_val); + } +#else + /* + * We can't support the full 64-bit atomic cmpxchg on the host. + * Because this is only used for FEAT_HAFDBS, which is only for AA64, + * we know that TCG_OVERSIZED_GUEST is set, which means that we are + * running in round-robin mode and could only race with dma i/o. + */ +#if !TCG_OVERSIZED_GUEST +# error "Unexpected configuration" +#endif + bool locked = bql_locked(); + if (!locked) { + bql_lock(); + } + if (ptw->out_be) { + cur_val = ldq_be_p(host); + if (cur_val == old_val) { + stq_be_p(host, new_val); + } + } else { + cur_val = ldq_le_p(host); + if (cur_val == old_val) { + stq_le_p(host, new_val); + } + } + if (!locked) { + bql_unlock(); + } +#endif + + return cur_val; +#else + /* AArch32 does not have FEAT_HAFDBS; non-TCG guests only use debug-mode. */ + g_assert_not_reached(); +#endif +} + +static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx, + uint32_t *table, uint32_t address) +{ + /* Note that we can only get here for an AArch32 PL0/PL1 lookup */ + uint64_t tcr = regime_tcr(env, mmu_idx); + int maskshift = extract32(tcr, 0, 3); + uint32_t mask = ~(((uint32_t)0xffffffffu) >> maskshift); + uint32_t base_mask; + + if (address & mask) { + if (tcr & TTBCR_PD1) { + /* Translation table walk disabled for TTBR1 */ + return false; + } + *table = regime_ttbr(env, mmu_idx, 1) & 0xffffc000; + } else { + if (tcr & TTBCR_PD0) { + /* Translation table walk disabled for TTBR0 */ + return false; + } + base_mask = ~((uint32_t)0x3fffu >> maskshift); + *table = regime_ttbr(env, mmu_idx, 0) & base_mask; + } + *table |= (address >> 18) & 0x3ffc; + return true; +} + +/* + * Translate section/page access permissions to page R/W protection flags + * @env: CPUARMState + * @mmu_idx: MMU index indicating required translation regime + * @ap: The 3-bit access permissions (AP[2:0]) + * @domain_prot: The 2-bit domain access permissions + * @is_user: TRUE if accessing from PL0 + */ +static int ap_to_rw_prot_is_user(CPUARMState *env, ARMMMUIdx mmu_idx, + int ap, int domain_prot, bool is_user) +{ + if (domain_prot == 3) { + return PAGE_READ | PAGE_WRITE; + } + + switch (ap) { + case 0: + if (arm_feature(env, ARM_FEATURE_V7)) { + return 0; + } + switch (regime_sctlr(env, mmu_idx) & (SCTLR_S | SCTLR_R)) { + case SCTLR_S: + return is_user ? 0 : PAGE_READ; + case SCTLR_R: + return PAGE_READ; + default: + return 0; + } + case 1: + return is_user ? 0 : PAGE_READ | PAGE_WRITE; + case 2: + if (is_user) { + return PAGE_READ; + } else { + return PAGE_READ | PAGE_WRITE; + } + case 3: + return PAGE_READ | PAGE_WRITE; + case 4: /* Reserved. */ + return 0; + case 5: + return is_user ? 
0 : PAGE_READ; + case 6: + return PAGE_READ; + case 7: + if (!arm_feature(env, ARM_FEATURE_V6K)) { + return 0; + } + return PAGE_READ; + default: + g_assert_not_reached(); + } +} + +/* + * Translate section/page access permissions to page R/W protection flags + * @env: CPUARMState + * @mmu_idx: MMU index indicating required translation regime + * @ap: The 3-bit access permissions (AP[2:0]) + * @domain_prot: The 2-bit domain access permissions + */ +static int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, + int ap, int domain_prot) +{ + return ap_to_rw_prot_is_user(env, mmu_idx, ap, domain_prot, + regime_is_user(env, mmu_idx)); +} + +/* + * Translate section/page access permissions to page R/W protection flags. + * @ap: The 2-bit simple AP (AP[2:1]) + * @is_user: TRUE if accessing from PL0 + */ +static int simple_ap_to_rw_prot_is_user(int ap, bool is_user) +{ + switch (ap) { + case 0: + return is_user ? 0 : PAGE_READ | PAGE_WRITE; + case 1: + return PAGE_READ | PAGE_WRITE; + case 2: + return is_user ? 0 : PAGE_READ; + case 3: + return PAGE_READ; + default: + g_assert_not_reached(); + } +} + +static int simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap) +{ + return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx)); +} + +static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw, + uint32_t address, MMUAccessType access_type, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi) +{ + int level = 1; + uint32_t table; + uint32_t desc; + int type; + int ap; + int domain = 0; + int domain_prot; + hwaddr phys_addr; + uint32_t dacr; + + /* Pagetable walk. */ + /* Lookup l1 descriptor. */ + if (!get_level1_table_address(env, ptw->in_mmu_idx, &table, address)) { + /* Section translation fault if page walk is disabled by PD0 or PD1 */ + fi->type = ARMFault_Translation; + goto do_fault; + } + if (!S1_ptw_translate(env, ptw, table, fi)) { + goto do_fault; + } + desc = arm_ldl_ptw(env, ptw, fi); + if (fi->type != ARMFault_None) { + goto do_fault; + } + type = (desc & 3); + domain = (desc >> 5) & 0x0f; + if (regime_el(env, ptw->in_mmu_idx) == 1) { + dacr = env->cp15.dacr_ns; + } else { + dacr = env->cp15.dacr_s; + } + domain_prot = (dacr >> (domain * 2)) & 3; + if (type == 0) { + /* Section translation fault. */ + fi->type = ARMFault_Translation; + goto do_fault; + } + if (type != 2) { + level = 2; + } + if (domain_prot == 0 || domain_prot == 2) { + fi->type = ARMFault_Domain; + goto do_fault; + } + if (type == 2) { + /* 1Mb section. */ + phys_addr = (desc & 0xfff00000) | (address & 0x000fffff); + ap = (desc >> 10) & 3; + result->f.lg_page_size = 20; /* 1MB */ + } else { + /* Lookup l2 entry. */ + if (type == 1) { + /* Coarse pagetable. */ + table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); + } else { + /* Fine pagetable. */ + table = (desc & 0xfffff000) | ((address >> 8) & 0xffc); + } + if (!S1_ptw_translate(env, ptw, table, fi)) { + goto do_fault; + } + desc = arm_ldl_ptw(env, ptw, fi); + if (fi->type != ARMFault_None) { + goto do_fault; + } + switch (desc & 3) { + case 0: /* Page translation fault. */ + fi->type = ARMFault_Translation; + goto do_fault; + case 1: /* 64k page. */ + phys_addr = (desc & 0xffff0000) | (address & 0xffff); + ap = (desc >> (4 + ((address >> 13) & 6))) & 3; + result->f.lg_page_size = 16; + break; + case 2: /* 4k page. 
*/ + phys_addr = (desc & 0xfffff000) | (address & 0xfff); + ap = (desc >> (4 + ((address >> 9) & 6))) & 3; + result->f.lg_page_size = 12; + break; + case 3: /* 1k page, or ARMv6/XScale "extended small (4k) page" */ + if (type == 1) { + /* ARMv6/XScale extended small page format */ + if (arm_feature(env, ARM_FEATURE_XSCALE) + || arm_feature(env, ARM_FEATURE_V6)) { + phys_addr = (desc & 0xfffff000) | (address & 0xfff); + result->f.lg_page_size = 12; + } else { + /* + * UNPREDICTABLE in ARMv5; we choose to take a + * page translation fault. + */ + fi->type = ARMFault_Translation; + goto do_fault; + } + } else { + phys_addr = (desc & 0xfffffc00) | (address & 0x3ff); + result->f.lg_page_size = 10; + } + ap = (desc >> 4) & 3; + break; + default: + /* Never happens, but compiler isn't smart enough to tell. */ + g_assert_not_reached(); + } + } + result->f.prot = ap_to_rw_prot(env, ptw->in_mmu_idx, ap, domain_prot); + result->f.prot |= result->f.prot ? PAGE_EXEC : 0; + if (!(result->f.prot & (1 << access_type))) { + /* Access permission fault. */ + fi->type = ARMFault_Permission; + goto do_fault; + } + result->f.phys_addr = phys_addr; + return false; +do_fault: + fi->domain = domain; + fi->level = level; + return true; +} + +static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw, + uint32_t address, MMUAccessType access_type, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi) +{ + ARMCPU *cpu = env_archcpu(env); + ARMMMUIdx mmu_idx = ptw->in_mmu_idx; + int level = 1; + uint32_t table; + uint32_t desc; + uint32_t xn; + uint32_t pxn = 0; + int type; + int ap; + int domain = 0; + int domain_prot; + hwaddr phys_addr; + uint32_t dacr; + bool ns; + int user_prot; + + /* Pagetable walk. */ + /* Lookup l1 descriptor. */ + if (!get_level1_table_address(env, mmu_idx, &table, address)) { + /* Section translation fault if page walk is disabled by PD0 or PD1 */ + fi->type = ARMFault_Translation; + goto do_fault; + } + if (!S1_ptw_translate(env, ptw, table, fi)) { + goto do_fault; + } + desc = arm_ldl_ptw(env, ptw, fi); + if (fi->type != ARMFault_None) { + goto do_fault; + } + type = (desc & 3); + if (type == 0 || (type == 3 && !cpu_isar_feature(aa32_pxn, cpu))) { + /* Section translation fault, or attempt to use the encoding + * which is Reserved on implementations without PXN. + */ + fi->type = ARMFault_Translation; + goto do_fault; + } + if ((type == 1) || !(desc & (1 << 18))) { + /* Page or Section. */ + domain = (desc >> 5) & 0x0f; + } + if (regime_el(env, mmu_idx) == 1) { + dacr = env->cp15.dacr_ns; + } else { + dacr = env->cp15.dacr_s; + } + if (type == 1) { + level = 2; + } + domain_prot = (dacr >> (domain * 2)) & 3; + if (domain_prot == 0 || domain_prot == 2) { + /* Section or Page domain fault */ + fi->type = ARMFault_Domain; + goto do_fault; + } + if (type != 1) { + if (desc & (1 << 18)) { + /* Supersection. */ + phys_addr = (desc & 0xff000000) | (address & 0x00ffffff); + phys_addr |= (uint64_t)extract32(desc, 20, 4) << 32; + phys_addr |= (uint64_t)extract32(desc, 5, 4) << 36; + result->f.lg_page_size = 24; /* 16MB */ + } else { + /* Section. */ + phys_addr = (desc & 0xfff00000) | (address & 0x000fffff); + result->f.lg_page_size = 20; /* 1MB */ + } + ap = ((desc >> 10) & 3) | ((desc >> 13) & 4); + xn = desc & (1 << 4); + pxn = desc & 1; + ns = extract32(desc, 19, 1); + } else { + if (cpu_isar_feature(aa32_pxn, cpu)) { + pxn = (desc >> 2) & 1; + } + ns = extract32(desc, 3, 1); + /* Lookup l2 entry. 
*/ + table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc); + if (!S1_ptw_translate(env, ptw, table, fi)) { + goto do_fault; + } + desc = arm_ldl_ptw(env, ptw, fi); + if (fi->type != ARMFault_None) { + goto do_fault; + } + ap = ((desc >> 4) & 3) | ((desc >> 7) & 4); + switch (desc & 3) { + case 0: /* Page translation fault. */ + fi->type = ARMFault_Translation; + goto do_fault; + case 1: /* 64k page. */ + phys_addr = (desc & 0xffff0000) | (address & 0xffff); + xn = desc & (1 << 15); + result->f.lg_page_size = 16; + break; + case 2: case 3: /* 4k page. */ + phys_addr = (desc & 0xfffff000) | (address & 0xfff); + xn = desc & 1; + result->f.lg_page_size = 12; + break; + default: + /* Never happens, but compiler isn't smart enough to tell. */ + g_assert_not_reached(); + } + } + if (domain_prot == 3) { + result->f.prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + } else { + if (pxn && !regime_is_user(env, mmu_idx)) { + xn = 1; + } + if (xn && access_type == MMU_INST_FETCH) { + fi->type = ARMFault_Permission; + goto do_fault; + } + + if (arm_feature(env, ARM_FEATURE_V6K) && + (regime_sctlr(env, mmu_idx) & SCTLR_AFE)) { + /* The simplified model uses AP[0] as an access control bit. */ + if ((ap & 1) == 0) { + /* Access flag fault. */ + fi->type = ARMFault_AccessFlag; + goto do_fault; + } + result->f.prot = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1); + user_prot = simple_ap_to_rw_prot_is_user(ap >> 1, 1); + } else { + result->f.prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot); + user_prot = ap_to_rw_prot_is_user(env, mmu_idx, ap, domain_prot, 1); + } + if (result->f.prot && !xn) { + result->f.prot |= PAGE_EXEC; + } + if (!(result->f.prot & (1 << access_type))) { + /* Access permission fault. */ + fi->type = ARMFault_Permission; + goto do_fault; + } + if (regime_is_pan(env, mmu_idx) && + !regime_is_user(env, mmu_idx) && + user_prot && + access_type != MMU_INST_FETCH) { + /* Privileged Access Never fault */ + fi->type = ARMFault_Permission; + goto do_fault; + } + } + if (ns) { + /* The NS bit will (as required by the architecture) have no effect if + * the CPU doesn't support TZ or this is a non-secure translation + * regime, because the attribute will already be non-secure. 
+ */ + result->f.attrs.secure = false; + result->f.attrs.space = ARMSS_NonSecure; + } + result->f.phys_addr = phys_addr; + return false; +do_fault: + fi->domain = domain; + fi->level = level; + return true; +} + +/* + * Translate S2 section/page access permissions to protection flags + * @env: CPUARMState + * @s2ap: The 2-bit stage2 access permissions (S2AP) + * @xn: XN (execute-never) bits + * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0 + */ +static int get_S2prot_noexecute(int s2ap) +{ + int prot = 0; + + if (s2ap & 1) { + prot |= PAGE_READ; + } + if (s2ap & 2) { + prot |= PAGE_WRITE; + } + return prot; +} + +static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0) +{ + int prot = get_S2prot_noexecute(s2ap); + + if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) { + switch (xn) { + case 0: + prot |= PAGE_EXEC; + break; + case 1: + if (s1_is_el0) { + prot |= PAGE_EXEC; + } + break; + case 2: + break; + case 3: + if (!s1_is_el0) { + prot |= PAGE_EXEC; + } + break; + default: + g_assert_not_reached(); + } + } else { + if (!extract32(xn, 1, 1)) { + if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) { + prot |= PAGE_EXEC; + } + } + } + return prot; +} + +/* + * Translate section/page access permissions to protection flags + * @env: CPUARMState + * @mmu_idx: MMU index indicating required translation regime + * @is_aa64: TRUE if AArch64 + * @ap: The 2-bit simple AP (AP[2:1]) + * @xn: XN (execute-never) bit + * @pxn: PXN (privileged execute-never) bit + * @in_pa: The original input pa space + * @out_pa: The output pa space, modified by NSTable, NS, and NSE + */ +static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64, + int ap, int xn, int pxn, + ARMSecuritySpace in_pa, ARMSecuritySpace out_pa) +{ + ARMCPU *cpu = env_archcpu(env); + bool is_user = regime_is_user(env, mmu_idx); + int prot_rw, user_rw; + bool have_wxn; + int wxn = 0; + + assert(!regime_is_stage2(mmu_idx)); + + user_rw = simple_ap_to_rw_prot_is_user(ap, true); + if (is_user) { + prot_rw = user_rw; + } else { + /* + * PAN controls can forbid data accesses but don't affect insn fetch. + * Plain PAN forbids data accesses if EL0 has data permissions; + * PAN3 forbids data accesses if EL0 has either data or exec perms. + * Note that for AArch64 the 'user can exec' case is exactly !xn. + * We make the IMPDEF choices that SCR_EL3.SIF and Realm EL2&0 + * do not affect EPAN. + */ + if (user_rw && regime_is_pan(env, mmu_idx)) { + prot_rw = 0; + } else if (cpu_isar_feature(aa64_pan3, cpu) && is_aa64 && + regime_is_pan(env, mmu_idx) && + (regime_sctlr(env, mmu_idx) & SCTLR_EPAN) && !xn) { + prot_rw = 0; + } else { + prot_rw = simple_ap_to_rw_prot_is_user(ap, false); + } + } + + if (in_pa != out_pa) { + switch (in_pa) { + case ARMSS_Root: + /* + * R_ZWRVD: permission fault for insn fetched from non-Root, + * I_WWBFB: SIF has no effect in EL3. + */ + return prot_rw; + case ARMSS_Realm: + /* + * R_PKTDS: permission fault for insn fetched from non-Realm, + * for Realm EL2 or EL2&0. The corresponding fault for EL1&0 + * happens during any stage2 translation. + */ + switch (mmu_idx) { + case ARMMMUIdx_E2: + case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + return prot_rw; + default: + break; + } + break; + case ARMSS_Secure: + if (env->cp15.scr_el3 & SCR_SIF) { + return prot_rw; + } + break; + default: + /* Input NonSecure must have output NonSecure. 
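To make the PAN clause above concrete, a worked example with values taken from the code (not an addition to it): AP[2:1] = 0b01 grants read/write at both EL0 and EL1, so user_rw is PAGE_READ | PAGE_WRITE; in a PAN regime a non-zero user_rw forces prot_rw to 0, so a privileged load or store takes a Permission fault while instruction fetch is unaffected:

    int user_rw = simple_ap_to_rw_prot_is_user(1, true);    /* R|W at EL0 */
    int prot_rw = (user_rw && regime_is_pan(env, mmu_idx))
                  ? 0                                       /* PAN bites */
                  : simple_ap_to_rw_prot_is_user(1, false); /* R|W at EL1 */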
*/ + g_assert_not_reached(); + } + } + + /* TODO have_wxn should be replaced with + * ARM_FEATURE_V8 || (ARM_FEATURE_V7 && ARM_FEATURE_EL2) + * when ARM_FEATURE_EL2 starts getting set. For now we assume all LPAE + * compatible processors have EL2, which is required for [U]WXN. + */ + have_wxn = arm_feature(env, ARM_FEATURE_LPAE); + + if (have_wxn) { + wxn = regime_sctlr(env, mmu_idx) & SCTLR_WXN; + } + + if (is_aa64) { + if (regime_has_2_ranges(mmu_idx) && !is_user) { + xn = pxn || (user_rw & PAGE_WRITE); + } + } else if (arm_feature(env, ARM_FEATURE_V7)) { + switch (regime_el(env, mmu_idx)) { + case 1: + case 3: + if (is_user) { + xn = xn || !(user_rw & PAGE_READ); + } else { + int uwxn = 0; + if (have_wxn) { + uwxn = regime_sctlr(env, mmu_idx) & SCTLR_UWXN; + } + xn = xn || !(prot_rw & PAGE_READ) || pxn || + (uwxn && (user_rw & PAGE_WRITE)); + } + break; + case 2: + break; + } + } else { + xn = wxn = 0; + } + + if (xn || (wxn && (prot_rw & PAGE_WRITE))) { + return prot_rw; + } + return prot_rw | PAGE_EXEC; +} + +static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va, + ARMMMUIdx mmu_idx) +{ + uint64_t tcr = regime_tcr(env, mmu_idx); + uint32_t el = regime_el(env, mmu_idx); + int select, tsz; + bool epd, hpd; + + assert(mmu_idx != ARMMMUIdx_Stage2_S); + + if (mmu_idx == ARMMMUIdx_Stage2) { + /* VTCR */ + bool sext = extract32(tcr, 4, 1); + bool sign = extract32(tcr, 3, 1); + + /* + * If the sign-extend bit is not the same as t0sz[3], the result + * is unpredictable. Flag this as a guest error. + */ + if (sign != sext) { + qemu_log_mask(LOG_GUEST_ERROR, + "AArch32: VTCR.S / VTCR.T0SZ[3] mismatch\n"); + } + tsz = sextract32(tcr, 0, 4) + 8; + select = 0; + hpd = false; + epd = false; + } else if (el == 2) { + /* HTCR */ + tsz = extract32(tcr, 0, 3); + select = 0; + hpd = extract64(tcr, 24, 1); + epd = false; + } else { + int t0sz = extract32(tcr, 0, 3); + int t1sz = extract32(tcr, 16, 3); + + if (t1sz == 0) { + select = va > (0xffffffffu >> t0sz); + } else { + /* Note that we will detect errors later. */ + select = va >= ~(0xffffffffu >> t1sz); + } + if (!select) { + tsz = t0sz; + epd = extract32(tcr, 7, 1); + hpd = extract64(tcr, 41, 1); + } else { + tsz = t1sz; + epd = extract32(tcr, 23, 1); + hpd = extract64(tcr, 42, 1); + } + /* For aarch32, hpd0 is not enabled without t2e as well. */ + hpd &= extract32(tcr, 6, 1); + } + + return (ARMVAParameters) { + .tsz = tsz, + .select = select, + .epd = epd, + .hpd = hpd, + }; +} + +/* + * check_s2_mmu_setup + * @cpu: ARMCPU + * @is_aa64: True if the translation regime is in AArch64 state + * @tcr: VTCR_EL2 or VSTCR_EL2 + * @ds: Effective value of TCR.DS. + * @iasize: Bitsize of IPAs + * @stride: Page-table stride (See the ARM ARM) + * + * Decode the starting level of the S2 lookup, returning INT_MIN if + * the configuration is invalid. + */ +static int check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, uint64_t tcr, + bool ds, int iasize, int stride) +{ + int sl0, sl2, startlevel, granulebits, levels; + int s1_min_iasize, s1_max_iasize; + + sl0 = extract32(tcr, 6, 2); + if (is_aa64) { + /* + * AArch64.S2InvalidSL: Interpretation of SL depends on the page size, + * so interleave AArch64.S2StartLevel. + */ + switch (stride) { + case 9: /* 4KB */ + /* SL2 is RES0 unless DS=1 & 4KB granule. 
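The size checks in this function are easiest to follow with numbers (an illustrative case, not part of the patch). With a 4KB granule, stride = 9, and SL0 = 1:

    startlevel    = 2 - 1 = 1
    levels        = 3 - startlevel = 2
    granulebits   = stride + 3 = 12
    s1_min_iasize = levels * stride + granulebits + 1 = 31
    s1_max_iasize = s1_min_iasize + (stride - 1) + 4 = 43

so a 40-bit IPA is accepted and start level 1 is returned, while a 44-bit IPA fails the check and the caller raises a Translation fault.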
*/ + sl2 = extract64(tcr, 33, 1); + if (ds && sl2) { + if (sl0 != 0) { + goto fail; + } + startlevel = -1; + } else { + startlevel = 2 - sl0; + switch (sl0) { + case 2: + if (arm_pamax(cpu) < 44) { + goto fail; + } + break; + case 3: + if (!cpu_isar_feature(aa64_st, cpu)) { + goto fail; + } + startlevel = 3; + break; + } + } + break; + case 11: /* 16KB */ + switch (sl0) { + case 2: + if (arm_pamax(cpu) < 42) { + goto fail; + } + break; + case 3: + if (!ds) { + goto fail; + } + break; + } + startlevel = 3 - sl0; + break; + case 13: /* 64KB */ + switch (sl0) { + case 2: + if (arm_pamax(cpu) < 44) { + goto fail; + } + break; + case 3: + goto fail; + } + startlevel = 3 - sl0; + break; + default: + g_assert_not_reached(); + } + } else { + /* + * Things are simpler for AArch32 EL2, with only 4k pages. + * There is no separate S2InvalidSL function, but AArch32.S2Walk + * begins with walkparms.sl0 in {'1x'}. + */ + assert(stride == 9); + if (sl0 >= 2) { + goto fail; + } + startlevel = 2 - sl0; + } + + /* AArch{64,32}.S2InconsistentSL are functionally equivalent. */ + levels = 3 - startlevel; + granulebits = stride + 3; + + s1_min_iasize = levels * stride + granulebits + 1; + s1_max_iasize = s1_min_iasize + (stride - 1) + 4; + + if (iasize >= s1_min_iasize && iasize <= s1_max_iasize) { + return startlevel; + } + + fail: + return INT_MIN; +} + +static bool lpae_block_desc_valid(ARMCPU *cpu, bool ds, + ARMGranuleSize gran, int level) +{ + /* + * See pseudocode AArch64.BlockDescSupported(): block descriptors + * are not valid at all levels, depending on the page size. + */ + switch (gran) { + case Gran4K: + return (level == 0 && ds) || level == 1 || level == 2; + case Gran16K: + return (level == 1 && ds) || level == 2; + case Gran64K: + return (level == 1 && arm_pamax(cpu) == 52) || level == 2; + default: + g_assert_not_reached(); + } +} + +static bool nv_nv1_enabled(CPUARMState *env, S1Translate *ptw) +{ + uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space); + return (hcr & (HCR_NV | HCR_NV1)) == (HCR_NV | HCR_NV1); +} + +/** + * get_phys_addr_lpae: perform one stage of page table walk, LPAE format + * + * Returns false if the translation was successful. Otherwise, phys_ptr, + * attrs, prot and page_size may not be filled in, and the populated fsr + * value provides information on why the translation aborted, in the format + * of a long-format DFSR/IFSR fault register, with the following caveat: + * the WnR bit is never set (the caller must do this). + * + * @env: CPUARMState + * @ptw: Current and next stage parameters for the walk. 
+ * @address: virtual address to get physical address for + * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH + * @result: set on translation success, + * @fi: set to fault info if the translation fails + */ +static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw, + uint64_t address, + MMUAccessType access_type, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi) +{ + ARMCPU *cpu = env_archcpu(env); + ARMMMUIdx mmu_idx = ptw->in_mmu_idx; + int32_t level; + ARMVAParameters param; + uint64_t ttbr; + hwaddr descaddr, indexmask, indexmask_grainsize; + uint32_t tableattrs; + target_ulong page_size; + uint64_t attrs; + int32_t stride; + int addrsize, inputsize, outputsize; + uint64_t tcr = regime_tcr(env, mmu_idx); + int ap, xn, pxn; + uint32_t el = regime_el(env, mmu_idx); + uint64_t descaddrmask; + bool aarch64 = arm_el_is_aa64(env, el); + uint64_t descriptor, new_descriptor; + ARMSecuritySpace out_space; + bool device; + + /* TODO: This code does not support shareability levels. */ + if (aarch64) { + int ps; + + param = aa64_va_parameters(env, address, mmu_idx, + access_type != MMU_INST_FETCH, + !arm_el_is_aa64(env, 1)); + level = 0; + + /* + * If TxSZ is programmed to a value larger than the maximum, + * or smaller than the effective minimum, it is IMPLEMENTATION + * DEFINED whether we behave as if the field were programmed + * within bounds, or if a level 0 Translation fault is generated. + * + * With FEAT_LVA, fault on less than minimum becomes required, + * so our choice is to always raise the fault. + */ + if (param.tsz_oob) { + goto do_translation_fault; + } + + addrsize = 64 - 8 * param.tbi; + inputsize = 64 - param.tsz; + + /* + * Bound PS by PARANGE to find the effective output address size. + * ID_AA64MMFR0 is a read-only register so values outside of the + * supported mappings can be considered an implementation error. + */ + ps = FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); + ps = MIN(ps, param.ps); + assert(ps < ARRAY_SIZE(pamax_map)); + outputsize = pamax_map[ps]; + + /* + * With LPA2, the effective output address (OA) size is at most 48 bits + * unless TCR.DS == 1 + */ + if (!param.ds && param.gran != Gran64K) { + outputsize = MIN(outputsize, 48); + } + } else { + param = aa32_va_parameters(env, address, mmu_idx); + level = 1; + addrsize = (mmu_idx == ARMMMUIdx_Stage2 ? 40 : 32); + inputsize = addrsize - param.tsz; + outputsize = 40; + } + + /* + * We determined the region when collecting the parameters, but we + * have not yet validated that the address is valid for the region. + * Extract the top bits and verify that they all match select. + * + * For aa32, if inputsize == addrsize, then we have selected the + * region by exclusion in aa32_va_parameters and there is no more + * validation to do here. + */ + if (inputsize < addrsize) { + target_ulong top_bits = sextract64(address, inputsize, + addrsize - inputsize); + if (-top_bits != param.select) { + /* The gap between the two regions is a Translation fault */ + goto do_translation_fault; + } + } + + stride = arm_granule_bits(param.gran) - 3; + + /* + * Note that QEMU ignores shareability and cacheability attributes, + * so we don't need to do anything with the SH, ORGN, IRGN fields + * in the TTBCR. Similarly, TTBCR:A1 selects whether we get the + * ASID from TTBR0 or TTBR1, but QEMU's TLB doesn't currently + * implement any ASID-like capability so we can ignore it (instead + * we will always flush the TLB any time the ASID is changed). 
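+     * (QEMU's TLB is effectively tagged only by mmu_idx, which is why
+     * an ASID change is handled with a full flush instead.)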
+ */ + ttbr = regime_ttbr(env, mmu_idx, param.select); + + /* + * Here we should have set up all the parameters for the translation: + * inputsize, ttbr, epd, stride, tbi + */ + + if (param.epd) { + /* + * Translation table walk disabled => Translation fault on TLB miss + * Note: This is always 0 on 64-bit EL2 and EL3. + */ + goto do_translation_fault; + } + + if (!regime_is_stage2(mmu_idx)) { + /* + * The starting level depends on the virtual address size (which can + * be up to 48 bits) and the translation granule size. It indicates + * the number of strides (stride bits at a time) needed to + * consume the bits of the input address. In the pseudocode this is: + * level = 4 - RoundUp((inputsize - grainsize) / stride) + * where their 'inputsize' is our 'inputsize', 'grainsize' is + * our 'stride + 3' and 'stride' is our 'stride'. + * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying: + * = 4 - (inputsize - stride - 3 + stride - 1) / stride + * = 4 - (inputsize - 4) / stride; + */ + level = 4 - (inputsize - 4) / stride; + } else { + int startlevel = check_s2_mmu_setup(cpu, aarch64, tcr, param.ds, + inputsize, stride); + if (startlevel == INT_MIN) { + level = 0; + goto do_translation_fault; + } + level = startlevel; + } + + indexmask_grainsize = MAKE_64BIT_MASK(0, stride + 3); + indexmask = MAKE_64BIT_MASK(0, inputsize - (stride * (4 - level))); + + /* Now we can extract the actual base address from the TTBR */ + descaddr = extract64(ttbr, 0, 48); + + /* + * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [5:2] of TTBR. + * + * Otherwise, if the base address is out of range, raise AddressSizeFault. + * In the pseudocode, this is !IsZero(baseregister<47:outputsize>), + * but we've just cleared the bits above 47, so simplify the test. + */ + if (outputsize > 48) { + descaddr |= extract64(ttbr, 2, 4) << 48; + } else if (descaddr >> outputsize) { + level = 0; + fi->type = ARMFault_AddressSize; + goto do_fault; + } + + /* + * We rely on this masking to clear the RES0 bits at the bottom of the TTBR + * and also to mask out CnP (bit 0) which could validly be non-zero. + */ + descaddr &= ~indexmask; + + /* + * For AArch32, the address field in the descriptor goes up to bit 39 + * for both v7 and v8. However, for v8 the SBZ bits [47:40] must be 0 + * or an AddressSize fault is raised. So for v8 we extract those SBZ + * bits as part of the address, which will be checked via outputsize. + * For AArch64, the address field goes up to bit 47, or 49 with FEAT_LPA2; + * the highest bits of a 52-bit output are placed elsewhere. + */ + if (param.ds) { + descaddrmask = MAKE_64BIT_MASK(0, 50); + } else if (arm_feature(env, ARM_FEATURE_V8)) { + descaddrmask = MAKE_64BIT_MASK(0, 48); + } else { + descaddrmask = MAKE_64BIT_MASK(0, 40); + } + descaddrmask &= ~indexmask_grainsize; + tableattrs = 0; + + next_level: + descaddr |= (address >> (stride * (4 - level))) & indexmask; + descaddr &= ~7ULL; + + /* + * Process the NSTable bit from the previous level. This changes + * the table address space and the output space from Secure to + * NonSecure. With RME, the EL3 translation regime does not change + * from Root to NonSecure. + */ + if (ptw->in_space == ARMSS_Secure + && !regime_is_stage2(mmu_idx) + && extract32(tableattrs, 4, 1)) { + /* + * Stage2_S -> Stage2 or Phys_S -> Phys_NS + * Assert the relative order of the secure/non-secure indexes. 
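+         * The "ptw->in_ptw_idx += 1" below relies on that ordering:
+         * e.g. a walk that was using ARMMMUIdx_Stage2_S continues
+         * with ARMMMUIdx_Stage2 once NSTable has been seen.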
+ */ + QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_S + 1 != ARMMMUIdx_Phys_NS); + QEMU_BUILD_BUG_ON(ARMMMUIdx_Stage2_S + 1 != ARMMMUIdx_Stage2); + ptw->in_ptw_idx += 1; + ptw->in_space = ARMSS_NonSecure; + } + + if (!S1_ptw_translate(env, ptw, descaddr, fi)) { + goto do_fault; + } + descriptor = arm_ldq_ptw(env, ptw, fi); + if (fi->type != ARMFault_None) { + goto do_fault; + } + new_descriptor = descriptor; + + restart_atomic_update: + if (!(descriptor & 1) || + (!(descriptor & 2) && + !lpae_block_desc_valid(cpu, param.ds, param.gran, level))) { + /* Invalid, or a block descriptor at an invalid level */ + goto do_translation_fault; + } + + descaddr = descriptor & descaddrmask; + + /* + * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [15:12] + * of descriptor. For FEAT_LPA2 and effective DS, bits [51:50] of + * descaddr are in [9:8]. Otherwise, if descaddr is out of range, + * raise AddressSizeFault. + */ + if (outputsize > 48) { + if (param.ds) { + descaddr |= extract64(descriptor, 8, 2) << 50; + } else { + descaddr |= extract64(descriptor, 12, 4) << 48; + } + } else if (descaddr >> outputsize) { + fi->type = ARMFault_AddressSize; + goto do_fault; + } + + if ((descriptor & 2) && (level < 3)) { + /* + * Table entry. The top five bits are attributes which may + * propagate down through lower levels of the table (and + * which are all arranged so that 0 means "no effect", so + * we can gather them up by ORing in the bits at each level). + */ + tableattrs |= extract64(descriptor, 59, 5); + level++; + indexmask = indexmask_grainsize; + goto next_level; + } + + /* + * Block entry at level 1 or 2, or page entry at level 3. + * These are basically the same thing, although the number + * of bits we pull in from the vaddr varies. Note that although + * descaddrmask masks enough of the low bits of the descriptor + * to give a correct page or table address, the address field + * in a block descriptor is smaller; so we need to explicitly + * clear the lower bits here before ORing in the low vaddr bits. + * + * Afterward, descaddr is the final physical address. + */ + page_size = (1ULL << ((stride * (4 - level)) + 3)); + descaddr &= ~(hwaddr)(page_size - 1); + descaddr |= (address & (page_size - 1)); + + if (likely(!ptw->in_debug)) { + /* + * Access flag. + * If HA is enabled, prepare to update the descriptor below. + * Otherwise, pass the access fault on to software. + */ + if (!(descriptor & (1 << 10))) { + if (param.ha) { + new_descriptor |= 1 << 10; /* AF */ + } else { + fi->type = ARMFault_AccessFlag; + goto do_fault; + } + } + + /* + * Dirty Bit. + * If HD is enabled, pre-emptively set/clear the appropriate AP/S2AP + * bit for writeback. The actual write protection test may still be + * overridden by tableattrs, to be merged below. + */ + if (param.hd + && extract64(descriptor, 51, 1) /* DBM */ + && access_type == MMU_DATA_STORE) { + if (regime_is_stage2(mmu_idx)) { + new_descriptor |= 1ull << 7; /* set S2AP[1] */ + } else { + new_descriptor &= ~(1ull << 7); /* clear AP[2] */ + } + } + } + + /* + * Extract attributes from the (modified) descriptor, and apply + * table descriptors. Stage 2 table descriptors do not include + * any attribute fields. HPD disables all the table attributes + * except NSTable (which we have already handled). 
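+     * As a concrete illustration: tableattrs[4:0] were accumulated from
+     * descriptor bits [63:59], i.e. NSTable, APTable[1], APTable[0],
+     * UXNTable and PXNTable, and bits [1:0] of that are folded into the
+     * XN/PXN attribute bits below.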
+ */ + attrs = new_descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(50, 14)); + if (!regime_is_stage2(mmu_idx)) { + if (!param.hpd) { + attrs |= extract64(tableattrs, 0, 2) << 53; /* XN, PXN */ + /* + * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1 + * means "force PL1 access only", which means forcing AP[1] to 0. + */ + attrs &= ~(extract64(tableattrs, 2, 1) << 6); /* !APT[0] => AP[1] */ + attrs |= extract32(tableattrs, 3, 1) << 7; /* APT[1] => AP[2] */ + } + } + + ap = extract32(attrs, 6, 2); + out_space = ptw->in_space; + if (regime_is_stage2(mmu_idx)) { + /* + * R_GYNXY: For stage2 in Realm security state, bit 55 is NS. + * The bit remains ignored for other security states. + * R_YMCSL: Executing an insn fetched from non-Realm causes + * a stage2 permission fault. + */ + if (out_space == ARMSS_Realm && extract64(attrs, 55, 1)) { + out_space = ARMSS_NonSecure; + result->f.prot = get_S2prot_noexecute(ap); + } else { + xn = extract64(attrs, 53, 2); + result->f.prot = get_S2prot(env, ap, xn, ptw->in_s1_is_el0); + } + } else { + int nse, ns = extract32(attrs, 5, 1); + switch (out_space) { + case ARMSS_Root: + /* + * R_GVZML: Bit 11 becomes the NSE field in the EL3 regime. + * R_XTYPW: NSE and NS together select the output pa space. + */ + nse = extract32(attrs, 11, 1); + out_space = (nse << 1) | ns; + if (out_space == ARMSS_Secure && + !cpu_isar_feature(aa64_sel2, cpu)) { + out_space = ARMSS_NonSecure; + } + break; + case ARMSS_Secure: + if (ns) { + out_space = ARMSS_NonSecure; + } + break; + case ARMSS_Realm: + switch (mmu_idx) { + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E1: + case ARMMMUIdx_Stage1_E1_PAN: + /* I_CZPRF: For Realm EL1&0 stage1, NS bit is RES0. */ + break; + case ARMMMUIdx_E2: + case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + /* + * R_LYKFZ, R_WGRZN: For Realm EL2 and EL2&1, + * NS changes the output to non-secure space. + */ + if (ns) { + out_space = ARMSS_NonSecure; + } + break; + default: + g_assert_not_reached(); + } + break; + case ARMSS_NonSecure: + /* R_QRMFF: For NonSecure state, the NS bit is RES0. */ + break; + default: + g_assert_not_reached(); + } + xn = extract64(attrs, 54, 1); + pxn = extract64(attrs, 53, 1); + + if (el == 1 && nv_nv1_enabled(env, ptw)) { + /* + * With FEAT_NV, when HCR_EL2.{NV,NV1} == {1,1}, the block/page + * descriptor bit 54 holds PXN, 53 is RES0, and the effective value + * of UXN is 0. Similarly for bits 59 and 60 in table descriptors + * (which we have already folded into bits 53 and 54 of attrs). + * AP[1] (descriptor bit 6, our ap bit 0) is treated as 0. + * Similarly, APTable[0] from the table descriptor is treated as 0; + * we already folded this into AP[1] and squashing that to 0 does + * the right thing. + */ + pxn = xn; + xn = 0; + ap &= ~1; + } + /* + * Note that we modified ptw->in_space earlier for NSTable, but + * result->f.attrs retains a copy of the original security space. + */ + result->f.prot = get_S1prot(env, mmu_idx, aarch64, ap, xn, pxn, + result->f.attrs.space, out_space); + } + + if (!(result->f.prot & (1 << access_type))) { + fi->type = ARMFault_Permission; + goto do_fault; + } + + /* If FEAT_HAFDBS has made changes, update the PTE. 
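+     * (e.g. AF, descriptor bit 10, may have been set above, or bit 7
+     * flipped to make a DBM page writable for this store.)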
*/ + if (new_descriptor != descriptor) { + new_descriptor = arm_casq_ptw(env, descriptor, new_descriptor, ptw, fi); + if (fi->type != ARMFault_None) { + goto do_fault; + } + /* + * I_YZSVV says that if the in-memory descriptor has changed, + * then we must use the information in that new value + * (which might include a different output address, different + * attributes, or generate a fault). + * Restart the handling of the descriptor value from scratch. + */ + if (new_descriptor != descriptor) { + descriptor = new_descriptor; + goto restart_atomic_update; + } + } + + result->f.attrs.space = out_space; + result->f.attrs.secure = arm_space_is_secure(out_space); + + if (regime_is_stage2(mmu_idx)) { + result->cacheattrs.is_s2_format = true; + result->cacheattrs.attrs = extract32(attrs, 2, 4); + /* + * Security state does not really affect HCR_EL2.FWB; + * we only need to filter FWB for aa32 or other FEAT. + */ + device = S2_attrs_are_device(arm_hcr_el2_eff(env), + result->cacheattrs.attrs); + } else { + /* Index into MAIR registers for cache attributes */ + uint8_t attrindx = extract32(attrs, 2, 3); + uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; + assert(attrindx <= 7); + result->cacheattrs.is_s2_format = false; + result->cacheattrs.attrs = extract64(mair, attrindx * 8, 8); + + /* When in aarch64 mode, and BTI is enabled, remember GP in the TLB. */ + if (aarch64 && cpu_isar_feature(aa64_bti, cpu)) { + result->f.extra.arm.guarded = extract64(attrs, 50, 1); /* GP */ + } + device = S1_attrs_are_device(result->cacheattrs.attrs); + } + + /* + * Enable alignment checks on Device memory. + * + * Per R_XCHFJ, this check is mis-ordered. The correct ordering + * for alignment, permission, and stage 2 faults should be: + * - Alignment fault caused by the memory type + * - Permission fault + * - A stage 2 fault on the memory access + * but due to the way the TCG softmmu TLB operates, we will have + * implicitly done the permission check and the stage2 lookup in + * finding the TLB entry, so the alignment check cannot be done sooner. + * + * In v7, for a CPU without the Virtualization Extensions this + * access is UNPREDICTABLE; we choose to make it take the alignment + * fault as is required for a v7VE CPU. (QEMU doesn't emulate any + * CPUs with ARM_FEATURE_LPAE but not ARM_FEATURE_V7VE anyway.) + */ + if (device) { + result->f.tlb_fill_flags |= TLB_CHECK_ALIGNED; + } + + /* + * For FEAT_LPA2 and effective DS, the SH field in the attributes + * was re-purposed for output address bits. The SH attribute in + * that case comes from TCR_ELx, which we extracted earlier. + */ + if (param.ds) { + result->cacheattrs.shareability = param.sh; + } else { + result->cacheattrs.shareability = extract32(attrs, 8, 2); + } + + result->f.phys_addr = descaddr; + result->f.lg_page_size = ctz64(page_size); + return false; + + do_translation_fault: + fi->type = ARMFault_Translation; + do_fault: + if (fi->s1ptw) { + /* Retain the existing stage 2 fi->level */ + assert(fi->stage2); + } else { + fi->level = level; + fi->stage2 = regime_is_stage2(mmu_idx); + } + fi->s1ns = fault_s1ns(ptw->in_space, mmu_idx); + return true; +} + +static bool get_phys_addr_pmsav5(CPUARMState *env, + S1Translate *ptw, + uint32_t address, + MMUAccessType access_type, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi) +{ + int n; + uint32_t mask; + uint32_t base; + ARMMMUIdx mmu_idx = ptw->in_mmu_idx; + bool is_user = regime_is_user(env, mmu_idx); + + if (regime_translation_disabled(env, mmu_idx, ptw->in_space)) { + /* MPU disabled. 
*/ + result->f.phys_addr = address; + result->f.prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + return false; + } + + result->f.phys_addr = address; + for (n = 7; n >= 0; n--) { + base = env->cp15.c6_region[n]; + if ((base & 1) == 0) { + continue; + } + mask = 1 << ((base >> 1) & 0x1f); + /* Keep this shift separate from the above to avoid an + (undefined) << 32. */ + mask = (mask << 1) - 1; + if (((base ^ address) & ~mask) == 0) { + break; + } + } + if (n < 0) { + fi->type = ARMFault_Background; + return true; + } + + if (access_type == MMU_INST_FETCH) { + mask = env->cp15.pmsav5_insn_ap; + } else { + mask = env->cp15.pmsav5_data_ap; + } + mask = (mask >> (n * 4)) & 0xf; + switch (mask) { + case 0: + fi->type = ARMFault_Permission; + fi->level = 1; + return true; + case 1: + if (is_user) { + fi->type = ARMFault_Permission; + fi->level = 1; + return true; + } + result->f.prot = PAGE_READ | PAGE_WRITE; + break; + case 2: + result->f.prot = PAGE_READ; + if (!is_user) { + result->f.prot |= PAGE_WRITE; + } + break; + case 3: + result->f.prot = PAGE_READ | PAGE_WRITE; + break; + case 5: + if (is_user) { + fi->type = ARMFault_Permission; + fi->level = 1; + return true; + } + result->f.prot = PAGE_READ; + break; + case 6: + result->f.prot = PAGE_READ; + break; + default: + /* Bad permission. */ + fi->type = ARMFault_Permission; + fi->level = 1; + return true; + } + result->f.prot |= PAGE_EXEC; + return false; +} + +static void get_phys_addr_pmsav7_default(CPUARMState *env, ARMMMUIdx mmu_idx, + int32_t address, uint8_t *prot) +{ + if (!arm_feature(env, ARM_FEATURE_M)) { + *prot = PAGE_READ | PAGE_WRITE; + switch (address) { + case 0xF0000000 ... 0xFFFFFFFF: + if (regime_sctlr(env, mmu_idx) & SCTLR_V) { + /* hivecs execing is ok */ + *prot |= PAGE_EXEC; + } + break; + case 0x00000000 ... 0x7FFFFFFF: + *prot |= PAGE_EXEC; + break; + } + } else { + /* Default system address map for M profile cores. + * The architecture specifies which regions are execute-never; + * at the MPU level no other checks are defined. + */ + switch (address) { + case 0x00000000 ... 0x1fffffff: /* ROM */ + case 0x20000000 ... 0x3fffffff: /* SRAM */ + case 0x60000000 ... 0x7fffffff: /* RAM */ + case 0x80000000 ... 0x9fffffff: /* RAM */ + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + break; + case 0x40000000 ... 0x5fffffff: /* Peripheral */ + case 0xa0000000 ... 0xbfffffff: /* Device */ + case 0xc0000000 ... 0xdfffffff: /* Device */ + case 0xe0000000 ... 0xffffffff: /* System */ + *prot = PAGE_READ | PAGE_WRITE; + break; + default: + g_assert_not_reached(); + } + } +} + +static bool m_is_ppb_region(CPUARMState *env, uint32_t address) +{ + /* True if address is in the M profile PPB region 0xe0000000 - 0xe00fffff */ + return arm_feature(env, ARM_FEATURE_M) && + extract32(address, 20, 12) == 0xe00; +} + +static bool m_is_system_region(CPUARMState *env, uint32_t address) +{ + /* + * True if address is in the M profile system region + * 0xe0000000 - 0xffffffff + */ + return arm_feature(env, ARM_FEATURE_M) && extract32(address, 29, 3) == 0x7; +} + +static bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx, + bool is_secure, bool is_user) +{ + /* + * Return true if we should use the default memory map as a + * "background" region if there are no hits against any MPU regions. 
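+     * For M profile that is MPU_CTRL.PRIVDEFENA (per security bank);
+     * for R profile it is SCTLR.BR. It never applies to unprivileged
+     * accesses or to stage 2.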
+ */ + CPUARMState *env = &cpu->env; + + if (is_user) { + return false; + } + + if (arm_feature(env, ARM_FEATURE_M)) { + return env->v7m.mpu_ctrl[is_secure] & R_V7M_MPU_CTRL_PRIVDEFENA_MASK; + } + + if (mmu_idx == ARMMMUIdx_Stage2) { + return false; + } + + return regime_sctlr(env, mmu_idx) & SCTLR_BR; +} + +static bool get_phys_addr_pmsav7(CPUARMState *env, + S1Translate *ptw, + uint32_t address, + MMUAccessType access_type, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi) +{ + ARMCPU *cpu = env_archcpu(env); + int n; + ARMMMUIdx mmu_idx = ptw->in_mmu_idx; + bool is_user = regime_is_user(env, mmu_idx); + bool secure = arm_space_is_secure(ptw->in_space); + + result->f.phys_addr = address; + result->f.lg_page_size = TARGET_PAGE_BITS; + result->f.prot = 0; + + if (regime_translation_disabled(env, mmu_idx, ptw->in_space) || + m_is_ppb_region(env, address)) { + /* + * MPU disabled or M profile PPB access: use default memory map. + * The other case which uses the default memory map in the + * v7M ARM ARM pseudocode is exception vector reads from the vector + * table. In QEMU those accesses are done in arm_v7m_load_vector(), + * which always does a direct read using address_space_ldl(), rather + * than going via this function, so we don't need to check that here. + */ + get_phys_addr_pmsav7_default(env, mmu_idx, address, &result->f.prot); + } else { /* MPU enabled */ + for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) { + /* region search */ + uint32_t base = env->pmsav7.drbar[n]; + uint32_t rsize = extract32(env->pmsav7.drsr[n], 1, 5); + uint32_t rmask; + bool srdis = false; + + if (!(env->pmsav7.drsr[n] & 0x1)) { + continue; + } + + if (!rsize) { + qemu_log_mask(LOG_GUEST_ERROR, + "DRSR[%d]: Rsize field cannot be 0\n", n); + continue; + } + rsize++; + rmask = (1ull << rsize) - 1; + + if (base & rmask) { + qemu_log_mask(LOG_GUEST_ERROR, + "DRBAR[%d]: 0x%" PRIx32 " misaligned " + "to DRSR region size, mask = 0x%" PRIx32 "\n", + n, base, rmask); + continue; + } + + if (address < base || address > base + rmask) { + /* + * Address not in this region. We must check whether the + * region covers addresses in the same page as our address. + * In that case we must not report a size that covers the + * whole page for a subsequent hit against a different MPU + * region or the background region, because it would result in + * incorrect TLB hits for subsequent accesses to addresses that + * are in this MPU region. + */ + if (ranges_overlap(base, rmask, + address & TARGET_PAGE_MASK, + TARGET_PAGE_SIZE)) { + result->f.lg_page_size = 0; + } + continue; + } + + /* Region matched */ + + if (rsize >= 8) { /* no subregions for regions < 256 bytes */ + int i, snd; + uint32_t srdis_mask; + + rsize -= 3; /* sub region size (power of 2) */ + snd = ((address - base) >> rsize) & 0x7; + srdis = extract32(env->pmsav7.drsr[n], snd + 8, 1); + + srdis_mask = srdis ? 0x3 : 0x0; + for (i = 2; i <= 8 && rsize < TARGET_PAGE_BITS; i *= 2) { + /* + * This will check in groups of 2, 4 and then 8, whether + * the subregion bits are consistent. rsize is incremented + * back up to give the region size, considering consistent + * adjacent subregions as one region. Stop testing if rsize + * is already big enough for an entire QEMU page. 
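+                     * For example, a 2KB region (rsize 11) has 256-byte
+                     * subregions (rsize 8 after the "-= 3"); each
+                     * consistent pair, quad and octet of SRD bits steps
+                     * rsize back up towards 11.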
+ */ + int snd_rounded = snd & ~(i - 1); + uint32_t srdis_multi = extract32(env->pmsav7.drsr[n], + snd_rounded + 8, i); + if (srdis_mask ^ srdis_multi) { + break; + } + srdis_mask = (srdis_mask << i) | srdis_mask; + rsize++; + } + } + if (srdis) { + continue; + } + if (rsize < TARGET_PAGE_BITS) { + result->f.lg_page_size = rsize; + } + break; + } + + if (n == -1) { /* no hits */ + if (!pmsav7_use_background_region(cpu, mmu_idx, secure, is_user)) { + /* background fault */ + fi->type = ARMFault_Background; + return true; + } + get_phys_addr_pmsav7_default(env, mmu_idx, address, + &result->f.prot); + } else { /* a MPU hit! */ + uint32_t ap = extract32(env->pmsav7.dracr[n], 8, 3); + uint32_t xn = extract32(env->pmsav7.dracr[n], 12, 1); + + if (m_is_system_region(env, address)) { + /* System space is always execute never */ + xn = 1; + } + + if (is_user) { /* User mode AP bit decoding */ + switch (ap) { + case 0: + case 1: + case 5: + break; /* no access */ + case 3: + result->f.prot |= PAGE_WRITE; + /* fall through */ + case 2: + case 6: + result->f.prot |= PAGE_READ | PAGE_EXEC; + break; + case 7: + /* for v7M, same as 6; for R profile a reserved value */ + if (arm_feature(env, ARM_FEATURE_M)) { + result->f.prot |= PAGE_READ | PAGE_EXEC; + break; + } + /* fall through */ + default: + qemu_log_mask(LOG_GUEST_ERROR, + "DRACR[%d]: Bad value for AP bits: 0x%" + PRIx32 "\n", n, ap); + } + } else { /* Priv. mode AP bits decoding */ + switch (ap) { + case 0: + break; /* no access */ + case 1: + case 2: + case 3: + result->f.prot |= PAGE_WRITE; + /* fall through */ + case 5: + case 6: + result->f.prot |= PAGE_READ | PAGE_EXEC; + break; + case 7: + /* for v7M, same as 6; for R profile a reserved value */ + if (arm_feature(env, ARM_FEATURE_M)) { + result->f.prot |= PAGE_READ | PAGE_EXEC; + break; + } + /* fall through */ + default: + qemu_log_mask(LOG_GUEST_ERROR, + "DRACR[%d]: Bad value for AP bits: 0x%" + PRIx32 "\n", n, ap); + } + } + + /* execute never */ + if (xn) { + result->f.prot &= ~PAGE_EXEC; + } + } + } + + fi->type = ARMFault_Permission; + fi->level = 1; + return !(result->f.prot & (1 << access_type)); +} + +static uint32_t *regime_rbar(CPUARMState *env, ARMMMUIdx mmu_idx, + uint32_t secure) +{ + if (regime_el(env, mmu_idx) == 2) { + return env->pmsav8.hprbar; + } else { + return env->pmsav8.rbar[secure]; + } +} + +static uint32_t *regime_rlar(CPUARMState *env, ARMMMUIdx mmu_idx, + uint32_t secure) +{ + if (regime_el(env, mmu_idx) == 2) { + return env->pmsav8.hprlar; + } else { + return env->pmsav8.rlar[secure]; + } +} + +bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address, + MMUAccessType access_type, ARMMMUIdx mmu_idx, + bool secure, GetPhysAddrResult *result, + ARMMMUFaultInfo *fi, uint32_t *mregion) +{ + /* + * Perform a PMSAv8 MPU lookup (without also doing the SAU check + * that a full phys-to-virt translation does). + * mregion is (if not NULL) set to the region number which matched, + * or -1 if no region number is returned (MPU off, address did not + * hit a region, address hit in multiple regions). + * If the region hit doesn't cover the entire TARGET_PAGE the address + * is within, then we set the result page_size to 1 to force the + * memory system to use a subpage. 
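+     * (In the code below that is expressed as lg_page_size = 0 rather
+     * than a literal size of 1.)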
+ */ + ARMCPU *cpu = env_archcpu(env); + bool is_user = regime_is_user(env, mmu_idx); + int n; + int matchregion = -1; + bool hit = false; + uint32_t addr_page_base = address & TARGET_PAGE_MASK; + uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1); + int region_counter; + + if (regime_el(env, mmu_idx) == 2) { + region_counter = cpu->pmsav8r_hdregion; + } else { + region_counter = cpu->pmsav7_dregion; + } + + result->f.lg_page_size = TARGET_PAGE_BITS; + result->f.phys_addr = address; + result->f.prot = 0; + if (mregion) { + *mregion = -1; + } + + if (mmu_idx == ARMMMUIdx_Stage2) { + fi->stage2 = true; + } + + /* + * Unlike the ARM ARM pseudocode, we don't need to check whether this + * was an exception vector read from the vector table (which is always + * done using the default system address map), because those accesses + * are done in arm_v7m_load_vector(), which always does a direct + * read using address_space_ldl(), rather than going via this function. + */ + if (regime_translation_disabled(env, mmu_idx, arm_secure_to_space(secure))) { + /* MPU disabled */ + hit = true; + } else if (m_is_ppb_region(env, address)) { + hit = true; + } else { + if (pmsav7_use_background_region(cpu, mmu_idx, secure, is_user)) { + hit = true; + } + + uint32_t bitmask; + if (arm_feature(env, ARM_FEATURE_M)) { + bitmask = 0x1f; + } else { + bitmask = 0x3f; + fi->level = 0; + } + + for (n = region_counter - 1; n >= 0; n--) { + /* region search */ + /* + * Note that the base address is bits [31:x] from the register + * with bits [x-1:0] all zeroes, but the limit address is bits + * [31:x] from the register with bits [x:0] all ones. Where x is + * 5 for Cortex-M and 6 for Cortex-R + */ + uint32_t base = regime_rbar(env, mmu_idx, secure)[n] & ~bitmask; + uint32_t limit = regime_rlar(env, mmu_idx, secure)[n] | bitmask; + + if (!(regime_rlar(env, mmu_idx, secure)[n] & 0x1)) { + /* Region disabled */ + continue; + } + + if (address < base || address > limit) { + /* + * Address not in this region. We must check whether the + * region covers addresses in the same page as our address. + * In that case we must not report a size that covers the + * whole page for a subsequent hit against a different MPU + * region or the background region, because it would result in + * incorrect TLB hits for subsequent accesses to addresses that + * are in this MPU region. 
+ */ + if (limit >= base && + ranges_overlap(base, limit - base + 1, + addr_page_base, + TARGET_PAGE_SIZE)) { + result->f.lg_page_size = 0; + } + continue; + } + + if (base > addr_page_base || limit < addr_page_limit) { + result->f.lg_page_size = 0; + } + + if (matchregion != -1) { + /* + * Multiple regions match -- always a failure (unlike + * PMSAv7 where highest-numbered-region wins) + */ + fi->type = ARMFault_Permission; + if (arm_feature(env, ARM_FEATURE_M)) { + fi->level = 1; + } + return true; + } + + matchregion = n; + hit = true; + } + } + + if (!hit) { + if (arm_feature(env, ARM_FEATURE_M)) { + fi->type = ARMFault_Background; + } else { + fi->type = ARMFault_Permission; + } + return true; + } + + if (matchregion == -1) { + /* hit using the background region */ + get_phys_addr_pmsav7_default(env, mmu_idx, address, &result->f.prot); + } else { + uint32_t matched_rbar = regime_rbar(env, mmu_idx, secure)[matchregion]; + uint32_t matched_rlar = regime_rlar(env, mmu_idx, secure)[matchregion]; + uint32_t ap = extract32(matched_rbar, 1, 2); + uint32_t xn = extract32(matched_rbar, 0, 1); + bool pxn = false; + + if (arm_feature(env, ARM_FEATURE_V8_1M)) { + pxn = extract32(matched_rlar, 4, 1); + } + + if (m_is_system_region(env, address)) { + /* System space is always execute never */ + xn = 1; + } + + if (regime_el(env, mmu_idx) == 2) { + result->f.prot = simple_ap_to_rw_prot_is_user(ap, + mmu_idx != ARMMMUIdx_E2); + } else { + result->f.prot = simple_ap_to_rw_prot(env, mmu_idx, ap); + } + + if (!arm_feature(env, ARM_FEATURE_M)) { + uint8_t attrindx = extract32(matched_rlar, 1, 3); + uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; + uint8_t sh = extract32(matched_rlar, 3, 2); + + if (regime_sctlr(env, mmu_idx) & SCTLR_WXN && + result->f.prot & PAGE_WRITE && mmu_idx != ARMMMUIdx_Stage2) { + xn = 0x1; + } + + if ((regime_el(env, mmu_idx) == 1) && + regime_sctlr(env, mmu_idx) & SCTLR_UWXN && ap == 0x1) { + pxn = 0x1; + } + + result->cacheattrs.is_s2_format = false; + result->cacheattrs.attrs = extract64(mair, attrindx * 8, 8); + result->cacheattrs.shareability = sh; + } + + if (result->f.prot && !xn && !(pxn && !is_user)) { + result->f.prot |= PAGE_EXEC; + } + + if (mregion) { + *mregion = matchregion; + } + } + + fi->type = ARMFault_Permission; + if (arm_feature(env, ARM_FEATURE_M)) { + fi->level = 1; + } + return !(result->f.prot & (1 << access_type)); +} + +static bool v8m_is_sau_exempt(CPUARMState *env, + uint32_t address, MMUAccessType access_type) +{ + /* + * The architecture specifies that certain address ranges are + * exempt from v8M SAU/IDAU checks. + */ + return + (access_type == MMU_INST_FETCH && m_is_system_region(env, address)) || + (address >= 0xe0000000 && address <= 0xe0002fff) || + (address >= 0xe000e000 && address <= 0xe000efff) || + (address >= 0xe002e000 && address <= 0xe002efff) || + (address >= 0xe0040000 && address <= 0xe0041fff) || + (address >= 0xe00ff000 && address <= 0xe00fffff); +} + +void v8m_security_lookup(CPUARMState *env, uint32_t address, + MMUAccessType access_type, ARMMMUIdx mmu_idx, + bool is_secure, V8M_SAttributes *sattrs) +{ + /* + * Look up the security attributes for this address. Compare the + * pseudocode SecurityCheck() function. + * We assume the caller has zero-initialized *sattrs. 
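+     * (All-zeroes is the safe default: ns, nsc, irvalid and srvalid
+     * all false, i.e. Secure and not NS-Callable.)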
+ */ + ARMCPU *cpu = env_archcpu(env); + int r; + bool idau_exempt = false, idau_ns = true, idau_nsc = true; + int idau_region = IREGION_NOTVALID; + uint32_t addr_page_base = address & TARGET_PAGE_MASK; + uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1); + + if (cpu->idau) { + IDAUInterfaceClass *iic = IDAU_INTERFACE_GET_CLASS(cpu->idau); + IDAUInterface *ii = IDAU_INTERFACE(cpu->idau); + + iic->check(ii, address, &idau_region, &idau_exempt, &idau_ns, + &idau_nsc); + } + + if (access_type == MMU_INST_FETCH && extract32(address, 28, 4) == 0xf) { + /* 0xf0000000..0xffffffff is always S for insn fetches */ + return; + } + + if (idau_exempt || v8m_is_sau_exempt(env, address, access_type)) { + sattrs->ns = !is_secure; + return; + } + + if (idau_region != IREGION_NOTVALID) { + sattrs->irvalid = true; + sattrs->iregion = idau_region; + } + + switch (env->sau.ctrl & 3) { + case 0: /* SAU.ENABLE == 0, SAU.ALLNS == 0 */ + break; + case 2: /* SAU.ENABLE == 0, SAU.ALLNS == 1 */ + sattrs->ns = true; + break; + default: /* SAU.ENABLE == 1 */ + for (r = 0; r < cpu->sau_sregion; r++) { + if (env->sau.rlar[r] & 1) { + uint32_t base = env->sau.rbar[r] & ~0x1f; + uint32_t limit = env->sau.rlar[r] | 0x1f; + + if (base <= address && limit >= address) { + if (base > addr_page_base || limit < addr_page_limit) { + sattrs->subpage = true; + } + if (sattrs->srvalid) { + /* + * If we hit in more than one region then we must report + * as Secure, not NS-Callable, with no valid region + * number info. + */ + sattrs->ns = false; + sattrs->nsc = false; + sattrs->sregion = 0; + sattrs->srvalid = false; + break; + } else { + if (env->sau.rlar[r] & 2) { + sattrs->nsc = true; + } else { + sattrs->ns = true; + } + sattrs->srvalid = true; + sattrs->sregion = r; + } + } else { + /* + * Address not in this region. We must check whether the + * region covers addresses in the same page as our address. + * In that case we must not report a size that covers the + * whole page for a subsequent hit against a different MPU + * region or the background region, because it would result + * in incorrect TLB hits for subsequent accesses to + * addresses that are in this MPU region. + */ + if (limit >= base && + ranges_overlap(base, limit - base + 1, + addr_page_base, + TARGET_PAGE_SIZE)) { + sattrs->subpage = true; + } + } + } + } + break; + } + + /* + * The IDAU will override the SAU lookup results if it specifies + * higher security than the SAU does. + */ + if (!idau_ns) { + if (sattrs->ns || (!idau_nsc && sattrs->nsc)) { + sattrs->ns = false; + sattrs->nsc = idau_nsc; + } + } +} + +static bool get_phys_addr_pmsav8(CPUARMState *env, + S1Translate *ptw, + uint32_t address, + MMUAccessType access_type, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi) +{ + V8M_SAttributes sattrs = {}; + ARMMMUIdx mmu_idx = ptw->in_mmu_idx; + bool secure = arm_space_is_secure(ptw->in_space); + bool ret; + + if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { + v8m_security_lookup(env, address, access_type, mmu_idx, + secure, &sattrs); + if (access_type == MMU_INST_FETCH) { + /* + * Instruction fetches always use the MMU bank and the + * transaction attribute determined by the fetch address, + * regardless of CPU state. This is painful for QEMU + * to handle, because it would mean we need to encode + * into the mmu_idx not just the (user, negpri) information + * for the current security state but also that for the + * other security state, which would balloon the number + * of mmu_idx values needed alarmingly. 
+ * Fortunately we can avoid this because it's not actually + * possible to arbitrarily execute code from memory with + * the wrong security attribute: it will always generate + * an exception of some kind or another, apart from the + * special case of an NS CPU executing an SG instruction + * in S&NSC memory. So we always just fail the translation + * here and sort things out in the exception handler + * (including possibly emulating an SG instruction). + */ + if (sattrs.ns != !secure) { + if (sattrs.nsc) { + fi->type = ARMFault_QEMU_NSCExec; + } else { + fi->type = ARMFault_QEMU_SFault; + } + result->f.lg_page_size = sattrs.subpage ? 0 : TARGET_PAGE_BITS; + result->f.phys_addr = address; + result->f.prot = 0; + return true; + } + } else { + /* + * For data accesses we always use the MMU bank indicated + * by the current CPU state, but the security attributes + * might downgrade a secure access to nonsecure. + */ + if (sattrs.ns) { + result->f.attrs.secure = false; + result->f.attrs.space = ARMSS_NonSecure; + } else if (!secure) { + /* + * NS access to S memory must fault. + * Architecturally we should first check whether the + * MPU information for this address indicates that we + * are doing an unaligned access to Device memory, which + * should generate a UsageFault instead. QEMU does not + * currently check for that kind of unaligned access though. + * If we added it we would need to do so as a special case + * for M_FAKE_FSR_SFAULT in arm_v7m_cpu_do_interrupt(). + */ + fi->type = ARMFault_QEMU_SFault; + result->f.lg_page_size = sattrs.subpage ? 0 : TARGET_PAGE_BITS; + result->f.phys_addr = address; + result->f.prot = 0; + return true; + } + } + } + + ret = pmsav8_mpu_lookup(env, address, access_type, mmu_idx, secure, + result, fi, NULL); + if (sattrs.subpage) { + result->f.lg_page_size = 0; + } + return ret; +} + +/* + * Translate from the 4-bit stage 2 representation of + * memory attributes (without cache-allocation hints) to + * the 8-bit representation of the stage 1 MAIR registers + * (which includes allocation hints). + * + * ref: shared/translation/attrs/S2AttrDecode() + * .../S2ConvertAttrsHints() + */ +static uint8_t convert_stage2_attrs(uint64_t hcr, uint8_t s2attrs) +{ + uint8_t hiattr = extract32(s2attrs, 2, 2); + uint8_t loattr = extract32(s2attrs, 0, 2); + uint8_t hihint = 0, lohint = 0; + + if (hiattr != 0) { /* normal memory */ + if (hcr & HCR_CD) { /* cache disabled */ + hiattr = loattr = 1; /* non-cacheable */ + } else { + if (hiattr != 1) { /* Write-through or write-back */ + hihint = 3; /* RW allocate */ + } + if (loattr != 1) { /* Write-through or write-back */ + lohint = 3; /* RW allocate */ + } + } + } + + return (hiattr << 6) | (hihint << 4) | (loattr << 2) | lohint; +} + +/* + * Combine either inner or outer cacheability attributes for normal + * memory, according to table D4-42 and pseudocode procedure + * CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM). + * + * NB: only stage 1 includes allocation hints (RW bits), leading to + * some asymmetry. 
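+ * For example, s1 = 0xf (WB, RW-allocate) combined with s2 = 0x8 (WT)
+ * yields 0xb: Write-Through, but keeping stage 1's allocation hints.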
+ */ +static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2) +{ + if (s1 == 4 || s2 == 4) { + /* non-cacheable has precedence */ + return 4; + } else if (extract32(s1, 2, 2) == 0 || extract32(s1, 2, 2) == 2) { + /* stage 1 write-through takes precedence */ + return s1; + } else if (extract32(s2, 2, 2) == 2) { + /* stage 2 write-through takes precedence, but the allocation hint + * is still taken from stage 1 + */ + return (2 << 2) | extract32(s1, 0, 2); + } else { /* write-back */ + return s1; + } +} + +/* + * Combine the memory type and cacheability attributes of + * s1 and s2 for the HCR_EL2.FWB == 0 case, returning the + * combined attributes in MAIR_EL1 format. + */ +static uint8_t combined_attrs_nofwb(uint64_t hcr, + ARMCacheAttrs s1, ARMCacheAttrs s2) +{ + uint8_t s1lo, s2lo, s1hi, s2hi, s2_mair_attrs, ret_attrs; + + if (s2.is_s2_format) { + s2_mair_attrs = convert_stage2_attrs(hcr, s2.attrs); + } else { + s2_mair_attrs = s2.attrs; + } + + s1lo = extract32(s1.attrs, 0, 4); + s2lo = extract32(s2_mair_attrs, 0, 4); + s1hi = extract32(s1.attrs, 4, 4); + s2hi = extract32(s2_mair_attrs, 4, 4); + + /* Combine memory type and cacheability attributes */ + if (s1hi == 0 || s2hi == 0) { + /* Device has precedence over normal */ + if (s1lo == 0 || s2lo == 0) { + /* nGnRnE has precedence over anything */ + ret_attrs = 0; + } else if (s1lo == 4 || s2lo == 4) { + /* non-Reordering has precedence over Reordering */ + ret_attrs = 4; /* nGnRE */ + } else if (s1lo == 8 || s2lo == 8) { + /* non-Gathering has precedence over Gathering */ + ret_attrs = 8; /* nGRE */ + } else { + ret_attrs = 0xc; /* GRE */ + } + } else { /* Normal memory */ + /* Outer/inner cacheability combine independently */ + ret_attrs = combine_cacheattr_nibble(s1hi, s2hi) << 4 + | combine_cacheattr_nibble(s1lo, s2lo); + } + return ret_attrs; +} + +static uint8_t force_cacheattr_nibble_wb(uint8_t attr) +{ + /* + * Given the 4 bits specifying the outer or inner cacheability + * in MAIR format, return a value specifying Normal Write-Back, + * with the allocation and transient hints taken from the input + * if the input specified some kind of cacheable attribute. + */ + if (attr == 0 || attr == 4) { + /* + * 0 == an UNPREDICTABLE encoding + * 4 == Non-cacheable + * Either way, force Write-Back RW allocate non-transient + */ + return 0xf; + } + /* Change WriteThrough to WriteBack, keep allocation and transient hints */ + return attr | 4; +} + +/* + * Combine the memory type and cacheability attributes of + * s1 and s2 for the HCR_EL2.FWB == 1 case, returning the + * combined attributes in MAIR_EL1 format. + */ +static uint8_t combined_attrs_fwb(ARMCacheAttrs s1, ARMCacheAttrs s2) +{ + assert(s2.is_s2_format && !s1.is_s2_format); + + switch (s2.attrs) { + case 7: + /* Use stage 1 attributes */ + return s1.attrs; + case 6: + /* + * Force Normal Write-Back. Note that if S1 is Normal cacheable + * then we take the allocation hints from it; otherwise it is + * RW allocate, non-transient. + */ + if ((s1.attrs & 0xf0) == 0) { + /* S1 is Device */ + return 0xff; + } + /* Need to check the Inner and Outer nibbles separately */ + return force_cacheattr_nibble_wb(s1.attrs & 0xf) | + force_cacheattr_nibble_wb(s1.attrs >> 4) << 4; + case 5: + /* If S1 attrs are Device, use them; otherwise Normal Non-cacheable */ + if ((s1.attrs & 0xf0) == 0) { + return s1.attrs; + } + return 0x44; + case 0 ... 
3: + /* Force Device, of subtype specified by S2 */ + return s2.attrs << 2; + default: + /* + * RESERVED values (including RES0 descriptor bit [5] being nonzero); + * arbitrarily force Device. + */ + return 0; + } +} + +/* + * Combine S1 and S2 cacheability/shareability attributes, per D4.5.4 + * and CombineS1S2Desc() + * + * @env: CPUARMState + * @s1: Attributes from stage 1 walk + * @s2: Attributes from stage 2 walk + */ +static ARMCacheAttrs combine_cacheattrs(uint64_t hcr, + ARMCacheAttrs s1, ARMCacheAttrs s2) +{ + ARMCacheAttrs ret; + bool tagged = false; + + assert(!s1.is_s2_format); + ret.is_s2_format = false; + + if (s1.attrs == 0xf0) { + tagged = true; + s1.attrs = 0xff; + } + + /* Combine shareability attributes (table D4-43) */ + if (s1.shareability == 2 || s2.shareability == 2) { + /* if either are outer-shareable, the result is outer-shareable */ + ret.shareability = 2; + } else if (s1.shareability == 3 || s2.shareability == 3) { + /* if either are inner-shareable, the result is inner-shareable */ + ret.shareability = 3; + } else { + /* both non-shareable */ + ret.shareability = 0; + } + + /* Combine memory type and cacheability attributes */ + if (hcr & HCR_FWB) { + ret.attrs = combined_attrs_fwb(s1, s2); + } else { + ret.attrs = combined_attrs_nofwb(hcr, s1, s2); + } + + /* + * Any location for which the resultant memory type is any + * type of Device memory is always treated as Outer Shareable. + * Any location for which the resultant memory type is Normal + * Inner Non-cacheable, Outer Non-cacheable is always treated + * as Outer Shareable. + * TODO: FEAT_XS adds another value (0x40) also meaning iNCoNC + */ + if ((ret.attrs & 0xf0) == 0 || ret.attrs == 0x44) { + ret.shareability = 2; + } + + /* TODO: CombineS1S2Desc does not consider transient, only WB, RWA. */ + if (tagged && ret.attrs == 0xff) { + ret.attrs = 0xf0; + } + + return ret; +} + +/* + * MMU disabled. S1 addresses within aa64 translation regimes are + * still checked for bounds -- see AArch64.S1DisabledOutput(). + */ +static bool get_phys_addr_disabled(CPUARMState *env, + S1Translate *ptw, + target_ulong address, + MMUAccessType access_type, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi) +{ + ARMMMUIdx mmu_idx = ptw->in_mmu_idx; + uint8_t memattr = 0x00; /* Device nGnRnE */ + uint8_t shareability = 0; /* non-shareable */ + int r_el; + + switch (mmu_idx) { + case ARMMMUIdx_Stage2: + case ARMMMUIdx_Stage2_S: + case ARMMMUIdx_Phys_S: + case ARMMMUIdx_Phys_NS: + case ARMMMUIdx_Phys_Root: + case ARMMMUIdx_Phys_Realm: + break; + + default: + r_el = regime_el(env, mmu_idx); + if (arm_el_is_aa64(env, r_el)) { + int pamax = arm_pamax(env_archcpu(env)); + uint64_t tcr = env->cp15.tcr_el[r_el]; + int addrtop, tbi; + + tbi = aa64_va_parameter_tbi(tcr, mmu_idx); + if (access_type == MMU_INST_FETCH) { + tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx); + } + tbi = (tbi >> extract64(address, 55, 1)) & 1; + addrtop = (tbi ? 55 : 63); + + if (extract64(address, pamax, addrtop - pamax + 1) != 0) { + fi->type = ARMFault_AddressSize; + fi->level = 0; + fi->stage2 = false; + return 1; + } + + /* + * When TBI is disabled, we've just validated that all of the + * bits above PAMax are zero, so logically we only need to + * clear the top byte for TBI. But it's clearer to follow + * the pseudocode set of addrdesc.paddress. + */ + address = extract64(address, 0, 52); + } + + /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. 
*/ + if (r_el == 1) { + uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space); + if (hcr & HCR_DC) { + if (hcr & HCR_DCT) { + memattr = 0xf0; /* Tagged, Normal, WB, RWA */ + } else { + memattr = 0xff; /* Normal, WB, RWA */ + } + } + } + if (memattr == 0) { + if (access_type == MMU_INST_FETCH) { + if (regime_sctlr(env, mmu_idx) & SCTLR_I) { + memattr = 0xee; /* Normal, WT, RA, NT */ + } else { + memattr = 0x44; /* Normal, NC, No */ + } + } + shareability = 2; /* outer shareable */ + } + result->cacheattrs.is_s2_format = false; + break; + } + + result->f.phys_addr = address; + result->f.prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + result->f.lg_page_size = TARGET_PAGE_BITS; + result->cacheattrs.shareability = shareability; + result->cacheattrs.attrs = memattr; + return false; +} + +static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw, + target_ulong address, + MMUAccessType access_type, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi) +{ + hwaddr ipa; + int s1_prot, s1_lgpgsz; + ARMSecuritySpace in_space = ptw->in_space; + bool ret, ipa_secure, s1_guarded; + ARMCacheAttrs cacheattrs1; + ARMSecuritySpace ipa_space; + uint64_t hcr; + + ret = get_phys_addr_nogpc(env, ptw, address, access_type, result, fi); + + /* If S1 fails, return early. */ + if (ret) { + return ret; + } + + ipa = result->f.phys_addr; + ipa_secure = result->f.attrs.secure; + ipa_space = result->f.attrs.space; + + ptw->in_s1_is_el0 = ptw->in_mmu_idx == ARMMMUIdx_Stage1_E0; + ptw->in_mmu_idx = ipa_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; + ptw->in_space = ipa_space; + ptw->in_ptw_idx = ptw_idx_for_stage_2(env, ptw->in_mmu_idx); + + /* + * S1 is done, now do S2 translation. + * Save the stage1 results so that we may merge prot and cacheattrs later. + */ + s1_prot = result->f.prot; + s1_lgpgsz = result->f.lg_page_size; + s1_guarded = result->f.extra.arm.guarded; + cacheattrs1 = result->cacheattrs; + memset(result, 0, sizeof(*result)); + + ret = get_phys_addr_nogpc(env, ptw, ipa, access_type, result, fi); + fi->s2addr = ipa; + + /* Combine the S1 and S2 perms. */ + result->f.prot &= s1_prot; + + /* If S2 fails, return early. */ + if (ret) { + return ret; + } + + /* + * If either S1 or S2 returned a result smaller than TARGET_PAGE_SIZE, + * this means "don't put this in the TLB"; in this case, return a + * result with lg_page_size == 0 to achieve that. Otherwise, + * use the maximum of the S1 & S2 page size, so that invalidation + * of pages > TARGET_PAGE_SIZE works correctly. (This works even though + * we know the combined result permissions etc only cover the minimum + * of the S1 and S2 page size, because we know that the common TLB code + * never actually creates TLB entries bigger than TARGET_PAGE_SIZE, + * and passing a larger page size value only affects invalidations.) + */ + if (result->f.lg_page_size < TARGET_PAGE_BITS || + s1_lgpgsz < TARGET_PAGE_BITS) { + result->f.lg_page_size = 0; + } else if (result->f.lg_page_size < s1_lgpgsz) { + result->f.lg_page_size = s1_lgpgsz; + } + + /* Combine the S1 and S2 cache attributes. */ + hcr = arm_hcr_el2_eff_secstate(env, in_space); + if (hcr & HCR_DC) { + /* + * HCR.DC forces the first stage attributes to + * Normal Non-Shareable, + * Inner Write-Back Read-Allocate Write-Allocate, + * Outer Write-Back Read-Allocate Write-Allocate. + * Do not overwrite Tagged within attrs. 
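+         * (Concretely: anything other than 0xf0 (Tagged) becomes 0xff
+         * below, and the stage 1 shareability is forced to 0.)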
+ */ + if (cacheattrs1.attrs != 0xf0) { + cacheattrs1.attrs = 0xff; + } + cacheattrs1.shareability = 0; + } + result->cacheattrs = combine_cacheattrs(hcr, cacheattrs1, + result->cacheattrs); + + /* No BTI GP information in stage 2, we just use the S1 value */ + result->f.extra.arm.guarded = s1_guarded; + + /* + * Check if IPA translates to secure or non-secure PA space. + * Note that VSTCR overrides VTCR and {N}SW overrides {N}SA. + */ + if (in_space == ARMSS_Secure) { + result->f.attrs.secure = + !(env->cp15.vstcr_el2 & (VSTCR_SA | VSTCR_SW)) + && (ipa_secure + || !(env->cp15.vtcr_el2 & (VTCR_NSA | VTCR_NSW))); + result->f.attrs.space = arm_secure_to_space(result->f.attrs.secure); + } + + return false; +} + +static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw, + target_ulong address, + MMUAccessType access_type, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi) +{ + ARMMMUIdx mmu_idx = ptw->in_mmu_idx; + ARMMMUIdx s1_mmu_idx; + + /* + * The page table entries may downgrade Secure to NonSecure, but + * cannot upgrade a NonSecure translation regime's attributes + * to Secure or Realm. + */ + result->f.attrs.space = ptw->in_space; + result->f.attrs.secure = arm_space_is_secure(ptw->in_space); + + switch (mmu_idx) { + case ARMMMUIdx_Phys_S: + case ARMMMUIdx_Phys_NS: + case ARMMMUIdx_Phys_Root: + case ARMMMUIdx_Phys_Realm: + /* Checking Phys early avoids special casing later vs regime_el. */ + return get_phys_addr_disabled(env, ptw, address, access_type, + result, fi); + + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E1: + case ARMMMUIdx_Stage1_E1_PAN: + /* + * First stage lookup uses second stage for ptw; only + * Secure has both S and NS IPA and starts with Stage2_S. + */ + ptw->in_ptw_idx = (ptw->in_space == ARMSS_Secure) ? + ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2; + break; + + case ARMMMUIdx_Stage2: + case ARMMMUIdx_Stage2_S: + /* + * Second stage lookup uses physical for ptw; whether this is S or + * NS may depend on the SW/NSW bits if this is a stage 2 lookup for + * the Secure EL2&0 regime. + */ + ptw->in_ptw_idx = ptw_idx_for_stage_2(env, mmu_idx); + break; + + case ARMMMUIdx_E10_0: + s1_mmu_idx = ARMMMUIdx_Stage1_E0; + goto do_twostage; + case ARMMMUIdx_E10_1: + s1_mmu_idx = ARMMMUIdx_Stage1_E1; + goto do_twostage; + case ARMMMUIdx_E10_1_PAN: + s1_mmu_idx = ARMMMUIdx_Stage1_E1_PAN; + do_twostage: + /* + * Call ourselves recursively to do the stage 1 and then stage 2 + * translations if mmu_idx is a two-stage regime, and EL2 present. + * Otherwise, a stage1+stage2 translation is just stage 1. + */ + ptw->in_mmu_idx = mmu_idx = s1_mmu_idx; + if (arm_feature(env, ARM_FEATURE_EL2) && + !regime_translation_disabled(env, ARMMMUIdx_Stage2, ptw->in_space)) { + return get_phys_addr_twostage(env, ptw, address, access_type, + result, fi); + } + /* fall through */ + + default: + /* Single stage uses physical for ptw. */ + ptw->in_ptw_idx = arm_space_to_phys(ptw->in_space); + break; + } + + result->f.attrs.user = regime_is_user(env, mmu_idx); + + /* + * Fast Context Switch Extension. This doesn't exist at all in v8. + * In v7 and earlier it affects all stage 1 translations. 
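+     * For example, a VA below 32MB simply has the banked FCSEIDR
+     * value added to it, as below.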
+ */ + if (address < 0x02000000 && mmu_idx != ARMMMUIdx_Stage2 + && !arm_feature(env, ARM_FEATURE_V8)) { + if (regime_el(env, mmu_idx) == 3) { + address += env->cp15.fcseidr_s; + } else { + address += env->cp15.fcseidr_ns; + } + } + + if (arm_feature(env, ARM_FEATURE_PMSA)) { + bool ret; + result->f.lg_page_size = TARGET_PAGE_BITS; + + if (arm_feature(env, ARM_FEATURE_V8)) { + /* PMSAv8 */ + ret = get_phys_addr_pmsav8(env, ptw, address, access_type, + result, fi); + } else if (arm_feature(env, ARM_FEATURE_V7)) { + /* PMSAv7 */ + ret = get_phys_addr_pmsav7(env, ptw, address, access_type, + result, fi); + } else { + /* Pre-v7 MPU */ + ret = get_phys_addr_pmsav5(env, ptw, address, access_type, + result, fi); + } + qemu_log_mask(CPU_LOG_MMU, "PMSA MPU lookup for %s at 0x%08" PRIx32 + " mmu_idx %u -> %s (prot %c%c%c)\n", + access_type == MMU_DATA_LOAD ? "reading" : + (access_type == MMU_DATA_STORE ? "writing" : "execute"), + (uint32_t)address, mmu_idx, + ret ? "Miss" : "Hit", + result->f.prot & PAGE_READ ? 'r' : '-', + result->f.prot & PAGE_WRITE ? 'w' : '-', + result->f.prot & PAGE_EXEC ? 'x' : '-'); + + return ret; + } + + /* Definitely a real MMU, not an MPU */ + + if (regime_translation_disabled(env, mmu_idx, ptw->in_space)) { + return get_phys_addr_disabled(env, ptw, address, access_type, + result, fi); + } + + if (regime_using_lpae_format(env, mmu_idx)) { + return get_phys_addr_lpae(env, ptw, address, access_type, result, fi); + } else if (arm_feature(env, ARM_FEATURE_V7) || + regime_sctlr(env, mmu_idx) & SCTLR_XP) { + return get_phys_addr_v6(env, ptw, address, access_type, result, fi); + } else { + return get_phys_addr_v5(env, ptw, address, access_type, result, fi); + } +} + +static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw, + target_ulong address, + MMUAccessType access_type, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi) +{ + if (get_phys_addr_nogpc(env, ptw, address, access_type, result, fi)) { + return true; + } + if (!granule_protection_check(env, result->f.phys_addr, + result->f.attrs.space, fi)) { + fi->type = ARMFault_GPCFOnOutput; + return true; + } + return false; +} + +bool get_phys_addr_with_space_nogpc(CPUARMState *env, target_ulong address, + MMUAccessType access_type, + ARMMMUIdx mmu_idx, ARMSecuritySpace space, + GetPhysAddrResult *result, + ARMMMUFaultInfo *fi) +{ + S1Translate ptw = { + .in_mmu_idx = mmu_idx, + .in_space = space, + }; + return get_phys_addr_nogpc(env, &ptw, address, access_type, result, fi); +} + +bool get_phys_addr(CPUARMState *env, target_ulong address, + MMUAccessType access_type, ARMMMUIdx mmu_idx, + GetPhysAddrResult *result, ARMMMUFaultInfo *fi) +{ + S1Translate ptw = { + .in_mmu_idx = mmu_idx, + }; + ARMSecuritySpace ss; + + switch (mmu_idx) { + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E10_1: + case ARMMMUIdx_E10_1_PAN: + case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E1: + case ARMMMUIdx_Stage1_E1_PAN: + case ARMMMUIdx_E2: + ss = arm_security_space_below_el3(env); + break; + case ARMMMUIdx_Stage2: + /* + * For Secure EL2, we need this index to be NonSecure; + * otherwise this will already be NonSecure or Realm. 
+ */ + ss = arm_security_space_below_el3(env); + if (ss == ARMSS_Secure) { + ss = ARMSS_NonSecure; + } + break; + case ARMMMUIdx_Phys_NS: + case ARMMMUIdx_MPrivNegPri: + case ARMMMUIdx_MUserNegPri: + case ARMMMUIdx_MPriv: + case ARMMMUIdx_MUser: + ss = ARMSS_NonSecure; + break; + case ARMMMUIdx_Stage2_S: + case ARMMMUIdx_Phys_S: + case ARMMMUIdx_MSPrivNegPri: + case ARMMMUIdx_MSUserNegPri: + case ARMMMUIdx_MSPriv: + case ARMMMUIdx_MSUser: + ss = ARMSS_Secure; + break; + case ARMMMUIdx_E3: + if (arm_feature(env, ARM_FEATURE_AARCH64) && + cpu_isar_feature(aa64_rme, env_archcpu(env))) { + ss = ARMSS_Root; + } else { + ss = ARMSS_Secure; + } + break; + case ARMMMUIdx_Phys_Root: + ss = ARMSS_Root; + break; + case ARMMMUIdx_Phys_Realm: + ss = ARMSS_Realm; + break; + default: + g_assert_not_reached(); + } + + ptw.in_space = ss; + return get_phys_addr_gpc(env, &ptw, address, access_type, result, fi); +} + +hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, + MemTxAttrs *attrs) +{ + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + ARMMMUIdx mmu_idx = arm_mmu_idx(env); + ARMSecuritySpace ss = arm_security_space(env); + S1Translate ptw = { + .in_mmu_idx = mmu_idx, + .in_space = ss, + .in_debug = true, + }; + GetPhysAddrResult res = {}; + ARMMMUFaultInfo fi = {}; + bool ret; + + ret = get_phys_addr_gpc(env, &ptw, addr, MMU_DATA_LOAD, &res, &fi); + *attrs = res.f.attrs; + + if (ret) { + return -1; + } + return res.f.phys_addr; +} diff --git a/target/arm/syndrome.h b/target/arm/syndrome.h index f30f4130a2..3244e0740d 100644 --- a/target/arm/syndrome.h +++ b/target/arm/syndrome.h @@ -25,6 +25,8 @@ #ifndef TARGET_ARM_SYNDROME_H #define TARGET_ARM_SYNDROME_H +#include "qemu/bitops.h" + /* Valid Syndrome Register EC field values */ enum arm_exception_class { EC_UNCATEGORIZED = 0x00, @@ -48,12 +50,17 @@ enum arm_exception_class { EC_AA64_SMC = 0x17, EC_SYSTEMREGISTERTRAP = 0x18, EC_SVEACCESSTRAP = 0x19, + EC_ERETTRAP = 0x1a, + EC_PACFAIL = 0x1c, + EC_SMETRAP = 0x1d, + EC_GPC = 0x1e, EC_INSNABORT = 0x20, EC_INSNABORT_SAME_EL = 0x21, EC_PCALIGNMENT = 0x22, EC_DATAABORT = 0x24, EC_DATAABORT_SAME_EL = 0x25, EC_SPALIGNMENT = 0x26, + EC_MOP = 0x27, EC_AA32_FPTRAP = 0x28, EC_AA64_FPTRAP = 0x2c, EC_SERROR = 0x2f, @@ -68,17 +75,33 @@ enum arm_exception_class { EC_AA64_BKPT = 0x3c, }; +typedef enum { + SME_ET_AccessTrap, + SME_ET_Streaming, + SME_ET_NotStreaming, + SME_ET_InactiveZA, +} SMEExceptionType; + +#define ARM_EL_EC_LENGTH 6 #define ARM_EL_EC_SHIFT 26 #define ARM_EL_IL_SHIFT 25 #define ARM_EL_ISV_SHIFT 24 #define ARM_EL_IL (1 << ARM_EL_IL_SHIFT) #define ARM_EL_ISV (1 << ARM_EL_ISV_SHIFT) +/* In the Data Abort syndrome */ +#define ARM_EL_VNCR (1 << 13) + static inline uint32_t syn_get_ec(uint32_t syn) { return syn >> ARM_EL_EC_SHIFT; } +static inline uint32_t syn_set_ec(uint32_t syn, uint32_t ec) +{ + return deposit32(syn, ARM_EL_EC_SHIFT, ARM_EL_EC_LENGTH, ec); +} + /* * Utility functions for constructing various kinds of syndrome value. 
* Note that in general we follow the AArch64 syndrome values; in a @@ -185,12 +208,13 @@ static inline uint32_t syn_cp15_rrt_trap(int cv, int cond, int opc1, int crm, | (rt2 << 10) | (rt << 5) | (crm << 1) | isread; } -static inline uint32_t syn_fp_access_trap(int cv, int cond, bool is_16bit) +static inline uint32_t syn_fp_access_trap(int cv, int cond, bool is_16bit, + int coproc) { - /* AArch32 FP trap or any AArch64 FP/SIMD trap: TA == 0 coproc == 0xa */ + /* AArch32 FP trap or any AArch64 FP/SIMD trap: TA == 0 */ return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT) | (is_16bit ? 0 : ARM_EL_IL) - | (cv << 24) | (cond << 20) | 0xa; + | (cv << 24) | (cond << 20) | coproc; } static inline uint32_t syn_simd_access_trap(int cv, int cond, bool is_16bit) @@ -203,17 +227,38 @@ static inline uint32_t syn_simd_access_trap(int cv, int cond, bool is_16bit) static inline uint32_t syn_sve_access_trap(void) { - return EC_SVEACCESSTRAP << ARM_EL_EC_SHIFT; + return (EC_SVEACCESSTRAP << ARM_EL_EC_SHIFT) | ARM_EL_IL; +} + +/* + * eret_op is bits [1:0] of the ERET instruction, so: + * 0 for ERET, 2 for ERETAA, 3 for ERETAB. + */ +static inline uint32_t syn_erettrap(int eret_op) +{ + return (EC_ERETTRAP << ARM_EL_EC_SHIFT) | ARM_EL_IL | eret_op; +} + +static inline uint32_t syn_smetrap(SMEExceptionType etype, bool is_16bit) +{ + return (EC_SMETRAP << ARM_EL_EC_SHIFT) + | (is_16bit ? 0 : ARM_EL_IL) | etype; +} + +static inline uint32_t syn_pacfail(bool data, int keynumber) +{ + int error_code = (data << 1) | keynumber; + return (EC_PACFAIL << ARM_EL_EC_SHIFT) | ARM_EL_IL | error_code; } static inline uint32_t syn_pactrap(void) { - return EC_PACTRAP << ARM_EL_EC_SHIFT; + return (EC_PACTRAP << ARM_EL_EC_SHIFT) | ARM_EL_IL; } static inline uint32_t syn_btitrap(int btype) { - return (EC_BTITRAP << ARM_EL_EC_SHIFT) | btype; + return (EC_BTITRAP << ARM_EL_EC_SHIFT) | ARM_EL_IL | btype; } static inline uint32_t syn_bxjtrap(int cv, int cond, int rm) @@ -222,6 +267,14 @@ static inline uint32_t syn_bxjtrap(int cv, int cond, int rm) (cv << 24) | (cond << 20) | rm; } +static inline uint32_t syn_gpc(int s2ptw, int ind, int gpcsc, int vncr, + int cm, int s1ptw, int wnr, int fsc) +{ + return (EC_GPC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (s2ptw << 21) + | (ind << 20) | (gpcsc << 14) | (vncr << 13) | (cm << 8) + | (s1ptw << 7) | (wnr << 6) | fsc; +} + static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc) { return (EC_INSNABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT) @@ -252,6 +305,16 @@ static inline uint32_t syn_data_abort_with_iss(int same_el, | (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc; } +/* + * Faults due to FEAT_NV2 VNCR_EL2-based accesses report as same-EL + * Data Aborts with the VNCR bit set. 
+ */ +static inline uint32_t syn_data_abort_vncr(int ea, int wnr, int fsc) +{ + return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (1 << ARM_EL_EC_SHIFT) + | ARM_EL_IL | ARM_EL_VNCR | (wnr << 6) | fsc; +} + static inline uint32_t syn_swstep(int same_el, int isv, int ex) { return (EC_SOFTWARESTEP << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT) @@ -282,4 +345,25 @@ static inline uint32_t syn_illegalstate(void) return (EC_ILLEGALSTATE << ARM_EL_EC_SHIFT) | ARM_EL_IL; } +static inline uint32_t syn_pcalignment(void) +{ + return (EC_PCALIGNMENT << ARM_EL_EC_SHIFT) | ARM_EL_IL; +} + +static inline uint32_t syn_serror(uint32_t extra) +{ + return (EC_SERROR << ARM_EL_EC_SHIFT) | ARM_EL_IL | extra; +} + +static inline uint32_t syn_mop(bool is_set, bool is_setg, int options, + bool epilogue, bool wrong_option, bool option_a, + int destreg, int srcreg, int sizereg) +{ + return (EC_MOP << ARM_EL_EC_SHIFT) | ARM_EL_IL | + (is_set << 24) | (is_setg << 23) | (options << 19) | + (epilogue << 18) | (wrong_option << 17) | (option_a << 16) | + (destreg << 10) | (srcreg << 5) | sizereg; +} + + #endif /* TARGET_ARM_SYNDROME_H */ diff --git a/target/arm/tcg-stubs.c b/target/arm/tcg-stubs.c new file mode 100644 index 0000000000..152b172e24 --- /dev/null +++ b/target/arm/tcg-stubs.c @@ -0,0 +1,27 @@ +/* + * QEMU ARM stubs for some TCG helper functions + * + * Copyright 2021 SUSE LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" + +void write_v7m_exception(CPUARMState *env, uint32_t new_exc) +{ + g_assert_not_reached(); +} + +void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome, + uint32_t target_el, uintptr_t ra) +{ + g_assert_not_reached(); +} +/* Temporarily while cpu_get_tb_cpu_state() is still in common code */ +void assert_hflags_rebuild_correctly(CPUARMState *env) +{ +} diff --git a/target/arm/a32-uncond.decode b/target/arm/tcg/a32-uncond.decode index 2339de2e94..2339de2e94 100644 --- a/target/arm/a32-uncond.decode +++ b/target/arm/tcg/a32-uncond.decode diff --git a/target/arm/a32.decode b/target/arm/tcg/a32.decode index fcd8cd4f7d..f2ca480949 100644 --- a/target/arm/a32.decode +++ b/target/arm/tcg/a32.decode @@ -187,13 +187,17 @@ SMULTT .... 0001 0110 .... 0000 .... 1110 .... @rd0mn { { - YIELD ---- 0011 0010 0000 1111 ---- 0000 0001 - WFE ---- 0011 0010 0000 1111 ---- 0000 0010 - WFI ---- 0011 0010 0000 1111 ---- 0000 0011 + [ + YIELD ---- 0011 0010 0000 1111 ---- 0000 0001 + WFE ---- 0011 0010 0000 1111 ---- 0000 0010 + WFI ---- 0011 0010 0000 1111 ---- 0000 0011 - # TODO: Implement SEV, SEVL; may help SMP performance. - # SEV ---- 0011 0010 0000 1111 ---- 0000 0100 - # SEVL ---- 0011 0010 0000 1111 ---- 0000 0101 + # TODO: Implement SEV, SEVL; may help SMP performance. + # SEV ---- 0011 0010 0000 1111 ---- 0000 0100 + # SEVL ---- 0011 0010 0000 1111 ---- 0000 0101 + + ESB ---- 0011 0010 0000 1111 ---- 0001 0000 + ] # The canonical nop ends in 00000000, but the whole of the # rest of the space executes as nop if otherwise unsupported. 
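The brace and bracket nesting introduced in the a32.decode hunk above is decodetree grouping syntax (the .decode files are processed by scripts/decodetree.py): patterns inside a { } overlap group may overlap and are tried in order, with the first match winning, while a [ ] group nested inside holds patterns that must be mutually exclusive and acts as a single alternative within the braces. That is why ESB joins YIELD/WFE/WFI inside the square brackets while the catch-all NOP pattern stays last. A minimal sketch of these semantics, using hypothetical FOO/BAR/FALLBACK patterns rather than anything from this patch:

# Overlap group: patterns are tried top to bottom, first match wins.
{
  [
    # Non-overlapping group: these encodings must be mutually exclusive.
    FOO      ---- 0011 0010 0000 1111 ---- 0000 0001
    BAR      ---- 0011 0010 0000 1111 ---- 0000 0010
  ]
  # Overlaps both patterns above; used only when neither matched.
  FALLBACK ---- 0011 0010 0000 1111 ---- ---- ----
}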
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode new file mode 100644 index 0000000000..8a20dce3c8 --- /dev/null +++ b/target/arm/tcg/a64.decode @@ -0,0 +1,591 @@ +# AArch64 A64 allowed instruction decoding +# +# Copyright (c) 2023 Linaro, Ltd +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see <http://www.gnu.org/licenses/>. + +# +# This file is processed by scripts/decodetree.py +# + +&r rn +&ri rd imm +&rri_sf rd rn imm sf +&i imm + + +### Data Processing - Immediate + +# PC-rel addressing + +%imm_pcrel 5:s19 29:2 +@pcrel . .. ..... ................... rd:5 &ri imm=%imm_pcrel + +ADR 0 .. 10000 ................... ..... @pcrel +ADRP 1 .. 10000 ................... ..... @pcrel + +# Add/subtract (immediate) + +%imm12_sh12 10:12 !function=shl_12 +@addsub_imm sf:1 .. ...... . imm:12 rn:5 rd:5 +@addsub_imm12 sf:1 .. ...... . ............ rn:5 rd:5 imm=%imm12_sh12 + +ADD_i . 00 100010 0 ............ ..... ..... @addsub_imm +ADD_i . 00 100010 1 ............ ..... ..... @addsub_imm12 +ADDS_i . 01 100010 0 ............ ..... ..... @addsub_imm +ADDS_i . 01 100010 1 ............ ..... ..... @addsub_imm12 + +SUB_i . 10 100010 0 ............ ..... ..... @addsub_imm +SUB_i . 10 100010 1 ............ ..... ..... @addsub_imm12 +SUBS_i . 11 100010 0 ............ ..... ..... @addsub_imm +SUBS_i . 11 100010 1 ............ ..... ..... @addsub_imm12 + +# Add/subtract (immediate with tags) + +&rri_tag rd rn uimm6 uimm4 +@addsub_imm_tag . .. ...... . uimm6:6 .. uimm4:4 rn:5 rd:5 &rri_tag + +ADDG_i 1 00 100011 0 ...... 00 .... ..... ..... @addsub_imm_tag +SUBG_i 1 10 100011 0 ...... 00 .... ..... ..... @addsub_imm_tag + +# Logical (immediate) + +&rri_log rd rn sf dbm +@logic_imm_64 1 .. ...... dbm:13 rn:5 rd:5 &rri_log sf=1 +@logic_imm_32 0 .. ...... 0 dbm:12 rn:5 rd:5 &rri_log sf=0 + +AND_i . 00 100100 . ...... ...... ..... ..... @logic_imm_64 +AND_i . 00 100100 . ...... ...... ..... ..... @logic_imm_32 +ORR_i . 01 100100 . ...... ...... ..... ..... @logic_imm_64 +ORR_i . 01 100100 . ...... ...... ..... ..... @logic_imm_32 +EOR_i . 10 100100 . ...... ...... ..... ..... @logic_imm_64 +EOR_i . 10 100100 . ...... ...... ..... ..... @logic_imm_32 +ANDS_i . 11 100100 . ...... ...... ..... ..... @logic_imm_64 +ANDS_i . 11 100100 . ...... ...... ..... ..... @logic_imm_32 + +# Move wide (immediate) + +&movw rd sf imm hw +@movw_64 1 .. ...... hw:2 imm:16 rd:5 &movw sf=1 +@movw_32 0 .. ...... 0 hw:1 imm:16 rd:5 &movw sf=0 + +MOVN . 00 100101 .. ................ ..... @movw_64 +MOVN . 00 100101 .. ................ ..... @movw_32 +MOVZ . 10 100101 .. ................ ..... @movw_64 +MOVZ . 10 100101 .. ................ ..... @movw_32 +MOVK . 11 100101 .. ................ ..... @movw_64 +MOVK . 11 100101 .. ................ ..... @movw_32 + +# Bitfield + +&bitfield rd rn sf immr imms +@bitfield_64 1 .. ...... 1 immr:6 imms:6 rn:5 rd:5 &bitfield sf=1 +@bitfield_32 0 .. ...... 0 0 immr:5 0 imms:5 rn:5 rd:5 &bitfield sf=0 + +SBFM . 
00 100110 . ...... ...... ..... ..... @bitfield_64 +SBFM . 00 100110 . ...... ...... ..... ..... @bitfield_32 +BFM . 01 100110 . ...... ...... ..... ..... @bitfield_64 +BFM . 01 100110 . ...... ...... ..... ..... @bitfield_32 +UBFM . 10 100110 . ...... ...... ..... ..... @bitfield_64 +UBFM . 10 100110 . ...... ...... ..... ..... @bitfield_32 + +# Extract + +&extract rd rn rm imm sf + +EXTR 1 00 100111 1 0 rm:5 imm:6 rn:5 rd:5 &extract sf=1 +EXTR 0 00 100111 0 0 rm:5 0 imm:5 rn:5 rd:5 &extract sf=0 + +# Branches + +%imm26 0:s26 !function=times_4 +@branch . ..... .......................... &i imm=%imm26 + +B 0 00101 .......................... @branch +BL 1 00101 .......................... @branch + +%imm19 5:s19 !function=times_4 +&cbz rt imm sf nz + +CBZ sf:1 011010 nz:1 ................... rt:5 &cbz imm=%imm19 + +%imm14 5:s14 !function=times_4 +%imm31_19 31:1 19:5 +&tbz rt imm nz bitpos + +TBZ . 011011 nz:1 ..... .............. rt:5 &tbz imm=%imm14 bitpos=%imm31_19 + +# B.cond and BC.cond +B_cond 0101010 0 ................... c:1 cond:4 imm=%imm19 + +BR 1101011 0000 11111 000000 rn:5 00000 &r +BLR 1101011 0001 11111 000000 rn:5 00000 &r +RET 1101011 0010 11111 000000 rn:5 00000 &r + +&braz rn m +BRAZ 1101011 0000 11111 00001 m:1 rn:5 11111 &braz # BRAAZ, BRABZ +BLRAZ 1101011 0001 11111 00001 m:1 rn:5 11111 &braz # BLRAAZ, BLRABZ + +&reta m +RETA 1101011 0010 11111 00001 m:1 11111 11111 &reta # RETAA, RETAB + +&bra rn rm m +BRA 1101011 1000 11111 00001 m:1 rn:5 rm:5 &bra # BRAA, BRAB +BLRA 1101011 1001 11111 00001 m:1 rn:5 rm:5 &bra # BLRAA, BLRAB + +ERET 1101011 0100 11111 000000 11111 00000 +ERETA 1101011 0100 11111 00001 m:1 11111 11111 &reta # ERETAA, ERETAB + +# We don't need to decode DRPS because it always UNDEFs except when +# the processor is in halting debug state (which we don't implement). +# The pattern is listed here as documentation. +# DRPS 1101011 0101 11111 000000 11111 00000 + +# Hint instruction group +{ + [ + YIELD 1101 0101 0000 0011 0010 0000 001 11111 + WFE 1101 0101 0000 0011 0010 0000 010 11111 + WFI 1101 0101 0000 0011 0010 0000 011 11111 + # We implement WFE to never block, so our SEV/SEVL are NOPs + # SEV 1101 0101 0000 0011 0010 0000 100 11111 + # SEVL 1101 0101 0000 0011 0010 0000 101 11111 + # Our DGL is a NOP because we don't merge memory accesses anyway. 
+ # DGL 1101 0101 0000 0011 0010 0000 110 11111 + XPACLRI 1101 0101 0000 0011 0010 0000 111 11111 + PACIA1716 1101 0101 0000 0011 0010 0001 000 11111 + PACIB1716 1101 0101 0000 0011 0010 0001 010 11111 + AUTIA1716 1101 0101 0000 0011 0010 0001 100 11111 + AUTIB1716 1101 0101 0000 0011 0010 0001 110 11111 + ESB 1101 0101 0000 0011 0010 0010 000 11111 + PACIAZ 1101 0101 0000 0011 0010 0011 000 11111 + PACIASP 1101 0101 0000 0011 0010 0011 001 11111 + PACIBZ 1101 0101 0000 0011 0010 0011 010 11111 + PACIBSP 1101 0101 0000 0011 0010 0011 011 11111 + AUTIAZ 1101 0101 0000 0011 0010 0011 100 11111 + AUTIASP 1101 0101 0000 0011 0010 0011 101 11111 + AUTIBZ 1101 0101 0000 0011 0010 0011 110 11111 + AUTIBSP 1101 0101 0000 0011 0010 0011 111 11111 + ] + # The canonical NOP has CRm == op2 == 0, but all of the space + # that isn't specifically allocated to an instruction must NOP + NOP 1101 0101 0000 0011 0010 ---- --- 11111 +} + +# Barriers + +CLREX 1101 0101 0000 0011 0011 ---- 010 11111 +DSB_DMB 1101 0101 0000 0011 0011 domain:2 types:2 10- 11111 +ISB 1101 0101 0000 0011 0011 ---- 110 11111 +SB 1101 0101 0000 0011 0011 0000 111 11111 + +# PSTATE + +CFINV 1101 0101 0000 0 000 0100 0000 000 11111 +XAFLAG 1101 0101 0000 0 000 0100 0000 001 11111 +AXFLAG 1101 0101 0000 0 000 0100 0000 010 11111 + +# These are architecturally all "MSR (immediate)"; we decode the destination +# register too because there is no commonality in our implementation. +@msr_i .... .... .... . ... .... imm:4 ... ..... +MSR_i_UAO 1101 0101 0000 0 000 0100 .... 011 11111 @msr_i +MSR_i_PAN 1101 0101 0000 0 000 0100 .... 100 11111 @msr_i +MSR_i_SPSEL 1101 0101 0000 0 000 0100 .... 101 11111 @msr_i +MSR_i_SBSS 1101 0101 0000 0 011 0100 .... 001 11111 @msr_i +MSR_i_DIT 1101 0101 0000 0 011 0100 .... 010 11111 @msr_i +MSR_i_TCO 1101 0101 0000 0 011 0100 .... 100 11111 @msr_i +MSR_i_DAIFSET 1101 0101 0000 0 011 0100 .... 110 11111 @msr_i +MSR_i_DAIFCLEAR 1101 0101 0000 0 011 0100 .... 111 11111 @msr_i +MSR_i_SVCR 1101 0101 0000 0 011 0100 0 mask:2 imm:1 011 11111 + +# MRS, MSR (register), SYS, SYSL. These are all essentially the +# same instruction as far as QEMU is concerned. +# NB: op0 is bits [20:19], but op0=0b00 is other insns, so we have +# to hand-decode it. +SYS 1101 0101 00 l:1 01 op1:3 crn:4 crm:4 op2:3 rt:5 op0=1 +SYS 1101 0101 00 l:1 10 op1:3 crn:4 crm:4 op2:3 rt:5 op0=2 +SYS 1101 0101 00 l:1 11 op1:3 crn:4 crm:4 op2:3 rt:5 op0=3 + +# Exception generation + +@i16 .... .... ... imm:16 ... .. &i +SVC 1101 0100 000 ................ 000 01 @i16 +HVC 1101 0100 000 ................ 000 10 @i16 +SMC 1101 0100 000 ................ 000 11 @i16 +BRK 1101 0100 001 ................ 000 00 @i16 +HLT 1101 0100 010 ................ 000 00 @i16 +# These insns always UNDEF unless in halting debug state, which +# we don't implement. So we don't need to decode them. The patterns +# are listed here as documentation. +# DCPS1 1101 0100 101 ................ 000 01 @i16 +# DCPS2 1101 0100 101 ................ 000 10 @i16 +# DCPS3 1101 0100 101 ................ 000 11 @i16 + +# Loads and stores + +&stxr rn rt rt2 rs sz lasr +&stlr rn rt sz lasr +@stxr sz:2 ...... ... rs:5 lasr:1 rt2:5 rn:5 rt:5 &stxr +@stlr sz:2 ...... ... ..... lasr:1 ..... rn:5 rt:5 &stlr +%imm1_30_p2 30:1 !function=plus_2 +@stxp .. ...... ... rs:5 lasr:1 rt2:5 rn:5 rt:5 &stxr sz=%imm1_30_p2 +STXR .. 001000 000 ..... . ..... ..... ..... @stxr # inc STLXR +LDXR .. 001000 010 ..... . ..... ..... ..... @stxr # inc LDAXR +STLR .. 001000 100 11111 . 11111 ..... ..... 
@stlr # inc STLLR +LDAR .. 001000 110 11111 . 11111 ..... ..... @stlr # inc LDLAR + +STXP 1 . 001000 001 ..... . ..... ..... ..... @stxp # inc STLXP +LDXP 1 . 001000 011 ..... . ..... ..... ..... @stxp # inc LDAXP + +# CASP, CASPA, CASPAL, CASPL (we don't decode the bits that determine +# acquire/release semantics because QEMU's cmpxchg always has those) +CASP 0 . 001000 0 - 1 rs:5 - 11111 rn:5 rt:5 sz=%imm1_30_p2 +# CAS, CASA, CASAL, CASL +CAS sz:2 001000 1 - 1 rs:5 - 11111 rn:5 rt:5 + +&ldlit rt imm sz sign +@ldlit .. ... . .. ................... rt:5 &ldlit imm=%imm19 + +LD_lit 00 011 0 00 ................... ..... @ldlit sz=2 sign=0 +LD_lit 01 011 0 00 ................... ..... @ldlit sz=3 sign=0 +LD_lit 10 011 0 00 ................... ..... @ldlit sz=2 sign=1 +LD_lit_v 00 011 1 00 ................... ..... @ldlit sz=2 sign=0 +LD_lit_v 01 011 1 00 ................... ..... @ldlit sz=3 sign=0 +LD_lit_v 10 011 1 00 ................... ..... @ldlit sz=4 sign=0 + +# PRFM +NOP 11 011 0 00 ------------------- ----- + +&ldstpair rt2 rt rn imm sz sign w p +@ldstpair .. ... . ... . imm:s7 rt2:5 rn:5 rt:5 &ldstpair + +# STNP, LDNP: Signed offset, non-temporal hint. We don't emulate caches +# so we ignore hints about data access patterns, and handle these like +# plain signed offset. +STP 00 101 0 000 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP 00 101 0 000 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +STP 10 101 0 000 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +LDP 10 101 0 000 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +STP_v 00 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP_v 00 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +STP_v 01 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +LDP_v 01 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +STP_v 10 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 +LDP_v 10 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 + +# STP and LDP: post-indexed +STP 00 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 +LDP 00 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 +LDP 01 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=1 w=1 +STP 10 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +LDP 10 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +STP_v 00 101 1 001 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 +LDP_v 00 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 +STP_v 01 101 1 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +LDP_v 01 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +STP_v 10 101 1 001 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=1 w=1 +LDP_v 10 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=1 w=1 + +# STP and LDP: offset +STP 00 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP 00 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP 01 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=0 w=0 +STP 10 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +LDP 10 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +STP_v 00 101 1 010 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP_v 00 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +STP_v 01 101 1 010 0 ....... ..... ..... 
..... @ldstpair sz=3 sign=0 p=0 w=0 +LDP_v 01 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +STP_v 10 101 1 010 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 +LDP_v 10 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 + +# STP and LDP: pre-indexed +STP 00 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 +LDP 00 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 +LDP 01 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=0 w=1 +STP 10 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 +LDP 10 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 +STP_v 00 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 +LDP_v 00 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 +STP_v 01 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 +LDP_v 01 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 +STP_v 10 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=1 +LDP_v 10 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=1 + +# STGP: store tag and pair +STGP 01 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +STGP 01 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +STGP 01 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 + +# Load/store register (unscaled immediate) +&ldst_imm rt rn imm sz sign w p unpriv ext +@ldst_imm .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=0 p=0 w=0 +@ldst_imm_pre .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=0 p=0 w=1 +@ldst_imm_post .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=0 p=1 w=1 +@ldst_imm_user .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=1 p=0 w=0 + +STR_i sz:2 111 0 00 00 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 +LDR_i 00 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=1 sz=0 +LDR_i 01 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=1 sz=1 +LDR_i 10 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=1 sz=2 +LDR_i 11 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 sz=3 +LDR_i 00 111 0 00 10 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=0 sz=0 +LDR_i 01 111 0 00 10 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=0 sz=1 +LDR_i 10 111 0 00 10 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=0 sz=2 +LDR_i 00 111 0 00 11 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=1 sz=0 +LDR_i 01 111 0 00 11 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=1 sz=1 + +STR_i sz:2 111 0 00 00 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 +LDR_i 00 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=1 sz=0 +LDR_i 01 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=1 sz=1 +LDR_i 10 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=1 sz=2 +LDR_i 11 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 sz=3 +LDR_i 00 111 0 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=0 sz=0 +LDR_i 01 111 0 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=0 sz=1 +LDR_i 10 111 0 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=0 sz=2 +LDR_i 00 111 0 00 11 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=1 sz=0 +LDR_i 01 111 0 00 11 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=1 sz=1 + +STR_i sz:2 111 0 00 00 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=0 +LDR_i 00 111 0 00 01 0 ......... 10 ..... ..... 
@ldst_imm_user sign=0 ext=1 sz=0 +LDR_i 01 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=1 sz=1 +LDR_i 10 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=1 sz=2 +LDR_i 11 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=0 sz=3 +LDR_i 00 111 0 00 10 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=0 sz=0 +LDR_i 01 111 0 00 10 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=0 sz=1 +LDR_i 10 111 0 00 10 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=0 sz=2 +LDR_i 00 111 0 00 11 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=1 sz=0 +LDR_i 01 111 0 00 11 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=1 sz=1 + +STR_i sz:2 111 0 00 00 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 +LDR_i 00 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=1 sz=0 +LDR_i 01 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=1 sz=1 +LDR_i 10 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=1 sz=2 +LDR_i 11 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 sz=3 +LDR_i 00 111 0 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=0 sz=0 +LDR_i 01 111 0 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=0 sz=1 +LDR_i 10 111 0 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=0 sz=2 +LDR_i 00 111 0 00 11 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=1 sz=0 +LDR_i 01 111 0 00 11 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=1 sz=1 + +# PRFM : prefetch memory: a no-op for QEMU +NOP 11 111 0 00 10 0 --------- 00 ----- ----- + +STR_v_i sz:2 111 1 00 00 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 +STR_v_i 00 111 1 00 10 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 sz=4 +LDR_v_i sz:2 111 1 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 +LDR_v_i 00 111 1 00 11 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 sz=4 + +STR_v_i sz:2 111 1 00 00 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 +STR_v_i 00 111 1 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 sz=4 +LDR_v_i sz:2 111 1 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 +LDR_v_i 00 111 1 00 11 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 sz=4 + +STR_v_i sz:2 111 1 00 00 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 +STR_v_i 00 111 1 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 sz=4 +LDR_v_i sz:2 111 1 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 +LDR_v_i 00 111 1 00 11 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 sz=4 + +# Load/store with an unsigned 12 bit immediate, which is scaled by the +# element size. The function gets the sz:imm and returns the scaled immediate. +%uimm_scaled 10:12 sz:3 !function=uimm_scaled + +@ldst_uimm .. ... . .. .. ............ rn:5 rt:5 &ldst_imm unpriv=0 p=0 w=0 imm=%uimm_scaled + +STR_i sz:2 111 0 01 00 ............ ..... ..... @ldst_uimm sign=0 ext=0 +LDR_i 00 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=1 sz=0 +LDR_i 01 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=1 sz=1 +LDR_i 10 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=1 sz=2 +LDR_i 11 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=3 +LDR_i 00 111 0 01 10 ............ ..... ..... @ldst_uimm sign=1 ext=0 sz=0 +LDR_i 01 111 0 01 10 ............ ..... ..... @ldst_uimm sign=1 ext=0 sz=1 +LDR_i 10 111 0 01 10 ............ ..... ..... @ldst_uimm sign=1 ext=0 sz=2 +LDR_i 00 111 0 01 11 ............ ..... ..... 
@ldst_uimm sign=1 ext=1 sz=0 +LDR_i 01 111 0 01 11 ............ ..... ..... @ldst_uimm sign=1 ext=1 sz=1 + +# PRFM +NOP 11 111 0 01 10 ------------ ----- ----- + +STR_v_i sz:2 111 1 01 00 ............ ..... ..... @ldst_uimm sign=0 ext=0 +STR_v_i 00 111 1 01 10 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=4 +LDR_v_i sz:2 111 1 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=0 +LDR_v_i 00 111 1 01 11 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=4 + +# Load/store with register offset +&ldst rm rn rt sign ext sz opt s +@ldst .. ... . .. .. . rm:5 opt:3 s:1 .. rn:5 rt:5 &ldst +STR sz:2 111 0 00 00 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 +LDR 00 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=1 sz=0 +LDR 01 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=1 sz=1 +LDR 10 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=1 sz=2 +LDR 11 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=3 +LDR 00 111 0 00 10 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=0 sz=0 +LDR 01 111 0 00 10 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=0 sz=1 +LDR 10 111 0 00 10 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=0 sz=2 +LDR 00 111 0 00 11 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=1 sz=0 +LDR 01 111 0 00 11 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=1 sz=1 + +# PRFM +NOP 11 111 0 00 10 1 ----- -1- - 10 ----- ----- + +STR_v sz:2 111 1 00 00 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 +STR_v 00 111 1 00 10 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=4 +LDR_v sz:2 111 1 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 +LDR_v 00 111 1 00 11 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=4 + +# Atomic memory operations +&atomic rs rn rt a r sz +@atomic sz:2 ... . .. a:1 r:1 . rs:5 . ... .. rn:5 rt:5 &atomic +LDADD .. 111 0 00 . . 1 ..... 0000 00 ..... ..... @atomic +LDCLR .. 111 0 00 . . 1 ..... 0001 00 ..... ..... @atomic +LDEOR .. 111 0 00 . . 1 ..... 0010 00 ..... ..... @atomic +LDSET .. 111 0 00 . . 1 ..... 0011 00 ..... ..... @atomic +LDSMAX .. 111 0 00 . . 1 ..... 0100 00 ..... ..... @atomic +LDSMIN .. 111 0 00 . . 1 ..... 0101 00 ..... ..... @atomic +LDUMAX .. 111 0 00 . . 1 ..... 0110 00 ..... ..... @atomic +LDUMIN .. 111 0 00 . . 1 ..... 0111 00 ..... ..... @atomic +SWP .. 111 0 00 . . 1 ..... 1000 00 ..... ..... @atomic + +LDAPR sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5 + +# Load/store register (pointer authentication) + +# LDRA immediate is 10 bits signed and scaled, but the bits aren't all contiguous +%ldra_imm 22:s1 12:9 !function=times_8 + +LDRA 11 111 0 00 m:1 . 1 ......... w:1 1 rn:5 rt:5 imm=%ldra_imm + +&ldapr_stlr_i rn rt imm sz sign ext +@ldapr_stlr_i .. ...... .. . imm:9 .. rn:5 rt:5 &ldapr_stlr_i +STLR_i sz:2 011001 00 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0 +LDAPR_i sz:2 011001 01 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0 +LDAPR_i 00 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=0 +LDAPR_i 01 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=1 +LDAPR_i 10 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=2 +LDAPR_i 00 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=0 +LDAPR_i 01 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=1 + +# Load/store multiple structures +# The 4-bit opcode in [15:12] encodes repeat count and structure elements +&ldst_mult rm rn rt sz q p rpt selem +@ldst_mult . q:1 ...... p:1 . . rm:5 .... sz:2 rn:5 rt:5 &ldst_mult +ST_mult 0 . 001100 . 0 0 ..... 0000 .. ..... 
..... @ldst_mult rpt=1 selem=4 +ST_mult 0 . 001100 . 0 0 ..... 0010 .. ..... ..... @ldst_mult rpt=4 selem=1 +ST_mult 0 . 001100 . 0 0 ..... 0100 .. ..... ..... @ldst_mult rpt=1 selem=3 +ST_mult 0 . 001100 . 0 0 ..... 0110 .. ..... ..... @ldst_mult rpt=3 selem=1 +ST_mult 0 . 001100 . 0 0 ..... 0111 .. ..... ..... @ldst_mult rpt=1 selem=1 +ST_mult 0 . 001100 . 0 0 ..... 1000 .. ..... ..... @ldst_mult rpt=1 selem=2 +ST_mult 0 . 001100 . 0 0 ..... 1010 .. ..... ..... @ldst_mult rpt=2 selem=1 + +LD_mult 0 . 001100 . 1 0 ..... 0000 .. ..... ..... @ldst_mult rpt=1 selem=4 +LD_mult 0 . 001100 . 1 0 ..... 0010 .. ..... ..... @ldst_mult rpt=4 selem=1 +LD_mult 0 . 001100 . 1 0 ..... 0100 .. ..... ..... @ldst_mult rpt=1 selem=3 +LD_mult 0 . 001100 . 1 0 ..... 0110 .. ..... ..... @ldst_mult rpt=3 selem=1 +LD_mult 0 . 001100 . 1 0 ..... 0111 .. ..... ..... @ldst_mult rpt=1 selem=1 +LD_mult 0 . 001100 . 1 0 ..... 1000 .. ..... ..... @ldst_mult rpt=1 selem=2 +LD_mult 0 . 001100 . 1 0 ..... 1010 .. ..... ..... @ldst_mult rpt=2 selem=1 + +# Load/store single structure +&ldst_single rm rn rt p selem index scale + +%ldst_single_selem 13:1 21:1 !function=plus_1 + +%ldst_single_index_b 30:1 10:3 +%ldst_single_index_h 30:1 11:2 +%ldst_single_index_s 30:1 12:1 + +@ldst_single_b .. ...... p:1 .. rm:5 ...... rn:5 rt:5 \ + &ldst_single scale=0 selem=%ldst_single_selem \ + index=%ldst_single_index_b +@ldst_single_h .. ...... p:1 .. rm:5 ...... rn:5 rt:5 \ + &ldst_single scale=1 selem=%ldst_single_selem \ + index=%ldst_single_index_h +@ldst_single_s .. ...... p:1 .. rm:5 ...... rn:5 rt:5 \ + &ldst_single scale=2 selem=%ldst_single_selem \ + index=%ldst_single_index_s +@ldst_single_d . index:1 ...... p:1 .. rm:5 ...... rn:5 rt:5 \ + &ldst_single scale=3 selem=%ldst_single_selem + +ST_single 0 . 001101 . 0 . ..... 00 . ... ..... ..... @ldst_single_b +ST_single 0 . 001101 . 0 . ..... 01 . ..0 ..... ..... @ldst_single_h +ST_single 0 . 001101 . 0 . ..... 10 . .00 ..... ..... @ldst_single_s +ST_single 0 . 001101 . 0 . ..... 10 . 001 ..... ..... @ldst_single_d + +LD_single 0 . 001101 . 1 . ..... 00 . ... ..... ..... @ldst_single_b +LD_single 0 . 001101 . 1 . ..... 01 . ..0 ..... ..... @ldst_single_h +LD_single 0 . 001101 . 1 . ..... 10 . .00 ..... ..... @ldst_single_s +LD_single 0 . 001101 . 1 . ..... 10 . 001 ..... ..... @ldst_single_d + +# Replicating load case +LD_single_repl 0 q:1 001101 p:1 1 . rm:5 11 . 0 scale:2 rn:5 rt:5 selem=%ldst_single_selem + +%tag_offset 12:s9 !function=scale_by_log2_tag_granule +&ldst_tag rn rt imm p w +@ldst_tag ........ .. . ......... .. rn:5 rt:5 &ldst_tag imm=%tag_offset +@ldst_tag_mult ........ .. . 000000000 .. rn:5 rt:5 &ldst_tag imm=0 + +STZGM 11011001 00 1 ......... 00 ..... ..... @ldst_tag_mult p=0 w=0 +STG 11011001 00 1 ......... 01 ..... ..... @ldst_tag p=1 w=1 +STG 11011001 00 1 ......... 10 ..... ..... @ldst_tag p=0 w=0 +STG 11011001 00 1 ......... 11 ..... ..... @ldst_tag p=0 w=1 + +LDG 11011001 01 1 ......... 00 ..... ..... @ldst_tag p=0 w=0 +STZG 11011001 01 1 ......... 01 ..... ..... @ldst_tag p=1 w=1 +STZG 11011001 01 1 ......... 10 ..... ..... @ldst_tag p=0 w=0 +STZG 11011001 01 1 ......... 11 ..... ..... @ldst_tag p=0 w=1 + +STGM 11011001 10 1 ......... 00 ..... ..... @ldst_tag_mult p=0 w=0 +ST2G 11011001 10 1 ......... 01 ..... ..... @ldst_tag p=1 w=1 +ST2G 11011001 10 1 ......... 10 ..... ..... @ldst_tag p=0 w=0 +ST2G 11011001 10 1 ......... 11 ..... ..... @ldst_tag p=0 w=1 + +LDGM 11011001 11 1 ......... 00 ..... ..... 
@ldst_tag_mult p=0 w=0 +STZ2G 11011001 11 1 ......... 01 ..... ..... @ldst_tag p=1 w=1 +STZ2G 11011001 11 1 ......... 10 ..... ..... @ldst_tag p=0 w=0 +STZ2G 11011001 11 1 ......... 11 ..... ..... @ldst_tag p=0 w=1 + +# Memory operations (memset, memcpy, memmove) +# Each of these comes in a set of three, eg SETP (prologue), SETM (main), +# SETE (epilogue), and each of those has different flavours to +# indicate whether memory accesses should be unpriv or non-temporal. +# We don't distinguish temporal and non-temporal accesses, but we +# do need to report it in syndrome register values. + +# Memset +&set rs rn rd unpriv nontemp +# op2 bit 1 is nontemporal bit +@set .. ......... rs:5 .. nontemp:1 unpriv:1 .. rn:5 rd:5 &set + +SETP 00 011001110 ..... 00 . . 01 ..... ..... @set +SETM 00 011001110 ..... 01 . . 01 ..... ..... @set +SETE 00 011001110 ..... 10 . . 01 ..... ..... @set + +# Like SET, but also setting MTE tags +SETGP 00 011101110 ..... 00 . . 01 ..... ..... @set +SETGM 00 011101110 ..... 01 . . 01 ..... ..... @set +SETGE 00 011101110 ..... 10 . . 01 ..... ..... @set + +# Memmove/Memcopy: the CPY insns allow overlapping src/dest and +# copy in the correct direction; the CPYF insns always copy forwards. +# +# options has the nontemporal and unpriv bits for src and dest +&cpy rs rn rd options +@cpy .. ... . ..... rs:5 options:4 .. rn:5 rd:5 &cpy + +CPYFP 00 011 0 01000 ..... .... 01 ..... ..... @cpy +CPYFM 00 011 0 01010 ..... .... 01 ..... ..... @cpy +CPYFE 00 011 0 01100 ..... .... 01 ..... ..... @cpy +CPYP 00 011 1 01000 ..... .... 01 ..... ..... @cpy +CPYM 00 011 1 01010 ..... .... 01 ..... ..... @cpy +CPYE 00 011 1 01100 ..... .... 01 ..... ..... @cpy diff --git a/target/arm/arm_ldst.h b/target/arm/tcg/arm_ldst.h index cee0548a1c..cee0548a1c 100644 --- a/target/arm/arm_ldst.h +++ b/target/arm/tcg/arm_ldst.h diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c new file mode 100644 index 0000000000..c059c681e9 --- /dev/null +++ b/target/arm/tcg/cpu-v7m.c @@ -0,0 +1,290 @@ +/* + * QEMU ARMv7-M TCG-only CPUs. + * + * Copyright (c) 2012 SUSE LINUX Products GmbH + * + * This code is licensed under the GNU GPL v2 or later. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "hw/core/tcg-cpu-ops.h" +#include "internals.h" + +#if !defined(CONFIG_USER_ONLY) + +#include "hw/intc/armv7m_nvic.h" + +static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + CPUClass *cc = CPU_GET_CLASS(cs); + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; + bool ret = false; + + /* + * ARMv7-M interrupt masking works differently than -A or -R. + * There is no FIQ/IRQ distinction. Instead of I and F bits + * masking FIQ and IRQ interrupts, an exception is taken only + * if it is higher priority than the current execution priority + * (which depends on state like BASEPRI, FAULTMASK and the + * currently active exception). + */ + if (interrupt_request & CPU_INTERRUPT_HARD + && (armv7m_nvic_can_take_pending_exception(env->nvic))) { + cs->exception_index = EXCP_IRQ; + cc->tcg_ops->do_interrupt(cs); + ret = true; + } + return ret; +} + +#endif /* !CONFIG_USER_ONLY */ + +static void cortex_m0_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + set_feature(&cpu->env, ARM_FEATURE_V6); + set_feature(&cpu->env, ARM_FEATURE_M); + + cpu->midr = 0x410cc200; + + /* + * These ID register values are not guest visible, because + * we do not implement the Main Extension. 
They must be set + * to values corresponding to the Cortex-M0's implemented + * features, because QEMU generally controls its emulation + * by looking at ID register fields. We use the same values as + * for the M3. + */ + cpu->isar.id_pfr0 = 0x00000030; + cpu->isar.id_pfr1 = 0x00000200; + cpu->isar.id_dfr0 = 0x00100000; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00000030; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x00000000; + cpu->isar.id_mmfr3 = 0x00000000; + cpu->isar.id_isar0 = 0x01141110; + cpu->isar.id_isar1 = 0x02111000; + cpu->isar.id_isar2 = 0x21112231; + cpu->isar.id_isar3 = 0x01111110; + cpu->isar.id_isar4 = 0x01310102; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; +} + +static void cortex_m3_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + set_feature(&cpu->env, ARM_FEATURE_V7); + set_feature(&cpu->env, ARM_FEATURE_M); + set_feature(&cpu->env, ARM_FEATURE_M_MAIN); + cpu->midr = 0x410fc231; + cpu->pmsav7_dregion = 8; + cpu->isar.id_pfr0 = 0x00000030; + cpu->isar.id_pfr1 = 0x00000200; + cpu->isar.id_dfr0 = 0x00100000; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00000030; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x00000000; + cpu->isar.id_mmfr3 = 0x00000000; + cpu->isar.id_isar0 = 0x01141110; + cpu->isar.id_isar1 = 0x02111000; + cpu->isar.id_isar2 = 0x21112231; + cpu->isar.id_isar3 = 0x01111110; + cpu->isar.id_isar4 = 0x01310102; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; +} + +static void cortex_m4_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + set_feature(&cpu->env, ARM_FEATURE_V7); + set_feature(&cpu->env, ARM_FEATURE_M); + set_feature(&cpu->env, ARM_FEATURE_M_MAIN); + set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); + cpu->midr = 0x410fc240; /* r0p0 */ + cpu->pmsav7_dregion = 8; + cpu->isar.mvfr0 = 0x10110021; + cpu->isar.mvfr1 = 0x11000011; + cpu->isar.mvfr2 = 0x00000000; + cpu->isar.id_pfr0 = 0x00000030; + cpu->isar.id_pfr1 = 0x00000200; + cpu->isar.id_dfr0 = 0x00100000; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00000030; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x00000000; + cpu->isar.id_mmfr3 = 0x00000000; + cpu->isar.id_isar0 = 0x01141110; + cpu->isar.id_isar1 = 0x02111000; + cpu->isar.id_isar2 = 0x21112231; + cpu->isar.id_isar3 = 0x01111110; + cpu->isar.id_isar4 = 0x01310102; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; +} + +static void cortex_m7_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + set_feature(&cpu->env, ARM_FEATURE_V7); + set_feature(&cpu->env, ARM_FEATURE_M); + set_feature(&cpu->env, ARM_FEATURE_M_MAIN); + set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); + cpu->midr = 0x411fc272; /* r1p2 */ + cpu->pmsav7_dregion = 8; + cpu->isar.mvfr0 = 0x10110221; + cpu->isar.mvfr1 = 0x12000011; + cpu->isar.mvfr2 = 0x00000040; + cpu->isar.id_pfr0 = 0x00000030; + cpu->isar.id_pfr1 = 0x00000200; + cpu->isar.id_dfr0 = 0x00100000; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00100030; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x01000000; + cpu->isar.id_mmfr3 = 0x00000000; + cpu->isar.id_isar0 = 0x01101110; + cpu->isar.id_isar1 = 0x02112000; + cpu->isar.id_isar2 = 0x20232231; + cpu->isar.id_isar3 = 0x01111131; + cpu->isar.id_isar4 = 0x01310132; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; +} + +static void cortex_m33_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, 
ARM_FEATURE_M); + set_feature(&cpu->env, ARM_FEATURE_M_MAIN); + set_feature(&cpu->env, ARM_FEATURE_M_SECURITY); + set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); + cpu->midr = 0x410fd213; /* r0p3 */ + cpu->pmsav7_dregion = 16; + cpu->sau_sregion = 8; + cpu->isar.mvfr0 = 0x10110021; + cpu->isar.mvfr1 = 0x11000011; + cpu->isar.mvfr2 = 0x00000040; + cpu->isar.id_pfr0 = 0x00000030; + cpu->isar.id_pfr1 = 0x00000210; + cpu->isar.id_dfr0 = 0x00200000; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00101F40; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x01000000; + cpu->isar.id_mmfr3 = 0x00000000; + cpu->isar.id_isar0 = 0x01101110; + cpu->isar.id_isar1 = 0x02212000; + cpu->isar.id_isar2 = 0x20232232; + cpu->isar.id_isar3 = 0x01111131; + cpu->isar.id_isar4 = 0x01310132; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; + cpu->clidr = 0x00000000; + cpu->ctr = 0x8000c000; +} + +static void cortex_m55_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_V8_1M); + set_feature(&cpu->env, ARM_FEATURE_M); + set_feature(&cpu->env, ARM_FEATURE_M_MAIN); + set_feature(&cpu->env, ARM_FEATURE_M_SECURITY); + set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); + cpu->midr = 0x410fd221; /* r0p1 */ + cpu->revidr = 0; + cpu->pmsav7_dregion = 16; + cpu->sau_sregion = 8; + /* These are the MVFR* values for the FPU + full MVE configuration */ + cpu->isar.mvfr0 = 0x10110221; + cpu->isar.mvfr1 = 0x12100211; + cpu->isar.mvfr2 = 0x00000040; + cpu->isar.id_pfr0 = 0x20000030; + cpu->isar.id_pfr1 = 0x00000230; + cpu->isar.id_dfr0 = 0x10200000; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00111040; + cpu->isar.id_mmfr1 = 0x00000000; + cpu->isar.id_mmfr2 = 0x01000000; + cpu->isar.id_mmfr3 = 0x00000011; + cpu->isar.id_isar0 = 0x01103110; + cpu->isar.id_isar1 = 0x02212000; + cpu->isar.id_isar2 = 0x20232232; + cpu->isar.id_isar3 = 0x01111131; + cpu->isar.id_isar4 = 0x01310132; + cpu->isar.id_isar5 = 0x00000000; + cpu->isar.id_isar6 = 0x00000000; + cpu->clidr = 0x00000000; /* caches not implemented */ + cpu->ctr = 0x8303c003; +} + +static const TCGCPUOps arm_v7m_tcg_ops = { + .initialize = arm_translate_init, + .synchronize_from_tb = arm_cpu_synchronize_from_tb, + .debug_excp_handler = arm_debug_excp_handler, + .restore_state_to_opc = arm_restore_state_to_opc, + +#ifdef CONFIG_USER_ONLY + .record_sigsegv = arm_cpu_record_sigsegv, + .record_sigbus = arm_cpu_record_sigbus, +#else + .tlb_fill = arm_cpu_tlb_fill, + .cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt, + .do_interrupt = arm_v7m_cpu_do_interrupt, + .do_transaction_failed = arm_cpu_do_transaction_failed, + .do_unaligned_access = arm_cpu_do_unaligned_access, + .adjust_watchpoint_address = arm_adjust_watchpoint_address, + .debug_check_watchpoint = arm_debug_check_watchpoint, + .debug_check_breakpoint = arm_debug_check_breakpoint, +#endif /* !CONFIG_USER_ONLY */ +}; + +static void arm_v7m_class_init(ObjectClass *oc, void *data) +{ + ARMCPUClass *acc = ARM_CPU_CLASS(oc); + CPUClass *cc = CPU_CLASS(oc); + + acc->info = data; + cc->tcg_ops = &arm_v7m_tcg_ops; + cc->gdb_core_xml_file = "arm-m-profile.xml"; +} + +static const ARMCPUInfo arm_v7m_cpus[] = { + { .name = "cortex-m0", .initfn = cortex_m0_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m3", .initfn = cortex_m3_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m4", .initfn = cortex_m4_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m7", .initfn = 
cortex_m7_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m33", .initfn = cortex_m33_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m55", .initfn = cortex_m55_initfn, + .class_init = arm_v7m_class_init }, +}; + +static void arm_v7m_cpu_register_types(void) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(arm_v7m_cpus); ++i) { + arm_cpu_register(&arm_v7m_cpus[i]); + } +} + +type_init(arm_v7m_cpu_register_types) diff --git a/target/arm/cpu_tcg.c b/target/arm/tcg/cpu32.c index 0d5adccf1a..de8f2be941 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/tcg/cpu32.c @@ -1,5 +1,5 @@ /* - * QEMU ARM TCG CPUs. + * QEMU ARM TCG-only CPUs. * * Copyright (c) 2012 SUSE LINUX Products GmbH * @@ -10,43 +10,90 @@ #include "qemu/osdep.h" #include "cpu.h" -#ifdef CONFIG_TCG #include "hw/core/tcg-cpu-ops.h" -#endif /* CONFIG_TCG */ #include "internals.h" #include "target/arm/idau.h" #if !defined(CONFIG_USER_ONLY) #include "hw/boards.h" #endif +#include "cpregs.h" -/* CPU models. These are not needed for the AArch64 linux-user build. */ -#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) -#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) -static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +/* Share AArch32 -cpu max features with AArch64. */ +void aa32_max_features(ARMCPU *cpu) { - CPUClass *cc = CPU_GET_CLASS(cs); - ARMCPU *cpu = ARM_CPU(cs); - CPUARMState *env = &cpu->env; - bool ret = false; - - /* - * ARMv7-M interrupt masking works differently than -A or -R. - * There is no FIQ/IRQ distinction. Instead of I and F bits - * masking FIQ and IRQ interrupts, an exception is taken only - * if it is higher priority than the current execution priority - * (which depends on state like BASEPRI, FAULTMASK and the - * currently active exception). 
- */
- if (interrupt_request & CPU_INTERRUPT_HARD
- && (armv7m_nvic_can_take_pending_exception(env->nvic))) {
- cs->exception_index = EXCP_IRQ;
- cc->tcg_ops->do_interrupt(cs);
- ret = true;
- }
- return ret;
+ uint32_t t;
+
+ /* Add additional features supported by QEMU */
+ t = cpu->isar.id_isar5;
+ t = FIELD_DP32(t, ID_ISAR5, AES, 2); /* FEAT_PMULL */
+ t = FIELD_DP32(t, ID_ISAR5, SHA1, 1); /* FEAT_SHA1 */
+ t = FIELD_DP32(t, ID_ISAR5, SHA2, 1); /* FEAT_SHA256 */
+ t = FIELD_DP32(t, ID_ISAR5, CRC32, 1);
+ t = FIELD_DP32(t, ID_ISAR5, RDM, 1); /* FEAT_RDM */
+ t = FIELD_DP32(t, ID_ISAR5, VCMA, 1); /* FEAT_FCMA */
+ cpu->isar.id_isar5 = t;
+
+ t = cpu->isar.id_isar6;
+ t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1); /* FEAT_JSCVT */
+ t = FIELD_DP32(t, ID_ISAR6, DP, 1); /* FEAT_DotProd */
+ t = FIELD_DP32(t, ID_ISAR6, FHM, 1); /* FEAT_FHM */
+ t = FIELD_DP32(t, ID_ISAR6, SB, 1); /* FEAT_SB */
+ t = FIELD_DP32(t, ID_ISAR6, SPECRES, 1); /* FEAT_SPECRES */
+ t = FIELD_DP32(t, ID_ISAR6, BF16, 1); /* FEAT_AA32BF16 */
+ t = FIELD_DP32(t, ID_ISAR6, I8MM, 1); /* FEAT_AA32I8MM */
+ cpu->isar.id_isar6 = t;
+
+ t = cpu->isar.mvfr1;
+ t = FIELD_DP32(t, MVFR1, FPHP, 3); /* FEAT_FP16 */
+ t = FIELD_DP32(t, MVFR1, SIMDHP, 2); /* FEAT_FP16 */
+ cpu->isar.mvfr1 = t;
+
+ t = cpu->isar.mvfr2;
+ t = FIELD_DP32(t, MVFR2, SIMDMISC, 3); /* SIMD MaxNum */
+ t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */
+ cpu->isar.mvfr2 = t;
+
+ t = cpu->isar.id_mmfr3;
+ t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* FEAT_PAN2 */
+ cpu->isar.id_mmfr3 = t;
+
+ t = cpu->isar.id_mmfr4;
+ t = FIELD_DP32(t, ID_MMFR4, HPDS, 2); /* FEAT_HPDS2 */
+ t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */
+ t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* FEAT_TTCNP */
+ t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* FEAT_XNX */
+ t = FIELD_DP32(t, ID_MMFR4, EVT, 2); /* FEAT_EVT */
+ cpu->isar.id_mmfr4 = t;
+
+ t = cpu->isar.id_mmfr5;
+ t = FIELD_DP32(t, ID_MMFR5, ETS, 1); /* FEAT_ETS */
+ cpu->isar.id_mmfr5 = t;
+
+ t = cpu->isar.id_pfr0;
+ t = FIELD_DP32(t, ID_PFR0, CSV2, 2); /* FEAT_CSV2 */
+ t = FIELD_DP32(t, ID_PFR0, DIT, 1); /* FEAT_DIT */
+ t = FIELD_DP32(t, ID_PFR0, RAS, 1); /* FEAT_RAS */
+ cpu->isar.id_pfr0 = t;
+
+ t = cpu->isar.id_pfr2;
+ t = FIELD_DP32(t, ID_PFR2, CSV3, 1); /* FEAT_CSV3 */
+ t = FIELD_DP32(t, ID_PFR2, SSBS, 1); /* FEAT_SSBS */
+ cpu->isar.id_pfr2 = t;
+
+ t = cpu->isar.id_dfr0;
+ t = FIELD_DP32(t, ID_DFR0, COPDBG, 9); /* FEAT_Debugv8p4 */
+ t = FIELD_DP32(t, ID_DFR0, COPSDBG, 9); /* FEAT_Debugv8p4 */
+ t = FIELD_DP32(t, ID_DFR0, PERFMON, 6); /* FEAT_PMUv3p5 */
+ cpu->isar.id_dfr0 = t;
+
+ t = cpu->isar.id_dfr1;
+ t = FIELD_DP32(t, ID_DFR1, HPMN0, 1); /* FEAT_HPMN0 */
+ cpu->isar.id_dfr1 = t;
 }
-#endif /* !CONFIG_USER_ONLY && CONFIG_TCG */
+
+/* CPU models. These are not needed for the AArch64 linux-user build.
*/ +#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) static void arm926_initfn(Object *obj) { @@ -263,7 +310,6 @@ static const ARMCPRegInfo cortexa8_cp_reginfo[] = { .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, { .name = "L2AUXCR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 2, .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - REGINFO_SENTINEL }; static void cortex_a8_initfn(Object *obj) @@ -276,6 +322,7 @@ static void cortex_a8_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); cpu->midr = 0x410fc080; cpu->reset_fpsid = 0x410330c0; cpu->isar.mvfr0 = 0x11110222; @@ -301,6 +348,7 @@ static void cortex_a8_initfn(Object *obj) cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */ cpu->ccsidr[2] = 0xf0000000; /* No L2 icache. */ cpu->reset_auxcr = 2; + cpu->isar.reset_pmcr_el0 = 0x41002000; define_arm_cp_regs(cpu, cortexa8_cp_reginfo); } @@ -331,7 +379,6 @@ static const ARMCPRegInfo cortexa9_cp_reginfo[] = { .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST }, { .name = "TLB_ATTR", .cp = 15, .crn = 15, .crm = 7, .opc1 = 5, .opc2 = 2, .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST }, - REGINFO_SENTINEL }; static void cortex_a9_initfn(Object *obj) @@ -343,6 +390,7 @@ static void cortex_a9_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); /* * Note that A9 supports the MP extensions even for * A9UP and single-core A9MP (which are both different @@ -373,6 +421,7 @@ static void cortex_a9_initfn(Object *obj) cpu->clidr = (1 << 27) | (1 << 24) | 3; cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */ cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. 
*/ + cpu->isar.reset_pmcr_el0 = 0x41093000; define_arm_cp_regs(cpu, cortexa9_cp_reginfo); } @@ -397,7 +446,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { #endif { .name = "L2ECTLR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 3, .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - REGINFO_SENTINEL }; static void cortex_a7_initfn(Object *obj) @@ -414,7 +462,6 @@ static void cortex_a7_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_EL2); set_feature(&cpu->env, ARM_FEATURE_EL3); set_feature(&cpu->env, ARM_FEATURE_PMU); - cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A7; cpu->midr = 0x410fc075; cpu->reset_fpsid = 0x41023075; cpu->isar.mvfr0 = 0x10110222; @@ -439,10 +486,13 @@ static void cortex_a7_initfn(Object *obj) cpu->isar.id_isar3 = 0x11112131; cpu->isar.id_isar4 = 0x10011142; cpu->isar.dbgdidr = 0x3515f005; + cpu->isar.dbgdevid = 0x01110f13; + cpu->isar.dbgdevid1 = 0x1; cpu->clidr = 0x0a200023; cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ + cpu->isar.reset_pmcr_el0 = 0x41072000; define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ } @@ -460,8 +510,9 @@ static void cortex_a15_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_EL2); set_feature(&cpu->env, ARM_FEATURE_EL3); set_feature(&cpu->env, ARM_FEATURE_PMU); - cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15; - cpu->midr = 0x412fc0f1; + /* r4p0 cpu, not requiring expensive tlb flush errata */ + cpu->midr = 0x414fc0f0; + cpu->revidr = 0x0; cpu->reset_fpsid = 0x410430f0; cpu->isar.mvfr0 = 0x10110222; cpu->isar.mvfr1 = 0x11111111; @@ -481,202 +532,16 @@ static void cortex_a15_initfn(Object *obj) cpu->isar.id_isar3 = 0x11112131; cpu->isar.id_isar4 = 0x10011142; cpu->isar.dbgdidr = 0x3515f021; + cpu->isar.dbgdevid = 0x01110f13; + cpu->isar.dbgdevid1 = 0x0; cpu->clidr = 0x0a200023; cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ + cpu->isar.reset_pmcr_el0 = 0x410F3000; define_arm_cp_regs(cpu, cortexa15_cp_reginfo); } -static void cortex_m0_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - set_feature(&cpu->env, ARM_FEATURE_V6); - set_feature(&cpu->env, ARM_FEATURE_M); - - cpu->midr = 0x410cc200; - - /* - * These ID register values are not guest visible, because - * we do not implement the Main Extension. They must be set - * to values corresponding to the Cortex-M0's implemented - * features, because QEMU generally controls its emulation - * by looking at ID register fields. We use the same values as - * for the M3. 
- */ - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00000030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x00000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; - cpu->isar.id_isar3 = 0x01111110; - cpu->isar.id_isar4 = 0x01310102; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; -} - -static void cortex_m3_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - set_feature(&cpu->env, ARM_FEATURE_V7); - set_feature(&cpu->env, ARM_FEATURE_M); - set_feature(&cpu->env, ARM_FEATURE_M_MAIN); - cpu->midr = 0x410fc231; - cpu->pmsav7_dregion = 8; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00000030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x00000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; - cpu->isar.id_isar3 = 0x01111110; - cpu->isar.id_isar4 = 0x01310102; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; -} - -static void cortex_m4_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - set_feature(&cpu->env, ARM_FEATURE_V7); - set_feature(&cpu->env, ARM_FEATURE_M); - set_feature(&cpu->env, ARM_FEATURE_M_MAIN); - set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); - cpu->midr = 0x410fc240; /* r0p0 */ - cpu->pmsav7_dregion = 8; - cpu->isar.mvfr0 = 0x10110021; - cpu->isar.mvfr1 = 0x11000011; - cpu->isar.mvfr2 = 0x00000000; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00000030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x00000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01141110; - cpu->isar.id_isar1 = 0x02111000; - cpu->isar.id_isar2 = 0x21112231; - cpu->isar.id_isar3 = 0x01111110; - cpu->isar.id_isar4 = 0x01310102; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; -} - -static void cortex_m7_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - set_feature(&cpu->env, ARM_FEATURE_V7); - set_feature(&cpu->env, ARM_FEATURE_M); - set_feature(&cpu->env, ARM_FEATURE_M_MAIN); - set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); - cpu->midr = 0x411fc272; /* r1p2 */ - cpu->pmsav7_dregion = 8; - cpu->isar.mvfr0 = 0x10110221; - cpu->isar.mvfr1 = 0x12000011; - cpu->isar.mvfr2 = 0x00000040; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000200; - cpu->isar.id_dfr0 = 0x00100000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00100030; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01101110; - cpu->isar.id_isar1 = 0x02112000; - cpu->isar.id_isar2 = 0x20232231; - cpu->isar.id_isar3 = 0x01111131; - cpu->isar.id_isar4 = 0x01310132; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; -} - -static void cortex_m33_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - set_feature(&cpu->env, ARM_FEATURE_V8); - set_feature(&cpu->env, ARM_FEATURE_M); - set_feature(&cpu->env, ARM_FEATURE_M_MAIN); - set_feature(&cpu->env, ARM_FEATURE_M_SECURITY); - set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); - cpu->midr = 0x410fd213; /* r0p3 */ - cpu->pmsav7_dregion = 16; - 
cpu->sau_sregion = 8; - cpu->isar.mvfr0 = 0x10110021; - cpu->isar.mvfr1 = 0x11000011; - cpu->isar.mvfr2 = 0x00000040; - cpu->isar.id_pfr0 = 0x00000030; - cpu->isar.id_pfr1 = 0x00000210; - cpu->isar.id_dfr0 = 0x00200000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00101F40; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01000000; - cpu->isar.id_mmfr3 = 0x00000000; - cpu->isar.id_isar0 = 0x01101110; - cpu->isar.id_isar1 = 0x02212000; - cpu->isar.id_isar2 = 0x20232232; - cpu->isar.id_isar3 = 0x01111131; - cpu->isar.id_isar4 = 0x01310132; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; - cpu->clidr = 0x00000000; - cpu->ctr = 0x8000c000; -} - -static void cortex_m55_initfn(Object *obj) -{ - ARMCPU *cpu = ARM_CPU(obj); - - set_feature(&cpu->env, ARM_FEATURE_V8); - set_feature(&cpu->env, ARM_FEATURE_V8_1M); - set_feature(&cpu->env, ARM_FEATURE_M); - set_feature(&cpu->env, ARM_FEATURE_M_MAIN); - set_feature(&cpu->env, ARM_FEATURE_M_SECURITY); - set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP); - cpu->midr = 0x410fd221; /* r0p1 */ - cpu->revidr = 0; - cpu->pmsav7_dregion = 16; - cpu->sau_sregion = 8; - /* These are the MVFR* values for the FPU + full MVE configuration */ - cpu->isar.mvfr0 = 0x10110221; - cpu->isar.mvfr1 = 0x12100211; - cpu->isar.mvfr2 = 0x00000040; - cpu->isar.id_pfr0 = 0x20000030; - cpu->isar.id_pfr1 = 0x00000230; - cpu->isar.id_dfr0 = 0x10200000; - cpu->id_afr0 = 0x00000000; - cpu->isar.id_mmfr0 = 0x00111040; - cpu->isar.id_mmfr1 = 0x00000000; - cpu->isar.id_mmfr2 = 0x01000000; - cpu->isar.id_mmfr3 = 0x00000011; - cpu->isar.id_isar0 = 0x01103110; - cpu->isar.id_isar1 = 0x02212000; - cpu->isar.id_isar2 = 0x20232232; - cpu->isar.id_isar3 = 0x01111131; - cpu->isar.id_isar4 = 0x01310132; - cpu->isar.id_isar5 = 0x00000000; - cpu->isar.id_isar6 = 0x00000000; - cpu->clidr = 0x00000000; /* caches not implemented */ - cpu->ctr = 0x8303c003; -} - static const ARMCPRegInfo cortexr5_cp_reginfo[] = { /* Dummy the TCM region regs for the moment */ { .name = "ATCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0, @@ -685,7 +550,6 @@ static const ARMCPRegInfo cortexr5_cp_reginfo[] = { .access = PL1_RW, .type = ARM_CP_CONST }, { .name = "DCACHE_INVAL", .cp = 15, .opc1 = 0, .crn = 15, .crm = 5, .opc2 = 0, .access = PL1_W, .type = ARM_CP_NOP }, - REGINFO_SENTINEL }; static void cortex_r5_initfn(Object *obj) @@ -714,9 +578,160 @@ static void cortex_r5_initfn(Object *obj) cpu->isar.id_isar6 = 0x0; cpu->mp_is_up = true; cpu->pmsav7_dregion = 16; + cpu->isar.reset_pmcr_el0 = 0x41151800; define_arm_cp_regs(cpu, cortexr5_cp_reginfo); } +static const ARMCPRegInfo cortex_r52_cp_reginfo[] = { + { .name = "CPUACTLR", .cp = 15, .opc1 = 0, .crm = 15, + .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 }, + { .name = "IMP_ATCMREGIONR", + .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_BTCMREGIONR", + .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_CTCMREGIONR", + .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_CSCTLR", + .cp = 15, .opc1 = 1, .crn = 9, .crm = 1, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_BPCTLR", + .cp = 15, .opc1 = 1, .crn = 9, .crm = 1, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_MEMPROTCLR", + 
.cp = 15, .opc1 = 1, .crn = 9, .crm = 1, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_SLAVEPCTLR", + .cp = 15, .opc1 = 0, .crn = 11, .crm = 0, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_PERIPHREGIONR", + .cp = 15, .opc1 = 0, .crn = 15, .crm = 0, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_FLASHIFREGIONR", + .cp = 15, .opc1 = 0, .crn = 15, .crm = 0, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_BUILDOPTR", + .cp = 15, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_PINOPTR", + .cp = 15, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 7, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_QOSR", + .cp = 15, .opc1 = 1, .crn = 15, .crm = 3, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_BUSTIMEOUTR", + .cp = 15, .opc1 = 1, .crn = 15, .crm = 3, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_INTMONR", + .cp = 15, .opc1 = 1, .crn = 15, .crm = 3, .opc2 = 4, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_ICERR0", + .cp = 15, .opc1 = 2, .crn = 15, .crm = 0, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_ICERR1", + .cp = 15, .opc1 = 2, .crn = 15, .crm = 0, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_DCERR0", + .cp = 15, .opc1 = 2, .crn = 15, .crm = 1, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_DCERR1", + .cp = 15, .opc1 = 2, .crn = 15, .crm = 1, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_TCMERR0", + .cp = 15, .opc1 = 2, .crn = 15, .crm = 2, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_TCMERR1", + .cp = 15, .opc1 = 2, .crn = 15, .crm = 2, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_TCMSYNDR0", + .cp = 15, .opc1 = 2, .crn = 15, .crm = 2, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_TCMSYNDR1", + .cp = 15, .opc1 = 2, .crn = 15, .crm = 2, .opc2 = 3, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_FLASHERR0", + .cp = 15, .opc1 = 2, .crn = 15, .crm = 3, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_FLASHERR1", + .cp = 15, .opc1 = 2, .crn = 15, .crm = 3, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_CDBGDR0", + .cp = 15, .opc1 = 3, .crn = 15, .crm = 0, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_CBDGBR1", + .cp = 15, .opc1 = 3, .crn = 15, .crm = 0, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_TESTR0", + .cp = 15, .opc1 = 4, .crn = 15, .crm = 0, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IMP_TESTR1", + .cp = 15, .opc1 = 4, .crn = 15, .crm = 0, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 }, + { .name = "IMP_CDBGDCI", + .cp = 15, .opc1 = 0, .crn = 15, .crm = 15, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 }, + { .name = "IMP_CDBGDCT", + .cp = 15, .opc1 = 3, .crn = 15, .crm = 2, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 }, + { 
.name = "IMP_CDBGICT", + .cp = 15, .opc1 = 3, .crn = 15, .crm = 2, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 }, + { .name = "IMP_CDBGDCD", + .cp = 15, .opc1 = 3, .crn = 15, .crm = 4, .opc2 = 0, + .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 }, + { .name = "IMP_CDBGICD", + .cp = 15, .opc1 = 3, .crn = 15, .crm = 4, .opc2 = 1, + .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 }, +}; + + +static void cortex_r52_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_PMSA); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_AUXCR); + cpu->midr = 0x411fd133; /* r1p3 */ + cpu->revidr = 0x00000000; + cpu->reset_fpsid = 0x41034023; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; + cpu->ctr = 0x8144c004; + cpu->reset_sctlr = 0x30c50838; + cpu->isar.id_pfr0 = 0x00000131; + cpu->isar.id_pfr1 = 0x10111001; + cpu->isar.id_dfr0 = 0x03010006; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x00211040; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01200000; + cpu->isar.id_mmfr3 = 0xf0102211; + cpu->isar.id_mmfr4 = 0x00000010; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232142; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00010142; + cpu->isar.id_isar5 = 0x00010001; + cpu->isar.dbgdidr = 0x77168000; + cpu->clidr = (1 << 27) | (1 << 24) | 0x3; + cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */ + + cpu->pmsav7_dregion = 16; + cpu->pmsav8r_hdregion = 16; + + define_arm_cp_regs(cpu, cortex_r52_cp_reginfo); +} + static void cortex_r5f_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); @@ -894,38 +909,6 @@ static void pxa270c5_initfn(Object *obj) cpu->reset_sctlr = 0x00000078; } -#ifdef CONFIG_TCG -static const struct TCGCPUOps arm_v7m_tcg_ops = { - .initialize = arm_translate_init, - .synchronize_from_tb = arm_cpu_synchronize_from_tb, - .tlb_fill = arm_cpu_tlb_fill, - .debug_excp_handler = arm_debug_excp_handler, - -#if !defined(CONFIG_USER_ONLY) - .cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt, - .do_interrupt = arm_v7m_cpu_do_interrupt, - .do_transaction_failed = arm_cpu_do_transaction_failed, - .do_unaligned_access = arm_cpu_do_unaligned_access, - .adjust_watchpoint_address = arm_adjust_watchpoint_address, - .debug_check_watchpoint = arm_debug_check_watchpoint, - .debug_check_breakpoint = arm_debug_check_breakpoint, -#endif /* !CONFIG_USER_ONLY */ -}; -#endif /* CONFIG_TCG */ - -static void arm_v7m_class_init(ObjectClass *oc, void *data) -{ - ARMCPUClass *acc = ARM_CPU_CLASS(oc); - CPUClass *cc = CPU_CLASS(oc); - - acc->info = data; -#ifdef CONFIG_TCG - cc->tcg_ops = &arm_v7m_tcg_ops; -#endif /* CONFIG_TCG */ - - cc->gdb_core_xml_file = "arm-m-profile.xml"; -} - #ifndef TARGET_AARCH64 /* * -cpu max: a CPU with as many features enabled as our emulation supports. 
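The hunk above deletes the v7M glue from this file: the arm_v7m_tcg_ops hook table, arm_v7m_class_init, and (further down) the cortex-m* entries in the CPU list, all of which move out with the rest of the M-profile models. The pattern is worth keeping in mind while reading the series: a QOM class_init installs the per-class TCG callbacks and the gdbstub core XML. A minimal sketch of that pattern, reusing the names from the removed hunk above; this is an illustration of the shape of the code, not the relocated code itself:

    #ifdef CONFIG_TCG
    static const struct TCGCPUOps arm_v7m_tcg_ops = {
        .initialize = arm_translate_init,   /* set up the decoder */
        .synchronize_from_tb = arm_cpu_synchronize_from_tb,
        .tlb_fill = arm_cpu_tlb_fill,
        .debug_excp_handler = arm_debug_excp_handler,
    };
    #endif

    static void arm_v7m_class_init(ObjectClass *oc, void *data)
    {
        ARMCPUClass *acc = ARM_CPU_CLASS(oc);
        CPUClass *cc = CPU_CLASS(oc);

        acc->info = data;                /* record the ARMCPUInfo for this model */
    #ifdef CONFIG_TCG
        cc->tcg_ops = &arm_v7m_tcg_ops;  /* per-class execution hooks */
    #endif
        cc->gdb_core_xml_file = "arm-m-profile.xml";
    }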
@@ -936,70 +919,58 @@ static void arm_max_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); - cortex_a15_initfn(obj); + /* aarch64_a57_initfn, advertising none of the aarch64 features */ + cpu->dtb_compatible = "arm,cortex-a57"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + cpu->midr = 0x411fd070; + cpu->revidr = 0x00000000; + cpu->reset_fpsid = 0x41034070; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; + cpu->ctr = 0x8444c004; + cpu->reset_sctlr = 0x00c50838; + cpu->isar.id_pfr0 = 0x00000131; + cpu->isar.id_pfr1 = 0x00011011; + cpu->isar.id_dfr0 = 0x03010066; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x10101105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02102211; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_isar6 = 0; + cpu->isar.dbgdidr = 0x3516d000; + cpu->isar.dbgdevid = 0x00110f13; + cpu->isar.dbgdevid1 = 0x2; + cpu->isar.reset_pmcr_el0 = 0x41013000; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ + cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */ + define_cortex_a72_a57_a53_cp_reginfo(cpu); - /* old-style VFP short-vector support */ - cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); + aa32_max_features(cpu); #ifdef CONFIG_USER_ONLY /* - * We don't set these in system emulation mode for the moment, - * since we don't correctly set (all of) the ID registers to - * advertise them. + * Break with true ARMv8 and add back old-style VFP short-vector support. + * Only do this for user-mode, where -cpu max is the default, so that + * older v6 and v7 programs are more likely to work without adjustment. 
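+     * (Short-vector mode uses the FPSCR.Len and Stride fields to make a
+     * scalar VFP instruction iterate implicitly over a bank of registers;
+     * Armv8 removed the feature, hence the need to "break" with it here.)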
*/ - set_feature(&cpu->env, ARM_FEATURE_V8); - { - uint32_t t; - - t = cpu->isar.id_isar5; - t = FIELD_DP32(t, ID_ISAR5, AES, 2); - t = FIELD_DP32(t, ID_ISAR5, SHA1, 1); - t = FIELD_DP32(t, ID_ISAR5, SHA2, 1); - t = FIELD_DP32(t, ID_ISAR5, CRC32, 1); - t = FIELD_DP32(t, ID_ISAR5, RDM, 1); - t = FIELD_DP32(t, ID_ISAR5, VCMA, 1); - cpu->isar.id_isar5 = t; - - t = cpu->isar.id_isar6; - t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1); - t = FIELD_DP32(t, ID_ISAR6, DP, 1); - t = FIELD_DP32(t, ID_ISAR6, FHM, 1); - t = FIELD_DP32(t, ID_ISAR6, SB, 1); - t = FIELD_DP32(t, ID_ISAR6, SPECRES, 1); - t = FIELD_DP32(t, ID_ISAR6, BF16, 1); - t = FIELD_DP32(t, ID_ISAR6, I8MM, 1); - cpu->isar.id_isar6 = t; - - t = cpu->isar.mvfr1; - t = FIELD_DP32(t, MVFR1, FPHP, 3); /* v8.2-FP16 */ - t = FIELD_DP32(t, MVFR1, SIMDHP, 2); /* v8.2-FP16 */ - cpu->isar.mvfr1 = t; - - t = cpu->isar.mvfr2; - t = FIELD_DP32(t, MVFR2, SIMDMISC, 3); /* SIMD MaxNum */ - t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */ - cpu->isar.mvfr2 = t; - - t = cpu->isar.id_mmfr3; - t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* ATS1E1 */ - cpu->isar.id_mmfr3 = t; - - t = cpu->isar.id_mmfr4; - t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */ - t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */ - t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* TTCNP */ - t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* TTS2UXN */ - cpu->isar.id_mmfr4 = t; - - t = cpu->isar.id_pfr0; - t = FIELD_DP32(t, ID_PFR0, DIT, 1); - cpu->isar.id_pfr0 = t; - - t = cpu->isar.id_pfr2; - t = FIELD_DP32(t, ID_PFR2, SSBS, 1); - cpu->isar.id_pfr2 = t; - } -#endif /* CONFIG_USER_ONLY */ + cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); +#endif } #endif /* !TARGET_AARCH64 */ @@ -1020,20 +991,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "cortex-a8", .initfn = cortex_a8_initfn }, { .name = "cortex-a9", .initfn = cortex_a9_initfn }, { .name = "cortex-a15", .initfn = cortex_a15_initfn }, - { .name = "cortex-m0", .initfn = cortex_m0_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m3", .initfn = cortex_m3_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m4", .initfn = cortex_m4_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m7", .initfn = cortex_m7_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m33", .initfn = cortex_m33_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m55", .initfn = cortex_m55_initfn, - .class_init = arm_v7m_class_init }, { .name = "cortex-r5", .initfn = cortex_r5_initfn }, { .name = "cortex-r5f", .initfn = cortex_r5f_initfn }, + { .name = "cortex-r52", .initfn = cortex_r52_initfn }, { .name = "ti925t", .initfn = ti925t_initfn }, { .name = "sa1100", .initfn = sa1100_initfn }, { .name = "sa1110", .initfn = sa1110_initfn }, diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c new file mode 100644 index 0000000000..9f7a9f3d2c --- /dev/null +++ b/target/arm/tcg/cpu64.c @@ -0,0 +1,1295 @@ +/* + * QEMU AArch64 TCG CPUs + * + * Copyright (c) 2013 Linaro Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see + * <http://www.gnu.org/licenses/gpl-2.0.html> + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "cpu.h" +#include "qemu/module.h" +#include "qapi/visitor.h" +#include "hw/qdev-properties.h" +#include "qemu/units.h" +#include "internals.h" +#include "cpu-features.h" +#include "cpregs.h" + +static uint64_t make_ccsidr64(unsigned assoc, unsigned linesize, + unsigned cachesize) +{ + unsigned lg_linesize = ctz32(linesize); + unsigned sets; + + /* + * The 64-bit CCSIDR_EL1 format is: + * [55:32] number of sets - 1 + * [23:3] associativity - 1 + * [2:0] log2(linesize) - 4 + * so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc + */ + assert(assoc != 0); + assert(is_power_of_2(linesize)); + assert(lg_linesize >= 4 && lg_linesize <= 7 + 4); + + /* sets * associativity * linesize == cachesize. */ + sets = cachesize / (assoc * linesize); + assert(cachesize % (assoc * linesize) == 0); + + return ((uint64_t)(sets - 1) << 32) + | ((assoc - 1) << 3) + | (lg_linesize - 4); +} + +static void aarch64_a35_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a35"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + + /* From B2.2 AArch64 identification registers. */ + cpu->midr = 0x411fd040; + cpu->revidr = 0; + cpu->ctr = 0x84448004; + cpu->isar.id_pfr0 = 0x00000131; + cpu->isar.id_pfr1 = 0x00011011; + cpu->isar.id_dfr0 = 0x03010066; + cpu->id_afr0 = 0; + cpu->isar.id_mmfr0 = 0x10201105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02102211; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_aa64pfr0 = 0x00002222; + cpu->isar.id_aa64pfr1 = 0; + cpu->isar.id_aa64dfr0 = 0x10305106; + cpu->isar.id_aa64dfr1 = 0; + cpu->isar.id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64isar1 = 0; + cpu->isar.id_aa64mmfr0 = 0x00101122; + cpu->isar.id_aa64mmfr1 = 0; + cpu->clidr = 0x0a200023; + cpu->dcz_blocksize = 4; + + /* From B2.4 AArch64 Virtual Memory control registers */ + cpu->reset_sctlr = 0x00c50838; + + /* From B2.10 AArch64 performance monitor registers */ + cpu->isar.reset_pmcr_el0 = 0x410a3000; + + /* From B2.29 Cache ID registers */ + cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */ + cpu->ccsidr[2] = 0x703fe03a; /* 512KB L2 cache */ + + /* From B3.5 VGIC Type register */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + cpu->gic_pribits = 5; + + /* From C6.4 Debug ID Register */ + cpu->isar.dbgdidr = 0x3516d000; + /* From C6.5 Debug Device ID Register */ + cpu->isar.dbgdevid = 0x00110f13; + /* From C6.6 Debug Device ID Register 1 */ + cpu->isar.dbgdevid1 = 0x2; + + /* From Cortex-A35 SIMD and Floating-point Support r1p0 */ + /* From 3.2 AArch32 register summary */ + cpu->reset_fpsid = 0x41034043; + + /* From 2.2 AArch64 register summary */ + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; + + /* 
These values are the same with A53/A57/A72. */ + define_cortex_a72_a57_a53_cp_reginfo(cpu); +} + +static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + uint32_t value; + + /* All vector lengths are disabled when SVE is off. */ + if (!cpu_isar_feature(aa64_sve, cpu)) { + value = 0; + } else { + value = cpu->sve_max_vq; + } + visit_type_uint32(v, name, &value, errp); +} + +static void cpu_max_set_sve_max_vq(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + uint32_t max_vq; + + if (!visit_type_uint32(v, name, &max_vq, errp)) { + return; + } + + if (max_vq == 0 || max_vq > ARM_MAX_VQ) { + error_setg(errp, "unsupported SVE vector length"); + error_append_hint(errp, "Valid sve-max-vq in range [1-%d]\n", + ARM_MAX_VQ); + return; + } + + cpu->sve_max_vq = max_vq; +} + +static bool cpu_arm_get_rme(Object *obj, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + return cpu_isar_feature(aa64_rme, cpu); +} + +static void cpu_arm_set_rme(Object *obj, bool value, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + uint64_t t; + + t = cpu->isar.id_aa64pfr0; + t = FIELD_DP64(t, ID_AA64PFR0, RME, value); + cpu->isar.id_aa64pfr0 = t; +} + +static void cpu_max_set_l0gptsz(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + uint32_t value; + + if (!visit_type_uint32(v, name, &value, errp)) { + return; + } + + /* Encode the value for the GPCCR_EL3 field. */ + switch (value) { + case 30: + case 34: + case 36: + case 39: + cpu->reset_l0gptsz = value - 30; + break; + default: + error_setg(errp, "invalid value for l0gptsz"); + error_append_hint(errp, "valid values are 30, 34, 36, 39\n"); + break; + } +} + +static void cpu_max_get_l0gptsz(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + ARMCPU *cpu = ARM_CPU(obj); + uint32_t value = cpu->reset_l0gptsz + 30; + + visit_type_uint32(v, name, &value, errp); +} + +static Property arm_cpu_lpa2_property = + DEFINE_PROP_BOOL("lpa2", ARMCPU, prop_lpa2, true); + +static void aarch64_a55_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a55"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + + /* Ordered by B2.4 AArch64 registers by functional group */ + cpu->clidr = 0x82000023; + cpu->ctr = 0x84448004; /* L1Ip = VIPT */ + cpu->dcz_blocksize = 4; /* 64 bytes */ + cpu->isar.id_aa64dfr0 = 0x0000000010305408ull; + cpu->isar.id_aa64isar0 = 0x0000100010211120ull; + cpu->isar.id_aa64isar1 = 0x0000000000100001ull; + cpu->isar.id_aa64mmfr0 = 0x0000000000101122ull; + cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; + cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull; + cpu->isar.id_aa64pfr0 = 0x0000000010112222ull; + cpu->isar.id_aa64pfr1 = 0x0000000000000010ull; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_dfr0 = 0x04010088; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x01011121; + cpu->isar.id_isar6 = 0x00000010; + cpu->isar.id_mmfr0 = 0x10201105; + cpu->isar.id_mmfr1 
= 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02122211; + cpu->isar.id_mmfr4 = 0x00021110; + cpu->isar.id_pfr0 = 0x10010131; + cpu->isar.id_pfr1 = 0x00011011; + cpu->isar.id_pfr2 = 0x00000011; + cpu->midr = 0x412FD050; /* r2p0 */ + cpu->revidr = 0; + + /* From B2.23 CCSIDR_EL1 */ + cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x200fe01a; /* 32KB L1 icache */ + cpu->ccsidr[2] = 0x703fe07a; /* 512KB L2 cache */ + + /* From B2.96 SCTLR_EL3 */ + cpu->reset_sctlr = 0x30c50838; + + /* From B4.45 ICH_VTR_EL2 */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + cpu->gic_pribits = 5; + + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x13211111; + cpu->isar.mvfr2 = 0x00000043; + + /* From D5.4 AArch64 PMU register summary */ + cpu->isar.reset_pmcr_el0 = 0x410b3000; +} + +static void aarch64_a72_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a72"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + cpu->midr = 0x410fd083; + cpu->revidr = 0x00000000; + cpu->reset_fpsid = 0x41034080; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x12111111; + cpu->isar.mvfr2 = 0x00000043; + cpu->ctr = 0x8444c004; + cpu->reset_sctlr = 0x00c50838; + cpu->isar.id_pfr0 = 0x00000131; + cpu->isar.id_pfr1 = 0x00011011; + cpu->isar.id_dfr0 = 0x03010066; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_mmfr0 = 0x10201105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02102211; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_aa64pfr0 = 0x00002222; + cpu->isar.id_aa64dfr0 = 0x10305106; + cpu->isar.id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64mmfr0 = 0x00001124; + cpu->isar.dbgdidr = 0x3516d000; + cpu->isar.dbgdevid = 0x01110f13; + cpu->isar.dbgdevid1 = 0x2; + cpu->isar.reset_pmcr_el0 = 0x41023000; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ + cpu->ccsidr[2] = 0x707fe07a; /* 1MB L2 cache */ + cpu->dcz_blocksize = 4; /* 64 bytes */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + cpu->gic_pribits = 5; + define_cortex_a72_a57_a53_cp_reginfo(cpu); +} + +static void aarch64_a76_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a76"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + + /* Ordered by B2.4 AArch64 registers by functional group */ + cpu->clidr = 0x82000023; + cpu->ctr = 0x8444C004; + cpu->dcz_blocksize = 4; + cpu->isar.id_aa64dfr0 = 0x0000000010305408ull; + cpu->isar.id_aa64isar0 = 0x0000100010211120ull; + cpu->isar.id_aa64isar1 = 0x0000000000100001ull; + cpu->isar.id_aa64mmfr0 = 0x0000000000101122ull; + 
cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; + cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull; + cpu->isar.id_aa64pfr0 = 0x1100000010111112ull; /* GIC filled in later */ + cpu->isar.id_aa64pfr1 = 0x0000000000000010ull; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_dfr0 = 0x04010088; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00010142; + cpu->isar.id_isar5 = 0x01011121; + cpu->isar.id_isar6 = 0x00000010; + cpu->isar.id_mmfr0 = 0x10201105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02122211; + cpu->isar.id_mmfr4 = 0x00021110; + cpu->isar.id_pfr0 = 0x10010131; + cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ + cpu->isar.id_pfr2 = 0x00000011; + cpu->midr = 0x414fd0b1; /* r4p1 */ + cpu->revidr = 0; + + /* From B2.18 CCSIDR_EL1 */ + cpu->ccsidr[0] = 0x701fe01a; /* 64KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe01a; /* 64KB L1 icache */ + cpu->ccsidr[2] = 0x707fe03a; /* 512KB L2 cache */ + + /* From B2.93 SCTLR_EL3 */ + cpu->reset_sctlr = 0x30c50838; + + /* From B4.23 ICH_VTR_EL2 */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + cpu->gic_pribits = 5; + + /* From B5.1 AdvSIMD AArch64 register summary */ + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x13211111; + cpu->isar.mvfr2 = 0x00000043; + + /* From D5.1 AArch64 PMU register summary */ + cpu->isar.reset_pmcr_el0 = 0x410b3000; +} + +static void aarch64_a64fx_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,a64fx"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + cpu->midr = 0x461f0010; + cpu->revidr = 0x00000000; + cpu->ctr = 0x86668006; + cpu->reset_sctlr = 0x30000180; + cpu->isar.id_aa64pfr0 = 0x0000000101111111; /* No RAS Extensions */ + cpu->isar.id_aa64pfr1 = 0x0000000000000000; + cpu->isar.id_aa64dfr0 = 0x0000000010305408; + cpu->isar.id_aa64dfr1 = 0x0000000000000000; + cpu->id_aa64afr0 = 0x0000000000000000; + cpu->id_aa64afr1 = 0x0000000000000000; + cpu->isar.id_aa64mmfr0 = 0x0000000000001122; + cpu->isar.id_aa64mmfr1 = 0x0000000011212100; + cpu->isar.id_aa64mmfr2 = 0x0000000000001011; + cpu->isar.id_aa64isar0 = 0x0000000010211120; + cpu->isar.id_aa64isar1 = 0x0000000000010001; + cpu->isar.id_aa64zfr0 = 0x0000000000000000; + cpu->clidr = 0x0000000080000023; + cpu->ccsidr[0] = 0x7007e01c; /* 64KB L1 dcache */ + cpu->ccsidr[1] = 0x2007e01c; /* 64KB L1 icache */ + cpu->ccsidr[2] = 0x70ffe07c; /* 8MB L2 cache */ + cpu->dcz_blocksize = 6; /* 256 bytes */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + cpu->gic_pribits = 5; + + /* The A64FX supports only 128, 256 and 512 bit vector lengths */ + aarch64_add_sve_properties(obj); + cpu->sve_vq.supported = (1 << 0) /* 128bit */ + | (1 << 1) /* 256bit */ + | (1 << 3); /* 512bit */ + + cpu->isar.reset_pmcr_el0 = 0x46014040; + + /* TODO: Add A64FX specific HPC extension registers */ +} + +static CPAccessResult access_actlr_w(CPUARMState *env, const ARMCPRegInfo *r, + bool read) +{ + if (!read) { + int el = arm_current_el(env); + + /* Because ACTLR_EL2 is constant 0, writes below EL2 trap to EL2. 
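+     * (On hardware, IMPDEF ACTLR_EL2/ACTLR_EL3 bits gate lower-EL access
+     * to registers like these; modelling both as constant 0 means such
+     * writes always take the trap, which is what the checks below do.)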
*/ + if (el < 2 && arm_is_el2_enabled(env)) { + return CP_ACCESS_TRAP_EL2; + } + /* Because ACTLR_EL3 is constant 0, writes below EL3 trap to EL3. */ + if (el < 3 && arm_feature(env, ARM_FEATURE_EL3)) { + return CP_ACCESS_TRAP_EL3; + } + } + return CP_ACCESS_OK; +} + +static const ARMCPRegInfo neoverse_n1_cp_reginfo[] = { + { .name = "ATCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 7, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + /* Traps and enables are the same as for TCR_EL1. */ + .accessfn = access_tvm_trvm, .fgt = FGT_TCR_EL1, }, + { .name = "ATCR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 15, .crm = 7, .opc2 = 0, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "ATCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 7, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "ATCR_EL12", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 5, .crn = 15, .crm = 7, .opc2 = 0, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "AVTCR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 15, .crm = 7, .opc2 = 1, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUACTLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUACTLR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUACTLR3_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + /* + * Report CPUCFR_EL1.SCU as 1, as we do not implement the DSU + * (and in particular its system registers). 
+ */ + { .name = "CPUCFR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 0, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 4 }, + { .name = "CPUECTLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 4, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0x961563010, + .accessfn = access_actlr_w }, + { .name = "CPUPCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 1, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPMR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 3, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPOR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 2, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPSELR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPWRCTLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 7, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "ERXPFGCDN_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "ERXPFGCTL_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "ERXPFGF_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, +}; + +static void define_neoverse_n1_cp_reginfo(ARMCPU *cpu) +{ + define_arm_cp_regs(cpu, neoverse_n1_cp_reginfo); +} + +static const ARMCPRegInfo neoverse_v1_cp_reginfo[] = { + { .name = "CPUECTLR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 5, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUPPMCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR2_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 1, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR3_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 6, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, +}; + +static void define_neoverse_v1_cp_reginfo(ARMCPU *cpu) +{ + /* + * The Neoverse V1 has all of the Neoverse N1's IMPDEF + * registers and a few more of its own. 
+ */ + define_arm_cp_regs(cpu, neoverse_n1_cp_reginfo); + define_arm_cp_regs(cpu, neoverse_v1_cp_reginfo); +} + +static void aarch64_neoverse_n1_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,neoverse-n1"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + + /* Ordered by B2.4 AArch64 registers by functional group */ + cpu->clidr = 0x82000023; + cpu->ctr = 0x8444c004; + cpu->dcz_blocksize = 4; + cpu->isar.id_aa64dfr0 = 0x0000000110305408ull; + cpu->isar.id_aa64isar0 = 0x0000100010211120ull; + cpu->isar.id_aa64isar1 = 0x0000000000100001ull; + cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull; + cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; + cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull; + cpu->isar.id_aa64pfr0 = 0x1100000010111112ull; /* GIC filled in later */ + cpu->isar.id_aa64pfr1 = 0x0000000000000020ull; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_dfr0 = 0x04010088; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00010142; + cpu->isar.id_isar5 = 0x01011121; + cpu->isar.id_isar6 = 0x00000010; + cpu->isar.id_mmfr0 = 0x10201105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02122211; + cpu->isar.id_mmfr4 = 0x00021110; + cpu->isar.id_pfr0 = 0x10010131; + cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ + cpu->isar.id_pfr2 = 0x00000011; + cpu->midr = 0x414fd0c1; /* r4p1 */ + cpu->revidr = 0; + + /* From B2.23 CCSIDR_EL1 */ + cpu->ccsidr[0] = 0x701fe01a; /* 64KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe01a; /* 64KB L1 icache */ + cpu->ccsidr[2] = 0x70ffe03a; /* 1MB L2 cache */ + + /* From B2.98 SCTLR_EL3 */ + cpu->reset_sctlr = 0x30c50838; + + /* From B4.23 ICH_VTR_EL2 */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + cpu->gic_pribits = 5; + + /* From B5.1 AdvSIMD AArch64 register summary */ + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x13211111; + cpu->isar.mvfr2 = 0x00000043; + + /* From D5.1 AArch64 PMU register summary */ + cpu->isar.reset_pmcr_el0 = 0x410c3000; + + define_neoverse_n1_cp_reginfo(cpu); +} + +static void aarch64_neoverse_v1_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,neoverse-v1"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + + /* Ordered by 3.2.4 AArch64 registers by functional group */ + cpu->clidr = 0x82000023; + cpu->ctr = 0xb444c004; /* With DIC and IDC set */ + cpu->dcz_blocksize = 4; + cpu->id_aa64afr0 = 0x00000000; + cpu->id_aa64afr1 = 0x00000000; + cpu->isar.id_aa64dfr0 = 0x000001f210305519ull; + cpu->isar.id_aa64dfr1 = 0x00000000; + cpu->isar.id_aa64isar0 = 0x1011111110212120ull; /* with FEAT_RNG */ + cpu->isar.id_aa64isar1 = 0x0111000001211032ull; + cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull; + cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; + 
cpu->isar.id_aa64mmfr2 = 0x0220011102101011ull; + cpu->isar.id_aa64pfr0 = 0x1101110120111112ull; /* GIC filled in later */ + cpu->isar.id_aa64pfr1 = 0x0000000000000020ull; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_dfr0 = 0x15011099; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00010142; + cpu->isar.id_isar5 = 0x11011121; + cpu->isar.id_isar6 = 0x01100111; + cpu->isar.id_mmfr0 = 0x10201105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02122211; + cpu->isar.id_mmfr4 = 0x01021110; + cpu->isar.id_pfr0 = 0x21110131; + cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ + cpu->isar.id_pfr2 = 0x00000011; + cpu->midr = 0x411FD402; /* r1p2 */ + cpu->revidr = 0; + + /* + * The Neoverse-V1 r1p2 TRM lists 32-bit format CCSIDR_EL1 values, + * but also says it implements CCIDX, which means they should be + * 64-bit format. So we here use values which are based on the textual + * information in chapter 2 of the TRM: + * + * L1: 4-way set associative 64-byte line size, total size 64K. + * L2: 8-way set associative, 64 byte line size, either 512K or 1MB. + * L3: No L3 (this matches the CLIDR_EL1 value). + */ + cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* L1 dcache */ + cpu->ccsidr[1] = cpu->ccsidr[0]; /* L1 icache */ + cpu->ccsidr[2] = make_ccsidr64(8, 64, 1 * MiB); /* L2 cache */ + + /* From 3.2.115 SCTLR_EL3 */ + cpu->reset_sctlr = 0x30c50838; + + /* From 3.4.8 ICC_CTLR_EL3 and 3.4.23 ICH_VTR_EL2 */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + cpu->gic_pribits = 5; + + /* From 3.5.1 AdvSIMD AArch64 register summary */ + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x13211111; + cpu->isar.mvfr2 = 0x00000043; + + /* From 3.7.5 ID_AA64ZFR0_EL1 */ + cpu->isar.id_aa64zfr0 = 0x0000100000100000; + cpu->sve_vq.supported = (1 << 0) /* 128bit */ + | (1 << 1); /* 256bit */ + + /* From 5.5.1 AArch64 PMU register summary */ + cpu->isar.reset_pmcr_el0 = 0x41213000; + + define_neoverse_v1_cp_reginfo(cpu); + + aarch64_add_pauth_properties(obj); + aarch64_add_sve_properties(obj); +} + +static const ARMCPRegInfo cortex_a710_cp_reginfo[] = { + { .name = "CPUACTLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUACTLR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUACTLR3_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUACTLR4_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 3, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUECTLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 4, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUECTLR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 5, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUPPMCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 
3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 4, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPWRCTLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 7, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "ATCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 7, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUACTLR5_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 8, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUACTLR6_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 8, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUACTLR7_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 8, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "ATCR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 15, .crm = 7, .opc2 = 0, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "AVTCR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 15, .crm = 7, .opc2 = 1, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR2_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 1, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR4_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 4, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR5_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 5, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR6_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 6, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUACTLR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 4, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "ATCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 7, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPSELR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 1, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPOR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 2, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPMR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 3, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPOR2_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 4, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPMR2_EL3", .state = 
ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 5, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPFR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 6, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + /* + * Report CPUCFR_EL1.SCU as 1, as we do not implement the DSU + * (and in particular its system registers). + */ + { .name = "CPUCFR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 0, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 4 }, + + /* + * Stub RAMINDEX, as we don't actually implement caches, BTB, + * or anything else with cpu internal memory. + * "Read" zeros into the IDATA* and DDATA* output registers. + */ + { .name = "RAMINDEX_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 0, + .access = PL3_W, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IDATA0_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 0, + .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IDATA1_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 1, + .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IDATA2_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 2, + .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DDATA0_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 1, .opc2 = 0, + .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DDATA1_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 1, .opc2 = 1, + .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DDATA2_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 1, .opc2 = 2, + .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 }, +}; + +static void aarch64_a710_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a710"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + + /* Ordered by Section B.4: AArch64 registers */ + cpu->midr = 0x412FD471; /* r2p1 */ + cpu->revidr = 0; + cpu->isar.id_pfr0 = 0x21110131; + cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ + cpu->isar.id_dfr0 = 0x16011099; + cpu->id_afr0 = 0; + cpu->isar.id_mmfr0 = 0x10201105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02122211; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00010142; + cpu->isar.id_isar5 = 0x11011121; /* with Crypto */ + cpu->isar.id_mmfr4 = 0x21021110; + cpu->isar.id_isar6 = 0x01111111; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x13211111; + cpu->isar.mvfr2 = 0x00000043; + cpu->isar.id_pfr2 = 0x00000011; + cpu->isar.id_aa64pfr0 = 0x1201111120111112ull; /* GIC filled in later */ + cpu->isar.id_aa64pfr1 = 0x0000000000000221ull; + cpu->isar.id_aa64zfr0 = 0x0000110100110021ull; /* with Crypto */ + cpu->isar.id_aa64dfr0 = 0x000011f010305619ull; + cpu->isar.id_aa64dfr1 = 
0; + cpu->id_aa64afr0 = 0; + cpu->id_aa64afr1 = 0; + cpu->isar.id_aa64isar0 = 0x0221111110212120ull; /* with Crypto */ + cpu->isar.id_aa64isar1 = 0x0010111101211052ull; + cpu->isar.id_aa64mmfr0 = 0x0000022200101122ull; + cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; + cpu->isar.id_aa64mmfr2 = 0x1221011110101011ull; + cpu->clidr = 0x0000001482000023ull; + cpu->gm_blocksize = 4; + cpu->ctr = 0x000000049444c004ull; + cpu->dcz_blocksize = 4; + /* TODO FEAT_MPAM: mpamidr_el1 = 0x0000_0001_0006_003f */ + + /* Section B.5.2: PMCR_EL0 */ + cpu->isar.reset_pmcr_el0 = 0xa000; /* with 20 counters */ + + /* Section B.6.7: ICH_VTR_EL2 */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + cpu->gic_pribits = 5; + + /* Section 14: Scalable Vector Extensions support */ + cpu->sve_vq.supported = 1 << 0; /* 128bit */ + + /* + * The cortex-a710 TRM does not list CCSIDR values. The layout of + * the caches are in text in Table 7-1, Table 8-1, and Table 9-1. + * + * L1: 4-way set associative 64-byte line size, total either 32K or 64K. + * L2: 8-way set associative 64 byte line size, total either 256K or 512K. + */ + cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* L1 dcache */ + cpu->ccsidr[1] = cpu->ccsidr[0]; /* L1 icache */ + cpu->ccsidr[2] = make_ccsidr64(8, 64, 512 * KiB); /* L2 cache */ + + /* FIXME: Not documented -- copied from neoverse-v1 */ + cpu->reset_sctlr = 0x30c50838; + + define_arm_cp_regs(cpu, cortex_a710_cp_reginfo); + + aarch64_add_pauth_properties(obj); + aarch64_add_sve_properties(obj); +} + +/* Extra IMPDEF regs in the N2 beyond those in the A710 */ +static const ARMCPRegInfo neoverse_n2_cp_reginfo[] = { + { .name = "CPURNDBR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 3, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPURNDPEID_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 3, .opc2 = 1, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, +}; + +static void aarch64_neoverse_n2_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,neoverse-n2"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + + /* Ordered by Section B.5: AArch64 ID registers */ + cpu->midr = 0x410FD493; /* r0p3 */ + cpu->revidr = 0; + cpu->isar.id_pfr0 = 0x21110131; + cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ + cpu->isar.id_dfr0 = 0x16011099; + cpu->id_afr0 = 0; + cpu->isar.id_mmfr0 = 0x10201105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02122211; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00010142; + cpu->isar.id_isar5 = 0x11011121; /* with Crypto */ + cpu->isar.id_mmfr4 = 0x01021110; + cpu->isar.id_isar6 = 0x01111111; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x13211111; + cpu->isar.mvfr2 = 0x00000043; + cpu->isar.id_pfr2 = 0x00000011; + cpu->isar.id_aa64pfr0 = 0x1201111120111112ull; /* GIC filled in later */ + cpu->isar.id_aa64pfr1 = 0x0000000000000221ull; + cpu->isar.id_aa64zfr0 = 0x0000110100110021ull; /* with Crypto */ + cpu->isar.id_aa64dfr0 = 
0x000011f210305619ull;
+    cpu->isar.id_aa64dfr1 = 0;
+    cpu->id_aa64afr0 = 0;
+    cpu->id_aa64afr1 = 0;
+    cpu->isar.id_aa64isar0 = 0x1221111110212120ull; /* with Crypto and FEAT_RNG */
+    cpu->isar.id_aa64isar1 = 0x0011111101211052ull;
+    cpu->isar.id_aa64mmfr0 = 0x0000022200101125ull;
+    cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
+    cpu->isar.id_aa64mmfr2 = 0x1221011112101011ull;
+    cpu->clidr = 0x0000001482000023ull;
+    cpu->gm_blocksize = 4;
+    cpu->ctr = 0x00000004b444c004ull;
+    cpu->dcz_blocksize = 4;
+    /* TODO FEAT_MPAM: mpamidr_el1 = 0x0000_0001_001e_01ff */
+
+    /* Section B.7.2: PMCR_EL0 */
+    cpu->isar.reset_pmcr_el0 = 0x3000; /* with 6 counters */
+
+    /* Section B.8.9: ICH_VTR_EL2 */
+    cpu->gic_num_lrs = 4;
+    cpu->gic_vpribits = 5;
+    cpu->gic_vprebits = 5;
+    cpu->gic_pribits = 5;
+
+    /* Section 14: Scalable Vector Extensions support */
+    cpu->sve_vq.supported = 1 << 0; /* 128bit */
+
+    /*
+     * The Neoverse N2 TRM does not list CCSIDR values. The layout of
+     * the caches is described in text in Table 7-1, Table 8-1, and Table 9-1.
+     *
+     * L1: 4-way set associative 64-byte line size, total 64K.
+     * L2: 8-way set associative 64 byte line size, total either 512K or 1024K.
+     */
+    cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* L1 dcache */
+    cpu->ccsidr[1] = cpu->ccsidr[0]; /* L1 icache */
+    cpu->ccsidr[2] = make_ccsidr64(8, 64, 512 * KiB); /* L2 cache */
+
+    /* FIXME: Not documented -- copied from neoverse-v1 */
+    cpu->reset_sctlr = 0x30c50838;
+
+    /*
+     * The Neoverse N2 has all of the Cortex-A710 IMPDEF registers,
+     * and a few more RNG related ones.
+     */
+    define_arm_cp_regs(cpu, cortex_a710_cp_reginfo);
+    define_arm_cp_regs(cpu, neoverse_n2_cp_reginfo);
+
+    aarch64_add_pauth_properties(obj);
+    aarch64_add_sve_properties(obj);
+}
+
+/*
+ * -cpu max: a CPU with as many features enabled as our emulation supports.
+ * The version of '-cpu max' for qemu-system-arm is defined in cpu32.c;
+ * this only needs to handle 64 bits.
+ */
+void aarch64_max_tcg_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+    uint64_t t;
+    uint32_t u;
+
+    /*
+     * Reset MIDR so the guest doesn't mistake our 'max' CPU type for a real
+     * one and try to apply errata workarounds or use impdef features we
+     * don't provide.
+     * An IMPLEMENTER field of 0 means "reserved for software use";
+     * ARCHITECTURE must be 0xf indicating "v7 or later, check ID registers
+     * to see which features are present";
+     * the VARIANT, PARTNUM and REVISION fields are all implementation
+     * defined and we choose to define PARTNUM just in case guest
+     * code needs to distinguish this QEMU CPU from other software
+     * implementations, though this shouldn't be needed.
+     */
+    t = FIELD_DP64(0, MIDR_EL1, IMPLEMENTER, 0);
+    t = FIELD_DP64(t, MIDR_EL1, ARCHITECTURE, 0xf);
+    t = FIELD_DP64(t, MIDR_EL1, PARTNUM, 'Q');
+    t = FIELD_DP64(t, MIDR_EL1, VARIANT, 0);
+    t = FIELD_DP64(t, MIDR_EL1, REVISION, 0);
+    cpu->midr = t;
+
+    /*
+     * We're going to set FEAT_S2FWB, which mandates that CLIDR_EL1.{LoUU,LoUIS}
+     * are zero.
+     */
+    u = cpu->clidr;
+    u = FIELD_DP32(u, CLIDR_EL1, LOUIS, 0);
+    u = FIELD_DP32(u, CLIDR_EL1, LOUU, 0);
+    cpu->clidr = u;
+
+    /*
+     * Set CTR_EL0.DIC and IDC to tell the guest it doesn't need to
+     * do any cache maintenance for data-to-instruction or
+     * instruction-to-data coherence. (Our cache ops are nops.)
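+     * (In CTR_EL0 terms: DIC = 1 says no icache invalidation is needed
+     * for data-to-instruction coherence, and IDC = 1 says no dcache
+     * clean is needed for instruction-to-data coherence.)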
+ */ + t = cpu->ctr; + t = FIELD_DP64(t, CTR_EL0, IDC, 1); + t = FIELD_DP64(t, CTR_EL0, DIC, 1); + cpu->ctr = t; + + t = cpu->isar.id_aa64isar0; + t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2); /* FEAT_PMULL */ + t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1); /* FEAT_SHA1 */ + t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 2); /* FEAT_SHA512 */ + t = FIELD_DP64(t, ID_AA64ISAR0, CRC32, 1); /* FEAT_CRC32 */ + t = FIELD_DP64(t, ID_AA64ISAR0, ATOMIC, 2); /* FEAT_LSE */ + t = FIELD_DP64(t, ID_AA64ISAR0, RDM, 1); /* FEAT_RDM */ + t = FIELD_DP64(t, ID_AA64ISAR0, SHA3, 1); /* FEAT_SHA3 */ + t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 1); /* FEAT_SM3 */ + t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 1); /* FEAT_SM4 */ + t = FIELD_DP64(t, ID_AA64ISAR0, DP, 1); /* FEAT_DotProd */ + t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 1); /* FEAT_FHM */ + t = FIELD_DP64(t, ID_AA64ISAR0, TS, 2); /* FEAT_FlagM2 */ + t = FIELD_DP64(t, ID_AA64ISAR0, TLB, 2); /* FEAT_TLBIRANGE */ + t = FIELD_DP64(t, ID_AA64ISAR0, RNDR, 1); /* FEAT_RNG */ + cpu->isar.id_aa64isar0 = t; + + t = cpu->isar.id_aa64isar1; + t = FIELD_DP64(t, ID_AA64ISAR1, DPB, 2); /* FEAT_DPB2 */ + t = FIELD_DP64(t, ID_AA64ISAR1, APA, PauthFeat_FPACCOMBINED); + t = FIELD_DP64(t, ID_AA64ISAR1, API, 1); + t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 1); /* FEAT_JSCVT */ + t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1); /* FEAT_FCMA */ + t = FIELD_DP64(t, ID_AA64ISAR1, LRCPC, 2); /* FEAT_LRCPC2 */ + t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 1); /* FEAT_FRINTTS */ + t = FIELD_DP64(t, ID_AA64ISAR1, SB, 1); /* FEAT_SB */ + t = FIELD_DP64(t, ID_AA64ISAR1, SPECRES, 1); /* FEAT_SPECRES */ + t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 1); /* FEAT_BF16 */ + t = FIELD_DP64(t, ID_AA64ISAR1, DGH, 1); /* FEAT_DGH */ + t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 1); /* FEAT_I8MM */ + cpu->isar.id_aa64isar1 = t; + + t = cpu->isar.id_aa64isar2; + t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */ + t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */ + cpu->isar.id_aa64isar2 = t; + + t = cpu->isar.id_aa64pfr0; + t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); /* FEAT_FP16 */ + t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); /* FEAT_FP16 */ + t = FIELD_DP64(t, ID_AA64PFR0, RAS, 2); /* FEAT_RASv1p1 + FEAT_DoubleFault */ + t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1); + t = FIELD_DP64(t, ID_AA64PFR0, SEL2, 1); /* FEAT_SEL2 */ + t = FIELD_DP64(t, ID_AA64PFR0, DIT, 1); /* FEAT_DIT */ + t = FIELD_DP64(t, ID_AA64PFR0, CSV2, 2); /* FEAT_CSV2_2 */ + t = FIELD_DP64(t, ID_AA64PFR0, CSV3, 1); /* FEAT_CSV3 */ + cpu->isar.id_aa64pfr0 = t; + + t = cpu->isar.id_aa64pfr1; + t = FIELD_DP64(t, ID_AA64PFR1, BT, 1); /* FEAT_BTI */ + t = FIELD_DP64(t, ID_AA64PFR1, SSBS, 2); /* FEAT_SSBS2 */ + /* + * Begin with full support for MTE. This will be downgraded to MTE=0 + * during realize if the board provides no tag memory, much like + * we do for EL2 with the virtualization=on property. 
+ */ + t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */ + t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */ + t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */ + t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */ + cpu->isar.id_aa64pfr1 = t; + + t = cpu->isar.id_aa64mmfr0; + t = FIELD_DP64(t, ID_AA64MMFR0, PARANGE, 6); /* FEAT_LPA: 52 bits */ + t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 1); /* 16k pages supported */ + t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16_2, 2); /* 16k stage2 supported */ + t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN64_2, 2); /* 64k stage2 supported */ + t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 2); /* 4k stage2 supported */ + t = FIELD_DP64(t, ID_AA64MMFR0, FGT, 1); /* FEAT_FGT */ + t = FIELD_DP64(t, ID_AA64MMFR0, ECV, 2); /* FEAT_ECV */ + cpu->isar.id_aa64mmfr0 = t; + + t = cpu->isar.id_aa64mmfr1; + t = FIELD_DP64(t, ID_AA64MMFR1, HAFDBS, 2); /* FEAT_HAFDBS */ + t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* FEAT_VMID16 */ + t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1); /* FEAT_VHE */ + t = FIELD_DP64(t, ID_AA64MMFR1, HPDS, 2); /* FEAT_HPDS2 */ + t = FIELD_DP64(t, ID_AA64MMFR1, LO, 1); /* FEAT_LOR */ + t = FIELD_DP64(t, ID_AA64MMFR1, PAN, 3); /* FEAT_PAN3 */ + t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */ + t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 1); /* FEAT_ETS */ + t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */ + t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */ + cpu->isar.id_aa64mmfr1 = t; + + t = cpu->isar.id_aa64mmfr2; + t = FIELD_DP64(t, ID_AA64MMFR2, CNP, 1); /* FEAT_TTCNP */ + t = FIELD_DP64(t, ID_AA64MMFR2, UAO, 1); /* FEAT_UAO */ + t = FIELD_DP64(t, ID_AA64MMFR2, IESB, 1); /* FEAT_IESB */ + t = FIELD_DP64(t, ID_AA64MMFR2, VARANGE, 1); /* FEAT_LVA */ + t = FIELD_DP64(t, ID_AA64MMFR2, NV, 2); /* FEAT_NV2 */ + t = FIELD_DP64(t, ID_AA64MMFR2, ST, 1); /* FEAT_TTST */ + t = FIELD_DP64(t, ID_AA64MMFR2, AT, 1); /* FEAT_LSE2 */ + t = FIELD_DP64(t, ID_AA64MMFR2, IDS, 1); /* FEAT_IDST */ + t = FIELD_DP64(t, ID_AA64MMFR2, FWB, 1); /* FEAT_S2FWB */ + t = FIELD_DP64(t, ID_AA64MMFR2, TTL, 1); /* FEAT_TTL */ + t = FIELD_DP64(t, ID_AA64MMFR2, BBM, 2); /* FEAT_BBM at level 2 */ + t = FIELD_DP64(t, ID_AA64MMFR2, EVT, 2); /* FEAT_EVT */ + t = FIELD_DP64(t, ID_AA64MMFR2, E0PD, 1); /* FEAT_E0PD */ + cpu->isar.id_aa64mmfr2 = t; + + t = cpu->isar.id_aa64zfr0; + t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 1); + t = FIELD_DP64(t, ID_AA64ZFR0, AES, 2); /* FEAT_SVE_PMULL128 */ + t = FIELD_DP64(t, ID_AA64ZFR0, BITPERM, 1); /* FEAT_SVE_BitPerm */ + t = FIELD_DP64(t, ID_AA64ZFR0, BFLOAT16, 1); /* FEAT_BF16 */ + t = FIELD_DP64(t, ID_AA64ZFR0, SHA3, 1); /* FEAT_SVE_SHA3 */ + t = FIELD_DP64(t, ID_AA64ZFR0, SM4, 1); /* FEAT_SVE_SM4 */ + t = FIELD_DP64(t, ID_AA64ZFR0, I8MM, 1); /* FEAT_I8MM */ + t = FIELD_DP64(t, ID_AA64ZFR0, F32MM, 1); /* FEAT_F32MM */ + t = FIELD_DP64(t, ID_AA64ZFR0, F64MM, 1); /* FEAT_F64MM */ + cpu->isar.id_aa64zfr0 = t; + + t = cpu->isar.id_aa64dfr0; + t = FIELD_DP64(t, ID_AA64DFR0, DEBUGVER, 9); /* FEAT_Debugv8p4 */ + t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 6); /* FEAT_PMUv3p5 */ + t = FIELD_DP64(t, ID_AA64DFR0, HPMN0, 1); /* FEAT_HPMN0 */ + cpu->isar.id_aa64dfr0 = t; + + t = cpu->isar.id_aa64smfr0; + t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */ + t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */ + t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */ + t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf); /* FEAT_SME */ + t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 
*/ + t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */ + t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */ + cpu->isar.id_aa64smfr0 = t; + + /* Replicate the same data to the 32-bit id registers. */ + aa32_max_features(cpu); + +#ifdef CONFIG_USER_ONLY + /* + * For usermode -cpu max we can use a larger and more efficient DCZ + * blocksize since we don't have to follow what the hardware does. + */ + cpu->ctr = 0x80038003; /* 32 byte I and D cacheline size, VIPT icache */ + cpu->dcz_blocksize = 7; /* 512 bytes */ +#endif + cpu->gm_blocksize = 6; /* 256 bytes */ + + cpu->sve_vq.supported = MAKE_64BIT_MASK(0, ARM_MAX_VQ); + cpu->sme_vq.supported = SVE_VQ_POW2_MAP; + + aarch64_add_pauth_properties(obj); + aarch64_add_sve_properties(obj); + aarch64_add_sme_properties(obj); + object_property_add(obj, "sve-max-vq", "uint32", cpu_max_get_sve_max_vq, + cpu_max_set_sve_max_vq, NULL, NULL); + object_property_add_bool(obj, "x-rme", cpu_arm_get_rme, cpu_arm_set_rme); + object_property_add(obj, "x-l0gptsz", "uint32", cpu_max_get_l0gptsz, + cpu_max_set_l0gptsz, NULL, NULL); + qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); +} + +static const ARMCPUInfo aarch64_cpus[] = { + { .name = "cortex-a35", .initfn = aarch64_a35_initfn }, + { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, + { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, + { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, + { .name = "cortex-a710", .initfn = aarch64_a710_initfn }, + { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, + { .name = "neoverse-n1", .initfn = aarch64_neoverse_n1_initfn }, + { .name = "neoverse-v1", .initfn = aarch64_neoverse_v1_initfn }, + { .name = "neoverse-n2", .initfn = aarch64_neoverse_n2_initfn }, +}; + +static void aarch64_cpu_register_types(void) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) { + aarch64_cpu_register(&aarch64_cpus[i]); + } +} + +type_init(aarch64_cpu_register_types) diff --git a/target/arm/crypto_helper.c b/target/arm/tcg/crypto_helper.c index 28a84c2dbd..7cadd61e12 100644 --- a/target/arm/crypto_helper.c +++ b/target/arm/tcg/crypto_helper.c @@ -14,7 +14,8 @@ #include "cpu.h" #include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" -#include "crypto/aes.h" +#include "crypto/aes-round.h" +#include "crypto/sm4.h" #include "vec_internal.h" union CRYPTO_STATE { @@ -23,7 +24,7 @@ union CRYPTO_STATE { uint64_t l[2]; }; -#ifdef HOST_WORDS_BIGENDIAN +#if HOST_BIG_ENDIAN #define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8]) #define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2]) #else @@ -44,197 +45,104 @@ static void clear_tail_16(void *vd, uint32_t desc) clear_tail(vd, opr_sz, max_sz); } -static void do_crypto_aese(uint64_t *rd, uint64_t *rn, - uint64_t *rm, bool decrypt) -{ - static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox }; - static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts }; - union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; - union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; - int i; +static const AESState aes_zero = { }; - /* xor state vector with round key */ - rk.l[0] ^= st.l[0]; - rk.l[1] ^= st.l[1]; +void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); - /* combine ShiftRows operation and sbox substitution */ - for (i = 0; i < 16; i++) { - CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])]; + for (i = 0; i < opr_sz; i += 16) { + AESState *ad = (AESState *)(vd + i); + AESState *st = 
(AESState *)(vn + i); + AESState *rk = (AESState *)(vm + i); + AESState t; + + /* + * Our uint64_t are in the wrong order for big-endian. + * The Arm AddRoundKey comes first, while the API AddRoundKey + * comes last: perform the xor here, and provide zero to API. + */ + if (HOST_BIG_ENDIAN) { + t.d[0] = st->d[1] ^ rk->d[1]; + t.d[1] = st->d[0] ^ rk->d[0]; + aesenc_SB_SR_AK(&t, &t, &aes_zero, false); + ad->d[0] = t.d[1]; + ad->d[1] = t.d[0]; + } else { + t.v = st->v ^ rk->v; + aesenc_SB_SR_AK(ad, &t, &aes_zero, false); + } } - - rd[0] = st.l[0]; - rd[1] = st.l[1]; + clear_tail(vd, opr_sz, simd_maxsz(desc)); } -void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) +void HELPER(crypto_aesd)(void *vd, void *vn, void *vm, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc); - bool decrypt = simd_data(desc); for (i = 0; i < opr_sz; i += 16) { - do_crypto_aese(vd + i, vn + i, vm + i, decrypt); + AESState *ad = (AESState *)(vd + i); + AESState *st = (AESState *)(vn + i); + AESState *rk = (AESState *)(vm + i); + AESState t; + + /* Our uint64_t are in the wrong order for big-endian. */ + if (HOST_BIG_ENDIAN) { + t.d[0] = st->d[1] ^ rk->d[1]; + t.d[1] = st->d[0] ^ rk->d[0]; + aesdec_ISB_ISR_AK(&t, &t, &aes_zero, false); + ad->d[0] = t.d[1]; + ad->d[1] = t.d[0]; + } else { + t.v = st->v ^ rk->v; + aesdec_ISB_ISR_AK(ad, &t, &aes_zero, false); + } } clear_tail(vd, opr_sz, simd_maxsz(desc)); } -static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt) -{ - static uint32_t const mc[][256] = { { - /* MixColumns lookup table */ - 0x00000000, 0x03010102, 0x06020204, 0x05030306, - 0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e, - 0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16, - 0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e, - 0x30101020, 0x33111122, 0x36121224, 0x35131326, - 0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e, - 0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36, - 0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e, - 0x60202040, 0x63212142, 0x66222244, 0x65232346, - 0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e, - 0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56, - 0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e, - 0x50303060, 0x53313162, 0x56323264, 0x55333366, - 0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e, - 0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76, - 0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e, - 0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386, - 0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e, - 0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96, - 0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e, - 0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6, - 0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae, - 0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6, - 0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe, - 0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6, - 0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce, - 0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6, - 0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde, - 0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6, - 0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee, - 0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6, - 0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe, - 0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d, - 0x97848413, 0x94858511, 0x91868617, 0x92878715, - 0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d, - 0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05, - 0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d, - 0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735, - 0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d, - 0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 
0xba9f9f25, - 0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d, - 0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755, - 0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d, - 0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45, - 0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d, - 0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775, - 0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d, - 0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65, - 0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d, - 0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795, - 0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d, - 0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85, - 0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd, - 0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5, - 0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad, - 0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5, - 0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd, - 0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5, - 0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd, - 0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5, - 0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd, - 0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5, - 0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed, - 0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5, - }, { - /* Inverse MixColumns lookup table */ - 0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12, - 0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a, - 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362, - 0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a, - 0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2, - 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca, - 0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382, - 0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba, - 0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9, - 0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1, - 0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9, - 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81, - 0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029, - 0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411, - 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859, - 0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61, - 0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf, - 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987, - 0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf, - 0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7, - 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f, - 0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967, - 0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f, - 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117, - 0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664, - 0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c, - 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14, - 0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c, - 0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684, - 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc, - 0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4, - 0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc, - 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753, - 0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b, - 0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23, - 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b, - 0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3, - 0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b, - 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3, - 0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb, - 0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88, - 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0, - 0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8, - 0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0, - 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68, - 0x10856342, 0x1b886a4c, 0x069f715e, 
0x0d927850, - 0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418, - 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020, - 0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe, - 0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6, - 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e, - 0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6, - 0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e, - 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526, - 0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e, - 0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56, - 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25, - 0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d, - 0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255, - 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d, - 0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5, - 0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd, - 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5, - 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, - } }; - - union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; - int i; +void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); - for (i = 0; i < 16; i += 4) { - CR_ST_WORD(st, i >> 2) = - mc[decrypt][CR_ST_BYTE(st, i)] ^ - rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^ - rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^ - rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24); + for (i = 0; i < opr_sz; i += 16) { + AESState *ad = (AESState *)(vd + i); + AESState *st = (AESState *)(vm + i); + AESState t; + + /* Our uint64_t are in the wrong order for big-endian. */ + if (HOST_BIG_ENDIAN) { + t.d[0] = st->d[1]; + t.d[1] = st->d[0]; + aesenc_MC(&t, &t, false); + ad->d[0] = t.d[1]; + ad->d[1] = t.d[0]; + } else { + aesenc_MC(ad, st, false); + } } - - rd[0] = st.l[0]; - rd[1] = st.l[1]; + clear_tail(vd, opr_sz, simd_maxsz(desc)); } -void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc) +void HELPER(crypto_aesimc)(void *vd, void *vm, uint32_t desc) { intptr_t i, opr_sz = simd_oprsz(desc); - bool decrypt = simd_data(desc); for (i = 0; i < opr_sz; i += 16) { - do_crypto_aesmc(vd + i, vm + i, decrypt); + AESState *ad = (AESState *)(vd + i); + AESState *st = (AESState *)(vm + i); + AESState t; + + /* Our uint64_t are in the wrong order for big-endian. 
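The deleted table-based MixColumns assembled each output column as mc[b0] ^ rol32(mc[b1], 8) ^ rol32(mc[b2], 16) ^ rol32(mc[b3], 24). A standalone sketch of the rol32() rotation it relied on (QEMU's own version lives in include/qemu/bitops.h; this is a plausible equivalent, not a copy):

#include <stdint.h>
#include <stdio.h>

static inline uint32_t rol32(uint32_t word, unsigned shift) /* shift: 0..31 */
{
    /* masking with 31 keeps the right shift well-defined when shift == 0 */
    return (word << shift) | (word >> ((32 - shift) & 31));
}

int main(void)
{
    printf("0x%08x\n", rol32(0x80000001u, 8));  /* 0x00000180 */
    return 0;
}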
*/ + if (HOST_BIG_ENDIAN) { + t.d[0] = st->d[1]; + t.d[1] = st->d[0]; + aesdec_IMC(&t, &t, false); + ad->d[0] = t.d[1]; + ad->d[1] = t.d[0]; + } else { + aesdec_IMC(ad, st, false); + } } clear_tail(vd, opr_sz, simd_maxsz(desc)); } @@ -694,41 +602,6 @@ DO_SM3TT(crypto_sm3tt2b, 3) #undef DO_SM3TT -static uint8_t const sm4_sbox[] = { - 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, - 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, - 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, - 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, - 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, - 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62, - 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, - 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6, - 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, - 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8, - 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, - 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35, - 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, - 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87, - 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, - 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e, - 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, - 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1, - 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, - 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3, - 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, - 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f, - 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, - 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51, - 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, - 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8, - 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, - 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0, - 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, - 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84, - 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, - 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, -}; - static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) { union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; @@ -741,10 +614,7 @@ static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) CR_ST_WORD(d, (i + 3) % 4) ^ CR_ST_WORD(n, i); - t = sm4_sbox[t & 0xff] | - sm4_sbox[(t >> 8) & 0xff] << 8 | - sm4_sbox[(t >> 16) & 0xff] << 16 | - sm4_sbox[(t >> 24) & 0xff] << 24; + t = sm4_subword(t); CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ rol32(t, 24); @@ -778,10 +648,7 @@ static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) CR_ST_WORD(d, (i + 3) % 4) ^ CR_ST_WORD(m, i); - t = sm4_sbox[t & 0xff] | - sm4_sbox[(t >> 8) & 0xff] << 8 | - sm4_sbox[(t >> 16) & 0xff] << 16 | - sm4_sbox[(t >> 24) & 0xff] << 24; + t = sm4_subword(t); CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); } diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c new file mode 100644 index 0000000000..ebaa7f00df --- /dev/null +++ b/target/arm/tcg/helper-a64.c @@ -0,0 +1,1857 @@ +/* + * AArch64 specific helpers + * + * Copyright (c) 2013 Alexander Graf <agraf@suse.de> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
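The SM4 hunks above replace four open-coded S-box lookups with sm4_subword() from crypto/sm4.h. A sketch of what such a helper does, substituting each byte of a 32-bit word through a table; the identity table here is a stand-in for the real sm4_sbox, and the names are illustrative rather than the library's:

#include <stdint.h>
#include <stdio.h>

static uint8_t sbox[256];            /* stand-in for the real SM4 S-box */

static uint32_t subword(uint32_t t)  /* substitute each byte of t */
{
    return sbox[t & 0xff] |
           sbox[(t >> 8) & 0xff] << 8 |
           sbox[(t >> 16) & 0xff] << 16 |
           (uint32_t)sbox[(t >> 24) & 0xff] << 24;
}

int main(void)
{
    for (int i = 0; i < 256; i++) {
        sbox[i] = i;                 /* identity, for demonstration only */
    }
    printf("0x%08x\n", subword(0x01020304)); /* 0x01020304 */
    return 0;
}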
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "cpu.h" +#include "gdbstub/helpers.h" +#include "exec/helper-proto.h" +#include "qemu/host-utils.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "qemu/bitops.h" +#include "internals.h" +#include "qemu/crc32c.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "qemu/int128.h" +#include "qemu/atomic128.h" +#include "fpu/softfloat.h" +#include <zlib.h> /* For crc32 */ + +/* C2.4.7 Multiply and divide */ +/* special cases for 0 and LLONG_MIN are mandated by the standard */ +uint64_t HELPER(udiv64)(uint64_t num, uint64_t den) +{ + if (den == 0) { + return 0; + } + return num / den; +} + +int64_t HELPER(sdiv64)(int64_t num, int64_t den) +{ + if (den == 0) { + return 0; + } + if (num == LLONG_MIN && den == -1) { + return LLONG_MIN; + } + return num / den; +} + +uint64_t HELPER(rbit64)(uint64_t x) +{ + return revbit64(x); +} + +void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm) +{ + update_spsel(env, imm); +} + +static void daif_check(CPUARMState *env, uint32_t op, + uint32_t imm, uintptr_t ra) +{ + /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */ + if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) { + raise_exception_ra(env, EXCP_UDEF, + syn_aa64_sysregtrap(0, extract32(op, 0, 3), + extract32(op, 3, 3), 4, + imm, 0x1f, 0), + exception_target_el(env), ra); + } +} + +void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm) +{ + daif_check(env, 0x1e, imm, GETPC()); + env->daif |= (imm << 6) & PSTATE_DAIF; + arm_rebuild_hflags(env); +} + +void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm) +{ + daif_check(env, 0x1f, imm, GETPC()); + env->daif &= ~((imm << 6) & PSTATE_DAIF); + arm_rebuild_hflags(env); +} + +/* Convert a softfloat float_relation_ (as returned by + * the float*_compare functions) to the correct ARM + * NZCV flag state. 
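The two guards in udiv64()/sdiv64() above are exactly what keeps AArch64 UDIV/SDIV from trapping where C division would: dividing by zero yields 0, and INT64_MIN / -1, whose true quotient 2^63 is unrepresentable, yields INT64_MIN again. A quick standalone check of the signed case:

#include <stdint.h>
#include <stdio.h>

static int64_t sdiv64(int64_t num, int64_t den)
{
    if (den == 0) {
        return 0;                    /* AArch64: division by zero is 0 */
    }
    if (num == INT64_MIN && den == -1) {
        return INT64_MIN;            /* quotient 2^63 is unrepresentable */
    }
    return num / den;
}

int main(void)
{
    printf("%lld\n", (long long)sdiv64(42, 0));          /* 0 */
    printf("%lld\n", (long long)sdiv64(INT64_MIN, -1));  /* INT64_MIN */
    return 0;
}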
+ */ +static inline uint32_t float_rel_to_flags(int res) +{ + uint64_t flags; + switch (res) { + case float_relation_equal: + flags = PSTATE_Z | PSTATE_C; + break; + case float_relation_less: + flags = PSTATE_N; + break; + case float_relation_greater: + flags = PSTATE_C; + break; + case float_relation_unordered: + default: + flags = PSTATE_C | PSTATE_V; + break; + } + return flags; +} + +uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status) +{ + return float_rel_to_flags(float16_compare_quiet(x, y, fp_status)); +} + +uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status) +{ + return float_rel_to_flags(float16_compare(x, y, fp_status)); +} + +uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status) +{ + return float_rel_to_flags(float32_compare_quiet(x, y, fp_status)); +} + +uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status) +{ + return float_rel_to_flags(float32_compare(x, y, fp_status)); +} + +uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status) +{ + return float_rel_to_flags(float64_compare_quiet(x, y, fp_status)); +} + +uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status) +{ + return float_rel_to_flags(float64_compare(x, y, fp_status)); +} + +float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float32_squash_input_denormal(a, fpst); + b = float32_squash_input_denormal(b, fpst); + + if ((float32_is_zero(a) && float32_is_infinity(b)) || + (float32_is_infinity(a) && float32_is_zero(b))) { + /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ + return make_float32((1U << 30) | + ((float32_val(a) ^ float32_val(b)) & (1U << 31))); + } + return float32_mul(a, b, fpst); +} + +float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float64_squash_input_denormal(a, fpst); + b = float64_squash_input_denormal(b, fpst); + + if ((float64_is_zero(a) && float64_is_infinity(b)) || + (float64_is_infinity(a) && float64_is_zero(b))) { + /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ + return make_float64((1ULL << 62) | + ((float64_val(a) ^ float64_val(b)) & (1ULL << 63))); + } + return float64_mul(a, b, fpst); +} + +/* 64bit/double versions of the neon float compare functions */ +uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + return -float64_eq_quiet(a, b, fpst); +} + +uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + return -float64_le(b, a, fpst); +} + +uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + return -float64_lt(b, a, fpst); +} + +/* Reciprocal step and sqrt step. Note that unlike the A32/T32 + * versions, these do a fully fused multiply-add or + * multiply-add-and-halve. 
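The FMULX special case above builds ±2.0 straight from bits: in IEEE-754 binary32, the pattern 0x40000000 is exactly 2.0 and bit 31 is the sign, so (1U << 30) combined with the XOR of the two operands' sign bits is ±2.0 without any floating-point arithmetic. A quick check of the bit pattern:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    uint32_t bits = 1u << 30;       /* exponent field 128, mantissa 0 */
    float f;

    memcpy(&f, &bits, sizeof(f));   /* well-defined type pun */
    printf("%f\n", f);              /* 2.000000 */

    bits |= 1u << 31;               /* now set the sign bit */
    memcpy(&f, &bits, sizeof(f));
    printf("%f\n", f);              /* -2.000000 */
    return 0;
}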
+ */ + +uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float16_squash_input_denormal(a, fpst); + b = float16_squash_input_denormal(b, fpst); + + a = float16_chs(a); + if ((float16_is_infinity(a) && float16_is_zero(b)) || + (float16_is_infinity(b) && float16_is_zero(a))) { + return float16_two; + } + return float16_muladd(a, b, float16_two, 0, fpst); +} + +float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float32_squash_input_denormal(a, fpst); + b = float32_squash_input_denormal(b, fpst); + + a = float32_chs(a); + if ((float32_is_infinity(a) && float32_is_zero(b)) || + (float32_is_infinity(b) && float32_is_zero(a))) { + return float32_two; + } + return float32_muladd(a, b, float32_two, 0, fpst); +} + +float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float64_squash_input_denormal(a, fpst); + b = float64_squash_input_denormal(b, fpst); + + a = float64_chs(a); + if ((float64_is_infinity(a) && float64_is_zero(b)) || + (float64_is_infinity(b) && float64_is_zero(a))) { + return float64_two; + } + return float64_muladd(a, b, float64_two, 0, fpst); +} + +uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float16_squash_input_denormal(a, fpst); + b = float16_squash_input_denormal(b, fpst); + + a = float16_chs(a); + if ((float16_is_infinity(a) && float16_is_zero(b)) || + (float16_is_infinity(b) && float16_is_zero(a))) { + return float16_one_point_five; + } + return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst); +} + +float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float32_squash_input_denormal(a, fpst); + b = float32_squash_input_denormal(b, fpst); + + a = float32_chs(a); + if ((float32_is_infinity(a) && float32_is_zero(b)) || + (float32_is_infinity(b) && float32_is_zero(a))) { + return float32_one_point_five; + } + return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst); +} + +float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float64_squash_input_denormal(a, fpst); + b = float64_squash_input_denormal(b, fpst); + + a = float64_chs(a); + if ((float64_is_infinity(a) && float64_is_zero(b)) || + (float64_is_infinity(b) && float64_is_zero(a))) { + return float64_one_point_five; + } + return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst); +} + +/* Pairwise long add: add pairs of adjacent elements into + * double-width elements in the result (eg _s8 is an 8x8->16 op) + */ +uint64_t HELPER(neon_addlp_s8)(uint64_t a) +{ + uint64_t nsignmask = 0x0080008000800080ULL; + uint64_t wsignmask = 0x8000800080008000ULL; + uint64_t elementmask = 0x00ff00ff00ff00ffULL; + uint64_t tmp1, tmp2; + uint64_t res, signres; + + /* Extract odd elements, sign extend each to a 16 bit field */ + tmp1 = a & elementmask; + tmp1 ^= nsignmask; + tmp1 |= wsignmask; + tmp1 = (tmp1 - nsignmask) ^ wsignmask; + /* Ditto for the even elements */ + tmp2 = (a >> 8) & elementmask; + tmp2 ^= nsignmask; + tmp2 |= wsignmask; + tmp2 = (tmp2 - nsignmask) ^ wsignmask; + + /* calculate the result by summing bits 0..14, 16..22, etc, + * and then adjusting the sign bits 15, 23, etc manually. + * This ensures the addition can't overflow the 16 bit field. 
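The recpsf helpers compute the fused 2 - a*b (the operand is negated, then muladd(a, b, 2)); that expression is the correction factor of a Newton-Raphson iteration for 1/a, x' = x * (2 - a*x), which is how guest code typically uses FRECPS. An unfused sketch of one such refinement loop:

#include <stdio.h>

int main(void)
{
    double a = 3.0;
    double x = 0.3;                   /* rough initial estimate of 1/3 */
    for (int i = 0; i < 3; i++) {
        x = x * (2.0 - a * x);        /* one FRECPS-shaped step */
        printf("iteration %d: %.12f\n", i, x);
    }
    return 0;                         /* converges quadratically to 1/3 */
}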
+ */ + signres = (tmp1 ^ tmp2) & wsignmask; + res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask); + res ^= signres; + + return res; +} + +uint64_t HELPER(neon_addlp_u8)(uint64_t a) +{ + uint64_t tmp; + + tmp = a & 0x00ff00ff00ff00ffULL; + tmp += (a >> 8) & 0x00ff00ff00ff00ffULL; + return tmp; +} + +uint64_t HELPER(neon_addlp_s16)(uint64_t a) +{ + int32_t reslo, reshi; + + reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16); + reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48); + + return (uint32_t)reslo | (((uint64_t)reshi) << 32); +} + +uint64_t HELPER(neon_addlp_u16)(uint64_t a) +{ + uint64_t tmp; + + tmp = a & 0x0000ffff0000ffffULL; + tmp += (a >> 16) & 0x0000ffff0000ffffULL; + return tmp; +} + +/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ +uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp) +{ + float_status *fpst = fpstp; + uint16_t val16, sbit; + int16_t exp; + + if (float16_is_any_nan(a)) { + float16 nan = a; + if (float16_is_signaling_nan(a, fpst)) { + float_raise(float_flag_invalid, fpst); + if (!fpst->default_nan_mode) { + nan = float16_silence_nan(a, fpst); + } + } + if (fpst->default_nan_mode) { + nan = float16_default_nan(fpst); + } + return nan; + } + + a = float16_squash_input_denormal(a, fpst); + + val16 = float16_val(a); + sbit = 0x8000 & val16; + exp = extract32(val16, 10, 5); + + if (exp == 0) { + return make_float16(deposit32(sbit, 10, 5, 0x1e)); + } else { + return make_float16(deposit32(sbit, 10, 5, ~exp)); + } +} + +float32 HELPER(frecpx_f32)(float32 a, void *fpstp) +{ + float_status *fpst = fpstp; + uint32_t val32, sbit; + int32_t exp; + + if (float32_is_any_nan(a)) { + float32 nan = a; + if (float32_is_signaling_nan(a, fpst)) { + float_raise(float_flag_invalid, fpst); + if (!fpst->default_nan_mode) { + nan = float32_silence_nan(a, fpst); + } + } + if (fpst->default_nan_mode) { + nan = float32_default_nan(fpst); + } + return nan; + } + + a = float32_squash_input_denormal(a, fpst); + + val32 = float32_val(a); + sbit = 0x80000000ULL & val32; + exp = extract32(val32, 23, 8); + + if (exp == 0) { + return make_float32(sbit | (0xfe << 23)); + } else { + return make_float32(sbit | (~exp & 0xff) << 23); + } +} + +float64 HELPER(frecpx_f64)(float64 a, void *fpstp) +{ + float_status *fpst = fpstp; + uint64_t val64, sbit; + int64_t exp; + + if (float64_is_any_nan(a)) { + float64 nan = a; + if (float64_is_signaling_nan(a, fpst)) { + float_raise(float_flag_invalid, fpst); + if (!fpst->default_nan_mode) { + nan = float64_silence_nan(a, fpst); + } + } + if (fpst->default_nan_mode) { + nan = float64_default_nan(fpst); + } + return nan; + } + + a = float64_squash_input_denormal(a, fpst); + + val64 = float64_val(a); + sbit = 0x8000000000000000ULL & val64; + exp = extract64(float64_val(a), 52, 11); + + if (exp == 0) { + return make_float64(sbit | (0x7feULL << 52)); + } else { + return make_float64(sbit | (~exp & 0x7ffULL) << 52); + } +} + +float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env) +{ + /* Von Neumann rounding is implemented by using round-to-zero + * and then setting the LSB of the result if Inexact was raised. 
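The mask sequence in neon_addlp_s8() is the classic branch-free sign extension (x ^ m) - m, with m the sign bit of the narrow field, applied SWAR-style to four 8-bit lanes packed in one uint64_t. The single-lane version makes the trick easier to see:

#include <stdint.h>
#include <stdio.h>

static int16_t sext8_to_16(uint16_t x)   /* x carries an 8-bit value */
{
    uint16_t m = 0x80;                   /* sign bit of the narrow field */
    return (int16_t)((x ^ m) - m);       /* xor flips it, subtract restores */
}

int main(void)
{
    printf("%d\n", sext8_to_16(0x7f));   /*  127 */
    printf("%d\n", sext8_to_16(0x80));   /* -128 */
    printf("%d\n", sext8_to_16(0xff));   /*   -1 */
    return 0;
}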
+ */ + float32 r; + float_status *fpst = &env->vfp.fp_status; + float_status tstat = *fpst; + int exflags; + + set_float_rounding_mode(float_round_to_zero, &tstat); + set_float_exception_flags(0, &tstat); + r = float64_to_float32(a, &tstat); + exflags = get_float_exception_flags(&tstat); + if (exflags & float_flag_inexact) { + r = make_float32(float32_val(r) | 1); + } + exflags |= get_float_exception_flags(fpst); + set_float_exception_flags(exflags, fpst); + return r; +} + +/* 64-bit versions of the CRC helpers. Note that although the operation + * (and the prototypes of crc32c() and crc32() mean that only the bottom + * 32 bits of the accumulator and result are used, we pass and return + * uint64_t for convenience of the generated code. Unlike the 32-bit + * instruction set versions, val may genuinely have 64 bits of data in it. + * The upper bytes of val (above the number specified by 'bytes') must have + * been zeroed out by the caller. + */ +uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes) +{ + uint8_t buf[8]; + + stq_le_p(buf, val); + + /* zlib crc32 converts the accumulator and output to one's complement. */ + return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff; +} + +uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) +{ + uint8_t buf[8]; + + stq_le_p(buf, val); + + /* Linux crc32c converts the output to one's complement. */ + return crc32c(acc, buf, bytes) ^ 0xffffffff; +} + +/* + * AdvSIMD half-precision + */ + +#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix)) + +#define ADVSIMD_HALFOP(name) \ +uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \ +{ \ + float_status *fpst = fpstp; \ + return float16_ ## name(a, b, fpst); \ +} + +ADVSIMD_HALFOP(add) +ADVSIMD_HALFOP(sub) +ADVSIMD_HALFOP(mul) +ADVSIMD_HALFOP(div) +ADVSIMD_HALFOP(min) +ADVSIMD_HALFOP(max) +ADVSIMD_HALFOP(minnum) +ADVSIMD_HALFOP(maxnum) + +#define ADVSIMD_TWOHALFOP(name) \ +uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \ +{ \ + float16 a1, a2, b1, b2; \ + uint32_t r1, r2; \ + float_status *fpst = fpstp; \ + a1 = extract32(two_a, 0, 16); \ + a2 = extract32(two_a, 16, 16); \ + b1 = extract32(two_b, 0, 16); \ + b2 = extract32(two_b, 16, 16); \ + r1 = float16_ ## name(a1, b1, fpst); \ + r2 = float16_ ## name(a2, b2, fpst); \ + return deposit32(r1, 16, 16, r2); \ +} + +ADVSIMD_TWOHALFOP(add) +ADVSIMD_TWOHALFOP(sub) +ADVSIMD_TWOHALFOP(mul) +ADVSIMD_TWOHALFOP(div) +ADVSIMD_TWOHALFOP(min) +ADVSIMD_TWOHALFOP(max) +ADVSIMD_TWOHALFOP(minnum) +ADVSIMD_TWOHALFOP(maxnum) + +/* Data processing - scalar floating-point and advanced SIMD */ +static float16 float16_mulx(float16 a, float16 b, void *fpstp) +{ + float_status *fpst = fpstp; + + a = float16_squash_input_denormal(a, fpst); + b = float16_squash_input_denormal(b, fpst); + + if ((float16_is_zero(a) && float16_is_infinity(b)) || + (float16_is_infinity(a) && float16_is_zero(b))) { + /* 2.0 with the sign bit set to sign(A) XOR sign(B) */ + return make_float16((1U << 14) | + ((float16_val(a) ^ float16_val(b)) & (1U << 15))); + } + return float16_mul(a, b, fpst); +} + +ADVSIMD_HALFOP(mulx) +ADVSIMD_TWOHALFOP(mulx) + +/* fused multiply-accumulate */ +uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c, + void *fpstp) +{ + float_status *fpst = fpstp; + return float16_muladd(a, b, c, 0, fpst); +} + +uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b, + uint32_t two_c, void *fpstp) +{ + float_status *fpst = fpstp; + float16 a1, a2, 
b1, b2, c1, c2; + uint32_t r1, r2; + a1 = extract32(two_a, 0, 16); + a2 = extract32(two_a, 16, 16); + b1 = extract32(two_b, 0, 16); + b2 = extract32(two_b, 16, 16); + c1 = extract32(two_c, 0, 16); + c2 = extract32(two_c, 16, 16); + r1 = float16_muladd(a1, b1, c1, 0, fpst); + r2 = float16_muladd(a2, b2, c2, 0, fpst); + return deposit32(r1, 16, 16, r2); +} + +/* + * Floating point comparisons produce an integer result. Softfloat + * routines return float_relation types which we convert to the 0/-1 + * Neon requires. + */ + +#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0 + +uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + int compare = float16_compare_quiet(a, b, fpst); + return ADVSIMD_CMPRES(compare == float_relation_equal); +} + +uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + int compare = float16_compare(a, b, fpst); + return ADVSIMD_CMPRES(compare == float_relation_greater || + compare == float_relation_equal); +} + +uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + int compare = float16_compare(a, b, fpst); + return ADVSIMD_CMPRES(compare == float_relation_greater); +} + +uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + float16 f0 = float16_abs(a); + float16 f1 = float16_abs(b); + int compare = float16_compare(f0, f1, fpst); + return ADVSIMD_CMPRES(compare == float_relation_greater || + compare == float_relation_equal); +} + +uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + float16 f0 = float16_abs(a); + float16 f1 = float16_abs(b); + int compare = float16_compare(f0, f1, fpst); + return ADVSIMD_CMPRES(compare == float_relation_greater); +} + +/* round to integral */ +uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status) +{ + return float16_round_to_int(x, fp_status); +} + +uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status) +{ + int old_flags = get_float_exception_flags(fp_status), new_flags; + float16 ret; + + ret = float16_round_to_int(x, fp_status); + + /* Suppress any inexact exceptions the conversion produced */ + if (!(old_flags & float_flag_inexact)) { + new_flags = get_float_exception_flags(fp_status); + set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status); + } + + return ret; +} + +/* + * Half-precision floating point conversion functions + * + * There are a multitude of conversion functions with various + * different rounding modes. This is dealt with by the calling code + * setting the mode appropriately before calling the helper. 
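The 2h helpers above treat one uint32_t as two packed float16 lanes, unpacking with extract32() and repacking with deposit32() (both from QEMU's include/qemu/bitops.h). Standalone equivalents written to match the documented semantics, valid for lengths 1..32:

#include <stdint.h>
#include <stdio.h>

static uint32_t extract32(uint32_t v, int start, int length)
{
    return (v >> start) & (~0u >> (32 - length));
}

static uint32_t deposit32(uint32_t v, int start, int length, uint32_t field)
{
    uint32_t mask = (~0u >> (32 - length)) << start;
    return (v & ~mask) | ((field << start) & mask);
}

int main(void)
{
    uint32_t two_h = 0x3c004000;             /* lanes: 0x4000 low, 0x3c00 high */
    uint32_t lo = extract32(two_h, 0, 16);   /* 0x4000 */
    uint32_t hi = extract32(two_h, 16, 16);  /* 0x3c00 */
    printf("lo=%04x hi=%04x repacked=%08x\n",
           lo, hi, deposit32(lo, 16, 16, hi));
    return 0;
}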
+ */ + +uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp) +{ + float_status *fpst = fpstp; + + /* Invalid if we are passed a NaN */ + if (float16_is_any_nan(a)) { + float_raise(float_flag_invalid, fpst); + return 0; + } + return float16_to_int16(a, fpst); +} + +uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp) +{ + float_status *fpst = fpstp; + + /* Invalid if we are passed a NaN */ + if (float16_is_any_nan(a)) { + float_raise(float_flag_invalid, fpst); + return 0; + } + return float16_to_uint16(a, fpst); +} + +static int el_from_spsr(uint32_t spsr) +{ + /* Return the exception level that this SPSR is requesting a return to, + * or -1 if it is invalid (an illegal return) + */ + if (spsr & PSTATE_nRW) { + switch (spsr & CPSR_M) { + case ARM_CPU_MODE_USR: + return 0; + case ARM_CPU_MODE_HYP: + return 2; + case ARM_CPU_MODE_FIQ: + case ARM_CPU_MODE_IRQ: + case ARM_CPU_MODE_SVC: + case ARM_CPU_MODE_ABT: + case ARM_CPU_MODE_UND: + case ARM_CPU_MODE_SYS: + return 1; + case ARM_CPU_MODE_MON: + /* Returning to Mon from AArch64 is never possible, + * so this is an illegal return. + */ + default: + return -1; + } + } else { + if (extract32(spsr, 1, 1)) { + /* Return with reserved M[1] bit set */ + return -1; + } + if (extract32(spsr, 0, 4) == 1) { + /* return to EL0 with M[0] bit set */ + return -1; + } + return extract32(spsr, 2, 2); + } +} + +static void cpsr_write_from_spsr_elx(CPUARMState *env, + uint32_t val) +{ + uint32_t mask; + + /* Save SPSR_ELx.SS into PSTATE. */ + env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS); + val &= ~PSTATE_SS; + + /* Move DIT to the correct location for CPSR */ + if (val & PSTATE_DIT) { + val &= ~PSTATE_DIT; + val |= CPSR_DIT; + } + + mask = aarch32_cpsr_valid_mask(env->features, \ + &env_archcpu(env)->isar); + cpsr_write(env, val, mask, CPSRWriteRaw); +} + +void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) +{ + int cur_el = arm_current_el(env); + unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el); + uint32_t spsr = env->banked_spsr[spsr_idx]; + int new_el; + bool return_to_aa64 = (spsr & PSTATE_nRW) == 0; + + aarch64_save_sp(env, cur_el); + + arm_clear_exclusive(env); + + /* We must squash the PSTATE.SS bit to zero unless both of the + * following hold: + * 1. debug exceptions are currently disabled + * 2. singlestep will be active in the EL we return to + * We check 1 here and 2 after we've done the pstate/cpsr write() to + * transition to the EL we're going to. + */ + if (arm_generate_debug_exceptions(env)) { + spsr &= ~PSTATE_SS; + } + + /* + * FEAT_RME forbids return from EL3 with an invalid security state. + * We don't need an explicit check for FEAT_RME here because we enforce + * in scr_write() that you can't set the NSE bit without it. + */ + if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) { + goto illegal_return; + } + + new_el = el_from_spsr(spsr); + if (new_el == -1) { + goto illegal_return; + } + if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) { + /* Disallow return to an EL which is unimplemented or higher + * than the current one. 
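For an AArch64 SPSR (PSTATE.nRW clear), el_from_spsr() above reads the target exception level out of M[3:2], with M[1] reserved and M[0] selecting the stack pointer; EL0 with M[0] set is one of the illegal-return cases. A decode sketch exercising a couple of mode encodings (EL1h = 0b0101, EL0t = 0b0000):

#include <stdint.h>
#include <stdio.h>

static int aa64_el_from_m(uint32_t spsr)
{
    if ((spsr >> 1) & 1) {
        return -1;                 /* reserved M[1] set: illegal return */
    }
    if ((spsr & 0xf) == 1) {
        return -1;                 /* EL0 with M[0]=1: only EL0t is valid */
    }
    return (spsr >> 2) & 3;        /* target exception level from M[3:2] */
}

int main(void)
{
    printf("%d\n", aa64_el_from_m(0x5));   /* EL1h -> 1 */
    printf("%d\n", aa64_el_from_m(0x0));   /* EL0t -> 0 */
    printf("%d\n", aa64_el_from_m(0x1));   /* illegal -> -1 */
    return 0;
}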
+ */ + goto illegal_return; + } + + if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) { + /* Return to an EL which is configured for a different register width */ + goto illegal_return; + } + + if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) { + goto illegal_return; + } + + bql_lock(); + arm_call_pre_el_change_hook(env_archcpu(env)); + bql_unlock(); + + if (!return_to_aa64) { + env->aarch64 = false; + /* We do a raw CPSR write because aarch64_sync_64_to_32() + * will sort the register banks out for us, and we've already + * caught all the bad-mode cases in el_from_spsr(). + */ + cpsr_write_from_spsr_elx(env, spsr); + if (!arm_singlestep_active(env)) { + env->pstate &= ~PSTATE_SS; + } + aarch64_sync_64_to_32(env); + + if (spsr & CPSR_T) { + env->regs[15] = new_pc & ~0x1; + } else { + env->regs[15] = new_pc & ~0x3; + } + helper_rebuild_hflags_a32(env, new_el); + qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to " + "AArch32 EL%d PC 0x%" PRIx32 "\n", + cur_el, new_el, env->regs[15]); + } else { + int tbii; + + env->aarch64 = true; + spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar); + pstate_write(env, spsr); + if (!arm_singlestep_active(env)) { + env->pstate &= ~PSTATE_SS; + } + aarch64_restore_sp(env, new_el); + helper_rebuild_hflags_a64(env, new_el); + + /* + * Apply TBI to the exception return address. We had to delay this + * until after we selected the new EL, so that we could select the + * correct TBI+TBID bits. This is made easier by waiting until after + * the hflags rebuild, since we can pull the composite TBII field + * from there. + */ + tbii = EX_TBFLAG_A64(env->hflags, TBII); + if ((tbii >> extract64(new_pc, 55, 1)) & 1) { + /* TBI is enabled. */ + int core_mmu_idx = arm_env_mmu_index(env); + if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) { + new_pc = sextract64(new_pc, 0, 56); + } else { + new_pc = extract64(new_pc, 0, 56); + } + } + env->pc = new_pc; + + qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to " + "AArch64 EL%d PC 0x%" PRIx64 "\n", + cur_el, new_el, env->pc); + } + + /* + * Note that cur_el can never be 0. If new_el is 0, then + * el0_a64 is return_to_aa64, else el0_a64 is ignored. + */ + aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64); + + bql_lock(); + arm_call_el_change_hook(env_archcpu(env)); + bql_unlock(); + + return; + +illegal_return: + /* Illegal return events of various kinds have architecturally + * mandated behaviour: + * restore NZCV and DAIF from SPSR_ELx + * set PSTATE.IL + * restore PC from ELR_ELx + * no change to exception level, execution state or stack pointer + */ + env->pstate |= PSTATE_IL; + env->pc = new_pc; + spsr &= PSTATE_NZCV | PSTATE_DAIF; + spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF); + pstate_write(env, spsr); + if (!arm_singlestep_active(env)) { + env->pstate &= ~PSTATE_SS; + } + helper_rebuild_hflags_a64(env, cur_el); + qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: " + "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc); +} + +/* + * Square Root and Reciprocal square root + */ + +uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp) +{ + float_status *s = fpstp; + + return float16_sqrt(a, s); +} + +void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) +{ + /* + * Implement DC ZVA, which zeroes a fixed-length block of memory. 
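The TBI step in exception_return() canonicalises the return address: when top-byte-ignore applies, bits [63:56] are replaced either by a copy of bit 55 (regimes with two VA ranges) or by zeros. A sketch using the same shift idiom QEMU's sextract64() uses, relying as QEMU does on arithmetic right shift of signed values:

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

static uint64_t tbi_clean(uint64_t pc, int two_ranges)
{
    if (two_ranges) {
        /* replicate bit 55 into bits [63:56] via arithmetic shift */
        return (uint64_t)((int64_t)(pc << 8) >> 8);
    }
    return pc & 0x00ffffffffffffffULL;   /* zero bits [63:56] */
}

int main(void)
{
    uint64_t tagged = 0xa700007fdeadbeefULL;  /* tag 0xa7, bit 55 clear */
    printf("0x%016" PRIx64 "\n", tbi_clean(tagged, 1));
    /* prints 0x0000007fdeadbeef: the tag byte is gone */
    return 0;
}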
+ * Note that we do not implement the (architecturally mandated) + * alignment fault for attempts to use this on Device memory + * (which matches the usual QEMU behaviour of not implementing either + * alignment faults or any memory attribute handling). + */ + int blocklen = 4 << env_archcpu(env)->dcz_blocksize; + uint64_t vaddr = vaddr_in & ~(blocklen - 1); + int mmu_idx = arm_env_mmu_index(env); + void *mem; + + /* + * Trapless lookup. In addition to actual invalid page, may + * return NULL for I/O, watchpoints, clean pages, etc. + */ + mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx); + +#ifndef CONFIG_USER_ONLY + if (unlikely(!mem)) { + uintptr_t ra = GETPC(); + + /* + * Trap if accessing an invalid page. DC_ZVA requires that we supply + * the original pointer for an invalid page. But watchpoints require + * that we probe the actual space. So do both. + */ + (void) probe_write(env, vaddr_in, 1, mmu_idx, ra); + mem = probe_write(env, vaddr, blocklen, mmu_idx, ra); + + if (unlikely(!mem)) { + /* + * The only remaining reason for mem == NULL is I/O. + * Just do a series of byte writes as the architecture demands. + */ + for (int i = 0; i < blocklen; i++) { + cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra); + } + return; + } + } +#endif + + memset(mem, 0, blocklen); +} + +void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr, + uint32_t access_type, uint32_t mmu_idx) +{ + arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type, + mmu_idx, GETPC()); +} + +/* Memory operations (memset, memmove, memcpy) */ + +/* + * Return true if the CPY* and SET* insns can execute; compare + * pseudocode CheckMOPSEnabled(), though we refactor it a little. + */ +static bool mops_enabled(CPUARMState *env) +{ + int el = arm_current_el(env); + + if (el < 2 && + (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) && + !(arm_hcrx_el2_eff(env) & HCRX_MSCEN)) { + return false; + } + + if (el == 0) { + if (!el_is_in_host(env, 0)) { + return env->cp15.sctlr_el[1] & SCTLR_MSCEN; + } else { + return env->cp15.sctlr_el[2] & SCTLR_MSCEN; + } + } + return true; +} + +static void check_mops_enabled(CPUARMState *env, uintptr_t ra) +{ + if (!mops_enabled(env)) { + raise_exception_ra(env, EXCP_UDEF, syn_uncategorized(), + exception_target_el(env), ra); + } +} + +/* + * Return the target exception level for an exception due + * to mismatched arguments in a FEAT_MOPS copy or set. + * Compare pseudocode MismatchedCpySetTargetEL() + */ +static int mops_mismatch_exception_target_el(CPUARMState *env) +{ + int el = arm_current_el(env); + + if (el > 1) { + return el; + } + if (el == 0 && (arm_hcr_el2_eff(env) & HCR_TGE)) { + return 2; + } + if (el == 1 && (arm_hcrx_el2_eff(env) & HCRX_MCE2)) { + return 2; + } + return 1; +} + +/* + * Check whether an M or E instruction was executed with a CF value + * indicating the wrong option for this implementation. + * Assumes we are always Option A. + */ +static void check_mops_wrong_option(CPUARMState *env, uint32_t syndrome, + uintptr_t ra) +{ + if (env->CF != 0) { + syndrome |= 1 << 17; /* Set the wrong-option bit */ + raise_exception_ra(env, EXCP_UDEF, syndrome, + mops_mismatch_exception_target_el(env), ra); + } +} + +/* + * Return the maximum number of bytes we can transfer starting at addr + * without crossing a page boundary. 
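The DC ZVA geometry is worth making concrete: the block is 4 << dcz_blocksize bytes, and the input address is simply rounded down to a block boundary before being zeroed. With the usermode -cpu max value of dcz_blocksize = 7 set earlier in this series, that is 512 bytes:

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    int dcz_blocksize = 7;
    int blocklen = 4 << dcz_blocksize;                /* 512 */
    uint64_t vaddr_in = 0x12345ULL;
    uint64_t vaddr = vaddr_in & ~(uint64_t)(blocklen - 1);

    printf("blocklen=%d base=0x%" PRIx64 "\n", blocklen, vaddr);
    /* blocklen=512 base=0x12200 */
    return 0;
}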
+ */ +static uint64_t page_limit(uint64_t addr) +{ + return TARGET_PAGE_ALIGN(addr + 1) - addr; +} + +/* + * Return the number of bytes we can copy starting from addr and working + * backwards without crossing a page boundary. + */ +static uint64_t page_limit_rev(uint64_t addr) +{ + return (addr & ~TARGET_PAGE_MASK) + 1; +} + +/* + * Perform part of a memory set on an area of guest memory starting at + * toaddr (a dirty address) and extending for setsize bytes. + * + * Returns the number of bytes actually set, which might be less than + * setsize; the caller should loop until the whole set has been done. + * The caller should ensure that the guest registers are correct + * for the possibility that the first byte of the set encounters + * an exception or watchpoint. We guarantee not to take any faults + * for bytes other than the first. + */ +static uint64_t set_step(CPUARMState *env, uint64_t toaddr, + uint64_t setsize, uint32_t data, int memidx, + uint32_t *mtedesc, uintptr_t ra) +{ + void *mem; + + setsize = MIN(setsize, page_limit(toaddr)); + if (*mtedesc) { + uint64_t mtesize = mte_mops_probe(env, toaddr, setsize, *mtedesc); + if (mtesize == 0) { + /* Trap, or not. All CPU state is up to date */ + mte_check_fail(env, *mtedesc, toaddr, ra); + /* Continue, with no further MTE checks required */ + *mtedesc = 0; + } else { + /* Advance to the end, or to the tag mismatch */ + setsize = MIN(setsize, mtesize); + } + } + + toaddr = useronly_clean_ptr(toaddr); + /* + * Trapless lookup: returns NULL for invalid page, I/O, + * watchpoints, clean pages, etc. + */ + mem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, memidx); + +#ifndef CONFIG_USER_ONLY + if (unlikely(!mem)) { + /* + * Slow-path: just do one byte write. This will handle the + * watchpoint, invalid page, etc handling correctly. + * For clean code pages, the next iteration will see + * the page dirty and will use the fast path. + */ + cpu_stb_mmuidx_ra(env, toaddr, data, memidx, ra); + return 1; + } +#endif + /* Easy case: just memset the host memory */ + memset(mem, data, setsize); + return setsize; +} + +/* + * Similar, but setting tags. The architecture requires us to do this + * in 16-byte chunks. SETP accesses are not tag checked; they set + * the tags. + */ +static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr, + uint64_t setsize, uint32_t data, int memidx, + uint32_t *mtedesc, uintptr_t ra) +{ + void *mem; + uint64_t cleanaddr; + + setsize = MIN(setsize, page_limit(toaddr)); + + cleanaddr = useronly_clean_ptr(toaddr); + /* + * Trapless lookup: returns NULL for invalid page, I/O, + * watchpoints, clean pages, etc. + */ + mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx); + +#ifndef CONFIG_USER_ONLY + if (unlikely(!mem)) { + /* + * Slow-path: just do one write. This will handle the + * watchpoint, invalid page, etc handling correctly. + * The architecture requires that we do 16 bytes at a time, + * and we know both ptr and size are 16 byte aligned. + * For clean code pages, the next iteration will see + * the page dirty and will use the fast path. 
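page_limit() and page_limit_rev() clamp each step so a single memset or memmove call never crosses a guest page. Standalone versions assuming 4 KiB pages (the real code uses TARGET_PAGE_ALIGN and TARGET_PAGE_MASK, so the page size is target-dependent):

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL
#define PAGE_MASK (~(PAGE_SIZE - 1))

static uint64_t page_limit(uint64_t addr)      /* bytes forward to page end */
{
    return ((addr + PAGE_SIZE) & PAGE_MASK) - addr;
}

static uint64_t page_limit_rev(uint64_t addr)  /* bytes back to page start */
{
    return (addr & ~PAGE_MASK) + 1;
}

int main(void)
{
    printf("%" PRIu64 "\n", page_limit(0x1ff0));      /* 16   */
    printf("%" PRIu64 "\n", page_limit_rev(0x1ff0));  /* 4081 */
    return 0;
}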
+ */ + uint64_t repldata = data * 0x0101010101010101ULL; + MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, memidx); + cpu_st16_mmu(env, toaddr, int128_make128(repldata, repldata), oi16, ra); + mte_mops_set_tags(env, toaddr, 16, *mtedesc); + return 16; + } +#endif + /* Easy case: just memset the host memory */ + memset(mem, data, setsize); + mte_mops_set_tags(env, toaddr, setsize, *mtedesc); + return setsize; +} + +typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr, + uint64_t setsize, uint32_t data, + int memidx, uint32_t *mtedesc, uintptr_t ra); + +/* Extract register numbers from a MOPS exception syndrome value */ +static int mops_destreg(uint32_t syndrome) +{ + return extract32(syndrome, 10, 5); +} + +static int mops_srcreg(uint32_t syndrome) +{ + return extract32(syndrome, 5, 5); +} + +static int mops_sizereg(uint32_t syndrome) +{ + return extract32(syndrome, 0, 5); +} + +/* + * Return true if TCMA and TBI bits mean we need to do MTE checks. + * We only need to do this once per MOPS insn, not for every page. + */ +static bool mte_checks_needed(uint64_t ptr, uint32_t desc) +{ + int bit55 = extract64(ptr, 55, 1); + + /* + * Note that tbi_check() returns true for "access checked" but + * tcma_check() returns true for "access unchecked". + */ + if (!tbi_check(desc, bit55)) { + return false; + } + return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr)); +} + +/* Take an exception if the SETG addr/size are not granule aligned */ +static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size, + uint32_t memidx, uintptr_t ra) +{ + if ((size != 0 && !QEMU_IS_ALIGNED(ptr, TAG_GRANULE)) || + !QEMU_IS_ALIGNED(size, TAG_GRANULE)) { + arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE, + memidx, ra); + + } +} + +static uint64_t arm_reg_or_xzr(CPUARMState *env, int reg) +{ + /* + * Runtime equivalent of cpu_reg() -- return the CPU register value, + * for contexts when index 31 means XZR (not SP). + */ + return reg == 31 ? 0 : env->xregs[reg]; +} + +/* + * For the Memory Set operation, our implementation chooses + * always to use "option A", where we update Xd to the final + * address in the SETP insn, and set Xn to be -(bytes remaining). + * On SETM and SETE insns we only need update Xn. 
+ * + * @env: CPU + * @syndrome: syndrome value for mismatch exceptions + * (also contains the register numbers we need to use) + * @mtedesc: MTE descriptor word + * @stepfn: function which does a single part of the set operation + * @is_setg: true if this is the tag-setting SETG variant + */ +static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, + StepFn *stepfn, bool is_setg, uintptr_t ra) +{ + /* Prologue: we choose to do up to the next page boundary */ + int rd = mops_destreg(syndrome); + int rs = mops_srcreg(syndrome); + int rn = mops_sizereg(syndrome); + uint8_t data = arm_reg_or_xzr(env, rs); + uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); + uint64_t toaddr = env->xregs[rd]; + uint64_t setsize = env->xregs[rn]; + uint64_t stagesetsize, step; + + check_mops_enabled(env, ra); + + if (setsize > INT64_MAX) { + setsize = INT64_MAX; + if (is_setg) { + setsize &= ~0xf; + } + } + + if (unlikely(is_setg)) { + check_setg_alignment(env, toaddr, setsize, memidx, ra); + } else if (!mte_checks_needed(toaddr, mtedesc)) { + mtedesc = 0; + } + + stagesetsize = MIN(setsize, page_limit(toaddr)); + while (stagesetsize) { + env->xregs[rd] = toaddr; + env->xregs[rn] = setsize; + step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra); + toaddr += step; + setsize -= step; + stagesetsize -= step; + } + /* Insn completed, so update registers to the Option A format */ + env->xregs[rd] = toaddr + setsize; + env->xregs[rn] = -setsize; + + /* Set NZCV = 0000 to indicate we are an Option A implementation */ + env->NF = 0; + env->ZF = 1; /* our env->ZF encoding is inverted */ + env->CF = 0; + env->VF = 0; + return; +} + +void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) +{ + do_setp(env, syndrome, mtedesc, set_step, false, GETPC()); +} + +void HELPER(setgp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) +{ + do_setp(env, syndrome, mtedesc, set_step_tags, true, GETPC()); +} + +static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, + StepFn *stepfn, bool is_setg, uintptr_t ra) +{ + /* Main: we choose to do all the full-page chunks */ + CPUState *cs = env_cpu(env); + int rd = mops_destreg(syndrome); + int rs = mops_srcreg(syndrome); + int rn = mops_sizereg(syndrome); + uint8_t data = arm_reg_or_xzr(env, rs); + uint64_t toaddr = env->xregs[rd] + env->xregs[rn]; + uint64_t setsize = -env->xregs[rn]; + uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); + uint64_t step, stagesetsize; + + check_mops_enabled(env, ra); + + /* + * We're allowed to NOP out "no data to copy" before the consistency + * checks; we choose to do so. + */ + if (env->xregs[rn] == 0) { + return; + } + + check_mops_wrong_option(env, syndrome, ra); + + /* + * Our implementation will work fine even if we have an unaligned + * destination address, and because we update Xn every time around + * the loop below and the return value from stepfn() may be less + * than requested, we might find toaddr is unaligned. So we don't + * have an IMPDEF check for alignment here. 
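Under "option A", the size register ends up holding minus the bytes remaining, so the later SETM/SETE stages can recover both the running cursor (Xd + Xn) and the remaining count (-Xn) from just two registers. A sketch of that arithmetic with made-up register values:

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    uint64_t xd = 0x10000 + 0x3000;   /* final address, as set by SETP */
    uint64_t xn = -(uint64_t)0x1800;  /* 0x1800 bytes still to set */

    uint64_t cursor    = xd + xn;     /* current write address */
    uint64_t remaining = -xn;         /* bytes left */
    printf("cursor=0x%" PRIx64 " remaining=0x%" PRIx64 "\n",
           cursor, remaining);        /* cursor=0x11800 remaining=0x1800 */
    return 0;
}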
+ */ + + if (unlikely(is_setg)) { + check_setg_alignment(env, toaddr, setsize, memidx, ra); + } else if (!mte_checks_needed(toaddr, mtedesc)) { + mtedesc = 0; + } + + /* Do the actual memset: we leave the last partial page to SETE */ + stagesetsize = setsize & TARGET_PAGE_MASK; + while (stagesetsize > 0) { + step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra); + toaddr += step; + setsize -= step; + stagesetsize -= step; + env->xregs[rn] = -setsize; + if (stagesetsize > 0 && unlikely(cpu_loop_exit_requested(cs))) { + cpu_loop_exit_restore(cs, ra); + } + } +} + +void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) +{ + do_setm(env, syndrome, mtedesc, set_step, false, GETPC()); +} + +void HELPER(setgm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) +{ + do_setm(env, syndrome, mtedesc, set_step_tags, true, GETPC()); +} + +static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, + StepFn *stepfn, bool is_setg, uintptr_t ra) +{ + /* Epilogue: do the last partial page */ + int rd = mops_destreg(syndrome); + int rs = mops_srcreg(syndrome); + int rn = mops_sizereg(syndrome); + uint8_t data = arm_reg_or_xzr(env, rs); + uint64_t toaddr = env->xregs[rd] + env->xregs[rn]; + uint64_t setsize = -env->xregs[rn]; + uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); + uint64_t step; + + check_mops_enabled(env, ra); + + /* + * We're allowed to NOP out "no data to copy" before the consistency + * checks; we choose to do so. + */ + if (setsize == 0) { + return; + } + + check_mops_wrong_option(env, syndrome, ra); + + /* + * Our implementation has no address alignment requirements, but + * we do want to enforce the "less than a page" size requirement, + * so we don't need to have the "check for interrupts" here. + */ + if (setsize >= TARGET_PAGE_SIZE) { + raise_exception_ra(env, EXCP_UDEF, syndrome, + mops_mismatch_exception_target_el(env), ra); + } + + if (unlikely(is_setg)) { + check_setg_alignment(env, toaddr, setsize, memidx, ra); + } else if (!mte_checks_needed(toaddr, mtedesc)) { + mtedesc = 0; + } + + /* Do the actual memset */ + while (setsize > 0) { + step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra); + toaddr += step; + setsize -= step; + env->xregs[rn] = -setsize; + } +} + +void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) +{ + do_sete(env, syndrome, mtedesc, set_step, false, GETPC()); +} + +void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) +{ + do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC()); +} + +/* + * Perform part of a memory copy from the guest memory at fromaddr + * and extending for copysize bytes, to the guest memory at + * toaddr. Both addresses are dirty. + * + * Returns the number of bytes actually set, which might be less than + * copysize; the caller should loop until the whole copy has been done. + * The caller should ensure that the guest registers are correct + * for the possibility that the first byte of the copy encounters + * an exception or watchpoint. We guarantee not to take any faults + * for bytes other than the first. 
+ */ +static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr, + uint64_t copysize, int wmemidx, int rmemidx, + uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra) +{ + void *rmem; + void *wmem; + + /* Don't cross a page boundary on either source or destination */ + copysize = MIN(copysize, page_limit(toaddr)); + copysize = MIN(copysize, page_limit(fromaddr)); + /* + * Handle MTE tag checks: either handle the tag mismatch for byte 0, + * or else copy up to but not including the byte with the mismatch. + */ + if (*rdesc) { + uint64_t mtesize = mte_mops_probe(env, fromaddr, copysize, *rdesc); + if (mtesize == 0) { + mte_check_fail(env, *rdesc, fromaddr, ra); + *rdesc = 0; + } else { + copysize = MIN(copysize, mtesize); + } + } + if (*wdesc) { + uint64_t mtesize = mte_mops_probe(env, toaddr, copysize, *wdesc); + if (mtesize == 0) { + mte_check_fail(env, *wdesc, toaddr, ra); + *wdesc = 0; + } else { + copysize = MIN(copysize, mtesize); + } + } + + toaddr = useronly_clean_ptr(toaddr); + fromaddr = useronly_clean_ptr(fromaddr); + /* Trapless lookup of whether we can get a host memory pointer */ + wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx); + rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx); + +#ifndef CONFIG_USER_ONLY + /* + * If we don't have host memory for both source and dest then just + * do a single byte copy. This will handle watchpoints, invalid pages, + * etc correctly. For clean code pages, the next iteration will see + * the page dirty and will use the fast path. + */ + if (unlikely(!rmem || !wmem)) { + uint8_t byte; + if (rmem) { + byte = *(uint8_t *)rmem; + } else { + byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra); + } + if (wmem) { + *(uint8_t *)wmem = byte; + } else { + cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra); + } + return 1; + } +#endif + /* Easy case: just memmove the host memory */ + memmove(wmem, rmem, copysize); + return copysize; +} + +/* + * Do part of a backwards memory copy. Here toaddr and fromaddr point + * to the *last* byte to be copied. + */ +static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr, + uint64_t fromaddr, + uint64_t copysize, int wmemidx, int rmemidx, + uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra) +{ + void *rmem; + void *wmem; + + /* Don't cross a page boundary on either source or destination */ + copysize = MIN(copysize, page_limit_rev(toaddr)); + copysize = MIN(copysize, page_limit_rev(fromaddr)); + + /* + * Handle MTE tag checks: either handle the tag mismatch for byte 0, + * or else copy up to but not including the byte with the mismatch. + */ + if (*rdesc) { + uint64_t mtesize = mte_mops_probe_rev(env, fromaddr, copysize, *rdesc); + if (mtesize == 0) { + mte_check_fail(env, *rdesc, fromaddr, ra); + *rdesc = 0; + } else { + copysize = MIN(copysize, mtesize); + } + } + if (*wdesc) { + uint64_t mtesize = mte_mops_probe_rev(env, toaddr, copysize, *wdesc); + if (mtesize == 0) { + mte_check_fail(env, *wdesc, toaddr, ra); + *wdesc = 0; + } else { + copysize = MIN(copysize, mtesize); + } + } + + toaddr = useronly_clean_ptr(toaddr); + fromaddr = useronly_clean_ptr(fromaddr); + /* Trapless lookup of whether we can get a host memory pointer */ + wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx); + rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx); + +#ifndef CONFIG_USER_ONLY + /* + * If we don't have host memory for both source and dest then just + * do a single byte copy. This will handle watchpoints, invalid pages, + * etc correctly. 
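copy_step_rev() exists because an overlapping copy with the destination above the source cannot proceed byte-by-byte forwards: the loop would read bytes it has already overwritten. A small demonstration of why the direction matters, using memmove, which handles the overlap correctly:

#include <stdio.h>
#include <string.h>

int main(void)
{
    char buf[8] = "abcdef";
    /*
     * Copy buf[0..3] onto buf[2..5]: the ranges overlap and the
     * destination is above the source, so a naive forward byte loop
     * would re-read its own output. Copying backwards (or memmove)
     * gives the intended result.
     */
    memmove(buf + 2, buf, 4);
    printf("%s\n", buf);   /* "ababcd" */
    return 0;
}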
For clean code pages, the next iteration will see + * the page dirty and will use the fast path. + */ + if (unlikely(!rmem || !wmem)) { + uint8_t byte; + if (rmem) { + byte = *(uint8_t *)rmem; + } else { + byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra); + } + if (wmem) { + *(uint8_t *)wmem = byte; + } else { + cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra); + } + return 1; + } +#endif + /* + * Easy case: just memmove the host memory. Note that wmem and + * rmem here point to the *last* byte to copy. + */ + memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize); + return copysize; +} + +/* + * for the Memory Copy operation, our implementation chooses always + * to use "option A", where we update Xd and Xs to the final addresses + * in the CPYP insn, and then in CPYM and CPYE only need to update Xn. + * + * @env: CPU + * @syndrome: syndrome value for mismatch exceptions + * (also contains the register numbers we need to use) + * @wdesc: MTE descriptor for the writes (destination) + * @rdesc: MTE descriptor for the reads (source) + * @move: true if this is CPY (memmove), false for CPYF (memcpy forwards) + */ +static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc, uint32_t move, uintptr_t ra) +{ + int rd = mops_destreg(syndrome); + int rs = mops_srcreg(syndrome); + int rn = mops_sizereg(syndrome); + uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX); + uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX); + bool forwards = true; + uint64_t toaddr = env->xregs[rd]; + uint64_t fromaddr = env->xregs[rs]; + uint64_t copysize = env->xregs[rn]; + uint64_t stagecopysize, step; + + check_mops_enabled(env, ra); + + + if (move) { + /* + * Copy backwards if necessary. The direction for a non-overlapping + * copy is IMPDEF; we choose forwards. + */ + if (copysize > 0x007FFFFFFFFFFFFFULL) { + copysize = 0x007FFFFFFFFFFFFFULL; + } + uint64_t fs = extract64(fromaddr, 0, 56); + uint64_t ts = extract64(toaddr, 0, 56); + uint64_t fe = extract64(fromaddr + copysize, 0, 56); + + if (fs < ts && fe > ts) { + forwards = false; + } + } else { + if (copysize > INT64_MAX) { + copysize = INT64_MAX; + } + } + + if (!mte_checks_needed(fromaddr, rdesc)) { + rdesc = 0; + } + if (!mte_checks_needed(toaddr, wdesc)) { + wdesc = 0; + } + + if (forwards) { + stagecopysize = MIN(copysize, page_limit(toaddr)); + stagecopysize = MIN(stagecopysize, page_limit(fromaddr)); + while (stagecopysize) { + env->xregs[rd] = toaddr; + env->xregs[rs] = fromaddr; + env->xregs[rn] = copysize; + step = copy_step(env, toaddr, fromaddr, stagecopysize, + wmemidx, rmemidx, &wdesc, &rdesc, ra); + toaddr += step; + fromaddr += step; + copysize -= step; + stagecopysize -= step; + } + /* Insn completed, so update registers to the Option A format */ + env->xregs[rd] = toaddr + copysize; + env->xregs[rs] = fromaddr + copysize; + env->xregs[rn] = -copysize; + } else { + /* + * In a reverse copy the to and from addrs in Xs and Xd are the start + * of the range, but it's more convenient for us to work with pointers + * to the last byte being copied. 
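+ * (For instance, an overlapping 0x100-byte move with Xs = 0x8000 and
+ * Xd = 0x8040 goes backwards, and the working pointers below start at
+ * 0x80ff and 0x813f respectively.)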
+ */ + toaddr += copysize - 1; + fromaddr += copysize - 1; + stagecopysize = MIN(copysize, page_limit_rev(toaddr)); + stagecopysize = MIN(stagecopysize, page_limit_rev(fromaddr)); + while (stagecopysize) { + env->xregs[rn] = copysize; + step = copy_step_rev(env, toaddr, fromaddr, stagecopysize, + wmemidx, rmemidx, &wdesc, &rdesc, ra); + copysize -= step; + stagecopysize -= step; + toaddr -= step; + fromaddr -= step; + } + /* + * Insn completed, so update registers to the Option A format. + * For a reverse copy this is no different to the CPYP input format. + */ + env->xregs[rn] = copysize; + } + + /* Set NZCV = 0000 to indicate we are an Option A implementation */ + env->NF = 0; + env->ZF = 1; /* our env->ZF encoding is inverted */ + env->CF = 0; + env->VF = 0; + return; +} + +void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc) +{ + do_cpyp(env, syndrome, wdesc, rdesc, true, GETPC()); +} + +void HELPER(cpyfp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc) +{ + do_cpyp(env, syndrome, wdesc, rdesc, false, GETPC()); +} + +static void do_cpym(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc, uint32_t move, uintptr_t ra) +{ + /* Main: we choose to copy until less than a page remaining */ + CPUState *cs = env_cpu(env); + int rd = mops_destreg(syndrome); + int rs = mops_srcreg(syndrome); + int rn = mops_sizereg(syndrome); + uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX); + uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX); + bool forwards = true; + uint64_t toaddr, fromaddr, copysize, step; + + check_mops_enabled(env, ra); + + /* We choose to NOP out "no data to copy" before consistency checks */ + if (env->xregs[rn] == 0) { + return; + } + + check_mops_wrong_option(env, syndrome, ra); + + if (move) { + forwards = (int64_t)env->xregs[rn] < 0; + } + + if (forwards) { + toaddr = env->xregs[rd] + env->xregs[rn]; + fromaddr = env->xregs[rs] + env->xregs[rn]; + copysize = -env->xregs[rn]; + } else { + copysize = env->xregs[rn]; + /* This toaddr and fromaddr point to the *last* byte to copy */ + toaddr = env->xregs[rd] + copysize - 1; + fromaddr = env->xregs[rs] + copysize - 1; + } + + if (!mte_checks_needed(fromaddr, rdesc)) { + rdesc = 0; + } + if (!mte_checks_needed(toaddr, wdesc)) { + wdesc = 0; + } + + /* Our implementation has no particular parameter requirements for CPYM */ + + /* Do the actual memmove */ + if (forwards) { + while (copysize >= TARGET_PAGE_SIZE) { + step = copy_step(env, toaddr, fromaddr, copysize, + wmemidx, rmemidx, &wdesc, &rdesc, ra); + toaddr += step; + fromaddr += step; + copysize -= step; + env->xregs[rn] = -copysize; + if (copysize >= TARGET_PAGE_SIZE && + unlikely(cpu_loop_exit_requested(cs))) { + cpu_loop_exit_restore(cs, ra); + } + } + } else { + while (copysize >= TARGET_PAGE_SIZE) { + step = copy_step_rev(env, toaddr, fromaddr, copysize, + wmemidx, rmemidx, &wdesc, &rdesc, ra); + toaddr -= step; + fromaddr -= step; + copysize -= step; + env->xregs[rn] = copysize; + if (copysize >= TARGET_PAGE_SIZE && + unlikely(cpu_loop_exit_requested(cs))) { + cpu_loop_exit_restore(cs, ra); + } + } + } +} + +void HELPER(cpym)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc) +{ + do_cpym(env, syndrome, wdesc, rdesc, true, GETPC()); +} + +void HELPER(cpyfm)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc) +{ + do_cpym(env, syndrome, wdesc, rdesc, false, GETPC()); +} + +static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t 
rdesc, uint32_t move, uintptr_t ra) +{ + /* Epilogue: do the last partial page */ + int rd = mops_destreg(syndrome); + int rs = mops_srcreg(syndrome); + int rn = mops_sizereg(syndrome); + uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX); + uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX); + bool forwards = true; + uint64_t toaddr, fromaddr, copysize, step; + + check_mops_enabled(env, ra); + + /* We choose to NOP out "no data to copy" before consistency checks */ + if (env->xregs[rn] == 0) { + return; + } + + check_mops_wrong_option(env, syndrome, ra); + + if (move) { + forwards = (int64_t)env->xregs[rn] < 0; + } + + if (forwards) { + toaddr = env->xregs[rd] + env->xregs[rn]; + fromaddr = env->xregs[rs] + env->xregs[rn]; + copysize = -env->xregs[rn]; + } else { + copysize = env->xregs[rn]; + /* This toaddr and fromaddr point to the *last* byte to copy */ + toaddr = env->xregs[rd] + copysize - 1; + fromaddr = env->xregs[rs] + copysize - 1; + } + + if (!mte_checks_needed(fromaddr, rdesc)) { + rdesc = 0; + } + if (!mte_checks_needed(toaddr, wdesc)) { + wdesc = 0; + } + + /* Check the size; we don't want to have do a check-for-interrupts */ + if (copysize >= TARGET_PAGE_SIZE) { + raise_exception_ra(env, EXCP_UDEF, syndrome, + mops_mismatch_exception_target_el(env), ra); + } + + /* Do the actual memmove */ + if (forwards) { + while (copysize > 0) { + step = copy_step(env, toaddr, fromaddr, copysize, + wmemidx, rmemidx, &wdesc, &rdesc, ra); + toaddr += step; + fromaddr += step; + copysize -= step; + env->xregs[rn] = -copysize; + } + } else { + while (copysize > 0) { + step = copy_step_rev(env, toaddr, fromaddr, copysize, + wmemidx, rmemidx, &wdesc, &rdesc, ra); + toaddr -= step; + fromaddr -= step; + copysize -= step; + env->xregs[rn] = copysize; + } + } +} + +void HELPER(cpye)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc) +{ + do_cpye(env, syndrome, wdesc, rdesc, true, GETPC()); +} + +void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc) +{ + do_cpye(env, syndrome, wdesc, rdesc, false, GETPC()); +} diff --git a/target/arm/helper-a64.h b/target/arm/tcg/helper-a64.h index 7b706571bb..575a5dab7d 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/tcg/helper-a64.h @@ -50,14 +50,6 @@ DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr) DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env) DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) -DEF_HELPER_FLAGS_4(paired_cmpxchg64_le, TCG_CALL_NO_WG, i64, env, i64, i64, i64) -DEF_HELPER_FLAGS_4(paired_cmpxchg64_le_parallel, TCG_CALL_NO_WG, - i64, env, i64, i64, i64) -DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64) -DEF_HELPER_FLAGS_4(paired_cmpxchg64_be_parallel, TCG_CALL_NO_WG, - i64, env, i64, i64, i64) -DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64) -DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64) DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr) DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr) DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr) @@ -98,9 +90,13 @@ DEF_HELPER_FLAGS_3(pacda, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(pacdb, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(pacga, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(autia, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(autia_combined, 
TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(autib, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(autib_combined, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(autda, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(autda_combined, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(autdb_combined, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64) @@ -118,3 +114,20 @@ DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64) DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64) DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64) + +DEF_HELPER_FLAGS_4(unaligned_access, TCG_CALL_NO_WG, + noreturn, env, i64, i32, i32) + +DEF_HELPER_3(setp, void, env, i32, i32) +DEF_HELPER_3(setm, void, env, i32, i32) +DEF_HELPER_3(sete, void, env, i32, i32) +DEF_HELPER_3(setgp, void, env, i32, i32) +DEF_HELPER_3(setgm, void, env, i32, i32) +DEF_HELPER_3(setge, void, env, i32, i32) + +DEF_HELPER_4(cpyp, void, env, i32, i32, i32) +DEF_HELPER_4(cpym, void, env, i32, i32, i32) +DEF_HELPER_4(cpye, void, env, i32, i32, i32) +DEF_HELPER_4(cpyfp, void, env, i32, i32, i32) +DEF_HELPER_4(cpyfm, void, env, i32, i32, i32) +DEF_HELPER_4(cpyfe, void, env, i32, i32, i32) diff --git a/target/arm/helper-mve.h b/target/arm/tcg/helper-mve.h index 76bd25006d..76bd25006d 100644 --- a/target/arm/helper-mve.h +++ b/target/arm/tcg/helper-mve.h diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h new file mode 100644 index 0000000000..27eef49a11 --- /dev/null +++ b/target/arm/tcg/helper-sme.h @@ -0,0 +1,146 @@ +/* + * AArch64 SME specific helper definitions + * + * Copyright (c) 2022 Linaro, Ltd + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +DEF_HELPER_FLAGS_3(set_svcr, TCG_CALL_NO_RWG, void, env, i32, i32) + +DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32) + +/* Move to/from vertical array slices, i.e. columns, so 'c'. 
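+ * The _cz/_zc infix gives destination then source (ZA column 'c',
+ * Z vector 'z') and the trailing letter the element size, so e.g.
+ * sme_mova_cz_s fills a 32-bit ZA column slice from a Z register.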
*/ +DEF_HELPER_FLAGS_4(sme_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, 
TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + 
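+/*
+ * Outer products: the FP variants take the ZA tile, the two Z register
+ * operands, the two governing predicates, and a float_status pointer in
+ * addition to the usual simd descriptor (hence seven arguments); the
+ * integer and BF16 variants below omit the float_status pointer.
+ */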
+DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sme_sumopa_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sme_usmopa_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sme_smopa_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sme_umopa_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) diff --git a/target/arm/helper-sve.h b/target/arm/tcg/helper-sve.h index dc629f851a..cc4e1d8948 100644 --- a/target/arm/helper-sve.h +++ b/target/arm/tcg/helper-sve.h @@ -325,6 +325,8 @@ DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve_sel_zpzz_q, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve2_addp_zpzz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -717,6 +719,8 @@ DEF_HELPER_FLAGS_4(sve_revh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_revw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(sme_revd_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(sve_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c new file mode 100644 index 0000000000..5da1b0fc1d --- /dev/null +++ b/target/arm/tcg/hflags.c @@ -0,0 +1,485 @@ +/* + * ARM hflags + * + * This code is licensed under the GNU GPL v2 or later. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" +#include "cpu-features.h" +#include "exec/helper-proto.h" +#include "cpregs.h" + +static inline bool fgt_svc(CPUARMState *env, int el) +{ + /* + * Assuming fine-grained-traps are active, return true if we + * should be trapping on SVC instructions. Only AArch64 can + * trap on an SVC at EL1, but we don't need to special-case this + * because if this is AArch32 EL1 then arm_fgt_active() is false. + * We also know el is 0 or 1. + */ + return el == 0 ? + FIELD_EX64(env->cp15.fgt_exec[FGTREG_HFGITR], HFGITR_EL2, SVC_EL0) : + FIELD_EX64(env->cp15.fgt_exec[FGTREG_HFGITR], HFGITR_EL2, SVC_EL1); +} + +/* Return true if memory alignment should be enforced. */ +static bool aprofile_require_alignment(CPUARMState *env, int el, uint64_t sctlr) +{ +#ifdef CONFIG_USER_ONLY + return false; +#else + /* Check the alignment enable bit. */ + if (sctlr & SCTLR_A) { + return true; + } + + /* + * If translation is disabled, then the default memory type is + * Device(-nGnRnE) instead of Normal, which requires that alignment + * be enforced. 
Since this affects all ram, it is most efficient + * to handle this during translation. + */ + if (sctlr & SCTLR_M) { + /* Translation enabled: memory type in PTE via MAIR_ELx. */ + return false; + } + if (el < 2 && (arm_hcr_el2_eff(env) & (HCR_DC | HCR_VM))) { + /* Stage 2 translation enabled: memory type in PTE. */ + return false; + } + return true; +#endif +} + +static CPUARMTBFlags rebuild_hflags_common(CPUARMState *env, int fp_el, + ARMMMUIdx mmu_idx, + CPUARMTBFlags flags) +{ + DP_TBFLAG_ANY(flags, FPEXC_EL, fp_el); + DP_TBFLAG_ANY(flags, MMUIDX, arm_to_core_mmu_idx(mmu_idx)); + + if (arm_singlestep_active(env)) { + DP_TBFLAG_ANY(flags, SS_ACTIVE, 1); + } + + return flags; +} + +static CPUARMTBFlags rebuild_hflags_common_32(CPUARMState *env, int fp_el, + ARMMMUIdx mmu_idx, + CPUARMTBFlags flags) +{ + bool sctlr_b = arm_sctlr_b(env); + + if (sctlr_b) { + DP_TBFLAG_A32(flags, SCTLR__B, 1); + } + if (arm_cpu_data_is_big_endian_a32(env, sctlr_b)) { + DP_TBFLAG_ANY(flags, BE_DATA, 1); + } + DP_TBFLAG_A32(flags, NS, !access_secure_reg(env)); + + return rebuild_hflags_common(env, fp_el, mmu_idx, flags); +} + +static CPUARMTBFlags rebuild_hflags_m32(CPUARMState *env, int fp_el, + ARMMMUIdx mmu_idx) +{ + CPUARMTBFlags flags = {}; + uint32_t ccr = env->v7m.ccr[env->v7m.secure]; + + /* Without HaveMainExt, CCR.UNALIGN_TRP is RES1. */ + if (ccr & R_V7M_CCR_UNALIGN_TRP_MASK) { + DP_TBFLAG_ANY(flags, ALIGN_MEM, 1); + } + + if (arm_v7m_is_handler_mode(env)) { + DP_TBFLAG_M32(flags, HANDLER, 1); + } + + /* + * v8M always applies stack limit checks unless CCR.STKOFHFNMIGN + * is suppressing them because the requested execution priority + * is less than 0. + */ + if (arm_feature(env, ARM_FEATURE_V8) && + !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) && + (ccr & R_V7M_CCR_STKOFHFNMIGN_MASK))) { + DP_TBFLAG_M32(flags, STACKCHECK, 1); + } + + if (arm_feature(env, ARM_FEATURE_M_SECURITY) && env->v7m.secure) { + DP_TBFLAG_M32(flags, SECURE, 1); + } + + return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags); +} + +/* This corresponds to the ARM pseudocode function IsFullA64Enabled(). */ +static bool sme_fa64(CPUARMState *env, int el) +{ + if (!cpu_isar_feature(aa64_sme_fa64, env_archcpu(env))) { + return false; + } + + if (el <= 1 && !el_is_in_host(env, el)) { + if (!FIELD_EX64(env->vfp.smcr_el[1], SMCR, FA64)) { + return false; + } + } + if (el <= 2 && arm_is_el2_enabled(env)) { + if (!FIELD_EX64(env->vfp.smcr_el[2], SMCR, FA64)) { + return false; + } + } + if (arm_feature(env, ARM_FEATURE_EL3)) { + if (!FIELD_EX64(env->vfp.smcr_el[3], SMCR, FA64)) { + return false; + } + } + + return true; +} + +static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el, + ARMMMUIdx mmu_idx) +{ + CPUARMTBFlags flags = {}; + int el = arm_current_el(env); + uint64_t sctlr = arm_sctlr(env, el); + + if (aprofile_require_alignment(env, el, sctlr)) { + DP_TBFLAG_ANY(flags, ALIGN_MEM, 1); + } + + if (arm_el_is_aa64(env, 1)) { + DP_TBFLAG_A32(flags, VFPEN, 1); + } + + if (el < 2 && env->cp15.hstr_el2 && arm_is_el2_enabled(env) && + (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { + DP_TBFLAG_A32(flags, HSTR_ACTIVE, 1); + } + + if (arm_fgt_active(env, el)) { + DP_TBFLAG_ANY(flags, FGT_ACTIVE, 1); + if (fgt_svc(env, el)) { + DP_TBFLAG_ANY(flags, FGT_SVC, 1); + } + } + + if (env->uncached_cpsr & CPSR_IL) { + DP_TBFLAG_ANY(flags, PSTATE__IL, 1); + } + + /* + * The SME exception we are testing for is raised via + * AArch64.CheckFPAdvSIMDEnabled(), as called from + * AArch32.CheckAdvSIMDOrFPEnabled(). 
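+ * That is (roughly): EL0 in Streaming SVE mode, EL1 is AArch64, we
+ * are not in the HCR_EL2.TGE "host" regime, and FEAT_SME_FA64 is not
+ * available, so the non-streaming AArch32 FP/SIMD insns must trap.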
+ */ + if (el == 0 + && FIELD_EX64(env->svcr, SVCR, SM) + && (!arm_is_el2_enabled(env) + || (arm_el_is_aa64(env, 2) && !(env->cp15.hcr_el2 & HCR_TGE))) + && arm_el_is_aa64(env, 1) + && !sme_fa64(env, el)) { + DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1); + } + + return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags); +} + +static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, + ARMMMUIdx mmu_idx) +{ + CPUARMTBFlags flags = {}; + ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx); + uint64_t tcr = regime_tcr(env, mmu_idx); + uint64_t hcr = arm_hcr_el2_eff(env); + uint64_t sctlr; + int tbii, tbid; + + DP_TBFLAG_ANY(flags, AARCH64_STATE, 1); + + /* Get control bits for tagged addresses. */ + tbid = aa64_va_parameter_tbi(tcr, mmu_idx); + tbii = tbid & ~aa64_va_parameter_tbid(tcr, mmu_idx); + + DP_TBFLAG_A64(flags, TBII, tbii); + DP_TBFLAG_A64(flags, TBID, tbid); + + if (cpu_isar_feature(aa64_sve, env_archcpu(env))) { + int sve_el = sve_exception_el(env, el); + + /* + * If either FP or SVE are disabled, translator does not need len. + * If SVE EL > FP EL, FP exception has precedence, and translator + * does not need SVE EL. Save potential re-translations by forcing + * the unneeded data to zero. + */ + if (fp_el != 0) { + if (sve_el > fp_el) { + sve_el = 0; + } + } else if (sve_el == 0) { + DP_TBFLAG_A64(flags, VL, sve_vqm1_for_el(env, el)); + } + DP_TBFLAG_A64(flags, SVEEXC_EL, sve_el); + } + if (cpu_isar_feature(aa64_sme, env_archcpu(env))) { + int sme_el = sme_exception_el(env, el); + bool sm = FIELD_EX64(env->svcr, SVCR, SM); + + DP_TBFLAG_A64(flags, SMEEXC_EL, sme_el); + if (sme_el == 0) { + /* Similarly, do not compute SVL if SME is disabled. */ + int svl = sve_vqm1_for_el_sm(env, el, true); + DP_TBFLAG_A64(flags, SVL, svl); + if (sm) { + /* If SVE is disabled, we will not have set VL above. */ + DP_TBFLAG_A64(flags, VL, svl); + } + } + if (sm) { + DP_TBFLAG_A64(flags, PSTATE_SM, 1); + DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el)); + } + DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA)); + } + + sctlr = regime_sctlr(env, stage1); + + if (aprofile_require_alignment(env, el, sctlr)) { + DP_TBFLAG_ANY(flags, ALIGN_MEM, 1); + } + + if (arm_cpu_data_is_big_endian_a64(el, sctlr)) { + DP_TBFLAG_ANY(flags, BE_DATA, 1); + } + + if (cpu_isar_feature(aa64_pauth, env_archcpu(env))) { + /* + * In order to save space in flags, we record only whether + * pauth is "inactive", meaning all insns are implemented as + * a nop, or "active" when some action must be performed. + * The decision of which action to take is left to a helper. + */ + if (sctlr & (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB)) { + DP_TBFLAG_A64(flags, PAUTH_ACTIVE, 1); + } + } + + if (cpu_isar_feature(aa64_bti, env_archcpu(env))) { + /* Note that SCTLR_EL[23].BT == SCTLR_BT1. */ + if (sctlr & (el == 0 ? SCTLR_BT0 : SCTLR_BT1)) { + DP_TBFLAG_A64(flags, BT, 1); + } + } + + if (cpu_isar_feature(aa64_lse2, env_archcpu(env))) { + if (sctlr & SCTLR_nAA) { + DP_TBFLAG_A64(flags, NAA, 1); + } + } + + /* Compute the condition for using AccType_UNPRIV for LDTR et al. 
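+ * That is: the EL1&0 regimes use UNPRIV unless FEAT_NV's NV,NV1 == 1,1
+ * (where LDTR behaves like LDR), the EL2&0 regimes use it only while
+ * HCR_EL2.TGE is set, and PSTATE.UAO suppresses it everywhere.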
*/ + if (!(env->pstate & PSTATE_UAO)) { + switch (mmu_idx) { + case ARMMMUIdx_E10_1: + case ARMMMUIdx_E10_1_PAN: + /* FEAT_NV: NV,NV1 == 1,1 means we don't do UNPRIV accesses */ + if ((hcr & (HCR_NV | HCR_NV1)) != (HCR_NV | HCR_NV1)) { + DP_TBFLAG_A64(flags, UNPRIV, 1); + } + break; + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + /* + * Note that EL20_2 is gated by HCR_EL2.E2H == 1, but EL20_0 is + * gated by HCR_EL2.<E2H,TGE> == '11', and so is LDTR. + */ + if (env->cp15.hcr_el2 & HCR_TGE) { + DP_TBFLAG_A64(flags, UNPRIV, 1); + } + break; + default: + break; + } + } + + if (env->pstate & PSTATE_IL) { + DP_TBFLAG_ANY(flags, PSTATE__IL, 1); + } + + if (arm_fgt_active(env, el)) { + DP_TBFLAG_ANY(flags, FGT_ACTIVE, 1); + if (FIELD_EX64(env->cp15.fgt_exec[FGTREG_HFGITR], HFGITR_EL2, ERET)) { + DP_TBFLAG_A64(flags, TRAP_ERET, 1); + } + if (fgt_svc(env, el)) { + DP_TBFLAG_ANY(flags, FGT_SVC, 1); + } + } + + /* + * ERET can also be trapped for FEAT_NV. arm_hcr_el2_eff() takes care + * of "is EL2 enabled" and the NV bit can only be set if FEAT_NV is present. + */ + if (el == 1 && (hcr & HCR_NV)) { + DP_TBFLAG_A64(flags, TRAP_ERET, 1); + DP_TBFLAG_A64(flags, NV, 1); + if (hcr & HCR_NV1) { + DP_TBFLAG_A64(flags, NV1, 1); + } + if (hcr & HCR_NV2) { + DP_TBFLAG_A64(flags, NV2, 1); + if (hcr & HCR_E2H) { + DP_TBFLAG_A64(flags, NV2_MEM_E20, 1); + } + if (env->cp15.sctlr_el[2] & SCTLR_EE) { + DP_TBFLAG_A64(flags, NV2_MEM_BE, 1); + } + } + } + + if (cpu_isar_feature(aa64_mte, env_archcpu(env))) { + /* + * Set MTE_ACTIVE if any access may be Checked, and leave clear + * if all accesses must be Unchecked: + * 1) If no TBI, then there are no tags in the address to check, + * 2) If Tag Check Override, then all accesses are Unchecked, + * 3) If Tag Check Fail == 0, then Checked access have no effect, + * 4) If no Allocation Tag Access, then all accesses are Unchecked. + */ + if (allocation_tag_access_enabled(env, el, sctlr)) { + DP_TBFLAG_A64(flags, ATA, 1); + if (tbid + && !(env->pstate & PSTATE_TCO) + && (sctlr & (el == 0 ? SCTLR_TCF0 : SCTLR_TCF))) { + DP_TBFLAG_A64(flags, MTE_ACTIVE, 1); + if (!EX_TBFLAG_A64(flags, UNPRIV)) { + /* + * In non-unpriv contexts (eg EL0), unpriv load/stores + * act like normal ones; duplicate the MTE info to + * avoid translate-a64.c having to check UNPRIV to see + * whether it is OK to index into MTE_ACTIVE[]. + */ + DP_TBFLAG_A64(flags, MTE0_ACTIVE, 1); + } + } + } + /* And again for unprivileged accesses, if required. */ + if (EX_TBFLAG_A64(flags, UNPRIV) + && tbid + && !(env->pstate & PSTATE_TCO) + && (sctlr & SCTLR_TCF0) + && allocation_tag_access_enabled(env, 0, sctlr)) { + DP_TBFLAG_A64(flags, MTE0_ACTIVE, 1); + } + /* + * For unpriv tag-setting accesses we also need ATA0. Again, in + * contexts where unpriv and normal insns are the same we + * duplicate the ATA bit to save effort for translate-a64.c. + */ + if (EX_TBFLAG_A64(flags, UNPRIV)) { + if (allocation_tag_access_enabled(env, 0, sctlr)) { + DP_TBFLAG_A64(flags, ATA0, 1); + } + } else { + DP_TBFLAG_A64(flags, ATA0, EX_TBFLAG_A64(flags, ATA)); + } + /* Cache TCMA as well as TBI. 
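+ * (the translator needs TCMA to know when accesses with pointer tag
+ * 0b0000 or 0b1111 are always Unchecked)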
*/ + DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx)); + } + + return rebuild_hflags_common(env, fp_el, mmu_idx, flags); +} + +static CPUARMTBFlags rebuild_hflags_internal(CPUARMState *env) +{ + int el = arm_current_el(env); + int fp_el = fp_exception_el(env, el); + ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el); + + if (is_a64(env)) { + return rebuild_hflags_a64(env, el, fp_el, mmu_idx); + } else if (arm_feature(env, ARM_FEATURE_M)) { + return rebuild_hflags_m32(env, fp_el, mmu_idx); + } else { + return rebuild_hflags_a32(env, fp_el, mmu_idx); + } +} + +void arm_rebuild_hflags(CPUARMState *env) +{ + env->hflags = rebuild_hflags_internal(env); +} + +/* + * If we have triggered a EL state change we can't rely on the + * translator having passed it to us, we need to recompute. + */ +void HELPER(rebuild_hflags_m32_newel)(CPUARMState *env) +{ + int el = arm_current_el(env); + int fp_el = fp_exception_el(env, el); + ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el); + + env->hflags = rebuild_hflags_m32(env, fp_el, mmu_idx); +} + +void HELPER(rebuild_hflags_m32)(CPUARMState *env, int el) +{ + int fp_el = fp_exception_el(env, el); + ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el); + + env->hflags = rebuild_hflags_m32(env, fp_el, mmu_idx); +} + +/* + * If we have triggered a EL state change we can't rely on the + * translator having passed it to us, we need to recompute. + */ +void HELPER(rebuild_hflags_a32_newel)(CPUARMState *env) +{ + int el = arm_current_el(env); + int fp_el = fp_exception_el(env, el); + ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el); + env->hflags = rebuild_hflags_a32(env, fp_el, mmu_idx); +} + +void HELPER(rebuild_hflags_a32)(CPUARMState *env, int el) +{ + int fp_el = fp_exception_el(env, el); + ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el); + + env->hflags = rebuild_hflags_a32(env, fp_el, mmu_idx); +} + +void HELPER(rebuild_hflags_a64)(CPUARMState *env, int el) +{ + int fp_el = fp_exception_el(env, el); + ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el); + + env->hflags = rebuild_hflags_a64(env, el, fp_el, mmu_idx); +} + +void assert_hflags_rebuild_correctly(CPUARMState *env) +{ +#ifdef CONFIG_DEBUG_TCG + CPUARMTBFlags c = env->hflags; + CPUARMTBFlags r = rebuild_hflags_internal(env); + + if (unlikely(c.flags != r.flags || c.flags2 != r.flags2)) { + fprintf(stderr, "TCG hflags mismatch " + "(current:(0x%08x,0x" TARGET_FMT_lx ")" + " rebuilt:(0x%08x,0x" TARGET_FMT_lx ")\n", + c.flags, c.flags2, r.flags, r.flags2); + abort(); + } +#endif +} diff --git a/target/arm/iwmmxt_helper.c b/target/arm/tcg/iwmmxt_helper.c index 610b1b2103..610b1b2103 100644 --- a/target/arm/iwmmxt_helper.c +++ b/target/arm/tcg/iwmmxt_helper.c diff --git a/target/arm/m-nocp.decode b/target/arm/tcg/m-nocp.decode index b65c801c97..b65c801c97 100644 --- a/target/arm/m-nocp.decode +++ b/target/arm/tcg/m-nocp.decode diff --git a/target/arm/m_helper.c b/target/arm/tcg/m_helper.c index 47903b3dc3..d1f1e02acc 100644 --- a/target/arm/m_helper.c +++ b/target/arm/tcg/m_helper.c @@ -7,32 +7,22 @@ */ #include "qemu/osdep.h" -#include "qemu/units.h" -#include "target/arm/idau.h" -#include "trace.h" #include "cpu.h" #include "internals.h" -#include "exec/gdbstub.h" +#include "cpu-features.h" +#include "gdbstub/helpers.h" #include "exec/helper-proto.h" -#include "qemu/host-utils.h" #include "qemu/main-loop.h" #include "qemu/bitops.h" -#include "qemu/crc32c.h" -#include "qemu/qemu-print.h" +#include "qemu/log.h" #include "exec/exec-all.h" -#include <zlib.h> /* For crc32 */ -#include "semihosting/semihost.h" -#include 
"sysemu/cpus.h" -#include "sysemu/kvm.h" -#include "qemu/range.h" -#include "qapi/qapi-commands-machine-target.h" -#include "qapi/error.h" -#include "qemu/guest-random.h" #ifdef CONFIG_TCG -#include "arm_ldst.h" #include "exec/cpu_ldst.h" #include "semihosting/common-semi.h" #endif +#if !defined(CONFIG_USER_ONLY) +#include "hw/intc/armv7m_nvic.h" +#endif static void v7m_msr_xpsr(CPUARMState *env, uint32_t mask, uint32_t reg, uint32_t val) @@ -68,7 +58,7 @@ static uint32_t v7m_mrs_xpsr(CPUARMState *env, uint32_t reg, unsigned el) return xpsr_read(env) & mask; } -static uint32_t v7m_mrs_control(CPUARMState *env, uint32_t secure) +uint32_t arm_v7m_mrs_control(CPUARMState *env, uint32_t secure) { uint32_t value = env->v7m.control[secure]; @@ -105,7 +95,7 @@ uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg) case 0 ... 7: /* xPSR sub-fields */ return v7m_mrs_xpsr(env, reg, 0); case 20: /* CONTROL */ - return v7m_mrs_control(env, 0); + return arm_v7m_mrs_control(env, 0); default: /* Unprivileged reads others as zero. */ return 0; @@ -159,13 +149,55 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op) * R: 0 because unpriv and A flag not set * SRVALID: 0 because NS * MRVALID: 0 because unpriv and A flag not set - * SREGION: 0 becaus SRVALID is 0 + * SREGION: 0 because SRVALID is 0 * MREGION: 0 because MRVALID is 0 */ return 0; } -#else +ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate) +{ + return ARMMMUIdx_MUser; +} + +#else /* !CONFIG_USER_ONLY */ + +static ARMMMUIdx arm_v7m_mmu_idx_all(CPUARMState *env, + bool secstate, bool priv, bool negpri) +{ + ARMMMUIdx mmu_idx = ARM_MMU_IDX_M; + + if (priv) { + mmu_idx |= ARM_MMU_IDX_M_PRIV; + } + + if (negpri) { + mmu_idx |= ARM_MMU_IDX_M_NEGPRI; + } + + if (secstate) { + mmu_idx |= ARM_MMU_IDX_M_S; + } + + return mmu_idx; +} + +static ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env, + bool secstate, bool priv) +{ + bool negpri = armv7m_nvic_neg_prio_requested(env->nvic, secstate); + + return arm_v7m_mmu_idx_all(env, secstate, priv, negpri); +} + +/* Return the MMU index for a v7M CPU in the specified security state */ +ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate) +{ + bool priv = arm_v7m_is_handler_mode(env) || + !(env->v7m.control[secstate] & 1); + + return arm_v7m_mmu_idx_for_secstate_and_priv(env, secstate, priv); +} /* * What kind of stack write are we doing? 
This affects how exceptions @@ -182,19 +214,14 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value, { CPUState *cs = CPU(cpu); CPUARMState *env = &cpu->env; - MemTxAttrs attrs = {}; MemTxResult txres; - target_ulong page_size; - hwaddr physaddr; - int prot; + GetPhysAddrResult res = {}; ARMMMUFaultInfo fi = {}; - ARMCacheAttrs cacheattrs = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; int exc; bool exc_secure; - if (get_phys_addr(env, addr, MMU_DATA_STORE, mmu_idx, &physaddr, - &attrs, &prot, &page_size, &fi, &cacheattrs)) { + if (get_phys_addr(env, addr, MMU_DATA_STORE, mmu_idx, &res, &fi)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { if (mode == STACK_LAZYFP) { @@ -227,8 +254,8 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value, } goto pend_fault; } - address_space_stl_le(arm_addressspace(cs, attrs), physaddr, value, - attrs, &txres); + address_space_stl_le(arm_addressspace(cs, res.f.attrs), res.f.phys_addr, + value, res.f.attrs, &txres); if (txres != MEMTX_OK) { /* BusFault trying to write the data */ if (mode == STACK_LAZYFP) { @@ -275,20 +302,15 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr, { CPUState *cs = CPU(cpu); CPUARMState *env = &cpu->env; - MemTxAttrs attrs = {}; MemTxResult txres; - target_ulong page_size; - hwaddr physaddr; - int prot; + GetPhysAddrResult res = {}; ARMMMUFaultInfo fi = {}; - ARMCacheAttrs cacheattrs = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; int exc; bool exc_secure; uint32_t value; - if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &physaddr, - &attrs, &prot, &page_size, &fi, &cacheattrs)) { + if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &res, &fi)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { qemu_log_mask(CPU_LOG_INT, @@ -307,8 +329,8 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr, goto pend_fault; } - value = address_space_ldl(arm_addressspace(cs, attrs), physaddr, - attrs, &txres); + value = address_space_ldl(arm_addressspace(cs, res.f.attrs), + res.f.phys_addr, res.f.attrs, &txres); if (txres != MEMTX_OK) { /* BusFault trying to read the data */ qemu_log_mask(CPU_LOG_INT, "...BusFault with BFSR.UNSTKERR\n"); @@ -351,8 +373,8 @@ void HELPER(v7m_preserve_fp_state)(CPUARMState *env) bool ts = is_secure && (env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_TS_MASK); bool take_exception; - /* Take the iothread lock as we are going to touch the NVIC */ - qemu_mutex_lock_iothread(); + /* Take the BQL as we are going to touch the NVIC */ + bql_lock(); /* Check the background context had access to the FPU */ if (!v7m_cpacr_pass(env, is_secure, is_priv)) { @@ -406,7 +428,7 @@ void HELPER(v7m_preserve_fp_state)(CPUARMState *env) take_exception = !stacked_ok && armv7m_nvic_can_take_pending_exception(env->nvic); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (take_exception) { raise_exception_ra(env, EXCP_LAZYFP, 0, 1, GETPC()); @@ -563,7 +585,7 @@ void HELPER(v7m_bxns)(CPUARMState *env, uint32_t dest) env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_SFPA_MASK; } switch_v7m_security_state(env, dest & 1); - env->thumb = 1; + env->thumb = true; env->regs[15] = dest & ~1; arm_rebuild_hflags(env); } @@ -589,7 +611,7 @@ void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest) * except that the low bit doesn't indicate Thumb/not. 
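* (for BLXNS, bit 0 of the target address instead selects the
* "target is Secure" case that this branch handles)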
*/ env->regs[14] = nextinst; - env->thumb = 1; + env->thumb = true; env->regs[15] = dest & ~1; return; } @@ -625,47 +647,11 @@ void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest) } env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_SFPA_MASK; switch_v7m_security_state(env, 0); - env->thumb = 1; + env->thumb = true; env->regs[15] = dest; arm_rebuild_hflags(env); } -static uint32_t *get_v7m_sp_ptr(CPUARMState *env, bool secure, bool threadmode, - bool spsel) -{ - /* - * Return a pointer to the location where we currently store the - * stack pointer for the requested security state and thread mode. - * This pointer will become invalid if the CPU state is updated - * such that the stack pointers are switched around (eg changing - * the SPSEL control bit). - * Compare the v8M ARM ARM pseudocode LookUpSP_with_security_mode(). - * Unlike that pseudocode, we require the caller to pass us in the - * SPSEL control bit value; this is because we also use this - * function in handling of pushing of the callee-saves registers - * part of the v8M stack frame (pseudocode PushCalleeStack()), - * and in the tailchain codepath the SPSEL bit comes from the exception - * return magic LR value from the previous exception. The pseudocode - * opencodes the stack-selection in PushCalleeStack(), but we prefer - * to make this utility function generic enough to do the job. - */ - bool want_psp = threadmode && spsel; - - if (secure == env->v7m.secure) { - if (want_psp == v7m_using_psp(env)) { - return &env->regs[13]; - } else { - return &env->v7m.other_sp; - } - } else { - if (want_psp) { - return &env->v7m.other_ss_psp; - } else { - return &env->v7m.other_ss_msp; - } - } -} - static bool arm_v7m_load_vector(ARMCPU *cpu, int exc, bool targets_secure, uint32_t *pvec) { @@ -678,6 +664,10 @@ static bool arm_v7m_load_vector(ARMCPU *cpu, int exc, bool targets_secure, ARMMMUIdx mmu_idx; bool exc_secure; + qemu_log_mask(CPU_LOG_INT, + "...loading from element %d of %s vector table at 0x%x\n", + exc, targets_secure ? 
"secure" : "non-secure", addr); + mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, targets_secure, true); /* @@ -694,7 +684,8 @@ static bool arm_v7m_load_vector(ARMCPU *cpu, int exc, bool targets_secure, if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { V8M_SAttributes sattrs = {}; - v8m_security_lookup(env, addr, MMU_DATA_LOAD, mmu_idx, &sattrs); + v8m_security_lookup(env, addr, MMU_DATA_LOAD, mmu_idx, + targets_secure, &sattrs); if (sattrs.ns) { attrs.secure = false; } else if (!targets_secure) { @@ -718,6 +709,7 @@ static bool arm_v7m_load_vector(ARMCPU *cpu, int exc, bool targets_secure, goto load_fail; } *pvec = vector_entry; + qemu_log_mask(CPU_LOG_INT, "...loaded new PC 0x%x\n", *pvec); return true; load_fail: @@ -784,8 +776,8 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain, !mode; mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, M_REG_S, priv); - frame_sp_p = get_v7m_sp_ptr(env, M_REG_S, mode, - lr & R_V7M_EXCRET_SPSEL_MASK); + frame_sp_p = arm_v7m_get_sp_ptr(env, M_REG_S, mode, + lr & R_V7M_EXCRET_SPSEL_MASK); want_psp = mode && (lr & R_V7M_EXCRET_SPSEL_MASK); if (want_psp) { limit = env->v7m.psplim[M_REG_S]; @@ -898,7 +890,7 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain, } lr &= ~R_V7M_EXCRET_ES_MASK; - if (targets_secure || !arm_feature(env, ARM_FEATURE_M_SECURITY)) { + if (targets_secure) { lr |= R_V7M_EXCRET_ES_MASK; } lr &= ~R_V7M_EXCRET_SPSEL_MASK; @@ -992,7 +984,7 @@ static void v7m_update_fpccr(CPUARMState *env, uint32_t frameptr, * that we will need later in order to do lazy FP reg stacking. */ bool is_secure = env->v7m.secure; - void *nvic = env->nvic; + NVICState *nvic = env->nvic; /* * Some bits are unbanked and live always in fpccr[M_REG_S]; some bits * are banked and we want to update the bit in the bank for the @@ -1630,10 +1622,8 @@ static void do_v7m_exception_exit(ARMCPU *cpu) * use 'frame_sp_p' after we do something that makes it invalid. */ bool spsel = env->v7m.control[return_to_secure] & R_V7M_CONTROL_SPSEL_MASK; - uint32_t *frame_sp_p = get_v7m_sp_ptr(env, - return_to_secure, - !return_to_handler, - spsel); + uint32_t *frame_sp_p = arm_v7m_get_sp_ptr(env, return_to_secure, + !return_to_handler, spsel); uint32_t frameptr = *frame_sp_p; bool pop_ok = true; ARMMMUIdx mmu_idx; @@ -1930,7 +1920,7 @@ static bool do_v7m_function_return(ARMCPU *cpu) { bool threadmode, spsel; - TCGMemOpIdx oi; + MemOpIdx oi; ARMMMUIdx mmu_idx; uint32_t *frame_sp_p; uint32_t frameptr; @@ -1939,7 +1929,7 @@ static bool do_v7m_function_return(ARMCPU *cpu) threadmode = !arm_v7m_is_handler_mode(env); spsel = env->v7m.control[M_REG_S] & R_V7M_CONTROL_SPSEL_MASK; - frame_sp_p = get_v7m_sp_ptr(env, true, threadmode, spsel); + frame_sp_p = arm_v7m_get_sp_ptr(env, true, threadmode, spsel); frameptr = *frame_sp_p; /* @@ -1947,9 +1937,9 @@ static bool do_v7m_function_return(ARMCPU *cpu) * do them as secure, so work out what MMU index that is. 
*/ mmu_idx = arm_v7m_mmu_idx_for_secstate(env, true); - oi = make_memop_idx(MO_LE, arm_to_core_mmu_idx(mmu_idx)); - newpc = helper_le_ldul_mmu(env, frameptr, oi, 0); - newpsr = helper_le_ldul_mmu(env, frameptr + 4, oi, 0); + oi = make_memop_idx(MO_LEUL, arm_to_core_mmu_idx(mmu_idx)); + newpc = cpu_ldl_mmu(env, frameptr, oi, 0); + newpsr = cpu_ldl_mmu(env, frameptr + 4, oi, 0); /* Consistency checks on new IPSR */ newpsr_exc = newpsr & XPSR_EXCP; @@ -1984,7 +1974,7 @@ static bool do_v7m_function_return(ARMCPU *cpu) return true; } -static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, +static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool secure, uint32_t addr, uint16_t *insn) { /* @@ -2002,15 +1992,11 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, CPUState *cs = CPU(cpu); CPUARMState *env = &cpu->env; V8M_SAttributes sattrs = {}; - MemTxAttrs attrs = {}; + GetPhysAddrResult res = {}; ARMMMUFaultInfo fi = {}; - ARMCacheAttrs cacheattrs = {}; MemTxResult txres; - target_ulong page_size; - hwaddr physaddr; - int prot; - v8m_security_lookup(env, addr, MMU_INST_FETCH, mmu_idx, &sattrs); + v8m_security_lookup(env, addr, MMU_INST_FETCH, mmu_idx, secure, &sattrs); if (!sattrs.nsc || sattrs.ns) { /* * This must be the second half of the insn, and it straddles a @@ -2022,16 +2008,15 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, "...really SecureFault with SFSR.INVEP\n"); return false; } - if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx, &physaddr, - &attrs, &prot, &page_size, &fi, &cacheattrs)) { + if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx, &res, &fi)) { /* the MPU lookup failed */ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_IACCVIOL_MASK; armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM, env->v7m.secure); qemu_log_mask(CPU_LOG_INT, "...really MemManage with CFSR.IACCVIOL\n"); return false; } - *insn = address_space_lduw_le(arm_addressspace(cs, attrs), physaddr, - attrs, &txres); + *insn = address_space_lduw_le(arm_addressspace(cs, res.f.attrs), + res.f.phys_addr, res.f.attrs, &txres); if (txres != MEMTX_OK) { env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_IBUSERR_MASK; armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_BUS, false); @@ -2054,17 +2039,12 @@ static bool v7m_read_sg_stack_word(ARMCPU *cpu, ARMMMUIdx mmu_idx, */ CPUState *cs = CPU(cpu); CPUARMState *env = &cpu->env; - MemTxAttrs attrs = {}; MemTxResult txres; - target_ulong page_size; - hwaddr physaddr; - int prot; + GetPhysAddrResult res = {}; ARMMMUFaultInfo fi = {}; - ARMCacheAttrs cacheattrs = {}; uint32_t value; - if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &physaddr, - &attrs, &prot, &page_size, &fi, &cacheattrs)) { + if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &res, &fi)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { qemu_log_mask(CPU_LOG_INT, @@ -2082,8 +2062,8 @@ static bool v7m_read_sg_stack_word(ARMCPU *cpu, ARMMMUIdx mmu_idx, } return false; } - value = address_space_ldl(arm_addressspace(cs, attrs), physaddr, - attrs, &txres); + value = address_space_ldl(arm_addressspace(cs, res.f.attrs), + res.f.phys_addr, res.f.attrs, &txres); if (txres != MEMTX_OK) { /* BusFault trying to read the data */ qemu_log_mask(CPU_LOG_INT, @@ -2121,7 +2101,7 @@ static bool v7m_handle_execute_nsc(ARMCPU *cpu) /* We want to do the MPU lookup as secure; work out what mmu_idx that is */ mmu_idx = arm_v7m_mmu_idx_for_secstate(env, true); - if (!v7m_read_half_insn(cpu, mmu_idx, env->regs[15], &insn)) { + if (!v7m_read_half_insn(cpu, mmu_idx, 
true, env->regs[15], &insn)) { return false; } @@ -2137,7 +2117,7 @@ static bool v7m_handle_execute_nsc(ARMCPU *cpu) goto gen_invep; } - if (!v7m_read_half_insn(cpu, mmu_idx, env->regs[15] + 2, &insn)) { + if (!v7m_read_half_insn(cpu, mmu_idx, true, env->regs[15] + 2, &insn)) { return false; } @@ -2206,7 +2186,7 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) uint32_t lr; bool ignore_stackfaults; - arm_log_exception(cs->exception_index); + arm_log_exception(cs); /* * For exceptions we just mark as pending on the NVIC, and let that @@ -2266,7 +2246,13 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) * Note that for M profile we don't have a guest facing FSR, but * the env->exception.fsr will be populated by the code that * raises the fault, in the A profile short-descriptor format. + * + * Log the exception.vaddress now regardless of subtype, because + * logging below only logs it when it goes into a guest visible + * register. */ + qemu_log_mask(CPU_LOG_INT, "...at fault address 0x%x\n", + (uint32_t)env->exception.vaddress); switch (env->exception.fsr & 0xf) { case M_FAKE_FSR_NSC_EXEC: /* @@ -2361,7 +2347,7 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) "...handling as semihosting call 0x%x\n", env->regs[0]); #ifdef CONFIG_TCG - env->regs[0] = do_common_semihosting(cs); + do_common_semihosting(cs); #else g_assert_not_reached(); #endif @@ -2443,7 +2429,7 @@ uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg) case 0 ... 7: /* xPSR sub-fields */ return v7m_mrs_xpsr(env, reg, el); case 20: /* CONTROL */ - return v7m_mrs_control(env, env->v7m.secure); + return arm_v7m_mrs_control(env, env->v7m.secure); case 0x94: /* CONTROL_NS */ /* * We have to handle this here because unprivileged Secure code @@ -2488,11 +2474,17 @@ uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg) } return env->v7m.primask[M_REG_NS]; case 0x91: /* BASEPRI_NS */ + if (!arm_feature(env, ARM_FEATURE_M_MAIN)) { + goto bad_reg; + } if (!env->v7m.secure) { return 0; } return env->v7m.basepri[M_REG_NS]; case 0x93: /* FAULTMASK_NS */ + if (!arm_feature(env, ARM_FEATURE_M_MAIN)) { + goto bad_reg; + } if (!env->v7m.secure) { return 0; } @@ -2538,8 +2530,14 @@ uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg) return env->v7m.primask[env->v7m.secure]; case 17: /* BASEPRI */ case 18: /* BASEPRI_MAX */ + if (!arm_feature(env, ARM_FEATURE_M_MAIN)) { + goto bad_reg; + } return env->v7m.basepri[env->v7m.secure]; case 19: /* FAULTMASK */ + if (!arm_feature(env, ARM_FEATURE_M_MAIN)) { + goto bad_reg; + } return env->v7m.faultmask[env->v7m.secure]; default: bad_reg: @@ -2604,13 +2602,19 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val) env->v7m.primask[M_REG_NS] = val & 1; return; case 0x91: /* BASEPRI_NS */ - if (!env->v7m.secure || !arm_feature(env, ARM_FEATURE_M_MAIN)) { + if (!arm_feature(env, ARM_FEATURE_M_MAIN)) { + goto bad_reg; + } + if (!env->v7m.secure) { return; } env->v7m.basepri[M_REG_NS] = val & 0xff; return; case 0x93: /* FAULTMASK_NS */ - if (!env->v7m.secure || !arm_feature(env, ARM_FEATURE_M_MAIN)) { + if (!arm_feature(env, ARM_FEATURE_M_MAIN)) { + goto bad_reg; + } + if (!env->v7m.secure) { return; } env->v7m.faultmask[M_REG_NS] = val & 1; @@ -2778,15 +2782,10 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op) V8M_SAttributes sattrs = {}; uint32_t tt_resp; bool r, rw, nsr, nsrw, mrvalid; - int prot; - ARMMMUFaultInfo fi = {}; - MemTxAttrs attrs = {}; - hwaddr phys_addr; ARMMMUIdx mmu_idx; uint32_t mregion; bool targetpriv; bool targetsec = env->v7m.secure; - bool 
is_subpage; /* * Work out what the security state and privilege level we're @@ -2817,18 +2816,20 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op) * inspecting the other MPU state. */ if (arm_current_el(env) != 0 || alt) { + GetPhysAddrResult res = {}; + ARMMMUFaultInfo fi = {}; + /* We can ignore the return value as prot is always set */ - pmsav8_mpu_lookup(env, addr, MMU_DATA_LOAD, mmu_idx, - &phys_addr, &attrs, &prot, &is_subpage, - &fi, &mregion); + pmsav8_mpu_lookup(env, addr, MMU_DATA_LOAD, mmu_idx, targetsec, + &res, &fi, &mregion); if (mregion == -1) { mrvalid = false; mregion = 0; } else { mrvalid = true; } - r = prot & PAGE_READ; - rw = prot & PAGE_WRITE; + r = res.f.prot & PAGE_READ; + rw = res.f.prot & PAGE_WRITE; } else { r = false; rw = false; @@ -2837,7 +2838,8 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op) } if (env->v7m.secure) { - v8m_security_lookup(env, addr, MMU_DATA_LOAD, mmu_idx, &sattrs); + v8m_security_lookup(env, addr, MMU_DATA_LOAD, mmu_idx, + targetsec, &sattrs); nsr = sattrs.ns && r; nsrw = sattrs.ns && rw; } else { @@ -2863,39 +2865,38 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op) #endif /* !CONFIG_USER_ONLY */ -ARMMMUIdx arm_v7m_mmu_idx_all(CPUARMState *env, - bool secstate, bool priv, bool negpri) +uint32_t *arm_v7m_get_sp_ptr(CPUARMState *env, bool secure, bool threadmode, + bool spsel) { - ARMMMUIdx mmu_idx = ARM_MMU_IDX_M; - - if (priv) { - mmu_idx |= ARM_MMU_IDX_M_PRIV; - } - - if (negpri) { - mmu_idx |= ARM_MMU_IDX_M_NEGPRI; - } + /* + * Return a pointer to the location where we currently store the + * stack pointer for the requested security state and thread mode. + * This pointer will become invalid if the CPU state is updated + * such that the stack pointers are switched around (eg changing + * the SPSEL control bit). + * Compare the v8M ARM ARM pseudocode LookUpSP_with_security_mode(). + * Unlike that pseudocode, we require the caller to pass us in the + * SPSEL control bit value; this is because we also use this + * function in handling of pushing of the callee-saves registers + * part of the v8M stack frame (pseudocode PushCalleeStack()), + * and in the tailchain codepath the SPSEL bit comes from the exception + * return magic LR value from the previous exception. The pseudocode + * opencodes the stack-selection in PushCalleeStack(), but we prefer + * to make this utility function generic enough to do the job. 
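+ * For example, while the CPU is currently Secure, a request for the
+ * Non-secure process stack (secure == false, threadmode and spsel
+ * both true) returns &env->v7m.other_ss_psp.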
+ */ + bool want_psp = threadmode && spsel; - if (secstate) { - mmu_idx |= ARM_MMU_IDX_M_S; + if (secure == env->v7m.secure) { + if (want_psp == v7m_using_psp(env)) { + return &env->regs[13]; + } else { + return &env->v7m.other_sp; + } + } else { + if (want_psp) { + return &env->v7m.other_ss_psp; + } else { + return &env->v7m.other_ss_msp; + } } - - return mmu_idx; -} - -ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env, - bool secstate, bool priv) -{ - bool negpri = armv7m_nvic_neg_prio_requested(env->nvic, secstate); - - return arm_v7m_mmu_idx_all(env, secstate, priv, negpri); -} - -/* Return the MMU index for a v7M CPU in the specified security state */ -ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate) -{ - bool priv = arm_v7m_is_handler_mode(env) || - !(env->v7m.control[secstate] & 1); - - return arm_v7m_mmu_idx_for_secstate_and_priv(env, secstate, priv); } diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build new file mode 100644 index 0000000000..3b1a9f0fc5 --- /dev/null +++ b/target/arm/tcg/meson.build @@ -0,0 +1,60 @@ +gen_a64 = [ + decodetree.process('a64.decode', extra_args: ['--static-decode=disas_a64']), + decodetree.process('sve.decode', extra_args: '--decode=disas_sve'), + decodetree.process('sme.decode', extra_args: '--decode=disas_sme'), + decodetree.process('sme-fa64.decode', extra_args: '--static-decode=disas_sme_fa64'), +] + +gen_a32 = [ + decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'), + decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'), + decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'), + decodetree.process('vfp.decode', extra_args: '--decode=disas_vfp'), + decodetree.process('vfp-uncond.decode', extra_args: '--decode=disas_vfp_uncond'), + decodetree.process('m-nocp.decode', extra_args: '--decode=disas_m_nocp'), + decodetree.process('mve.decode', extra_args: '--decode=disas_mve'), + decodetree.process('a32.decode', extra_args: '--static-decode=disas_a32'), + decodetree.process('a32-uncond.decode', extra_args: '--static-decode=disas_a32_uncond'), + decodetree.process('t32.decode', extra_args: '--static-decode=disas_t32'), + decodetree.process('t16.decode', extra_args: ['-w', '16', '--static-decode=disas_t16']), +] + +arm_ss.add(gen_a32) +arm_ss.add(when: 'TARGET_AARCH64', if_true: gen_a64) + +arm_ss.add(files( + 'cpu32.c', + 'translate.c', + 'translate-m-nocp.c', + 'translate-mve.c', + 'translate-neon.c', + 'translate-vfp.c', + 'crypto_helper.c', + 'hflags.c', + 'iwmmxt_helper.c', + 'm_helper.c', + 'mve_helper.c', + 'neon_helper.c', + 'op_helper.c', + 'tlb_helper.c', + 'vec_helper.c', +)) + +arm_ss.add(when: 'TARGET_AARCH64', if_true: files( + 'cpu64.c', + 'translate-a64.c', + 'translate-sve.c', + 'translate-sme.c', + 'helper-a64.c', + 'mte_helper.c', + 'pauth_helper.c', + 'sme_helper.c', + 'sve_helper.c', +)) + +arm_system_ss.add(files( + 'psci.c', +)) + +arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c')) +arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c')) diff --git a/target/arm/mte_helper.c b/target/arm/tcg/mte_helper.c index 724175210b..d971b81370 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/tcg/mte_helper.c @@ -18,12 +18,14 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "cpu.h" #include "internals.h" #include "exec/exec-all.h" #include "exec/ram_addr.h" #include "exec/cpu_ldst.h" #include "exec/helper-proto.h" +#include "hw/core/tcg-cpu-ops.h" #include 
"qapi/error.h" #include "qemu/guest-random.h" @@ -48,14 +50,14 @@ static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude) } /** - * allocation_tag_mem: + * allocation_tag_mem_probe: * @env: the cpu environment * @ptr_mmu_idx: the addressing regime to use for the virtual address * @ptr: the virtual address for which to look up tag memory * @ptr_access: the access to use for the virtual address * @ptr_size: the number of bytes in the normal memory access * @tag_access: the access to use for the tag memory - * @tag_size: the number of bytes in the tag memory access + * @probe: true to merely probe, never taking an exception * @ra: the return address for exception handling * * Our tag memory is formatted as a sequence of little-endian nibbles. @@ -64,18 +66,25 @@ static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude) * for the higher addr. * * Here, resolve the physical address from the virtual address, and return - * a pointer to the corresponding tag byte. Exit with exception if the - * virtual address is not accessible for @ptr_access. - * - * The @ptr_size and @tag_size values may not have an obvious relation - * due to the alignment of @ptr, and the number of tag checks required. + * a pointer to the corresponding tag byte. * * If there is no tag storage corresponding to @ptr, return NULL. + * + * If the page is inaccessible for @ptr_access, or has a watchpoint, there are + * three options: + * (1) probe = true, ra = 0 : pure probe -- we return NULL if the page is not + * accessible, and do not take watchpoint traps. The calling code must + * handle those cases in the right priority compared to MTE traps. + * (2) probe = false, ra = 0 : probe, no fault expected -- the caller guarantees + * that the page is going to be accessible. We will take watchpoint traps. + * (3) probe = false, ra != 0 : non-probe -- we will take both memory access + * traps and watchpoint traps. + * (probe = true, ra != 0 is invalid and will assert.) */ -static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, - uint64_t ptr, MMUAccessType ptr_access, - int ptr_size, MMUAccessType tag_access, - int tag_size, uintptr_t ra) +static uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx, + uint64_t ptr, MMUAccessType ptr_access, + int ptr_size, MMUAccessType tag_access, + bool probe, uintptr_t ra) { #ifdef CONFIG_USER_ONLY uint64_t clean_ptr = useronly_clean_ptr(ptr); @@ -83,11 +92,11 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, uint8_t *tags; uintptr_t index; + assert(!(probe && ra)); + if (!(flags & (ptr_access == MMU_DATA_STORE ? PAGE_WRITE_ORG : PAGE_READ))) { - /* SIGSEGV */ - arm_cpu_tlb_fill(env_cpu(env), ptr, ptr_size, ptr_access, - ptr_mmu_idx, false, ra); - g_assert_not_reached(); + cpu_loop_exit_sigsegv(env_cpu(env), ptr, ptr_access, + !(flags & PAGE_VALID), ra); } /* Require both MAP_ANON and PROT_MTE for the page. 
*/ @@ -96,20 +105,14 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, } tags = page_get_target_data(clean_ptr); - if (tags == NULL) { - size_t alloc_size = TARGET_PAGE_SIZE >> (LOG2_TAG_GRANULE + 1); - tags = page_alloc_target_data(clean_ptr, alloc_size); - assert(tags != NULL); - } index = extract32(ptr, LOG2_TAG_GRANULE + 1, TARGET_PAGE_BITS - LOG2_TAG_GRANULE - 1); return tags + index; #else - uintptr_t index; - CPUIOTLBEntry *iotlbentry; + CPUTLBEntryFull *full; + MemTxAttrs attrs; int in_page, flags; - ram_addr_t ptr_ra; hwaddr ptr_paddr, tag_paddr, xlat; MemoryRegion *mr; ARMASIdx tag_asi; @@ -121,34 +124,20 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, * exception for inaccessible pages, and resolves the virtual address * into the softmmu tlb. * - * When RA == 0, this is for mte_probe. The page is expected to be - * valid. Indicate to probe_access_flags no-fault, then assert that - * we received a valid page. + * When RA == 0, this is either a pure probe or a no-fault-expected probe. + * Indicate to probe_access_flags no-fault, then either return NULL + * for the pure probe, or assert that we received a valid page for the + * no-fault-expected probe. */ - flags = probe_access_flags(env, ptr, ptr_access, ptr_mmu_idx, - ra == 0, &host, ra); + flags = probe_access_full(env, ptr, 0, ptr_access, ptr_mmu_idx, + ra == 0, &host, &full, ra); + if (probe && (flags & TLB_INVALID_MASK)) { + return NULL; + } assert(!(flags & TLB_INVALID_MASK)); - /* - * Find the iotlbentry for ptr. This *must* be present in the TLB - * because we just found the mapping. - * TODO: Perhaps there should be a cputlb helper that returns a - * matching tlb entry + iotlb entry. - */ - index = tlb_index(env, ptr_mmu_idx, ptr); -# ifdef CONFIG_DEBUG_TCG - { - CPUTLBEntry *entry = tlb_entry(env, ptr_mmu_idx, ptr); - target_ulong comparator = (ptr_access == MMU_DATA_LOAD - ? entry->addr_read - : tlb_addr_write(entry)); - g_assert(tlb_hit(comparator, ptr)); - } -# endif - iotlbentry = &env_tlb(env)->d[ptr_mmu_idx].iotlb[index]; - /* If the virtual page MemAttr != Tagged, access unchecked. */ - if (!arm_tlb_mte_tagged(&iotlbentry->attrs)) { + if (full->extra.arm.pte_attrs != 0xf0) { return NULL; } @@ -164,6 +153,14 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, } /* + * Remember these values across the second lookup below, + * which may invalidate this pointer via tlb resize. + */ + ptr_paddr = full->phys_addr | (ptr & ~TARGET_PAGE_MASK); + attrs = full->attrs; + full = NULL; + + /* * The Normal memory access can extend to the next page. E.g. a single * 8-byte access to the last byte of a page will check only the last * tag on the first page. @@ -171,43 +168,26 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, */ in_page = -(ptr | TARGET_PAGE_MASK); if (unlikely(ptr_size > in_page)) { - void *ignore; - flags |= probe_access_flags(env, ptr + in_page, ptr_access, - ptr_mmu_idx, ra == 0, &ignore, ra); + flags |= probe_access_full(env, ptr + in_page, 0, ptr_access, + ptr_mmu_idx, ra == 0, &host, &full, ra); assert(!(flags & TLB_INVALID_MASK)); } /* Any debug exception has priority over a tag check exception. */ - if (unlikely(flags & TLB_WATCHPOINT)) { + if (!probe && unlikely(flags & TLB_WATCHPOINT)) { int wp = ptr_access == MMU_DATA_LOAD ? 
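The user-mode path above indexes tag storage directly; the packing it relies on (one 4-bit tag per 16-byte granule, two tags per byte, low nibble at the lower address) can be checked with a small model. The constants and helper names here are illustrative, assuming 4 KiB pages:

    #include <stdint.h>
    #include <stdio.h>

    #define LOG2_TAG_GRANULE 4   /* 16-byte granule */
    #define PAGE_BITS        12  /* assume 4 KiB pages */

    /* Byte holding the tag for 'ptr' within its page's tag block. */
    static unsigned tag_byte_index(uint64_t ptr)
    {
        return (ptr >> (LOG2_TAG_GRANULE + 1))
               & ((1u << (PAGE_BITS - LOG2_TAG_GRANULE - 1)) - 1);
    }

    /* Little-endian nibbles: the even granule sits in the low nibble. */
    static unsigned tag_of(const uint8_t *tags, uint64_t ptr)
    {
        uint8_t b = tags[tag_byte_index(ptr)];
        return (ptr >> LOG2_TAG_GRANULE) & 1 ? b >> 4 : b & 0xf;
    }

    int main(void)
    {
        uint8_t tags[1 << (PAGE_BITS - LOG2_TAG_GRANULE - 1)] = { 0x21 };
        /* Granule at 0x00 -> low nibble (1); granule at 0x10 -> high nibble (2). */
        printf("%u %u\n", tag_of(tags, 0x00), tag_of(tags, 0x10)); /* 1 2 */
        return 0;
    }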
BP_MEM_READ : BP_MEM_WRITE; assert(ra != 0); - cpu_check_watchpoint(env_cpu(env), ptr, ptr_size, - iotlbentry->attrs, wp, ra); + cpu_check_watchpoint(env_cpu(env), ptr, ptr_size, attrs, wp, ra); } - /* - * Find the physical address within the normal mem space. - * The memory region lookup must succeed because TLB_MMIO was - * not set in the cputlb lookup above. - */ - mr = memory_region_from_host(host, &ptr_ra); - tcg_debug_assert(mr != NULL); - tcg_debug_assert(memory_region_is_ram(mr)); - ptr_paddr = ptr_ra; - do { - ptr_paddr += mr->addr; - mr = mr->container; - } while (mr); - /* Convert to the physical address in tag space. */ tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1); /* Look up the address in tag space. */ - tag_asi = iotlbentry->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS; + tag_asi = attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS; tag_as = cpu_get_address_space(env_cpu(env), tag_asi); mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL, - tag_access == MMU_DATA_STORE, - iotlbentry->attrs); + tag_access == MMU_DATA_STORE, attrs); /* * Note that @mr will never be NULL. If there is nothing in the address @@ -236,6 +216,15 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, #endif } +static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, + uint64_t ptr, MMUAccessType ptr_access, + int ptr_size, MMUAccessType tag_access, + uintptr_t ra) +{ + return allocation_tag_mem_probe(env, ptr_mmu_idx, ptr, ptr_access, + ptr_size, tag_access, false, ra); +} + uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm) { uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16); @@ -302,13 +291,13 @@ static int load_tag1(uint64_t ptr, uint8_t *mem) uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt) { - int mmu_idx = cpu_mmu_index(env, false); + int mmu_idx = arm_env_mmu_index(env); uint8_t *mem; int rtag = 0; /* Trap if accessing an invalid page. */ mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, 1, - MMU_DATA_LOAD, 1, GETPC()); + MMU_DATA_LOAD, GETPC()); /* Load if page supports tags. */ if (mem) { @@ -322,7 +311,7 @@ static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra) { if (unlikely(!QEMU_IS_ALIGNED(ptr, TAG_GRANULE))) { arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE, - cpu_mmu_index(env, false), ra); + arm_env_mmu_index(env), ra); g_assert_not_reached(); } } @@ -355,14 +344,14 @@ typedef void stg_store1(uint64_t, uint8_t *, int); static inline void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt, uintptr_t ra, stg_store1 store1) { - int mmu_idx = cpu_mmu_index(env, false); + int mmu_idx = arm_env_mmu_index(env); uint8_t *mem; check_tag_aligned(env, ptr, ra); /* Trap if accessing an invalid page. */ mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, TAG_GRANULE, - MMU_DATA_STORE, 1, ra); + MMU_DATA_STORE, ra); /* Store if page supports tags. 
*/ if (mem) { @@ -382,7 +371,7 @@ void HELPER(stg_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt) void HELPER(stg_stub)(CPUARMState *env, uint64_t ptr) { - int mmu_idx = cpu_mmu_index(env, false); + int mmu_idx = arm_env_mmu_index(env); uintptr_t ra = GETPC(); check_tag_aligned(env, ptr, ra); @@ -392,7 +381,7 @@ void HELPER(stg_stub)(CPUARMState *env, uint64_t ptr) static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt, uintptr_t ra, stg_store1 store1) { - int mmu_idx = cpu_mmu_index(env, false); + int mmu_idx = arm_env_mmu_index(env); int tag = allocation_tag_from_addr(xt); uint8_t *mem1, *mem2; @@ -405,10 +394,10 @@ static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt, if (ptr & TAG_GRANULE) { /* Two stores unaligned mod TAG_GRANULE*2 -- modify two bytes. */ mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, - TAG_GRANULE, MMU_DATA_STORE, 1, ra); + TAG_GRANULE, MMU_DATA_STORE, ra); mem2 = allocation_tag_mem(env, mmu_idx, ptr + TAG_GRANULE, MMU_DATA_STORE, TAG_GRANULE, - MMU_DATA_STORE, 1, ra); + MMU_DATA_STORE, ra); /* Store if page(s) support tags. */ if (mem1) { @@ -420,7 +409,7 @@ static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt, } else { /* Two stores aligned mod TAG_GRANULE*2 -- modify one byte. */ mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, - 2 * TAG_GRANULE, MMU_DATA_STORE, 1, ra); + 2 * TAG_GRANULE, MMU_DATA_STORE, ra); if (mem1) { tag |= tag << 4; qatomic_set(mem1, tag); @@ -440,7 +429,7 @@ void HELPER(st2g_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt) void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr) { - int mmu_idx = cpu_mmu_index(env, false); + int mmu_idx = arm_env_mmu_index(env); uintptr_t ra = GETPC(); int in_page = -(ptr | TARGET_PAGE_MASK); @@ -454,46 +443,80 @@ void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr) } } -#define LDGM_STGM_SIZE (4 << GMID_EL1_BS) - uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr) { - int mmu_idx = cpu_mmu_index(env, false); + int mmu_idx = arm_env_mmu_index(env); uintptr_t ra = GETPC(); + int gm_bs = env_archcpu(env)->gm_blocksize; + int gm_bs_bytes = 4 << gm_bs; void *tag_mem; + uint64_t ret; + int shift; - ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + ptr = QEMU_ALIGN_DOWN(ptr, gm_bs_bytes); /* Trap if accessing an invalid page. */ tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, - LDGM_STGM_SIZE, MMU_DATA_LOAD, - LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + gm_bs_bytes, MMU_DATA_LOAD, ra); /* The tag is squashed to zero if the page does not support tags. */ if (!tag_mem) { return 0; } - QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); /* - * We are loading 64-bits worth of tags. The ordering of elements - * within the word corresponds to a 64-bit little-endian operation. + * The ordering of elements within the word corresponds to + * a little-endian operation. Computation of shift comes from + * + * index = address<LOG2_TAG_GRANULE+3:LOG2_TAG_GRANULE> + * data<index*4+3:index*4> = tag + * + * Because of the alignment of ptr above, BS=6 has shift=0. + * All memory operations are aligned. Defer support for BS=2, + * requiring insertion or extraction of a nibble, until we + * support a cpu that requires it. 
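The placement this comment describes (shifting the loaded tags to their position within a 256-byte naturally aligned region) can be sketched standalone; the function and argument names here are illustrative, not QEMU's:

    #include <assert.h>
    #include <stdint.h>

    #define LOG2_TAG_GRANULE 4

    /* Position the raw LDGM data within the 64-bit result for blocksize gm_bs. */
    static uint64_t ldgm_result(uint64_t raw, uint64_t ptr, int gm_bs)
    {
        int gm_bs_bytes = 4 << gm_bs;                       /* bytes per LDGM */
        int nbits = (gm_bs_bytes >> LOG2_TAG_GRANULE) * 4;  /* tag bits loaded */
        int shift;

        assert(gm_bs >= 3 && gm_bs <= 6);
        ptr &= -(uint64_t)gm_bs_bytes;                 /* LDGM aligns the address down */
        shift = ((ptr >> LOG2_TAG_GRANULE) & 0xf) * 4; /* granule index in 256 bytes */

        if (gm_bs == 6) {
            return raw;              /* whole 64-bit word; alignment makes shift 0 */
        }
        return (raw & ((1ull << nbits) - 1)) << shift;
    }

For example, with gm_bs = 4 (64-byte blocks) and ptr = 0x40, the 16 loaded tag bits land at bits 16..31, exactly where a BS=6 load covering the surrounding 256 bytes would put them.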
*/ - return ldq_le_p(tag_mem); + switch (gm_bs) { + case 3: + /* 32 bytes -> 2 tags -> 8 result bits */ + ret = *(uint8_t *)tag_mem; + break; + case 4: + /* 64 bytes -> 4 tags -> 16 result bits */ + ret = cpu_to_le16(*(uint16_t *)tag_mem); + break; + case 5: + /* 128 bytes -> 8 tags -> 32 result bits */ + ret = cpu_to_le32(*(uint32_t *)tag_mem); + break; + case 6: + /* 256 bytes -> 16 tags -> 64 result bits */ + return cpu_to_le64(*(uint64_t *)tag_mem); + default: + /* + * CPU configured with unsupported/invalid gm blocksize. + * This is detected early in arm_cpu_realizefn. + */ + g_assert_not_reached(); + } + shift = extract64(ptr, LOG2_TAG_GRANULE, 4) * 4; + return ret << shift; } void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val) { - int mmu_idx = cpu_mmu_index(env, false); + int mmu_idx = arm_env_mmu_index(env); uintptr_t ra = GETPC(); + int gm_bs = env_archcpu(env)->gm_blocksize; + int gm_bs_bytes = 4 << gm_bs; void *tag_mem; + int shift; - ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + ptr = QEMU_ALIGN_DOWN(ptr, gm_bs_bytes); /* Trap if accessing an invalid page. */ tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, - LDGM_STGM_SIZE, MMU_DATA_LOAD, - LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + gm_bs_bytes, MMU_DATA_LOAD, ra); /* * Tag store only happens if the page support tags, @@ -503,18 +526,36 @@ void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val) return; } - QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); - /* - * We are storing 64-bits worth of tags. The ordering of elements - * within the word corresponds to a 64-bit little-endian operation. - */ - stq_le_p(tag_mem, val); + /* See LDGM for comments on BS and on shift. */ + shift = extract64(ptr, LOG2_TAG_GRANULE, 4) * 4; + val >>= shift; + switch (gm_bs) { + case 3: + /* 32 bytes -> 2 tags -> 8 result bits */ + *(uint8_t *)tag_mem = val; + break; + case 4: + /* 64 bytes -> 4 tags -> 16 result bits */ + *(uint16_t *)tag_mem = cpu_to_le16(val); + break; + case 5: + /* 128 bytes -> 8 tags -> 32 result bits */ + *(uint32_t *)tag_mem = cpu_to_le32(val); + break; + case 6: + /* 256 bytes -> 16 tags -> 64 result bits */ + *(uint64_t *)tag_mem = cpu_to_le64(val); + break; + default: + /* cpu configured with unsupported gm blocksize. */ + g_assert_not_reached(); + } } void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) { uintptr_t ra = GETPC(); - int mmu_idx = cpu_mmu_index(env, false); + int mmu_idx = arm_env_mmu_index(env); int log2_dcz_bytes, log2_tag_bytes; intptr_t dcz_bytes, tag_bytes; uint8_t *mem; @@ -531,7 +572,7 @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) ptr &= -dcz_bytes; mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes, - MMU_DATA_STORE, tag_bytes, ra); + MMU_DATA_STORE, ra); if (mem) { int tag_pair = (val & 0xf) * 0x11; memset(mem, tag_pair, tag_bytes); @@ -576,8 +617,8 @@ static void mte_async_check_fail(CPUARMState *env, uint64_t dirty_ptr, } /* Record a tag check failure. 
*/ -static void mte_check_fail(CPUARMState *env, uint32_t desc, - uint64_t dirty_ptr, uintptr_t ra) +void mte_check_fail(CPUARMState *env, uint32_t desc, + uint64_t dirty_ptr, uintptr_t ra) { int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx); @@ -694,6 +735,55 @@ static int checkN(uint8_t *mem, int odd, int cmp, int count) } /** + * checkNrev: + * @tag: tag memory to test + * @odd: true to begin testing at tags at odd nibble + * @cmp: the tag to compare against + * @count: number of tags to test + * + * Return the number of successful tests. + * Thus a return value < @count indicates a failure. + * + * This is like checkN, but it runs backwards, checking the + * tags starting with @tag and then the tags preceding it. + * This is needed by the backwards-memory-copying operations. + */ +static int checkNrev(uint8_t *mem, int odd, int cmp, int count) +{ + int n = 0, diff; + + /* Replicate the test tag and compare. */ + cmp *= 0x11; + diff = *mem-- ^ cmp; + + if (!odd) { + goto start_even; + } + + while (1) { + /* Test odd tag. */ + if (unlikely((diff) & 0xf0)) { + break; + } + if (++n == count) { + break; + } + + start_even: + /* Test even tag. */ + if (unlikely((diff) & 0x0f)) { + break; + } + if (++n == count) { + break; + } + + diff = *mem-- ^ cmp; + } + return n; +} + +/** * mte_probe_int() - helper for mte_probe and mte_check * @env: CPU environment * @desc: MTEDESC descriptor @@ -711,8 +801,7 @@ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr, int mmu_idx, ptr_tag, bit55; uint64_t ptr_last, prev_page, next_page; uint64_t tag_first, tag_last; - uint64_t tag_byte_first, tag_byte_last; - uint32_t sizem1, tag_count, tag_size, n, c; + uint32_t sizem1, tag_count, n, c; uint8_t *mem1, *mem2; MMUAccessType type; @@ -742,19 +831,14 @@ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr, tag_last = QEMU_ALIGN_DOWN(ptr_last, TAG_GRANULE); tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1; - /* Round the bounds to twice the tag granule, and compute the bytes. */ - tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE); - tag_byte_last = QEMU_ALIGN_DOWN(ptr_last, 2 * TAG_GRANULE); - /* Locate the page boundaries. */ prev_page = ptr & TARGET_PAGE_MASK; next_page = prev_page + TARGET_PAGE_SIZE; if (likely(tag_last - prev_page < TARGET_PAGE_SIZE)) { /* Memory access stays on one page. */ - tag_size = ((tag_byte_last - tag_byte_first) / (2 * TAG_GRANULE)) + 1; mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, sizem1 + 1, - MMU_DATA_LOAD, tag_size, ra); + MMU_DATA_LOAD, ra); if (!mem1) { return 1; } @@ -762,14 +846,12 @@ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr, n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count); } else { /* Memory access crosses to next page. */ - tag_size = (next_page - tag_byte_first) / (2 * TAG_GRANULE); mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr, - MMU_DATA_LOAD, tag_size, ra); + MMU_DATA_LOAD, ra); - tag_size = ((tag_byte_last - next_page) / (2 * TAG_GRANULE)) + 1; mem2 = allocation_tag_mem(env, mmu_idx, next_page, type, ptr_last - next_page + 1, - MMU_DATA_LOAD, tag_size, ra); + MMU_DATA_LOAD, ra); /* * Perform all of the comparisons. @@ -818,6 +900,24 @@ uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra) uint64_t HELPER(mte_check)(CPUARMState *env, uint32_t desc, uint64_t ptr) { + /* + * R_XCHFJ: Alignment check not caused by memory type is priority 1, + * higher than any translation fault. 
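checkNrev() above counts matching tags while walking toward lower addresses. A self-contained, runnable copy of the same walk, with a usage example (standalone model, not the QEMU function):

    #include <stdint.h>
    #include <stdio.h>

    /* Count matching 4-bit tags scanning *backwards* through packed tag bytes. */
    static int check_nrev(const uint8_t *mem, int odd, int cmp, int count)
    {
        int n = 0;
        int diff = *mem-- ^ (cmp * 0x11);   /* replicate tag into both nibbles */

        if (!odd) {
            goto start_even;
        }
        while (1) {
            if (diff & 0xf0) {              /* odd (high) nibble mismatch */
                break;
            }
            if (++n == count) {
                break;
            }
        start_even:
            if (diff & 0x0f) {              /* even (low) nibble mismatch */
                break;
            }
            if (++n == count) {
                break;
            }
            diff = *mem-- ^ (cmp * 0x11);
        }
        return n;
    }

    int main(void)
    {
        /* Scanning down from tags[2]'s low nibble: 3, 3, 3, 3, then 7. */
        uint8_t tags[3] = { 0x37, 0x33, 0x03 };
        printf("%d\n", check_nrev(&tags[2], 0, 3, 6)); /* prints 4 */
        return 0;
    }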
When MTE is disabled, tcg + * performs the alignment check during the code generated for the + * memory access. With MTE enabled, we must check this here before + * raising any translation fault in allocation_tag_mem. + */ + unsigned align = FIELD_EX32(desc, MTEDESC, ALIGN); + if (unlikely(align)) { + align = (1u << align) - 1; + if (unlikely(ptr & align)) { + int idx = FIELD_EX32(desc, MTEDESC, MIDX); + bool w = FIELD_EX32(desc, MTEDESC, WRITE); + MMUAccessType type = w ? MMU_DATA_STORE : MMU_DATA_LOAD; + arm_cpu_do_unaligned_access(env_cpu(env), ptr, type, idx, GETPC()); + } + } + return mte_check(env, desc, ptr, GETPC()); } @@ -879,7 +979,7 @@ uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr) mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); (void) probe_write(env, ptr, 1, mmu_idx, ra); mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE, - dcz_bytes, MMU_DATA_LOAD, tag_bytes, ra); + dcz_bytes, MMU_DATA_LOAD, ra); if (!mem) { goto done; } @@ -940,3 +1040,151 @@ uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr) done: return useronly_clean_ptr(ptr); } + +uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size, + uint32_t desc) +{ + int mmu_idx, tag_count; + uint64_t ptr_tag, tag_first, tag_last; + void *mem; + bool w = FIELD_EX32(desc, MTEDESC, WRITE); + uint32_t n; + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + /* True probe; this will never fault */ + mem = allocation_tag_mem_probe(env, mmu_idx, ptr, + w ? MMU_DATA_STORE : MMU_DATA_LOAD, + size, MMU_DATA_LOAD, true, 0); + if (!mem) { + return size; + } + + /* + * TODO: checkN() is not designed for checks of the size we expect + * for FEAT_MOPS operations, so we should implement this differently. + * Maybe we should do something like + * if (region start and size are aligned nicely) { + * do direct loads of 64 tag bits at a time; + * } else { + * call checkN() + * } + */ + /* Round the bounds to the tag granule, and compute the number of tags. */ + ptr_tag = allocation_tag_from_addr(ptr); + tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE); + tag_last = QEMU_ALIGN_DOWN(ptr + size - 1, TAG_GRANULE); + tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1; + n = checkN(mem, ptr & TAG_GRANULE, ptr_tag, tag_count); + if (likely(n == tag_count)) { + return size; + } + + /* + * Failure; for the first granule, it's at @ptr. Otherwise + * it's at the first byte of the nth granule. Calculate how + * many bytes we can access without hitting that failure. + */ + if (n == 0) { + return 0; + } else { + return n * TAG_GRANULE - (ptr - tag_first); + } +} + +uint64_t mte_mops_probe_rev(CPUARMState *env, uint64_t ptr, uint64_t size, + uint32_t desc) +{ + int mmu_idx, tag_count; + uint64_t ptr_tag, tag_first, tag_last; + void *mem; + bool w = FIELD_EX32(desc, MTEDESC, WRITE); + uint32_t n; + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + /* + * True probe; this will never fault. Note that our caller passes + * us a pointer to the end of the region, but allocation_tag_mem_probe() + * wants a pointer to the start. Because we know we don't span a page + * boundary and that allocation_tag_mem_probe() doesn't otherwise care + * about the size, pass in a size of 1 byte. This is simpler than + * adjusting the ptr to point to the start of the region and then having + * to adjust the returned 'mem' to get the end of the tag memory. + */ + mem = allocation_tag_mem_probe(env, mmu_idx, ptr, + w ? 
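mte_mops_probe() above converts "the first n granules matched" into a byte count relative to a pointer that may start mid-granule. The arithmetic in isolation (the helper name is invented for the example):

    #include <stdint.h>
    #include <stdio.h>

    #define TAG_GRANULE 16

    /* Bytes usable from ptr when only the first n granules pass their check. */
    static uint64_t usable_bytes(uint64_t ptr, unsigned n)
    {
        uint64_t tag_first = ptr & ~(uint64_t)(TAG_GRANULE - 1);
        return n == 0 ? 0 : n * TAG_GRANULE - (ptr - tag_first);
    }

    int main(void)
    {
        /* ptr is 8 bytes into its granule; 3 matching granules cover 40 bytes. */
        printf("%u\n", (unsigned)usable_bytes(0x1008, 3)); /* prints 40 */
        return 0;
    }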
MMU_DATA_STORE : MMU_DATA_LOAD, + 1, MMU_DATA_LOAD, true, 0); + if (!mem) { + return size; + } + + /* + * TODO: checkNrev() is not designed for checks of the size we expect + * for FEAT_MOPS operations, so we should implement this differently. + * Maybe we should do something like + * if (region start and size are aligned nicely) { + * do direct loads of 64 tag bits at a time; + * } else { + * call checkN() + * } + */ + /* Round the bounds to the tag granule, and compute the number of tags. */ + ptr_tag = allocation_tag_from_addr(ptr); + tag_first = QEMU_ALIGN_DOWN(ptr - (size - 1), TAG_GRANULE); + tag_last = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE); + tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1; + n = checkNrev(mem, ptr & TAG_GRANULE, ptr_tag, tag_count); + if (likely(n == tag_count)) { + return size; + } + + /* + * Failure; for the first granule, it's at @ptr. Otherwise + * it's at the last byte of the nth granule. Calculate how + * many bytes we can access without hitting that failure. + */ + if (n == 0) { + return 0; + } else { + return (n - 1) * TAG_GRANULE + ((ptr + 1) - tag_last); + } +} + +void mte_mops_set_tags(CPUARMState *env, uint64_t ptr, uint64_t size, + uint32_t desc) +{ + int mmu_idx, tag_count; + uint64_t ptr_tag; + void *mem; + + if (!desc) { + /* Tags not actually enabled */ + return; + } + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + /* True probe: this will never fault */ + mem = allocation_tag_mem_probe(env, mmu_idx, ptr, MMU_DATA_STORE, size, + MMU_DATA_STORE, true, 0); + if (!mem) { + return; + } + + /* + * We know that ptr and size are both TAG_GRANULE aligned; store + * the tag from the pointer value into the tag memory. + */ + ptr_tag = allocation_tag_from_addr(ptr); + tag_count = size / TAG_GRANULE; + if (ptr & TAG_GRANULE) { + /* Not 2*TAG_GRANULE-aligned: store tag to first nibble */ + store_tag1_parallel(TAG_GRANULE, mem, ptr_tag); + mem++; + tag_count--; + } + memset(mem, ptr_tag | (ptr_tag << 4), tag_count / 2); + if (tag_count & 1) { + /* Final trailing unaligned nibble */ + mem += tag_count / 2; + store_tag1_parallel(0, mem, ptr_tag); + } +} diff --git a/target/arm/mve.decode b/target/arm/tcg/mve.decode index 14a4f39802..14a4f39802 100644 --- a/target/arm/mve.decode +++ b/target/arm/tcg/mve.decode diff --git a/target/arm/mve_helper.c b/target/arm/tcg/mve_helper.c index 846962bf4c..8b99736aad 100644 --- a/target/arm/mve_helper.c +++ b/target/arm/tcg/mve_helper.c @@ -26,6 +26,7 @@ #include "exec/exec-all.h" #include "tcg/tcg.h" #include "fpu/softfloat.h" +#include "crypto/clmul.h" static uint16_t mve_eci_mask(CPUARMState *env) { @@ -726,7 +727,7 @@ static void mergemask_sb(int8_t *d, int8_t r, uint16_t mask) static void mergemask_uh(uint16_t *d, uint16_t r, uint16_t mask) { - uint16_t bmask = expand_pred_b_data[mask & 3]; + uint16_t bmask = expand_pred_b(mask); *d = (*d & ~bmask) | (r & bmask); } @@ -737,7 +738,7 @@ static void mergemask_sh(int16_t *d, int16_t r, uint16_t mask) static void mergemask_uw(uint32_t *d, uint32_t r, uint16_t mask) { - uint32_t bmask = expand_pred_b_data[mask & 0xf]; + uint32_t bmask = expand_pred_b(mask); *d = (*d & ~bmask) | (r & bmask); } @@ -748,7 +749,7 @@ static void mergemask_sw(int32_t *d, int32_t r, uint16_t mask) static void mergemask_uq(uint64_t *d, uint64_t r, uint16_t mask) { - uint64_t bmask = expand_pred_b_data[mask & 0xff]; + uint64_t bmask = expand_pred_b(mask); *d = (*d & ~bmask) | (r & bmask); } @@ -924,8 +925,8 @@ DO_1OP_IMM(vorri, DO_ORRI) bool qc = false; \ for (e = 0; e < 16 / ESIZE; e++, mask >>= 
ESIZE) { \ bool sat = false; \ - TYPE r = FN(n[H##ESIZE(e)], m[H##ESIZE(e)], &sat); \ - mergemask(&d[H##ESIZE(e)], r, mask); \ + TYPE r_ = FN(n[H##ESIZE(e)], m[H##ESIZE(e)], &sat); \ + mergemask(&d[H##ESIZE(e)], r_, mask); \ qc |= sat & mask & 1; \ } \ if (qc) { \ @@ -984,17 +985,10 @@ DO_2OP_L(vmulltuw, 1, 4, uint32_t, 8, uint64_t, DO_MUL) * Polynomial multiply. We can always do this generating 64 bits * of the result at a time, so we don't need to use DO_2OP_L. */ -#define VMULLPH_MASK 0x00ff00ff00ff00ffULL -#define VMULLPW_MASK 0x0000ffff0000ffffULL -#define DO_VMULLPBH(N, M) pmull_h((N) & VMULLPH_MASK, (M) & VMULLPH_MASK) -#define DO_VMULLPTH(N, M) DO_VMULLPBH((N) >> 8, (M) >> 8) -#define DO_VMULLPBW(N, M) pmull_w((N) & VMULLPW_MASK, (M) & VMULLPW_MASK) -#define DO_VMULLPTW(N, M) DO_VMULLPBW((N) >> 16, (M) >> 16) - -DO_2OP(vmullpbh, 8, uint64_t, DO_VMULLPBH) -DO_2OP(vmullpth, 8, uint64_t, DO_VMULLPTH) -DO_2OP(vmullpbw, 8, uint64_t, DO_VMULLPBW) -DO_2OP(vmullptw, 8, uint64_t, DO_VMULLPTW) +DO_2OP(vmullpbh, 8, uint64_t, clmul_8x4_even) +DO_2OP(vmullpth, 8, uint64_t, clmul_8x4_odd) +DO_2OP(vmullpbw, 8, uint64_t, clmul_16x2_even) +DO_2OP(vmullptw, 8, uint64_t, clmul_16x2_odd) /* * Because the computation type is at least twice as large as required, @@ -1256,11 +1250,11 @@ DO_2OP_SAT(vqsubsw, 4, int32_t, DO_SQSUB_W) #define WRAP_QRSHL_HELPER(FN, N, M, ROUND, satp) \ ({ \ uint32_t su32 = 0; \ - typeof(N) r = FN(N, (int8_t)(M), sizeof(N) * 8, ROUND, &su32); \ + typeof(N) qrshl_ret = FN(N, (int8_t)(M), sizeof(N) * 8, ROUND, &su32); \ if (su32) { \ *satp = true; \ } \ - r; \ + qrshl_ret; \ }) #define DO_SQSHL_OP(N, M, satp) \ @@ -1298,12 +1292,12 @@ DO_2OP_SAT_U(vqrshlu, DO_UQRSHL_OP) for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ bool sat = false; \ if ((e & 1) == XCHG) { \ - TYPE r = FN(n[H##ESIZE(e)], \ + TYPE vqdmladh_ret = FN(n[H##ESIZE(e)], \ m[H##ESIZE(e - XCHG)], \ n[H##ESIZE(e + (1 - 2 * XCHG))], \ m[H##ESIZE(e + (1 - XCHG))], \ ROUND, &sat); \ - mergemask(&d[H##ESIZE(e)], r, mask); \ + mergemask(&d[H##ESIZE(e)], vqdmladh_ret, mask); \ qc |= sat & mask & 1; \ } \ } \ @@ -2460,7 +2454,7 @@ static inline int64_t do_sqrshl48_d(int64_t src, int64_t shift, return extval; } } else if (shift < 48) { - int64_t extval = sextract64(src << shift, 0, 48); + extval = sextract64(src << shift, 0, 48); if (!sat || src == (extval >> shift)) { return extval; } @@ -2492,7 +2486,7 @@ static inline uint64_t do_uqrshl48_d(uint64_t src, int64_t shift, return extval; } } else if (shift < 48) { - uint64_t extval = extract64(src << shift, 0, 48); + extval = extract64(src << shift, 0, 48); if (!sat || src == (extval >> shift)) { return extval; } diff --git a/target/arm/neon-dp.decode b/target/arm/tcg/neon-dp.decode index fd3a01bfa0..fd3a01bfa0 100644 --- a/target/arm/neon-dp.decode +++ b/target/arm/tcg/neon-dp.decode diff --git a/target/arm/neon-ls.decode b/target/arm/tcg/neon-ls.decode index c5f364cbc0..c5f364cbc0 100644 --- a/target/arm/neon-ls.decode +++ b/target/arm/tcg/neon-ls.decode diff --git a/target/arm/neon-shared.decode b/target/arm/tcg/neon-shared.decode index 8e6bd0b61f..8e6bd0b61f 100644 --- a/target/arm/neon-shared.decode +++ b/target/arm/tcg/neon-shared.decode diff --git a/target/arm/neon_helper.c b/target/arm/tcg/neon_helper.c index 338b9189d5..bc6c4a54e9 100644 --- a/target/arm/neon_helper.c +++ b/target/arm/tcg/neon_helper.c @@ -23,7 +23,7 @@ typedef struct \ { \ type v1; \ } neon_##name; -#ifdef HOST_WORDS_BIGENDIAN +#if HOST_BIG_ENDIAN #define NEON_TYPE2(name, type) \ typedef struct \ 
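The VMULLP rewrite above delegates to crypto/clmul.h; the underlying operation is plain carry-less multiplication, shown here as a scalar reference model (clmul8 is an illustration, not the QEMU API):

    #include <stdint.h>
    #include <stdio.h>

    /* Reference 8x8 -> 16 bit carry-less (polynomial) multiply. */
    static uint16_t clmul8(uint8_t a, uint8_t b)
    {
        uint16_t r = 0;
        for (int i = 0; i < 8; i++) {
            if (b & (1u << i)) {
                r ^= (uint16_t)a << i;   /* XOR partial products: no carries */
            }
        }
        return r;
    }

    int main(void)
    {
        /* Over GF(2), (x+1)*(x+1) = x^2+1, so 3 clmul 3 is 5, not 9. */
        printf("%u\n", clmul8(3, 3)); /* prints 5 */
        return 0;
    }

The _even/_odd variants used in the hunk apply this lanewise to alternate 8- or 16-bit lanes of a 64-bit vector element, which is exactly what the old VMULLP mask-and-shift macros were doing by hand.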
{ \ diff --git a/target/arm/op_helper.c b/target/arm/tcg/op_helper.c index 70b42b55fd..c199b69fbf 100644 --- a/target/arm/op_helper.c +++ b/target/arm/tcg/op_helper.c @@ -21,12 +21,29 @@ #include "cpu.h" #include "exec/helper-proto.h" #include "internals.h" +#include "cpu-features.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" +#include "cpregs.h" #define SIGNBIT (uint32_t)0x80000000 #define SIGNBIT64 ((uint64_t)1 << 63) +int exception_target_el(CPUARMState *env) +{ + int target_el = MAX(1, arm_current_el(env)); + + /* + * No such thing as secure EL1 if EL3 is aarch32, + * so update the target EL to EL3 in this case. + */ + if (arm_is_secure(env) && !arm_el_is_aa64(env, 3) && target_el == 1) { + target_el = 3; + } + + return target_el; +} + void raise_exception(CPUARMState *env, uint32_t excp, uint32_t syndrome, uint32_t target_el) { @@ -62,7 +79,7 @@ void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome, * we must restore CPU state here before setting the syndrome * the caller passed us, and cannot use cpu_loop_exit_restore(). */ - cpu_restore_state(cs, ra, true); + cpu_restore_state(cs, ra); raise_exception(env, excp, syndrome, target_el); } @@ -104,6 +121,61 @@ void HELPER(v8m_stackcheck)(CPUARMState *env, uint32_t newvalue) } } +/* Sign/zero extend */ +uint32_t HELPER(sxtb16)(uint32_t x) +{ + uint32_t res; + res = (uint16_t)(int8_t)x; + res |= (uint32_t)(int8_t)(x >> 16) << 16; + return res; +} + +static void handle_possible_div0_trap(CPUARMState *env, uintptr_t ra) +{ + /* + * Take a division-by-zero exception if necessary; otherwise return + * to get the usual non-trapping division behaviour (result of 0) + */ + if (arm_feature(env, ARM_FEATURE_M) + && (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_DIV_0_TRP_MASK)) { + raise_exception_ra(env, EXCP_DIVBYZERO, 0, 1, ra); + } +} + +uint32_t HELPER(uxtb16)(uint32_t x) +{ + uint32_t res; + res = (uint16_t)(uint8_t)x; + res |= (uint32_t)(uint8_t)(x >> 16) << 16; + return res; +} + +int32_t HELPER(sdiv)(CPUARMState *env, int32_t num, int32_t den) +{ + if (den == 0) { + handle_possible_div0_trap(env, GETPC()); + return 0; + } + if (num == INT_MIN && den == -1) { + return INT_MIN; + } + return num / den; +} + +uint32_t HELPER(udiv)(CPUARMState *env, uint32_t num, uint32_t den) +{ + if (den == 0) { + handle_possible_div0_trap(env, GETPC()); + return 0; + } + return num / den; +} + +uint32_t HELPER(rbit)(uint32_t x) +{ + return revbit32(x); +} + uint32_t HELPER(add_setq)(CPUARMState *env, uint32_t a, uint32_t b) { uint32_t res = a + b; @@ -365,7 +437,7 @@ void HELPER(yield)(CPUARMState *env) * those EXCP values which are special cases for QEMU to interrupt * execution and not to be used for exceptions which are passed to * the guest (those must all have syndrome information and thus should - * use exception_with_syndrome). + * use exception_with_syndrome*). */ void HELPER(exception_internal)(CPUARMState *env, uint32_t excp) { @@ -377,39 +449,20 @@ void HELPER(exception_internal)(CPUARMState *env, uint32_t excp) } /* Raise an exception with the specified syndrome register value */ -void HELPER(exception_with_syndrome)(CPUARMState *env, uint32_t excp, - uint32_t syndrome, uint32_t target_el) +void HELPER(exception_with_syndrome_el)(CPUARMState *env, uint32_t excp, + uint32_t syndrome, uint32_t target_el) { raise_exception(env, excp, syndrome, target_el); } -/* Raise an EXCP_BKPT with the specified syndrome register value, - * targeting the correct exception level for debug exceptions. 
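The division helpers above encode two A-profile corner cases. A sketch of the same semantics in isolation, with the trap path reduced to a comment:

    #include <stdint.h>

    /*
     * A32 SDIV semantics: dividing by zero yields 0 (unless an M-profile
     * CPU has CCR.DIV_0_TRP set, in which case it traps instead), and
     * INT32_MIN / -1 returns INT32_MIN rather than overflowing.
     */
    static int32_t a32_sdiv(int32_t num, int32_t den)
    {
        if (den == 0) {
            return 0;
        }
        if (num == INT32_MIN && den == -1) {
            return INT32_MIN;   /* the one quotient that does not fit in 32 bits */
        }
        return num / den;
    }

Both special cases are checked before the C division so the helper itself never performs an operation that would be undefined behaviour in C.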
+/* + * Raise an exception with the specified syndrome register value + * to the default target el. */ -void HELPER(exception_bkpt_insn)(CPUARMState *env, uint32_t syndrome) +void HELPER(exception_with_syndrome)(CPUARMState *env, uint32_t excp, + uint32_t syndrome) { - int debug_el = arm_debug_target_el(env); - int cur_el = arm_current_el(env); - - /* FSR will only be used if the debug target EL is AArch32. */ - env->exception.fsr = arm_debug_exception_fsr(env); - /* FAR is UNKNOWN: clear vaddress to avoid potentially exposing - * values to the guest that it shouldn't be able to see at its - * exception/security level. - */ - env->exception.vaddress = 0; - /* - * Other kinds of architectural debug exception are ignored if - * they target an exception level below the current one (in QEMU - * this is checked by arm_generate_debug_exceptions()). Breakpoint - * instructions are special because they always generate an exception - * to somewhere: if they can't go to the configured debug exception - * level they are taken to the current exception level. - */ - if (debug_el < cur_el) { - debug_el = cur_el; - } - raise_exception(env, EXCP_BKPT, syndrome, debug_el); + raise_exception(env, excp, syndrome, exception_target_el(env)); } uint32_t HELPER(cpsr_read)(CPUARMState *env) @@ -429,9 +482,9 @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val) { uint32_t mask; - qemu_mutex_lock_iothread(); + bql_lock(); arm_call_pre_el_change_hook(env_archcpu(env)); - qemu_mutex_unlock_iothread(); + bql_unlock(); mask = aarch32_cpsr_valid_mask(env->features, &env_archcpu(env)->isar); cpsr_write(env, val, mask, CPSRWriteExceptionReturn); @@ -444,9 +497,9 @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val) env->regs[15] &= (env->thumb ? ~1 : ~3); arm_rebuild_hflags(env); - qemu_mutex_lock_iothread(); + bql_lock(); arm_call_el_change_hook(env_archcpu(env)); - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* Access to user mode registers from privileged modes. */ @@ -517,10 +570,24 @@ static void msr_mrs_banked_exc_checks(CPUARMState *env, uint32_t tgtmode, */ int curmode = env->uncached_cpsr & CPSR_M; - if (regno == 17) { - /* ELR_Hyp: a special case because access from tgtmode is OK */ - if (curmode != ARM_CPU_MODE_HYP && curmode != ARM_CPU_MODE_MON) { - goto undef; + if (tgtmode == ARM_CPU_MODE_HYP) { + /* + * Handle Hyp target regs first because some are special cases + * which don't want the usual "not accessible from tgtmode" check. + */ + switch (regno) { + case 16 ... 
17: /* ELR_Hyp, SPSR_Hyp */ + if (curmode != ARM_CPU_MODE_HYP && curmode != ARM_CPU_MODE_MON) { + goto undef; + } + break; + case 13: + if (curmode != ARM_CPU_MODE_MON) { + goto undef; + } + break; + default: + g_assert_not_reached(); } return; } @@ -551,13 +618,6 @@ static void msr_mrs_banked_exc_checks(CPUARMState *env, uint32_t tgtmode, } } - if (tgtmode == ARM_CPU_MODE_HYP) { - /* SPSR_Hyp, r13_hyp: accessible from Monitor mode only */ - if (curmode != ARM_CPU_MODE_MON) { - goto undef; - } - } - return; undef: @@ -572,7 +632,12 @@ void HELPER(msr_banked)(CPUARMState *env, uint32_t value, uint32_t tgtmode, switch (regno) { case 16: /* SPSRs */ - env->banked_spsr[bank_number(tgtmode)] = value; + if (tgtmode == (env->uncached_cpsr & CPSR_M)) { + /* Only happens for SPSR_Hyp access in Hyp mode */ + env->spsr = value; + } else { + env->banked_spsr[bank_number(tgtmode)] = value; + } break; case 17: /* ELR_Hyp */ env->elr_el[2] = value; @@ -606,7 +671,12 @@ uint32_t HELPER(mrs_banked)(CPUARMState *env, uint32_t tgtmode, uint32_t regno) switch (regno) { case 16: /* SPSRs */ - return env->banked_spsr[bank_number(tgtmode)]; + if (tgtmode == (env->uncached_cpsr & CPSR_M)) { + /* Only happens for SPSR_Hyp access in Hyp mode */ + return env->spsr; + } else { + return env->banked_spsr[bank_number(tgtmode)]; + } case 17: /* ELR_Hyp */ return env->elr_el[2]; case 13: @@ -627,22 +697,46 @@ uint32_t HELPER(mrs_banked)(CPUARMState *env, uint32_t tgtmode, uint32_t regno) } } -void HELPER(access_check_cp_reg)(CPUARMState *env, void *rip, uint32_t syndrome, - uint32_t isread) +const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key, + uint32_t syndrome, uint32_t isread) { - const ARMCPRegInfo *ri = rip; + ARMCPU *cpu = env_archcpu(env); + const ARMCPRegInfo *ri = get_arm_cp_reginfo(cpu->cp_regs, key); + CPAccessResult res = CP_ACCESS_OK; int target_el; + assert(ri != NULL); + if (arm_feature(env, ARM_FEATURE_XSCALE) && ri->cp < 14 && extract32(env->cp15.c15_cpar, ri->cp, 1) == 0) { - raise_exception(env, EXCP_UDEF, syndrome, exception_target_el(env)); + res = CP_ACCESS_TRAP; + goto fail; + } + + if (ri->accessfn) { + res = ri->accessfn(env, ri, isread); } /* - * Check for an EL2 trap due to HSTR_EL2. We expect EL0 accesses - * to sysregs non accessible at EL0 to have UNDEF-ed already. + * If the access function indicates a trap from EL0 to EL1 then + * that always takes priority over the HSTR_EL2 trap. (If it indicates + * a trap to EL3, then the HSTR_EL2 trap takes priority; if it indicates + * a trap to EL2, then the syndrome is the same either way so we don't + * care whether technically the architecture says that HSTR_EL2 trap or + * the other trap takes priority. So we take the "check HSTR_EL2" path + * for all of those cases.) */ - if (!is_a64(env) && arm_current_el(env) < 2 && ri->cp == 15 && + if (res != CP_ACCESS_OK && ((res & CP_ACCESS_EL_MASK) == 0) && + arm_current_el(env) == 0) { + goto fail; + } + + /* + * HSTR_EL2 traps from EL1 are checked earlier, in generated code; + * we only need to check here for traps from EL0. 
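The SPSR special case above exists because the current mode's SPSR is held in env->spsr, not in the banked array. A simplified model (the struct and bank mapping are hypothetical):

    #include <stdint.h>

    typedef struct {
        uint32_t spsr;            /* SPSR of the current mode */
        uint32_t banked_spsr[8];  /* saved copies for the other modes */
        uint32_t cur_mode;        /* CPSR.M of the current mode */
    } Env;

    static int bank_number(uint32_t mode)
    {
        return mode & 7;          /* stand-in for the real mode -> index map */
    }

    /* Pick the storage MSR/MRS (banked) must use for SPSR_<tgtmode>. */
    static uint32_t *spsr_slot(Env *env, uint32_t tgtmode)
    {
        if (tgtmode == env->cur_mode) {
            /* e.g. SPSR_Hyp accessed from Hyp mode: use the live SPSR */
            return &env->spsr;
        }
        return &env->banked_spsr[bank_number(tgtmode)];
    }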
+ */ + if (!is_a64(env) && arm_current_el(env) == 0 && ri->cp == 15 && + arm_is_el2_enabled(env) && (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) { uint32_t mask = 1 << ri->crn; @@ -654,86 +748,150 @@ void HELPER(access_check_cp_reg)(CPUARMState *env, void *rip, uint32_t syndrome, mask &= ~((1 << 4) | (1 << 14)); if (env->cp15.hstr_el2 & mask) { - target_el = 2; - goto exept; + res = CP_ACCESS_TRAP_EL2; + goto fail; } } - if (!ri->accessfn) { - return; + /* + * Fine-grained traps also are lower priority than undef-to-EL1, + * higher priority than trap-to-EL3, and we don't care about priority + * order with other EL2 traps because the syndrome value is the same. + */ + if (arm_fgt_active(env, arm_current_el(env))) { + uint64_t trapword = 0; + unsigned int idx = FIELD_EX32(ri->fgt, FGT, IDX); + unsigned int bitpos = FIELD_EX32(ri->fgt, FGT, BITPOS); + bool rev = FIELD_EX32(ri->fgt, FGT, REV); + bool trapbit; + + if (ri->fgt & FGT_EXEC) { + assert(idx < ARRAY_SIZE(env->cp15.fgt_exec)); + trapword = env->cp15.fgt_exec[idx]; + } else if (isread && (ri->fgt & FGT_R)) { + assert(idx < ARRAY_SIZE(env->cp15.fgt_read)); + trapword = env->cp15.fgt_read[idx]; + } else if (!isread && (ri->fgt & FGT_W)) { + assert(idx < ARRAY_SIZE(env->cp15.fgt_write)); + trapword = env->cp15.fgt_write[idx]; + } + + trapbit = extract64(trapword, bitpos, 1); + if (trapbit != rev) { + res = CP_ACCESS_TRAP_EL2; + goto fail; + } } - switch (ri->accessfn(env, ri, isread)) { - case CP_ACCESS_OK: - return; + if (likely(res == CP_ACCESS_OK)) { + return ri; + } + + fail: + switch (res & ~CP_ACCESS_EL_MASK) { case CP_ACCESS_TRAP: - target_el = exception_target_el(env); - break; - case CP_ACCESS_TRAP_EL2: - /* Requesting a trap to EL2 when we're in EL3 is - * a bug in the access function. - */ - assert(arm_current_el(env) != 3); - target_el = 2; - break; - case CP_ACCESS_TRAP_EL3: - target_el = 3; break; case CP_ACCESS_TRAP_UNCATEGORIZED: - target_el = exception_target_el(env); - syndrome = syn_uncategorized(); - break; - case CP_ACCESS_TRAP_UNCATEGORIZED_EL2: - target_el = 2; + /* Only CP_ACCESS_TRAP traps are direct to a specified EL */ + assert((res & CP_ACCESS_EL_MASK) == 0); + if (cpu_isar_feature(aa64_ids, cpu) && isread && + arm_cpreg_in_idspace(ri)) { + /* + * FEAT_IDST says this should be reported as EC_SYSTEMREGISTERTRAP, + * not EC_UNCATEGORIZED + */ + break; + } syndrome = syn_uncategorized(); break; - case CP_ACCESS_TRAP_UNCATEGORIZED_EL3: - target_el = 3; - syndrome = syn_uncategorized(); + default: + g_assert_not_reached(); + } + + target_el = res & CP_ACCESS_EL_MASK; + switch (target_el) { + case 0: + target_el = exception_target_el(env); break; - case CP_ACCESS_TRAP_FP_EL2: - target_el = 2; - /* Since we are an implementation that takes exceptions on a trapped - * conditional insn only if the insn has passed its condition code - * check, we take the IMPDEF choice to always report CV=1 COND=0xe - * (which is also the required value for AArch64 traps). 
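The fine-grained-trap test above reduces to one bit in one trap word, with an optional reversed sense. Distilled, with the descriptor fields assumed to be already extracted:

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * FGT decision: a register's descriptor names a trap word, a bit
     * position, and whether the bit is reversed (for bits where 1 means
     * "do not trap"). Trap when the effective bit is set.
     */
    static bool fgt_traps(uint64_t trapword, unsigned bitpos, bool rev)
    {
        bool trapbit = (trapword >> bitpos) & 1;
        return trapbit != rev;
    }

Selecting trapword from the exec/read/write arrays, as the hunk does, is what lets a single helper serve every FGT-controlled register.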
- */ - syndrome = syn_fp_access_trap(1, 0xe, false); + case 2: + assert(arm_current_el(env) != 3); + assert(arm_is_el2_enabled(env)); break; - case CP_ACCESS_TRAP_FP_EL3: - target_el = 3; - syndrome = syn_fp_access_trap(1, 0xe, false); + case 3: + assert(arm_feature(env, ARM_FEATURE_EL3)); break; default: + /* No "direct" traps to EL1 */ g_assert_not_reached(); } -exept: raise_exception(env, EXCP_UDEF, syndrome, target_el); } -void HELPER(set_cp_reg)(CPUARMState *env, void *rip, uint32_t value) +const void *HELPER(lookup_cp_reg)(CPUARMState *env, uint32_t key) +{ + ARMCPU *cpu = env_archcpu(env); + const ARMCPRegInfo *ri = get_arm_cp_reginfo(cpu->cp_regs, key); + + assert(ri != NULL); + return ri; +} + +/* + * Test for HCR_EL2.TIDCP at EL1. + * Since implementation defined registers are rare, and within QEMU + * most of them are no-op, do not waste HFLAGS space for this and + * always use a helper. + */ +void HELPER(tidcp_el1)(CPUARMState *env, uint32_t syndrome) +{ + if (arm_hcr_el2_eff(env) & HCR_TIDCP) { + raise_exception_ra(env, EXCP_UDEF, syndrome, 2, GETPC()); + } +} + +/* + * Similarly, for FEAT_TIDCP1 at EL0. + * We have already checked for the presence of the feature. + */ +void HELPER(tidcp_el0)(CPUARMState *env, uint32_t syndrome) +{ + /* See arm_sctlr(), but we also need the sctlr el. */ + ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0); + int target_el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1; + + /* + * The bit is not valid unless the target el is aa64, but since the + * bit test is simpler perform that first and check validity after. + */ + if ((env->cp15.sctlr_el[target_el] & SCTLR_TIDCP) + && arm_el_is_aa64(env, target_el)) { + raise_exception_ra(env, EXCP_UDEF, syndrome, target_el, GETPC()); + } +} + +void HELPER(set_cp_reg)(CPUARMState *env, const void *rip, uint32_t value) { const ARMCPRegInfo *ri = rip; if (ri->type & ARM_CP_IO) { - qemu_mutex_lock_iothread(); + bql_lock(); ri->writefn(env, ri, value); - qemu_mutex_unlock_iothread(); + bql_unlock(); } else { ri->writefn(env, ri, value); } } -uint32_t HELPER(get_cp_reg)(CPUARMState *env, void *rip) +uint32_t HELPER(get_cp_reg)(CPUARMState *env, const void *rip) { const ARMCPRegInfo *ri = rip; uint32_t res; if (ri->type & ARM_CP_IO) { - qemu_mutex_lock_iothread(); + bql_lock(); res = ri->readfn(env, ri); - qemu_mutex_unlock_iothread(); + bql_unlock(); } else { res = ri->readfn(env, ri); } @@ -741,28 +899,28 @@ uint32_t HELPER(get_cp_reg)(CPUARMState *env, void *rip) return res; } -void HELPER(set_cp_reg64)(CPUARMState *env, void *rip, uint64_t value) +void HELPER(set_cp_reg64)(CPUARMState *env, const void *rip, uint64_t value) { const ARMCPRegInfo *ri = rip; if (ri->type & ARM_CP_IO) { - qemu_mutex_lock_iothread(); + bql_lock(); ri->writefn(env, ri, value); - qemu_mutex_unlock_iothread(); + bql_unlock(); } else { ri->writefn(env, ri, value); } } -uint64_t HELPER(get_cp_reg64)(CPUARMState *env, void *rip) +uint64_t HELPER(get_cp_reg64)(CPUARMState *env, const void *rip) { const ARMCPRegInfo *ri = rip; uint64_t res; if (ri->type & ARM_CP_IO) { - qemu_mutex_lock_iothread(); + bql_lock(); res = ri->readfn(env, ri); - qemu_mutex_unlock_iothread(); + bql_unlock(); } else { res = ri->readfn(env, ri); } @@ -844,7 +1002,14 @@ void HELPER(pre_smc)(CPUARMState *env, uint32_t syndrome) * * Conduit SMC, valid call Trap to EL2 PSCI Call * Conduit SMC, inval call Trap to EL2 Undef insn - * Conduit not SMC Undef insn Undef insn + * Conduit not SMC Undef or trap[1] Undef insn + * + * [1] In this case: + * - if HCR_EL2.NV == 1 we must trap to EL2 
+ * - if HCR_EL2.NV == 0 then newer architecture revisions permit + * AArch64 (but not AArch32) to trap to EL2 as an IMPDEF choice + * - otherwise we must UNDEF + * We take the IMPDEF choice to always UNDEF if HCR_EL2.NV == 0. */ /* On ARMv8 with EL3 AArch64, SMD applies to both S and NS state. @@ -858,9 +1023,12 @@ void HELPER(pre_smc)(CPUARMState *env, uint32_t syndrome) : smd_flag && !secure; if (!arm_feature(env, ARM_FEATURE_EL3) && + !(arm_hcr_el2_eff(env) & HCR_NV) && cpu->psci_conduit != QEMU_PSCI_CONDUIT_SMC) { - /* If we have no EL3 then SMC always UNDEFs and can't be - * trapped to EL2. PSCI-via-SMC is a sort of ersatz EL3 + /* + * If we have no EL3 then traditionally SMC always UNDEFs and can't be + * trapped to EL2. For nested virtualization, SMC can be trapped to + * the outer hypervisor. PSCI-via-SMC is a sort of ersatz EL3 * firmware within QEMU, and we want an EL2 guest to be able * to forbid its EL1 from making PSCI calls into QEMU's * "firmware" via HCR.TSC, so for these purposes treat @@ -972,3 +1140,46 @@ void HELPER(probe_access)(CPUARMState *env, target_ulong ptr, access_type, mmu_idx, ra); } } + +/* + * This function corresponds to AArch64.vESBOperation(). + * Note that the AArch32 version is not functionally different. + */ +void HELPER(vesb)(CPUARMState *env) +{ + /* + * The EL2Enabled() check is done inside arm_hcr_el2_eff, + * and will return HCR_EL2.VSE == 0, so nothing happens. + */ + uint64_t hcr = arm_hcr_el2_eff(env); + bool enabled = !(hcr & HCR_TGE) && (hcr & HCR_AMO); + bool pending = enabled && (hcr & HCR_VSE); + bool masked = (env->daif & PSTATE_A); + + /* If VSE pending and masked, defer the exception. */ + if (pending && masked) { + uint32_t syndrome; + + if (arm_el_is_aa64(env, 1)) { + /* Copy across IDS and ISS from VSESR. */ + syndrome = env->cp15.vsesr_el2 & 0x1ffffff; + } else { + ARMMMUFaultInfo fi = { .type = ARMFault_AsyncExternal }; + + if (extended_addresses_enabled(env)) { + syndrome = arm_fi_to_lfsc(&fi); + } else { + syndrome = arm_fi_to_sfsc(&fi); + } + /* Copy across AET and ExT from VSESR. */ + syndrome |= env->cp15.vsesr_el2 & 0xd000; + } + + /* Set VDISR_EL2.A along with the syndrome. 
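The deferral path in vesb() above folds the virtual SError syndrome into VDISR_EL2; for the AArch64 case this is just a mask plus the A bit (the helper name is invented):

    #include <stdint.h>

    /*
     * VDISR_EL2 value for a deferred virtual SError with an AArch64 EL1:
     * ISS and IDS copied from VSESR_EL2, plus VDISR_EL2.A in bit 31.
     */
    static uint64_t make_vdisr_aa64(uint64_t vsesr_el2)
    {
        return (vsesr_el2 & 0x1ffffff) | (1ull << 31);
    }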
*/ + env->cp15.vdisr_el2 = syndrome | (1u << 31); + + /* Clear pending virtual SError */ + env->cp15.hcr_el2 &= ~HCR_VSE; + cpu_reset_interrupt(env_cpu(env), CPU_INTERRUPT_VSERR); + } +} diff --git a/target/arm/pauth_helper.c b/target/arm/tcg/pauth_helper.c index cd6df18150..c4b143024f 100644 --- a/target/arm/pauth_helper.c +++ b/target/arm/tcg/pauth_helper.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "internals.h" +#include "cpu-features.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/helper-proto.h" @@ -96,6 +97,21 @@ static uint64_t pac_sub(uint64_t i) return o; } +static uint64_t pac_sub1(uint64_t i) +{ + static const uint8_t sub1[16] = { + 0xa, 0xd, 0xe, 0x6, 0xf, 0x7, 0x3, 0x5, + 0x9, 0x8, 0x0, 0xc, 0xb, 0x1, 0x2, 0x4, + }; + uint64_t o = 0; + int b; + + for (b = 0; b < 64; b += 4) { + o |= (uint64_t)sub1[(i >> b) & 0xf] << b; + } + return o; +} + static uint64_t pac_inv_sub(uint64_t i) { static const uint8_t inv_sub[16] = { @@ -209,7 +225,7 @@ static uint64_t tweak_inv_shuffle(uint64_t i) } static uint64_t pauth_computepac_architected(uint64_t data, uint64_t modifier, - ARMPACKey key) + ARMPACKey key, bool isqarma3) { static const uint64_t RC[5] = { 0x0000000000000000ull, @@ -219,6 +235,7 @@ static uint64_t pauth_computepac_architected(uint64_t data, uint64_t modifier, 0x452821E638D01377ull, }; const uint64_t alpha = 0xC0AC29B7C97C50DDull; + int iterations = isqarma3 ? 2 : 4; /* * Note that in the ARM pseudocode, key0 contains bits <127:64> * and key1 contains bits <63:0> of the 128-bit key. @@ -231,7 +248,7 @@ static uint64_t pauth_computepac_architected(uint64_t data, uint64_t modifier, runningmod = modifier; workingval = data ^ key0; - for (i = 0; i <= 4; ++i) { + for (i = 0; i <= iterations; ++i) { roundkey = key1 ^ runningmod; workingval ^= roundkey; workingval ^= RC[i]; @@ -239,32 +256,48 @@ static uint64_t pauth_computepac_architected(uint64_t data, uint64_t modifier, workingval = pac_cell_shuffle(workingval); workingval = pac_mult(workingval); } - workingval = pac_sub(workingval); + if (isqarma3) { + workingval = pac_sub1(workingval); + } else { + workingval = pac_sub(workingval); + } runningmod = tweak_shuffle(runningmod); } roundkey = modk0 ^ runningmod; workingval ^= roundkey; workingval = pac_cell_shuffle(workingval); workingval = pac_mult(workingval); - workingval = pac_sub(workingval); + if (isqarma3) { + workingval = pac_sub1(workingval); + } else { + workingval = pac_sub(workingval); + } workingval = pac_cell_shuffle(workingval); workingval = pac_mult(workingval); workingval ^= key1; workingval = pac_cell_inv_shuffle(workingval); - workingval = pac_inv_sub(workingval); + if (isqarma3) { + workingval = pac_sub1(workingval); + } else { + workingval = pac_inv_sub(workingval); + } workingval = pac_mult(workingval); workingval = pac_cell_inv_shuffle(workingval); workingval ^= key0; workingval ^= runningmod; - for (i = 0; i <= 4; ++i) { - workingval = pac_inv_sub(workingval); - if (i < 4) { + for (i = 0; i <= iterations; ++i) { + if (isqarma3) { + workingval = pac_sub1(workingval); + } else { + workingval = pac_inv_sub(workingval); + } + if (i < iterations) { workingval = pac_mult(workingval); workingval = pac_cell_inv_shuffle(workingval); } runningmod = tweak_inv_shuffle(runningmod); roundkey = key1 ^ runningmod; - workingval ^= RC[4 - i]; + workingval ^= RC[iterations - i]; workingval ^= roundkey; workingval ^= alpha; } @@ -282,8 +315,10 @@ static uint64_t pauth_computepac_impdef(uint64_t data, uint64_t modifier, static 
uint64_t pauth_computepac(CPUARMState *env, uint64_t data, uint64_t modifier, ARMPACKey key) { - if (cpu_isar_feature(aa64_pauth_arch, env_archcpu(env))) { - return pauth_computepac_architected(data, modifier, key); + if (cpu_isar_feature(aa64_pauth_qarma5, env_archcpu(env))) { + return pauth_computepac_architected(data, modifier, key, false); + } else if (cpu_isar_feature(aa64_pauth_qarma3, env_archcpu(env))) { + return pauth_computepac_architected(data, modifier, key, true); } else { return pauth_computepac_impdef(data, modifier, key); } @@ -292,8 +327,10 @@ static uint64_t pauth_computepac(CPUARMState *env, uint64_t data, static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier, ARMPACKey *key, bool data) { + ARMCPU *cpu = env_archcpu(env); ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env); - ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data); + ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data, false); + ARMPauthFeature pauth_feature = cpu_isar_feature(pauth_feature, cpu); uint64_t pac, ext_ptr, ext, test; int bot_bit, top_bit; @@ -317,17 +354,26 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier, */ test = sextract64(ptr, bot_bit, top_bit - bot_bit); if (test != 0 && test != -1) { - /* - * Note that our top_bit is one greater than the pseudocode's - * version, hence "- 2" here. - */ - pac ^= MAKE_64BIT_MASK(top_bit - 2, 1); + if (pauth_feature >= PauthFeat_2) { + /* No action required */ + } else if (pauth_feature == PauthFeat_EPAC) { + pac = 0; + } else { + /* + * Note that our top_bit is one greater than the pseudocode's + * version, hence "- 2" here. + */ + pac ^= MAKE_64BIT_MASK(top_bit - 2, 1); + } } /* * Preserve the determination between upper and lower at bit 55, * and insert pointer authentication code. */ + if (pauth_feature >= PauthFeat_2) { + pac ^= ptr; + } if (param.tbi) { ptr &= ~MAKE_64BIT_MASK(bot_bit, 55 - bot_bit + 1); pac &= MAKE_64BIT_MASK(bot_bit, 54 - bot_bit + 1); @@ -341,29 +387,56 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier, static uint64_t pauth_original_ptr(uint64_t ptr, ARMVAParameters param) { + uint64_t mask = pauth_ptr_mask(param); + /* Note that bit 55 is used whether or not the regime has 2 ranges. 
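pauth_addpac() above now behaves differently per PAuth generation when the top address bits are not canonical. The three behaviours side by side, as a sketch (the enum and helper are illustrative; bit numbering follows the code above):

    #include <stdint.h>

    typedef enum { PAUTH_BASE, PAUTH_EPAC, PAUTH_2 } PauthVariant; /* illustrative */

    /* Adjust a computed PAC for a pointer whose test bits were not canonical. */
    static uint64_t adjust_pac(uint64_t pac, uint64_t ptr, int top_bit,
                               int canonical, PauthVariant v)
    {
        if (!canonical) {
            if (v == PAUTH_EPAC) {
                pac = 0;                        /* EPAC: insert an empty PAC */
            } else if (v == PAUTH_BASE) {
                pac ^= 1ull << (top_bit - 2);   /* poison one PAC bit */
            }
            /* FEAT_PAuth2 and later: leave the PAC unchanged */
        }
        if (v == PAUTH_2) {
            pac ^= ptr;    /* PAuth2 XORs the PAC over the pointer bits */
        }
        return pac;
    }

The PAuth2 XOR is what makes authentication symmetric there: AUT* recomputes the PAC and XORs it back out, and any mismatch leaves non-canonical bits behind for FPAC to fault on.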
*/ - uint64_t extfield = sextract64(ptr, 55, 1); - int bot_pac_bit = 64 - param.tsz; - int top_pac_bit = 64 - 8 * param.tbi; + if (extract64(ptr, 55, 1)) { + return ptr | mask; + } else { + return ptr & ~mask; + } +} - return deposit64(ptr, bot_pac_bit, top_pac_bit - bot_pac_bit, extfield); +static G_NORETURN +void pauth_fail_exception(CPUARMState *env, bool data, + int keynumber, uintptr_t ra) +{ + raise_exception_ra(env, EXCP_UDEF, syn_pacfail(data, keynumber), + exception_target_el(env), ra); } static uint64_t pauth_auth(CPUARMState *env, uint64_t ptr, uint64_t modifier, - ARMPACKey *key, bool data, int keynumber) + ARMPACKey *key, bool data, int keynumber, + uintptr_t ra, bool is_combined) { + ARMCPU *cpu = env_archcpu(env); ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env); - ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data); + ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data, false); + ARMPauthFeature pauth_feature = cpu_isar_feature(pauth_feature, cpu); int bot_bit, top_bit; - uint64_t pac, orig_ptr, test; + uint64_t pac, orig_ptr, cmp_mask; orig_ptr = pauth_original_ptr(ptr, param); pac = pauth_computepac(env, orig_ptr, modifier, *key); bot_bit = 64 - param.tsz; top_bit = 64 - 8 * param.tbi; - test = (pac ^ ptr) & ~MAKE_64BIT_MASK(55, 1); - if (unlikely(extract64(test, bot_bit, top_bit - bot_bit))) { + cmp_mask = MAKE_64BIT_MASK(bot_bit, top_bit - bot_bit); + cmp_mask &= ~MAKE_64BIT_MASK(55, 1); + + if (pauth_feature >= PauthFeat_2) { + ARMPauthFeature fault_feature = + is_combined ? PauthFeat_FPACCOMBINED : PauthFeat_FPAC; + uint64_t result = ptr ^ (pac & cmp_mask); + + if (pauth_feature >= fault_feature + && ((result ^ sextract64(result, 55, 1)) & cmp_mask)) { + pauth_fail_exception(env, data, keynumber, ra); + } + return result; + } + + if ((pac ^ ptr) & cmp_mask) { int error_code = (keynumber << 1) | (keynumber ^ 1); if (param.tbi) { return deposit64(orig_ptr, 53, 2, error_code); @@ -377,20 +450,20 @@ static uint64_t pauth_auth(CPUARMState *env, uint64_t ptr, uint64_t modifier, static uint64_t pauth_strip(CPUARMState *env, uint64_t ptr, bool data) { ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env); - ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data); + ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data, false); return pauth_original_ptr(ptr, param); } -static void QEMU_NORETURN pauth_trap(CPUARMState *env, int target_el, - uintptr_t ra) +static G_NORETURN +void pauth_trap(CPUARMState *env, int target_el, uintptr_t ra) { raise_exception_ra(env, EXCP_UDEF, syn_pactrap(), target_el, ra); } static void pauth_check_trap(CPUARMState *env, int el, uintptr_t ra) { - if (el < 2 && arm_feature(env, ARM_FEATURE_EL2)) { + if (el < 2 && arm_is_el2_enabled(env)) { uint64_t hcr = arm_hcr_el2_eff(env); bool trap = !(hcr & HCR_API); if (el == 0) { @@ -464,44 +537,88 @@ uint64_t HELPER(pacga)(CPUARMState *env, uint64_t x, uint64_t y) return pac & 0xffffffff00000000ull; } -uint64_t HELPER(autia)(CPUARMState *env, uint64_t x, uint64_t y) +static uint64_t pauth_autia(CPUARMState *env, uint64_t x, uint64_t y, + uintptr_t ra, bool is_combined) { int el = arm_current_el(env); if (!pauth_key_enabled(env, el, SCTLR_EnIA)) { return x; } - pauth_check_trap(env, el, GETPC()); - return pauth_auth(env, x, y, &env->keys.apia, false, 0); + pauth_check_trap(env, el, ra); + return pauth_auth(env, x, y, &env->keys.apia, false, 0, ra, is_combined); } -uint64_t HELPER(autib)(CPUARMState *env, uint64_t x, uint64_t y) +uint64_t HELPER(autia)(CPUARMState *env, 
uint64_t x, uint64_t y) +{ + return pauth_autia(env, x, y, GETPC(), false); +} + +uint64_t HELPER(autia_combined)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autia(env, x, y, GETPC(), true); +} + +static uint64_t pauth_autib(CPUARMState *env, uint64_t x, uint64_t y, + uintptr_t ra, bool is_combined) { int el = arm_current_el(env); if (!pauth_key_enabled(env, el, SCTLR_EnIB)) { return x; } - pauth_check_trap(env, el, GETPC()); - return pauth_auth(env, x, y, &env->keys.apib, false, 1); + pauth_check_trap(env, el, ra); + return pauth_auth(env, x, y, &env->keys.apib, false, 1, ra, is_combined); } -uint64_t HELPER(autda)(CPUARMState *env, uint64_t x, uint64_t y) +uint64_t HELPER(autib)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autib(env, x, y, GETPC(), false); +} + +uint64_t HELPER(autib_combined)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autib(env, x, y, GETPC(), true); +} + +static uint64_t pauth_autda(CPUARMState *env, uint64_t x, uint64_t y, + uintptr_t ra, bool is_combined) { int el = arm_current_el(env); if (!pauth_key_enabled(env, el, SCTLR_EnDA)) { return x; } - pauth_check_trap(env, el, GETPC()); - return pauth_auth(env, x, y, &env->keys.apda, true, 0); + pauth_check_trap(env, el, ra); + return pauth_auth(env, x, y, &env->keys.apda, true, 0, ra, is_combined); } -uint64_t HELPER(autdb)(CPUARMState *env, uint64_t x, uint64_t y) +uint64_t HELPER(autda)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autda(env, x, y, GETPC(), false); +} + +uint64_t HELPER(autda_combined)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autda(env, x, y, GETPC(), true); +} + +static uint64_t pauth_autdb(CPUARMState *env, uint64_t x, uint64_t y, + uintptr_t ra, bool is_combined) { int el = arm_current_el(env); if (!pauth_key_enabled(env, el, SCTLR_EnDB)) { return x; } - pauth_check_trap(env, el, GETPC()); - return pauth_auth(env, x, y, &env->keys.apdb, true, 1); + pauth_check_trap(env, el, ra); + return pauth_auth(env, x, y, &env->keys.apdb, true, 1, ra, is_combined); +} + +uint64_t HELPER(autdb)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autdb(env, x, y, GETPC(), false); +} + +uint64_t HELPER(autdb_combined)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autdb(env, x, y, GETPC(), true); } uint64_t HELPER(xpaci)(CPUARMState *env, uint64_t a) diff --git a/target/arm/psci.c b/target/arm/tcg/psci.c index 6709e28013..51d2ca3d30 100644 --- a/target/arm/psci.c +++ b/target/arm/tcg/psci.c @@ -24,18 +24,17 @@ #include "sysemu/runstate.h" #include "internals.h" #include "arm-powerctl.h" +#include "target/arm/multiprocessing.h" bool arm_is_psci_call(ARMCPU *cpu, int excp_type) { - /* Return true if the r0/x0 value indicates a PSCI call and - * the exception type matches the configured PSCI conduit. This is - * called before the SMC/HVC instruction is executed, to decide whether - * we should treat it as a PSCI call or with the architecturally + /* + * Return true if the exception type matches the configured PSCI conduit. + * This is called before the SMC/HVC instruction is executed, to decide + * whether we should treat it as a PSCI call or with the architecturally * defined behaviour for an SMC or HVC (which might be UNDEF or trap * to EL2 or to EL3). */ - CPUARMState *env = &cpu->env; - uint64_t param = is_a64(env) ? 
env->xregs[0] : env->regs[0]; switch (excp_type) { case EXCP_HVC: @@ -52,34 +51,14 @@ bool arm_is_psci_call(ARMCPU *cpu, int excp_type) return false; } - switch (param) { - case QEMU_PSCI_0_2_FN_PSCI_VERSION: - case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE: - case QEMU_PSCI_0_2_FN_AFFINITY_INFO: - case QEMU_PSCI_0_2_FN64_AFFINITY_INFO: - case QEMU_PSCI_0_2_FN_SYSTEM_RESET: - case QEMU_PSCI_0_2_FN_SYSTEM_OFF: - case QEMU_PSCI_0_1_FN_CPU_ON: - case QEMU_PSCI_0_2_FN_CPU_ON: - case QEMU_PSCI_0_2_FN64_CPU_ON: - case QEMU_PSCI_0_1_FN_CPU_OFF: - case QEMU_PSCI_0_2_FN_CPU_OFF: - case QEMU_PSCI_0_1_FN_CPU_SUSPEND: - case QEMU_PSCI_0_2_FN_CPU_SUSPEND: - case QEMU_PSCI_0_2_FN64_CPU_SUSPEND: - case QEMU_PSCI_0_1_FN_MIGRATE: - case QEMU_PSCI_0_2_FN_MIGRATE: - return true; - default: - return false; - } + return true; } void arm_handle_psci_call(ARMCPU *cpu) { /* * This function partially implements the logic for dispatching Power State - * Coordination Interface (PSCI) calls (as described in ARM DEN 0022B.b), + * Coordination Interface (PSCI) calls (as described in ARM DEN 0022D.b), * to the extent required for bringing up and taking down secondary cores, * and for handling reset and poweroff requests. * Additional information about the calling convention used is available in @@ -102,7 +81,7 @@ void arm_handle_psci_call(ARMCPU *cpu) } if ((param[0] & QEMU_PSCI_0_2_64BIT) && !is_a64(env)) { - ret = QEMU_PSCI_RET_INVALID_PARAMS; + ret = QEMU_PSCI_RET_NOT_SUPPORTED; goto err; } @@ -111,7 +90,7 @@ void arm_handle_psci_call(ARMCPU *cpu) ARMCPU *target_cpu; case QEMU_PSCI_0_2_FN_PSCI_VERSION: - ret = QEMU_PSCI_0_2_RET_VERSION_0_2; + ret = QEMU_PSCI_VERSION_1_1; break; case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE: ret = QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED; /* No trusted OS */ @@ -129,7 +108,7 @@ void arm_handle_psci_call(ARMCPU *cpu) } target_cpu = ARM_CPU(target_cpu_state); - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); ret = target_cpu->power_state; break; default: @@ -192,12 +171,40 @@ void arm_handle_psci_call(ARMCPU *cpu) } helper_wfi(env, 4); break; + case QEMU_PSCI_1_0_FN_PSCI_FEATURES: + switch (param[1]) { + case QEMU_PSCI_0_2_FN_PSCI_VERSION: + case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE: + case QEMU_PSCI_0_2_FN_AFFINITY_INFO: + case QEMU_PSCI_0_2_FN64_AFFINITY_INFO: + case QEMU_PSCI_0_2_FN_SYSTEM_RESET: + case QEMU_PSCI_0_2_FN_SYSTEM_OFF: + case QEMU_PSCI_0_1_FN_CPU_ON: + case QEMU_PSCI_0_2_FN_CPU_ON: + case QEMU_PSCI_0_2_FN64_CPU_ON: + case QEMU_PSCI_0_1_FN_CPU_OFF: + case QEMU_PSCI_0_2_FN_CPU_OFF: + case QEMU_PSCI_0_1_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN64_CPU_SUSPEND: + case QEMU_PSCI_1_0_FN_PSCI_FEATURES: + if (!(param[1] & QEMU_PSCI_0_2_64BIT) || is_a64(env)) { + ret = 0; + break; + } + /* fallthrough */ + case QEMU_PSCI_0_1_FN_MIGRATE: + case QEMU_PSCI_0_2_FN_MIGRATE: + default: + ret = QEMU_PSCI_RET_NOT_SUPPORTED; + break; + } + break; case QEMU_PSCI_0_1_FN_MIGRATE: case QEMU_PSCI_0_2_FN_MIGRATE: + default: ret = QEMU_PSCI_RET_NOT_SUPPORTED; break; - default: - g_assert_not_reached(); } err: @@ -209,7 +216,7 @@ err: return; cpu_off: - ret = arm_set_cpu_off(cpu->mp_affinity); + ret = arm_set_cpu_off(arm_cpu_mp_affinity(cpu)); /* notreached */ /* sanity check in case something failed */ assert(ret == QEMU_ARM_POWERCTL_RET_SUCCESS); diff --git a/target/arm/tcg/sme-fa64.decode b/target/arm/tcg/sme-fa64.decode new file mode 100644 index 0000000000..47708ccc8d --- /dev/null +++ b/target/arm/tcg/sme-fa64.decode @@ -0,0 +1,60 @@ +# AArch64 SME 
allowed instruction decoding +# +# Copyright (c) 2022 Linaro, Ltd +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see <http://www.gnu.org/licenses/>. + +# +# This file is processed by scripts/decodetree.py +# + +# These patterns are taken from Appendix E1.1 of DDI0616 A.a, +# Arm Architecture Reference Manual Supplement, +# The Scalable Matrix Extension (SME), for Armv9-A + +{ + [ + OK 0-00 1110 0000 0001 0010 11-- ---- ---- # SMOV W|Xd,Vn.B[0] + OK 0-00 1110 0000 0010 0010 11-- ---- ---- # SMOV W|Xd,Vn.H[0] + OK 0100 1110 0000 0100 0010 11-- ---- ---- # SMOV Xd,Vn.S[0] + OK 0000 1110 0000 0001 0011 11-- ---- ---- # UMOV Wd,Vn.B[0] + OK 0000 1110 0000 0010 0011 11-- ---- ---- # UMOV Wd,Vn.H[0] + OK 0000 1110 0000 0100 0011 11-- ---- ---- # UMOV Wd,Vn.S[0] + OK 0100 1110 0000 1000 0011 11-- ---- ---- # UMOV Xd,Vn.D[0] + ] + FAIL 0--0 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD vector operations +} + +{ + [ + OK 0101 1110 --1- ---- 11-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar) + OK 0101 1110 -10- ---- 00-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar, FP16) + OK 01-1 1110 1-10 0001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar) + OK 01-1 1110 1111 1001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar, FP16) + ] + FAIL 01-1 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD single-element operations +} + +FAIL 0-00 110- ---- ---- ---- ---- ---- ---- # Advanced SIMD structure load/store +FAIL 1100 1110 ---- ---- ---- ---- ---- ---- # Advanced SIMD cryptography extensions +FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS + +# These are the "avoidance of doubt" final table of Illegal Advanced SIMD instructions +# We don't actually need to include these, as the default is OK. +# -001 111- ---- ---- ---- ---- ---- ---- # Scalar floating-point operations +# --10 110- ---- ---- ---- ---- ---- ---- # Load/store pair of FP registers +# --01 1100 ---- ---- ---- ---- ---- ---- # Load FP register (PC-relative literal) +# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm) +# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset) +# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm) diff --git a/target/arm/tcg/sme.decode b/target/arm/tcg/sme.decode new file mode 100644 index 0000000000..628804e37a --- /dev/null +++ b/target/arm/tcg/sme.decode @@ -0,0 +1,88 @@ +# AArch64 SME instruction descriptions +# +# Copyright (c) 2022 Linaro, Ltd +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see <http://www.gnu.org/licenses/>. + +# +# This file is processed by scripts/decodetree.py +# + +### SME Misc + +ZERO 11000000 00 001 00000000000 imm:8 + +### SME Move into/from Array + +%mova_rs 13:2 !function=plus_12 +&mova esz rs pg zr za_imm v:bool to_vec:bool + +MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \ + &mova to_vec=0 rs=%mova_rs +MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \ + &mova to_vec=0 rs=%mova_rs esz=4 + +MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \ + &mova to_vec=1 rs=%mova_rs +MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \ + &mova to_vec=1 rs=%mova_rs esz=4 + +### SME Memory + +&ldst esz rs pg rn rm za_imm v:bool st:bool + +LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \ + &ldst rs=%mova_rs +LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \ + &ldst esz=4 rs=%mova_rs + +&ldstr rv rn imm +@ldstr ....... ... . ...... .. ... rn:5 . imm:4 \ + &ldstr rv=%mova_rs + +LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr +STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr + +### SME Add Vector to Array + +&adda zad zn pm pn +@adda_32 ........ .. ..... . pm:3 pn:3 zn:5 ... zad:2 &adda +@adda_64 ........ .. ..... . pm:3 pn:3 zn:5 .. zad:3 &adda + +ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32 +ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32 +ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64 +ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64 + +### SME Outer Product + +&op zad zn zm pm pn sub:bool +@op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op +@op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op + +FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32 +FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64 + +BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32 +FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32 + +SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32 +SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32 +USMOPA_s 1010000 1 10 0 ..... ... ... ..... . 00 .. @op_32 +UMOPA_s 1010000 1 10 1 ..... ... ... ..... . 00 .. @op_32 + +SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64 +SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64 +USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64 +UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64 diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c new file mode 100644 index 0000000000..e2e0575039 --- /dev/null +++ b/target/arm/tcg/sme_helper.c @@ -0,0 +1,1179 @@ +/* + * ARM SME Operations + * + * Copyright (c) 2022 Linaro, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" +#include "tcg/tcg-gvec-desc.h" +#include "exec/helper-proto.h" +#include "exec/cpu_ldst.h" +#include "exec/exec-all.h" +#include "qemu/int128.h" +#include "fpu/softfloat.h" +#include "vec_internal.h" +#include "sve_ldst_internal.h" + +void helper_set_svcr(CPUARMState *env, uint32_t val, uint32_t mask) +{ + aarch64_set_svcr(env, val, mask); +} + +void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl) +{ + uint32_t i; + + /* + * Special case clearing the entire ZA space. + * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any + * parts of the ZA storage outside of SVL. + */ + if (imm == 0xff) { + memset(env->zarray, 0, sizeof(env->zarray)); + return; + } + + /* + * Recall that ZAnH.D[m] is spread across ZA[n+8*m], + * so each row is discontiguous within ZA[]. + */ + for (i = 0; i < svl; i++) { + if (imm & (1 << (i % 8))) { + memset(&env->zarray[i], 0, svl); + } + } +} + + +/* + * When considering the ZA storage as an array of elements of + * type T, the index within that array of the Nth element of + * a vertical slice of a tile can be calculated like this, + * regardless of the size of type T. This is because the tiles + * are interleaved, so if type T is size N bytes then row 1 of + * the tile is N rows away from row 0. The division by N to + * convert a byte offset into an array index and the multiplication + * by N to convert from vslice-index-within-the-tile to + * the index within the ZA storage cancel out. + */ +#define tile_vslice_index(i) ((i) * sizeof(ARMVectorReg)) + +/* + * When doing byte arithmetic on the ZA storage, the element + * byteoff bytes away in a tile vertical slice is always this + * many bytes away in the ZA storage, regardless of the + * size of the tile element, assuming that byteoff is a multiple + * of the element size. Again this is because of the interleaving + * of the tiles. For instance if we have 1 byte per element then + * each row of the ZA storage has one byte of the vslice data, + * and (counting from 0) byte 8 goes in row 8 of the storage + * at offset (8 * row-size-in-bytes). + * If we have 8 bytes per element then each row of the ZA storage + * has 8 bytes of the data, but there are 8 interleaved tiles and + * so byte 8 of the data goes into row 1 of the tile, + * which is again row 8 of the storage, so the offset is still + * (8 * row-size-in-bytes). Similarly for other element sizes. + */ +#define tile_vslice_offset(byteoff) ((byteoff) * sizeof(ARMVectorReg)) + + +/* + * Move Zreg vector to ZArray column. 
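+ * Each step of the generated function consumes 16 predicate bits
+ * (one bit per vector byte) and tests only the low bit of each
+ * element-sized group.  As a sketch, for TYPE = uint32_t an active
+ * element at byte offset i is copied with
+ *     *(uint32_t *)(za + tile_vslice_offset(i)) = *(uint32_t *)(vn + H1_4(i));
+ * so consecutive source elements land sizeof(ARMVectorReg) bytes
+ * apart in ZA, i.e. in consecutive rows of the vertical slice.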
+ */ +#define DO_MOVA_C(NAME, TYPE, H) \ +void HELPER(NAME)(void *za, void *vn, void *vg, uint32_t desc) \ +{ \ + int i, oprsz = simd_oprsz(desc); \ + for (i = 0; i < oprsz; ) { \ + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + *(TYPE *)(za + tile_vslice_offset(i)) = *(TYPE *)(vn + H(i)); \ + } \ + i += sizeof(TYPE); \ + pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ +} + +DO_MOVA_C(sme_mova_cz_b, uint8_t, H1) +DO_MOVA_C(sme_mova_cz_h, uint16_t, H1_2) +DO_MOVA_C(sme_mova_cz_s, uint32_t, H1_4) + +void HELPER(sme_mova_cz_d)(void *za, void *vn, void *vg, uint32_t desc) +{ + int i, oprsz = simd_oprsz(desc) / 8; + uint8_t *pg = vg; + uint64_t *n = vn; + uint64_t *a = za; + + for (i = 0; i < oprsz; i++) { + if (pg[H1(i)] & 1) { + a[tile_vslice_index(i)] = n[i]; + } + } +} + +void HELPER(sme_mova_cz_q)(void *za, void *vn, void *vg, uint32_t desc) +{ + int i, oprsz = simd_oprsz(desc) / 16; + uint16_t *pg = vg; + Int128 *n = vn; + Int128 *a = za; + + /* + * Int128 is used here simply to copy 16 bytes, and to simplify + * the address arithmetic. + */ + for (i = 0; i < oprsz; i++) { + if (pg[H2(i)] & 1) { + a[tile_vslice_index(i)] = n[i]; + } + } +} + +#undef DO_MOVA_C + +/* + * Move ZArray column to Zreg vector. + */ +#define DO_MOVA_Z(NAME, TYPE, H) \ +void HELPER(NAME)(void *vd, void *za, void *vg, uint32_t desc) \ +{ \ + int i, oprsz = simd_oprsz(desc); \ + for (i = 0; i < oprsz; ) { \ + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ + do { \ + if (pg & 1) { \ + *(TYPE *)(vd + H(i)) = *(TYPE *)(za + tile_vslice_offset(i)); \ + } \ + i += sizeof(TYPE); \ + pg >>= sizeof(TYPE); \ + } while (i & 15); \ + } \ +} + +DO_MOVA_Z(sme_mova_zc_b, uint8_t, H1) +DO_MOVA_Z(sme_mova_zc_h, uint16_t, H1_2) +DO_MOVA_Z(sme_mova_zc_s, uint32_t, H1_4) + +void HELPER(sme_mova_zc_d)(void *vd, void *za, void *vg, uint32_t desc) +{ + int i, oprsz = simd_oprsz(desc) / 8; + uint8_t *pg = vg; + uint64_t *d = vd; + uint64_t *a = za; + + for (i = 0; i < oprsz; i++) { + if (pg[H1(i)] & 1) { + d[i] = a[tile_vslice_index(i)]; + } + } +} + +void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc) +{ + int i, oprsz = simd_oprsz(desc) / 16; + uint16_t *pg = vg; + Int128 *d = vd; + Int128 *a = za; + + /* + * Int128 is used here simply to copy 16 bytes, and to simplify + * the address arithmetic. + */ + for (i = 0; i < oprsz; i++, za += sizeof(ARMVectorReg)) { + if (pg[H2(i)] & 1) { + d[i] = a[tile_vslice_index(i)]; + } + } +} + +#undef DO_MOVA_Z + +/* + * Clear elements in a tile slice comprising len bytes. 
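+ * A horizontal slice is contiguous in ZA, so a single memset suffices;
+ * a vertical slice places consecutive elements sizeof(ARMVectorReg)
+ * bytes apart, so the clear_vertical_* functions below must step
+ * through the storage one element at a time at the relevant width.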
+ */ + +typedef void ClearFn(void *ptr, size_t off, size_t len); + +static void clear_horizontal(void *ptr, size_t off, size_t len) +{ + memset(ptr + off, 0, len); +} + +static void clear_vertical_b(void *vptr, size_t off, size_t len) +{ + for (size_t i = 0; i < len; ++i) { + *(uint8_t *)(vptr + tile_vslice_offset(i + off)) = 0; + } +} + +static void clear_vertical_h(void *vptr, size_t off, size_t len) +{ + for (size_t i = 0; i < len; i += 2) { + *(uint16_t *)(vptr + tile_vslice_offset(i + off)) = 0; + } +} + +static void clear_vertical_s(void *vptr, size_t off, size_t len) +{ + for (size_t i = 0; i < len; i += 4) { + *(uint32_t *)(vptr + tile_vslice_offset(i + off)) = 0; + } +} + +static void clear_vertical_d(void *vptr, size_t off, size_t len) +{ + for (size_t i = 0; i < len; i += 8) { + *(uint64_t *)(vptr + tile_vslice_offset(i + off)) = 0; + } +} + +static void clear_vertical_q(void *vptr, size_t off, size_t len) +{ + for (size_t i = 0; i < len; i += 16) { + memset(vptr + tile_vslice_offset(i + off), 0, 16); + } +} + +/* + * Copy elements from an array into a tile slice comprising len bytes. + */ + +typedef void CopyFn(void *dst, const void *src, size_t len); + +static void copy_horizontal(void *dst, const void *src, size_t len) +{ + memcpy(dst, src, len); +} + +static void copy_vertical_b(void *vdst, const void *vsrc, size_t len) +{ + const uint8_t *src = vsrc; + uint8_t *dst = vdst; + size_t i; + + for (i = 0; i < len; ++i) { + dst[tile_vslice_index(i)] = src[i]; + } +} + +static void copy_vertical_h(void *vdst, const void *vsrc, size_t len) +{ + const uint16_t *src = vsrc; + uint16_t *dst = vdst; + size_t i; + + for (i = 0; i < len / 2; ++i) { + dst[tile_vslice_index(i)] = src[i]; + } +} + +static void copy_vertical_s(void *vdst, const void *vsrc, size_t len) +{ + const uint32_t *src = vsrc; + uint32_t *dst = vdst; + size_t i; + + for (i = 0; i < len / 4; ++i) { + dst[tile_vslice_index(i)] = src[i]; + } +} + +static void copy_vertical_d(void *vdst, const void *vsrc, size_t len) +{ + const uint64_t *src = vsrc; + uint64_t *dst = vdst; + size_t i; + + for (i = 0; i < len / 8; ++i) { + dst[tile_vslice_index(i)] = src[i]; + } +} + +static void copy_vertical_q(void *vdst, const void *vsrc, size_t len) +{ + for (size_t i = 0; i < len; i += 16) { + memcpy(vdst + tile_vslice_offset(i), vsrc + i, 16); + } +} + +/* + * Host and TLB primitives for vertical tile slice addressing. + */ + +#define DO_LD(NAME, TYPE, HOST, TLB) \ +static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \ +{ \ + TYPE val = HOST(host); \ + *(TYPE *)(za + tile_vslice_offset(off)) = val; \ +} \ +static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \ + intptr_t off, target_ulong addr, uintptr_t ra) \ +{ \ + TYPE val = TLB(env, useronly_clean_ptr(addr), ra); \ + *(TYPE *)(za + tile_vslice_offset(off)) = val; \ +} + +#define DO_ST(NAME, TYPE, HOST, TLB) \ +static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \ +{ \ + TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \ + HOST(host, val); \ +} \ +static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \ + intptr_t off, target_ulong addr, uintptr_t ra) \ +{ \ + TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \ + TLB(env, useronly_clean_ptr(addr), val, ra); \ +} + +/* + * The ARMVectorReg elements are stored in host-endian 64-bit units. + * For 128-bit quantities, the sequence defined by the Elem[] pseudocode + * corresponds to storing the two 64-bit pieces in little-endian order. 
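+ * Thus for a big-endian (BE=1) access, the first 8 bytes in memory
+ * hold the most-significant half of the quadword and belong in ptr[1],
+ * while a little-endian access finds the halves already in piece
+ * order; hence the val0/val1 swap in the BE variants below.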
+ */ +#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \ +static inline void HNAME##_host(void *za, intptr_t off, void *host) \ +{ \ + uint64_t val0 = HOST(host), val1 = HOST(host + 8); \ + uint64_t *ptr = za + off; \ + ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \ +} \ +static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \ +{ \ + HNAME##_host(za, tile_vslice_offset(off), host); \ +} \ +static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \ + target_ulong addr, uintptr_t ra) \ +{ \ + uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \ + uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \ + uint64_t *ptr = za + off; \ + ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \ +} \ +static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \ + target_ulong addr, uintptr_t ra) \ +{ \ + HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \ +} + +#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \ +static inline void HNAME##_host(void *za, intptr_t off, void *host) \ +{ \ + uint64_t *ptr = za + off; \ + HOST(host, ptr[BE]); \ + HOST(host + 8, ptr[!BE]); \ +} \ +static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \ +{ \ + HNAME##_host(za, tile_vslice_offset(off), host); \ +} \ +static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \ + target_ulong addr, uintptr_t ra) \ +{ \ + uint64_t *ptr = za + off; \ + TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \ + TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \ +} \ +static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \ + target_ulong addr, uintptr_t ra) \ +{ \ + HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \ +} + +DO_LD(ld1b, uint8_t, ldub_p, cpu_ldub_data_ra) +DO_LD(ld1h_be, uint16_t, lduw_be_p, cpu_lduw_be_data_ra) +DO_LD(ld1h_le, uint16_t, lduw_le_p, cpu_lduw_le_data_ra) +DO_LD(ld1s_be, uint32_t, ldl_be_p, cpu_ldl_be_data_ra) +DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra) +DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra) +DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra) + +DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra) +DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra) + +DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra) +DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra) +DO_ST(st1h_le, uint16_t, stw_le_p, cpu_stw_le_data_ra) +DO_ST(st1s_be, uint32_t, stl_be_p, cpu_stl_be_data_ra) +DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra) +DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra) +DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra) + +DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra) +DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra) + +#undef DO_LD +#undef DO_ST +#undef DO_LDQ +#undef DO_STQ + +/* + * Common helper for all contiguous predicated loads. + */ + +static inline QEMU_ALWAYS_INLINE +void sme_ld1(CPUARMState *env, void *za, uint64_t *vg, + const target_ulong addr, uint32_t desc, const uintptr_t ra, + const int esz, uint32_t mtedesc, bool vertical, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn, + ClearFn *clr_fn, + CopyFn *cpy_fn) +{ + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t esize = 1 << esz; + intptr_t reg_off, reg_last; + SVEContLdSt info; + void *host; + int flags; + + /* Find the active elements. */ + if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) { + /* The entire predicate was false; no load occurs. 
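Inactive elements of an SME load still zero their ZA bytes, though, so the destination slice must be cleared before returning.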
*/ + clr_fn(za, 0, reg_max); + return; + } + + /* Probe the page(s). Exit with exception for any invalid page. */ + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, ra); + + /* Handle watchpoints for all active elements. */ + sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize, + BP_MEM_READ, ra); + + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. + */ + if (mtedesc) { + sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize, + mtedesc, ra); + } + + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + /* + * At least one page includes MMIO. + * Any bus operation can fail with cpu_transaction_failed, + * which for ARM will raise SyncExternal. Perform the load + * into scratch memory to preserve register state until the end. + */ + ARMVectorReg scratch = { }; + + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[1]; + if (reg_last < 0) { + reg_last = info.reg_off_split; + if (reg_last < 0) { + reg_last = info.reg_off_last[0]; + } + } + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + tlb_fn(env, &scratch, reg_off, addr + reg_off, ra); + } + reg_off += esize; + } while (reg_off & 63); + } while (reg_off <= reg_last); + + cpy_fn(za, &scratch, reg_max); + return; +#endif + } + + /* The entire operation is in RAM, on valid pages. */ + + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[0]; + host = info.page[0].host; + + if (!vertical) { + memset(za, 0, reg_max); + } else if (reg_off) { + clr_fn(za, 0, reg_off); + } + + while (reg_off <= reg_last) { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + host_fn(za, reg_off, host + reg_off); + } else if (vertical) { + clr_fn(za, reg_off, esize); + } + reg_off += esize; + } while (reg_off <= reg_last && (reg_off & 63)); + } + + /* + * Use the slow path to manage the cross-page misalignment. + * But we know this is RAM and cannot trap. + */ + reg_off = info.reg_off_split; + if (unlikely(reg_off >= 0)) { + tlb_fn(env, za, reg_off, addr + reg_off, ra); + } + + reg_off = info.reg_off_first[1]; + if (unlikely(reg_off >= 0)) { + reg_last = info.reg_off_last[1]; + host = info.page[1].host; + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + host_fn(za, reg_off, host + reg_off); + } else if (vertical) { + clr_fn(za, reg_off, esize); + } + reg_off += esize; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } +} + +static inline QEMU_ALWAYS_INLINE +void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg, + target_ulong addr, uint32_t desc, uintptr_t ra, + const int esz, bool vertical, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn, + ClearFn *clr_fn, + CopyFn *cpy_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); + + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. 
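If tbi_check fails, TBI is disabled for this half of the address space (selected by bit 55) and the pointer carries no tag; if tcma_check succeeds, the tag is one that TCMA exempts from checking. Either way there is nothing to verify, so zeroing mtedesc makes sme_ld1 skip the per-element checks.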
*/ + if (!tbi_check(mtedesc, bit55) || + tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + + sme_ld1(env, za, vg, addr, desc, ra, esz, mtedesc, vertical, + host_fn, tlb_fn, clr_fn, cpy_fn); +} + +#define DO_LD(L, END, ESZ) \ +void HELPER(sme_ld1##L##END##_h)(CPUARMState *env, void *za, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \ + sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \ + clear_horizontal, copy_horizontal); \ +} \ +void HELPER(sme_ld1##L##END##_v)(CPUARMState *env, void *za, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \ + sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \ + clear_vertical_##L, copy_vertical_##L); \ +} \ +void HELPER(sme_ld1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \ + sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \ + clear_horizontal, copy_horizontal); \ +} \ +void HELPER(sme_ld1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \ + sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \ + clear_vertical_##L, copy_vertical_##L); \ +} + +DO_LD(b, , MO_8) +DO_LD(h, _be, MO_16) +DO_LD(h, _le, MO_16) +DO_LD(s, _be, MO_32) +DO_LD(s, _le, MO_32) +DO_LD(d, _be, MO_64) +DO_LD(d, _le, MO_64) +DO_LD(q, _be, MO_128) +DO_LD(q, _le, MO_128) + +#undef DO_LD + +/* + * Common helper for all contiguous predicated stores. + */ + +static inline QEMU_ALWAYS_INLINE +void sme_st1(CPUARMState *env, void *za, uint64_t *vg, + const target_ulong addr, uint32_t desc, const uintptr_t ra, + const int esz, uint32_t mtedesc, bool vertical, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + const intptr_t reg_max = simd_oprsz(desc); + const intptr_t esize = 1 << esz; + intptr_t reg_off, reg_last; + SVEContLdSt info; + void *host; + int flags; + + /* Find the active elements. */ + if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) { + /* The entire predicate was false; no store occurs. */ + return; + } + + /* Probe the page(s). Exit with exception for any invalid page. */ + sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, ra); + + /* Handle watchpoints for all active elements. */ + sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize, + BP_MEM_WRITE, ra); + + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. + */ + if (mtedesc) { + sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize, + mtedesc, ra); + } + + flags = info.page[0].flags | info.page[1].flags; + if (unlikely(flags != 0)) { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + /* + * At least one page includes MMIO. + * Any bus operation can fail with cpu_transaction_failed, + * which for ARM will raise SyncExternal. We cannot avoid + * this fault and will leave with the store incomplete. 
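+         * (Contrast the load path above, which buffers into a scratch
+         * ARMVectorReg precisely so that ZA is not partially updated
+         * when a bus operation faults; no such recovery is possible
+         * once a store has reached the device.)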
+ */ + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[1]; + if (reg_last < 0) { + reg_last = info.reg_off_split; + if (reg_last < 0) { + reg_last = info.reg_off_last[0]; + } + } + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + tlb_fn(env, za, reg_off, addr + reg_off, ra); + } + reg_off += esize; + } while (reg_off & 63); + } while (reg_off <= reg_last); + return; +#endif + } + + reg_off = info.reg_off_first[0]; + reg_last = info.reg_off_last[0]; + host = info.page[0].host; + + while (reg_off <= reg_last) { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + host_fn(za, reg_off, host + reg_off); + } + reg_off += 1 << esz; + } while (reg_off <= reg_last && (reg_off & 63)); + } + + /* + * Use the slow path to manage the cross-page misalignment. + * But we know this is RAM and cannot trap. + */ + reg_off = info.reg_off_split; + if (unlikely(reg_off >= 0)) { + tlb_fn(env, za, reg_off, addr + reg_off, ra); + } + + reg_off = info.reg_off_first[1]; + if (unlikely(reg_off >= 0)) { + reg_last = info.reg_off_last[1]; + host = info.page[1].host; + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + host_fn(za, reg_off, host + reg_off); + } + reg_off += 1 << esz; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } +} + +static inline QEMU_ALWAYS_INLINE +void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr, + uint32_t desc, uintptr_t ra, int esz, bool vertical, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); + + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. 
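(See sme_ld1_mte above for the rationale.)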
*/ + if (!tbi_check(mtedesc, bit55) || + tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + + sme_st1(env, za, vg, addr, desc, ra, esz, mtedesc, + vertical, host_fn, tlb_fn); +} + +#define DO_ST(L, END, ESZ) \ +void HELPER(sme_st1##L##END##_h)(CPUARMState *env, void *za, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \ + sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \ +} \ +void HELPER(sme_st1##L##END##_v)(CPUARMState *env, void *za, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \ + sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \ +} \ +void HELPER(sme_st1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \ + sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \ +} \ +void HELPER(sme_st1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \ + sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \ +} + +DO_ST(b, , MO_8) +DO_ST(h, _be, MO_16) +DO_ST(h, _le, MO_16) +DO_ST(s, _be, MO_32) +DO_ST(s, _le, MO_32) +DO_ST(d, _be, MO_64) +DO_ST(d, _le, MO_64) +DO_ST(q, _be, MO_128) +DO_ST(q, _le, MO_128) + +#undef DO_ST + +void HELPER(sme_addha_s)(void *vzda, void *vzn, void *vpn, + void *vpm, uint32_t desc) +{ + intptr_t row, col, oprsz = simd_oprsz(desc) / 4; + uint64_t *pn = vpn, *pm = vpm; + uint32_t *zda = vzda, *zn = vzn; + + for (row = 0; row < oprsz; ) { + uint64_t pa = pn[row >> 4]; + do { + if (pa & 1) { + for (col = 0; col < oprsz; ) { + uint64_t pb = pm[col >> 4]; + do { + if (pb & 1) { + zda[tile_vslice_index(row) + H4(col)] += zn[H4(col)]; + } + pb >>= 4; + } while (++col & 15); + } + } + pa >>= 4; + } while (++row & 15); + } +} + +void HELPER(sme_addha_d)(void *vzda, void *vzn, void *vpn, + void *vpm, uint32_t desc) +{ + intptr_t row, col, oprsz = simd_oprsz(desc) / 8; + uint8_t *pn = vpn, *pm = vpm; + uint64_t *zda = vzda, *zn = vzn; + + for (row = 0; row < oprsz; ++row) { + if (pn[H1(row)] & 1) { + for (col = 0; col < oprsz; ++col) { + if (pm[H1(col)] & 1) { + zda[tile_vslice_index(row) + col] += zn[col]; + } + } + } + } +} + +void HELPER(sme_addva_s)(void *vzda, void *vzn, void *vpn, + void *vpm, uint32_t desc) +{ + intptr_t row, col, oprsz = simd_oprsz(desc) / 4; + uint64_t *pn = vpn, *pm = vpm; + uint32_t *zda = vzda, *zn = vzn; + + for (row = 0; row < oprsz; ) { + uint64_t pa = pn[row >> 4]; + do { + if (pa & 1) { + uint32_t zn_row = zn[H4(row)]; + for (col = 0; col < oprsz; ) { + uint64_t pb = pm[col >> 4]; + do { + if (pb & 1) { + zda[tile_vslice_index(row) + H4(col)] += zn_row; + } + pb >>= 4; + } while (++col & 15); + } + } + pa >>= 4; + } while (++row & 15); + } +} + +void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn, + void *vpm, uint32_t desc) +{ + intptr_t row, col, oprsz = simd_oprsz(desc) / 8; + uint8_t *pn = vpn, *pm = vpm; + uint64_t *zda = vzda, *zn = vzn; + + for (row = 0; row < oprsz; ++row) { + if (pn[H1(row)] & 1) { + uint64_t zn_row = zn[row]; + for (col = 0; col < oprsz; ++col) { + if (pm[H1(col)] & 1) { + zda[tile_vslice_index(row) + col] += zn_row; + } + } + } + } +} + +void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, void *vst, uint32_t desc) +{ + intptr_t row, col, oprsz = simd_maxsz(desc); + uint32_t neg = simd_data(desc) << 
31; + uint16_t *pn = vpn, *pm = vpm; + float_status fpst; + + /* + * Make a copy of float_status because this operation does not + * update the cumulative fp exception status. It also produces + * default nans. + */ + fpst = *(float_status *)vst; + set_default_nan_mode(true, &fpst); + + for (row = 0; row < oprsz; ) { + uint16_t pa = pn[H2(row >> 4)]; + do { + if (pa & 1) { + void *vza_row = vza + tile_vslice_offset(row); + uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg; + + for (col = 0; col < oprsz; ) { + uint16_t pb = pm[H2(col >> 4)]; + do { + if (pb & 1) { + uint32_t *a = vza_row + H1_4(col); + uint32_t *m = vzm + H1_4(col); + *a = float32_muladd(n, *m, *a, 0, vst); + } + col += 4; + pb >>= 4; + } while (col & 15); + } + } + row += 4; + pa >>= 4; + } while (row & 15); + } +} + +void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, void *vst, uint32_t desc) +{ + intptr_t row, col, oprsz = simd_oprsz(desc) / 8; + uint64_t neg = (uint64_t)simd_data(desc) << 63; + uint64_t *za = vza, *zn = vzn, *zm = vzm; + uint8_t *pn = vpn, *pm = vpm; + float_status fpst = *(float_status *)vst; + + set_default_nan_mode(true, &fpst); + + for (row = 0; row < oprsz; ++row) { + if (pn[H1(row)] & 1) { + uint64_t *za_row = &za[tile_vslice_index(row)]; + uint64_t n = zn[row] ^ neg; + + for (col = 0; col < oprsz; ++col) { + if (pm[H1(col)] & 1) { + uint64_t *a = &za_row[col]; + *a = float64_muladd(n, zm[col], *a, 0, &fpst); + } + } + } + } +} + +/* + * Alter PAIR as needed for controlling predicates being false, + * and for NEG on an enabled row element. + */ +static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg) +{ + /* + * The pseudocode uses a conditional negate after the conditional zero. + * It is simpler here to unconditionally negate before conditional zero. + */ + pair ^= neg; + if (!(pg & 1)) { + pair &= 0xffff0000u; + } + if (!(pg & 4)) { + pair &= 0x0000ffffu; + } + return pair; +} + +static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2, + float_status *s_std, float_status *s_odd) +{ + float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std); + float64 e1c = float16_to_float64(e1 >> 16, true, s_std); + float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std); + float64 e2c = float16_to_float64(e2 >> 16, true, s_std); + float64 t64; + float32 t32; + + /* + * The ARM pseudocode function FPDot performs both multiplies + * and the add with a single rounding operation. Emulate this + * by performing the first multiply in round-to-odd, then doing + * the second multiply as fused multiply-add, and rounding to + * float32 all in one step. + */ + t64 = float64_mul(e1r, e2r, s_odd); + t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std); + + /* This conversion is exact, because we've already rounded. */ + t32 = float64_to_float32(t64, s_std); + + /* The final accumulation step is not fused. */ + return float32_add(sum, t32, s_std); +} + +void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, void *vst, uint32_t desc) +{ + intptr_t row, col, oprsz = simd_maxsz(desc); + uint32_t neg = simd_data(desc) * 0x80008000u; + uint16_t *pn = vpn, *pm = vpm; + float_status fpst_odd, fpst_std; + + /* + * Make a copy of float_status because this operation does not + * update the cumulative fp exception status. It also produces + * default nans. Make a second copy with round-to-odd -- see above. 
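+ * Round-to-odd keeps the first multiply "sticky": its low result bit
+ * records any inexactness, so no information that could affect the
+ * final rounding is lost before the fused multiply-add performs the
+ * single rounding to float32 that FPDot requires.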
+ */ + fpst_std = *(float_status *)vst; + set_default_nan_mode(true, &fpst_std); + fpst_odd = fpst_std; + set_float_rounding_mode(float_round_to_odd, &fpst_odd); + + for (row = 0; row < oprsz; ) { + uint16_t prow = pn[H2(row >> 4)]; + do { + void *vza_row = vza + tile_vslice_offset(row); + uint32_t n = *(uint32_t *)(vzn + H1_4(row)); + + n = f16mop_adj_pair(n, prow, neg); + + for (col = 0; col < oprsz; ) { + uint16_t pcol = pm[H2(col >> 4)]; + do { + if (prow & pcol & 0b0101) { + uint32_t *a = vza_row + H1_4(col); + uint32_t m = *(uint32_t *)(vzm + H1_4(col)); + + m = f16mop_adj_pair(m, pcol, 0); + *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd); + } + col += 4; + pcol >>= 4; + } while (col & 15); + } + row += 4; + prow >>= 4; + } while (row & 15); + } +} + +void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn, + void *vpm, uint32_t desc) +{ + intptr_t row, col, oprsz = simd_maxsz(desc); + uint32_t neg = simd_data(desc) * 0x80008000u; + uint16_t *pn = vpn, *pm = vpm; + + for (row = 0; row < oprsz; ) { + uint16_t prow = pn[H2(row >> 4)]; + do { + void *vza_row = vza + tile_vslice_offset(row); + uint32_t n = *(uint32_t *)(vzn + H1_4(row)); + + n = f16mop_adj_pair(n, prow, neg); + + for (col = 0; col < oprsz; ) { + uint16_t pcol = pm[H2(col >> 4)]; + do { + if (prow & pcol & 0b0101) { + uint32_t *a = vza_row + H1_4(col); + uint32_t m = *(uint32_t *)(vzm + H1_4(col)); + + m = f16mop_adj_pair(m, pcol, 0); + *a = bfdotadd(*a, n, m); + } + col += 4; + pcol >>= 4; + } while (col & 15); + } + row += 4; + prow >>= 4; + } while (row & 15); + } +} + +typedef uint32_t IMOPFn32(uint32_t, uint32_t, uint32_t, uint8_t, bool); +static inline void do_imopa_s(uint32_t *za, uint32_t *zn, uint32_t *zm, + uint8_t *pn, uint8_t *pm, + uint32_t desc, IMOPFn32 *fn) +{ + intptr_t row, col, oprsz = simd_oprsz(desc) / 4; + bool neg = simd_data(desc); + + for (row = 0; row < oprsz; ++row) { + uint8_t pa = (pn[H1(row >> 1)] >> ((row & 1) * 4)) & 0xf; + uint32_t *za_row = &za[tile_vslice_index(row)]; + uint32_t n = zn[H4(row)]; + + for (col = 0; col < oprsz; ++col) { + uint8_t pb = pm[H1(col >> 1)] >> ((col & 1) * 4); + uint32_t *a = &za_row[H4(col)]; + + *a = fn(n, zm[H4(col)], *a, pa & pb, neg); + } + } +} + +typedef uint64_t IMOPFn64(uint64_t, uint64_t, uint64_t, uint8_t, bool); +static inline void do_imopa_d(uint64_t *za, uint64_t *zn, uint64_t *zm, + uint8_t *pn, uint8_t *pm, + uint32_t desc, IMOPFn64 *fn) +{ + intptr_t row, col, oprsz = simd_oprsz(desc) / 8; + bool neg = simd_data(desc); + + for (row = 0; row < oprsz; ++row) { + uint8_t pa = pn[H1(row)]; + uint64_t *za_row = &za[tile_vslice_index(row)]; + uint64_t n = zn[row]; + + for (col = 0; col < oprsz; ++col) { + uint8_t pb = pm[H1(col)]; + uint64_t *a = &za_row[col]; + + *a = fn(n, zm[col], *a, pa & pb, neg); + } + } +} + +#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \ +static uint32_t NAME(uint32_t n, uint32_t m, uint32_t a, uint8_t p, bool neg) \ +{ \ + uint32_t sum = 0; \ + /* Apply P to N as a mask, making the inactive elements 0. */ \ + n &= expand_pred_b(p); \ + sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \ + sum += (NTYPE)(n >> 8) * (MTYPE)(m >> 8); \ + sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \ + sum += (NTYPE)(n >> 24) * (MTYPE)(m >> 24); \ + return neg ? a - sum : a + sum; \ +} + +#define DEF_IMOP_64(NAME, NTYPE, MTYPE) \ +static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \ +{ \ + uint64_t sum = 0; \ + /* Apply P to N as a mask, making the inactive elements 0. 
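For example, p = 0x05 expands via expand_pred_h to 0x00000000ffffffff, so only the two low 16-bit lanes of N contribute to the sum.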
*/ \ + n &= expand_pred_h(p); \ + sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \ + sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \ + sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \ + sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \ + return neg ? a - sum : a + sum; \ +} + +DEF_IMOP_32(smopa_s, int8_t, int8_t) +DEF_IMOP_32(umopa_s, uint8_t, uint8_t) +DEF_IMOP_32(sumopa_s, int8_t, uint8_t) +DEF_IMOP_32(usmopa_s, uint8_t, int8_t) + +DEF_IMOP_64(smopa_d, int16_t, int16_t) +DEF_IMOP_64(umopa_d, uint16_t, uint16_t) +DEF_IMOP_64(sumopa_d, int16_t, uint16_t) +DEF_IMOP_64(usmopa_d, uint16_t, int16_t) + +#define DEF_IMOPH(NAME, S) \ + void HELPER(sme_##NAME##_##S)(void *vza, void *vzn, void *vzm, \ + void *vpn, void *vpm, uint32_t desc) \ + { do_imopa_##S(vza, vzn, vzm, vpn, vpm, desc, NAME##_##S); } + +DEF_IMOPH(smopa, s) +DEF_IMOPH(umopa, s) +DEF_IMOPH(sumopa, s) +DEF_IMOPH(usmopa, s) + +DEF_IMOPH(smopa, d) +DEF_IMOPH(umopa, d) +DEF_IMOPH(sumopa, d) +DEF_IMOPH(usmopa, d) diff --git a/target/arm/sve.decode b/target/arm/tcg/sve.decode index c60b9f0fec..04b6fcc0cf 100644 --- a/target/arm/sve.decode +++ b/target/arm/tcg/sve.decode @@ -449,14 +449,17 @@ INDEX_ri 00000100 esz:2 1 imm:s5 010001 rn:5 rd:5 # SVE index generation (register start, register increment) INDEX_rr 00000100 .. 1 ..... 010011 ..... ..... @rd_rn_rm -### SVE Stack Allocation Group +### SVE / Streaming SVE Stack Allocation Group # SVE stack frame adjustment ADDVL 00000100 001 ..... 01010 ...... ..... @rd_rn_i6 +ADDSVL 00000100 001 ..... 01011 ...... ..... @rd_rn_i6 ADDPL 00000100 011 ..... 01010 ...... ..... @rd_rn_i6 +ADDSPL 00000100 011 ..... 01011 ...... ..... @rd_rn_i6 # SVE stack frame size RDVL 00000100 101 11111 01010 imm:s6 rd:5 +RDSVL 00000100 101 11111 01011 imm:s6 rd:5 ### SVE Bitwise Shift - Unpredicated Group @@ -528,8 +531,14 @@ DUPM 00000101 11 0000 dbm:13 rd:5 FCPY 00000101 .. 01 .... 110 imm:8 ..... @rdn_pg4 # SVE copy integer immediate (predicated) -CPY_m_i 00000101 .. 01 .... 01 . ........ ..... @rdn_pg4 imm=%sh8_i8s -CPY_z_i 00000101 .. 01 .... 00 . ........ ..... @rdn_pg4 imm=%sh8_i8s +{ + INVALID 00000101 00 01 ---- 01 1 -------- ----- + CPY_m_i 00000101 .. 01 .... 01 . ........ ..... @rdn_pg4 imm=%sh8_i8s +} +{ + INVALID 00000101 00 01 ---- 00 1 -------- ----- + CPY_z_i 00000101 .. 01 .... 00 . ........ ..... @rdn_pg4 imm=%sh8_i8s +} ### SVE Permute - Extract Group @@ -643,6 +652,7 @@ REVB 00000101 .. 1001 00 100 ... ..... ..... @rd_pg_rn REVH 00000101 .. 1001 01 100 ... ..... ..... @rd_pg_rn REVW 00000101 .. 1001 10 100 ... ..... ..... @rd_pg_rn RBIT 00000101 .. 1001 11 100 ... ..... ..... @rd_pg_rn +REVD 00000101 00 1011 10 100 ... ..... ..... @rd_pg_rn_e0 # SVE vector splice (predicated, destructive) SPLICE 00000101 .. 101 100 100 ... ..... ..... @rdn_pg_rm @@ -787,16 +797,40 @@ WHILE_ptr 00100101 esz:2 1 rm:5 001 100 rn:5 rw:1 rd:4 FDUP 00100101 esz:2 111 00 1110 imm:8 rd:5 # SVE broadcast integer immediate (unpredicated) -DUP_i 00100101 esz:2 111 00 011 . ........ rd:5 imm=%sh8_i8s +{ + INVALID 00100101 00 111 00 011 1 -------- ----- + DUP_i 00100101 esz:2 111 00 011 . ........ rd:5 imm=%sh8_i8s +} # SVE integer add/subtract immediate (unpredicated) -ADD_zzi 00100101 .. 100 000 11 . ........ ..... @rdn_sh_i8u -SUB_zzi 00100101 .. 100 001 11 . ........ ..... @rdn_sh_i8u -SUBR_zzi 00100101 .. 100 011 11 . ........ ..... @rdn_sh_i8u -SQADD_zzi 00100101 .. 100 100 11 . ........ ..... @rdn_sh_i8u -UQADD_zzi 00100101 .. 100 101 11 . ........ ..... @rdn_sh_i8u -SQSUB_zzi 00100101 .. 100 110 11 . ........ ..... 
@rdn_sh_i8u -UQSUB_zzi 00100101 .. 100 111 11 . ........ ..... @rdn_sh_i8u +{ + INVALID 00100101 00 100 000 11 1 -------- ----- + ADD_zzi 00100101 .. 100 000 11 . ........ ..... @rdn_sh_i8u +} +{ + INVALID 00100101 00 100 001 11 1 -------- ----- + SUB_zzi 00100101 .. 100 001 11 . ........ ..... @rdn_sh_i8u +} +{ + INVALID 00100101 00 100 011 11 1 -------- ----- + SUBR_zzi 00100101 .. 100 011 11 . ........ ..... @rdn_sh_i8u +} +{ + INVALID 00100101 00 100 100 11 1 -------- ----- + SQADD_zzi 00100101 .. 100 100 11 . ........ ..... @rdn_sh_i8u +} +{ + INVALID 00100101 00 100 101 11 1 -------- ----- + UQADD_zzi 00100101 .. 100 101 11 . ........ ..... @rdn_sh_i8u +} +{ + INVALID 00100101 00 100 110 11 1 -------- ----- + SQSUB_zzi 00100101 .. 100 110 11 . ........ ..... @rdn_sh_i8u +} +{ + INVALID 00100101 00 100 111 11 1 -------- ----- + UQSUB_zzi 00100101 .. 100 111 11 . ........ ..... @rdn_sh_i8u +} # SVE integer min/max immediate (unpredicated) SMAX_zzi 00100101 .. 101 000 110 ........ ..... @rdn_i8s @@ -1153,10 +1187,10 @@ LD1RO_zpri 1010010 .. 01 0.... 001 ... ..... ..... \ @rpri_load_msz nreg=0 # SVE 32-bit gather prefetch (scalar plus 32-bit scaled offsets) -PRF 1000010 00 -1 ----- 0-- --- ----- 0 ---- +PRF_ns 1000010 00 -1 ----- 0-- --- ----- 0 ---- # SVE 32-bit gather prefetch (vector plus immediate) -PRF 1000010 -- 00 ----- 111 --- ----- 0 ---- +PRF_ns 1000010 -- 00 ----- 111 --- ----- 0 ---- # SVE contiguous prefetch (scalar plus immediate) PRF 1000010 11 1- ----- 0-- --- ----- 0 ---- @@ -1193,13 +1227,13 @@ LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \ @rpri_g_load esz=3 # SVE 64-bit gather prefetch (scalar plus 64-bit scaled offsets) -PRF 1100010 00 11 ----- 1-- --- ----- 0 ---- +PRF_ns 1100010 00 11 ----- 1-- --- ----- 0 ---- # SVE 64-bit gather prefetch (scalar plus unpacked 32-bit scaled offsets) -PRF 1100010 00 -1 ----- 0-- --- ----- 0 ---- +PRF_ns 1100010 00 -1 ----- 0-- --- ----- 0 ---- # SVE 64-bit gather prefetch (vector plus immediate) -PRF 1100010 -- 00 ----- 111 --- ----- 0 ---- +PRF_ns 1100010 -- 00 ----- 111 --- ----- 0 ---- ### SVE Memory Store Group @@ -1568,17 +1602,15 @@ SQRDCMLAH_zzzz 01000100 esz:2 0 rm:5 0011 rot:2 rn:5 rd:5 ra=%reg_movprfx USDOT_zzzz 01000100 .. 0 ..... 011 110 ..... ..... @rda_rn_rm ### SVE2 floating point matrix multiply accumulate -{ - BFMMLA 01100100 01 1 ..... 111 001 ..... ..... @rda_rn_rm_e0 - FMMLA 01100100 .. 1 ..... 111 001 ..... ..... @rda_rn_rm -} +BFMMLA 01100100 01 1 ..... 111 001 ..... ..... @rda_rn_rm_e0 +FMMLA_s 01100100 10 1 ..... 111 001 ..... ..... @rda_rn_rm_e0 +FMMLA_d 01100100 11 1 ..... 111 001 ..... ..... @rda_rn_rm_e0 ### SVE2 Memory Gather Load Group -# SVE2 64-bit gather non-temporal load -# (scalar plus unpacked 32-bit unscaled offsets) +# SVE2 64-bit gather non-temporal load (scalar plus 64-bit unscaled offsets) LDNT1_zprz 1100010 msz:2 00 rm:5 1 u:1 0 pg:3 rn:5 rd:5 \ - &rprr_gather_load xs=0 esz=3 scale=0 ff=0 + &rprr_gather_load xs=2 esz=3 scale=0 ff=0 # SVE2 32-bit gather non-temporal load (scalar plus 32-bit unscaled offsets) LDNT1_zprz 1000010 msz:2 00 rm:5 10 u:1 pg:3 rn:5 rd:5 \ @@ -1597,8 +1629,8 @@ STNT1_zprz 1110010 .. 10 ..... 001 ... ..... ..... \ ### SVE2 Crypto Extensions # SVE2 crypto unary operations -# AESMC and AESIMC -AESMC 01000101 00 10000011100 decrypt:1 00000 rd:5 +AESMC 01000101 00 10000011100 0 00000 rd:5 +AESIMC 01000101 00 10000011100 1 00000 rd:5 # SVE2 crypto destructive binary operations AESE 01000101 00 10001 0 11100 0 ..... ..... 
@rdn_rm_e0 @@ -1643,3 +1675,28 @@ BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2 ### SVE2 floating-point bfloat16 dot-product (indexed) BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2 + +### SVE broadcast predicate element + +&psel esz pd pn pm rv imm +%psel_rv 16:2 !function=plus_12 +%psel_imm_b 22:2 19:2 +%psel_imm_h 22:2 20:1 +%psel_imm_s 22:2 +%psel_imm_d 23:1 +@psel ........ .. . ... .. .. pn:4 . pm:4 . pd:4 \ + &psel rv=%psel_rv + +PSEL 00100101 .. 1 ..1 .. 01 .... 0 .... 0 .... \ + @psel esz=0 imm=%psel_imm_b +PSEL 00100101 .. 1 .10 .. 01 .... 0 .... 0 .... \ + @psel esz=1 imm=%psel_imm_h +PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \ + @psel esz=2 imm=%psel_imm_s +PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \ + @psel esz=3 imm=%psel_imm_d + +### SVE clamp + +SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm +UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm diff --git a/target/arm/sve_helper.c b/target/arm/tcg/sve_helper.c index dab5f1d1cd..6853f58c19 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -21,12 +21,13 @@ #include "cpu.h" #include "internals.h" #include "exec/exec-all.h" -#include "exec/cpu_ldst.h" #include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" #include "tcg/tcg.h" #include "vec_internal.h" +#include "sve_ldst_internal.h" +#include "hw/core/tcg-cpu-ops.h" /* Return a value for NZCV as per the ARM PredTest pseudofunction. @@ -103,44 +104,6 @@ uint32_t HELPER(sve_predtest)(void *vd, void *vg, uint32_t words) return flags; } -/* - * Expand active predicate bits to bytes, for byte elements. - * (The data table itself is in vec_helper.c as MVE also needs it.) - */ -static inline uint64_t expand_pred_b(uint8_t byte) -{ - return expand_pred_b_data[byte]; -} - -/* Similarly for half-word elements. - * for (i = 0; i < 256; ++i) { - * unsigned long m = 0; - * if (i & 0xaa) { - * continue; - * } - * for (j = 0; j < 8; j += 2) { - * if ((i >> j) & 1) { - * m |= 0xfffful << (j << 3); - * } - * } - * printf("[0x%x] = 0x%016lx,\n", i, m); - * } - */ -static inline uint64_t expand_pred_h(uint8_t byte) -{ - static const uint64_t word[] = { - [0x01] = 0x000000000000ffff, [0x04] = 0x00000000ffff0000, - [0x05] = 0x00000000ffffffff, [0x10] = 0x0000ffff00000000, - [0x11] = 0x0000ffff0000ffff, [0x14] = 0x0000ffffffff0000, - [0x15] = 0x0000ffffffffffff, [0x40] = 0xffff000000000000, - [0x41] = 0xffff00000000ffff, [0x44] = 0xffff0000ffff0000, - [0x45] = 0xffff0000ffffffff, [0x50] = 0xffffffff00000000, - [0x51] = 0xffffffff0000ffff, [0x54] = 0xffffffffffff0000, - [0x55] = 0xffffffffffffffff, - }; - return word[byte & 0x55]; -} - /* Similarly for single word elements. 
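Only predicate bits 0 and 4 are significant for 32-bit elements, so e.g. 0x01 expands to 0x00000000ffffffff, 0x10 to 0xffffffff00000000, and 0x11 to all-ones.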
*/ static inline uint64_t expand_pred_s(uint8_t byte) { @@ -969,6 +932,22 @@ DO_ZPZ_D(sve_revh_d, uint64_t, hswap64) DO_ZPZ_D(sve_revw_d, uint64_t, wswap64) +void HELPER(sme_revd_q)(void *vd, void *vn, void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 8; + uint64_t *d = vd, *n = vn; + uint8_t *pg = vg; + + for (i = 0; i < opr_sz; i += 2) { + if (pg[H1(i)] & 1) { + uint64_t n0 = n[i + 0]; + uint64_t n1 = n[i + 1]; + d[i + 0] = n1; + d[i + 1] = n0; + } + } +} + DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8) DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16) DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32) @@ -2802,7 +2781,7 @@ static void swap_memmove(void *vd, void *vs, size_t n) uintptr_t o = (d | s | n) & 7; size_t i; -#ifndef HOST_WORDS_BIGENDIAN +#if !HOST_BIG_ENDIAN o = 0; #endif switch (o) { @@ -2864,7 +2843,7 @@ static void swap_memzero(void *vd, size_t n) return; } -#ifndef HOST_WORDS_BIGENDIAN +#if !HOST_BIG_ENDIAN o = 0; #endif switch (o) { @@ -3382,19 +3361,21 @@ void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc) void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ { \ intptr_t oprsz = simd_oprsz(desc); \ + intptr_t odd_ofs = simd_data(desc); \ intptr_t i, oprsz_2 = oprsz / 2; \ ARMVectorReg tmp_n, tmp_m; \ /* We produce output faster than we consume input. \ Therefore we must be mindful of possible overlap. */ \ if (unlikely((vn - vd) < (uintptr_t)oprsz)) { \ - vn = memcpy(&tmp_n, vn, oprsz_2); \ + vn = memcpy(&tmp_n, vn, oprsz); \ } \ if (unlikely((vm - vd) < (uintptr_t)oprsz)) { \ - vm = memcpy(&tmp_m, vm, oprsz_2); \ + vm = memcpy(&tmp_m, vm, oprsz); \ } \ for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \ - *(TYPE *)(vd + H(2 * i + 0)) = *(TYPE *)(vn + H(i)); \ - *(TYPE *)(vd + H(2 * i + sizeof(TYPE))) = *(TYPE *)(vm + H(i)); \ + *(TYPE *)(vd + H(2 * i + 0)) = *(TYPE *)(vn + odd_ofs + H(i)); \ + *(TYPE *)(vd + H(2 * i + sizeof(TYPE))) = \ + *(TYPE *)(vm + odd_ofs + H(i)); \ } \ if (sizeof(TYPE) == 16 && unlikely(oprsz & 16)) { \ memset(vd + oprsz - 16, 0, 16); \ @@ -3601,6 +3582,18 @@ void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm, } } +void HELPER(sve_sel_zpzz_q)(void *vd, void *vn, void *vm, + void *vg, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc) / 16; + Int128 *d = vd, *n = vn, *m = vm; + uint16_t *pg = vg; + + for (i = 0; i < opr_sz; i += 1) { + d[i] = (pg[H2(i)] & 1 ? n : m)[i]; + } +} + /* Two operand comparison controlled by a predicate. * ??? It is very tempting to want to be able to expand this inline * with x86 instructions, e.g. @@ -5300,111 +5293,6 @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va, */ /* - * Load one element into @vd + @reg_off from @host. - * The controlling predicate is known to be true. - */ -typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host); - -/* - * Load one element into @vd + @reg_off from (@env, @vaddr, @ra). - * The controlling predicate is known to be true. - */ -typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, - target_ulong vaddr, uintptr_t retaddr); - -/* - * Generate the above primitives. 
- */ - -#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ -static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ -{ \ - TYPEM val = HOST(host); \ - *(TYPEE *)(vd + H(reg_off)) = val; \ -} - -#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \ -static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ -{ HOST(host, (TYPEM)*(TYPEE *)(vd + H(reg_off))); } - -#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \ -static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, uintptr_t ra) \ -{ \ - *(TYPEE *)(vd + H(reg_off)) = \ - (TYPEM)TLB(env, useronly_clean_ptr(addr), ra); \ -} - -#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \ -static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ - target_ulong addr, uintptr_t ra) \ -{ \ - TLB(env, useronly_clean_ptr(addr), \ - (TYPEM)*(TYPEE *)(vd + H(reg_off)), ra); \ -} - -#define DO_LD_PRIM_1(NAME, H, TE, TM) \ - DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ - DO_LD_TLB(NAME, H, TE, TM, cpu_ldub_data_ra) - -DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) -DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) -DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) -DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) -DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) -DO_LD_PRIM_1(ld1bdu, H1_8, uint64_t, uint8_t) -DO_LD_PRIM_1(ld1bds, H1_8, uint64_t, int8_t) - -#define DO_ST_PRIM_1(NAME, H, TE, TM) \ - DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \ - DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra) - -DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t) -DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t) -DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t) -DO_ST_PRIM_1(bd, H1_8, uint64_t, uint8_t) - -#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \ - DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \ - DO_LD_HOST(ld1##NAME##_le, H, TE, TM, LD##_le_p) \ - DO_LD_TLB(ld1##NAME##_be, H, TE, TM, cpu_##LD##_be_data_ra) \ - DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra) - -#define DO_ST_PRIM_2(NAME, H, TE, TM, ST) \ - DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \ - DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \ - DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \ - DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra) - -DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw) -DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw) -DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw) -DO_LD_PRIM_2(hdu, H1_8, uint64_t, uint16_t, lduw) -DO_LD_PRIM_2(hds, H1_8, uint64_t, int16_t, lduw) - -DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw) -DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw) -DO_ST_PRIM_2(hd, H1_8, uint64_t, uint16_t, stw) - -DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl) -DO_LD_PRIM_2(sdu, H1_8, uint64_t, uint32_t, ldl) -DO_LD_PRIM_2(sds, H1_8, uint64_t, int32_t, ldl) - -DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl) -DO_ST_PRIM_2(sd, H1_8, uint64_t, uint32_t, stl) - -DO_LD_PRIM_2(dd, H1_8, uint64_t, uint64_t, ldq) -DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq) - -#undef DO_LD_TLB -#undef DO_ST_TLB -#undef DO_LD_HOST -#undef DO_LD_PRIM_1 -#undef DO_ST_PRIM_1 -#undef DO_LD_PRIM_2 -#undef DO_ST_PRIM_2 - -/* * Skip through a sequence of inactive elements in the guarding predicate @vg, * beginning at @reg_off bounded by @reg_max. Return the offset of the active * element >= @reg_off, or @reg_max if there were no active elements at all. @@ -5444,16 +5332,9 @@ static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off, * exit via page fault exception. 
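To see what the primitive-generator macros above produce, expanding DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) by hand gives the pair below. This is a reconstruction, not standalone code: it needs QEMU's cpu.h types and the H1_2() host-endian swizzle.

/* Host-pointer variant: widen one byte from host memory into a
 * 16-bit vector element. */
static void sve_ld1bhu_host(void *vd, intptr_t reg_off, void *host)
{
    uint8_t val = ldub_p(host);
    *(uint16_t *)(vd + H1_2(reg_off)) = val;
}

/* TLB variant: the same operation via the softmmu load path, which
 * may fault and unwind to ra. */
static void sve_ld1bhu_tlb(CPUARMState *env, void *vd, intptr_t reg_off,
                           target_ulong addr, uintptr_t ra)
{
    *(uint16_t *)(vd + H1_2(reg_off)) =
        (uint8_t)cpu_ldub_data_ra(env, useronly_clean_ptr(addr), ra);
}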
*/ -typedef struct { - void *host; - int flags; - MemTxAttrs attrs; -} SVEHostPage; - -static bool sve_probe_page(SVEHostPage *info, bool nofault, - CPUARMState *env, target_ulong addr, - int mem_off, MMUAccessType access_type, - int mmu_idx, uintptr_t retaddr) +bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env, + target_ulong addr, int mem_off, MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) { int flags; @@ -5471,8 +5352,14 @@ static bool sve_probe_page(SVEHostPage *info, bool nofault, */ addr = useronly_clean_ptr(addr); - flags = probe_access_flags(env, addr, access_type, mmu_idx, nofault, +#ifdef CONFIG_USER_ONLY + flags = probe_access_flags(env, addr, 0, access_type, mmu_idx, nofault, &info->host, retaddr); +#else + CPUTLBEntryFull *full; + flags = probe_access_full(env, addr, 0, access_type, mmu_idx, nofault, + &info->host, &full, retaddr); +#endif info->flags = flags; if (flags & TLB_INVALID_MASK) { @@ -5480,88 +5367,27 @@ static bool sve_probe_page(SVEHostPage *info, bool nofault, return false; } - /* Ensure that info->host[] is relative to addr, not addr + mem_off. */ - info->host -= mem_off; - #ifdef CONFIG_USER_ONLY memset(&info->attrs, 0, sizeof(info->attrs)); + /* Require both ANON and MTE; see allocation_tag_mem(). */ + info->tagged = (flags & PAGE_ANON) && (flags & PAGE_MTE); #else - /* - * Find the iotlbentry for addr and return the transaction attributes. - * This *must* be present in the TLB because we just found the mapping. - */ - { - uintptr_t index = tlb_index(env, mmu_idx, addr); - -# ifdef CONFIG_DEBUG_TCG - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); - target_ulong comparator = (access_type == MMU_DATA_LOAD - ? entry->addr_read - : tlb_addr_write(entry)); - g_assert(tlb_hit(comparator, addr)); -# endif - - CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index]; - info->attrs = iotlbentry->attrs; - } + info->attrs = full->attrs; + info->tagged = full->extra.arm.pte_attrs == 0xf0; #endif + /* Ensure that info->host[] is relative to addr, not addr + mem_off. */ + info->host -= mem_off; return true; } - -/* - * Analyse contiguous data, protected by a governing predicate. - */ - -typedef enum { - FAULT_NO, - FAULT_FIRST, - FAULT_ALL, -} SVEContFault; - -typedef struct { - /* - * First and last element wholly contained within the two pages. - * mem_off_first[0] and reg_off_first[0] are always set >= 0. - * reg_off_last[0] may be < 0 if the first element crosses pages. - * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1] - * are set >= 0 only if there are complete elements on a second page. - * - * The reg_off_* offsets are relative to the internal vector register. - * The mem_off_first offset is relative to the memory address; the - * two offsets are different when a load operation extends, a store - * operation truncates, or for multi-register operations. - */ - int16_t mem_off_first[2]; - int16_t reg_off_first[2]; - int16_t reg_off_last[2]; - - /* - * One element that is misaligned and spans both pages, - * or -1 if there is no such active element. - */ - int16_t mem_off_split; - int16_t reg_off_split; - - /* - * The byte offset at which the entire operation crosses a page boundary. - * Set >= 0 if and only if the entire operation spans two pages. - */ - int16_t page_split; - - /* TLB data for the two pages. */ - SVEHostPage page[2]; -} SVEContLdSt; - /* * Find first active element on each page, and a loose bound for the * final element on each page. 
Identify any single element that spans * the page boundary. Return true if there are any active elements. */ -static bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, - uint64_t *vg, intptr_t reg_max, - int esz, int msize) +bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, uint64_t *vg, + intptr_t reg_max, int esz, int msize) { const int esize = 1 << esz; const uint64_t pg_mask = pred_esz_masks[esz]; @@ -5651,11 +5477,11 @@ static bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, * Control the generation of page faults with @fault. Return false if * there is no work to do, which can only happen with @fault == FAULT_NO. */ -static bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, - CPUARMState *env, target_ulong addr, - MMUAccessType access_type, uintptr_t retaddr) +bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, + CPUARMState *env, target_ulong addr, + MMUAccessType access_type, uintptr_t retaddr) { - int mmu_idx = cpu_mmu_index(env, false); + int mmu_idx = arm_env_mmu_index(env); int mem_off = info->mem_off_first[0]; bool nofault = fault == FAULT_NO; bool have_work = true; @@ -5709,12 +5535,12 @@ static bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, return have_work; } -static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, - uint64_t *vg, target_ulong addr, - int esize, int msize, int wp_access, - uintptr_t retaddr) -{ #ifndef CONFIG_USER_ONLY +void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, int wp_access, + uintptr_t retaddr) +{ intptr_t mem_off, reg_off, reg_last; int flags0 = info->page[0].flags; int flags1 = info->page[1].flags; @@ -5770,17 +5596,17 @@ static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, } while (reg_off & 63); } while (reg_off <= reg_last); } -#endif } +#endif -static void sve_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env, - uint64_t *vg, target_ulong addr, int esize, - int msize, uint32_t mtedesc, uintptr_t ra) +void sve_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, int esize, + int msize, uint32_t mtedesc, uintptr_t ra) { intptr_t mem_off, reg_off, reg_last; /* Process the page only if MemAttr == Tagged. */ - if (arm_tlb_mte_tagged(&info->page[0].attrs)) { + if (info->page[0].tagged) { mem_off = info->mem_off_first[0]; reg_off = info->reg_off_first[0]; reg_last = info->reg_off_split; @@ -5801,7 +5627,7 @@ static void sve_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env, } mem_off = info->mem_off_first[1]; - if (mem_off >= 0 && arm_tlb_mte_tagged(&info->page[1].attrs)) { + if (mem_off >= 0 && info->page[1].tagged) { reg_off = info->reg_off_first[1]; reg_last = info->reg_off_last[1]; @@ -5862,9 +5688,6 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, flags = info.page[0].flags | info.page[1].flags; if (unlikely(flags != 0)) { -#ifdef CONFIG_USER_ONLY - g_assert_not_reached(); -#else /* * At least one page includes MMIO. * Any bus operation can fail with cpu_transaction_failed, @@ -5901,7 +5724,6 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, memcpy(&env->vfp.zregs[(rd + i) & 31], &scratch[i], reg_max); } return; -#endif } /* The entire operation is in RAM, on valid pages. 
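The split-element bookkeeping used throughout these loops (mem_off_split/page_split in SVEContLdSt) reduces to a per-element page-crossing test. A standalone sketch, with a fixed 4 KiB page as an illustrative stand-in for the target page size:

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SIZE 4096u   /* illustrative; QEMU uses TARGET_PAGE_SIZE */

/* True if an msize-byte element at addr + mem_off straddles a page
 * boundary and therefore needs the split slow path. */
static bool element_crosses_page(uint64_t addr, int mem_off, int msize)
{
    uint64_t start = addr + (uint64_t)mem_off;
    return start / PAGE_SIZE != (start + msize - 1) / PAGE_SIZE;
}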
*/ @@ -5978,8 +5800,8 @@ void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); /* Perform gross MTE suppression early. */ - if (!tbi_check(desc, bit55) || - tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + if (!tbi_check(mtedesc, bit55) || + tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { mtedesc = 0; } @@ -6118,7 +5940,7 @@ DO_LDN_2(4, dd, MO_64) * linux-user/ in its get_user/put_user macros. * * TODO: Construct some helpers, written in assembly, that interact with - * handle_cpu_signal to produce memory ops which can properly report errors + * host_signal_handler to produce memory ops which can properly report errors * without racing. */ @@ -6180,7 +6002,7 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, * Disable MTE checking if the Tagged bit is not set. Since TBI must * be set within MTEDESC for MTE, !mtedesc => !mte_active. */ - if (arm_tlb_mte_tagged(&info.page[0].attrs)) { + if (!info.page[0].tagged) { mtedesc = 0; } @@ -6334,8 +6156,8 @@ void sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr, desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); /* Perform gross MTE suppression early. */ - if (!tbi_check(desc, bit55) || - tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + if (!tbi_check(mtedesc, bit55) || + tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { mtedesc = 0; } @@ -6588,8 +6410,8 @@ void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); /* Perform gross MTE suppression early. */ - if (!tbi_check(desc, bit55) || - tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + if (!tbi_check(mtedesc, bit55) || + tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) { mtedesc = 0; } @@ -6707,7 +6529,7 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - const int mmu_idx = cpu_mmu_index(env, false); + const int mmu_idx = arm_env_mmu_index(env); const intptr_t reg_max = simd_oprsz(desc); const int scale = simd_data(desc); ARMVectorReg scratch; @@ -6731,10 +6553,14 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, cpu_check_watchpoint(env_cpu(env), addr, msize, info.attrs, BP_MEM_READ, retaddr); } - if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + if (mtedesc && info.tagged) { mte_check(env, mtedesc, addr, retaddr); } - host_fn(&scratch, reg_off, info.host); + if (unlikely(info.flags & TLB_MMIO)) { + tlb_fn(env, &scratch, reg_off, addr, retaddr); + } else { + host_fn(&scratch, reg_off, info.host); + } } else { /* Element crosses the page boundary. 
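The tbi_check(desc) -> tbi_check(mtedesc) changes above are real bug fixes: by the time those checks run, desc has been narrowed with extract32() and no longer carries the high MTEDESC fields the checks read; only mtedesc does. For reference, extract32() is QEMU's plain bitfield read, equivalent to:

#include <assert.h>
#include <stdint.h>

/* Return @length bits of @value starting at bit @start. */
static uint32_t extract32(uint32_t value, int start, int length)
{
    assert(start >= 0 && length > 0 && length <= 32 - start);
    return (value >> start) & (~0u >> (32 - length));
}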
*/ sve_probe_page(&info2, false, env, addr + in_page, 0, @@ -6744,7 +6570,7 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, msize, info.attrs, BP_MEM_READ, retaddr); } - if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + if (mtedesc && info.tagged) { mte_check(env, mtedesc, addr, retaddr); } tlb_fn(env, &scratch, reg_off, addr, retaddr); @@ -6889,7 +6715,7 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - const int mmu_idx = cpu_mmu_index(env, false); + const int mmu_idx = arm_env_mmu_index(env); const intptr_t reg_max = simd_oprsz(desc); const int scale = simd_data(desc); const int esize = 1 << esz; @@ -6897,6 +6723,7 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, intptr_t reg_off; SVEHostPage info; target_ulong addr, in_page; + ARMVectorReg scratch; /* Skip to the first true predicate. */ reg_off = find_next_active(vg, 0, reg_max, esz); @@ -6906,6 +6733,11 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, return; } + /* Protect against overlap between vd and vm. */ + if (unlikely(vd == vm)) { + vm = memcpy(&scratch, vm, reg_max); + } + /* * Probe the first element, allowing faults. */ @@ -6945,9 +6777,7 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, (env_cpu(env), addr, msize) & BP_MEM_READ)) { goto fault; } - if (mtedesc && - arm_tlb_mte_tagged(&info.attrs) && - !mte_probe(env, mtedesc, addr)) { + if (mtedesc && info.tagged && !mte_probe(env, mtedesc, addr)) { goto fault; } @@ -7090,7 +6920,7 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { - const int mmu_idx = cpu_mmu_index(env, false); + const int mmu_idx = arm_env_mmu_index(env); const intptr_t reg_max = simd_oprsz(desc); const int scale = simd_data(desc); void *host[ARM_MAX_VQ * 4]; @@ -7112,7 +6942,9 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, if (likely(in_page >= msize)) { sve_probe_page(&info, false, env, addr, 0, MMU_DATA_STORE, mmu_idx, retaddr); - host[i] = info.host; + if (!(info.flags & TLB_MMIO)) { + host[i] = info.host; + } } else { /* * Element crosses the page boundary. @@ -7131,7 +6963,7 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, info.attrs, BP_MEM_WRITE, retaddr); } - if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + if (mtedesc && info.tagged) { mte_check(env, mtedesc, addr, retaddr); } } diff --git a/target/arm/tcg/sve_ldst_internal.h b/target/arm/tcg/sve_ldst_internal.h new file mode 100644 index 0000000000..4f159ec4ad --- /dev/null +++ b/target/arm/tcg/sve_ldst_internal.h @@ -0,0 +1,222 @@ +/* + * ARM SVE Load/Store Helpers + * + * Copyright (c) 2018-2022 Linaro + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
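The TLB_MMIO guards added to sve_ld1_z() and sve_st1_z() follow a single pattern: take the direct host-pointer path for ordinary RAM, and fall back to the TLB helper when the probed page is device memory. Schematically, with local stand-in names and an illustrative flag value:

#include <stdint.h>

#define TLB_MMIO (1 << 5)   /* stand-in; the real flag lives in exec/ */

typedef void host_fn_t(void *vd, intptr_t reg_off, void *host);
typedef void tlb_fn_t(void *env, void *vd, intptr_t reg_off,
                      uint64_t addr, uintptr_t ra);

static void access_one_element(int flags, void *env, void *vd,
                               intptr_t reg_off, void *host,
                               uint64_t addr, uintptr_t ra,
                               host_fn_t *host_fn, tlb_fn_t *tlb_fn)
{
    if (flags & TLB_MMIO) {
        tlb_fn(env, vd, reg_off, addr, ra);   /* slow: device memory */
    } else {
        host_fn(vd, reg_off, host);           /* fast: plain RAM */
    }
}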
+ */ + +#ifndef TARGET_ARM_SVE_LDST_INTERNAL_H +#define TARGET_ARM_SVE_LDST_INTERNAL_H + +#include "exec/cpu_ldst.h" + +/* + * Load one element into @vd + @reg_off from @host. + * The controlling predicate is known to be true. + */ +typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host); + +/* + * Load one element into @vd + @reg_off from (@env, @vaddr, @ra). + * The controlling predicate is known to be true. + */ +typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off, + target_ulong vaddr, uintptr_t retaddr); + +/* + * Generate the above primitives. + */ + +#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \ +static inline void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ +{ TYPEM val = HOST(host); *(TYPEE *)(vd + H(reg_off)) = val; } + +#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \ +static inline void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ +{ TYPEM val = *(TYPEE *)(vd + H(reg_off)); HOST(host, val); } + +#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \ +static inline void sve_##NAME##_tlb(CPUARMState *env, void *vd, \ + intptr_t reg_off, target_ulong addr, uintptr_t ra) \ +{ \ + TYPEM val = TLB(env, useronly_clean_ptr(addr), ra); \ + *(TYPEE *)(vd + H(reg_off)) = val; \ +} + +#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \ +static inline void sve_##NAME##_tlb(CPUARMState *env, void *vd, \ + intptr_t reg_off, target_ulong addr, uintptr_t ra) \ +{ \ + TYPEM val = *(TYPEE *)(vd + H(reg_off)); \ + TLB(env, useronly_clean_ptr(addr), val, ra); \ +} + +#define DO_LD_PRIM_1(NAME, H, TE, TM) \ + DO_LD_HOST(NAME, H, TE, TM, ldub_p) \ + DO_LD_TLB(NAME, H, TE, TM, cpu_ldub_data_ra) + +DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t) +DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t) +DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t) +DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t) +DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t) +DO_LD_PRIM_1(ld1bdu, H1_8, uint64_t, uint8_t) +DO_LD_PRIM_1(ld1bds, H1_8, uint64_t, int8_t) + +#define DO_ST_PRIM_1(NAME, H, TE, TM) \ + DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \ + DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra) + +DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t) +DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t) +DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t) +DO_ST_PRIM_1(bd, H1_8, uint64_t, uint8_t) + +#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \ + DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \ + DO_LD_HOST(ld1##NAME##_le, H, TE, TM, LD##_le_p) \ + DO_LD_TLB(ld1##NAME##_be, H, TE, TM, cpu_##LD##_be_data_ra) \ + DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra) + +#define DO_ST_PRIM_2(NAME, H, TE, TM, ST) \ + DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \ + DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \ + DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \ + DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra) + +DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw) +DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw) +DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw) +DO_LD_PRIM_2(hdu, H1_8, uint64_t, uint16_t, lduw) +DO_LD_PRIM_2(hds, H1_8, uint64_t, int16_t, lduw) + +DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw) +DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw) +DO_ST_PRIM_2(hd, H1_8, uint64_t, uint16_t, stw) + +DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl) +DO_LD_PRIM_2(sdu, H1_8, uint64_t, uint32_t, ldl) +DO_LD_PRIM_2(sds, H1_8, uint64_t, int32_t, ldl) + +DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl) +DO_ST_PRIM_2(sd, H1_8, uint64_t, uint32_t, stl) + +DO_LD_PRIM_2(dd, H1_8, uint64_t, 
uint64_t, ldq) +DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq) + +#undef DO_LD_TLB +#undef DO_ST_TLB +#undef DO_LD_HOST +#undef DO_LD_PRIM_1 +#undef DO_ST_PRIM_1 +#undef DO_LD_PRIM_2 +#undef DO_ST_PRIM_2 + +/* + * Resolve the guest virtual address to info->host and info->flags. + * If @nofault, return false if the page is invalid, otherwise + * exit via page fault exception. + */ + +typedef struct { + void *host; + int flags; + MemTxAttrs attrs; + bool tagged; +} SVEHostPage; + +bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env, + target_ulong addr, int mem_off, MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr); + +/* + * Analyse contiguous data, protected by a governing predicate. + */ + +typedef enum { + FAULT_NO, + FAULT_FIRST, + FAULT_ALL, +} SVEContFault; + +typedef struct { + /* + * First and last element wholly contained within the two pages. + * mem_off_first[0] and reg_off_first[0] are always set >= 0. + * reg_off_last[0] may be < 0 if the first element crosses pages. + * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1] + * are set >= 0 only if there are complete elements on a second page. + * + * The reg_off_* offsets are relative to the internal vector register. + * The mem_off_first offset is relative to the memory address; the + * two offsets are different when a load operation extends, a store + * operation truncates, or for multi-register operations. + */ + int16_t mem_off_first[2]; + int16_t reg_off_first[2]; + int16_t reg_off_last[2]; + + /* + * One element that is misaligned and spans both pages, + * or -1 if there is no such active element. + */ + int16_t mem_off_split; + int16_t reg_off_split; + + /* + * The byte offset at which the entire operation crosses a page boundary. + * Set >= 0 if and only if the entire operation spans two pages. + */ + int16_t page_split; + + /* TLB data for the two pages. */ + SVEHostPage page[2]; +} SVEContLdSt; + +/* + * Find first active element on each page, and a loose bound for the + * final element on each page. Identify any single element that spans + * the page boundary. Return true if there are any active elements. + */ +bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, uint64_t *vg, + intptr_t reg_max, int esz, int msize); + +/* + * Resolve the guest virtual addresses to info->page[]. + * Control the generation of page faults with @fault. Return false if + * there is no work to do, which can only happen with @fault == FAULT_NO. 
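The SVEContFault values above encode the three SVE fault models: ordinary accesses fault on any element, first-fault loads (LDFF1) may fault only on the first active element, and no-fault loads (LDNF1) never fault, recording later failures in FFR instead. Reduced to per-element granularity (the real code decides page-at-a-time):

#include <stdbool.h>

typedef enum { FAULT_NO, FAULT_FIRST, FAULT_ALL } SVEContFault;

/* Should a probe failure on the Nth active element (counting from 0)
 * raise an exception rather than truncate the operation? */
static bool should_fault(SVEContFault fault, int nth_active)
{
    switch (fault) {
    case FAULT_ALL:
        return true;               /* ordinary load/store */
    case FAULT_FIRST:
        return nth_active == 0;    /* LDFF1 */
    case FAULT_NO:
    default:
        return false;              /* LDNF1: clear FFR bits instead */
    }
}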
+ */ +bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, + CPUARMState *env, target_ulong addr, + MMUAccessType access_type, uintptr_t retaddr); + +#ifdef CONFIG_USER_ONLY +static inline void +sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, uint64_t *vg, + target_ulong addr, int esize, int msize, + int wp_access, uintptr_t retaddr) +{ } +#else +void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, int wp_access, + uintptr_t retaddr); +#endif + +void sve_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env, uint64_t *vg, + target_ulong addr, int esize, int msize, + uint32_t mtedesc, uintptr_t ra); + +#endif /* TARGET_ARM_SVE_LDST_INTERNAL_H */ diff --git a/target/arm/t16.decode b/target/arm/tcg/t16.decode index 646c74929d..646c74929d 100644 --- a/target/arm/t16.decode +++ b/target/arm/tcg/t16.decode diff --git a/target/arm/t32.decode b/target/arm/tcg/t32.decode index 78fadef9d6..f21ad0167a 100644 --- a/target/arm/t32.decode +++ b/target/arm/tcg/t32.decode @@ -364,17 +364,17 @@ CLZ 1111 1010 1011 ---- 1111 .... 1000 .... @rdm [ # Hints, and CPS { - YIELD 1111 0011 1010 1111 1000 0000 0000 0001 - WFE 1111 0011 1010 1111 1000 0000 0000 0010 - WFI 1111 0011 1010 1111 1000 0000 0000 0011 + [ + YIELD 1111 0011 1010 1111 1000 0000 0000 0001 + WFE 1111 0011 1010 1111 1000 0000 0000 0010 + WFI 1111 0011 1010 1111 1000 0000 0000 0011 - # TODO: Implement SEV, SEVL; may help SMP performance. - # SEV 1111 0011 1010 1111 1000 0000 0000 0100 - # SEVL 1111 0011 1010 1111 1000 0000 0000 0101 + # TODO: Implement SEV, SEVL; may help SMP performance. + # SEV 1111 0011 1010 1111 1000 0000 0000 0100 + # SEVL 1111 0011 1010 1111 1000 0000 0000 0101 - # For M-profile minimal-RAS ESB can be a NOP, which is the - # default behaviour since it is in the hint space. - # ESB 1111 0011 1010 1111 1000 0000 0001 0000 + ESB 1111 0011 1010 1111 1000 0000 0001 0000 + ] # The canonical nop ends in 0000 0000, but the whole rest # of the space is "reserved hint, behaves as nop". diff --git a/target/arm/tcg/tlb_helper.c b/target/arm/tcg/tlb_helper.c new file mode 100644 index 0000000000..885bf4ec14 --- /dev/null +++ b/target/arm/tcg/tlb_helper.c @@ -0,0 +1,398 @@ +/* + * ARM TLB (Translation lookaside buffer) helpers. + * + * This code is licensed under the GNU GPL v2 or later. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" +#include "cpu-features.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" + + +/* + * Returns true if the stage 1 translation regime is using LPAE format page + * tables. Used when raising alignment exceptions, whose FSR changes depending + * on whether the long or short descriptor format is in use. + */ +bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + mmu_idx = stage_1_mmu_idx(mmu_idx); + return regime_using_lpae_format(env, mmu_idx); +} + +static inline uint32_t merge_syn_data_abort(uint32_t template_syn, + ARMMMUFaultInfo *fi, + unsigned int target_el, + bool same_el, bool is_write, + int fsc) +{ + uint32_t syn; + + /* + * ISV is only set for stage-2 data aborts routed to EL2 and + * never for stage-1 page table walks faulting on stage 2 + * or for stage-1 faults. + * + * Furthermore, ISV is only set for certain kinds of load/stores. + * If the template syndrome does not have ISV set, we should leave + * it cleared. 
+ *
+ * See ARMv8 specs, D7-1974:
+ * ISS encoding for an exception from a Data Abort, the
+ * ISV field.
+ *
+ * TODO: FEAT_LS64/FEAT_LS64_V/FEAT_LS64_ACCDATA: Translation,
+ * Access Flag, and Permission faults caused by LD64B, ST64B,
+ * ST64BV, or ST64BV0 insns report syndrome info even for stage-1
+ * faults and regardless of the target EL.
+ */
+ if (template_syn & ARM_EL_VNCR) {
+ /*
+ * FEAT_NV2 faults on accesses via VNCR_EL2 are a special case:
+ * they are always reported as "same EL", even though we are going
+ * from EL1 to EL2.
+ */
+ assert(!fi->stage2);
+ syn = syn_data_abort_vncr(fi->ea, is_write, fsc);
+ } else if (!(template_syn & ARM_EL_ISV) || target_el != 2
+ || fi->s1ptw || !fi->stage2) {
+ syn = syn_data_abort_no_iss(same_el, 0,
+ fi->ea, 0, fi->s1ptw, is_write, fsc);
+ } else {
+ /*
+ * Fields: IL, ISV, SAS, SSE, SRT, SF and AR come from the template
+ * syndrome created at translation time.
+ * Now we create the runtime syndrome with the remaining fields.
+ */
+ syn = syn_data_abort_with_iss(same_el,
+ 0, 0, 0, 0, 0,
+ fi->ea, 0, fi->s1ptw, is_write, fsc,
+ true);
+ /* Merge the runtime syndrome with the template syndrome. */
+ syn |= template_syn;
+ }
+ return syn;
+}
+
+static uint32_t compute_fsr_fsc(CPUARMState *env, ARMMMUFaultInfo *fi,
+ int target_el, int mmu_idx, uint32_t *ret_fsc)
+{
+ ARMMMUIdx arm_mmu_idx = core_to_arm_mmu_idx(env, mmu_idx);
+ uint32_t fsr, fsc;
+
+ /*
+ * For M-profile there is no guest-facing FSR. We compute a
+ * short-form value for env->exception.fsr which we will then
+ * examine in arm_v7m_cpu_do_interrupt(). In theory we could
+ * use the LPAE format instead as long as both bits of code agree
+ * (and arm_fi_to_lfsc() handled the M-profile specific
+ * ARMFault_QEMU_NSCExec and ARMFault_QEMU_SFault cases).
+ */
+ if (!arm_feature(env, ARM_FEATURE_M) &&
+ (target_el == 2 || arm_el_is_aa64(env, target_el) ||
+ arm_s1_regime_using_lpae_format(env, arm_mmu_idx))) {
+ /*
+ * LPAE format fault status register : bottom 6 bits are
+ * status code in the same form as needed for syndrome
+ */
+ fsr = arm_fi_to_lfsc(fi);
+ fsc = extract32(fsr, 0, 6);
+ } else {
+ fsr = arm_fi_to_sfsc(fi);
+ /*
+ * Short format FSR : this fault will never actually be reported
+ * to an EL that uses a syndrome register. Use a (currently)
+ * reserved FSR code in case the constructed syndrome does leak
+ * into the guest somehow.
+ */
+ fsc = 0x3f;
+ }
+
+ *ret_fsc = fsc;
+ return fsr;
+}
+
+static bool report_as_gpc_exception(ARMCPU *cpu, int current_el,
+ ARMMMUFaultInfo *fi)
+{
+ bool ret;
+
+ switch (fi->gpcf) {
+ case GPCF_None:
+ return false;
+ case GPCF_AddressSize:
+ case GPCF_Walk:
+ case GPCF_EABT:
+ /* R_PYTGX: GPT faults are reported as GPC. */
+ ret = true;
+ break;
+ case GPCF_Fail:
+ /*
+ * R_BLYPM: A GPF at EL3 is reported as insn or data abort.
+ * R_VBZMW, R_LXHQR: A GPF at EL[0-2] is reported as a GPC
+ * if SCR_EL3.GPF is set, otherwise an insn or data abort.
+ */ + ret = (cpu->env.cp15.scr_el3 & SCR_GPF) && current_el != 3; + break; + default: + g_assert_not_reached(); + } + + assert(cpu_isar_feature(aa64_rme, cpu)); + assert(fi->type == ARMFault_GPCFOnWalk || + fi->type == ARMFault_GPCFOnOutput); + if (fi->gpcf == GPCF_AddressSize) { + assert(fi->level == 0); + } else { + assert(fi->level >= 0 && fi->level <= 1); + } + + return ret; +} + +static unsigned encode_gpcsc(ARMMMUFaultInfo *fi) +{ + static uint8_t const gpcsc[] = { + [GPCF_AddressSize] = 0b000000, + [GPCF_Walk] = 0b000100, + [GPCF_Fail] = 0b001100, + [GPCF_EABT] = 0b010100, + }; + + /* Note that we've validated fi->gpcf and fi->level above. */ + return gpcsc[fi->gpcf] | fi->level; +} + +static G_NORETURN +void arm_deliver_fault(ARMCPU *cpu, vaddr addr, + MMUAccessType access_type, + int mmu_idx, ARMMMUFaultInfo *fi) +{ + CPUARMState *env = &cpu->env; + int target_el = exception_target_el(env); + int current_el = arm_current_el(env); + bool same_el; + uint32_t syn, exc, fsr, fsc; + /* + * We know this must be a data or insn abort, and that + * env->exception.syndrome contains the template syndrome set + * up at translate time. So we can check only the VNCR bit + * (and indeed syndrome does not have the EC field in it, + * because we masked that out in disas_set_insn_syndrome()) + */ + bool is_vncr = (access_type != MMU_INST_FETCH) && + (env->exception.syndrome & ARM_EL_VNCR); + + if (is_vncr) { + /* FEAT_NV2 faults on accesses via VNCR_EL2 go to EL2 */ + target_el = 2; + } + + if (report_as_gpc_exception(cpu, current_el, fi)) { + target_el = 3; + + fsr = compute_fsr_fsc(env, fi, target_el, mmu_idx, &fsc); + + syn = syn_gpc(fi->stage2 && fi->type == ARMFault_GPCFOnWalk, + access_type == MMU_INST_FETCH, + encode_gpcsc(fi), is_vncr, + 0, fi->s1ptw, + access_type == MMU_DATA_STORE, fsc); + + env->cp15.mfar_el3 = fi->paddr; + switch (fi->paddr_space) { + case ARMSS_Secure: + break; + case ARMSS_NonSecure: + env->cp15.mfar_el3 |= R_MFAR_NS_MASK; + break; + case ARMSS_Root: + env->cp15.mfar_el3 |= R_MFAR_NSE_MASK; + break; + case ARMSS_Realm: + env->cp15.mfar_el3 |= R_MFAR_NSE_MASK | R_MFAR_NS_MASK; + break; + default: + g_assert_not_reached(); + } + + exc = EXCP_GPC; + goto do_raise; + } + + /* If SCR_EL3.GPF is unset, GPF may still be routed to EL2. 
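A standalone sanity check of the encode_gpcsc() table above. The enum values are our stand-ins; only the table contents and the OR-with-level scheme come from the code being added:

#include <assert.h>
#include <stdint.h>

enum { GPCF_None, GPCF_AddressSize, GPCF_Walk, GPCF_EABT, GPCF_Fail };

static unsigned encode_gpcsc(int gpcf, int level)
{
    static const uint8_t gpcsc[] = {
        [GPCF_AddressSize] = 0x00,   /* 0b000000 */
        [GPCF_Walk]        = 0x04,   /* 0b000100 */
        [GPCF_Fail]        = 0x0c,   /* 0b001100 */
        [GPCF_EABT]        = 0x14,   /* 0b010100 */
    };
    return gpcsc[gpcf] | level;
}

int main(void)
{
    assert(encode_gpcsc(GPCF_Walk, 1) == 0x05);
    assert(encode_gpcsc(GPCF_AddressSize, 0) == 0x00);
    return 0;
}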
*/ + if (fi->gpcf == GPCF_Fail && target_el < 2) { + if (arm_hcr_el2_eff(env) & HCR_GPF) { + target_el = 2; + } + } + + if (fi->stage2) { + target_el = 2; + env->cp15.hpfar_el2 = extract64(fi->s2addr, 12, 47) << 4; + if (arm_is_secure_below_el3(env) && fi->s1ns) { + env->cp15.hpfar_el2 |= HPFAR_NS; + } + } + + same_el = current_el == target_el; + fsr = compute_fsr_fsc(env, fi, target_el, mmu_idx, &fsc); + + if (access_type == MMU_INST_FETCH) { + syn = syn_insn_abort(same_el, fi->ea, fi->s1ptw, fsc); + exc = EXCP_PREFETCH_ABORT; + } else { + syn = merge_syn_data_abort(env->exception.syndrome, fi, target_el, + same_el, access_type == MMU_DATA_STORE, + fsc); + if (access_type == MMU_DATA_STORE + && arm_feature(env, ARM_FEATURE_V6)) { + fsr |= (1 << 11); + } + exc = EXCP_DATA_ABORT; + } + + do_raise: + env->exception.vaddress = addr; + env->exception.fsr = fsr; + raise_exception(env, exc, syn, target_el); +} + +/* Raise a data fault alignment exception for the specified virtual address */ +void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + ARMCPU *cpu = ARM_CPU(cs); + ARMMMUFaultInfo fi = {}; + + /* now we have a real cpu fault */ + cpu_restore_state(cs, retaddr); + + fi.type = ARMFault_Alignment; + arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi); +} + +void helper_exception_pc_alignment(CPUARMState *env, target_ulong pc) +{ + ARMMMUFaultInfo fi = { .type = ARMFault_Alignment }; + int target_el = exception_target_el(env); + int mmu_idx = arm_env_mmu_index(env); + uint32_t fsc; + + env->exception.vaddress = pc; + + /* + * Note that the fsc is not applicable to this exception, + * since any syndrome is pcalignment not insn_abort. + */ + env->exception.fsr = compute_fsr_fsc(env, &fi, target_el, mmu_idx, &fsc); + raise_exception(env, EXCP_PREFETCH_ABORT, syn_pcalignment(), target_el); +} + +#if !defined(CONFIG_USER_ONLY) + +/* + * arm_cpu_do_transaction_failed: handle a memory system error response + * (eg "no device/memory present at address") by raising an external abort + * exception + */ +void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, + vaddr addr, unsigned size, + MMUAccessType access_type, + int mmu_idx, MemTxAttrs attrs, + MemTxResult response, uintptr_t retaddr) +{ + ARMCPU *cpu = ARM_CPU(cs); + ARMMMUFaultInfo fi = {}; + + /* now we have a real cpu fault */ + cpu_restore_state(cs, retaddr); + + fi.ea = arm_extabort_type(response); + fi.type = ARMFault_SyncExternal; + arm_deliver_fault(cpu, addr, access_type, mmu_idx, &fi); +} + +bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr) +{ + ARMCPU *cpu = ARM_CPU(cs); + GetPhysAddrResult res = {}; + ARMMMUFaultInfo local_fi, *fi; + int ret; + + /* + * Allow S1_ptw_translate to see any fault generated here. + * Since this may recurse, read and clear. + */ + fi = cpu->env.tlb_fi; + if (fi) { + cpu->env.tlb_fi = NULL; + } else { + fi = memset(&local_fi, 0, sizeof(local_fi)); + } + + /* + * Walk the page table and (if the mapping exists) add the page + * to the TLB. On success, return true. Otherwise, if probing, + * return false. Otherwise populate fsr with ARM DFSR/IFSR fault + * register format, and signal the fault. + */ + ret = get_phys_addr(&cpu->env, address, access_type, + core_to_arm_mmu_idx(&cpu->env, mmu_idx), + &res, fi); + if (likely(!ret)) { + /* + * Map a single [sub]page. 
Regions smaller than our declared + * target page size are handled specially, so for those we + * pass in the exact addresses. + */ + if (res.f.lg_page_size >= TARGET_PAGE_BITS) { + res.f.phys_addr &= TARGET_PAGE_MASK; + address &= TARGET_PAGE_MASK; + } + + res.f.extra.arm.pte_attrs = res.cacheattrs.attrs; + res.f.extra.arm.shareability = res.cacheattrs.shareability; + + tlb_set_page_full(cs, mmu_idx, address, &res.f); + return true; + } else if (probe) { + return false; + } else { + /* now we have a real cpu fault */ + cpu_restore_state(cs, retaddr); + arm_deliver_fault(cpu, address, access_type, mmu_idx, fi); + } +} +#else +void arm_cpu_record_sigsegv(CPUState *cs, vaddr addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra) +{ + ARMMMUFaultInfo fi = { + .type = maperr ? ARMFault_Translation : ARMFault_Permission, + .level = 3, + }; + ARMCPU *cpu = ARM_CPU(cs); + + /* + * We report both ESR and FAR to signal handlers. + * For now, it's easiest to deliver the fault normally. + */ + cpu_restore_state(cs, ra); + arm_deliver_fault(cpu, addr, access_type, MMU_USER_IDX, &fi); +} + +void arm_cpu_record_sigbus(CPUState *cs, vaddr addr, + MMUAccessType access_type, uintptr_t ra) +{ + arm_cpu_do_unaligned_access(cs, addr, access_type, MMU_USER_IDX, ra); +} +#endif /* !defined(CONFIG_USER_ONLY) */ diff --git a/target/arm/translate-a32.h b/target/arm/tcg/translate-a32.h index 88f15df60e..19de6e0a1a 100644 --- a/target/arm/translate-a32.h +++ b/target/arm/tcg/translate-a32.h @@ -17,8 +17,8 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef TARGET_ARM_TRANSLATE_A64_H -#define TARGET_ARM_TRANSLATE_A64_H +#ifndef TARGET_ARM_TRANSLATE_A32_H +#define TARGET_ARM_TRANSLATE_A32_H /* Prototypes for autogenerated disassembler functions */ bool disas_m_nocp(DisasContext *dc, uint32_t insn); @@ -40,7 +40,7 @@ void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop); TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs); void gen_set_cpsr(TCGv_i32 var, uint32_t mask); void gen_set_condexec(DisasContext *s); -void gen_set_pc_im(DisasContext *s, target_ulong val); +void gen_update_pc(DisasContext *s, target_long diff); void gen_lookup_tb(DisasContext *s); long vfp_reg_offset(bool dp, unsigned reg); long neon_full_reg_offset(unsigned reg); @@ -55,20 +55,36 @@ bool mve_skip_vmov(DisasContext *s, int vn, int index, int size); static inline TCGv_i32 load_cpu_offset(int offset) { TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_ld_i32(tmp, cpu_env, offset); + tcg_gen_ld_i32(tmp, tcg_env, offset); return tmp; } -#define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name)) - -static inline void store_cpu_offset(TCGv_i32 var, int offset) -{ - tcg_gen_st_i32(var, cpu_env, offset); - tcg_temp_free_i32(var); -} - -#define store_cpu_field(var, name) \ - store_cpu_offset(var, offsetof(CPUARMState, name)) +/* Load from a 32-bit field to a TCGv_i32 */ +#define load_cpu_field(name) \ + ({ \ + QEMU_BUILD_BUG_ON(sizeof_field(CPUARMState, name) != 4); \ + load_cpu_offset(offsetof(CPUARMState, name)); \ + }) + +/* Load from the low half of a 64-bit field to a TCGv_i32 */ +#define load_cpu_field_low32(name) \ + ({ \ + QEMU_BUILD_BUG_ON(sizeof_field(CPUARMState, name) != 8); \ + load_cpu_offset(offsetoflow32(CPUARMState, name)); \ + }) + +void store_cpu_offset(TCGv_i32 var, int offset, int size); + +#define store_cpu_field(val, name) \ + ({ \ + QEMU_BUILD_BUG_ON(sizeof_field(CPUARMState, name) != 4 \ + && sizeof_field(CPUARMState, name) != 1); \ + 
store_cpu_offset(val, offsetof(CPUARMState, name), \ + sizeof_field(CPUARMState, name)); \ + }) + +#define store_cpu_field_constant(val, name) \ + store_cpu_field(tcg_constant_i32(val), name) /* Create a new temporary and set it to the value of a CPU register. */ static inline TCGv_i32 load_reg(DisasContext *s, int reg) @@ -114,13 +130,13 @@ void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, int index) { - gen_aa32_ld_i64(s, val, a32, index, MO_Q); + gen_aa32_ld_i64(s, val, a32, index, MO_UQ); } static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, int index) { - gen_aa32_st_i64(s, val, a32, index, MO_Q); + gen_aa32_st_i64(s, val, a32, index, MO_UQ); } DO_GEN_LD(8u, MO_UB) diff --git a/target/arm/translate-a64.c b/target/arm/tcg/translate-a64.c index ab6b346e35..2666d52711 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -18,25 +18,14 @@ */ #include "qemu/osdep.h" -#include "cpu.h" #include "exec/exec-all.h" -#include "tcg/tcg-op.h" -#include "tcg/tcg-op-gvec.h" +#include "translate.h" +#include "translate-a64.h" #include "qemu/log.h" +#include "disas/disas.h" #include "arm_ldst.h" -#include "translate.h" -#include "internals.h" -#include "qemu/host-utils.h" - #include "semihosting/semihost.h" -#include "exec/gen-icount.h" - -#include "exec/helper-proto.h" -#include "exec/helper-gen.h" -#include "exec/log.h" - -#include "translate-a64.h" -#include "qemu/atomic128.h" +#include "cpregs.h" static TCGv_i64 cpu_X[32]; static TCGv_i64 cpu_pc; @@ -58,6 +47,35 @@ enum a64_shift_type { A64_SHIFT_TYPE_ROR = 3 }; +/* + * Helpers for extracting complex instruction fields + */ + +/* + * For load/store with an unsigned 12 bit immediate scaled by the element + * size. The input has the immediate field in bits [14:3] and the element + * size in [2:0]. + */ +static int uimm_scaled(DisasContext *s, int x) +{ + unsigned imm = x >> 3; + unsigned scale = extract32(x, 0, 3); + return imm << scale; +} + +/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */ +static int scale_by_log2_tag_granule(DisasContext *s, int x) +{ + return x << LOG2_TAG_GRANULE; +} + +/* + * Include the generated decoders. + */ + +#include "decode-sme-fa64.c.inc" +#include "decode-a64.c.inc" + /* Table based decoder typedefs - used when the relevant bits for decode * are too awkwardly scattered across the instruction (eg SIMD). */ @@ -74,23 +92,31 @@ void a64_translate_init(void) { int i; - cpu_pc = tcg_global_mem_new_i64(cpu_env, + cpu_pc = tcg_global_mem_new_i64(tcg_env, offsetof(CPUARMState, pc), "pc"); for (i = 0; i < 32; i++) { - cpu_X[i] = tcg_global_mem_new_i64(cpu_env, + cpu_X[i] = tcg_global_mem_new_i64(tcg_env, offsetof(CPUARMState, xregs[i]), regnames[i]); } - cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env, + cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env, offsetof(CPUARMState, exclusive_high), "exclusive_high"); } /* - * Return the core mmu_idx to use for A64 "unprivileged load/store" insns + * Return the core mmu_idx to use for A64 load/store insns which + * have a "unprivileged load/store" variant. Those insns access + * EL0 if executed from an EL which has control over EL0 (usually + * EL1) but behave like normal loads and stores if executed from + * elsewhere (eg EL3). + * + * @unpriv : true for the unprivileged encoding; false for the + * normal encoding (in which case we will return the same + * thing as get_mem_index(). 
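The new load_cpu_field()/store_cpu_field() macros rely on a GNU C statement expression whose first statement breaks the build if the field size is wrong. A minimal model of the pattern, with our own struct and macro names:

#include <stddef.h>
#include <stdint.h>

/* Compile-time check: char[-1] is an error when the condition holds. */
#define BUILD_BUG_ON(x) ((void)sizeof(char[1 - 2 * !!(x)]))

typedef struct { uint32_t npc; uint64_t wide; } Toy;

#define field_off_checked(name)                                 \
    ({                                                          \
        BUILD_BUG_ON(sizeof(((Toy *)0)->name) != 4);            \
        offsetof(Toy, name);                                    \
    })

/* field_off_checked(npc) compiles; field_off_checked(wide) does not. */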
*/ -static int get_a64_user_mem_index(DisasContext *s) +static int get_a64_user_mem_index(DisasContext *s, bool unpriv) { /* * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL, @@ -98,7 +124,7 @@ static int get_a64_user_mem_index(DisasContext *s) */ ARMMMUIdx useridx = s->mmu_idx; - if (s->unpriv) { + if (unpriv && s->unpriv) { /* * We have pre-computed the condition for AccType_UNPRIV. * Therefore we should never get here with a mmu_idx for @@ -113,14 +139,6 @@ static int get_a64_user_mem_index(DisasContext *s) case ARMMMUIdx_E20_2_PAN: useridx = ARMMMUIdx_E20_0; break; - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: - useridx = ARMMMUIdx_SE10_0; - break; - case ARMMMUIdx_SE20_2: - case ARMMMUIdx_SE20_2_PAN: - useridx = ARMMMUIdx_SE20_0; - break; default: g_assert_not_reached(); } @@ -128,32 +146,42 @@ static int get_a64_user_mem_index(DisasContext *s) return arm_to_core_mmu_idx(useridx); } -static void reset_btype(DisasContext *s) +static void set_btype_raw(int val) { - if (s->btype != 0) { - TCGv_i32 zero = tcg_const_i32(0); - tcg_gen_st_i32(zero, cpu_env, offsetof(CPUARMState, btype)); - tcg_temp_free_i32(zero); - s->btype = 0; - } + tcg_gen_st_i32(tcg_constant_i32(val), tcg_env, + offsetof(CPUARMState, btype)); } static void set_btype(DisasContext *s, int val) { - TCGv_i32 tcg_val; - /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */ tcg_debug_assert(val >= 1 && val <= 3); - - tcg_val = tcg_const_i32(val); - tcg_gen_st_i32(tcg_val, cpu_env, offsetof(CPUARMState, btype)); - tcg_temp_free_i32(tcg_val); + set_btype_raw(val); s->btype = -1; } -void gen_a64_set_pc_im(uint64_t val) +static void reset_btype(DisasContext *s) { - tcg_gen_movi_i64(cpu_pc, val); + if (s->btype != 0) { + set_btype_raw(0); + s->btype = 0; + } +} + +static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff) +{ + assert(s->pc_save != -1); + if (tb_cflags(s->base.tb) & CF_PCREL) { + tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff); + } else { + tcg_gen_movi_i64(dest, s->pc_curr + diff); + } +} + +void gen_a64_update_pc(DisasContext *s, target_long diff) +{ + gen_pc_plus_diff(s, cpu_pc, diff); + s->pc_save = s->pc_curr + diff; } /* @@ -163,7 +191,7 @@ void gen_a64_set_pc_im(uint64_t val) * + for EL2 and EL3 there is only one TBI bit, and if it is set * then the address is zero-extended, clearing bits [63:56] * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0 - * and TBI1 controls addressses with bit 55 == 1. + * and TBI1 controls addresses with bit 55 == 1. * If the appropriate TBI bit is set for the address then * the address is sign-extended from bit 55 into bits [63:56] * @@ -207,6 +235,7 @@ static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) * then loading an address into the PC will clear out any tag. 
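A plain-C model of the gen_pc_plus_diff() logic above: under CF_PCREL the translated code must not embed the absolute PC, so a new value is produced relative to the last value written (pc_save); otherwise pc_curr + diff is used directly. The types are simplified stand-ins:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct {
    uint64_t pc_curr;  /* address of the insn being translated */
    int64_t  pc_save;  /* last value stored to cpu_pc; -1 == unknown */
} Ctx;

static uint64_t pc_plus_diff(const Ctx *s, uint64_t cpu_pc_reg,
                             bool pcrel, int64_t diff)
{
    assert(s->pc_save != -1);
    if (pcrel) {
        /* cpu_pc_reg currently holds pc_save; adjust relatively. */
        return cpu_pc_reg + (s->pc_curr - s->pc_save) + diff;
    }
    return s->pc_curr + diff;
}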
*/ gen_top_byte_ignore(s, cpu_pc, src, s->tbii); + s->pc_save = -1; } /* @@ -223,7 +252,7 @@ static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) { - TCGv_i64 clean = new_tmp_a64(s); + TCGv_i64 clean = tcg_temp_new_i64(); #ifdef CONFIG_USER_ONLY gen_top_byte_ignore(s, clean, addr, s->tbid); #else @@ -241,14 +270,10 @@ static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src) static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, MMUAccessType acc, int log2_size) { - TCGv_i32 t_acc = tcg_const_i32(acc); - TCGv_i32 t_idx = tcg_const_i32(get_mem_index(s)); - TCGv_i32 t_size = tcg_const_i32(1 << log2_size); - - gen_helper_probe_access(cpu_env, ptr, t_acc, t_idx, t_size); - tcg_temp_free_i32(t_acc); - tcg_temp_free_i32(t_idx); - tcg_temp_free_i32(t_size); + gen_helper_probe_access(tcg_env, ptr, + tcg_constant_i32(acc), + tcg_constant_i32(get_mem_index(s)), + tcg_constant_i32(1 << log2_size)); } /* @@ -259,11 +284,10 @@ static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, */ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, bool is_write, bool tag_checked, - int log2_size, bool is_unpriv, + MemOp memop, bool is_unpriv, int core_idx) { if (tag_checked && s->mte_active[is_unpriv]) { - TCGv_i32 tcg_desc; TCGv_i64 ret; int desc = 0; @@ -271,12 +295,11 @@ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1); - tcg_desc = tcg_const_i32(desc); + desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop)); + desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1); - ret = new_tmp_a64(s); - gen_helper_mte_check(ret, cpu_env, tcg_desc, addr); - tcg_temp_free_i32(tcg_desc); + ret = tcg_temp_new_i64(); + gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); return ret; } @@ -284,9 +307,9 @@ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, } TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, - bool tag_checked, int log2_size) + bool tag_checked, MemOp memop) { - return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size, + return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop, false, get_mem_index(s)); } @@ -294,10 +317,9 @@ TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, * For MTE, check multiple logical sequential accesses. 
*/ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, - bool tag_checked, int size) + bool tag_checked, int total_size, MemOp single_mop) { if (tag_checked && s->mte_active[0]) { - TCGv_i32 tcg_desc; TCGv_i64 ret; int desc = 0; @@ -305,18 +327,100 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1); - tcg_desc = tcg_const_i32(desc); + desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop)); + desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1); - ret = new_tmp_a64(s); - gen_helper_mte_check(ret, cpu_env, tcg_desc, addr); - tcg_temp_free_i32(tcg_desc); + ret = tcg_temp_new_i64(); + gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); return ret; } return clean_data_tbi(s, addr); } +/* + * Generate the special alignment check that applies to AccType_ATOMIC + * and AccType_ORDERED insns under FEAT_LSE2: the access need not be + * naturally aligned, but it must not cross a 16-byte boundary. + * See AArch64.CheckAlignment(). + */ +static void check_lse2_align(DisasContext *s, int rn, int imm, + bool is_write, MemOp mop) +{ + TCGv_i32 tmp; + TCGv_i64 addr; + TCGLabel *over_label; + MMUAccessType type; + int mmu_idx; + + tmp = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn)); + tcg_gen_addi_i32(tmp, tmp, imm & 15); + tcg_gen_andi_i32(tmp, tmp, 15); + tcg_gen_addi_i32(tmp, tmp, memop_size(mop)); + + over_label = gen_new_label(); + tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label); + + addr = tcg_temp_new_i64(); + tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm); + + type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD, + mmu_idx = get_mem_index(s); + gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type), + tcg_constant_i32(mmu_idx)); + + gen_set_label(over_label); + +} + +/* Handle the alignment check for AccType_ATOMIC instructions. */ +static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop) +{ + MemOp size = mop & MO_SIZE; + + if (size == MO_8) { + return mop; + } + + /* + * If size == MO_128, this is a LDXP, and the operation is single-copy + * atomic for each doubleword, not the entire quadword; it still must + * be quadword aligned. + */ + if (size == MO_128) { + return finalize_memop_atom(s, MO_128 | MO_ALIGN, + MO_ATOM_IFALIGN_PAIR); + } + if (dc_isar_feature(aa64_lse2, s)) { + check_lse2_align(s, rn, 0, true, mop); + } else { + mop |= MO_ALIGN; + } + return finalize_memop(s, mop); +} + +/* Handle the alignment check for AccType_ORDERED instructions. */ +static MemOp check_ordered_align(DisasContext *s, int rn, int imm, + bool is_write, MemOp mop) +{ + MemOp size = mop & MO_SIZE; + + if (size == MO_8) { + return mop; + } + if (size == MO_128) { + return finalize_memop_atom(s, MO_128 | MO_ALIGN, + MO_ATOM_IFALIGN_PAIR); + } + if (!dc_isar_feature(aa64_lse2, s)) { + mop |= MO_ALIGN; + } else if (!s->naa) { + check_lse2_align(s, rn, imm, is_write, mop); + } + return finalize_memop(s, mop); +} + typedef struct DisasCompare64 { TCGCond cond; TCGv_i64 value; @@ -328,44 +432,37 @@ static void a64_test_cc(DisasCompare64 *c64, int cc) arm_test_cc(&c32, cc); - /* Sign-extend the 32-bit value so that the GE/LT comparisons work - * properly. The NE/EQ comparisons are also fine with this choice. */ + /* + * Sign-extend the 32-bit value so that the GE/LT comparisons work + * properly. 
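The branch in check_lse2_align() above implements the FEAT_LSE2 rule from AArch64.CheckAlignment(): the access need not be naturally aligned, but must not span a 16-byte boundary. As a one-line predicate (equivalent mod 16 to the generated add/and/add sequence and its `<= 16` branch):

#include <stdbool.h>
#include <stdint.h>

static bool lse2_access_ok(uint64_t base, int imm, int size)
{
    return (((base + imm) & 15) + size) <= 16;
}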
The NE/EQ comparisons are also fine with this choice. + */ c64->cond = c32.cond; c64->value = tcg_temp_new_i64(); tcg_gen_ext_i32_i64(c64->value, c32.value); - - arm_free_cc(&c32); } -static void a64_free_cc(DisasCompare64 *c64) +static void gen_rebuild_hflags(DisasContext *s) { - tcg_temp_free_i64(c64->value); + gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el)); } static void gen_exception_internal(int excp) { - TCGv_i32 tcg_excp = tcg_const_i32(excp); - assert(excp_is_internal(excp)); - gen_helper_exception_internal(cpu_env, tcg_excp); - tcg_temp_free_i32(tcg_excp); + gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp)); } -static void gen_exception_internal_insn(DisasContext *s, uint64_t pc, int excp) +static void gen_exception_internal_insn(DisasContext *s, int excp) { - gen_a64_set_pc_im(pc); + gen_a64_update_pc(s, 0); gen_exception_internal(excp); s->base.is_jmp = DISAS_NORETURN; } static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome) { - TCGv_i32 tcg_syn; - - gen_a64_set_pc_im(s->pc_curr); - tcg_syn = tcg_const_i32(syndrome); - gen_helper_exception_bkpt_insn(cpu_env, tcg_syn); - tcg_temp_free_i32(tcg_syn); + gen_a64_update_pc(s, 0); + gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome)); s->base.is_jmp = DISAS_NORETURN; } @@ -393,19 +490,30 @@ static inline bool use_goto_tb(DisasContext *s, uint64_t dest) return translator_use_goto_tb(&s->base, dest); } -static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) +static void gen_goto_tb(DisasContext *s, int n, int64_t diff) { - if (use_goto_tb(s, dest)) { - tcg_gen_goto_tb(n); - gen_a64_set_pc_im(dest); + if (use_goto_tb(s, s->pc_curr + diff)) { + /* + * For pcrel, the pc must always be up-to-date on entry to + * the linked TB, so that it can use simple additions for all + * further adjustments. For !pcrel, the linked TB is compiled + * to know its full virtual address, so we can delay the + * update to pc to the unlinked path. A long chain of links + * can thus avoid many updates to the PC. 
+ */ + if (tb_cflags(s->base.tb) & CF_PCREL) { + gen_a64_update_pc(s, diff); + tcg_gen_goto_tb(n); + } else { + tcg_gen_goto_tb(n); + gen_a64_update_pc(s, diff); + } tcg_gen_exit_tb(s->base.tb, n); s->base.is_jmp = DISAS_NORETURN; } else { - gen_a64_set_pc_im(dest); + gen_a64_update_pc(s, diff); if (s->ss_active) { gen_step_complete_exception(s); - } else if (s->base.singlestep_enabled) { - gen_exception_internal(EXCP_DEBUG); } else { tcg_gen_lookup_and_goto_ptr(); s->base.is_jmp = DISAS_NORETURN; @@ -413,42 +521,6 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) } } -static void init_tmp_a64_array(DisasContext *s) -{ -#ifdef CONFIG_DEBUG_TCG - memset(s->tmp_a64, 0, sizeof(s->tmp_a64)); -#endif - s->tmp_a64_count = 0; -} - -static void free_tmp_a64(DisasContext *s) -{ - int i; - for (i = 0; i < s->tmp_a64_count; i++) { - tcg_temp_free_i64(s->tmp_a64[i]); - } - init_tmp_a64_array(s); -} - -TCGv_i64 new_tmp_a64(DisasContext *s) -{ - assert(s->tmp_a64_count < TMP_A64_MAX); - return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64(); -} - -TCGv_i64 new_tmp_a64_local(DisasContext *s) -{ - assert(s->tmp_a64_count < TMP_A64_MAX); - return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_local_new_i64(); -} - -TCGv_i64 new_tmp_a64_zero(DisasContext *s) -{ - TCGv_i64 t = new_tmp_a64(s); - tcg_gen_movi_i64(t, 0); - return t; -} - /* * Register access functions * @@ -467,7 +539,9 @@ TCGv_i64 new_tmp_a64_zero(DisasContext *s) TCGv_i64 cpu_reg(DisasContext *s, int reg) { if (reg == 31) { - return new_tmp_a64_zero(s); + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_movi_i64(t, 0); + return t; } else { return cpu_X[reg]; } @@ -485,7 +559,7 @@ TCGv_i64 cpu_reg_sp(DisasContext *s, int reg) */ TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf) { - TCGv_i64 v = new_tmp_a64(s); + TCGv_i64 v = tcg_temp_new_i64(); if (reg != 31) { if (sf) { tcg_gen_mov_i64(v, cpu_X[reg]); @@ -500,7 +574,7 @@ TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf) TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf) { - TCGv_i64 v = new_tmp_a64(s); + TCGv_i64 v = tcg_temp_new_i64(); if (sf) { tcg_gen_mov_i64(v, cpu_X[reg]); } else { @@ -535,7 +609,7 @@ static TCGv_i64 read_fp_dreg(DisasContext *s, int reg) { TCGv_i64 v = tcg_temp_new_i64(); - tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64)); + tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64)); return v; } @@ -543,7 +617,7 @@ static TCGv_i32 read_fp_sreg(DisasContext *s, int reg) { TCGv_i32 v = tcg_temp_new_i32(); - tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32)); + tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); return v; } @@ -551,7 +625,7 @@ static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) { TCGv_i32 v = tcg_temp_new_i32(); - tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16)); + tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); return v; } @@ -571,7 +645,7 @@ void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) { unsigned ofs = fp_reg_offset(s, reg, MO_64); - tcg_gen_st_i64(v, cpu_env, ofs); + tcg_gen_st_i64(v, tcg_env, ofs); clear_vec_high(s, false, reg); } @@ -581,7 +655,6 @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) tcg_gen_extu_i32_i64(tmp, v); write_fp_dreg(s, reg, tmp); - tcg_temp_free_i64(tmp); } /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ @@ -650,7 +723,6 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), fpst, is_q ? 
16 : 8, vec_full_reg_size(s), data, fn); - tcg_temp_free_ptr(fpst); } /* Expand a 3-operand + qc + operation using an out-of-line helper. */ @@ -659,12 +731,11 @@ static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn, { TCGv_ptr qc_ptr = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc)); + tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), vec_full_reg_offset(s, rm), qc_ptr, is_q ? 16 : 8, vec_full_reg_size(s), 0, fn); - tcg_temp_free_ptr(qc_ptr); } /* Expand a 4-operand operation using an out-of-line helper. */ @@ -692,7 +763,6 @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, vec_full_reg_offset(s, rm), vec_full_reg_offset(s, ra), fpst, is_q ? 16 : 8, vec_full_reg_size(s), data, fn); - tcg_temp_free_ptr(fpst); } /* Set ZF and NF based on a 64 bit result. This is alas fiddlier @@ -718,96 +788,102 @@ static inline void gen_logic_CC(int sf, TCGv_i64 result) } /* dest = T0 + T1; compute C, N, V and Z flags */ -static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) { - if (sf) { - TCGv_i64 result, flag, tmp; - result = tcg_temp_new_i64(); - flag = tcg_temp_new_i64(); - tmp = tcg_temp_new_i64(); + TCGv_i64 result, flag, tmp; + result = tcg_temp_new_i64(); + flag = tcg_temp_new_i64(); + tmp = tcg_temp_new_i64(); - tcg_gen_movi_i64(tmp, 0); - tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); + tcg_gen_movi_i64(tmp, 0); + tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); - tcg_gen_extrl_i64_i32(cpu_CF, flag); + tcg_gen_extrl_i64_i32(cpu_CF, flag); - gen_set_NZ64(result); + gen_set_NZ64(result); - tcg_gen_xor_i64(flag, result, t0); - tcg_gen_xor_i64(tmp, t0, t1); - tcg_gen_andc_i64(flag, flag, tmp); - tcg_temp_free_i64(tmp); - tcg_gen_extrh_i64_i32(cpu_VF, flag); + tcg_gen_xor_i64(flag, result, t0); + tcg_gen_xor_i64(tmp, t0, t1); + tcg_gen_andc_i64(flag, flag, tmp); + tcg_gen_extrh_i64_i32(cpu_VF, flag); - tcg_gen_mov_i64(dest, result); - tcg_temp_free_i64(result); - tcg_temp_free_i64(flag); - } else { - /* 32 bit arithmetic */ - TCGv_i32 t0_32 = tcg_temp_new_i32(); - TCGv_i32 t1_32 = tcg_temp_new_i32(); - TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_mov_i64(dest, result); +} - tcg_gen_movi_i32(tmp, 0); - tcg_gen_extrl_i64_i32(t0_32, t0); - tcg_gen_extrl_i64_i32(t1_32, t1); - tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp); - tcg_gen_mov_i32(cpu_ZF, cpu_NF); - tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); - tcg_gen_xor_i32(tmp, t0_32, t1_32); - tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); - tcg_gen_extu_i32_i64(dest, cpu_NF); +static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +{ + TCGv_i32 t0_32 = tcg_temp_new_i32(); + TCGv_i32 t1_32 = tcg_temp_new_i32(); + TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(t0_32); - tcg_temp_free_i32(t1_32); + tcg_gen_movi_i32(tmp, 0); + tcg_gen_extrl_i64_i32(t0_32, t0); + tcg_gen_extrl_i64_i32(t1_32, t1); + tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp); + tcg_gen_mov_i32(cpu_ZF, cpu_NF); + tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); + tcg_gen_xor_i32(tmp, t0_32, t1_32); + tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); + tcg_gen_extu_i32_i64(dest, cpu_NF); +} + +static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +{ + if (sf) { + gen_add64_CC(dest, t0, t1); + } else { + gen_add32_CC(dest, t0, t1); } } /* dest = T0 - T1; compute C, N, V and Z flags */ 
-static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) { - if (sf) { - /* 64 bit arithmetic */ - TCGv_i64 result, flag, tmp; + /* 64 bit arithmetic */ + TCGv_i64 result, flag, tmp; - result = tcg_temp_new_i64(); - flag = tcg_temp_new_i64(); - tcg_gen_sub_i64(result, t0, t1); + result = tcg_temp_new_i64(); + flag = tcg_temp_new_i64(); + tcg_gen_sub_i64(result, t0, t1); - gen_set_NZ64(result); + gen_set_NZ64(result); - tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1); - tcg_gen_extrl_i64_i32(cpu_CF, flag); + tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1); + tcg_gen_extrl_i64_i32(cpu_CF, flag); - tcg_gen_xor_i64(flag, result, t0); - tmp = tcg_temp_new_i64(); - tcg_gen_xor_i64(tmp, t0, t1); - tcg_gen_and_i64(flag, flag, tmp); - tcg_temp_free_i64(tmp); - tcg_gen_extrh_i64_i32(cpu_VF, flag); - tcg_gen_mov_i64(dest, result); - tcg_temp_free_i64(flag); - tcg_temp_free_i64(result); - } else { - /* 32 bit arithmetic */ - TCGv_i32 t0_32 = tcg_temp_new_i32(); - TCGv_i32 t1_32 = tcg_temp_new_i32(); - TCGv_i32 tmp; + tcg_gen_xor_i64(flag, result, t0); + tmp = tcg_temp_new_i64(); + tcg_gen_xor_i64(tmp, t0, t1); + tcg_gen_and_i64(flag, flag, tmp); + tcg_gen_extrh_i64_i32(cpu_VF, flag); + tcg_gen_mov_i64(dest, result); +} - tcg_gen_extrl_i64_i32(t0_32, t0); - tcg_gen_extrl_i64_i32(t1_32, t1); - tcg_gen_sub_i32(cpu_NF, t0_32, t1_32); - tcg_gen_mov_i32(cpu_ZF, cpu_NF); - tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32); - tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); - tmp = tcg_temp_new_i32(); - tcg_gen_xor_i32(tmp, t0_32, t1_32); - tcg_temp_free_i32(t0_32); - tcg_temp_free_i32(t1_32); - tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); - tcg_temp_free_i32(tmp); - tcg_gen_extu_i32_i64(dest, cpu_NF); +static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +{ + /* 32 bit arithmetic */ + TCGv_i32 t0_32 = tcg_temp_new_i32(); + TCGv_i32 t1_32 = tcg_temp_new_i32(); + TCGv_i32 tmp; + + tcg_gen_extrl_i64_i32(t0_32, t0); + tcg_gen_extrl_i64_i32(t1_32, t1); + tcg_gen_sub_i32(cpu_NF, t0_32, t1_32); + tcg_gen_mov_i32(cpu_ZF, cpu_NF); + tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32); + tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); + tmp = tcg_temp_new_i32(); + tcg_gen_xor_i32(tmp, t0_32, t1_32); + tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); + tcg_gen_extu_i32_i64(dest, cpu_NF); +} + +static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) +{ + if (sf) { + gen_sub64_CC(dest, t0, t1); + } else { + gen_sub32_CC(dest, t0, t1); } } @@ -818,7 +894,6 @@ static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) tcg_gen_extu_i32_i64(flag, cpu_CF); tcg_gen_add_i64(dest, t0, t1); tcg_gen_add_i64(dest, dest, flag); - tcg_temp_free_i64(flag); if (!sf) { tcg_gen_ext32u_i64(dest, dest); @@ -829,15 +904,15 @@ static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) { if (sf) { - TCGv_i64 result, cf_64, vf_64, tmp; - result = tcg_temp_new_i64(); - cf_64 = tcg_temp_new_i64(); - vf_64 = tcg_temp_new_i64(); - tmp = tcg_const_i64(0); + TCGv_i64 result = tcg_temp_new_i64(); + TCGv_i64 cf_64 = tcg_temp_new_i64(); + TCGv_i64 vf_64 = tcg_temp_new_i64(); + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_constant_i64(0); tcg_gen_extu_i32_i64(cf_64, cpu_CF); - tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp); - tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp); + tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero); + 
tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero); tcg_gen_extrl_i64_i32(cpu_CF, cf_64); gen_set_NZ64(result); @@ -847,31 +922,22 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) tcg_gen_extrh_i64_i32(cpu_VF, vf_64); tcg_gen_mov_i64(dest, result); - - tcg_temp_free_i64(tmp); - tcg_temp_free_i64(vf_64); - tcg_temp_free_i64(cf_64); - tcg_temp_free_i64(result); } else { - TCGv_i32 t0_32, t1_32, tmp; - t0_32 = tcg_temp_new_i32(); - t1_32 = tcg_temp_new_i32(); - tmp = tcg_const_i32(0); + TCGv_i32 t0_32 = tcg_temp_new_i32(); + TCGv_i32 t1_32 = tcg_temp_new_i32(); + TCGv_i32 tmp = tcg_temp_new_i32(); + TCGv_i32 zero = tcg_constant_i32(0); tcg_gen_extrl_i64_i32(t0_32, t0); tcg_gen_extrl_i64_i32(t1_32, t1); - tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp); - tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp); + tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero); + tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero); tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); tcg_gen_xor_i32(tmp, t0_32, t1_32); tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); tcg_gen_extu_i32_i64(dest, cpu_NF); - - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(t1_32); - tcg_temp_free_i32(t0_32); } } @@ -888,7 +954,6 @@ static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, unsigned int iss_srt, bool iss_sf, bool iss_ar) { - memop = finalize_memop(s, memop); tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop); if (iss_valid) { @@ -923,7 +988,6 @@ static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, bool iss_valid, unsigned int iss_srt, bool iss_sf, bool iss_ar) { - memop = finalize_memop(s, memop); tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop); if (extend && (memop & MO_SIGN)) { @@ -957,73 +1021,50 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, /* * Store from FP register to memory */ -static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) +static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop) { /* This writes the bottom N bits of a 128 bit wide vector to memory */ TCGv_i64 tmplo = tcg_temp_new_i64(); - MemOp mop; - tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64)); + tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64)); - if (size < 4) { - mop = finalize_memop(s, size); + if ((mop & MO_SIZE) < MO_128) { tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop); } else { - bool be = s->be_data == MO_BE; - TCGv_i64 tcg_hiaddr = tcg_temp_new_i64(); TCGv_i64 tmphi = tcg_temp_new_i64(); + TCGv_i128 t16 = tcg_temp_new_i128(); - tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx)); + tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx)); + tcg_gen_concat_i64_i128(t16, tmplo, tmphi); - mop = s->be_data | MO_Q; - tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s), - mop | (s->align_mem ? MO_ALIGN_16 : 0)); - tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8); - tcg_gen_qemu_st_i64(be ? 
tmplo : tmphi, tcg_hiaddr, - get_mem_index(s), mop); - - tcg_temp_free_i64(tcg_hiaddr); - tcg_temp_free_i64(tmphi); + tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop); } - - tcg_temp_free_i64(tmplo); } /* * Load from memory to FP register */ -static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) +static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop) { /* This always zero-extends and writes to a full 128 bit wide vector */ TCGv_i64 tmplo = tcg_temp_new_i64(); TCGv_i64 tmphi = NULL; - MemOp mop; - if (size < 4) { - mop = finalize_memop(s, size); + if ((mop & MO_SIZE) < MO_128) { tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop); } else { - bool be = s->be_data == MO_BE; - TCGv_i64 tcg_hiaddr; + TCGv_i128 t16 = tcg_temp_new_i128(); - tmphi = tcg_temp_new_i64(); - tcg_hiaddr = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop); - mop = s->be_data | MO_Q; - tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s), - mop | (s->align_mem ? MO_ALIGN_16 : 0)); - tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8); - tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr, - get_mem_index(s), mop); - tcg_temp_free_i64(tcg_hiaddr); + tmphi = tcg_temp_new_i64(); + tcg_gen_extr_i128_i64(tmplo, tmphi, t16); } - tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64)); - tcg_temp_free_i64(tmplo); + tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64)); if (tmphi) { - tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx)); - tcg_temp_free_i64(tmphi); + tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx)); } clear_vec_high(s, tmphi != NULL, destidx); } @@ -1045,28 +1086,28 @@ static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, int element, MemOp memop) { int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); - switch (memop) { + switch ((unsigned)memop) { case MO_8: - tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off); + tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off); break; case MO_16: - tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off); + tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off); break; case MO_32: - tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off); + tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off); break; case MO_8|MO_SIGN: - tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off); + tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off); break; case MO_16|MO_SIGN: - tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off); + tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off); break; case MO_32|MO_SIGN: - tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off); + tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off); break; case MO_64: case MO_64|MO_SIGN: - tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off); + tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off); break; default: g_assert_not_reached(); @@ -1079,20 +1120,20 @@ static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); switch (memop) { case MO_8: - tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off); + tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off); break; case MO_16: - tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off); + tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off); break; case MO_8|MO_SIGN: - tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off); + tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off); break; case MO_16|MO_SIGN: - tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off); + tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off); break; case MO_32: case MO_32|MO_SIGN: - 
tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off); + tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off); break; default: g_assert_not_reached(); @@ -1106,16 +1147,16 @@ static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx, int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); switch (memop) { case MO_8: - tcg_gen_st8_i64(tcg_src, cpu_env, vect_off); + tcg_gen_st8_i64(tcg_src, tcg_env, vect_off); break; case MO_16: - tcg_gen_st16_i64(tcg_src, cpu_env, vect_off); + tcg_gen_st16_i64(tcg_src, tcg_env, vect_off); break; case MO_32: - tcg_gen_st32_i64(tcg_src, cpu_env, vect_off); + tcg_gen_st32_i64(tcg_src, tcg_env, vect_off); break; case MO_64: - tcg_gen_st_i64(tcg_src, cpu_env, vect_off); + tcg_gen_st_i64(tcg_src, tcg_env, vect_off); break; default: g_assert_not_reached(); @@ -1128,13 +1169,13 @@ static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); switch (memop) { case MO_8: - tcg_gen_st8_i32(tcg_src, cpu_env, vect_off); + tcg_gen_st8_i32(tcg_src, tcg_env, vect_off); break; case MO_16: - tcg_gen_st16_i32(tcg_src, cpu_env, vect_off); + tcg_gen_st16_i32(tcg_src, tcg_env, vect_off); break; case MO_32: - tcg_gen_st_i32(tcg_src, cpu_env, vect_off); + tcg_gen_st_i32(tcg_src, tcg_env, vect_off); break; default: g_assert_not_reached(); @@ -1149,8 +1190,6 @@ static void do_vec_st(DisasContext *s, int srcidx, int element, read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE); tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); - - tcg_temp_free_i64(tcg_tmp); } /* Load from memory to vector register */ @@ -1161,8 +1200,6 @@ static void do_vec_ld(DisasContext *s, int destidx, int element, tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE); - - tcg_temp_free_i64(tcg_tmp); } /* Check that FP/Neon access is enabled. If it is, return @@ -1172,35 +1209,109 @@ static void do_vec_ld(DisasContext *s, int destidx, int element, * unallocated-encoding checks (otherwise the syndrome information * for the resulting exception will be incorrect). */ -static bool fp_access_check(DisasContext *s) +static bool fp_access_check_only(DisasContext *s) { if (s->fp_excp_el) { assert(!s->fp_access_checked); s->fp_access_checked = true; - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + gen_exception_insn_el(s, 0, EXCP_UDEF, + syn_fp_access_trap(1, 0xe, false, 0), + s->fp_excp_el); return false; } s->fp_access_checked = true; return true; } -/* Check that SVE access is enabled. If it is, return true. +static bool fp_access_check(DisasContext *s) +{ + if (!fp_access_check_only(s)) { + return false; + } + if (s->sme_trap_nonstreaming && s->is_nonstreaming) { + gen_exception_insn(s, 0, EXCP_UDEF, + syn_smetrap(SME_ET_Streaming, false)); + return false; + } + return true; +} + +/* + * Check that SVE access is enabled. If it is, return true. * If not, emit code to generate an appropriate exception and return false. + * This function corresponds to CheckSVEEnabled(). 
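The do_fp_st/do_fp_ld rework above replaces the hand-rolled pair of 8-byte accesses (with explicit big-endian swapping and MO_ALIGN_16) by one 128-bit access plus tcg_gen_concat_i64_i128/tcg_gen_extr_i128_i64. A host-side model of that concat/extract round trip, assuming a compiler with unsigned __int128 (GCC/Clang); illustrative only:

#include <stdint.h>

static unsigned __int128 concat_i64_i128(uint64_t lo, uint64_t hi)
{
    /* tcg_gen_concat_i64_i128: lo is the least significant half */
    return ((unsigned __int128)hi << 64) | lo;
}

static void extr_i128_i64(unsigned __int128 v, uint64_t *lo, uint64_t *hi)
{
    /* tcg_gen_extr_i128_i64: split back into the two register halves */
    *lo = (uint64_t)v;
    *hi = (uint64_t)(v >> 64);
}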
*/ bool sve_access_check(DisasContext *s) { - if (s->sve_excp_el) { - assert(!s->sve_access_checked); - s->sve_access_checked = true; - - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_sve_access_trap(), s->sve_excp_el); - return false; + if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) { + assert(dc_isar_feature(aa64_sme, s)); + if (!sme_sm_enabled_check(s)) { + goto fail_exit; + } + } else if (s->sve_excp_el) { + gen_exception_insn_el(s, 0, EXCP_UDEF, + syn_sve_access_trap(), s->sve_excp_el); + goto fail_exit; } s->sve_access_checked = true; return fp_access_check(s); + + fail_exit: + /* Assert that we only raise one exception per instruction. */ + assert(!s->sve_access_checked); + s->sve_access_checked = true; + return false; +} + +/* + * Check that SME access is enabled, raise an exception if not. + * Note that this function corresponds to CheckSMEAccess and is + * only used directly for cpregs. + */ +static bool sme_access_check(DisasContext *s) +{ + if (s->sme_excp_el) { + gen_exception_insn_el(s, 0, EXCP_UDEF, + syn_smetrap(SME_ET_AccessTrap, false), + s->sme_excp_el); + return false; + } + return true; +} + +/* This function corresponds to CheckSMEEnabled. */ +bool sme_enabled_check(DisasContext *s) +{ + /* + * Note that unlike sve_excp_el, we have not constrained sme_excp_el + * to be zero when fp_excp_el has priority. This is because we need + * sme_excp_el by itself for cpregs access checks. + */ + if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) { + s->fp_access_checked = true; + return sme_access_check(s); + } + return fp_access_check_only(s); +} + +/* Common subroutine for CheckSMEAnd*Enabled. */ +bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req) +{ + if (!sme_enabled_check(s)) { + return false; + } + if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) { + gen_exception_insn(s, 0, EXCP_UDEF, + syn_smetrap(SME_ET_NotStreaming, false)); + return false; + } + if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) { + gen_exception_insn(s, 0, EXCP_UDEF, + syn_smetrap(SME_ET_InactiveZA, false)); + return false; + } + return true; } /* @@ -1214,41 +1325,8 @@ static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in, int extsize = extract32(option, 0, 2); bool is_signed = extract32(option, 2, 1); - if (is_signed) { - switch (extsize) { - case 0: - tcg_gen_ext8s_i64(tcg_out, tcg_in); - break; - case 1: - tcg_gen_ext16s_i64(tcg_out, tcg_in); - break; - case 2: - tcg_gen_ext32s_i64(tcg_out, tcg_in); - break; - case 3: - tcg_gen_mov_i64(tcg_out, tcg_in); - break; - } - } else { - switch (extsize) { - case 0: - tcg_gen_ext8u_i64(tcg_out, tcg_in); - break; - case 1: - tcg_gen_ext16u_i64(tcg_out, tcg_in); - break; - case 2: - tcg_gen_ext32u_i64(tcg_out, tcg_in); - break; - case 3: - tcg_gen_mov_i64(tcg_out, tcg_in); - break; - } - } - - if (shift) { - tcg_gen_shli_i64(tcg_out, tcg_out, shift); - } + tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0)); + tcg_gen_shli_i64(tcg_out, tcg_out, shift); } static inline void gen_check_sp_alignment(DisasContext *s) @@ -1297,296 +1375,515 @@ static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table, * match up with those in the manual. 
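The ext_and_shift_reg() rewrite above folds the two eight-way switches into a single tcg_gen_ext_i64 plus an unconditional shift. The computation itself, as plain C (a sketch, not patch code):

#include <stdbool.h>
#include <stdint.h>

/* ExtendReg(): extend the low 8/16/32/64 bits per option, then shift. */
static uint64_t ext_and_shift(uint64_t in, int option, int shift)
{
    int extsize = option & 3;          /* 0: B, 1: H, 2: W, 3: X */
    bool is_signed = option & 4;
    int bits = 8 << extsize;
    uint64_t out;

    if (bits == 64) {
        out = in;
    } else if (is_signed) {
        out = (uint64_t)((int64_t)(in << (64 - bits)) >> (64 - bits));
    } else {
        out = in & ((1ULL << bits) - 1);
    }
    return out << shift;               /* shift is 0..4 */
}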
*/ -/* Unconditional branch (immediate) - * 31 30 26 25 0 - * +----+-----------+-------------------------------------+ - * | op | 0 0 1 0 1 | imm26 | - * +----+-----------+-------------------------------------+ - */ -static void disas_uncond_b_imm(DisasContext *s, uint32_t insn) +static bool trans_B(DisasContext *s, arg_i *a) { - uint64_t addr = s->pc_curr + sextract32(insn, 0, 26) * 4; - - if (insn & (1U << 31)) { - /* BL Branch with link */ - tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next); - } - - /* B Branch / BL Branch with link */ reset_btype(s); - gen_goto_tb(s, 0, addr); + gen_goto_tb(s, 0, a->imm); + return true; } -/* Compare and branch (immediate) - * 31 30 25 24 23 5 4 0 - * +----+-------------+----+---------------------+--------+ - * | sf | 0 1 1 0 1 0 | op | imm19 | Rt | - * +----+-------------+----+---------------------+--------+ - */ -static void disas_comp_b_imm(DisasContext *s, uint32_t insn) +static bool trans_BL(DisasContext *s, arg_i *a) { - unsigned int sf, op, rt; - uint64_t addr; - TCGLabel *label_match; - TCGv_i64 tcg_cmp; + gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s)); + reset_btype(s); + gen_goto_tb(s, 0, a->imm); + return true; +} - sf = extract32(insn, 31, 1); - op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */ - rt = extract32(insn, 0, 5); - addr = s->pc_curr + sextract32(insn, 5, 19) * 4; - tcg_cmp = read_cpu_reg(s, rt, sf); - label_match = gen_new_label(); +static bool trans_CBZ(DisasContext *s, arg_cbz *a) +{ + DisasLabel match; + TCGv_i64 tcg_cmp; + tcg_cmp = read_cpu_reg(s, a->rt, a->sf); reset_btype(s); - tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ, - tcg_cmp, 0, label_match); - gen_goto_tb(s, 0, s->base.pc_next); - gen_set_label(label_match); - gen_goto_tb(s, 1, addr); + match = gen_disas_label(s); + tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, + tcg_cmp, 0, match.label); + gen_goto_tb(s, 0, 4); + set_disas_label(s, match); + gen_goto_tb(s, 1, a->imm); + return true; } -/* Test and branch (immediate) - * 31 30 25 24 23 19 18 5 4 0 - * +----+-------------+----+-------+-------------+------+ - * | b5 | 0 1 1 0 1 1 | op | b40 | imm14 | Rt | - * +----+-------------+----+-------+-------------+------+ - */ -static void disas_test_b_imm(DisasContext *s, uint32_t insn) +static bool trans_TBZ(DisasContext *s, arg_tbz *a) { - unsigned int bit_pos, op, rt; - uint64_t addr; - TCGLabel *label_match; + DisasLabel match; TCGv_i64 tcg_cmp; - bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5); - op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */ - addr = s->pc_curr + sextract32(insn, 5, 14) * 4; - rt = extract32(insn, 0, 5); - tcg_cmp = tcg_temp_new_i64(); - tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos)); - label_match = gen_new_label(); + tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos); reset_btype(s); - tcg_gen_brcondi_i64(op ? 
TCG_COND_NE : TCG_COND_EQ, - tcg_cmp, 0, label_match); - tcg_temp_free_i64(tcg_cmp); - gen_goto_tb(s, 0, s->base.pc_next); - gen_set_label(label_match); - gen_goto_tb(s, 1, addr); -} - -/* Conditional branch (immediate) - * 31 25 24 23 5 4 3 0 - * +---------------+----+---------------------+----+------+ - * | 0 1 0 1 0 1 0 | o1 | imm19 | o0 | cond | - * +---------------+----+---------------------+----+------+ - */ -static void disas_cond_b_imm(DisasContext *s, uint32_t insn) -{ - unsigned int cond; - uint64_t addr; - if ((insn & (1 << 4)) || (insn & (1 << 24))) { - unallocated_encoding(s); - return; - } - addr = s->pc_curr + sextract32(insn, 5, 19) * 4; - cond = extract32(insn, 0, 4); + match = gen_disas_label(s); + tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, + tcg_cmp, 0, match.label); + gen_goto_tb(s, 0, 4); + set_disas_label(s, match); + gen_goto_tb(s, 1, a->imm); + return true; +} +static bool trans_B_cond(DisasContext *s, arg_B_cond *a) +{ + /* BC.cond is only present with FEAT_HBC */ + if (a->c && !dc_isar_feature(aa64_hbc, s)) { + return false; + } reset_btype(s); - if (cond < 0x0e) { + if (a->cond < 0x0e) { /* genuinely conditional branches */ - TCGLabel *label_match = gen_new_label(); - arm_gen_test_cc(cond, label_match); - gen_goto_tb(s, 0, s->base.pc_next); - gen_set_label(label_match); - gen_goto_tb(s, 1, addr); + DisasLabel match = gen_disas_label(s); + arm_gen_test_cc(a->cond, match.label); + gen_goto_tb(s, 0, 4); + set_disas_label(s, match); + gen_goto_tb(s, 1, a->imm); } else { /* 0xe and 0xf are both "always" conditions */ - gen_goto_tb(s, 0, addr); + gen_goto_tb(s, 0, a->imm); } + return true; } -/* HINT instruction group, including various allocated HINTs */ -static void handle_hint(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static void set_btype_for_br(DisasContext *s, int rn) { - unsigned int selector = crm << 3 | op2; + if (dc_isar_feature(aa64_bti, s)) { + /* BR to {x16,x17} or !guard -> 1, else 3. */ + set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3); + } +} - if (op1 != 3) { - unallocated_encoding(s); - return; +static void set_btype_for_blr(DisasContext *s) +{ + if (dc_isar_feature(aa64_bti, s)) { + /* BLR sets BTYPE to 2, regardless of source guarded page. */ + set_btype(s, 2); } +} - switch (selector) { - case 0b00000: /* NOP */ - break; - case 0b00011: /* WFI */ - s->base.is_jmp = DISAS_WFI; - break; - case 0b00001: /* YIELD */ - /* When running in MTTCG we don't generate jumps to the yield and - * WFE helpers as it won't affect the scheduling of other vCPUs. - * If we wanted to more completely model WFE/SEV so we don't busy - * spin unnecessarily we would need to do something more involved. 
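The hand decoders for B/BL/CBZ/CBNZ/TBZ/TBNZ removed above now live as patterns in a64.decode. For reference, the field extraction they used to do, as standalone C (illustrative, mirroring the deleted code):

#include <stdint.h>

/* Sign-extract "len" bits at "pos", cf. QEMU's sextract32(). */
static int64_t sext(uint32_t insn, int pos, int len)
{
    return ((int32_t)(insn << (32 - pos - len))) >> (32 - len);
}

static int64_t b_offset(uint32_t insn)    /* B/BL: imm26 * 4 */
{
    return sext(insn, 0, 26) * 4;
}

static int64_t cbz_offset(uint32_t insn)  /* CBZ/CBNZ: imm19 * 4 */
{
    return sext(insn, 5, 19) * 4;
}

static int tbz_bitpos(uint32_t insn)      /* TBZ/TBNZ: bit_pos = b5:b40 */
{
    return (((insn >> 31) & 1) << 5) | ((insn >> 19) & 0x1f);
}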
+static bool trans_BR(DisasContext *s, arg_r *a) +{ + gen_a64_set_pc(s, cpu_reg(s, a->rn)); + set_btype_for_br(s, a->rn); + s->base.is_jmp = DISAS_JUMP; + return true; +} + +static bool trans_BLR(DisasContext *s, arg_r *a) +{ + TCGv_i64 dst = cpu_reg(s, a->rn); + TCGv_i64 lr = cpu_reg(s, 30); + if (dst == lr) { + TCGv_i64 tmp = tcg_temp_new_i64(); + tcg_gen_mov_i64(tmp, dst); + dst = tmp; + } + gen_pc_plus_diff(s, lr, curr_insn_len(s)); + gen_a64_set_pc(s, dst); + set_btype_for_blr(s); + s->base.is_jmp = DISAS_JUMP; + return true; +} + +static bool trans_RET(DisasContext *s, arg_r *a) +{ + gen_a64_set_pc(s, cpu_reg(s, a->rn)); + s->base.is_jmp = DISAS_JUMP; + return true; +} + +static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst, + TCGv_i64 modifier, bool use_key_a) +{ + TCGv_i64 truedst; + /* + * Return the branch target for a BRAA/RETA/etc, which is either + * just the destination dst, or that value with the pauth check + * done and the code removed from the high bits. + */ + if (!s->pauth_active) { + return dst; + } + + truedst = tcg_temp_new_i64(); + if (use_key_a) { + gen_helper_autia_combined(truedst, tcg_env, dst, modifier); + } else { + gen_helper_autib_combined(truedst, tcg_env, dst, modifier); + } + return truedst; +} + +static bool trans_BRAZ(DisasContext *s, arg_braz *a) +{ + TCGv_i64 dst; + + if (!dc_isar_feature(aa64_pauth, s)) { + return false; + } + + dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); + gen_a64_set_pc(s, dst); + set_btype_for_br(s, a->rn); + s->base.is_jmp = DISAS_JUMP; + return true; +} + +static bool trans_BLRAZ(DisasContext *s, arg_braz *a) +{ + TCGv_i64 dst, lr; + + if (!dc_isar_feature(aa64_pauth, s)) { + return false; + } + + dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); + lr = cpu_reg(s, 30); + if (dst == lr) { + TCGv_i64 tmp = tcg_temp_new_i64(); + tcg_gen_mov_i64(tmp, dst); + dst = tmp; + } + gen_pc_plus_diff(s, lr, curr_insn_len(s)); + gen_a64_set_pc(s, dst); + set_btype_for_blr(s); + s->base.is_jmp = DISAS_JUMP; + return true; +} + +static bool trans_RETA(DisasContext *s, arg_reta *a) +{ + TCGv_i64 dst; + + dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m); + gen_a64_set_pc(s, dst); + s->base.is_jmp = DISAS_JUMP; + return true; +} + +static bool trans_BRA(DisasContext *s, arg_bra *a) +{ + TCGv_i64 dst; + + if (!dc_isar_feature(aa64_pauth, s)) { + return false; + } + dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m); + gen_a64_set_pc(s, dst); + set_btype_for_br(s, a->rn); + s->base.is_jmp = DISAS_JUMP; + return true; +} + +static bool trans_BLRA(DisasContext *s, arg_bra *a) +{ + TCGv_i64 dst, lr; + + if (!dc_isar_feature(aa64_pauth, s)) { + return false; + } + dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m); + lr = cpu_reg(s, 30); + if (dst == lr) { + TCGv_i64 tmp = tcg_temp_new_i64(); + tcg_gen_mov_i64(tmp, dst); + dst = tmp; + } + gen_pc_plus_diff(s, lr, curr_insn_len(s)); + gen_a64_set_pc(s, dst); + set_btype_for_blr(s); + s->base.is_jmp = DISAS_JUMP; + return true; +} + +static bool trans_ERET(DisasContext *s, arg_ERET *a) +{ + TCGv_i64 dst; + + if (s->current_el == 0) { + return false; + } + if (s->trap_eret) { + gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2); + return true; + } + dst = tcg_temp_new_i64(); + tcg_gen_ld_i64(dst, tcg_env, + offsetof(CPUARMState, elr_el[s->current_el])); + + translator_io_start(&s->base); + + gen_helper_exception_return(tcg_env, dst); + /* Must exit loop to check 
un-masked IRQs */ + s->base.is_jmp = DISAS_EXIT; + return true; +} + +static bool trans_ERETA(DisasContext *s, arg_reta *a) +{ + TCGv_i64 dst; + + if (!dc_isar_feature(aa64_pauth, s)) { + return false; + } + if (s->current_el == 0) { + return false; + } + /* The FGT trap takes precedence over an auth trap. */ + if (s->trap_eret) { + gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2); + return true; + } + dst = tcg_temp_new_i64(); + tcg_gen_ld_i64(dst, tcg_env, + offsetof(CPUARMState, elr_el[s->current_el])); + + dst = auth_branch_target(s, dst, cpu_X[31], !a->m); + + translator_io_start(&s->base); + + gen_helper_exception_return(tcg_env, dst); + /* Must exit loop to check un-masked IRQs */ + s->base.is_jmp = DISAS_EXIT; + return true; +} + +static bool trans_NOP(DisasContext *s, arg_NOP *a) +{ + return true; +} + +static bool trans_YIELD(DisasContext *s, arg_YIELD *a) +{ + /* + * When running in MTTCG we don't generate jumps to the yield and + * WFE helpers as it won't affect the scheduling of other vCPUs. + * If we wanted to more completely model WFE/SEV so we don't busy + * spin unnecessarily we would need to do something more involved. + */ + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + s->base.is_jmp = DISAS_YIELD; + } + return true; +} + +static bool trans_WFI(DisasContext *s, arg_WFI *a) +{ + s->base.is_jmp = DISAS_WFI; + return true; +} + +static bool trans_WFE(DisasContext *s, arg_WFI *a) +{ + /* + * When running in MTTCG we don't generate jumps to the yield and + * WFE helpers as it won't affect the scheduling of other vCPUs. + * If we wanted to more completely model WFE/SEV so we don't busy + * spin unnecessarily we would need to do something more involved. + */ + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + s->base.is_jmp = DISAS_WFE; + } + return true; +} + +static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) +{ + if (s->pauth_active) { + gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]); + } + return true; +} + +static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) +{ + if (s->pauth_active) { + gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); + } + return true; +} + +static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) +{ + if (s->pauth_active) { + gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); + } + return true; +} + +static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) +{ + if (s->pauth_active) { + gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); + } + return true; +} + +static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) +{ + if (s->pauth_active) { + gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); + } + return true; +} + +static bool trans_ESB(DisasContext *s, arg_ESB *a) +{ + /* Without RAS, we must implement this as NOP. */ + if (dc_isar_feature(aa64_ras, s)) { + /* + * QEMU does not have a source of physical SErrors, + * so we are only concerned with virtual SErrors. + * The pseudocode in the ARM for this case is + * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then + * AArch64.vESBOperation(); + * Most of the condition can be evaluated at translation time. + * Test for EL2 present, and defer test for SEL2 to runtime. 
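set_btype_for_br()/set_btype_for_blr() earlier in this hunk encode the FEAT_BTI rules for indirect branches; condensed into one plain-C helper (a sketch, not patch code):

#include <stdbool.h>

static int btype_for_branch(bool is_blr, int rn, bool guarded_page)
{
    if (is_blr) {
        return 2;   /* BLR always writes BTYPE 2 */
    }
    /* BR via x16/x17, or from a non-guarded page, writes 1; else 3 */
    return (rn == 16 || rn == 17 || !guarded_page) ? 1 : 3;
}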
*/ - if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { - s->base.is_jmp = DISAS_YIELD; - } - break; - case 0b00010: /* WFE */ - if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { - s->base.is_jmp = DISAS_WFE; - } - break; - case 0b00100: /* SEV */ - case 0b00101: /* SEVL */ - /* we treat all as NOP at least for now */ - break; - case 0b00111: /* XPACLRI */ - if (s->pauth_active) { - gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]); - } - break; - case 0b01000: /* PACIA1716 */ - if (s->pauth_active) { - gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); - } - break; - case 0b01010: /* PACIB1716 */ - if (s->pauth_active) { - gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); - } - break; - case 0b01100: /* AUTIA1716 */ - if (s->pauth_active) { - gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); - } - break; - case 0b01110: /* AUTIB1716 */ - if (s->pauth_active) { - gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); - } - break; - case 0b11000: /* PACIAZ */ - if (s->pauth_active) { - gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], - new_tmp_a64_zero(s)); - } - break; - case 0b11001: /* PACIASP */ - if (s->pauth_active) { - gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); + if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { + gen_helper_vesb(tcg_env); } - break; - case 0b11010: /* PACIBZ */ - if (s->pauth_active) { - gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], - new_tmp_a64_zero(s)); - } - break; - case 0b11011: /* PACIBSP */ - if (s->pauth_active) { - gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); - } - break; - case 0b11100: /* AUTIAZ */ - if (s->pauth_active) { - gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], - new_tmp_a64_zero(s)); - } - break; - case 0b11101: /* AUTIASP */ - if (s->pauth_active) { - gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); - } - break; - case 0b11110: /* AUTIBZ */ - if (s->pauth_active) { - gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], - new_tmp_a64_zero(s)); - } - break; - case 0b11111: /* AUTIBSP */ - if (s->pauth_active) { - gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); - } - break; - default: - /* default specified as NOP equivalent */ - break; } + return true; } -static void gen_clrex(DisasContext *s, uint32_t insn) +static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) +{ + if (s->pauth_active) { + gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); + } + return true; +} + +static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) +{ + if (s->pauth_active) { + gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); + } + return true; +} + +static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) +{ + if (s->pauth_active) { + gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); + } + return true; +} + +static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) +{ + if (s->pauth_active) { + gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); + } + return true; +} + +static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) +{ + if (s->pauth_active) { + gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); + } + return true; +} + +static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) +{ + if (s->pauth_active) { + gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); + } + return true; +} + +static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) +{ + if (s->pauth_active) { + gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); + } + return true; +} + +static bool 
trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) +{ + if (s->pauth_active) { + gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); + } + return true; +} + +static bool trans_CLREX(DisasContext *s, arg_CLREX *a) { tcg_gen_movi_i64(cpu_exclusive_addr, -1); + return true; } -/* CLREX, DSB, DMB, ISB */ -static void handle_sync(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) { + /* We handle DSB and DMB the same way */ TCGBar bar; - if (op1 != 3) { - unallocated_encoding(s); - return; + switch (a->types) { + case 1: /* MBReqTypes_Reads */ + bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; + break; + case 2: /* MBReqTypes_Writes */ + bar = TCG_BAR_SC | TCG_MO_ST_ST; + break; + default: /* MBReqTypes_All */ + bar = TCG_BAR_SC | TCG_MO_ALL; + break; } + tcg_gen_mb(bar); + return true; +} - switch (op2) { - case 2: /* CLREX */ - gen_clrex(s, insn); - return; - case 4: /* DSB */ - case 5: /* DMB */ - switch (crm & 3) { - case 1: /* MBReqTypes_Reads */ - bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; - break; - case 2: /* MBReqTypes_Writes */ - bar = TCG_BAR_SC | TCG_MO_ST_ST; - break; - default: /* MBReqTypes_All */ - bar = TCG_BAR_SC | TCG_MO_ALL; - break; - } - tcg_gen_mb(bar); - return; - case 6: /* ISB */ - /* We need to break the TB after this insn to execute - * a self-modified code correctly and also to take - * any pending interrupts immediately. - */ - reset_btype(s); - gen_goto_tb(s, 0, s->base.pc_next); - return; +static bool trans_ISB(DisasContext *s, arg_ISB *a) +{ + /* + * We need to break the TB after this insn to execute + * self-modifying code correctly and also to take + * any pending interrupts immediately. + */ + reset_btype(s); + gen_goto_tb(s, 0, 4); + return true; +} - case 7: /* SB */ - if (crm != 0 || !dc_isar_feature(aa64_sb, s)) { - goto do_unallocated; - } - /* - * TODO: There is no speculation barrier opcode for TCG; - * MB and end the TB instead. - */ - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); - gen_goto_tb(s, 0, s->base.pc_next); - return; +static bool trans_SB(DisasContext *s, arg_SB *a) +{ + if (!dc_isar_feature(aa64_sb, s)) { + return false; + } + /* + * TODO: There is no speculation barrier opcode for TCG; + * MB and end the TB instead. + */ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); + gen_goto_tb(s, 0, 4); + return true; +} - default: - do_unallocated: - unallocated_encoding(s); - return; +static bool trans_CFINV(DisasContext *s, arg_CFINV *a) +{ + if (!dc_isar_feature(aa64_condm_4, s)) { + return false; } + tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); + return true; } -static void gen_xaflag(void) +static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) { - TCGv_i32 z = tcg_temp_new_i32(); + TCGv_i32 z; + + if (!dc_isar_feature(aa64_condm_5, s)) { + return false; + } + + z = tcg_temp_new_i32(); tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); @@ -1611,11 +1908,15 @@ static void gen_xaflag(void) /* C | Z */ tcg_gen_or_i32(cpu_CF, cpu_CF, z); - tcg_temp_free_i32(z); + return true; } -static void gen_axflag(void) +static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) { + if (!dc_isar_feature(aa64_condm_5, s)) { + return false; + } + tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? 
-1 : 0 */ tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ @@ -1624,144 +1925,134 @@ static void gen_axflag(void) tcg_gen_movi_i32(cpu_NF, 0); tcg_gen_movi_i32(cpu_VF, 0); + + return true; } -/* MSR (immediate) - move immediate to processor state field */ -static void handle_msr_i(DisasContext *s, uint32_t insn, - unsigned int op1, unsigned int op2, unsigned int crm) +static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) { - TCGv_i32 t1; - int op = op1 << 3 | op2; - - /* End the TB by default, chaining is ok. */ + if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { + return false; + } + if (a->imm & 1) { + set_pstate_bits(PSTATE_UAO); + } else { + clear_pstate_bits(PSTATE_UAO); + } + gen_rebuild_hflags(s); s->base.is_jmp = DISAS_TOO_MANY; + return true; +} - switch (op) { - case 0x00: /* CFINV */ - if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) { - goto do_unallocated; - } - tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); - s->base.is_jmp = DISAS_NEXT; - break; - - case 0x01: /* XAFlag */ - if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) { - goto do_unallocated; - } - gen_xaflag(); - s->base.is_jmp = DISAS_NEXT; - break; - - case 0x02: /* AXFlag */ - if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) { - goto do_unallocated; - } - gen_axflag(); - s->base.is_jmp = DISAS_NEXT; - break; +static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) +{ + if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { + return false; + } + if (a->imm & 1) { + set_pstate_bits(PSTATE_PAN); + } else { + clear_pstate_bits(PSTATE_PAN); + } + gen_rebuild_hflags(s); + s->base.is_jmp = DISAS_TOO_MANY; + return true; +} - case 0x03: /* UAO */ - if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { - goto do_unallocated; - } - if (crm & 1) { - set_pstate_bits(PSTATE_UAO); - } else { - clear_pstate_bits(PSTATE_UAO); - } - t1 = tcg_const_i32(s->current_el); - gen_helper_rebuild_hflags_a64(cpu_env, t1); - tcg_temp_free_i32(t1); - break; +static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) +{ + if (s->current_el == 0) { + return false; + } + gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP)); + s->base.is_jmp = DISAS_TOO_MANY; + return true; +} - case 0x04: /* PAN */ - if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { - goto do_unallocated; - } - if (crm & 1) { - set_pstate_bits(PSTATE_PAN); - } else { - clear_pstate_bits(PSTATE_PAN); - } - t1 = tcg_const_i32(s->current_el); - gen_helper_rebuild_hflags_a64(cpu_env, t1); - tcg_temp_free_i32(t1); - break; +static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) +{ + if (!dc_isar_feature(aa64_ssbs, s)) { + return false; + } + if (a->imm & 1) { + set_pstate_bits(PSTATE_SSBS); + } else { + clear_pstate_bits(PSTATE_SSBS); + } + /* Don't need to rebuild hflags since SSBS is a nop */ + s->base.is_jmp = DISAS_TOO_MANY; + return true; +} - case 0x05: /* SPSel */ - if (s->current_el == 0) { - goto do_unallocated; - } - t1 = tcg_const_i32(crm & PSTATE_SP); - gen_helper_msr_i_spsel(cpu_env, t1); - tcg_temp_free_i32(t1); - break; +static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) +{ + if (!dc_isar_feature(aa64_dit, s)) { + return false; + } + if (a->imm & 1) { + set_pstate_bits(PSTATE_DIT); + } else { + clear_pstate_bits(PSTATE_DIT); + } + /* There's no need to rebuild hflags because DIT is a nop */ + s->base.is_jmp = DISAS_TOO_MANY; + return true; +} - case 0x19: /* SSBS */ - if (!dc_isar_feature(aa64_ssbs, s)) { - goto do_unallocated; - } - if (crm & 1) { - set_pstate_bits(PSTATE_SSBS); +static bool trans_MSR_i_TCO(DisasContext 
*s, arg_i *a) +{ + if (dc_isar_feature(aa64_mte, s)) { + /* Full MTE is enabled -- set the TCO bit as directed. */ + if (a->imm & 1) { + set_pstate_bits(PSTATE_TCO); } else { - clear_pstate_bits(PSTATE_SSBS); - } - /* Don't need to rebuild hflags since SSBS is a nop */ - break; + clear_pstate_bits(PSTATE_TCO); + } + gen_rebuild_hflags(s); + /* Many factors, including TCO, go into MTE_ACTIVE. */ + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; + return true; + } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { + /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ + return true; + } else { + /* Insn not present */ + return false; + } +} - case 0x1a: /* DIT */ - if (!dc_isar_feature(aa64_dit, s)) { - goto do_unallocated; - } - if (crm & 1) { - set_pstate_bits(PSTATE_DIT); - } else { - clear_pstate_bits(PSTATE_DIT); - } - /* There's no need to rebuild hflags because DIT is a nop */ - break; +static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) +{ + gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); + s->base.is_jmp = DISAS_TOO_MANY; + return true; +} - case 0x1e: /* DAIFSet */ - t1 = tcg_const_i32(crm); - gen_helper_msr_i_daifset(cpu_env, t1); - tcg_temp_free_i32(t1); - break; +static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) +{ + gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); + /* Exit the cpu loop to re-evaluate pending IRQs. */ + s->base.is_jmp = DISAS_UPDATE_EXIT; + return true; +} - case 0x1f: /* DAIFClear */ - t1 = tcg_const_i32(crm); - gen_helper_msr_i_daifclear(cpu_env, t1); - tcg_temp_free_i32(t1); - /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */ - s->base.is_jmp = DISAS_UPDATE_EXIT; - break; +static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) +{ + if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { + return false; + } + if (sme_access_check(s)) { + int old = s->pstate_sm | (s->pstate_za << 1); + int new = a->imm * 3; - case 0x1c: /* TCO */ - if (dc_isar_feature(aa64_mte, s)) { - /* Full MTE is enabled -- set the TCO bit as directed. */ - if (crm & 1) { - set_pstate_bits(PSTATE_TCO); - } else { - clear_pstate_bits(PSTATE_TCO); - } - t1 = tcg_const_i32(s->current_el); - gen_helper_rebuild_hflags_a64(cpu_env, t1); - tcg_temp_free_i32(t1); - /* Many factors, including TCO, go into MTE_ACTIVE. */ - s->base.is_jmp = DISAS_UPDATE_NOCHAIN; - } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { - /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ - s->base.is_jmp = DISAS_NEXT; - } else { - goto do_unallocated; + if ((old ^ new) & a->mask) { + /* At least one bit changes. 
*/ + gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), + tcg_constant_i32(a->mask)); + s->base.is_jmp = DISAS_TOO_MANY; } - break; - - default: - do_unallocated: - unallocated_encoding(s); - return; } + return true; } static void gen_get_nzcv(TCGv_i64 tcg_rt) @@ -1781,9 +2072,6 @@ static void gen_get_nzcv(TCGv_i64 tcg_rt) tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); /* generate result */ tcg_gen_extu_i32_i64(tcg_rt, nzcv); - - tcg_temp_free_i32(nzcv); - tcg_temp_free_i32(tmp); } static void gen_set_nzcv(TCGv_i64 tcg_rt) @@ -1804,7 +2092,29 @@ static void gen_set_nzcv(TCGv_i64 tcg_rt) /* bit 28, V */ tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); - tcg_temp_free_i32(nzcv); +} + +static void gen_sysreg_undef(DisasContext *s, bool isread, + uint8_t op0, uint8_t op1, uint8_t op2, + uint8_t crn, uint8_t crm, uint8_t rt) +{ + /* + * Generate code to emit an UNDEF with correct syndrome + * information for a failed system register access. + * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, + * but if FEAT_IDST is implemented then read accesses to registers + * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP + * syndrome. + */ + uint32_t syndrome; + + if (isread && dc_isar_feature(aa64_ids, s) && + arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { + syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); + } else { + syndrome = syn_uncategorized(); + } + gen_exception_insn(s, 0, EXCP_UDEF, syndrome); } /* MRS - move from system register @@ -1814,16 +2124,38 @@ static void gen_set_nzcv(TCGv_i64 tcg_rt) * These are all essentially the same insn in 'read' and 'write' * versions, with varying op0 fields. */ -static void handle_sys(DisasContext *s, uint32_t insn, bool isread, +static void handle_sys(DisasContext *s, bool isread, unsigned int op0, unsigned int op1, unsigned int op2, unsigned int crn, unsigned int crm, unsigned int rt) { - const ARMCPRegInfo *ri; + uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, + crn, crm, op0, op1, op2); + const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); + bool need_exit_tb = false; + bool nv_trap_to_el2 = false; + bool nv_redirect_reg = false; + bool skip_fp_access_checks = false; + bool nv2_mem_redirect = false; + TCGv_ptr tcg_ri = NULL; TCGv_i64 tcg_rt; + uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); - ri = get_arm_cp_reginfo(s->cp_regs, - ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, - crn, crm, op0, op1, op2)); + if (crn == 11 || crn == 15) { + /* + * Check for TIDCP trap, which must take precedence over + * the UNDEF for "no such register" etc. + */ + switch (s->current_el) { + case 0: + if (dc_isar_feature(aa64_tidcp1, s)) { + gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); + } + break; + case 1: + gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); + break; + } + } if (!ri) { /* Unknown register; this might be a guest error or a QEMU @@ -1832,43 +2164,159 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", isread ? 
"read" : "write", op0, op1, crn, crm, op2); - unallocated_encoding(s); + gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); return; } + if (s->nv2 && ri->nv2_redirect_offset) { + /* + * Some registers always redirect to memory; some only do so if + * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in + * pairs which share an offset; see the table in R_CSRPQ). + */ + if (ri->nv2_redirect_offset & NV2_REDIR_NV1) { + nv2_mem_redirect = s->nv1; + } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) { + nv2_mem_redirect = !s->nv1; + } else { + nv2_mem_redirect = true; + } + } + /* Check access permissions */ if (!cp_access_ok(s->current_el, ri, isread)) { - unallocated_encoding(s); - return; + /* + * FEAT_NV/NV2 handling does not do the usual FP access checks + * for registers only accessible at EL2 (though it *does* do them + * for registers accessible at EL1). + */ + skip_fp_access_checks = true; + if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) { + /* + * This is one of the few EL2 registers which should redirect + * to the equivalent EL1 register. We do that after running + * the EL2 register's accessfn. + */ + nv_redirect_reg = true; + assert(!nv2_mem_redirect); + } else if (nv2_mem_redirect) { + /* + * NV2 redirect-to-memory takes precedence over trap to EL2 or + * UNDEF to EL1. + */ + } else if (s->nv && arm_cpreg_traps_in_nv(ri)) { + /* + * This register / instruction exists and is an EL2 register, so + * we must trap to EL2 if accessed in nested virtualization EL1 + * instead of UNDEFing. We'll do that after the usual access checks. + * (This makes a difference only for a couple of registers like + * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority + * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have + * an accessfn which does nothing when called from EL1, because + * the trap-to-EL3 controls which would apply to that register + * at EL2 don't take priority over the FEAT_NV trap-to-EL2.) + */ + nv_trap_to_el2 = true; + } else { + gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); + return; + } } - if (ri->accessfn) { + if (ri->accessfn || (ri->fgt && s->fgt_active)) { /* Emit code to perform further access permissions checks at * runtime; this may result in an exception. */ - TCGv_ptr tmpptr; - TCGv_i32 tcg_syn, tcg_isread; - uint32_t syndrome; - - gen_a64_set_pc_im(s->pc_curr); - tmpptr = tcg_const_ptr(ri); - syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); - tcg_syn = tcg_const_i32(syndrome); - tcg_isread = tcg_const_i32(isread); - gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread); - tcg_temp_free_ptr(tmpptr); - tcg_temp_free_i32(tcg_syn); - tcg_temp_free_i32(tcg_isread); + gen_a64_update_pc(s, 0); + tcg_ri = tcg_temp_new_ptr(); + gen_helper_access_check_cp_reg(tcg_ri, tcg_env, + tcg_constant_i32(key), + tcg_constant_i32(syndrome), + tcg_constant_i32(isread)); } else if (ri->type & ARM_CP_RAISES_EXC) { /* * The readfn or writefn might raise an exception; * synchronize the CPU state in case it does. 
*/ - gen_a64_set_pc_im(s->pc_curr); + gen_a64_update_pc(s, 0); + } + + if (!skip_fp_access_checks) { + if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { + return; + } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { + return; + } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { + return; + } + } + + if (nv_trap_to_el2) { + gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); + return; + } + + if (nv_redirect_reg) { + /* + * FEAT_NV2 redirection of an EL2 register to an EL1 register. + * Conveniently in all cases the encoding of the EL1 register is + * identical to the EL2 register except that opc1 is 0. + * Get the reginfo for the EL1 register to use for the actual access. + * We don't use the EL1 register's access function, and + * fine-grained-traps on EL1 also do not apply here. + */ + key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, + crn, crm, op0, 0, op2); + ri = get_arm_cp_reginfo(s->cp_regs, key); + assert(ri); + assert(cp_access_ok(s->current_el, ri, isread)); + /* + * We might not have done an update_pc earlier, so check we don't + * need it. We could support this in future if necessary. + */ + assert(!(ri->type & ARM_CP_RAISES_EXC)); + } + + if (nv2_mem_redirect) { + /* + * This system register is being redirected into an EL2 memory access. + * This means it is not an IO operation, doesn't change hflags, + * and need not end the TB, because it has no side effects. + * + * The access is 64-bit single copy atomic, guaranteed aligned because + * of the definition of VCNR_EL2. Its endianness depends on + * SCTLR_EL2.EE, not on the data endianness of EL1. + * It is done under either the EL2 translation regime or the EL2&0 + * translation regime, depending on HCR_EL2.E2H. It behaves as if + * PSTATE.PAN is 0. + */ + TCGv_i64 ptr = tcg_temp_new_i64(); + MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN; + ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2; + int memidx = arm_to_core_mmu_idx(armmemidx); + uint32_t syn; + + mop |= (s->nv2_mem_be ? MO_BE : MO_LE); + + tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2)); + tcg_gen_addi_i64(ptr, ptr, + (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK)); + tcg_rt = cpu_reg(s, rt); + + syn = syn_data_abort_vncr(0, !isread, 0); + disas_set_insn_syndrome(s, syn); + if (isread) { + tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop); + } else { + tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop); + } + return; } /* Handle special cases first */ - switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) { + switch (ri->type & ARM_CP_SPECIAL_MASK) { + case 0: + break; case ARM_CP_NOP: return; case ARM_CP_NZCV: @@ -1880,30 +2328,33 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, } return; case ARM_CP_CURRENTEL: - /* Reads as current EL value from pstate, which is + { + /* + * Reads as current EL value from pstate, which is * guaranteed to be constant by the tb flags. + * For nested virt we should report EL2. */ + int el = s->nv ? 2 : s->current_el; tcg_rt = cpu_reg(s, rt); - tcg_gen_movi_i64(tcg_rt, s->current_el << 2); + tcg_gen_movi_i64(tcg_rt, el << 2); return; + } case ARM_CP_DC_ZVA: /* Writes clear the aligned block of memory which rt points into. 
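The ARM_CP_DC_ZVA case continuing below zeroes the naturally aligned block containing the address in Rt (block size advertised via DCZID_EL0); the MTE variants additionally take the allocation tag from bits 63:56 of Rt. A host-side model of the basic zeroing only (illustrative; the real helper goes through the softmmu and MTE checks):

#include <stdint.h>
#include <string.h>

static void dc_zva_model(uint8_t *ram, uint64_t vaddr, int log2_blocksize)
{
    uint64_t blocksize = 1ULL << log2_blocksize;
    uint64_t base = vaddr & ~(blocksize - 1);   /* align down */

    memset(ram + base, 0, blocksize);
}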
*/ if (s->mte_active[0]) { - TCGv_i32 t_desc; int desc = 0; desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); - t_desc = tcg_const_i32(desc); - tcg_rt = new_tmp_a64(s); - gen_helper_mte_check_zva(tcg_rt, cpu_env, t_desc, cpu_reg(s, rt)); - tcg_temp_free_i32(t_desc); + tcg_rt = tcg_temp_new_i64(); + gen_helper_mte_check_zva(tcg_rt, tcg_env, + tcg_constant_i32(desc), cpu_reg(s, rt)); } else { tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); } - gen_helper_dc_zva(cpu_env, tcg_rt); + gen_helper_dc_zva(tcg_env, tcg_rt); return; case ARM_CP_DC_GVA: { @@ -1917,12 +2368,11 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, clean_addr = clean_data_tbi(s, tcg_rt); gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); - if (s->ata) { + if (s->ata[0]) { /* Extract the tag from the register to match STZGM. */ tag = tcg_temp_new_i64(); tcg_gen_shri_i64(tag, tcg_rt, 56); - gen_helper_stzgm_tags(cpu_env, clean_addr, tag); - tcg_temp_free_i64(tag); + gen_helper_stzgm_tags(tcg_env, clean_addr, tag); } } return; @@ -1933,28 +2383,23 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ tcg_rt = cpu_reg(s, rt); clean_addr = clean_data_tbi(s, tcg_rt); - gen_helper_dc_zva(cpu_env, clean_addr); + gen_helper_dc_zva(tcg_env, clean_addr); - if (s->ata) { + if (s->ata[0]) { /* Extract the tag from the register to match STZGM. */ tag = tcg_temp_new_i64(); tcg_gen_shri_i64(tag, tcg_rt, 56); - gen_helper_stzgm_tags(cpu_env, clean_addr, tag); - tcg_temp_free_i64(tag); + gen_helper_stzgm_tags(tcg_env, clean_addr, tag); } } return; default: - break; - } - if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) { - return; - } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { - return; + g_assert_not_reached(); } - if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { - gen_io_start(); + if (ri->type & ARM_CP_IO) { + /* I/O operations must end the TB here (whether read or write) */ + need_exit_tb = translator_io_start(&s->base); } tcg_rt = cpu_reg(s, rt); @@ -1963,422 +2408,124 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, if (ri->type & ARM_CP_CONST) { tcg_gen_movi_i64(tcg_rt, ri->resetvalue); } else if (ri->readfn) { - TCGv_ptr tmpptr; - tmpptr = tcg_const_ptr(ri); - gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr); - tcg_temp_free_ptr(tmpptr); + if (!tcg_ri) { + tcg_ri = gen_lookup_cp_reg(key); + } + gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); } else { - tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset); + tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); } } else { if (ri->type & ARM_CP_CONST) { /* If not forbidden by access permissions, treat as WI */ return; } else if (ri->writefn) { - TCGv_ptr tmpptr; - tmpptr = tcg_const_ptr(ri); - gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt); - tcg_temp_free_ptr(tmpptr); + if (!tcg_ri) { + tcg_ri = gen_lookup_cp_reg(key); + } + gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); } else { - tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset); + tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); } } - if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { - /* I/O operations must end the TB here (whether read or write) */ - s->base.is_jmp = DISAS_UPDATE_EXIT; - } if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { /* - * A write to any coprocessor regiser that ends a TB + * A write to any coprocessor register 
that ends a TB * must rebuild the hflags for the next TB. */ - TCGv_i32 tcg_el = tcg_const_i32(s->current_el); - gen_helper_rebuild_hflags_a64(cpu_env, tcg_el); - tcg_temp_free_i32(tcg_el); + gen_rebuild_hflags(s); /* * We default to ending the TB on a coprocessor register write, * but allow this to be suppressed by the register definition * (usually only necessary to work around guest bugs). */ + need_exit_tb = true; + } + if (need_exit_tb) { s->base.is_jmp = DISAS_UPDATE_EXIT; } } -/* System - * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0 - * +---------------------+---+-----+-----+-------+-------+-----+------+ - * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt | - * +---------------------+---+-----+-----+-------+-------+-----+------+ - */ -static void disas_system(DisasContext *s, uint32_t insn) -{ - unsigned int l, op0, op1, crn, crm, op2, rt; - l = extract32(insn, 21, 1); - op0 = extract32(insn, 19, 2); - op1 = extract32(insn, 16, 3); - crn = extract32(insn, 12, 4); - crm = extract32(insn, 8, 4); - op2 = extract32(insn, 5, 3); - rt = extract32(insn, 0, 5); - - if (op0 == 0) { - if (l || rt != 31) { - unallocated_encoding(s); - return; - } - switch (crn) { - case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */ - handle_hint(s, insn, op1, op2, crm); - break; - case 3: /* CLREX, DSB, DMB, ISB */ - handle_sync(s, insn, op1, op2, crm); - break; - case 4: /* MSR (immediate) */ - handle_msr_i(s, insn, op1, op2, crm); - break; - default: - unallocated_encoding(s); - break; - } - return; - } - handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt); +static bool trans_SYS(DisasContext *s, arg_SYS *a) +{ + handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); + return true; } -/* Exception generation - * - * 31 24 23 21 20 5 4 2 1 0 - * +-----------------+-----+------------------------+-----+----+ - * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL | - * +-----------------------+------------------------+----------+ - */ -static void disas_exc(DisasContext *s, uint32_t insn) +static bool trans_SVC(DisasContext *s, arg_i *a) { - int opc = extract32(insn, 21, 3); - int op2_ll = extract32(insn, 0, 5); - int imm16 = extract32(insn, 5, 16); - TCGv_i32 tmp; - - switch (opc) { - case 0: - /* For SVC, HVC and SMC we advance the single-step state - * machine before taking the exception. This is architecturally - * mandated, to ensure that single-stepping a system call - * instruction works properly. - */ - switch (op2_ll) { - case 1: /* SVC */ - gen_ss_advance(s); - gen_exception_insn(s, s->base.pc_next, EXCP_SWI, - syn_aa64_svc(imm16), default_exception_el(s)); - break; - case 2: /* HVC */ - if (s->current_el == 0) { - unallocated_encoding(s); - break; - } - /* The pre HVC helper handles cases when HVC gets trapped - * as an undefined insn by runtime configuration. 
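The read path at the tail of handle_sys() above dispatches on the reginfo: constant registers return resetvalue, registers with a readfn call the helper, and everything else is a direct load from CPU state at fieldoffset. The same shape in plain C, with a simplified stand-in for ARMCPRegInfo (illustrative only):

#include <stddef.h>
#include <stdint.h>

#define CP_CONST 1                    /* stand-in for ARM_CP_CONST */

typedef struct {
    int type;
    uint64_t resetvalue;
    uint64_t (*readfn)(void *env);
    size_t fieldoffset;               /* offset into the CPU state struct */
} RegInfo;

static uint64_t sysreg_read(void *env, const RegInfo *ri)
{
    if (ri->type & CP_CONST) {
        return ri->resetvalue;
    }
    if (ri->readfn) {
        return ri->readfn(env);
    }
    return *(uint64_t *)((char *)env + ri->fieldoffset);
}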
- */ - gen_a64_set_pc_im(s->pc_curr); - gen_helper_pre_hvc(cpu_env); - gen_ss_advance(s); - gen_exception_insn(s, s->base.pc_next, EXCP_HVC, - syn_aa64_hvc(imm16), 2); - break; - case 3: /* SMC */ - if (s->current_el == 0) { - unallocated_encoding(s); - break; - } - gen_a64_set_pc_im(s->pc_curr); - tmp = tcg_const_i32(syn_aa64_smc(imm16)); - gen_helper_pre_smc(cpu_env, tmp); - tcg_temp_free_i32(tmp); - gen_ss_advance(s); - gen_exception_insn(s, s->base.pc_next, EXCP_SMC, - syn_aa64_smc(imm16), 3); - break; - default: - unallocated_encoding(s); - break; - } - break; - case 1: - if (op2_ll != 0) { - unallocated_encoding(s); - break; - } - /* BRK */ - gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16)); - break; - case 2: - if (op2_ll != 0) { - unallocated_encoding(s); - break; - } - /* HLT. This has two purposes. - * Architecturally, it is an external halting debug instruction. - * Since QEMU doesn't implement external debug, we treat this as - * it is required for halting debug disabled: it will UNDEF. - * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. - */ - if (semihosting_enabled() && imm16 == 0xf000) { -#ifndef CONFIG_USER_ONLY - /* In system mode, don't allow userspace access to semihosting, - * to provide some semblance of security (and for consistency - * with our 32-bit semihosting). - */ - if (s->current_el == 0) { - unsupported_encoding(s, insn); - break; - } -#endif - gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST); - } else { - unsupported_encoding(s, insn); - } - break; - case 5: - if (op2_ll < 1 || op2_ll > 3) { - unallocated_encoding(s); - break; - } - /* DCPS1, DCPS2, DCPS3 */ - unsupported_encoding(s, insn); - break; - default: - unallocated_encoding(s); - break; + /* + * For SVC, HVC and SMC we advance the single-step state + * machine before taking the exception. This is architecturally + * mandated, to ensure that single-stepping a system call + * instruction works properly. + */ + uint32_t syndrome = syn_aa64_svc(a->imm); + if (s->fgt_svc) { + gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); + return true; } + gen_ss_advance(s); + gen_exception_insn(s, 4, EXCP_SWI, syndrome); + return true; } -/* Unconditional branch (register) - * 31 25 24 21 20 16 15 10 9 5 4 0 - * +---------------+-------+-------+-------+------+-------+ - * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 | - * +---------------+-------+-------+-------+------+-------+ - */ -static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) +static bool trans_HVC(DisasContext *s, arg_i *a) { - unsigned int opc, op2, op3, rn, op4; - unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */ - TCGv_i64 dst; - TCGv_i64 modifier; - - opc = extract32(insn, 21, 4); - op2 = extract32(insn, 16, 5); - op3 = extract32(insn, 10, 6); - rn = extract32(insn, 5, 5); - op4 = extract32(insn, 0, 5); + int target_el = s->current_el == 3 ? 
3 : 2; - if (op2 != 0x1f) { - goto do_unallocated; - } - - switch (opc) { - case 0: /* BR */ - case 1: /* BLR */ - case 2: /* RET */ - btype_mod = opc; - switch (op3) { - case 0: - /* BR, BLR, RET */ - if (op4 != 0) { - goto do_unallocated; - } - dst = cpu_reg(s, rn); - break; - - case 2: - case 3: - if (!dc_isar_feature(aa64_pauth, s)) { - goto do_unallocated; - } - if (opc == 2) { - /* RETAA, RETAB */ - if (rn != 0x1f || op4 != 0x1f) { - goto do_unallocated; - } - rn = 30; - modifier = cpu_X[31]; - } else { - /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */ - if (op4 != 0x1f) { - goto do_unallocated; - } - modifier = new_tmp_a64_zero(s); - } - if (s->pauth_active) { - dst = new_tmp_a64(s); - if (op3 == 2) { - gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier); - } else { - gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier); - } - } else { - dst = cpu_reg(s, rn); - } - break; - - default: - goto do_unallocated; - } - gen_a64_set_pc(s, dst); - /* BLR also needs to load return address */ - if (opc == 1) { - tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next); - } - break; - - case 8: /* BRAA */ - case 9: /* BLRAA */ - if (!dc_isar_feature(aa64_pauth, s)) { - goto do_unallocated; - } - if ((op3 & ~1) != 2) { - goto do_unallocated; - } - btype_mod = opc & 1; - if (s->pauth_active) { - dst = new_tmp_a64(s); - modifier = cpu_reg_sp(s, op4); - if (op3 == 2) { - gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier); - } else { - gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier); - } - } else { - dst = cpu_reg(s, rn); - } - gen_a64_set_pc(s, dst); - /* BLRAA also needs to load return address */ - if (opc == 9) { - tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next); - } - break; - - case 4: /* ERET */ - if (s->current_el == 0) { - goto do_unallocated; - } - switch (op3) { - case 0: /* ERET */ - if (op4 != 0) { - goto do_unallocated; - } - dst = tcg_temp_new_i64(); - tcg_gen_ld_i64(dst, cpu_env, - offsetof(CPUARMState, elr_el[s->current_el])); - break; - - case 2: /* ERETAA */ - case 3: /* ERETAB */ - if (!dc_isar_feature(aa64_pauth, s)) { - goto do_unallocated; - } - if (rn != 0x1f || op4 != 0x1f) { - goto do_unallocated; - } - dst = tcg_temp_new_i64(); - tcg_gen_ld_i64(dst, cpu_env, - offsetof(CPUARMState, elr_el[s->current_el])); - if (s->pauth_active) { - modifier = cpu_X[31]; - if (op3 == 2) { - gen_helper_autia(dst, cpu_env, dst, modifier); - } else { - gen_helper_autib(dst, cpu_env, dst, modifier); - } - } - break; - - default: - goto do_unallocated; - } - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } - - gen_helper_exception_return(cpu_env, dst); - tcg_temp_free_i64(dst); - /* Must exit loop to check un-masked IRQs */ - s->base.is_jmp = DISAS_EXIT; - return; - - case 5: /* DRPS */ - if (op3 != 0 || op4 != 0 || rn != 0x1f) { - goto do_unallocated; - } else { - unsupported_encoding(s, insn); - } - return; - - default: - do_unallocated: + if (s->current_el == 0) { unallocated_encoding(s); - return; + return true; } + /* + * The pre HVC helper handles cases when HVC gets trapped + * as an undefined insn by runtime configuration. + */ + gen_a64_update_pc(s, 0); + gen_helper_pre_hvc(tcg_env); + /* Architecture requires ss advance before we do the actual work */ + gen_ss_advance(s); + gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el); + return true; +} - switch (btype_mod) { - case 0: /* BR */ - if (dc_isar_feature(aa64_bti, s)) { - /* BR to {x16,x17} or !guard -> 1, else 3. */ - set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 
1 : 3); - } - break; - - case 1: /* BLR */ - if (dc_isar_feature(aa64_bti, s)) { - /* BLR sets BTYPE to 2, regardless of source guarded page. */ - set_btype(s, 2); - } - break; - - default: /* RET or none of the above. */ - /* BTYPE will be set to 0 by normal end-of-insn processing. */ - break; +static bool trans_SMC(DisasContext *s, arg_i *a) +{ + if (s->current_el == 0) { + unallocated_encoding(s); + return true; } + gen_a64_update_pc(s, 0); + gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); + /* Architecture requires ss advance before we do the actual work */ + gen_ss_advance(s); + gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); + return true; +} - s->base.is_jmp = DISAS_JUMP; +static bool trans_BRK(DisasContext *s, arg_i *a) +{ + gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); + return true; } -/* Branches, exception generating and system instructions */ -static void disas_b_exc_sys(DisasContext *s, uint32_t insn) +static bool trans_HLT(DisasContext *s, arg_i *a) { - switch (extract32(insn, 25, 7)) { - case 0x0a: case 0x0b: - case 0x4a: case 0x4b: /* Unconditional branch (immediate) */ - disas_uncond_b_imm(s, insn); - break; - case 0x1a: case 0x5a: /* Compare & branch (immediate) */ - disas_comp_b_imm(s, insn); - break; - case 0x1b: case 0x5b: /* Test & branch (immediate) */ - disas_test_b_imm(s, insn); - break; - case 0x2a: /* Conditional branch (immediate) */ - disas_cond_b_imm(s, insn); - break; - case 0x6a: /* Exception generation / System */ - if (insn & (1 << 24)) { - if (extract32(insn, 22, 2) == 0) { - disas_system(s, insn); - } else { - unallocated_encoding(s); - } - } else { - disas_exc(s, insn); - } - break; - case 0x6b: /* Unconditional branch (register) */ - disas_uncond_b_reg(s, insn); - break; - default: + /* + * HLT. This has two purposes. + * Architecturally, it is an external halting debug instruction. + * Since QEMU doesn't implement external debug, we treat this as + * it is required for halting debug disabled: it will UNDEF. + * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. + */ + if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { + gen_exception_internal_insn(s, EXCP_SEMIHOST); + } else { unallocated_encoding(s); - break; } + return true; } /* @@ -2392,19 +2539,22 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn) * races in multi-threaded linux-user and when MTTCG softmmu is * enabled. */ -static void gen_load_exclusive(DisasContext *s, int rt, int rt2, - TCGv_i64 addr, int size, bool is_pair) +static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, + int size, bool is_pair) { int idx = get_mem_index(s); - MemOp memop = s->be_data; + TCGv_i64 dirty_addr, clean_addr; + MemOp memop = check_atomic_align(s, rn, size + is_pair); + + s->is_ldex = true; + dirty_addr = cpu_reg_sp(s, rn); + clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); g_assert(size <= 3); if (is_pair) { g_assert(size >= 2); if (size == 2) { - /* The pair must be single-copy atomic for the doubleword. 
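 *
 * Aside, not part of this patch: in the quadword case below, the new
 * code gets the required atomicity from one 128-bit access instead of
 * two 64-bit loads:
 *
 *     TCGv_i128 t16 = tcg_temp_new_i128();
 *     tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
 *     tcg_gen_extr_i128_i64(lo, hi, t16);   /* lo = bits 63:0, hi = 127:64 */
 *
 * so endianness reduces to choosing which half lands in which register.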
*/ - memop |= MO_64 | MO_ALIGN; - tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop); + tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); if (s->be_data == MO_LE) { tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); @@ -2413,30 +2563,29 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); } } else { - /* The pair must be single-copy atomic for *each* doubleword, not - the entire quadword, however it must be quadword aligned. */ - memop |= MO_64; - tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, - memop | MO_ALIGN_16); + TCGv_i128 t16 = tcg_temp_new_i128(); - TCGv_i64 addr2 = tcg_temp_new_i64(); - tcg_gen_addi_i64(addr2, addr, 8); - tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop); - tcg_temp_free_i64(addr2); + tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); + if (s->be_data == MO_LE) { + tcg_gen_extr_i128_i64(cpu_exclusive_val, + cpu_exclusive_high, t16); + } else { + tcg_gen_extr_i128_i64(cpu_exclusive_high, + cpu_exclusive_val, t16); + } tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); } } else { - memop |= size | MO_ALIGN; - tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop); + tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); } - tcg_gen_mov_i64(cpu_exclusive_addr, addr); + tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); } static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, - TCGv_i64 addr, int size, int is_pair) + int rn, int size, int is_pair) { /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] * && (!is_pair || env->exclusive_high == [addr + datasize])) { @@ -2452,9 +2601,46 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, */ TCGLabel *fail_label = gen_new_label(); TCGLabel *done_label = gen_new_label(); - TCGv_i64 tmp; + TCGv_i64 tmp, clean_addr; + MemOp memop; - tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label); + /* + * FIXME: We are out of spec here. We have recorded only the address + * from load_exclusive, not the entire range, and we assume that the + * size of the access on both sides match. The architecture allows the + * store to be smaller than the load, so long as the stored bytes are + * within the range recorded by the load. + */ + + /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ + clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); + + /* + * The write, and any associated faults, only happen if the virtual + * and physical addresses pass the exclusive monitor check. These + * faults are exceedingly unlikely, because normally the guest uses + * the exact same address register for the load_exclusive, and we + * would have recognized these faults there. + * + * It is possible to trigger an alignment fault pre-LSE2, e.g. with an + * unaligned 4-byte write within the range of an aligned 8-byte load. + * With LSE2, the store would need to cross a 16-byte boundary when the + * load did not, which would mean the store is outside the range + * recorded for the monitor, which would have failed a corrected monitor + * check above. For now, we assume no size change and retain the + * MO_ALIGN to let tcg know what we checked in the load_exclusive. 
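 *
 * Worked example of the pre-LSE2 case (illustration only): an aligned
 * 8-byte LDXR at 0x1000 records the range [0x1000, 0x1007]; a 4-byte
 * STXR to 0x1002 stays inside that range, so the monitor check passes,
 * yet the store itself is unaligned and may fault:
 *
 *     ldxr  x0, [x1]        // x1 = 0x1000, monitor covers 8 bytes
 *     stxr  w2, w0, [x3]    // x3 = 0x1002: in range, but unaligned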
+ * + * It is possible to trigger an MTE fault, by performing the load with + * a virtual address with a valid tag and performing the store with the + * same virtual address and a different invalid tag. + */ + memop = size + is_pair; + if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { + memop |= MO_ALIGN; + } + memop = finalize_memop(s, memop); + gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); tmp = tcg_temp_new_i64(); if (is_pair) { @@ -2466,39 +2652,46 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, } tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, tmp, - get_mem_index(s), - MO_64 | MO_ALIGN | s->be_data); + get_mem_index(s), memop); tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); - } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { - if (!HAVE_CMPXCHG128) { - gen_helper_exit_atomic(cpu_env); - s->base.is_jmp = DISAS_NORETURN; - } else if (s->be_data == MO_LE) { - gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env, - cpu_exclusive_addr, - cpu_reg(s, rt), - cpu_reg(s, rt2)); - } else { - gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env, - cpu_exclusive_addr, - cpu_reg(s, rt), - cpu_reg(s, rt2)); - } - } else if (s->be_data == MO_LE) { - gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr, - cpu_reg(s, rt), cpu_reg(s, rt2)); } else { - gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr, - cpu_reg(s, rt), cpu_reg(s, rt2)); + TCGv_i128 t16 = tcg_temp_new_i128(); + TCGv_i128 c16 = tcg_temp_new_i128(); + TCGv_i64 a, b; + + if (s->be_data == MO_LE) { + tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); + tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, + cpu_exclusive_high); + } else { + tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); + tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, + cpu_exclusive_val); + } + + tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, + get_mem_index(s), memop); + + a = tcg_temp_new_i64(); + b = tcg_temp_new_i64(); + if (s->be_data == MO_LE) { + tcg_gen_extr_i128_i64(a, b, t16); + } else { + tcg_gen_extr_i128_i64(b, a, t16); + } + + tcg_gen_xor_i64(a, a, cpu_exclusive_val); + tcg_gen_xor_i64(b, b, cpu_exclusive_high); + tcg_gen_or_i64(tmp, a, b); + + tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); } } else { tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, - cpu_reg(s, rt), get_mem_index(s), - size | MO_ALIGN | s->be_data); + cpu_reg(s, rt), get_mem_index(s), memop); tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); } tcg_gen_mov_i64(cpu_reg(s, rd), tmp); - tcg_temp_free_i64(tmp); tcg_gen_br(done_label); gen_set_label(fail_label); @@ -2514,13 +2707,15 @@ static void gen_compare_and_swap(DisasContext *s, int rs, int rt, TCGv_i64 tcg_rt = cpu_reg(s, rt); int memidx = get_mem_index(s); TCGv_i64 clean_addr; + MemOp memop; if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); - tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx, - size | MO_ALIGN | s->be_data); + memop = check_atomic_align(s, rn, size); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); + tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, + memidx, memop); } static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, @@ -2532,13 +2727,15 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, TCGv_i64 t2 = cpu_reg(s, rt + 1); TCGv_i64 clean_addr; int memidx = 
get_mem_index(s); + MemOp memop; if (rn == 31) { gen_check_sp_alignment(s); } /* This is a single atomic access, despite the "pair". */ - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1); + memop = check_atomic_align(s, rn, size + 1); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); if (size == 2) { TCGv_i64 cmp = tcg_temp_new_i64(); @@ -2552,1157 +2749,914 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, tcg_gen_concat32_i64(cmp, s2, s1); } - tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, - MO_64 | MO_ALIGN | s->be_data); - tcg_temp_free_i64(val); + tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); if (s->be_data == MO_LE) { tcg_gen_extr32_i64(s1, s2, cmp); } else { tcg_gen_extr32_i64(s2, s1, cmp); } - tcg_temp_free_i64(cmp); - } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { - if (HAVE_CMPXCHG128) { - TCGv_i32 tcg_rs = tcg_const_i32(rs); - if (s->be_data == MO_LE) { - gen_helper_casp_le_parallel(cpu_env, tcg_rs, - clean_addr, t1, t2); - } else { - gen_helper_casp_be_parallel(cpu_env, tcg_rs, - clean_addr, t1, t2); - } - tcg_temp_free_i32(tcg_rs); + } else { + TCGv_i128 cmp = tcg_temp_new_i128(); + TCGv_i128 val = tcg_temp_new_i128(); + + if (s->be_data == MO_LE) { + tcg_gen_concat_i64_i128(val, t1, t2); + tcg_gen_concat_i64_i128(cmp, s1, s2); } else { - gen_helper_exit_atomic(cpu_env); - s->base.is_jmp = DISAS_NORETURN; + tcg_gen_concat_i64_i128(val, t2, t1); + tcg_gen_concat_i64_i128(cmp, s2, s1); } - } else { - TCGv_i64 d1 = tcg_temp_new_i64(); - TCGv_i64 d2 = tcg_temp_new_i64(); - TCGv_i64 a2 = tcg_temp_new_i64(); - TCGv_i64 c1 = tcg_temp_new_i64(); - TCGv_i64 c2 = tcg_temp_new_i64(); - TCGv_i64 zero = tcg_const_i64(0); - - /* Load the two words, in memory order. */ - tcg_gen_qemu_ld_i64(d1, clean_addr, memidx, - MO_64 | MO_ALIGN_16 | s->be_data); - tcg_gen_addi_i64(a2, clean_addr, 8); - tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data); - - /* Compare the two words, also in memory order. */ - tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1); - tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2); - tcg_gen_and_i64(c2, c2, c1); - - /* If compare equal, write back new data, else write back old data. */ - tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1); - tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2); - tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data); - tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data); - tcg_temp_free_i64(a2); - tcg_temp_free_i64(c1); - tcg_temp_free_i64(c2); - tcg_temp_free_i64(zero); - - /* Write back the data from memory to Rs. */ - tcg_gen_mov_i64(s1, d1); - tcg_gen_mov_i64(s2, d2); - tcg_temp_free_i64(d1); - tcg_temp_free_i64(d2); - } -} - -/* Update the Sixty-Four bit (SF) registersize. This logic is derived + + tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); + + if (s->be_data == MO_LE) { + tcg_gen_extr_i128_i64(s1, s2, cmp); + } else { + tcg_gen_extr_i128_i64(s2, s1, cmp); + } + } +} + +/* + * Compute the ISS.SF bit for syndrome information if an exception + * is taken on a load or store. This indicates whether the instruction + * is accessing a 32-bit or 64-bit register. This logic is derived * from the ARMv8 specs for LDR (Shared decode for all encodings). */ -static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc) +static bool ldst_iss_sf(int size, bool sign, bool ext) { - int opc0 = extract32(opc, 0, 1); - int regsize; - if (is_signed) { - regsize = opc0 ? 
32 : 64; + if (sign) { + /* + * Signed loads are 64 bit results if we are not going to + * do a zero-extend from 32 to 64 after the load. + * (For a store, sign and ext are always false.) + */ + return !ext; } else { - regsize = size == 3 ? 64 : 32; + /* Unsigned loads/stores work at the specified size */ + return size == MO_64; } - return regsize == 64; } -/* Load/store exclusive - * - * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0 - * +-----+-------------+----+---+----+------+----+-------+------+------+ - * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt | - * +-----+-------------+----+---+----+------+----+-------+------+------+ - * - * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit - * L: 0 -> store, 1 -> load - * o2: 0 -> exclusive, 1 -> not - * o1: 0 -> single register, 1 -> register pair - * o0: 1 -> load-acquire/store-release, 0 -> not - */ -static void disas_ldst_excl(DisasContext *s, uint32_t insn) +static bool trans_STXR(DisasContext *s, arg_stxr *a) { - int rt = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int rt2 = extract32(insn, 10, 5); - int rs = extract32(insn, 16, 5); - int is_lasr = extract32(insn, 15, 1); - int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr; - int size = extract32(insn, 30, 2); - TCGv_i64 clean_addr; - - switch (o2_L_o1_o0) { - case 0x0: /* STXR */ - case 0x1: /* STLXR */ - if (rn == 31) { - gen_check_sp_alignment(s); - } - if (is_lasr) { - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - } - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), - true, rn != 31, size); - gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false); - return; - - case 0x4: /* LDXR */ - case 0x5: /* LDAXR */ - if (rn == 31) { - gen_check_sp_alignment(s); - } - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), - false, rn != 31, size); - s->is_ldex = true; - gen_load_exclusive(s, rt, rt2, clean_addr, size, false); - if (is_lasr) { - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); - } - return; - - case 0x8: /* STLLR */ - if (!dc_isar_feature(aa64_lor, s)) { - break; - } - /* StoreLORelease is the same as Store-Release for QEMU. */ - /* fall through */ - case 0x9: /* STLR */ - /* Generate ISS for non-exclusive accesses including LASR. */ - if (rn == 31) { - gen_check_sp_alignment(s); - } + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + if (a->lasr) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), - true, rn != 31, size); - /* TODO: ARMv8.4-LSE SCTLR.nAA */ - do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt, - disas_ldst_compute_iss_sf(size, false, 0), is_lasr); - return; + } + gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); + return true; +} - case 0xc: /* LDLAR */ - if (!dc_isar_feature(aa64_lor, s)) { - break; - } - /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ - /* fall through */ - case 0xd: /* LDAR */ - /* Generate ISS for non-exclusive accesses including LASR. 
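 *
 * Gloss on ldst_iss_sf() above (illustration, not patch code) -- the
 * cases it encodes:
 *
 *     sign  ext  size      ->  ISS.SF
 *      0     -   MO_64         1    64-bit unsigned load/store
 *      0     -   < MO_64       0    result zero-extended from 32 bits
 *      1     0   any           1    signed load into an X register
 *      1     1   any           0    signed load into a W register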
*/ - if (rn == 31) { - gen_check_sp_alignment(s); - } - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), - false, rn != 31, size); - /* TODO: ARMv8.4-LSE SCTLR.nAA */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true, - rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); +static bool trans_LDXR(DisasContext *s, arg_stxr *a) +{ + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); + if (a->lasr) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); - return; - - case 0x2: case 0x3: /* CASP / STXP */ - if (size & 2) { /* STXP / STLXP */ - if (rn == 31) { - gen_check_sp_alignment(s); - } - if (is_lasr) { - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - } - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), - true, rn != 31, size); - gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true); - return; - } - if (rt2 == 31 - && ((rt | rs) & 1) == 0 - && dc_isar_feature(aa64_atomics, s)) { - /* CASP / CASPL */ - gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); - return; - } - break; + } + return true; +} - case 0x6: case 0x7: /* CASPA / LDXP */ - if (size & 2) { /* LDXP / LDAXP */ - if (rn == 31) { - gen_check_sp_alignment(s); - } - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), - false, rn != 31, size); - s->is_ldex = true; - gen_load_exclusive(s, rt, rt2, clean_addr, size, true); - if (is_lasr) { - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); - } - return; - } - if (rt2 == 31 - && ((rt | rs) & 1) == 0 - && dc_isar_feature(aa64_atomics, s)) { - /* CASPA / CASPAL */ - gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); - return; - } - break; +static bool trans_STLR(DisasContext *s, arg_stlr *a) +{ + TCGv_i64 clean_addr; + MemOp memop; + bool iss_sf = ldst_iss_sf(a->sz, false, false); - case 0xa: /* CAS */ - case 0xb: /* CASL */ - case 0xe: /* CASA */ - case 0xf: /* CASAL */ - if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) { - gen_compare_and_swap(s, rs, rt, rn, size); - return; - } - break; + /* + * StoreLORelease is the same as Store-Release for QEMU, but + * needs the feature-test. + */ + if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { + return false; + } + /* Generate ISS for non-exclusive accesses including LASR. 
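 *
 * Aside: check_ordered_align() used below is, to a first
 * approximation,
 *
 *     mop = finalize_memop(s, a->sz | MO_ALIGN);
 *
 * except that with FEAT_LSE2 and SCTLR_ELx.nAA set the alignment
 * requirement for this LDAR/STLR class of access is relaxed; the
 * precise condition lives in that helper, this is only a sketch.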
*/ + if (a->rn == 31) { + gen_check_sp_alignment(s); } - unallocated_encoding(s); + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); + memop = check_ordered_align(s, a->rn, 0, true, a->sz); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), + true, a->rn != 31, memop); + do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, + iss_sf, a->lasr); + return true; } -/* - * Load register (literal) - * - * 31 30 29 27 26 25 24 23 5 4 0 - * +-----+-------+---+-----+-------------------+-------+ - * | opc | 0 1 1 | V | 0 0 | imm19 | Rt | - * +-----+-------+---+-----+-------------------+-------+ - * - * V: 1 -> vector (simd/fp) - * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit, - * 10-> 32 bit signed, 11 -> prefetch - * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated) - */ -static void disas_ld_lit(DisasContext *s, uint32_t insn) +static bool trans_LDAR(DisasContext *s, arg_stlr *a) { - int rt = extract32(insn, 0, 5); - int64_t imm = sextract32(insn, 5, 19) << 2; - bool is_vector = extract32(insn, 26, 1); - int opc = extract32(insn, 30, 2); - bool is_signed = false; - int size = 2; - TCGv_i64 tcg_rt, clean_addr; + TCGv_i64 clean_addr; + MemOp memop; + bool iss_sf = ldst_iss_sf(a->sz, false, false); - if (is_vector) { - if (opc == 3) { - unallocated_encoding(s); - return; - } - size = 2 + opc; - if (!fp_access_check(s)) { - return; - } - } else { - if (opc == 3) { - /* PRFM (literal) : prefetch */ - return; - } - size = 2 + extract32(opc, 0, 1); - is_signed = extract32(opc, 1, 1); + /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ + if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { + return false; } + /* Generate ISS for non-exclusive accesses including LASR. */ + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + memop = check_ordered_align(s, a->rn, 0, false, a->sz); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), + false, a->rn != 31, memop); + do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, + a->rt, iss_sf, a->lasr); + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); + return true; +} - tcg_rt = cpu_reg(s, rt); +static bool trans_STXP(DisasContext *s, arg_stxr *a) +{ + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + if (a->lasr) { + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); + } + gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); + return true; +} - clean_addr = tcg_const_i64(s->pc_curr + imm); - if (is_vector) { - do_fp_ld(s, rt, clean_addr, size); - } else { - /* Only unsigned 32bit loads target 32bit registers. 
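 *
 * Aside: for a literal load the address is pure PC-relative
 * arithmetic, e.g. at pc 0x40001000
 *
 *     ldr  x0, #+0x100      // imm19 = 0x40, addr = 0x40001000 + 0x100
 *
 * which is why trans_LD_lit() below needs only gen_pc_plus_diff(),
 * with no SP-alignment or MTE/TBI check on the base.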
*/ - bool iss_sf = opc != 0; +static bool trans_LDXP(DisasContext *s, arg_stxr *a) +{ + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); + if (a->lasr) { + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); + } + return true; +} - do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, - false, true, rt, iss_sf, false); +static bool trans_CASP(DisasContext *s, arg_CASP *a) +{ + if (!dc_isar_feature(aa64_atomics, s)) { + return false; } - tcg_temp_free_i64(clean_addr); + if (((a->rt | a->rs) & 1) != 0) { + return false; + } + + gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); + return true; } -/* - * LDNP (Load Pair - non-temporal hint) - * LDP (Load Pair - non vector) - * LDPSW (Load Pair Signed Word - non vector) - * STNP (Store Pair - non-temporal hint) - * STP (Store Pair - non vector) - * LDNP (Load Pair of SIMD&FP - non-temporal hint) - * LDP (Load Pair of SIMD&FP) - * STNP (Store Pair of SIMD&FP - non-temporal hint) - * STP (Store Pair of SIMD&FP) - * - * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0 - * +-----+-------+---+---+-------+---+-----------------------------+ - * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt | - * +-----+-------+---+---+-------+---+-------+-------+------+------+ - * - * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit - * LDPSW/STGP 01 - * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit - * V: 0 -> GPR, 1 -> Vector - * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index, - * 10 -> signed offset, 11 -> pre-index - * L: 0 -> Store 1 -> Load - * - * Rt, Rt2 = GPR or SIMD registers to be stored - * Rn = general purpose register containing address - * imm7 = signed offset (multiple of 4 or 8 depending on size) - */ -static void disas_ldst_pair(DisasContext *s, uint32_t insn) +static bool trans_CAS(DisasContext *s, arg_CAS *a) { - int rt = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int rt2 = extract32(insn, 10, 5); - uint64_t offset = sextract64(insn, 15, 7); - int index = extract32(insn, 23, 2); - bool is_vector = extract32(insn, 26, 1); - bool is_load = extract32(insn, 22, 1); - int opc = extract32(insn, 30, 2); + if (!dc_isar_feature(aa64_atomics, s)) { + return false; + } + gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); + return true; +} - bool is_signed = false; - bool postindex = false; - bool wback = false; - bool set_tag = false; +static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) +{ + bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); + TCGv_i64 tcg_rt = cpu_reg(s, a->rt); + TCGv_i64 clean_addr = tcg_temp_new_i64(); + MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); - TCGv_i64 clean_addr, dirty_addr; + gen_pc_plus_diff(s, clean_addr, a->imm); + do_gpr_ld(s, tcg_rt, clean_addr, memop, + false, true, a->rt, iss_sf, false); + return true; +} - int size; +static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) +{ + /* Load register (literal), vector version */ + TCGv_i64 clean_addr; + MemOp memop; - if (opc == 3) { - unallocated_encoding(s); - return; + if (!fp_access_check(s)) { + return true; } + memop = finalize_memop_asimd(s, a->sz); + clean_addr = tcg_temp_new_i64(); + gen_pc_plus_diff(s, clean_addr, a->imm); + do_fp_ld(s, a->rt, clean_addr, memop); + return true; +} - if (is_vector) { - size = 2 + opc; - } else if (opc == 1 && !is_load) { - /* STGP */ - if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) { - unallocated_encoding(s); - return; - } - size = 3; - set_tag = true; - } else { - size = 2 + 
extract32(opc, 1, 1); - is_signed = extract32(opc, 0, 1); - if (!is_load && is_signed) { - unallocated_encoding(s); - return; - } +static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, + TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, + uint64_t offset, bool is_store, MemOp mop) +{ + if (a->rn == 31) { + gen_check_sp_alignment(s); } - switch (index) { - case 1: /* post-index */ - postindex = true; - wback = true; - break; - case 0: - /* signed offset with "non-temporal" hint. Since we don't emulate - * caches we don't care about hints to the cache system about - * data access patterns, and handle this identically to plain - * signed offset. - */ - if (is_signed) { - /* There is no non-temporal-hint version of LDPSW */ - unallocated_encoding(s); - return; - } - postindex = false; - break; - case 2: /* signed offset, rn not updated */ - postindex = false; - break; - case 3: /* pre-index */ - postindex = false; - wback = true; - break; + *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); + if (!a->p) { + tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); } - if (is_vector && !fp_access_check(s)) { - return; + *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store, + (a->w || a->rn != 31), 2 << a->sz, mop); +} + +static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, + TCGv_i64 dirty_addr, uint64_t offset) +{ + if (a->w) { + if (a->p) { + tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); + } + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); } +} - offset <<= (set_tag ? LOG2_TAG_GRANULE : size); +static bool trans_STP(DisasContext *s, arg_ldstpair *a) +{ + uint64_t offset = a->imm << a->sz; + TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; + MemOp mop = finalize_memop(s, a->sz); - if (rn == 31) { - gen_check_sp_alignment(s); + op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); + tcg_rt = cpu_reg(s, a->rt); + tcg_rt2 = cpu_reg(s, a->rt2); + /* + * We built mop above for the single logical access -- rebuild it + * now for the paired operation. + * + * With LSE2, non-sign-extending pairs are treated atomically if + * aligned, and if unaligned one of the pair will be completely + * within a 16-byte block and that element will be atomic. + * Otherwise each element is separately atomic. + * In all cases, issue one operation with the correct atomicity. + */ + mop = a->sz + 1; + if (s->align_mem) { + mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); } + mop = finalize_memop_pair(s, mop); + if (a->sz == 2) { + TCGv_i64 tmp = tcg_temp_new_i64(); - dirty_addr = read_cpu_reg_sp(s, rn, 1); - if (!postindex) { - tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); - } + if (s->be_data == MO_LE) { + tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); + } else { + tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); + } + tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); + } else { + TCGv_i128 tmp = tcg_temp_new_i128(); - if (set_tag) { - if (!s->ata) { - /* - * TODO: We could rely on the stores below, at least for - * system mode, if we arrange to add MO_ALIGN_16. 
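 *
 * Aside: TAG_GRANULE is 16 bytes (LOG2_TAG_GRANULE == 4), so the STGP
 * immediate scales as
 *
 *     offset = a->imm << LOG2_TAG_GRANULE;   /* imm7 * 16 */
 *
 * and a single allocation tag covers exactly the 16-byte pair stored.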
- */ - gen_helper_stg_stub(cpu_env, dirty_addr); - } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { - gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); + if (s->be_data == MO_LE) { + tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); } else { - gen_helper_stg(cpu_env, dirty_addr, dirty_addr); + tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); } + tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); } + op_addr_ldstpair_post(s, a, dirty_addr, offset); + return true; +} - clean_addr = gen_mte_checkN(s, dirty_addr, !is_load, - (wback || rn != 31) && !set_tag, 2 << size); +static bool trans_LDP(DisasContext *s, arg_ldstpair *a) +{ + uint64_t offset = a->imm << a->sz; + TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; + MemOp mop = finalize_memop(s, a->sz); - if (is_vector) { - if (is_load) { - do_fp_ld(s, rt, clean_addr, size); - } else { - do_fp_st(s, rt, clean_addr, size); - } - tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); - if (is_load) { - do_fp_ld(s, rt2, clean_addr, size); + op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); + tcg_rt = cpu_reg(s, a->rt); + tcg_rt2 = cpu_reg(s, a->rt2); + + /* + * We built mop above for the single logical access -- rebuild it + * now for the paired operation. + * + * With LSE2, non-sign-extending pairs are treated atomically if + * aligned, and if unaligned one of the pair will be completely + * within a 16-byte block and that element will be atomic. + * Otherwise each element is separately atomic. + * In all cases, issue one operation with the correct atomicity. + * + * This treats sign-extending loads like zero-extending loads, + * since that reuses the most code below. + */ + mop = a->sz + 1; + if (s->align_mem) { + mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); + } + mop = finalize_memop_pair(s, mop); + if (a->sz == 2) { + int o2 = s->be_data == MO_LE ? 32 : 0; + int o1 = o2 ^ 32; + + tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); + if (a->sign) { + tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); + tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); } else { - do_fp_st(s, rt2, clean_addr, size); + tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); + tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); } } else { - TCGv_i64 tcg_rt = cpu_reg(s, rt); - TCGv_i64 tcg_rt2 = cpu_reg(s, rt2); + TCGv_i128 tmp = tcg_temp_new_i128(); - if (is_load) { - TCGv_i64 tmp = tcg_temp_new_i64(); - - /* Do not modify tcg_rt before recognizing any exception - * from the second load. 
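 *
 * Aside, not part of this patch: the replacement below issues one
 * i128 access, so there is no second load whose exception could be
 * recognized after rt was already clobbered; the whole dance
 * collapses to
 *
 *     tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
 *     tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);   /* LE ordering */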
- */ - do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN, - false, false, 0, false, false); - tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); - do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN, - false, false, 0, false, false); - - tcg_gen_mov_i64(tcg_rt, tmp); - tcg_temp_free_i64(tmp); + tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); + if (s->be_data == MO_LE) { + tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); } else { - do_gpr_st(s, tcg_rt, clean_addr, size, - false, 0, false, false); - tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); - do_gpr_st(s, tcg_rt2, clean_addr, size, - false, 0, false, false); + tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); } } + op_addr_ldstpair_post(s, a, dirty_addr, offset); + return true; +} - if (wback) { - if (postindex) { - tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); - } - tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr); +static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) +{ + uint64_t offset = a->imm << a->sz; + TCGv_i64 clean_addr, dirty_addr; + MemOp mop; + + if (!fp_access_check(s)) { + return true; } + + /* LSE2 does not merge FP pairs; leave these as separate operations. */ + mop = finalize_memop_asimd(s, a->sz); + op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); + do_fp_st(s, a->rt, clean_addr, mop); + tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); + do_fp_st(s, a->rt2, clean_addr, mop); + op_addr_ldstpair_post(s, a, dirty_addr, offset); + return true; } -/* - * Load/store (immediate post-indexed) - * Load/store (immediate pre-indexed) - * Load/store (unscaled immediate) - * - * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0 - * +----+-------+---+-----+-----+---+--------+-----+------+------+ - * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt | - * +----+-------+---+-----+-----+---+--------+-----+------+------+ - * - * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. 
(no writeback) - 10 -> unprivileged - * V = 0 -> non-vector - * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit - * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 - */ -static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) { - int rn = extract32(insn, 5, 5); - int imm9 = sextract32(insn, 12, 9); - int idx = extract32(insn, 10, 2); - bool is_signed = false; - bool is_store = false; - bool is_extended = false; - bool is_unpriv = (idx == 2); - bool iss_valid = !is_vector; - bool post_index; - bool writeback; - int memidx; - + uint64_t offset = a->imm << a->sz; TCGv_i64 clean_addr, dirty_addr; + MemOp mop; - if (is_vector) { - size |= (opc & 2) << 1; - if (size > 4 || is_unpriv) { - unallocated_encoding(s); - return; - } - is_store = ((opc & 1) == 0); - if (!fp_access_check(s)) { - return; - } - } else { - if (size == 3 && opc == 2) { - /* PRFM - prefetch */ - if (idx != 0) { - unallocated_encoding(s); - return; - } - return; - } - if (opc == 3 && size > 1) { - unallocated_encoding(s); - return; - } - is_store = (opc == 0); - is_signed = extract32(opc, 1, 1); - is_extended = (size < 3) && extract32(opc, 0, 1); + if (!fp_access_check(s)) { + return true; } - switch (idx) { - case 0: - case 2: - post_index = false; - writeback = false; - break; - case 1: - post_index = true; - writeback = true; - break; - case 3: - post_index = false; - writeback = true; - break; - default: - g_assert_not_reached(); + /* LSE2 does not merge FP pairs; leave these as separate operations. */ + mop = finalize_memop_asimd(s, a->sz); + op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); + do_fp_ld(s, a->rt, clean_addr, mop); + tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); + do_fp_ld(s, a->rt2, clean_addr, mop); + op_addr_ldstpair_post(s, a, dirty_addr, offset); + return true; +} + +static bool trans_STGP(DisasContext *s, arg_ldstpair *a) +{ + TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; + uint64_t offset = a->imm << LOG2_TAG_GRANULE; + MemOp mop; + TCGv_i128 tmp; + + /* STGP only comes in one size. */ + tcg_debug_assert(a->sz == MO_64); + + if (!dc_isar_feature(aa64_mte_insn_reg, s)) { + return false; } - if (rn == 31) { + if (a->rn == 31) { gen_check_sp_alignment(s); } - dirty_addr = read_cpu_reg_sp(s, rn, 1); - if (!post_index) { - tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); + dirty_addr = read_cpu_reg_sp(s, a->rn, 1); + if (!a->p) { + tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); } - memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); - clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store, - writeback || rn != 31, - size, is_unpriv, memidx); + clean_addr = clean_data_tbi(s, dirty_addr); + tcg_rt = cpu_reg(s, a->rt); + tcg_rt2 = cpu_reg(s, a->rt2); - if (is_vector) { - if (is_store) { - do_fp_st(s, rt, clean_addr, size); - } else { - do_fp_ld(s, rt, clean_addr, size); - } + /* + * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, + * and one tag operation. We implement it as one single aligned 16-byte + * memory operation for convenience. Note that the alignment ensures + * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 
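 *
 * Gloss on the atomicity flags involved (see include/exec/memop.h;
 * abridged, illustration only):
 *
 *     MO_ATOM_IFALIGN        whole operation atomic if aligned
 *     MO_ATOM_IFALIGN_PAIR   the two halves each atomic if the whole
 *                            is aligned
 *     MO_ATOM_NONE           no atomicity guaranteed
 *
 * hence IFALIGN_PAIR gives the two 8-byte STGP stores the required
 * single-copy atomicity.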
+ */ + mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); + + tmp = tcg_temp_new_i128(); + if (s->be_data == MO_LE) { + tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); } else { - TCGv_i64 tcg_rt = cpu_reg(s, rt); - bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); + tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); + } + tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); - if (is_store) { - do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx, - iss_valid, rt, iss_sf, false); + /* Perform the tag store, if tag access enabled. */ + if (s->ata[0]) { + if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); } else { - do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, - is_extended, memidx, - iss_valid, rt, iss_sf, false); + gen_helper_stg(tcg_env, dirty_addr, dirty_addr); } } - if (writeback) { - TCGv_i64 tcg_rn = cpu_reg_sp(s, rn); - if (post_index) { - tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); - } - tcg_gen_mov_i64(tcg_rn, dirty_addr); - } + op_addr_ldstpair_post(s, a, dirty_addr, offset); + return true; } -/* - * Load/store (register offset) - * - * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0 - * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ - * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt | - * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ - * - * For non-vector: - * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit - * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 - * For vector: - * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated - * opc<0>: 0 -> store, 1 -> load - * V: 1 -> vector/simd - * opt: extend encoding (see DecodeRegExtend) - * S: if S=1 then scale (essentially index by sizeof(size)) - * Rt: register to transfer into/out of - * Rn: address register or SP for base - * Rm: offset register or ZR for offset - */ -static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, + TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, + uint64_t offset, bool is_store, MemOp mop) { - int rn = extract32(insn, 5, 5); - int shift = extract32(insn, 12, 1); - int rm = extract32(insn, 16, 5); - int opt = extract32(insn, 13, 3); - bool is_signed = false; - bool is_store = false; - bool is_extended = false; + int memidx; - TCGv_i64 tcg_rm, clean_addr, dirty_addr; + if (a->rn == 31) { + gen_check_sp_alignment(s); + } - if (extract32(opt, 1, 1) == 0) { - unallocated_encoding(s); - return; + *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); + if (!a->p) { + tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); } + memidx = get_a64_user_mem_index(s, a->unpriv); + *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, + a->w || a->rn != 31, + mop, a->unpriv, memidx); +} - if (is_vector) { - size |= (opc & 2) << 1; - if (size > 4) { - unallocated_encoding(s); - return; - } - is_store = !extract32(opc, 0, 1); - if (!fp_access_check(s)) { - return; - } - } else { - if (size == 3 && opc == 2) { - /* PRFM - prefetch */ - return; - } - if (opc == 3 && size > 1) { - unallocated_encoding(s); - return; +static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, + TCGv_i64 dirty_addr, uint64_t offset) +{ + if (a->w) { + if (a->p) { + tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); } - is_store = (opc == 0); - is_signed = extract32(opc, 1, 1); - is_extended = (size < 
3) && extract32(opc, 0, 1); + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); } +} - if (rn == 31) { - gen_check_sp_alignment(s); - } - dirty_addr = read_cpu_reg_sp(s, rn, 1); +static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) +{ + bool iss_sf, iss_valid = !a->w; + TCGv_i64 clean_addr, dirty_addr, tcg_rt; + int memidx = get_a64_user_mem_index(s, a->unpriv); + MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); - tcg_rm = read_cpu_reg(s, rm, 1); - ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0); + op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); - tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm); - clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size); + tcg_rt = cpu_reg(s, a->rt); + iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); - if (is_vector) { - if (is_store) { - do_fp_st(s, rt, clean_addr, size); - } else { - do_fp_ld(s, rt, clean_addr, size); - } - } else { - TCGv_i64 tcg_rt = cpu_reg(s, rt); - bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); - if (is_store) { - do_gpr_st(s, tcg_rt, clean_addr, size, - true, rt, iss_sf, false); - } else { - do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, - is_extended, true, rt, iss_sf, false); - } - } + do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, + iss_valid, a->rt, iss_sf, false); + op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); + return true; } -/* - * Load/store (unsigned immediate) - * - * 31 30 29 27 26 25 24 23 22 21 10 9 5 - * +----+-------+---+-----+-----+------------+-------+------+ - * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt | - * +----+-------+---+-----+-----+------------+-------+------+ - * - * For non-vector: - * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit - * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 - * For vector: - * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated - * opc<0>: 0 -> store, 1 -> load - * Rn: base address register (inc SP) - * Rt: target register - */ -static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, - int opc, - int size, - int rt, - bool is_vector) +static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) { - int rn = extract32(insn, 5, 5); - unsigned int imm12 = extract32(insn, 10, 12); - unsigned int offset; + bool iss_sf, iss_valid = !a->w; + TCGv_i64 clean_addr, dirty_addr, tcg_rt; + int memidx = get_a64_user_mem_index(s, a->unpriv); + MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); + + op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); + + tcg_rt = cpu_reg(s, a->rt); + iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); + do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, + a->ext, memidx, iss_valid, a->rt, iss_sf, false); + op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); + return true; +} + +static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) +{ TCGv_i64 clean_addr, dirty_addr; + MemOp mop; - bool is_store; - bool is_signed = false; - bool is_extended = false; + if (!fp_access_check(s)) { + return true; + } + mop = finalize_memop_asimd(s, a->sz); + op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); + do_fp_st(s, a->rt, clean_addr, mop); + op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); + return true; +} - if (is_vector) { - size |= (opc & 2) << 1; - if (size > 4) { - unallocated_encoding(s); - return; - } - is_store = !extract32(opc, 0, 1); - if (!fp_access_check(s)) { - return; - } - } else { - if (size == 3 && opc == 2) { - /* PRFM - prefetch */ - return; - } - if (opc == 3 && size 
> 1) { - unallocated_encoding(s); - return; - } - is_store = (opc == 0); - is_signed = extract32(opc, 1, 1); - is_extended = (size < 3) && extract32(opc, 0, 1); +static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) +{ + TCGv_i64 clean_addr, dirty_addr; + MemOp mop; + + if (!fp_access_check(s)) { + return true; } + mop = finalize_memop_asimd(s, a->sz); + op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); + do_fp_ld(s, a->rt, clean_addr, mop); + op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); + return true; +} - if (rn == 31) { +static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, + TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, + bool is_store, MemOp memop) +{ + TCGv_i64 tcg_rm; + + if (a->rn == 31) { gen_check_sp_alignment(s); } - dirty_addr = read_cpu_reg_sp(s, rn, 1); - offset = imm12 << size; - tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); - clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size); + *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); - if (is_vector) { - if (is_store) { - do_fp_st(s, rt, clean_addr, size); - } else { - do_fp_ld(s, rt, clean_addr, size); - } - } else { - TCGv_i64 tcg_rt = cpu_reg(s, rt); - bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); - if (is_store) { - do_gpr_st(s, tcg_rt, clean_addr, size, - true, rt, iss_sf, false); - } else { - do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, - is_extended, true, rt, iss_sf, false); - } + tcg_rm = read_cpu_reg(s, a->rm, 1); + ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); + + tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); + *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); +} + +static bool trans_LDR(DisasContext *s, arg_ldst *a) +{ + TCGv_i64 clean_addr, dirty_addr, tcg_rt; + bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); + MemOp memop; + + if (extract32(a->opt, 1, 1) == 0) { + return false; } + + memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); + op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); + tcg_rt = cpu_reg(s, a->rt); + do_gpr_ld(s, tcg_rt, clean_addr, memop, + a->ext, true, a->rt, iss_sf, false); + return true; } -/* Atomic memory operations - * - * 31 30 27 26 24 22 21 16 15 12 10 5 0 - * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+ - * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt | - * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+ - * - * Rt: the result register - * Rn: base address or SP - * Rs: the source register for the operation - * V: vector flag (always 0 as of v8.3) - * A: acquire flag - * R: release flag - */ -static void disas_ldst_atomic(DisasContext *s, uint32_t insn, - int size, int rt, bool is_vector) +static bool trans_STR(DisasContext *s, arg_ldst *a) { - int rs = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int o3_opc = extract32(insn, 12, 4); - bool r = extract32(insn, 22, 1); - bool a = extract32(insn, 23, 1); - TCGv_i64 tcg_rs, tcg_rt, clean_addr; - AtomicThreeOpFn *fn = NULL; - MemOp mop = s->be_data | size | MO_ALIGN; + TCGv_i64 clean_addr, dirty_addr, tcg_rt; + bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); + MemOp memop; - if (is_vector || !dc_isar_feature(aa64_atomics, s)) { - unallocated_encoding(s); - return; + if (extract32(a->opt, 1, 1) == 0) { + return false; } - switch (o3_opc) { - case 000: /* LDADD */ - fn = tcg_gen_atomic_fetch_add_i64; - break; - case 001: /* LDCLR */ - fn = tcg_gen_atomic_fetch_and_i64; - break; - case 002: /* LDEOR */ - fn = 
tcg_gen_atomic_fetch_xor_i64; - break; - case 003: /* LDSET */ - fn = tcg_gen_atomic_fetch_or_i64; - break; - case 004: /* LDSMAX */ - fn = tcg_gen_atomic_fetch_smax_i64; - mop |= MO_SIGN; - break; - case 005: /* LDSMIN */ - fn = tcg_gen_atomic_fetch_smin_i64; - mop |= MO_SIGN; - break; - case 006: /* LDUMAX */ - fn = tcg_gen_atomic_fetch_umax_i64; - break; - case 007: /* LDUMIN */ - fn = tcg_gen_atomic_fetch_umin_i64; - break; - case 010: /* SWP */ - fn = tcg_gen_atomic_xchg_i64; - break; - case 014: /* LDAPR, LDAPRH, LDAPRB */ - if (!dc_isar_feature(aa64_rcpc_8_3, s) || - rs != 31 || a != 1 || r != 0) { - unallocated_encoding(s); - return; - } - break; - default: - unallocated_encoding(s); - return; + + memop = finalize_memop(s, a->sz); + op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); + tcg_rt = cpu_reg(s, a->rt); + do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); + return true; +} + +static bool trans_LDR_v(DisasContext *s, arg_ldst *a) +{ + TCGv_i64 clean_addr, dirty_addr; + MemOp memop; + + if (extract32(a->opt, 1, 1) == 0) { + return false; } - if (rn == 31) { - gen_check_sp_alignment(s); + if (!fp_access_check(s)) { + return true; } - clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); - if (o3_opc == 014) { - /* - * LDAPR* are a special case because they are a simple load, not a - * fetch-and-do-something op. - * The architectural consistency requirements here are weaker than - * full load-acquire (we only need "load-acquire processor consistent"), - * but we choose to implement them as full LDAQ. - */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, - true, rt, disas_ldst_compute_iss_sf(size, false, 0), true); - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); - return; + memop = finalize_memop_asimd(s, a->sz); + op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); + do_fp_ld(s, a->rt, clean_addr, memop); + return true; +} + +static bool trans_STR_v(DisasContext *s, arg_ldst *a) +{ + TCGv_i64 clean_addr, dirty_addr; + MemOp memop; + + if (extract32(a->opt, 1, 1) == 0) { + return false; } - tcg_rs = read_cpu_reg(s, rs, true); - tcg_rt = cpu_reg(s, rt); + if (!fp_access_check(s)) { + return true; + } + + memop = finalize_memop_asimd(s, a->sz); + op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); + do_fp_st(s, a->rt, clean_addr, memop); + return true; +} + + +static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, + int sign, bool invert) +{ + MemOp mop = a->sz | sign; + TCGv_i64 clean_addr, tcg_rs, tcg_rt; - if (o3_opc == 1) { /* LDCLR */ + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + mop = check_atomic_align(s, a->rn, mop); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, + a->rn != 31, mop); + tcg_rs = read_cpu_reg(s, a->rs, true); + tcg_rt = cpu_reg(s, a->rt); + if (invert) { tcg_gen_not_i64(tcg_rs, tcg_rs); } - - /* The tcg atomic primitives are all full barriers. Therefore we + /* + * The tcg atomic primitives are all full barriers. Therefore we * can ignore the Acquire and Release bits of this instruction. 
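 *
 * Gloss on the TRANS_FEAT table below: each atomic insn maps to a tcg
 * fetch-op plus an optional operand inversion, e.g.
 *
 *     LDCLR:  Rt = [addr]; [addr] &= ~Rs    ->  fetch_and, invert
 *     LDSET:  Rt = [addr]; [addr] |=  Rs    ->  fetch_or
 *     SWP:    Rt = [addr]; [addr]  =  Rs    ->  xchg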
*/ fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); - if ((mop & MO_SIGN) && size != MO_64) { - tcg_gen_ext32u_i64(tcg_rt, tcg_rt); + if (mop & MO_SIGN) { + switch (a->sz) { + case MO_8: + tcg_gen_ext8u_i64(tcg_rt, tcg_rt); + break; + case MO_16: + tcg_gen_ext16u_i64(tcg_rt, tcg_rt); + break; + case MO_32: + tcg_gen_ext32u_i64(tcg_rt, tcg_rt); + break; + case MO_64: + break; + default: + g_assert_not_reached(); + } } + return true; } -/* - * PAC memory operations - * - * 31 30 27 26 24 22 21 12 11 10 5 0 - * +------+-------+---+-----+-----+---+--------+---+---+----+-----+ - * | size | 1 1 1 | V | 0 0 | M S | 1 | imm9 | W | 1 | Rn | Rt | - * +------+-------+---+-----+-----+---+--------+---+---+----+-----+ - * - * Rt: the result register - * Rn: base address or SP - * V: vector flag (always 0 as of v8.3) - * M: clear for key DA, set for key DB - * W: pre-indexing flag - * S: sign for imm9. - */ -static void disas_ldst_pac(DisasContext *s, uint32_t insn, - int size, int rt, bool is_vector) +TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) +TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) +TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) +TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) +TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) +TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) +TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) +TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) +TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) + +static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) +{ + bool iss_sf = ldst_iss_sf(a->sz, false, false); + TCGv_i64 clean_addr; + MemOp mop; + + if (!dc_isar_feature(aa64_atomics, s) || + !dc_isar_feature(aa64_rcpc_8_3, s)) { + return false; + } + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + mop = check_atomic_align(s, a->rn, a->sz); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, + a->rn != 31, mop); + /* + * LDAPR* are a special case because they are a simple load, not a + * fetch-and-do-something op. + * The architectural consistency requirements here are weaker than + * full load-acquire (we only need "load-acquire processor consistent"), + * but we choose to implement them as full LDAQ. 
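 *
 * Aside: the RCpc relaxation would permit this load to be reordered
 * ahead of an earlier store-release to a *different* address; full
 * load-acquire forbids that as well, so following the load with
 *
 *     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
 *
 * is strictly stronger and therefore architecturally safe.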
+ */ + do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, + true, a->rt, iss_sf, true); + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); + return true; +} + +static bool trans_LDRA(DisasContext *s, arg_LDRA *a) { - int rn = extract32(insn, 5, 5); - bool is_wback = extract32(insn, 11, 1); - bool use_key_a = !extract32(insn, 23, 1); - int offset; TCGv_i64 clean_addr, dirty_addr, tcg_rt; + MemOp memop; - if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) { - unallocated_encoding(s); - return; + /* Load with pointer authentication */ + if (!dc_isar_feature(aa64_pauth, s)) { + return false; } - if (rn == 31) { + if (a->rn == 31) { gen_check_sp_alignment(s); } - dirty_addr = read_cpu_reg_sp(s, rn, 1); + dirty_addr = read_cpu_reg_sp(s, a->rn, 1); if (s->pauth_active) { - if (use_key_a) { - gen_helper_autda(dirty_addr, cpu_env, dirty_addr, - new_tmp_a64_zero(s)); + if (!a->m) { + gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, + tcg_constant_i64(0)); } else { - gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, - new_tmp_a64_zero(s)); + gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, + tcg_constant_i64(0)); } } - /* Form the 10-bit signed, scaled offset. */ - offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9); - offset = sextract32(offset << size, 0, 10 + size); - tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); + tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); + + memop = finalize_memop(s, MO_64); /* Note that "clean" and "dirty" here refer to TBI not PAC. */ clean_addr = gen_mte_check1(s, dirty_addr, false, - is_wback || rn != 31, size); + a->w || a->rn != 31, memop); - tcg_rt = cpu_reg(s, rt); - do_gpr_ld(s, tcg_rt, clean_addr, size, - /* extend */ false, /* iss_valid */ !is_wback, - /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false); + tcg_rt = cpu_reg(s, a->rt); + do_gpr_ld(s, tcg_rt, clean_addr, memop, + /* extend */ false, /* iss_valid */ !a->w, + /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); - if (is_wback) { - tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr); + if (a->w) { + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); } + return true; } -/* - * LDAPR/STLR (unscaled immediate) - * - * 31 30 24 22 21 12 10 5 0 - * +------+-------------+-----+---+--------+-----+----+-----+ - * | size | 0 1 1 0 0 1 | opc | 0 | imm9 | 0 0 | Rn | Rt | - * +------+-------------+-----+---+--------+-----+----+-----+ - * - * Rt: source or destination register - * Rn: base register - * imm9: unscaled immediate offset - * opc: 00: STLUR*, 01/10/11: various LDAPUR* - * size: size of load/store - */ -static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) +static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) { - int rt = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int offset = sextract32(insn, 12, 9); - int opc = extract32(insn, 22, 2); - int size = extract32(insn, 30, 2); TCGv_i64 clean_addr, dirty_addr; - bool is_store = false; - bool extend = false; - bool iss_sf; - MemOp mop; + MemOp mop = a->sz | (a->sign ? 
MO_SIGN : 0); + bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); if (!dc_isar_feature(aa64_rcpc_8_4, s)) { - unallocated_encoding(s); - return; - } - - /* TODO: ARMv8.4-LSE SCTLR.nAA */ - mop = size | MO_ALIGN; - - switch (opc) { - case 0: /* STLURB */ - is_store = true; - break; - case 1: /* LDAPUR* */ - break; - case 2: /* LDAPURS* 64-bit variant */ - if (size == 3) { - unallocated_encoding(s); - return; - } - mop |= MO_SIGN; - break; - case 3: /* LDAPURS* 32-bit variant */ - if (size > 1) { - unallocated_encoding(s); - return; - } - mop |= MO_SIGN; - extend = true; /* zero-extend 32->64 after signed load */ - break; - default: - g_assert_not_reached(); + return false; } - iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc); - - if (rn == 31) { + if (a->rn == 31) { gen_check_sp_alignment(s); } - dirty_addr = read_cpu_reg_sp(s, rn, 1); - tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); + mop = check_ordered_align(s, a->rn, a->imm, false, mop); + dirty_addr = read_cpu_reg_sp(s, a->rn, 1); + tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); clean_addr = clean_data_tbi(s, dirty_addr); - if (is_store) { - /* Store-Release semantics */ - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true); - } else { - /* - * Load-AcquirePC semantics; we implement as the slightly more - * restrictive Load-Acquire. - */ - do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, - extend, true, rt, iss_sf, true); - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); - } + /* + * Load-AcquirePC semantics; we implement as the slightly more + * restrictive Load-Acquire. + */ + do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, + a->rt, iss_sf, true); + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); + return true; } -/* Load/store register (all forms) */ -static void disas_ldst_reg(DisasContext *s, uint32_t insn) +static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) { - int rt = extract32(insn, 0, 5); - int opc = extract32(insn, 22, 2); - bool is_vector = extract32(insn, 26, 1); - int size = extract32(insn, 30, 2); + TCGv_i64 clean_addr, dirty_addr; + MemOp mop = a->sz; + bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); - switch (extract32(insn, 24, 2)) { - case 0: - if (extract32(insn, 21, 1) == 0) { - /* Load/store register (unscaled immediate) - * Load/store immediate pre/post-indexed - * Load/store register unprivileged - */ - disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector); - return; - } - switch (extract32(insn, 10, 2)) { - case 0: - disas_ldst_atomic(s, insn, size, rt, is_vector); - return; - case 2: - disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector); - return; - default: - disas_ldst_pac(s, insn, size, rt, is_vector); - return; - } - break; - case 1: - disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector); - return; + if (!dc_isar_feature(aa64_rcpc_8_4, s)) { + return false; } - unallocated_encoding(s); + + /* TODO: ARMv8.4-LSE SCTLR.nAA */ + + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + + mop = check_ordered_align(s, a->rn, a->imm, true, mop); + dirty_addr = read_cpu_reg_sp(s, a->rn, 1); + tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); + clean_addr = clean_data_tbi(s, dirty_addr); + + /* Store-Release semantics */ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); + do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); + return true; } -/* AdvSIMD load/store multiple structures - * - * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0 - * 
+---+---+---------------+---+-------------+--------+------+------+------+ - * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt | - * +---+---+---------------+---+-------------+--------+------+------+------+ - * - * AdvSIMD load/store multiple structures (post-indexed) - * - * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0 - * +---+---+---------------+---+---+---------+--------+------+------+------+ - * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt | - * +---+---+---------------+---+---+---------+--------+------+------+------+ - * - * Rt: first (or only) SIMD&FP register to be transferred - * Rn: base address or SP - * Rm (post-index only): post-index register (when !31) or size dependent #imm - */ -static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) +static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) { - int rt = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int rm = extract32(insn, 16, 5); - int size = extract32(insn, 10, 2); - int opcode = extract32(insn, 12, 4); - bool is_store = !extract32(insn, 22, 1); - bool is_postidx = extract32(insn, 23, 1); - bool is_q = extract32(insn, 30, 1); TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; MemOp endian, align, mop; int total; /* total bytes */ int elements; /* elements per vector */ - int rpt; /* num iterations */ - int selem; /* structure elements */ int r; + int size = a->sz; - if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) { - unallocated_encoding(s); - return; + if (!a->p && a->rm != 0) { + /* For non-postindexed accesses the Rm field must be 0 */ + return false; + } + if (size == 3 && !a->q && a->selem != 1) { + return false; + } + if (!fp_access_check(s)) { + return true; } - if (!is_postidx && rm != 0) { - unallocated_encoding(s); - return; + if (a->rn == 31) { + gen_check_sp_alignment(s); } - /* From the shared decode logic */ - switch (opcode) { - case 0x0: - rpt = 1; - selem = 4; - break; - case 0x2: - rpt = 4; - selem = 1; - break; - case 0x4: - rpt = 1; - selem = 3; - break; - case 0x6: - rpt = 3; - selem = 1; - break; - case 0x7: - rpt = 1; - selem = 1; - break; - case 0x8: - rpt = 1; - selem = 2; - break; - case 0xa: - rpt = 2; - selem = 1; - break; - default: - unallocated_encoding(s); - return; + /* For our purposes, bytes are always little-endian. */ + endian = s->be_data; + if (size == 0) { + endian = MO_LE; } - if (size == 3 && !is_q && selem != 1) { - /* reserved */ - unallocated_encoding(s); - return; + total = a->rpt * a->selem * (a->q ? 16 : 8); + tcg_rn = cpu_reg_sp(s, a->rn); + + /* + * Issue the MTE check vs the logical repeat count, before we + * promote consecutive little-endian elements below. + */ + clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, + finalize_memop_asimd(s, size)); + + /* + * Consecutive little-endian elements from a single register + * can be promoted to a larger little-endian operation. + */ + align = MO_ALIGN; + if (a->selem == 1 && endian == MO_LE) { + align = pow2_align(size); + size = 3; + } + if (!s->align_mem) { + align = 0; + } + mop = endian | size | align; + + elements = (a->q ? 
16 : 8) >> size; + tcg_ebytes = tcg_constant_i64(1 << size); + for (r = 0; r < a->rpt; r++) { + int e; + for (e = 0; e < elements; e++) { + int xs; + for (xs = 0; xs < a->selem; xs++) { + int tt = (a->rt + r + xs) % 32; + do_vec_ld(s, tt, e, clean_addr, mop); + tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); + } + } + } + + /* + * For non-quad operations, setting a slice of the low 64 bits of + * the register clears the high 64 bits (in the ARM ARM pseudocode + * this is implicit in the fact that 'rval' is a 64 bit wide + * variable). For quad operations, we might still need to zero + * the high bits of SVE. + */ + for (r = 0; r < a->rpt * a->selem; r++) { + int tt = (a->rt + r) % 32; + clear_vec_high(s, a->q, tt); + } + + if (a->p) { + if (a->rm == 31) { + tcg_gen_addi_i64(tcg_rn, tcg_rn, total); + } else { + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); + } } + return true; +} + +static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) +{ + TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; + MemOp endian, align, mop; + + int total; /* total bytes */ + int elements; /* elements per vector */ + int r; + int size = a->sz; + if (!a->p && a->rm != 0) { + /* For non-postindexed accesses the Rm field must be 0 */ + return false; + } + if (size == 3 && !a->q && a->selem != 1) { + return false; + } if (!fp_access_check(s)) { - return; + return true; } - if (rn == 31) { + if (a->rn == 31) { gen_check_sp_alignment(s); } @@ -3712,22 +3666,22 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) endian = MO_LE; } - total = rpt * selem * (is_q ? 16 : 8); - tcg_rn = cpu_reg_sp(s, rn); + total = a->rpt * a->selem * (a->q ? 16 : 8); + tcg_rn = cpu_reg_sp(s, a->rn); /* * Issue the MTE check vs the logical repeat count, before we * promote consecutive little-endian elements below. */ - clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, - total); + clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, + finalize_memop_asimd(s, size)); /* * Consecutive little-endian elements from a single register * can be promoted to a larger little-endian operation. */ align = MO_ALIGN; - if (selem == 1 && endian == MO_LE) { + if (a->selem == 1 && endian == MO_LE) { align = pow2_align(size); size = 3; } @@ -3736,561 +3690,562 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) } mop = endian | size | align; - elements = (is_q ? 16 : 8) >> size; - tcg_ebytes = tcg_const_i64(1 << size); - for (r = 0; r < rpt; r++) { + elements = (a->q ? 16 : 8) >> size; + tcg_ebytes = tcg_constant_i64(1 << size); + for (r = 0; r < a->rpt; r++) { int e; for (e = 0; e < elements; e++) { int xs; - for (xs = 0; xs < selem; xs++) { - int tt = (rt + r + xs) % 32; - if (is_store) { - do_vec_st(s, tt, e, clean_addr, mop); - } else { - do_vec_ld(s, tt, e, clean_addr, mop); - } + for (xs = 0; xs < a->selem; xs++) { + int tt = (a->rt + r + xs) % 32; + do_vec_st(s, tt, e, clean_addr, mop); tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); } } } - tcg_temp_free_i64(tcg_ebytes); - if (!is_store) { - /* For non-quad operations, setting a slice of the low - * 64 bits of the register clears the high 64 bits (in - * the ARM ARM pseudocode this is implicit in the fact - * that 'rval' is a 64 bit wide variable). - * For quad operations, we might still need to zero the - * high bits of SVE. 
- */ - for (r = 0; r < rpt * selem; r++) { - int tt = (rt + r) % 32; - clear_vec_high(s, is_q, tt); + if (a->p) { + if (a->rm == 31) { + tcg_gen_addi_i64(tcg_rn, tcg_rn, total); + } else { + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); } } + return true; +} + +static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) +{ + int xs, total, rt; + TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; + MemOp mop; + + if (!a->p && a->rm != 0) { + return false; + } + if (!fp_access_check(s)) { + return true; + } - if (is_postidx) { - if (rm == 31) { + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + + total = a->selem << a->scale; + tcg_rn = cpu_reg_sp(s, a->rn); + + mop = finalize_memop_asimd(s, a->scale); + clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, + total, mop); + + tcg_ebytes = tcg_constant_i64(1 << a->scale); + for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { + do_vec_st(s, rt, a->index, clean_addr, mop); + tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); + } + + if (a->p) { + if (a->rm == 31) { tcg_gen_addi_i64(tcg_rn, tcg_rn, total); } else { - tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); } } + return true; } -/* AdvSIMD load/store single structure - * - * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 - * +---+---+---------------+-----+-----------+-----+---+------+------+------+ - * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt | - * +---+---+---------------+-----+-----------+-----+---+------+------+------+ - * - * AdvSIMD load/store single structure (post-indexed) - * - * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 - * +---+---+---------------+-----+-----------+-----+---+------+------+------+ - * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt | - * +---+---+---------------+-----+-----------+-----+---+------+------+------+ - * - * Rt: first (or only) SIMD&FP register to be transferred - * Rn: base address or SP - * Rm (post-index only): post-index register (when !31) or size dependent #imm - * index = encoded in Q:S:size dependent on size - * - * lane_size = encoded in R, opc - * transfer width = encoded in opc, S, size - */ -static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) +static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) { - int rt = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int rm = extract32(insn, 16, 5); - int size = extract32(insn, 10, 2); - int S = extract32(insn, 12, 1); - int opc = extract32(insn, 13, 3); - int R = extract32(insn, 21, 1); - int is_load = extract32(insn, 22, 1); - int is_postidx = extract32(insn, 23, 1); - int is_q = extract32(insn, 30, 1); - - int scale = extract32(opc, 1, 2); - int selem = (extract32(opc, 0, 1) << 1 | R) + 1; - bool replicate = false; - int index = is_q << 3 | S << 2 | size; - int xs, total; + int xs, total, rt; TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; MemOp mop; - if (extract32(insn, 31, 1)) { - unallocated_encoding(s); - return; + if (!a->p && a->rm != 0) { + return false; } - if (!is_postidx && rm != 0) { - unallocated_encoding(s); - return; + if (!fp_access_check(s)) { + return true; } - switch (scale) { - case 3: - if (!is_load || S) { - unallocated_encoding(s); - return; - } - scale = size; - replicate = true; - break; - case 0: - break; - case 1: - if (extract32(size, 0, 1)) { - unallocated_encoding(s); - return; - } - index >>= 1; - break; - case 2: - if (extract32(size, 1, 1)) { - unallocated_encoding(s); - return; - } - if 
(!extract32(size, 0, 1)) { - index >>= 2; + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + + total = a->selem << a->scale; + tcg_rn = cpu_reg_sp(s, a->rn); + + mop = finalize_memop_asimd(s, a->scale); + clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, + total, mop); + + tcg_ebytes = tcg_constant_i64(1 << a->scale); + for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { + do_vec_ld(s, rt, a->index, clean_addr, mop); + tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); + } + + if (a->p) { + if (a->rm == 31) { + tcg_gen_addi_i64(tcg_rn, tcg_rn, total); } else { - if (S) { - unallocated_encoding(s); - return; - } - index >>= 3; - scale = 3; + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); } - break; - default: - g_assert_not_reached(); } + return true; +} + +static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) +{ + int xs, total, rt; + TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; + MemOp mop; + if (!a->p && a->rm != 0) { + return false; + } if (!fp_access_check(s)) { - return; + return true; } - if (rn == 31) { + if (a->rn == 31) { gen_check_sp_alignment(s); } - total = selem << scale; - tcg_rn = cpu_reg_sp(s, rn); + total = a->selem << a->scale; + tcg_rn = cpu_reg_sp(s, a->rn); - clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, - total); - mop = finalize_memop(s, scale); + mop = finalize_memop_asimd(s, a->scale); + clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, + total, mop); - tcg_ebytes = tcg_const_i64(1 << scale); - for (xs = 0; xs < selem; xs++) { - if (replicate) { - /* Load and replicate to all elements */ - TCGv_i64 tcg_tmp = tcg_temp_new_i64(); + tcg_ebytes = tcg_constant_i64(1 << a->scale); + for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { + /* Load and replicate to all elements */ + TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); - tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt), - (is_q + 1) * 8, vec_full_reg_size(s), - tcg_tmp); - tcg_temp_free_i64(tcg_tmp); - } else { - /* Load/store one element per register */ - if (is_load) { - do_vec_ld(s, rt, index, clean_addr, mop); - } else { - do_vec_st(s, rt, index, clean_addr, mop); - } - } + tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); + tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), + (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp); tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); - rt = (rt + 1) % 32; } - tcg_temp_free_i64(tcg_ebytes); - if (is_postidx) { - if (rm == 31) { + if (a->p) { + if (a->rm == 31) { tcg_gen_addi_i64(tcg_rn, tcg_rn, total); } else { - tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); + tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); } } + return true; } -/* - * Load/Store memory tags - * - * 31 30 29 24 22 21 12 10 5 0 - * +-----+-------------+-----+---+------+-----+------+------+ - * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 | Rn | Rt | - * +-----+-------------+-----+---+------+-----+------+------+ - */ -static void disas_ldst_tag(DisasContext *s, uint32_t insn) +static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) { - int rt = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE; - int op2 = extract32(insn, 10, 2); - int op1 = extract32(insn, 22, 2); - bool is_load = false, is_pair = false, is_zero = false, is_mult = false; - int index = 0; TCGv_i64 addr, clean_addr, tcg_rt; + int size = 4 << s->dcz_blocksize; 
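+    /* dcz_blocksize is DCZID_EL0.BS: log2 of the block size in words, hence 4 << BS bytes */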
- /* We checked insn bits [29:24,21] in the caller. */ - if (extract32(insn, 30, 2) != 3) { - goto do_unallocated; + if (!dc_isar_feature(aa64_mte, s)) { + return false; + } + if (s->current_el == 0) { + return false; } + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + + addr = read_cpu_reg_sp(s, a->rn, true); + tcg_gen_addi_i64(addr, addr, a->imm); + tcg_rt = cpu_reg(s, a->rt); + + if (s->ata[0]) { + gen_helper_stzgm_tags(tcg_env, addr, tcg_rt); + } /* - * @index is a tri-state variable which has 3 states: - * < 0 : post-index, writeback - * = 0 : signed offset - * > 0 : pre-index, writeback + * The non-tags portion of STZGM is mostly like DC_ZVA, + * except the alignment happens before the access. */ - switch (op1) { - case 0: - if (op2 != 0) { - /* STG */ - index = op2 - 2; - } else { - /* STZGM */ - if (s->current_el == 0 || offset != 0) { - goto do_unallocated; - } - is_mult = is_zero = true; - } - break; - case 1: - if (op2 != 0) { - /* STZG */ - is_zero = true; - index = op2 - 2; - } else { - /* LDG */ - is_load = true; - } - break; - case 2: - if (op2 != 0) { - /* ST2G */ - is_pair = true; - index = op2 - 2; - } else { - /* STGM */ - if (s->current_el == 0 || offset != 0) { - goto do_unallocated; - } - is_mult = true; - } - break; - case 3: - if (op2 != 0) { - /* STZ2G */ - is_pair = is_zero = true; - index = op2 - 2; - } else { - /* LDGM */ - if (s->current_el == 0 || offset != 0) { - goto do_unallocated; - } - is_mult = is_load = true; - } - break; + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(clean_addr, clean_addr, -size); + gen_helper_dc_zva(tcg_env, clean_addr); + return true; +} - default: - do_unallocated: - unallocated_encoding(s); - return; +static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) +{ + TCGv_i64 addr, clean_addr, tcg_rt; + + if (!dc_isar_feature(aa64_mte, s)) { + return false; + } + if (s->current_el == 0) { + return false; } - if (is_mult - ? !dc_isar_feature(aa64_mte, s) - : !dc_isar_feature(aa64_mte_insn_reg, s)) { - goto do_unallocated; + if (a->rn == 31) { + gen_check_sp_alignment(s); } - if (rn == 31) { + addr = read_cpu_reg_sp(s, a->rn, true); + tcg_gen_addi_i64(addr, addr, a->imm); + tcg_rt = cpu_reg(s, a->rt); + + if (s->ata[0]) { + gen_helper_stgm(tcg_env, addr, tcg_rt); + } else { + MMUAccessType acc = MMU_DATA_STORE; + int size = 4 << s->gm_blocksize; + + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(clean_addr, clean_addr, -size); + gen_probe_access(s, clean_addr, acc, size); + } + return true; +} + +static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) +{ + TCGv_i64 addr, clean_addr, tcg_rt; + + if (!dc_isar_feature(aa64_mte, s)) { + return false; + } + if (s->current_el == 0) { + return false; + } + + if (a->rn == 31) { gen_check_sp_alignment(s); } - addr = read_cpu_reg_sp(s, rn, true); - if (index >= 0) { - /* pre-index or signed offset */ - tcg_gen_addi_i64(addr, addr, offset); + addr = read_cpu_reg_sp(s, a->rn, true); + tcg_gen_addi_i64(addr, addr, a->imm); + tcg_rt = cpu_reg(s, a->rt); + + if (s->ata[0]) { + gen_helper_ldgm(tcg_rt, tcg_env, addr); + } else { + MMUAccessType acc = MMU_DATA_LOAD; + int size = 4 << s->gm_blocksize; + + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(clean_addr, clean_addr, -size); + gen_probe_access(s, clean_addr, acc, size); + /* The result tags are zeros. 
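+     * With tag access disabled the tag storage cannot be read, so we
+     * only probe the memory for faults and then return zeros.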
*/ + tcg_gen_movi_i64(tcg_rt, 0); } + return true; +} -static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) { - int rn == 31 ... +static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) +{ + TCGv_i64 addr, clean_addr, tcg_rt; - if (is_mult) { - tcg_rt = cpu_reg(s, rt); + if (!dc_isar_feature(aa64_mte_insn_reg, s)) { + return false; + } - if (is_zero) { - int size = 4 << s->dcz_blocksize; + if (a->rn == 31) { + gen_check_sp_alignment(s); + } - if (s->ata) { - gen_helper_stzgm_tags(cpu_env, addr, tcg_rt); - } - /* - * The non-tags portion of STZGM is mostly like DC_ZVA, - * except the alignment happens before the access. - */ - clean_addr = clean_data_tbi(s, addr); - tcg_gen_andi_i64(clean_addr, clean_addr, -size); - gen_helper_dc_zva(cpu_env, clean_addr); - } else if (s->ata) { - if (is_load) { - gen_helper_ldgm(tcg_rt, cpu_env, addr); - } else { - gen_helper_stgm(cpu_env, addr, tcg_rt); - } - } else { - MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; - int size = 4 << GMID_EL1_BS; + addr = read_cpu_reg_sp(s, a->rn, true); + if (!a->p) { + /* pre-index or signed offset */ + tcg_gen_addi_i64(addr, addr, a->imm); + } - clean_addr = clean_data_tbi(s, addr); - tcg_gen_andi_i64(clean_addr, clean_addr, -size); - gen_probe_access(s, clean_addr, acc, size); + tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); + tcg_rt = cpu_reg(s, a->rt); + if (s->ata[0]) { + gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt); + } else { + /* + * Tag access disabled: we must check for aborts on the load + * from [rn+offset], and then insert a 0 tag into rt. + */ + clean_addr = clean_data_tbi(s, addr); + gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); + gen_address_with_allocation_tag0(tcg_rt, tcg_rt); + } + + if (a->w) { + /* pre-index or post-index */ + if (a->p) { + /* post-index */ + tcg_gen_addi_i64(addr, addr, a->imm); } - return; + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); } + return true; } +static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) +{ + TCGv_i64 addr, tcg_rt; + - if (is_load) { - tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); - tcg_rt = cpu_reg(s, rt); - if (s->ata) { - gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt); + if (a->rn == 31) { + gen_check_sp_alignment(s); + } + + addr = read_cpu_reg_sp(s, a->rn, true); + if (!a->p) { + /* pre-index or signed offset */ + tcg_gen_addi_i64(addr, addr, a->imm); + } + tcg_rt = cpu_reg_sp(s, a->rt); + if (!s->ata[0]) { + /* + * For STG and ST2G, we need to check alignment and probe memory. + * TODO: For STZG and STZ2G, we could rely on the stores below, + * at least for system mode; user-only won't enforce alignment. + */ + if (is_pair) { + gen_helper_st2g_stub(tcg_env, addr); + } else { - clean_addr = clean_data_tbi(s, addr); - gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); - gen_address_with_allocation_tag0(tcg_rt, addr); + gen_helper_stg_stub(tcg_env, addr); + } + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (is_pair) { + gen_helper_st2g_parallel(tcg_env, addr, tcg_rt); + } else { + gen_helper_stg_parallel(tcg_env, addr, tcg_rt); + } } else { - tcg_rt = cpu_reg_sp(s, rt); - if (!s->ata) { - /* - * For STG and ST2G, we need to check alignment and probe memory. - * TODO: For STZG and STZ2G, we could rely on the stores below, - * at least for system mode; user-only won't enforce alignment. 
- */ - if (is_pair) { - gen_helper_st2g_stub(cpu_env, addr); - } else { - gen_helper_stg_stub(cpu_env, addr); - } - } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { - if (is_pair) { - gen_helper_st2g_parallel(cpu_env, addr, tcg_rt); - } else { - gen_helper_stg_parallel(cpu_env, addr, tcg_rt); - } + if (is_pair) { + gen_helper_st2g(tcg_env, addr, tcg_rt); } else { - if (is_pair) { - gen_helper_st2g(cpu_env, addr, tcg_rt); - } else { - gen_helper_stg(cpu_env, addr, tcg_rt); - } + gen_helper_stg(tcg_env, addr, tcg_rt); } } if (is_zero) { TCGv_i64 clean_addr = clean_data_tbi(s, addr); - TCGv_i64 tcg_zero = tcg_const_i64(0); + TCGv_i64 zero64 = tcg_constant_i64(0); + TCGv_i128 zero128 = tcg_temp_new_i128(); int mem_index = get_mem_index(s); - int i, n = (1 + is_pair) << LOG2_TAG_GRANULE; + MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); - tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, - MO_Q | MO_ALIGN_16); - for (i = 8; i < n; i += 8) { - tcg_gen_addi_i64(clean_addr, clean_addr, 8); - tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_Q); + tcg_gen_concat_i64_i128(zero128, zero64, zero64); + + /* This is 1 or 2 atomic 16-byte operations. */ + tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); + if (is_pair) { + tcg_gen_addi_i64(clean_addr, clean_addr, 16); + tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); } - tcg_temp_free_i64(tcg_zero); } - if (index != 0) { + if (a->w) { /* pre-index or post-index */ - if (index < 0) { + if (a->p) { /* post-index */ - tcg_gen_addi_i64(addr, addr, offset); + tcg_gen_addi_i64(addr, addr, a->imm); } - tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr); + tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); } + return true; } -/* Loads and stores */ -static void disas_ldst(DisasContext *s, uint32_t insn) +TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) +TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) +TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) +TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) + +typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); + +static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, + bool is_setg, SetFn fn) { - switch (extract32(insn, 24, 6)) { - case 0x08: /* Load/store exclusive */ - disas_ldst_excl(s, insn); - break; - case 0x18: case 0x1c: /* Load register (literal) */ - disas_ld_lit(s, insn); - break; - case 0x28: case 0x29: - case 0x2c: case 0x2d: /* Load/store pair (all forms) */ - disas_ldst_pair(s, insn); - break; - case 0x38: case 0x39: - case 0x3c: case 0x3d: /* Load/store register (all forms) */ - disas_ldst_reg(s, insn); - break; - case 0x0c: /* AdvSIMD load/store multiple structures */ - disas_ldst_multiple_struct(s, insn); - break; - case 0x0d: /* AdvSIMD load/store single structure */ - disas_ldst_single_struct(s, insn); - break; - case 0x19: - if (extract32(insn, 21, 1) != 0) { - disas_ldst_tag(s, insn); - } else if (extract32(insn, 10, 2) == 0) { - disas_ldst_ldapr_stlr(s, insn); - } else { - unallocated_encoding(s); - } - break; - default: - unallocated_encoding(s); - break; + int memidx; + uint32_t syndrome, desc = 0; + + if (is_setg && !dc_isar_feature(aa64_mte, s)) { + return false; } -} -/* PC-rel. 
addressing - * 31 30 29 28 24 23 5 4 0 - * +----+-------+-----------+-------------------+------+ - * | op | immlo | 1 0 0 0 0 | immhi | Rd | - * +----+-------+-----------+-------------------+------+ - */ -static void disas_pc_rel_adr(DisasContext *s, uint32_t insn) -{ - unsigned int page, rd; - uint64_t base; - uint64_t offset; + /* + * UNPREDICTABLE cases: we choose to UNDEF, which allows + * us to pull this check before the CheckMOPSEnabled() test + * (which we do in the helper function) + */ + if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || + a->rd == 31 || a->rn == 31) { + return false; + } - page = extract32(insn, 31, 1); - /* SignExtend(immhi:immlo) -> offset */ - offset = sextract64(insn, 5, 19); - offset = offset << 2 | extract32(insn, 29, 2); - rd = extract32(insn, 0, 5); - base = s->pc_curr; + memidx = get_a64_user_mem_index(s, a->unpriv); + + /* + * We pass option_a == true, matching our implementation; + * we pass wrong_option == false: helper function may set that bit. + */ + syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, + is_epilogue, false, true, a->rd, a->rs, a->rn); - if (page) { - /* ADRP (page based) */ - base &= ~0xfff; - offset <<= 12; + if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { + /* We may need to do MTE tag checking, so assemble the descriptor */ + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); + desc = FIELD_DP32(desc, MTEDESC, WRITE, true); + /* SIZEM1 and ALIGN we leave 0 (byte write) */ } + /* The helper function always needs the memidx even with MTE disabled */ + desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); - tcg_gen_movi_i64(cpu_reg(s, rd), base + offset); + /* + * The helper needs the register numbers, but since they're in + * the syndrome anyway, we let it extract them from there rather + * than passing in an extra three integer arguments. + */ + fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); + return true; } -/* - * Add/subtract (immediate) - * - * 31 30 29 28 23 22 21 10 9 5 4 0 - * +--+--+--+-------------+--+-------------+-----+-----+ - * |sf|op| S| 1 0 0 0 1 0 |sh| imm12 | Rn | Rd | - * +--+--+--+-------------+--+-------------+-----+-----+ - * - * sf: 0 -> 32bit, 1 -> 64bit - * op: 0 -> add , 1 -> sub - * S: 1 -> set flags - * sh: 1 -> LSL imm by 12 - */ -static void disas_add_sub_imm(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - uint64_t imm = extract32(insn, 10, 12); - bool shift = extract32(insn, 22, 1); - bool setflags = extract32(insn, 29, 1); - bool sub_op = extract32(insn, 30, 1); - bool is_64bit = extract32(insn, 31, 1); +TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) +TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) +TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) +TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) +TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) +TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) - TCGv_i64 tcg_rn = cpu_reg_sp(s, rn); - TCGv_i64 tcg_rd = setflags ? 
cpu_reg(s, rd) : cpu_reg_sp(s, rd); - TCGv_i64 tcg_result; +typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); + +static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) +{ + int rmemidx, wmemidx; + uint32_t syndrome, rdesc = 0, wdesc = 0; + bool wunpriv = extract32(a->options, 0, 1); + bool runpriv = extract32(a->options, 1, 1); - if (shift) { - imm <<= 12; + /* + * UNPREDICTABLE cases: we choose to UNDEF, which allows + * us to pull this check before the CheckMOPSEnabled() test + * (which we do in the helper function) + */ + if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || + a->rd == 31 || a->rs == 31 || a->rn == 31) { + return false; } - tcg_result = tcg_temp_new_i64(); - if (!setflags) { - if (sub_op) { - tcg_gen_subi_i64(tcg_result, tcg_rn, imm); - } else { - tcg_gen_addi_i64(tcg_result, tcg_rn, imm); - } - } else { - TCGv_i64 tcg_imm = tcg_const_i64(imm); - if (sub_op) { - gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm); - } else { - gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm); - } - tcg_temp_free_i64(tcg_imm); + rmemidx = get_a64_user_mem_index(s, runpriv); + wmemidx = get_a64_user_mem_index(s, wunpriv); + + /* + * We pass option_a == true, matching our implementation; + * we pass wrong_option == false: helper function may set that bit. + */ + syndrome = syn_mop(false, false, a->options, is_epilogue, + false, true, a->rd, a->rs, a->rn); + + /* If we need to do MTE tag checking, assemble the descriptors */ + if (s->mte_active[runpriv]) { + rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); + rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); + } + if (s->mte_active[wunpriv]) { + wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); + wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); + wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); } + /* The helper function needs these parts of the descriptor regardless */ + rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); + wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); - if (is_64bit) { - tcg_gen_mov_i64(tcg_rd, tcg_result); - } else { - tcg_gen_ext32u_i64(tcg_rd, tcg_result); + /* + * The helper needs the register numbers, but since they're in + * the syndrome anyway, we let it extract them from there rather + * than passing in an extra three integer arguments. + */ + fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), + tcg_constant_i32(rdesc)); + return true; +} + +TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) +TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) +TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) +TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) +TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) +TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) + +typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); + +static bool gen_rri(DisasContext *s, arg_rri_sf *a, + bool rd_sp, bool rn_sp, ArithTwoOp *fn) +{ + TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); + TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); + TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); + + fn(tcg_rd, tcg_rn, tcg_imm); + if (!a->sf) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); } + return true; +} - tcg_temp_free_i64(tcg_result); +/* + * PC-rel. 
addressing + */ + +static bool trans_ADR(DisasContext *s, arg_ri *a) +{ + gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); + return true; } +static bool trans_ADRP(DisasContext *s, arg_ri *a) +{ + int64_t offset = (int64_t)a->imm << 12; + + /* The page offset is ok for CF_PCREL. */ + offset -= s->pc_curr & 0xfff; + gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); + return true; +} + +/* + * Add/subtract (immediate) + */ +TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) +TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) +TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) +TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) + /* * Add/subtract (immediate, with tags) - * - * 31 30 29 28 23 22 21 16 14 10 9 5 4 0 - * +--+--+--+-------------+--+---------+--+-------+-----+-----+ - * |sf|op| S| 1 0 0 0 1 1 |o2| uimm6 |o3| uimm4 | Rn | Rd | - * +--+--+--+-------------+--+---------+--+-------+-----+-----+ - * - * op: 0 -> add, 1 -> sub */ -static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn) + +static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, + bool sub_op) { - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int uimm4 = extract32(insn, 10, 4); - int uimm6 = extract32(insn, 16, 6); - bool sub_op = extract32(insn, 30, 1); TCGv_i64 tcg_rn, tcg_rd; int imm; - /* Test all of sf=1, S=0, o2=0, o3=0. */ - if ((insn & 0xa040c000u) != 0x80000000u || - !dc_isar_feature(aa64_mte_insn_reg, s)) { - unallocated_encoding(s); - return; - } - - imm = uimm6 << LOG2_TAG_GRANULE; + imm = a->uimm6 << LOG2_TAG_GRANULE; if (sub_op) { imm = -imm; } - tcg_rn = cpu_reg_sp(s, rn); - tcg_rd = cpu_reg_sp(s, rd); - - if (s->ata) { - TCGv_i32 offset = tcg_const_i32(imm); - TCGv_i32 tag_offset = tcg_const_i32(uimm4); + tcg_rn = cpu_reg_sp(s, a->rn); + tcg_rd = cpu_reg_sp(s, a->rd); - gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn, offset, tag_offset); - tcg_temp_free_i32(tag_offset); - tcg_temp_free_i32(offset); + if (s->ata[0]) { + gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, + tcg_constant_i32(imm), + tcg_constant_i32(a->uimm4)); } else { tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); gen_address_with_allocation_tag0(tcg_rd, tcg_rd); } + return true; } +TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) +TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) + /* The input should be a value in the bottom e bits (with higher * bits zero); returns that value replicated into every element * of size e in a 64 bit integer. @@ -4305,14 +4260,12 @@ static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) return mask; } -/* Return a value with the bottom len bits set (where 0 < len <= 64) */ -static inline uint64_t bitmask64(unsigned int length) -{ - assert(length > 0 && length <= 64); - return ~0ULL >> (64 - length); -} +/* + * Logical (immediate) + */ -/* Simplified variant of pseudocode DecodeBitMasks() for the case where we +/* + * Simplified variant of pseudocode DecodeBitMasks() for the case where we * only require the wmask. Returns false if the imms/immr/immn are a reserved * value (ie should cause a guest UNDEF exception), and true if they are * valid, in which case the decoded bit pattern is written to result. @@ -4367,10 +4320,10 @@ bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, /* Create the value of one element: s+1 set bits rotated * by r within the element (which is e bits wide)... 
*/ - mask = bitmask64(s + 1); + mask = MAKE_64BIT_MASK(0, s + 1); if (r) { mask = (mask >> r) | (mask << (e - r)); - mask &= bitmask64(e); + mask &= MAKE_64BIT_MASK(0, e); } /* ...then replicate the element over the whole 64 bit value */ mask = bitfield_replicate(mask, e); @@ -4378,300 +4331,215 @@ bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, return true; } -/* Logical (immediate) - * 31 30 29 28 23 22 21 16 15 10 9 5 4 0 - * +----+-----+-------------+---+------+------+------+------+ - * | sf | opc | 1 0 0 1 0 0 | N | immr | imms | Rn | Rd | - * +----+-----+-------------+---+------+------+------+------+ - */ -static void disas_logic_imm(DisasContext *s, uint32_t insn) +static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, + void (*fn)(TCGv_i64, TCGv_i64, int64_t)) { - unsigned int sf, opc, is_n, immr, imms, rn, rd; TCGv_i64 tcg_rd, tcg_rn; - uint64_t wmask; - bool is_and = false; - - sf = extract32(insn, 31, 1); - opc = extract32(insn, 29, 2); - is_n = extract32(insn, 22, 1); - immr = extract32(insn, 16, 6); - imms = extract32(insn, 10, 6); - rn = extract32(insn, 5, 5); - rd = extract32(insn, 0, 5); - - if (!sf && is_n) { - unallocated_encoding(s); - return; - } + uint64_t imm; - if (opc == 0x3) { /* ANDS */ - tcg_rd = cpu_reg(s, rd); - } else { - tcg_rd = cpu_reg_sp(s, rd); + /* Some immediate field values are reserved. */ + if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), + extract32(a->dbm, 0, 6), + extract32(a->dbm, 6, 6))) { + return false; } - tcg_rn = cpu_reg(s, rn); - - if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) { - /* some immediate field values are reserved */ - unallocated_encoding(s); - return; + if (!a->sf) { + imm &= 0xffffffffull; } - if (!sf) { - wmask &= 0xffffffff; - } + tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd); + tcg_rn = cpu_reg(s, a->rn); - switch (opc) { - case 0x3: /* ANDS */ - case 0x0: /* AND */ - tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask); - is_and = true; - break; - case 0x1: /* ORR */ - tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask); - break; - case 0x2: /* EOR */ - tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask); - break; - default: - assert(FALSE); /* must handle all above */ - break; + fn(tcg_rd, tcg_rn, imm); + if (set_cc) { + gen_logic_CC(a->sf, tcg_rd); } - - if (!sf && !is_and) { - /* zero extend final result; we know we can skip this for AND - * since the immediate had the high 32 bits clear. 
- */ + if (!a->sf) { tcg_gen_ext32u_i64(tcg_rd, tcg_rd); } - - if (opc == 3) { /* ANDS */ - gen_logic_CC(sf, tcg_rd); - } + return true; } +TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64) +TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64) +TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64) +TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64) + /* * Move wide (immediate) - * - * 31 30 29 28 23 22 21 20 5 4 0 - * +--+-----+-------------+-----+----------------+------+ - * |sf| opc | 1 0 0 1 0 1 | hw | imm16 | Rd | - * +--+-----+-------------+-----+----------------+------+ - * - * sf: 0 -> 32 bit, 1 -> 64 bit - * opc: 00 -> N, 10 -> Z, 11 -> K - * hw: shift/16 (0,16, and sf only 32, 48) */ -static void disas_movw_imm(DisasContext *s, uint32_t insn) + +static bool trans_MOVZ(DisasContext *s, arg_movw *a) { - int rd = extract32(insn, 0, 5); - uint64_t imm = extract32(insn, 5, 16); - int sf = extract32(insn, 31, 1); - int opc = extract32(insn, 29, 2); - int pos = extract32(insn, 21, 2) << 4; - TCGv_i64 tcg_rd = cpu_reg(s, rd); - TCGv_i64 tcg_imm; + int pos = a->hw << 4; + tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos); + return true; +} - if (!sf && (pos >= 32)) { - unallocated_encoding(s); - return; +static bool trans_MOVN(DisasContext *s, arg_movw *a) +{ + int pos = a->hw << 4; + uint64_t imm = a->imm; + + imm = ~(imm << pos); + if (!a->sf) { + imm = (uint32_t)imm; } + tcg_gen_movi_i64(cpu_reg(s, a->rd), imm); + return true; +} - switch (opc) { - case 0: /* MOVN */ - case 2: /* MOVZ */ - imm <<= pos; - if (opc == 0) { - imm = ~imm; - } - if (!sf) { - imm &= 0xffffffffu; - } - tcg_gen_movi_i64(tcg_rd, imm); - break; - case 3: /* MOVK */ - tcg_imm = tcg_const_i64(imm); - tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16); - tcg_temp_free_i64(tcg_imm); - if (!sf) { - tcg_gen_ext32u_i64(tcg_rd, tcg_rd); - } - break; - default: - unallocated_encoding(s); - break; +static bool trans_MOVK(DisasContext *s, arg_movw *a) +{ + int pos = a->hw << 4; + TCGv_i64 tcg_rd, tcg_im; + + tcg_rd = cpu_reg(s, a->rd); + tcg_im = tcg_constant_i64(a->imm); + tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16); + if (!a->sf) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); } + return true; } -/* Bitfield - * 31 30 29 28 23 22 21 16 15 10 9 5 4 0 - * +----+-----+-------------+---+------+------+------+------+ - * | sf | opc | 1 0 0 1 1 0 | N | immr | imms | Rn | Rd | - * +----+-----+-------------+---+------+------+------+------+ +/* + * Bitfield */ -static void disas_bitfield(DisasContext *s, uint32_t insn) + +static bool trans_SBFM(DisasContext *s, arg_SBFM *a) { - unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len; - TCGv_i64 tcg_rd, tcg_tmp; + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); + unsigned int bitsize = a->sf ? 64 : 32; + unsigned int ri = a->immr; + unsigned int si = a->imms; + unsigned int pos, len; - sf = extract32(insn, 31, 1); - opc = extract32(insn, 29, 2); - n = extract32(insn, 22, 1); - ri = extract32(insn, 16, 6); - si = extract32(insn, 10, 6); - rn = extract32(insn, 5, 5); - rd = extract32(insn, 0, 5); - bitsize = sf ? 
64 : 32; + if (si >= ri) { + /* Wd<s-r:0> = Wn<s:r> */ + len = (si - ri) + 1; + tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); + if (!a->sf) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } + } else { + /* Wd<32+s-r,32-r> = Wn<s:0> */ + len = si + 1; + pos = (bitsize - ri) & (bitsize - 1); - if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) { - unallocated_encoding(s); - return; + if (len < ri) { + /* + * Sign extend the destination field from len to fill the + * balance of the word. Let the deposit below insert all + * of those sign bits. + */ + tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); + len = ri; + } + + /* + * We start with zero, and we haven't modified any bits outside + * bitsize, therefore no final zero-extension is needed for !sf. + */ + tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); } + return true; +} - tcg_rd = cpu_reg(s, rd); +static bool trans_UBFM(DisasContext *s, arg_UBFM *a) +{ + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); + unsigned int bitsize = a->sf ? 64 : 32; + unsigned int ri = a->immr; + unsigned int si = a->imms; + unsigned int pos, len; - /* Suppress the zero-extend for !sf. Since RI and SI are constrained - to be smaller than bitsize, we'll never reference data outside the - low 32-bits anyway. */ - tcg_tmp = read_cpu_reg(s, rn, 1); + tcg_rd = cpu_reg(s, a->rd); + tcg_tmp = read_cpu_reg(s, a->rn, 1); - /* Recognize simple(r) extractions. */ if (si >= ri) { /* Wd<s-r:0> = Wn<s:r> */ len = (si - ri) + 1; - if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */ - tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); - goto done; - } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */ - tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); - return; - } - /* opc == 1, BFXIL fall through to deposit */ - tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); - pos = 0; + tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); } else { - /* Handle the ri > si case with a deposit - * Wd<32+s-r,32-r> = Wn<s:0> - */ + /* Wd<32+s-r,32-r> = Wn<s:0> */ len = si + 1; pos = (bitsize - ri) & (bitsize - 1); + tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); } + return true; +} - if (opc == 0 && len < ri) { - /* SBFM: sign extend the destination field from len to fill - the balance of the word. Let the deposit below insert all - of those sign bits. */ - tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); - len = ri; - } +static bool trans_BFM(DisasContext *s, arg_BFM *a) +{ + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); + unsigned int bitsize = a->sf ? 64 : 32; + unsigned int ri = a->immr; + unsigned int si = a->imms; + unsigned int pos, len; - if (opc == 1) { /* BFM, BFXIL */ - tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); + tcg_rd = cpu_reg(s, a->rd); + tcg_tmp = read_cpu_reg(s, a->rn, 1); + + if (si >= ri) { + /* Wd<s-r:0> = Wn<s:r> */ + tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); + len = (si - ri) + 1; + pos = 0; } else { - /* SBFM or UBFM: We start with zero, and we haven't modified - any bits outside bitsize, therefore the zero-extension - below is unneeded. 
*/ - tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); - return; + /* Wd<32+s-r,32-r> = Wn<s:0> */ + len = si + 1; + pos = (bitsize - ri) & (bitsize - 1); } - done: - if (!sf) { /* zero extend final result */ + tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); + if (!a->sf) { tcg_gen_ext32u_i64(tcg_rd, tcg_rd); } + return true; } -/* Extract - * 31 30 29 28 23 22 21 20 16 15 10 9 5 4 0 - * +----+------+-------------+---+----+------+--------+------+------+ - * | sf | op21 | 1 0 0 1 1 1 | N | o0 | Rm | imms | Rn | Rd | - * +----+------+-------------+---+----+------+--------+------+------+ - */ -static void disas_extract(DisasContext *s, uint32_t insn) +static bool trans_EXTR(DisasContext *s, arg_extract *a) { - unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0; + TCGv_i64 tcg_rd, tcg_rm, tcg_rn; - sf = extract32(insn, 31, 1); - n = extract32(insn, 22, 1); - rm = extract32(insn, 16, 5); - imm = extract32(insn, 10, 6); - rn = extract32(insn, 5, 5); - rd = extract32(insn, 0, 5); - op21 = extract32(insn, 29, 2); - op0 = extract32(insn, 21, 1); - bitsize = sf ? 64 : 32; + tcg_rd = cpu_reg(s, a->rd); - if (sf != n || op21 || op0 || imm >= bitsize) { - unallocated_encoding(s); + if (unlikely(a->imm == 0)) { + /* + * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, + * so an extract from bit 0 is a special case. + */ + if (a->sf) { + tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); + } else { + tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); + } } else { - TCGv_i64 tcg_rd, tcg_rm, tcg_rn; - - tcg_rd = cpu_reg(s, rd); + tcg_rm = cpu_reg(s, a->rm); + tcg_rn = cpu_reg(s, a->rn); - if (unlikely(imm == 0)) { - /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, - * so an extract from bit 0 is a special case. - */ - if (sf) { - tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm)); - } else { - tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm)); - } + if (a->sf) { + /* Specialization to ROR happens in EXTRACT2. */ + tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); } else { - tcg_rm = cpu_reg(s, rm); - tcg_rn = cpu_reg(s, rn); + TCGv_i32 t0 = tcg_temp_new_i32(); - if (sf) { - /* Specialization to ROR happens in EXTRACT2. */ - tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm); + tcg_gen_extrl_i64_i32(t0, tcg_rm); + if (a->rm == a->rn) { + tcg_gen_rotri_i32(t0, t0, a->imm); } else { - TCGv_i32 t0 = tcg_temp_new_i32(); - - tcg_gen_extrl_i64_i32(t0, tcg_rm); - if (rm == rn) { - tcg_gen_rotri_i32(t0, t0, imm); - } else { - TCGv_i32 t1 = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(t1, tcg_rn); - tcg_gen_extract2_i32(t0, t0, t1, imm); - tcg_temp_free_i32(t1); - } - tcg_gen_extu_i32_i64(tcg_rd, t0); - tcg_temp_free_i32(t0); + TCGv_i32 t1 = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t1, tcg_rn); + tcg_gen_extract2_i32(t0, t0, t1, a->imm); } + tcg_gen_extu_i32_i64(tcg_rd, t0); } } -} - -/* Data processing - immediate */ -static void disas_data_proc_imm(DisasContext *s, uint32_t insn) -{ - switch (extract32(insn, 23, 6)) { - case 0x20: case 0x21: /* PC-rel. 
addressing */ - disas_pc_rel_adr(s, insn); - break; - case 0x22: /* Add/subtract (immediate) */ - disas_add_sub_imm(s, insn); - break; - case 0x23: /* Add/subtract (immediate, with tags) */ - disas_add_sub_imm_with_tags(s, insn); - break; - case 0x24: /* Logical (immediate) */ - disas_logic_imm(s, insn); - break; - case 0x25: /* Move wide (immediate) */ - disas_movw_imm(s, insn); - break; - case 0x26: /* Bitfield */ - disas_bitfield(s, insn); - break; - case 0x27: /* Extract */ - disas_extract(s, insn); - break; - default: - unallocated_encoding(s); - break; - } + return true; } /* Shift a TCGv src by TCGv shift_amount, put result in dst. @@ -4706,8 +4574,6 @@ static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, tcg_gen_extrl_i64_i32(t1, shift_amount); tcg_gen_rotr_i32(t0, t0, t1); tcg_gen_extu_i32_i64(dst, t0); - tcg_temp_free_i32(t0); - tcg_temp_free_i32(t1); } break; default: @@ -4732,11 +4598,7 @@ static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, if (shift_i == 0) { tcg_gen_mov_i64(dst, src); } else { - TCGv_i64 shift_const; - - shift_const = tcg_const_i64(shift_i); - shift_reg(dst, src, sf, shift_type, shift_const); - tcg_temp_free_i64(shift_const); + shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); } } @@ -4900,8 +4762,6 @@ static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn) } else { tcg_gen_ext32u_i64(tcg_rd, tcg_result); } - - tcg_temp_free_i64(tcg_result); } /* @@ -4964,8 +4824,6 @@ static void disas_add_sub_reg(DisasContext *s, uint32_t insn) } else { tcg_gen_ext32u_i64(tcg_rd, tcg_result); } - - tcg_temp_free_i64(tcg_result); } /* Data-processing (3 source) @@ -5023,8 +4881,6 @@ static void disas_data_proc_3src(DisasContext *s, uint32_t insn) } else { tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); } - - tcg_temp_free_i64(low_bits); return; } @@ -5060,10 +4916,6 @@ static void disas_data_proc_3src(DisasContext *s, uint32_t insn) if (!sf) { tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd)); } - - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); - tcg_temp_free_i64(tcg_tmp); } /* Add/subtract (with carry) @@ -5089,7 +4941,7 @@ static void disas_adc_sbc(DisasContext *s, uint32_t insn) tcg_rn = cpu_reg(s, rn); if (op) { - tcg_y = new_tmp_a64(s); + tcg_y = tcg_temp_new_i64(); tcg_gen_not_i64(tcg_y, cpu_reg(s, rm)); } else { tcg_y = cpu_reg(s, rm); @@ -5143,8 +4995,6 @@ static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn) if (mask & 1) { /* V */ tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); } - - tcg_temp_free_i32(nzcv); } /* @@ -5177,7 +5027,6 @@ static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn) tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); - tcg_temp_free_i32(tmp); } /* Conditional compare (immediate / register) @@ -5214,11 +5063,10 @@ static void disas_cc(DisasContext *s, uint32_t insn) tcg_t0 = tcg_temp_new_i32(); arm_test_cc(&c, cond); tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); - arm_free_cc(&c); /* Load the arguments for the new comparison. */ if (is_imm) { - tcg_y = new_tmp_a64(s); + tcg_y = tcg_temp_new_i64(); tcg_gen_movi_i64(tcg_y, y); } else { tcg_y = cpu_reg(s, y); @@ -5232,7 +5080,6 @@ static void disas_cc(DisasContext *s, uint32_t insn) } else { gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y); } - tcg_temp_free_i64(tcg_tmp); /* If COND was false, force the flags to #nzcv. Compute two masks * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 
@@ -5280,9 +5127,6 @@ static void disas_cc(DisasContext *s, uint32_t insn) tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); } } - tcg_temp_free_i32(tcg_t0); - tcg_temp_free_i32(tcg_t1); - tcg_temp_free_i32(tcg_t2); } /* Conditional select @@ -5313,13 +5157,16 @@ static void disas_cond_select(DisasContext *s, uint32_t insn) tcg_rd = cpu_reg(s, rd); a64_test_cc(&c, cond); - zero = tcg_const_i64(0); + zero = tcg_constant_i64(0); if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) { /* CSET & CSETM. */ - tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero); if (else_inv) { - tcg_gen_neg_i64(tcg_rd, tcg_rd); + tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), + tcg_rd, c.value, zero); + } else { + tcg_gen_setcond_i64(tcg_invert_cond(c.cond), + tcg_rd, c.value, zero); } } else { TCGv_i64 t_true = cpu_reg(s, rn); @@ -5334,9 +5181,6 @@ static void disas_cond_select(DisasContext *s, uint32_t insn) tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); } - tcg_temp_free_i64(zero); - a64_free_cc(&c); - if (!sf) { tcg_gen_ext32u_i64(tcg_rd, tcg_rd); } @@ -5356,7 +5200,6 @@ static void handle_clz(DisasContext *s, unsigned int sf, tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32); tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); - tcg_temp_free_i32(tcg_tmp32); } } @@ -5374,7 +5217,6 @@ static void handle_cls(DisasContext *s, unsigned int sf, tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32); tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); - tcg_temp_free_i32(tcg_tmp32); } } @@ -5392,7 +5234,6 @@ static void handle_rbit(DisasContext *s, unsigned int sf, tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); gen_helper_rbit(tcg_tmp32, tcg_tmp32); tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); - tcg_temp_free_i32(tcg_tmp32); } } @@ -5431,16 +5272,13 @@ static void handle_rev16(DisasContext *s, unsigned int sf, TCGv_i64 tcg_rd = cpu_reg(s, rd); TCGv_i64 tcg_tmp = tcg_temp_new_i64(); TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); - TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff); + TCGv_i64 mask = tcg_constant_i64(sf ? 
@@ -5431,16 +5272,13 @@ static void handle_rev16(DisasContext *s, unsigned int sf, TCGv_i64 tcg_rd = cpu_reg(s, rd); TCGv_i64 tcg_tmp = tcg_temp_new_i64(); TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); - TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff); + TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff); tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); tcg_gen_and_i64(tcg_rd, tcg_rn, mask); tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); - - tcg_temp_free_i64(mask); - tcg_temp_free_i64(tcg_tmp); } /* Data-processing (1 source) @@ -5494,7 +5332,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x00): /* PACIA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5502,7 +5340,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x01): /* PACIB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5510,7 +5348,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x02): /* PACDA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5518,7 +5356,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x03): /* PACDB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5526,7 +5364,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x04): /* AUTIA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5534,7 +5372,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x05): /* AUTIB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5542,7 +5380,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x06): /* AUTDA */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5550,7 +5388,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) case MAP(1, 0x01, 0x07): /* AUTDB */ if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); + gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); } else if (!dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } @@ -5560,7 +5398,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacia(tcg_rd, tcg_env, tcg_rd,
tcg_constant_i64(0)); } break; case MAP(1, 0x01, 0x09): /* PACIZB */ @@ -5568,7 +5406,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); } break; case MAP(1, 0x01, 0x0a): /* PACDZA */ @@ -5576,7 +5414,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); } break; case MAP(1, 0x01, 0x0b): /* PACDZB */ @@ -5584,7 +5422,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); } break; case MAP(1, 0x01, 0x0c): /* AUTIZA */ @@ -5592,7 +5430,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); } break; case MAP(1, 0x01, 0x0d): /* AUTIZB */ @@ -5600,7 +5438,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); } break; case MAP(1, 0x01, 0x0e): /* AUTDZA */ @@ -5608,7 +5446,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); } break; case MAP(1, 0x01, 0x0f): /* AUTDZB */ @@ -5616,7 +5454,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); + gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); } break; case MAP(1, 0x01, 0x10): /* XPACI */ @@ -5624,7 +5462,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd); + gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd); } break; case MAP(1, 0x01, 0x11): /* XPACD */ @@ -5632,7 +5470,7 @@ static void disas_data_proc_1src(DisasContext *s, uint32_t insn) goto do_unallocated; } else if (s->pauth_active) { tcg_rd = cpu_reg(s, rd); - gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd); + gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd); } break; default: @@ -5651,8 +5489,8 @@ static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, tcg_rd = cpu_reg(s, rd); if (!sf && is_signed) { - tcg_n = new_tmp_a64(s); - tcg_m = new_tmp_a64(s); + tcg_n = tcg_temp_new_i64(); + tcg_m = tcg_temp_new_i64(); tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn)); tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm)); } else { @@ -5682,7 +5520,6 @@ static void handle_shift_reg(DisasContext *s, tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 
63 : 31); shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift); - tcg_temp_free_i64(tcg_shift); } /* CRC32[BHWX], CRC32C[BHWX] */ @@ -5717,20 +5554,18 @@ static void handle_crc32(DisasContext *s, default: g_assert_not_reached(); } - tcg_val = new_tmp_a64(s); + tcg_val = tcg_temp_new_i64(); tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask); } tcg_acc = cpu_reg(s, rn); - tcg_bytes = tcg_const_i32(1 << sz); + tcg_bytes = tcg_constant_i32(1 << sz); if (crc32c) { gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); } else { gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); } - - tcg_temp_free_i32(tcg_bytes); } /* Data-processing (2 source) @@ -5784,8 +5619,8 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn) if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { goto do_unallocated; } - if (s->ata) { - gen_helper_irg(cpu_reg_sp(s, rd), cpu_env, + if (s->ata[0]) { + gen_helper_irg(cpu_reg_sp(s, rd), tcg_env, cpu_reg_sp(s, rn), cpu_reg(s, rm)); } else { gen_address_with_allocation_tag0(cpu_reg_sp(s, rd), @@ -5796,15 +5631,11 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn) if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { goto do_unallocated; } else { - TCGv_i64 t1 = tcg_const_i64(1); - TCGv_i64 t2 = tcg_temp_new_i64(); + TCGv_i64 t = tcg_temp_new_i64(); - tcg_gen_extract_i64(t2, cpu_reg_sp(s, rn), 56, 4); - tcg_gen_shl_i64(t1, t1, t2); - tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t1); - - tcg_temp_free_i64(t1); - tcg_temp_free_i64(t2); + tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4); + tcg_gen_shl_i64(t, tcg_constant_i64(1), t); + tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t); } break; case 8: /* LSLV */ @@ -5823,7 +5654,7 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn) if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) { goto do_unallocated; } - gen_helper_pacga(cpu_reg(s, rd), cpu_env, + gen_helper_pacga(cpu_reg(s, rd), tcg_env, cpu_reg(s, rn), cpu_reg_sp(s, rm)); break; case 16: @@ -5939,7 +5770,7 @@ static void handle_fp_compare(DisasContext *s, int size, tcg_vn = read_fp_dreg(s, rn); if (cmp_with_zero) { - tcg_vm = tcg_const_i64(0); + tcg_vm = tcg_constant_i64(0); } else { tcg_vm = read_fp_dreg(s, rm); } @@ -5948,8 +5779,6 @@ static void handle_fp_compare(DisasContext *s, int size, } else { gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); } - tcg_temp_free_i64(tcg_vn); - tcg_temp_free_i64(tcg_vm); } else { TCGv_i32 tcg_vn = tcg_temp_new_i32(); TCGv_i32 tcg_vm = tcg_temp_new_i32(); @@ -5979,16 +5808,9 @@ static void handle_fp_compare(DisasContext *s, int size, default: g_assert_not_reached(); } - - tcg_temp_free_i32(tcg_vn); - tcg_temp_free_i32(tcg_vm); } - tcg_temp_free_ptr(fpst); - gen_set_nzcv(tcg_flags); - - tcg_temp_free_i64(tcg_flags); } /* Floating point compare @@ -6049,7 +5871,6 @@ static void disas_fp_compare(DisasContext *s, uint32_t insn) static void disas_fp_ccomp(DisasContext *s, uint32_t insn) { unsigned int mos, type, rm, cond, rn, op, nzcv; - TCGv_i64 tcg_flags; TCGLabel *label_continue = NULL; int size; @@ -6093,9 +5914,7 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn) label_continue = gen_new_label(); arm_gen_test_cc(cond, label_match); /* nomatch: */ - tcg_flags = tcg_const_i64(nzcv << 28); - gen_set_nzcv(tcg_flags); - tcg_temp_free_i64(tcg_flags); + gen_set_nzcv(tcg_constant_i64(nzcv << 28)); tcg_gen_br(label_continue); gen_set_label(label_match); } @@ -6116,7 +5935,7 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn) static void 
disas_fp_csel(DisasContext *s, uint32_t insn) { unsigned int mos, type, rm, cond, rn, rd; - TCGv_i64 t_true, t_false, t_zero; + TCGv_i64 t_true, t_false; DisasCompare64 c; MemOp sz; @@ -6161,16 +5980,12 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn) read_vec_element(s, t_false, rm, 0, sz); a64_test_cc(&c, cond); - t_zero = tcg_const_i64(0); - tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false); - tcg_temp_free_i64(t_zero); - tcg_temp_free_i64(t_false); - a64_free_cc(&c); + tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), + t_true, t_false); /* Note that sregs & hregs write back zeros to the high bits, and we've already done the zero-extension. */ write_fp_dreg(s, rd, t_true); - tcg_temp_free_i64(t_true); } /* Floating-point data-processing (1 source) - half precision */ @@ -6200,14 +6015,12 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) case 0xb: /* FRINTZ */ case 0xc: /* FRINTA */ { - TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7)); - fpst = fpstatus_ptr(FPST_FPCR_F16); + TCGv_i32 tcg_rmode; - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); + fpst = fpstatus_ptr(FPST_FPCR_F16); + tcg_rmode = gen_set_rmode(opcode & 7, fpst); gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); - - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); - tcg_temp_free_i32(tcg_rmode); + gen_restore_rmode(tcg_rmode, fpst); break; } case 0xe: /* FRINTX */ @@ -6219,16 +6032,10 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); break; default: - abort(); + g_assert_not_reached(); } write_fp_sreg(s, rd, tcg_res); - - if (fpst) { - tcg_temp_free_ptr(fpst); - } - tcg_temp_free_i32(tcg_op); - tcg_temp_free_i32(tcg_res); } /* Floating-point data-processing (1 source) - single precision */ @@ -6253,7 +6060,7 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) gen_helper_vfp_negs(tcg_res, tcg_op); goto done; case 0x3: /* FSQRT */ - gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); + gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); goto done; case 0x6: /* BFCVT */ gen_fpst = gen_helper_bfcvt; @@ -6263,7 +6070,7 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) case 0xa: /* FRINTM */ case 0xb: /* FRINTZ */ case 0xc: /* FRINTA */ - rmode = arm_rmode_to_sf(opcode & 7); + rmode = opcode & 7; gen_fpst = gen_helper_rints; break; case 0xe: /* FRINTX */ @@ -6273,14 +6080,14 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) gen_fpst = gen_helper_rints; break; case 0x10: /* FRINT32Z */ - rmode = float_round_to_zero; + rmode = FPROUNDING_ZERO; gen_fpst = gen_helper_frint32_s; break; case 0x11: /* FRINT32X */ gen_fpst = gen_helper_frint32_s; break; case 0x12: /* FRINT64Z */ - rmode = float_round_to_zero; + rmode = FPROUNDING_ZERO; gen_fpst = gen_helper_frint64_s; break; case 0x13: /* FRINT64X */ @@ -6292,20 +6099,15 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) fpst = fpstatus_ptr(FPST_FPCR); if (rmode >= 0) { - TCGv_i32 tcg_rmode = tcg_const_i32(rmode); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); + TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); gen_fpst(tcg_res, tcg_op, fpst); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); - tcg_temp_free_i32(tcg_rmode); + gen_restore_rmode(tcg_rmode, fpst); } else { gen_fpst(tcg_res, tcg_op, fpst); } - tcg_temp_free_ptr(fpst); done: write_fp_sreg(s, rd, tcg_res); - tcg_temp_free_i32(tcg_op); 
- tcg_temp_free_i32(tcg_res); } /* Floating-point data-processing (1 source) - double precision */ @@ -6333,14 +6135,14 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) gen_helper_vfp_negd(tcg_res, tcg_op); goto done; case 0x3: /* FSQRT */ - gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env); + gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env); goto done; case 0x8: /* FRINTN */ case 0x9: /* FRINTP */ case 0xa: /* FRINTM */ case 0xb: /* FRINTZ */ case 0xc: /* FRINTA */ - rmode = arm_rmode_to_sf(opcode & 7); + rmode = opcode & 7; gen_fpst = gen_helper_rintd; break; case 0xe: /* FRINTX */ @@ -6350,14 +6152,14 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) gen_fpst = gen_helper_rintd; break; case 0x10: /* FRINT32Z */ - rmode = float_round_to_zero; + rmode = FPROUNDING_ZERO; gen_fpst = gen_helper_frint32_d; break; case 0x11: /* FRINT32X */ gen_fpst = gen_helper_frint32_d; break; case 0x12: /* FRINT64Z */ - rmode = float_round_to_zero; + rmode = FPROUNDING_ZERO; gen_fpst = gen_helper_frint64_d; break; case 0x13: /* FRINT64X */ @@ -6369,20 +6171,15 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) fpst = fpstatus_ptr(FPST_FPCR); if (rmode >= 0) { - TCGv_i32 tcg_rmode = tcg_const_i32(rmode); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); + TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); gen_fpst(tcg_res, tcg_op, fpst); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); - tcg_temp_free_i32(tcg_rmode); + gen_restore_rmode(tcg_rmode, fpst); } else { gen_fpst(tcg_res, tcg_op, fpst); } - tcg_temp_free_ptr(fpst); done: write_fp_dreg(s, rd, tcg_res); - tcg_temp_free_i64(tcg_op); - tcg_temp_free_i64(tcg_res); } static void handle_fp_fcvt(DisasContext *s, int opcode, @@ -6395,9 +6192,8 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, if (dtype == 1) { /* Single to double */ TCGv_i64 tcg_rd = tcg_temp_new_i64(); - gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env); + gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env); write_fp_dreg(s, rd, tcg_rd); - tcg_temp_free_i64(tcg_rd); } else { /* Single to half */ TCGv_i32 tcg_rd = tcg_temp_new_i32(); @@ -6407,11 +6203,7 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp); /* write_fp_sreg is OK here because top half of tcg_rd is zero */ write_fp_sreg(s, rd, tcg_rd); - tcg_temp_free_i32(tcg_rd); - tcg_temp_free_i32(ahp); - tcg_temp_free_ptr(fpst); } - tcg_temp_free_i32(tcg_rn); break; } case 0x1: @@ -6420,19 +6212,15 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, TCGv_i32 tcg_rd = tcg_temp_new_i32(); if (dtype == 0) { /* Double to single */ - gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env); + gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env); } else { TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); TCGv_i32 ahp = get_ahp_flag(); /* Double to half */ gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); /* write_fp_sreg is OK here because top half of tcg_rd is zero */ - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(ahp); } write_fp_sreg(s, rd, tcg_rd); - tcg_temp_free_i32(tcg_rd); - tcg_temp_free_i64(tcg_rn); break; } case 0x3: @@ -6446,21 +6234,16 @@ static void handle_fp_fcvt(DisasContext *s, int opcode, TCGv_i32 tcg_rd = tcg_temp_new_i32(); gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); write_fp_sreg(s, rd, tcg_rd); - tcg_temp_free_i32(tcg_rd); } else { /* Half to double */ TCGv_i64 tcg_rd = tcg_temp_new_i64(); gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, 
tcg_ahp); write_fp_dreg(s, rd, tcg_rd); - tcg_temp_free_i64(tcg_rd); } - tcg_temp_free_i32(tcg_rn); - tcg_temp_free_ptr(tcg_fpst); - tcg_temp_free_i32(tcg_ahp); break; } default: - abort(); + g_assert_not_reached(); } } @@ -6604,11 +6387,6 @@ static void handle_fp_2src_single(DisasContext *s, int opcode, } write_fp_sreg(s, rd, tcg_res); - - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); - tcg_temp_free_i32(tcg_res); } /* Floating-point data-processing (2 source) - double precision */ @@ -6657,11 +6435,6 @@ static void handle_fp_2src_double(DisasContext *s, int opcode, } write_fp_dreg(s, rd, tcg_res); - - tcg_temp_free_ptr(fpst); - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); - tcg_temp_free_i64(tcg_res); } /* Floating-point data-processing (2 source) - half precision */ @@ -6712,11 +6485,6 @@ static void handle_fp_2src_half(DisasContext *s, int opcode, } write_fp_sreg(s, rd, tcg_res); - - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); - tcg_temp_free_i32(tcg_res); } /* Floating point data-processing (2 source) @@ -6797,12 +6565,6 @@ static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); write_fp_sreg(s, rd, tcg_res); - - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); - tcg_temp_free_i32(tcg_op3); - tcg_temp_free_i32(tcg_res); } /* Floating-point data-processing (3 source) - double precision */ @@ -6835,12 +6597,6 @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); write_fp_dreg(s, rd, tcg_res); - - tcg_temp_free_ptr(fpst); - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); - tcg_temp_free_i64(tcg_op3); - tcg_temp_free_i64(tcg_res); } /* Floating-point data-processing (3 source) - half precision */ @@ -6873,12 +6629,6 @@ static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); write_fp_sreg(s, rd, tcg_res); - - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); - tcg_temp_free_i32(tcg_op3); - tcg_temp_free_i32(tcg_res); } /* Floating point data-processing (3 source) @@ -6945,7 +6695,6 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn) int type = extract32(insn, 22, 2); int mos = extract32(insn, 29, 3); uint64_t imm; - TCGv_i64 tcg_res; MemOp sz; if (mos || imm5) { @@ -6976,10 +6725,7 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn) } imm = vfp_expand_imm(sz, imm8); - - tcg_res = tcg_const_i64(imm); - write_fp_dreg(s, rd, tcg_res); - tcg_temp_free_i64(tcg_res); + write_fp_dreg(s, rd, tcg_constant_i64(imm)); } /* Handle floating point <=> fixed point conversions. Note that we can @@ -6997,12 +6743,12 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, tcg_fpstatus = fpstatus_ptr(type == 3 ? 
FPST_FPCR_F16 : FPST_FPCR); - tcg_shift = tcg_const_i32(64 - scale); + tcg_shift = tcg_constant_i32(64 - scale); if (itof) { TCGv_i64 tcg_int = cpu_reg(s, rn); if (!sf) { - TCGv_i64 tcg_extend = new_tmp_a64(s); + TCGv_i64 tcg_extend = tcg_temp_new_i64(); if (is_signed) { tcg_gen_ext32s_i64(tcg_extend, tcg_int); @@ -7024,7 +6770,6 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, tcg_shift, tcg_fpstatus); } write_fp_dreg(s, rd, tcg_double); - tcg_temp_free_i64(tcg_double); break; case 0: /* float32 */ @@ -7037,7 +6782,6 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, tcg_shift, tcg_fpstatus); } write_fp_sreg(s, rd, tcg_single); - tcg_temp_free_i32(tcg_single); break; case 3: /* float16 */ @@ -7050,7 +6794,6 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, tcg_shift, tcg_fpstatus); } write_fp_sreg(s, rd, tcg_single); - tcg_temp_free_i32(tcg_single); break; default: @@ -7067,9 +6810,7 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, rmode = FPROUNDING_TIEAWAY; } - tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); - - gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); + tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); switch (type) { case 1: /* float64 */ @@ -7094,7 +6835,6 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, if (!sf) { tcg_gen_ext32u_i64(tcg_int, tcg_int); } - tcg_temp_free_i64(tcg_double); break; case 0: /* float32 */ @@ -7117,9 +6857,7 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, tcg_shift, tcg_fpstatus); } tcg_gen_extu_i32_i64(tcg_int, tcg_dest); - tcg_temp_free_i32(tcg_dest); } - tcg_temp_free_i32(tcg_single); break; case 3: /* float16 */ @@ -7142,21 +6880,15 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, tcg_shift, tcg_fpstatus); } tcg_gen_extu_i32_i64(tcg_int, tcg_dest); - tcg_temp_free_i32(tcg_dest); } - tcg_temp_free_i32(tcg_single); break; default: g_assert_not_reached(); } - gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); - tcg_temp_free_i32(tcg_rmode); + gen_restore_rmode(tcg_rmode, tcg_fpstatus); } - - tcg_temp_free_ptr(tcg_fpstatus); - tcg_temp_free_i32(tcg_shift); } /* Floating point <-> fixed point conversions @@ -7233,7 +6965,6 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) tmp = tcg_temp_new_i64(); tcg_gen_ext32u_i64(tmp, tcg_rn); write_fp_dreg(s, rd, tmp); - tcg_temp_free_i64(tmp); break; case 1: /* 64 bit */ @@ -7241,7 +6972,7 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) break; case 2: /* 64 bit to top half. 
*/ - tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd)); + tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd)); clear_vec_high(s, true, rd); break; case 3: @@ -7249,7 +6980,6 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) tmp = tcg_temp_new_i64(); tcg_gen_ext16u_i64(tmp, tcg_rn); write_fp_dreg(s, rd, tmp); - tcg_temp_free_i64(tmp); break; default: g_assert_not_reached(); @@ -7260,19 +6990,19 @@ switch (type) { case 0: /* 32 bit */ - tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32)); + tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32)); break; case 1: /* 64 bit */ - tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64)); + tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64)); break; case 2: /* 64 bits from top half */ - tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn)); + tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn)); break; case 3: /* 16 bit */ - tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16)); + tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16)); break; default: g_assert_not_reached(); @@ -7287,15 +7017,11 @@ static void handle_fjcvtzs(DisasContext *s, int rd, int rn) gen_helper_fjcvtzs(t, t, fpstatus); - tcg_temp_free_ptr(fpstatus); - tcg_gen_ext32u_i64(cpu_reg(s, rd), t); tcg_gen_extrh_i64_i32(cpu_ZF, t); tcg_gen_movi_i32(cpu_CF, 0); tcg_gen_movi_i32(cpu_NF, 0); tcg_gen_movi_i32(cpu_VF, 0); - - tcg_temp_free_i64(t); } /* Floating point <-> integer conversions @@ -7460,8 +7186,6 @@ static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right, tcg_gen_shri_i64(tcg_right, tcg_right, pos); tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos); tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp); - - tcg_temp_free_i64(tcg_tmp); } /* EXT @@ -7526,16 +7250,13 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn) tcg_hh = tcg_temp_new_i64(); read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64); do_ext64(s, tcg_hh, tcg_resh, pos); - tcg_temp_free_i64(tcg_hh); } } write_vec_element(s, tcg_resl, rd, 0, MO_64); - tcg_temp_free_i64(tcg_resl); if (is_q) { write_vec_element(s, tcg_resh, rd, 1, MO_64); } - tcg_temp_free_i64(tcg_resh); clear_vec_high(s, is_q, rd); } @@ -7565,7 +7286,7 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn) } tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rm), cpu_env, + vec_full_reg_offset(s, rm), tcg_env, is_q ? 16 : 8, vec_full_reg_size(s), (len << 6) | (is_tbx << 5) | rn, gen_helper_simd_tblx); @@ -7590,10 +7311,10 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) bool part = extract32(insn, 14, 1); bool is_q = extract32(insn, 30, 1); int esize = 8 << size; - int i, ofs; + int i; int datasize = is_q ? 128 : 64; int elements = datasize / esize; - TCGv_i64 tcg_res, tcg_resl, tcg_resh; + TCGv_i64 tcg_res[2], tcg_ele; if (opcode == 0 || (size == 3 && !is_q)) { unallocated_encoding(s); return; }
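The disas_simd_zip_trn rewrite in the hunk below drops the pre-zeroed tcg_resl/tcg_resh pair in favour of a tcg_res[2] array plus one scratch element, indexing the destination by plain division: element i of width esize lands in 64-bit word (i * esize) / 64 at bit offset (i * esize) % 64, and a zero offset starts the word with a move, so no zero constant is needed to OR into. With esize = 16 on a 128-bit vector, for instance, elements 0-3 fill word 0 at offsets 0/16/32/48 and elements 4-7 fill word 1 likewise. A condensed sketch of the accumulation step (it mirrors the hunk below rather than replacing it):

    /* Scatter element i (esize <= 64 bits, held in tcg_ele) into the
     * two-word result. */
    int w = (i * esize) / 64;                 /* destination word */
    int o = (i * esize) % 64;                 /* bit offset within it */
    if (o == 0) {
        tcg_gen_mov_i64(tcg_res[w], tcg_ele); /* first element: plain move */
    } else {
        tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
        tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
    }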
@@ -7604,37 +7325,39 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) return; } - tcg_resl = tcg_const_i64(0); - tcg_resh = is_q ? tcg_const_i64(0) : NULL; - tcg_res = tcg_temp_new_i64(); + tcg_res[0] = tcg_temp_new_i64(); + tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL; + tcg_ele = tcg_temp_new_i64(); for (i = 0; i < elements; i++) { + int o, w; + switch (opcode) { case 1: /* UZP1/2 */ { int midpoint = elements / 2; if (i < midpoint) { - read_vec_element(s, tcg_res, rn, 2 * i + part, size); + read_vec_element(s, tcg_ele, rn, 2 * i + part, size); } else { - read_vec_element(s, tcg_res, rm, + read_vec_element(s, tcg_ele, rm, 2 * (i - midpoint) + part, size); } break; } case 2: /* TRN1/2 */ if (i & 1) { - read_vec_element(s, tcg_res, rm, (i & ~1) + part, size); + read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size); } else { - read_vec_element(s, tcg_res, rn, (i & ~1) + part, size); + read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size); } break; case 3: /* ZIP1/2 */ { int base = part * elements / 2; if (i & 1) { - read_vec_element(s, tcg_res, rm, base + (i >> 1), size); + read_vec_element(s, tcg_ele, rm, base + (i >> 1), size); } else { - read_vec_element(s, tcg_res, rn, base + (i >> 1), size); + read_vec_element(s, tcg_ele, rn, base + (i >> 1), size); } break; } @@ -7642,24 +7365,18 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) g_assert_not_reached(); } - ofs = i * esize; - if (ofs < 64) { - tcg_gen_shli_i64(tcg_res, tcg_res, ofs); - tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res); + w = (i * esize) / 64; + o = (i * esize) % 64; + if (o == 0) { + tcg_gen_mov_i64(tcg_res[w], tcg_ele); } else { - tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64); - tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res); + tcg_gen_shli_i64(tcg_ele, tcg_ele, o); + tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele); } } - tcg_temp_free_i64(tcg_res); - - write_vec_element(s, tcg_resl, rd, 0, MO_64); - tcg_temp_free_i64(tcg_resl); - - if (is_q) { - write_vec_element(s, tcg_resh, rd, 1, MO_64); - tcg_temp_free_i64(tcg_resh); + for (i = 0; i <= is_q; ++i) { + write_vec_element(s, tcg_res[i], rd, i, MO_64); } clear_vec_high(s, is_q, rd); } @@ -7729,9 +7446,6 @@ static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn, default: g_assert_not_reached(); } - - tcg_temp_free_i32(tcg_hi); - tcg_temp_free_i32(tcg_lo); return tcg_res; } } @@ -7859,12 +7573,8 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize, (is_q ? 128 : 64), vmap, fpst); tcg_gen_extu_i32_i64(tcg_res, tcg_res32); - tcg_temp_free_i32(tcg_res32); - tcg_temp_free_ptr(fpst); } - tcg_temp_free_i64(tcg_elt); - /* Now truncate the result to the width required for the final output */ if (opcode == 0x03) { /* SADDLV, UADDLV: result is 2*esize */ @@ -7888,7 +7598,6 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) } write_fp_dreg(s, rd, tcg_res); - tcg_temp_free_i64(tcg_res); } /* DUP (Element, Vector) @@ -7951,7 +7660,6 @@ static void handle_simd_dupes(DisasContext *s, int rd, int rn, tmp = tcg_temp_new_i64(); read_vec_element(s, tmp, rn, index, size); write_fp_dreg(s, rd, tmp); - tcg_temp_free_i64(tmp); } /* DUP (General) @@ -8019,8 +7727,6 @@ static void handle_simd_inse(DisasContext *s, int rd, int rn, read_vec_element(s, tmp, rn, src_index, size); write_vec_element(s, tmp, rd, dst_index, size); - tcg_temp_free_i64(tmp); - /* INS is considered a 128-bit write for SVE.
*/ clear_vec_high(s, true, rd); } @@ -8331,10 +8037,6 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) } write_fp_dreg(s, rd, tcg_res); - - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); - tcg_temp_free_i64(tcg_res); } else { TCGv_i32 tcg_op1 = tcg_temp_new_i32(); TCGv_i32 tcg_op2 = tcg_temp_new_i32(); @@ -8386,14 +8088,6 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) } write_fp_sreg(s, rd, tcg_res); - - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); - tcg_temp_free_i32(tcg_res); - } - - if (fpst) { - tcg_temp_free_ptr(fpst); } } @@ -8427,7 +8121,7 @@ static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, /* Deal with the rounding step */ if (round) { if (extended_result) { - TCGv_i64 tcg_zero = tcg_const_i64(0); + TCGv_i64 tcg_zero = tcg_constant_i64(0); if (!is_u) { /* take care of sign extending tcg_res */ tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63); @@ -8439,7 +8133,6 @@ static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, tcg_src, tcg_zero, tcg_rnd, tcg_zero); } - tcg_temp_free_i64(tcg_zero); } else { tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd); } @@ -8479,10 +8172,6 @@ static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, } else { tcg_gen_mov_i64(tcg_res, tcg_src); } - - if (extended_result) { - tcg_temp_free_i64(tcg_src_hi); - } } /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ @@ -8525,8 +8214,7 @@ static void handle_scalar_simd_shri(DisasContext *s, } if (round) { - uint64_t round_const = 1ULL << (shift - 1); - tcg_round = tcg_const_i64(round_const); + tcg_round = tcg_constant_i64(1ULL << (shift - 1)); } else { tcg_round = NULL; } @@ -8549,12 +8237,6 @@ static void handle_scalar_simd_shri(DisasContext *s, } write_fp_dreg(s, rd, tcg_rd); - - tcg_temp_free_i64(tcg_rn); - tcg_temp_free_i64(tcg_rd); - if (round) { - tcg_temp_free_i64(tcg_round); - } } /* SHL/SLI - Scalar shift left */ @@ -8587,9 +8269,6 @@ static void handle_scalar_simd_shli(DisasContext *s, bool insert, } write_fp_dreg(s, rd, tcg_rd); - - tcg_temp_free_i64(tcg_rn); - tcg_temp_free_i64(tcg_rd); } /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with @@ -8649,11 +8328,10 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, tcg_rn = tcg_temp_new_i64(); tcg_rd = tcg_temp_new_i64(); tcg_rd_narrowed = tcg_temp_new_i32(); - tcg_final = tcg_const_i64(0); + tcg_final = tcg_temp_new_i64(); if (round) { - uint64_t round_const = 1ULL << (shift - 1); - tcg_round = tcg_const_i64(round_const); + tcg_round = tcg_constant_i64(1ULL << (shift - 1)); } else { tcg_round = NULL; } @@ -8662,9 +8340,13 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, read_vec_element(s, tcg_rn, rn, i, ldop); handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, false, is_u_shift, size+1, shift); - narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd); + narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd); tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); - tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); + if (i == 0) { + tcg_gen_extract_i64(tcg_final, tcg_rd, 0, esize); + } else { + tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); + } } if (!is_q) { @@ -8672,15 +8354,6 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, } else { write_vec_element(s, tcg_final, rd, 1, MO_64); } - - if (round) { - tcg_temp_free_i64(tcg_round); - } - tcg_temp_free_i64(tcg_rn); - tcg_temp_free_i64(tcg_rd); - 
tcg_temp_free_i32(tcg_rd_narrowed); - tcg_temp_free_i64(tcg_final); - clear_vec_high(s, is_q, rd); } @@ -8727,7 +8400,7 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, } if (size == 3) { - TCGv_i64 tcg_shift = tcg_const_i64(shift); + TCGv_i64 tcg_shift = tcg_constant_i64(shift); static NeonGenTwo64OpEnvFn * const fns[2][2] = { { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, { NULL, gen_helper_neon_qshl_u64 }, @@ -8739,15 +8412,12 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, TCGv_i64 tcg_op = tcg_temp_new_i64(); read_vec_element(s, tcg_op, rn, pass, MO_64); - genfn(tcg_op, cpu_env, tcg_op, tcg_shift); + genfn(tcg_op, tcg_env, tcg_op, tcg_shift); write_vec_element(s, tcg_op, rd, pass, MO_64); - - tcg_temp_free_i64(tcg_op); } - tcg_temp_free_i64(tcg_shift); clear_vec_high(s, is_q, rd); } else { - TCGv_i32 tcg_shift = tcg_const_i32(shift); + TCGv_i32 tcg_shift = tcg_constant_i32(shift); static NeonGenTwoOpEnvFn * const fns[2][2][3] = { { { gen_helper_neon_qshl_s8, @@ -8771,7 +8441,7 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, TCGv_i32 tcg_op = tcg_temp_new_i32(); read_vec_element_i32(s, tcg_op, rn, pass, memop); - genfn(tcg_op, cpu_env, tcg_op, tcg_shift); + genfn(tcg_op, tcg_env, tcg_op, tcg_shift); if (scalar) { switch (size) { case 0: @@ -8789,10 +8459,7 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, } else { write_vec_element_i32(s, tcg_op, rd, pass, MO_32); } - - tcg_temp_free_i32(tcg_op); } - tcg_temp_free_i32(tcg_shift); if (!scalar) { clear_vec_high(s, is_q, rd); @@ -8812,7 +8479,7 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, int pass; if (fracbits || size == MO_64) { - tcg_shift = tcg_const_i32(fracbits); + tcg_shift = tcg_constant_i32(fracbits); } if (size == MO_64) { @@ -8835,10 +8502,6 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, write_vec_element(s, tcg_double, rd, pass, MO_64); } } - - tcg_temp_free_i64(tcg_int64); - tcg_temp_free_i64(tcg_double); - } else { TCGv_i32 tcg_int32 = tcg_temp_new_i32(); TCGv_i32 tcg_float = tcg_temp_new_i32(); @@ -8891,14 +8554,6 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, write_vec_element_i32(s, tcg_float, rd, pass, size); } } - - tcg_temp_free_i32(tcg_int32); - tcg_temp_free_i32(tcg_float); - } - - tcg_temp_free_ptr(tcg_fpst); - if (tcg_shift) { - tcg_temp_free_i32(tcg_shift); } clear_vec_high(s, elements << size == 16, rd); @@ -8985,11 +8640,10 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, assert(!(is_scalar && is_q)); - tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO)); tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); + tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus); fracbits = (16 << size) - immhb; - tcg_shift = tcg_const_i32(fracbits); + tcg_shift = tcg_constant_i32(fracbits); if (size == MO_64) { int maxpass = is_scalar ? 
1 : 2; @@ -9004,7 +8658,6 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); } write_vec_element(s, tcg_op, rd, pass, MO_64); - tcg_temp_free_i64(tcg_op); } clear_vec_high(s, is_q, rd); } else { @@ -9040,17 +8693,13 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, } else { write_vec_element_i32(s, tcg_op, rd, pass, size); } - tcg_temp_free_i32(tcg_op); } if (!is_scalar) { clear_vec_high(s, is_q, rd); } } - tcg_temp_free_ptr(tcg_fpstatus); - tcg_temp_free_i32(tcg_shift); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); - tcg_temp_free_i32(tcg_rmode); + gen_restore_rmode(tcg_rmode, tcg_fpstatus); } /* AdvSIMD scalar shift by immediate @@ -9175,7 +8824,7 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res); + gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env, tcg_res, tcg_res); switch (opcode) { case 0xd: /* SQDMULL, SQDMULL2 */ @@ -9185,7 +8834,7 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) /* fall through */ case 0x9: /* SQDMLAL, SQDMLAL2 */ read_vec_element(s, tcg_op1, rd, 0, MO_64); - gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, + gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env, tcg_res, tcg_op1); break; default: @@ -9193,17 +8842,13 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) } write_fp_dreg(s, rd, tcg_res); - - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); - tcg_temp_free_i64(tcg_res); } else { TCGv_i32 tcg_op1 = read_fp_hreg(s, rn); TCGv_i32 tcg_op2 = read_fp_hreg(s, rm); TCGv_i64 tcg_res = tcg_temp_new_i64(); gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res); + gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env, tcg_res, tcg_res); switch (opcode) { case 0xd: /* SQDMULL, SQDMULL2 */ @@ -9215,9 +8860,8 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) { TCGv_i64 tcg_op3 = tcg_temp_new_i64(); read_vec_element(s, tcg_op3, rd, 0, MO_32); - gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, + gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env, tcg_res, tcg_op3); - tcg_temp_free_i64(tcg_op3); break; } default: @@ -9226,10 +8870,6 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) tcg_gen_ext32u_i64(tcg_res, tcg_res); write_fp_dreg(s, rd, tcg_res); - - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); - tcg_temp_free_i64(tcg_res); } } @@ -9246,26 +8886,23 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, switch (opcode) { case 0x1: /* SQADD */ if (u) { - gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); } else { - gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); } break; case 0x5: /* SQSUB */ if (u) { - gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qsub_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); } else { - gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qsub_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); } break; case 0x6: /* CMGT, CMHI */ - /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0. 
- * We implement this using setcond (test) and then negating. - */ cond = u ? TCG_COND_GTU : TCG_COND_GT; do_cmop: - tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm); - tcg_gen_neg_i64(tcg_rd, tcg_rd); + /* 64 bit integer comparison, result = test ? -1 : 0. */ + tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_rm); break; case 0x7: /* CMGE, CMHS */ cond = u ? TCG_COND_GEU : TCG_COND_GE; @@ -9286,9 +8923,9 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, break; case 0x9: /* SQSHL, UQSHL */ if (u) { - gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); } else { - gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); } break; case 0xa: /* SRSHL, URSHL */ @@ -9300,9 +8937,9 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, break; case 0xb: /* SQRSHL, UQRSHL */ if (u) { - gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qrshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); } else { - gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + gen_helper_neon_qrshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); } break; case 0x10: /* ADD, SUB */ @@ -9404,10 +9041,6 @@ static void handle_3same_float(DisasContext *s, int size, int elements, } write_vec_element(s, tcg_res, rd, pass, MO_64); - - tcg_temp_free_i64(tcg_res); - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); } else { /* Single */ TCGv_i32 tcg_op1 = tcg_temp_new_i32(); @@ -9489,19 +9122,12 @@ static void handle_3same_float(DisasContext *s, int size, int elements, tcg_gen_extu_i32_i64(tcg_tmp, tcg_res); write_vec_element(s, tcg_tmp, rd, pass, MO_64); - tcg_temp_free_i64(tcg_tmp); } else { write_vec_element_i32(s, tcg_res, rd, pass, MO_32); } - - tcg_temp_free_i32(tcg_res); - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); } } - tcg_temp_free_ptr(fpst); - clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd); } @@ -9587,8 +9213,6 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) TCGv_i64 tcg_rm = read_fp_dreg(s, rm); handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm); - tcg_temp_free_i64(tcg_rn); - tcg_temp_free_i64(tcg_rm); } else { /* Do a single operation on the lowest element in the vector. 
* We use the standard Neon helpers and rely on 0 OP 0 == 0 with @@ -9659,16 +9283,11 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) g_assert_not_reached(); } - genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm); + genenvfn(tcg_rd32, tcg_env, tcg_rn, tcg_rm); tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32); - tcg_temp_free_i32(tcg_rd32); - tcg_temp_free_i32(tcg_rn); - tcg_temp_free_i32(tcg_rm); } write_fp_dreg(s, rd, tcg_rd); - - tcg_temp_free_i64(tcg_rd); } /* AdvSIMD scalar three same FP16 @@ -9758,12 +9377,6 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, } write_fp_sreg(s, rd, tcg_res); - - - tcg_temp_free_i32(tcg_res); - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); - tcg_temp_free_ptr(fpst); } /* AdvSIMD scalar three same extra @@ -9823,30 +9436,25 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, switch (opcode) { case 0x0: /* SQRDMLAH */ if (size == 1) { - gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlah_s16(ele3, tcg_env, ele1, ele2, ele3); } else { - gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlah_s32(ele3, tcg_env, ele1, ele2, ele3); } break; case 0x1: /* SQRDMLSH */ if (size == 1) { - gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlsh_s16(ele3, tcg_env, ele1, ele2, ele3); } else { - gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3); + gen_helper_neon_qrdmlsh_s32(ele3, tcg_env, ele1, ele2, ele3); } break; default: g_assert_not_reached(); } - tcg_temp_free_i32(ele1); - tcg_temp_free_i32(ele2); res = tcg_temp_new_i64(); tcg_gen_extu_i32_i64(res, ele3); - tcg_temp_free_i32(ele3); - write_fp_dreg(s, rd, res); - tcg_temp_free_i64(res); } static void handle_2misc_64(DisasContext *s, int opcode, bool u, @@ -9877,20 +9485,16 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u, break; case 0x7: /* SQABS, SQNEG */ if (u) { - gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn); + gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn); } else { - gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn); + gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn); } break; case 0xa: /* CMLT */ - /* 64 bit integer comparison against zero, result is - * test ? (2^64 - 1) : 0. We implement via setcond(!test) and - * subtracting 1. - */ cond = TCG_COND_LT; do_cmop: - tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0); - tcg_gen_neg_i64(tcg_rd, tcg_rd); + /* 64 bit integer comparison against zero, result is test ? -1 : 0. */ + tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0)); break; case 0x8: /* CMGT, CMGE */ cond = u ? 
TCG_COND_GE : TCG_COND_GT; @@ -9912,30 +9516,22 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u, gen_helper_vfp_negd(tcg_rd, tcg_rn); break; case 0x7f: /* FSQRT */ - gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env); + gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env); break; case 0x1a: /* FCVTNS */ case 0x1b: /* FCVTMS */ case 0x1c: /* FCVTAS */ case 0x3a: /* FCVTPS */ case 0x3b: /* FCVTZS */ - { - TCGv_i32 tcg_shift = tcg_const_i32(0); - gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); - tcg_temp_free_i32(tcg_shift); + gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); break; - } case 0x5a: /* FCVTNU */ case 0x5b: /* FCVTMU */ case 0x5c: /* FCVTAU */ case 0x7a: /* FCVTPU */ case 0x7b: /* FCVTZU */ - { - TCGv_i32 tcg_shift = tcg_const_i32(0); - gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); - tcg_temp_free_i32(tcg_shift); + gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); break; - } case 0x18: /* FRINTN */ case 0x19: /* FRINTM */ case 0x38: /* FRINTP */ @@ -9975,7 +9571,7 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, if (is_double) { TCGv_i64 tcg_op = tcg_temp_new_i64(); - TCGv_i64 tcg_zero = tcg_const_i64(0); + TCGv_i64 tcg_zero = tcg_constant_i64(0); TCGv_i64 tcg_res = tcg_temp_new_i64(); NeonGenTwoDoubleOpFn *genfn; bool swap = false; @@ -10010,14 +9606,11 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, } write_vec_element(s, tcg_res, rd, pass, MO_64); } - tcg_temp_free_i64(tcg_res); - tcg_temp_free_i64(tcg_zero); - tcg_temp_free_i64(tcg_op); clear_vec_high(s, !is_scalar, rd); } else { TCGv_i32 tcg_op = tcg_temp_new_i32(); - TCGv_i32 tcg_zero = tcg_const_i32(0); + TCGv_i32 tcg_zero = tcg_constant_i32(0); TCGv_i32 tcg_res = tcg_temp_new_i32(); NeonGenTwoSingleOpFn *genfn; bool swap = false; @@ -10085,15 +9678,11 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, write_vec_element_i32(s, tcg_res, rd, pass, size); } } - tcg_temp_free_i32(tcg_res); - tcg_temp_free_i32(tcg_zero); - tcg_temp_free_i32(tcg_op); + if (!is_scalar) { clear_vec_high(s, is_q, rd); } } - - tcg_temp_free_ptr(fpst); } static void handle_2misc_reciprocal(DisasContext *s, int opcode, @@ -10125,8 +9714,6 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode, } write_vec_element(s, tcg_res, rd, pass, MO_64); } - tcg_temp_free_i64(tcg_res); - tcg_temp_free_i64(tcg_op); clear_vec_high(s, !is_scalar, rd); } else { TCGv_i32 tcg_op = tcg_temp_new_i32(); @@ -10165,13 +9752,10 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode, write_vec_element_i32(s, tcg_res, rd, pass, MO_32); } } - tcg_temp_free_i32(tcg_res); - tcg_temp_free_i32(tcg_op); if (!is_scalar) { clear_vec_high(s, is_q, rd); } } - tcg_temp_free_ptr(fpst); } static void handle_2misc_narrow(DisasContext *s, bool scalar, @@ -10187,7 +9771,7 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, int passes = scalar ? 
1 : 2; if (scalar) { - tcg_res[1] = tcg_const_i32(0); + tcg_res[1] = tcg_constant_i32(0); } for (pass = 0; pass < passes; pass++) { @@ -10238,7 +9822,7 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, case 0x16: /* FCVTN, FCVTN2 */ /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ if (size == 2) { - gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env); + gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, tcg_env); } else { TCGv_i32 tcg_lo = tcg_temp_new_i32(); TCGv_i32 tcg_hi = tcg_temp_new_i32(); @@ -10249,17 +9833,12 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16); - tcg_temp_free_i32(tcg_lo); - tcg_temp_free_i32(tcg_hi); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(ahp); } break; case 0x36: /* BFCVTN, BFCVTN2 */ { TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst); - tcg_temp_free_ptr(fpst); } break; case 0x56: /* FCVTXN, FCVTXN2 */ @@ -10267,7 +9846,7 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, * with von Neumann rounding (round to odd) */ assert(size == 2); - gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env); + gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, tcg_env); break; default: g_assert_not_reached(); @@ -10276,15 +9855,12 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, if (genfn) { genfn(tcg_res[pass], tcg_op); } else if (genenvfn) { - genenvfn(tcg_res[pass], cpu_env, tcg_op); + genenvfn(tcg_res[pass], tcg_env, tcg_op); } - - tcg_temp_free_i64(tcg_op); } for (pass = 0; pass < 2; pass++) { write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); - tcg_temp_free_i32(tcg_res[pass]); } clear_vec_high(s, is_q, rd); } @@ -10305,14 +9881,12 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, read_vec_element(s, tcg_rd, rd, pass, MO_64); if (is_u) { /* USQADD */ - gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rd); } else { /* SUQADD */ - gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rd); } write_vec_element(s, tcg_rd, rd, pass, MO_64); } - tcg_temp_free_i64(tcg_rd); - tcg_temp_free_i64(tcg_rn); clear_vec_high(s, !is_scalar, rd); } else { TCGv_i32 tcg_rn = tcg_temp_new_i32(); @@ -10337,13 +9911,13 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, if (is_u) { /* USQADD */ switch (size) { case 0: - gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s8(tcg_rd, tcg_env, tcg_rn, tcg_rd); break; case 1: - gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s16(tcg_rd, tcg_env, tcg_rn, tcg_rd); break; case 2: - gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_uqadd_s32(tcg_rd, tcg_env, tcg_rn, tcg_rd); break; default: g_assert_not_reached(); @@ -10351,13 +9925,13 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, } else { /* SUQADD */ switch (size) { case 0: - gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u8(tcg_rd, tcg_env, tcg_rn, tcg_rd); break; case 1: - gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u16(tcg_rd, tcg_env, tcg_rn, tcg_rd); break; case 2: - gen_helper_neon_sqadd_u32(tcg_rd, 
cpu_env, tcg_rn, tcg_rd); + gen_helper_neon_sqadd_u32(tcg_rd, tcg_env, tcg_rn, tcg_rd); break; default: g_assert_not_reached(); @@ -10365,14 +9939,10 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, } if (is_scalar) { - TCGv_i64 tcg_zero = tcg_const_i64(0); - write_vec_element(s, tcg_zero, rd, 0, MO_64); - tcg_temp_free_i64(tcg_zero); + write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64); } write_vec_element_i32(s, tcg_rd, rd, pass, MO_32); } - tcg_temp_free_i32(tcg_rd); - tcg_temp_free_i32(tcg_rn); clear_vec_high(s, is_q, rd); } } @@ -10510,12 +10080,11 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) } if (is_fcvt) { - tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); tcg_fpstatus = fpstatus_ptr(FPST_FPCR); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); + tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); } else { - tcg_rmode = NULL; tcg_fpstatus = NULL; + tcg_rmode = NULL; } if (size == 3) { @@ -10524,8 +10093,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus); write_fp_dreg(s, rd, tcg_rd); - tcg_temp_free_i64(tcg_rd); - tcg_temp_free_i64(tcg_rn); } else { TCGv_i32 tcg_rn = tcg_temp_new_i32(); TCGv_i32 tcg_rd = tcg_temp_new_i32(); @@ -10542,7 +10109,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, }; genfn = fns[size][u]; - genfn(tcg_rd, cpu_env, tcg_rn); + genfn(tcg_rd, tcg_env, tcg_rn); break; } case 0x1a: /* FCVTNS */ @@ -10550,36 +10117,26 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) case 0x1c: /* FCVTAS */ case 0x3a: /* FCVTPS */ case 0x3b: /* FCVTZS */ - { - TCGv_i32 tcg_shift = tcg_const_i32(0); - gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); - tcg_temp_free_i32(tcg_shift); + gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0), + tcg_fpstatus); break; - } case 0x5a: /* FCVTNU */ case 0x5b: /* FCVTMU */ case 0x5c: /* FCVTAU */ case 0x7a: /* FCVTPU */ case 0x7b: /* FCVTZU */ - { - TCGv_i32 tcg_shift = tcg_const_i32(0); - gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); - tcg_temp_free_i32(tcg_shift); + gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0), + tcg_fpstatus); break; - } default: g_assert_not_reached(); } write_fp_sreg(s, rd, tcg_rd); - tcg_temp_free_i32(tcg_rd); - tcg_temp_free_i32(tcg_rn); } if (is_fcvt) { - gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); - tcg_temp_free_i32(tcg_rmode); - tcg_temp_free_ptr(tcg_fpstatus); + gen_restore_rmode(tcg_rmode, tcg_fpstatus); } } @@ -10681,8 +10238,8 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, int dsize = 64; int esize = 8 << size; int elements = dsize/esize; - TCGv_i64 tcg_rn = new_tmp_a64(s); - TCGv_i64 tcg_rd = new_tmp_a64(s); + TCGv_i64 tcg_rn = tcg_temp_new_i64(); + TCGv_i64 tcg_rd = tcg_temp_new_i64(); int i; if (size >= 3) { @@ -10738,8 +10295,7 @@ static void handle_vec_simd_shrn(DisasContext *s, bool is_q, read_vec_element(s, tcg_final, rd, is_q ? 
1 : 0, MO_64); if (round) { - uint64_t round_const = 1ULL << (shift - 1); - tcg_round = tcg_const_i64(round_const); + tcg_round = tcg_constant_i64(1ULL << (shift - 1)); } else { tcg_round = NULL; } @@ -10757,12 +10313,6 @@ static void handle_vec_simd_shrn(DisasContext *s, bool is_q, } else { write_vec_element(s, tcg_final, rd, 1, MO_64); } - if (round) { - tcg_temp_free_i64(tcg_round); - } - tcg_temp_free_i64(tcg_rn); - tcg_temp_free_i64(tcg_rd); - tcg_temp_free_i64(tcg_final); clear_vec_high(s, is_q, rd); } @@ -10933,8 +10483,6 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE, tcg_passres, tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2); - tcg_temp_free_i64(tcg_tmp1); - tcg_temp_free_i64(tcg_tmp2); break; } case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ @@ -10946,7 +10494,7 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, case 11: /* SQDMLSL, SQDMLSL2 */ case 13: /* SQDMULL, SQDMULL2 */ tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, + gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env, tcg_passres, tcg_passres); break; default: @@ -10958,20 +10506,13 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, if (accop < 0) { tcg_gen_neg_i64(tcg_passres, tcg_passres); } - gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, + gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env, tcg_res[pass], tcg_passres); } else if (accop > 0) { tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); } else if (accop < 0) { tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); } - - if (accop != 0) { - tcg_temp_free_i64(tcg_passres); - } - - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); } } else { /* size 0 or 1, generally helper functions */ @@ -11005,7 +10546,6 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, widenfn(tcg_passres, tcg_op1); gen_neon_addl(size, (opcode == 2), tcg_passres, tcg_passres, tcg_op2_64); - tcg_temp_free_i64(tcg_op2_64); break; } case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ @@ -11046,14 +10586,12 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, case 13: /* SQDMULL, SQDMULL2 */ assert(size == 1); gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, + gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env, tcg_passres, tcg_passres); break; default: g_assert_not_reached(); } - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); if (accop != 0) { if (opcode == 9 || opcode == 11) { @@ -11061,22 +10599,19 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, if (accop < 0) { gen_helper_neon_negl_u32(tcg_passres, tcg_passres); } - gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env, + gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env, tcg_res[pass], tcg_passres); } else { gen_neon_addl(size, (accop < 0), tcg_res[pass], tcg_res[pass], tcg_passres); } - tcg_temp_free_i64(tcg_passres); } } } write_vec_element(s, tcg_res[0], rd, 0, MO_64); write_vec_element(s, tcg_res[1], rd, 1, MO_64); - tcg_temp_free_i64(tcg_res[0]); - tcg_temp_free_i64(tcg_res[1]); } static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, @@ -11100,17 +10635,13 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, read_vec_element(s, tcg_op1, rn, pass, MO_64); read_vec_element_i32(s, 
tcg_op2, rm, part + pass, MO_32); widenfn(tcg_op2_wide, tcg_op2); - tcg_temp_free_i32(tcg_op2); tcg_res[pass] = tcg_temp_new_i64(); gen_neon_addl(size, (opcode == 3), tcg_res[pass], tcg_op1, tcg_op2_wide); - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2_wide); } for (pass = 0; pass < 2; pass++) { write_vec_element(s, tcg_res[pass], rd, pass, MO_64); - tcg_temp_free_i64(tcg_res[pass]); } } @@ -11145,17 +10676,12 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2); - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); - tcg_res[pass] = tcg_temp_new_i32(); gennarrow(tcg_res[pass], tcg_wideres); - tcg_temp_free_i64(tcg_wideres); } for (pass = 0; pass < 2; pass++) { write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32); - tcg_temp_free_i32(tcg_res[pass]); } clear_vec_high(s, is_q, rd); } @@ -11382,14 +10908,10 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, default: g_assert_not_reached(); } - - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); } for (pass = 0; pass < 2; pass++) { write_vec_element(s, tcg_res[pass], rd, pass, MO_64); - tcg_temp_free_i64(tcg_res[pass]); } } else { int maxpass = is_q ? 4 : 2; @@ -11461,21 +10983,13 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, if (genfn) { genfn(tcg_res[pass], tcg_op1, tcg_op2); } - - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); } for (pass = 0; pass < maxpass; pass++) { write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); - tcg_temp_free_i32(tcg_res[pass]); } clear_vec_high(s, is_q, rd); } - - if (fpst) { - tcg_temp_free_ptr(fpst); - } } /* Floating point op subgroup of C3.6.16. */ @@ -11555,7 +11069,7 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) int data = (is_2 << 1) | is_s; tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), cpu_env, + vec_full_reg_offset(s, rm), tcg_env, is_q ? 16 : 8, vec_full_reg_size(s), data, gen_helper_gvec_fmlal_a64); } @@ -11732,10 +11246,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2); write_vec_element(s, tcg_res, rd, pass, MO_64); - - tcg_temp_free_i64(tcg_res); - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); } } else { for (pass = 0; pass < (is_q ? 
4 : 2); pass++) { @@ -11814,16 +11324,12 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) } if (genenvfn) { - genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2); + genenvfn(tcg_res, tcg_env, tcg_op1, tcg_op2); } else { genfn(tcg_res, tcg_op1, tcg_op2); } write_vec_element_i32(s, tcg_res, rd, pass, MO_32); - - tcg_temp_free_i32(tcg_res); - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); } } clear_vec_high(s, is_q, rd); @@ -11994,12 +11500,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) for (pass = 0; pass < maxpass; pass++) { write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16); - tcg_temp_free_i32(tcg_res[pass]); } - - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); - } else { for (pass = 0; pass < elements; pass++) { TCGv_i32 tcg_op1 = tcg_temp_new_i32(); @@ -12079,14 +11580,9 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) } write_vec_element_i32(s, tcg_res, rd, pass, MO_16); - tcg_temp_free_i32(tcg_res); - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); } } - tcg_temp_free_ptr(fpst); - clear_vec_high(s, is_q, rd); } @@ -12297,12 +11793,10 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, tcg_res[pass] = tcg_temp_new_i64(); read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); - gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env); - tcg_temp_free_i32(tcg_op); + gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env); } for (pass = 0; pass < 2; pass++) { write_vec_element(s, tcg_res[pass], rd, pass, MO_64); - tcg_temp_free_i64(tcg_res[pass]); } } else { /* 16 -> 32 bit fp conversion */ @@ -12320,11 +11814,7 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, } for (pass = 0; pass < 4; pass++) { write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); - tcg_temp_free_i32(tcg_res[pass]); } - - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(ahp); } } @@ -12368,7 +11858,6 @@ static void handle_rev(DisasContext *s, int opcode, bool u, g_assert_not_reached(); } write_vec_element(s, tcg_tmp, rd, i, grp_size); - tcg_temp_free_i64(tcg_tmp); } clear_vec_high(s, is_q, rd); } else { @@ -12376,26 +11865,26 @@ static void handle_rev(DisasContext *s, int opcode, bool u, int esize = 8 << size; int elements = dsize / esize; TCGv_i64 tcg_rn = tcg_temp_new_i64(); - TCGv_i64 tcg_rd = tcg_const_i64(0); - TCGv_i64 tcg_rd_hi = tcg_const_i64(0); + TCGv_i64 tcg_rd[2]; + + for (i = 0; i < 2; i++) { + tcg_rd[i] = tcg_temp_new_i64(); + tcg_gen_movi_i64(tcg_rd[i], 0); + } for (i = 0; i < elements; i++) { int e_rev = (i & 0xf) ^ revmask; - int off = e_rev * esize; + int w = (e_rev * esize) / 64; + int o = (e_rev * esize) % 64; + read_vec_element(s, tcg_rn, rn, i, size); - if (off >= 64) { - tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi, - tcg_rn, off - 64, esize); - } else { - tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize); - } + tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize); } - write_vec_element(s, tcg_rd, rd, 0, MO_64); - write_vec_element(s, tcg_rd_hi, rd, 1, MO_64); - tcg_temp_free_i64(tcg_rd_hi); - tcg_temp_free_i64(tcg_rd); - tcg_temp_free_i64(tcg_rn); + for (i = 0; i < 2; i++) { + write_vec_element(s, tcg_rd[i], rd, i, MO_64); + } + clear_vec_high(s, true, rd); } } @@ -12429,9 +11918,6 @@ static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, read_vec_element(s, tcg_op1, rd, pass, MO_64); tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1); } - - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); } } else 
{ for (pass = 0; pass < maxpass; pass++) { @@ -12459,15 +11945,13 @@ static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, tcg_res[pass], tcg_op); } } - tcg_temp_free_i64(tcg_op); } } if (!is_q) { - tcg_res[1] = tcg_const_i64(0); + tcg_res[1] = tcg_constant_i64(0); } for (pass = 0; pass < 2; pass++) { write_vec_element(s, tcg_res[pass], rd, pass, MO_64); - tcg_temp_free_i64(tcg_res[pass]); } } @@ -12491,13 +11975,10 @@ static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) tcg_res[pass] = tcg_temp_new_i64(); widenfn(tcg_res[pass], tcg_op); tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); - - tcg_temp_free_i32(tcg_op); } for (pass = 0; pass < 2; pass++) { write_vec_element(s, tcg_res[pass], rd, pass, MO_64); - tcg_temp_free_i64(tcg_res[pass]); } } @@ -12516,7 +11997,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); bool need_fpstatus = false; - bool need_rmode = false; int rmode = -1; TCGv_i32 tcg_rmode; TCGv_ptr tcg_fpstatus; @@ -12666,7 +12146,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x7a: /* FCVTPU */ case 0x7b: /* FCVTZU */ need_fpstatus = true; - need_rmode = true; rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); if (size == 3 && !is_q) { unallocated_encoding(s); @@ -12676,7 +12155,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x5c: /* FCVTAU */ case 0x1c: /* FCVTAS */ need_fpstatus = true; - need_rmode = true; rmode = FPROUNDING_TIEAWAY; if (size == 3 && !is_q) { unallocated_encoding(s); @@ -12735,7 +12213,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x19: /* FRINTM */ case 0x38: /* FRINTP */ case 0x39: /* FRINTZ */ - need_rmode = true; rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); /* fall through */ case 0x59: /* FRINTX */ @@ -12747,7 +12224,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } break; case 0x58: /* FRINTA */ - need_rmode = true; rmode = FPROUNDING_TIEAWAY; need_fpstatus = true; if (size == 3 && !is_q) { @@ -12763,7 +12239,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) break; case 0x1e: /* FRINT32Z */ case 0x1f: /* FRINT64Z */ - need_rmode = true; rmode = FPROUNDING_ZERO; /* fall through */ case 0x5e: /* FRINT32X */ @@ -12789,14 +12264,13 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) return; } - if (need_fpstatus || need_rmode) { + if (need_fpstatus || rmode >= 0) { tcg_fpstatus = fpstatus_ptr(FPST_FPCR); } else { tcg_fpstatus = NULL; } - if (need_rmode) { - tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); + if (rmode >= 0) { + tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); } else { tcg_rmode = NULL; } @@ -12852,9 +12326,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) tcg_rmode, tcg_fpstatus); write_vec_element(s, tcg_res, rd, pass, MO_64); - - tcg_temp_free_i64(tcg_res); - tcg_temp_free_i64(tcg_op); } } else { int pass; @@ -12877,9 +12348,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) break; case 0x7: /* SQABS, SQNEG */ if (u) { - gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op); + gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op); } else { - gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op); + gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op); } break; case 0x2f: /* FABS */ @@ -12889,32 +12360,24 @@ static 
void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) gen_helper_vfp_negs(tcg_res, tcg_op); break; case 0x7f: /* FSQRT */ - gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); + gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); break; case 0x1a: /* FCVTNS */ case 0x1b: /* FCVTMS */ case 0x1c: /* FCVTAS */ case 0x3a: /* FCVTPS */ case 0x3b: /* FCVTZS */ - { - TCGv_i32 tcg_shift = tcg_const_i32(0); gen_helper_vfp_tosls(tcg_res, tcg_op, - tcg_shift, tcg_fpstatus); - tcg_temp_free_i32(tcg_shift); + tcg_constant_i32(0), tcg_fpstatus); break; - } case 0x5a: /* FCVTNU */ case 0x5b: /* FCVTMU */ case 0x5c: /* FCVTAU */ case 0x7a: /* FCVTPU */ case 0x7b: /* FCVTZU */ - { - TCGv_i32 tcg_shift = tcg_const_i32(0); gen_helper_vfp_touls(tcg_res, tcg_op, - tcg_shift, tcg_fpstatus); - tcg_temp_free_i32(tcg_shift); + tcg_constant_i32(0), tcg_fpstatus); break; - } case 0x18: /* FRINTN */ case 0x19: /* FRINTM */ case 0x38: /* FRINTP */ @@ -12961,7 +12424,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, }; genfn = fns[size][u]; - genfn(tcg_res, cpu_env, tcg_op); + genfn(tcg_res, tcg_env, tcg_op); break; } case 0x4: /* CLS, CLZ */ @@ -12985,19 +12448,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } write_vec_element_i32(s, tcg_res, rd, pass, MO_32); - - tcg_temp_free_i32(tcg_res); - tcg_temp_free_i32(tcg_op); } } clear_vec_high(s, is_q, rd); - if (need_rmode) { - gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); - tcg_temp_free_i32(tcg_rmode); - } - if (need_fpstatus) { - tcg_temp_free_ptr(tcg_fpstatus); + if (tcg_rmode) { + gen_restore_rmode(tcg_rmode, tcg_fpstatus); } } @@ -13026,9 +12482,8 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) int pass; TCGv_i32 tcg_rmode = NULL; TCGv_ptr tcg_fpstatus = NULL; - bool need_rmode = false; bool need_fpst = true; - int rmode; + int rmode = -1; if (!dc_isar_feature(aa64_fp16, s)) { unallocated_encoding(s); @@ -13077,27 +12532,22 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x3f: /* FRECPX */ break; case 0x18: /* FRINTN */ - need_rmode = true; only_in_vector = true; rmode = FPROUNDING_TIEEVEN; break; case 0x19: /* FRINTM */ - need_rmode = true; only_in_vector = true; rmode = FPROUNDING_NEGINF; break; case 0x38: /* FRINTP */ - need_rmode = true; only_in_vector = true; rmode = FPROUNDING_POSINF; break; case 0x39: /* FRINTZ */ - need_rmode = true; only_in_vector = true; rmode = FPROUNDING_ZERO; break; case 0x58: /* FRINTA */ - need_rmode = true; only_in_vector = true; rmode = FPROUNDING_TIEAWAY; break; @@ -13107,43 +12557,33 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) /* current rounding mode */ break; case 0x1a: /* FCVTNS */ - need_rmode = true; rmode = FPROUNDING_TIEEVEN; break; case 0x1b: /* FCVTMS */ - need_rmode = true; rmode = FPROUNDING_NEGINF; break; case 0x1c: /* FCVTAS */ - need_rmode = true; rmode = FPROUNDING_TIEAWAY; break; case 0x3a: /* FCVTPS */ - need_rmode = true; rmode = FPROUNDING_POSINF; break; case 0x3b: /* FCVTZS */ - need_rmode = true; rmode = FPROUNDING_ZERO; break; case 0x5a: /* FCVTNU */ - need_rmode = true; rmode = FPROUNDING_TIEEVEN; break; case 0x5b: /* FCVTMU */ - need_rmode = true; rmode = FPROUNDING_NEGINF; break; case 0x5c: /* FCVTAU */ - need_rmode = true; rmode = FPROUNDING_TIEAWAY; break; case 0x7a: /* FCVTPU */ - need_rmode = true; rmode = FPROUNDING_POSINF; break; case 0x7b: /* FCVTZU */ - need_rmode = true; rmode = 
FPROUNDING_ZERO; break; case 0x2f: /* FABS */ @@ -13176,13 +12616,12 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) return; } - if (need_rmode || need_fpst) { + if (rmode >= 0 || need_fpst) { tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16); } - if (need_rmode) { - tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); + if (rmode >= 0) { + tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); } if (is_scalar) { @@ -13223,9 +12662,6 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) /* limit any sign extension going on */ tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff); write_fp_sreg(s, rd, tcg_res); - - tcg_temp_free_i32(tcg_res); - tcg_temp_free_i32(tcg_op); } else { for (pass = 0; pass < (is_q ? 8 : 4); pass++) { TCGv_i32 tcg_op = tcg_temp_new_i32(); @@ -13279,21 +12715,13 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) } write_vec_element_i32(s, tcg_res, rd, pass, MO_16); - - tcg_temp_free_i32(tcg_res); - tcg_temp_free_i32(tcg_op); } clear_vec_high(s, is_q, rd); } if (tcg_rmode) { - gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); - tcg_temp_free_i32(tcg_rmode); - } - - if (tcg_fpstatus) { - tcg_temp_free_ptr(tcg_fpstatus); + gen_restore_rmode(tcg_rmode, tcg_fpstatus); } } @@ -13433,7 +12861,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) return; } size = MO_16; - /* is_fp, but we pass cpu_env not fp_status. */ + /* is_fp, but we pass tcg_env not fp_status. */ break; default: unallocated_encoding(s); @@ -13563,7 +12991,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) size == MO_64 ? gen_helper_gvec_fcmlas_idx : gen_helper_gvec_fcmlah_idx); - tcg_temp_free_ptr(fpst); } return; @@ -13577,7 +13004,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) int data = (index << 2) | (is_2 << 1) | is_s; tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), cpu_env, + vec_full_reg_offset(s, rm), tcg_env, is_q ? 16 : 8, vec_full_reg_size(s), data, gen_helper_gvec_fmlal_idx_a64); } @@ -13668,11 +13095,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } write_vec_element(s, tcg_res, rd, pass, MO_64); - tcg_temp_free_i64(tcg_op); - tcg_temp_free_i64(tcg_res); } - tcg_temp_free_i64(tcg_idx); clear_vec_high(s, !is_scalar, rd); } else if (!is_long) { /* 32 bit floating point, or 16 or 32 bit integer. @@ -13799,19 +13223,19 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) break; case 0x0c: /* SQDMULH */ if (size == 1) { - gen_helper_neon_qdmulh_s16(tcg_res, cpu_env, + gen_helper_neon_qdmulh_s16(tcg_res, tcg_env, tcg_op, tcg_idx); } else { - gen_helper_neon_qdmulh_s32(tcg_res, cpu_env, + gen_helper_neon_qdmulh_s32(tcg_res, tcg_env, tcg_op, tcg_idx); } break; case 0x0d: /* SQRDMULH */ if (size == 1) { - gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env, + gen_helper_neon_qrdmulh_s16(tcg_res, tcg_env, tcg_op, tcg_idx); } else { - gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env, + gen_helper_neon_qrdmulh_s32(tcg_res, tcg_env, tcg_op, tcg_idx); } break; @@ -13819,10 +13243,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) read_vec_element_i32(s, tcg_res, rd, pass, is_scalar ? 
size : MO_32); if (size == 1) { - gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env, + gen_helper_neon_qrdmlah_s16(tcg_res, tcg_env, tcg_op, tcg_idx, tcg_res); } else { - gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env, + gen_helper_neon_qrdmlah_s32(tcg_res, tcg_env, tcg_op, tcg_idx, tcg_res); } break; @@ -13830,10 +13254,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) read_vec_element_i32(s, tcg_res, rd, pass, is_scalar ? size : MO_32); if (size == 1) { - gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env, + gen_helper_neon_qrdmlsh_s16(tcg_res, tcg_env, tcg_op, tcg_idx, tcg_res); } else { - gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env, + gen_helper_neon_qrdmlsh_s32(tcg_res, tcg_env, tcg_op, tcg_idx, tcg_res); } break; @@ -13846,12 +13270,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } else { write_vec_element_i32(s, tcg_res, rd, pass, MO_32); } - - tcg_temp_free_i32(tcg_op); - tcg_temp_free_i32(tcg_res); } - tcg_temp_free_i32(tcg_idx); clear_vec_high(s, is_q, rd); } else { /* long ops: 16x16->32 or 32x32->64 */ @@ -13892,11 +13312,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx); - tcg_temp_free_i64(tcg_op); if (satop) { /* saturating, doubling */ - gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, + gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env, tcg_passres, tcg_passres); } @@ -13918,16 +13337,14 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) tcg_gen_neg_i64(tcg_passres, tcg_passres); /* fall through */ case 0x3: /* SQDMLAL, SQDMLAL2 */ - gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, + gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env, tcg_res[pass], tcg_passres); break; default: g_assert_not_reached(); } - tcg_temp_free_i64(tcg_passres); } - tcg_temp_free_i64(tcg_idx); clear_vec_high(s, !is_scalar, rd); } else { @@ -13970,10 +13387,9 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx); } if (satop) { - gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, + gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env, tcg_passres, tcg_passres); } - tcg_temp_free_i32(tcg_op); if (opcode == 0xa || opcode == 0xb) { continue; @@ -13995,16 +13411,14 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) gen_helper_neon_negl_u32(tcg_passres, tcg_passres); /* fall through */ case 0x3: /* SQDMLAL, SQDMLAL2 */ - gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env, + gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env, tcg_res[pass], tcg_passres); break; default: g_assert_not_reached(); } - tcg_temp_free_i64(tcg_passres); } - tcg_temp_free_i32(tcg_idx); if (is_scalar) { tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]); @@ -14012,18 +13426,13 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } if (is_scalar) { - tcg_res[1] = tcg_const_i64(0); + tcg_res[1] = tcg_constant_i64(0); } for (pass = 0; pass < 2; pass++) { write_vec_element(s, tcg_res[pass], rd, pass, MO_64); - tcg_temp_free_i64(tcg_res[pass]); } } - - if (fpst) { - tcg_temp_free_ptr(fpst); - } } /* Crypto AES @@ -14038,7 +13447,6 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) int opcode = extract32(insn, 12, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - int decrypt; gen_helper_gvec_2 *genfn2 = NULL; gen_helper_gvec_3 *genfn3 = NULL; @@ -14049,20 +13457,16 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) switch (opcode) { case 0x4: /* AESE */ 
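The AES hunk just below drops the shared-helper-plus-decrypt-flag scheme: AESD and AESIMC get dedicated helpers, so the immediate data argument is always 0. A sketch of the resulting dispatch, using only the helper names visible in the hunk:

    /* Sketch only: genfn2/genfn3 declared as in the surrounding function. */
    switch (opcode) {
    case 0x4: /* AESE */
        genfn3 = gen_helper_crypto_aese;
        break;
    case 0x5: /* AESD: its own helper, no more decrypt flag */
        genfn3 = gen_helper_crypto_aesd;
        break;
    case 0x6: /* AESMC */
        genfn2 = gen_helper_crypto_aesmc;
        break;
    case 0x7: /* AESIMC */
        genfn2 = gen_helper_crypto_aesimc;
        break;
    }
    if (genfn2) {
        gen_gvec_op2_ool(s, true, rd, rn, 0, genfn2);
    } else {
        gen_gvec_op3_ool(s, true, rd, rd, rn, 0, genfn3);
    }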
- decrypt = 0; genfn3 = gen_helper_crypto_aese; break; case 0x6: /* AESMC */ - decrypt = 0; genfn2 = gen_helper_crypto_aesmc; break; case 0x5: /* AESD */ - decrypt = 1; - genfn3 = gen_helper_crypto_aese; + genfn3 = gen_helper_crypto_aesd; break; case 0x7: /* AESIMC */ - decrypt = 1; - genfn2 = gen_helper_crypto_aesmc; + genfn2 = gen_helper_crypto_aesimc; break; default: unallocated_encoding(s); @@ -14073,9 +13477,9 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) return; } if (genfn2) { - gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2); + gen_gvec_op2_ool(s, true, rd, rn, 0, genfn2); } else { - gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3); + gen_gvec_op3_ool(s, true, rd, rd, rn, 0, genfn3); } } @@ -14403,12 +13807,6 @@ static void disas_crypto_four_reg(DisasContext *s, uint32_t insn) } write_vec_element(s, tcg_res[0], rd, 0, MO_64); write_vec_element(s, tcg_res[1], rd, 1, MO_64); - - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); - tcg_temp_free_i64(tcg_op3); - tcg_temp_free_i64(tcg_res[0]); - tcg_temp_free_i64(tcg_res[1]); } else { TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero; @@ -14416,7 +13814,7 @@ static void disas_crypto_four_reg(DisasContext *s, uint32_t insn) tcg_op2 = tcg_temp_new_i32(); tcg_op3 = tcg_temp_new_i32(); tcg_res = tcg_temp_new_i32(); - tcg_zero = tcg_const_i32(0); + tcg_zero = tcg_constant_i32(0); read_vec_element_i32(s, tcg_op1, rn, 3, MO_32); read_vec_element_i32(s, tcg_op2, rm, 3, MO_32); @@ -14431,12 +13829,6 @@ static void disas_crypto_four_reg(DisasContext *s, uint32_t insn) write_vec_element_i32(s, tcg_zero, rd, 1, MO_32); write_vec_element_i32(s, tcg_zero, rd, 2, MO_32); write_vec_element_i32(s, tcg_res, rd, 3, MO_32); - - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); - tcg_temp_free_i32(tcg_op3); - tcg_temp_free_i32(tcg_res); - tcg_temp_free_i32(tcg_zero); } } @@ -14565,6 +13957,17 @@ static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn) } } +static bool trans_OK(DisasContext *s, arg_OK *a) +{ + return true; +} + +static bool trans_FAIL(DisasContext *s, arg_OK *a) +{ + s->is_nonstreaming = true; + return true; +} + /** * is_guarded_page: * @env: The cpu environment @@ -14578,22 +13981,21 @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s) #ifdef CONFIG_USER_ONLY return page_get_flags(addr) & PAGE_BTI; #else + CPUTLBEntryFull *full; + void *host; int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx); - unsigned int index = tlb_index(env, mmu_idx, addr); - CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr); + int flags; /* * We test this immediately after reading an insn, which means - * that any normal page must be in the TLB. The only exception - * would be for executing from flash or device memory, which - * does not retain the TLB entry. - * - * FIXME: Assume false for those, for now. We could use - * arm_cpu_get_phys_page_attrs_debug to re-read the page - * table entry even for that case. + * that the TLB entry must be present and valid, and thus this + * access will never raise an exception. 
*/ - return (tlb_hit(entry->addr_code, addr) && - arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].iotlb[index].attrs)); + flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx, + false, &host, &full, 0); + assert(!(flags & TLB_INVALID_MASK)); + + return full->extra.arm.guarded; #endif } @@ -14649,25 +14051,38 @@ static bool btype_destination_ok(uint32_t insn, bool bt, int btype) return false; } +/* C3.1 A64 instruction index by encoding */ +static void disas_a64_legacy(DisasContext *s, uint32_t insn) +{ + switch (extract32(insn, 25, 4)) { + case 0x5: + case 0xd: /* Data processing - register */ + disas_data_proc_reg(s, insn); + break; + case 0x7: + case 0xf: /* Data processing - SIMD and floating point */ + disas_data_proc_simd_fp(s, insn); + break; + default: + unallocated_encoding(s); + break; + } +} + static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); - CPUARMState *env = cpu->env_ptr; + CPUARMState *env = cpu_env(cpu); ARMCPU *arm_cpu = env_archcpu(env); CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb); int bound, core_mmu_idx; dc->isar = &arm_cpu->isar; dc->condjmp = 0; - - dc->aarch64 = 1; - /* If we are coming from secure EL0 in a system with a 32-bit EL3, then - * there is no secure EL1, so we route exceptions to EL3. - */ - dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) && - !arm_el_is_aa64(env, 3); - dc->thumb = 0; + dc->pc_save = dc->base.pc_first; + dc->aarch64 = true; + dc->thumb = false; dc->sctlr_b = 0; dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE; dc->condexec_mask = 0; @@ -14684,26 +14099,44 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL); + dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE); + dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC); + dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET); dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); - dc->sve_len = (EX_TBFLAG_A64(tb_flags, ZCR_LEN) + 1) * 16; + dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL); + dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16; + dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16; dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); dc->bt = EX_TBFLAG_A64(tb_flags, BT); dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV); - dc->ata = EX_TBFLAG_A64(tb_flags, ATA); + dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA); + dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0); dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE); dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE); + dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM); + dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA); + dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING); + dc->naa = EX_TBFLAG_A64(tb_flags, NAA); + dc->nv = EX_TBFLAG_A64(tb_flags, NV); + dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1); + dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); + dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); + dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; dc->features = env->features; dc->dcz_blocksize = arm_cpu->dcz_blocksize; + dc->gm_blocksize = arm_cpu->gm_blocksize; #ifdef CONFIG_USER_ONLY /* In sve_probe_page, we assume TBI is enabled. 
*/ tcg_debug_assert(dc->tbid & 1); #endif + dc->lse2 = dc_isar_feature(aa64_lse2, dc); + /* Single step state. The code-generation logic here is: * SS_ACTIVE == 0: * generate code with no special handling for single-stepping (except @@ -14722,7 +14155,6 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE); dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS); dc->is_ldex = false; - dc->debug_target_el = EX_TBFLAG_ANY(tb_flags, DEBUG_TARGET_EL); /* Bound the number of insns to execute to those left on the page. */ bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4; @@ -14732,8 +14164,6 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, bound = 1; } dc->base.max_insns = MIN(dc->base.max_insns, bound); - - init_tmp_a64_array(dc); } static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@ -14743,17 +14173,23 @@ static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); + target_ulong pc_arg = dc->base.pc_next; - tcg_gen_insn_start(dc->base.pc_next, 0, 0); - dc->insn_start = tcg_last_op(); + if (tb_cflags(dcbase->tb) & CF_PCREL) { + pc_arg &= ~TARGET_PAGE_MASK; + } + tcg_gen_insn_start(pc_arg, 0, 0); + dc->insn_start_updated = false; } static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *s = container_of(dcbase, DisasContext, base); - CPUARMState *env = cpu->env_ptr; + CPUARMState *env = cpu_env(cpu); + uint64_t pc = s->base.pc_next; uint32_t insn; + /* Singlestep exceptions have the highest priority. */ if (s->ss_active && !s->pstate_ss) { /* Singlestep state is Active-pending. * If we're in this state at the start of a TB then either @@ -14768,13 +14204,28 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) assert(s->base.num_insns == 1); gen_swstep_exception(s, 0, 0); s->base.is_jmp = DISAS_NORETURN; + s->base.pc_next = pc + 4; + return; + } + + if (pc & 3) { + /* + * PC alignment fault. This has priority over the instruction abort + * that we would receive from a translation fault via arm_ldl_code. + * This should only be possible after an indirect branch, at the + * start of the TB. + */ + assert(s->base.num_insns == 1); + gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc)); + s->base.is_jmp = DISAS_NORETURN; + s->base.pc_next = QEMU_ALIGN_UP(pc, 4); return; } - s->pc_curr = s->base.pc_next; - insn = arm_ldl_code(env, &s->base, s->base.pc_next, s->sctlr_b); + s->pc_curr = pc; + insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b); s->insn = insn; - s->base.pc_next += 4; + s->base.pc_next = pc + 4; s->fp_access_checked = false; s->sve_access_checked = false; @@ -14784,8 +14235,7 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) * Illegal execution state. This has priority over BTI * exceptions, but comes after instruction abort exceptions. 
*/ - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_illegalstate(), default_exception_el(s)); + gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate()); return; } @@ -14816,9 +14266,7 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) if (s->btype != 0 && s->guarded_page && !btype_destination_ok(insn, s->bt, s->btype)) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_btitrap(s->btype), - default_exception_el(s)); + gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype)); return; } } else { @@ -14827,42 +14275,16 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) } } - switch (extract32(insn, 25, 4)) { - case 0x0: case 0x1: case 0x3: /* UNALLOCATED */ - unallocated_encoding(s); - break; - case 0x2: - if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) { - unallocated_encoding(s); - } - break; - case 0x8: case 0x9: /* Data processing - immediate */ - disas_data_proc_imm(s, insn); - break; - case 0xa: case 0xb: /* Branch, exception generation and system insns */ - disas_b_exc_sys(s, insn); - break; - case 0x4: - case 0x6: - case 0xc: - case 0xe: /* Loads and stores */ - disas_ldst(s, insn); - break; - case 0x5: - case 0xd: /* Data processing - register */ - disas_data_proc_reg(s, insn); - break; - case 0x7: - case 0xf: /* Data processing - SIMD and floating point */ - disas_data_proc_simd_fp(s, insn); - break; - default: - assert(FALSE); /* all 15 cases should be handled above */ - break; + s->is_nonstreaming = false; + if (s->sme_trap_nonstreaming) { + disas_sme_fa64(s, insn); } - /* if we allocated any temporaries, free them here */ - free_tmp_a64(s); + if (!disas_a64(s, insn) && + !disas_sme(s, insn) && + !disas_sve(s, insn)) { + disas_a64_legacy(s, insn); + } /* * After execution of most insns, btype is reset to 0. @@ -14871,15 +14293,13 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) { reset_btype(s); } - - translator_loop_temp_check(&s->base); } static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); - if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) { + if (unlikely(dc->ss_active)) { /* Note that this means single stepping WFI doesn't halt the CPU. 
* For conditional branch insns this is harmless unreachable code as * gen_goto_tb() has already handled emitting the debug exception @@ -14887,15 +14307,11 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) */ switch (dc->base.is_jmp) { default: - gen_a64_set_pc_im(dc->base.pc_next); + gen_a64_update_pc(dc, 4); /* fall through */ case DISAS_EXIT: case DISAS_JUMP: - if (dc->base.singlestep_enabled) { - gen_exception_internal(EXCP_DEBUG); - } else { - gen_step_complete_exception(dc); - } + gen_step_complete_exception(dc); break; case DISAS_NORETURN: break; @@ -14904,17 +14320,17 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) switch (dc->base.is_jmp) { case DISAS_NEXT: case DISAS_TOO_MANY: - gen_goto_tb(dc, 1, dc->base.pc_next); + gen_goto_tb(dc, 1, 4); break; default: case DISAS_UPDATE_EXIT: - gen_a64_set_pc_im(dc->base.pc_next); + gen_a64_update_pc(dc, 4); /* fall through */ case DISAS_EXIT: tcg_gen_exit_tb(NULL, 0); break; case DISAS_UPDATE_NOCHAIN: - gen_a64_set_pc_im(dc->base.pc_next); + gen_a64_update_pc(dc, 4); /* fall through */ case DISAS_JUMP: tcg_gen_lookup_and_goto_ptr(); @@ -14923,40 +14339,37 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) case DISAS_SWI: break; case DISAS_WFE: - gen_a64_set_pc_im(dc->base.pc_next); - gen_helper_wfe(cpu_env); + gen_a64_update_pc(dc, 4); + gen_helper_wfe(tcg_env); break; case DISAS_YIELD: - gen_a64_set_pc_im(dc->base.pc_next); - gen_helper_yield(cpu_env); + gen_a64_update_pc(dc, 4); + gen_helper_yield(tcg_env); break; case DISAS_WFI: - { - /* This is a special case because we don't want to just halt the CPU - * if trying to debug across a WFI. + /* + * This is a special case because we don't want to just halt + * the CPU if trying to debug across a WFI. */ - TCGv_i32 tmp = tcg_const_i32(4); - - gen_a64_set_pc_im(dc->base.pc_next); - gen_helper_wfi(cpu_env, tmp); - tcg_temp_free_i32(tmp); - /* The helper doesn't necessarily throw an exception, but we + gen_a64_update_pc(dc, 4); + gen_helper_wfi(tcg_env, tcg_constant_i32(4)); + /* + * The helper doesn't necessarily throw an exception, but we * must go back to the main loop to check for interrupts anyway. 
*/ tcg_gen_exit_tb(NULL, 0); break; } - } } } static void aarch64_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu) + CPUState *cpu, FILE *logfile) { DisasContext *dc = container_of(dcbase, DisasContext, base); - qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first)); - log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size); + fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first)); + target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size); } const TranslatorOps aarch64_translator_ops = { diff --git a/target/arm/translate-a64.h b/target/arm/tcg/translate-a64.h index 58f50abca4..7b811b8ac5 100644 --- a/target/arm/translate-a64.h +++ b/target/arm/tcg/translate-a64.h @@ -18,18 +18,6 @@ #ifndef TARGET_ARM_TRANSLATE_A64_H #define TARGET_ARM_TRANSLATE_A64_H -#define unsupported_encoding(s, insn) \ - do { \ - qemu_log_mask(LOG_UNIMP, \ - "%s:%d: unsupported instruction encoding 0x%08x " \ - "at pc=%016" PRIx64 "\n", \ - __FILE__, __LINE__, insn, s->pc_curr); \ - unallocated_encoding(s); \ - } while (0) - -TCGv_i64 new_tmp_a64(DisasContext *s); -TCGv_i64 new_tmp_a64_local(DisasContext *s); -TCGv_i64 new_tmp_a64_zero(DisasContext *s); TCGv_i64 cpu_reg(DisasContext *s, int reg); TCGv_i64 cpu_reg_sp(DisasContext *s, int reg); TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf); @@ -38,11 +26,34 @@ void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v); bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, unsigned int imms, unsigned int immr); bool sve_access_check(DisasContext *s); +bool sme_enabled_check(DisasContext *s); +bool sme_enabled_check_with_svcr(DisasContext *s, unsigned); +uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, + uint32_t msz, bool is_write, uint32_t data); + +/* This function corresponds to CheckStreamingSVEEnabled. */ +static inline bool sme_sm_enabled_check(DisasContext *s) +{ + return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK); +} + +/* This function corresponds to CheckSMEAndZAEnabled. */ +static inline bool sme_za_enabled_check(DisasContext *s) +{ + return sme_enabled_check_with_svcr(s, R_SVCR_ZA_MASK); +} + +/* Note that this function corresponds to CheckStreamingSVEAndZAEnabled. */ +static inline bool sme_smza_enabled_check(DisasContext *s) +{ + return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK | R_SVCR_ZA_MASK); +} + TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr); TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, - bool tag_checked, int log2_size); + bool tag_checked, MemOp memop); TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, - bool tag_checked, int size); + bool tag_checked, int total_size, MemOp memop); /* We should have at some point before trying to access an FP register * done the necessary access check, so assert that @@ -71,7 +82,7 @@ static inline int vec_reg_offset(DisasContext *s, int regno, { int element_size = 1 << size; int offs = element * element_size; -#ifdef HOST_WORDS_BIGENDIAN +#if HOST_BIG_ENDIAN /* This is complicated slightly because vfp.zregs[n].d[0] is * still the lowest and vfp.zregs[n].d[15] the highest of the * 256 byte vector, even on big endian systems. 
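The sme_*_enabled_check() wrappers added in the header hunk above mirror the Arm pseudocode checks (CheckStreamingSVEEnabled and friends). A typical decodetree trans function guards itself with them; the names trans_SOME_ZA_OP and arg_some_za_op below are made up purely for illustration:

    static bool trans_SOME_ZA_OP(DisasContext *s, arg_some_za_op *a)
    {
        if (!dc_isar_feature(aa64_sme, s)) {
            return false;               /* decode fails: unallocated */
        }
        if (!sme_smza_enabled_check(s)) {
            return true;                /* the check raised the exception */
        }
        /* ... emit the operation, using streaming_vec_reg_size(s) ... */
        return true;
    }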
@@ -106,17 +117,76 @@ static inline int vec_full_reg_offset(DisasContext *s, int regno) static inline TCGv_ptr vec_full_reg_ptr(DisasContext *s, int regno) { TCGv_ptr ret = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(ret, cpu_env, vec_full_reg_offset(s, regno)); + tcg_gen_addi_ptr(ret, tcg_env, vec_full_reg_offset(s, regno)); return ret; } /* Return the byte size of the "whole" vector register, VL / 8. */ static inline int vec_full_reg_size(DisasContext *s) { - return s->sve_len; + return s->vl; +} + +/* Return the byte size of the vector register, SVL / 8. */ +static inline int streaming_vec_reg_size(DisasContext *s) +{ + return s->svl; +} + +/* + * Return the offset info CPUARMState of the predicate vector register Pn. + * Note for this purpose, FFR is P16. + */ +static inline int pred_full_reg_offset(DisasContext *s, int regno) +{ + return offsetof(CPUARMState, vfp.pregs[regno]); +} + +/* Return the byte size of the whole predicate register, VL / 64. */ +static inline int pred_full_reg_size(DisasContext *s) +{ + return s->vl >> 3; +} + +/* Return the byte size of the predicate register, SVL / 64. */ +static inline int streaming_pred_reg_size(DisasContext *s) +{ + return s->svl >> 3; +} + +/* + * Round up the size of a register to a size allowed by + * the tcg vector infrastructure. Any operation which uses this + * size may assume that the bits above pred_full_reg_size are zero, + * and must leave them the same way. + * + * Note that this is not needed for the vector registers as they + * are always properly sized for tcg vectors. + */ +static inline int size_for_gvec(int size) +{ + if (size <= 8) { + return 8; + } else { + return QEMU_ALIGN_UP(size, 16); + } +} + +static inline int pred_gvec_reg_size(DisasContext *s) +{ + return size_for_gvec(pred_full_reg_size(s)); +} + +/* Return a newly allocated pointer to the predicate register. 
*/ +static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno) +{ + TCGv_ptr ret = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(ret, tcg_env, pred_full_reg_offset(s, regno)); + return ret; } bool disas_sve(DisasContext *, uint32_t); +bool disas_sme(DisasContext *, uint32_t); void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); @@ -124,4 +194,7 @@ void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); +void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); + #endif /* TARGET_ARM_TRANSLATE_A64_H */ diff --git a/target/arm/translate-m-nocp.c b/target/arm/tcg/translate-m-nocp.c index d9e144e8eb..f564d06ccf 100644 --- a/target/arm/translate-m-nocp.c +++ b/target/arm/tcg/translate-m-nocp.c @@ -18,8 +18,6 @@ */ #include "qemu/osdep.h" -#include "tcg/tcg-op.h" -#include "tcg/tcg-op-gvec.h" #include "translate.h" #include "translate-a32.h" @@ -87,11 +85,10 @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a) fptr = load_reg(s, a->rn); if (a->l) { - gen_helper_v7m_vlldm(cpu_env, fptr); + gen_helper_v7m_vlldm(tcg_env, fptr); } else { - gen_helper_v7m_vlstm(cpu_env, fptr); + gen_helper_v7m_vlstm(tcg_env, fptr); } - tcg_temp_free_i32(fptr); clear_eci_state(s); @@ -140,11 +137,11 @@ static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a) tcg_gen_andi_i32(sfpa, sfpa, R_V7M_CONTROL_SFPA_MASK); tcg_gen_or_i32(sfpa, sfpa, aspen); arm_gen_condlabel(s); - tcg_gen_brcondi_i32(TCG_COND_EQ, sfpa, 0, s->condlabel); + tcg_gen_brcondi_i32(TCG_COND_EQ, sfpa, 0, s->condlabel.label); if (s->fp_excp_el != 0) { - gen_exception_insn(s, s->pc_curr, EXCP_NOCP, - syn_uncategorized(), s->fp_excp_el); + gen_exception_insn_el(s, 0, EXCP_NOCP, + syn_uncategorized(), s->fp_excp_el); return true; } @@ -173,7 +170,7 @@ static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a) } /* Zero the Sregs from btmreg to topreg inclusive. 
*/ - zero = tcg_const_i64(0); + zero = tcg_constant_i64(0); if (btmreg & 1) { write_neon_element64(zero, btmreg >> 1, 1, MO_32); btmreg++; @@ -187,8 +184,7 @@ static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a) } assert(btmreg == topreg + 1); if (dc_isar_feature(aa32_mve, s)) { - TCGv_i32 z32 = tcg_const_i32(0); - store_cpu_field(z32, v7m.vpr); + store_cpu_field(tcg_constant_i32(0), v7m.vpr); } clear_eci_state(s); @@ -304,8 +300,6 @@ static void gen_branch_fpInactive(DisasContext *s, TCGCond cond, tcg_gen_andi_i32(fpca, fpca, R_V7M_CONTROL_FPCA_MASK); tcg_gen_or_i32(fpca, fpca, aspen); tcg_gen_brcondi_i32(tcg_invert_cond(cond), fpca, 0, label); - tcg_temp_free_i32(aspen); - tcg_temp_free_i32(fpca); } static bool gen_M_fp_sysreg_write(DisasContext *s, int regno, @@ -328,8 +322,7 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno, switch (regno) { case ARM_VFP_FPSCR: tmp = loadfn(s, opaque, true); - gen_helper_vfp_set_fpscr(cpu_env, tmp); - tcg_temp_free_i32(tmp); + gen_helper_vfp_set_fpscr(tcg_env, tmp); gen_lookup_tb(s); break; case ARM_VFP_FPSCR_NZCVQC: @@ -352,7 +345,6 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno, tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK); tcg_gen_or_i32(fpscr, fpscr, tmp); store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]); - tcg_temp_free_i32(tmp); break; } case ARM_VFP_FPCXT_NS: @@ -377,7 +369,7 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno, if (!vfp_access_check_m(s, true)) { /* * This was only a conditional exception, so override - * gen_exception_insn()'s default to DISAS_NORETURN + * gen_exception_insn_el()'s default to DISAS_NORETURN */ s->base.is_jmp = DISAS_NEXT; break; @@ -399,10 +391,8 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno, R_V7M_CONTROL_SFPA_SHIFT, 1); store_cpu_field(control, v7m.control[M_REG_S]); tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK); - gen_helper_vfp_set_fpscr(cpu_env, tmp); + gen_helper_vfp_set_fpscr(tcg_env, tmp); s->base.is_jmp = DISAS_UPDATE_NOCHAIN; - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(sfpa); break; } case ARM_VFP_VPR: @@ -424,7 +414,6 @@ static bool gen_M_fp_sysreg_write(DisasContext *s, int regno, R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH); store_cpu_field(vpr, v7m.vpr); s->base.is_jmp = DISAS_UPDATE_NOCHAIN; - tcg_temp_free_i32(tmp); break; } default: @@ -462,12 +451,12 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno, switch (regno) { case ARM_VFP_FPSCR: tmp = tcg_temp_new_i32(); - gen_helper_vfp_get_fpscr(tmp, cpu_env); + gen_helper_vfp_get_fpscr(tmp, tcg_env); storefn(s, opaque, tmp, true); break; case ARM_VFP_FPSCR_NZCVQC: tmp = tcg_temp_new_i32(); - gen_helper_vfp_get_fpscr(tmp, cpu_env); + gen_helper_vfp_get_fpscr(tmp, tcg_env); tcg_gen_andi_i32(tmp, tmp, FPCR_NZCVQC_MASK); storefn(s, opaque, tmp, true); break; @@ -486,13 +475,12 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno, /* Bits [27:0] from FPSCR, bit [31] from CONTROL.SFPA */ tmp = tcg_temp_new_i32(); sfpa = tcg_temp_new_i32(); - gen_helper_vfp_get_fpscr(tmp, cpu_env); + gen_helper_vfp_get_fpscr(tmp, tcg_env); tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK); control = load_cpu_field(v7m.control[M_REG_S]); tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK); tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT); tcg_gen_or_i32(tmp, tmp, sfpa); - tcg_temp_free_i32(sfpa); /* * Store result before updating FPSCR etc, in case * it is a memory write which causes an exception. 
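Most of the mechanical churn in these hunks follows two rules: tcg_const_*() (a writable temporary initialized to a value, which had to be freed) becomes tcg_constant_*() (an interned, read-only value with no free), and the remaining tcg_temp_free_*() calls are deleted because the translator core now reclaims temporaries itself. Schematically, with gen_helper_foo standing in for any helper call:

    /* old idiom */
    TCGv_i32 t = tcg_const_i32(0);
    gen_helper_foo(dst, src, t);
    tcg_temp_free_i32(t);

    /* new idiom: the constant is interned; never write to it or free it */
    gen_helper_foo(dst, src, tcg_constant_i32(0));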
@@ -505,21 +493,20 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno, tcg_gen_andi_i32(control, control, ~R_V7M_CONTROL_SFPA_MASK); store_cpu_field(control, v7m.control[M_REG_S]); fpscr = load_cpu_field(v7m.fpdscr[M_REG_NS]); - gen_helper_vfp_set_fpscr(cpu_env, fpscr); - tcg_temp_free_i32(fpscr); + gen_helper_vfp_set_fpscr(tcg_env, fpscr); lookup_tb = true; break; } case ARM_VFP_FPCXT_NS: { - TCGv_i32 control, sfpa, fpscr, fpdscr, zero; + TCGv_i32 control, sfpa, fpscr, fpdscr; TCGLabel *lab_active = gen_new_label(); lookup_tb = true; gen_branch_fpInactive(s, TCG_COND_EQ, lab_active); /* fpInactive case: reads as FPDSCR_NS */ - TCGv_i32 tmp = load_cpu_field(v7m.fpdscr[M_REG_NS]); + tmp = load_cpu_field(v7m.fpdscr[M_REG_NS]); storefn(s, opaque, tmp, true); lab_end = gen_new_label(); tcg_gen_br(lab_end); @@ -533,7 +520,7 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno, if (!vfp_access_check_m(s, true)) { /* * This was only a conditional exception, so override - * gen_exception_insn()'s default to DISAS_NORETURN + * gen_exception_insn_el()'s default to DISAS_NORETURN */ s->base.is_jmp = DISAS_NEXT; break; @@ -541,24 +528,19 @@ static bool gen_M_fp_sysreg_read(DisasContext *s, int regno, tmp = tcg_temp_new_i32(); sfpa = tcg_temp_new_i32(); fpscr = tcg_temp_new_i32(); - gen_helper_vfp_get_fpscr(fpscr, cpu_env); + gen_helper_vfp_get_fpscr(fpscr, tcg_env); tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK); control = load_cpu_field(v7m.control[M_REG_S]); tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK); tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT); tcg_gen_or_i32(tmp, tmp, sfpa); - tcg_temp_free_i32(control); /* Store result before updating FPSCR, in case it faults */ storefn(s, opaque, tmp, true); /* If SFPA is zero then set FPSCR from FPDSCR_NS */ fpdscr = load_cpu_field(v7m.fpdscr[M_REG_NS]); - zero = tcg_const_i32(0); - tcg_gen_movcond_i32(TCG_COND_EQ, fpscr, sfpa, zero, fpdscr, fpscr); - gen_helper_vfp_set_fpscr(cpu_env, fpscr); - tcg_temp_free_i32(zero); - tcg_temp_free_i32(sfpa); - tcg_temp_free_i32(fpdscr); - tcg_temp_free_i32(fpscr); + tcg_gen_movcond_i32(TCG_COND_EQ, fpscr, sfpa, tcg_constant_i32(0), + fpdscr, fpscr); + gen_helper_vfp_set_fpscr(tcg_env, fpscr); break; } case ARM_VFP_VPR: @@ -600,7 +582,6 @@ static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value, if (a->rt == 15) { /* Set the 4 flag bits in the CPSR */ gen_set_nzcv(value); - tcg_temp_free_i32(value); } else { store_reg(s, a->rt, value); } @@ -662,13 +643,12 @@ static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value, } if (s->v8m_stackcheck && a->rn == 13 && a->w) { - gen_helper_v8m_stackcheck(cpu_env, addr); + gen_helper_v8m_stackcheck(tcg_env, addr); } if (do_access) { gen_aa32_st_i32(s, value, addr, get_mem_index(s), MO_UL | MO_ALIGN | s->be_data); - tcg_temp_free_i32(value); } if (a->w) { @@ -677,8 +657,6 @@ static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value, tcg_gen_addi_i32(addr, addr, offset); } store_reg(s, a->rn, addr); - } else { - tcg_temp_free_i32(addr); } } @@ -704,7 +682,7 @@ static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque, } if (s->v8m_stackcheck && a->rn == 13 && a->w) { - gen_helper_v8m_stackcheck(cpu_env, addr); + gen_helper_v8m_stackcheck(tcg_env, addr); } if (do_access) { @@ -719,8 +697,6 @@ static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque, tcg_gen_addi_i32(addr, addr, offset); } store_reg(s, a->rn, addr); - } else { - tcg_temp_free_i32(addr); } return 
value; } @@ -767,14 +743,13 @@ static bool trans_NOCP(DisasContext *s, arg_nocp *a) } if (a->cp != 10) { - gen_exception_insn(s, s->pc_curr, EXCP_NOCP, - syn_uncategorized(), default_exception_el(s)); + gen_exception_insn(s, 0, EXCP_NOCP, syn_uncategorized()); return true; } if (s->fp_excp_el != 0) { - gen_exception_insn(s, s->pc_curr, EXCP_NOCP, - syn_uncategorized(), s->fp_excp_el); + gen_exception_insn_el(s, 0, EXCP_NOCP, + syn_uncategorized(), s->fp_excp_el); return true; } diff --git a/target/arm/translate-mve.c b/target/arm/tcg/translate-mve.c index 4267d43cc7..b1a8d6a65c 100644 --- a/target/arm/translate-mve.c +++ b/target/arm/tcg/translate-mve.c @@ -18,10 +18,6 @@ */ #include "qemu/osdep.h" -#include "tcg/tcg-op.h" -#include "tcg/tcg-op-gvec.h" -#include "exec/exec-all.h" -#include "exec/gen-icount.h" #include "translate.h" #include "translate-a32.h" @@ -60,7 +56,7 @@ static inline long mve_qreg_offset(unsigned reg) static TCGv_ptr mve_qreg_ptr(unsigned reg) { TCGv_ptr ret = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg)); + tcg_gen_addi_ptr(ret, tcg_env, mve_qreg_offset(reg)); return ret; } @@ -100,8 +96,7 @@ bool mve_eci_check(DisasContext *s) return true; default: /* Reserved value: INVSTATE UsageFault */ - gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(), - default_exception_el(s)); + gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized()); return false; } } @@ -178,8 +173,7 @@ static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn, } qreg = mve_qreg_ptr(a->qd); - fn(cpu_env, qreg, addr); - tcg_temp_free_ptr(qreg); + fn(tcg_env, qreg, addr); /* * Writeback always happens after the last beat of the insn, @@ -190,8 +184,6 @@ static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn, tcg_gen_addi_i32(addr, addr, offset); } store_reg(s, a->rn, addr); - } else { - tcg_temp_free_i32(addr); } mve_update_eci(s); return true; @@ -242,10 +234,7 @@ static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn) qd = mve_qreg_ptr(a->qd); qm = mve_qreg_ptr(a->qm); - fn(cpu_env, qd, qm, addr); - tcg_temp_free_ptr(qd); - tcg_temp_free_ptr(qm); - tcg_temp_free_i32(addr); + fn(tcg_env, qd, qm, addr); mve_update_eci(s); return true; } @@ -341,9 +330,7 @@ static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a, qd = mve_qreg_ptr(a->qd); qm = mve_qreg_ptr(a->qm); - fn(cpu_env, qd, qm, tcg_constant_i32(offset)); - tcg_temp_free_ptr(qd); - tcg_temp_free_ptr(qm); + fn(tcg_env, qd, qm, tcg_constant_i32(offset)); mve_update_eci(s); return true; } @@ -410,13 +397,11 @@ static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn, * We pass the index of Qd, not a pointer, because the helper must * access multiple Q registers starting at Qd and working up. 
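Passing the index rather than a pointer matters because the helper has to step through several consecutive Q registers itself; a pointer to Qd alone would not let it reach Qd+1 safely. A helper-side sketch, assuming a two-register interleaved load shaped like the ones used here (the helper name is hypothetical):

    /* Hypothetical helper: qd is a register index, not a host pointer. */
    void HELPER(sketch_vld2_il)(CPUARMState *env, uint32_t qd, uint32_t addr)
    {
        for (uint32_t i = 0; i < 2; i++) {
            /* derive each register's storage from the index */
            uint64_t *d = &env->vfp.zregs[qd + i].d[0];
            /* ... load this beat's interleaved data into d[0]/d[1] ... */
            (void)d; (void)addr;
        }
    }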
*/ - fn(cpu_env, tcg_constant_i32(a->qd), rn); + fn(tcg_env, tcg_constant_i32(a->qd), rn); if (a->w) { tcg_gen_addi_i32(rn, rn, addrinc); store_reg(s, a->rn, rn); - } else { - tcg_temp_free_i32(rn); } mve_update_and_store_eci(s); return true; @@ -506,10 +491,8 @@ static bool trans_VDUP(DisasContext *s, arg_VDUP *a) } else { qd = mve_qreg_ptr(a->qd); tcg_gen_dup_i32(a->size, rt, rt); - gen_helper_mve_vdup(cpu_env, qd, rt); - tcg_temp_free_ptr(qd); + gen_helper_mve_vdup(tcg_env, qd, rt); } - tcg_temp_free_i32(rt); mve_update_eci(s); return true; } @@ -534,9 +517,7 @@ static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn, } else { qd = mve_qreg_ptr(a->qd); qm = mve_qreg_ptr(a->qm); - fn(cpu_env, qd, qm); - tcg_temp_free_ptr(qd); - tcg_temp_free_ptr(qm); + fn(tcg_env, qd, qm); } mve_update_eci(s); return true; @@ -603,7 +584,7 @@ DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs) DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu) static bool do_vcvt_rmode(DisasContext *s, arg_1op *a, - enum arm_fprounding rmode, bool u) + ARMFPRounding rmode, bool u) { /* * Handle VCVT fp to int with specified rounding mode. @@ -631,9 +612,7 @@ static bool do_vcvt_rmode(DisasContext *s, arg_1op *a, qd = mve_qreg_ptr(a->qd); qm = mve_qreg_ptr(a->qm); - fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode))); - tcg_temp_free_ptr(qd); - tcg_temp_free_ptr(qm); + fn(tcg_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode))); mve_update_eci(s); return true; } @@ -821,10 +800,7 @@ static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn, qd = mve_qreg_ptr(a->qd); qn = mve_qreg_ptr(a->qn); qm = mve_qreg_ptr(a->qm); - fn(cpu_env, qd, qn, qm); - tcg_temp_free_ptr(qd); - tcg_temp_free_ptr(qn); - tcg_temp_free_ptr(qm); + fn(tcg_env, qd, qn, qm); } mve_update_eci(s); return true; @@ -1076,10 +1052,7 @@ static bool do_2op_scalar(DisasContext *s, arg_2scalar *a, qd = mve_qreg_ptr(a->qd); qn = mve_qreg_ptr(a->qn); rm = load_reg(s, a->rm); - fn(cpu_env, qd, qn, rm); - tcg_temp_free_i32(rm); - tcg_temp_free_ptr(qd); - tcg_temp_free_ptr(qn); + fn(tcg_env, qd, qn, rm); mve_update_eci(s); return true; } @@ -1173,7 +1146,7 @@ static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a, MVEGenLongDualAccOpFn *fn) { TCGv_ptr qn, qm; - TCGv_i64 rda; + TCGv_i64 rda_i, rda_o; TCGv_i32 rdalo, rdahi; if (!dc_isar_feature(aa32_mve, s) || @@ -1200,28 +1173,24 @@ static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a, * of an A=0 (no-accumulate) insn which does not execute the first * beat must start with the current rda value, not 0. 
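The rda -> rda_i/rda_o split in this and the following accumulator hunks is forced by the constants change: in the no-accumulate case the input is now tcg_constant_*(0), and an interned constant can never be a helper's output, so the input and output operands must be distinct values there. The pattern, as in do_dual_acc():

    TCGv_i32 rda_i, rda_o;
    if (a->a || mve_skip_first_beat(s)) {
        rda_o = rda_i = load_reg(s, a->rda);   /* a temp may be in and out */
    } else {
        rda_i = tcg_constant_i32(0);           /* read-only: input only */
        rda_o = tcg_temp_new_i32();
    }
    fn(rda_o, tcg_env, qn, qm, rda_i);
    store_reg(s, a->rda, rda_o);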
*/ + rda_o = tcg_temp_new_i64(); if (a->a || mve_skip_first_beat(s)) { - rda = tcg_temp_new_i64(); + rda_i = rda_o; rdalo = load_reg(s, a->rdalo); rdahi = load_reg(s, a->rdahi); - tcg_gen_concat_i32_i64(rda, rdalo, rdahi); - tcg_temp_free_i32(rdalo); - tcg_temp_free_i32(rdahi); + tcg_gen_concat_i32_i64(rda_i, rdalo, rdahi); } else { - rda = tcg_const_i64(0); + rda_i = tcg_constant_i64(0); } - fn(rda, cpu_env, qn, qm, rda); - tcg_temp_free_ptr(qn); - tcg_temp_free_ptr(qm); + fn(rda_o, tcg_env, qn, qm, rda_i); rdalo = tcg_temp_new_i32(); rdahi = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(rdalo, rda); - tcg_gen_extrh_i64_i32(rdahi, rda); + tcg_gen_extrl_i64_i32(rdalo, rda_o); + tcg_gen_extrh_i64_i32(rdahi, rda_o); store_reg(s, a->rdalo, rdalo); store_reg(s, a->rdahi, rdahi); - tcg_temp_free_i64(rda); mve_update_eci(s); return true; } @@ -1286,7 +1255,7 @@ static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a) static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn) { TCGv_ptr qn, qm; - TCGv_i32 rda; + TCGv_i32 rda_i, rda_o; if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qn) || @@ -1306,15 +1275,14 @@ static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn) * beat must start with the current rda value, not 0. */ if (a->a || mve_skip_first_beat(s)) { - rda = load_reg(s, a->rda); + rda_o = rda_i = load_reg(s, a->rda); } else { - rda = tcg_const_i32(0); + rda_i = tcg_constant_i32(0); + rda_o = tcg_temp_new_i32(); } - fn(rda, cpu_env, qn, qm, rda); - store_reg(s, a->rda, rda); - tcg_temp_free_ptr(qn); - tcg_temp_free_ptr(qm); + fn(rda_o, tcg_env, qn, qm, rda_i); + store_reg(s, a->rda, rda_o); mve_update_eci(s); return true; @@ -1409,7 +1377,7 @@ static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a) return true; } - gen_helper_mve_vpnot(cpu_env); + gen_helper_mve_vpnot(tcg_env); /* This insn updates predication bits */ s->base.is_jmp = DISAS_UPDATE_NOCHAIN; mve_update_eci(s); @@ -1426,7 +1394,7 @@ static bool trans_VADDV(DisasContext *s, arg_VADDV *a) { NULL, NULL } }; TCGv_ptr qm; - TCGv_i32 rda; + TCGv_i32 rda_i, rda_o; if (!dc_isar_feature(aa32_mve, s) || a->size == 3) { @@ -1443,16 +1411,16 @@ static bool trans_VADDV(DisasContext *s, arg_VADDV *a) */ if (a->a || mve_skip_first_beat(s)) { /* Accumulate input from Rda */ - rda = load_reg(s, a->rda); + rda_o = rda_i = load_reg(s, a->rda); } else { /* Accumulate starting at zero */ - rda = tcg_const_i32(0); + rda_i = tcg_constant_i32(0); + rda_o = tcg_temp_new_i32(); } qm = mve_qreg_ptr(a->qm); - fns[a->size][a->u](rda, cpu_env, qm, rda); - store_reg(s, a->rda, rda); - tcg_temp_free_ptr(qm); + fns[a->size][a->u](rda_o, tcg_env, qm, rda_i); + store_reg(s, a->rda, rda_o); mve_update_eci(s); return true; @@ -1467,7 +1435,7 @@ static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a) * No need to check Qm's bank: it is only 3 bits in decode. */ TCGv_ptr qm; - TCGv_i64 rda; + TCGv_i64 rda_i, rda_o; TCGv_i32 rdalo, rdahi; if (!dc_isar_feature(aa32_mve, s)) { @@ -1489,34 +1457,31 @@ static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a) * of an A=0 (no-accumulate) insn which does not execute the first * beat must start with the current value of RdaHi:RdaLo, not zero. 
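A RdaHi:RdaLo pair is modelled as a single 64-bit TCG value: the two 32-bit inputs are glued together on entry and split back on exit, as the surrounding hunks show. The shape is:

    TCGv_i64 acc = tcg_temp_new_i64();
    tcg_gen_concat_i32_i64(acc, rdalo, rdahi);   /* acc = RdaHi:RdaLo */
    /* ... accumulate across the vector into acc ... */
    tcg_gen_extrl_i64_i32(rdalo, acc);           /* RdaLo = low half */
    tcg_gen_extrh_i64_i32(rdahi, acc);           /* RdaHi = high half */
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);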
*/ + rda_o = tcg_temp_new_i64(); if (a->a || mve_skip_first_beat(s)) { /* Accumulate input from RdaHi:RdaLo */ - rda = tcg_temp_new_i64(); + rda_i = rda_o; rdalo = load_reg(s, a->rdalo); rdahi = load_reg(s, a->rdahi); - tcg_gen_concat_i32_i64(rda, rdalo, rdahi); - tcg_temp_free_i32(rdalo); - tcg_temp_free_i32(rdahi); + tcg_gen_concat_i32_i64(rda_i, rdalo, rdahi); } else { /* Accumulate starting at zero */ - rda = tcg_const_i64(0); + rda_i = tcg_constant_i64(0); } qm = mve_qreg_ptr(a->qm); if (a->u) { - gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda); + gen_helper_mve_vaddlv_u(rda_o, tcg_env, qm, rda_i); } else { - gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda); + gen_helper_mve_vaddlv_s(rda_o, tcg_env, qm, rda_i); } - tcg_temp_free_ptr(qm); rdalo = tcg_temp_new_i32(); rdahi = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(rdalo, rda); - tcg_gen_extrh_i64_i32(rdahi, rda); + tcg_gen_extrl_i64_i32(rdalo, rda_o); + tcg_gen_extrh_i64_i32(rdahi, rda_o); store_reg(s, a->rdalo, rdalo); store_reg(s, a->rdahi, rdahi); - tcg_temp_free_i64(rda); mve_update_eci(s); return true; } @@ -1543,8 +1508,7 @@ static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn, imm, 16, 16); } else { qd = mve_qreg_ptr(a->qd); - fn(cpu_env, qd, tcg_constant_i64(imm)); - tcg_temp_free_ptr(qd); + fn(tcg_env, qd, tcg_constant_i64(imm)); } mve_update_eci(s); return true; @@ -1616,9 +1580,7 @@ static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn, } else { qd = mve_qreg_ptr(a->qd); qm = mve_qreg_ptr(a->qm); - fn(cpu_env, qd, qm, tcg_constant_i32(shift)); - tcg_temp_free_ptr(qd); - tcg_temp_free_ptr(qm); + fn(tcg_env, qd, qm, tcg_constant_i32(shift)); } mve_update_eci(s); return true; @@ -1723,9 +1685,7 @@ static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a, qda = mve_qreg_ptr(a->qda); rm = load_reg(s, a->rm); - fn(cpu_env, qda, qda, rm); - tcg_temp_free_ptr(qda); - tcg_temp_free_i32(rm); + fn(tcg_env, qda, qda, rm); mve_update_eci(s); return true; } @@ -1867,9 +1827,8 @@ static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a) qd = mve_qreg_ptr(a->qd); rdm = load_reg(s, a->rdm); - gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm)); + gen_helper_mve_vshlc(rdm, tcg_env, qd, rdm, tcg_constant_i32(a->imm)); store_reg(s, a->rdm, rdm); - tcg_temp_free_ptr(qd); mve_update_eci(s); return true; } @@ -1897,9 +1856,8 @@ static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn) qd = mve_qreg_ptr(a->qd); rn = load_reg(s, a->rn); - fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm)); + fn(rn, tcg_env, qd, rn, tcg_constant_i32(a->imm)); store_reg(s, a->rn, rn); - tcg_temp_free_ptr(qd); mve_update_eci(s); return true; } @@ -1933,10 +1891,8 @@ static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn) qd = mve_qreg_ptr(a->qd); rn = load_reg(s, a->rn); rm = load_reg(s, a->rm); - fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm)); + fn(rn, tcg_env, qd, rn, rm, tcg_constant_i32(a->imm)); store_reg(s, a->rn, rn); - tcg_temp_free_ptr(qd); - tcg_temp_free_i32(rm); mve_update_eci(s); return true; } @@ -2001,9 +1957,7 @@ static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn) qn = mve_qreg_ptr(a->qn); qm = mve_qreg_ptr(a->qm); - fn(cpu_env, qn, qm); - tcg_temp_free_ptr(qn); - tcg_temp_free_ptr(qm); + fn(tcg_env, qn, qm); if (a->mask) { /* VPT */ gen_vpst(s, a->mask); @@ -2034,9 +1988,7 @@ static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a, } else { rm = load_reg(s, a->rm); } - fn(cpu_env, qn, rm); - tcg_temp_free_ptr(qn); - 
tcg_temp_free_i32(rm); + fn(tcg_env, qn, rm); if (a->mask) { /* VPT */ gen_vpst(s, a->mask); @@ -2137,9 +2089,8 @@ static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn) qm = mve_qreg_ptr(a->qm); rda = load_reg(s, a->rda); - fn(rda, cpu_env, qm, rda); + fn(rda, tcg_env, qm, rda); store_reg(s, a->rda, rda); - tcg_temp_free_ptr(qm); mve_update_eci(s); return true; } @@ -2202,10 +2153,8 @@ static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn) qm = mve_qreg_ptr(a->qm); qn = mve_qreg_ptr(a->qn); rda = load_reg(s, a->rda); - fn(rda, cpu_env, qn, qm, rda); + fn(rda, tcg_env, qn, qm, rda); store_reg(s, a->rda, rda); - tcg_temp_free_ptr(qm); - tcg_temp_free_ptr(qn); mve_update_eci(s); return true; } @@ -2233,7 +2182,7 @@ static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a) * execution if it is not in an IT block. For us this means * only that if PSR.ECI says we should not be executing the beat * corresponding to the lane of the vector register being accessed - * then we should skip perfoming the move, and that we need to do + * then we should skip performing the move, and that we need to do * the usual check for bad ECI state and advance of ECI state. * (If PSR.ECI is non-zero then we cannot be in an IT block.) */ @@ -2276,7 +2225,7 @@ static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a) * execution if it is not in an IT block. For us this means * only that if PSR.ECI says we should not be executing the beat * corresponding to the lane of the vector register being accessed - * then we should skip perfoming the move, and that we need to do + * then we should skip performing the move, and that we need to do * the usual check for bad ECI state and advance of ECI state. * (If PSR.ECI is non-zero then we cannot be in an IT block.) 
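 *
 * Concretely (a paraphrase of the architected behaviour): when ECI
 * records that one or more earlier beats already executed, the move
 * for a lane covered by such a beat is skipped, while the bad-ECI
 * check and the ECI advance still happen exactly once for the insn.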
*/ @@ -2298,12 +2247,10 @@ static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a) if (!mve_skip_vmov(s, vd, a->idx, MO_32)) { tmp = load_reg(s, a->rt); write_neon_element32(tmp, vd, a->idx, MO_32); - tcg_temp_free_i32(tmp); } if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) { tmp = load_reg(s, a->rt2); write_neon_element32(tmp, vd + 1, a->idx, MO_32); - tcg_temp_free_i32(tmp); } mve_update_and_store_eci(s); diff --git a/target/arm/translate-neon.c b/target/arm/tcg/translate-neon.c index dd43de558e..144f18ba22 100644 --- a/target/arm/translate-neon.c +++ b/target/arm/tcg/translate-neon.c @@ -21,10 +21,6 @@ */ #include "qemu/osdep.h" -#include "tcg/tcg-op.h" -#include "tcg/tcg-op-gvec.h" -#include "exec/exec-all.h" -#include "exec/gen-icount.h" #include "translate.h" #include "translate-a32.h" @@ -36,7 +32,7 @@ static TCGv_ptr vfp_reg_ptr(bool dp, int reg) { TCGv_ptr ret = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg)); + tcg_gen_addi_ptr(ret, tcg_env, vfp_reg_offset(dp, reg)); return ret; } @@ -46,13 +42,13 @@ static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop) switch (mop) { case MO_UB: - tcg_gen_ld8u_i32(var, cpu_env, offset); + tcg_gen_ld8u_i32(var, tcg_env, offset); break; case MO_UW: - tcg_gen_ld16u_i32(var, cpu_env, offset); + tcg_gen_ld16u_i32(var, tcg_env, offset); break; case MO_UL: - tcg_gen_ld_i32(var, cpu_env, offset); + tcg_gen_ld_i32(var, tcg_env, offset); break; default: g_assert_not_reached(); @@ -65,16 +61,16 @@ static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop) switch (mop) { case MO_UB: - tcg_gen_ld8u_i64(var, cpu_env, offset); + tcg_gen_ld8u_i64(var, tcg_env, offset); break; case MO_UW: - tcg_gen_ld16u_i64(var, cpu_env, offset); + tcg_gen_ld16u_i64(var, tcg_env, offset); break; case MO_UL: - tcg_gen_ld32u_i64(var, cpu_env, offset); + tcg_gen_ld32u_i64(var, tcg_env, offset); break; - case MO_Q: - tcg_gen_ld_i64(var, cpu_env, offset); + case MO_UQ: + tcg_gen_ld_i64(var, tcg_env, offset); break; default: g_assert_not_reached(); @@ -87,13 +83,13 @@ static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var) switch (size) { case MO_8: - tcg_gen_st8_i32(var, cpu_env, offset); + tcg_gen_st8_i32(var, tcg_env, offset); break; case MO_16: - tcg_gen_st16_i32(var, cpu_env, offset); + tcg_gen_st16_i32(var, tcg_env, offset); break; case MO_32: - tcg_gen_st_i32(var, cpu_env, offset); + tcg_gen_st_i32(var, tcg_env, offset); break; default: g_assert_not_reached(); @@ -106,16 +102,16 @@ static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var) switch (size) { case MO_8: - tcg_gen_st8_i64(var, cpu_env, offset); + tcg_gen_st8_i64(var, tcg_env, offset); break; case MO_16: - tcg_gen_st16_i64(var, cpu_env, offset); + tcg_gen_st16_i64(var, tcg_env, offset); break; case MO_32: - tcg_gen_st32_i64(var, cpu_env, offset); + tcg_gen_st32_i64(var, tcg_env, offset); break; case MO_64: - tcg_gen_st_i64(var, cpu_env, offset); + tcg_gen_st_i64(var, tcg_env, offset); break; default: g_assert_not_reached(); @@ -182,7 +178,6 @@ static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm, vfp_reg_offset(1, vm), vfp_reg_offset(1, vd), fpst, opr_sz, opr_sz, data, fn_gvec_ptr); - tcg_temp_free_ptr(fpst); return true; } @@ -236,7 +231,6 @@ static bool trans_VCADD(DisasContext *s, arg_VCADD *a) vfp_reg_offset(1, a->vm), fpst, opr_sz, opr_sz, a->rot, fn_gvec_ptr); - tcg_temp_free_ptr(fpst); return true; } @@ -302,7 +296,7 @@ static bool trans_VFML(DisasContext *s, arg_VFML *a) 
tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), vfp_reg_offset(a->q, a->vn), vfp_reg_offset(a->q, a->vm), - cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */ + tcg_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */ gen_helper_gvec_fmlal_a32); return true; } @@ -396,7 +390,7 @@ static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a) tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), vfp_reg_offset(a->q, a->vn), vfp_reg_offset(a->q, a->rm), - cpu_env, opr_sz, opr_sz, + tcg_env, opr_sz, opr_sz, (a->index << 2) | a->s, /* is_2 == 0 */ gen_helper_gvec_fmlal_idx_a32); return true; @@ -433,7 +427,6 @@ static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn, TCGv_i32 index; index = load_reg(s, rm); tcg_gen_add_i32(base, base, index); - tcg_temp_free_i32(index); } store_reg(s, rn, base); } @@ -447,7 +440,7 @@ static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) int mmu_idx = get_mem_index(s); int size = a->size; TCGv_i64 tmp64; - TCGv_i32 addr, tmp; + TCGv_i32 addr; if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { return false; @@ -513,7 +506,6 @@ static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) tmp64 = tcg_temp_new_i64(); addr = tcg_temp_new_i32(); - tmp = tcg_const_i32(1 << size); load_reg_var(s, addr, a->rn); mop = endian | size | align; @@ -530,16 +522,13 @@ static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a) neon_load_element64(tmp64, tt, n, size); gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop); } - tcg_gen_add_i32(addr, addr, tmp); + tcg_gen_addi_i32(addr, addr, 1 << size); /* Subsequent memory operations inherit alignment */ mop &= ~MO_AMASK; } } } - tcg_temp_free_i32(addr); - tcg_temp_free_i32(tmp); - tcg_temp_free_i64(tmp64); gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8); return true; @@ -586,7 +575,11 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) case 3: return false; case 4: - align = pow2_align(size + 2); + if (size == 2) { + align = pow2_align(3); + } else { + align = pow2_align(size + 2); + } break; default: g_assert_not_reached(); @@ -628,8 +621,6 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a) /* Subsequent memory operations inherit alignment */ mop &= ~MO_AMASK; } - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(addr); gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs); @@ -657,28 +648,31 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) /* Catch the UNDEF cases. This is unavoidably a bit messy. 
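 * For example, under the checks below nregs==1 with a stride of 2 is
 * rejected, and nregs==3 (VLD3/VST3 to one lane) is UNDEF for any
 * non-zero alignment hint.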
*/ switch (nregs) { case 1: + if (a->stride != 1) { + return false; + } if (((a->align & (1 << a->size)) != 0) || (a->size == 2 && (a->align == 1 || a->align == 2))) { return false; } break; - case 3: - if ((a->align & 1) != 0) { - return false; - } - /* fall through */ case 2: if (a->size == 2 && (a->align & 2) != 0) { return false; } break; + case 3: + if (a->align != 0) { + return false; + } + break; case 4: if (a->size == 2 && a->align == 3) { return false; } break; default: - abort(); + g_assert_not_reached(); } if ((vd + a->stride * (nregs - 1)) > 31) { /* @@ -746,8 +740,6 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a) /* Subsequent memory operations inherit alignment */ mop &= ~MO_AMASK; } - tcg_temp_free_i32(addr); - tcg_temp_free_i32(tmp); gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs); @@ -928,7 +920,7 @@ DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1) #define DO_3SAME_64_ENV(INSN, FUNC) \ static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \ { \ - FUNC(d, cpu_env, n, m); \ + FUNC(d, tcg_env, n, m); \ } \ DO_3SAME_64(INSN, gen_##INSN##_elt) @@ -961,7 +953,7 @@ DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) } /* - * Some helper functions need to be passed the cpu_env. In order + * Some helper functions need to be passed the tcg_env. In order * to use those with the gvec APIs like tcg_gen_gvec_3() we need * to create wrapper functions whose prototype is a NeonGenTwoOpFn() * and which call a NeonGenTwoOpEnvFn(). @@ -969,7 +961,7 @@ DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) #define WRAP_ENV_FN(WRAPNAME, FUNC) \ static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \ { \ - FUNC(d, cpu_env, n, m); \ + FUNC(d, tcg_env, n, m); \ } #define DO_3SAME_32_ENV(INSN, FUNC) \ @@ -1056,9 +1048,6 @@ static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn) write_neon_element32(tmp, a->vd, 0, MO_32); write_neon_element32(tmp3, a->vd, 1, MO_32); - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(tmp2); - tcg_temp_free_i32(tmp3); return true; } @@ -1121,7 +1110,6 @@ DO_3SAME_VQDMULH(VQRDMULH, qrdmulh) TCGv_ptr fpst = fpstatus_ptr(FPST); \ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \ oprsz, maxsz, 0, FUNC); \ - tcg_temp_free_ptr(fpst); \ } #define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC) \ @@ -1220,7 +1208,6 @@ static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, vfp_reg_offset(1, a->vn), vfp_reg_offset(1, a->vm), fpstatus, 8, 8, 0, fn); - tcg_temp_free_ptr(fpstatus); return true; } @@ -1318,7 +1305,7 @@ static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a, { /* * 2-reg-and-shift operations, size == 3 case, where the - * function needs to be passed cpu_env. + * function needs to be passed tcg_env. */ TCGv_i64 constimm; int pass; @@ -1345,17 +1332,15 @@ static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a, * To avoid excessive duplication of ops we implement shift * by immediate using the variable shift operations. 
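 *
 * For example, dup_const(MO_16, shift) replicates the shift count
 * into every 16-bit lane of a 64-bit value: dup_const(MO_16, 3) is
 * 0x0003000300030003ull, so a single i64 "variable" shift applies
 * the same immediate to all elements at once.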
*/ - constimm = tcg_const_i64(dup_const(a->size, a->shift)); + constimm = tcg_constant_i64(dup_const(a->size, a->shift)); for (pass = 0; pass < a->q + 1; pass++) { TCGv_i64 tmp = tcg_temp_new_i64(); read_neon_element64(tmp, a->vm, pass, MO_64); - fn(tmp, cpu_env, tmp, constimm); + fn(tmp, tcg_env, tmp, constimm); write_neon_element64(tmp, a->vd, pass, MO_64); - tcg_temp_free_i64(tmp); } - tcg_temp_free_i64(constimm); return true; } @@ -1364,7 +1349,7 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a, { /* * 2-reg-and-shift operations, size < 3 case, where the - * helper needs to be passed cpu_env. + * helper needs to be passed tcg_env. */ TCGv_i32 constimm, tmp; int pass; @@ -1391,16 +1376,14 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a, * To avoid excessive duplication of ops we implement shift * by immediate using the variable shift operations. */ - constimm = tcg_const_i32(dup_const(a->size, a->shift)); + constimm = tcg_constant_i32(dup_const(a->size, a->shift)); tmp = tcg_temp_new_i32(); for (pass = 0; pass < (a->q ? 4 : 2); pass++) { read_neon_element32(tmp, a->vm, pass, MO_32); - fn(tmp, cpu_env, tmp, constimm); + fn(tmp, tcg_env, tmp, constimm); write_neon_element32(tmp, a->vd, pass, MO_32); } - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(constimm); return true; } @@ -1454,7 +1437,7 @@ static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a, * This is always a right shift, and the shiftfn is always a * left-shift helper, which thus needs the negated shift count. */ - constimm = tcg_const_i64(-a->shift); + constimm = tcg_constant_i64(-a->shift); rm1 = tcg_temp_new_i64(); rm2 = tcg_temp_new_i64(); rd = tcg_temp_new_i32(); @@ -1464,18 +1447,13 @@ static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a, read_neon_element64(rm2, a->vm, 1, MO_64); shiftfn(rm1, rm1, constimm); - narrowfn(rd, cpu_env, rm1); + narrowfn(rd, tcg_env, rm1); write_neon_element32(rd, a->vd, 0, MO_32); shiftfn(rm2, rm2, constimm); - narrowfn(rd, cpu_env, rm2); + narrowfn(rd, tcg_env, rm2); write_neon_element32(rd, a->vd, 1, MO_32); - tcg_temp_free_i32(rd); - tcg_temp_free_i64(rm1); - tcg_temp_free_i64(rm2); - tcg_temp_free_i64(constimm); - return true; } @@ -1518,7 +1496,7 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, /* size == 2 */ imm = -a->shift; } - constimm = tcg_const_i32(imm); + constimm = tcg_constant_i32(imm); /* Load all inputs first to avoid potential overwrite */ rm1 = tcg_temp_new_i32(); @@ -1535,23 +1513,17 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a, shiftfn(rm2, rm2, constimm); tcg_gen_concat_i32_i64(rtmp, rm1, rm2); - tcg_temp_free_i32(rm2); - narrowfn(rm1, cpu_env, rtmp); + narrowfn(rm1, tcg_env, rtmp); write_neon_element32(rm1, a->vd, 0, MO_32); - tcg_temp_free_i32(rm1); shiftfn(rm3, rm3, constimm); shiftfn(rm4, rm4, constimm); - tcg_temp_free_i32(constimm); tcg_gen_concat_i32_i64(rtmp, rm3, rm4); - tcg_temp_free_i32(rm4); - narrowfn(rm3, cpu_env, rtmp); - tcg_temp_free_i64(rtmp); + narrowfn(rm3, tcg_env, rtmp); write_neon_element32(rm3, a->vd, 1, MO_32); - tcg_temp_free_i32(rm3); return true; } @@ -1659,7 +1631,6 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, tmp = tcg_temp_new_i64(); widenfn(tmp, rm0); - tcg_temp_free_i32(rm0); if (a->shift != 0) { tcg_gen_shli_i64(tmp, tmp, a->shift); tcg_gen_andi_i64(tmp, tmp, ~widen_mask); @@ -1667,13 +1638,11 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a, write_neon_element64(tmp, a->vd, 0, MO_64); widenfn(tmp, 
rm1); - tcg_temp_free_i32(rm1); if (a->shift != 0) { tcg_gen_shli_i64(tmp, tmp, a->shift); tcg_gen_andi_i64(tmp, tmp, ~widen_mask); } write_neon_element64(tmp, a->vd, 1, MO_64); - tcg_temp_free_i64(tmp); return true; } @@ -1732,7 +1701,6 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a, fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD); tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn); - tcg_temp_free_ptr(fpst); return true; } @@ -1830,7 +1798,7 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, return false; } - if ((a->vd & 1) || (src1_mop == MO_Q && (a->vn & 1))) { + if ((a->vd & 1) || (src1_mop == MO_UQ && (a->vn & 1))) { return false; } @@ -1848,7 +1816,6 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, TCGv_i32 tmp = tcg_temp_new_i32(); read_neon_element32(tmp, a->vn, 0, MO_32); widenfn(rn0_64, tmp); - tcg_temp_free_i32(tmp); } if (src2_mop >= 0) { read_neon_element64(rm_64, a->vm, 0, src2_mop); @@ -1856,7 +1823,6 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, TCGv_i32 tmp = tcg_temp_new_i32(); read_neon_element32(tmp, a->vm, 0, MO_32); widenfn(rm_64, tmp); - tcg_temp_free_i32(tmp); } opfn(rn0_64, rn0_64, rm_64); @@ -1871,7 +1837,6 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, TCGv_i32 tmp = tcg_temp_new_i32(); read_neon_element32(tmp, a->vn, 1, MO_32); widenfn(rn1_64, tmp); - tcg_temp_free_i32(tmp); } if (src2_mop >= 0) { read_neon_element64(rm_64, a->vm, 1, src2_mop); @@ -1879,7 +1844,6 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, TCGv_i32 tmp = tcg_temp_new_i32(); read_neon_element32(tmp, a->vm, 1, MO_32); widenfn(rm_64, tmp); - tcg_temp_free_i32(tmp); } write_neon_element64(rn0_64, a->vd, 0, MO_64); @@ -1887,10 +1851,6 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, opfn(rn1_64, rn1_64, rm_64); write_neon_element64(rn1_64, a->vd, 1, MO_64); - tcg_temp_free_i64(rn0_64); - tcg_temp_free_i64(rn1_64); - tcg_temp_free_i64(rm_64); - return true; } @@ -1910,7 +1870,7 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a, }; \ int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1; \ return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size], \ - SRC1WIDE ? MO_Q : narrow_mop, \ + SRC1WIDE ? 
MO_UQ : narrow_mop, \ narrow_mop); \ } @@ -1975,11 +1935,6 @@ static bool do_narrow_3d(DisasContext *s, arg_3diff *a, write_neon_element32(rd0, a->vd, 0, MO_32); write_neon_element32(rd1, a->vd, 1, MO_32); - tcg_temp_free_i32(rd0); - tcg_temp_free_i32(rd1); - tcg_temp_free_i64(rn_64); - tcg_temp_free_i64(rm_64); - return true; } @@ -2060,8 +2015,6 @@ static bool do_long_3d(DisasContext *s, arg_3diff *a, read_neon_element32(rn, a->vn, 1, MO_32); read_neon_element32(rm, a->vm, 1, MO_32); opfn(rd1, rn, rm); - tcg_temp_free_i32(rn); - tcg_temp_free_i32(rm); /* Don't store results until after all loads: they might overlap */ if (accfn) { @@ -2070,13 +2023,10 @@ static bool do_long_3d(DisasContext *s, arg_3diff *a, accfn(rd0, tmp, rd0); read_neon_element64(tmp, a->vd, 1, MO_64); accfn(rd1, tmp, rd1); - tcg_temp_free_i64(tmp); } write_neon_element64(rd0, a->vd, 0, MO_64); write_neon_element64(rd1, a->vd, 1, MO_64); - tcg_temp_free_i64(rd0); - tcg_temp_free_i64(rd1); return true; } @@ -2148,9 +2098,6 @@ static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) tcg_gen_muls2_i32(lo, hi, rn, rm); tcg_gen_concat_i32_i64(rd, lo, hi); - - tcg_temp_free_i32(lo); - tcg_temp_free_i32(hi); } static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) @@ -2160,9 +2107,6 @@ static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) tcg_gen_mulu2_i32(lo, hi, rn, rm); tcg_gen_concat_i32_i64(rd, lo, hi); - - tcg_temp_free_i32(lo); - tcg_temp_free_i32(hi); } static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a) @@ -2215,13 +2159,13 @@ DO_VMLAL(VMLSL_U,mull_u,sub) static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) { gen_helper_neon_mull_s16(rd, rn, rm); - gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd); + gen_helper_neon_addl_saturate_s32(rd, tcg_env, rd, rd); } static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm) { gen_mull_s32(rd, rn, rm); - gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd); + gen_helper_neon_addl_saturate_s64(rd, tcg_env, rd, rd); } static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a) @@ -2238,12 +2182,12 @@ static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a) static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) { - gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm); + gen_helper_neon_addl_saturate_s32(rd, tcg_env, rn, rm); } static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) { - gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm); + gen_helper_neon_addl_saturate_s64(rd, tcg_env, rn, rm); } static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a) @@ -2267,13 +2211,13 @@ static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a) static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) { gen_helper_neon_negl_u32(rm, rm); - gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm); + gen_helper_neon_addl_saturate_s32(rd, tcg_env, rn, rm); } static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm) { tcg_gen_neg_i64(rm, rm); - gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm); + gen_helper_neon_addl_saturate_s64(rd, tcg_env, rn, rm); } static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a) @@ -2343,7 +2287,6 @@ static void gen_neon_dup_low16(TCGv_i32 var) tcg_gen_ext16u_i32(var, var); tcg_gen_shli_i32(tmp, var, 16); tcg_gen_or_i32(var, var, tmp); - tcg_temp_free_i32(tmp); } static void gen_neon_dup_high16(TCGv_i32 var) @@ -2352,7 +2295,6 @@ static void gen_neon_dup_high16(TCGv_i32 var) tcg_gen_andi_i32(var, var, 0xffff0000); tcg_gen_shri_i32(tmp, 
var, 16); tcg_gen_or_i32(var, var, tmp); - tcg_temp_free_i32(tmp); } static inline TCGv_i32 neon_get_scalar(int size, int reg) @@ -2416,12 +2358,9 @@ static bool do_2scalar(DisasContext *s, arg_2scalar *a, TCGv_i32 rd = tcg_temp_new_i32(); read_neon_element32(rd, a->vd, pass, MO_32); accfn(tmp, rd, tmp); - tcg_temp_free_i32(rd); } write_neon_element32(tmp, a->vd, pass, MO_32); } - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(scalar); return true; } @@ -2515,7 +2454,6 @@ static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a, fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD); tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus, vec_size, vec_size, idx, fn); - tcg_temp_free_ptr(fpstatus); return true; } @@ -2612,13 +2550,9 @@ static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a, for (pass = 0; pass < (a->q ? 4 : 2); pass++) { read_neon_element32(rn, a->vn, pass, MO_32); read_neon_element32(rd, a->vd, pass, MO_32); - opfn(rd, cpu_env, rn, scalar, rd); + opfn(rd, tcg_env, rn, scalar, rd); write_neon_element32(rd, a->vd, pass, MO_32); } - tcg_temp_free_i32(rn); - tcg_temp_free_i32(rd); - tcg_temp_free_i32(scalar); - return true; } @@ -2691,8 +2625,6 @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, read_neon_element32(rn, a->vn, 1, MO_32); rn1_64 = tcg_temp_new_i64(); opfn(rn1_64, rn, scalar); - tcg_temp_free_i32(rn); - tcg_temp_free_i32(scalar); if (accfn) { TCGv_i64 t64 = tcg_temp_new_i64(); @@ -2700,13 +2632,10 @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, accfn(rn0_64, t64, rn0_64); read_neon_element64(t64, a->vd, 1, MO_64); accfn(rn1_64, t64, rn1_64); - tcg_temp_free_i64(t64); } write_neon_element64(rn0_64, a->vd, 0, MO_64); write_neon_element64(rn1_64, a->vd, 1, MO_64); - tcg_temp_free_i64(rn0_64); - tcg_temp_free_i64(rn1_64); return true; } @@ -2841,10 +2770,6 @@ static bool trans_VEXT(DisasContext *s, arg_VEXT *a) read_neon_element64(left, a->vm, 0, MO_64); tcg_gen_extract2_i64(dest, right, left, a->imm * 8); write_neon_element64(dest, a->vd, 0, MO_64); - - tcg_temp_free_i64(left); - tcg_temp_free_i64(right); - tcg_temp_free_i64(dest); } else { /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */ TCGv_i64 left, middle, right, destleft, destright; @@ -2871,12 +2796,6 @@ static bool trans_VEXT(DisasContext *s, arg_VEXT *a) write_neon_element64(destright, a->vd, 0, MO_64); write_neon_element64(destleft, a->vd, 1, MO_64); - - tcg_temp_free_i64(destright); - tcg_temp_free_i64(destleft); - tcg_temp_free_i64(right); - tcg_temp_free_i64(middle); - tcg_temp_free_i64(left); } return true; } @@ -2908,7 +2827,7 @@ static bool trans_VTBL(DisasContext *s, arg_VTBL *a) return true; } - desc = tcg_const_i32((a->vn << 2) | a->len); + desc = tcg_constant_i32((a->vn << 2) | a->len); def = tcg_temp_new_i64(); if (a->op) { read_neon_element64(def, a->vd, 0, MO_64); @@ -2918,12 +2837,8 @@ static bool trans_VTBL(DisasContext *s, arg_VTBL *a) val = tcg_temp_new_i64(); read_neon_element64(val, a->vm, 0, MO_64); - gen_helper_neon_tbl(val, cpu_env, desc, val, def); + gen_helper_neon_tbl(val, tcg_env, desc, val, def); write_neon_element64(val, a->vd, 0, MO_64); - - tcg_temp_free_i64(def); - tcg_temp_free_i64(val); - tcg_temp_free_i32(desc); return true; } @@ -3002,9 +2917,6 @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a) write_neon_element32(tmp[1], a->vd, pass * 2, MO_32); write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32); } - - tcg_temp_free_i32(tmp[0]); - tcg_temp_free_i32(tmp[1]); return true; } @@ -3055,20 +2967,15 @@ static bool 
do_2misc_pairwise(DisasContext *s, arg_2misc *a, widenfn(rm0_64, tmp); read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32); widenfn(rm1_64, tmp); - tcg_temp_free_i32(tmp); opfn(rd_64, rm0_64, rm1_64); - tcg_temp_free_i64(rm0_64); - tcg_temp_free_i64(rm1_64); if (accfn) { TCGv_i64 tmp64 = tcg_temp_new_i64(); read_neon_element64(tmp64, a->vd, pass, MO_64); accfn(rd_64, tmp64, rd_64); - tcg_temp_free_i64(tmp64); } write_neon_element64(rd_64, a->vd, pass, MO_64); - tcg_temp_free_i64(rd_64); } return true; } @@ -3192,8 +3099,6 @@ static bool do_zip_uzp(DisasContext *s, arg_2misc *a, pd = vfp_reg_ptr(true, a->vd); pm = vfp_reg_ptr(true, a->vm); fn(pd, pm); - tcg_temp_free_ptr(pd); - tcg_temp_free_ptr(pm); return true; } @@ -3266,14 +3171,11 @@ static bool do_vmovn(DisasContext *s, arg_2misc *a, rd1 = tcg_temp_new_i32(); read_neon_element64(rm, a->vm, 0, MO_64); - narrowfn(rd0, cpu_env, rm); + narrowfn(rd0, tcg_env, rm); read_neon_element64(rm, a->vm, 1, MO_64); - narrowfn(rd1, cpu_env, rm); + narrowfn(rd1, tcg_env, rm); write_neon_element32(rd0, a->vd, 0, MO_32); write_neon_element32(rd1, a->vd, 1, MO_32); - tcg_temp_free_i32(rd0); - tcg_temp_free_i32(rd1); - tcg_temp_free_i64(rm); return true; } @@ -3341,10 +3243,6 @@ static bool trans_VSHLL(DisasContext *s, arg_2misc *a) widenfn(rd, rm1); tcg_gen_shli_i64(rd, rd, 8 << a->size); write_neon_element64(rd, a->vd, 1, MO_64); - - tcg_temp_free_i64(rd); - tcg_temp_free_i32(rm0); - tcg_temp_free_i32(rm1); return true; } @@ -3385,11 +3283,6 @@ static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a) write_neon_element32(dst0, a->vd, 0, MO_32); write_neon_element32(dst1, a->vd, 1, MO_32); - - tcg_temp_free_i64(tmp); - tcg_temp_free_i32(dst0); - tcg_temp_free_i32(dst1); - tcg_temp_free_ptr(fpst); return true; } @@ -3432,16 +3325,10 @@ static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a) tmp3 = tcg_temp_new_i32(); read_neon_element32(tmp3, a->vm, 3, MO_32); write_neon_element32(tmp2, a->vd, 0, MO_32); - tcg_temp_free_i32(tmp2); gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp); tcg_gen_shli_i32(tmp3, tmp3, 16); tcg_gen_or_i32(tmp3, tmp3, tmp); write_neon_element32(tmp3, a->vd, 1, MO_32); - tcg_temp_free_i32(tmp3); - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(ahp); - tcg_temp_free_ptr(fpst); - return true; } @@ -3482,18 +3369,12 @@ static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a) tcg_gen_shri_i32(tmp, tmp, 16); gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp); write_neon_element32(tmp, a->vd, 1, MO_32); - tcg_temp_free_i32(tmp); tcg_gen_ext16u_i32(tmp3, tmp2); gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp); write_neon_element32(tmp3, a->vd, 2, MO_32); - tcg_temp_free_i32(tmp3); tcg_gen_shri_i32(tmp2, tmp2, 16); gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp); write_neon_element32(tmp2, a->vd, 3, MO_32); - tcg_temp_free_i32(tmp2); - tcg_temp_free_i32(ahp); - tcg_temp_free_ptr(fpst); - return true; } @@ -3570,9 +3451,9 @@ static bool trans_VMVN(DisasContext *s, arg_2misc *a) } WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0) -WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1) +WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aesd, 0) WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0) -WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1) +WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesimc, 0) WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0) WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0) WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0) @@ -3628,8 +3509,6 @@ 
static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn) fn(tmp, tmp); write_neon_element32(tmp, a->vd, pass, MO_32); } - tcg_temp_free_i32(tmp); - return true; } @@ -3746,7 +3625,7 @@ static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a) #define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \ static void WRAPNAME(TCGv_i32 d, TCGv_i32 m) \ { \ - FUNC(d, cpu_env, m); \ + FUNC(d, tcg_env, m); \ } WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8) @@ -3790,7 +3669,6 @@ static bool trans_VQNEG(DisasContext *s, arg_2misc *a) fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD); \ tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0, \ fns[vece]); \ - tcg_temp_free_ptr(fpst); \ } \ static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ { \ @@ -3841,7 +3719,6 @@ static bool trans_VRINTX(DisasContext *s, arg_2misc *a) fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD); \ tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, \ arm_rmode_to_sf(RMODE), fns[vece]); \ - tcg_temp_free_ptr(fpst); \ } \ static bool trans_##INSN(DisasContext *s, arg_2misc *a) \ { \ @@ -3908,11 +3785,9 @@ static bool trans_VSWP(DisasContext *s, arg_2misc *a) write_neon_element64(rm, a->vd, pass, MO_64); write_neon_element64(rd, a->vm, pass, MO_64); } - tcg_temp_free_i64(rm); - tcg_temp_free_i64(rd); - return true; } + static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1) { TCGv_i32 rd, tmp; @@ -3930,9 +3805,6 @@ static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1) tcg_gen_andi_i32(tmp, t0, 0xff00ff00); tcg_gen_or_i32(t1, t1, tmp); tcg_gen_mov_i32(t0, rd); - - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(rd); } static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1) @@ -3949,9 +3821,6 @@ static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1) tcg_gen_andi_i32(tmp, t0, 0xffff0000); tcg_gen_or_i32(t1, t1, tmp); tcg_gen_mov_i32(t0, rd); - - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(rd); } static bool trans_VTRN(DisasContext *s, arg_2misc *a) @@ -4003,8 +3872,6 @@ static bool trans_VTRN(DisasContext *s, arg_2misc *a) write_neon_element32(tmp, a->vd, pass, MO_32); } } - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(tmp2); return true; } diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c new file mode 100644 index 0000000000..46c7fce8b4 --- /dev/null +++ b/target/arm/tcg/translate-sme.c @@ -0,0 +1,343 @@ +/* + * AArch64 SME translation + * + * Copyright (c) 2022 Linaro, Ltd + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "translate.h" +#include "translate-a64.h" + +/* + * Include the generated decoder. + */ + +#include "decode-sme.c.inc" + + +/* + * Resolve tile.size[index] to a host pointer, where tile and index + * are always decoded together, dependent on the element size. 
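+ *
+ * For example, with esz == MO_32 the four tile_index bits split as
+ * tile = tile_index >> 2 (one of ZA0S..ZA3S) and
+ * index = tile_index & 3 (added to Rs below); with esz == MO_128
+ * all four bits select the tile and the index is always 0.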
+ */ +static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, + int tile_index, bool vertical) +{ + int tile = tile_index >> (4 - esz); + int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz); + int pos, len, offset; + TCGv_i32 tmp; + TCGv_ptr addr; + + /* Compute the final index, which is Rs+imm. */ + tmp = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs)); + tcg_gen_addi_i32(tmp, tmp, index); + + /* Prepare a power-of-two modulo via extraction of @len bits. */ + len = ctz32(streaming_vec_reg_size(s)) - esz; + + if (vertical) { + /* + * Compute the byte offset of the index within the tile: + * (index % (svl / size)) * size + * = (index % (svl >> esz)) << esz + * Perform the power-of-two modulo via extraction of the low @len bits. + * Perform the multiply by shifting left by @pos bits. + * Perform these operations simultaneously via deposit into zero. + */ + pos = esz; + tcg_gen_deposit_z_i32(tmp, tmp, pos, len); + + /* + * For big-endian, adjust the indexed column byte offset within + * the uint64_t host words that make up env->zarray[]. + */ + if (HOST_BIG_ENDIAN && esz < MO_64) { + tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz)); + } + } else { + /* + * Compute the byte offset of the index within the tile: + * (index % (svl / size)) * (size * sizeof(row)) + * = (index % (svl >> esz)) << (esz + log2(sizeof(row))) + */ + pos = esz + ctz32(sizeof(ARMVectorReg)); + tcg_gen_deposit_z_i32(tmp, tmp, pos, len); + + /* Row slices are always aligned and need no endian adjustment. */ + } + + /* The tile byte offset within env->zarray is the row. */ + offset = tile * sizeof(ARMVectorReg); + + /* Include the byte offset of zarray to make this relative to env. */ + offset += offsetof(CPUARMState, zarray); + tcg_gen_addi_i32(tmp, tmp, offset); + + /* Add the byte offset to env to produce the final pointer. */ + addr = tcg_temp_new_ptr(); + tcg_gen_ext_i32_ptr(addr, tmp); + tcg_gen_add_ptr(addr, addr, tcg_env); + + return addr; +} + +/* + * Resolve tile.size[0] to a host pointer. + * Used by e.g. outer product insns where we require the entire tile. 
+ */ +static TCGv_ptr get_tile(DisasContext *s, int esz, int tile) +{ + TCGv_ptr addr = tcg_temp_new_ptr(); + int offset; + + offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, zarray); + + tcg_gen_addi_ptr(addr, tcg_env, offset); + return addr; +} + +static bool trans_ZERO(DisasContext *s, arg_ZERO *a) +{ + if (!dc_isar_feature(aa64_sme, s)) { + return false; + } + if (sme_za_enabled_check(s)) { + gen_helper_sme_zero(tcg_env, tcg_constant_i32(a->imm), + tcg_constant_i32(streaming_vec_reg_size(s))); + } + return true; +} + +static bool trans_MOVA(DisasContext *s, arg_MOVA *a) +{ + static gen_helper_gvec_4 * const h_fns[5] = { + gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, + gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d, + gen_helper_sve_sel_zpzz_q + }; + static gen_helper_gvec_3 * const cz_fns[5] = { + gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h, + gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d, + gen_helper_sme_mova_cz_q, + }; + static gen_helper_gvec_3 * const zc_fns[5] = { + gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h, + gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d, + gen_helper_sme_mova_zc_q, + }; + + TCGv_ptr t_za, t_zr, t_pg; + TCGv_i32 t_desc; + int svl; + + if (!dc_isar_feature(aa64_sme, s)) { + return false; + } + if (!sme_smza_enabled_check(s)) { + return true; + } + + t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v); + t_zr = vec_full_reg_ptr(s, a->zr); + t_pg = pred_full_reg_ptr(s, a->pg); + + svl = streaming_vec_reg_size(s); + t_desc = tcg_constant_i32(simd_desc(svl, svl, 0)); + + if (a->v) { + /* Vertical slice -- use sme mova helpers. */ + if (a->to_vec) { + zc_fns[a->esz](t_zr, t_za, t_pg, t_desc); + } else { + cz_fns[a->esz](t_za, t_zr, t_pg, t_desc); + } + } else { + /* Horizontal slice -- reuse sve sel helpers. */ + if (a->to_vec) { + h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc); + } else { + h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc); + } + } + return true; +} + +static bool trans_LDST1(DisasContext *s, arg_LDST1 *a) +{ + typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32); + + /* + * Indexed by [esz][be][v][mte][st], which is (except for load/store) + * also the order in which the elements appear in the function names, + * and so how we must concatenate the pieces. 
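+ *
+ * For example, fns[MO_32][1][1][1][0] resolves to
+ * gen_helper_sme_ld1s_be_v_mte: a 32-bit big-endian load of a
+ * vertical slice with MTE checking.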
+ */ + +#define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F } +#define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) } +#define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) } +#define FN_END(L, B) { FN_HV(L), FN_HV(B) } + + static GenLdSt1 * const fns[5][2][2][2][2] = { + FN_END(b, b), + FN_END(h_le, h_be), + FN_END(s_le, s_be), + FN_END(d_le, d_be), + FN_END(q_le, q_be), + }; + +#undef FN_LS +#undef FN_MTE +#undef FN_HV +#undef FN_END + + TCGv_ptr t_za, t_pg; + TCGv_i64 addr; + uint32_t desc; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; + + if (!dc_isar_feature(aa64_sme, s)) { + return false; + } + if (!sme_smza_enabled_check(s)) { + return true; + } + + t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v); + t_pg = pred_full_reg_ptr(s, a->pg); + addr = tcg_temp_new_i64(); + + tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz); + tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); + + if (!mte) { + addr = clean_data_tbi(s, addr); + } + + desc = make_svemte_desc(s, streaming_vec_reg_size(s), 1, a->esz, a->st, 0); + + fns[a->esz][be][a->v][mte][a->st](tcg_env, t_za, t_pg, addr, + tcg_constant_i32(desc)); + return true; +} + +typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int); + +static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn) +{ + int svl = streaming_vec_reg_size(s); + int imm = a->imm; + TCGv_ptr base; + + if (!sme_za_enabled_check(s)) { + return true; + } + + /* ZA[n] equates to ZA0H.B[n]. */ + base = get_tile_rowcol(s, MO_8, a->rv, imm, false); + + fn(s, base, 0, svl, a->rn, imm * svl); + return true; +} + +TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr) +TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str) + +static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz, + gen_helper_gvec_4 *fn) +{ + int svl = streaming_vec_reg_size(s); + uint32_t desc = simd_desc(svl, svl, 0); + TCGv_ptr za, zn, pn, pm; + + if (!sme_smza_enabled_check(s)) { + return true; + } + + za = get_tile(s, esz, a->zad); + zn = vec_full_reg_ptr(s, a->zn); + pn = pred_full_reg_ptr(s, a->pn); + pm = pred_full_reg_ptr(s, a->pm); + + fn(za, zn, pn, pm, tcg_constant_i32(desc)); + return true; +} + +TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s) +TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s) +TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d) +TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d) + +static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz, + gen_helper_gvec_5 *fn) +{ + int svl = streaming_vec_reg_size(s); + uint32_t desc = simd_desc(svl, svl, a->sub); + TCGv_ptr za, zn, zm, pn, pm; + + if (!sme_smza_enabled_check(s)) { + return true; + } + + za = get_tile(s, esz, a->zad); + zn = vec_full_reg_ptr(s, a->zn); + zm = vec_full_reg_ptr(s, a->zm); + pn = pred_full_reg_ptr(s, a->pn); + pm = pred_full_reg_ptr(s, a->pm); + + fn(za, zn, zm, pn, pm, tcg_constant_i32(desc)); + return true; +} + +static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, + gen_helper_gvec_5_ptr *fn) +{ + int svl = streaming_vec_reg_size(s); + uint32_t desc = simd_desc(svl, svl, a->sub); + TCGv_ptr za, zn, zm, pn, pm, fpst; + + if (!sme_smza_enabled_check(s)) { + return true; + } + + za = get_tile(s, esz, a->zad); + zn = vec_full_reg_ptr(s, a->zn); + zm = vec_full_reg_ptr(s, a->zm); + pn = pred_full_reg_ptr(s, a->pn); + pm = pred_full_reg_ptr(s, a->pm); + fpst = fpstatus_ptr(FPST_FPCR); + + fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc)); + return true; 
+} + +TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h) +TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s) +TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d) + +/* TODO: FEAT_EBF16 */ +TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa) + +TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s) +TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s) +TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s) +TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s) + +TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d) +TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d) +TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d) +TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d) diff --git a/target/arm/translate-sve.c b/target/arm/tcg/translate-sve.c index bc91a64171..ada05aa530 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -18,18 +18,7 @@ */ #include "qemu/osdep.h" -#include "cpu.h" -#include "exec/exec-all.h" -#include "tcg/tcg-op.h" -#include "tcg/tcg-op-gvec.h" -#include "tcg/tcg-gvec-desc.h" -#include "qemu/log.h" -#include "arm_ldst.h" #include "translate.h" -#include "internals.h" -#include "exec/helper-proto.h" -#include "exec/helper-gen.h" -#include "exec/log.h" #include "translate-a64.h" #include "fpu/softfloat.h" @@ -100,133 +89,355 @@ static inline int msz_dtype(DisasContext *s, int msz) * Implement all of the translator functions referenced by the decoder. */ -/* Return the offset info CPUARMState of the predicate vector register Pn. - * Note for this purpose, FFR is P16. - */ -static inline int pred_full_reg_offset(DisasContext *s, int regno) +/* Invoke an out-of-line helper on 2 Zregs. */ +static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn, + int rd, int rn, int data) { - return offsetof(CPUARMState, vfp.pregs[regno]); + if (fn == NULL) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vsz, vsz, data, fn); + } + return true; } -/* Return the byte size of the whole predicate register, VL / 64. */ -static inline int pred_full_reg_size(DisasContext *s) +static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, + int rd, int rn, int data, + ARMFPStatusFlavour flavour) { - return s->sve_len >> 3; + if (fn == NULL) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + TCGv_ptr status = fpstatus_ptr(flavour); + + tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + status, vsz, vsz, data, fn); + } + return true; } -/* Round up the size of a register to a size allowed by - * the tcg vector infrastructure. Any operation which uses this - * size may assume that the bits above pred_full_reg_size are zero, - * and must leave them the same way. - * - * Note that this is not needed for the vector registers as they - * are always properly sized for tcg vectors. 
- */ -static int size_for_gvec(int size) +static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn, + arg_rr_esz *a, int data) { - if (size <= 8) { - return 8; - } else { - return QEMU_ALIGN_UP(size, 16); + return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data, + a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); +} + +/* Invoke an out-of-line helper on 3 Zregs. */ +static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn, + int rd, int rn, int rm, int data) +{ + if (fn == NULL) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + vsz, vsz, data, fn); } + return true; } -static int pred_gvec_reg_size(DisasContext *s) +static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn, + arg_rrr_esz *a, int data) { - return size_for_gvec(pred_full_reg_size(s)); + return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data); } -/* Invoke an out-of-line helper on 2 Zregs. */ -static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn, - int rd, int rn, int data) +/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */ +static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, + int rd, int rn, int rm, + int data, ARMFPStatusFlavour flavour) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vsz, vsz, data, fn); + if (fn == NULL) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + TCGv_ptr status = fpstatus_ptr(flavour); + + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + status, vsz, vsz, data, fn); + } + return true; } -/* Invoke an out-of-line helper on 3 Zregs. */ -static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn, - int rd, int rn, int rm, int data) +static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn, + arg_rrr_esz *a, int data) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - vsz, vsz, data, fn); + return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data, + a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); } /* Invoke an out-of-line helper on 4 Zregs. */ -static void gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, +static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, int rd, int rn, int rm, int ra, int data) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - vec_full_reg_offset(s, ra), - vsz, vsz, data, fn); + if (fn == NULL) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, ra), + vsz, vsz, data, fn); + } + return true; +} + +static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn, + arg_rrrr_esz *a, int data) +{ + return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data); +} + +static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn, + arg_rrxr_esz *a) +{ + return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index); +} + +/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. 
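+ * The pointer becomes the helper's fifth argument;
+ * gen_gvec_fpst_zzzz below uses this to pass a float_status of the
+ * requested flavour.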
*/ +static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, + int rd, int rn, int rm, int ra, + int data, TCGv_ptr ptr) +{ + if (fn == NULL) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, ra), + ptr, vsz, vsz, data, fn); + } + return true; +} + +static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, + int rd, int rn, int rm, int ra, + int data, ARMFPStatusFlavour flavour) +{ + TCGv_ptr status = fpstatus_ptr(flavour); + bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status); + return ret; +} + +/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */ +static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn, + int rd, int rn, int rm, int ra, int pg, + int data, ARMFPStatusFlavour flavour) +{ + if (fn == NULL) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + TCGv_ptr status = fpstatus_ptr(flavour); + + tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + vec_full_reg_offset(s, ra), + pred_full_reg_offset(s, pg), + status, vsz, vsz, data, fn); + } + return true; } /* Invoke an out-of-line helper on 2 Zregs and a predicate. */ -static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn, +static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn, int rd, int rn, int pg, int data) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - pred_full_reg_offset(s, pg), - vsz, vsz, data, fn); + if (fn == NULL) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + pred_full_reg_offset(s, pg), + vsz, vsz, data, fn); + } + return true; +} + +static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn, + arg_rpr_esz *a, int data) +{ + return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data); +} + +static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn, + arg_rpri_esz *a) +{ + return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm); +} + +static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn, + int rd, int rn, int pg, int data, + ARMFPStatusFlavour flavour) +{ + if (fn == NULL) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + TCGv_ptr status = fpstatus_ptr(flavour); + + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + pred_full_reg_offset(s, pg), + status, vsz, vsz, data, fn); + } + return true; +} + +static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn, + arg_rpr_esz *a, int data, + ARMFPStatusFlavour flavour) +{ + return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour); } /* Invoke an out-of-line helper on 3 Zregs and a predicate. 
*/ -static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn, +static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn, int rd, int rn, int rm, int pg, int data) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - pred_full_reg_offset(s, pg), - vsz, vsz, data, fn); + if (fn == NULL) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + pred_full_reg_offset(s, pg), + vsz, vsz, data, fn); + } + return true; } -/* Invoke a vector expander on two Zregs. */ -static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn, - int esz, int rd, int rn) +static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn, + arg_rprr_esz *a, int data) { - unsigned vsz = vec_full_reg_size(s); - gvec_fn(esz, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), vsz, vsz); + return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data); +} + +/* Invoke an out-of-line helper on 3 Zregs and a predicate. */ +static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn, + int rd, int rn, int rm, int pg, int data, + ARMFPStatusFlavour flavour) +{ + if (fn == NULL) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + TCGv_ptr status = fpstatus_ptr(flavour); + + tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), + pred_full_reg_offset(s, pg), + status, vsz, vsz, data, fn); + } + return true; +} + +static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn, + arg_rprr_esz *a) +{ + return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0, + a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); +} + +/* Invoke a vector expander on two Zregs and an immediate. */ +static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, + int esz, int rd, int rn, uint64_t imm) +{ + if (gvec_fn == NULL) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + gvec_fn(esz, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), imm, vsz, vsz); + } + return true; +} + +static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn, + arg_rri_esz *a) +{ + if (a->esz < 0) { + /* Invalid tsz encoding -- see tszimm_esz. */ + return false; + } + return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm); } /* Invoke a vector expander on three Zregs. */ -static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn, +static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn, int esz, int rd, int rn, int rm) { - unsigned vsz = vec_full_reg_size(s); - gvec_fn(esz, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), vsz, vsz); + if (gvec_fn == NULL) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + gvec_fn(esz, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), vsz, vsz); + } + return true; +} + +static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn, + arg_rrr_esz *a) +{ + return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm); } /* Invoke a vector expander on four Zregs. 
*/ -static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn, - int esz, int rd, int rn, int rm, int ra) +static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn, + arg_rrrr_esz *a) { - unsigned vsz = vec_full_reg_size(s); - gvec_fn(esz, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - vec_full_reg_offset(s, ra), vsz, vsz); + if (gvec_fn == NULL) { + return false; + } + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + gvec_fn(a->esz, vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vec_full_reg_offset(s, a->ra), vsz, vsz); + } + return true; } /* Invoke a vector move on two Zregs. */ static bool do_mov_z(DisasContext *s, int rd, int rn) { if (sve_access_check(s)) { - gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn); + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), vsz, vsz); } return true; } @@ -239,13 +450,16 @@ static void do_dupi_z(DisasContext *s, int rd, uint64_t word) } /* Invoke a vector expander on three Pregs. */ -static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn, +static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn, int rd, int rn, int rm) { - unsigned psz = pred_gvec_reg_size(s); - gvec_fn(MO_64, pred_full_reg_offset(s, rd), - pred_full_reg_offset(s, rn), - pred_full_reg_offset(s, rm), psz, psz); + if (sve_access_check(s)) { + unsigned psz = pred_gvec_reg_size(s); + gvec_fn(MO_64, pred_full_reg_offset(s, rd), + pred_full_reg_offset(s, rn), + pred_full_reg_offset(s, rm), psz, psz); + } + return true; } /* Invoke a vector move on two Pregs. */ @@ -275,64 +489,43 @@ static void do_predtest1(TCGv_i64 d, TCGv_i64 g) gen_helper_sve_predtest1(t, d, g); do_pred_flags(t); - tcg_temp_free_i32(t); } static void do_predtest(DisasContext *s, int dofs, int gofs, int words) { TCGv_ptr dptr = tcg_temp_new_ptr(); TCGv_ptr gptr = tcg_temp_new_ptr(); - TCGv_i32 t; + TCGv_i32 t = tcg_temp_new_i32(); - tcg_gen_addi_ptr(dptr, cpu_env, dofs); - tcg_gen_addi_ptr(gptr, cpu_env, gofs); - t = tcg_const_i32(words); + tcg_gen_addi_ptr(dptr, tcg_env, dofs); + tcg_gen_addi_ptr(gptr, tcg_env, gofs); - gen_helper_sve_predtest(t, dptr, gptr, t); - tcg_temp_free_ptr(dptr); - tcg_temp_free_ptr(gptr); + gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words)); do_pred_flags(t); - tcg_temp_free_i32(t); } /* For each element size, the bits within a predicate word that are active. 
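 * Each predicate bit corresponds to one byte of the vector, and only
 * the lowest bit of each element's byte group may be set: e.g.
 * 0x5555555555555555ull for 2-byte elements, down to one active bit
 * per 16-byte element (0x0001000100010001ull) for MO_128.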
*/ -const uint64_t pred_esz_masks[4] = { +const uint64_t pred_esz_masks[5] = { 0xffffffffffffffffull, 0x5555555555555555ull, - 0x1111111111111111ull, 0x0101010101010101ull + 0x1111111111111111ull, 0x0101010101010101ull, + 0x0001000100010001ull, }; -/* - *** SVE Logical - Unpredicated Group - */ - -static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn) +static bool trans_INVALID(DisasContext *s, arg_INVALID *a) { - if (sve_access_check(s)) { - gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm); - } + unallocated_encoding(s); return true; } -static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a) -{ - return do_zzz_fn(s, a, tcg_gen_gvec_and); -} - -static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a) -{ - return do_zzz_fn(s, a, tcg_gen_gvec_or); -} - -static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a) -{ - return do_zzz_fn(s, a, tcg_gen_gvec_xor); -} +/* + *** SVE Logical - Unpredicated Group + */ -static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a) -{ - return do_zzz_fn(s, a, tcg_gen_gvec_andc); -} +TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a) +TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a) +TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a) +TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a) static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) { @@ -345,7 +538,6 @@ static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) tcg_gen_andi_i64(d, d, mask); tcg_gen_andi_i64(t, t, ~mask); tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) @@ -359,7 +551,6 @@ static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) tcg_gen_andi_i64(d, d, mask); tcg_gen_andi_i64(t, t, ~mask); tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh) @@ -438,17 +629,6 @@ static bool trans_XAR(DisasContext *s, arg_rrri_esz *a) return true; } -static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn) -{ - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra); - } - return true; -} - static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) { tcg_gen_xor_i64(d, n, m); @@ -475,10 +655,7 @@ static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m, tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); } -static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a) -{ - return do_sve2_zzzz_fn(s, a, gen_eor3); -} +TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a) static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) { @@ -506,10 +683,7 @@ static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); } -static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a) -{ - return do_sve2_zzzz_fn(s, a, gen_bcax); -} +TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a) static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, uint32_t a, uint32_t oprsz, uint32_t maxsz) @@ -518,10 +692,7 @@ static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz); } -static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a) -{ - return do_sve2_zzzz_fn(s, a, gen_bsl); -} +TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, 
gen_bsl, a) static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) { @@ -556,10 +727,7 @@ static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); } -static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a) -{ - return do_sve2_zzzz_fn(s, a, gen_bsl1n); -} +TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a) static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) { @@ -603,10 +771,7 @@ static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); } -static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a) -{ - return do_sve2_zzzz_fn(s, a, gen_bsl2n); -} +TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a) static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) { @@ -635,239 +800,136 @@ static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); } -static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a) -{ - return do_sve2_zzzz_fn(s, a, gen_nbsl); -} +TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a) /* *** SVE Integer Arithmetic - Unpredicated Group */ -static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a) -{ - return do_zzz_fn(s, a, tcg_gen_gvec_add); -} - -static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a) -{ - return do_zzz_fn(s, a, tcg_gen_gvec_sub); -} - -static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a) -{ - return do_zzz_fn(s, a, tcg_gen_gvec_ssadd); -} - -static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a) -{ - return do_zzz_fn(s, a, tcg_gen_gvec_sssub); -} - -static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a) -{ - return do_zzz_fn(s, a, tcg_gen_gvec_usadd); -} - -static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a) -{ - return do_zzz_fn(s, a, tcg_gen_gvec_ussub); -} +TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a) +TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a) +TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a) +TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a) +TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a) +TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a) /* *** SVE Integer Arithmetic - Binary Predicated Group */ -static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn) -{ - if (fn == NULL) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0); - } - return true; -} - /* Select active elements from Zn and inactive elements from Zm, * storing the result in Zd.
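As background for the *_zpzz helpers named below: these are merging operations, computing a result only where the governing predicate is active and preserving the destination elsewhere (SEL, described in the comment here, is the exception: it chooses between two sources instead). A self-contained scalar model, assuming byte elements with one predicate bit per byte; the real helpers in sve_helper.c are vectorized and driven by a simd_desc descriptor.

#include <stdbool.h>
#include <stdint.h>

/* Scalar model of a merging predicated add such as sve_add_zpzz_b. */
static void model_add_zpzz_b(uint8_t *d, const uint8_t *n,
                             const uint8_t *m, const uint8_t *pg,
                             unsigned elements)
{
    for (unsigned i = 0; i < elements; i++) {
        bool active = (pg[i / 8] >> (i % 8)) & 1;
        if (active) {
            d[i] = n[i] + m[i];   /* active element: compute */
        }                         /* inactive: d[i] unchanged (merge) */
    }
}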
*/ -static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz) +static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz) { static gen_helper_gvec_4 * const fns[4] = { gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d }; - gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0); + return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0); } -#define DO_ZPZZ(NAME, name) \ -static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a) \ -{ \ - static gen_helper_gvec_4 * const fns[4] = { \ - gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \ - gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \ +#define DO_ZPZZ(NAME, FEAT, name) \ + static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \ + gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \ + gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \ }; \ - return do_zpzz_ool(s, a, fns[a->esz]); \ -} - -DO_ZPZZ(AND, and) -DO_ZPZZ(EOR, eor) -DO_ZPZZ(ORR, orr) -DO_ZPZZ(BIC, bic) - -DO_ZPZZ(ADD, add) -DO_ZPZZ(SUB, sub) - -DO_ZPZZ(SMAX, smax) -DO_ZPZZ(UMAX, umax) -DO_ZPZZ(SMIN, smin) -DO_ZPZZ(UMIN, umin) -DO_ZPZZ(SABD, sabd) -DO_ZPZZ(UABD, uabd) - -DO_ZPZZ(MUL, mul) -DO_ZPZZ(SMULH, smulh) -DO_ZPZZ(UMULH, umulh) - -DO_ZPZZ(ASR, asr) -DO_ZPZZ(LSR, lsr) -DO_ZPZZ(LSL, lsl) - -static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a) -{ - static gen_helper_gvec_4 * const fns[4] = { - NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d - }; - return do_zpzz_ool(s, a, fns[a->esz]); -} - -static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a) -{ - static gen_helper_gvec_4 * const fns[4] = { - NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d - }; - return do_zpzz_ool(s, a, fns[a->esz]); -} + TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \ + name##_zpzz_fns[a->esz], a, 0) + +DO_ZPZZ(AND_zpzz, aa64_sve, sve_and) +DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor) +DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr) +DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic) + +DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add) +DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub) + +DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax) +DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax) +DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin) +DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin) +DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd) +DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd) + +DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul) +DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh) +DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh) + +DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr) +DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr) +DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl) + +static gen_helper_gvec_4 * const sdiv_fns[4] = { + NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d +}; +TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0) -static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a) -{ - if (sve_access_check(s)) { - do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz); - } - return true; -} +static gen_helper_gvec_4 * const udiv_fns[4] = { + NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d +}; +TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0) -#undef DO_ZPZZ +TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz) /* *** SVE Integer Arithmetic - Unary Predicated Group */ -static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn) -{ - if (fn == NULL) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0); 
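Written out by hand for illustration, one instance of the reworked DO_ZPZZ macro above, DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add), produces a file-scope helper table plus one TRANS_FEAT line:

/* Hand expansion of DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add): */
static gen_helper_gvec_4 * const sve_add_zpzz_fns[4] = {
    gen_helper_sve_add_zpzz_b, gen_helper_sve_add_zpzz_h,
    gen_helper_sve_add_zpzz_s, gen_helper_sve_add_zpzz_d,
};
TRANS_FEAT(ADD_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz,
           sve_add_zpzz_fns[a->esz], a, 0)

Hoisting the table to file scope is what allows the indexing expression sve_add_zpzz_fns[a->esz] to appear as a macro argument: it is only evaluated inside the generated trans function, where 'a' is in scope.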
- } - return true; -} - -#define DO_ZPZ(NAME, name) \ -static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \ -{ \ - static gen_helper_gvec_3 * const fns[4] = { \ - gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \ - gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ +#define DO_ZPZ(NAME, FEAT, name) \ + static gen_helper_gvec_3 * const name##_fns[4] = { \ + gen_helper_##name##_b, gen_helper_##name##_h, \ + gen_helper_##name##_s, gen_helper_##name##_d, \ }; \ - return do_zpz_ool(s, a, fns[a->esz]); \ -} - -DO_ZPZ(CLS, cls) -DO_ZPZ(CLZ, clz) -DO_ZPZ(CNT_zpz, cnt_zpz) -DO_ZPZ(CNOT, cnot) -DO_ZPZ(NOT_zpz, not_zpz) -DO_ZPZ(ABS, abs) -DO_ZPZ(NEG, neg) - -static bool trans_FABS(DisasContext *s, arg_rpr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - NULL, - gen_helper_sve_fabs_h, - gen_helper_sve_fabs_s, - gen_helper_sve_fabs_d - }; - return do_zpz_ool(s, a, fns[a->esz]); -} - -static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - NULL, - gen_helper_sve_fneg_h, - gen_helper_sve_fneg_s, - gen_helper_sve_fneg_d - }; - return do_zpz_ool(s, a, fns[a->esz]); -} - -static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - NULL, - gen_helper_sve_sxtb_h, - gen_helper_sve_sxtb_s, - gen_helper_sve_sxtb_d - }; - return do_zpz_ool(s, a, fns[a->esz]); -} + TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0) + +DO_ZPZ(CLS, aa64_sve, sve_cls) +DO_ZPZ(CLZ, aa64_sve, sve_clz) +DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz) +DO_ZPZ(CNOT, aa64_sve, sve_cnot) +DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz) +DO_ZPZ(ABS, aa64_sve, sve_abs) +DO_ZPZ(NEG, aa64_sve, sve_neg) +DO_ZPZ(RBIT, aa64_sve, sve_rbit) + +static gen_helper_gvec_3 * const fabs_fns[4] = { + NULL, gen_helper_sve_fabs_h, + gen_helper_sve_fabs_s, gen_helper_sve_fabs_d, +}; +TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0) -static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - NULL, - gen_helper_sve_uxtb_h, - gen_helper_sve_uxtb_s, - gen_helper_sve_uxtb_d - }; - return do_zpz_ool(s, a, fns[a->esz]); -} +static gen_helper_gvec_3 * const fneg_fns[4] = { + NULL, gen_helper_sve_fneg_h, + gen_helper_sve_fneg_s, gen_helper_sve_fneg_d, +}; +TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0) -static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - NULL, NULL, - gen_helper_sve_sxth_s, - gen_helper_sve_sxth_d - }; - return do_zpz_ool(s, a, fns[a->esz]); -} +static gen_helper_gvec_3 * const sxtb_fns[4] = { + NULL, gen_helper_sve_sxtb_h, + gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d, +}; +TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0) -static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - NULL, NULL, - gen_helper_sve_uxth_s, - gen_helper_sve_uxth_d - }; - return do_zpz_ool(s, a, fns[a->esz]); -} +static gen_helper_gvec_3 * const uxtb_fns[4] = { + NULL, gen_helper_sve_uxtb_h, + gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d, +}; +TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0) -static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ool(s, a, a->esz == 3 ? 
gen_helper_sve_sxtw_d : NULL); -} +static gen_helper_gvec_3 * const sxth_fns[4] = { + NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d +}; +TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0) -static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL); -} +static gen_helper_gvec_3 * const uxth_fns[4] = { + NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d +}; +TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0) -#undef DO_ZPZ +TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz, + a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0) +TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz, + a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0) /* *** SVE Integer Reduction Group @@ -889,32 +951,25 @@ static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a, return true; } - desc = tcg_const_i32(simd_desc(vsz, vsz, 0)); + desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); temp = tcg_temp_new_i64(); t_zn = tcg_temp_new_ptr(); t_pg = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); - tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg)); + tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); + tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg)); fn(temp, t_zn, t_pg, desc); - tcg_temp_free_ptr(t_zn); - tcg_temp_free_ptr(t_pg); - tcg_temp_free_i32(desc); write_fp_dreg(s, a->rd, temp); - tcg_temp_free_i64(temp); return true; } #define DO_VPZ(NAME, name) \ -static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a) \ -{ \ - static gen_helper_gvec_reduc * const fns[4] = { \ + static gen_helper_gvec_reduc * const name##_fns[4] = { \ gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ }; \ - return do_vpz_ool(s, a, fns[a->esz]); \ -} + TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz]) DO_VPZ(ORV, orv) DO_VPZ(ANDV, andv) @@ -926,14 +981,11 @@ DO_VPZ(UMAXV, umaxv) DO_VPZ(SMINV, sminv) DO_VPZ(UMINV, uminv) -static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a) -{ - static gen_helper_gvec_reduc * const fns[4] = { - gen_helper_sve_saddv_b, gen_helper_sve_saddv_h, - gen_helper_sve_saddv_s, NULL - }; - return do_vpz_ool(s, a, fns[a->esz]); -} +static gen_helper_gvec_reduc * const saddv_fns[4] = { + gen_helper_sve_saddv_b, gen_helper_sve_saddv_h, + gen_helper_sve_saddv_s, NULL +}; +TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz]) #undef DO_VPZ @@ -952,168 +1004,105 @@ static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg, gen_helper_sve_movz_b, gen_helper_sve_movz_h, gen_helper_sve_movz_s, gen_helper_sve_movz_d, }; - - if (sve_access_check(s)) { - gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert); - } - return true; + return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert); } -static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a, - gen_helper_gvec_3 *fn) +static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr, + gen_helper_gvec_3 * const fns[4]) { - if (sve_access_check(s)) { - gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm); - } - return true; -} + int max; -static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h, - gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d, - }; if (a->esz < 0) { /* Invalid tsz encoding -- see tszimm_esz. */ return false; } - /* Shift by element size is architecturally valid. 
For - arithmetic right-shift, it's the same as by one less. */ - a->imm = MIN(a->imm, (8 << a->esz) - 1); - return do_zpzi_ool(s, a, fns[a->esz]); -} -static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h, - gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d, - }; - if (a->esz < 0) { - return false; - } - /* Shift by element size is architecturally valid. - For logical shifts, it is a zeroing operation. */ - if (a->imm >= (8 << a->esz)) { - return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); - } else { - return do_zpzi_ool(s, a, fns[a->esz]); + /* + * Shift by element size is architecturally valid. + * For arithmetic right-shift, it's the same as by one less. + * For logical shifts and ASRD, it is a zeroing operation. + */ + max = 8 << a->esz; + if (a->imm >= max) { + if (asr) { + a->imm = max - 1; + } else { + return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); + } } + return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a); } -static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h, - gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d, - }; - if (a->esz < 0) { - return false; - } - /* Shift by element size is architecturally valid. - For logical shifts, it is a zeroing operation. */ - if (a->imm >= (8 << a->esz)) { - return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); - } else { - return do_zpzi_ool(s, a, fns[a->esz]); - } -} +static gen_helper_gvec_3 * const asr_zpzi_fns[4] = { + gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h, + gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d, +}; +TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns) -static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve_asrd_b, gen_helper_sve_asrd_h, - gen_helper_sve_asrd_s, gen_helper_sve_asrd_d, - }; - if (a->esz < 0) { - return false; - } - /* Shift by element size is architecturally valid. For arithmetic - right shift for division, it is a zeroing operation. 
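The consolidation into do_shift_zpzi below keeps this architectural corner case in one place. A scalar restatement of the clamp, for illustration only (the zeroing path corresponds to the do_movz_zpz call):

#include <stdbool.h>

/*
 * Model of the immediate clamp in do_shift_zpzi.  For esz = MO_8,
 * max = 8 << 0 = 8: ASR #8 is clamped to #7 (same result, the sign
 * bit fills), while LSR/LSL/ASRD #8 become a zeroing move of the
 * active elements.
 */
static int clamp_shift_imm(int esz, int imm, bool asr, bool *zeroing)
{
    int max = 8 << esz;          /* element width in bits */

    *zeroing = false;
    if (imm >= max) {
        if (asr) {
            return max - 1;
        }
        *zeroing = true;         /* caller emits do_movz_zpz instead */
    }
    return imm;
}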
*/ - if (a->imm >= (8 << a->esz)) { - return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true); - } else { - return do_zpzi_ool(s, a, fns[a->esz]); - } -} +static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = { + gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h, + gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d, +}; +TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns) -static bool trans_SQSHL_zpzi(DisasContext *s, arg_rpri_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h, - gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d, - }; - if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { - return false; - } - return do_zpzi_ool(s, a, fns[a->esz]); -} +static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = { + gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h, + gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d, +}; +TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns) -static bool trans_UQSHL_zpzi(DisasContext *s, arg_rpri_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h, - gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d, - }; - if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { - return false; - } - return do_zpzi_ool(s, a, fns[a->esz]); -} +static gen_helper_gvec_3 * const asrd_fns[4] = { + gen_helper_sve_asrd_b, gen_helper_sve_asrd_h, + gen_helper_sve_asrd_s, gen_helper_sve_asrd_d, +}; +TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns) -static bool trans_SRSHR(DisasContext *s, arg_rpri_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h, - gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d, - }; - if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { - return false; - } - return do_zpzi_ool(s, a, fns[a->esz]); -} +static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = { + gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h, + gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d, +}; +TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi, + a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a) -static bool trans_URSHR(DisasContext *s, arg_rpri_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h, - gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d, - }; - if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { - return false; - } - return do_zpzi_ool(s, a, fns[a->esz]); -} +static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = { + gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h, + gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d, +}; +TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi, + a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a) -static bool trans_SQSHLU(DisasContext *s, arg_rpri_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h, - gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d, - }; - if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { - return false; - } - return do_zpzi_ool(s, a, fns[a->esz]); -} +static gen_helper_gvec_3 * const srshr_fns[4] = { + gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h, + gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d, +}; +TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi, + a->esz < 0 ? 
NULL : srshr_fns[a->esz], a) + +static gen_helper_gvec_3 * const urshr_fns[4] = { + gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h, + gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d, +}; +TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi, + a->esz < 0 ? NULL : urshr_fns[a->esz], a) + +static gen_helper_gvec_3 * const sqshlu_fns[4] = { + gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h, + gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d, +}; +TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi, + a->esz < 0 ? NULL : sqshlu_fns[a->esz], a) /* *** SVE Bitwise Shift - Predicated Group */ #define DO_ZPZW(NAME, name) \ -static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a) \ -{ \ - static gen_helper_gvec_4 * const fns[3] = { \ + static gen_helper_gvec_4 * const name##_zpzw_fns[4] = { \ gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \ - gen_helper_sve_##name##_zpzw_s, \ + gen_helper_sve_##name##_zpzw_s, NULL \ }; \ - if (a->esz < 0 || a->esz >= 3) { \ - return false; \ - } \ - return do_zpzz_ool(s, a, fns[a->esz]); \ -} + TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz, \ + a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0) DO_ZPZW(ASR, asr) DO_ZPZW(LSR, lsr) @@ -1152,45 +1141,21 @@ static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr, return true; } -static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a) -{ - return do_shift_imm(s, a, true, tcg_gen_gvec_sari); -} - -static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a) -{ - return do_shift_imm(s, a, false, tcg_gen_gvec_shri); -} - -static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a) -{ - return do_shift_imm(s, a, false, tcg_gen_gvec_shli); -} - -static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn) -{ - if (fn == NULL) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0); - } - return true; -} +TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari) +TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri) +TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli) #define DO_ZZW(NAME, name) \ -static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a) \ -{ \ - static gen_helper_gvec_3 * const fns[4] = { \ + static gen_helper_gvec_3 * const name##_zzw_fns[4] = { \ gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \ gen_helper_sve_##name##_zzw_s, NULL \ }; \ - return do_zzw_ool(s, a, fns[a->esz]); \ -} + TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz, \ + name##_zzw_fns[a->esz], a, 0) -DO_ZZW(ASR, asr) -DO_ZZW(LSR, lsr) -DO_ZZW(LSL, lsl) +DO_ZZW(ASR_zzw, asr) +DO_ZZW(LSR_zzw, lsr) +DO_ZZW(LSL_zzw, lsl) #undef DO_ZZW @@ -1213,33 +1178,38 @@ static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a, return true; } -#define DO_ZPZZZ(NAME, name) \ -static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \ -{ \ - static gen_helper_gvec_5 * const fns[4] = { \ - gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \ - gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \ - }; \ - return do_zpzzz_ool(s, a, fns[a->esz]); \ -} - -DO_ZPZZZ(MLA, mla) -DO_ZPZZZ(MLS, mls) +static gen_helper_gvec_5 * const mla_fns[4] = { + gen_helper_sve_mla_b, gen_helper_sve_mla_h, + gen_helper_sve_mla_s, gen_helper_sve_mla_d, +}; +TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz]) -#undef DO_ZPZZZ +static gen_helper_gvec_5 * const mls_fns[4] = { + gen_helper_sve_mls_b, gen_helper_sve_mls_h, + gen_helper_sve_mls_s, gen_helper_sve_mls_d, +}; 
+TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz]) /* *** SVE Index Generation Group */ -static void do_index(DisasContext *s, int esz, int rd, +static bool do_index(DisasContext *s, int esz, int rd, TCGv_i64 start, TCGv_i64 incr) { - unsigned vsz = vec_full_reg_size(s); - TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0)); - TCGv_ptr t_zd = tcg_temp_new_ptr(); + unsigned vsz; + TCGv_i32 desc; + TCGv_ptr t_zd; + + if (!sve_access_check(s)) { + return true; + } + + vsz = vec_full_reg_size(s); + desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); + t_zd = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd)); + tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd)); if (esz == 3) { gen_helper_sve_index_d(t_zd, start, incr, desc); } else { @@ -1255,84 +1225,80 @@ static void do_index(DisasContext *s, int esz, int rd, tcg_gen_extrl_i64_i32(s32, start); tcg_gen_extrl_i64_i32(i32, incr); fns[esz](t_zd, s32, i32, desc); - - tcg_temp_free_i32(s32); - tcg_temp_free_i32(i32); - } - tcg_temp_free_ptr(t_zd); - tcg_temp_free_i32(desc); -} - -static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a) -{ - if (sve_access_check(s)) { - TCGv_i64 start = tcg_const_i64(a->imm1); - TCGv_i64 incr = tcg_const_i64(a->imm2); - do_index(s, a->esz, a->rd, start, incr); - tcg_temp_free_i64(start); - tcg_temp_free_i64(incr); } return true; } -static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a) -{ - if (sve_access_check(s)) { - TCGv_i64 start = tcg_const_i64(a->imm); - TCGv_i64 incr = cpu_reg(s, a->rm); - do_index(s, a->esz, a->rd, start, incr); - tcg_temp_free_i64(start); - } - return true; -} +TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd, + tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2)) +TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd, + tcg_constant_i64(a->imm), cpu_reg(s, a->rm)) +TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd, + cpu_reg(s, a->rn), tcg_constant_i64(a->imm)) +TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd, + cpu_reg(s, a->rn), cpu_reg(s, a->rm)) -static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a) +/* + *** SVE Stack Allocation Group + */ + +static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (sve_access_check(s)) { - TCGv_i64 start = cpu_reg(s, a->rn); - TCGv_i64 incr = tcg_const_i64(a->imm); - do_index(s, a->esz, a->rd, start, incr); - tcg_temp_free_i64(incr); + TCGv_i64 rd = cpu_reg_sp(s, a->rd); + TCGv_i64 rn = cpu_reg_sp(s, a->rn); + tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s)); } return true; } -static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a) +static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a) { - if (sve_access_check(s)) { - TCGv_i64 start = cpu_reg(s, a->rn); - TCGv_i64 incr = cpu_reg(s, a->rm); - do_index(s, a->esz, a->rd, start, incr); + if (!dc_isar_feature(aa64_sme, s)) { + return false; + } + if (sme_enabled_check(s)) { + TCGv_i64 rd = cpu_reg_sp(s, a->rd); + TCGv_i64 rn = cpu_reg_sp(s, a->rn); + tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s)); } return true; } -/* - *** SVE Stack Allocation Group - */ - -static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a) +static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (sve_access_check(s)) { TCGv_i64 rd = cpu_reg_sp(s, a->rd); TCGv_i64 rn = cpu_reg_sp(s, a->rn); - tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s)); + tcg_gen_addi_i64(rd, rn, a->imm * 
pred_full_reg_size(s)); } return true; } -static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a) +static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a) { - if (sve_access_check(s)) { + if (!dc_isar_feature(aa64_sme, s)) { + return false; + } + if (sme_enabled_check(s)) { TCGv_i64 rd = cpu_reg_sp(s, a->rd); TCGv_i64 rn = cpu_reg_sp(s, a->rn); - tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s)); + tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s)); } return true; } static bool trans_RDVL(DisasContext *s, arg_RDVL *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (sve_access_check(s)) { TCGv_i64 reg = cpu_reg(s, a->rd); tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s)); @@ -1340,75 +1306,49 @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a) return true; } -/* - *** SVE Compute Vector Address Group - */ - -static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn) +static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a) { - if (sve_access_check(s)) { - gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm); + if (!dc_isar_feature(aa64_sme, s)) { + return false; + } + if (sme_enabled_check(s)) { + TCGv_i64 reg = cpu_reg(s, a->rd); + tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s)); } return true; } -static bool trans_ADR_p32(DisasContext *s, arg_rrri *a) -{ - return do_adr(s, a, gen_helper_sve_adr_p32); -} - -static bool trans_ADR_p64(DisasContext *s, arg_rrri *a) -{ - return do_adr(s, a, gen_helper_sve_adr_p64); -} +/* + *** SVE Compute Vector Address Group + */ -static bool trans_ADR_s32(DisasContext *s, arg_rrri *a) +static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn) { - return do_adr(s, a, gen_helper_sve_adr_s32); + return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm); } -static bool trans_ADR_u32(DisasContext *s, arg_rrri *a) -{ - return do_adr(s, a, gen_helper_sve_adr_u32); -} +TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32) +TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64) +TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32) +TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32) /* *** SVE Integer Misc - Unpredicated Group */ -static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a) -{ - static gen_helper_gvec_2 * const fns[4] = { - NULL, - gen_helper_sve_fexpa_h, - gen_helper_sve_fexpa_s, - gen_helper_sve_fexpa_d, - }; - if (a->esz == 0) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0); - } - return true; -} +static gen_helper_gvec_2 * const fexpa_fns[4] = { + NULL, gen_helper_sve_fexpa_h, + gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d, +}; +TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz, + fexpa_fns[a->esz], a->rd, a->rn, 0) -static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - NULL, - gen_helper_sve_ftssel_h, - gen_helper_sve_ftssel_s, - gen_helper_sve_ftssel_d, - }; - if (a->esz == 0) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0); - } - return true; -} +static gen_helper_gvec_3 * const ftssel_fns[4] = { + NULL, gen_helper_sve_ftssel_h, + gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d, +}; +TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, + ftssel_fns[a->esz], a, 0) /* *** SVE Predicate Logical Operations Group @@ -1439,19 +1379,14 @@ static bool do_pppp_flags(DisasContext *s, arg_rprr_s 
*a, TCGv_i64 pm = tcg_temp_new_i64(); TCGv_i64 pg = tcg_temp_new_i64(); - tcg_gen_ld_i64(pn, cpu_env, nofs); - tcg_gen_ld_i64(pm, cpu_env, mofs); - tcg_gen_ld_i64(pg, cpu_env, gofs); + tcg_gen_ld_i64(pn, tcg_env, nofs); + tcg_gen_ld_i64(pm, tcg_env, mofs); + tcg_gen_ld_i64(pg, tcg_env, gofs); gvec_op->fni8(pd, pn, pm, pg); - tcg_gen_st_i64(pd, cpu_env, dofs); + tcg_gen_st_i64(pd, tcg_env, dofs); do_predtest1(pd, pg); - - tcg_temp_free_i64(pd); - tcg_temp_free_i64(pn); - tcg_temp_free_i64(pm); - tcg_temp_free_i64(pg); } else { /* The operation and flags generation is large. The computation * of the flags depends on the original contents of the guarding @@ -1492,20 +1427,17 @@ static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a) .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (!a->s) { - if (!sve_access_check(s)) { - return true; - } if (a->rn == a->rm) { if (a->pg == a->rn) { - do_mov_p(s, a->rd, a->rn); - } else { - gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg); + return do_mov_p(s, a->rd, a->rn); } - return true; + return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg); } else if (a->pg == a->rn || a->pg == a->rm) { - gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm); - return true; + return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm); } } return do_pppp_flags(s, a, &op); @@ -1533,11 +1465,11 @@ static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a) .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (!a->s && a->pg == a->rn) { - if (sve_access_check(s)) { - gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm); - } - return true; + return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm); } return do_pppp_flags(s, a, &op); } @@ -1563,12 +1495,20 @@ static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_eor_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; + + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */ + if (!a->s && a->pg == a->rm) { + return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn); + } return do_pppp_flags(s, a, &op); } static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a) { - if (a->s) { + if (a->s || !dc_isar_feature(aa64_sve, s)) { return false; } if (sve_access_check(s)) { @@ -1603,6 +1543,9 @@ static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a) .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (!a->s && a->pg == a->rn && a->rn == a->rm) { return do_mov_p(s, a->rd, a->rn); } @@ -1630,6 +1573,10 @@ static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_orn_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; + + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } return do_pppp_flags(s, a, &op); } @@ -1654,6 +1601,10 @@ static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_nor_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; + + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } return do_pppp_flags(s, a, &op); } @@ -1678,6 +1629,10 @@ static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a) .fno = gen_helper_sve_nand_pppp, .prefer_i64 = TCG_TARGET_REG_BITS == 64, }; + + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } return do_pppp_flags(s, a, &op); } @@ -1687,6 +1642,9 @@ static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a) static bool 
trans_PTEST(DisasContext *s, arg_PTEST *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (sve_access_check(s)) { int nofs = pred_full_reg_offset(s, a->rn); int gofs = pred_full_reg_offset(s, a->pg); @@ -1696,12 +1654,9 @@ static bool trans_PTEST(DisasContext *s, arg_PTEST *a) TCGv_i64 pn = tcg_temp_new_i64(); TCGv_i64 pg = tcg_temp_new_i64(); - tcg_gen_ld_i64(pn, cpu_env, nofs); - tcg_gen_ld_i64(pg, cpu_env, gofs); + tcg_gen_ld_i64(pn, tcg_env, nofs); + tcg_gen_ld_i64(pg, tcg_env, gofs); do_predtest1(pn, pg); - - tcg_temp_free_i64(pn); - tcg_temp_free_i64(pg); } else { do_predtest(s, nofs, gofs, words); } @@ -1781,7 +1736,7 @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) t = tcg_temp_new_i64(); if (fullsz <= 64) { tcg_gen_movi_i64(t, lastword); - tcg_gen_st_i64(t, cpu_env, ofs); + tcg_gen_st_i64(t, tcg_env, ofs); goto done; } @@ -1800,23 +1755,21 @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) tcg_gen_movi_i64(t, word); for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) { - tcg_gen_st_i64(t, cpu_env, ofs + i); + tcg_gen_st_i64(t, tcg_env, ofs + i); } if (lastword != word) { tcg_gen_movi_i64(t, lastword); - tcg_gen_st_i64(t, cpu_env, ofs + i); + tcg_gen_st_i64(t, tcg_env, ofs + i); i += 8; } if (i < fullsz) { tcg_gen_movi_i64(t, 0); for (; i < fullsz; i += 8) { - tcg_gen_st_i64(t, cpu_env, ofs + i); + tcg_gen_st_i64(t, tcg_env, ofs + i); } } done: - tcg_temp_free_i64(t); - /* PTRUES */ if (setflag) { tcg_gen_movi_i32(cpu_NF, -(word != 0)); @@ -1827,22 +1780,14 @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag) return true; } -static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a) -{ - return do_predset(s, a->esz, a->rd, a->pat, a->s); -} +TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s) -static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a) -{ - /* Note pat == 31 is #all, to set all elements. */ - return do_predset(s, 0, FFR_PRED_NUM, 31, false); -} +/* Note pat == 31 is #all, to set all elements. */ +TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve, + do_predset, 0, FFR_PRED_NUM, 31, false) -static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a) -{ - /* Note pat == 32 is #unimp, to set no elements. */ - return do_predset(s, 0, a->rd, 32, false); -} +/* Note pat == 32 is #unimp, to set no elements. 
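PTRUE, SETFFR and PFALSE all funnel through do_predset; the pat == 32 trick works because the pattern decoder treats anything outside the recognized encodings as "no elements". An abridged sketch of decode_pred_count, which lives elsewhere in this file and is not part of the hunk (the full switch also handles POW2, VL1..VL256 and MUL3/MUL4):

/* Abridged sketch of decode_pred_count() as used by do_predset(). */
static unsigned decode_pred_count_sketch(unsigned fullsz, int pattern,
                                         int esz)
{
    unsigned elements = fullsz >> esz;

    switch (pattern) {
    case 0x1f:   /* ALL: pat == 31, used by PTRUE #all and SETFFR */
        return elements;
    /* ... POW2, VL1..VL256, MUL4, MUL3 cases elided ... */
    default:     /* unimplemented #uimm5 -- and PFALSE's pat == 32 */
        return 0;
    }
}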
*/ +TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false) static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a) { @@ -1853,18 +1798,13 @@ static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a) .rd = a->rd, .pg = a->pg, .s = a->s, .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM, }; - return trans_AND_pppp(s, &alt_a); -} -static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a) -{ - return do_mov_p(s, a->rd, FFR_PRED_NUM); + s->is_nonstreaming = true; + return trans_AND_pppp(s, &alt_a); } -static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a) -{ - return do_mov_p(s, FFR_PRED_NUM, a->rn); -} +TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM) +TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn) static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a, void (*gen_fn)(TCGv_i32, TCGv_ptr, @@ -1882,28 +1822,18 @@ static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a, desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s)); desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); - tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd)); - tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn)); - t = tcg_const_i32(desc); + tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, a->rd)); + tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->rn)); + t = tcg_temp_new_i32(); - gen_fn(t, t_pd, t_pg, t); - tcg_temp_free_ptr(t_pd); - tcg_temp_free_ptr(t_pg); + gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc)); do_pred_flags(t); - tcg_temp_free_i32(t); return true; } -static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a) -{ - return do_pfirst_pnext(s, a, gen_helper_sve_pfirst); -} - -static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a) -{ - return do_pfirst_pnext(s, a, gen_helper_sve_pnext); -} +TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst) +TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext) /* *** SVE Element Count Group @@ -1911,13 +1841,11 @@ static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a) /* Perform an inline saturating addition of a 32-bit value within * a 64-bit register. The second operand is known to be positive, - * which halves the comparisions we must perform to bound the result. + * which halves the comparisons we must perform to bound the result. */ static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) { int64_t ibound; - TCGv_i64 bound; - TCGCond cond; /* Use normal 64-bit arithmetic to detect 32-bit overflow. */ if (u) { @@ -1928,35 +1856,32 @@ static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) if (d) { tcg_gen_sub_i64(reg, reg, val); ibound = (u ? 0 : INT32_MIN); - cond = TCG_COND_LT; + tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound)); } else { tcg_gen_add_i64(reg, reg, val); ibound = (u ? UINT32_MAX : INT32_MAX); - cond = TCG_COND_GT; + tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound)); } - bound = tcg_const_i64(ibound); - tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg); - tcg_temp_free_i64(bound); } /* Similarly with 64-bit values. 
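The movcond-based bounding in do_sat_addsub_32 is replaced above by a single smax/smin against a constant bound. A self-contained C model of the signed add case (the ext32s of reg happens earlier in the function, outside this hunk): because the arithmetic is done in 64 bits on a sign-extended 32-bit value and val is known positive, overflow past INT32_MAX is the only case to bound.

#include <stdint.h>

/* Model of do_sat_addsub_32 for u = false, d = false (signed add). */
static int64_t sat_add_s32_model(int64_t reg, int64_t val)
{
    reg = (int32_t)reg;              /* tcg_gen_ext32s_i64 */
    reg += val;                      /* 64-bit add: cannot wrap */
    if (reg > INT32_MAX) {
        reg = INT32_MAX;             /* tcg_gen_smin_i64 vs. constant */
    }
    return reg;
}

The subtract case is symmetric: after the 64-bit subtract, a tcg_gen_smax_i64 against INT32_MIN (or against 0 for the unsigned flavour) bounds the result.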
*/ static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) { TCGv_i64 t0 = tcg_temp_new_i64(); - TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2; if (u) { if (d) { tcg_gen_sub_i64(t0, reg, val); - tcg_gen_movi_i64(t1, 0); - tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0); + t2 = tcg_constant_i64(0); + tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0); } else { tcg_gen_add_i64(t0, reg, val); - tcg_gen_movi_i64(t1, -1); - tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0); + t2 = tcg_constant_i64(-1); + tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0); } } else { + TCGv_i64 t1 = tcg_temp_new_i64(); if (d) { /* Detect signed overflow for subtraction. */ tcg_gen_xor_i64(t0, reg, val); @@ -1966,7 +1891,7 @@ static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) /* Bound the result. */ tcg_gen_movi_i64(reg, INT64_MIN); - t2 = tcg_const_i64(0); + t2 = tcg_constant_i64(0); tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1); } else { /* Detect signed overflow for addition. */ @@ -1977,13 +1902,10 @@ static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d) /* Bound the result. */ tcg_gen_movi_i64(t1, INT64_MAX); - t2 = tcg_const_i64(0); + t2 = tcg_constant_i64(0); tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg); } - tcg_temp_free_i64(t2); } - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); } /* Similarly with a vector and a scalar operand. */ @@ -1997,9 +1919,9 @@ static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, dptr = tcg_temp_new_ptr(); nptr = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd)); - tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn)); - desc = tcg_const_i32(simd_desc(vsz, vsz, 0)); + tcg_gen_addi_ptr(dptr, tcg_env, vec_full_reg_offset(s, rd)); + tcg_gen_addi_ptr(nptr, tcg_env, vec_full_reg_offset(s, rn)); + desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); switch (esz) { case MO_8: @@ -2013,7 +1935,6 @@ static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, } else { gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc); } - tcg_temp_free_i32(t32); break; case MO_16: @@ -2027,7 +1948,6 @@ static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, } else { gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc); } - tcg_temp_free_i32(t32); break; case MO_32: @@ -2042,7 +1962,6 @@ static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, } else { gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc); } - tcg_temp_free_i64(t64); break; case MO_64: @@ -2056,7 +1975,6 @@ static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, t64 = tcg_temp_new_i64(); tcg_gen_neg_i64(t64, val); gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc); - tcg_temp_free_i64(t64); } else { gen_helper_sve_sqaddi_d(dptr, nptr, val, desc); } @@ -2065,14 +1983,13 @@ static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn, default: g_assert_not_reached(); } - - tcg_temp_free_ptr(dptr); - tcg_temp_free_ptr(nptr); - tcg_temp_free_i32(desc); } static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (sve_access_check(s)) { unsigned fullsz = vec_full_reg_size(s); unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); @@ -2083,6 +2000,9 @@ static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a) static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (sve_access_check(s)) { unsigned fullsz = 
vec_full_reg_size(s); unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz); @@ -2096,6 +2016,9 @@ static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a) static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (!sve_access_check(s)) { return true; } @@ -2113,15 +2036,16 @@ static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a) tcg_gen_ext32s_i64(reg, reg); } } else { - TCGv_i64 t = tcg_const_i64(inc); - do_sat_addsub_32(reg, t, a->u, a->d); - tcg_temp_free_i64(t); + do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d); } return true; } static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (!sve_access_check(s)) { return true; } @@ -2132,16 +2056,14 @@ static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a) TCGv_i64 reg = cpu_reg(s, a->rd); if (inc != 0) { - TCGv_i64 t = tcg_const_i64(inc); - do_sat_addsub_64(reg, t, a->u, a->d); - tcg_temp_free_i64(t); + do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d); } return true; } static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) { - if (a->esz == 0) { + if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { return false; } @@ -2151,11 +2073,10 @@ static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) if (inc != 0) { if (sve_access_check(s)) { - TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc); tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd), vec_full_reg_offset(s, a->rn), - t, fullsz, fullsz); - tcg_temp_free_i64(t); + tcg_constant_i64(a->d ? -inc : inc), + fullsz, fullsz); } } else { do_mov_z(s, a->rd, a->rn); @@ -2165,7 +2086,7 @@ static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a) static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a) { - if (a->esz == 0) { + if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { return false; } @@ -2175,9 +2096,8 @@ static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a) if (inc != 0) { if (sve_access_check(s)) { - TCGv_i64 t = tcg_const_i64(inc); - do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d); - tcg_temp_free_i64(t); + do_sat_addsub_vec(s, a->esz, a->rd, a->rn, + tcg_constant_i64(inc), a->u, a->d); } } else { do_mov_z(s, a->rd, a->rn); @@ -2197,32 +2117,20 @@ static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn) extract32(a->dbm, 6, 6))) { return false; } - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - gvec_fn(MO_64, vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), imm, vsz, vsz); - } - return true; + return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm); } -static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a) -{ - return do_zz_dbm(s, a, tcg_gen_gvec_andi); -} - -static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a) -{ - return do_zz_dbm(s, a, tcg_gen_gvec_ori); -} - -static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a) -{ - return do_zz_dbm(s, a, tcg_gen_gvec_xori); -} +TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi) +TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori) +TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori) static bool trans_DUPM(DisasContext *s, arg_DUPM *a) { uint64_t imm; + + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), extract32(a->dbm, 0, 6), extract32(a->dbm, 6, 6))) { @@ -2250,47 +2158,38 @@ static void do_cpy_m(DisasContext *s, int esz, int 
rd, int rn, int pg, gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d, }; unsigned vsz = vec_full_reg_size(s); - TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0)); + TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); TCGv_ptr t_zd = tcg_temp_new_ptr(); TCGv_ptr t_zn = tcg_temp_new_ptr(); TCGv_ptr t_pg = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd)); - tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn)); - tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); + tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd)); + tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, rn)); + tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); fns[esz](t_zd, t_zn, t_pg, val, desc); - - tcg_temp_free_ptr(t_zd); - tcg_temp_free_ptr(t_zn); - tcg_temp_free_ptr(t_pg); - tcg_temp_free_i32(desc); } static bool trans_FCPY(DisasContext *s, arg_FCPY *a) { - if (a->esz == 0) { + if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { return false; } if (sve_access_check(s)) { /* Decode the VFP immediate. */ uint64_t imm = vfp_expand_imm(a->esz, a->imm); - TCGv_i64 t_imm = tcg_const_i64(imm); - do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm); - tcg_temp_free_i64(t_imm); + do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm)); } return true; } static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a) { - if (a->esz == 0 && extract32(s->insn, 13, 1)) { + if (!dc_isar_feature(aa64_sve, s)) { return false; } if (sve_access_check(s)) { - TCGv_i64 t_imm = tcg_const_i64(a->imm); - do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm); - tcg_temp_free_i64(t_imm); + do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm)); } return true; } @@ -2302,16 +2201,15 @@ static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a) gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d, }; - if (a->esz == 0 && extract32(s->insn, 13, 1)) { + if (!dc_isar_feature(aa64_sve, s)) { return false; } if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); - TCGv_i64 t_imm = tcg_const_i64(a->imm); tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd), pred_full_reg_offset(s, a->pg), - t_imm, vsz, vsz, 0, fns[a->esz]); - tcg_temp_free_i64(t_imm); + tcg_constant_i64(a->imm), + vsz, vsz, 0, fns[a->esz]); } return true; } @@ -2350,18 +2248,8 @@ static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm) return true; } -static bool trans_EXT(DisasContext *s, arg_EXT *a) -{ - return do_EXT(s, a->rd, a->rn, a->rm, a->imm); -} - -static bool trans_EXT_sve2(DisasContext *s, arg_rri *a) -{ - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm); -} +TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm) +TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm) /* *** SVE Permute - Unpredicated Group @@ -2369,6 +2257,9 @@ static bool trans_EXT_sve2(DisasContext *s, arg_rri *a) static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (sve_access_check(s)) { unsigned vsz = vec_full_reg_size(s); tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd), @@ -2379,6 +2270,9 @@ static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a) static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if ((a->imm & 0x1f) == 0) { return false; } @@ -2412,97 +2306,64 @@ static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val) gen_helper_sve_insr_s, 
gen_helper_sve_insr_d, }; unsigned vsz = vec_full_reg_size(s); - TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0)); + TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); TCGv_ptr t_zd = tcg_temp_new_ptr(); TCGv_ptr t_zn = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd)); - tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn)); + tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, a->rd)); + tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn)); fns[a->esz](t_zd, t_zn, val, desc); - - tcg_temp_free_ptr(t_zd); - tcg_temp_free_ptr(t_zn); - tcg_temp_free_i32(desc); } static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (sve_access_check(s)) { TCGv_i64 t = tcg_temp_new_i64(); - tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64)); + tcg_gen_ld_i64(t, tcg_env, vec_reg_offset(s, a->rm, 0, MO_64)); do_insr_i64(s, a, t); - tcg_temp_free_i64(t); } return true; } static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a) { - if (sve_access_check(s)) { - do_insr_i64(s, a, cpu_reg(s, a->rm)); + if (!dc_isar_feature(aa64_sve, s)) { + return false; } - return true; -} - -static bool trans_REV_v(DisasContext *s, arg_rr_esz *a) -{ - static gen_helper_gvec_2 * const fns[4] = { - gen_helper_sve_rev_b, gen_helper_sve_rev_h, - gen_helper_sve_rev_s, gen_helper_sve_rev_d - }; - if (sve_access_check(s)) { - gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0); - } - return true; -} - -static bool trans_TBL(DisasContext *s, arg_rrr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve_tbl_b, gen_helper_sve_tbl_h, - gen_helper_sve_tbl_s, gen_helper_sve_tbl_d - }; - - if (sve_access_check(s)) { - gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0); + do_insr_i64(s, a, cpu_reg(s, a->rm)); } return true; } -static bool trans_TBL_sve2(DisasContext *s, arg_rrr_esz *a) -{ - static gen_helper_gvec_4 * const fns[4] = { - gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h, - gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d - }; +static gen_helper_gvec_2 * const rev_fns[4] = { + gen_helper_sve_rev_b, gen_helper_sve_rev_h, + gen_helper_sve_rev_s, gen_helper_sve_rev_d +}; +TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0) - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, - (a->rn + 1) % 32, a->rm, 0); - } - return true; -} +static gen_helper_gvec_3 * const sve_tbl_fns[4] = { + gen_helper_sve_tbl_b, gen_helper_sve_tbl_h, + gen_helper_sve_tbl_s, gen_helper_sve_tbl_d +}; +TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0) -static bool trans_TBX(DisasContext *s, arg_rrr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, - gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d - }; +static gen_helper_gvec_4 * const sve2_tbl_fns[4] = { + gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h, + gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d +}; +TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz], + a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0) - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0); - } - return true; -} +static gen_helper_gvec_3 * const tbx_fns[4] = { + gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h, + gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d +}; +TRANS_FEAT(TBX, aa64_sve2, 
gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0) static bool trans_UNPK(DisasContext *s, arg_UNPK *a) { @@ -2513,7 +2374,7 @@ static bool trans_UNPK(DisasContext *s, arg_UNPK *a) { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d }, }; - if (a->esz == 0) { + if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) { return false; } if (sve_access_check(s)) { @@ -2542,24 +2403,17 @@ static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd, TCGv_ptr t_d = tcg_temp_new_ptr(); TCGv_ptr t_n = tcg_temp_new_ptr(); TCGv_ptr t_m = tcg_temp_new_ptr(); - TCGv_i32 t_desc; uint32_t desc = 0; desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); - tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd)); - tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn)); - tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm)); - t_desc = tcg_const_i32(desc); - - fn(t_d, t_n, t_m, t_desc); + tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); + tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); + tcg_gen_addi_ptr(t_m, tcg_env, pred_full_reg_offset(s, a->rm)); - tcg_temp_free_ptr(t_d); - tcg_temp_free_ptr(t_n); - tcg_temp_free_ptr(t_m); - tcg_temp_free_i32(t_desc); + fn(t_d, t_n, t_m, tcg_constant_i32(desc)); return true; } @@ -2573,210 +2427,88 @@ static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd, unsigned vsz = pred_full_reg_size(s); TCGv_ptr t_d = tcg_temp_new_ptr(); TCGv_ptr t_n = tcg_temp_new_ptr(); - TCGv_i32 t_desc; uint32_t desc = 0; - tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd)); - tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn)); + tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd)); + tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn)); desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz); desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz); desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd); - t_desc = tcg_const_i32(desc); - - fn(t_d, t_n, t_desc); - tcg_temp_free_i32(t_desc); - tcg_temp_free_ptr(t_d); - tcg_temp_free_ptr(t_n); + fn(t_d, t_n, tcg_constant_i32(desc)); return true; } -static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a) -{ - return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p); -} - -static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a) -{ - return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p); -} - -static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a) -{ - return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p); -} - -static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a) -{ - return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p); -} - -static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a) -{ - return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p); -} - -static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a) -{ - return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p); -} - -static bool trans_REV_p(DisasContext *s, arg_rr_esz *a) -{ - return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p); -} +TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p) +TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p) +TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p) +TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p) +TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p) +TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p) -static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO 
-static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
-{
-    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
-}
-
-static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
-{
-    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
-}
+TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
+TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
+TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
 
 /*
  *** SVE Permute - Interleaving Group
  */
 
-static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
-{
-    static gen_helper_gvec_3 * const fns[4] = {
-        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
-        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
-    };
-
-    if (sve_access_check(s)) {
-        unsigned vsz = vec_full_reg_size(s);
-        unsigned high_ofs = high ? vsz / 2 : 0;
-        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
-                           vec_full_reg_offset(s, a->rn) + high_ofs,
-                           vec_full_reg_offset(s, a->rm) + high_ofs,
-                           vsz, vsz, 0, fns[a->esz]);
-    }
-    return true;
-}
-
-static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
-                            gen_helper_gvec_3 *fn)
-{
-    if (sve_access_check(s)) {
-        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
-    }
-    return true;
-}
-
-static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_zip(s, a, false);
-}
-
-static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_zip(s, a, true);
-}
-
-static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high)
-{
-    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
-        return false;
-    }
-    if (sve_access_check(s)) {
-        unsigned vsz = vec_full_reg_size(s);
-        unsigned high_ofs = high ? QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0;
-        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
-                           vec_full_reg_offset(s, a->rn) + high_ofs,
-                           vec_full_reg_offset(s, a->rm) + high_ofs,
-                           vsz, vsz, 0, gen_helper_sve2_zip_q);
-    }
-    return true;
-}
+static gen_helper_gvec_3 * const zip_fns[4] = {
+    gen_helper_sve_zip_b, gen_helper_sve_zip_h,
+    gen_helper_sve_zip_s, gen_helper_sve_zip_d,
+};
+TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
+           zip_fns[a->esz], a, 0)
+TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
+           zip_fns[a->esz], a, vec_full_reg_size(s) / 2)
 
-static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_zip_q(s, a, false);
-}
-
-static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_zip_q(s, a, true);
-}
+TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
+           gen_helper_sve2_zip_q, a, 0)
+TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
+           gen_helper_sve2_zip_q, a,
+           QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
 
 static gen_helper_gvec_3 * const uzp_fns[4] = {
     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
 };
 
-static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
-}
-
-static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
-}
+TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
+           uzp_fns[a->esz], a, 0)
+TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
+           uzp_fns[a->esz], a, 1 << a->esz)
 
-static bool trans_UZP1_q(DisasContext *s, arg_rrr_esz *a)
-{
-    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
-        return false;
-    }
-    return do_zzz_data_ool(s, a, 0, gen_helper_sve2_uzp_q);
-}
-
-static bool trans_UZP2_q(DisasContext *s, arg_rrr_esz *a)
-{
-    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
-        return false;
-    }
-    return do_zzz_data_ool(s, a, 16, gen_helper_sve2_uzp_q);
-}
+TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
+           gen_helper_sve2_uzp_q, a, 0)
+TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
+           gen_helper_sve2_uzp_q, a, 16)
 
 static gen_helper_gvec_3 * const trn_fns[4] = {
     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
 };
 
-static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
-}
+TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
+           trn_fns[a->esz], a, 0)
+TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
+           trn_fns[a->esz], a, 1 << a->esz)
 
-static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
-}
-
-static bool trans_TRN1_q(DisasContext *s, arg_rrr_esz *a)
-{
-    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
-        return false;
-    }
-    return do_zzz_data_ool(s, a, 0, gen_helper_sve2_trn_q);
-}
-
-static bool trans_TRN2_q(DisasContext *s, arg_rrr_esz *a)
-{
-    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
-        return false;
-    }
-    return do_zzz_data_ool(s, a, 16, gen_helper_sve2_trn_q);
-}
+TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
+           gen_helper_sve2_trn_q, a, 0)
+TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
+           gen_helper_sve2_trn_q, a, 16)
 
 /*
  *** SVE Permute Vector - Predicated Group
  */
 
-static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
-{
-    static gen_helper_gvec_3 * const fns[4] = {
-        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
-    };
-    return do_zpz_ool(s, a, fns[a->esz]);
-}
+static gen_helper_gvec_3 * const compact_fns[4] = {
+    NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
+};
+TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
                         compact_fns[a->esz], a, 0)
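COMPACT is gated with TRANS_FEAT_NONSTREAMING because the instruction is illegal in SME streaming mode. Assuming the definition in translate-a64.h, the variant differs from TRANS_FEAT only in flagging the DisasContext before the feature test, roughly:

    /* Sketch of the NONSTREAMING variant; paraphrased, not verbatim. */
    #define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...)            \
        static bool trans_##NAME(DisasContext *s, arg_##NAME *a)      \
        {                                                             \
            s->is_nonstreaming = true;                                \
            return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__);  \
        }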
 
 /* Call the helper that computes the ARM LastActiveElement pseudocode
  * function, scaled by the element size.  This includes the not found
@@ -2788,19 +2520,14 @@ static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
  * round up, as we do elsewhere, because we need the exact size.
  */
     TCGv_ptr t_p = tcg_temp_new_ptr();
-    TCGv_i32 t_desc;
     unsigned desc = 0;
 
     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
     desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
 
-    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
-    t_desc = tcg_const_i32(desc);
-
-    gen_helper_sve_last_active_element(ret, t_p, t_desc);
+    tcg_gen_addi_ptr(t_p, tcg_env, pred_full_reg_offset(s, pg));
 
-    tcg_temp_free_i32(t_desc);
-    tcg_temp_free_ptr(t_p);
+    gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));
 }
 
 /* Increment LAST to the offset of the next element in the vector,
@@ -2814,11 +2541,9 @@ static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
     if (is_power_of_2(vsz)) {
         tcg_gen_andi_i32(last, last, vsz - 1);
     } else {
-        TCGv_i32 max = tcg_const_i32(vsz);
-        TCGv_i32 zero = tcg_const_i32(0);
+        TCGv_i32 max = tcg_constant_i32(vsz);
+        TCGv_i32 zero = tcg_constant_i32(0);
         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
-        tcg_temp_free_i32(max);
-        tcg_temp_free_i32(zero);
     }
 }
 
@@ -2830,11 +2555,9 @@ static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
     if (is_power_of_2(vsz)) {
         tcg_gen_andi_i32(last, last, vsz - 1);
     } else {
-        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
-        TCGv_i32 zero = tcg_const_i32(0);
+        TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
+        TCGv_i32 zero = tcg_constant_i32(0);
         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
-        tcg_temp_free_i32(max);
-        tcg_temp_free_i32(zero);
     }
 }
 
@@ -2867,25 +2590,21 @@ static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                  int rm, int esz)
 {
     TCGv_ptr p = tcg_temp_new_ptr();
-    TCGv_i64 r;
 
     /* Convert offset into vector into offset into ENV.
      * The final adjustment for the vector register base
      * is added via constant offset to the load.
      */
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
     /* Adjust for element ordering.  See vec_reg_offset.  */
     if (esz < 3) {
         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
 #endif
     tcg_gen_ext_i32_ptr(p, last);
-    tcg_gen_add_ptr(p, p, cpu_env);
+    tcg_gen_add_ptr(p, p, tcg_env);
 
-    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
-    tcg_temp_free_ptr(p);
-
-    return r;
+    return load_esz(p, vec_full_reg_offset(s, rm), esz);
 }
 
 /* Compute CLAST for a Zreg.  */
@@ -2900,7 +2619,7 @@ static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
         return true;
     }
 
-    last = tcg_temp_local_new_i32();
+    last = tcg_temp_new_i32();
     over = gen_new_label();
 
     find_last_active(s, last, esz, a->pg);
@@ -2915,11 +2634,9 @@ static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
     }
 
     ele = load_last_active(s, last, a->rm, esz);
-    tcg_temp_free_i32(last);
 
     vsz = vec_full_reg_size(s);
     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
-    tcg_temp_free_i64(ele);
 
     /* If this insn used MOVPRFX, we may need a second move.  */
     if (a->rd != a->rn) {
@@ -2936,22 +2653,15 @@ static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
     return true;
 }
 
-static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
-{
-    return do_clast_vector(s, a, false);
-}
-
-static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
-{
-    return do_clast_vector(s, a, true);
-}
+TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
+TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
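find_last_active above packs the helper's parameters into a single 32-bit descriptor rather than passing them individually. Assuming the PREDDESC field layout from target/arm/internals.h, FIELD_DP32 deposits a value into a named bitfield:

    uint32_t desc = 0;
    /* Conceptually: desc = (desc & ~MASK) | ((val << SHIFT) & MASK). */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
    /* The helper recovers each field with FIELD_EX32(desc, PREDDESC, ...). */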
 
 /* Compute CLAST for a scalar.  */
 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                             bool before, TCGv_i64 reg_val)
 {
     TCGv_i32 last = tcg_temp_new_i32();
-    TCGv_i64 ele, cmp, zero;
+    TCGv_i64 ele, cmp;
 
     find_last_active(s, last, esz, pg);
 
@@ -2964,19 +2674,14 @@ static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
     }
 
     /* The conceit here is that while last < 0 indicates not found, after
-     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
+     * adjusting for tcg_env->vfp.zregs[rm], it is still a valid address
      * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
      */
     ele = load_last_active(s, last, rm, esz);
-    tcg_temp_free_i32(last);
-
-    zero = tcg_const_i64(0);
-    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
-    tcg_temp_free_i64(zero);
-    tcg_temp_free_i64(cmp);
-    tcg_temp_free_i64(ele);
+    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
+                        ele, reg_val);
 }
 
 /* Compute CLAST for a Vreg.  */
 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
 {
@@ -2985,24 +2690,16 @@ static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
     if (sve_access_check(s)) {
         int esz = a->esz;
         int ofs = vec_reg_offset(s, a->rd, 0, esz);
-        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
+        TCGv_i64 reg = load_esz(tcg_env, ofs, esz);
 
         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
         write_fp_dreg(s, a->rd, reg);
-        tcg_temp_free_i64(reg);
     }
     return true;
 }
 
-static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
-{
-    return do_clast_fp(s, a, false);
-}
-
-static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
-{
-    return do_clast_fp(s, a, true);
-}
+TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
+TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
 
 /* Compute CLAST for a Xreg.  */
 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
@@ -3034,22 +2731,14 @@ static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
     return true;
 }
 
-static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
-{
-    return do_clast_general(s, a, false);
-}
-
-static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
-{
-    return do_clast_general(s, a, true);
-}
+TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
+TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
 
 /* Compute LAST for a scalar.  */
 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                                int pg, int rm, bool before)
 {
     TCGv_i32 last = tcg_temp_new_i32();
-    TCGv_i64 ret;
 
     find_last_active(s, last, esz, pg);
     if (before) {
@@ -3058,9 +2747,7 @@ static bool do_last_scalar(DisasContext *s, int esz,
         incr_last_active(s, last, esz);
     }
 
-    ret = load_last_active(s, last, rm, esz);
-    tcg_temp_free_i32(last);
-    return ret;
+    return load_last_active(s, last, rm, esz);
 }
 
 /* Compute LAST for a Vreg.  */
 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
 {
@@ -3069,20 +2756,12 @@ static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
     if (sve_access_check(s)) {
         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
         write_fp_dreg(s, a->rd, val);
-        tcg_temp_free_i64(val);
     }
     return true;
 }
 
-static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
-{
-    return do_last_fp(s, a, false);
-}
-
-static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
-{
-    return do_last_fp(s, a, true);
-}
+TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
+TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
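The "load garbage, then discard" comment in do_clast_scalar describes a branchless select: the load is always performed (the address stays valid even when no active element was found), and a conditional move decides whether the loaded value is kept. A plain-C analogue of the generated movcond, for illustration only:

    /* cmp >= 0 means an active element was found: keep the loaded value;
     * otherwise keep the register's previous contents. */
    static inline int64_t clast_select(int64_t cmp, int64_t loaded,
                                       int64_t prev)
    {
        return cmp >= 0 ? loaded : prev;  /* ~ tcg_gen_movcond_i64(GE, ...) */
    }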
 
 /* Compute LAST for a Xreg.  */
 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
@@ -3090,23 +2769,18 @@ static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
     if (sve_access_check(s)) {
         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
-        tcg_temp_free_i64(val);
     }
     return true;
 }
 
-static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
-{
-    return do_last_general(s, a, false);
-}
-
-static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
-{
-    return do_last_general(s, a, true);
-}
+TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
+TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
 
 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
 {
+    if (!dc_isar_feature(aa64_sve, s)) {
+        return false;
+    }
     if (sve_access_check(s)) {
         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
     }
@@ -3115,73 +2789,38 @@ static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
 
 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
 {
+    if (!dc_isar_feature(aa64_sve, s)) {
+        return false;
+    }
     if (sve_access_check(s)) {
         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
-        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
+        TCGv_i64 t = load_esz(tcg_env, ofs, a->esz);
         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
-        tcg_temp_free_i64(t);
     }
     return true;
 }
 
-static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
-{
-    static gen_helper_gvec_3 * const fns[4] = {
-        NULL,
-        gen_helper_sve_revb_h,
-        gen_helper_sve_revb_s,
-        gen_helper_sve_revb_d,
-    };
-    return do_zpz_ool(s, a, fns[a->esz]);
-}
+static gen_helper_gvec_3 * const revb_fns[4] = {
+    NULL, gen_helper_sve_revb_h,
+    gen_helper_sve_revb_s, gen_helper_sve_revb_d,
+};
+TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)
 
-static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
-{
-    static gen_helper_gvec_3 * const fns[4] = {
-        NULL,
-        NULL,
-        gen_helper_sve_revh_s,
-        gen_helper_sve_revh_d,
-    };
-    return do_zpz_ool(s, a, fns[a->esz]);
-}
+static gen_helper_gvec_3 * const revh_fns[4] = {
    NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
+};
+TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
 
-static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
-{
-    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
-}
+TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
+           a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
 
-static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
-{
-    static gen_helper_gvec_3 * const fns[4] = {
-        gen_helper_sve_rbit_b,
-        gen_helper_sve_rbit_h,
-        gen_helper_sve_rbit_s,
-        gen_helper_sve_rbit_d,
-    };
-    return do_zpz_ool(s, a, fns[a->esz]);
-}
+TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
 
-static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
-{
-    if (sve_access_check(s)) {
-        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
-                          a->rd, a->rn, a->rm, a->pg, a->esz);
-    }
-    return true;
-}
+TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
           gen_helper_sve_splice, a, a->esz)
 
-static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a)
-{
-    if (!dc_isar_feature(aa64_sve2, s)) {
-        return false;
-    }
-    if (sve_access_check(s)) {
-        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
-                          a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz);
-    }
-    return true;
-}
+TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
+           a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
 
 /*
  *** SVE Integer Compare - Vectors Group
  */
@@ -3202,39 +2841,30 @@ static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
     }
 
     vsz = vec_full_reg_size(s);
-    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
+    t = tcg_temp_new_i32();
     pd = tcg_temp_new_ptr();
     zn = tcg_temp_new_ptr();
     zm = tcg_temp_new_ptr();
     pg = tcg_temp_new_ptr();
 
-    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
-    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
-    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
-    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
-
-    gen_fn(t, pd, zn, zm, pg, t);
+    tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(zm, tcg_env, vec_full_reg_offset(s, a->rm));
+    tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg));
 
-    tcg_temp_free_ptr(pd);
-    tcg_temp_free_ptr(zn);
-    tcg_temp_free_ptr(zm);
-    tcg_temp_free_ptr(pg);
+    gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));
 
     do_pred_flags(t);
-
-    tcg_temp_free_i32(t);
     return true;
 }
 
 #define DO_PPZZ(NAME, name) \
-static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
-{                                                                         \
-    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
-        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
-        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
-    };                                                                    \
-    return do_ppzz_flags(s, a, fns[a->esz]);                              \
-}
+    static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = {         \
+        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
+        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
+    TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags,                      \
+               a, name##_ppzz_fns[a->esz])
 
 DO_PPZZ(CMPEQ, cmpeq)
 DO_PPZZ(CMPNE, cmpne)
@@ -3246,14 +2876,12 @@ DO_PPZZ(CMPHS, cmphs)
 
 #undef DO_PPZZ
 
 #define DO_PPZW(NAME, name) \
-static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
-{                                                                         \
-    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
-        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
-        gen_helper_sve_##name##_ppzw_s, NULL                              \
-    };                                                                    \
-    return do_ppzz_flags(s, a, fns[a->esz]);                              \
-}
+    static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = {         \
+        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
+        gen_helper_sve_##name##_ppzw_s, NULL                              \
+    };                                                                    \
+    TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags,                      \
+               a, name##_ppzw_fns[a->esz])
 
 DO_PPZW(CMPEQ, cmpeq)
 DO_PPZW(CMPNE, cmpne)
@@ -3287,36 +2915,28 @@ static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
     }
 
     vsz = vec_full_reg_size(s);
-    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
+    t = tcg_temp_new_i32();
     pd = tcg_temp_new_ptr();
     zn = tcg_temp_new_ptr();
     pg = tcg_temp_new_ptr();
 
-    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
-    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
-    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
+    tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg));
 
-    gen_fn(t, pd, zn, pg, t);
-
-    tcg_temp_free_ptr(pd);
-    tcg_temp_free_ptr(zn);
-    tcg_temp_free_ptr(pg);
+    gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));
 
     do_pred_flags(t);
-
-    tcg_temp_free_i32(t);
     return true;
 }
 
 #define DO_PPZI(NAME, name) \
-static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
-{                                                                         \
-    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
+    static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = {         \
         gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
         gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
     };                                                                    \
-    return do_ppzi_flags(s, a, fns[a->esz]);                              \
-}
+    TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a,                   \
+               name##_ppzi_fns[a->esz])
 
 DO_PPZI(CMPEQ, cmpeq)
 DO_PPZI(CMPNE, cmpne)
@@ -3349,24 +2969,20 @@ static bool do_brk3(DisasContext *s, arg_rprr_s *a,
     TCGv_ptr n = tcg_temp_new_ptr();
     TCGv_ptr m = tcg_temp_new_ptr();
     TCGv_ptr g = tcg_temp_new_ptr();
-    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
+    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
 
-    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
-    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
-    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
-    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
+    tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(m, tcg_env, pred_full_reg_offset(s, a->rm));
+    tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg));
 
     if (a->s) {
-        fn_s(t, d, n, m, g, t);
+        TCGv_i32 t = tcg_temp_new_i32();
+        fn_s(t, d, n, m, g, desc);
         do_pred_flags(t);
     } else {
-        fn(d, n, m, g, t);
+        fn(d, n, m, g, desc);
     }
-    tcg_temp_free_ptr(d);
-    tcg_temp_free_ptr(n);
-    tcg_temp_free_ptr(m);
-    tcg_temp_free_ptr(g);
-    tcg_temp_free_i32(t);
     return true;
 }
 
@@ -3383,59 +2999,39 @@ static bool do_brk2(DisasContext *s, arg_rpr_s *a,
     TCGv_ptr d = tcg_temp_new_ptr();
     TCGv_ptr n = tcg_temp_new_ptr();
     TCGv_ptr g = tcg_temp_new_ptr();
-    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
+    TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
 
-    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
-    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
-    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
+    tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg));
 
     if (a->s) {
-        fn_s(t, d, n, g, t);
+        TCGv_i32 t = tcg_temp_new_i32();
+        fn_s(t, d, n, g, desc);
         do_pred_flags(t);
     } else {
-        fn(d, n, g, t);
+        fn(d, n, g, desc);
     }
-    tcg_temp_free_ptr(d);
-    tcg_temp_free_ptr(n);
-    tcg_temp_free_ptr(g);
-    tcg_temp_free_i32(t);
     return true;
 }
 
-static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
-{
-    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
-}
+TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
+           gen_helper_sve_brkpa, gen_helper_sve_brkpas)
+TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
+           gen_helper_sve_brkpb, gen_helper_sve_brkpbs)
 
-static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
-{
-    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
-}
+TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
           gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
+TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
+           gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)
 
-static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
-{
-    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
-}
+TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
+           gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
+TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
+           gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)
 
-static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
-{
-    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
-}
-
-static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
-{
-    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
-}
-
-static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
-{
-    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
-}
-
-static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
-{
-    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
-}
+TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
           gen_helper_sve_brkn, gen_helper_sve_brkns)
 
 /*
  *** SVE Predicate Count Group
  */
@@ -3448,12 +3044,11 @@ static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
     if (psz <= 8) {
         uint64_t psz_mask;
 
-        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
+        tcg_gen_ld_i64(val, tcg_env, pred_full_reg_offset(s, pn));
         if (pn != pg) {
             TCGv_i64 g = tcg_temp_new_i64();
-            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
+            tcg_gen_ld_i64(g, tcg_env, pred_full_reg_offset(s, pg));
             tcg_gen_and_i64(val, val, g);
-            tcg_temp_free_i64(g);
         }
 
         /* Reduce the pred_esz_masks value simply to reduce the
@@ -3467,24 +3062,22 @@ static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
         TCGv_ptr t_pn = tcg_temp_new_ptr();
         TCGv_ptr t_pg = tcg_temp_new_ptr();
         unsigned desc = 0;
-        TCGv_i32 t_desc;
 
         desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
         desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
 
-        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
-        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
-        t_desc = tcg_const_i32(desc);
+        tcg_gen_addi_ptr(t_pn, tcg_env, pred_full_reg_offset(s, pn));
+        tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
 
-        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
-        tcg_temp_free_ptr(t_pn);
-        tcg_temp_free_ptr(t_pg);
-        tcg_temp_free_i32(t_desc);
+        gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
     }
 }
 
 static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
 {
+    if (!dc_isar_feature(aa64_sve, s)) {
+        return false;
+    }
     if (sve_access_check(s)) {
         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
     }
@@ -3493,6 +3086,9 @@ static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
 
 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
 {
+    if (!dc_isar_feature(aa64_sve, s)) {
+        return false;
+    }
     if (sve_access_check(s)) {
         TCGv_i64 reg = cpu_reg(s, a->rd);
         TCGv_i64 val = tcg_temp_new_i64();
@@ -3503,14 +3099,13 @@ static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
         } else {
             tcg_gen_add_i64(reg, reg, val);
         }
-        tcg_temp_free_i64(val);
     }
     return true;
 }
 
 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
 {
-    if (a->esz == 0) {
+    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
         return false;
     }
     if (sve_access_check(s)) {
@@ -3527,6 +3122,9 @@ static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
 
 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
 {
+    if (!dc_isar_feature(aa64_sve, s)) {
+        return false;
+    }
     if (sve_access_check(s)) {
         TCGv_i64 reg = cpu_reg(s, a->rd);
         TCGv_i64 val = tcg_temp_new_i64();
@@ -3539,6 +3137,9 @@ static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
 
 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
 {
+    if (!dc_isar_feature(aa64_sve, s)) {
+        return false;
+    }
     if (sve_access_check(s)) {
         TCGv_i64 reg = cpu_reg(s, a->rd);
         TCGv_i64 val = tcg_temp_new_i64();
@@ -3551,7 +3152,7 @@ static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
 
 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
 {
-    if (a->esz == 0) {
+    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
         return false;
     }
     if (sve_access_check(s)) {
@@ -3568,6 +3169,9 @@ static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
 
 static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
 {
+    if (!dc_isar_feature(aa64_sve, s)) {
+        return false;
+    }
     if (!sve_access_check(s)) {
         return true;
     }
@@ -3579,7 +3183,6 @@ static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
 
     tcg_gen_setcond_i64(cond, cmp, rn, rm);
     tcg_gen_extrl_i64_i32(cpu_NF, cmp);
-    tcg_temp_free_i64(cmp);
 
     /* VF = !NF & !CF.  */
     tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
@@ -3594,7 +3197,7 @@ static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
 static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
 {
     TCGv_i64 op0, op1, t0, t1, tmax;
-    TCGv_i32 t2, t3;
+    TCGv_i32 t2;
     TCGv_ptr ptr;
     unsigned vsz = vec_full_reg_size(s);
     unsigned desc = 0;
@@ -3604,7 +3207,9 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
     bool eq = a->eq == a->lt;
 
     /* The greater-than conditions are all SVE2. */
-    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
+    if (a->lt
+        ? !dc_isar_feature(aa64_sve, s)
+        : !dc_isar_feature(aa64_sve2, s)) {
         return false;
     }
     if (!sve_access_check(s)) {
@@ -3650,7 +3255,7 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
         }
     }
 
-    tmax = tcg_const_i64(vsz >> a->esz);
+    tmax = tcg_constant_i64(vsz >> a->esz);
     if (eq) {
         /* Equality means one more iteration.  */
         tcg_gen_addi_i64(t0, t0, 1);
@@ -3670,45 +3275,37 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
 
     /* Bound to the maximum.  */
     tcg_gen_umin_i64(t0, t0, tmax);
-    tcg_temp_free_i64(tmax);
 
     /* Set the count to zero if the condition is false.  */
     tcg_gen_movi_i64(t1, 0);
     tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
-    tcg_temp_free_i64(t1);
 
     /* Since we're bounded, pass as a 32-bit type.  */
     t2 = tcg_temp_new_i32();
     tcg_gen_extrl_i64_i32(t2, t0);
-    tcg_temp_free_i64(t0);
 
     /* Scale elements to bits.  */
     tcg_gen_shli_i32(t2, t2, a->esz);
 
     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
     desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
-    t3 = tcg_const_i32(desc);
 
     ptr = tcg_temp_new_ptr();
-    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd));
 
     if (a->lt) {
-        gen_helper_sve_whilel(t2, ptr, t2, t3);
+        gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
     } else {
-        gen_helper_sve_whileg(t2, ptr, t2, t3);
+        gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
     }
     do_pred_flags(t2);
-
-    tcg_temp_free_ptr(ptr);
-    tcg_temp_free_i32(t2);
-    tcg_temp_free_i32(t3);
     return true;
 }
 
 static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
 {
     TCGv_i64 op0, op1, diff, t1, tmax;
-    TCGv_i32 t2, t3;
+    TCGv_i32 t2;
     TCGv_ptr ptr;
     unsigned vsz = vec_full_reg_size(s);
     unsigned desc = 0;
@@ -3723,7 +3320,7 @@ static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
     op0 = read_cpu_reg(s, a->rn, 1);
     op1 = read_cpu_reg(s, a->rm, 1);
 
-    tmax = tcg_const_i64(vsz);
+    tmax = tcg_constant_i64(vsz);
     diff = tcg_temp_new_i64();
 
     if (a->rw) {
@@ -3733,7 +3330,6 @@ static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
         tcg_gen_sub_i64(diff, op0, op1);
         tcg_gen_sub_i64(t1, op1, op0);
         tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
-        tcg_temp_free_i64(t1);
         /* Round down to a multiple of ESIZE.  */
         tcg_gen_andi_i64(diff, diff, -1 << a->esz);
         /* If op1 == op0, diff == 0, and the condition is always true.  */
@@ -3749,26 +3345,19 @@ static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
 
     /* Bound to the maximum.  */
     tcg_gen_umin_i64(diff, diff, tmax);
-    tcg_temp_free_i64(tmax);
 
     /* Since we're bounded, pass as a 32-bit type.  */
     t2 = tcg_temp_new_i32();
     tcg_gen_extrl_i64_i32(t2, diff);
-    tcg_temp_free_i64(diff);
 
     desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
     desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
-    t3 = tcg_const_i32(desc);
 
     ptr = tcg_temp_new_ptr();
-    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
+    tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd));
 
-    gen_helper_sve_whilel(t2, ptr, t2, t3);
+    gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
     do_pred_flags(t2);
-
-    tcg_temp_free_ptr(ptr);
-    tcg_temp_free_i32(t2);
-    tcg_temp_free_i32(t3);
     return true;
 }
 
@@ -3778,7 +3367,7 @@ static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
 
 static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
 {
-    if (a->esz == 0) {
+    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
         return false;
     }
     if (sve_access_check(s)) {
@@ -3795,30 +3384,18 @@ static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
 
 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
 {
-    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
+    if (!dc_isar_feature(aa64_sve, s)) {
         return false;
     }
     if (sve_access_check(s)) {
         unsigned vsz = vec_full_reg_size(s);
         int dofs = vec_full_reg_offset(s, a->rd);
-
         tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
     }
     return true;
 }
 
-static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
-{
-    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
-        return false;
-    }
-    if (sve_access_check(s)) {
-        unsigned vsz = vec_full_reg_size(s);
-        tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
-                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
-    }
-    return true;
-}
+TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)
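ADD_zzi (and MUL_zzi below) now route through gen_gvec_fn_arg_zzi, defined earlier in this file. Its likely shape, reconstructed from the call sites here (hypothetical; consult translate-sve.c for the real definition):

    /* Hypothetical reconstruction, not the verbatim helper. */
    static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *fn,
                                    arg_rri_esz *a)
    {
        if (sve_access_check(s)) {
            unsigned vsz = vec_full_reg_size(s);
            fn(a->esz, vec_full_reg_offset(s, a->rd),
               vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
        }
        return true;
    }

GVecGen2iFn is the generic-vector expander signature that both tcg_gen_gvec_addi and tcg_gen_gvec_muli satisfy, which is what lets one wrapper serve both instructions.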
 
 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
 {
@@ -3857,86 +3434,51 @@ static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
           .scalar_first = true }
     };
 
-    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
+    if (!dc_isar_feature(aa64_sve, s)) {
         return false;
     }
     if (sve_access_check(s)) {
         unsigned vsz = vec_full_reg_size(s);
-        TCGv_i64 c = tcg_const_i64(a->imm);
         tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                         vec_full_reg_offset(s, a->rn),
-                        vsz, vsz, c, &op[a->esz]);
-        tcg_temp_free_i64(c);
+                        vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
     }
     return true;
 }
 
-static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
-{
-    if (sve_access_check(s)) {
-        unsigned vsz = vec_full_reg_size(s);
-        tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
-                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
-    }
-    return true;
-}
+TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)
 
 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
 {
-    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
-        return false;
-    }
     if (sve_access_check(s)) {
-        TCGv_i64 val = tcg_const_i64(a->imm);
-        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
-        tcg_temp_free_i64(val);
+        do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
                          tcg_constant_i64(a->imm), u, d);
     }
     return true;
 }
 
-static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
-{
-    return do_zzi_sat(s, a, false, false);
-}
-
-static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
-{
-    return do_zzi_sat(s, a, true, false);
-}
-
-static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
-{
-    return do_zzi_sat(s, a, false, true);
-}
-
-static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
-{
-    return do_zzi_sat(s, a, true, true);
-}
+TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
+TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
+TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
+TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
 
 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
 {
     if (sve_access_check(s)) {
         unsigned vsz = vec_full_reg_size(s);
-        TCGv_i64 c = tcg_const_i64(a->imm);
-
         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                             vec_full_reg_offset(s, a->rn),
-                            c, vsz, vsz, 0, fn);
-        tcg_temp_free_i64(c);
+                            tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
     }
     return true;
 }
 
 #define DO_ZZI(NAME, name) \
-static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
-{                                                                       \
-    static gen_helper_gvec_2i * const fns[4] = {                        \
+    static gen_helper_gvec_2i * const name##i_fns[4] = {                \
         gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
         gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
     };                                                                  \
-    return do_zzi_ool(s, a, fns[a->esz]);                               \
-}
+    TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])
 
 DO_ZZI(SMAX, smax)
 DO_ZZI(UMAX, umax)
@@ -3945,204 +3487,130 @@ DO_ZZI(UMIN, umin)
 
 #undef DO_ZZI
 
-static bool trans_DOT_zzzz(DisasContext *s, arg_DOT_zzzz *a)
-{
-    static gen_helper_gvec_4 * const fns[2][2] = {
-        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
-        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
-    };
-
-    if (sve_access_check(s)) {
-        gen_gvec_ool_zzzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0);
-    }
-    return true;
-}
+static gen_helper_gvec_4 * const dot_fns[2][2] = {
+    { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
+    { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
+};
+TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
+           dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
 
 /*
  * SVE Multiply - Indexed
  */
 
-static bool do_zzxz_ool(DisasContext *s, arg_rrxr_esz *a,
-                        gen_helper_gvec_4 *fn)
-{
-    if (fn == NULL) {
-        return false;
-    }
-    if (sve_access_check(s)) {
-        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
-    }
-    return true;
-}
-
-#define DO_RRXR(NAME, FUNC) \
-    static bool NAME(DisasContext *s, arg_rrxr_esz *a)  \
-    { return do_zzxz_ool(s, a, FUNC); }
-
-DO_RRXR(trans_SDOT_zzxw_s, gen_helper_gvec_sdot_idx_b)
-DO_RRXR(trans_SDOT_zzxw_d, gen_helper_gvec_sdot_idx_h)
-DO_RRXR(trans_UDOT_zzxw_s, gen_helper_gvec_udot_idx_b)
-DO_RRXR(trans_UDOT_zzxw_d, gen_helper_gvec_udot_idx_h)
-
-static bool trans_SUDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
-{
-    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
-        return false;
-    }
-    return do_zzxz_ool(s, a, gen_helper_gvec_sudot_idx_b);
-}
-
-static bool trans_USDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
-{
-    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
-        return false;
-    }
-    return do_zzxz_ool(s, a, gen_helper_gvec_usdot_idx_b);
-}
+TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
           gen_helper_gvec_sdot_idx_b, a)
+TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
+           gen_helper_gvec_sdot_idx_h, a)
+TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
+           gen_helper_gvec_udot_idx_b, a)
+TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
+           gen_helper_gvec_udot_idx_h, a)
 
-#undef DO_RRXR
-
-static bool do_sve2_zzz_data(DisasContext *s, int rd, int rn, int rm, int data,
-                             gen_helper_gvec_3 *fn)
-{
-    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
-        return false;
-    }
-    if (sve_access_check(s)) {
-        unsigned vsz = vec_full_reg_size(s);
-        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
-                           vec_full_reg_offset(s, rn),
-                           vec_full_reg_offset(s, rm),
-                           vsz, vsz, data, fn);
-    }
-    return true;
-}
+TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
+           gen_helper_gvec_sudot_idx_b, a)
+TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
+           gen_helper_gvec_usdot_idx_b, a)
 
 #define DO_SVE2_RRX(NAME, FUNC) \
-    static bool NAME(DisasContext *s, arg_rrx_esz *a)  \
-    { return do_sve2_zzz_data(s, a->rd, a->rn, a->rm, a->index, FUNC); }
+    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC,          \
+               a->rd, a->rn, a->rm, a->index)
 
-DO_SVE2_RRX(trans_MUL_zzx_h, gen_helper_gvec_mul_idx_h)
-DO_SVE2_RRX(trans_MUL_zzx_s, gen_helper_gvec_mul_idx_s)
-DO_SVE2_RRX(trans_MUL_zzx_d, gen_helper_gvec_mul_idx_d)
+DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
+DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
+DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)
 
-DO_SVE2_RRX(trans_SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
-DO_SVE2_RRX(trans_SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
-DO_SVE2_RRX(trans_SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)
+DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
+DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
+DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)
 
-DO_SVE2_RRX(trans_SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
-DO_SVE2_RRX(trans_SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
-DO_SVE2_RRX(trans_SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)
+DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
+DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
+DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)
 
 #undef DO_SVE2_RRX
 
 #define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
-    static bool NAME(DisasContext *s, arg_rrx_esz *a)           \
-    {                                                           \
-        return do_sve2_zzz_data(s, a->rd, a->rn, a->rm,         \
-                                (a->index << 1) | TOP, FUNC);   \
-    }
+    TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC,          \
               a->rd, a->rn, a->rm, (a->index << 1) | TOP)
 
-DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
-DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
-DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
-DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)
+DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
+DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
+DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
+DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)
 
-DO_SVE2_RRX_TB(trans_SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
-DO_SVE2_RRX_TB(trans_SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
-DO_SVE2_RRX_TB(trans_SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
-DO_SVE2_RRX_TB(trans_SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)
+DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
+DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
+DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
+DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)
 
-DO_SVE2_RRX_TB(trans_UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
-DO_SVE2_RRX_TB(trans_UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
-DO_SVE2_RRX_TB(trans_UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
-DO_SVE2_RRX_TB(trans_UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)
+DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
+DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
+DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
+DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)
 
 #undef DO_SVE2_RRX_TB
 
-static bool do_sve2_zzzz_data(DisasContext *s, int rd, int rn, int rm, int ra,
-                              int data, gen_helper_gvec_4 *fn)
-{
-    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
-        return false;
-    }
-    if (sve_access_check(s)) {
-        unsigned vsz = vec_full_reg_size(s);
-        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
-                           vec_full_reg_offset(s, rn),
-                           vec_full_reg_offset(s, rm),
-                           vec_full_reg_offset(s, ra),
-                           vsz, vsz, data, fn);
-    }
-    return true;
-}
-
 #define DO_SVE2_RRXR(NAME, FUNC) \
-    static bool NAME(DisasContext *s, arg_rrxr_esz *a)  \
-    { return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, a->index, FUNC); }
+    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)
 
-DO_SVE2_RRXR(trans_MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
-DO_SVE2_RRXR(trans_MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
-DO_SVE2_RRXR(trans_MLA_zzxz_d, gen_helper_gvec_mla_idx_d)
+DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
+DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
+DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)
 
-DO_SVE2_RRXR(trans_MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
-DO_SVE2_RRXR(trans_MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
-DO_SVE2_RRXR(trans_MLS_zzxz_d, gen_helper_gvec_mls_idx_d)
+DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
+DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
+DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)
 
-DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
-DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
-DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)
+DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
+DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
+DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)
 
-DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
-DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
-DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)
+DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
+DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
+DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)
 
 #undef DO_SVE2_RRXR
 
 #define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
-    static bool NAME(DisasContext *s, arg_rrxr_esz *a)          \
-    {                                                           \
-        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->rd, \
-                                 (a->index << 1) | TOP, FUNC);  \
-    }
+    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC,        \
               a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)
 
-DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
-DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
-DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
-DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)
+DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
+DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
+DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
+DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)
 
-DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
-DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
-DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
-DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)
+DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
+DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
+DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
+DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)
 
-DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
-DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
-DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
-DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)
+DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
+DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
+DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
+DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)
 
-DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
-DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
-DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
-DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)
+DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
+DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
+DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
+DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)
 
-DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
-DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
-DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
-DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)
-
-DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
-DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
-DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
-DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)
+DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
+DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
+DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
+DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)
 
+DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
+DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
+DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
+DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)
 
 #undef DO_SVE2_RRXR_TB
 
 #define DO_SVE2_RRXR_ROT(NAME, FUNC) \
-    static bool trans_##NAME(DisasContext *s, arg_##NAME *a)       \
-    {                                                              \
-        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra,    \
-                                 (a->index << 2) | a->rot, FUNC);  \
-    }
+    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC,           \
               a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)
 
 DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
 DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)
@@ -4161,59 +3629,31 @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
 
 static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
 {
-    static gen_helper_gvec_4_ptr * const fns[3] = {
+    static gen_helper_gvec_4_ptr * const fns[4] = {
+        NULL,
         gen_helper_gvec_fmla_idx_h,
         gen_helper_gvec_fmla_idx_s,
         gen_helper_gvec_fmla_idx_d,
     };
-
-    if (sve_access_check(s)) {
-        unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
-        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
-                           vec_full_reg_offset(s, a->rn),
-                           vec_full_reg_offset(s, a->rm),
-                           vec_full_reg_offset(s, a->ra),
-                           status, vsz, vsz, (a->index << 1) | sub,
-                           fns[a->esz - 1]);
-        tcg_temp_free_ptr(status);
-    }
-    return true;
+    return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
                              (a->index << 1) | sub,
+                              a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
 }
 
-static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
-{
-    return do_FMLA_zzxz(s, a, false);
-}
-
-static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
-{
-    return do_FMLA_zzxz(s, a, true);
-}
+TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
+TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
 
 /*
  *** SVE Floating Point Multiply Indexed Group
 */
 
-static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
-{
-    static gen_helper_gvec_3_ptr * const fns[3] = {
-        gen_helper_gvec_fmul_idx_h,
-        gen_helper_gvec_fmul_idx_s,
-        gen_helper_gvec_fmul_idx_d,
-    };
-
-    if (sve_access_check(s)) {
-        unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
-        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
-                           vec_full_reg_offset(s, a->rn),
-                           vec_full_reg_offset(s, a->rm),
-                           status, vsz, vsz, a->index, fns[a->esz - 1]);
-        tcg_temp_free_ptr(status);
-    }
-    return true;
-}
+static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
+    NULL, gen_helper_gvec_fmul_idx_h,
+    gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
+};
+TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
+           fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
+           a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
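One idiom recurs throughout the floating-point groups that follow: every FP expansion selects its status pointer by element size,

    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16
                                                   : FPST_FPCR);

Half-precision operations need the separate FP16 status so that the FZ16 flush-to-zero control (rather than FZ) and the FP16-specific behaviour apply; all other element sizes share the general FPCR status.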
 
 /*
  *** SVE Floating Point Fast Reduction Group
  */
@@ -4222,49 +3662,44 @@ static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
 typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                   TCGv_ptr, TCGv_i32);
 
-static void do_reduce(DisasContext *s, arg_rpr_esz *a,
+static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_fp_reduce *fn)
 {
-    unsigned vsz = vec_full_reg_size(s);
-    unsigned p2vsz = pow2ceil(vsz);
-    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz));
+    unsigned vsz, p2vsz;
+    TCGv_i32 t_desc;
     TCGv_ptr t_zn, t_pg, status;
     TCGv_i64 temp;
 
+    if (fn == NULL) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    vsz = vec_full_reg_size(s);
+    p2vsz = pow2ceil(vsz);
+    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
     temp = tcg_temp_new_i64();
     t_zn = tcg_temp_new_ptr();
     t_pg = tcg_temp_new_ptr();
 
-    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
-    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
+    tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
+    tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
     status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
 
     fn(temp, t_zn, t_pg, status, t_desc);
-    tcg_temp_free_ptr(t_zn);
-    tcg_temp_free_ptr(t_pg);
-    tcg_temp_free_ptr(status);
-    tcg_temp_free_i32(t_desc);
 
     write_fp_dreg(s, a->rd, temp);
-    tcg_temp_free_i64(temp);
+    return true;
 }
 
 #define DO_VPZ(NAME, name) \
-static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)  \
-{                                                          \
-    static gen_helper_fp_reduce * const fns[3] = {         \
-        gen_helper_sve_##name##_h,                         \
-        gen_helper_sve_##name##_s,                         \
-        gen_helper_sve_##name##_d,                         \
+    static gen_helper_fp_reduce * const name##_fns[4] = {  \
+        NULL, gen_helper_sve_##name##_h,                   \
+        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
     };                                                     \
-    if (a->esz == 0) {                                     \
-        return false;                                      \
-    }                                                      \
-    if (sve_access_check(s)) {                             \
-        do_reduce(s, a, fns[a->esz - 1]);                  \
-    }                                                      \
-    return true;                                           \
-}
+    TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])
 
 DO_VPZ(FADDV, faddv)
 DO_VPZ(FMINNMV, fminnmv)
@@ -4272,86 +3707,53 @@ DO_VPZ(FMAXNMV, fmaxnmv)
 DO_VPZ(FMINV, fminv)
 DO_VPZ(FMAXV, fmaxv)
 
+#undef DO_VPZ
+
 /*
  *** SVE Floating Point Unary Operations - Unpredicated Group
  */
 
-static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
-{
-    unsigned vsz = vec_full_reg_size(s);
-    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
-
-    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
-                       vec_full_reg_offset(s, a->rn),
-                       status, vsz, vsz, 0, fn);
-    tcg_temp_free_ptr(status);
-}
-
-static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
-{
-    static gen_helper_gvec_2_ptr * const fns[3] = {
-        gen_helper_gvec_frecpe_h,
-        gen_helper_gvec_frecpe_s,
-        gen_helper_gvec_frecpe_d,
-    };
-    if (a->esz == 0) {
-        return false;
-    }
-    if (sve_access_check(s)) {
-        do_zz_fp(s, a, fns[a->esz - 1]);
-    }
-    return true;
-}
+static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
+    NULL, gen_helper_gvec_frecpe_h,
+    gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
+};
+TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)
 
-static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
-{
-    static gen_helper_gvec_2_ptr * const fns[3] = {
-        gen_helper_gvec_frsqrte_h,
-        gen_helper_gvec_frsqrte_s,
-        gen_helper_gvec_frsqrte_d,
-    };
-    if (a->esz == 0) {
-        return false;
-    }
-    if (sve_access_check(s)) {
-        do_zz_fp(s, a, fns[a->esz - 1]);
-    }
-    return true;
-}
+static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
+    NULL, gen_helper_gvec_frsqrte_h,
+    gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
+};
+TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
 
 /*
  *** SVE Floating Point Compare with Zero Group
  */
 
-static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
+static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_3_ptr *fn)
 {
-    unsigned vsz = vec_full_reg_size(s);
-    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+    if (fn == NULL) {
+        return false;
+    }
+    if (sve_access_check(s)) {
+        unsigned vsz = vec_full_reg_size(s);
+        TCGv_ptr status =
+            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
 
-    tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
-                       vec_full_reg_offset(s, a->rn),
-                       pred_full_reg_offset(s, a->pg),
-                       status, vsz, vsz, 0, fn);
-    tcg_temp_free_ptr(status);
+        tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
+                           vec_full_reg_offset(s, a->rn),
+                           pred_full_reg_offset(s, a->pg),
+                           status, vsz, vsz, 0, fn);
+    }
+    return true;
 }
 
 #define DO_PPZ(NAME, name) \
-static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)  \
-{                                                          \
-    static gen_helper_gvec_3_ptr * const fns[3] = {        \
-        gen_helper_sve_##name##_h,                         \
-        gen_helper_sve_##name##_s,                         \
-        gen_helper_sve_##name##_d,                         \
+    static gen_helper_gvec_3_ptr * const name##_fns[] = {  \
+        NULL, gen_helper_sve_##name##_h,                   \
+        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
    };                                                     \
-    if (a->esz == 0) {                                     \
-        return false;                                      \
-    }                                                      \
-    if (sve_access_check(s)) {                             \
-        do_ppz_fp(s, a, fns[a->esz - 1]);                  \
-    }                                                      \
-    return true;                                           \
-}
+    TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])
 
 DO_PPZ(FCMGE_ppz0, fcmge0)
 DO_PPZ(FCMGT_ppz0, fcmgt0)
@@ -4366,28 +3768,13 @@ DO_PPZ(FCMNE_ppz0, fcmne0)
  *** SVE floating-point trig multiply-add coefficient
  */
 
-static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
-{
-    static gen_helper_gvec_3_ptr * const fns[3] = {
-        gen_helper_sve_ftmad_h,
-        gen_helper_sve_ftmad_s,
-        gen_helper_sve_ftmad_d,
-    };
-
-    if (a->esz == 0) {
-        return false;
-    }
-    if (sve_access_check(s)) {
-        unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
-        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
-                           vec_full_reg_offset(s, a->rn),
-                           vec_full_reg_offset(s, a->rm),
-                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
-        tcg_temp_free_ptr(status);
-    }
-    return true;
-}
+static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
+    NULL, gen_helper_sve_ftmad_h,
+    gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
+};
+TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
                         ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
                         a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
 
 /*
  *** SVE Floating Point Accumulating Reduction Group
  */
@@ -4407,30 +3794,25 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
     TCGv_i64 t_val;
     TCGv_i32 t_desc;
 
-    if (a->esz == 0) {
+    if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
         return false;
     }
+    s->is_nonstreaming = true;
     if (!sve_access_check(s)) {
         return true;
     }
 
-    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
+    t_val = load_esz(tcg_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
     t_rm = tcg_temp_new_ptr();
     t_pg = tcg_temp_new_ptr();
-    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
-    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
+    tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm));
+    tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
     t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
-    t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
+    t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
 
     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
 
-    tcg_temp_free_i32(t_desc);
-    tcg_temp_free_ptr(t_fpst);
-    tcg_temp_free_ptr(t_pg);
-    tcg_temp_free_ptr(t_rm);
-
     write_fp_dreg(s, a->rd, t_val);
-    tcg_temp_free_i64(t_val);
     return true;
 }
 
@@ -4438,90 +3820,50 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
  *** SVE Floating Point Arithmetic - Unpredicated Group
  */
 
-static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
-                      gen_helper_gvec_3_ptr *fn)
-{
-    if (fn == NULL) {
-        return false;
-    }
-    if (sve_access_check(s)) {
-        unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
FPST_FPCR_F16 : FPST_FPCR); - tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - pred_full_reg_offset(s, a->pg), - status, vsz, vsz, 0, fn); - tcg_temp_free_ptr(status); - } - return true; -} - -#define DO_FP3(NAME, name) \ -static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \ -{ \ - static gen_helper_gvec_4_ptr * const fns[4] = { \ - NULL, gen_helper_sve_##name##_h, \ - gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ - }; \ - return do_zpzz_fp(s, a, fns[a->esz]); \ -} - -DO_FP3(FADD_zpzz, fadd) -DO_FP3(FSUB_zpzz, fsub) -DO_FP3(FMUL_zpzz, fmul) -DO_FP3(FMIN_zpzz, fmin) -DO_FP3(FMAX_zpzz, fmax) -DO_FP3(FMINNM_zpzz, fminnum) -DO_FP3(FMAXNM_zpzz, fmaxnum) -DO_FP3(FABD, fabd) -DO_FP3(FSCALE, fscalbn) -DO_FP3(FDIV, fdiv) -DO_FP3(FMULX, fmulx) - -#undef DO_FP3 +#define DO_ZPZZ_FP(NAME, FEAT, name) \ + static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \ + NULL, gen_helper_##name##_h, \ + gen_helper_##name##_s, gen_helper_##name##_d \ + }; \ + TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a) + +DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd) +DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub) +DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul) +DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin) +DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax) +DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum) +DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum) +DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd) +DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn) +DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv) +DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx) typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr, TCGv_i32); @@ -4536,50 +3878,42 @@ static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16, t_zd = tcg_temp_new_ptr(); t_zn = tcg_temp_new_ptr(); t_pg = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd)); - tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn)); - tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); + tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd)); + tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn)); + tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); status = fpstatus_ptr(is_fp16 ? 
FPST_FPCR_F16 : FPST_FPCR); - desc = tcg_const_i32(simd_desc(vsz, vsz, 0)); + desc = tcg_constant_i32(simd_desc(vsz, vsz, 0)); fn(t_zd, t_zn, t_pg, scalar, status, desc); - - tcg_temp_free_i32(desc); - tcg_temp_free_ptr(status); - tcg_temp_free_ptr(t_pg); - tcg_temp_free_ptr(t_zn); - tcg_temp_free_ptr(t_zd); } -static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, +static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm, gen_helper_sve_fp2scalar *fn) { - TCGv_i64 temp = tcg_const_i64(imm); - do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn); - tcg_temp_free_i64(temp); + if (fn == NULL) { + return false; + } + if (sve_access_check(s)) { + do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, + tcg_constant_i64(imm), fn); + } + return true; } -#define DO_FP_IMM(NAME, name, const0, const1) \ -static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a) \ -{ \ - static gen_helper_sve_fp2scalar * const fns[3] = { \ - gen_helper_sve_##name##_h, \ - gen_helper_sve_##name##_s, \ - gen_helper_sve_##name##_d \ - }; \ - static uint64_t const val[3][2] = { \ - { float16_##const0, float16_##const1 }, \ - { float32_##const0, float32_##const1 }, \ - { float64_##const0, float64_##const1 }, \ - }; \ - if (a->esz == 0) { \ - return false; \ - } \ - if (sve_access_check(s)) { \ - do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \ - } \ - return true; \ -} +#define DO_FP_IMM(NAME, name, const0, const1) \ + static gen_helper_sve_fp2scalar * const name##_fns[4] = { \ + NULL, gen_helper_sve_##name##_h, \ + gen_helper_sve_##name##_s, \ + gen_helper_sve_##name##_d \ + }; \ + static uint64_t const name##_const[4][2] = { \ + { -1, -1 }, \ + { float16_##const0, float16_##const1 }, \ + { float32_##const0, float32_##const1 }, \ + { float64_##const0, float64_##const1 }, \ + }; \ + TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \ + name##_const[a->esz][a->imm], name##_fns[a->esz]) DO_FP_IMM(FADD, fadds, half, one) DO_FP_IMM(FSUB, fsubs, half, one) @@ -4606,20 +3940,16 @@ static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, vec_full_reg_offset(s, a->rm), pred_full_reg_offset(s, a->pg), status, vsz, vsz, 0, fn); - tcg_temp_free_ptr(status); } return true; } #define DO_FPCMP(NAME, name) \ -static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a) \ -{ \ - static gen_helper_gvec_4_ptr * const fns[4] = { \ + static gen_helper_gvec_4_ptr * const name##_fns[4] = { \ NULL, gen_helper_sve_##name##_h, \ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ }; \ - return do_fp_cmp(s, a, fns[a->esz]); \ -} + TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz]) DO_FPCMP(FCMGE, fcmge) DO_FPCMP(FCMGT, fcmgt) @@ -4631,59 +3961,22 @@ DO_FPCMP(FACGT, facgt) #undef DO_FPCMP -static bool trans_FCADD(DisasContext *s, arg_FCADD *a) -{ - static gen_helper_gvec_4_ptr * const fns[3] = { - gen_helper_sve_fcadd_h, - gen_helper_sve_fcadd_s, - gen_helper_sve_fcadd_d - }; - - if (a->esz == 0) { - return false; - } - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); - tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - pred_full_reg_offset(s, a->pg), - status, vsz, vsz, a->rot, fns[a->esz - 1]); - tcg_temp_free_ptr(status); - } - return true; -} - -static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, - gen_helper_gvec_5_ptr *fn) -{ - if (a->esz == 0) { - return false; - } - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); - tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vec_full_reg_offset(s, a->ra), - pred_full_reg_offset(s, a->pg), - status, vsz, vsz, 0, fn); - tcg_temp_free_ptr(status); - } - return true; -} +static gen_helper_gvec_4_ptr * const fcadd_fns[] = { + NULL, gen_helper_sve_fcadd_h, + gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d, +}; +TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz], + a->rd, a->rn, a->rm, a->pg, a->rot, + a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) #define DO_FMLA(NAME, name) \ -static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a) \ -{ \ - static gen_helper_gvec_5_ptr * const fns[4] = { \ - NULL, gen_helper_sve_##name##_h, \ - gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ - }; \ - return do_fmla(s, a, fns[a->esz]); \ -} + static gen_helper_gvec_5_ptr * const name##_fns[4] = { \ + NULL, gen_helper_sve_##name##_h, \ + gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ + }; \ + TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \ + a->rd, a->rn, a->rm, a->ra, a->pg, 0, \ + a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) DO_FMLA(FMLA_zpzzz, fmla_zpzzz) DO_FMLA(FMLS_zpzzz, fmls_zpzzz) @@ -4692,368 +3985,176 @@ DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz) #undef DO_FMLA -static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a) -{ - static gen_helper_gvec_5_ptr * const fns[4] = { - NULL, - gen_helper_sve_fcmla_zpzzz_h, - gen_helper_sve_fcmla_zpzzz_s, - gen_helper_sve_fcmla_zpzzz_d, - }; - - if (a->esz == 0) { - return false; - } - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); - tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vec_full_reg_offset(s, a->ra), - pred_full_reg_offset(s, a->pg), - status, vsz, vsz, a->rot, fns[a->esz]); - tcg_temp_free_ptr(status); - } - return true; -} - -static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a) -{ - static gen_helper_gvec_4_ptr * const fns[2] = { - gen_helper_gvec_fcmlah_idx, - gen_helper_gvec_fcmlas_idx, - }; +static gen_helper_gvec_5_ptr * const fcmla_fns[4] = { + NULL, gen_helper_sve_fcmla_zpzzz_h, + gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d, +}; +TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz], + a->rd, a->rn, a->rm, a->ra, a->pg, a->rot, + a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) - tcg_debug_assert(a->esz == 1 || a->esz == 2); - tcg_debug_assert(a->rd == a->ra); - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); - tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vec_full_reg_offset(s, a->ra), - status, vsz, vsz, - a->index * 4 + a->rot, - fns[a->esz - 1]); - tcg_temp_free_ptr(status); - } - return true; -} +static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = { + NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL +}; +TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz], + a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot, + a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) /* *** SVE Floating Point Unary Operations Predicated Group */ -static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg, - bool is_fp16, gen_helper_gvec_3_ptr *fn) -{ - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); - tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - pred_full_reg_offset(s, pg), - status, vsz, vsz, 0, fn); - tcg_temp_free_ptr(status); - } - return true; -} - -static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh); -} - -static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs); -} - -static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a) -{ - if (!dc_isar_feature(aa64_sve_bf16, s)) { - return false; - } - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt); -} - -static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh); -} - -static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd); -} - -static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds); -} - -static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd); -} - -static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh); -} - -static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh); -} - -static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs); -} - -static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs); -} - -static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd); -} - -static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd); -} - -static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss); -} - -static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss); -} - -static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd); -} - -static bool trans_FCVTZU_sd(DisasContext *s, 
arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd); -} - -static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds); -} - -static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds); -} - -static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd); -} - -static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd); -} - -static gen_helper_gvec_3_ptr * const frint_fns[3] = { +TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR) +TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR) + +TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz, + gen_helper_sve_bfcvt, a, 0, FPST_FPCR) + +TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR) +TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR) +TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR) +TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR) + +TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16) +TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16) +TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16) +TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16) +TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16) +TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16) + +TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR) +TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR) +TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR) +TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR) +TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR) +TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR) + +TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR) +TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR) + +static gen_helper_gvec_3_ptr * const frint_fns[] = { + NULL, gen_helper_sve_frint_h, gen_helper_sve_frint_s, gen_helper_sve_frint_d }; - -static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a) -{ - if (a->esz == 0) { - return false; - } - return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, - frint_fns[a->esz - 1]); -} - -static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a) -{ - static gen_helper_gvec_3_ptr * const fns[3] = { - gen_helper_sve_frintx_h, - gen_helper_sve_frintx_s, - gen_helper_sve_frintx_d - }; - if (a->esz == 0) { - return false; - } - return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]); -} +TRANS_FEAT(FRINTI, 
aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz], + a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) + +static gen_helper_gvec_3_ptr * const frintx_fns[] = { + NULL, + gen_helper_sve_frintx_h, + gen_helper_sve_frintx_s, + gen_helper_sve_frintx_d +}; +TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz], + a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, - int mode, gen_helper_gvec_3_ptr *fn) -{ - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - TCGv_i32 tmode = tcg_const_i32(mode); - TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); - - gen_helper_set_rmode(tmode, tmode, status); - - tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - pred_full_reg_offset(s, a->pg), - status, vsz, vsz, 0, fn); - - gen_helper_set_rmode(tmode, tmode, status); - tcg_temp_free_i32(tmode); - tcg_temp_free_ptr(status); - } - return true; -} - -static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a) + ARMFPRounding mode, gen_helper_gvec_3_ptr *fn) { - if (a->esz == 0) { - return false; - } - return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]); -} - -static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a) -{ - if (a->esz == 0) { - return false; - } - return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]); -} - -static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a) -{ - if (a->esz == 0) { - return false; - } - return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]); -} - -static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a) -{ - if (a->esz == 0) { - return false; - } - return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]); -} - -static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a) -{ - if (a->esz == 0) { - return false; - } - return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]); -} + unsigned vsz; + TCGv_i32 tmode; + TCGv_ptr status; -static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a) -{ - static gen_helper_gvec_3_ptr * const fns[3] = { - gen_helper_sve_frecpx_h, - gen_helper_sve_frecpx_s, - gen_helper_sve_frecpx_d - }; - if (a->esz == 0) { + if (fn == NULL) { return false; } - return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]); -} - -static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a) -{ - static gen_helper_gvec_3_ptr * const fns[3] = { - gen_helper_sve_fsqrt_h, - gen_helper_sve_fsqrt_s, - gen_helper_sve_fsqrt_d - }; - if (a->esz == 0) { - return false; + if (!sve_access_check(s)) { + return true; } - return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]); -} - -static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh); -} - -static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh); -} - -static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh); -} - -static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss); -} - -static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds); -} - -static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, 
gen_helper_sve_scvt_sd); -} - -static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd); -} - -static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh); -} -static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh); -} + vsz = vec_full_reg_size(s); + status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); + tmode = gen_set_rmode(mode, status); -static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh); -} + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + pred_full_reg_offset(s, a->pg), + status, vsz, vsz, 0, fn); -static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss); + gen_restore_rmode(tmode, status); + return true; } -static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds); -} +TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a, + FPROUNDING_TIEEVEN, frint_fns[a->esz]) +TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a, + FPROUNDING_POSINF, frint_fns[a->esz]) +TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a, + FPROUNDING_NEGINF, frint_fns[a->esz]) +TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a, + FPROUNDING_ZERO, frint_fns[a->esz]) +TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a, + FPROUNDING_TIEAWAY, frint_fns[a->esz]) -static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd); -} +static gen_helper_gvec_3_ptr * const frecpx_fns[] = { + NULL, gen_helper_sve_frecpx_h, + gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d, +}; +TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz], + a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR) -static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a) -{ - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd); -} +static gen_helper_gvec_3_ptr * const fsqrt_fns[] = { + NULL, gen_helper_sve_fsqrt_h, + gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d, +}; +TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz], + a, 0, a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR) + +TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16) +TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16) +TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16) + +TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_scvt_ss, a, 0, FPST_FPCR) +TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_scvt_ds, a, 0, FPST_FPCR) + +TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_scvt_sd, a, 0, FPST_FPCR) +TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_scvt_dd, a, 0, FPST_FPCR) + +TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16) +TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16) +TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16) + +TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR) +TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR) +TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR) + +TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz, + gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR) /* *** SVE Memory - 32-bit Gather and Unsized Contiguous Group @@ -5063,18 +4164,19 @@ static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a) * The load should begin at the address Rn + IMM. */ -static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) +void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs, + int len, int rn, int imm) { - int len_align = QEMU_ALIGN_DOWN(len, 8); - int len_remain = len % 8; - int nparts = len / 8 + ctpop8(len_remain); + int len_align = QEMU_ALIGN_DOWN(len, 16); + int len_remain = len % 16; + int nparts = len / 16 + ctpop8(len_remain); int midx = get_mem_index(s); TCGv_i64 dirty_addr, clean_addr, t0, t1; + TCGv_i128 t16; dirty_addr = tcg_temp_new_i64(); tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); - clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); - tcg_temp_free_i64(dirty_addr); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); /* * Note that unpredicated load/store of vector/predicate registers @@ -5087,42 +4189,57 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) int i; t0 = tcg_temp_new_i64(); - for (i = 0; i < len_align; i += 8) { - tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ); - tcg_gen_st_i64(t0, cpu_env, vofs + i); - tcg_gen_addi_i64(clean_addr, clean_addr, 8); + t1 = tcg_temp_new_i64(); + t16 = tcg_temp_new_i128(); + + for (i = 0; i < len_align; i += 16) { + tcg_gen_qemu_ld_i128(t16, clean_addr, midx, + MO_LE | MO_128 | MO_ATOM_NONE); + tcg_gen_extr_i128_i64(t0, t1, t16); + tcg_gen_st_i64(t0, base, vofs + i); + tcg_gen_st_i64(t1, base, vofs + i + 8); + tcg_gen_addi_i64(clean_addr, clean_addr, 16); } - tcg_temp_free_i64(t0); } else { TCGLabel *loop = gen_new_label(); - TCGv_ptr tp, i = tcg_const_local_ptr(0); - - /* Copy the clean address into a local temp, live across the loop. 
*/ - t0 = clean_addr; - clean_addr = new_tmp_a64_local(s); - tcg_gen_mov_i64(clean_addr, t0); + TCGv_ptr tp, i = tcg_temp_new_ptr(); + tcg_gen_movi_ptr(i, 0); gen_set_label(loop); - t0 = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ); - tcg_gen_addi_i64(clean_addr, clean_addr, 8); + t16 = tcg_temp_new_i128(); + tcg_gen_qemu_ld_i128(t16, clean_addr, midx, + MO_LE | MO_128 | MO_ATOM_NONE); + tcg_gen_addi_i64(clean_addr, clean_addr, 16); tp = tcg_temp_new_ptr(); - tcg_gen_add_ptr(tp, cpu_env, i); - tcg_gen_addi_ptr(i, i, 8); + tcg_gen_add_ptr(tp, base, i); + tcg_gen_addi_ptr(i, i, 16); + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + tcg_gen_extr_i128_i64(t0, t1, t16); + tcg_gen_st_i64(t0, tp, vofs); - tcg_temp_free_ptr(tp); - tcg_temp_free_i64(t0); + tcg_gen_st_i64(t1, tp, vofs + 8); tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); - tcg_temp_free_ptr(i); } /* * Predicate register loads can be any multiple of 2. - * Note that we still store the entire 64-bit unit into cpu_env. + * Note that we still store the entire 64-bit unit into tcg_env. */ + if (len_remain >= 8) { + t0 = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); + tcg_gen_st_i64(t0, base, vofs + len_align); + len_remain -= 8; + len_align += 8; + if (len_remain) { + tcg_gen_addi_i64(clean_addr, clean_addr, 8); + } + } if (len_remain) { t0 = tcg_temp_new_i64(); switch (len_remain) { @@ -5130,39 +4247,38 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) case 4: case 8: tcg_gen_qemu_ld_i64(t0, clean_addr, midx, - MO_LE | ctz32(len_remain)); + MO_LE | ctz32(len_remain) | MO_ATOM_NONE); break; case 6: t1 = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL); + tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); tcg_gen_addi_i64(clean_addr, clean_addr, 4); - tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW); + tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); tcg_gen_deposit_i64(t0, t0, t1, 32, 32); - tcg_temp_free_i64(t1); break; default: g_assert_not_reached(); } - tcg_gen_st_i64(t0, cpu_env, vofs + len_align); - tcg_temp_free_i64(t0); + tcg_gen_st_i64(t0, base, vofs + len_align); } } /* Similarly for stores. 
*/ -static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) +void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs, + int len, int rn, int imm) { - int len_align = QEMU_ALIGN_DOWN(len, 8); - int len_remain = len % 8; - int nparts = len / 8 + ctpop8(len_remain); + int len_align = QEMU_ALIGN_DOWN(len, 16); + int len_remain = len % 16; + int nparts = len / 16 + ctpop8(len_remain); int midx = get_mem_index(s); - TCGv_i64 dirty_addr, clean_addr, t0; + TCGv_i64 dirty_addr, clean_addr, t0, t1; + TCGv_i128 t16; dirty_addr = tcg_temp_new_i64(); tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); - clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len); - tcg_temp_free_i64(dirty_addr); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); /* Note that unpredicated load/store of vector/predicate registers * are defined as a stream of bytes, which equates to little-endian @@ -5176,101 +4292,125 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) int i; t0 = tcg_temp_new_i64(); - for (i = 0; i < len_align; i += 8) { - tcg_gen_ld_i64(t0, cpu_env, vofs + i); - tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ); - tcg_gen_addi_i64(clean_addr, clean_addr, 8); + t1 = tcg_temp_new_i64(); + t16 = tcg_temp_new_i128(); + for (i = 0; i < len_align; i += 16) { + tcg_gen_ld_i64(t0, base, vofs + i); + tcg_gen_ld_i64(t1, base, vofs + i + 8); + tcg_gen_concat_i64_i128(t16, t0, t1); + tcg_gen_qemu_st_i128(t16, clean_addr, midx, + MO_LE | MO_128 | MO_ATOM_NONE); + tcg_gen_addi_i64(clean_addr, clean_addr, 16); } - tcg_temp_free_i64(t0); } else { TCGLabel *loop = gen_new_label(); - TCGv_ptr tp, i = tcg_const_local_ptr(0); - - /* Copy the clean address into a local temp, live across the loop. */ - t0 = clean_addr; - clean_addr = new_tmp_a64_local(s); - tcg_gen_mov_i64(clean_addr, t0); + TCGv_ptr tp, i = tcg_temp_new_ptr(); + tcg_gen_movi_ptr(i, 0); gen_set_label(loop); t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); tp = tcg_temp_new_ptr(); - tcg_gen_add_ptr(tp, cpu_env, i); + tcg_gen_add_ptr(tp, base, i); tcg_gen_ld_i64(t0, tp, vofs); - tcg_gen_addi_ptr(i, i, 8); - tcg_temp_free_ptr(tp); + tcg_gen_ld_i64(t1, tp, vofs + 8); + tcg_gen_addi_ptr(i, i, 16); + + t16 = tcg_temp_new_i128(); + tcg_gen_concat_i64_i128(t16, t0, t1); - tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ); - tcg_gen_addi_i64(clean_addr, clean_addr, 8); - tcg_temp_free_i64(t0); + tcg_gen_qemu_st_i128(t16, clean_addr, midx, + MO_LE | MO_128 | MO_ATOM_NONE); + tcg_gen_addi_i64(clean_addr, clean_addr, 16); tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop); - tcg_temp_free_ptr(i); } /* Predicate register stores can be any multiple of 2. 
*/ + if (len_remain >= 8) { + t0 = tcg_temp_new_i64(); + tcg_gen_ld_i64(t0, base, vofs + len_align); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE); + len_remain -= 8; + len_align += 8; + if (len_remain) { + tcg_gen_addi_i64(clean_addr, clean_addr, 8); + } + } if (len_remain) { t0 = tcg_temp_new_i64(); - tcg_gen_ld_i64(t0, cpu_env, vofs + len_align); + tcg_gen_ld_i64(t0, base, vofs + len_align); switch (len_remain) { case 2: case 4: case 8: tcg_gen_qemu_st_i64(t0, clean_addr, midx, - MO_LE | ctz32(len_remain)); + MO_LE | ctz32(len_remain) | MO_ATOM_NONE); break; case 6: - tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE); tcg_gen_addi_i64(clean_addr, clean_addr, 4); tcg_gen_shri_i64(t0, t0, 32); - tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE); break; default: g_assert_not_reached(); } - tcg_temp_free_i64(t0); } } static bool trans_LDR_zri(DisasContext *s, arg_rri *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (sve_access_check(s)) { int size = vec_full_reg_size(s); int off = vec_full_reg_offset(s, a->rd); - do_ldr(s, off, size, a->rn, a->imm * size); + gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); } return true; } static bool trans_LDR_pri(DisasContext *s, arg_rri *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (sve_access_check(s)) { int size = pred_full_reg_size(s); int off = pred_full_reg_offset(s, a->rd); - do_ldr(s, off, size, a->rn, a->imm * size); + gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size); } return true; } static bool trans_STR_zri(DisasContext *s, arg_rri *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (sve_access_check(s)) { int size = vec_full_reg_size(s); int off = vec_full_reg_offset(s, a->rd); - do_str(s, off, size, a->rn, a->imm * size); + gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); } return true; } static bool trans_STR_pri(DisasContext *s, arg_rri *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (sve_access_check(s)) { int size = pred_full_reg_size(s); int off = pred_full_reg_offset(s, a->rd); - do_str(s, off, size, a->rn, a->imm * size); + gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size); } return true; } @@ -5284,7 +4424,7 @@ static const MemOp dtype_mop[16] = { MO_UB, MO_UB, MO_UB, MO_UB, MO_SL, MO_UW, MO_UW, MO_UW, MO_SW, MO_SW, MO_UL, MO_UL, - MO_SB, MO_SB, MO_SB, MO_Q + MO_SB, MO_SB, MO_SB, MO_UQ }; #define dtype_msz(x) (dtype_mop[x] & MO_SIZE) @@ -5297,42 +4437,51 @@ static const uint8_t dtype_esz[16] = { 3, 2, 1, 3 }; -static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, - int dtype, uint32_t mte_n, bool is_write, - gen_helper_gvec_mem *fn) +uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs, + uint32_t msz, bool is_write, uint32_t data) { - unsigned vsz = vec_full_reg_size(s); - TCGv_ptr t_pg; - TCGv_i32 t_desc; - int desc = 0; + uint32_t sizem1; + uint32_t desc = 0; - /* - * For e.g. LD4, there are not enough arguments to pass all 4 - * registers as pointers, so encode the regno into the data field. - * For consistency, do this even for LD1. - */ - if (s->mte_active[0]) { - int msz = dtype_msz(dtype); + /* Assert all of the data fits, with or without MTE enabled. 
*/ + assert(nregs >= 1 && nregs <= 4); + sizem1 = (nregs << msz) - 1; + assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT); + assert(data < 1u << SVE_MTEDESC_SHIFT); + if (s->mte_active[0]) { desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1); + desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1); desc <<= SVE_MTEDESC_SHIFT; - } else { + } + return simd_desc(vsz, vsz, desc | data); +} + +static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, + int dtype, uint32_t nregs, bool is_write, + gen_helper_gvec_mem *fn) +{ + TCGv_ptr t_pg; + uint32_t desc; + + if (!s->mte_active[0]) { addr = clean_data_tbi(s, addr); } - desc = simd_desc(vsz, vsz, zt | desc); - t_desc = tcg_const_i32(desc); + /* + * For e.g. LD4, there are not enough arguments to pass all 4 + * registers as pointers, so encode the regno into the data field. + * For consistency, do this even for LD1. + */ + desc = make_svemte_desc(s, vec_full_reg_size(s), nregs, + dtype_msz(dtype), is_write, zt); t_pg = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); - fn(cpu_env, t_pg, addr, t_desc); - - tcg_temp_free_ptr(t_pg); - tcg_temp_free_i32(t_desc); + tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); + fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); } /* Indexed by [mte][be][dtype][nreg] */ @@ -5465,16 +4614,16 @@ static void do_ld_zpa(DisasContext *s, int zt, int pg, * accessible via the instruction encoding. */ assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn); + do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, fn); } static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) { - if (a->rm == 31) { + if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { return false; } if (sve_access_check(s)) { - TCGv_i64 addr = new_tmp_a64(s); + TCGv_i64 addr = tcg_temp_new_i64(); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg); @@ -5484,10 +4633,13 @@ static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (sve_access_check(s)) { int vsz = vec_full_reg_size(s); int elements = vsz >> dtype_esz[a->dtype]; - TCGv_i64 addr = new_tmp_a64(s); + TCGv_i64 addr = tcg_temp_new_i64(); tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), (a->imm * elements * (a->nreg + 1)) @@ -5585,8 +4737,12 @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) gen_helper_sve_ldff1dd_be_r_mte } }, }; + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + s->is_nonstreaming = true; if (sve_access_check(s)) { - TCGv_i64 addr = new_tmp_a64(s); + TCGv_i64 addr = tcg_temp_new_i64(); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, @@ -5683,11 +4839,15 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) gen_helper_sve_ldnf1dd_be_r_mte } }, }; + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + s->is_nonstreaming = true; if (sve_access_check(s)) { int vsz = vec_full_reg_size(s); int elements = vsz >> dtype_esz[a->dtype]; int off = (a->imm * elements) << dtype_msz(a->dtype); - 
TCGv_i64 addr = new_tmp_a64(s); + TCGv_i64 addr = tcg_temp_new_i64(); tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, @@ -5701,8 +4861,13 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) unsigned vsz = vec_full_reg_size(s); TCGv_ptr t_pg; int poff; + uint32_t desc; /* Load the first quadword using the normal predicated load helpers. */ + if (!s->mte_active[0]) { + addr = clean_data_tbi(s, addr); + } + poff = pred_full_reg_offset(s, pg); if (vsz > 16) { /* @@ -5712,24 +4877,22 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) * for this load operation. */ TCGv_i64 tmp = tcg_temp_new_i64(); -#ifdef HOST_WORDS_BIGENDIAN +#if HOST_BIG_ENDIAN poff += 6; #endif - tcg_gen_ld16u_i64(tmp, cpu_env, poff); + tcg_gen_ld16u_i64(tmp, tcg_env, poff); poff = offsetof(CPUARMState, vfp.preg_tmp); - tcg_gen_st_i64(tmp, cpu_env, poff); - tcg_temp_free_i64(tmp); + tcg_gen_st_i64(tmp, tcg_env, poff); } t_pg = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(t_pg, cpu_env, poff); + tcg_gen_addi_ptr(t_pg, tcg_env, poff); gen_helper_gvec_mem *fn = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; - fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt))); - - tcg_temp_free_ptr(t_pg); + desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt); + fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); /* Replicate that first quadword. */ if (vsz > 16) { @@ -5740,12 +4903,12 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a) { - if (a->rm == 31) { + if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { return false; } if (sve_access_check(s)) { int msz = dtype_msz(a->dtype); - TCGv_i64 addr = new_tmp_a64(s); + TCGv_i64 addr = tcg_temp_new_i64(); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz); tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); do_ldrq(s, a->rd, a->pg, addr, a->dtype); @@ -5755,8 +4918,11 @@ static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a) static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (sve_access_check(s)) { - TCGv_i64 addr = new_tmp_a64(s); + TCGv_i64 addr = tcg_temp_new_i64(); tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16); do_ldrq(s, a->rd, a->pg, addr, a->dtype); } @@ -5769,6 +4935,7 @@ static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) unsigned vsz_r32; TCGv_ptr t_pg; int poff, doff; + uint32_t desc; if (vsz < 32) { /* @@ -5781,6 +4948,9 @@ static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) } /* Load the first octaword using the normal predicated load helpers. */ + if (!s->mte_active[0]) { + addr = clean_data_tbi(s, addr); + } poff = pred_full_reg_offset(s, pg); if (vsz > 32) { @@ -5791,24 +4961,22 @@ static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype) * for this load operation. 
*/ TCGv_i64 tmp = tcg_temp_new_i64(); -#ifdef HOST_WORDS_BIGENDIAN +#if HOST_BIG_ENDIAN poff += 4; #endif - tcg_gen_ld32u_i64(tmp, cpu_env, poff); + tcg_gen_ld32u_i64(tmp, tcg_env, poff); poff = offsetof(CPUARMState, vfp.preg_tmp); - tcg_gen_st_i64(tmp, cpu_env, poff); - tcg_temp_free_i64(tmp); + tcg_gen_st_i64(tmp, tcg_env, poff); } t_pg = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(t_pg, cpu_env, poff); + tcg_gen_addi_ptr(t_pg, tcg_env, poff); gen_helper_gvec_mem *fn = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0]; - fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt))); - - tcg_temp_free_ptr(t_pg); + desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt); + fn(tcg_env, t_pg, addr, tcg_constant_i32(desc)); /* * Replicate that first octaword. @@ -5834,8 +5002,9 @@ static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a) if (a->rm == 31) { return false; } + s->is_nonstreaming = true; if (sve_access_check(s)) { - TCGv_i64 addr = new_tmp_a64(s); + TCGv_i64 addr = tcg_temp_new_i64(); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype)); tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); do_ldro(s, a->rd, a->pg, addr, a->dtype); @@ -5848,8 +5017,9 @@ static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a) if (!dc_isar_feature(aa64_sve_f64mm, s)) { return false; } + s->is_nonstreaming = true; if (sve_access_check(s)) { - TCGv_i64 addr = new_tmp_a64(s); + TCGv_i64 addr = tcg_temp_new_i64(); tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32); do_ldro(s, a->rd, a->pg, addr, a->dtype); } @@ -5865,7 +5035,11 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) unsigned msz = dtype_msz(a->dtype); TCGLabel *over; TCGv_i64 temp, clean_addr; + MemOp memop; + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (!sve_access_check(s)) { return true; } @@ -5879,29 +5053,26 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) */ uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8); temp = tcg_temp_new_i64(); - tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg)); + tcg_gen_ld_i64(temp, tcg_env, pred_full_reg_offset(s, a->pg)); tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask); tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over); - tcg_temp_free_i64(temp); } else { TCGv_i32 t32 = tcg_temp_new_i32(); find_last_active(s, t32, esz, a->pg); tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over); - tcg_temp_free_i32(t32); } /* Load the data. */ temp = tcg_temp_new_i64(); tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); - clean_addr = gen_mte_check1(s, temp, false, true, msz); - tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), - finalize_memop(s, dtype_mop[a->dtype])); + memop = finalize_memop(s, dtype_mop[a->dtype]); + clean_addr = gen_mte_check1(s, temp, false, true, memop); + tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop); /* Broadcast to *all* elements. */ tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, temp); - tcg_temp_free_i64(temp); /* Zero the inactive elements. 
*/ gen_set_label(over); @@ -6022,23 +5193,25 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, if (nreg == 0) { /* ST1 */ fn = fn_single[s->mte_active[0]][be][msz][esz]; - nreg = 1; } else { /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ assert(msz == esz); fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; } assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn); + do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, fn); } static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (a->rm == 31 || a->msz > a->esz) { return false; } if (sve_access_check(s)) { - TCGv_i64 addr = new_tmp_a64(s); + TCGv_i64 addr = tcg_temp_new_i64(); tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz); tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn)); do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg); @@ -6048,13 +5221,16 @@ static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } if (a->msz > a->esz) { return false; } if (sve_access_check(s)) { int vsz = vec_full_reg_size(s); int elements = vsz >> a->esz; - TCGv_i64 addr = new_tmp_a64(s); + TCGv_i64 addr = tcg_temp_new_i64(); tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), (a->imm * elements * (a->nreg + 1)) << a->msz); @@ -6071,33 +5247,17 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale, TCGv_i64 scalar, int msz, bool is_write, gen_helper_gvec_mem_scatter *fn) { - unsigned vsz = vec_full_reg_size(s); TCGv_ptr t_zm = tcg_temp_new_ptr(); TCGv_ptr t_pg = tcg_temp_new_ptr(); TCGv_ptr t_zt = tcg_temp_new_ptr(); - TCGv_i32 t_desc; - int desc = 0; - - if (s->mte_active[0]) { - desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); - desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); - desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); - desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); - desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1); - desc <<= SVE_MTEDESC_SHIFT; - } - desc = simd_desc(vsz, vsz, desc | scale); - t_desc = tcg_const_i32(desc); + uint32_t desc; - tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg)); - tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm)); - tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt)); - fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc); + tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg)); + tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm)); + tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt)); - tcg_temp_free_ptr(t_zt); - tcg_temp_free_ptr(t_zm); - tcg_temp_free_ptr(t_pg); - tcg_temp_free_i32(t_desc); + desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale); + fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc)); } /* Indexed by [mte][be][ff][xs][u][msz]. 
*/ @@ -6434,6 +5594,10 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) bool be = s->be_data == MO_BE; bool mte = s->mte_active[0]; + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + s->is_nonstreaming = true; if (!sve_access_check(s)) { return true; } @@ -6458,11 +5622,14 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) gen_helper_gvec_mem_scatter *fn = NULL; bool be = s->be_data == MO_BE; bool mte = s->mte_active[0]; - TCGv_i64 imm; if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { return false; } + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + s->is_nonstreaming = true; if (!sve_access_check(s)) { return true; } @@ -6480,18 +5647,41 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x]) * by loading the immediate into the scalar parameter. */ - imm = tcg_const_i64(a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn); - tcg_temp_free_i64(imm); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, + tcg_constant_i64(a->imm << a->msz), a->msz, false, fn); return true; } static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a) { + gen_helper_gvec_mem_scatter *fn = NULL; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; + + if (a->esz < a->msz + !a->u) { + return false; + } if (!dc_isar_feature(aa64_sve2, s)) { return false; } - return trans_LD1_zprz(s, a); + s->is_nonstreaming = true; + if (!sve_access_check(s)) { + return true; + } + + switch (a->esz) { + case MO_32: + fn = gather_load_fn32[mte][be][0][0][a->u][a->msz]; + break; + case MO_64: + fn = gather_load_fn64[mte][be][0][2][a->u][a->msz]; + break; + } + assert(fn != NULL); + + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, + cpu_reg(s, a->rm), a->msz, false, fn); + return true; } /* Indexed by [mte][be][xs][msz]. */ @@ -6595,6 +5785,10 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) if (a->esz < a->msz || (a->msz == 0 && a->scale)) { return false; } + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + s->is_nonstreaming = true; if (!sve_access_check(s)) { return true; } @@ -6618,11 +5812,14 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) gen_helper_gvec_mem_scatter *fn = NULL; bool be = s->be_data == MO_BE; bool mte = s->mte_active[0]; - TCGv_i64 imm; if (a->esz < a->msz) { return false; } + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + s->is_nonstreaming = true; if (!sve_access_check(s)) { return true; } @@ -6640,18 +5837,42 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x]) * by loading the immediate into the scalar parameter. 
*/ - imm = tcg_const_i64(a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn); - tcg_temp_free_i64(imm); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, + tcg_constant_i64(a->imm << a->msz), a->msz, true, fn); return true; } static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a) { + gen_helper_gvec_mem_scatter *fn; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; + + if (a->esz < a->msz) { + return false; + } if (!dc_isar_feature(aa64_sve2, s)) { return false; } - return trans_ST1_zprz(s, a); + s->is_nonstreaming = true; + if (!sve_access_check(s)) { + return true; + } + + switch (a->esz) { + case MO_32: + fn = scatter_store_fn32[mte][be][0][a->msz]; + break; + case MO_64: + fn = scatter_store_fn64[mte][be][2][a->msz]; + break; + default: + g_assert_not_reached(); + } + + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, + cpu_reg(s, a->rm), a->msz, true, fn); + return true; } /* @@ -6660,6 +5881,9 @@ static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a) static bool trans_PRF(DisasContext *s, arg_PRF *a) { + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } /* Prefetch is a nop within QEMU. */ (void)sve_access_check(s); return true; @@ -6667,7 +5891,7 @@ static bool trans_PRF(DisasContext *s, arg_PRF *a) static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a) { - if (a->rm == 31) { + if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) { return false; } /* Prefetch is a nop within QEMU. */ @@ -6675,6 +5899,17 @@ static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a) return true; } +static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a) +{ + if (!dc_isar_feature(aa64_sve, s)) { + return false; + } + /* Prefetch is a nop within QEMU. */ + s->is_nonstreaming = true; + (void)sve_access_check(s); + return true; +} + /* * Move Prefix * @@ -6689,294 +5924,213 @@ static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a) * In the meantime, just emit the moves. 
*/ -static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a) -{ - return do_mov_z(s, a->rd, a->rn); -} - -static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a) -{ - if (sve_access_check(s)) { - do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz); - } - return true; -} - -static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a) -{ - return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false); -} +TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn) +TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz) +TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false) /* * SVE2 Integer Multiply - Unpredicated */ -static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a) -{ - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm); - } - return true; -} - -static bool do_sve2_zzz_ool(DisasContext *s, arg_rrr_esz *a, - gen_helper_gvec_3 *fn) -{ - if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0); - } - return true; -} +TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a) -static bool trans_SMULH_zzz(DisasContext *s, arg_rrr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, - gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d, - }; - return do_sve2_zzz_ool(s, a, fns[a->esz]); -} +static gen_helper_gvec_3 * const smulh_zzz_fns[4] = { + gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h, + gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d, +}; +TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, + smulh_zzz_fns[a->esz], a, 0) -static bool trans_UMULH_zzz(DisasContext *s, arg_rrr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h, - gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d, - }; - return do_sve2_zzz_ool(s, a, fns[a->esz]); -} +static gen_helper_gvec_3 * const umulh_zzz_fns[4] = { + gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h, + gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d, +}; +TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, + umulh_zzz_fns[a->esz], a, 0) -static bool trans_PMUL_zzz(DisasContext *s, arg_rrr_esz *a) -{ - return do_sve2_zzz_ool(s, a, gen_helper_gvec_pmul_b); -} +TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, + gen_helper_gvec_pmul_b, a, 0) -static bool trans_SQDMULH_zzz(DisasContext *s, arg_rrr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, - gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, - }; - return do_sve2_zzz_ool(s, a, fns[a->esz]); -} +static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = { + gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h, + gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d, +}; +TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz, + sqdmulh_zzz_fns[a->esz], a, 0) -static bool trans_SQRDMULH_zzz(DisasContext *s, arg_rrr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, - gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, - }; - return do_sve2_zzz_ool(s, a, fns[a->esz]); -} +static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = { + gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h, + gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d, +}; +TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, 
gen_gvec_ool_arg_zzz, + sqrdmulh_zzz_fns[a->esz], a, 0) /* * SVE2 Integer - Predicated */ -static bool do_sve2_zpzz_ool(DisasContext *s, arg_rprr_esz *a, - gen_helper_gvec_4 *fn) -{ - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - return do_zpzz_ool(s, a, fn); -} - -static bool trans_SADALP_zpzz(DisasContext *s, arg_rprr_esz *a) -{ - static gen_helper_gvec_4 * const fns[3] = { - gen_helper_sve2_sadalp_zpzz_h, - gen_helper_sve2_sadalp_zpzz_s, - gen_helper_sve2_sadalp_zpzz_d, - }; - if (a->esz == 0) { - return false; - } - return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]); -} +static gen_helper_gvec_4 * const sadlp_fns[4] = { + NULL, gen_helper_sve2_sadalp_zpzz_h, + gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d, +}; +TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, + sadlp_fns[a->esz], a, 0) -static bool trans_UADALP_zpzz(DisasContext *s, arg_rprr_esz *a) -{ - static gen_helper_gvec_4 * const fns[3] = { - gen_helper_sve2_uadalp_zpzz_h, - gen_helper_sve2_uadalp_zpzz_s, - gen_helper_sve2_uadalp_zpzz_d, - }; - if (a->esz == 0) { - return false; - } - return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]); -} +static gen_helper_gvec_4 * const uadlp_fns[4] = { + NULL, gen_helper_sve2_uadalp_zpzz_h, + gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d, +}; +TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz, + uadlp_fns[a->esz], a, 0) /* * SVE2 integer unary operations (predicated) */ -static bool do_sve2_zpz_ool(DisasContext *s, arg_rpr_esz *a, - gen_helper_gvec_3 *fn) -{ - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - return do_zpz_ool(s, a, fn); -} - -static bool trans_URECPE(DisasContext *s, arg_rpr_esz *a) -{ - if (a->esz != 2) { - return false; - } - return do_sve2_zpz_ool(s, a, gen_helper_sve2_urecpe_s); -} - -static bool trans_URSQRTE(DisasContext *s, arg_rpr_esz *a) -{ - if (a->esz != 2) { - return false; - } - return do_sve2_zpz_ool(s, a, gen_helper_sve2_ursqrte_s); -} +TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz, + a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0) -static bool trans_SQABS(DisasContext *s, arg_rpr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h, - gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d, - }; - return do_sve2_zpz_ool(s, a, fns[a->esz]); -} +TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz, + a->esz == 2 ? 
gen_helper_sve2_ursqrte_s : NULL, a, 0) -static bool trans_SQNEG(DisasContext *s, arg_rpr_esz *a) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h, - gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d, - }; - return do_sve2_zpz_ool(s, a, fns[a->esz]); -} +static gen_helper_gvec_3 * const sqabs_fns[4] = { + gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h, + gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d, +}; +TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0) -#define DO_SVE2_ZPZZ(NAME, name) \ -static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \ -{ \ - static gen_helper_gvec_4 * const fns[4] = { \ - gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \ - gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d, \ - }; \ - return do_sve2_zpzz_ool(s, a, fns[a->esz]); \ -} +static gen_helper_gvec_3 * const sqneg_fns[4] = { + gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h, + gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d, +}; +TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0) -DO_SVE2_ZPZZ(SQSHL, sqshl) -DO_SVE2_ZPZZ(SQRSHL, sqrshl) -DO_SVE2_ZPZZ(SRSHL, srshl) +DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl) +DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl) +DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl) -DO_SVE2_ZPZZ(UQSHL, uqshl) -DO_SVE2_ZPZZ(UQRSHL, uqrshl) -DO_SVE2_ZPZZ(URSHL, urshl) +DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl) +DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl) +DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl) -DO_SVE2_ZPZZ(SHADD, shadd) -DO_SVE2_ZPZZ(SRHADD, srhadd) -DO_SVE2_ZPZZ(SHSUB, shsub) +DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd) +DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd) +DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub) -DO_SVE2_ZPZZ(UHADD, uhadd) -DO_SVE2_ZPZZ(URHADD, urhadd) -DO_SVE2_ZPZZ(UHSUB, uhsub) +DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd) +DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd) +DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub) -DO_SVE2_ZPZZ(ADDP, addp) -DO_SVE2_ZPZZ(SMAXP, smaxp) -DO_SVE2_ZPZZ(UMAXP, umaxp) -DO_SVE2_ZPZZ(SMINP, sminp) -DO_SVE2_ZPZZ(UMINP, uminp) +DO_ZPZZ(ADDP, aa64_sve2, sve2_addp) +DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp) +DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp) +DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp) +DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp) -DO_SVE2_ZPZZ(SQADD_zpzz, sqadd) -DO_SVE2_ZPZZ(UQADD_zpzz, uqadd) -DO_SVE2_ZPZZ(SQSUB_zpzz, sqsub) -DO_SVE2_ZPZZ(UQSUB_zpzz, uqsub) -DO_SVE2_ZPZZ(SUQADD, suqadd) -DO_SVE2_ZPZZ(USQADD, usqadd) +DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd) +DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd) +DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub) +DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub) +DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd) +DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd) /* * SVE2 Widening Integer Arithmetic */ -static bool do_sve2_zzw_ool(DisasContext *s, arg_rrr_esz *a, - gen_helper_gvec_3 *fn, int data) -{ - if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) { - return false; - } - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vsz, vsz, data, fn); - } - return true; -} - -#define DO_SVE2_ZZZ_TB(NAME, name, SEL1, SEL2) \ -static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \ -{ \ - static gen_helper_gvec_3 * const fns[4] = { \ - NULL, gen_helper_sve2_##name##_h, \ - gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \ - }; \ - return do_sve2_zzw_ool(s, a, fns[a->esz], (SEL2 << 1) | SEL1); \ -} - -DO_SVE2_ZZZ_TB(SADDLB, saddl, false, 
-DO_SVE2_ZZZ_TB(SSUBLB, ssubl, false, false)
-DO_SVE2_ZZZ_TB(SABDLB, sabdl, false, false)
-
-DO_SVE2_ZZZ_TB(UADDLB, uaddl, false, false)
-DO_SVE2_ZZZ_TB(USUBLB, usubl, false, false)
-DO_SVE2_ZZZ_TB(UABDLB, uabdl, false, false)
-
-DO_SVE2_ZZZ_TB(SADDLT, saddl, true, true)
-DO_SVE2_ZZZ_TB(SSUBLT, ssubl, true, true)
-DO_SVE2_ZZZ_TB(SABDLT, sabdl, true, true)
-
-DO_SVE2_ZZZ_TB(UADDLT, uaddl, true, true)
-DO_SVE2_ZZZ_TB(USUBLT, usubl, true, true)
-DO_SVE2_ZZZ_TB(UABDLT, uabdl, true, true)
-
-DO_SVE2_ZZZ_TB(SADDLBT, saddl, false, true)
-DO_SVE2_ZZZ_TB(SSUBLBT, ssubl, false, true)
-DO_SVE2_ZZZ_TB(SSUBLTB, ssubl, true, false)
-
-DO_SVE2_ZZZ_TB(SQDMULLB_zzz, sqdmull_zzz, false, false)
-DO_SVE2_ZZZ_TB(SQDMULLT_zzz, sqdmull_zzz, true, true)
-
-DO_SVE2_ZZZ_TB(SMULLB_zzz, smull_zzz, false, false)
-DO_SVE2_ZZZ_TB(SMULLT_zzz, smull_zzz, true, true)
-
-DO_SVE2_ZZZ_TB(UMULLB_zzz, umull_zzz, false, false)
-DO_SVE2_ZZZ_TB(UMULLT_zzz, umull_zzz, true, true)
-
-static bool do_eor_tb(DisasContext *s, arg_rrr_esz *a, bool sel1)
-{
-    static gen_helper_gvec_3 * const fns[4] = {
-        gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
-        gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
-    };
-    return do_sve2_zzw_ool(s, a, fns[a->esz], (!sel1 << 1) | sel1);
-}
-
-static bool trans_EORBT(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_eor_tb(s, a, false);
-}
-
-static bool trans_EORTB(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_eor_tb(s, a, true);
-}
+static gen_helper_gvec_3 * const saddl_fns[4] = {
+    NULL, gen_helper_sve2_saddl_h,
+    gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
+};
+TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
+           saddl_fns[a->esz], a, 0)
+TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
+           saddl_fns[a->esz], a, 3)
+TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
+           saddl_fns[a->esz], a, 2)
+
+static gen_helper_gvec_3 * const ssubl_fns[4] = {
+    NULL, gen_helper_sve2_ssubl_h,
+    gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
+};
+TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
+           ssubl_fns[a->esz], a, 0)
+TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
+           ssubl_fns[a->esz], a, 3)
+TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
+           ssubl_fns[a->esz], a, 2)
+TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
+           ssubl_fns[a->esz], a, 1)
+
+static gen_helper_gvec_3 * const sabdl_fns[4] = {
+    NULL, gen_helper_sve2_sabdl_h,
+    gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
+};
+TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
+           sabdl_fns[a->esz], a, 0)
+TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
+           sabdl_fns[a->esz], a, 3)
+
+static gen_helper_gvec_3 * const uaddl_fns[4] = {
+    NULL, gen_helper_sve2_uaddl_h,
+    gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
+};
+TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
+           uaddl_fns[a->esz], a, 0)
+TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
+           uaddl_fns[a->esz], a, 3)
+
+static gen_helper_gvec_3 * const usubl_fns[4] = {
+    NULL, gen_helper_sve2_usubl_h,
+    gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
+};
+TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
+           usubl_fns[a->esz], a, 0)
+TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
+           usubl_fns[a->esz], a, 3)
+
+static gen_helper_gvec_3 * const uabdl_fns[4] = {
+    NULL, gen_helper_sve2_uabdl_h,
+    gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
+};
+TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
+           uabdl_fns[a->esz], a, 0)
+TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
+           uabdl_fns[a->esz], a, 3)
+
+static gen_helper_gvec_3 * const sqdmull_fns[4] = {
+    NULL, gen_helper_sve2_sqdmull_zzz_h,
+    gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
+};
+TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+           sqdmull_fns[a->esz], a, 0)
+TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+           sqdmull_fns[a->esz], a, 3)
+
+static gen_helper_gvec_3 * const smull_fns[4] = {
+    NULL, gen_helper_sve2_smull_zzz_h,
+    gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
+};
+TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+           smull_fns[a->esz], a, 0)
+TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+           smull_fns[a->esz], a, 3)
+
+static gen_helper_gvec_3 * const umull_fns[4] = {
+    NULL, gen_helper_sve2_umull_zzz_h,
+    gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
+};
+TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+           umull_fns[a->esz], a, 0)
+TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+           umull_fns[a->esz], a, 3)
+
+static gen_helper_gvec_3 * const eoril_fns[4] = {
+    gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
+    gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
+};
+TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
+TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
 
 static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
 {
@@ -6984,41 +6138,48 @@ static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
         gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
         NULL, gen_helper_sve2_pmull_d,
     };
-    if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
+
+    if (a->esz == 0) {
+        if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
+            return false;
+        }
+        s->is_nonstreaming = true;
+    } else if (!dc_isar_feature(aa64_sve, s)) {
         return false;
     }
-    return do_sve2_zzw_ool(s, a, fns[a->esz], sel);
+    return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
 }
 
-static bool trans_PMULLB(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_trans_pmull(s, a, false);
-}
+TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
+TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
 
-static bool trans_PMULLT(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_trans_pmull(s, a, true);
-}
+static gen_helper_gvec_3 * const saddw_fns[4] = {
+    NULL, gen_helper_sve2_saddw_h,
+    gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
+};
+TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
+TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)
 
-#define DO_SVE2_ZZZ_WTB(NAME, name, SEL2) \
-static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a) \
-{ \
-    static gen_helper_gvec_3 * const fns[4] = { \
-        NULL, gen_helper_sve2_##name##_h, \
-        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
-    }; \
-    return do_sve2_zzw_ool(s, a, fns[a->esz], SEL2); \
-}
+static gen_helper_gvec_3 * const ssubw_fns[4] = {
+    NULL, gen_helper_sve2_ssubw_h,
+    gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
+};
+TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
+TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)
 
-DO_SVE2_ZZZ_WTB(SADDWB, saddw, false)
-DO_SVE2_ZZZ_WTB(SADDWT, saddw, true)
-DO_SVE2_ZZZ_WTB(SSUBWB, ssubw, false)
-DO_SVE2_ZZZ_WTB(SSUBWT, ssubw, true)
+static gen_helper_gvec_3 * const uaddw_fns[4] = {
+    NULL, gen_helper_sve2_uaddw_h,
+    gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
+};
+TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
+TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)
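A note on the trailing data arguments in these widening conversions: they replace the old (SEL2 << 1) | SEL1 plumbing, so 0 selects bottom/bottom, 3 top/top, 2 bottom/top (the *BT forms) and 1 top/bottom (the *TB forms); a NULL table entry at index 0 makes TRANS_FEAT reject the invalid byte element size. A hypothetical helper-side decode, shown only to make the packing explicit (decode_tb_sel is illustrative, not a function in this file):

/* Hypothetical: unpack the selector that gen_gvec_ool_arg_zzz passes
 * through as simd data.  Mirrors the old (SEL2 << 1) | SEL1 encoding. */
static inline void decode_tb_sel(uint32_t data, bool *sel1, bool *sel2)
{
    *sel1 = data & 1;         /* first source: 0 = bottom half, 1 = top */
    *sel2 = (data >> 1) & 1;  /* second source: 0 = bottom half, 1 = top */
}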
-DO_SVE2_ZZZ_WTB(UADDWB, uaddw, false)
-DO_SVE2_ZZZ_WTB(UADDWT, uaddw, true)
-DO_SVE2_ZZZ_WTB(USUBWB, usubw, false)
-DO_SVE2_ZZZ_WTB(USUBWT, usubw, true)
+static gen_helper_gvec_3 * const usubw_fns[4] = {
+    NULL, gen_helper_sve2_usubw_h,
+    gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
+};
+TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
+TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
 
 static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
 {
@@ -7031,7 +6192,6 @@ static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
         TCGv_vec t = tcg_temp_new_vec_matching(d);
         tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
         tcg_gen_and_vec(vece, d, n, t);
-        tcg_temp_free_vec(t);
     } else {
         tcg_gen_sari_vec(vece, d, n, halfbits);
         tcg_gen_shli_vec(vece, d, d, shl);
@@ -7089,7 +6249,6 @@ static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
             TCGv_vec t = tcg_temp_new_vec_matching(d);
             tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
             tcg_gen_and_vec(vece, d, n, t);
-            tcg_temp_free_vec(t);
         } else {
             tcg_gen_shri_vec(vece, d, n, halfbits);
             tcg_gen_shli_vec(vece, d, d, shl);
@@ -7099,7 +6258,6 @@ static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
             TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
             tcg_gen_and_vec(vece, d, n, t);
-            tcg_temp_free_vec(t);
         } else {
             tcg_gen_shli_vec(vece, d, n, halfbits);
             tcg_gen_shri_vec(vece, d, d, halfbits - shl);
@@ -7107,46 +6265,11 @@ static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
     }
 }
 
-static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
-                            bool sel, bool uns)
+static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
+                       const GVecGen2i ops[3], bool sel)
 {
-    static const TCGOpcode sshll_list[] = {
-        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
-    };
-    static const TCGOpcode ushll_list[] = {
-        INDEX_op_shli_vec, INDEX_op_shri_vec, 0
-    };
-    static const GVecGen2i ops[2][3] = {
-        { { .fniv = gen_sshll_vec,
-            .opt_opc = sshll_list,
-            .fno = gen_helper_sve2_sshll_h,
-            .vece = MO_16 },
-          { .fniv = gen_sshll_vec,
-            .opt_opc = sshll_list,
-            .fno = gen_helper_sve2_sshll_s,
-            .vece = MO_32 },
-          { .fniv = gen_sshll_vec,
-            .opt_opc = sshll_list,
-            .fno = gen_helper_sve2_sshll_d,
-            .vece = MO_64 } },
-        { { .fni8 = gen_ushll16_i64,
-            .fniv = gen_ushll_vec,
-            .opt_opc = ushll_list,
-            .fno = gen_helper_sve2_ushll_h,
-            .vece = MO_16 },
-          { .fni8 = gen_ushll32_i64,
-            .fniv = gen_ushll_vec,
-            .opt_opc = ushll_list,
-            .fno = gen_helper_sve2_ushll_s,
-            .vece = MO_32 },
-          { .fni8 = gen_ushll64_i64,
-            .fniv = gen_ushll_vec,
-            .opt_opc = ushll_list,
-            .fno = gen_helper_sve2_ushll_d,
-            .vece = MO_64 } },
-    };
-    if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
+    if (a->esz < 0 || a->esz > 2) {
         return false;
     }
     if (sve_access_check(s)) {
@@ -7154,140 +6277,106 @@ static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
         tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                         vec_full_reg_offset(s, a->rn),
                         vsz, vsz, (a->imm << 1) | sel,
-                        &ops[uns][a->esz]);
+                        &ops[a->esz]);
     }
     return true;
 }
 
-static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
-{
-    return do_sve2_shll_tb(s, a, false, false);
-}
-
-static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
-{
-    return do_sve2_shll_tb(s, a, true, false);
-}
-
-static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
-{
-    return do_sve2_shll_tb(s, a, false, true);
-}
-
-static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
-{
-    return do_sve2_shll_tb(s, a, true, true);
-}
-
-static bool trans_BEXT(DisasContext *s, arg_rrr_esz *a)
-{
-    static gen_helper_gvec_3 * const fns[4] = {
-        gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
-        gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
-    };
-    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
-        return false;
-    }
-    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
-}
-
-static bool trans_BDEP(DisasContext *s, arg_rrr_esz *a)
-{
-    static gen_helper_gvec_3 * const fns[4] = {
-        gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
-        gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
-    };
-    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
-        return false;
-    }
-    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
-}
-
-static bool trans_BGRP(DisasContext *s, arg_rrr_esz *a)
-{
-    static gen_helper_gvec_3 * const fns[4] = {
-        gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
-        gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
-    };
-    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
-        return false;
-    }
-    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
-}
-
-static bool do_cadd(DisasContext *s, arg_rrr_esz *a, bool sq, bool rot)
-{
-    static gen_helper_gvec_3 * const fns[2][4] = {
-        { gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
-          gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d },
-        { gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
-          gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d },
-    };
-    return do_sve2_zzw_ool(s, a, fns[sq][a->esz], rot);
-}
-
-static bool trans_CADD_rot90(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_cadd(s, a, false, false);
-}
-
-static bool trans_CADD_rot270(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_cadd(s, a, false, true);
-}
-
-static bool trans_SQCADD_rot90(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_cadd(s, a, true, false);
-}
-
-static bool trans_SQCADD_rot270(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_cadd(s, a, true, true);
-}
+static const TCGOpcode sshll_list[] = {
+    INDEX_op_shli_vec, INDEX_op_sari_vec, 0
+};
+static const GVecGen2i sshll_ops[3] = {
+    { .fniv = gen_sshll_vec,
+      .opt_opc = sshll_list,
+      .fno = gen_helper_sve2_sshll_h,
+      .vece = MO_16 },
+    { .fniv = gen_sshll_vec,
+      .opt_opc = sshll_list,
+      .fno = gen_helper_sve2_sshll_s,
+      .vece = MO_32 },
+    { .fniv = gen_sshll_vec,
+      .opt_opc = sshll_list,
+      .fno = gen_helper_sve2_sshll_d,
+      .vece = MO_64 }
+};
+TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
+TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)
 
-static bool do_sve2_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
-                             gen_helper_gvec_4 *fn, int data)
-{
-    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
-        return false;
-    }
-    if (sve_access_check(s)) {
-        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
-    }
-    return true;
-}
+static const TCGOpcode ushll_list[] = {
+    INDEX_op_shli_vec, INDEX_op_shri_vec, 0
+};
+static const GVecGen2i ushll_ops[3] = {
+    { .fni8 = gen_ushll16_i64,
+      .fniv = gen_ushll_vec,
+      .opt_opc = ushll_list,
+      .fno = gen_helper_sve2_ushll_h,
+      .vece = MO_16 },
+    { .fni8 = gen_ushll32_i64,
+      .fniv = gen_ushll_vec,
+      .opt_opc = ushll_list,
+      .fno = gen_helper_sve2_ushll_s,
+      .vece = MO_32 },
+    { .fni8 = gen_ushll64_i64,
+      .fniv = gen_ushll_vec,
+      .opt_opc = ushll_list,
+      .fno = gen_helper_sve2_ushll_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
+TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)
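The sshll_ops/ushll_ops tables are hoisted to file scope so TRANS_FEAT can name them. For reference, do_shll_tb drives them through tcg_gen_gvec_2i(), which (roughly) prefers the .fniv vector expansion when the host supports the opcodes in .opt_opc, falls back to the .fni8 64-bit version where provided, and calls the .fno out-of-line helper otherwise. A restatement of the call it makes, with the shift immediate and bottom/top bit packed into one operand (expand_shll_example is illustrative, not new code in this series):

/* Restatement of the expansion in do_shll_tb: the immediate passed to
 * tcg_gen_gvec_2i() packs the shift amount and the top/bottom selector,
 * which the .fniv/.fni8/.fno implementations then unpack. */
static void expand_shll_example(DisasContext *s, arg_rri_esz *a,
                                const GVecGen2i ops[3], bool sel)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                    vec_full_reg_offset(s, a->rn),
                    vsz, vsz, (a->imm << 1) | sel, &ops[a->esz]);
}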
-static bool do_abal(DisasContext *s, arg_rrrr_esz *a, bool uns, bool sel)
-{
-    static gen_helper_gvec_4 * const fns[2][4] = {
-        { NULL, gen_helper_sve2_sabal_h,
-          gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d },
-        { NULL, gen_helper_sve2_uabal_h,
-          gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d },
-    };
-    return do_sve2_zzzz_ool(s, a, fns[uns][a->esz], sel);
-}
+static gen_helper_gvec_3 * const bext_fns[4] = {
+    gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
+    gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
+};
+TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
+                        bext_fns[a->esz], a, 0)
 
-static bool trans_SABALB(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_abal(s, a, false, false);
-}
+static gen_helper_gvec_3 * const bdep_fns[4] = {
+    gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
+    gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
+};
+TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
+                        bdep_fns[a->esz], a, 0)
 
-static bool trans_SABALT(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_abal(s, a, false, true);
-}
+static gen_helper_gvec_3 * const bgrp_fns[4] = {
+    gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
+    gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
+};
+TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
+                        bgrp_fns[a->esz], a, 0)
 
-static bool trans_UABALB(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_abal(s, a, true, false);
-}
+static gen_helper_gvec_3 * const cadd_fns[4] = {
+    gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
+    gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
+};
+TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
+           cadd_fns[a->esz], a, 0)
+TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
+           cadd_fns[a->esz], a, 1)
+
+static gen_helper_gvec_3 * const sqcadd_fns[4] = {
+    gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
+    gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
+};
+TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
+           sqcadd_fns[a->esz], a, 0)
+TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
+           sqcadd_fns[a->esz], a, 1)
+
+static gen_helper_gvec_4 * const sabal_fns[4] = {
+    NULL, gen_helper_sve2_sabal_h,
+    gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
+};
+TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
+TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)
 
-static bool trans_UABALT(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_abal(s, a, true, true);
-}
+static gen_helper_gvec_4 * const uabal_fns[4] = {
+    NULL, gen_helper_sve2_uabal_h,
+    gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
+};
+TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
+TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
 
 static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
 {
@@ -7299,89 +6388,26 @@ static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
      * Note that in this case the ESZ field encodes both size and sign.
      * Split out 'subtract' into bit 1 of the data field for the helper.
      */
-    return do_sve2_zzzz_ool(s, a, fns[a->esz & 1], (a->esz & 2) | sel);
+    return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
 }
 
-static bool trans_ADCLB(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_adcl(s, a, false);
-}
+TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
+TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
 
-static bool trans_ADCLT(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_adcl(s, a, true);
-}
+TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
+TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
+TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
+TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
+TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
+TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)
 
-static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
-{
-    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
-        return false;
-    }
-    if (sve_access_check(s)) {
-        unsigned vsz = vec_full_reg_size(s);
-        unsigned rd_ofs = vec_full_reg_offset(s, a->rd);
-        unsigned rn_ofs = vec_full_reg_offset(s, a->rn);
-        fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz);
-    }
-    return true;
-}
-
-static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
-{
-    return do_sve2_fn2i(s, a, gen_gvec_ssra);
-}
-
-static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
-{
-    return do_sve2_fn2i(s, a, gen_gvec_usra);
-}
+TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
+TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
 
-static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
+static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
+                              const GVecGen2 ops[3])
 {
-    return do_sve2_fn2i(s, a, gen_gvec_srsra);
-}
-
-static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
-{
-    return do_sve2_fn2i(s, a, gen_gvec_ursra);
-}
-
-static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
-{
-    return do_sve2_fn2i(s, a, gen_gvec_sri);
-}
-
-static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
-{
-    return do_sve2_fn2i(s, a, gen_gvec_sli);
-}
-
-static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn)
-{
-    if (!dc_isar_feature(aa64_sve2, s)) {
-        return false;
-    }
-    if (sve_access_check(s)) {
-        gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
-    }
-    return true;
-}
-
-static bool trans_SABA(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_sve2_fn_zzz(s, a, gen_gvec_saba);
-}
-
-static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
-{
-    return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
-}
-
-static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
-                                   const GVecGen2 ops[3])
-{
-    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
-        !dc_isar_feature(aa64_sve2, s)) {
+    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
         return false;
     }
     if (sve_access_check(s)) {
@@ -7411,27 +6437,23 @@ static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
     tcg_gen_smin_vec(vece, d, d, t);
     tcg_gen_dupi_vec(vece, t, mask);
     tcg_gen_and_vec(vece, d, d, t);
-    tcg_temp_free_vec(t);
 }
 
-static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
-{
-    static const GVecGen2 ops[3] = {
-        { .fniv = gen_sqxtnb_vec,
-          .opt_opc = sqxtn_list,
-          .fno = gen_helper_sve2_sqxtnb_h,
-          .vece = MO_16 },
-        { .fniv = gen_sqxtnb_vec,
-          .opt_opc = sqxtn_list,
-          .fno = gen_helper_sve2_sqxtnb_s,
-          .vece = MO_32 },
-        { .fniv = gen_sqxtnb_vec,
-          .opt_opc = sqxtn_list,
-          .fno = gen_helper_sve2_sqxtnb_d,
-          .vece = MO_64 },
-    };
-    return do_sve2_narrow_extract(s, a, ops);
-}
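The TRANS_FEAT_NONSTREAMING used for BEXT/BDEP/BGRP above differs from TRANS_FEAT only in flagging the instruction as illegal in SME streaming mode. A sketch, assuming the definition sits beside TRANS_FEAT in translate.h:

/* Sketch, assuming the macro alongside TRANS_FEAT in translate.h: mark
 * the instruction non-streaming before the usual feature check, so SME
 * streaming mode can reject it. */
#define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...)               \
    static bool trans_##NAME(DisasContext *s, arg_##NAME *a)         \
    {                                                                \
        s->is_nonstreaming = true;                                   \
        return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__);     \
    }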
+static const GVecGen2 sqxtnb_ops[3] = {
+    { .fniv = gen_sqxtnb_vec,
+      .opt_opc = sqxtn_list,
+      .fno = gen_helper_sve2_sqxtnb_h,
+      .vece = MO_16 },
+    { .fniv = gen_sqxtnb_vec,
+      .opt_opc = sqxtn_list,
+      .fno = gen_helper_sve2_sqxtnb_s,
+      .vece = MO_32 },
+    { .fniv = gen_sqxtnb_vec,
+      .opt_opc = sqxtn_list,
+      .fno = gen_helper_sve2_sqxtnb_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)
 
 static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
 {
@@ -7448,30 +6470,26 @@ static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
     tcg_gen_shli_vec(vece, n, n, halfbits);
     tcg_gen_dupi_vec(vece, t, mask);
     tcg_gen_bitsel_vec(vece, d, t, d, n);
-    tcg_temp_free_vec(t);
 }
 
-static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
-{
-    static const GVecGen2 ops[3] = {
-        { .fniv = gen_sqxtnt_vec,
-          .opt_opc = sqxtn_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_sqxtnt_h,
-          .vece = MO_16 },
-        { .fniv = gen_sqxtnt_vec,
-          .opt_opc = sqxtn_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_sqxtnt_s,
-          .vece = MO_32 },
-        { .fniv = gen_sqxtnt_vec,
-          .opt_opc = sqxtn_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_sqxtnt_d,
-          .vece = MO_64 },
-    };
-    return do_sve2_narrow_extract(s, a, ops);
-}
+static const GVecGen2 sqxtnt_ops[3] = {
+    { .fniv = gen_sqxtnt_vec,
+      .opt_opc = sqxtn_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_sqxtnt_h,
+      .vece = MO_16 },
+    { .fniv = gen_sqxtnt_vec,
+      .opt_opc = sqxtn_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_sqxtnt_s,
+      .vece = MO_32 },
+    { .fniv = gen_sqxtnt_vec,
+      .opt_opc = sqxtn_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_sqxtnt_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)
 
 static const TCGOpcode uqxtn_list[] = {
     INDEX_op_shli_vec, INDEX_op_umin_vec, 0
@@ -7485,27 +6503,23 @@ static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
 
     tcg_gen_dupi_vec(vece, t, max);
     tcg_gen_umin_vec(vece, d, n, t);
-    tcg_temp_free_vec(t);
 }
 
-static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
-{
-    static const GVecGen2 ops[3] = {
-        { .fniv = gen_uqxtnb_vec,
-          .opt_opc = uqxtn_list,
-          .fno = gen_helper_sve2_uqxtnb_h,
-          .vece = MO_16 },
-        { .fniv = gen_uqxtnb_vec,
-          .opt_opc = uqxtn_list,
-          .fno = gen_helper_sve2_uqxtnb_s,
-          .vece = MO_32 },
-        { .fniv = gen_uqxtnb_vec,
-          .opt_opc = uqxtn_list,
-          .fno = gen_helper_sve2_uqxtnb_d,
-          .vece = MO_64 },
-    };
-    return do_sve2_narrow_extract(s, a, ops);
-}
+static const GVecGen2 uqxtnb_ops[3] = {
+    { .fniv = gen_uqxtnb_vec,
+      .opt_opc = uqxtn_list,
+      .fno = gen_helper_sve2_uqxtnb_h,
+      .vece = MO_16 },
+    { .fniv = gen_uqxtnb_vec,
+      .opt_opc = uqxtn_list,
+      .fno = gen_helper_sve2_uqxtnb_s,
+      .vece = MO_32 },
+    { .fniv = gen_uqxtnb_vec,
+      .opt_opc = uqxtn_list,
+      .fno = gen_helper_sve2_uqxtnb_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)
 
 static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
 {
@@ -7517,30 +6531,26 @@ static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
     tcg_gen_umin_vec(vece, n, n, t);
     tcg_gen_shli_vec(vece, n, n, halfbits);
     tcg_gen_bitsel_vec(vece, d, t, d, n);
-    tcg_temp_free_vec(t);
 }
 
-static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
-{
-    static const GVecGen2 ops[3] = {
-        { .fniv = gen_uqxtnt_vec,
-          .opt_opc = uqxtn_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_uqxtnt_h,
-          .vece = MO_16 },
-        { .fniv = gen_uqxtnt_vec,
-          .opt_opc = uqxtn_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_uqxtnt_s,
-          .vece = MO_32 },
-        { .fniv = gen_uqxtnt_vec,
-          .opt_opc = uqxtn_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_uqxtnt_d,
-          .vece = MO_64 },
-    };
-    return do_sve2_narrow_extract(s, a, ops);
-}
+static const GVecGen2 uqxtnt_ops[3] = {
+    { .fniv = gen_uqxtnt_vec,
+      .opt_opc = uqxtn_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_uqxtnt_h,
+      .vece = MO_16 },
+    { .fniv = gen_uqxtnt_vec,
+      .opt_opc = uqxtn_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_uqxtnt_s,
+      .vece = MO_32 },
+    { .fniv = gen_uqxtnt_vec,
+      .opt_opc = uqxtn_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_uqxtnt_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)
 
 static const TCGOpcode sqxtun_list[] = {
     INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
@@ -7556,27 +6566,23 @@ static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
     tcg_gen_smax_vec(vece, d, n, t);
     tcg_gen_dupi_vec(vece, t, max);
     tcg_gen_umin_vec(vece, d, d, t);
-    tcg_temp_free_vec(t);
 }
 
-static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
-{
-    static const GVecGen2 ops[3] = {
-        { .fniv = gen_sqxtunb_vec,
-          .opt_opc = sqxtun_list,
-          .fno = gen_helper_sve2_sqxtunb_h,
-          .vece = MO_16 },
-        { .fniv = gen_sqxtunb_vec,
-          .opt_opc = sqxtun_list,
-          .fno = gen_helper_sve2_sqxtunb_s,
-          .vece = MO_32 },
-        { .fniv = gen_sqxtunb_vec,
-          .opt_opc = sqxtun_list,
-          .fno = gen_helper_sve2_sqxtunb_d,
-          .vece = MO_64 },
-    };
-    return do_sve2_narrow_extract(s, a, ops);
-}
+static const GVecGen2 sqxtunb_ops[3] = {
+    { .fniv = gen_sqxtunb_vec,
+      .opt_opc = sqxtun_list,
+      .fno = gen_helper_sve2_sqxtunb_h,
+      .vece = MO_16 },
+    { .fniv = gen_sqxtunb_vec,
+      .opt_opc = sqxtun_list,
+      .fno = gen_helper_sve2_sqxtunb_s,
+      .vece = MO_32 },
+    { .fniv = gen_sqxtunb_vec,
+      .opt_opc = sqxtun_list,
+      .fno = gen_helper_sve2_sqxtunb_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)
 
 static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
 {
@@ -7590,35 +6596,31 @@ static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
     tcg_gen_umin_vec(vece, n, n, t);
     tcg_gen_shli_vec(vece, n, n, halfbits);
     tcg_gen_bitsel_vec(vece, d, t, d, n);
-    tcg_temp_free_vec(t);
 }
 
-static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
-{
-    static const GVecGen2 ops[3] = {
-        { .fniv = gen_sqxtunt_vec,
-          .opt_opc = sqxtun_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_sqxtunt_h,
-          .vece = MO_16 },
-        { .fniv = gen_sqxtunt_vec,
-          .opt_opc = sqxtun_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_sqxtunt_s,
-          .vece = MO_32 },
-        { .fniv = gen_sqxtunt_vec,
-          .opt_opc = sqxtun_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_sqxtunt_d,
-          .vece = MO_64 },
-    };
-    return do_sve2_narrow_extract(s, a, ops);
-}
+static const GVecGen2 sqxtunt_ops[3] = {
+    { .fniv = gen_sqxtunt_vec,
+      .opt_opc = sqxtun_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_sqxtunt_h,
+      .vece = MO_16 },
+    { .fniv = gen_sqxtunt_vec,
+      .opt_opc = sqxtun_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_sqxtunt_s,
+      .vece = MO_32 },
+    { .fniv = gen_sqxtunt_vec,
+      .opt_opc = sqxtun_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_sqxtunt_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)
 
-static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
-                               const GVecGen2i ops[3])
+static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
+                          const GVecGen2i ops[3])
 {
-    if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
+    if (a->esz < 0 || a->esz > MO_32) {
         return false;
     }
     assert(a->imm > 0 && a->imm <= (8 << a->esz));
@@ -7664,31 +6666,27 @@ static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
     tcg_gen_shri_vec(vece, n, n, shr);
     tcg_gen_dupi_vec(vece, t, mask);
     tcg_gen_and_vec(vece, d, n, t);
-    tcg_temp_free_vec(t);
 }
 
-static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
-{
-    static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
-    static const GVecGen2i ops[3] = {
-        { .fni8 = gen_shrnb16_i64,
-          .fniv = gen_shrnb_vec,
-          .opt_opc = vec_list,
-          .fno = gen_helper_sve2_shrnb_h,
-          .vece = MO_16 },
-        { .fni8 = gen_shrnb32_i64,
-          .fniv = gen_shrnb_vec,
-          .opt_opc = vec_list,
-          .fno = gen_helper_sve2_shrnb_s,
-          .vece = MO_32 },
-        { .fni8 = gen_shrnb64_i64,
-          .fniv = gen_shrnb_vec,
-          .opt_opc = vec_list,
-          .fno = gen_helper_sve2_shrnb_d,
-          .vece = MO_64 },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
+static const GVecGen2i shrnb_ops[3] = {
+    { .fni8 = gen_shrnb16_i64,
+      .fniv = gen_shrnb_vec,
+      .opt_opc = shrnb_vec_list,
+      .fno = gen_helper_sve2_shrnb_h,
+      .vece = MO_16 },
+    { .fni8 = gen_shrnb32_i64,
+      .fniv = gen_shrnb_vec,
+      .opt_opc = shrnb_vec_list,
+      .fno = gen_helper_sve2_shrnb_s,
+      .vece = MO_32 },
+    { .fni8 = gen_shrnb64_i64,
+      .fniv = gen_shrnb_vec,
+      .opt_opc = shrnb_vec_list,
+      .fno = gen_helper_sve2_shrnb_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)
 
 static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
 {
@@ -7726,54 +6724,44 @@ static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
     tcg_gen_shli_vec(vece, n, n, halfbits - shr);
     tcg_gen_dupi_vec(vece, t, mask);
     tcg_gen_bitsel_vec(vece, d, t, d, n);
-    tcg_temp_free_vec(t);
 }
 
-static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
-{
-    static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
-    static const GVecGen2i ops[3] = {
-        { .fni8 = gen_shrnt16_i64,
-          .fniv = gen_shrnt_vec,
-          .opt_opc = vec_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_shrnt_h,
-          .vece = MO_16 },
-        { .fni8 = gen_shrnt32_i64,
-          .fniv = gen_shrnt_vec,
-          .opt_opc = vec_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_shrnt_s,
-          .vece = MO_32 },
-        { .fni8 = gen_shrnt64_i64,
-          .fniv = gen_shrnt_vec,
-          .opt_opc = vec_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_shrnt_d,
-          .vece = MO_64 },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
+static const GVecGen2i shrnt_ops[3] = {
+    { .fni8 = gen_shrnt16_i64,
+      .fniv = gen_shrnt_vec,
+      .opt_opc = shrnt_vec_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_shrnt_h,
+      .vece = MO_16 },
+    { .fni8 = gen_shrnt32_i64,
+      .fniv = gen_shrnt_vec,
+      .opt_opc = shrnt_vec_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_shrnt_s,
+      .vece = MO_32 },
+    { .fni8 = gen_shrnt64_i64,
+      .fniv = gen_shrnt_vec,
+      .opt_opc = shrnt_vec_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_shrnt_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)
 
-static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
-{
-    static const GVecGen2i ops[3] = {
-        { .fno = gen_helper_sve2_rshrnb_h },
-        { .fno = gen_helper_sve2_rshrnb_s },
-        { .fno = gen_helper_sve2_rshrnb_d },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const GVecGen2i rshrnb_ops[3] = {
+    { .fno = gen_helper_sve2_rshrnb_h },
+    { .fno = gen_helper_sve2_rshrnb_s },
+    { .fno = gen_helper_sve2_rshrnb_d },
+};
+TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)
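The .fni8 fallbacks in shrnb_ops do the even-lane narrowing with ordinary 64-bit arithmetic; a sketch of the MO_16 case consistent with gen_shrnb_vec above (the real gen_shrnb16_i64 is defined earlier in this file, so the name here is illustrative):

/* Sketch of the 64-bit SHRNB fallback for vece == MO_16: shift every
 * 16-bit lane right, then keep only the low 8 bits of each lane, which
 * leaves the narrowed bytes in the even positions and zeroes the odd. */
static void sketch_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int shr)
{
    uint64_t mask = dup_const(MO_16, MAKE_64BIT_MASK(0, 8));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}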
-static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
-{
-    static const GVecGen2i ops[3] = {
-        { .fno = gen_helper_sve2_rshrnt_h },
-        { .fno = gen_helper_sve2_rshrnt_s },
-        { .fno = gen_helper_sve2_rshrnt_d },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const GVecGen2i rshrnt_ops[3] = {
+    { .fno = gen_helper_sve2_rshrnt_h },
+    { .fno = gen_helper_sve2_rshrnt_s },
+    { .fno = gen_helper_sve2_rshrnt_d },
+};
+TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
 
 static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                              TCGv_vec n, int64_t shr)
@@ -7786,30 +6774,26 @@ static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
     tcg_gen_smax_vec(vece, n, n, t);
     tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
     tcg_gen_umin_vec(vece, d, n, t);
-    tcg_temp_free_vec(t);
 }
 
-static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
-{
-    static const TCGOpcode vec_list[] = {
-        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
-    };
-    static const GVecGen2i ops[3] = {
-        { .fniv = gen_sqshrunb_vec,
-          .opt_opc = vec_list,
-          .fno = gen_helper_sve2_sqshrunb_h,
-          .vece = MO_16 },
-        { .fniv = gen_sqshrunb_vec,
-          .opt_opc = vec_list,
-          .fno = gen_helper_sve2_sqshrunb_s,
-          .vece = MO_32 },
-        { .fniv = gen_sqshrunb_vec,
-          .opt_opc = vec_list,
-          .fno = gen_helper_sve2_sqshrunb_d,
-          .vece = MO_64 },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const TCGOpcode sqshrunb_vec_list[] = {
+    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
+};
+static const GVecGen2i sqshrunb_ops[3] = {
+    { .fniv = gen_sqshrunb_vec,
+      .opt_opc = sqshrunb_vec_list,
+      .fno = gen_helper_sve2_sqshrunb_h,
+      .vece = MO_16 },
+    { .fniv = gen_sqshrunb_vec,
+      .opt_opc = sqshrunb_vec_list,
+      .fno = gen_helper_sve2_sqshrunb_s,
+      .vece = MO_32 },
+    { .fniv = gen_sqshrunb_vec,
+      .opt_opc = sqshrunb_vec_list,
+      .fno = gen_helper_sve2_sqshrunb_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)
 
 static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                              TCGv_vec n, int64_t shr)
@@ -7824,54 +6808,44 @@ static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
     tcg_gen_umin_vec(vece, n, n, t);
     tcg_gen_shli_vec(vece, n, n, halfbits);
     tcg_gen_bitsel_vec(vece, d, t, d, n);
-    tcg_temp_free_vec(t);
 }
 
-static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
-{
-    static const TCGOpcode vec_list[] = {
-        INDEX_op_shli_vec, INDEX_op_sari_vec,
-        INDEX_op_smax_vec, INDEX_op_umin_vec, 0
-    };
-    static const GVecGen2i ops[3] = {
-        { .fniv = gen_sqshrunt_vec,
-          .opt_opc = vec_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_sqshrunt_h,
-          .vece = MO_16 },
-        { .fniv = gen_sqshrunt_vec,
-          .opt_opc = vec_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_sqshrunt_s,
-          .vece = MO_32 },
-        { .fniv = gen_sqshrunt_vec,
-          .opt_opc = vec_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_sqshrunt_d,
-          .vece = MO_64 },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const TCGOpcode sqshrunt_vec_list[] = {
+    INDEX_op_shli_vec, INDEX_op_sari_vec,
+    INDEX_op_smax_vec, INDEX_op_umin_vec, 0
+};
+static const GVecGen2i sqshrunt_ops[3] = {
+    { .fniv = gen_sqshrunt_vec,
+      .opt_opc = sqshrunt_vec_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_sqshrunt_h,
+      .vece = MO_16 },
+    { .fniv = gen_sqshrunt_vec,
+      .opt_opc = sqshrunt_vec_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_sqshrunt_s,
+      .vece = MO_32 },
+    { .fniv = gen_sqshrunt_vec,
+      .opt_opc = sqshrunt_vec_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_sqshrunt_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)
 
-static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
-{
-    static const GVecGen2i ops[3] = {
-        { .fno = gen_helper_sve2_sqrshrunb_h },
-        { .fno = gen_helper_sve2_sqrshrunb_s },
-        { .fno = gen_helper_sve2_sqrshrunb_d },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const GVecGen2i sqrshrunb_ops[3] = {
+    { .fno = gen_helper_sve2_sqrshrunb_h },
+    { .fno = gen_helper_sve2_sqrshrunb_s },
+    { .fno = gen_helper_sve2_sqrshrunb_d },
+};
+TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)
 
-static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
-{
-    static const GVecGen2i ops[3] = {
-        { .fno = gen_helper_sve2_sqrshrunt_h },
-        { .fno = gen_helper_sve2_sqrshrunt_s },
-        { .fno = gen_helper_sve2_sqrshrunt_d },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const GVecGen2i sqrshrunt_ops[3] = {
+    { .fno = gen_helper_sve2_sqrshrunt_h },
+    { .fno = gen_helper_sve2_sqrshrunt_s },
+    { .fno = gen_helper_sve2_sqrshrunt_d },
+};
+TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)
 
 static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
@@ -7888,30 +6862,26 @@ static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
     tcg_gen_smin_vec(vece, n, n, t);
     tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
     tcg_gen_and_vec(vece, d, n, t);
-    tcg_temp_free_vec(t);
 }
 
-static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
-{
-    static const TCGOpcode vec_list[] = {
-        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
-    };
-    static const GVecGen2i ops[3] = {
-        { .fniv = gen_sqshrnb_vec,
-          .opt_opc = vec_list,
-          .fno = gen_helper_sve2_sqshrnb_h,
-          .vece = MO_16 },
-        { .fniv = gen_sqshrnb_vec,
-          .opt_opc = vec_list,
-          .fno = gen_helper_sve2_sqshrnb_s,
-          .vece = MO_32 },
-        { .fniv = gen_sqshrnb_vec,
-          .opt_opc = vec_list,
-          .fno = gen_helper_sve2_sqshrnb_d,
-          .vece = MO_64 },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const TCGOpcode sqshrnb_vec_list[] = {
+    INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
+};
+static const GVecGen2i sqshrnb_ops[3] = {
+    { .fniv = gen_sqshrnb_vec,
+      .opt_opc = sqshrnb_vec_list,
+      .fno = gen_helper_sve2_sqshrnb_h,
+      .vece = MO_16 },
+    { .fniv = gen_sqshrnb_vec,
+      .opt_opc = sqshrnb_vec_list,
+      .fno = gen_helper_sve2_sqshrnb_s,
+      .vece = MO_32 },
+    { .fniv = gen_sqshrnb_vec,
+      .opt_opc = sqshrnb_vec_list,
+      .fno = gen_helper_sve2_sqshrnb_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)
 
 static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
@@ -7929,54 +6899,44 @@ static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
     tcg_gen_shli_vec(vece, n, n, halfbits);
     tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
     tcg_gen_bitsel_vec(vece, d, t, d, n);
-    tcg_temp_free_vec(t);
 }
 
-static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
-{
-    static const TCGOpcode vec_list[] = {
-        INDEX_op_shli_vec, INDEX_op_sari_vec,
-        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
-    };
-    static const GVecGen2i ops[3] = {
-        { .fniv = gen_sqshrnt_vec,
-          .opt_opc = vec_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_sqshrnt_h,
-          .vece = MO_16 },
-        { .fniv = gen_sqshrnt_vec,
-          .opt_opc = vec_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_sqshrnt_s,
-          .vece = MO_32 },
-        { .fniv = gen_sqshrnt_vec,
-          .opt_opc = vec_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_sqshrnt_d,
-          .vece = MO_64 },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const TCGOpcode sqshrnt_vec_list[] = {
+    INDEX_op_shli_vec, INDEX_op_sari_vec,
+    INDEX_op_smax_vec, INDEX_op_smin_vec, 0
+};
+static const GVecGen2i sqshrnt_ops[3] = {
+    { .fniv = gen_sqshrnt_vec,
+      .opt_opc = sqshrnt_vec_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_sqshrnt_h,
+      .vece = MO_16 },
+    { .fniv = gen_sqshrnt_vec,
+      .opt_opc = sqshrnt_vec_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_sqshrnt_s,
+      .vece = MO_32 },
+    { .fniv = gen_sqshrnt_vec,
+      .opt_opc = sqshrnt_vec_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_sqshrnt_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)
 
-static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
-{
-    static const GVecGen2i ops[3] = {
-        { .fno = gen_helper_sve2_sqrshrnb_h },
-        { .fno = gen_helper_sve2_sqrshrnb_s },
-        { .fno = gen_helper_sve2_sqrshrnb_d },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const GVecGen2i sqrshrnb_ops[3] = {
+    { .fno = gen_helper_sve2_sqrshrnb_h },
+    { .fno = gen_helper_sve2_sqrshrnb_s },
+    { .fno = gen_helper_sve2_sqrshrnb_d },
+};
+TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)
 
-static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
-{
-    static const GVecGen2i ops[3] = {
-        { .fno = gen_helper_sve2_sqrshrnt_h },
-        { .fno = gen_helper_sve2_sqrshrnt_s },
-        { .fno = gen_helper_sve2_sqrshrnt_d },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const GVecGen2i sqrshrnt_ops[3] = {
+    { .fno = gen_helper_sve2_sqrshrnt_h },
+    { .fno = gen_helper_sve2_sqrshrnt_s },
+    { .fno = gen_helper_sve2_sqrshrnt_d },
+};
+TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
 
 static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
@@ -7987,30 +6947,26 @@ static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
     tcg_gen_shri_vec(vece, n, n, shr);
     tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
     tcg_gen_umin_vec(vece, d, n, t);
-    tcg_temp_free_vec(t);
 }
 
-static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
-{
-    static const TCGOpcode vec_list[] = {
-        INDEX_op_shri_vec, INDEX_op_umin_vec, 0
-    };
-    static const GVecGen2i ops[3] = {
-        { .fniv = gen_uqshrnb_vec,
-          .opt_opc = vec_list,
-          .fno = gen_helper_sve2_uqshrnb_h,
-          .vece = MO_16 },
-        { .fniv = gen_uqshrnb_vec,
-          .opt_opc = vec_list,
-          .fno = gen_helper_sve2_uqshrnb_s,
-          .vece = MO_32 },
-        { .fniv = gen_uqshrnb_vec,
-          .opt_opc = vec_list,
-          .fno = gen_helper_sve2_uqshrnb_d,
-          .vece = MO_64 },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const TCGOpcode uqshrnb_vec_list[] = {
+    INDEX_op_shri_vec, INDEX_op_umin_vec, 0
+};
+static const GVecGen2i uqshrnb_ops[3] = {
+    { .fniv = gen_uqshrnb_vec,
+      .opt_opc = uqshrnb_vec_list,
+      .fno = gen_helper_sve2_uqshrnb_h,
+      .vece = MO_16 },
+    { .fniv = gen_uqshrnb_vec,
+      .opt_opc = uqshrnb_vec_list,
+      .fno = gen_helper_sve2_uqshrnb_s,
+      .vece = MO_32 },
+    { .fniv = gen_uqshrnb_vec,
+      .opt_opc = uqshrnb_vec_list,
+      .fno = gen_helper_sve2_uqshrnb_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)
 
 static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
@@ -8023,63 +6979,51 @@ static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
     tcg_gen_umin_vec(vece, n, n, t);
     tcg_gen_shli_vec(vece, n, n, halfbits);
     tcg_gen_bitsel_vec(vece, d, t, d, n);
-    tcg_temp_free_vec(t);
 }
 
-static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
-{
-    static const TCGOpcode vec_list[] = {
-        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
-    };
-    static const GVecGen2i ops[3] = {
-        { .fniv = gen_uqshrnt_vec,
-          .opt_opc = vec_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_uqshrnt_h,
-          .vece = MO_16 },
-        { .fniv = gen_uqshrnt_vec,
-          .opt_opc = vec_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_uqshrnt_s,
-          .vece = MO_32 },
-        { .fniv = gen_uqshrnt_vec,
-          .opt_opc = vec_list,
-          .load_dest = true,
-          .fno = gen_helper_sve2_uqshrnt_d,
-          .vece = MO_64 },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const TCGOpcode uqshrnt_vec_list[] = {
+    INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
+};
+static const GVecGen2i uqshrnt_ops[3] = {
+    { .fniv = gen_uqshrnt_vec,
+      .opt_opc = uqshrnt_vec_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_uqshrnt_h,
+      .vece = MO_16 },
+    { .fniv = gen_uqshrnt_vec,
+      .opt_opc = uqshrnt_vec_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_uqshrnt_s,
+      .vece = MO_32 },
+    { .fniv = gen_uqshrnt_vec,
+      .opt_opc = uqshrnt_vec_list,
+      .load_dest = true,
+      .fno = gen_helper_sve2_uqshrnt_d,
+      .vece = MO_64 },
+};
+TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)
 
-static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
-{
-    static const GVecGen2i ops[3] = {
-        { .fno = gen_helper_sve2_uqrshrnb_h },
-        { .fno = gen_helper_sve2_uqrshrnb_s },
-        { .fno = gen_helper_sve2_uqrshrnb_d },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const GVecGen2i uqrshrnb_ops[3] = {
+    { .fno = gen_helper_sve2_uqrshrnb_h },
+    { .fno = gen_helper_sve2_uqrshrnb_s },
+    { .fno = gen_helper_sve2_uqrshrnb_d },
+};
+TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)
 
-static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
-{
-    static const GVecGen2i ops[3] = {
-        { .fno = gen_helper_sve2_uqrshrnt_h },
-        { .fno = gen_helper_sve2_uqrshrnt_s },
-        { .fno = gen_helper_sve2_uqrshrnt_d },
-    };
-    return do_sve2_shr_narrow(s, a, ops);
-}
+static const GVecGen2i uqrshrnt_ops[3] = {
+    { .fno = gen_helper_sve2_uqrshrnt_h },
+    { .fno = gen_helper_sve2_uqrshrnt_s },
+    { .fno = gen_helper_sve2_uqrshrnt_d },
+};
+TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)
 
 #define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
-static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)                 \
-{                                                                         \
-    static gen_helper_gvec_3 * const fns[4] = {                           \
+    static gen_helper_gvec_3 * const name##_fns[4] = {                    \
         NULL, gen_helper_sve2_##name##_h,                                 \
         gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
-    };                                                                    \
-    return do_sve2_zzz_ool(s, a, fns[a->esz]);                            \
-}
+    };                                                                    \
+    TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz,                     \
+               name##_fns[a->esz], a, 0)
 
 DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
 DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
@@ -8091,655 +7035,388 @@ DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
 DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
 DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
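After the rewrite, each DO_SVE2_ZZZ_NARROW use expands to a file-scope table plus a TRANS_FEAT instance; DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb), for example, becomes in effect:

static gen_helper_gvec_3 * const addhnb_fns[4] = {
    NULL, gen_helper_sve2_addhnb_h,
    gen_helper_sve2_addhnb_s, gen_helper_sve2_addhnb_d,
};
TRANS_FEAT(ADDHNB, aa64_sve2, gen_gvec_ool_arg_zzz,
           addhnb_fns[a->esz], a, 0)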
-static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
-                               gen_helper_gvec_flags_4 *fn)
-{
-    if (!dc_isar_feature(aa64_sve2, s)) {
-        return false;
-    }
-    return do_ppzz_flags(s, a, fn);
-}
-
-#define DO_SVE2_PPZZ_MATCH(NAME, name) \
-static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
-{ \
-    static gen_helper_gvec_flags_4 * const fns[4] = { \
-        gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h, \
-        NULL, NULL \
-    }; \
-    return do_sve2_ppzz_flags(s, a, fns[a->esz]); \
-}
-
-DO_SVE2_PPZZ_MATCH(MATCH, match)
-DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
-
-static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
-{
-    static gen_helper_gvec_4 * const fns[2] = {
-        gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
-    };
-    if (a->esz < 2) {
-        return false;
-    }
-    return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
-}
+static gen_helper_gvec_flags_4 * const match_fns[4] = {
+    gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
+};
+TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
 
-static bool trans_HISTSEG(DisasContext *s, arg_rrr_esz *a)
-{
-    if (a->esz != 0) {
-        return false;
-    }
-    return do_sve2_zzz_ool(s, a, gen_helper_sve2_histseg);
-}
+static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
+    gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
+};
+TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
 
-static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
-                            gen_helper_gvec_4_ptr *fn)
-{
-    if (!dc_isar_feature(aa64_sve2, s)) {
-        return false;
-    }
-    return do_zpzz_fp(s, a, fn);
-}
+static gen_helper_gvec_4 * const histcnt_fns[4] = {
+    NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
+};
+TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
+                        histcnt_fns[a->esz], a, 0)
 
-#define DO_SVE2_ZPZZ_FP(NAME, name) \
-static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a) \
-{ \
-    static gen_helper_gvec_4_ptr * const fns[4] = { \
-        NULL, gen_helper_sve2_##name##_zpzz_h, \
-        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d \
-    }; \
-    return do_sve2_zpzz_fp(s, a, fns[a->esz]); \
-}
+TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
+                        a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
 
-DO_SVE2_ZPZZ_FP(FADDP, faddp)
-DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
-DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
-DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
-DO_SVE2_ZPZZ_FP(FMINP, fminp)
+DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
+DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
+DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
+DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
+DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
 
 /*
  * SVE Integer Multiply-Add (unpredicated)
  */
 
-static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
-{
-    gen_helper_gvec_4_ptr *fn;
-
-    switch (a->esz) {
-    case MO_32:
-        if (!dc_isar_feature(aa64_sve_f32mm, s)) {
-            return false;
-        }
-        fn = gen_helper_fmmla_s;
-        break;
-    case MO_64:
-        if (!dc_isar_feature(aa64_sve_f64mm, s)) {
-            return false;
-        }
-        fn = gen_helper_fmmla_d;
-        break;
-    default:
-        return false;
-    }
+TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
+                        gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
+                        0, FPST_FPCR)
+TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
+                        gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
+                        0, FPST_FPCR)
 
-    if (sve_access_check(s)) {
-        unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
-        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
-                           vec_full_reg_offset(s, a->rn),
-                           vec_full_reg_offset(s, a->rm),
-                           vec_full_reg_offset(s, a->ra),
-                           status, vsz, vsz, 0, fn);
-        tcg_temp_free_ptr(status);
-    }
-    return true;
-}
-
-static bool do_sqdmlal_zzzw(DisasContext *s, arg_rrrr_esz *a,
-                            bool sel1, bool sel2)
-{
-    static gen_helper_gvec_4 * const fns[] = {
-        NULL, gen_helper_sve2_sqdmlal_zzzw_h,
-        gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
-    };
-    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
-}
-
-static bool do_sqdmlsl_zzzw(DisasContext *s, arg_rrrr_esz *a,
-                            bool sel1, bool sel2)
-{
-    static gen_helper_gvec_4 * const fns[] = {
-        NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
-        gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
-    };
-    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
-}
-
-static bool trans_SQDMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_sqdmlal_zzzw(s, a, false, false);
-}
-
-static bool trans_SQDMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_sqdmlal_zzzw(s, a, true, true);
-}
-
-static bool trans_SQDMLALBT(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_sqdmlal_zzzw(s, a, false, true);
-}
-
-static bool trans_SQDMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_sqdmlsl_zzzw(s, a, false, false);
-}
-
-static bool trans_SQDMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_sqdmlsl_zzzw(s, a, true, true);
-}
-
-static bool trans_SQDMLSLBT(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_sqdmlsl_zzzw(s, a, false, true);
-}
-
-static bool trans_SQRDMLAH_zzzz(DisasContext *s, arg_rrrr_esz *a)
-{
-    static gen_helper_gvec_4 * const fns[] = {
-        gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
-        gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
-    };
-    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
-}
+static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
+    NULL, gen_helper_sve2_sqdmlal_zzzw_h,
+    gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
+};
+TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           sqdmlal_zzzw_fns[a->esz], a, 0)
+TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           sqdmlal_zzzw_fns[a->esz], a, 3)
+TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           sqdmlal_zzzw_fns[a->esz], a, 2)
+
+static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
+    NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
+    gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
+};
+TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           sqdmlsl_zzzw_fns[a->esz], a, 0)
+TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           sqdmlsl_zzzw_fns[a->esz], a, 3)
+TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           sqdmlsl_zzzw_fns[a->esz], a, 2)
+
+static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
+    gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
+    gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
+};
+TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           sqrdmlah_fns[a->esz], a, 0)
 
-static bool trans_SQRDMLSH_zzzz(DisasContext *s, arg_rrrr_esz *a)
-{
-    static gen_helper_gvec_4 * const fns[] = {
-        gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
-        gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
-    };
-    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
-}
+static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
+    gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
+    gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
+};
+TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           sqrdmlsh_fns[a->esz], a, 0)
 
-static bool do_smlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
-{
-    static gen_helper_gvec_4 * const fns[] = {
-        NULL, gen_helper_sve2_smlal_zzzw_h,
-        gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
-    };
-    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
-}
+static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
+    NULL, gen_helper_sve2_smlal_zzzw_h,
+    gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
+};
+TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           smlal_zzzw_fns[a->esz], a, 0)
+TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           smlal_zzzw_fns[a->esz], a, 1)
+
+static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
+    NULL, gen_helper_sve2_umlal_zzzw_h,
+    gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
+};
+TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           umlal_zzzw_fns[a->esz], a, 0)
+TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           umlal_zzzw_fns[a->esz], a, 1)
+
+static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
+    NULL, gen_helper_sve2_smlsl_zzzw_h,
+    gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
+};
+TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           smlsl_zzzw_fns[a->esz], a, 0)
+TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           smlsl_zzzw_fns[a->esz], a, 1)
+
+static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
+    NULL, gen_helper_sve2_umlsl_zzzw_h,
+    gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
+};
+TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           umlsl_zzzw_fns[a->esz], a, 0)
+TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+           umlsl_zzzw_fns[a->esz], a, 1)
+
+static gen_helper_gvec_4 * const cmla_fns[] = {
+    gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
+    gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
+};
+TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
+           cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
 
-static bool trans_SMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_smlal_zzzw(s, a, false);
-}
+static gen_helper_gvec_4 * const cdot_fns[] = {
+    NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
+};
+TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
+           cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
 
-static bool trans_SMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_smlal_zzzw(s, a, true);
-}
+static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
+    gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
+    gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
+};
+TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
+           sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
+
+TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
+           a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
+
+TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
+                        gen_helper_crypto_aesmc, a->rd, a->rd, 0)
+TRANS_FEAT_NONSTREAMING(AESIMC, aa64_sve2_aes, gen_gvec_ool_zz,
+                        gen_helper_crypto_aesimc, a->rd, a->rd, 0)
+
+TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
+                        gen_helper_crypto_aese, a, 0)
+TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
+                        gen_helper_crypto_aesd, a, 0)
+
+TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
+                        gen_helper_crypto_sm4e, a, 0)
+TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
+                        gen_helper_crypto_sm4ekey, a, 0)
+
+TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
+                        gen_gvec_rax1, a)
+
+TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
+           gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
+TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
+           gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)
+
+TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
+           gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)
+
+TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
+           gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
+TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
+           gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)
+
+TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
+           FPROUNDING_ODD, gen_helper_sve_fcvt_ds)
+TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
+           FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds)
+
+static gen_helper_gvec_3_ptr * const flogb_fns[] = {
+    NULL, gen_helper_flogb_h,
+    gen_helper_flogb_s, gen_helper_flogb_d
+};
+TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
+           a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
 
-static bool do_umlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
+static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
 {
-    static gen_helper_gvec_4 * const fns[] = {
-        NULL, gen_helper_sve2_umlal_zzzw_h,
-        gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
-    };
-    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
+    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
+                             a->rd, a->rn, a->rm, a->ra,
+                             (sel << 1) | sub, tcg_env);
 }
 
-static bool trans_UMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
-{
-    return do_umlal_zzzw(s, a, false);
-}
+TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
+TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
+TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
+TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)
 
-static bool trans_UMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
+static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
 {
-    return do_umlal_zzzw(s, a, true);
+    return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
+                             a->rd, a->rn, a->rm, a->ra,
+                             (a->index << 2) | (sel << 1) | sub, tcg_env);
 }
 
-static bool do_smlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
-{
-    static gen_helper_gvec_4 * const fns[] = {
-        NULL, gen_helper_sve2_smlsl_zzzw_h,
-        gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
-    };
-    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
-}
+TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
+TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
+TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
+TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
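do_FMLAL_zzzw and do_FMLAL_zzxw fold sub, sel and (for the by-element form) the index into the one simd-data word; a hypothetical unpacking on the helper side, written out only to document the layout (unpack_fmlal_desc is illustrative, not a helper in this series):

/* Hypothetical: invert the (a->index << 2) | (sel << 1) | sub packing
 * built above, as a helper like sve2_fmlal_zzxw_s would need to. */
static inline void unpack_fmlal_desc(uint32_t data,
                                     bool *sub, bool *sel, int *index)
{
    *sub = data & 1;         /* 0: FMLAL (add), 1: FMLSL (subtract) */
    *sel = (data >> 1) & 1;  /* 0: bottom 16-bit halves, 1: top halves */
    *index = data >> 2;      /* element index, zero for the _zzzw form */
}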
-} +TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, + gen_helper_gvec_smmla_b, a, 0) +TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, + gen_helper_gvec_usmmla_b, a, 0) +TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz, + gen_helper_gvec_ummla_b, a, 0) -static bool trans_SMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a) -{ - return do_smlsl_zzzw(s, a, true); -} +TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, + gen_helper_gvec_bfdot, a, 0) +TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz, + gen_helper_gvec_bfdot_idx, a) -static bool do_umlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) -{ - static gen_helper_gvec_4 * const fns[] = { - NULL, gen_helper_sve2_umlsl_zzzw_h, - gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d, - }; - return do_sve2_zzzz_ool(s, a, fns[a->esz], sel); -} +TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz, + gen_helper_gvec_bfmmla, a, 0) -static bool trans_UMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a) +static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) { - return do_umlsl_zzzw(s, a, false); + return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal, + a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR); } -static bool trans_UMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a) -{ - return do_umlsl_zzzw(s, a, true); -} +TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false) +TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true) -static bool trans_CMLA_zzzz(DisasContext *s, arg_CMLA_zzzz *a) +static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) { - static gen_helper_gvec_4 * const fns[] = { - gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h, - gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d, - }; - - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot); - } - return true; + return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx, + a->rd, a->rn, a->rm, a->ra, + (a->index << 1) | sel, FPST_FPCR); } -static bool trans_CDOT_zzzz(DisasContext *s, arg_CMLA_zzzz *a) -{ - if (!dc_isar_feature(aa64_sve2, s) || a->esz < MO_32) { - return false; - } - if (sve_access_check(s)) { - gen_helper_gvec_4 *fn = (a->esz == MO_32 - ? 
gen_helper_sve2_cdot_zzzz_s - : gen_helper_sve2_cdot_zzzz_d); - gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->rot); - } - return true; -} +TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false) +TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true) -static bool trans_SQRDCMLAH_zzzz(DisasContext *s, arg_SQRDCMLAH_zzzz *a) +static bool trans_PSEL(DisasContext *s, arg_psel *a) { - static gen_helper_gvec_4 * const fns[] = { - gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h, - gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d, - }; + int vl = vec_full_reg_size(s); + int pl = pred_gvec_reg_size(s); + int elements = vl >> a->esz; + TCGv_i64 tmp, didx, dbit; + TCGv_ptr ptr; - if (!dc_isar_feature(aa64_sve2, s)) { + if (!dc_isar_feature(aa64_sme, s)) { return false; } - if (sve_access_check(s)) { - gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot); + if (!sve_access_check(s)) { + return true; } - return true; -} -static bool trans_USDOT_zzzz(DisasContext *s, arg_USDOT_zzzz *a) -{ - if (a->esz != 2 || !dc_isar_feature(aa64_sve_i8mm, s)) { - return false; - } - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vec_full_reg_offset(s, a->ra), - vsz, vsz, 0, gen_helper_gvec_usdot_b); - } - return true; -} + tmp = tcg_temp_new_i64(); + dbit = tcg_temp_new_i64(); + didx = tcg_temp_new_i64(); + ptr = tcg_temp_new_ptr(); -static bool trans_AESMC(DisasContext *s, arg_AESMC *a) -{ - if (!dc_isar_feature(aa64_sve2_aes, s)) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zz(s, gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt); + /* Compute the predicate element. */ + tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm); + if (is_power_of_2(elements)) { + tcg_gen_andi_i64(tmp, tmp, elements - 1); + } else { + tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements)); } - return true; -} -static bool do_aese(DisasContext *s, arg_rrr_esz *a, bool decrypt) -{ - if (!dc_isar_feature(aa64_sve2_aes, s)) { - return false; + /* Extract the predicate byte and bit indices. */ + tcg_gen_shli_i64(tmp, tmp, a->esz); + tcg_gen_andi_i64(dbit, tmp, 7); + tcg_gen_shri_i64(didx, tmp, 3); + if (HOST_BIG_ENDIAN) { + tcg_gen_xori_i64(didx, didx, 7); } - if (sve_access_check(s)) { - gen_gvec_ool_zzz(s, gen_helper_crypto_aese, - a->rd, a->rn, a->rm, decrypt); - } - return true; -} -static bool trans_AESE(DisasContext *s, arg_rrr_esz *a) -{ - return do_aese(s, a, false); -} + /* Load the predicate word. */ + tcg_gen_trunc_i64_ptr(ptr, didx); + tcg_gen_add_ptr(ptr, ptr, tcg_env); + tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm)); -static bool trans_AESD(DisasContext *s, arg_rrr_esz *a) -{ - return do_aese(s, a, true); -} + /* Extract the predicate bit and replicate to MO_64. */ + tcg_gen_shr_i64(tmp, tmp, dbit); + tcg_gen_andi_i64(tmp, tmp, 1); + tcg_gen_neg_i64(tmp, tmp); -static bool do_sm4(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn) -{ - if (!dc_isar_feature(aa64_sve2_sm4, s)) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0); - } + /* Apply to either copy the source, or write zeros. 
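The PSEL sequence above reduces a variable element number to a (byte, bit) position inside the predicate register: wrap the index into the vector, scale by the element size to get the first predicate bit of that element, then split into a byte offset and a bit offset, with the xori correcting byte addressing on big-endian hosts. A standalone scalar model of the same arithmetic (illustrative only):

    #include <stdbool.h>

    /* Scalar model of the PSEL index computation (illustrative). */
    static unsigned psel_bit(unsigned idx, unsigned elements, unsigned esz,
                             bool host_big_endian, unsigned *byte_off)
    {
        unsigned elt    = idx % elements;   /* wrap into the vector */
        unsigned bitpos = elt << esz;       /* first pred bit of the element */

        *byte_off = bitpos >> 3;
        if (host_big_endian) {
            *byte_off ^= 7;                 /* bytes within a host 64-bit word */
        }
        return bitpos & 7;                  /* bit within that byte */
    }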
*/ + tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd), + pred_full_reg_offset(s, a->pn), tmp, pl, pl); return true; } -static bool trans_SM4E(DisasContext *s, arg_rrr_esz *a) +static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) { - return do_sm4(s, a, gen_helper_crypto_sm4e); + tcg_gen_smax_i32(d, a, n); + tcg_gen_smin_i32(d, d, m); } -static bool trans_SM4EKEY(DisasContext *s, arg_rrr_esz *a) +static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) { - return do_sm4(s, a, gen_helper_crypto_sm4ekey); -} - -static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a) -{ - if (!dc_isar_feature(aa64_sve2_sha3, s)) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_fn_zzz(s, gen_gvec_rax1, MO_64, a->rd, a->rn, a->rm); - } - return true; + tcg_gen_smax_i64(d, a, n); + tcg_gen_smin_i64(d, d, m); } -static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a) +static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, + TCGv_vec m, TCGv_vec a) { - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh); + tcg_gen_smax_vec(vece, d, a, n); + tcg_gen_smin_vec(vece, d, d, m); } -static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a) +static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, + uint32_t a, uint32_t oprsz, uint32_t maxsz) { - if (!dc_isar_feature(aa64_sve_bf16, s)) { - return false; - } - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt); -} - -static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a) -{ - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds); -} - -static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a) -{ - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs); -} - -static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a) -{ - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd); -} - -static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a) -{ - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds); -} - -static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a) -{ - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds); -} - -static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a) -{ - static gen_helper_gvec_3_ptr * const fns[] = { - NULL, gen_helper_flogb_h, - gen_helper_flogb_s, gen_helper_flogb_d + static const TCGOpcode vecop[] = { + INDEX_op_smin_vec, INDEX_op_smax_vec, 0 }; - - if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) { - return false; - } - if (sve_access_check(s)) { - TCGv_ptr status = - fpstatus_ptr(a->esz == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); - unsigned vsz = vec_full_reg_size(s); - - tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - pred_full_reg_offset(s, a->pg), - status, vsz, vsz, 0, fns[a->esz]); - tcg_temp_free_ptr(status); - } - return true; -} - -static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel) -{ - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vec_full_reg_offset(s, a->ra), - cpu_env, vsz, vsz, (sel << 1) | sub, - gen_helper_sve2_fmlal_zzzw_s); - } - return true; -} - -static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a) -{ - return do_FMLAL_zzzw(s, a, false, false); -} - -static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a) -{ - return do_FMLAL_zzzw(s, a, false, true); -} - -static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a) -{ - return do_FMLAL_zzzw(s, a, true, false); -} - -static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a) -{ - return do_FMLAL_zzzw(s, a, true, true); -} - -static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel) -{ - if (!dc_isar_feature(aa64_sve2, s)) { - return false; - } - if (sve_access_check(s)) { - unsigned vsz = vec_full_reg_size(s); - tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vec_full_reg_offset(s, a->ra), - cpu_env, vsz, vsz, - (a->index << 2) | (sel << 1) | sub, - gen_helper_sve2_fmlal_zzxw_s); - } - return true; -} - -static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a) -{ - return do_FMLAL_zzxw(s, a, false, false); -} - -static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a) -{ - return do_FMLAL_zzxw(s, a, false, true); -} - -static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a) -{ - return do_FMLAL_zzxw(s, a, true, false); -} - -static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a) -{ - return do_FMLAL_zzxw(s, a, true, true); -} - -static bool do_i8mm_zzzz_ool(DisasContext *s, arg_rrrr_esz *a, - gen_helper_gvec_4 *fn, int data) -{ - if (!dc_isar_feature(aa64_sve_i8mm, s)) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data); - } - return true; -} - -static bool trans_SMMLA(DisasContext *s, arg_rrrr_esz *a) -{ - return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_smmla_b, 0); -} - -static bool trans_USMMLA(DisasContext *s, arg_rrrr_esz *a) -{ - return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_usmmla_b, 0); + static const GVecGen4 ops[4] = { + { .fniv = gen_sclamp_vec, + .fno = gen_helper_gvec_sclamp_b, + .opt_opc = vecop, + .vece = MO_8 }, + { .fniv = gen_sclamp_vec, + .fno = gen_helper_gvec_sclamp_h, + .opt_opc = vecop, + .vece = MO_16 }, + { .fni4 = gen_sclamp_i32, + .fniv = gen_sclamp_vec, + .fno = gen_helper_gvec_sclamp_s, + .opt_opc = vecop, + .vece = MO_32 }, + { .fni8 = gen_sclamp_i64, + .fniv = gen_sclamp_vec, + .fno = gen_helper_gvec_sclamp_d, + .opt_opc = vecop, + .vece = MO_64, + .prefer_i64 = TCG_TARGET_REG_BITS == 64 } + }; + tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); } -static bool trans_UMMLA(DisasContext *s, arg_rrrr_esz *a) -{ - return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_ummla_b, 0); -} +TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a) -static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a) +static void 
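The GVecGen4 table above lets the generic vector expander pick the cheapest implementation at translate time: the .fniv fragment when the host vector ISA provides the opcodes listed in .opt_opc, the inline .fni4/.fni8 scalar fragments for 32- and 64-bit lanes, and the .fno out-of-line helper as the fallback. The lane operation itself is just a max followed by a min; a standalone scalar model (illustrative):

    #include <stdint.h>

    /* SCLAMP lane semantics: clamp the accumulator a into [n, m]. */
    static inline int32_t sclamp32(int32_t n, int32_t m, int32_t a)
    {
        return a < n ? n : (a > m ? m : a);  /* smin(smax(a, n), m) */
    }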
gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a) { - if (!dc_isar_feature(aa64_sve_bf16, s)) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot, - a->rd, a->rn, a->rm, a->ra, 0); - } - return true; + tcg_gen_umax_i32(d, a, n); + tcg_gen_umin_i32(d, d, m); } -static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a) +static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a) { - if (!dc_isar_feature(aa64_sve_bf16, s)) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot_idx, - a->rd, a->rn, a->rm, a->ra, a->index); - } - return true; + tcg_gen_umax_i64(d, a, n); + tcg_gen_umin_i64(d, d, m); } -static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a) +static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n, + TCGv_vec m, TCGv_vec a) { - if (!dc_isar_feature(aa64_sve_bf16, s)) { - return false; - } - if (sve_access_check(s)) { - gen_gvec_ool_zzzz(s, gen_helper_gvec_bfmmla, - a->rd, a->rn, a->rm, a->ra, 0); - } - return true; + tcg_gen_umax_vec(vece, d, a, n); + tcg_gen_umin_vec(vece, d, d, m); } -static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel) +static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, + uint32_t a, uint32_t oprsz, uint32_t maxsz) { - if (!dc_isar_feature(aa64_sve_bf16, s)) { - return false; - } - if (sve_access_check(s)) { - TCGv_ptr status = fpstatus_ptr(FPST_FPCR); - unsigned vsz = vec_full_reg_size(s); - - tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vec_full_reg_offset(s, a->ra), - status, vsz, vsz, sel, - gen_helper_gvec_bfmlal); - tcg_temp_free_ptr(status); - } - return true; -} - -static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a) -{ - return do_BFMLAL_zzzw(s, a, false); -} - -static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a) -{ - return do_BFMLAL_zzzw(s, a, true); -} - -static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel) -{ - if (!dc_isar_feature(aa64_sve_bf16, s)) { - return false; - } - if (sve_access_check(s)) { - TCGv_ptr status = fpstatus_ptr(FPST_FPCR); - unsigned vsz = vec_full_reg_size(s); - - tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd), - vec_full_reg_offset(s, a->rn), - vec_full_reg_offset(s, a->rm), - vec_full_reg_offset(s, a->ra), - status, vsz, vsz, (a->index << 1) | sel, - gen_helper_gvec_bfmlal_idx); - tcg_temp_free_ptr(status); - } - return true; -} - -static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a) -{ - return do_BFMLAL_zzxw(s, a, false); + static const TCGOpcode vecop[] = { + INDEX_op_umin_vec, INDEX_op_umax_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_uclamp_vec, + .fno = gen_helper_gvec_uclamp_b, + .opt_opc = vecop, + .vece = MO_8 }, + { .fniv = gen_uclamp_vec, + .fno = gen_helper_gvec_uclamp_h, + .opt_opc = vecop, + .vece = MO_16 }, + { .fni4 = gen_uclamp_i32, + .fniv = gen_uclamp_vec, + .fno = gen_helper_gvec_uclamp_s, + .opt_opc = vecop, + .vece = MO_32 }, + { .fni8 = gen_uclamp_i64, + .fniv = gen_uclamp_vec, + .fno = gen_helper_gvec_uclamp_d, + .opt_opc = vecop, + .vece = MO_64, + .prefer_i64 = TCG_TARGET_REG_BITS == 64 } + }; + tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]); } -static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a) -{ - return do_BFMLAL_zzxw(s, a, true); -} +TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a) diff --git a/target/arm/translate-vfp.c 
b/target/arm/tcg/translate-vfp.c index 59bcaec5be..b9af03b7c3 100644 --- a/target/arm/translate-vfp.c +++ b/target/arm/tcg/translate-vfp.c @@ -21,10 +21,6 @@ */ #include "qemu/osdep.h" -#include "tcg/tcg-op.h" -#include "tcg/tcg-op-gvec.h" -#include "exec/exec-all.h" -#include "exec/gen-icount.h" #include "translate.h" #include "translate-a32.h" @@ -34,22 +30,22 @@ static inline void vfp_load_reg64(TCGv_i64 var, int reg) { - tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg)); + tcg_gen_ld_i64(var, tcg_env, vfp_reg_offset(true, reg)); } static inline void vfp_store_reg64(TCGv_i64 var, int reg) { - tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg)); + tcg_gen_st_i64(var, tcg_env, vfp_reg_offset(true, reg)); } static inline void vfp_load_reg32(TCGv_i32 var, int reg) { - tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg)); + tcg_gen_ld_i32(var, tcg_env, vfp_reg_offset(false, reg)); } static inline void vfp_store_reg32(TCGv_i32 var, int reg) { - tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg)); + tcg_gen_st_i32(var, tcg_env, vfp_reg_offset(false, reg)); } /* @@ -93,7 +89,7 @@ uint64_t vfp_expand_imm(int size, uint8_t imm8) static inline long vfp_f16_offset(unsigned reg, bool top) { long offs = vfp_reg_offset(false, reg); -#ifdef HOST_WORDS_BIGENDIAN +#if HOST_BIG_ENDIAN if (!top) { offs += 2; } @@ -117,11 +113,10 @@ static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update) * so we must mark it as an IO operation for icount (and cause * this to be the last insn in the TB). */ - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { + if (translator_io_start(&s->base)) { s->base.is_jmp = DISAS_UPDATE_EXIT; - gen_io_start(); } - gen_helper_v7m_preserve_fp_state(cpu_env); + gen_helper_v7m_preserve_fp_state(tcg_env); /* * If the preserve_fp_state helper doesn't throw an exception * then it will clear LSPACT; we don't need to repeat this for @@ -149,7 +144,7 @@ static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update) * Generate code for M-profile FP context handling: update the * ownership of the FP context, and create a new context if * necessary. This corresponds to the parts of the pseudocode - * ExecuteFPCheck() after the inital PreserveFPState() call. + * ExecuteFPCheck() after the initial PreserveFPState() call. */ static void gen_update_fp_context(DisasContext *s) { @@ -177,11 +172,9 @@ static void gen_update_fp_context(DisasContext *s) uint32_t bits = R_V7M_CONTROL_FPCA_MASK; fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]); - gen_helper_vfp_set_fpscr(cpu_env, fpscr); - tcg_temp_free_i32(fpscr); + gen_helper_vfp_set_fpscr(tcg_env, fpscr); if (dc_isar_feature(aa32_mve, s)) { - TCGv_i32 z32 = tcg_const_i32(0); - store_cpu_field(z32, v7m.vpr); + store_cpu_field(tcg_constant_i32(0), v7m.vpr); } /* * We just updated the FPSCR and VPR. Some of this state is cached @@ -220,8 +213,30 @@ static void gen_update_fp_context(DisasContext *s) static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled) { if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + /* + * The full syndrome is only used for HSR when HCPTR traps: + * For v8, when TA==0, coproc is RES0. + * For v7, any use of a Floating-point instruction or access + * to a Floating-point Extension register that is trapped to + * Hyp mode because of a trap configured in the HCPTR sets + * this field to 0xA. + */ + int coproc = arm_dc_feature(s, ARM_FEATURE_V8) ? 
0 : 0xa; + uint32_t syn = syn_fp_access_trap(1, 0xe, false, coproc); + + gen_exception_insn_el(s, 0, EXCP_UDEF, syn, s->fp_excp_el); + return false; + } + + /* + * Note that rebuild_hflags_a32 has already accounted for being in EL0 + * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not + * appear to be any insns which touch VFP which are allowed. + */ + if (s->sme_trap_nonstreaming) { + gen_exception_insn(s, 0, EXCP_UDEF, + syn_smetrap(SME_ET_Streaming, + curr_insn_len(s) == 2)); return false; } @@ -251,8 +266,8 @@ bool vfp_access_check_m(DisasContext *s, bool skip_context_update) * the encoding space handled by the patterns in m-nocp.decode, * and for them we may need to raise NOCP here. */ - gen_exception_insn(s, s->pc_curr, EXCP_NOCP, - syn_uncategorized(), s->fp_excp_el); + gen_exception_insn_el(s, 0, EXCP_NOCP, + syn_uncategorized(), s->fp_excp_el); return false; } @@ -317,7 +332,7 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a) TCGv_i64 frn, frm, dest; TCGv_i64 tmp, zero, zf, nf, vf; - zero = tcg_const_i64(0); + zero = tcg_constant_i64(0); frn = tcg_temp_new_i64(); frm = tcg_temp_new_i64(); @@ -335,45 +350,29 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a) vfp_load_reg64(frm, rm); switch (a->cc) { case 0: /* eq: Z */ - tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero, - frn, frm); + tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero, frn, frm); break; case 1: /* vs: V */ - tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero, - frn, frm); + tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero, frn, frm); break; case 2: /* ge: N == V -> N ^ V == 0 */ tmp = tcg_temp_new_i64(); tcg_gen_xor_i64(tmp, vf, nf); - tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, - frn, frm); - tcg_temp_free_i64(tmp); + tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, frn, frm); break; case 3: /* gt: !Z && N == V */ - tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero, - frn, frm); + tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero, frn, frm); tmp = tcg_temp_new_i64(); tcg_gen_xor_i64(tmp, vf, nf); - tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, - dest, frm); - tcg_temp_free_i64(tmp); + tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, dest, frm); break; } vfp_store_reg64(dest, rd); - tcg_temp_free_i64(frn); - tcg_temp_free_i64(frm); - tcg_temp_free_i64(dest); - - tcg_temp_free_i64(zf); - tcg_temp_free_i64(nf); - tcg_temp_free_i64(vf); - - tcg_temp_free_i64(zero); } else { TCGv_i32 frn, frm, dest; TCGv_i32 tmp, zero; - zero = tcg_const_i32(0); + zero = tcg_constant_i32(0); frn = tcg_temp_new_i32(); frm = tcg_temp_new_i32(); @@ -382,28 +381,21 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a) vfp_load_reg32(frm, rm); switch (a->cc) { case 0: /* eq: Z */ - tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero, - frn, frm); + tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero, frn, frm); break; case 1: /* vs: V */ - tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero, - frn, frm); + tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero, frn, frm); break; case 2: /* ge: N == V -> N ^ V == 0 */ tmp = tcg_temp_new_i32(); tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); - tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, - frn, frm); - tcg_temp_free_i32(tmp); + tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, frn, frm); break; case 3: /* gt: !Z && N == V */ - tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero, - frn, frm); + tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero, frn, frm); tmp = tcg_temp_new_i32(); tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); - tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, - 
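The VSEL cases above evaluate the condition straight from the cached flag words with movcond, avoiding a branch. In those globals Z is encoded as cpu_ZF == 0, while N and V live in the sign bits of cpu_NF and cpu_VF, which is why "ge" tests the sign of VF ^ NF. A scalar model of the four encodable conditions (illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    /* Scalar model of the VSEL conditions (illustrative).
     * zf == 0 means Z is set; nf/vf carry N/V in their sign bits. */
    static bool vsel_cond(int cc, int32_t zf, int32_t nf, int32_t vf)
    {
        switch (cc) {
        case 0: return zf == 0;                      /* eq */
        case 1: return vf < 0;                       /* vs */
        case 2: return (vf ^ nf) >= 0;               /* ge: N == V */
        case 3: return zf != 0 && (vf ^ nf) >= 0;    /* gt */
        default: return false;
        }
    }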
dest, frm); - tcg_temp_free_i32(tmp); + tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, dest, frm); break; } /* For fp16 the top half is always zeroes */ @@ -411,11 +403,6 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a) tcg_gen_andi_i32(dest, dest, 0xffff); } vfp_store_reg32(dest, rd); - tcg_temp_free_i32(frn); - tcg_temp_free_i32(frm); - tcg_temp_free_i32(dest); - - tcg_temp_free_i32(zero); } return true; @@ -472,8 +459,7 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) fpst = fpstatus_ptr(FPST_FPCR); } - tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding)); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); + tcg_rmode = gen_set_rmode(rounding, fpst); if (sz == 3) { TCGv_i64 tcg_op; @@ -483,8 +469,6 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) vfp_load_reg64(tcg_op, rm); gen_helper_rintd(tcg_res, tcg_op, fpst); vfp_store_reg64(tcg_res, rd); - tcg_temp_free_i64(tcg_op); - tcg_temp_free_i64(tcg_res); } else { TCGv_i32 tcg_op; TCGv_i32 tcg_res; @@ -497,14 +481,9 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a) gen_helper_rints(tcg_res, tcg_op, fpst); } vfp_store_reg32(tcg_res, rd); - tcg_temp_free_i32(tcg_op); - tcg_temp_free_i32(tcg_res); } - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); - tcg_temp_free_i32(tcg_rmode); - - tcg_temp_free_ptr(fpst); + gen_restore_rmode(tcg_rmode, fpst); return true; } @@ -547,10 +526,8 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) fpst = fpstatus_ptr(FPST_FPCR); } - tcg_shift = tcg_const_i32(0); - - tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding)); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); + tcg_shift = tcg_constant_i32(0); + tcg_rmode = gen_set_rmode(rounding, fpst); if (sz == 3) { TCGv_i64 tcg_double, tcg_res; @@ -566,9 +543,6 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) } tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res); vfp_store_reg32(tcg_tmp, rd); - tcg_temp_free_i32(tcg_tmp); - tcg_temp_free_i64(tcg_res); - tcg_temp_free_i64(tcg_double); } else { TCGv_i32 tcg_single, tcg_res; tcg_single = tcg_temp_new_i32(); @@ -588,17 +562,9 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) } } vfp_store_reg32(tcg_res, rd); - tcg_temp_free_i32(tcg_res); - tcg_temp_free_i32(tcg_single); } - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); - tcg_temp_free_i32(tcg_rmode); - - tcg_temp_free_i32(tcg_shift); - - tcg_temp_free_ptr(fpst); - + gen_restore_rmode(tcg_rmode, fpst); return true; } @@ -724,7 +690,6 @@ static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a) if (!mve_skip_vmov(s, a->vn, a->index, a->size)) { tmp = load_reg(s, a->rt); write_neon_element32(tmp, a->vn, a->index, a->size); - tcg_temp_free_i32(tmp); } if (dc_isar_feature(aa32_mve, s)) { @@ -772,8 +737,6 @@ static bool trans_VDUP(DisasContext *s, arg_VDUP *a) tmp = load_reg(s, a->rt); tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn), vec_size, vec_size, tmp); - tcg_temp_free_i32(tmp); - return true; } @@ -850,15 +813,11 @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a) case ARM_VFP_MVFR2: case ARM_VFP_FPSID: if (s->current_el == 1) { - TCGv_i32 tcg_reg, tcg_rt; - gen_set_condexec(s); - gen_set_pc_im(s, s->pc_curr); - tcg_reg = tcg_const_i32(a->reg); - tcg_rt = tcg_const_i32(a->rt); - gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg); - tcg_temp_free_i32(tcg_reg); - tcg_temp_free_i32(tcg_rt); + gen_update_pc(s, 0); + gen_helper_check_hcr_el2_trap(tcg_env, + tcg_constant_i32(a->rt), + tcg_constant_i32(a->reg)); } /* fall through */ case ARM_VFP_FPEXC: @@ -872,7 +831,7 @@ static bool 
trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a) tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK); } else { tmp = tcg_temp_new_i32(); - gen_helper_vfp_get_fpscr(tmp, cpu_env); + gen_helper_vfp_get_fpscr(tmp, tcg_env); } break; default: @@ -882,7 +841,6 @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a) if (a->rt == 15) { /* Set the 4 flag bits in the CPSR. */ gen_set_nzcv(tmp); - tcg_temp_free_i32(tmp); } else { store_reg(s, a->rt, tmp); } @@ -897,8 +855,7 @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a) break; case ARM_VFP_FPSCR: tmp = load_reg(s, a->rt); - gen_helper_vfp_set_fpscr(cpu_env, tmp); - tcg_temp_free_i32(tmp); + gen_helper_vfp_set_fpscr(tcg_env, tmp); gen_lookup_tb(s); break; case ARM_VFP_FPEXC: @@ -953,7 +910,6 @@ static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a) tmp = load_reg(s, a->rt); tcg_gen_andi_i32(tmp, tmp, 0xffff); vfp_store_reg32(tmp, a->vn); - tcg_temp_free_i32(tmp); } return true; @@ -978,7 +934,6 @@ static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a) if (a->rt == 15) { /* Set the 4 flag bits in the CPSR. */ gen_set_nzcv(tmp); - tcg_temp_free_i32(tmp); } else { store_reg(s, a->rt, tmp); } @@ -986,7 +941,6 @@ static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a) /* general purpose register to VFP */ tmp = load_reg(s, a->rt); vfp_store_reg32(tmp, a->vn); - tcg_temp_free_i32(tmp); } return true; @@ -1020,10 +974,8 @@ static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a) /* gpreg to fpreg */ tmp = load_reg(s, a->rt); vfp_store_reg32(tmp, a->vm); - tcg_temp_free_i32(tmp); tmp = load_reg(s, a->rt2); vfp_store_reg32(tmp, a->vm + 1); - tcg_temp_free_i32(tmp); } return true; @@ -1063,10 +1015,8 @@ static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a) /* gpreg to fpreg */ tmp = load_reg(s, a->rt); vfp_store_reg32(tmp, a->vm * 2); - tcg_temp_free_i32(tmp); tmp = load_reg(s, a->rt2); vfp_store_reg32(tmp, a->vm * 2 + 1); - tcg_temp_free_i32(tmp); } return true; @@ -1101,9 +1051,6 @@ static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a) vfp_load_reg32(tmp, a->vd); gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN); } - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(addr); - return true; } @@ -1135,9 +1082,6 @@ static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a) vfp_load_reg32(tmp, a->vd); gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); } - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(addr); - return true; } @@ -1170,15 +1114,12 @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a) addr = add_reg_for_lit(s, a->rn, offset); tmp = tcg_temp_new_i64(); if (a->l) { - gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); + gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4); vfp_store_reg64(tmp, a->vd); } else { vfp_load_reg64(tmp, a->vd); - gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); + gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4); } - tcg_temp_free_i64(tmp); - tcg_temp_free_i32(addr); - return true; } @@ -1228,7 +1169,7 @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a) * value is above, it is UNKNOWN whether the limit check * triggers; we choose to trigger. 
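The gen_helper_v8m_stackcheck call that follows this comment performs the v8-M stack limit check when SP is the base register of the load/store multiple. As a behavioural sketch (the real helper lives in m_helper.c and raises a UsageFault with CFSR.STKOF set; the shape below is an assumption):

    #include <stdbool.h>
    #include <stdint.h>

    /* Behavioural sketch (assumed): does this access violate SPLIM? */
    static bool v8m_stack_limit_violated(uint32_t addr, uint32_t splim)
    {
        return addr < splim;  /* the real helper raises a STKOF UsageFault */
    }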
*/ - gen_helper_v8m_stackcheck(cpu_env, addr); + gen_helper_v8m_stackcheck(tcg_env, addr); } offset = 4; @@ -1245,7 +1186,6 @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a) } tcg_gen_addi_i32(addr, addr, offset); } - tcg_temp_free_i32(tmp); if (a->w) { /* writeback */ if (a->p) { @@ -1253,8 +1193,6 @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a) tcg_gen_addi_i32(addr, addr, offset); } store_reg(s, a->rn, addr); - } else { - tcg_temp_free_i32(addr); } clear_eci_state(s); @@ -1314,7 +1252,7 @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a) * value is above, it is UNKNOWN whether the limit check * triggers; we choose to trigger. */ - gen_helper_v8m_stackcheck(cpu_env, addr); + gen_helper_v8m_stackcheck(tcg_env, addr); } offset = 8; @@ -1322,16 +1260,15 @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a) for (i = 0; i < n; i++) { if (a->l) { /* load */ - gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); + gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4); vfp_store_reg64(tmp, a->vd + i); } else { /* store */ vfp_load_reg64(tmp, a->vd + i); - gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4); + gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4); } tcg_gen_addi_i32(addr, addr, offset); } - tcg_temp_free_i64(tmp); if (a->w) { /* writeback */ if (a->p) { @@ -1346,8 +1283,6 @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a) tcg_gen_addi_i32(addr, addr, offset); } store_reg(s, a->rn, addr); - } else { - tcg_temp_free_i32(addr); } clear_eci_state(s); @@ -1484,12 +1419,6 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn, vfp_load_reg32(f1, vm); } } - - tcg_temp_free_i32(f0); - tcg_temp_free_i32(f1); - tcg_temp_free_i32(fd); - tcg_temp_free_ptr(fpst); - return true; } @@ -1532,12 +1461,6 @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, } fn(fd, f0, f1, fpst); vfp_store_reg32(fd, vd); - - tcg_temp_free_i32(f0); - tcg_temp_free_i32(f1); - tcg_temp_free_i32(fd); - tcg_temp_free_ptr(fpst); - return true; } @@ -1614,12 +1537,6 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn, vfp_load_reg64(f1, vm); } } - - tcg_temp_free_i64(f0); - tcg_temp_free_i64(f1); - tcg_temp_free_i64(fd); - tcg_temp_free_ptr(fpst); - return true; } @@ -1687,10 +1604,6 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) vm = vfp_advance_sreg(vm, delta_m); vfp_load_reg32(f0, vm); } - - tcg_temp_free_i32(f0); - tcg_temp_free_i32(fd); - return true; } @@ -1723,7 +1636,6 @@ static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) vfp_load_reg32(f0, vm); fn(f0, f0); vfp_store_reg32(f0, vd); - tcg_temp_free_i32(f0); return true; } @@ -1797,10 +1709,6 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm) vd = vfp_advance_dreg(vm, delta_m); vfp_load_reg64(f0, vm); } - - tcg_temp_free_i64(f0); - tcg_temp_free_i64(fd); - return true; } @@ -1811,7 +1719,6 @@ static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) gen_helper_vfp_mulh(tmp, vn, vm, fpst); gen_helper_vfp_addh(vd, vd, tmp, fpst); - tcg_temp_free_i32(tmp); } static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a) @@ -1826,7 +1733,6 @@ static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) gen_helper_vfp_muls(tmp, vn, vm, fpst); gen_helper_vfp_adds(vd, vd, tmp, fpst); - tcg_temp_free_i32(tmp); } static bool trans_VMLA_sp(DisasContext *s, 
arg_VMLA_sp *a) @@ -1841,7 +1747,6 @@ static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst) gen_helper_vfp_muld(tmp, vn, vm, fpst); gen_helper_vfp_addd(vd, vd, tmp, fpst); - tcg_temp_free_i64(tmp); } static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a) @@ -1860,7 +1765,6 @@ static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) gen_helper_vfp_mulh(tmp, vn, vm, fpst); gen_helper_vfp_negh(tmp, tmp); gen_helper_vfp_addh(vd, vd, tmp, fpst); - tcg_temp_free_i32(tmp); } static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a) @@ -1879,7 +1783,6 @@ static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) gen_helper_vfp_muls(tmp, vn, vm, fpst); gen_helper_vfp_negs(tmp, tmp); gen_helper_vfp_adds(vd, vd, tmp, fpst); - tcg_temp_free_i32(tmp); } static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a) @@ -1898,7 +1801,6 @@ static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst) gen_helper_vfp_muld(tmp, vn, vm, fpst); gen_helper_vfp_negd(tmp, tmp); gen_helper_vfp_addd(vd, vd, tmp, fpst); - tcg_temp_free_i64(tmp); } static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a) @@ -1919,7 +1821,6 @@ static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) gen_helper_vfp_mulh(tmp, vn, vm, fpst); gen_helper_vfp_negh(vd, vd); gen_helper_vfp_addh(vd, vd, tmp, fpst); - tcg_temp_free_i32(tmp); } static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a) @@ -1940,7 +1841,6 @@ static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) gen_helper_vfp_muls(tmp, vn, vm, fpst); gen_helper_vfp_negs(vd, vd); gen_helper_vfp_adds(vd, vd, tmp, fpst); - tcg_temp_free_i32(tmp); } static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a) @@ -1961,7 +1861,6 @@ static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst) gen_helper_vfp_muld(tmp, vn, vm, fpst); gen_helper_vfp_negd(vd, vd); gen_helper_vfp_addd(vd, vd, tmp, fpst); - tcg_temp_free_i64(tmp); } static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a) @@ -1978,7 +1877,6 @@ static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) gen_helper_vfp_negh(tmp, tmp); gen_helper_vfp_negh(vd, vd); gen_helper_vfp_addh(vd, vd, tmp, fpst); - tcg_temp_free_i32(tmp); } static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a) @@ -1995,7 +1893,6 @@ static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) gen_helper_vfp_negs(tmp, tmp); gen_helper_vfp_negs(vd, vd); gen_helper_vfp_adds(vd, vd, tmp, fpst); - tcg_temp_free_i32(tmp); } static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a) @@ -2012,7 +1909,6 @@ static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst) gen_helper_vfp_negd(tmp, tmp); gen_helper_vfp_negd(vd, vd); gen_helper_vfp_addd(vd, vd, tmp, fpst); - tcg_temp_free_i64(tmp); } static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a) @@ -2224,12 +2120,6 @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst); vfp_store_reg32(vd, a->vd); - - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(vn); - tcg_temp_free_i32(vm); - tcg_temp_free_i32(vd); - return true; } @@ -2289,12 +2179,6 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) fpst = fpstatus_ptr(FPST_FPCR); gen_helper_vfp_muladds(vd, vn, vm, vd, fpst); vfp_store_reg32(vd, a->vd); - - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(vn); - 
tcg_temp_free_i32(vm); - tcg_temp_free_i32(vd); - return true; } @@ -2360,12 +2244,6 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) fpst = fpstatus_ptr(FPST_FPCR); gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst); vfp_store_reg64(vd, a->vd); - - tcg_temp_free_ptr(fpst); - tcg_temp_free_i64(vn); - tcg_temp_free_i64(vm); - tcg_temp_free_i64(vd); - return true; } @@ -2388,8 +2266,6 @@ MAKE_VFM_TRANS_FNS(dp) static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a) { - TCGv_i32 fd; - if (!dc_isar_feature(aa32_fp16_arith, s)) { return false; } @@ -2402,9 +2278,7 @@ static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a) return true; } - fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm)); - vfp_store_reg32(fd, a->vd); - tcg_temp_free_i32(fd); + vfp_store_reg32(tcg_constant_i32(vfp_expand_imm(MO_16, a->imm)), a->vd); return true; } @@ -2440,7 +2314,7 @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a) } } - fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm)); + fd = tcg_constant_i32(vfp_expand_imm(MO_32, a->imm)); for (;;) { vfp_store_reg32(fd, vd); @@ -2454,7 +2328,6 @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a) vd = vfp_advance_sreg(vd, delta_d); } - tcg_temp_free_i32(fd); return true; } @@ -2495,7 +2368,7 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a) } } - fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm)); + fd = tcg_constant_i64(vfp_expand_imm(MO_64, a->imm)); for (;;) { vfp_store_reg64(fd, vd); @@ -2509,7 +2382,6 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a) vd = vfp_advance_dreg(vd, delta_d); } - tcg_temp_free_i64(fd); return true; } @@ -2547,17 +2419,17 @@ DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2) static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm) { - gen_helper_vfp_sqrth(vd, vm, cpu_env); + gen_helper_vfp_sqrth(vd, vm, tcg_env); } static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm) { - gen_helper_vfp_sqrts(vd, vm, cpu_env); + gen_helper_vfp_sqrts(vd, vm, tcg_env); } static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm) { - gen_helper_vfp_sqrtd(vd, vm, cpu_env); + gen_helper_vfp_sqrtd(vd, vm, tcg_env); } DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith) @@ -2592,14 +2464,10 @@ static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a) } if (a->e) { - gen_helper_vfp_cmpeh(vd, vm, cpu_env); + gen_helper_vfp_cmpeh(vd, vm, tcg_env); } else { - gen_helper_vfp_cmph(vd, vm, cpu_env); + gen_helper_vfp_cmph(vd, vm, tcg_env); } - - tcg_temp_free_i32(vd); - tcg_temp_free_i32(vm); - return true; } @@ -2631,14 +2499,10 @@ static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a) } if (a->e) { - gen_helper_vfp_cmpes(vd, vm, cpu_env); + gen_helper_vfp_cmpes(vd, vm, tcg_env); } else { - gen_helper_vfp_cmps(vd, vm, cpu_env); + gen_helper_vfp_cmps(vd, vm, tcg_env); } - - tcg_temp_free_i32(vd); - tcg_temp_free_i32(vm); - return true; } @@ -2675,14 +2539,10 @@ static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a) } if (a->e) { - gen_helper_vfp_cmped(vd, vm, cpu_env); + gen_helper_vfp_cmped(vd, vm, tcg_env); } else { - gen_helper_vfp_cmpd(vd, vm, cpu_env); + gen_helper_vfp_cmpd(vd, vm, tcg_env); } - - tcg_temp_free_i64(vd); - tcg_temp_free_i64(vm); - return true; } @@ -2704,12 +2564,9 @@ static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a) ahp_mode = get_ahp_flag(); tmp = tcg_temp_new_i32(); /* The T bit tells us if we want the low or high 16 bits of Vm */ - tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t)); + 
tcg_gen_ld16u_i32(tmp, tcg_env, vfp_f16_offset(a->vm, a->t)); gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode); vfp_store_reg32(tmp, a->vd); - tcg_temp_free_i32(ahp_mode); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tmp); return true; } @@ -2741,14 +2598,10 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a) ahp_mode = get_ahp_flag(); tmp = tcg_temp_new_i32(); /* The T bit tells us if we want the low or high 16 bits of Vm */ - tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t)); + tcg_gen_ld16u_i32(tmp, tcg_env, vfp_f16_offset(a->vm, a->t)); vd = tcg_temp_new_i64(); gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode); vfp_store_reg64(vd, a->vd); - tcg_temp_free_i32(ahp_mode); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tmp); - tcg_temp_free_i64(vd); return true; } @@ -2770,9 +2623,7 @@ static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a) vfp_load_reg32(tmp, a->vm); gen_helper_bfcvt(tmp, tmp, fpst); - tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t)); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tmp); + tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t)); return true; } @@ -2796,10 +2647,7 @@ static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a) vfp_load_reg32(tmp, a->vm); gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode); - tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t)); - tcg_temp_free_i32(ahp_mode); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tmp); + tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t)); return true; } @@ -2834,11 +2682,7 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a) vfp_load_reg64(vm, a->vm); gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode); - tcg_temp_free_i64(vm); - tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t)); - tcg_temp_free_i32(ahp_mode); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tmp); + tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t)); return true; } @@ -2860,8 +2704,6 @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a) fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_rinth(tmp, tmp, fpst); vfp_store_reg32(tmp, a->vd); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tmp); return true; } @@ -2883,8 +2725,6 @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a) fpst = fpstatus_ptr(FPST_FPCR); gen_helper_rints(tmp, tmp, fpst); vfp_store_reg32(tmp, a->vd); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tmp); return true; } @@ -2915,8 +2755,6 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a) fpst = fpstatus_ptr(FPST_FPCR); gen_helper_rintd(tmp, tmp, fpst); vfp_store_reg64(tmp, a->vd); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i64(tmp); return true; } @@ -2937,14 +2775,10 @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a) tmp = tcg_temp_new_i32(); vfp_load_reg32(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR_F16); - tcg_rmode = tcg_const_i32(float_round_to_zero); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); + tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); gen_helper_rinth(tmp, tmp, fpst); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); + gen_restore_rmode(tcg_rmode, fpst); vfp_store_reg32(tmp, a->vd); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tcg_rmode); - tcg_temp_free_i32(tmp); return true; } @@ -2965,14 +2799,10 @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a) tmp = tcg_temp_new_i32(); vfp_load_reg32(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR); - tcg_rmode = tcg_const_i32(float_round_to_zero); - 
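These hunks replace the open-coded tcg_const_i32 plus gen_helper_set_rmode pairs with a gen_set_rmode()/gen_restore_rmode() bracket. A sketch of the shape of that pair, assuming the definitions this series adds to translate.c (illustrative, not the exact code):

    /* Sketch of the rounding-mode bracket used above (assumed shape). */
    TCGv_i32 gen_set_rmode(ARMFPRounding rmode, TCGv_ptr fpst)
    {
        TCGv_i32 new = tcg_constant_i32(arm_rmode_to_sf(rmode));
        TCGv_i32 old = tcg_temp_new_i32();

        gen_helper_set_rmode(old, new, fpst);  /* returns the previous mode */
        return old;
    }

    void gen_restore_rmode(TCGv_i32 old, TCGv_ptr fpst)
    {
        gen_helper_set_rmode(old, old, fpst);
    }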
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); + tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); gen_helper_rints(tmp, tmp, fpst); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); + gen_restore_rmode(tcg_rmode, fpst); vfp_store_reg32(tmp, a->vd); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tcg_rmode); - tcg_temp_free_i32(tmp); return true; } @@ -3002,14 +2832,10 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a) tmp = tcg_temp_new_i64(); vfp_load_reg64(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR); - tcg_rmode = tcg_const_i32(float_round_to_zero); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); + tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); gen_helper_rintd(tmp, tmp, fpst); - gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); + gen_restore_rmode(tcg_rmode, fpst); vfp_store_reg64(tmp, a->vd); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i64(tmp); - tcg_temp_free_i32(tcg_rmode); return true; } @@ -3031,8 +2857,6 @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a) fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_rinth_exact(tmp, tmp, fpst); vfp_store_reg32(tmp, a->vd); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tmp); return true; } @@ -3054,8 +2878,6 @@ static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a) fpst = fpstatus_ptr(FPST_FPCR); gen_helper_rints_exact(tmp, tmp, fpst); vfp_store_reg32(tmp, a->vd); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i32(tmp); return true; } @@ -3086,8 +2908,6 @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a) fpst = fpstatus_ptr(FPST_FPCR); gen_helper_rintd_exact(tmp, tmp, fpst); vfp_store_reg64(tmp, a->vd); - tcg_temp_free_ptr(fpst); - tcg_temp_free_i64(tmp); return true; } @@ -3112,10 +2932,8 @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a) vm = tcg_temp_new_i32(); vd = tcg_temp_new_i64(); vfp_load_reg32(vm, a->vm); - gen_helper_vfp_fcvtds(vd, vm, cpu_env); + gen_helper_vfp_fcvtds(vd, vm, tcg_env); vfp_store_reg64(vd, a->vd); - tcg_temp_free_i32(vm); - tcg_temp_free_i64(vd); return true; } @@ -3140,10 +2958,8 @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a) vd = tcg_temp_new_i32(); vm = tcg_temp_new_i64(); vfp_load_reg64(vm, a->vm); - gen_helper_vfp_fcvtsd(vd, vm, cpu_env); + gen_helper_vfp_fcvtsd(vd, vm, tcg_env); vfp_store_reg32(vd, a->vd); - tcg_temp_free_i32(vd); - tcg_temp_free_i64(vm); return true; } @@ -3171,8 +2987,6 @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a) gen_helper_vfp_uitoh(vm, vm, fpst); } vfp_store_reg32(vm, a->vd); - tcg_temp_free_i32(vm); - tcg_temp_free_ptr(fpst); return true; } @@ -3200,8 +3014,6 @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a) gen_helper_vfp_uitos(vm, vm, fpst); } vfp_store_reg32(vm, a->vd); - tcg_temp_free_i32(vm); - tcg_temp_free_ptr(fpst); return true; } @@ -3236,9 +3048,6 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a) gen_helper_vfp_uitod(vd, vm, fpst); } vfp_store_reg64(vd, a->vd); - tcg_temp_free_i32(vm); - tcg_temp_free_i64(vd); - tcg_temp_free_ptr(fpst); return true; } @@ -3267,10 +3076,8 @@ static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a) vm = tcg_temp_new_i64(); vd = tcg_temp_new_i32(); vfp_load_reg64(vm, a->vm); - gen_helper_vjcvt(vd, vm, cpu_env); + gen_helper_vjcvt(vd, vm, tcg_env); vfp_store_reg32(vd, a->vd); - tcg_temp_free_i64(vm); - tcg_temp_free_i32(vd); return true; } @@ -3294,7 +3101,7 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a) vfp_load_reg32(vd, a->vd); fpst = fpstatus_ptr(FPST_FPCR_F16); - shift = 
tcg_const_i32(frac_bits); + shift = tcg_constant_i32(frac_bits); /* Switch on op:U:sx bits */ switch (a->opc) { @@ -3327,9 +3134,6 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a) } vfp_store_reg32(vd, a->vd); - tcg_temp_free_i32(vd); - tcg_temp_free_i32(shift); - tcg_temp_free_ptr(fpst); return true; } @@ -3353,7 +3157,7 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a) vfp_load_reg32(vd, a->vd); fpst = fpstatus_ptr(FPST_FPCR); - shift = tcg_const_i32(frac_bits); + shift = tcg_constant_i32(frac_bits); /* Switch on op:U:sx bits */ switch (a->opc) { @@ -3386,9 +3190,6 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a) } vfp_store_reg32(vd, a->vd); - tcg_temp_free_i32(vd); - tcg_temp_free_i32(shift); - tcg_temp_free_ptr(fpst); return true; } @@ -3418,7 +3219,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a) vfp_load_reg64(vd, a->vd); fpst = fpstatus_ptr(FPST_FPCR); - shift = tcg_const_i32(frac_bits); + shift = tcg_constant_i32(frac_bits); /* Switch on op:U:sx bits */ switch (a->opc) { @@ -3451,9 +3252,6 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a) } vfp_store_reg64(vd, a->vd); - tcg_temp_free_i64(vd); - tcg_temp_free_i32(shift); - tcg_temp_free_ptr(fpst); return true; } @@ -3488,8 +3286,6 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a) } } vfp_store_reg32(vm, a->vd); - tcg_temp_free_i32(vm); - tcg_temp_free_ptr(fpst); return true; } @@ -3524,8 +3320,6 @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a) } } vfp_store_reg32(vm, a->vd); - tcg_temp_free_i32(vm); - tcg_temp_free_ptr(fpst); return true; } @@ -3567,9 +3361,6 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a) } } vfp_store_reg32(vd, a->vd); - tcg_temp_free_i32(vd); - tcg_temp_free_i64(vm); - tcg_temp_free_ptr(fpst); return true; } @@ -3596,8 +3387,6 @@ static bool trans_VINS(DisasContext *s, arg_VINS *a) vfp_load_reg32(rd, a->vd); tcg_gen_deposit_i32(rd, rd, rm, 16, 16); vfp_store_reg32(rd, a->vd); - tcg_temp_free_i32(rm); - tcg_temp_free_i32(rd); return true; } @@ -3622,6 +3411,5 @@ static bool trans_VMOVX(DisasContext *s, arg_VINS *a) vfp_load_reg32(rm, a->vm); tcg_gen_shri_i32(rm, rm, 16); vfp_store_reg32(rm, a->vd); - tcg_temp_free_i32(rm); return true; } diff --git a/target/arm/translate.c b/target/arm/tcg/translate.c index f7086c66a5..dc49a8d806 100644 --- a/target/arm/translate.c +++ b/target/arm/tcg/translate.c @@ -20,22 +20,18 @@ */ #include "qemu/osdep.h" -#include "cpu.h" -#include "internals.h" -#include "disas/disas.h" -#include "exec/exec-all.h" -#include "tcg/tcg-op.h" -#include "tcg/tcg-op-gvec.h" +#include "translate.h" +#include "translate-a32.h" #include "qemu/log.h" -#include "qemu/bitops.h" +#include "disas/disas.h" #include "arm_ldst.h" #include "semihosting/semihost.h" - +#include "cpregs.h" #include "exec/helper-proto.h" -#include "exec/helper-gen.h" - -#include "exec/log.h" +#define HELPER_H "helper.h" +#include "exec/helper-info.c.inc" +#undef HELPER_H #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T) #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5) @@ -48,9 +44,6 @@ #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7) #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8) -#include "translate.h" -#include "translate-a32.h" - /* These are TCG temporaries used only by the legacy iwMMXt decoder */ static TCGv_i64 cpu_V0, cpu_V1, cpu_M0; /* These are TCG globals which alias CPUARMState fields */ @@ -59,8 +52,6 @@ TCGv_i32 
cpu_CF, cpu_NF, cpu_VF, cpu_ZF; TCGv_i64 cpu_exclusive_addr; TCGv_i64 cpu_exclusive_val; -#include "exec/gen-icount.h" - static const char * const regnames[] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" }; @@ -72,18 +63,18 @@ void arm_translate_init(void) int i; for (i = 0; i < 16; i++) { - cpu_R[i] = tcg_global_mem_new_i32(cpu_env, + cpu_R[i] = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, regs[i]), regnames[i]); } - cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF"); - cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF"); - cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF"); - cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF"); + cpu_CF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, CF), "CF"); + cpu_NF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, NF), "NF"); + cpu_VF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, VF), "VF"); + cpu_ZF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, ZF), "ZF"); - cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env, + cpu_exclusive_addr = tcg_global_mem_new_i64(tcg_env, offsetof(CPUARMState, exclusive_addr), "exclusive_addr"); - cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env, + cpu_exclusive_val = tcg_global_mem_new_i64(tcg_env, offsetof(CPUARMState, exclusive_val), "exclusive_val"); a64_translate_init(); @@ -163,7 +154,7 @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op) void arm_gen_condlabel(DisasContext *s) { if (!s->condjmp) { - s->condlabel = gen_new_label(); + s->condlabel = gen_disas_label(s); s->condjmp = 1; } } @@ -180,6 +171,24 @@ typedef enum ISSInfo { ISSIs16Bit = (1 << 8), } ISSInfo; +/* + * Store var into env + offset to a member with size bytes. + * Free var after use. + */ +void store_cpu_offset(TCGv_i32 var, int offset, int size) +{ + switch (size) { + case 1: + tcg_gen_st8_i32(var, tcg_env, offset); + break; + case 4: + tcg_gen_st_i32(var, tcg_env, offset); + break; + default: + g_assert_not_reached(); + } +} + /* Save the syndrome information for a Data Abort */ static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo) { @@ -219,16 +228,12 @@ static inline int get_a32_user_mem_index(DisasContext *s) * otherwise, access as if at PL0. */ switch (s->mmu_idx) { + case ARMMMUIdx_E3: case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */ case ARMMMUIdx_E10_0: case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: return arm_to_core_mmu_idx(ARMMMUIdx_E10_0); - case ARMMMUIdx_SE3: - case ARMMMUIdx_SE10_0: - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: - return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0); case ARMMMUIdx_MUser: case ARMMMUIdx_MPriv: return arm_to_core_mmu_idx(ARMMMUIdx_MUser); @@ -246,17 +251,27 @@ static inline int get_a32_user_mem_index(DisasContext *s) } } -/* The architectural value of PC. */ -static uint32_t read_pc(DisasContext *s) +/* The pc_curr difference for an architectural jump. */ +static target_long jmp_diff(DisasContext *s, target_long diff) { - return s->pc_curr + (s->thumb ? 4 : 8); + return diff + (s->thumb ? 4 : 8); +} + +static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff) +{ + assert(s->pc_save != -1); + if (tb_cflags(s->base.tb) & CF_PCREL) { + tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff); + } else { + tcg_gen_movi_i32(var, s->pc_curr + diff); + } } /* Set a variable to the value of a CPU register. 
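gen_pc_plus_diff() above is the heart of the CF_PCREL conversion: instead of baking the absolute pc_curr into the generated code, it adds a displacement to the runtime PC register, so the same translated code can be reused at different virtual addresses. jmp_diff() folds in the legacy pipeline offset; a worked example of the architectural PC an instruction observes (illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    /* Worked example: the architectural PC value an insn reads. */
    static uint32_t arch_pc(uint32_t insn_addr, bool thumb)
    {
        /* Thumb reads PC as the insn address + 4, A32 as + 8. */
        return insn_addr + (thumb ? 4 : 8);
    }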
*/ void load_reg_var(DisasContext *s, TCGv_i32 var, int reg) { if (reg == 15) { - tcg_gen_movi_i32(var, read_pc(s)); + gen_pc_plus_diff(s, var, jmp_diff(s, 0)); } else { tcg_gen_mov_i32(var, cpu_R[reg]); } @@ -272,7 +287,11 @@ TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs) TCGv_i32 tmp = tcg_temp_new_i32(); if (reg == 15) { - tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs); + /* + * This address is computed from an aligned PC: + * subtract off the low bits. + */ + gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3))); } else { tcg_gen_addi_i32(tmp, cpu_R[reg], ofs); } @@ -291,12 +310,12 @@ void store_reg(DisasContext *s, int reg, TCGv_i32 var) */ tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3); s->base.is_jmp = DISAS_JUMP; + s->pc_save = -1; } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) { /* For M-profile SP bits [1:0] are always zero */ tcg_gen_andi_i32(var, var, ~3); } tcg_gen_mov_i32(cpu_R[reg], var); - tcg_temp_free_i32(var); } /* @@ -310,7 +329,7 @@ static void store_sp_checked(DisasContext *s, TCGv_i32 var) { #ifndef CONFIG_USER_ONLY if (s->v8m_stackcheck) { - gen_helper_v8m_stackcheck(cpu_env, var); + gen_helper_v8m_stackcheck(tcg_env, var); } #endif store_reg(s, 13, var); @@ -327,21 +346,36 @@ static void store_sp_checked(DisasContext *s, TCGv_i32 var) void gen_set_cpsr(TCGv_i32 var, uint32_t mask) { - TCGv_i32 tmp_mask = tcg_const_i32(mask); - gen_helper_cpsr_write(cpu_env, var, tmp_mask); - tcg_temp_free_i32(tmp_mask); + gen_helper_cpsr_write(tcg_env, var, tcg_constant_i32(mask)); } -static void gen_exception_internal(int excp) +static void gen_rebuild_hflags(DisasContext *s, bool new_el) { - TCGv_i32 tcg_excp = tcg_const_i32(excp); + bool m_profile = arm_dc_feature(s, ARM_FEATURE_M); + if (new_el) { + if (m_profile) { + gen_helper_rebuild_hflags_m32_newel(tcg_env); + } else { + gen_helper_rebuild_hflags_a32_newel(tcg_env); + } + } else { + TCGv_i32 tcg_el = tcg_constant_i32(s->current_el); + if (m_profile) { + gen_helper_rebuild_hflags_m32(tcg_env, tcg_el); + } else { + gen_helper_rebuild_hflags_a32(tcg_env, tcg_el); + } + } +} + +static void gen_exception_internal(int excp) +{ assert(excp_is_internal(excp)); - gen_helper_exception_internal(cpu_env, tcg_excp); - tcg_temp_free_i32(tcg_excp); + gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp)); } -static void gen_step_complete_exception(DisasContext *s) +static void gen_singlestep_exception(DisasContext *s) { /* We just completed step of an insn. Move from Active-not-pending * to Active-pending, and then also take the swstep exception. @@ -357,30 +391,6 @@ static void gen_step_complete_exception(DisasContext *s) s->base.is_jmp = DISAS_NORETURN; } -static void gen_singlestep_exception(DisasContext *s) -{ - /* Generate the right kind of exception for singlestep, which is - * either the architectural singlestep or EXCP_DEBUG for QEMU's - * gdb singlestepping. - */ - if (s->ss_active) { - gen_step_complete_exception(s); - } else { - gen_exception_internal(EXCP_DEBUG); - } -} - -static inline bool is_singlestepping(DisasContext *s) -{ - /* Return true if we are singlestepping either because of - * architectural singlestep or QEMU gdbstub singlestep. This does - * not include the command line '-singlestep' mode which is rather - * misnamed as it only means "one instruction per TB" and doesn't - * affect the code we generate. 
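gen_rebuild_hflags() above centralises the choice between the *_newel helper variants, needed when an instruction may change the exception level so the target EL is unknown at translate time, and the cheaper fixed-EL variants. A hypothetical call site for illustration (not taken from this patch):

    /* Hypothetical call site (illustrative, not from this patch). */
    static void msr_cpsr_sketch(DisasContext *s)
    {
        /* ... CPSR written: mode, and therefore EL, may have changed ... */
        gen_rebuild_hflags(s, true);    /* use the _newel variant */
    }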
- */ - return s->base.singlestep_enabled || s->ss_active; -} - void clear_eci_state(DisasContext *s) { /* @@ -388,8 +398,7 @@ void clear_eci_state(DisasContext *s) * multiple insn executes. */ if (s->eci) { - TCGv_i32 tmp = tcg_const_i32(0); - store_cpu_field(tmp, condexec_bits); + store_cpu_field_constant(0, condexec_bits); s->eci = 0; } } @@ -401,26 +410,22 @@ static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b) tcg_gen_ext16s_i32(tmp1, a); tcg_gen_ext16s_i32(tmp2, b); tcg_gen_mul_i32(tmp1, tmp1, tmp2); - tcg_temp_free_i32(tmp2); tcg_gen_sari_i32(a, a, 16); tcg_gen_sari_i32(b, b, 16); tcg_gen_mul_i32(b, b, a); tcg_gen_mov_i32(a, tmp1); - tcg_temp_free_i32(tmp1); } /* Byteswap each halfword. */ void gen_rev16(TCGv_i32 dest, TCGv_i32 var) { TCGv_i32 tmp = tcg_temp_new_i32(); - TCGv_i32 mask = tcg_const_i32(0x00ff00ff); + TCGv_i32 mask = tcg_constant_i32(0x00ff00ff); tcg_gen_shri_i32(tmp, var, 8); tcg_gen_and_i32(tmp, tmp, mask); tcg_gen_and_i32(var, var, mask); tcg_gen_shli_i32(var, var, 8); tcg_gen_or_i32(dest, var, tmp); - tcg_temp_free_i32(mask); - tcg_temp_free_i32(tmp); } /* Byteswap low halfword and sign extend. */ @@ -445,7 +450,6 @@ static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) tcg_gen_andi_i32(t1, t1, ~0x8000); tcg_gen_add_i32(t0, t0, t1); tcg_gen_xor_i32(dest, t0, tmp); - tcg_temp_free_i32(tmp); } /* Set N and Z flags from var. */ @@ -480,7 +484,6 @@ static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) tcg_gen_xor_i32(cpu_VF, cpu_NF, t0); tcg_gen_xor_i32(tmp, t0, t1); tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); - tcg_temp_free_i32(tmp); tcg_gen_mov_i32(dest, cpu_NF); } @@ -501,14 +504,11 @@ static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) tcg_gen_extu_i32_i64(q1, cpu_CF); tcg_gen_add_i64(q0, q0, q1); tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0); - tcg_temp_free_i64(q0); - tcg_temp_free_i64(q1); } tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_xor_i32(cpu_VF, cpu_NF, t0); tcg_gen_xor_i32(tmp, t0, t1); tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); - tcg_temp_free_i32(tmp); tcg_gen_mov_i32(dest, cpu_NF); } @@ -523,7 +523,6 @@ static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) tmp = tcg_temp_new_i32(); tcg_gen_xor_i32(tmp, t0, t1); tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); - tcg_temp_free_i32(tmp); tcg_gen_mov_i32(dest, cpu_NF); } @@ -533,23 +532,18 @@ static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) TCGv_i32 tmp = tcg_temp_new_i32(); tcg_gen_not_i32(tmp, t1); gen_adc_CC(dest, t0, tmp); - tcg_temp_free_i32(tmp); } #define GEN_SHIFT(name) \ static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \ { \ - TCGv_i32 tmp1, tmp2, tmp3; \ - tmp1 = tcg_temp_new_i32(); \ - tcg_gen_andi_i32(tmp1, t1, 0xff); \ - tmp2 = tcg_const_i32(0); \ - tmp3 = tcg_const_i32(0x1f); \ - tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \ - tcg_temp_free_i32(tmp3); \ - tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \ - tcg_gen_##name##_i32(dest, tmp2, tmp1); \ - tcg_temp_free_i32(tmp2); \ - tcg_temp_free_i32(tmp1); \ + TCGv_i32 tmpd = tcg_temp_new_i32(); \ + TCGv_i32 tmp1 = tcg_temp_new_i32(); \ + TCGv_i32 zero = tcg_constant_i32(0); \ + tcg_gen_andi_i32(tmp1, t1, 0x1f); \ + tcg_gen_##name##_i32(tmpd, t0, tmp1); \ + tcg_gen_andi_i32(tmp1, t1, 0xe0); \ + tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd); \ } GEN_SHIFT(shl) GEN_SHIFT(shr) @@ -557,14 +551,11 @@ GEN_SHIFT(shr) static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) { - TCGv_i32 tmp1, tmp2; - tmp1 = tcg_temp_new_i32(); + TCGv_i32 tmp1 = tcg_temp_new_i32(); + tcg_gen_andi_i32(tmp1, 
t1, 0xff); - tmp2 = tcg_const_i32(0x1f); - tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1); - tcg_temp_free_i32(tmp2); + tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31)); tcg_gen_sar_i32(dest, t0, tmp1); - tcg_temp_free_i32(tmp1); } static void shifter_out_im(TCGv_i32 var, int shift) @@ -617,7 +608,6 @@ static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop, shifter_out_im(var, 0); tcg_gen_shri_i32(var, var, 1); tcg_gen_or_i32(var, var, tmp); - tcg_temp_free_i32(tmp); } } }; @@ -627,10 +617,10 @@ static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop, { if (flags) { switch (shiftop) { - case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break; - case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break; - case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break; - case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break; + case 0: gen_helper_shl_cc(var, tcg_env, var, shift); break; + case 1: gen_helper_shr_cc(var, tcg_env, var, shift); break; + case 2: gen_helper_sar_cc(var, tcg_env, var, shift); break; + case 3: gen_helper_ror_cc(var, tcg_env, var, shift); break; } } else { switch (shiftop) { @@ -647,7 +637,6 @@ static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop, tcg_gen_rotr_i32(var, var, shift); break; } } - tcg_temp_free_i32(shift); } /* @@ -658,7 +647,6 @@ void arm_test_cc(DisasCompare *cmp, int cc) { TCGv_i32 value; TCGCond cond; - bool global = true; switch (cc) { case 0: /* eq: Z */ @@ -689,7 +677,6 @@ void arm_test_cc(DisasCompare *cmp, int cc) case 9: /* ls: !C || Z -> !(C && !Z) */ cond = TCG_COND_NE; value = tcg_temp_new_i32(); - global = false; /* CF is 1 for C, so -CF is an all-bits-set mask for C; ZF is non-zero for !Z; so AND the two subexpressions. */ tcg_gen_neg_i32(value, cpu_CF); @@ -701,7 +688,6 @@ void arm_test_cc(DisasCompare *cmp, int cc) /* Since we're only interested in the sign bit, == 0 is >= 0. */ cond = TCG_COND_GE; value = tcg_temp_new_i32(); - global = false; tcg_gen_xor_i32(value, cpu_VF, cpu_NF); break; @@ -709,7 +695,6 @@ void arm_test_cc(DisasCompare *cmp, int cc) case 13: /* le: Z || N != V */ cond = TCG_COND_NE; value = tcg_temp_new_i32(); - global = false; /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate * the sign bit then AND with ZF to yield the result. */ tcg_gen_xor_i32(value, cpu_VF, cpu_NF); @@ -737,14 +722,6 @@ void arm_test_cc(DisasCompare *cmp, int cc) no_invert: cmp->cond = cond; cmp->value = value; - cmp->value_global = global; -} - -void arm_free_cc(DisasCompare *cmp) -{ - if (!cmp->value_global) { - tcg_temp_free_i32(cmp->value); - } } void arm_jump_cc(DisasCompare *cmp, TCGLabel *label) @@ -757,22 +734,21 @@ void arm_gen_test_cc(int cc, TCGLabel *label) DisasCompare cmp; arm_test_cc(&cmp, cc); arm_jump_cc(&cmp, label); - arm_free_cc(&cmp); } void gen_set_condexec(DisasContext *s) { if (s->condexec_mask) { uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1); - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_movi_i32(tmp, val); - store_cpu_field(tmp, condexec_bits); + + store_cpu_field_constant(val, condexec_bits); } } -void gen_set_pc_im(DisasContext *s, target_ulong val) +void gen_update_pc(DisasContext *s, target_long diff) { - tcg_gen_movi_i32(cpu_R[15], val); + gen_pc_plus_diff(s, cpu_R[15], diff); + s->pc_save = s->pc_curr + diff; } /* Set PC and Thumb state from var. var is marked as dead. 
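Bit 0 of var selects the new execution state: it is copied into env->thumb while the PC is stored with bit 0 forced clear, so (hypothetical example) a BX to 0x8001 lands at 0x8000 in Thumb state. Because the destination is only known at run time, pc_save is invalidated.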
*/ @@ -782,6 +758,7 @@ static inline void gen_bx(DisasContext *s, TCGv_i32 var) tcg_gen_andi_i32(cpu_R[15], var, ~1); tcg_gen_andi_i32(var, var, 1); store_cpu_field(var, thumb); + s->pc_save = -1; } /* @@ -823,7 +800,7 @@ static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var) static inline void gen_bx_excret_final_code(DisasContext *s) { /* Generate the code to finish possible exception return and end the TB */ - TCGLabel *excret_label = gen_new_label(); + DisasLabel excret_label = gen_disas_label(s); uint32_t min_magic; if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) { @@ -835,14 +812,14 @@ static inline void gen_bx_excret_final_code(DisasContext *s) } /* Is the new PC value in the magic range indicating exception return? */ - tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label); + tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label); /* No: end the TB as we would for a DISAS_JMP */ - if (is_singlestepping(s)) { + if (s->ss_active) { gen_singlestep_exception(s); } else { tcg_gen_exit_tb(NULL, 0); } - gen_set_label(excret_label); + set_disas_label(s, excret_label); /* Yes: this is an exception return. * At this point in runtime env->regs[15] and env->thumb will hold * the exception-return magic number, which do_v7m_exception_exit() @@ -864,7 +841,7 @@ static inline void gen_bxns(DisasContext *s, int rm) /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory * we need to sync state before calling it, but: - * - we don't need to do gen_set_pc_im() because the bxns helper will + * - we don't need to do gen_update_pc() because the bxns helper will * always set the PC itself * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE * unless it's outside an IT block or the last insn in an IT block, @@ -872,8 +849,7 @@ static inline void gen_bxns(DisasContext *s, int rm) * is correct in the non-UNPREDICTABLE cases, and we can choose * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise. */ - gen_helper_v7m_bxns(cpu_env, var); - tcg_temp_free_i32(var); + gen_helper_v7m_bxns(tcg_env, var); s->base.is_jmp = DISAS_EXIT; } @@ -885,9 +861,8 @@ static inline void gen_blxns(DisasContext *s, int rm) * We do however need to set the PC, because the blxns helper reads it. * The blxns helper may throw an exception. */ - gen_set_pc_im(s, s->base.pc_next); - gen_helper_v7m_blxns(cpu_env, var); - tcg_temp_free_i32(var); + gen_update_pc(s, curr_insn_len(s)); + gen_helper_v7m_blxns(tcg_env, var); s->base.is_jmp = DISAS_EXIT; } @@ -925,13 +900,7 @@ static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var) MemOp pow2_align(unsigned i) { static const MemOp mop_align[] = { - 0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16, - /* - * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such - * that 256-bit alignment (MO_ALIGN_32) cannot be supported: - * see get_alignment_bits(). Enforce only 128-bit alignment for now. 
- */ - MO_ALIGN_16 + 0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16, MO_ALIGN_32 }; g_assert(i < ARRAY_SIZE(mop_align)); return mop_align[i]; @@ -967,7 +936,6 @@ void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val, { TCGv addr = gen_aa32_addr(s, a32, opc); tcg_gen_qemu_ld_i32(val, addr, index, opc); - tcg_temp_free(addr); } void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val, @@ -975,7 +943,6 @@ void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val, { TCGv addr = gen_aa32_addr(s, a32, opc); tcg_gen_qemu_st_i32(val, addr, index, opc); - tcg_temp_free(addr); } void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val, @@ -989,7 +956,6 @@ void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val, if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) { tcg_gen_rotri_i64(val, val, 32); } - tcg_temp_free(addr); } void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val, @@ -1002,11 +968,9 @@ void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val, TCGv_i64 tmp = tcg_temp_new_i64(); tcg_gen_rotri_i64(tmp, val, 32); tcg_gen_qemu_st_i64(tmp, addr, index, opc); - tcg_temp_free_i64(tmp); } else { tcg_gen_qemu_st_i64(val, addr, index, opc); } - tcg_temp_free(addr); } void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, @@ -1053,15 +1017,15 @@ static inline void gen_hvc(DisasContext *s, int imm16) * as an undefined insn by runtime configuration (ie before * the insn really executes). */ - gen_set_pc_im(s, s->pc_curr); - gen_helper_pre_hvc(cpu_env); + gen_update_pc(s, 0); + gen_helper_pre_hvc(tcg_env); /* Otherwise we will treat this as a real exception which * happens after execution of the insn. (The distinction matters * for the PC value reported to the exception handler and also * for single stepping.) */ s->svc_imm = imm16; - gen_set_pc_im(s, s->base.pc_next); + gen_update_pc(s, curr_insn_len(s)); s->base.is_jmp = DISAS_HVC; } @@ -1070,76 +1034,88 @@ static inline void gen_smc(DisasContext *s) /* As with HVC, we may take an exception either before or after * the insn executes. 
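The two gen_update_pc() calls below encode exactly that: the first (diff 0) makes an UNDEF raised by the pre_smc check report the SMC instruction's own address, while the second (curr_insn_len) sets the restart PC for the exception taken after the insn completes. For a 4-byte A32 SMC at a hypothetical 0x1000, those are 0x1000 and 0x1004 respectively.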
*/ - TCGv_i32 tmp; - - gen_set_pc_im(s, s->pc_curr); - tmp = tcg_const_i32(syn_aa32_smc()); - gen_helper_pre_smc(cpu_env, tmp); - tcg_temp_free_i32(tmp); - gen_set_pc_im(s, s->base.pc_next); + gen_update_pc(s, 0); + gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa32_smc())); + gen_update_pc(s, curr_insn_len(s)); s->base.is_jmp = DISAS_SMC; } -static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp) +static void gen_exception_internal_insn(DisasContext *s, int excp) { gen_set_condexec(s); - gen_set_pc_im(s, pc); + gen_update_pc(s, 0); gen_exception_internal(excp); s->base.is_jmp = DISAS_NORETURN; } -void gen_exception_insn(DisasContext *s, uint64_t pc, int excp, - uint32_t syn, uint32_t target_el) +static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el) +{ + gen_helper_exception_with_syndrome_el(tcg_env, tcg_constant_i32(excp), + tcg_constant_i32(syndrome), tcg_el); +} + +static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el) +{ + gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el)); +} + +static void gen_exception(int excp, uint32_t syndrome) +{ + gen_helper_exception_with_syndrome(tcg_env, tcg_constant_i32(excp), + tcg_constant_i32(syndrome)); +} + +static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff, + int excp, uint32_t syn, TCGv_i32 tcg_el) { if (s->aarch64) { - gen_a64_set_pc_im(pc); + gen_a64_update_pc(s, pc_diff); } else { gen_set_condexec(s); - gen_set_pc_im(s, pc); + gen_update_pc(s, pc_diff); } - gen_exception(excp, syn, target_el); + gen_exception_el_v(excp, syn, tcg_el); s->base.is_jmp = DISAS_NORETURN; } -static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn) +void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp, + uint32_t syn, uint32_t target_el) { - TCGv_i32 tcg_syn; - - gen_set_condexec(s); - gen_set_pc_im(s, s->pc_curr); - tcg_syn = tcg_const_i32(syn); - gen_helper_exception_bkpt_insn(cpu_env, tcg_syn); - tcg_temp_free_i32(tcg_syn); - s->base.is_jmp = DISAS_NORETURN; + gen_exception_insn_el_v(s, pc_diff, excp, syn, + tcg_constant_i32(target_el)); } -void unallocated_encoding(DisasContext *s) +void gen_exception_insn(DisasContext *s, target_long pc_diff, + int excp, uint32_t syn) { - /* Unallocated and reserved encodings are uncategorized */ - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), - default_exception_el(s)); + if (s->aarch64) { + gen_a64_update_pc(s, pc_diff); + } else { + gen_set_condexec(s); + gen_update_pc(s, pc_diff); + } + gen_exception(excp, syn); + s->base.is_jmp = DISAS_NORETURN; } -static void gen_exception_el(DisasContext *s, int excp, uint32_t syn, - TCGv_i32 tcg_el) +static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn) { - TCGv_i32 tcg_excp; - TCGv_i32 tcg_syn; - gen_set_condexec(s); - gen_set_pc_im(s, s->pc_curr); - tcg_excp = tcg_const_i32(excp); - tcg_syn = tcg_const_i32(syn); - gen_helper_exception_with_syndrome(cpu_env, tcg_excp, tcg_syn, tcg_el); - tcg_temp_free_i32(tcg_syn); - tcg_temp_free_i32(tcg_excp); + gen_update_pc(s, 0); + gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syn)); s->base.is_jmp = DISAS_NORETURN; } +void unallocated_encoding(DisasContext *s) +{ + /* Unallocated and reserved encodings are uncategorized */ + gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized()); +} + /* Force a TB lookup after an instruction that changes the CPU state. 
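DISAS_EXIT returns to the main loop instead of chaining, so the next TB is looked up against freshly rebuilt hflags; callers such as gen_set_psr() rely on this after a CPSR write.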
*/ void gen_lookup_tb(DisasContext *s) { - tcg_gen_movi_i32(cpu_R[15], s->base.pc_next); + gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s)); s->base.is_jmp = DISAS_EXIT; } @@ -1157,12 +1133,9 @@ static inline void gen_hlt(DisasContext *s, int imm) * semihosting, to provide some semblance of security * (and for consistency with our 32-bit semihosting). */ - if (semihosting_enabled() && -#ifndef CONFIG_USER_ONLY - s->current_el != 0 && -#endif + if (semihosting_enabled(s->current_el == 0) && (imm == (s->thumb ? 0x3c : 0xf000))) { - gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST); + gen_exception_internal_insn(s, EXCP_SEMIHOST); return; } @@ -1185,7 +1158,7 @@ long neon_element_offset(int reg, int element, MemOp memop) { int element_size = 1 << (memop & MO_SIZE); int ofs = element * element_size; -#ifdef HOST_WORDS_BIGENDIAN +#if HOST_BIG_ENDIAN /* * Calculate the offset assuming fully little-endian, * then XOR to account for the order of the 8-byte units. @@ -1213,20 +1186,20 @@ void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop) switch (memop) { case MO_SB: - tcg_gen_ld8s_i32(dest, cpu_env, off); + tcg_gen_ld8s_i32(dest, tcg_env, off); break; case MO_UB: - tcg_gen_ld8u_i32(dest, cpu_env, off); + tcg_gen_ld8u_i32(dest, tcg_env, off); break; case MO_SW: - tcg_gen_ld16s_i32(dest, cpu_env, off); + tcg_gen_ld16s_i32(dest, tcg_env, off); break; case MO_UW: - tcg_gen_ld16u_i32(dest, cpu_env, off); + tcg_gen_ld16u_i32(dest, tcg_env, off); break; case MO_UL: case MO_SL: - tcg_gen_ld_i32(dest, cpu_env, off); + tcg_gen_ld_i32(dest, tcg_env, off); break; default: g_assert_not_reached(); @@ -1239,13 +1212,13 @@ void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop) switch (memop) { case MO_SL: - tcg_gen_ld32s_i64(dest, cpu_env, off); + tcg_gen_ld32s_i64(dest, tcg_env, off); break; case MO_UL: - tcg_gen_ld32u_i64(dest, cpu_env, off); + tcg_gen_ld32u_i64(dest, tcg_env, off); break; - case MO_Q: - tcg_gen_ld_i64(dest, cpu_env, off); + case MO_UQ: + tcg_gen_ld_i64(dest, tcg_env, off); break; default: g_assert_not_reached(); @@ -1258,13 +1231,13 @@ void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop) switch (memop) { case MO_8: - tcg_gen_st8_i32(src, cpu_env, off); + tcg_gen_st8_i32(src, tcg_env, off); break; case MO_16: - tcg_gen_st16_i32(src, cpu_env, off); + tcg_gen_st16_i32(src, tcg_env, off); break; case MO_32: - tcg_gen_st_i32(src, cpu_env, off); + tcg_gen_st_i32(src, tcg_env, off); break; default: g_assert_not_reached(); @@ -1277,10 +1250,10 @@ void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop) switch (memop) { case MO_32: - tcg_gen_st32_i64(src, cpu_env, off); + tcg_gen_st32_i64(src, tcg_env, off); break; case MO_64: - tcg_gen_st_i64(src, cpu_env, off); + tcg_gen_st_i64(src, tcg_env, off); break; default: g_assert_not_reached(); @@ -1291,25 +1264,24 @@ void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop) static inline void iwmmxt_load_reg(TCGv_i64 var, int reg) { - tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg])); + tcg_gen_ld_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg])); } static inline void iwmmxt_store_reg(TCGv_i64 var, int reg) { - tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg])); + tcg_gen_st_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg])); } static inline TCGv_i32 iwmmxt_load_creg(int reg) { TCGv_i32 var = tcg_temp_new_i32(); - tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg])); + tcg_gen_ld_i32(var, tcg_env, 
offsetof(CPUARMState, iwmmxt.cregs[reg])); return var; } static inline void iwmmxt_store_creg(int reg, TCGv_i32 var) { - tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg])); - tcg_temp_free_i32(var); + tcg_gen_st_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg])); } static inline void gen_op_iwmmxt_movq_wRn_M0(int rn) @@ -1351,7 +1323,7 @@ static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \ static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \ { \ iwmmxt_load_reg(cpu_V1, rn); \ - gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \ + gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0, cpu_V1); \ } #define IWMMXT_OP_ENV_SIZE(name) \ @@ -1362,7 +1334,7 @@ IWMMXT_OP_ENV(name##l) #define IWMMXT_OP_ENV1(name) \ static inline void gen_op_iwmmxt_##name##_M0(void) \ { \ - gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \ + gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0); \ } IWMMXT_OP(maddsq) @@ -1468,10 +1440,9 @@ static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn, else tcg_gen_addi_i32(tmp, tmp, -offset); tcg_gen_mov_i32(dest, tmp); - if (insn & (1 << 21)) + if (insn & (1 << 21)) { store_reg(s, rd, tmp); - else - tcg_temp_free_i32(tmp); + } } else if (insn & (1 << 21)) { /* Post indexed */ tcg_gen_mov_i32(dest, tmp); @@ -1503,7 +1474,6 @@ static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest) } tcg_gen_andi_i32(tmp, tmp, mask); tcg_gen_mov_i32(dest, tmp); - tcg_temp_free_i32(tmp); return 0; } @@ -1536,7 +1506,6 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) wrd = (insn >> 12) & 0xf; addr = tcg_temp_new_i32(); if (gen_iwmmxt_address(s, insn, addr)) { - tcg_temp_free_i32(addr); return 1; } if (insn & ARM_CP_RW_BIT) { @@ -1564,7 +1533,6 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) } if (i) { tcg_gen_extu_i32_i64(cpu_M0, tmp); - tcg_temp_free_i32(tmp); } gen_op_iwmmxt_movq_wRn_M0(wrd); } @@ -1592,9 +1560,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) } } } - tcg_temp_free_i32(tmp); } - tcg_temp_free_i32(addr); return 0; } @@ -1629,7 +1595,6 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) tmp = iwmmxt_load_creg(wrd); tmp2 = load_reg(s, rd); tcg_gen_andc_i32(tmp, tmp, tmp2); - tcg_temp_free_i32(tmp2); iwmmxt_store_creg(wrd, tmp); break; case ARM_IWMMXT_wCGR0: @@ -1842,7 +1807,6 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) tcg_gen_andi_i32(tmp, tmp, 7); iwmmxt_load_reg(cpu_V1, rd1); gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp); - tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); break; @@ -1855,25 +1819,21 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) gen_op_iwmmxt_movq_M0_wRn(wrd); switch ((insn >> 6) & 3) { case 0: - tmp2 = tcg_const_i32(0xff); - tmp3 = tcg_const_i32((insn & 7) << 3); + tmp2 = tcg_constant_i32(0xff); + tmp3 = tcg_constant_i32((insn & 7) << 3); break; case 1: - tmp2 = tcg_const_i32(0xffff); - tmp3 = tcg_const_i32((insn & 3) << 4); + tmp2 = tcg_constant_i32(0xffff); + tmp3 = tcg_constant_i32((insn & 3) << 4); break; case 2: - tmp2 = tcg_const_i32(0xffffffff); - tmp3 = tcg_const_i32((insn & 1) << 5); + tmp2 = tcg_constant_i32(0xffffffff); + tmp3 = tcg_constant_i32((insn & 1) << 5); break; default: - tmp2 = NULL; - tmp3 = NULL; + g_assert_not_reached(); } gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3); - tcg_temp_free_i32(tmp3); - tcg_temp_free_i32(tmp2); - tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); break; 
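The TINSR hunk above shows the conversion pattern applied throughout this series: tcg_const_i32() allocated a writable temporary that had to be freed explicitly, whereas tcg_constant_i32() returns an interned read-only value that is never freed and must never be written. A minimal before/after sketch (gen_helper_foo is a hypothetical helper, used only for illustration):

    /* before: allocate a temp, initialize it, free it after use */
    TCGv_i32 t = tcg_const_i32(0xff);
    gen_helper_foo(cpu_env, t);
    tcg_temp_free_i32(t);

    /* after: interned constant; no free, and writing to it is a bug */
    gen_helper_foo(tcg_env, tcg_constant_i32(0xff));

The default case turning into g_assert_not_reached() is part of the same cleanup: the old NULL assignments merely silenced uninitialized-variable warnings on a path the decoder never takes.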
@@ -1927,7 +1887,6 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) } tcg_gen_shli_i32(tmp, tmp, 28); gen_set_nzcv(tmp); - tcg_temp_free_i32(tmp); break; case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */ if (((insn >> 6) & 3) == 3) @@ -1946,7 +1905,6 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) gen_helper_iwmmxt_bcstl(cpu_M0, tmp); break; } - tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); break; @@ -1975,8 +1933,6 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) break; } gen_set_nzcv(tmp); - tcg_temp_free_i32(tmp2); - tcg_temp_free_i32(tmp); break; case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */ wrd = (insn >> 12) & 0xf; @@ -2023,8 +1979,6 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) break; } gen_set_nzcv(tmp); - tcg_temp_free_i32(tmp2); - tcg_temp_free_i32(tmp); break; case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */ rd = (insn >> 12) & 0xf; @@ -2149,21 +2103,19 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) gen_op_iwmmxt_movq_M0_wRn(rd0); tmp = tcg_temp_new_i32(); if (gen_iwmmxt_shift(insn, 0xff, tmp)) { - tcg_temp_free_i32(tmp); return 1; } switch ((insn >> 22) & 3) { case 1: - gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp); + gen_helper_iwmmxt_srlw(cpu_M0, tcg_env, cpu_M0, tmp); break; case 2: - gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp); + gen_helper_iwmmxt_srll(cpu_M0, tcg_env, cpu_M0, tmp); break; case 3: - gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp); + gen_helper_iwmmxt_srlq(cpu_M0, tcg_env, cpu_M0, tmp); break; } - tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); gen_op_iwmmxt_set_cup(); @@ -2177,21 +2129,19 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) gen_op_iwmmxt_movq_M0_wRn(rd0); tmp = tcg_temp_new_i32(); if (gen_iwmmxt_shift(insn, 0xff, tmp)) { - tcg_temp_free_i32(tmp); return 1; } switch ((insn >> 22) & 3) { case 1: - gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp); + gen_helper_iwmmxt_sraw(cpu_M0, tcg_env, cpu_M0, tmp); break; case 2: - gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp); + gen_helper_iwmmxt_sral(cpu_M0, tcg_env, cpu_M0, tmp); break; case 3: - gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp); + gen_helper_iwmmxt_sraq(cpu_M0, tcg_env, cpu_M0, tmp); break; } - tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); gen_op_iwmmxt_set_cup(); @@ -2205,21 +2155,19 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) gen_op_iwmmxt_movq_M0_wRn(rd0); tmp = tcg_temp_new_i32(); if (gen_iwmmxt_shift(insn, 0xff, tmp)) { - tcg_temp_free_i32(tmp); return 1; } switch ((insn >> 22) & 3) { case 1: - gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp); + gen_helper_iwmmxt_sllw(cpu_M0, tcg_env, cpu_M0, tmp); break; case 2: - gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp); + gen_helper_iwmmxt_slll(cpu_M0, tcg_env, cpu_M0, tmp); break; case 3: - gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp); + gen_helper_iwmmxt_sllq(cpu_M0, tcg_env, cpu_M0, tmp); break; } - tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); gen_op_iwmmxt_set_cup(); @@ -2235,27 +2183,23 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) switch ((insn >> 22) & 3) { case 1: if (gen_iwmmxt_shift(insn, 0xf, tmp)) { - tcg_temp_free_i32(tmp); return 1; } - gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp); + gen_helper_iwmmxt_rorw(cpu_M0, tcg_env, cpu_M0, tmp); break; case 2: if 
(gen_iwmmxt_shift(insn, 0x1f, tmp)) { - tcg_temp_free_i32(tmp); return 1; } - gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp); + gen_helper_iwmmxt_rorl(cpu_M0, tcg_env, cpu_M0, tmp); break; case 3: if (gen_iwmmxt_shift(insn, 0x3f, tmp)) { - tcg_temp_free_i32(tmp); return 1; } - gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp); + gen_helper_iwmmxt_rorq(cpu_M0, tcg_env, cpu_M0, tmp); break; } - tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); gen_op_iwmmxt_set_cup(); @@ -2328,10 +2272,9 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) rd0 = (insn >> 16) & 0xf; rd1 = (insn >> 0) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = tcg_const_i32((insn >> 20) & 3); iwmmxt_load_reg(cpu_V1, rd1); - gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp); - tcg_temp_free_i32(tmp); + gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, + tcg_constant_i32((insn >> 20) & 3)); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); break; @@ -2385,9 +2328,8 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) wrd = (insn >> 12) & 0xf; rd0 = (insn >> 16) & 0xf; gen_op_iwmmxt_movq_M0_wRn(rd0); - tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f)); - gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp); - tcg_temp_free_i32(tmp); + tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f)); + gen_helper_iwmmxt_shufh(cpu_M0, tcg_env, cpu_M0, tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); gen_op_iwmmxt_set_cup(); @@ -2496,12 +2438,8 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2); break; default: - tcg_temp_free_i32(tmp2); - tcg_temp_free_i32(tmp); return 1; } - tcg_temp_free_i32(tmp2); - tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(wrd); gen_op_iwmmxt_set_mup(); break; @@ -2550,8 +2488,6 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn) default: return 1; } - tcg_temp_free_i32(tmp2); - tcg_temp_free_i32(tmp); gen_op_iwmmxt_movq_wRn_M0(acc); return 0; @@ -2590,25 +2526,38 @@ static void gen_goto_ptr(void) * cpu_loop_exec. Any live exit_requests will be processed as we * enter the next TB. */ -static void gen_goto_tb(DisasContext *s, int n, target_ulong dest) +static void gen_goto_tb(DisasContext *s, int n, target_long diff) { - if (translator_use_goto_tb(&s->base, dest)) { - tcg_gen_goto_tb(n); - gen_set_pc_im(s, dest); + if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) { + /* + * For pcrel, the pc must always be up-to-date on entry to + * the linked TB, so that it can use simple additions for all + * further adjustments. For !pcrel, the linked TB is compiled + * to know its full virtual address, so we can delay the + * update to pc to the unlinked path. A long chain of links + * can thus avoid many updates to the PC. + */ + if (tb_cflags(s->base.tb) & CF_PCREL) { + gen_update_pc(s, diff); + tcg_gen_goto_tb(n); + } else { + tcg_gen_goto_tb(n); + gen_update_pc(s, diff); + } tcg_gen_exit_tb(s->base.tb, n); } else { - gen_set_pc_im(s, dest); + gen_update_pc(s, diff); gen_goto_ptr(); } s->base.is_jmp = DISAS_NORETURN; } /* Jump, specifying which TB number to use if we gen_goto_tb() */ -static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno) +static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno) { - if (unlikely(is_singlestepping(s))) { + if (unlikely(s->ss_active)) { /* An indirect jump so that we still trigger the debug exception. 
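With architectural single-step active every insn must end its TB; DISAS_JUMP routes the TB epilogue through the software-step exception rather than goto_tb chaining, which would skip the step state machine.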
*/ - gen_set_pc_im(s, dest); + gen_update_pc(s, diff); s->base.is_jmp = DISAS_JUMP; return; } @@ -2625,7 +2574,7 @@ static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno) * gen_jmp(); * on the second call to gen_jmp(). */ - gen_goto_tb(s, tbno, dest); + gen_goto_tb(s, tbno, diff); break; case DISAS_UPDATE_NOCHAIN: case DISAS_UPDATE_EXIT: @@ -2634,7 +2583,7 @@ static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno) * Avoid using goto_tb so we really do exit back to the main loop * and don't chain to another TB. */ - gen_set_pc_im(s, dest); + gen_update_pc(s, diff); gen_goto_ptr(); s->base.is_jmp = DISAS_NORETURN; break; @@ -2647,9 +2596,9 @@ static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno) } } -static inline void gen_jmp(DisasContext *s, uint32_t dest) +static inline void gen_jmp(DisasContext *s, target_long diff) { - gen_jmp_tb(s, dest, 0); + gen_jmp_tb(s, diff, 0); } static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y) @@ -2715,7 +2664,6 @@ static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0) } else { gen_set_cpsr(t0, mask); } - tcg_temp_free_i32(t0); gen_lookup_tb(s); return 0; } @@ -2751,8 +2699,6 @@ static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn, * an exception and return false. Otherwise it will return true, * and set *tgtmode and *regno appropriately. */ - int exc_target = default_exception_el(s); - /* These instructions are present only in ARMv8, or in ARMv7 with the * Virtualization Extensions. */ @@ -2856,27 +2802,34 @@ static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn, if (arm_dc_feature(s, ARM_FEATURE_AARCH64) && dc_isar_feature(aa64_sel2, s)) { /* Target EL is EL<3 minus SCR_EL3.EEL2> */ - tcg_el = load_cpu_field(cp15.scr_el3); + tcg_el = load_cpu_field_low32(cp15.scr_el3); tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1); tcg_gen_addi_i32(tcg_el, tcg_el, 3); } else { - tcg_el = tcg_const_i32(3); + tcg_el = tcg_constant_i32(3); } - gen_exception_el(s, EXCP_UDEF, syn_uncategorized(), tcg_el); - tcg_temp_free_i32(tcg_el); + gen_exception_insn_el_v(s, 0, EXCP_UDEF, + syn_uncategorized(), tcg_el); return false; } break; case ARM_CPU_MODE_HYP: /* - * SPSR_hyp and r13_hyp can only be accessed from Monitor mode - * (and so we can forbid accesses from EL2 or below). elr_hyp - * can be accessed also from Hyp mode, so forbid accesses from - * EL0 or EL1. + * r13_hyp can only be accessed from Monitor mode, and so we + * can forbid accesses from EL2 or below. + * elr_hyp can be accessed also from Hyp mode, so forbid + * accesses from EL0 or EL1. + * SPSR_hyp is supposed to be in the same category as r13_hyp + * and UNPREDICTABLE if accessed from anything except Monitor + * mode. However there is some real-world code that will do + * it because at least some hardware happens to permit the + * access. (Notably a standard Cortex-R52 startup code fragment + * does this.) So we permit SPSR_hyp from Hyp mode also, to allow + * this (incorrect) guest code to run. 
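(In this encoding regno 16 is SPSR_hyp and regno 17 is ELR_hyp, so the relaxed check below admits both from EL2, while r13_hyp still requires Monitor mode.)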
*/ - if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 || - (s->current_el < 3 && *regno != 17)) { + if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 + || (s->current_el < 3 && *regno != 16 && *regno != 17)) { goto undef; } break; @@ -2888,14 +2841,13 @@ static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn, undef: /* If we get here then some access check did not pass */ - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_uncategorized(), exc_target); + gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized()); return false; } static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn) { - TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno; + TCGv_i32 tcg_reg; int tgtmode = 0, regno = 0; if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, ®no)) { @@ -2904,20 +2856,17 @@ static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn) /* Sync state because msr_banked() can raise exceptions */ gen_set_condexec(s); - gen_set_pc_im(s, s->pc_curr); + gen_update_pc(s, 0); tcg_reg = load_reg(s, rn); - tcg_tgtmode = tcg_const_i32(tgtmode); - tcg_regno = tcg_const_i32(regno); - gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno); - tcg_temp_free_i32(tcg_tgtmode); - tcg_temp_free_i32(tcg_regno); - tcg_temp_free_i32(tcg_reg); + gen_helper_msr_banked(tcg_env, tcg_reg, + tcg_constant_i32(tgtmode), + tcg_constant_i32(regno)); s->base.is_jmp = DISAS_UPDATE_EXIT; } static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn) { - TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno; + TCGv_i32 tcg_reg; int tgtmode = 0, regno = 0; if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, ®no)) { @@ -2926,13 +2875,11 @@ static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn) /* Sync state because mrs_banked() can raise exceptions */ gen_set_condexec(s); - gen_set_pc_im(s, s->pc_curr); + gen_update_pc(s, 0); tcg_reg = tcg_temp_new_i32(); - tcg_tgtmode = tcg_const_i32(tgtmode); - tcg_regno = tcg_const_i32(regno); - gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno); - tcg_temp_free_i32(tcg_tgtmode); - tcg_temp_free_i32(tcg_regno); + gen_helper_mrs_banked(tcg_reg, tcg_env, + tcg_constant_i32(tgtmode), + tcg_constant_i32(regno)); store_reg(s, rn, tcg_reg); s->base.is_jmp = DISAS_UPDATE_EXIT; } @@ -2944,7 +2891,6 @@ static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn) static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc) { tcg_gen_mov_i32(cpu_R[15], pc); - tcg_temp_free_i32(pc); } /* Generate a v6 exception return. Marks both values as dead. */ @@ -2955,11 +2901,8 @@ static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr) * appropriately depending on the new Thumb bit, so it must * be called after storing the new PC. 
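translator_io_start() below subsumes the old explicit CF_USE_ICOUNT test, opening the icount I/O window only when it is actually needed; the TB then ends with DISAS_EXIT so that newly unmasked IRQs are taken promptly.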
*/ - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } - gen_helper_cpsr_write_eret(cpu_env, cpsr); - tcg_temp_free_i32(cpsr); + translator_io_start(&s->base); + gen_helper_cpsr_write_eret(tcg_env, cpsr); /* Must exit loop to check un-masked IRQs */ s->base.is_jmp = DISAS_EXIT; } @@ -2976,10 +2919,9 @@ static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, { TCGv_ptr qc_ptr = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc)); + tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr, opr_sz, max_sz, 0, fn); - tcg_temp_free_ptr(qc_ptr); } void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, @@ -3002,57 +2944,16 @@ void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); } -#define GEN_CMP0(NAME, COND) \ - static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \ - { \ - tcg_gen_setcondi_i32(COND, d, a, 0); \ - tcg_gen_neg_i32(d, d); \ - } \ - static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \ - { \ - tcg_gen_setcondi_i64(COND, d, a, 0); \ - tcg_gen_neg_i64(d, d); \ - } \ - static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \ - { \ - TCGv_vec zero = tcg_const_zeros_vec_matching(d); \ - tcg_gen_cmp_vec(COND, vece, d, a, zero); \ - tcg_temp_free_vec(zero); \ - } \ - void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \ - uint32_t opr_sz, uint32_t max_sz) \ - { \ - const GVecGen2 op[4] = { \ - { .fno = gen_helper_gvec_##NAME##0_b, \ - .fniv = gen_##NAME##0_vec, \ - .opt_opc = vecop_list_cmp, \ - .vece = MO_8 }, \ - { .fno = gen_helper_gvec_##NAME##0_h, \ - .fniv = gen_##NAME##0_vec, \ - .opt_opc = vecop_list_cmp, \ - .vece = MO_16 }, \ - { .fni4 = gen_##NAME##0_i32, \ - .fniv = gen_##NAME##0_vec, \ - .opt_opc = vecop_list_cmp, \ - .vece = MO_32 }, \ - { .fni8 = gen_##NAME##0_i64, \ - .fniv = gen_##NAME##0_vec, \ - .opt_opc = vecop_list_cmp, \ - .prefer_i64 = TCG_TARGET_REG_BITS == 64, \ - .vece = MO_64 }, \ - }; \ - tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \ - } - -static const TCGOpcode vecop_list_cmp[] = { - INDEX_op_cmp_vec, 0 -}; +#define GEN_CMP0(NAME, COND) \ + void NAME(unsigned vece, uint32_t d, uint32_t m, \ + uint32_t opr_sz, uint32_t max_sz) \ + { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); } -GEN_CMP0(ceq, TCG_COND_EQ) -GEN_CMP0(cle, TCG_COND_LE) -GEN_CMP0(cge, TCG_COND_GE) -GEN_CMP0(clt, TCG_COND_LT) -GEN_CMP0(cgt, TCG_COND_GT) +GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ) +GEN_CMP0(gen_gvec_cle0, TCG_COND_LE) +GEN_CMP0(gen_gvec_cge0, TCG_COND_GE) +GEN_CMP0(gen_gvec_clt0, TCG_COND_LT) +GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT) #undef GEN_CMP0 @@ -3113,7 +3014,7 @@ void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, .vece = MO_32 }, { .fni8 = gen_ssra64_i64, .fniv = gen_ssra_vec, - .fno = gen_helper_gvec_ssra_b, + .fno = gen_helper_gvec_ssra_d, .prefer_i64 = TCG_TARGET_REG_BITS == 64, .opt_opc = vecop_list, .load_dest = true, @@ -3226,7 +3127,6 @@ static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); tcg_gen_vec_sar8i_i64(d, a, sh); tcg_gen_vec_add8_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) @@ -3237,7 +3137,6 @@ static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); tcg_gen_vec_sar16i_i64(d, a, sh); 
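/* Rounding step: t holds the last bit shifted out of each 16-bit lane, so per lane this computes d = (a >> sh) + ((a >> (sh - 1)) & 1). */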
tcg_gen_vec_add16_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) @@ -3253,7 +3152,6 @@ static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) tcg_gen_extract_i32(t, a, sh - 1, 1); tcg_gen_sari_i32(d, a, sh); tcg_gen_add_i32(d, d, t); - tcg_temp_free_i32(t); } static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) @@ -3263,7 +3161,6 @@ static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) tcg_gen_extract_i64(t, a, sh - 1, 1); tcg_gen_sari_i64(d, a, sh); tcg_gen_add_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) @@ -3276,9 +3173,6 @@ static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) tcg_gen_and_vec(vece, t, t, ones); tcg_gen_sari_vec(vece, d, a, sh); tcg_gen_add_vec(vece, d, d, t); - - tcg_temp_free_vec(t); - tcg_temp_free_vec(ones); } void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, @@ -3334,7 +3228,6 @@ static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) gen_srshr8_i64(t, a, sh); tcg_gen_vec_add8_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) @@ -3343,7 +3236,6 @@ static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) gen_srshr16_i64(t, a, sh); tcg_gen_vec_add16_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) @@ -3352,7 +3244,6 @@ static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) gen_srshr32_i32(t, a, sh); tcg_gen_add_i32(d, d, t); - tcg_temp_free_i32(t); } static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) @@ -3361,7 +3252,6 @@ static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) gen_srshr64_i64(t, a, sh); tcg_gen_add_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) @@ -3370,7 +3260,6 @@ static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) gen_srshr_vec(vece, t, a, sh); tcg_gen_add_vec(vece, d, d, t); - tcg_temp_free_vec(t); } void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, @@ -3433,7 +3322,6 @@ static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); tcg_gen_vec_shr8i_i64(d, a, sh); tcg_gen_vec_add8_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) @@ -3444,7 +3332,6 @@ static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); tcg_gen_vec_shr16i_i64(d, a, sh); tcg_gen_vec_add16_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) @@ -3460,7 +3347,6 @@ static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) tcg_gen_extract_i32(t, a, sh - 1, 1); tcg_gen_shri_i32(d, a, sh); tcg_gen_add_i32(d, d, t); - tcg_temp_free_i32(t); } static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) @@ -3470,7 +3356,6 @@ static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) tcg_gen_extract_i64(t, a, sh - 1, 1); tcg_gen_shri_i64(d, a, sh); tcg_gen_add_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) @@ -3483,9 +3368,6 @@ static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) tcg_gen_and_vec(vece, t, t, ones); tcg_gen_shri_vec(vece, d, a, shift); tcg_gen_add_vec(vece, d, d, t); - - tcg_temp_free_vec(t); - 
tcg_temp_free_vec(ones); } void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, @@ -3544,7 +3426,6 @@ static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) gen_urshr8_i64(t, a, sh); } tcg_gen_vec_add8_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) @@ -3557,7 +3438,6 @@ static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) gen_urshr16_i64(t, a, sh); } tcg_gen_vec_add16_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) @@ -3570,7 +3450,6 @@ static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) gen_urshr32_i32(t, a, sh); } tcg_gen_add_i32(d, d, t); - tcg_temp_free_i32(t); } static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) @@ -3583,7 +3462,6 @@ static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) gen_urshr64_i64(t, a, sh); } tcg_gen_add_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) @@ -3596,7 +3474,6 @@ static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) gen_urshr_vec(vece, t, a, sh); } tcg_gen_add_vec(vece, d, d, t); - tcg_temp_free_vec(t); } void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, @@ -3649,7 +3526,6 @@ static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) tcg_gen_andi_i64(t, t, mask); tcg_gen_andi_i64(d, d, ~mask); tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) @@ -3661,7 +3537,6 @@ static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) tcg_gen_andi_i64(t, t, mask); tcg_gen_andi_i64(d, d, ~mask); tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) @@ -3685,9 +3560,6 @@ static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) tcg_gen_shri_vec(vece, t, a, sh); tcg_gen_and_vec(vece, d, d, m); tcg_gen_or_vec(vece, d, d, t); - - tcg_temp_free_vec(t); - tcg_temp_free_vec(m); } void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, @@ -3744,7 +3616,6 @@ static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) tcg_gen_andi_i64(t, t, mask); tcg_gen_andi_i64(d, d, ~mask); tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) @@ -3756,7 +3627,6 @@ static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) tcg_gen_andi_i64(t, t, mask); tcg_gen_andi_i64(d, d, ~mask); tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) @@ -3778,9 +3648,6 @@ static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh)); tcg_gen_and_vec(vece, d, d, m); tcg_gen_or_vec(vece, d, d, t); - - tcg_temp_free_vec(t); - tcg_temp_free_vec(m); } void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, @@ -3957,15 +3824,13 @@ void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) { tcg_gen_and_i32(d, a, b); - tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0); - tcg_gen_neg_i32(d, d); + tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0)); } void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { tcg_gen_and_i64(d, a, b); - tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0); - tcg_gen_neg_i64(d, d); + tcg_gen_negsetcond_i64(TCG_COND_NE, d, 
d, tcg_constant_i64(0)); } static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) @@ -4007,8 +3872,8 @@ void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) TCGv_i32 rval = tcg_temp_new_i32(); TCGv_i32 lsh = tcg_temp_new_i32(); TCGv_i32 rsh = tcg_temp_new_i32(); - TCGv_i32 zero = tcg_const_i32(0); - TCGv_i32 max = tcg_const_i32(32); + TCGv_i32 zero = tcg_constant_i32(0); + TCGv_i32 max = tcg_constant_i32(32); /* * Rely on the TCG guarantee that out of range shifts produce @@ -4021,13 +3886,6 @@ void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) tcg_gen_shr_i32(rval, src, rsh); tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero); tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst); - - tcg_temp_free_i32(lval); - tcg_temp_free_i32(rval); - tcg_temp_free_i32(lsh); - tcg_temp_free_i32(rsh); - tcg_temp_free_i32(zero); - tcg_temp_free_i32(max); } void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) @@ -4036,8 +3894,8 @@ void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) TCGv_i64 rval = tcg_temp_new_i64(); TCGv_i64 lsh = tcg_temp_new_i64(); TCGv_i64 rsh = tcg_temp_new_i64(); - TCGv_i64 zero = tcg_const_i64(0); - TCGv_i64 max = tcg_const_i64(64); + TCGv_i64 zero = tcg_constant_i64(0); + TCGv_i64 max = tcg_constant_i64(64); /* * Rely on the TCG guarantee that out of range shifts produce @@ -4050,13 +3908,6 @@ void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) tcg_gen_shr_i64(rval, src, rsh); tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero); tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst); - - tcg_temp_free_i64(lval); - tcg_temp_free_i64(rval); - tcg_temp_free_i64(lsh); - tcg_temp_free_i64(rsh); - tcg_temp_free_i64(zero); - tcg_temp_free_i64(max); } static void gen_ushl_vec(unsigned vece, TCGv_vec dst, @@ -4076,7 +3927,6 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst, tcg_gen_dupi_vec(vece, msk, 0xff); tcg_gen_and_vec(vece, lsh, shift, msk); tcg_gen_and_vec(vece, rsh, rsh, msk); - tcg_temp_free_vec(msk); } /* @@ -4109,12 +3959,6 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst, tcg_gen_and_vec(vece, rval, rval, rsh); } tcg_gen_or_vec(vece, dst, lval, rval); - - tcg_temp_free_vec(max); - tcg_temp_free_vec(lval); - tcg_temp_free_vec(rval); - tcg_temp_free_vec(lsh); - tcg_temp_free_vec(rsh); } void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, @@ -4151,8 +3995,8 @@ void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) TCGv_i32 rval = tcg_temp_new_i32(); TCGv_i32 lsh = tcg_temp_new_i32(); TCGv_i32 rsh = tcg_temp_new_i32(); - TCGv_i32 zero = tcg_const_i32(0); - TCGv_i32 max = tcg_const_i32(31); + TCGv_i32 zero = tcg_constant_i32(0); + TCGv_i32 max = tcg_constant_i32(31); /* * Rely on the TCG guarantee that out of range shifts produce @@ -4166,13 +4010,6 @@ void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) tcg_gen_sar_i32(rval, src, rsh); tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero); tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval); - - tcg_temp_free_i32(lval); - tcg_temp_free_i32(rval); - tcg_temp_free_i32(lsh); - tcg_temp_free_i32(rsh); - tcg_temp_free_i32(zero); - tcg_temp_free_i32(max); } void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) @@ -4181,8 +4018,8 @@ void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) TCGv_i64 rval = tcg_temp_new_i64(); TCGv_i64 lsh = tcg_temp_new_i64(); TCGv_i64 rsh = tcg_temp_new_i64(); - TCGv_i64 zero = tcg_const_i64(0); - TCGv_i64 max = tcg_const_i64(63); + 
TCGv_i64 zero = tcg_constant_i64(0); + TCGv_i64 max = tcg_constant_i64(63); /* * Rely on the TCG guarantee that out of range shifts produce @@ -4196,13 +4033,6 @@ void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) tcg_gen_sar_i64(rval, src, rsh); tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero); tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval); - - tcg_temp_free_i64(lval); - tcg_temp_free_i64(rval); - tcg_temp_free_i64(lsh); - tcg_temp_free_i64(rsh); - tcg_temp_free_i64(zero); - tcg_temp_free_i64(max); } static void gen_sshl_vec(unsigned vece, TCGv_vec dst, @@ -4247,12 +4077,6 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst, tcg_gen_dupi_vec(vece, tmp, 0x80); tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval); } - - tcg_temp_free_vec(lval); - tcg_temp_free_vec(rval); - tcg_temp_free_vec(lsh); - tcg_temp_free_vec(rsh); - tcg_temp_free_vec(tmp); } void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, @@ -4291,7 +4115,6 @@ static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, tcg_gen_usadd_vec(vece, t, a, b); tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); tcg_gen_or_vec(vece, sat, sat, x); - tcg_temp_free_vec(x); } void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, @@ -4334,7 +4157,6 @@ static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, tcg_gen_ssadd_vec(vece, t, a, b); tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); tcg_gen_or_vec(vece, sat, sat, x); - tcg_temp_free_vec(x); } void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, @@ -4377,7 +4199,6 @@ static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, tcg_gen_ussub_vec(vece, t, a, b); tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); tcg_gen_or_vec(vece, sat, sat, x); - tcg_temp_free_vec(x); } void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, @@ -4420,7 +4241,6 @@ static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, tcg_gen_sssub_vec(vece, t, a, b); tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); tcg_gen_or_vec(vece, sat, sat, x); - tcg_temp_free_vec(x); } void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, @@ -4462,7 +4282,6 @@ static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) tcg_gen_sub_i32(t, a, b); tcg_gen_sub_i32(d, b, a); tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t); - tcg_temp_free_i32(t); } static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) @@ -4472,7 +4291,6 @@ static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) tcg_gen_sub_i64(t, a, b); tcg_gen_sub_i64(d, b, a); tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t); - tcg_temp_free_i64(t); } static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) @@ -4482,7 +4300,6 @@ static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) tcg_gen_smin_vec(vece, t, a, b); tcg_gen_smax_vec(vece, d, a, b); tcg_gen_sub_vec(vece, d, d, t); - tcg_temp_free_vec(t); } void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, @@ -4522,7 +4339,6 @@ static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) tcg_gen_sub_i32(t, a, b); tcg_gen_sub_i32(d, b, a); tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t); - tcg_temp_free_i32(t); } static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) @@ -4532,7 +4348,6 @@ static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) tcg_gen_sub_i64(t, a, b); tcg_gen_sub_i64(d, b, a); tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t); - tcg_temp_free_i64(t); } static void gen_uabd_vec(unsigned 
vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) @@ -4542,7 +4357,6 @@ static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) tcg_gen_umin_vec(vece, t, a, b); tcg_gen_umax_vec(vece, d, a, b); tcg_gen_sub_vec(vece, d, d, t); - tcg_temp_free_vec(t); } void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, @@ -4580,7 +4394,6 @@ static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) TCGv_i32 t = tcg_temp_new_i32(); gen_sabd_i32(t, a, b); tcg_gen_add_i32(d, d, t); - tcg_temp_free_i32(t); } static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) @@ -4588,7 +4401,6 @@ static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) TCGv_i64 t = tcg_temp_new_i64(); gen_sabd_i64(t, a, b); tcg_gen_add_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) @@ -4596,7 +4408,6 @@ static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) TCGv_vec t = tcg_temp_new_vec_matching(d); gen_sabd_vec(vece, t, a, b); tcg_gen_add_vec(vece, d, d, t); - tcg_temp_free_vec(t); } void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, @@ -4639,7 +4450,6 @@ static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) TCGv_i32 t = tcg_temp_new_i32(); gen_uabd_i32(t, a, b); tcg_gen_add_i32(d, d, t); - tcg_temp_free_i32(t); } static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) @@ -4647,7 +4457,6 @@ static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) TCGv_i64 t = tcg_temp_new_i64(); gen_uabd_i64(t, a, b); tcg_gen_add_i64(d, d, t); - tcg_temp_free_i64(t); } static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) @@ -4655,7 +4464,6 @@ static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) TCGv_vec t = tcg_temp_new_vec_matching(d); gen_uabd_vec(vece, t, a, b); tcg_gen_add_vec(vece, d, d, t); - tcg_temp_free_vec(t); } void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, @@ -4693,250 +4501,291 @@ void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } +static bool aa32_cpreg_encoding_in_impdef_space(uint8_t crn, uint8_t crm) +{ + static const uint16_t mask[3] = { + 0b0000000111100111, /* crn == 9, crm == {c0-c2, c5-c8} */ + 0b0000000100010011, /* crn == 10, crm == {c0, c1, c4, c8} */ + 0b1000000111111111, /* crn == 11, crm == {c0-c8, c15} */ + }; + + if (crn >= 9 && crn <= 11) { + return (mask[crn - 9] >> crm) & 1; + } + return false; +} + static void do_coproc_insn(DisasContext *s, int cpnum, int is64, int opc1, int crn, int crm, int opc2, bool isread, int rt, int rt2) { - const ARMCPRegInfo *ri; - - ri = get_arm_cp_reginfo(s->cp_regs, - ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2)); - if (ri) { - bool need_exit_tb; + uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2); + const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); + TCGv_ptr tcg_ri = NULL; + bool need_exit_tb = false; + uint32_t syndrome; - /* Check access permissions */ - if (!cp_access_ok(s->current_el, ri, isread)) { - unallocated_encoding(s); - return; + /* + * Note that since we are an implementation which takes an + * exception on a trapped conditional instruction only if the + * instruction passes its condition code check, we can take + * advantage of the clause in the ARM ARM that allows us to set + * the COND field in the instruction to 0xE in all cases. 
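+ * (0xE is the "always" condition, so a syndrome reporting cond 0xe is
+ * architecturally valid for any trapped instruction that passed its
+ * condition check.)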
+ * We could fish the actual condition out of the insn (ARM) + * or the condexec bits (Thumb) but it isn't necessary. + */ + switch (cpnum) { + case 14: + if (is64) { + syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2, + isread, false); + } else { + syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm, + rt, isread, false); } + break; + case 15: + if (is64) { + syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2, + isread, false); + } else { + syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm, + rt, isread, false); + } + break; + default: + /* + * ARMv8 defines that only coprocessors 14 and 15 exist, + * so this can only happen if this is an ARMv7 or earlier CPU, + * in which case the syndrome information won't actually be + * guest visible. + */ + assert(!arm_dc_feature(s, ARM_FEATURE_V8)); + syndrome = syn_uncategorized(); + break; + } - if (s->hstr_active || ri->accessfn || - (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) { - /* Emit code to perform further access permissions checks at - * runtime; this may result in an exception. - * Note that on XScale all cp0..c13 registers do an access check - * call in order to handle c15_cpar. - */ - TCGv_ptr tmpptr; - TCGv_i32 tcg_syn, tcg_isread; - uint32_t syndrome; - - /* Note that since we are an implementation which takes an - * exception on a trapped conditional instruction only if the - * instruction passes its condition code check, we can take - * advantage of the clause in the ARM ARM that allows us to set - * the COND field in the instruction to 0xE in all cases. - * We could fish the actual condition out of the insn (ARM) - * or the condexec bits (Thumb) but it isn't necessary. - */ - switch (cpnum) { - case 14: - if (is64) { - syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2, - isread, false); - } else { - syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm, - rt, isread, false); - } - break; - case 15: - if (is64) { - syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2, - isread, false); - } else { - syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm, - rt, isread, false); - } - break; - default: - /* ARMv8 defines that only coprocessors 14 and 15 exist, - * so this can only happen if this is an ARMv7 or earlier CPU, - * in which case the syndrome information won't actually be - * guest visible. - */ - assert(!arm_dc_feature(s, ARM_FEATURE_V8)); - syndrome = syn_uncategorized(); - break; - } + if (s->hstr_active && cpnum == 15 && s->current_el == 1) { + /* + * At EL1, check for a HSTR_EL2 trap, which must take precedence + * over the UNDEF for "no such register" or the UNDEF for "access + * permissions forbid this EL1 access". HSTR_EL2 traps from EL0 + * only happen if the cpreg doesn't UNDEF at EL0, so we do those in + * access_check_cp_reg(), after the checks for whether the access + * configurably trapped to EL1. + */ + uint32_t maskbit = is64 ? 
crm : crn; + + if (maskbit != 4 && maskbit != 14) { + /* T4 and T14 are RES0 so never cause traps */ + TCGv_i32 t; + DisasLabel over = gen_disas_label(s); + + t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2)); + tcg_gen_andi_i32(t, t, 1u << maskbit); + tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label); - gen_set_condexec(s); - gen_set_pc_im(s, s->pc_curr); - tmpptr = tcg_const_ptr(ri); - tcg_syn = tcg_const_i32(syndrome); - tcg_isread = tcg_const_i32(isread); - gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, - tcg_isread); - tcg_temp_free_ptr(tmpptr); - tcg_temp_free_i32(tcg_syn); - tcg_temp_free_i32(tcg_isread); - } else if (ri->type & ARM_CP_RAISES_EXC) { + gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); /* - * The readfn or writefn might raise an exception; - * synchronize the CPU state in case it does. + * gen_exception_insn() will set is_jmp to DISAS_NORETURN, + * but since we're conditionally branching over it, we want + * to assume continue-to-next-instruction. */ - gen_set_condexec(s); - gen_set_pc_im(s, s->pc_curr); + s->base.is_jmp = DISAS_NEXT; + set_disas_label(s, over); } + } - /* Handle special cases first */ - switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) { - case ARM_CP_NOP: - return; - case ARM_CP_WFI: - if (isread) { - unallocated_encoding(s); - return; + if (cpnum == 15 && aa32_cpreg_encoding_in_impdef_space(crn, crm)) { + /* + * Check for TIDCP trap, which must take precedence over the UNDEF + * for "no such register" etc. It shares precedence with HSTR, + * but raises the same exception, so order doesn't matter. + */ + switch (s->current_el) { + case 0: + if (arm_dc_feature(s, ARM_FEATURE_AARCH64) + && dc_isar_feature(aa64_tidcp1, s)) { + gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); } - gen_set_pc_im(s, s->base.pc_next); - s->base.is_jmp = DISAS_WFI; - return; - default: + break; + case 1: + gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); break; } + } - if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { - gen_io_start(); + if (!ri) { + /* + * Unknown register; this might be a guest error or a QEMU + * unimplemented feature. + */ + if (is64) { + qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 " + "64 bit system register cp:%d opc1: %d crm:%d " + "(%s)\n", + isread ? "read" : "write", cpnum, opc1, crm, + s->ns ? "non-secure" : "secure"); + } else { + qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 " + "system register cp:%d opc1:%d crn:%d crm:%d " + "opc2:%d (%s)\n", + isread ? "read" : "write", cpnum, opc1, crn, + crm, opc2, s->ns ? "non-secure" : "secure"); } + unallocated_encoding(s); + return; + } + + /* Check access permissions */ + if (!cp_access_ok(s->current_el, ri, isread)) { + unallocated_encoding(s); + return; + } + if ((s->hstr_active && s->current_el == 0) || ri->accessfn || + (ri->fgt && s->fgt_active) || + (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) { + /* + * Emit code to perform further access permissions checks at + * runtime; this may result in an exception. + * Note that on XScale all cp0..c13 registers do an access check + * call in order to handle c15_cpar. + */ + gen_set_condexec(s); + gen_update_pc(s, 0); + tcg_ri = tcg_temp_new_ptr(); + gen_helper_access_check_cp_reg(tcg_ri, tcg_env, + tcg_constant_i32(key), + tcg_constant_i32(syndrome), + tcg_constant_i32(isread)); + } else if (ri->type & ARM_CP_RAISES_EXC) { + /* + * The readfn or writefn might raise an exception; + * synchronize the CPU state in case it does. 
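+     * (gen_set_condexec() and gen_update_pc() below sync those fields
+     * into env before any readfn/writefn helper can run.)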
+ */ + gen_set_condexec(s); + gen_update_pc(s, 0); + } + + /* Handle special cases first */ + switch (ri->type & ARM_CP_SPECIAL_MASK) { + case 0: + break; + case ARM_CP_NOP: + return; + case ARM_CP_WFI: if (isread) { - /* Read */ - if (is64) { - TCGv_i64 tmp64; - TCGv_i32 tmp; - if (ri->type & ARM_CP_CONST) { - tmp64 = tcg_const_i64(ri->resetvalue); - } else if (ri->readfn) { - TCGv_ptr tmpptr; - tmp64 = tcg_temp_new_i64(); - tmpptr = tcg_const_ptr(ri); - gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr); - tcg_temp_free_ptr(tmpptr); - } else { - tmp64 = tcg_temp_new_i64(); - tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset); + unallocated_encoding(s); + } else { + gen_update_pc(s, curr_insn_len(s)); + s->base.is_jmp = DISAS_WFI; + } + return; + default: + g_assert_not_reached(); + } + + if (ri->type & ARM_CP_IO) { + /* I/O operations must end the TB here (whether read or write) */ + need_exit_tb = translator_io_start(&s->base); + } + + if (isread) { + /* Read */ + if (is64) { + TCGv_i64 tmp64; + TCGv_i32 tmp; + if (ri->type & ARM_CP_CONST) { + tmp64 = tcg_constant_i64(ri->resetvalue); + } else if (ri->readfn) { + if (!tcg_ri) { + tcg_ri = gen_lookup_cp_reg(key); } - tmp = tcg_temp_new_i32(); - tcg_gen_extrl_i64_i32(tmp, tmp64); - store_reg(s, rt, tmp); - tmp = tcg_temp_new_i32(); - tcg_gen_extrh_i64_i32(tmp, tmp64); - tcg_temp_free_i64(tmp64); - store_reg(s, rt2, tmp); + tmp64 = tcg_temp_new_i64(); + gen_helper_get_cp_reg64(tmp64, tcg_env, tcg_ri); } else { - TCGv_i32 tmp; - if (ri->type & ARM_CP_CONST) { - tmp = tcg_const_i32(ri->resetvalue); - } else if (ri->readfn) { - TCGv_ptr tmpptr; - tmp = tcg_temp_new_i32(); - tmpptr = tcg_const_ptr(ri); - gen_helper_get_cp_reg(tmp, cpu_env, tmpptr); - tcg_temp_free_ptr(tmpptr); - } else { - tmp = load_cpu_offset(ri->fieldoffset); - } - if (rt == 15) { - /* Destination register of r15 for 32 bit loads sets - * the condition codes from the high 4 bits of the value - */ - gen_set_nzcv(tmp); - tcg_temp_free_i32(tmp); - } else { - store_reg(s, rt, tmp); - } + tmp64 = tcg_temp_new_i64(); + tcg_gen_ld_i64(tmp64, tcg_env, ri->fieldoffset); } + tmp = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(tmp, tmp64); + store_reg(s, rt, tmp); + tmp = tcg_temp_new_i32(); + tcg_gen_extrh_i64_i32(tmp, tmp64); + store_reg(s, rt2, tmp); } else { - /* Write */ + TCGv_i32 tmp; if (ri->type & ARM_CP_CONST) { - /* If not forbidden by access permissions, treat as WI */ - return; - } - - if (is64) { - TCGv_i32 tmplo, tmphi; - TCGv_i64 tmp64 = tcg_temp_new_i64(); - tmplo = load_reg(s, rt); - tmphi = load_reg(s, rt2); - tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi); - tcg_temp_free_i32(tmplo); - tcg_temp_free_i32(tmphi); - if (ri->writefn) { - TCGv_ptr tmpptr = tcg_const_ptr(ri); - gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64); - tcg_temp_free_ptr(tmpptr); - } else { - tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset); + tmp = tcg_constant_i32(ri->resetvalue); + } else if (ri->readfn) { + if (!tcg_ri) { + tcg_ri = gen_lookup_cp_reg(key); } - tcg_temp_free_i64(tmp64); + tmp = tcg_temp_new_i32(); + gen_helper_get_cp_reg(tmp, tcg_env, tcg_ri); } else { - if (ri->writefn) { - TCGv_i32 tmp; - TCGv_ptr tmpptr; - tmp = load_reg(s, rt); - tmpptr = tcg_const_ptr(ri); - gen_helper_set_cp_reg(cpu_env, tmpptr, tmp); - tcg_temp_free_ptr(tmpptr); - tcg_temp_free_i32(tmp); - } else { - TCGv_i32 tmp = load_reg(s, rt); - store_cpu_offset(tmp, ri->fieldoffset); - } + tmp = load_cpu_offset(ri->fieldoffset); + } + if (rt == 15) { + /* Destination register of r15 for 32 bit loads sets + * the condition 
codes from the high 4 bits of the value + */ + gen_set_nzcv(tmp); + } else { + store_reg(s, rt, tmp); } } + } else { + /* Write */ + if (ri->type & ARM_CP_CONST) { + /* If not forbidden by access permissions, treat as WI */ + return; + } - /* I/O operations must end the TB here (whether read or write) */ - need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && - (ri->type & ARM_CP_IO)); - - if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { - /* - * A write to any coprocessor register that ends a TB - * must rebuild the hflags for the next TB. - */ - TCGv_i32 tcg_el = tcg_const_i32(s->current_el); - if (arm_dc_feature(s, ARM_FEATURE_M)) { - gen_helper_rebuild_hflags_m32(cpu_env, tcg_el); + if (is64) { + TCGv_i32 tmplo, tmphi; + TCGv_i64 tmp64 = tcg_temp_new_i64(); + tmplo = load_reg(s, rt); + tmphi = load_reg(s, rt2); + tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi); + if (ri->writefn) { + if (!tcg_ri) { + tcg_ri = gen_lookup_cp_reg(key); + } + gen_helper_set_cp_reg64(tcg_env, tcg_ri, tmp64); } else { - if (ri->type & ARM_CP_NEWEL) { - gen_helper_rebuild_hflags_a32_newel(cpu_env); - } else { - gen_helper_rebuild_hflags_a32(cpu_env, tcg_el); + tcg_gen_st_i64(tmp64, tcg_env, ri->fieldoffset); + } + } else { + TCGv_i32 tmp = load_reg(s, rt); + if (ri->writefn) { + if (!tcg_ri) { + tcg_ri = gen_lookup_cp_reg(key); } + gen_helper_set_cp_reg(tcg_env, tcg_ri, tmp); + } else { + store_cpu_offset(tmp, ri->fieldoffset, 4); } - tcg_temp_free_i32(tcg_el); - /* - * We default to ending the TB on a coprocessor register write, - * but allow this to be suppressed by the register definition - * (usually only necessary to work around guest bugs). - */ - need_exit_tb = true; - } - if (need_exit_tb) { - gen_lookup_tb(s); } - - return; } - /* Unknown register; this might be a guest error or a QEMU - * unimplemented feature. - */ - if (is64) { - qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 " - "64 bit system register cp:%d opc1: %d crm:%d " - "(%s)\n", - isread ? "read" : "write", cpnum, opc1, crm, - s->ns ? "non-secure" : "secure"); - } else { - qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 " - "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d " - "(%s)\n", - isread ? "read" : "write", cpnum, opc1, crn, crm, opc2, - s->ns ? "non-secure" : "secure"); + if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { + /* + * A write to any coprocessor register that ends a TB + * must rebuild the hflags for the next TB. + */ + gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL); + /* + * We default to ending the TB on a coprocessor register write, + * but allow this to be suppressed by the register definition + * (usually only necessary to work around guest bugs). + */ + need_exit_tb = true; + } + if (need_exit_tb) { + gen_lookup_tb(s); } - - unallocated_encoding(s); - return; } /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */ @@ -4981,10 +4830,7 @@ static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh) tmph = load_reg(s, rhigh); tmp = tcg_temp_new_i64(); tcg_gen_concat_i32_i64(tmp, tmpl, tmph); - tcg_temp_free_i32(tmpl); - tcg_temp_free_i32(tmph); tcg_gen_add_i64(val, val, tmp); - tcg_temp_free_i64(tmp); } /* Set N and Z flags from hi|lo. 
*/ @@ -5023,15 +4869,12 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv taddr = gen_aa32_addr(s, addr, opc); tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc); - tcg_temp_free(taddr); tcg_gen_mov_i64(cpu_exclusive_val, t64); if (s->be_data == MO_BE) { tcg_gen_extr_i64_i32(tmp2, tmp, t64); } else { tcg_gen_extr_i64_i32(tmp, tmp2, t64); } - tcg_temp_free_i64(t64); - store_reg(s, rt2, tmp2); } else { gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc); @@ -5068,7 +4911,6 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, extaddr = tcg_temp_new_i64(); tcg_gen_extu_i32_i64(extaddr, addr); tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label); - tcg_temp_free_i64(extaddr); taddr = gen_aa32_addr(s, addr, opc); t0 = tcg_temp_new_i32(); @@ -5093,27 +4935,19 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, } else { tcg_gen_concat_i32_i64(n64, t1, t2); } - tcg_temp_free_i32(t2); tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64, get_mem_index(s), opc); - tcg_temp_free_i64(n64); tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val); tcg_gen_extrl_i64_i32(t0, o64); - - tcg_temp_free_i64(o64); } else { t2 = tcg_temp_new_i32(); tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val); tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc); tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2); - tcg_temp_free_i32(t2); } - tcg_temp_free_i32(t1); - tcg_temp_free(taddr); tcg_gen_mov_i32(cpu_R[rd], t0); - tcg_temp_free_i32(t0); tcg_gen_br(done_label); gen_set_label(fail_label); @@ -5151,7 +4985,7 @@ static void gen_srs(DisasContext *s, * For the UNPREDICTABLE cases we choose to UNDEF. */ if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) { - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3); + gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3); return; } @@ -5192,12 +5026,10 @@ static void gen_srs(DisasContext *s, } addr = tcg_temp_new_i32(); - tmp = tcg_const_i32(mode); /* get_r13_banked() will raise an exception if called from System mode */ gen_set_condexec(s); - gen_set_pc_im(s, s->pc_curr); - gen_helper_get_r13_banked(addr, cpu_env, tmp); - tcg_temp_free_i32(tmp); + gen_update_pc(s, 0); + gen_helper_get_r13_banked(addr, tcg_env, tcg_constant_i32(mode)); switch (amode) { case 0: /* DA */ offset = -4; @@ -5212,16 +5044,14 @@ static void gen_srs(DisasContext *s, offset = 4; break; default: - abort(); + g_assert_not_reached(); } tcg_gen_addi_i32(addr, addr, offset); tmp = load_reg(s, 14); gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); - tcg_temp_free_i32(tmp); tmp = load_cpu_field(spsr); tcg_gen_addi_i32(addr, addr, 4); gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); - tcg_temp_free_i32(tmp); if (writeback) { switch (amode) { case 0: @@ -5237,14 +5067,11 @@ static void gen_srs(DisasContext *s, offset = 0; break; default: - abort(); + g_assert_not_reached(); } tcg_gen_addi_i32(addr, addr, offset); - tmp = tcg_const_i32(mode); - gen_helper_set_r13_banked(cpu_env, tmp, addr); - tcg_temp_free_i32(tmp); + gen_helper_set_r13_banked(tcg_env, tcg_constant_i32(mode), addr); } - tcg_temp_free_i32(addr); s->base.is_jmp = DISAS_UPDATE_EXIT; } @@ -5252,7 +5079,7 @@ static void gen_srs(DisasContext *s, static void arm_skip_unless(DisasContext *s, uint32_t cond) { arm_gen_condlabel(s); - arm_gen_test_cc(cond ^ 1, s->condlabel); + arm_gen_test_cc(cond ^ 1, s->condlabel.label); } @@ -5436,7 +5263,6 @@ static bool 
store_reg_kind(DisasContext *s, int rd, { switch (kind) { case STREG_NONE: - tcg_temp_free_i32(val); return true; case STREG_NORMAL: /* See ALUWritePC: Interworking only from a32 mode. */ @@ -5473,7 +5299,6 @@ static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a, tmp1 = load_reg(s, a->rn); gen(tmp1, tmp1, tmp2); - tcg_temp_free_i32(tmp2); if (logic_cc) { gen_logic_CC(tmp1); @@ -5515,7 +5340,6 @@ static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a, tmp1 = load_reg(s, a->rn); gen(tmp1, tmp1, tmp2); - tcg_temp_free_i32(tmp2); if (logic_cc) { gen_logic_CC(tmp1); @@ -5554,18 +5378,16 @@ static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a, void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32), int logic_cc, StoreRegKind kind) { - TCGv_i32 tmp1, tmp2; + TCGv_i32 tmp1; uint32_t imm; imm = ror32(a->imm, a->rot); if (logic_cc && a->rot) { tcg_gen_movi_i32(cpu_CF, imm >> 31); } - tmp2 = tcg_const_i32(imm); tmp1 = load_reg(s, a->rn); - gen(tmp1, tmp1, tmp2); - tcg_temp_free_i32(tmp2); + gen(tmp1, tmp1, tcg_constant_i32(imm)); if (logic_cc) { gen_logic_CC(tmp1); @@ -5584,9 +5406,10 @@ static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a, if (logic_cc && a->rot) { tcg_gen_movi_i32(cpu_CF, imm >> 31); } - tmp = tcg_const_i32(imm); - gen(tmp, tmp); + tmp = tcg_temp_new_i32(); + gen(tmp, tcg_constant_i32(imm)); + if (logic_cc) { gen_logic_CC(tmp); } @@ -5712,14 +5535,11 @@ static bool trans_ADR(DisasContext *s, arg_ri *a) static bool trans_MOVW(DisasContext *s, arg_MOVW *a) { - TCGv_i32 tmp; - if (!ENABLE_ARCH_6T2) { return false; } - tmp = tcg_const_i32(a->imm); - store_reg(s, a->rd, tmp); + store_reg(s, a->rd, tcg_constant_i32(a->imm)); return true; } @@ -5778,7 +5598,6 @@ static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a, tcg_gen_extrh_i64_i32(rdahi, rda); store_reg(s, a->rdalo, rdalo); store_reg(s, a->rdahi, rdahi); - tcg_temp_free_i64(rda); return true; } @@ -5800,7 +5619,7 @@ static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a) static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift) { - gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift)); + gen_helper_mve_sqshll(r, tcg_env, n, tcg_constant_i32(shift)); } static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a) @@ -5810,7 +5629,7 @@ static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a) static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift) { - gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift)); + gen_helper_mve_uqshll(r, tcg_env, n, tcg_constant_i32(shift)); } static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a) @@ -5856,13 +5675,12 @@ static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn) tcg_gen_concat_i32_i64(rda, rdalo, rdahi); /* The helper takes care of the sign-extension of the low 8 bits of Rm */ - fn(rda, cpu_env, rda, cpu_R[a->rm]); + fn(rda, tcg_env, rda, cpu_R[a->rm]); tcg_gen_extrl_i64_i32(rdalo, rda); tcg_gen_extrh_i64_i32(rdahi, rda); store_reg(s, a->rdalo, rdalo); store_reg(s, a->rdahi, rdahi); - tcg_temp_free_i64(rda); return true; } @@ -5931,7 +5749,7 @@ static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a) static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift) { - gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift)); + gen_helper_mve_sqshl(r, tcg_env, n, tcg_constant_i32(shift)); } static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a) @@ -5941,7 +5759,7 @@ static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a) static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t 
shift) { - gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift)); + gen_helper_mve_uqshl(r, tcg_env, n, tcg_constant_i32(shift)); } static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a) @@ -5965,7 +5783,7 @@ static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn) } /* The helper takes care of the sign-extension of the low 8 bits of Rm */ - fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]); + fn(cpu_R[a->rda], tcg_env, cpu_R[a->rda], cpu_R[a->rm]); return true; } @@ -5990,11 +5808,9 @@ static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add) t1 = load_reg(s, a->rn); t2 = load_reg(s, a->rm); tcg_gen_mul_i32(t1, t1, t2); - tcg_temp_free_i32(t2); if (add) { t2 = load_reg(s, a->ra); tcg_gen_add_i32(t1, t1, t2); - tcg_temp_free_i32(t2); } if (a->s) { gen_logic_CC(t1); @@ -6023,10 +5839,8 @@ static bool trans_MLS(DisasContext *s, arg_MLS *a) t1 = load_reg(s, a->rn); t2 = load_reg(s, a->rm); tcg_gen_mul_i32(t1, t1, t2); - tcg_temp_free_i32(t2); t2 = load_reg(s, a->ra); tcg_gen_sub_i32(t1, t2, t1); - tcg_temp_free_i32(t2); store_reg(s, a->rd, t1); return true; } @@ -6046,8 +5860,6 @@ static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add) t2 = load_reg(s, a->ra); t3 = load_reg(s, a->rd); tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3); - tcg_temp_free_i32(t2); - tcg_temp_free_i32(t3); } if (a->s) { gen_logicq_cc(t0, t1); @@ -6090,14 +5902,11 @@ static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a) t0 = load_reg(s, a->rm); t1 = load_reg(s, a->rn); tcg_gen_mulu2_i32(t0, t1, t0, t1); - zero = tcg_const_i32(0); + zero = tcg_constant_i32(0); t2 = load_reg(s, a->ra); tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero); - tcg_temp_free_i32(t2); t2 = load_reg(s, a->rd); tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero); - tcg_temp_free_i32(t2); - tcg_temp_free_i32(zero); store_reg(s, a->ra, t0); store_reg(s, a->rd, t1); return true; @@ -6120,14 +5929,13 @@ static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub) t0 = load_reg(s, a->rm); t1 = load_reg(s, a->rn); if (doub) { - gen_helper_add_saturate(t1, cpu_env, t1, t1); + gen_helper_add_saturate(t1, tcg_env, t1, t1); } if (add) { - gen_helper_add_saturate(t0, cpu_env, t0, t1); + gen_helper_add_saturate(t0, tcg_env, t0, t1); } else { - gen_helper_sub_saturate(t0, cpu_env, t0, t1); + gen_helper_sub_saturate(t0, tcg_env, t0, t1); } - tcg_temp_free_i32(t1); store_reg(s, a->rd, t0); return true; } @@ -6163,7 +5971,6 @@ static bool op_smlaxxx(DisasContext *s, arg_rrrr *a, t0 = load_reg(s, a->rn); t1 = load_reg(s, a->rm); gen_mulxy(t0, t1, nt, mt); - tcg_temp_free_i32(t1); switch (add_long) { case 0: @@ -6171,8 +5978,7 @@ static bool op_smlaxxx(DisasContext *s, arg_rrrr *a, break; case 1: t1 = load_reg(s, a->ra); - gen_helper_add_setq(t0, cpu_env, t0, t1); - tcg_temp_free_i32(t1); + gen_helper_add_setq(t0, tcg_env, t0, t1); store_reg(s, a->rd, t0); break; case 2: @@ -6182,8 +5988,6 @@ static bool op_smlaxxx(DisasContext *s, arg_rrrr *a, t1 = tcg_temp_new_i32(); tcg_gen_sari_i32(t1, t0, 31); tcg_gen_add2_i32(tl, th, tl, th, t0, t1); - tcg_temp_free_i32(t0); - tcg_temp_free_i32(t1); store_reg(s, a->ra, tl); store_reg(s, a->rd, th); break; @@ -6236,11 +6040,9 @@ static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt) tcg_gen_shli_i32(t1, t1, 16); } tcg_gen_muls2_i32(t0, t1, t0, t1); - tcg_temp_free_i32(t0); if (add) { t0 = load_reg(s, a->ra); - gen_helper_add_setq(t1, cpu_env, t1, t0); - tcg_temp_free_i32(t0); + gen_helper_add_setq(t1, tcg_env, t1, t0); } store_reg(s, a->rd, t1); return true; @@ 
-6272,7 +6074,7 @@ static bool trans_YIELD(DisasContext *s, arg_YIELD *a) * scheduling of other vCPUs. */ if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { - gen_set_pc_im(s, s->base.pc_next); + gen_update_pc(s, curr_insn_len(s)); s->base.is_jmp = DISAS_YIELD; } return true; @@ -6288,7 +6090,7 @@ static bool trans_WFE(DisasContext *s, arg_WFE *a) * implemented so we can't sleep like WFI does. */ if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { - gen_set_pc_im(s, s->base.pc_next); + gen_update_pc(s, curr_insn_len(s)); s->base.is_jmp = DISAS_WFE; } return true; @@ -6297,11 +6099,34 @@ static bool trans_WFE(DisasContext *s, arg_WFE *a) static bool trans_WFI(DisasContext *s, arg_WFI *a) { /* For WFI, halt the vCPU until an IRQ. */ - gen_set_pc_im(s, s->base.pc_next); + gen_update_pc(s, curr_insn_len(s)); s->base.is_jmp = DISAS_WFI; return true; } +static bool trans_ESB(DisasContext *s, arg_ESB *a) +{ + /* + * For M-profile, minimal-RAS ESB can be a NOP. + * Without RAS, we must implement this as NOP. + */ + if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) { + /* + * QEMU does not have a source of physical SErrors, + * so we are only concerned with virtual SErrors. + * The pseudocode in the ARM for this case is + * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then + * AArch32.vESBOperation(); + * Most of the condition can be evaluated at translation time. + * Test for EL2 present, and defer test for SEL2 to runtime. + */ + if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { + gen_helper_vesb(tcg_env); + } + } + return true; +} + static bool trans_NOP(DisasContext *s, arg_NOP *a) { return true; @@ -6344,14 +6169,12 @@ static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz) default: g_assert_not_reached(); } - t3 = tcg_const_i32(1 << sz); + t3 = tcg_constant_i32(1 << sz); if (c) { gen_helper_crc32c(t1, t1, t2, t3); } else { gen_helper_crc32(t1, t1, t2, t3); } - tcg_temp_free_i32(t2); - tcg_temp_free_i32(t3); store_reg(s, a->rd, t1); return true; } @@ -6406,7 +6229,7 @@ static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a) tmp = load_cpu_field(spsr); } else { tmp = tcg_temp_new_i32(); - gen_helper_cpsr_read(tmp, cpu_env); + gen_helper_cpsr_read(tmp, tcg_env); } store_reg(s, a->rd, tmp); return true; @@ -6434,8 +6257,8 @@ static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a) if (!arm_dc_feature(s, ARM_FEATURE_M)) { return false; } - tmp = tcg_const_i32(a->sysm); - gen_helper_v7m_mrs(tmp, cpu_env, tmp); + tmp = tcg_temp_new_i32(); + gen_helper_v7m_mrs(tmp, tcg_env, tcg_constant_i32(a->sysm)); store_reg(s, a->rd, tmp); return true; } @@ -6447,13 +6270,11 @@ static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a) if (!arm_dc_feature(s, ARM_FEATURE_M)) { return false; } - addr = tcg_const_i32((a->mask << 10) | a->sysm); + addr = tcg_constant_i32((a->mask << 10) | a->sysm); reg = load_reg(s, a->rn); - gen_helper_v7m_msr(cpu_env, addr, reg); - tcg_temp_free_i32(addr); - tcg_temp_free_i32(reg); + gen_helper_v7m_msr(tcg_env, addr, reg); /* If we wrote to CONTROL, the EL might have changed */ - gen_helper_rebuild_hflags_m32_newel(cpu_env); + gen_rebuild_hflags(s, true); gen_lookup_tb(s); return true; } @@ -6482,7 +6303,7 @@ static bool trans_BXJ(DisasContext *s, arg_BXJ *a) if (!arm_dc_feature(s, ARM_FEATURE_V8) && arm_dc_feature(s, ARM_FEATURE_EL2) && s->current_el < 2 && s->ns) { - gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm)); + gen_helper_check_bxj_trap(tcg_env, tcg_constant_i32(a->rm)); } /* Trivial implementation equivalent to bx. 
*/ gen_bx(s, load_reg(s, a->rm)); @@ -6497,7 +6318,7 @@ static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a) return false; } tmp = load_reg(s, a->rm); - tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb); + gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb); gen_bx(s, tmp); return true; } @@ -6554,7 +6375,7 @@ static bool trans_ERET(DisasContext *s, arg_ERET *a) } if (s->current_el == 2) { /* ERET from Hyp uses ELR_Hyp, not LR */ - tmp = load_cpu_field(elr_el[2]); + tmp = load_cpu_field_low32(elr_el[2]); } else { tmp = load_reg(s, 14); } @@ -6576,12 +6397,9 @@ static bool trans_BKPT(DisasContext *s, arg_BKPT *a) /* BKPT is OK with ECI set and leaves it untouched */ s->eci_handled = true; if (arm_dc_feature(s, ARM_FEATURE_M) && - semihosting_enabled() && -#ifndef CONFIG_USER_ONLY - !IS_USER(s) && -#endif + semihosting_enabled(s->current_el == 0) && (a->imm == 0xab)) { - gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST); + gen_exception_internal_insn(s, EXCP_SEMIHOST); } else { gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false)); } @@ -6662,9 +6480,8 @@ static bool trans_TT(DisasContext *s, arg_TT *a) } addr = load_reg(s, a->rn); - tmp = tcg_const_i32((a->A << 1) | a->T); - gen_helper_v7m_tt(tmp, cpu_env, addr, tmp); - tcg_temp_free_i32(addr); + tmp = tcg_temp_new_i32(); + gen_helper_v7m_tt(tmp, tcg_env, addr, tcg_constant_i32((a->A << 1) | a->T)); store_reg(s, a->rd, tmp); return true; } @@ -6680,7 +6497,7 @@ static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w) /* ISS not valid if writeback */ if (p && !w) { ret = rd; - if (s->base.pc_next - s->pc_curr == 2) { + if (curr_insn_len(s) == 2) { ret |= ISSIs16Bit; } } else { @@ -6694,7 +6511,7 @@ static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a) TCGv_i32 addr = load_reg(s, a->rn); if (s->v8m_stackcheck && a->rn == 13 && a->w) { - gen_helper_v8m_stackcheck(cpu_env, addr); + gen_helper_v8m_stackcheck(tcg_env, addr); } if (a->p) { @@ -6705,7 +6522,6 @@ static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a) } else { tcg_gen_sub_i32(addr, addr, ofs); } - tcg_temp_free_i32(ofs); } return addr; } @@ -6721,9 +6537,7 @@ static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a, } else { tcg_gen_sub_i32(addr, addr, ofs); } - tcg_temp_free_i32(ofs); } else if (!a->w) { - tcg_temp_free_i32(addr); return; } tcg_gen_addi_i32(addr, addr, address_offset); @@ -6770,7 +6584,6 @@ static bool op_store_rr(DisasContext *s, arg_ldst_rr *a, tmp = load_reg(s, a->rt); gen_aa32_st_i32(s, tmp, addr, mem_idx, mop); disas_set_da_iss(s, mop, issinfo); - tcg_temp_free_i32(tmp); op_addr_rr_post(s, a, addr, 0); return true; @@ -6821,13 +6634,11 @@ static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a) tmp = load_reg(s, a->rt); gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); - tcg_temp_free_i32(tmp); tcg_gen_addi_i32(addr, addr, 4); tmp = load_reg(s, a->rt + 1); gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); - tcg_temp_free_i32(tmp); op_addr_rr_post(s, a, addr, -4); return true; @@ -6855,10 +6666,9 @@ static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a) if (!a->u) { TCGv_i32 newsp = tcg_temp_new_i32(); tcg_gen_addi_i32(newsp, cpu_R[13], ofs); - gen_helper_v8m_stackcheck(cpu_env, newsp); - tcg_temp_free_i32(newsp); + gen_helper_v8m_stackcheck(tcg_env, newsp); } else { - gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]); + gen_helper_v8m_stackcheck(tcg_env, cpu_R[13]); } } @@ -6875,7 +6685,6 @@ static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a, address_offset -= a->imm; } 
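        /* post-indexed: the index adjustment is folded into the writeback below */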
} else if (!a->w) { - tcg_temp_free_i32(addr); return; } tcg_gen_addi_i32(addr, addr, address_offset); @@ -6922,7 +6731,6 @@ static bool op_store_ri(DisasContext *s, arg_ldst_ri *a, tmp = load_reg(s, a->rt); gen_aa32_st_i32(s, tmp, addr, mem_idx, mop); disas_set_da_iss(s, mop, issinfo); - tcg_temp_free_i32(tmp); op_addr_ri_post(s, a, addr, 0); return true; @@ -6976,13 +6784,11 @@ static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2) tmp = load_reg(s, a->rt); gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); - tcg_temp_free_i32(tmp); tcg_gen_addi_i32(addr, addr, 4); tmp = load_reg(s, rt2); gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); - tcg_temp_free_i32(tmp); op_addr_ri_post(s, a, addr, -4); return true; @@ -7047,11 +6853,9 @@ static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc) opc |= s->be_data; addr = load_reg(s, a->rn); taddr = gen_aa32_addr(s, addr, opc); - tcg_temp_free_i32(addr); tmp = load_reg(s, a->rt2); tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc); - tcg_temp_free(taddr); store_reg(s, a->rt, tmp); return true; @@ -7093,12 +6897,11 @@ static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel) tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); } - addr = tcg_temp_local_new_i32(); + addr = tcg_temp_new_i32(); load_reg_var(s, addr, a->rn); tcg_gen_addi_i32(addr, addr, a->imm); gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop); - tcg_temp_free_i32(addr); return true; } @@ -7210,8 +7013,6 @@ static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop) gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN); disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite); - tcg_temp_free_i32(tmp); - tcg_temp_free_i32(addr); return true; } @@ -7246,12 +7047,11 @@ static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq) return true; } - addr = tcg_temp_local_new_i32(); + addr = tcg_temp_new_i32(); load_reg_var(s, addr, a->rn); tcg_gen_addi_i32(addr, addr, a->imm); gen_load_exclusive(s, a->rt, a->rt2, addr, mop); - tcg_temp_free_i32(addr); if (acq) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); @@ -7365,7 +7165,6 @@ static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop) tmp = tcg_temp_new_i32(); gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN); disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel); - tcg_temp_free_i32(addr); store_reg(s, a->rt, tmp); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); @@ -7402,11 +7201,9 @@ static bool trans_USADA8(DisasContext *s, arg_USADA8 *a) t1 = load_reg(s, a->rn); t2 = load_reg(s, a->rm); gen_helper_usad8(t1, t1, t2); - tcg_temp_free_i32(t2); if (a->ra != 15) { t2 = load_reg(s, a->ra); tcg_gen_add_i32(t1, t1, t2); - tcg_temp_free_i32(t2); } store_reg(s, a->rd, t1); return true; @@ -7449,8 +7246,8 @@ static bool trans_UBFX(DisasContext *s, arg_UBFX *a) static bool trans_BFCI(DisasContext *s, arg_BFCI *a) { - TCGv_i32 tmp; int msb = a->msb, lsb = a->lsb; + TCGv_i32 t_in, t_rd; int width; if (!ENABLE_ARCH_6T2) { @@ -7465,17 +7262,14 @@ static bool trans_BFCI(DisasContext *s, arg_BFCI *a) width = msb + 1 - lsb; if (a->rn == 15) { /* BFC */ - tmp = tcg_const_i32(0); + t_in = tcg_constant_i32(0); } else { /* BFI */ - tmp = load_reg(s, a->rn); - } - if (width != 32) { - TCGv_i32 tmp2 = load_reg(s, a->rd); - tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width); - tcg_temp_free_i32(tmp2); + t_in = load_reg(s, a->rn); } - store_reg(s, a->rd, tmp); + t_rd = load_reg(s, a->rd); + tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width); + store_reg(s, a->rd, t_rd); return true; } @@ -7505,7 +7299,6 @@ 
static bool op_par_addsub(DisasContext *s, arg_rrr *a, gen(t0, t0, t1); - tcg_temp_free_i32(t1); store_reg(s, a->rd, t0); return true; } @@ -7527,11 +7320,9 @@ static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a, t1 = load_reg(s, a->rm); ge = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE)); + tcg_gen_addi_ptr(ge, tcg_env, offsetof(CPUARMState, GE)); gen(t0, t0, t1, ge); - tcg_temp_free_ptr(ge); - tcg_temp_free_i32(t1); store_reg(s, a->rd, t0); return true; } @@ -7622,7 +7413,6 @@ static bool trans_PKH(DisasContext *s, arg_PKH *a) tcg_gen_shli_i32(tm, tm, shift); tcg_gen_deposit_i32(tn, tm, tn, 0, 16); } - tcg_temp_free_i32(tm); store_reg(s, a->rd, tn); return true; } @@ -7630,7 +7420,7 @@ static bool trans_PKH(DisasContext *s, arg_PKH *a) static bool op_sat(DisasContext *s, arg_sat *a, void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32)) { - TCGv_i32 tmp, satimm; + TCGv_i32 tmp; int shift = a->imm; if (!ENABLE_ARCH_6) { @@ -7644,9 +7434,7 @@ static bool op_sat(DisasContext *s, arg_sat *a, tcg_gen_shli_i32(tmp, tmp, shift); } - satimm = tcg_const_i32(a->satimm); - gen(tmp, cpu_env, tmp, satimm); - tcg_temp_free_i32(satimm); + gen(tmp, tcg_env, tmp, tcg_constant_i32(a->satimm)); store_reg(s, a->rd, tmp); return true; @@ -7699,7 +7487,6 @@ static bool op_xta(DisasContext *s, arg_rrr_rot *a, if (a->rn != 15) { TCGv_i32 tmp2 = load_reg(s, a->rn); gen_add(tmp, tmp, tmp2); - tcg_temp_free_i32(tmp2); } store_reg(s, a->rd, tmp); return true; @@ -7754,10 +7541,8 @@ static bool trans_SEL(DisasContext *s, arg_rrr *a) t1 = load_reg(s, a->rn); t2 = load_reg(s, a->rm); t3 = tcg_temp_new_i32(); - tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE)); + tcg_gen_ld_i32(t3, tcg_env, offsetof(CPUARMState, GE)); gen_helper_sel_flags(t1, t3, t1, t2); - tcg_temp_free_i32(t3); - tcg_temp_free_i32(t2); store_reg(s, a->rd, t1); return true; } @@ -7831,17 +7616,14 @@ static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) * addition of Ra. */ tcg_gen_sub_i32(t1, t1, t2); - tcg_temp_free_i32(t2); if (a->ra != 15) { t2 = load_reg(s, a->ra); - gen_helper_add_setq(t1, cpu_env, t1, t2); - tcg_temp_free_i32(t2); + gen_helper_add_setq(t1, tcg_env, t1, t2); } } else if (a->ra == 15) { /* Single saturation-checking addition */ - gen_helper_add_setq(t1, cpu_env, t1, t2); - tcg_temp_free_i32(t2); + gen_helper_add_setq(t1, tcg_env, t1, t2); } else { /* * We need to add the products and Ra together and then @@ -7861,10 +7643,8 @@ static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) load_reg_var(s, t2, a->ra); tcg_gen_ext_i32_i64(q64, t2); tcg_gen_add_i64(p64, p64, q64); - tcg_temp_free_i64(q64); tcg_gen_extr_i64_i32(t1, t2, p64); - tcg_temp_free_i64(p64); /* * t1 is the low half of the result which goes into Rd. 
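     * (tcg_gen_extr_i64_i32() above split the 64-bit sum into t1/t2.)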
* We have overflow and must set Q if the high half (t2) @@ -7873,12 +7653,9 @@ static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) t3 = tcg_temp_new_i32(); tcg_gen_sari_i32(t3, t1, 31); qf = load_cpu_field(QF); - one = tcg_const_i32(1); + one = tcg_constant_i32(1); tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf); store_cpu_field(qf, QF); - tcg_temp_free_i32(one); - tcg_temp_free_i32(t3); - tcg_temp_free_i32(t2); } store_reg(s, a->rd, t1); return true; @@ -7924,19 +7701,15 @@ static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) l2 = tcg_temp_new_i64(); tcg_gen_ext_i32_i64(l1, t1); tcg_gen_ext_i32_i64(l2, t2); - tcg_temp_free_i32(t1); - tcg_temp_free_i32(t2); if (sub) { tcg_gen_sub_i64(l1, l1, l2); } else { tcg_gen_add_i64(l1, l1, l2); } - tcg_temp_free_i64(l2); gen_addq(s, l1, a->ra, a->rd); gen_storeq_reg(s, a->ra, a->rd, l1); - tcg_temp_free_i64(l1); return true; } @@ -7982,13 +7755,10 @@ static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub) * a non-zero multiplicand lowpart, and the correct result * lowpart for rounding. */ - TCGv_i32 zero = tcg_const_i32(0); - tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1); - tcg_temp_free_i32(zero); + tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1); } else { tcg_gen_add_i32(t1, t1, t3); } - tcg_temp_free_i32(t3); } if (round) { /* @@ -7998,7 +7768,6 @@ static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub) tcg_gen_shri_i32(t2, t2, 31); tcg_gen_add_i32(t1, t1, t2); } - tcg_temp_free_i32(t2); store_reg(s, a->rd, t1); return true; } @@ -8036,11 +7805,10 @@ static bool op_div(DisasContext *s, arg_rrr *a, bool u) t1 = load_reg(s, a->rn); t2 = load_reg(s, a->rm); if (u) { - gen_helper_udiv(t1, cpu_env, t1, t2); + gen_helper_udiv(t1, tcg_env, t1, t2); } else { - gen_helper_sdiv(t1, cpu_env, t1, t2); + gen_helper_sdiv(t1, tcg_env, t1, t2); } - tcg_temp_free_i32(t2); store_reg(s, a->rd, t1); return true; } @@ -8081,14 +7849,14 @@ static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n) * If the writeback is incrementing SP rather than * decrementing it, and the initial SP is below the * stack limit but the final written-back SP would - * be above, then then we must not perform any memory + * be above, then we must not perform any memory * accesses, but it is IMPDEF whether we generate * an exception. We choose to do so in this case. * At this point 'addr' is the lowest address, so * either the original SP (if incrementing) or our * final SP (if decrementing), so that's what we check. */ - gen_helper_v8m_stackcheck(cpu_env, addr); + gen_helper_v8m_stackcheck(tcg_env, addr); } return addr; @@ -8112,16 +7880,14 @@ static void op_addr_block_post(DisasContext *s, arg_ldst_block *a, tcg_gen_addi_i32(addr, addr, -((n - 1) * 4)); } store_reg(s, a->rn, addr); - } else { - tcg_temp_free_i32(addr); } } -static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n) +static bool op_stm(DisasContext *s, arg_ldst_block *a) { int i, j, n, list, mem_idx; bool user = a->u; - TCGv_i32 addr, tmp, tmp2; + TCGv_i32 addr, tmp; if (user) { /* STM (user) */ @@ -8134,7 +7900,14 @@ static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n) list = a->list; n = ctpop16(list); - if (n < min_n || a->rn == 15) { + /* + * This is UNPREDICTABLE for n < 1 in all encodings, and we choose + * to UNDEF. 
In the T32 STM encoding n == 1 is also UNPREDICTABLE, + * but hardware treats it like the A32 version and implements the + * single-register-store, and some in-the-wild (buggy) software + * assumes that, so we don't UNDEF on that case. + */ + if (n < 1 || a->rn == 15) { unallocated_encoding(s); return true; } @@ -8151,14 +7924,11 @@ static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n) if (user && i != 15) { tmp = tcg_temp_new_i32(); - tmp2 = tcg_const_i32(i); - gen_helper_get_user_reg(tmp, cpu_env, tmp2); - tcg_temp_free_i32(tmp2); + gen_helper_get_user_reg(tmp, tcg_env, tcg_constant_i32(i)); } else { tmp = load_reg(s, i); } gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); - tcg_temp_free_i32(tmp); /* No need to add after the last transfer. */ if (++j != n) { @@ -8173,8 +7943,7 @@ static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n) static bool trans_STM(DisasContext *s, arg_ldst_block *a) { - /* BitCount(list) < 1 is UNPREDICTABLE */ - return op_stm(s, a, 1); + return op_stm(s, a); } static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a) @@ -8184,17 +7953,16 @@ static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a) unallocated_encoding(s); return true; } - /* BitCount(list) < 2 is UNPREDICTABLE */ - return op_stm(s, a, 2); + return op_stm(s, a); } -static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) +static bool do_ldm(DisasContext *s, arg_ldst_block *a) { int i, j, n, list, mem_idx; bool loaded_base; bool user = a->u; bool exc_return = false; - TCGv_i32 addr, tmp, tmp2, loaded_var; + TCGv_i32 addr, tmp, loaded_var; if (user) { /* LDM (user), LDM (exception return) */ @@ -8217,7 +7985,14 @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) list = a->list; n = ctpop16(list); - if (n < min_n || a->rn == 15) { + /* + * This is UNPREDICTABLE for n < 1 in all encodings, and we choose + * to UNDEF. In the T32 LDM encoding n == 1 is also UNPREDICTABLE, + * but hardware treats it like the A32 version and implements the + * single-register-load, and some in-the-wild (buggy) software + * assumes that, so we don't UNDEF on that case. + */ + if (n < 1 || a->rn == 15) { unallocated_encoding(s); return true; } @@ -8237,10 +8012,7 @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) tmp = tcg_temp_new_i32(); gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN); if (user) { - tmp2 = tcg_const_i32(i); - gen_helper_set_user_reg(cpu_env, tmp2, tmp); - tcg_temp_free_i32(tmp2); - tcg_temp_free_i32(tmp); + gen_helper_set_user_reg(tcg_env, tcg_constant_i32(i), tmp); } else if (i == a->rn) { loaded_var = tmp; loaded_base = true; @@ -8266,11 +8038,8 @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) if (exc_return) { /* Restore CPSR from SPSR. 
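     * (exception-return LDM: translator_io_start() now performs the icount
     *  check that the removed gen_io_start() code used to do.)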
*/ tmp = load_cpu_field(spsr); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } - gen_helper_cpsr_write_eret(cpu_env, tmp); - tcg_temp_free_i32(tmp); + translator_io_start(&s->base); + gen_helper_cpsr_write_eret(tcg_env, tmp); /* Must exit loop to check un-masked IRQs */ s->base.is_jmp = DISAS_EXIT; } @@ -8289,8 +8058,7 @@ static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a) unallocated_encoding(s); return true; } - /* BitCount(list) < 1 is UNPREDICTABLE */ - return do_ldm(s, a, 1); + return do_ldm(s, a); } static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a) @@ -8300,16 +8068,14 @@ static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a) unallocated_encoding(s); return true; } - /* BitCount(list) < 2 is UNPREDICTABLE */ - return do_ldm(s, a, 2); + return do_ldm(s, a); } static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a) { /* Writeback is conditional on the base register not being loaded. */ a->w = !(a->list & (1 << a->rn)); - /* BitCount(list) < 1 is UNPREDICTABLE */ - return do_ldm(s, a, 1); + return do_ldm(s, a); } static bool trans_CLRM(DisasContext *s, arg_CLRM *a) @@ -8332,7 +8098,7 @@ static bool trans_CLRM(DisasContext *s, arg_CLRM *a) s->eci_handled = true; - zero = tcg_const_i32(0); + zero = tcg_constant_i32(0); for (i = 0; i < 15; i++) { if (extract32(a->list, i, 1)) { /* Clear R[i] */ @@ -8344,11 +8110,8 @@ static bool trans_CLRM(DisasContext *s, arg_CLRM *a) * Clear APSR (by calling the MSR helper with the same argument * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0) */ - TCGv_i32 maskreg = tcg_const_i32(0xc << 8); - gen_helper_v7m_msr(cpu_env, maskreg, zero); - tcg_temp_free_i32(maskreg); + gen_helper_v7m_msr(tcg_env, tcg_constant_i32(0xc00), zero); } - tcg_temp_free_i32(zero); clear_eci_state(s); return true; } @@ -8359,7 +8122,7 @@ static bool trans_CLRM(DisasContext *s, arg_CLRM *a) static bool trans_B(DisasContext *s, arg_i *a) { - gen_jmp(s, read_pc(s) + a->imm); + gen_jmp(s, jmp_diff(s, a->imm)); return true; } @@ -8374,21 +8137,19 @@ static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a) return true; } arm_skip_unless(s, a->cond); - gen_jmp(s, read_pc(s) + a->imm); + gen_jmp(s, jmp_diff(s, a->imm)); return true; } static bool trans_BL(DisasContext *s, arg_i *a) { - tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb); - gen_jmp(s, read_pc(s) + a->imm); + gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb); + gen_jmp(s, jmp_diff(s, a->imm)); return true; } static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a) { - TCGv_i32 tmp; - /* * BLX <imm> would be useless on M-profile; the encoding space * is used for other insns from v8.1M onward, and UNDEFs before that. @@ -8401,17 +8162,17 @@ static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a) if (s->thumb && (a->imm & 2)) { return false; } - tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb); - tmp = tcg_const_i32(!s->thumb); - store_cpu_field(tmp, thumb); - gen_jmp(s, (read_pc(s) & ~3) + a->imm); + gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb); + store_cpu_field_constant(!s->thumb, thumb); + /* This jump is computed from an aligned PC: subtract off the low bits. 
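+     * (an interworking BLX target is relative to Align(PC,4))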
*/ + gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3))); return true; } static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a) { assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2)); - tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12)); + gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12)); return true; } @@ -8421,7 +8182,7 @@ static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a) assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2)); tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1); - tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1); + gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1); gen_bx(s, tmp); return true; } @@ -8437,7 +8198,7 @@ static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a) tmp = tcg_temp_new_i32(); tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1); tcg_gen_andi_i32(tmp, tmp, 0xfffffffc); - tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1); + gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1); gen_bx(s, tmp); return true; } @@ -8494,8 +8255,7 @@ static bool trans_DLS(DisasContext *s, arg_DLS *a) store_reg(s, 14, tmp); if (a->size != 4) { /* DLSTP: set FPSCR.LTPSIZE */ - tmp = tcg_const_i32(a->size); - store_cpu_field(tmp, v7m.ltpsize); + store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize); s->base.is_jmp = DISAS_UPDATE_NOCHAIN; } return true; @@ -8505,7 +8265,7 @@ static bool trans_WLS(DisasContext *s, arg_WLS *a) { /* M-profile low-overhead while-loop start */ TCGv_i32 tmp; - TCGLabel *nextlabel; + DisasLabel nextlabel; if (!dc_isar_feature(aa32_lob, s)) { return false; @@ -8540,14 +8300,14 @@ static bool trans_WLS(DisasContext *s, arg_WLS *a) * Do the check-and-raise-exception by hand. */ if (s->fp_excp_el) { - gen_exception_insn(s, s->pc_curr, EXCP_NOCP, - syn_uncategorized(), s->fp_excp_el); + gen_exception_insn_el(s, 0, EXCP_NOCP, + syn_uncategorized(), s->fp_excp_el); return true; } } - nextlabel = gen_new_label(); - tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel); + nextlabel = gen_disas_label(s); + tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label); tmp = load_reg(s, a->rn); store_reg(s, 14, tmp); if (a->size != 4) { @@ -8560,17 +8320,16 @@ static bool trans_WLS(DisasContext *s, arg_WLS *a) */ bool ok = vfp_access_check(s); assert(ok); - tmp = tcg_const_i32(a->size); - store_cpu_field(tmp, v7m.ltpsize); + store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize); /* * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0) * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK. */ } - gen_jmp_tb(s, s->base.pc_next, 1); + gen_jmp_tb(s, curr_insn_len(s), 1); - gen_set_label(nextlabel); - gen_jmp(s, read_pc(s) + a->imm); + set_disas_label(s, nextlabel); + gen_jmp(s, jmp_diff(s, a->imm)); return true; } @@ -8585,7 +8344,7 @@ static bool trans_LE(DisasContext *s, arg_LE *a) * any faster. 
*/ TCGv_i32 tmp; - TCGLabel *loopend; + DisasLabel loopend; bool fpu_active; if (!dc_isar_feature(aa32_lob, s)) { @@ -8640,18 +8399,16 @@ static bool trans_LE(DisasContext *s, arg_LE *a) if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) { /* Need to do a runtime check for LTPSIZE != 4 */ - TCGLabel *skipexc = gen_new_label(); + DisasLabel skipexc = gen_disas_label(s); tmp = load_cpu_field(v7m.ltpsize); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc); - tcg_temp_free_i32(tmp); - gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(), - default_exception_el(s)); - gen_set_label(skipexc); + tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label); + gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized()); + set_disas_label(s, skipexc); } if (a->f) { /* Loop-forever: just jump back to the loop start */ - gen_jmp(s, read_pc(s) - a->imm); + gen_jmp(s, jmp_diff(s, -a->imm)); return true; } @@ -8661,37 +8418,34 @@ static bool trans_LE(DisasContext *s, arg_LE *a) * loop decrement value is 1. For LETP we need to calculate the decrement * value from LTPSIZE. */ - loopend = gen_new_label(); + loopend = gen_disas_label(s); if (!a->tp) { - tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend); + tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label); tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1); } else { /* * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local * so that decr stays live after the brcondi. */ - TCGv_i32 decr = tcg_temp_local_new_i32(); + TCGv_i32 decr = tcg_temp_new_i32(); TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize); tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize); tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr); - tcg_temp_free_i32(ltpsize); - tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend); + tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label); tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr); - tcg_temp_free_i32(decr); } /* Jump back to the loop start */ - gen_jmp(s, read_pc(s) - a->imm); + gen_jmp(s, jmp_diff(s, -a->imm)); - gen_set_label(loopend); + set_disas_label(s, loopend); if (a->tp) { /* Exits from tail-pred loops must reset LTPSIZE to 4 */ - tmp = tcg_const_i32(4); - store_cpu_field(tmp, v7m.ltpsize); + store_cpu_field(tcg_constant_i32(4), v7m.ltpsize); } /* End TB, continuing to following insn */ - gen_jmp_tb(s, s->base.pc_next, 1); + gen_jmp_tb(s, curr_insn_len(s), 1); return true; } @@ -8702,7 +8456,6 @@ static bool trans_LCTP(DisasContext *s, arg_LCTP *a) * doesn't cache branch information, all we need to do is reset * FPSCR.LTPSIZE to 4. */ - TCGv_i32 ltpsize; if (!dc_isar_feature(aa32_lob, s) || !dc_isar_feature(aa32_mve, s)) { @@ -8713,8 +8466,7 @@ static bool trans_LCTP(DisasContext *s, arg_LCTP *a) return true; } - ltpsize = tcg_const_i32(4); - store_cpu_field(ltpsize, v7m.ltpsize); + store_cpu_field_constant(4, v7m.ltpsize); return true; } @@ -8745,9 +8497,7 @@ static bool trans_VCTP(DisasContext *s, arg_VCTP *a) tcg_gen_movcond_i32(TCG_COND_LEU, masklen, masklen, tcg_constant_i32(1 << (4 - a->size)), rn_shifted, tcg_constant_i32(16)); - gen_helper_mve_vctp(cpu_env, masklen); - tcg_temp_free_i32(masklen); - tcg_temp_free_i32(rn_shifted); + gen_helper_mve_vctp(tcg_env, masklen); /* This insn updates predication bits */ s->base.is_jmp = DISAS_UPDATE_NOCHAIN; mve_update_eci(s); @@ -8766,10 +8516,10 @@ static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half) tcg_gen_add_i32(addr, addr, tmp); gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? 
MO_UW : MO_UB); - tcg_temp_free_i32(addr); tcg_gen_add_i32(tmp, tmp, tmp); - tcg_gen_addi_i32(tmp, tmp, read_pc(s)); + gen_pc_plus_diff(s, addr, jmp_diff(s, 0)); + tcg_gen_add_i32(tmp, tmp, addr); store_reg(s, 15, tmp); return true; } @@ -8790,9 +8540,8 @@ static bool trans_CBZ(DisasContext *s, arg_CBZ *a) arm_gen_condlabel(s); tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE, - tmp, 0, s->condlabel); - tcg_temp_free_i32(tmp); - gen_jmp(s, read_pc(s) + a->imm); + tmp, 0, s->condlabel.label); + gen_jmp(s, jmp_diff(s, a->imm)); return true; } @@ -8805,16 +8554,19 @@ static bool trans_SVC(DisasContext *s, arg_SVC *a) { const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456; - if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() && -#ifndef CONFIG_USER_ONLY - !IS_USER(s) && -#endif + if (!arm_dc_feature(s, ARM_FEATURE_M) && + semihosting_enabled(s->current_el == 0) && (a->imm == semihost_imm)) { - gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST); + gen_exception_internal_insn(s, EXCP_SEMIHOST); } else { - gen_set_pc_im(s, s->base.pc_next); - s->svc_imm = a->imm; - s->base.is_jmp = DISAS_SWI; + if (s->fgt_svc) { + uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb); + gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); + } else { + gen_update_pc(s, curr_insn_len(s)); + s->svc_imm = a->imm; + s->base.is_jmp = DISAS_SWI; + } } return true; } @@ -8855,8 +8607,6 @@ static bool trans_RFE(DisasContext *s, arg_RFE *a) /* Base writeback. */ tcg_gen_addi_i32(addr, addr, post_offset[a->pu]); store_reg(s, a->rn, addr); - } else { - tcg_temp_free_i32(addr); } gen_rfe(s, t1, t2); return true; @@ -8911,7 +8661,7 @@ static bool trans_CPS(DisasContext *s, arg_CPS *a) static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a) { - TCGv_i32 tmp, addr, el; + TCGv_i32 tmp, addr; if (!arm_dc_feature(s, ARM_FEATURE_M)) { return false; @@ -8921,23 +8671,18 @@ static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a) return true; } - tmp = tcg_const_i32(a->im); + tmp = tcg_constant_i32(a->im); /* FAULTMASK */ if (a->F) { - addr = tcg_const_i32(19); - gen_helper_v7m_msr(cpu_env, addr, tmp); - tcg_temp_free_i32(addr); + addr = tcg_constant_i32(19); + gen_helper_v7m_msr(tcg_env, addr, tmp); } /* PRIMASK */ if (a->I) { - addr = tcg_const_i32(16); - gen_helper_v7m_msr(cpu_env, addr, tmp); - tcg_temp_free_i32(addr); - } - el = tcg_const_i32(s->current_el); - gen_helper_rebuild_hflags_m32(cpu_env, el); - tcg_temp_free_i32(el); - tcg_temp_free_i32(tmp); + addr = tcg_constant_i32(16); + gen_helper_v7m_msr(tcg_env, addr, tmp); + } + gen_rebuild_hflags(s, false); gen_lookup_tb(s); return true; } @@ -9005,7 +8750,7 @@ static bool trans_SETEND(DisasContext *s, arg_SETEND *a) return false; } if (a->E != (s->be_data == MO_BE)) { - gen_helper_setend(cpu_env); + gen_helper_setend(tcg_env); s->base.is_jmp = DISAS_UPDATE_EXIT; } return true; @@ -9055,7 +8800,7 @@ static bool trans_IT(DisasContext *s, arg_IT *a) /* v8.1M CSEL/CSINC/CSNEG/CSINV */ static bool trans_CSEL(DisasContext *s, arg_CSEL *a) { - TCGv_i32 rn, rm, zero; + TCGv_i32 rn, rm; DisasCompare c; if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) { @@ -9073,15 +8818,17 @@ static bool trans_CSEL(DisasContext *s, arg_CSEL *a) } /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */ + rn = tcg_temp_new_i32(); + rm = tcg_temp_new_i32(); if (a->rn == 15) { - rn = tcg_const_i32(0); + tcg_gen_movi_i32(rn, 0); } else { - rn = load_reg(s, a->rn); + load_reg_var(s, rn, a->rn); } if (a->rm == 15) { - rm = tcg_const_i32(0); + tcg_gen_movi_i32(rm, 0); } 
else { - rm = load_reg(s, a->rm); + load_reg_var(s, rm, a->rm); } switch (a->op) { @@ -9101,14 +8848,9 @@ static bool trans_CSEL(DisasContext *s, arg_CSEL *a) } arm_test_cc(&c, a->fcond); - zero = tcg_const_i32(0); - tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm); - arm_free_cc(&c); - tcg_temp_free_i32(zero); + tcg_gen_movcond_i32(c.cond, rn, c.value, tcg_constant_i32(0), rn, rm); store_reg(s, a->rd, rn); - tcg_temp_free_i32(rm); - return true; } @@ -9124,8 +8866,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) * UsageFault exception. */ if (arm_dc_feature(s, ARM_FEATURE_M)) { - gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(), - default_exception_el(s)); + gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized()); return; } @@ -9134,8 +8875,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) * Illegal execution state. This has priority over BTI * exceptions, but comes after instruction abort exceptions. */ - gen_exception_insn(s, s->pc_curr, EXCP_UDEF, - syn_illegalstate(), default_exception_el(s)); + gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate()); return; } @@ -9359,20 +9099,15 @@ static bool insn_crosses_page(CPUARMState *env, DisasContext *s) static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) { DisasContext *dc = container_of(dcbase, DisasContext, base); - CPUARMState *env = cs->env_ptr; + CPUARMState *env = cpu_env(cs); ARMCPU *cpu = env_archcpu(env); CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb); uint32_t condexec, core_mmu_idx; dc->isar = &cpu->isar; dc->condjmp = 0; - - dc->aarch64 = 0; - /* If we are coming from secure EL0 in a system with a 32-bit EL3, then - * there is no secure EL1, so we route exceptions to EL3. - */ - dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) && - !arm_el_is_aa64(env, 3); + dc->pc_save = dc->base.pc_first; + dc->aarch64 = false; dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB); dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? 
MO_BE : MO_LE; condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC); @@ -9389,7 +9124,6 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) */ dc->eci = dc->condexec_mask = dc->condexec_cond = 0; dc->eci_handled = false; - dc->insn_eci_rewind = NULL; if (condexec & 0xf) { dc->condexec_mask = (condexec & 0xf) << 1; dc->condexec_cond = condexec >> 4; @@ -9408,13 +9142,14 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL); + dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE); + dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC); if (arm_feature(env, ARM_FEATURE_M)) { dc->vfp_enabled = 1; dc->be_data = MO_TE; dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER); - dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) && - regime_is_secure(env, dc->mmu_idx); + dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE); dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK); dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG); dc->v7m_new_fp_ctxt_needed = @@ -9422,7 +9157,6 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT); dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED); } else { - dc->debug_target_el = EX_TBFLAG_ANY(tb_flags, DEBUG_TARGET_EL); dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B); dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE); dc->ns = EX_TBFLAG_A32(tb_flags, NS); @@ -9433,7 +9167,10 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN); dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE); } + dc->sme_trap_nonstreaming = + EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING); } + dc->lse2 = false; /* applies only to aarch64 */ dc->cp_regs = cpu->cp_regs; dc->features = env->features; @@ -9459,7 +9196,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK; /* If architectural single step active, limit to 1. */ - if (is_singlestepping(dc)) { + if (dc->ss_active) { dc->base.max_insns = 1; } @@ -9512,9 +9249,7 @@ static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu) /* Reset the conditional execution bits immediately. This avoids complications trying to do it at the end of the block. */ if (dc->condexec_mask || dc->condexec_cond) { - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_movi_i32(tmp, 0); - store_cpu_field(tmp, condexec_bits); + store_cpu_field_constant(0, condexec_bits); } } @@ -9527,17 +9262,21 @@ static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) * fields here. */ uint32_t condexec_bits; + target_ulong pc_arg = dc->base.pc_next; + if (tb_cflags(dcbase->tb) & CF_PCREL) { + pc_arg &= ~TARGET_PAGE_MASK; + } if (dc->eci) { condexec_bits = dc->eci << 4; } else { condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1); } - tcg_gen_insn_start(dc->base.pc_next, condexec_bits, 0); - dc->insn_start = tcg_last_op(); + tcg_gen_insn_start(pc_arg, condexec_bits, 0); + dc->insn_start_updated = false; } -static bool arm_pre_translate_insn(DisasContext *dc) +static bool arm_check_kernelpage(DisasContext *dc) { #ifdef CONFIG_USER_ONLY /* Intercept jump to the magic kernel page. 
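     * (linux-user only: the commpage holding the kuser helpers)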
*/ @@ -9549,7 +9288,11 @@ static bool arm_pre_translate_insn(DisasContext *dc) return true; } #endif + return false; +} +static bool arm_check_ss_active(DisasContext *dc) +{ if (dc->ss_active && !dc->pstate_ss) { /* Singlestep state is Active-pending. * If we're in this state at the start of a TB then either @@ -9572,28 +9315,51 @@ static bool arm_pre_translate_insn(DisasContext *dc) static void arm_post_translate_insn(DisasContext *dc) { - if (dc->condjmp && !dc->base.is_jmp) { - gen_set_label(dc->condlabel); + if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) { + if (dc->pc_save != dc->condlabel.pc_save) { + gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save); + } + gen_set_label(dc->condlabel.label); dc->condjmp = 0; } - translator_loop_temp_check(&dc->base); } static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); - CPUARMState *env = cpu->env_ptr; + CPUARMState *env = cpu_env(cpu); + uint32_t pc = dc->base.pc_next; unsigned int insn; - if (arm_pre_translate_insn(dc)) { - dc->base.pc_next += 4; + /* Singlestep exceptions have the highest priority. */ + if (arm_check_ss_active(dc)) { + dc->base.pc_next = pc + 4; return; } - dc->pc_curr = dc->base.pc_next; - insn = arm_ldl_code(env, &dc->base, dc->base.pc_next, dc->sctlr_b); + if (pc & 3) { + /* + * PC alignment fault. This has priority over the instruction abort + * that we would receive from a translation fault via arm_ldl_code + * (or the execution of the kernelpage entrypoint). This should only + * be possible after an indirect branch, at the start of the TB. + */ + assert(dc->base.num_insns == 1); + gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc)); + dc->base.is_jmp = DISAS_NORETURN; + dc->base.pc_next = QEMU_ALIGN_UP(pc, 4); + return; + } + + if (arm_check_kernelpage(dc)) { + dc->base.pc_next = pc + 4; + return; + } + + dc->pc_curr = pc; + insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b); dc->insn = insn; - dc->base.pc_next += 4; + dc->base.pc_next = pc + 4; disas_arm_insn(dc, insn); arm_post_translate_insn(dc); @@ -9651,26 +9417,32 @@ static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn) static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); - CPUARMState *env = cpu->env_ptr; + CPUARMState *env = cpu_env(cpu); + uint32_t pc = dc->base.pc_next; uint32_t insn; bool is_16bit; + /* TCG op to rewind to if this turns out to be an invalid ECI state */ + TCGOp *insn_eci_rewind = NULL; + target_ulong insn_eci_pc_save = -1; - if (arm_pre_translate_insn(dc)) { - dc->base.pc_next += 2; + /* Misaligned thumb PC is architecturally impossible. 
*/ + assert((dc->base.pc_next & 1) == 0); + + if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) { + dc->base.pc_next = pc + 2; return; } - dc->pc_curr = dc->base.pc_next; - insn = arm_lduw_code(env, &dc->base, dc->base.pc_next, dc->sctlr_b); + dc->pc_curr = pc; + insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b); is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn); - dc->base.pc_next += 2; + pc += 2; if (!is_16bit) { - uint32_t insn2 = arm_lduw_code(env, &dc->base, dc->base.pc_next, - dc->sctlr_b); - + uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b); insn = insn << 16 | insn2; - dc->base.pc_next += 2; + pc += 2; } + dc->base.pc_next = pc; dc->insn = insn; if (dc->pstate_il) { @@ -9678,8 +9450,7 @@ static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) * Illegal execution state. This has priority over BTI * exceptions, but comes after instruction abort exceptions. */ - gen_exception_insn(dc, dc->pc_curr, EXCP_UDEF, - syn_illegalstate(), default_exception_el(dc)); + gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate()); return; } @@ -9714,7 +9485,8 @@ static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) * insn" case. We will rewind to the marker (ie throwing away * all the generated code) and instead emit "take exception". */ - dc->insn_eci_rewind = tcg_last_op(); + insn_eci_rewind = tcg_last_op(); + insn_eci_pc_save = dc->pc_save; } if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) { @@ -9750,10 +9522,10 @@ static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) * Insn wasn't valid for ECI/ICI at all: undo what we * just generated and instead emit an exception */ - tcg_remove_ops_after(dc->insn_eci_rewind); + tcg_remove_ops_after(insn_eci_rewind); + dc->pc_save = insn_eci_pc_save; dc->condjmp = 0; - gen_exception_insn(dc, dc->pc_curr, EXCP_INVSTATE, syn_uncategorized(), - default_exception_el(dc)); + gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized()); } arm_post_translate_insn(dc); @@ -9794,27 +9566,26 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) * insn codepath itself. */ gen_bx_excret_final_code(dc); - } else if (unlikely(is_singlestepping(dc))) { + } else if (unlikely(dc->ss_active)) { /* Unconditional and "condition passed" instruction codepath. */ switch (dc->base.is_jmp) { case DISAS_SWI: gen_ss_advance(dc); - gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb), - default_exception_el(dc)); + gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb)); break; case DISAS_HVC: gen_ss_advance(dc); - gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2); + gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2); break; case DISAS_SMC: gen_ss_advance(dc); - gen_exception(EXCP_SMC, syn_aa32_smc(), 3); + gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3); break; case DISAS_NEXT: case DISAS_TOO_MANY: case DISAS_UPDATE_EXIT: case DISAS_UPDATE_NOCHAIN: - gen_set_pc_im(dc, dc->base.pc_next); + gen_update_pc(dc, curr_insn_len(dc)); /* fall through */ default: /* FIXME: Single stepping a WFI insn will not halt the CPU. 
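The two-pass fetch in thumb_tr_translate_insn above relies on telling, from the first halfword alone, whether a second halfword belongs to the same instruction before advancing pc. A sketch of the length test such a helper performs, assuming a plain Thumb-2 encoding (the in-tree thumb_insn_is_16bit also special-cases pre-Thumb-2 BL/BLX halfword pairs, which this sketch ignores):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * A Thumb instruction is 32 bits wide iff bits [15:11] of the first
 * halfword are 0b11101, 0b11110 or 0b11111; any value below 0xe800
 * is a complete 16-bit instruction.
 */
static bool first_halfword_is_16bit(uint16_t insn)
{
    return (insn & 0xf800) < 0xe800;
}

int main(void)
{
    assert(first_halfword_is_16bit(0x4770));   /* BX lr: 16-bit */
    assert(!first_halfword_is_16bit(0xf000));  /* BL prefix: 32-bit */
    return 0;
}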
*/ @@ -9835,16 +9606,16 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) switch (dc->base.is_jmp) { case DISAS_NEXT: case DISAS_TOO_MANY: - gen_goto_tb(dc, 1, dc->base.pc_next); + gen_goto_tb(dc, 1, curr_insn_len(dc)); break; case DISAS_UPDATE_NOCHAIN: - gen_set_pc_im(dc, dc->base.pc_next); + gen_update_pc(dc, curr_insn_len(dc)); /* fall through */ case DISAS_JUMP: gen_goto_ptr(); break; case DISAS_UPDATE_EXIT: - gen_set_pc_im(dc, dc->base.pc_next); + gen_update_pc(dc, curr_insn_len(dc)); /* fall through */ default: /* indicate that the hash table must be used to find the next TB */ @@ -9854,56 +9625,51 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) /* nothing more to generate */ break; case DISAS_WFI: - { - TCGv_i32 tmp = tcg_const_i32((dc->thumb && - !(dc->insn & (1U << 31))) ? 2 : 4); - - gen_helper_wfi(cpu_env, tmp); - tcg_temp_free_i32(tmp); - /* The helper doesn't necessarily throw an exception, but we + gen_helper_wfi(tcg_env, tcg_constant_i32(curr_insn_len(dc))); + /* + * The helper doesn't necessarily throw an exception, but we * must go back to the main loop to check for interrupts anyway. */ tcg_gen_exit_tb(NULL, 0); break; - } case DISAS_WFE: - gen_helper_wfe(cpu_env); + gen_helper_wfe(tcg_env); break; case DISAS_YIELD: - gen_helper_yield(cpu_env); + gen_helper_yield(tcg_env); break; case DISAS_SWI: - gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb), - default_exception_el(dc)); + gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb)); break; case DISAS_HVC: - gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2); + gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2); break; case DISAS_SMC: - gen_exception(EXCP_SMC, syn_aa32_smc(), 3); + gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3); break; } } if (dc->condjmp) { /* "Condition failed" instruction codepath for the branch/trap insn */ - gen_set_label(dc->condlabel); + set_disas_label(dc, dc->condlabel); gen_set_condexec(dc); - if (unlikely(is_singlestepping(dc))) { - gen_set_pc_im(dc, dc->base.pc_next); + if (unlikely(dc->ss_active)) { + gen_update_pc(dc, curr_insn_len(dc)); gen_singlestep_exception(dc); } else { - gen_goto_tb(dc, 1, dc->base.pc_next); + gen_goto_tb(dc, 1, curr_insn_len(dc)); } } } -static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu) +static void arm_tr_disas_log(const DisasContextBase *dcbase, + CPUState *cpu, FILE *logfile) { DisasContext *dc = container_of(dcbase, DisasContext, base); - qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first)); - log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size); + fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first)); + target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size); } static const TranslatorOps arm_translator_ops = { @@ -9925,7 +9691,8 @@ static const TranslatorOps thumb_translator_ops = { }; /* generate intermediate code for basic block 'tb'. 
*/ -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns, + vaddr pc, void *host_pc) { DisasContext dc = { }; const TranslatorOps *ops = &arm_translator_ops; @@ -9940,19 +9707,5 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns) } #endif - translator_loop(ops, &dc.base, cpu, tb, max_insns); -} - -void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, - target_ulong *data) -{ - if (is_a64(env)) { - env->pc = data[0]; - env->condexec_bits = 0; - env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT; - } else { - env->regs[15] = data[0]; - env->condexec_bits = data[1]; - env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT; - } + translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base); } diff --git a/target/arm/translate.h b/target/arm/tcg/translate.h index 3a0db801d3..dc66ff2190 100644 --- a/target/arm/translate.h +++ b/target/arm/tcg/translate.h @@ -1,23 +1,52 @@ #ifndef TARGET_ARM_TRANSLATE_H #define TARGET_ARM_TRANSLATE_H +#include "cpu.h" +#include "tcg/tcg-op.h" +#include "tcg/tcg-op-gvec.h" +#include "exec/exec-all.h" #include "exec/translator.h" +#include "exec/helper-gen.h" #include "internals.h" - +#include "cpu-features.h" /* internal defines */ + +/* + * Save pc_save across a branch, so that we may restore the value from + * before the branch at the point the label is emitted. + */ +typedef struct DisasLabel { + TCGLabel *label; + target_ulong pc_save; +} DisasLabel; + typedef struct DisasContext { DisasContextBase base; const ARMISARegisters *isar; /* The address of the current instruction being translated. */ target_ulong pc_curr; + /* + * For CF_PCREL, the full value of cpu_pc is not known + * (although the page offset is known). For convenience, the + * translation loop uses the full virtual address that triggered + * the translation, from base.pc_start through pc_curr. + * For efficiency, we do not update cpu_pc for every instruction. + * Instead, pc_save has the value of pc_curr at the time of the + * last update to cpu_pc, which allows us to compute the addend + * needed to bring cpu_pc current: pc_curr - pc_save. + * If cpu_pc now contains the destination of an indirect branch, + * pc_save contains -1 to indicate that relative updates are no + * longer possible. + */ + target_ulong pc_save; target_ulong page_start; uint32_t insn; /* Nonzero if this instruction has been conditionally skipped. */ int condjmp; /* The label that will be jumped to when the instruction is skipped. */ - TCGLabel *condlabel; + DisasLabel condlabel; /* Thumb-2 conditional execution bits. */ int condexec_mask; int condexec_cond; @@ -28,9 +57,6 @@ typedef struct DisasContext { * after decode (ie after any UNDEF checks) */ bool eci_handled; - /* TCG op to rewind to if this turns out to be an invalid ECI state */ - TCGOp *insn_eci_rewind; - int thumb; int sctlr_b; MemOp be_data; #if !defined(CONFIG_USER_ONLY) @@ -43,9 +69,9 @@ typedef struct DisasContext { bool ns; /* Use non-secure CPREG bank on access */ int fp_excp_el; /* FP exception EL or 0 if enabled */ int sve_excp_el; /* SVE exception EL or 0 if enabled */ - int sve_len; /* SVE vector length in bytes */ - /* Flag indicating that exceptions from secure mode are routed to EL3. 
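The pc_save field documented above lets the translator emit relative PC updates: the addend that brings cpu_pc current is always pc_curr - pc_save. A standalone model of that bookkeeping; Ctx, emit_pc_addi and update_pc are hypothetical stand-ins for DisasContext, the emitted TCG add, and gen_update_pc:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint64_t pc_curr;   /* virtual address of the insn being translated */
    uint64_t pc_save;   /* pc_curr at the last write to cpu_pc */
} Ctx;

/* Stand-in for the TCG addition emitted on the cpu_pc variable. */
static void emit_pc_addi(int64_t diff)
{
    printf("cpu_pc += %" PRId64 "\n", diff);
}

/* Bring cpu_pc up to pc_curr + diff, then remember what it now holds. */
static void update_pc(Ctx *s, int64_t diff)
{
    emit_pc_addi(s->pc_curr - s->pc_save + diff);
    s->pc_save = s->pc_curr + diff;
}

int main(void)
{
    Ctx s = { .pc_curr = 0x1000, .pc_save = 0x1000 };

    s.pc_curr = 0x1004;   /* second insn; cpu_pc still holds 0x1000 */
    update_pc(&s, 4);     /* point past this insn: emits cpu_pc += 8 */
    return 0;
}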
*/ - bool secure_routed_to_el3; + int sme_excp_el; /* SME exception EL or 0 if enabled */ + int vl; /* current vector length in bytes */ + int svl; /* current streaming vector length in bytes */ bool vfp_enabled; /* FP enabled via FPSCR.EN */ int vec_len; int vec_stride; @@ -59,12 +85,12 @@ typedef struct DisasContext { * so that top level loop can generate correct syndrome information. */ uint32_t svc_imm; - int aarch64; int current_el; - /* Debug target exception level for single-step exceptions */ - int debug_target_el; GHashTable *cp_regs; uint64_t features; /* CPU features bits */ + bool aarch64; + bool thumb; + bool lse2; /* Because unallocated encodings generate different exception syndrome * information from traps due to FP being disabled, we can't do a single * "is fp access disabled" check at a high level in the decode tree. @@ -88,8 +114,8 @@ typedef struct DisasContext { bool unpriv; /* True if v8.3-PAuth is active. */ bool pauth_active; - /* True if v8.5-MTE access to tags is enabled. */ - bool ata; + /* True if v8.5-MTE access to tags is enabled; index with is_unpriv. */ + bool ata[2]; /* True if v8.5-MTE tag checks affect the PE; index with is_unpriv. */ bool mte_active[2]; /* True with v8.5-BTI and SCTLR_ELx.BT* set. */ @@ -100,8 +126,34 @@ typedef struct DisasContext { bool align_mem; /* True if PSTATE.IL is set */ bool pstate_il; + /* True if PSTATE.SM is set. */ + bool pstate_sm; + /* True if PSTATE.ZA is set. */ + bool pstate_za; + /* True if non-streaming insns should raise an SME Streaming exception. */ + bool sme_trap_nonstreaming; + /* True if the current instruction is non-streaming. */ + bool is_nonstreaming; /* True if MVE insns are definitely not predicated by VPR or LTPSIZE */ bool mve_no_pred; + /* True if fine-grained traps are active */ + bool fgt_active; + /* True if fine-grained trap on SVC is enabled */ + bool fgt_svc; + /* True if a trap on ERET is enabled (FGT or NV) */ + bool trap_eret; + /* True if FEAT_LSE2 SCTLR_ELx.nAA is set */ + bool naa; + /* True if FEAT_NV HCR_EL2.NV is enabled */ + bool nv; + /* True if NV enabled and HCR_EL2.NV1 is set */ + bool nv1; + /* True if NV enabled and HCR_EL2.NV2 is set */ + bool nv2; + /* True if NV2 enabled and NV2 RAM accesses use EL2&0 translation regime */ + bool nv2_mem_e20; + /* True if NV2 enabled and NV2 RAM accesses are big-endian */ + bool nv2_mem_be; /* * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI. * < 0, set by the current instruction. @@ -109,21 +161,21 @@ typedef struct DisasContext { int8_t btype; /* A copy of cpu->dcz_blocksize. */ uint8_t dcz_blocksize; + /* A copy of cpu->gm_blocksize. */ + uint8_t gm_blocksize; /* True if this page is guarded. */ bool guarded_page; + /* True if the current insn_start has been updated. */ + bool insn_start_updated; /* Bottom two bits of XScale c15_cpar coprocessor access control reg */ int c15_cpar; - /* TCG op of the current insn_start. */ - TCGOp *insn_start; -#define TMP_A64_MAX 16 - int tmp_a64_count; - TCGv_i64 tmp_a64[TMP_A64_MAX]; + /* Offset from VNCR_EL2 when FEAT_NV2 redirects this reg to memory */ + uint32_t nv2_redirect_offset; } DisasContext; typedef struct DisasCompare { TCGCond cond; TCGv_i32 value; - bool value_global; } DisasCompare; /* Share the TCG temporaries common between 32 and 64 bit modes. 
*/ @@ -150,6 +202,11 @@ static inline int plus_2(DisasContext *s, int x) return x + 2; } +static inline int plus_12(DisasContext *s, int x) +{ + return x + 12; +} + static inline int times_2(DisasContext *s, int x) { return x * 2; @@ -160,6 +217,11 @@ static inline int times_4(DisasContext *s, int x) return x * 4; } +static inline int times_8(DisasContext *s, int x) +{ + return x * 8; +} + static inline int times_2_plus_1(DisasContext *s, int x) { return x * 2 + 1; @@ -185,6 +247,11 @@ static inline int rsub_8(DisasContext *s, int x) return 8 - x; } +static inline int shl_12(DisasContext *s, int x) +{ + return x << 12; +} + static inline int neon_3same_fp_size(DisasContext *s, int x) { /* Convert 0==fp32, 1==fp16 into a MO_* value */ @@ -201,20 +268,6 @@ static inline int get_mem_index(DisasContext *s) return arm_to_core_mmu_idx(s->mmu_idx); } -/* Function used to determine the target exception EL when otherwise not known - * or default. - */ -static inline int default_exception_el(DisasContext *s) -{ - /* If we are coming from secure EL0 in a system with a 32-bit EL3, then - * there is no secure EL1, so we route exceptions to EL3. Otherwise, - * exceptions can only be routed to ELs above 1, so we target the higher of - * 1 or the current EL. - */ - return (s->mmu_idx == ARMMMUIdx_SE10_0 && s->secure_routed_to_el3) - ? 3 : MAX(1, s->current_el); -} - static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) { /* We don't need to save all of the syndrome so we mask and shift @@ -223,10 +276,15 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) syn &= ARM_INSN_START_WORD2_MASK; syn >>= ARM_INSN_START_WORD2_SHIFT; - /* We check and clear insn_start_idx to catch multiple updates. */ - assert(s->insn_start != NULL); - tcg_set_insn_start_param(s->insn_start, 2, syn); - s->insn_start = NULL; + /* Check for multiple updates. */ + assert(!s->insn_start_updated); + s->insn_start_updated = true; + tcg_set_insn_start_param(s->base.insn_start, 2, syn); +} + +static inline int curr_insn_len(DisasContext *s) +{ + return s->base.pc_next - s->pc_curr; } /* is_jmp field values */ @@ -252,7 +310,7 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) * For instructions which want an immediate exit to the main loop, as opposed * to attempting to use lookup_and_goto_ptr. Unlike DISAS_UPDATE_EXIT, this * doesn't write the PC on exiting the translation loop so you need to ensure - * something (gen_a64_set_pc_im or runtime helper) has done so before we reach + * something (gen_a64_update_pc or runtime helper) has done so before we reach * return from cpu_tb_exec. 
*/ #define DISAS_EXIT DISAS_TARGET_9 @@ -261,33 +319,34 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) #ifdef TARGET_AARCH64 void a64_translate_init(void); -void gen_a64_set_pc_im(uint64_t val); +void gen_a64_update_pc(DisasContext *s, target_long diff); extern const TranslatorOps aarch64_translator_ops; #else static inline void a64_translate_init(void) { } -static inline void gen_a64_set_pc_im(uint64_t val) +static inline void gen_a64_update_pc(DisasContext *s, target_long diff) { } #endif void arm_test_cc(DisasCompare *cmp, int cc); -void arm_free_cc(DisasCompare *cmp); void arm_jump_cc(DisasCompare *cmp, TCGLabel *label); void arm_gen_test_cc(int cc, TCGLabel *label); MemOp pow2_align(unsigned i); void unallocated_encoding(DisasContext *s); -void gen_exception_insn(DisasContext *s, uint64_t pc, int excp, - uint32_t syn, uint32_t target_el); +void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp, + uint32_t syn, uint32_t target_el); +void gen_exception_insn(DisasContext *s, target_long pc_diff, + int excp, uint32_t syn); /* Return state of Alternate Half-precision flag, caller frees result */ static inline TCGv_i32 get_ahp_flag(void) { TCGv_i32 ret = tcg_temp_new_i32(); - tcg_gen_ld_i32(ret, cpu_env, + tcg_gen_ld_i32(ret, tcg_env, offsetof(CPUARMState, vfp.xregs[ARM_VFP_FPSCR])); tcg_gen_extract_i32(ret, ret, 26, 1); @@ -301,10 +360,9 @@ static inline void set_pstate_bits(uint32_t bits) tcg_debug_assert(!(bits & CACHED_PSTATE_BITS)); - tcg_gen_ld_i32(p, cpu_env, offsetof(CPUARMState, pstate)); + tcg_gen_ld_i32(p, tcg_env, offsetof(CPUARMState, pstate)); tcg_gen_ori_i32(p, p, bits); - tcg_gen_st_i32(p, cpu_env, offsetof(CPUARMState, pstate)); - tcg_temp_free_i32(p); + tcg_gen_st_i32(p, tcg_env, offsetof(CPUARMState, pstate)); } /* Clear bits within PSTATE. */ @@ -314,10 +372,9 @@ static inline void clear_pstate_bits(uint32_t bits) tcg_debug_assert(!(bits & CACHED_PSTATE_BITS)); - tcg_gen_ld_i32(p, cpu_env, offsetof(CPUARMState, pstate)); + tcg_gen_ld_i32(p, tcg_env, offsetof(CPUARMState, pstate)); tcg_gen_andi_i32(p, p, ~bits); - tcg_gen_st_i32(p, cpu_env, offsetof(CPUARMState, pstate)); - tcg_temp_free_i32(p); + tcg_gen_st_i32(p, tcg_env, offsetof(CPUARMState, pstate)); } /* If the singlestep state is Active-not-pending, advance to Active-pending. */ @@ -329,33 +386,12 @@ static inline void gen_ss_advance(DisasContext *s) } } -static inline void gen_exception(int excp, uint32_t syndrome, - uint32_t target_el) -{ - TCGv_i32 tcg_excp = tcg_const_i32(excp); - TCGv_i32 tcg_syn = tcg_const_i32(syndrome); - TCGv_i32 tcg_el = tcg_const_i32(target_el); - - gen_helper_exception_with_syndrome(cpu_env, tcg_excp, - tcg_syn, tcg_el); - - tcg_temp_free_i32(tcg_el); - tcg_temp_free_i32(tcg_syn); - tcg_temp_free_i32(tcg_excp); -} - /* Generate an architectural singlestep exception */ static inline void gen_swstep_exception(DisasContext *s, int isv, int ex) { - bool same_el = (s->debug_target_el == s->current_el); - - /* - * If singlestep is targeting a lower EL than the current one, - * then s->ss_active must be false and we can never get here. - */ - assert(s->debug_target_el >= s->current_el); - - gen_exception(EXCP_UDEF, syn_swstep(same_el, isv, ex), s->debug_target_el); + /* Fill in the same_el field of the syndrome in the helper. 
*/ + uint32_t syn = syn_swstep(false, isv, ex); + gen_helper_exception_swstep(tcg_env, tcg_constant_i32(syn)); } /* @@ -538,17 +574,18 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) default: g_assert_not_reached(); } - tcg_gen_addi_ptr(statusptr, cpu_env, offset); + tcg_gen_addi_ptr(statusptr, tcg_env, offset); return statusptr; } /** - * finalize_memop: + * finalize_memop_atom: * @s: DisasContext * @opc: size+sign+align of the memory operation + * @atom: atomicity of the memory operation * - * Build the complete MemOp for a memory operation, including alignment - * and endianness. + * Build the complete MemOp for a memory operation, including alignment, + * endianness, and atomicity. * * If (op & MO_AMASK) then the operation already contains the required * alignment, e.g. for AccType_ATOMIC. Otherwise, this an optionally @@ -558,12 +595,63 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour) * and this is applied here. Note that there is no way to indicate that * no alignment should ever be enforced; this must be handled manually. */ -static inline MemOp finalize_memop(DisasContext *s, MemOp opc) +static inline MemOp finalize_memop_atom(DisasContext *s, MemOp opc, MemOp atom) { if (s->align_mem && !(opc & MO_AMASK)) { opc |= MO_ALIGN; } - return opc | s->be_data; + return opc | atom | s->be_data; +} + +/** + * finalize_memop: + * @s: DisasContext + * @opc: size+sign+align of the memory operation + * + * Like finalize_memop_atom, but with default atomicity. + */ +static inline MemOp finalize_memop(DisasContext *s, MemOp opc) +{ + MemOp atom = s->lse2 ? MO_ATOM_WITHIN16 : MO_ATOM_IFALIGN; + return finalize_memop_atom(s, opc, atom); +} + +/** + * finalize_memop_pair: + * @s: DisasContext + * @opc: size+sign+align of the memory operation + * + * Like finalize_memop_atom, but with atomicity for a pair. + * C.f. Pseudocode for Mem[], operand ispair. + */ +static inline MemOp finalize_memop_pair(DisasContext *s, MemOp opc) +{ + MemOp atom = s->lse2 ? MO_ATOM_WITHIN16_PAIR : MO_ATOM_IFALIGN_PAIR; + return finalize_memop_atom(s, opc, atom); +} + +/** + * finalize_memop_asimd: + * @s: DisasContext + * @opc: size+sign+align of the memory operation + * + * Like finalize_memop_atom, but with atomicity of AccessType_ASIMD. + */ +static inline MemOp finalize_memop_asimd(DisasContext *s, MemOp opc) +{ + /* + * In the pseudocode for Mem[], with AccessType_ASIMD, size == 16, + * if IsAligned(8), the first case provides separate atomicity for + * the pair of 64-bit accesses. If !IsAligned(8), the middle cases + * do not apply, and we're left with the final case of no atomicity. + * Thus MO_ATOM_IFALIGN_PAIR. + * + * For other sizes, normal LSE2 rules apply. + */ + if ((opc & MO_SIZE) == MO_128) { + return finalize_memop_atom(s, opc, MO_ATOM_IFALIGN_PAIR); + } + return finalize_memop(s, opc); } /** @@ -583,4 +671,68 @@ static inline MemOp finalize_memop(DisasContext *s, MemOp opc) */ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op); +/* + * gen_disas_label: + * Create a label and cache a copy of pc_save. + */ +static inline DisasLabel gen_disas_label(DisasContext *s) +{ + return (DisasLabel){ + .label = gen_new_label(), + .pc_save = s->pc_save, + }; +} + +/* + * set_disas_label: + * Emit a label and restore the cached copy of pc_save. 
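gen_disas_label and set_disas_label exist because a conditional skip can branch from a point where cpu_pc held one value to a label the fallthrough path reaches with another. Caching pc_save in the label keeps the two paths consistent. A toy model of the invariant (Label, Ctx, new_label and emit_label are hypothetical names):

#include <assert.h>
#include <stdint.h>

typedef struct { uint64_t pc_save; } Ctx;
typedef struct { uint64_t pc_save; } Label;

/* Snapshot pc_save at the branch site... */
static Label new_label(Ctx *s)
{
    return (Label){ .pc_save = s->pc_save };
}

/* ...and restore it where the label is emitted, so code after the
 * label sees the same cpu_pc state on both incoming paths. */
static void emit_label(Ctx *s, Label l)
{
    s->pc_save = l.pc_save;
}

int main(void)
{
    Ctx s = { .pc_save = 0x1000 };
    Label skip = new_label(&s);

    s.pc_save = (uint64_t)-1;   /* fallthrough did an indirect branch */
    emit_label(&s, skip);
    assert(s.pc_save == 0x1000);
    return 0;
}

arm_post_translate_insn above shows the companion fix-up: when pc_save at the branch site differs from the label's snapshot, a compensating gen_update_pc is emitted before the label.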
+ */ +static inline void set_disas_label(DisasContext *s, DisasLabel l) +{ + gen_set_label(l.label); + s->pc_save = l.pc_save; +} + +static inline TCGv_ptr gen_lookup_cp_reg(uint32_t key) +{ + TCGv_ptr ret = tcg_temp_new_ptr(); + gen_helper_lookup_cp_reg(ret, tcg_env, tcg_constant_i32(key)); + return ret; +} + +/* + * Set and reset rounding mode around another operation. + */ +static inline TCGv_i32 gen_set_rmode(ARMFPRounding rmode, TCGv_ptr fpst) +{ + TCGv_i32 new = tcg_constant_i32(arm_rmode_to_sf(rmode)); + TCGv_i32 old = tcg_temp_new_i32(); + + gen_helper_set_rmode(old, new, fpst); + return old; +} + +static inline void gen_restore_rmode(TCGv_i32 old, TCGv_ptr fpst) +{ + gen_helper_set_rmode(old, old, fpst); +} + +/* + * Helpers for implementing sets of trans_* functions. + * Defer the implementation of NAME to FUNC, with optional extra arguments. + */ +#define TRANS(NAME, FUNC, ...) \ + static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ + { return FUNC(s, __VA_ARGS__); } +#define TRANS_FEAT(NAME, FEAT, FUNC, ...) \ + static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ + { return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); } + +#define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...) \ + static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ + { \ + s->is_nonstreaming = true; \ + return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); \ + } + #endif /* TARGET_ARM_TRANSLATE_H */ diff --git a/target/arm/vec_helper.c b/target/arm/tcg/vec_helper.c index 17fb158362..1f93510b85 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -23,6 +23,7 @@ #include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" #include "qemu/int128.h" +#include "crypto/clmul.h" #include "vec_internal.h" /* @@ -127,6 +128,32 @@ const uint64_t expand_pred_b_data[256] = { 0xffffffffffffffff, }; +/* + * Similarly for half-word elements. 
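The TRANS/TRANS_FEAT wrappers defined near the end of translate.h above collapse the per-pattern boilerplate of decodetree trans_* functions. A self-contained expansion of one TRANS_FEAT instance; FOO, do_foo and the stubbed dc_isar_feature are illustrative, not real QEMU names:

#include <stdbool.h>
#include <stdio.h>

typedef struct { int dummy; } DisasContext;
typedef struct { int rd, rn; } arg_FOO;    /* decodetree-style arg struct */

/* Stub: the real dc_isar_feature tests an ID-register field. */
#define dc_isar_feature(FEAT, S) (FEAT)

static bool do_foo(DisasContext *s, int rd, int rn)
{
    printf("emit FOO rd=%d rn=%d\n", rd, rn);
    return true;
}

/* As in translate.h: defer trans_FOO to do_foo, gated on a feature. */
#define TRANS_FEAT(NAME, FEAT, FUNC, ...) \
    static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
    { return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); }

TRANS_FEAT(FOO, true, do_foo, a->rd, a->rn)

int main(void)
{
    DisasContext dc = { 0 };
    arg_FOO a = { .rd = 1, .rn = 2 };
    return trans_FOO(&dc, &a) ? 0 : 1;
}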
+ * for (i = 0; i < 256; ++i) { + * unsigned long m = 0; + * if (i & 0xaa) { + * continue; + * } + * for (j = 0; j < 8; j += 2) { + * if ((i >> j) & 1) { + * m |= 0xfffful << (j << 3); + * } + * } + * printf("[0x%x] = 0x%016lx,\n", i, m); + * } + */ +const uint64_t expand_pred_h_data[0x55 + 1] = { + [0x01] = 0x000000000000ffff, [0x04] = 0x00000000ffff0000, + [0x05] = 0x00000000ffffffff, [0x10] = 0x0000ffff00000000, + [0x11] = 0x0000ffff0000ffff, [0x14] = 0x0000ffffffff0000, + [0x15] = 0x0000ffffffffffff, [0x40] = 0xffff000000000000, + [0x41] = 0xffff00000000ffff, [0x44] = 0xffff0000ffff0000, + [0x45] = 0xffff0000ffffffff, [0x50] = 0xffffffff00000000, + [0x51] = 0xffffffff0000ffff, [0x54] = 0xffffffffffff0000, + [0x55] = 0xffffffffffffffff, +}; + /* Signed saturating rounding doubling multiply-accumulate high half, 8-bit */ int8_t do_sqrdmlah_b(int8_t src1, int8_t src2, int8_t src3, bool neg, bool round) @@ -1960,21 +1987,11 @@ void HELPER(gvec_ushl_h)(void *vd, void *vn, void *vm, uint32_t desc) */ void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc) { - intptr_t i, j, opr_sz = simd_oprsz(desc); + intptr_t i, opr_sz = simd_oprsz(desc); uint64_t *d = vd, *n = vn, *m = vm; for (i = 0; i < opr_sz / 8; ++i) { - uint64_t nn = n[i]; - uint64_t mm = m[i]; - uint64_t rr = 0; - - for (j = 0; j < 8; ++j) { - uint64_t mask = (nn & 0x0101010101010101ull) * 0xff; - rr ^= mm & mask; - mm = (mm << 1) & 0xfefefefefefefefeull; - nn >>= 1; - } - d[i] = rr; + d[i] = clmul_8x8_low(n[i], m[i]); } clear_tail(d, opr_sz, simd_maxsz(desc)); } @@ -1986,84 +2003,28 @@ void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc) */ void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc) { - intptr_t i, j, opr_sz = simd_oprsz(desc); + intptr_t i, opr_sz = simd_oprsz(desc); intptr_t hi = simd_data(desc); uint64_t *d = vd, *n = vn, *m = vm; for (i = 0; i < opr_sz / 8; i += 2) { - uint64_t nn = n[i + hi]; - uint64_t mm = m[i + hi]; - uint64_t rhi = 0; - uint64_t rlo = 0; - - /* Bit 0 can only influence the low 64-bit result. */ - if (nn & 1) { - rlo = mm; - } - - for (j = 1; j < 64; ++j) { - uint64_t mask = -((nn >> j) & 1); - rlo ^= (mm << j) & mask; - rhi ^= (mm >> (64 - j)) & mask; - } - d[i] = rlo; - d[i + 1] = rhi; + Int128 r = clmul_64(n[i + hi], m[i + hi]); + d[i] = int128_getlo(r); + d[i + 1] = int128_gethi(r); } clear_tail(d, opr_sz, simd_maxsz(desc)); } -/* - * 8x8->16 polynomial multiply. - * - * The byte inputs are expanded to (or extracted from) half-words. - * Note that neon and sve2 get the inputs from different positions. - * This allows 4 bytes to be processed in parallel with uint64_t. 
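The expand_pred_h_data initializer above was produced by the generator loop quoted in its comment. A complete, runnable version of that generator; only indices with no odd bits set get entries, because expand_pred_h masks its argument with 0x55:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    for (unsigned i = 0; i < 256; ++i) {
        uint64_t m = 0;

        if (i & 0xaa) {
            continue;   /* odd predicate bits never reach the table */
        }
        for (unsigned j = 0; j < 8; j += 2) {
            if ((i >> j) & 1) {
                /* predicate bit j selects halfword j/2 of the result */
                m |= 0xffffull << (j << 3);
            }
        }
        printf("[0x%02x] = 0x%016" PRIx64 ",\n", i, m);
    }
    return 0;
}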
- */ - -static uint64_t expand_byte_to_half(uint64_t x) -{ - return (x & 0x000000ff) - | ((x & 0x0000ff00) << 8) - | ((x & 0x00ff0000) << 16) - | ((x & 0xff000000) << 24); -} - -uint64_t pmull_w(uint64_t op1, uint64_t op2) -{ - uint64_t result = 0; - int i; - for (i = 0; i < 16; ++i) { - uint64_t mask = (op1 & 0x0000000100000001ull) * 0xffffffff; - result ^= op2 & mask; - op1 >>= 1; - op2 <<= 1; - } - return result; -} - -uint64_t pmull_h(uint64_t op1, uint64_t op2) -{ - uint64_t result = 0; - int i; - for (i = 0; i < 8; ++i) { - uint64_t mask = (op1 & 0x0001000100010001ull) * 0xffff; - result ^= op2 & mask; - op1 >>= 1; - op2 <<= 1; - } - return result; -} - void HELPER(neon_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc) { int hi = simd_data(desc); uint64_t *d = vd, *n = vn, *m = vm; uint64_t nn = n[hi], mm = m[hi]; - d[0] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm)); + d[0] = clmul_8x4_packed(nn, mm); nn >>= 32; mm >>= 32; - d[1] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm)); + d[1] = clmul_8x4_packed(nn, mm); clear_tail(d, 16, simd_maxsz(desc)); } @@ -2076,25 +2037,10 @@ void HELPER(sve2_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc) uint64_t *d = vd, *n = vn, *m = vm; for (i = 0; i < opr_sz / 8; ++i) { - uint64_t nn = (n[i] >> shift) & 0x00ff00ff00ff00ffull; - uint64_t mm = (m[i] >> shift) & 0x00ff00ff00ff00ffull; - - d[i] = pmull_h(nn, mm); + d[i] = clmul_8x4_even(n[i] >> shift, m[i] >> shift); } } -static uint64_t pmull_d(uint64_t op1, uint64_t op2) -{ - uint64_t result = 0; - int i; - - for (i = 0; i < 32; ++i) { - uint64_t mask = -((op1 >> i) & 1); - result ^= (op2 << i) & mask; - } - return result; -} - void HELPER(sve2_pmull_d)(void *vd, void *vn, void *vm, uint32_t desc) { intptr_t sel = H4(simd_data(desc)); @@ -2103,7 +2049,7 @@ void HELPER(sve2_pmull_d)(void *vd, void *vn, void *vm, uint32_t desc) uint64_t *d = vd; for (i = 0; i < opr_sz / 8; ++i) { - d[i] = pmull_d(n[2 * i + sel], m[2 * i + sel]); + d[i] = clmul_32(n[2 * i + sel], m[2 * i + sel]); } } #endif @@ -2531,7 +2477,7 @@ DO_MMLA_B(gvec_usmmla_b, do_usmmla_b) * BFloat16 Dot Product */ -static float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2) +float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2) { /* FPCR is ignored for BFDOT and BFMMLA. */ float_status bf_status = { @@ -2600,7 +2546,7 @@ void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, uint32_t desc) * Process the entire segment at once, writing back the * results only after we've consumed all of the inputs. 
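The rewritten pmul/pmull helpers above now call the common clmul_* routines from "crypto/clmul.h" instead of open-coding shift-and-xor loops. For reference, a scalar carry-less (GF(2) polynomial) multiply of the kind those routines vectorize; the function name here is mine, not the library's:

#include <assert.h>
#include <stdint.h>

/*
 * Carry-less multiply: partial products are XORed rather than added,
 * i.e. multiplication of polynomials over GF(2).  8x8 -> 16 bits;
 * per the gvec_pmul_b change above, clmul_8x8_low performs eight of
 * these per 64-bit lane, keeping the low byte of each product.
 */
static uint16_t clmul_8x8_ref(uint8_t a, uint8_t b)
{
    uint16_t r = 0;

    for (int i = 0; i < 8; ++i) {
        if ((a >> i) & 1) {
            r ^= (uint16_t)b << i;
        }
    }
    return r;
}

int main(void)
{
    /* (x + 1) * (x + 1) = x^2 + 1 over GF(2): no carry into the x term. */
    assert(clmul_8x8_ref(0x03, 0x03) == 0x05);
    return 0;
}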
* - * Key to indicies by column: + * Key to indices by column: * i j i k j k */ sum00 = a[s + H4(0 + 0)]; @@ -2664,3 +2610,27 @@ void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm, } clear_tail(d, opr_sz, simd_maxsz(desc)); } + +#define DO_CLAMP(NAME, TYPE) \ +void HELPER(NAME)(void *d, void *n, void *m, void *a, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \ + TYPE aa = *(TYPE *)(a + i); \ + TYPE nn = *(TYPE *)(n + i); \ + TYPE mm = *(TYPE *)(m + i); \ + TYPE dd = MIN(MAX(aa, nn), mm); \ + *(TYPE *)(d + i) = dd; \ + } \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + +DO_CLAMP(gvec_sclamp_b, int8_t) +DO_CLAMP(gvec_sclamp_h, int16_t) +DO_CLAMP(gvec_sclamp_s, int32_t) +DO_CLAMP(gvec_sclamp_d, int64_t) + +DO_CLAMP(gvec_uclamp_b, uint8_t) +DO_CLAMP(gvec_uclamp_h, uint16_t) +DO_CLAMP(gvec_uclamp_s, uint32_t) +DO_CLAMP(gvec_uclamp_d, uint64_t) diff --git a/target/arm/vec_internal.h b/target/arm/tcg/vec_internal.h index 2a33558290..3ca1b94ccf 100644 --- a/target/arm/vec_internal.h +++ b/target/arm/tcg/vec_internal.h @@ -17,8 +17,8 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef TARGET_ARM_VEC_INTERNALS_H -#define TARGET_ARM_VEC_INTERNALS_H +#ifndef TARGET_ARM_VEC_INTERNAL_H +#define TARGET_ARM_VEC_INTERNAL_H /* * Note that vector data is stored in host-endian 64-bit chunks, @@ -29,7 +29,7 @@ * The H1_<N> macros are used when performing byte arithmetic and then * casting the final pointer to a type of size N. */ -#ifdef HOST_WORDS_BIGENDIAN +#if HOST_BIG_ENDIAN #define H1(x) ((x) ^ 7) #define H1_2(x) ((x) ^ 6) #define H1_4(x) ((x) ^ 4) @@ -50,8 +50,21 @@ #define H8(x) (x) #define H1_8(x) (x) -/* Data for expanding active predicate bits to bytes, for byte elements. */ +/* + * Expand active predicate bits to bytes, for byte elements. + */ extern const uint64_t expand_pred_b_data[256]; +static inline uint64_t expand_pred_b(uint8_t byte) +{ + return expand_pred_b_data[byte]; +} + +/* Similarly for half-word elements. */ +extern const uint64_t expand_pred_h_data[0x55 + 1]; +static inline uint64_t expand_pred_h(uint8_t byte) +{ + return expand_pred_h_data[byte & 0x55]; +} static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) { @@ -206,15 +219,17 @@ int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *); int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *); int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool); -/* - * 8 x 8 -> 16 vector polynomial multiply where the inputs are - * in the low 8 bits of each 16-bit element -*/ -uint64_t pmull_h(uint64_t op1, uint64_t op2); -/* - * 16 x 16 -> 32 vector polynomial multiply where the inputs are - * in the low 16 bits of each 32-bit element +/** + * bfdotadd: + * @sum: addend + * @e1, @e2: multiplicand vectors + * + * BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum. + * The @e1 and @e2 operands correspond to the 32-bit source vector + * slots and contain two Bfloat16 values each. + * + * Corresponds to the ARM pseudocode function BFDotAdd. 
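The new DO_CLAMP expansion above computes MIN(MAX(aa, nn), mm) per element: raise the value to the lower bound, then cap it at the upper bound. A scalar check of those semantics (clamp_s8 is an illustrative name; MIN/MAX match QEMU's usual definitions):

#include <assert.h>
#include <stdint.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* One int8_t element of DO_CLAMP(gvec_sclamp_b, int8_t):
 * aa is the value, nn the lower bound, mm the upper bound. */
static int8_t clamp_s8(int8_t aa, int8_t nn, int8_t mm)
{
    return MIN(MAX(aa, nn), mm);
}

int main(void)
{
    assert(clamp_s8(-100, -16, 15) == -16);  /* below range: raised */
    assert(clamp_s8(7, -16, 15) == 7);       /* in range: unchanged */
    assert(clamp_s8(100, -16, 15) == 15);    /* above range: capped */
    return 0;
}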
*/ -uint64_t pmull_w(uint64_t op1, uint64_t op2); +float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2); -#endif /* TARGET_ARM_VEC_INTERNALS_H */ +#endif /* TARGET_ARM_VEC_INTERNAL_H */ diff --git a/target/arm/vfp-uncond.decode b/target/arm/tcg/vfp-uncond.decode index 5c50447a66..5c50447a66 100644 --- a/target/arm/vfp-uncond.decode +++ b/target/arm/tcg/vfp-uncond.decode diff --git a/target/arm/vfp.decode b/target/arm/tcg/vfp.decode index 5405e80197..5405e80197 100644 --- a/target/arm/vfp.decode +++ b/target/arm/tcg/vfp.decode diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c deleted file mode 100644 index 3107f9823e..0000000000 --- a/target/arm/tlb_helper.c +++ /dev/null @@ -1,214 +0,0 @@ -/* - * ARM TLB (Translation lookaside buffer) helpers. - * - * This code is licensed under the GNU GPL v2 or later. - * - * SPDX-License-Identifier: GPL-2.0-or-later - */ -#include "qemu/osdep.h" -#include "cpu.h" -#include "internals.h" -#include "exec/exec-all.h" - -static inline uint32_t merge_syn_data_abort(uint32_t template_syn, - unsigned int target_el, - bool same_el, bool ea, - bool s1ptw, bool is_write, - int fsc) -{ - uint32_t syn; - - /* - * ISV is only set for data aborts routed to EL2 and - * never for stage-1 page table walks faulting on stage 2. - * - * Furthermore, ISV is only set for certain kinds of load/stores. - * If the template syndrome does not have ISV set, we should leave - * it cleared. - * - * See ARMv8 specs, D7-1974: - * ISS encoding for an exception from a Data Abort, the - * ISV field. - */ - if (!(template_syn & ARM_EL_ISV) || target_el != 2 || s1ptw) { - syn = syn_data_abort_no_iss(same_el, 0, - ea, 0, s1ptw, is_write, fsc); - } else { - /* - * Fields: IL, ISV, SAS, SSE, SRT, SF and AR come from the template - * syndrome created at translation time. - * Now we create the runtime syndrome with the remaining fields. - */ - syn = syn_data_abort_with_iss(same_el, - 0, 0, 0, 0, 0, - ea, 0, s1ptw, is_write, fsc, - true); - /* Merge the runtime syndrome with the template syndrome. */ - syn |= template_syn; - } - return syn; -} - -static void QEMU_NORETURN arm_deliver_fault(ARMCPU *cpu, vaddr addr, - MMUAccessType access_type, - int mmu_idx, ARMMMUFaultInfo *fi) -{ - CPUARMState *env = &cpu->env; - int target_el; - bool same_el; - uint32_t syn, exc, fsr, fsc; - ARMMMUIdx arm_mmu_idx = core_to_arm_mmu_idx(env, mmu_idx); - - target_el = exception_target_el(env); - if (fi->stage2) { - target_el = 2; - env->cp15.hpfar_el2 = extract64(fi->s2addr, 12, 47) << 4; - if (arm_is_secure_below_el3(env) && fi->s1ns) { - env->cp15.hpfar_el2 |= HPFAR_NS; - } - } - same_el = (arm_current_el(env) == target_el); - - if (target_el == 2 || arm_el_is_aa64(env, target_el) || - arm_s1_regime_using_lpae_format(env, arm_mmu_idx)) { - /* - * LPAE format fault status register : bottom 6 bits are - * status code in the same form as needed for syndrome - */ - fsr = arm_fi_to_lfsc(fi); - fsc = extract32(fsr, 0, 6); - } else { - fsr = arm_fi_to_sfsc(fi); - /* - * Short format FSR : this fault will never actually be reported - * to an EL that uses a syndrome register. Use a (currently) - * reserved FSR code in case the constructed syndrome does leak - * into the guest somehow. 
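The deleted arm_deliver_fault above builds HPFAR_EL2 with extract64(fi->s2addr, 12, 47) << 4, QEMU's standard bitfield idiom. A reference implementation of that helper's behavior (extract64 itself lives in qemu/bitops.h; extract64_ref is my name for the sketch):

#include <assert.h>
#include <stdint.h>

/* Take 'length' bits of 'value' starting at bit 'start'. */
static uint64_t extract64_ref(uint64_t value, unsigned start, unsigned length)
{
    assert(length > 0 && length <= 64 - start);
    return (value >> start) & (~0ull >> (64 - length));
}

int main(void)
{
    uint64_t s2addr = 0x0000123456789abcull;

    /* Bits [58:12] of the faulting IPA, repositioned at bit 4. */
    assert((extract64_ref(s2addr, 12, 47) << 4) == 0x1234567890ull);
    return 0;
}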
- */ - fsc = 0x3f; - } - - if (access_type == MMU_INST_FETCH) { - syn = syn_insn_abort(same_el, fi->ea, fi->s1ptw, fsc); - exc = EXCP_PREFETCH_ABORT; - } else { - syn = merge_syn_data_abort(env->exception.syndrome, target_el, - same_el, fi->ea, fi->s1ptw, - access_type == MMU_DATA_STORE, - fsc); - if (access_type == MMU_DATA_STORE - && arm_feature(env, ARM_FEATURE_V6)) { - fsr |= (1 << 11); - } - exc = EXCP_DATA_ABORT; - } - - env->exception.vaddress = addr; - env->exception.fsr = fsr; - raise_exception(env, exc, syn, target_el); -} - -/* Raise a data fault alignment exception for the specified virtual address */ -void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, - MMUAccessType access_type, - int mmu_idx, uintptr_t retaddr) -{ - ARMCPU *cpu = ARM_CPU(cs); - ARMMMUFaultInfo fi = {}; - - /* now we have a real cpu fault */ - cpu_restore_state(cs, retaddr, true); - - fi.type = ARMFault_Alignment; - arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi); -} - -#if !defined(CONFIG_USER_ONLY) - -/* - * arm_cpu_do_transaction_failed: handle a memory system error response - * (eg "no device/memory present at address") by raising an external abort - * exception - */ -void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, - vaddr addr, unsigned size, - MMUAccessType access_type, - int mmu_idx, MemTxAttrs attrs, - MemTxResult response, uintptr_t retaddr) -{ - ARMCPU *cpu = ARM_CPU(cs); - ARMMMUFaultInfo fi = {}; - - /* now we have a real cpu fault */ - cpu_restore_state(cs, retaddr, true); - - fi.ea = arm_extabort_type(response); - fi.type = ARMFault_SyncExternal; - arm_deliver_fault(cpu, addr, access_type, mmu_idx, &fi); -} - -#endif /* !defined(CONFIG_USER_ONLY) */ - -bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr) -{ - ARMCPU *cpu = ARM_CPU(cs); - ARMMMUFaultInfo fi = {}; - -#ifdef CONFIG_USER_ONLY - int flags = page_get_flags(useronly_clean_ptr(address)); - if (flags & PAGE_VALID) { - fi.type = ARMFault_Permission; - } else { - fi.type = ARMFault_Translation; - } - fi.level = 3; - - /* now we have a real cpu fault */ - cpu_restore_state(cs, retaddr, true); - arm_deliver_fault(cpu, address, access_type, mmu_idx, &fi); -#else - hwaddr phys_addr; - target_ulong page_size; - int prot, ret; - MemTxAttrs attrs = {}; - ARMCacheAttrs cacheattrs = {}; - - /* - * Walk the page table and (if the mapping exists) add the page - * to the TLB. On success, return true. Otherwise, if probing, - * return false. Otherwise populate fsr with ARM DFSR/IFSR fault - * register format, and signal the fault. - */ - ret = get_phys_addr(&cpu->env, address, access_type, - core_to_arm_mmu_idx(&cpu->env, mmu_idx), - &phys_addr, &attrs, &prot, &page_size, - &fi, &cacheattrs); - if (likely(!ret)) { - /* - * Map a single [sub]page. Regions smaller than our declared - * target page size are handled specially, so for those we - * pass in the exact addresses. - */ - if (page_size >= TARGET_PAGE_SIZE) { - phys_addr &= TARGET_PAGE_MASK; - address &= TARGET_PAGE_MASK; - } - /* Notice and record tagged memory. 
*/ - if (cpu_isar_feature(aa64_mte, cpu) && cacheattrs.attrs == 0xf0) { - arm_tlb_mte_tagged(&attrs) = true; - } - - tlb_set_page_with_attrs(cs, address, phys_addr, attrs, - prot, mmu_idx, page_size); - return true; - } else if (probe) { - return false; - } else { - /* now we have a real cpu fault */ - cpu_restore_state(cs, retaddr, true); - arm_deliver_fault(cpu, address, access_type, mmu_idx, &fi); - } -#endif -} diff --git a/target/arm/trace-events b/target/arm/trace-events index 2a0ba7bffc..4438dce7be 100644 --- a/target/arm/trace-events +++ b/target/arm/trace-events @@ -1,13 +1,15 @@ # See docs/devel/tracing.rst for syntax documentation. # helper.c -arm_gt_recalc(int timer, int irqstate, uint64_t nexttick) "gt recalc: timer %d irqstate %d next tick 0x%" PRIx64 -arm_gt_recalc_disabled(int timer) "gt recalc: timer %d irqstate 0 timer disabled" +arm_gt_recalc(int timer, uint64_t nexttick) "gt recalc: timer %d next tick 0x%" PRIx64 +arm_gt_recalc_disabled(int timer) "gt recalc: timer %d timer disabled" arm_gt_cval_write(int timer, uint64_t value) "gt_cval_write: timer %d value 0x%" PRIx64 arm_gt_tval_write(int timer, uint64_t value) "gt_tval_write: timer %d value 0x%" PRIx64 arm_gt_ctl_write(int timer, uint64_t value) "gt_ctl_write: timer %d value 0x%" PRIx64 -arm_gt_imask_toggle(int timer, int irqstate) "gt_ctl_write: timer %d IMASK toggle, new irqstate %d" +arm_gt_imask_toggle(int timer) "gt_ctl_write: timer %d IMASK toggle" arm_gt_cntvoff_write(uint64_t value) "gt_cntvoff_write: value 0x%" PRIx64 +arm_gt_cntpoff_write(uint64_t value) "gt_cntpoff_write: value 0x%" PRIx64 +arm_gt_update_irq(int timer, int irqstate) "gt_update_irq: timer %d irqstate %d" # kvm.c kvm_arm_fixup_msi_route(uint64_t iova, uint64_t gpa) "MSI iova = 0x%"PRIx64" is translated into 0x%"PRIx64 diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c index 24e3d820a5..3e5e37abbe 100644 --- a/target/arm/vfp_helper.c +++ b/target/arm/vfp_helper.c @@ -21,6 +21,7 @@ #include "cpu.h" #include "exec/helper-proto.h" #include "internals.h" +#include "cpu-features.h" #ifdef CONFIG_TCG #include "qemu/log.h" #include "fpu/softfloat.h" @@ -1104,33 +1105,14 @@ float64 HELPER(rintd)(float64 x, void *fp_status) } /* Convert ARM rounding mode to softfloat */ -int arm_rmode_to_sf(int rmode) -{ - switch (rmode) { - case FPROUNDING_TIEAWAY: - rmode = float_round_ties_away; - break; - case FPROUNDING_ODD: - /* FIXME: add support for TIEAWAY and ODD */ - qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n", - rmode); - /* fall through for now */ - case FPROUNDING_TIEEVEN: - default: - rmode = float_round_nearest_even; - break; - case FPROUNDING_POSINF: - rmode = float_round_up; - break; - case FPROUNDING_NEGINF: - rmode = float_round_down; - break; - case FPROUNDING_ZERO: - rmode = float_round_to_zero; - break; - } - return rmode; -} +const FloatRoundMode arm_rmode_to_sf_map[] = { + [FPROUNDING_TIEEVEN] = float_round_nearest_even, + [FPROUNDING_POSINF] = float_round_up, + [FPROUNDING_NEGINF] = float_round_down, + [FPROUNDING_ZERO] = float_round_to_zero, + [FPROUNDING_TIEAWAY] = float_round_ties_away, + [FPROUNDING_ODD] = float_round_to_odd, +}; /* * Implement float64 to int32_t conversion without saturation; @@ -1139,68 +1121,21 @@ int arm_rmode_to_sf(int rmode) uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus) { float_status *status = vstatus; - uint32_t exp, sign; - uint64_t frac; - uint32_t inexact = 1; /* !Z */ - - sign = extract64(value, 63, 1); - exp = extract64(value, 52, 11); - frac = extract64(value, 0, 
52);
-
-    if (exp == 0) {
-        /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */
-        inexact = sign;
-        if (frac != 0) {
-            if (status->flush_inputs_to_zero) {
-                float_raise(float_flag_input_denormal, status);
-            } else {
-                float_raise(float_flag_inexact, status);
-                inexact = 1;
-            }
-        }
-        frac = 0;
-    } else if (exp == 0x7ff) {
-        /* This operation raises Invalid for both NaN and overflow (Inf). */
-        float_raise(float_flag_invalid, status);
-        frac = 0;
+    uint32_t inexact, frac;
+    uint32_t e_old, e_new;
+
+    e_old = get_float_exception_flags(status);
+    set_float_exception_flags(0, status);
+    frac = float64_to_int32_modulo(value, float_round_to_zero, status);
+    e_new = get_float_exception_flags(status);
+    set_float_exception_flags(e_old | e_new, status);
+
+    if (value == float64_chs(float64_zero)) {
+        /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */
+        inexact = 1;
     } else {
-        int true_exp = exp - 1023;
-        int shift = true_exp - 52;
-
-        /* Restore implicit bit. */
-        frac |= 1ull << 52;
-
-        /* Shift the fraction into place. */
-        if (shift >= 0) {
-            /* The number is so large we must shift the fraction left. */
-            if (shift >= 64) {
-                /* The fraction is shifted out entirely. */
-                frac = 0;
-            } else {
-                frac <<= shift;
-            }
-        } else if (shift > -64) {
-            /* Normal case -- shift right and notice if bits shift out. */
-            inexact = (frac << (64 + shift)) != 0;
-            frac >>= -shift;
-        } else {
-            /* The fraction is shifted out entirely. */
-            frac = 0;
-        }
-
-        /* Notice overflow or inexact exceptions. */
-        if (true_exp > 31 || frac > (sign ? 0x80000000ull : 0x7fffffff)) {
-            /* Overflow, for which this operation raises invalid. */
-            float_raise(float_flag_invalid, status);
-            inexact = 1;
-        } else if (inexact) {
-            float_raise(float_flag_inexact, status);
-        }
-
-        /* Honor the sign. */
-        if (sign) {
-            frac = -frac;
-        }
+        /* Normal inexact or overflow or NaN */
+        inexact = e_new & (float_flag_inexact | float_flag_invalid);
     }

     /* Pack the result and the env->ZF representation of Z together. */
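The new fjcvtzs body delegates the conversion to softfloat's float64_to_int32_modulo and derives the JavaScript Z flag from the accumulated exception bits. For intuition, a sketch of the architectural result: truncate toward zero, reduce modulo 2^32, with NaN and infinity giving 0 and Z set only for an exact in-range integer (and cleared for -0.0). fjcvtzs_ref is an illustrative approximation using host doubles, not the softfloat code path:

#include <assert.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>

static uint32_t fjcvtzs_ref(double x, bool *z)
{
    double t;

    if (!isfinite(x)) {
        *z = false;          /* NaN/Inf: invalid operation, result 0 */
        return 0;
    }
    t = trunc(x);            /* round toward zero */
    /* Z: exact, within int32_t range, and not -0.0 (inexact for JS). */
    *z = (t == x) && !(x == 0.0 && signbit(x)) &&
         t >= -2147483648.0 && t <= 2147483647.0;
    /* Reduce modulo 2^32; fmod is exact, so no rounding creeps in. */
    return (uint32_t)(int64_t)fmod(t, 4294967296.0);
}

int main(void)
{
    bool z;

    assert(fjcvtzs_ref(3.0, &z) == 3 && z);
    assert(fjcvtzs_ref(-1.5, &z) == 0xffffffffu && !z);
    assert(fjcvtzs_ref(2147483648.0, &z) == 0x80000000u && !z);
    return 0;
}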