path: root/target/arm
Diffstat (limited to 'target/arm')
-rw-r--r--  target/arm/Kconfig  11
-rw-r--r--  target/arm/Makefile.objs  22
-rw-r--r--  target/arm/arch_dump.c  148
-rw-r--r--  target/arm/arm-powerctl.c  128
-rw-r--r--  target/arm/arm-powerctl.h  16
-rw-r--r--  target/arm/arm-qmp-cmds.c  248
-rw-r--r--  target/arm/arm-semi.c  656
-rw-r--r--  target/arm/common-semi-target.h  60
-rw-r--r--  target/arm/cortex-regs.c  76
-rw-r--r--  target/arm/cpregs.h  1137
-rw-r--r--  target/arm/cpu-features.h  1021
-rw-r--r--  target/arm/cpu-param.h  40
-rw-r--r--  target/arm/cpu-qom.h  92
-rw-r--r--  target/arm/cpu.c  2935
-rw-r--r--  target/arm/cpu.h  2927
-rw-r--r--  target/arm/cpu64.c  958
-rw-r--r--  target/arm/crypto_helper.c  695
-rw-r--r--  target/arm/debug_helper.c  1280
-rw-r--r--  target/arm/gdbstub.c  523
-rw-r--r--  target/arm/gdbstub64.c  319
-rw-r--r--  target/arm/gtimer.h  21
-rw-r--r--  target/arm/helper-a64.c  902
-rw-r--r--  target/arm/helper.c  14903
-rw-r--r--  target/arm/helper.h  617
-rw-r--r--  target/arm/hvf/hvf.c  2252
-rw-r--r--  target/arm/hvf/meson.build  3
-rw-r--r--  target/arm/hvf/trace-events  13
-rw-r--r--  target/arm/hvf_arm.h  25
-rw-r--r--  target/arm/hyp_gdbstub.c  253
-rw-r--r--  target/arm/idau.h  15
-rw-r--r--  target/arm/internals.h  1475
-rw-r--r--  target/arm/kvm-consts.h  38
-rw-r--r--  target/arm/kvm-stub.c  5
-rw-r--r--  target/arm/kvm.c  1856
-rw-r--r--  target/arm/kvm32.c  516
-rw-r--r--  target/arm/kvm64.c  980
-rw-r--r--  target/arm/kvm_arm.h  278
-rw-r--r--  target/arm/machine.c  307
-rw-r--r--  target/arm/meson.build  41
-rw-r--r--  target/arm/monitor.c  84
-rw-r--r--  target/arm/multiprocessing.h  16
-rw-r--r--  target/arm/op_helper.c  1482
-rw-r--r--  target/arm/ptw.c  3650
-rw-r--r--  target/arm/sve_helper.c  5383
-rw-r--r--  target/arm/syndrome.h  369
-rw-r--r--  target/arm/tcg-stubs.c  27
-rw-r--r--  target/arm/tcg/a32-uncond.decode  74
-rw-r--r--  target/arm/tcg/a32.decode  557
-rw-r--r--  target/arm/tcg/a64.decode  591
-rw-r--r--  target/arm/tcg/arm_ldst.h (renamed from target/arm/arm_ldst.h)  25
-rw-r--r--  target/arm/tcg/cpu-v7m.c  290
-rw-r--r--  target/arm/tcg/cpu32.c  1039
-rw-r--r--  target/arm/tcg/cpu64.c  1295
-rw-r--r--  target/arm/tcg/crypto_helper.c  679
-rw-r--r--  target/arm/tcg/helper-a64.c  1857
-rw-r--r--  target/arm/tcg/helper-a64.h (renamed from target/arm/helper-a64.h)  66
-rw-r--r--  target/arm/tcg/helper-mve.h  890
-rw-r--r--  target/arm/tcg/helper-sme.h  146
-rw-r--r--  target/arm/tcg/helper-sve.h (renamed from target/arm/helper-sve.h)  1278
-rw-r--r--  target/arm/tcg/hflags.c  485
-rw-r--r--  target/arm/tcg/iwmmxt_helper.c (renamed from target/arm/iwmmxt_helper.c)  2
-rw-r--r--  target/arm/tcg/m-nocp.decode  72
-rw-r--r--  target/arm/tcg/m_helper.c  2902
-rw-r--r--  target/arm/tcg/meson.build  60
-rw-r--r--  target/arm/tcg/mte_helper.c  1190
-rw-r--r--  target/arm/tcg/mve.decode  832
-rw-r--r--  target/arm/tcg/mve_helper.c  3444
-rw-r--r--  target/arm/tcg/neon-dp.decode  646
-rw-r--r--  target/arm/tcg/neon-ls.decode  52
-rw-r--r--  target/arm/tcg/neon-shared.decode  99
-rw-r--r--  target/arm/tcg/neon_helper.c (renamed from target/arm/neon_helper.c)  696
-rw-r--r--  target/arm/tcg/op_helper.c  1185
-rw-r--r--  target/arm/tcg/pauth_helper.c  632
-rw-r--r--  target/arm/tcg/psci.c (renamed from target/arm/psci.c)  79
-rw-r--r--  target/arm/tcg/sme-fa64.decode  60
-rw-r--r--  target/arm/tcg/sme.decode  88
-rw-r--r--  target/arm/tcg/sme_helper.c  1179
-rw-r--r--  target/arm/tcg/sve.decode (renamed from target/arm/sve.decode)  722
-rw-r--r--  target/arm/tcg/sve_helper.c  7486
-rw-r--r--  target/arm/tcg/sve_ldst_internal.h  222
-rw-r--r--  target/arm/tcg/t16.decode  281
-rw-r--r--  target/arm/tcg/t32.decode  753
-rw-r--r--  target/arm/tcg/tlb_helper.c  398
-rw-r--r--  target/arm/tcg/translate-a32.h  167
-rw-r--r--  target/arm/tcg/translate-a64.c (renamed from target/arm/translate-a64.c)  9402
-rw-r--r--  target/arm/tcg/translate-a64.h (renamed from target/arm/translate-a64.h)  126
-rw-r--r--  target/arm/tcg/translate-m-nocp.c  766
-rw-r--r--  target/arm/tcg/translate-mve.c  2258
-rw-r--r--  target/arm/tcg/translate-neon.c  3931
-rw-r--r--  target/arm/tcg/translate-sme.c  343
-rw-r--r--  target/arm/tcg/translate-sve.c  7422
-rw-r--r--  target/arm/tcg/translate-vfp.c  3415
-rw-r--r--  target/arm/tcg/translate.c  9711
-rw-r--r--  target/arm/tcg/translate.h  738
-rw-r--r--  target/arm/tcg/vec_helper.c  2636
-rw-r--r--  target/arm/tcg/vec_internal.h  235
-rw-r--r--  target/arm/tcg/vfp-uncond.decode  82
-rw-r--r--  target/arm/tcg/vfp.decode  247
-rw-r--r--  target/arm/trace-events  14
-rw-r--r--  target/arm/trace.h  1
-rw-r--r--  target/arm/translate-sve.c  5483
-rw-r--r--  target/arm/translate.c  13199
-rw-r--r--  target/arm/translate.h  193
-rw-r--r--  target/arm/vec_helper.c  768
-rw-r--r--  target/arm/vfp_helper.c  1283
105 files changed, 96621 insertions, 46954 deletions
diff --git a/target/arm/Kconfig b/target/arm/Kconfig
new file mode 100644
index 0000000000..bf57d739cd
--- /dev/null
+++ b/target/arm/Kconfig
@@ -0,0 +1,11 @@
+config ARM
+ bool
+ select ARM_COMPATIBLE_SEMIHOSTING if TCG
+
+ # We need to select this until we move m_helper.c and the
+ # translate.c v7m helpers under ARM_V7M.
+ select ARM_V7M if TCG
+
+config AARCH64
+ bool
+ select ARM
diff --git a/target/arm/Makefile.objs b/target/arm/Makefile.objs
deleted file mode 100644
index 11c7baf8a3..0000000000
--- a/target/arm/Makefile.objs
+++ /dev/null
@@ -1,22 +0,0 @@
-obj-y += arm-semi.o
-obj-$(CONFIG_SOFTMMU) += machine.o psci.o arch_dump.o monitor.o
-obj-$(CONFIG_KVM) += kvm.o
-obj-$(call land,$(CONFIG_KVM),$(call lnot,$(TARGET_AARCH64))) += kvm32.o
-obj-$(call land,$(CONFIG_KVM),$(TARGET_AARCH64)) += kvm64.o
-obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
-obj-y += translate.o op_helper.o helper.o cpu.o
-obj-y += neon_helper.o iwmmxt_helper.o vec_helper.o
-obj-y += gdbstub.o
-obj-$(TARGET_AARCH64) += cpu64.o translate-a64.o helper-a64.o gdbstub64.o
-obj-y += crypto_helper.o
-obj-$(CONFIG_SOFTMMU) += arm-powerctl.o
-
-DECODETREE = $(SRC_PATH)/scripts/decodetree.py
-
-target/arm/decode-sve.inc.c: $(SRC_PATH)/target/arm/sve.decode $(DECODETREE)
- $(call quiet-command,\
- $(PYTHON) $(DECODETREE) --decode disas_sve -o $@ $<,\
- "GEN", $(TARGET_DIR)$@)
-
-target/arm/translate-sve.o: target/arm/decode-sve.inc.c
-obj-$(TARGET_AARCH64) += translate-sve.o sve_helper.o
diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c
index 26a2c09868..06cdf4ba28 100644
--- a/target/arm/arch_dump.c
+++ b/target/arm/arch_dump.c
@@ -22,6 +22,7 @@
#include "cpu.h"
#include "elf.h"
#include "sysemu/dump.h"
+#include "cpu-features.h"
/* struct user_pt_regs from arch/arm64/include/uapi/asm/ptrace.h */
struct aarch64_user_regs {
@@ -62,12 +63,23 @@ struct aarch64_user_vfp_state {
QEMU_BUILD_BUG_ON(sizeof(struct aarch64_user_vfp_state) != 528);
+/* struct user_sve_header from arch/arm64/include/uapi/asm/ptrace.h */
+struct aarch64_user_sve_header {
+ uint32_t size;
+ uint32_t max_size;
+ uint16_t vl;
+ uint16_t max_vl;
+ uint16_t flags;
+ uint16_t reserved;
+} QEMU_PACKED;
+
struct aarch64_note {
Elf64_Nhdr hdr;
char name[8]; /* align_up(sizeof("CORE"), 4) */
union {
struct aarch64_elf_prstatus prstatus;
struct aarch64_user_vfp_state vfp;
+ struct aarch64_user_sve_header sve;
};
} QEMU_PACKED;
@@ -76,6 +88,8 @@ struct aarch64_note {
(AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_elf_prstatus))
#define AARCH64_PRFPREG_NOTE_SIZE \
(AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_user_vfp_state))
+#define AARCH64_SVE_NOTE_SIZE(env) \
+ (AARCH64_NOTE_HEADER_SIZE + sve_size(env))
static void aarch64_note_init(struct aarch64_note *note, DumpState *s,
const char *name, Elf64_Word namesz,
@@ -101,8 +115,8 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f,
for (i = 0; i < 32; ++i) {
uint64_t *q = aa64_vfp_qreg(env, i);
- note.vfp.vregs[2*i + 0] = cpu_to_dump64(s, q[0]);
- note.vfp.vregs[2*i + 1] = cpu_to_dump64(s, q[1]);
+ note.vfp.vregs[2 * i + 0] = cpu_to_dump64(s, q[0]);
+ note.vfp.vregs[2 * i + 1] = cpu_to_dump64(s, q[1]);
}
if (s->dump_info.d_endian == ELFDATA2MSB) {
@@ -112,8 +126,8 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f,
*/
for (i = 0; i < 32; ++i) {
uint64_t tmp = note.vfp.vregs[2*i];
- note.vfp.vregs[2*i] = note.vfp.vregs[2*i+1];
- note.vfp.vregs[2*i+1] = tmp;
+ note.vfp.vregs[2 * i] = note.vfp.vregs[2 * i + 1];
+ note.vfp.vregs[2 * i + 1] = tmp;
}
}
@@ -128,12 +142,102 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f,
return 0;
}
+#ifdef TARGET_AARCH64
+static off_t sve_zreg_offset(uint32_t vq, int n)
+{
+ off_t off = sizeof(struct aarch64_user_sve_header);
+ return ROUND_UP(off, 16) + vq * 16 * n;
+}
+
+static off_t sve_preg_offset(uint32_t vq, int n)
+{
+ return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n;
+}
+
+static off_t sve_fpsr_offset(uint32_t vq)
+{
+ off_t off = sve_preg_offset(vq, 17);
+ return ROUND_UP(off, 16);
+}
+
+static off_t sve_fpcr_offset(uint32_t vq)
+{
+ return sve_fpsr_offset(vq) + sizeof(uint32_t);
+}
+
+static uint32_t sve_current_vq(CPUARMState *env)
+{
+ return sve_vqm1_for_el(env, arm_current_el(env)) + 1;
+}
+
+static size_t sve_size_vq(uint32_t vq)
+{
+ off_t off = sve_fpcr_offset(vq) + sizeof(uint32_t);
+ return ROUND_UP(off, 16);
+}
+
+static size_t sve_size(CPUARMState *env)
+{
+ return sve_size_vq(sve_current_vq(env));
+}
+
+static int aarch64_write_elf64_sve(WriteCoreDumpFunction f,
+ CPUARMState *env, int cpuid,
+ DumpState *s)
+{
+ struct aarch64_note *note;
+ ARMCPU *cpu = env_archcpu(env);
+ uint32_t vq = sve_current_vq(env);
+ uint64_t tmp[ARM_MAX_VQ * 2], *r;
+ uint32_t fpr;
+ uint8_t *buf;
+ int ret, i;
+
+ note = g_malloc0(AARCH64_SVE_NOTE_SIZE(env));
+ buf = (uint8_t *)&note->sve;
+
+ aarch64_note_init(note, s, "LINUX", 6, NT_ARM_SVE, sve_size_vq(vq));
+
+ note->sve.size = cpu_to_dump32(s, sve_size_vq(vq));
+ note->sve.max_size = cpu_to_dump32(s, sve_size_vq(cpu->sve_max_vq));
+ note->sve.vl = cpu_to_dump16(s, vq * 16);
+ note->sve.max_vl = cpu_to_dump16(s, cpu->sve_max_vq * 16);
+ note->sve.flags = cpu_to_dump16(s, 1);
+
+ for (i = 0; i < 32; ++i) {
+ r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2);
+ memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16);
+ }
+
+ for (i = 0; i < 17; ++i) {
+ r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0],
+ DIV_ROUND_UP(vq * 2, 8));
+ memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8);
+ }
+
+ fpr = cpu_to_dump32(s, vfp_get_fpsr(env));
+ memcpy(&buf[sve_fpsr_offset(vq)], &fpr, sizeof(uint32_t));
+
+ fpr = cpu_to_dump32(s, vfp_get_fpcr(env));
+ memcpy(&buf[sve_fpcr_offset(vq)], &fpr, sizeof(uint32_t));
+
+ ret = f(note, AARCH64_SVE_NOTE_SIZE(env), s);
+ g_free(note);
+
+ if (ret < 0) {
+ return -1;
+ }
+
+ return 0;
+}
+#endif
+
int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque)
+ int cpuid, DumpState *s)
{
struct aarch64_note note;
- CPUARMState *env = &ARM_CPU(cs)->env;
- DumpState *s = opaque;
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
uint64_t pstate, sp;
int ret, i;
@@ -163,7 +267,18 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
return -1;
}
- return aarch64_write_elf64_prfpreg(f, env, cpuid, s);
+ ret = aarch64_write_elf64_prfpreg(f, env, cpuid, s);
+ if (ret) {
+ return ret;
+ }
+
+#ifdef TARGET_AARCH64
+ if (cpu_isar_feature(aa64_sve, cpu)) {
+ ret = aarch64_write_elf64_sve(f, env, cpuid, s);
+ }
+#endif
+
+ return ret;
}
/* struct pt_regs from arch/arm/include/asm/ptrace.h */
@@ -245,12 +360,13 @@ static int arm_write_elf32_vfp(WriteCoreDumpFunction f, CPUARMState *env,
}
int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque)
+ int cpuid, DumpState *s)
{
struct arm_note note;
- CPUARMState *env = &ARM_CPU(cs)->env;
- DumpState *s = opaque;
- int ret, i, fpvalid = !!arm_feature(env, ARM_FEATURE_VFP);
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ int ret, i;
+ bool fpvalid = cpu_isar_feature(aa32_vfp_simd, cpu);
arm_note_init(&note, s, "CORE", 5, NT_PRSTATUS, sizeof(note.prstatus));
@@ -329,15 +445,19 @@ int cpu_get_dump_info(ArchDumpInfo *info,
ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
{
ARMCPU *cpu = ARM_CPU(first_cpu);
- CPUARMState *env = &cpu->env;
size_t note_size;
if (class == ELFCLASS64) {
note_size = AARCH64_PRSTATUS_NOTE_SIZE;
note_size += AARCH64_PRFPREG_NOTE_SIZE;
+#ifdef TARGET_AARCH64
+ if (cpu_isar_feature(aa64_sve, cpu)) {
+ note_size += AARCH64_SVE_NOTE_SIZE(&cpu->env);
+ }
+#endif
} else {
note_size = ARM_PRSTATUS_NOTE_SIZE;
- if (arm_feature(env, ARM_FEATURE_VFP)) {
+ if (cpu_isar_feature(aa32_vfp_simd, cpu)) {
note_size += ARM_VFP_NOTE_SIZE;
}
}
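
The offset helpers added to arch_dump.c above fully determine the layout of the NT_ARM_SVE note payload. The following standalone sketch is illustrative only (not part of the patch): it mirrors sve_zreg_offset(), sve_preg_offset() and sve_fpsr_offset() and prints the resulting offsets for vq = 4 (512-bit vectors); the 16-byte header size is an assumption matching the packed aarch64_user_sve_header above.

    #include <stdio.h>
    #include <stdint.h>

    #define ROUND_UP(n, d)  ((((n) + (d) - 1) / (d)) * (d))
    #define SVE_HDR_SIZE    16  /* sizeof(struct aarch64_user_sve_header), packed */

    static long zreg_off(uint32_t vq, int n) { return ROUND_UP(SVE_HDR_SIZE, 16) + vq * 16 * n; }
    static long preg_off(uint32_t vq, int n) { return zreg_off(vq, 32) + vq * 16 / 8 * n; }
    static long fpsr_off(uint32_t vq)        { return ROUND_UP(preg_off(vq, 17), 16); }

    int main(void)
    {
        uint32_t vq = 4;   /* 4 * 128 bits = 512-bit vectors */

        printf("Z0 at %ld, Z31 at %ld\n", zreg_off(vq, 0), zreg_off(vq, 31));  /* 16, 2000 */
        printf("P0 at %ld, FFR at %ld\n", preg_off(vq, 0), preg_off(vq, 16));  /* 2064, 2192 */
        printf("FPSR at %ld, FPCR at %ld\n", fpsr_off(vq), fpsr_off(vq) + 4);  /* 2208, 2212 */
        printf("note payload size %ld\n", ROUND_UP(fpsr_off(vq) + 8, 16));     /* 2224 */
        return 0;
    }
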
diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c
index 2b856930fb..2b2055c6ac 100644
--- a/target/arm/arm-powerctl.c
+++ b/target/arm/arm-powerctl.c
@@ -15,6 +15,8 @@
#include "arm-powerctl.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
+#include "sysemu/tcg.h"
+#include "target/arm/multiprocessing.h"
#ifndef DEBUG_ARM_POWERCTL
#define DEBUG_ARM_POWERCTL 0
@@ -36,7 +38,7 @@ CPUState *arm_get_cpu_by_id(uint64_t id)
CPU_FOREACH(cpu) {
ARMCPU *armcpu = ARM_CPU(cpu);
- if (armcpu->mp_affinity == id) {
+ if (arm_cpu_mp_affinity(armcpu) == id) {
return cpu;
}
}
@@ -64,67 +66,21 @@ static void arm_set_cpu_on_async_work(CPUState *target_cpu_state,
/* Initialize the cpu we are turning on */
cpu_reset(target_cpu_state);
+ arm_emulate_firmware_reset(target_cpu_state, info->target_el);
target_cpu_state->halted = 0;
- if (info->target_aa64) {
- if ((info->target_el < 3) && arm_feature(&target_cpu->env,
- ARM_FEATURE_EL3)) {
- /*
- * As target mode is AArch64, we need to set lower
- * exception level (the requested level 2) to AArch64
- */
- target_cpu->env.cp15.scr_el3 |= SCR_RW;
- }
-
- if ((info->target_el < 2) && arm_feature(&target_cpu->env,
- ARM_FEATURE_EL2)) {
- /*
- * As target mode is AArch64, we need to set lower
- * exception level (the requested level 1) to AArch64
- */
- target_cpu->env.cp15.hcr_el2 |= HCR_RW;
- }
-
- target_cpu->env.pstate = aarch64_pstate_mode(info->target_el, true);
- } else {
- /* We are requested to boot in AArch32 mode */
- static const uint32_t mode_for_el[] = { 0,
- ARM_CPU_MODE_SVC,
- ARM_CPU_MODE_HYP,
- ARM_CPU_MODE_SVC };
-
- cpsr_write(&target_cpu->env, mode_for_el[info->target_el], CPSR_M,
- CPSRWriteRaw);
- }
-
- if (info->target_el == 3) {
- /* Processor is in secure mode */
- target_cpu->env.cp15.scr_el3 &= ~SCR_NS;
- } else {
- /* Processor is not in secure mode */
- target_cpu->env.cp15.scr_el3 |= SCR_NS;
-
- /*
- * If QEMU is providing the equivalent of EL3 firmware, then we need
- * to make sure a CPU targeting EL2 comes out of reset with a
- * functional HVC insn.
- */
- if (arm_feature(&target_cpu->env, ARM_FEATURE_EL3)
- && info->target_el == 2) {
- target_cpu->env.cp15.scr_el3 |= SCR_HCE;
- }
- }
-
/* We check if the started CPU is now at the correct level */
assert(info->target_el == arm_current_el(&target_cpu->env));
if (info->target_aa64) {
target_cpu->env.xregs[0] = info->context_id;
- target_cpu->env.thumb = false;
} else {
target_cpu->env.regs[0] = info->context_id;
- target_cpu->env.thumb = info->entry & 1;
- info->entry &= 0xfffffffe;
+ }
+
+ if (tcg_enabled()) {
+ /* CP15 update requires rebuilding hflags */
+ arm_rebuild_hflags(&target_cpu->env);
}
/* Start the new CPU at the requested address */
@@ -133,7 +89,7 @@ static void arm_set_cpu_on_async_work(CPUState *target_cpu_state,
g_free(info);
/* Finally set the power status */
- assert(qemu_mutex_iothread_locked());
+ assert(bql_locked());
target_cpu->power_state = PSCI_ON;
}
@@ -144,7 +100,7 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id,
ARMCPU *target_cpu;
struct CpuOnInfo *info;
- assert(qemu_mutex_iothread_locked());
+ assert(bql_locked());
DPRINTF("cpu %" PRId64 " (EL %d, %s) @ 0x%" PRIx64 " with R0 = 0x%" PRIx64
"\n", cpuid, target_el, target_aa64 ? "aarch64" : "aarch32", entry,
@@ -231,12 +187,68 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id,
return QEMU_ARM_POWERCTL_RET_SUCCESS;
}
+static void arm_set_cpu_on_and_reset_async_work(CPUState *target_cpu_state,
+ run_on_cpu_data data)
+{
+ ARMCPU *target_cpu = ARM_CPU(target_cpu_state);
+
+ /* Initialize the cpu we are turning on */
+ cpu_reset(target_cpu_state);
+ target_cpu_state->halted = 0;
+
+ /* Finally set the power status */
+ assert(bql_locked());
+ target_cpu->power_state = PSCI_ON;
+}
+
+int arm_set_cpu_on_and_reset(uint64_t cpuid)
+{
+ CPUState *target_cpu_state;
+ ARMCPU *target_cpu;
+
+ assert(bql_locked());
+
+ /* Retrieve the cpu we are powering up */
+ target_cpu_state = arm_get_cpu_by_id(cpuid);
+ if (!target_cpu_state) {
+ /* The cpu was not found */
+ return QEMU_ARM_POWERCTL_INVALID_PARAM;
+ }
+
+ target_cpu = ARM_CPU(target_cpu_state);
+ if (target_cpu->power_state == PSCI_ON) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "[ARM]%s: CPU %" PRId64 " is already on\n",
+ __func__, cpuid);
+ return QEMU_ARM_POWERCTL_ALREADY_ON;
+ }
+
+ /*
+ * If another CPU has powered the target on we are in the state
+ * ON_PENDING and additional attempts to power on the CPU should
+ * fail (see 6.6 Implementation CPU_ON/CPU_OFF races in the PSCI
+ * spec)
+ */
+ if (target_cpu->power_state == PSCI_ON_PENDING) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "[ARM]%s: CPU %" PRId64 " is already powering on\n",
+ __func__, cpuid);
+ return QEMU_ARM_POWERCTL_ON_PENDING;
+ }
+
+ async_run_on_cpu(target_cpu_state, arm_set_cpu_on_and_reset_async_work,
+ RUN_ON_CPU_NULL);
+
+ /* We are good to go */
+ return QEMU_ARM_POWERCTL_RET_SUCCESS;
+}
+
static void arm_set_cpu_off_async_work(CPUState *target_cpu_state,
run_on_cpu_data data)
{
ARMCPU *target_cpu = ARM_CPU(target_cpu_state);
- assert(qemu_mutex_iothread_locked());
+ assert(bql_locked());
target_cpu->power_state = PSCI_OFF;
target_cpu_state->halted = 1;
target_cpu_state->exception_index = EXCP_HLT;
@@ -247,7 +259,7 @@ int arm_set_cpu_off(uint64_t cpuid)
CPUState *target_cpu_state;
ARMCPU *target_cpu;
- assert(qemu_mutex_iothread_locked());
+ assert(bql_locked());
DPRINTF("cpu %" PRId64 "\n", cpuid);
@@ -283,7 +295,7 @@ int arm_reset_cpu(uint64_t cpuid)
CPUState *target_cpu_state;
ARMCPU *target_cpu;
- assert(qemu_mutex_iothread_locked());
+ assert(bql_locked());
DPRINTF("cpu %" PRId64 "\n", cpuid);
diff --git a/target/arm/arm-powerctl.h b/target/arm/arm-powerctl.h
index 04353923c0..37c8a04f0a 100644
--- a/target/arm/arm-powerctl.h
+++ b/target/arm/arm-powerctl.h
@@ -74,4 +74,20 @@ int arm_set_cpu_off(uint64_t cpuid);
*/
int arm_reset_cpu(uint64_t cpuid);
+/*
+ * arm_set_cpu_on_and_reset:
+ * @cpuid: the id of the CPU we want to start
+ *
+ * Start the cpu designated by @cpuid and put it through its normal
+ * CPU reset process. The CPU will start in the way it is architected
+ * to start after a power-on reset.
+ *
+ * Returns: QEMU_ARM_POWERCTL_RET_SUCCESS on success.
+ * QEMU_ARM_POWERCTL_INVALID_PARAM if there is no CPU with that ID.
+ * QEMU_ARM_POWERCTL_ALREADY_ON if the CPU is already on.
+ * QEMU_ARM_POWERCTL_ON_PENDING if the CPU is already partway through
+ * powering on.
+ */
+int arm_set_cpu_on_and_reset(uint64_t cpuid);
+
#endif
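
As a usage sketch only (the caller below is hypothetical, not taken from the patch): a board or power-controller model would invoke the new helper with the big QEMU lock held, since arm_set_cpu_on_and_reset() asserts bql_locked(), and dispatch on the documented return codes.

    /* Hypothetical caller; only the helper and its return codes come from the patch. */
    static void example_power_on_secondary(uint64_t mpidr)
    {
        int ret = arm_set_cpu_on_and_reset(mpidr);   /* BQL must be held here */

        switch (ret) {
        case QEMU_ARM_POWERCTL_RET_SUCCESS:
        case QEMU_ARM_POWERCTL_ALREADY_ON:
        case QEMU_ARM_POWERCTL_ON_PENDING:
            /* The core is running, or will be once the async reset completes. */
            break;
        case QEMU_ARM_POWERCTL_INVALID_PARAM:
        default:
            qemu_log_mask(LOG_GUEST_ERROR,
                          "no CPU with MPIDR 0x%" PRIx64 "\n", mpidr);
            break;
        }
    }
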
diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c
new file mode 100644
index 0000000000..3cc8cc738b
--- /dev/null
+++ b/target/arm/arm-qmp-cmds.c
@@ -0,0 +1,248 @@
+/*
+ * QEMU monitor.c for ARM.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/boards.h"
+#include "kvm_arm.h"
+#include "qapi/error.h"
+#include "qapi/visitor.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qapi-commands-machine-target.h"
+#include "qapi/qapi-commands-misc-target.h"
+#include "qapi/qmp/qdict.h"
+#include "qom/qom-qobject.h"
+
+static GICCapability *gic_cap_new(int version)
+{
+ GICCapability *cap = g_new0(GICCapability, 1);
+ cap->version = version;
+ /* by default, support none */
+ cap->emulated = false;
+ cap->kernel = false;
+ return cap;
+}
+
+static inline void gic_cap_kvm_probe(GICCapability *v2, GICCapability *v3)
+{
+#ifdef CONFIG_KVM
+ int fdarray[3];
+
+ if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, NULL)) {
+ return;
+ }
+
+ /* Test KVM GICv2 */
+ if (kvm_device_supported(fdarray[1], KVM_DEV_TYPE_ARM_VGIC_V2)) {
+ v2->kernel = true;
+ }
+
+ /* Test KVM GICv3 */
+ if (kvm_device_supported(fdarray[1], KVM_DEV_TYPE_ARM_VGIC_V3)) {
+ v3->kernel = true;
+ }
+
+ kvm_arm_destroy_scratch_host_vcpu(fdarray);
+#endif
+}
+
+GICCapabilityList *qmp_query_gic_capabilities(Error **errp)
+{
+ GICCapabilityList *head = NULL;
+ GICCapability *v2 = gic_cap_new(2), *v3 = gic_cap_new(3);
+
+ v2->emulated = true;
+ v3->emulated = true;
+
+ gic_cap_kvm_probe(v2, v3);
+
+ QAPI_LIST_PREPEND(head, v2);
+ QAPI_LIST_PREPEND(head, v3);
+
+ return head;
+}
+
+QEMU_BUILD_BUG_ON(ARM_MAX_VQ > 16);
+
+/*
+ * These are cpu model features we want to advertise. The order here
+ * matters as this is the order in which qmp_query_cpu_model_expansion
+ * will attempt to set them. If there are dependencies between features,
+ * then the order that considers those dependencies must be used.
+ */
+static const char *cpu_model_advertised_features[] = {
+ "aarch64", "pmu", "sve",
+ "sve128", "sve256", "sve384", "sve512",
+ "sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280",
+ "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048",
+ "kvm-no-adjvtime", "kvm-steal-time",
+ "pauth", "pauth-impdef", "pauth-qarma3",
+ NULL
+};
+
+CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type,
+ CpuModelInfo *model,
+ Error **errp)
+{
+ CpuModelExpansionInfo *expansion_info;
+ const QDict *qdict_in;
+ QDict *qdict_out;
+ ObjectClass *oc;
+ Object *obj;
+ const char *name;
+ int i;
+
+ if (type != CPU_MODEL_EXPANSION_TYPE_FULL) {
+ error_setg(errp, "The requested expansion type is not supported");
+ return NULL;
+ }
+
+ if (!kvm_enabled() && !strcmp(model->name, "host")) {
+ error_setg(errp, "The CPU type '%s' requires KVM", model->name);
+ return NULL;
+ }
+
+ oc = cpu_class_by_name(TYPE_ARM_CPU, model->name);
+ if (!oc) {
+ error_setg(errp, "The CPU type '%s' is not a recognized ARM CPU type",
+ model->name);
+ return NULL;
+ }
+
+ if (kvm_enabled()) {
+ bool supported = false;
+
+ if (!strcmp(model->name, "host") || !strcmp(model->name, "max")) {
+ /* These are kvmarm's recommended cpu types */
+ supported = true;
+ } else if (current_machine->cpu_type) {
+ const char *cpu_type = current_machine->cpu_type;
+ int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX);
+
+ if (strlen(model->name) == len &&
+ !strncmp(model->name, cpu_type, len)) {
+ /* KVM is enabled and we're using this type, so it works. */
+ supported = true;
+ }
+ }
+ if (!supported) {
+ error_setg(errp, "We cannot guarantee the CPU type '%s' works "
+ "with KVM on this host", model->name);
+ return NULL;
+ }
+ }
+
+ obj = object_new(object_class_get_name(oc));
+
+ if (model->props) {
+ Visitor *visitor;
+ Error *err = NULL;
+
+ visitor = qobject_input_visitor_new(model->props);
+ if (!visit_start_struct(visitor, "model.props", NULL, 0, errp)) {
+ visit_free(visitor);
+ object_unref(obj);
+ return NULL;
+ }
+
+ qdict_in = qobject_to(QDict, model->props);
+ i = 0;
+ while ((name = cpu_model_advertised_features[i++]) != NULL) {
+ if (qdict_get(qdict_in, name)) {
+ if (!object_property_set(obj, name, visitor, &err)) {
+ break;
+ }
+ }
+ }
+
+ if (!err) {
+ visit_check_struct(visitor, &err);
+ }
+ if (!err) {
+ arm_cpu_finalize_features(ARM_CPU(obj), &err);
+ }
+ visit_end_struct(visitor, NULL);
+ visit_free(visitor);
+ if (err) {
+ object_unref(obj);
+ error_propagate(errp, err);
+ return NULL;
+ }
+ } else {
+ arm_cpu_finalize_features(ARM_CPU(obj), &error_abort);
+ }
+
+ expansion_info = g_new0(CpuModelExpansionInfo, 1);
+ expansion_info->model = g_malloc0(sizeof(*expansion_info->model));
+ expansion_info->model->name = g_strdup(model->name);
+
+ qdict_out = qdict_new();
+
+ i = 0;
+ while ((name = cpu_model_advertised_features[i++]) != NULL) {
+ ObjectProperty *prop = object_property_find(obj, name);
+ if (prop) {
+ QObject *value;
+
+ assert(prop->get);
+ value = object_property_get_qobject(obj, name, &error_abort);
+
+ qdict_put_obj(qdict_out, name, value);
+ }
+ }
+
+ if (!qdict_size(qdict_out)) {
+ qobject_unref(qdict_out);
+ } else {
+ expansion_info->model->props = QOBJECT(qdict_out);
+ }
+
+ object_unref(obj);
+
+ return expansion_info;
+}
+
+static void arm_cpu_add_definition(gpointer data, gpointer user_data)
+{
+ ObjectClass *oc = data;
+ CpuDefinitionInfoList **cpu_list = user_data;
+ CpuDefinitionInfo *info;
+ const char *typename;
+
+ typename = object_class_get_name(oc);
+ info = g_malloc0(sizeof(*info));
+ info->name = cpu_model_from_type(typename);
+ info->q_typename = g_strdup(typename);
+
+ QAPI_LIST_PREPEND(*cpu_list, info);
+}
+
+CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp)
+{
+ CpuDefinitionInfoList *cpu_list = NULL;
+ GSList *list;
+
+ list = object_class_get_list(TYPE_ARM_CPU, false);
+ g_slist_foreach(list, arm_cpu_add_definition, &cpu_list);
+ g_slist_free(list);
+
+ return cpu_list;
+}
diff --git a/target/arm/arm-semi.c b/target/arm/arm-semi.c
deleted file mode 100644
index b2b22d231e..0000000000
--- a/target/arm/arm-semi.c
+++ /dev/null
@@ -1,656 +0,0 @@
-/*
- * Arm "Angel" semihosting syscalls
- *
- * Copyright (c) 2005, 2007 CodeSourcery.
- * Written by Paul Brook.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu/osdep.h"
-
-#include "cpu.h"
-#include "exec/semihost.h"
-#ifdef CONFIG_USER_ONLY
-#include "qemu.h"
-
-#define ARM_ANGEL_HEAP_SIZE (128 * 1024 * 1024)
-#else
-#include "qemu-common.h"
-#include "exec/gdbstub.h"
-#include "hw/arm/arm.h"
-#include "qemu/cutils.h"
-#endif
-
-#define TARGET_SYS_OPEN 0x01
-#define TARGET_SYS_CLOSE 0x02
-#define TARGET_SYS_WRITEC 0x03
-#define TARGET_SYS_WRITE0 0x04
-#define TARGET_SYS_WRITE 0x05
-#define TARGET_SYS_READ 0x06
-#define TARGET_SYS_READC 0x07
-#define TARGET_SYS_ISTTY 0x09
-#define TARGET_SYS_SEEK 0x0a
-#define TARGET_SYS_FLEN 0x0c
-#define TARGET_SYS_TMPNAM 0x0d
-#define TARGET_SYS_REMOVE 0x0e
-#define TARGET_SYS_RENAME 0x0f
-#define TARGET_SYS_CLOCK 0x10
-#define TARGET_SYS_TIME 0x11
-#define TARGET_SYS_SYSTEM 0x12
-#define TARGET_SYS_ERRNO 0x13
-#define TARGET_SYS_GET_CMDLINE 0x15
-#define TARGET_SYS_HEAPINFO 0x16
-#define TARGET_SYS_EXIT 0x18
-#define TARGET_SYS_SYNCCACHE 0x19
-
-/* ADP_Stopped_ApplicationExit is used for exit(0),
- * anything else is implemented as exit(1) */
-#define ADP_Stopped_ApplicationExit (0x20026)
-
-#ifndef O_BINARY
-#define O_BINARY 0
-#endif
-
-#define GDB_O_RDONLY 0x000
-#define GDB_O_WRONLY 0x001
-#define GDB_O_RDWR 0x002
-#define GDB_O_APPEND 0x008
-#define GDB_O_CREAT 0x200
-#define GDB_O_TRUNC 0x400
-#define GDB_O_BINARY 0
-
-static int gdb_open_modeflags[12] = {
- GDB_O_RDONLY,
- GDB_O_RDONLY | GDB_O_BINARY,
- GDB_O_RDWR,
- GDB_O_RDWR | GDB_O_BINARY,
- GDB_O_WRONLY | GDB_O_CREAT | GDB_O_TRUNC,
- GDB_O_WRONLY | GDB_O_CREAT | GDB_O_TRUNC | GDB_O_BINARY,
- GDB_O_RDWR | GDB_O_CREAT | GDB_O_TRUNC,
- GDB_O_RDWR | GDB_O_CREAT | GDB_O_TRUNC | GDB_O_BINARY,
- GDB_O_WRONLY | GDB_O_CREAT | GDB_O_APPEND,
- GDB_O_WRONLY | GDB_O_CREAT | GDB_O_APPEND | GDB_O_BINARY,
- GDB_O_RDWR | GDB_O_CREAT | GDB_O_APPEND,
- GDB_O_RDWR | GDB_O_CREAT | GDB_O_APPEND | GDB_O_BINARY
-};
-
-static int open_modeflags[12] = {
- O_RDONLY,
- O_RDONLY | O_BINARY,
- O_RDWR,
- O_RDWR | O_BINARY,
- O_WRONLY | O_CREAT | O_TRUNC,
- O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
- O_RDWR | O_CREAT | O_TRUNC,
- O_RDWR | O_CREAT | O_TRUNC | O_BINARY,
- O_WRONLY | O_CREAT | O_APPEND,
- O_WRONLY | O_CREAT | O_APPEND | O_BINARY,
- O_RDWR | O_CREAT | O_APPEND,
- O_RDWR | O_CREAT | O_APPEND | O_BINARY
-};
-
-#ifdef CONFIG_USER_ONLY
-static inline uint32_t set_swi_errno(TaskState *ts, uint32_t code)
-{
- if (code == (uint32_t)-1)
- ts->swi_errno = errno;
- return code;
-}
-#else
-static inline uint32_t set_swi_errno(CPUARMState *env, uint32_t code)
-{
- return code;
-}
-
-#include "exec/softmmu-semi.h"
-#endif
-
-static target_ulong arm_semi_syscall_len;
-
-#if !defined(CONFIG_USER_ONLY)
-static target_ulong syscall_err;
-#endif
-
-static void arm_semi_cb(CPUState *cs, target_ulong ret, target_ulong err)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
-#ifdef CONFIG_USER_ONLY
- TaskState *ts = cs->opaque;
-#endif
- target_ulong reg0 = is_a64(env) ? env->xregs[0] : env->regs[0];
-
- if (ret == (target_ulong)-1) {
-#ifdef CONFIG_USER_ONLY
- ts->swi_errno = err;
-#else
- syscall_err = err;
-#endif
- reg0 = ret;
- } else {
- /* Fixup syscalls that use nonstardard return conventions. */
- switch (reg0) {
- case TARGET_SYS_WRITE:
- case TARGET_SYS_READ:
- reg0 = arm_semi_syscall_len - ret;
- break;
- case TARGET_SYS_SEEK:
- reg0 = 0;
- break;
- default:
- reg0 = ret;
- break;
- }
- }
- if (is_a64(env)) {
- env->xregs[0] = reg0;
- } else {
- env->regs[0] = reg0;
- }
-}
-
-static target_ulong arm_flen_buf(ARMCPU *cpu)
-{
- /* Return an address in target memory of 64 bytes where the remote
- * gdb should write its stat struct. (The format of this structure
- * is defined by GDB's remote protocol and is not target-specific.)
- * We put this on the guest's stack just below SP.
- */
- CPUARMState *env = &cpu->env;
- target_ulong sp;
-
- if (is_a64(env)) {
- sp = env->xregs[31];
- } else {
- sp = env->regs[13];
- }
-
- return sp - 64;
-}
-
-static void arm_semi_flen_cb(CPUState *cs, target_ulong ret, target_ulong err)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
- /* The size is always stored in big-endian order, extract
- the value. We assume the size always fit in 32 bits. */
- uint32_t size;
- cpu_memory_rw_debug(cs, arm_flen_buf(cpu) + 32, (uint8_t *)&size, 4, 0);
- size = be32_to_cpu(size);
- if (is_a64(env)) {
- env->xregs[0] = size;
- } else {
- env->regs[0] = size;
- }
-#ifdef CONFIG_USER_ONLY
- ((TaskState *)cs->opaque)->swi_errno = err;
-#else
- syscall_err = err;
-#endif
-}
-
-static target_ulong arm_gdb_syscall(ARMCPU *cpu, gdb_syscall_complete_cb cb,
- const char *fmt, ...)
-{
- va_list va;
- CPUARMState *env = &cpu->env;
-
- va_start(va, fmt);
- gdb_do_syscallv(cb, fmt, va);
- va_end(va);
-
- /* FIXME: we are implicitly relying on the syscall completing
- * before this point, which is not guaranteed. We should
- * put in an explicit synchronization between this and
- * the callback function.
- */
-
- return is_a64(env) ? env->xregs[0] : env->regs[0];
-}
-
-/* Read the input value from the argument block; fail the semihosting
- * call if the memory read fails.
- */
-#define GET_ARG(n) do { \
- if (is_a64(env)) { \
- if (get_user_u64(arg ## n, args + (n) * 8)) { \
- return -1; \
- } \
- } else { \
- if (get_user_u32(arg ## n, args + (n) * 4)) { \
- return -1; \
- } \
- } \
-} while (0)
-
-#define SET_ARG(n, val) \
- (is_a64(env) ? \
- put_user_u64(val, args + (n) * 8) : \
- put_user_u32(val, args + (n) * 4))
-
-target_ulong do_arm_semihosting(CPUARMState *env)
-{
- ARMCPU *cpu = arm_env_get_cpu(env);
- CPUState *cs = CPU(cpu);
- target_ulong args;
- target_ulong arg0, arg1, arg2, arg3;
- char * s;
- int nr;
- uint32_t ret;
- uint32_t len;
-#ifdef CONFIG_USER_ONLY
- TaskState *ts = cs->opaque;
-#else
- CPUARMState *ts = env;
-#endif
-
- if (is_a64(env)) {
- /* Note that the syscall number is in W0, not X0 */
- nr = env->xregs[0] & 0xffffffffU;
- args = env->xregs[1];
- } else {
- nr = env->regs[0];
- args = env->regs[1];
- }
-
- switch (nr) {
- case TARGET_SYS_OPEN:
- GET_ARG(0);
- GET_ARG(1);
- GET_ARG(2);
- s = lock_user_string(arg0);
- if (!s) {
- /* FIXME - should this error code be -TARGET_EFAULT ? */
- return (uint32_t)-1;
- }
- if (arg1 >= 12) {
- unlock_user(s, arg0, 0);
- return (uint32_t)-1;
- }
- if (strcmp(s, ":tt") == 0) {
- int result_fileno = arg1 < 4 ? STDIN_FILENO : STDOUT_FILENO;
- unlock_user(s, arg0, 0);
- return result_fileno;
- }
- if (use_gdb_syscalls()) {
- ret = arm_gdb_syscall(cpu, arm_semi_cb, "open,%s,%x,1a4", arg0,
- (int)arg2+1, gdb_open_modeflags[arg1]);
- } else {
- ret = set_swi_errno(ts, open(s, open_modeflags[arg1], 0644));
- }
- unlock_user(s, arg0, 0);
- return ret;
- case TARGET_SYS_CLOSE:
- GET_ARG(0);
- if (use_gdb_syscalls()) {
- return arm_gdb_syscall(cpu, arm_semi_cb, "close,%x", arg0);
- } else {
- return set_swi_errno(ts, close(arg0));
- }
- case TARGET_SYS_WRITEC:
- {
- char c;
-
- if (get_user_u8(c, args))
- /* FIXME - should this error code be -TARGET_EFAULT ? */
- return (uint32_t)-1;
- /* Write to debug console. stderr is near enough. */
- if (use_gdb_syscalls()) {
- return arm_gdb_syscall(cpu, arm_semi_cb, "write,2,%x,1", args);
- } else {
- return write(STDERR_FILENO, &c, 1);
- }
- }
- case TARGET_SYS_WRITE0:
- if (!(s = lock_user_string(args)))
- /* FIXME - should this error code be -TARGET_EFAULT ? */
- return (uint32_t)-1;
- len = strlen(s);
- if (use_gdb_syscalls()) {
- return arm_gdb_syscall(cpu, arm_semi_cb, "write,2,%x,%x",
- args, len);
- } else {
- ret = write(STDERR_FILENO, s, len);
- }
- unlock_user(s, args, 0);
- return ret;
- case TARGET_SYS_WRITE:
- GET_ARG(0);
- GET_ARG(1);
- GET_ARG(2);
- len = arg2;
- if (use_gdb_syscalls()) {
- arm_semi_syscall_len = len;
- return arm_gdb_syscall(cpu, arm_semi_cb, "write,%x,%x,%x",
- arg0, arg1, len);
- } else {
- s = lock_user(VERIFY_READ, arg1, len, 1);
- if (!s) {
- /* FIXME - should this error code be -TARGET_EFAULT ? */
- return (uint32_t)-1;
- }
- ret = set_swi_errno(ts, write(arg0, s, len));
- unlock_user(s, arg1, 0);
- if (ret == (uint32_t)-1)
- return -1;
- return len - ret;
- }
- case TARGET_SYS_READ:
- GET_ARG(0);
- GET_ARG(1);
- GET_ARG(2);
- len = arg2;
- if (use_gdb_syscalls()) {
- arm_semi_syscall_len = len;
- return arm_gdb_syscall(cpu, arm_semi_cb, "read,%x,%x,%x",
- arg0, arg1, len);
- } else {
- s = lock_user(VERIFY_WRITE, arg1, len, 0);
- if (!s) {
- /* FIXME - should this error code be -TARGET_EFAULT ? */
- return (uint32_t)-1;
- }
- do {
- ret = set_swi_errno(ts, read(arg0, s, len));
- } while (ret == -1 && errno == EINTR);
- unlock_user(s, arg1, len);
- if (ret == (uint32_t)-1)
- return -1;
- return len - ret;
- }
- case TARGET_SYS_READC:
- /* XXX: Read from debug console. Not implemented. */
- return 0;
- case TARGET_SYS_ISTTY:
- GET_ARG(0);
- if (use_gdb_syscalls()) {
- return arm_gdb_syscall(cpu, arm_semi_cb, "isatty,%x", arg0);
- } else {
- return isatty(arg0);
- }
- case TARGET_SYS_SEEK:
- GET_ARG(0);
- GET_ARG(1);
- if (use_gdb_syscalls()) {
- return arm_gdb_syscall(cpu, arm_semi_cb, "lseek,%x,%x,0",
- arg0, arg1);
- } else {
- ret = set_swi_errno(ts, lseek(arg0, arg1, SEEK_SET));
- if (ret == (uint32_t)-1)
- return -1;
- return 0;
- }
- case TARGET_SYS_FLEN:
- GET_ARG(0);
- if (use_gdb_syscalls()) {
- return arm_gdb_syscall(cpu, arm_semi_flen_cb, "fstat,%x,%x",
- arg0, arm_flen_buf(cpu));
- } else {
- struct stat buf;
- ret = set_swi_errno(ts, fstat(arg0, &buf));
- if (ret == (uint32_t)-1)
- return -1;
- return buf.st_size;
- }
- case TARGET_SYS_TMPNAM:
- /* XXX: Not implemented. */
- return -1;
- case TARGET_SYS_REMOVE:
- GET_ARG(0);
- GET_ARG(1);
- if (use_gdb_syscalls()) {
- ret = arm_gdb_syscall(cpu, arm_semi_cb, "unlink,%s",
- arg0, (int)arg1+1);
- } else {
- s = lock_user_string(arg0);
- if (!s) {
- /* FIXME - should this error code be -TARGET_EFAULT ? */
- return (uint32_t)-1;
- }
- ret = set_swi_errno(ts, remove(s));
- unlock_user(s, arg0, 0);
- }
- return ret;
- case TARGET_SYS_RENAME:
- GET_ARG(0);
- GET_ARG(1);
- GET_ARG(2);
- GET_ARG(3);
- if (use_gdb_syscalls()) {
- return arm_gdb_syscall(cpu, arm_semi_cb, "rename,%s,%s",
- arg0, (int)arg1+1, arg2, (int)arg3+1);
- } else {
- char *s2;
- s = lock_user_string(arg0);
- s2 = lock_user_string(arg2);
- if (!s || !s2)
- /* FIXME - should this error code be -TARGET_EFAULT ? */
- ret = (uint32_t)-1;
- else
- ret = set_swi_errno(ts, rename(s, s2));
- if (s2)
- unlock_user(s2, arg2, 0);
- if (s)
- unlock_user(s, arg0, 0);
- return ret;
- }
- case TARGET_SYS_CLOCK:
- return clock() / (CLOCKS_PER_SEC / 100);
- case TARGET_SYS_TIME:
- return set_swi_errno(ts, time(NULL));
- case TARGET_SYS_SYSTEM:
- GET_ARG(0);
- GET_ARG(1);
- if (use_gdb_syscalls()) {
- return arm_gdb_syscall(cpu, arm_semi_cb, "system,%s",
- arg0, (int)arg1+1);
- } else {
- s = lock_user_string(arg0);
- if (!s) {
- /* FIXME - should this error code be -TARGET_EFAULT ? */
- return (uint32_t)-1;
- }
- ret = set_swi_errno(ts, system(s));
- unlock_user(s, arg0, 0);
- return ret;
- }
- case TARGET_SYS_ERRNO:
-#ifdef CONFIG_USER_ONLY
- return ts->swi_errno;
-#else
- return syscall_err;
-#endif
- case TARGET_SYS_GET_CMDLINE:
- {
- /* Build a command-line from the original argv.
- *
- * The inputs are:
- * * arg0, pointer to a buffer of at least the size
- * specified in arg1.
- * * arg1, size of the buffer pointed to by arg0 in
- * bytes.
- *
- * The outputs are:
- * * arg0, pointer to null-terminated string of the
- * command line.
- * * arg1, length of the string pointed to by arg0.
- */
-
- char *output_buffer;
- size_t input_size;
- size_t output_size;
- int status = 0;
-#if !defined(CONFIG_USER_ONLY)
- const char *cmdline;
-#endif
- GET_ARG(0);
- GET_ARG(1);
- input_size = arg1;
- /* Compute the size of the output string. */
-#if !defined(CONFIG_USER_ONLY)
- cmdline = semihosting_get_cmdline();
- if (cmdline == NULL) {
- cmdline = ""; /* Default to an empty line. */
- }
- output_size = strlen(cmdline) + 1; /* Count terminating 0. */
-#else
- unsigned int i;
-
- output_size = ts->info->arg_end - ts->info->arg_start;
- if (!output_size) {
- /* We special-case the "empty command line" case (argc==0).
- Just provide the terminating 0. */
- output_size = 1;
- }
-#endif
-
- if (output_size > input_size) {
- /* Not enough space to store command-line arguments. */
- return -1;
- }
-
- /* Adjust the command-line length. */
- if (SET_ARG(1, output_size - 1)) {
- /* Couldn't write back to argument block */
- return -1;
- }
-
- /* Lock the buffer on the ARM side. */
- output_buffer = lock_user(VERIFY_WRITE, arg0, output_size, 0);
- if (!output_buffer) {
- return -1;
- }
-
- /* Copy the command-line arguments. */
-#if !defined(CONFIG_USER_ONLY)
- pstrcpy(output_buffer, output_size, cmdline);
-#else
- if (output_size == 1) {
- /* Empty command-line. */
- output_buffer[0] = '\0';
- goto out;
- }
-
- if (copy_from_user(output_buffer, ts->info->arg_start,
- output_size)) {
- status = -1;
- goto out;
- }
-
- /* Separate arguments by white spaces. */
- for (i = 0; i < output_size - 1; i++) {
- if (output_buffer[i] == 0) {
- output_buffer[i] = ' ';
- }
- }
- out:
-#endif
- /* Unlock the buffer on the ARM side. */
- unlock_user(output_buffer, arg0, output_size);
-
- return status;
- }
- case TARGET_SYS_HEAPINFO:
- {
- target_ulong retvals[4];
- target_ulong limit;
- int i;
-
- GET_ARG(0);
-
-#ifdef CONFIG_USER_ONLY
- /* Some C libraries assume the heap immediately follows .bss, so
- allocate it using sbrk. */
- if (!ts->heap_limit) {
- abi_ulong ret;
-
- ts->heap_base = do_brk(0);
- limit = ts->heap_base + ARM_ANGEL_HEAP_SIZE;
- /* Try a big heap, and reduce the size if that fails. */
- for (;;) {
- ret = do_brk(limit);
- if (ret >= limit) {
- break;
- }
- limit = (ts->heap_base >> 1) + (limit >> 1);
- }
- ts->heap_limit = limit;
- }
-
- retvals[0] = ts->heap_base;
- retvals[1] = ts->heap_limit;
- retvals[2] = ts->stack_base;
- retvals[3] = 0; /* Stack limit. */
-#else
- limit = ram_size;
- /* TODO: Make this use the limit of the loaded application. */
- retvals[0] = limit / 2;
- retvals[1] = limit;
- retvals[2] = limit; /* Stack base */
- retvals[3] = 0; /* Stack limit. */
-#endif
-
- for (i = 0; i < ARRAY_SIZE(retvals); i++) {
- bool fail;
-
- if (is_a64(env)) {
- fail = put_user_u64(retvals[i], arg0 + i * 8);
- } else {
- fail = put_user_u32(retvals[i], arg0 + i * 4);
- }
-
- if (fail) {
- /* Couldn't write back to argument block */
- return -1;
- }
- }
- return 0;
- }
- case TARGET_SYS_EXIT:
- if (is_a64(env)) {
- /* The A64 version of this call takes a parameter block,
- * so the application-exit type can return a subcode which
- * is the exit status code from the application.
- */
- GET_ARG(0);
- GET_ARG(1);
-
- if (arg0 == ADP_Stopped_ApplicationExit) {
- ret = arg1;
- } else {
- ret = 1;
- }
- } else {
- /* ARM specifies only Stopped_ApplicationExit as normal
- * exit, everything else is considered an error */
- ret = (args == ADP_Stopped_ApplicationExit) ? 0 : 1;
- }
- gdb_exit(env, ret);
- exit(ret);
- case TARGET_SYS_SYNCCACHE:
- /* Clean the D-cache and invalidate the I-cache for the specified
- * virtual address range. This is a nop for us since we don't
- * implement caches. This is only present on A64.
- */
- if (is_a64(env)) {
- return 0;
- }
- /* fall through -- invalid for A32/T32 */
- default:
- fprintf(stderr, "qemu: Unsupported SemiHosting SWI 0x%02x\n", nr);
- cpu_dump_state(cs, stderr, fprintf, 0);
- abort();
- }
-}
diff --git a/target/arm/common-semi-target.h b/target/arm/common-semi-target.h
new file mode 100644
index 0000000000..da51f2d7f5
--- /dev/null
+++ b/target/arm/common-semi-target.h
@@ -0,0 +1,60 @@
+/*
+ * Target-specific parts of semihosting/arm-compat-semi.c.
+ *
+ * Copyright (c) 2005, 2007 CodeSourcery.
+ * Copyright (c) 2019, 2022 Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef TARGET_ARM_COMMON_SEMI_TARGET_H
+#define TARGET_ARM_COMMON_SEMI_TARGET_H
+
+#include "target/arm/cpu-qom.h"
+
+static inline target_ulong common_semi_arg(CPUState *cs, int argno)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ if (is_a64(env)) {
+ return env->xregs[argno];
+ } else {
+ return env->regs[argno];
+ }
+}
+
+static inline void common_semi_set_ret(CPUState *cs, target_ulong ret)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ if (is_a64(env)) {
+ env->xregs[0] = ret;
+ } else {
+ env->regs[0] = ret;
+ }
+}
+
+static inline bool common_semi_sys_exit_extended(CPUState *cs, int nr)
+{
+ return nr == TARGET_SYS_EXIT_EXTENDED || is_a64(cpu_env(cs));
+}
+
+static inline bool is_64bit_semihosting(CPUArchState *env)
+{
+ return is_a64(env);
+}
+
+static inline target_ulong common_semi_stack_bottom(CPUState *cs)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ return is_a64(env) ? env->xregs[31] : env->regs[13];
+}
+
+static inline bool common_semi_has_synccache(CPUArchState *env)
+{
+ /* Ok for A64, invalid for A32/T32 */
+ return is_a64(env);
+}
+
+#endif
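
A rough sketch of how the shared semihosting front end is expected to consume these hooks (this is not the real semihosting/arm-compat-semi.c, and handle_operation() is a made-up stand-in for its dispatch): the operation number and parameter-block pointer come from argument registers 0 and 1, as in the deleted arm-semi.c above, and the result is written back through common_semi_set_ret().

    static void common_semi_sketch(CPUState *cs)
    {
        target_ulong nr = common_semi_arg(cs, 0);    /* W0/R0: operation number */
        target_ulong args = common_semi_arg(cs, 1);  /* X1/R1: parameter block */
        target_ulong ret = handle_operation(cs, nr, args);  /* hypothetical dispatch */

        common_semi_set_ret(cs, ret);
    }
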
diff --git a/target/arm/cortex-regs.c b/target/arm/cortex-regs.c
new file mode 100644
index 0000000000..ae817b08dd
--- /dev/null
+++ b/target/arm/cortex-regs.c
@@ -0,0 +1,76 @@
+/*
+ * ARM Cortex-A registers
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "cpregs.h"
+
+
+static uint64_t l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ ARMCPU *cpu = env_archcpu(env);
+
+ /*
+ * Number of cores is in [25:24]; otherwise we RAZ.
+ * If the board didn't configure the CPUs into clusters,
+ * we default to "all CPUs in one cluster", which might be
+ * more than the 4 that the hardware permits and which is
+ * all you can report in this two-bit field. Saturate to
+ * 0b11 (== 4 CPUs) rather than overflowing the field.
+ */
+ return MIN(cpu->core_count - 1, 3) << 24;
+}
+
+static const ARMCPRegInfo cortex_a72_a57_a53_cp_reginfo[] = {
+ { .name = "L2CTLR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 2,
+ .access = PL1_RW, .readfn = l2ctlr_read,
+ .writefn = arm_cp_write_ignore },
+ { .name = "L2CTLR",
+ .cp = 15, .opc1 = 1, .crn = 9, .crm = 0, .opc2 = 2,
+ .access = PL1_RW, .readfn = l2ctlr_read,
+ .writefn = arm_cp_write_ignore },
+ { .name = "L2ECTLR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 3,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "L2ECTLR",
+ .cp = 15, .opc1 = 1, .crn = 9, .crm = 0, .opc2 = 3,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "L2ACTLR", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 0, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUACTLR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUACTLR",
+ .cp = 15, .opc1 = 0, .crm = 15,
+ .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+ { .name = "CPUECTLR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUECTLR",
+ .cp = 15, .opc1 = 1, .crm = 15,
+ .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+ { .name = "CPUMERRSR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 2,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUMERRSR",
+ .cp = 15, .opc1 = 2, .crm = 15,
+ .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+ { .name = "L2MERRSR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 3,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "L2MERRSR",
+ .cp = 15, .opc1 = 3, .crm = 15,
+ .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+};
+
+void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu)
+{
+ define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
+}
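
A worked example of the clamping in l2ctlr_read() above (illustrative arithmetic, not part of the patch): L2CTLR reports "number of cores minus one" in bits [25:24], so the value saturates once a cluster holds more than four CPUs.

    /* core_count = 1:  MIN(0, 3) << 24  ->  bits[25:24] = 0b00  (1 CPU)  */
    /* core_count = 4:  MIN(3, 3) << 24  ->  bits[25:24] = 0b11  (4 CPUs) */
    /* core_count = 8:  MIN(7, 3) << 24  ->  bits[25:24] = 0b11  (still reported as 4) */
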
diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
new file mode 100644
index 0000000000..cc7c54378f
--- /dev/null
+++ b/target/arm/cpregs.h
@@ -0,0 +1,1137 @@
+/*
+ * QEMU ARM CP Register access and descriptions
+ *
+ * Copyright (c) 2022 Linaro Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see
+ * <http://www.gnu.org/licenses/gpl-2.0.html>
+ */
+
+#ifndef TARGET_ARM_CPREGS_H
+#define TARGET_ARM_CPREGS_H
+
+#include "hw/registerfields.h"
+#include "target/arm/kvm-consts.h"
+
+/*
+ * ARMCPRegInfo type field bits:
+ */
+enum {
+ /*
+ * Register must be handled specially during translation.
+ * The method is one of the values below:
+ */
+ ARM_CP_SPECIAL_MASK = 0x000f,
+ /* Special: no change to PE state: writes ignored, reads ignored. */
+ ARM_CP_NOP = 0x0001,
+ /* Special: sysreg is WFI, for v5 and v6. */
+ ARM_CP_WFI = 0x0002,
+ /* Special: sysreg is NZCV. */
+ ARM_CP_NZCV = 0x0003,
+ /* Special: sysreg is CURRENTEL. */
+ ARM_CP_CURRENTEL = 0x0004,
+ /* Special: sysreg is DC ZVA or similar. */
+ ARM_CP_DC_ZVA = 0x0005,
+ ARM_CP_DC_GVA = 0x0006,
+ ARM_CP_DC_GZVA = 0x0007,
+
+ /* Flag: reads produce resetvalue; writes ignored. */
+ ARM_CP_CONST = 1 << 4,
+ /* Flag: For ARM_CP_STATE_AA32, sysreg is 64-bit. */
+ ARM_CP_64BIT = 1 << 5,
+ /*
+ * Flag: TB should not be ended after a write to this register
+ * (the default is that the TB ends after cp writes).
+ */
+ ARM_CP_SUPPRESS_TB_END = 1 << 6,
+ /*
+ * Flag: Permit a register definition to override a previous definition
+ * for the same (cp, is64, crn, crm, opc1, opc2) tuple: either the new
+ * or the old must have the ARM_CP_OVERRIDE bit set.
+ */
+ ARM_CP_OVERRIDE = 1 << 7,
+ /*
+ * Flag: Register is an alias view of some underlying state which is also
+ * visible via another register, and that the other register is handling
+ * migration and reset; registers marked ARM_CP_ALIAS will not be migrated
+ * but may have their state set by syncing of register state from KVM.
+ */
+ ARM_CP_ALIAS = 1 << 8,
+ /*
+ * Flag: Register does I/O and therefore its accesses need to be marked
+ * with translator_io_start() and also end the TB. In particular,
+ * registers which implement clocks or timers require this.
+ */
+ ARM_CP_IO = 1 << 9,
+ /*
+ * Flag: Register has no underlying state and does not support raw access
+ * for state saving/loading; it will not be used for either migration or
+ * KVM state synchronization. Typically this is for "registers" which are
+ * actually used as instructions for cache maintenance and so on.
+ */
+ ARM_CP_NO_RAW = 1 << 10,
+ /*
+ * Flag: The read or write hook might raise an exception; the generated
+ * code will synchronize the CPU state before calling the hook so that it
+ * is safe for the hook to call raise_exception().
+ */
+ ARM_CP_RAISES_EXC = 1 << 11,
+ /*
+ * Flag: Writes to the sysreg might change the exception level - typically
+ * on older ARM chips. For those cases we need to re-read the new el when
+ * recomputing the translation flags.
+ */
+ ARM_CP_NEWEL = 1 << 12,
+ /*
+ * Flag: Access check for this sysreg is identical to accessing FPU state
+ * from an instruction: use translation fp_access_check().
+ */
+ ARM_CP_FPU = 1 << 13,
+ /*
+ * Flag: Access check for this sysreg is identical to accessing SVE state
+ * from an instruction: use translation sve_access_check().
+ */
+ ARM_CP_SVE = 1 << 14,
+ /* Flag: Do not expose in gdb sysreg xml. */
+ ARM_CP_NO_GDB = 1 << 15,
+ /*
+ * Flags: If EL3 but not EL2...
+ * - UNDEF: discard the cpreg,
+ * - KEEP: retain the cpreg as is,
+ * - C_NZ: set const on the cpreg, but retain resetvalue,
+ * - else: set const on the cpreg, zero resetvalue, aka RES0.
+ * See rule RJFFP in section D1.1.3 of DDI0487H.a.
+ */
+ ARM_CP_EL3_NO_EL2_UNDEF = 1 << 16,
+ ARM_CP_EL3_NO_EL2_KEEP = 1 << 17,
+ ARM_CP_EL3_NO_EL2_C_NZ = 1 << 18,
+ /*
+ * Flag: Access check for this sysreg is constrained by the
+ * ARM pseudocode function CheckSMEAccess().
+ */
+ ARM_CP_SME = 1 << 19,
+ /*
+ * Flag: one of the four EL2 registers which redirect to the
+ * equivalent EL1 register when FEAT_NV2 is enabled.
+ */
+ ARM_CP_NV2_REDIRECT = 1 << 20,
+};
+
+/*
+ * Interface for defining coprocessor registers.
+ * Registers are defined in tables of arm_cp_reginfo structs
+ * which are passed to define_arm_cp_regs().
+ */
+
+/*
+ * When looking up a coprocessor register we look for it
+ * via an integer which encodes all of:
+ * coprocessor number
+ * Crn, Crm, opc1, opc2 fields
+ * 32 or 64 bit register (ie is it accessed via MRC/MCR
+ * or via MRRC/MCRR?)
+ * non-secure/secure bank (AArch32 only)
+ * We allow 4 bits for opc1 because MRRC/MCRR have a 4 bit field.
+ * (In this case crn and opc2 should be zero.)
+ * For AArch64, there is no 32/64 bit size distinction;
+ * instead all registers have a 2 bit op0, 3 bit op1 and op2,
+ * and 4 bit CRn and CRm. The encoding patterns are chosen
+ * to be easy to convert to and from the KVM encodings, and also
+ * so that the hashtable can contain both AArch32 and AArch64
+ * registers (to allow for interprocessing where we might run
+ * 32 bit code on a 64 bit core).
+ */
+/*
+ * This bit is private to our hashtable cpreg; in KVM register
+ * IDs the AArch64/32 distinction is the KVM_REG_ARM/ARM64
+ * in the upper bits of the 64 bit ID.
+ */
+#define CP_REG_AA64_SHIFT 28
+#define CP_REG_AA64_MASK (1 << CP_REG_AA64_SHIFT)
+
+/*
+ * To enable banking of coprocessor registers depending on ns-bit we
+ * add a bit to distinguish between secure and non-secure cpregs in the
+ * hashtable.
+ */
+#define CP_REG_NS_SHIFT 29
+#define CP_REG_NS_MASK (1 << CP_REG_NS_SHIFT)
+
+#define ENCODE_CP_REG(cp, is64, ns, crn, crm, opc1, opc2) \
+ ((ns) << CP_REG_NS_SHIFT | ((cp) << 16) | ((is64) << 15) | \
+ ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2))
+
+#define ENCODE_AA64_CP_REG(cp, crn, crm, op0, op1, op2) \
+ (CP_REG_AA64_MASK | \
+ ((cp) << CP_REG_ARM_COPROC_SHIFT) | \
+ ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \
+ ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \
+ ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \
+ ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) | \
+ ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT))
+
+/*
+ * Convert a full 64 bit KVM register ID to the truncated 32 bit
+ * version used as a key for the coprocessor register hashtable
+ */
+static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid)
+{
+ uint32_t cpregid = kvmid;
+ if ((kvmid & CP_REG_ARCH_MASK) == CP_REG_ARM64) {
+ cpregid |= CP_REG_AA64_MASK;
+ } else {
+ if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) {
+ cpregid |= (1 << 15);
+ }
+
+ /*
+ * KVM is always non-secure so add the NS flag on AArch32 register
+ * entries.
+ */
+ cpregid |= 1 << CP_REG_NS_SHIFT;
+ }
+ return cpregid;
+}
+
+/*
+ * Convert a truncated 32 bit hashtable key into the full
+ * 64 bit KVM register ID.
+ */
+static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
+{
+ uint64_t kvmid;
+
+ if (cpregid & CP_REG_AA64_MASK) {
+ kvmid = cpregid & ~CP_REG_AA64_MASK;
+ kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM64;
+ } else {
+ kvmid = cpregid & ~(1 << 15);
+ if (cpregid & (1 << 15)) {
+ kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM;
+ } else {
+ kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM;
+ }
+ }
+ return kvmid;
+}
+
+/*
+ * Valid values for ARMCPRegInfo state field, indicating which of
+ * the AArch32 and AArch64 execution states this register is visible in.
+ * If the reginfo doesn't explicitly specify then it is AArch32 only.
+ * If the reginfo is declared to be visible in both states then a second
+ * reginfo is synthesised for the AArch32 view of the AArch64 register,
+ * such that the AArch32 view is the lower 32 bits of the AArch64 one.
+ * Note that we rely on the values of these enums as we iterate through
+ * the various states in some places.
+ */
+typedef enum {
+ ARM_CP_STATE_AA32 = 0,
+ ARM_CP_STATE_AA64 = 1,
+ ARM_CP_STATE_BOTH = 2,
+} CPState;
+
+/*
+ * ARM CP register secure state flags. These flags identify security state
+ * attributes for a given CP register entry.
+ * The existence of both or neither secure and non-secure flags indicates that
+ * the register has both a secure and non-secure hash entry. A single one of
+ * these flags causes the register to only be hashed for the specified
+ * security state.
+ * Although definitions may have any combination of the S/NS bits, each
+ * registered entry will only have one to identify whether the entry is secure
+ * or non-secure.
+ */
+typedef enum {
+ ARM_CP_SECSTATE_BOTH = 0, /* define one cpreg for each secstate */
+ ARM_CP_SECSTATE_S = (1 << 0), /* bit[0]: Secure state register */
+ ARM_CP_SECSTATE_NS = (1 << 1), /* bit[1]: Non-secure state register */
+} CPSecureState;
+
+/*
+ * Access rights:
+ * We define bits for Read and Write access for what rev C of the v7-AR ARM ARM
+ * defines as PL0 (user), PL1 (fiq/irq/svc/abt/und/sys, ie privileged), and
+ * PL2 (hyp). The other level which has Read and Write bits is Secure PL1
+ * (ie any of the privileged modes in Secure state, or Monitor mode).
+ * If a register is accessible in one privilege level it's always accessible
+ * in higher privilege levels too. Since "Secure PL1" also follows this rule
+ * (ie anything visible in PL2 is visible in S-PL1, some things are only
+ * visible in S-PL1) but "Secure PL1" is a bit of a mouthful, we bend the
+ * terminology a little and call this PL3.
+ * In AArch64 things are somewhat simpler as the PLx bits line up exactly
+ * with the ELx exception levels.
+ *
+ * If access permissions for a register are more complex than can be
+ * described with these bits, then use a laxer set of restrictions, and
+ * do the more restrictive/complex check inside a helper function.
+ */
+typedef enum {
+ PL3_R = 0x80,
+ PL3_W = 0x40,
+ PL2_R = 0x20 | PL3_R,
+ PL2_W = 0x10 | PL3_W,
+ PL1_R = 0x08 | PL2_R,
+ PL1_W = 0x04 | PL2_W,
+ PL0_R = 0x02 | PL1_R,
+ PL0_W = 0x01 | PL1_W,
+
+ /*
+ * For user-mode some registers are accessible to EL0 via a kernel
+ * trap-and-emulate ABI. In this case we define the read permissions
+ * as actually being PL0_R. However some bits of any given register
+ * may still be masked.
+ */
+#ifdef CONFIG_USER_ONLY
+ PL0U_R = PL0_R,
+#else
+ PL0U_R = PL1_R,
+#endif
+
+ PL3_RW = PL3_R | PL3_W,
+ PL2_RW = PL2_R | PL2_W,
+ PL1_RW = PL1_R | PL1_W,
+ PL0_RW = PL0_R | PL0_W,
+} CPAccessRights;
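+
+/*
+ * Worked example of the encoding above: because each PLx_R/PLx_W value
+ * ORs in the next higher level's bit, PL1_RW works out to
+ * PL1_R | PL1_W = 0xa8 | 0x54 = 0xfc, i.e. readable and writable at
+ * PL1, PL2 and PL3 but not at PL0 (bits 0 and 1 clear). cp_access_ok()
+ * later in this header simply tests bit (current_el * 2 + isread) of
+ * this value.
+ */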
+
+typedef enum CPAccessResult {
+ /* Access is permitted */
+ CP_ACCESS_OK = 0,
+
+ /*
+ * Combined with one of the following, the low 2 bits indicate the
+ * target exception level. If 0, the exception is taken to the usual
+ * target EL (EL1 or PL1 if in EL0, otherwise to the current EL).
+ */
+ CP_ACCESS_EL_MASK = 3,
+
+ /*
+ * Access fails due to a configurable trap or enable which would
+ * result in a categorized exception syndrome giving information about
+ * the failing instruction (ie syndrome category 0x3, 0x4, 0x5, 0x6,
+ * 0xc or 0x18).
+ */
+ CP_ACCESS_TRAP = (1 << 2),
+ CP_ACCESS_TRAP_EL2 = CP_ACCESS_TRAP | 2,
+ CP_ACCESS_TRAP_EL3 = CP_ACCESS_TRAP | 3,
+
+ /*
+ * Access fails and results in an exception syndrome 0x0 ("uncategorized").
+ * Note that this is not a catch-all case -- the set of cases which may
+ * result in this failure is specifically defined by the architecture.
+ * This trap is always to the usual target EL, never directly to a
+ * specified target EL.
+ */
+ CP_ACCESS_TRAP_UNCATEGORIZED = (2 << 2),
+} CPAccessResult;
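+
+/*
+ * A minimal sketch of an access-check function returning these values,
+ * in the spirit of access_tvm_trvm() declared later in this header.
+ * The name access_foo is hypothetical; arm_current_el(),
+ * arm_hcr_el2_eff() and HCR_TVM are the usual target/arm helpers:
+ *
+ *     static CPAccessResult access_foo(CPUARMState *env,
+ *                                      const ARMCPRegInfo *ri,
+ *                                      bool isread)
+ *     {
+ *         if (!isread && arm_current_el(env) == 1 &&
+ *             (arm_hcr_el2_eff(env) & HCR_TVM)) {
+ *             return CP_ACCESS_TRAP_EL2;
+ *         }
+ *         return CP_ACCESS_OK;
+ *     }
+ */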
+
+/* Indexes into fgt_read[] */
+#define FGTREG_HFGRTR 0
+#define FGTREG_HDFGRTR 1
+/* Indexes into fgt_write[] */
+#define FGTREG_HFGWTR 0
+#define FGTREG_HDFGWTR 1
+/* Indexes into fgt_exec[] */
+#define FGTREG_HFGITR 0
+
+FIELD(HFGRTR_EL2, AFSR0_EL1, 0, 1)
+FIELD(HFGRTR_EL2, AFSR1_EL1, 1, 1)
+FIELD(HFGRTR_EL2, AIDR_EL1, 2, 1)
+FIELD(HFGRTR_EL2, AMAIR_EL1, 3, 1)
+FIELD(HFGRTR_EL2, APDAKEY, 4, 1)
+FIELD(HFGRTR_EL2, APDBKEY, 5, 1)
+FIELD(HFGRTR_EL2, APGAKEY, 6, 1)
+FIELD(HFGRTR_EL2, APIAKEY, 7, 1)
+FIELD(HFGRTR_EL2, APIBKEY, 8, 1)
+FIELD(HFGRTR_EL2, CCSIDR_EL1, 9, 1)
+FIELD(HFGRTR_EL2, CLIDR_EL1, 10, 1)
+FIELD(HFGRTR_EL2, CONTEXTIDR_EL1, 11, 1)
+FIELD(HFGRTR_EL2, CPACR_EL1, 12, 1)
+FIELD(HFGRTR_EL2, CSSELR_EL1, 13, 1)
+FIELD(HFGRTR_EL2, CTR_EL0, 14, 1)
+FIELD(HFGRTR_EL2, DCZID_EL0, 15, 1)
+FIELD(HFGRTR_EL2, ESR_EL1, 16, 1)
+FIELD(HFGRTR_EL2, FAR_EL1, 17, 1)
+FIELD(HFGRTR_EL2, ISR_EL1, 18, 1)
+FIELD(HFGRTR_EL2, LORC_EL1, 19, 1)
+FIELD(HFGRTR_EL2, LOREA_EL1, 20, 1)
+FIELD(HFGRTR_EL2, LORID_EL1, 21, 1)
+FIELD(HFGRTR_EL2, LORN_EL1, 22, 1)
+FIELD(HFGRTR_EL2, LORSA_EL1, 23, 1)
+FIELD(HFGRTR_EL2, MAIR_EL1, 24, 1)
+FIELD(HFGRTR_EL2, MIDR_EL1, 25, 1)
+FIELD(HFGRTR_EL2, MPIDR_EL1, 26, 1)
+FIELD(HFGRTR_EL2, PAR_EL1, 27, 1)
+FIELD(HFGRTR_EL2, REVIDR_EL1, 28, 1)
+FIELD(HFGRTR_EL2, SCTLR_EL1, 29, 1)
+FIELD(HFGRTR_EL2, SCXTNUM_EL1, 30, 1)
+FIELD(HFGRTR_EL2, SCXTNUM_EL0, 31, 1)
+FIELD(HFGRTR_EL2, TCR_EL1, 32, 1)
+FIELD(HFGRTR_EL2, TPIDR_EL1, 33, 1)
+FIELD(HFGRTR_EL2, TPIDRRO_EL0, 34, 1)
+FIELD(HFGRTR_EL2, TPIDR_EL0, 35, 1)
+FIELD(HFGRTR_EL2, TTBR0_EL1, 36, 1)
+FIELD(HFGRTR_EL2, TTBR1_EL1, 37, 1)
+FIELD(HFGRTR_EL2, VBAR_EL1, 38, 1)
+FIELD(HFGRTR_EL2, ICC_IGRPENN_EL1, 39, 1)
+FIELD(HFGRTR_EL2, ERRIDR_EL1, 40, 1)
+FIELD(HFGRTR_EL2, ERRSELR_EL1, 41, 1)
+FIELD(HFGRTR_EL2, ERXFR_EL1, 42, 1)
+FIELD(HFGRTR_EL2, ERXCTLR_EL1, 43, 1)
+FIELD(HFGRTR_EL2, ERXSTATUS_EL1, 44, 1)
+FIELD(HFGRTR_EL2, ERXMISCN_EL1, 45, 1)
+FIELD(HFGRTR_EL2, ERXPFGF_EL1, 46, 1)
+FIELD(HFGRTR_EL2, ERXPFGCTL_EL1, 47, 1)
+FIELD(HFGRTR_EL2, ERXPFGCDN_EL1, 48, 1)
+FIELD(HFGRTR_EL2, ERXADDR_EL1, 49, 1)
+FIELD(HFGRTR_EL2, NACCDATA_EL1, 50, 1)
+/* 51-53: RES0 */
+FIELD(HFGRTR_EL2, NSMPRI_EL1, 54, 1)
+FIELD(HFGRTR_EL2, NTPIDR2_EL0, 55, 1)
+/* 56-63: RES0 */
+
+/* These match HFGRTR but bits for RO registers are RES0 */
+FIELD(HFGWTR_EL2, AFSR0_EL1, 0, 1)
+FIELD(HFGWTR_EL2, AFSR1_EL1, 1, 1)
+FIELD(HFGWTR_EL2, AMAIR_EL1, 3, 1)
+FIELD(HFGWTR_EL2, APDAKEY, 4, 1)
+FIELD(HFGWTR_EL2, APDBKEY, 5, 1)
+FIELD(HFGWTR_EL2, APGAKEY, 6, 1)
+FIELD(HFGWTR_EL2, APIAKEY, 7, 1)
+FIELD(HFGWTR_EL2, APIBKEY, 8, 1)
+FIELD(HFGWTR_EL2, CONTEXTIDR_EL1, 11, 1)
+FIELD(HFGWTR_EL2, CPACR_EL1, 12, 1)
+FIELD(HFGWTR_EL2, CSSELR_EL1, 13, 1)
+FIELD(HFGWTR_EL2, ESR_EL1, 16, 1)
+FIELD(HFGWTR_EL2, FAR_EL1, 17, 1)
+FIELD(HFGWTR_EL2, LORC_EL1, 19, 1)
+FIELD(HFGWTR_EL2, LOREA_EL1, 20, 1)
+FIELD(HFGWTR_EL2, LORN_EL1, 22, 1)
+FIELD(HFGWTR_EL2, LORSA_EL1, 23, 1)
+FIELD(HFGWTR_EL2, MAIR_EL1, 24, 1)
+FIELD(HFGWTR_EL2, PAR_EL1, 27, 1)
+FIELD(HFGWTR_EL2, SCTLR_EL1, 29, 1)
+FIELD(HFGWTR_EL2, SCXTNUM_EL1, 30, 1)
+FIELD(HFGWTR_EL2, SCXTNUM_EL0, 31, 1)
+FIELD(HFGWTR_EL2, TCR_EL1, 32, 1)
+FIELD(HFGWTR_EL2, TPIDR_EL1, 33, 1)
+FIELD(HFGWTR_EL2, TPIDRRO_EL0, 34, 1)
+FIELD(HFGWTR_EL2, TPIDR_EL0, 35, 1)
+FIELD(HFGWTR_EL2, TTBR0_EL1, 36, 1)
+FIELD(HFGWTR_EL2, TTBR1_EL1, 37, 1)
+FIELD(HFGWTR_EL2, VBAR_EL1, 38, 1)
+FIELD(HFGWTR_EL2, ICC_IGRPENN_EL1, 39, 1)
+FIELD(HFGWTR_EL2, ERRSELR_EL1, 41, 1)
+FIELD(HFGWTR_EL2, ERXCTLR_EL1, 43, 1)
+FIELD(HFGWTR_EL2, ERXSTATUS_EL1, 44, 1)
+FIELD(HFGWTR_EL2, ERXMISCN_EL1, 45, 1)
+FIELD(HFGWTR_EL2, ERXPFGCTL_EL1, 47, 1)
+FIELD(HFGWTR_EL2, ERXPFGCDN_EL1, 48, 1)
+FIELD(HFGWTR_EL2, ERXADDR_EL1, 49, 1)
+FIELD(HFGWTR_EL2, NACCDATA_EL1, 50, 1)
+FIELD(HFGWTR_EL2, NSMPRI_EL1, 54, 1)
+FIELD(HFGWTR_EL2, NTPIDR2_EL0, 55, 1)
+
+FIELD(HFGITR_EL2, ICIALLUIS, 0, 1)
+FIELD(HFGITR_EL2, ICIALLU, 1, 1)
+FIELD(HFGITR_EL2, ICIVAU, 2, 1)
+FIELD(HFGITR_EL2, DCIVAC, 3, 1)
+FIELD(HFGITR_EL2, DCISW, 4, 1)
+FIELD(HFGITR_EL2, DCCSW, 5, 1)
+FIELD(HFGITR_EL2, DCCISW, 6, 1)
+FIELD(HFGITR_EL2, DCCVAU, 7, 1)
+FIELD(HFGITR_EL2, DCCVAP, 8, 1)
+FIELD(HFGITR_EL2, DCCVADP, 9, 1)
+FIELD(HFGITR_EL2, DCCIVAC, 10, 1)
+FIELD(HFGITR_EL2, DCZVA, 11, 1)
+FIELD(HFGITR_EL2, ATS1E1R, 12, 1)
+FIELD(HFGITR_EL2, ATS1E1W, 13, 1)
+FIELD(HFGITR_EL2, ATS1E0R, 14, 1)
+FIELD(HFGITR_EL2, ATS1E0W, 15, 1)
+FIELD(HFGITR_EL2, ATS1E1RP, 16, 1)
+FIELD(HFGITR_EL2, ATS1E1WP, 17, 1)
+FIELD(HFGITR_EL2, TLBIVMALLE1OS, 18, 1)
+FIELD(HFGITR_EL2, TLBIVAE1OS, 19, 1)
+FIELD(HFGITR_EL2, TLBIASIDE1OS, 20, 1)
+FIELD(HFGITR_EL2, TLBIVAAE1OS, 21, 1)
+FIELD(HFGITR_EL2, TLBIVALE1OS, 22, 1)
+FIELD(HFGITR_EL2, TLBIVAALE1OS, 23, 1)
+FIELD(HFGITR_EL2, TLBIRVAE1OS, 24, 1)
+FIELD(HFGITR_EL2, TLBIRVAAE1OS, 25, 1)
+FIELD(HFGITR_EL2, TLBIRVALE1OS, 26, 1)
+FIELD(HFGITR_EL2, TLBIRVAALE1OS, 27, 1)
+FIELD(HFGITR_EL2, TLBIVMALLE1IS, 28, 1)
+FIELD(HFGITR_EL2, TLBIVAE1IS, 29, 1)
+FIELD(HFGITR_EL2, TLBIASIDE1IS, 30, 1)
+FIELD(HFGITR_EL2, TLBIVAAE1IS, 31, 1)
+FIELD(HFGITR_EL2, TLBIVALE1IS, 32, 1)
+FIELD(HFGITR_EL2, TLBIVAALE1IS, 33, 1)
+FIELD(HFGITR_EL2, TLBIRVAE1IS, 34, 1)
+FIELD(HFGITR_EL2, TLBIRVAAE1IS, 35, 1)
+FIELD(HFGITR_EL2, TLBIRVALE1IS, 36, 1)
+FIELD(HFGITR_EL2, TLBIRVAALE1IS, 37, 1)
+FIELD(HFGITR_EL2, TLBIRVAE1, 38, 1)
+FIELD(HFGITR_EL2, TLBIRVAAE1, 39, 1)
+FIELD(HFGITR_EL2, TLBIRVALE1, 40, 1)
+FIELD(HFGITR_EL2, TLBIRVAALE1, 41, 1)
+FIELD(HFGITR_EL2, TLBIVMALLE1, 42, 1)
+FIELD(HFGITR_EL2, TLBIVAE1, 43, 1)
+FIELD(HFGITR_EL2, TLBIASIDE1, 44, 1)
+FIELD(HFGITR_EL2, TLBIVAAE1, 45, 1)
+FIELD(HFGITR_EL2, TLBIVALE1, 46, 1)
+FIELD(HFGITR_EL2, TLBIVAALE1, 47, 1)
+FIELD(HFGITR_EL2, CFPRCTX, 48, 1)
+FIELD(HFGITR_EL2, DVPRCTX, 49, 1)
+FIELD(HFGITR_EL2, CPPRCTX, 50, 1)
+FIELD(HFGITR_EL2, ERET, 51, 1)
+FIELD(HFGITR_EL2, SVC_EL0, 52, 1)
+FIELD(HFGITR_EL2, SVC_EL1, 53, 1)
+FIELD(HFGITR_EL2, DCCVAC, 54, 1)
+FIELD(HFGITR_EL2, NBRBINJ, 55, 1)
+FIELD(HFGITR_EL2, NBRBIALL, 56, 1)
+
+FIELD(HDFGRTR_EL2, DBGBCRN_EL1, 0, 1)
+FIELD(HDFGRTR_EL2, DBGBVRN_EL1, 1, 1)
+FIELD(HDFGRTR_EL2, DBGWCRN_EL1, 2, 1)
+FIELD(HDFGRTR_EL2, DBGWVRN_EL1, 3, 1)
+FIELD(HDFGRTR_EL2, MDSCR_EL1, 4, 1)
+FIELD(HDFGRTR_EL2, DBGCLAIM, 5, 1)
+FIELD(HDFGRTR_EL2, DBGAUTHSTATUS_EL1, 6, 1)
+FIELD(HDFGRTR_EL2, DBGPRCR_EL1, 7, 1)
+/* 8: RES0: OSLAR_EL1 is WO */
+FIELD(HDFGRTR_EL2, OSLSR_EL1, 9, 1)
+FIELD(HDFGRTR_EL2, OSECCR_EL1, 10, 1)
+FIELD(HDFGRTR_EL2, OSDLR_EL1, 11, 1)
+FIELD(HDFGRTR_EL2, PMEVCNTRN_EL0, 12, 1)
+FIELD(HDFGRTR_EL2, PMEVTYPERN_EL0, 13, 1)
+FIELD(HDFGRTR_EL2, PMCCFILTR_EL0, 14, 1)
+FIELD(HDFGRTR_EL2, PMCCNTR_EL0, 15, 1)
+FIELD(HDFGRTR_EL2, PMCNTEN, 16, 1)
+FIELD(HDFGRTR_EL2, PMINTEN, 17, 1)
+FIELD(HDFGRTR_EL2, PMOVS, 18, 1)
+FIELD(HDFGRTR_EL2, PMSELR_EL0, 19, 1)
+/* 20: RES0: PMSWINC_EL0 is WO */
+/* 21: RES0: PMCR_EL0 is WO */
+FIELD(HDFGRTR_EL2, PMMIR_EL1, 22, 1)
+FIELD(HDFGRTR_EL2, PMBLIMITR_EL1, 23, 1)
+FIELD(HDFGRTR_EL2, PMBPTR_EL1, 24, 1)
+FIELD(HDFGRTR_EL2, PMBSR_EL1, 25, 1)
+FIELD(HDFGRTR_EL2, PMSCR_EL1, 26, 1)
+FIELD(HDFGRTR_EL2, PMSEVFR_EL1, 27, 1)
+FIELD(HDFGRTR_EL2, PMSFCR_EL1, 28, 1)
+FIELD(HDFGRTR_EL2, PMSICR_EL1, 29, 1)
+FIELD(HDFGRTR_EL2, PMSIDR_EL1, 30, 1)
+FIELD(HDFGRTR_EL2, PMSIRR_EL1, 31, 1)
+FIELD(HDFGRTR_EL2, PMSLATFR_EL1, 32, 1)
+FIELD(HDFGRTR_EL2, TRC, 33, 1)
+FIELD(HDFGRTR_EL2, TRCAUTHSTATUS, 34, 1)
+FIELD(HDFGRTR_EL2, TRCAUXCTLR, 35, 1)
+FIELD(HDFGRTR_EL2, TRCCLAIM, 36, 1)
+FIELD(HDFGRTR_EL2, TRCCNTVRn, 37, 1)
+/* 38, 39: RES0 */
+FIELD(HDFGRTR_EL2, TRCID, 40, 1)
+FIELD(HDFGRTR_EL2, TRCIMSPECN, 41, 1)
+/* 42: RES0: TRCOSLAR is WO */
+FIELD(HDFGRTR_EL2, TRCOSLSR, 43, 1)
+FIELD(HDFGRTR_EL2, TRCPRGCTLR, 44, 1)
+FIELD(HDFGRTR_EL2, TRCSEQSTR, 45, 1)
+FIELD(HDFGRTR_EL2, TRCSSCSRN, 46, 1)
+FIELD(HDFGRTR_EL2, TRCSTATR, 47, 1)
+FIELD(HDFGRTR_EL2, TRCVICTLR, 48, 1)
+/* 49: RES0: TRFCR_EL1 is WO */
+FIELD(HDFGRTR_EL2, TRBBASER_EL1, 50, 1)
+FIELD(HDFGRTR_EL2, TRBIDR_EL1, 51, 1)
+FIELD(HDFGRTR_EL2, TRBLIMITR_EL1, 52, 1)
+FIELD(HDFGRTR_EL2, TRBMAR_EL1, 53, 1)
+FIELD(HDFGRTR_EL2, TRBPTR_EL1, 54, 1)
+FIELD(HDFGRTR_EL2, TRBSR_EL1, 55, 1)
+FIELD(HDFGRTR_EL2, TRBTRG_EL1, 56, 1)
+FIELD(HDFGRTR_EL2, PMUSERENR_EL0, 57, 1)
+FIELD(HDFGRTR_EL2, PMCEIDN_EL0, 58, 1)
+FIELD(HDFGRTR_EL2, NBRBIDR, 59, 1)
+FIELD(HDFGRTR_EL2, NBRBCTL, 60, 1)
+FIELD(HDFGRTR_EL2, NBRBDATA, 61, 1)
+FIELD(HDFGRTR_EL2, NPMSNEVFR_EL1, 62, 1)
+FIELD(HDFGRTR_EL2, PMBIDR_EL1, 63, 1)
+
+/*
+ * These match HDFGRTR_EL2, but bits for RO registers are RES0.
+ * A few bits are for WO registers, where the HDFGRTR_EL2 bit is RES0.
+ */
+FIELD(HDFGWTR_EL2, DBGBCRN_EL1, 0, 1)
+FIELD(HDFGWTR_EL2, DBGBVRN_EL1, 1, 1)
+FIELD(HDFGWTR_EL2, DBGWCRN_EL1, 2, 1)
+FIELD(HDFGWTR_EL2, DBGWVRN_EL1, 3, 1)
+FIELD(HDFGWTR_EL2, MDSCR_EL1, 4, 1)
+FIELD(HDFGWTR_EL2, DBGCLAIM, 5, 1)
+FIELD(HDFGWTR_EL2, DBGPRCR_EL1, 7, 1)
+FIELD(HDFGWTR_EL2, OSLAR_EL1, 8, 1)
+FIELD(HDFGWTR_EL2, OSLSR_EL1, 9, 1)
+FIELD(HDFGWTR_EL2, OSECCR_EL1, 10, 1)
+FIELD(HDFGWTR_EL2, OSDLR_EL1, 11, 1)
+FIELD(HDFGWTR_EL2, PMEVCNTRN_EL0, 12, 1)
+FIELD(HDFGWTR_EL2, PMEVTYPERN_EL0, 13, 1)
+FIELD(HDFGWTR_EL2, PMCCFILTR_EL0, 14, 1)
+FIELD(HDFGWTR_EL2, PMCCNTR_EL0, 15, 1)
+FIELD(HDFGWTR_EL2, PMCNTEN, 16, 1)
+FIELD(HDFGWTR_EL2, PMINTEN, 17, 1)
+FIELD(HDFGWTR_EL2, PMOVS, 18, 1)
+FIELD(HDFGWTR_EL2, PMSELR_EL0, 19, 1)
+FIELD(HDFGWTR_EL2, PMSWINC_EL0, 20, 1)
+FIELD(HDFGWTR_EL2, PMCR_EL0, 21, 1)
+FIELD(HDFGWTR_EL2, PMBLIMITR_EL1, 23, 1)
+FIELD(HDFGWTR_EL2, PMBPTR_EL1, 24, 1)
+FIELD(HDFGWTR_EL2, PMBSR_EL1, 25, 1)
+FIELD(HDFGWTR_EL2, PMSCR_EL1, 26, 1)
+FIELD(HDFGWTR_EL2, PMSEVFR_EL1, 27, 1)
+FIELD(HDFGWTR_EL2, PMSFCR_EL1, 28, 1)
+FIELD(HDFGWTR_EL2, PMSICR_EL1, 29, 1)
+FIELD(HDFGWTR_EL2, PMSIRR_EL1, 31, 1)
+FIELD(HDFGWTR_EL2, PMSLATFR_EL1, 32, 1)
+FIELD(HDFGWTR_EL2, TRC, 33, 1)
+FIELD(HDFGWTR_EL2, TRCAUXCTLR, 35, 1)
+FIELD(HDFGWTR_EL2, TRCCLAIM, 36, 1)
+FIELD(HDFGWTR_EL2, TRCCNTVRn, 37, 1)
+FIELD(HDFGWTR_EL2, TRCIMSPECN, 41, 1)
+FIELD(HDFGWTR_EL2, TRCOSLAR, 42, 1)
+FIELD(HDFGWTR_EL2, TRCPRGCTLR, 44, 1)
+FIELD(HDFGWTR_EL2, TRCSEQSTR, 45, 1)
+FIELD(HDFGWTR_EL2, TRCSSCSRN, 46, 1)
+FIELD(HDFGWTR_EL2, TRCVICTLR, 48, 1)
+FIELD(HDFGWTR_EL2, TRFCR_EL1, 49, 1)
+FIELD(HDFGWTR_EL2, TRBBASER_EL1, 50, 1)
+FIELD(HDFGWTR_EL2, TRBLIMITR_EL1, 52, 1)
+FIELD(HDFGWTR_EL2, TRBMAR_EL1, 53, 1)
+FIELD(HDFGWTR_EL2, TRBPTR_EL1, 54, 1)
+FIELD(HDFGWTR_EL2, TRBSR_EL1, 55, 1)
+FIELD(HDFGWTR_EL2, TRBTRG_EL1, 56, 1)
+FIELD(HDFGWTR_EL2, PMUSERENR_EL0, 57, 1)
+FIELD(HDFGWTR_EL2, NBRBCTL, 60, 1)
+FIELD(HDFGWTR_EL2, NBRBDATA, 61, 1)
+FIELD(HDFGWTR_EL2, NPMSNEVFR_EL1, 62, 1)
+
+/* Which fine-grained trap bit register to check, if any */
+FIELD(FGT, TYPE, 10, 3)
+FIELD(FGT, REV, 9, 1) /* Is bit sense reversed? */
+FIELD(FGT, IDX, 6, 3) /* Index within a uint64_t[] array */
+FIELD(FGT, BITPOS, 0, 6) /* Bit position within the uint64_t */
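+
+/*
+ * Sketch of how these fields are intended to be consumed at access-check
+ * time (the real logic lives in the cpreg access path; the array name
+ * fgt_regs[] here is an assumption, purely for illustration):
+ *
+ *     int idx = FIELD_EX32(ri->fgt, FGT, IDX);
+ *     int bitpos = FIELD_EX32(ri->fgt, FGT, BITPOS);
+ *     bool rev = FIELD_EX32(ri->fgt, FGT, REV);
+ *     bool bit = extract64(fgt_regs[idx], bitpos, 1);
+ *     if (bit != rev) {
+ *         ... trap the access to EL2 ...
+ *     }
+ */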
+
+/*
+ * Macros to define FGT_##bitname enum constants to use in ARMCPRegInfo::fgt
+ * fields. We assume for brevity's sake that there are no duplicated
+ * bit names across the various FGT registers.
+ */
+#define DO_BIT(REG, BITNAME) \
+ FGT_##BITNAME = FGT_##REG | R_##REG##_EL2_##BITNAME##_SHIFT
+
+/* Some bits have reversed sense, so 0 means trap and 1 means not */
+#define DO_REV_BIT(REG, BITNAME) \
+ FGT_##BITNAME = FGT_##REG | FGT_REV | R_##REG##_EL2_##BITNAME##_SHIFT
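+
+/*
+ * For example, DO_BIT(HFGRTR, SCTLR_EL1) expands to
+ *     FGT_SCTLR_EL1 = FGT_HFGRTR | R_HFGRTR_EL2_SCTLR_EL1_SHIFT
+ * i.e. "this register's trap bit is SCTLR_EL1's bit position within
+ * HFGRTR_EL2/HFGWTR_EL2, checked for both reads and writes".
+ */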
+
+typedef enum FGTBit {
+ /*
+ * These bits tell us which register arrays to use:
+ * if FGT_R is set then reads are checked against fgt_read[];
+ * if FGT_W is set then writes are checked against fgt_write[];
+ * if FGT_EXEC is set then all accesses are checked against fgt_exec[].
+ *
+ * For almost all bits in the R/W register pairs, the bit exists in
+ * both registers for a RW register, in HFGRTR/HDFGRTR for a RO register
+ * with the corresponding HFGWTR/HDFGWTR bit being RES0, and vice-versa
+ * for a WO register. There are unfortunately a couple of exceptions
+ * (PMCR_EL0, TRFCR_EL1) where the register being trapped is RW but
+ * the FGT system only allows trapping of writes, not reads.
+ *
+ * Note that we arrange these bits so that a 0 FGTBit means "no trap".
+ */
+ FGT_R = 1 << R_FGT_TYPE_SHIFT,
+ FGT_W = 2 << R_FGT_TYPE_SHIFT,
+ FGT_EXEC = 4 << R_FGT_TYPE_SHIFT,
+ FGT_RW = FGT_R | FGT_W,
+ /* Bit to identify whether trap bit is reversed sense */
+ FGT_REV = R_FGT_REV_MASK,
+
+ /*
+ * If a bit exists in HFGRTR/HDFGRTR then either the register being
+ * trapped is RO or the bit also exists in HFGWTR/HDFGWTR, so we either
+ * want to trap for both reads and writes or else it's harmless to mark
+ * it as trap-on-writes.
+ * If a bit exists only in HFGWTR/HDFGWTR then either the register being
+ * trapped is WO, or else it is one of the two oddball special cases
+ * which are RW but have only a write trap. We mark these as only
+ * FGT_W so we get the right behaviour for those special cases.
+ * (If a bit was added in future that provided only a read trap for an
+ * RW register we'd need to do something special to get the FGT_R bit
+ * only. But this seems unlikely to happen.)
+ *
+ * So for the DO_BIT/DO_REV_BIT macros: use FGT_HFGRTR/FGT_HDFGRTR if
+ * the bit exists in that register. Otherwise use FGT_HFGWTR/FGT_HDFGWTR.
+ */
+ FGT_HFGRTR = FGT_RW | (FGTREG_HFGRTR << R_FGT_IDX_SHIFT),
+ FGT_HFGWTR = FGT_W | (FGTREG_HFGWTR << R_FGT_IDX_SHIFT),
+ FGT_HDFGRTR = FGT_RW | (FGTREG_HDFGRTR << R_FGT_IDX_SHIFT),
+ FGT_HDFGWTR = FGT_W | (FGTREG_HDFGWTR << R_FGT_IDX_SHIFT),
+ FGT_HFGITR = FGT_EXEC | (FGTREG_HFGITR << R_FGT_IDX_SHIFT),
+
+ /* Trap bits in HFGRTR_EL2 / HFGWTR_EL2, starting from bit 0. */
+ DO_BIT(HFGRTR, AFSR0_EL1),
+ DO_BIT(HFGRTR, AFSR1_EL1),
+ DO_BIT(HFGRTR, AIDR_EL1),
+ DO_BIT(HFGRTR, AMAIR_EL1),
+ DO_BIT(HFGRTR, APDAKEY),
+ DO_BIT(HFGRTR, APDBKEY),
+ DO_BIT(HFGRTR, APGAKEY),
+ DO_BIT(HFGRTR, APIAKEY),
+ DO_BIT(HFGRTR, APIBKEY),
+ DO_BIT(HFGRTR, CCSIDR_EL1),
+ DO_BIT(HFGRTR, CLIDR_EL1),
+ DO_BIT(HFGRTR, CONTEXTIDR_EL1),
+ DO_BIT(HFGRTR, CPACR_EL1),
+ DO_BIT(HFGRTR, CSSELR_EL1),
+ DO_BIT(HFGRTR, CTR_EL0),
+ DO_BIT(HFGRTR, DCZID_EL0),
+ DO_BIT(HFGRTR, ESR_EL1),
+ DO_BIT(HFGRTR, FAR_EL1),
+ DO_BIT(HFGRTR, ISR_EL1),
+ DO_BIT(HFGRTR, LORC_EL1),
+ DO_BIT(HFGRTR, LOREA_EL1),
+ DO_BIT(HFGRTR, LORID_EL1),
+ DO_BIT(HFGRTR, LORN_EL1),
+ DO_BIT(HFGRTR, LORSA_EL1),
+ DO_BIT(HFGRTR, MAIR_EL1),
+ DO_BIT(HFGRTR, MIDR_EL1),
+ DO_BIT(HFGRTR, MPIDR_EL1),
+ DO_BIT(HFGRTR, PAR_EL1),
+ DO_BIT(HFGRTR, REVIDR_EL1),
+ DO_BIT(HFGRTR, SCTLR_EL1),
+ DO_BIT(HFGRTR, SCXTNUM_EL1),
+ DO_BIT(HFGRTR, SCXTNUM_EL0),
+ DO_BIT(HFGRTR, TCR_EL1),
+ DO_BIT(HFGRTR, TPIDR_EL1),
+ DO_BIT(HFGRTR, TPIDRRO_EL0),
+ DO_BIT(HFGRTR, TPIDR_EL0),
+ DO_BIT(HFGRTR, TTBR0_EL1),
+ DO_BIT(HFGRTR, TTBR1_EL1),
+ DO_BIT(HFGRTR, VBAR_EL1),
+ DO_BIT(HFGRTR, ICC_IGRPENN_EL1),
+ DO_BIT(HFGRTR, ERRIDR_EL1),
+ DO_REV_BIT(HFGRTR, NSMPRI_EL1),
+ DO_REV_BIT(HFGRTR, NTPIDR2_EL0),
+
+ /* Trap bits in HDFGRTR_EL2 / HDFGWTR_EL2, starting from bit 0. */
+ DO_BIT(HDFGRTR, DBGBCRN_EL1),
+ DO_BIT(HDFGRTR, DBGBVRN_EL1),
+ DO_BIT(HDFGRTR, DBGWCRN_EL1),
+ DO_BIT(HDFGRTR, DBGWVRN_EL1),
+ DO_BIT(HDFGRTR, MDSCR_EL1),
+ DO_BIT(HDFGRTR, DBGCLAIM),
+ DO_BIT(HDFGWTR, OSLAR_EL1),
+ DO_BIT(HDFGRTR, OSLSR_EL1),
+ DO_BIT(HDFGRTR, OSECCR_EL1),
+ DO_BIT(HDFGRTR, OSDLR_EL1),
+ DO_BIT(HDFGRTR, PMEVCNTRN_EL0),
+ DO_BIT(HDFGRTR, PMEVTYPERN_EL0),
+ DO_BIT(HDFGRTR, PMCCFILTR_EL0),
+ DO_BIT(HDFGRTR, PMCCNTR_EL0),
+ DO_BIT(HDFGRTR, PMCNTEN),
+ DO_BIT(HDFGRTR, PMINTEN),
+ DO_BIT(HDFGRTR, PMOVS),
+ DO_BIT(HDFGRTR, PMSELR_EL0),
+ DO_BIT(HDFGWTR, PMSWINC_EL0),
+ DO_BIT(HDFGWTR, PMCR_EL0),
+ DO_BIT(HDFGRTR, PMMIR_EL1),
+ DO_BIT(HDFGRTR, PMCEIDN_EL0),
+
+ /* Trap bits in HFGITR_EL2, starting from bit 0 */
+ DO_BIT(HFGITR, ICIALLUIS),
+ DO_BIT(HFGITR, ICIALLU),
+ DO_BIT(HFGITR, ICIVAU),
+ DO_BIT(HFGITR, DCIVAC),
+ DO_BIT(HFGITR, DCISW),
+ DO_BIT(HFGITR, DCCSW),
+ DO_BIT(HFGITR, DCCISW),
+ DO_BIT(HFGITR, DCCVAU),
+ DO_BIT(HFGITR, DCCVAP),
+ DO_BIT(HFGITR, DCCVADP),
+ DO_BIT(HFGITR, DCCIVAC),
+ DO_BIT(HFGITR, DCZVA),
+ DO_BIT(HFGITR, ATS1E1R),
+ DO_BIT(HFGITR, ATS1E1W),
+ DO_BIT(HFGITR, ATS1E0R),
+ DO_BIT(HFGITR, ATS1E0W),
+ DO_BIT(HFGITR, ATS1E1RP),
+ DO_BIT(HFGITR, ATS1E1WP),
+ DO_BIT(HFGITR, TLBIVMALLE1OS),
+ DO_BIT(HFGITR, TLBIVAE1OS),
+ DO_BIT(HFGITR, TLBIASIDE1OS),
+ DO_BIT(HFGITR, TLBIVAAE1OS),
+ DO_BIT(HFGITR, TLBIVALE1OS),
+ DO_BIT(HFGITR, TLBIVAALE1OS),
+ DO_BIT(HFGITR, TLBIRVAE1OS),
+ DO_BIT(HFGITR, TLBIRVAAE1OS),
+ DO_BIT(HFGITR, TLBIRVALE1OS),
+ DO_BIT(HFGITR, TLBIRVAALE1OS),
+ DO_BIT(HFGITR, TLBIVMALLE1IS),
+ DO_BIT(HFGITR, TLBIVAE1IS),
+ DO_BIT(HFGITR, TLBIASIDE1IS),
+ DO_BIT(HFGITR, TLBIVAAE1IS),
+ DO_BIT(HFGITR, TLBIVALE1IS),
+ DO_BIT(HFGITR, TLBIVAALE1IS),
+ DO_BIT(HFGITR, TLBIRVAE1IS),
+ DO_BIT(HFGITR, TLBIRVAAE1IS),
+ DO_BIT(HFGITR, TLBIRVALE1IS),
+ DO_BIT(HFGITR, TLBIRVAALE1IS),
+ DO_BIT(HFGITR, TLBIRVAE1),
+ DO_BIT(HFGITR, TLBIRVAAE1),
+ DO_BIT(HFGITR, TLBIRVALE1),
+ DO_BIT(HFGITR, TLBIRVAALE1),
+ DO_BIT(HFGITR, TLBIVMALLE1),
+ DO_BIT(HFGITR, TLBIVAE1),
+ DO_BIT(HFGITR, TLBIASIDE1),
+ DO_BIT(HFGITR, TLBIVAAE1),
+ DO_BIT(HFGITR, TLBIVALE1),
+ DO_BIT(HFGITR, TLBIVAALE1),
+ DO_BIT(HFGITR, CFPRCTX),
+ DO_BIT(HFGITR, DVPRCTX),
+ DO_BIT(HFGITR, CPPRCTX),
+ DO_BIT(HFGITR, DCCVAC),
+} FGTBit;
+
+#undef DO_BIT
+#undef DO_REV_BIT
+
+typedef struct ARMCPRegInfo ARMCPRegInfo;
+
+/*
+ * Access functions for coprocessor registers. These cannot fail and
+ * may not raise exceptions.
+ */
+typedef uint64_t CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque);
+typedef void CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque,
+ uint64_t value);
+/* Access permission check functions for coprocessor registers. */
+typedef CPAccessResult CPAccessFn(CPUARMState *env,
+ const ARMCPRegInfo *opaque,
+ bool isread);
+/* Hook function for register reset */
+typedef void CPResetFn(CPUARMState *env, const ARMCPRegInfo *opaque);
+
+#define CP_ANY 0xff
+
+/* Flags in the high bits of nv2_redirect_offset */
+#define NV2_REDIR_NV1 0x4000 /* Only redirect when HCR_EL2.NV1 == 1 */
+#define NV2_REDIR_NO_NV1 0x8000 /* Only redirect when HCR_EL2.NV1 == 0 */
+#define NV2_REDIR_FLAG_MASK 0xc000
+
+/* Definition of an ARM coprocessor register */
+struct ARMCPRegInfo {
+ /* Name of register (useful mainly for debugging, need not be unique) */
+ const char *name;
+ /*
+ * Location of register: coprocessor number and (crn,crm,opc1,opc2)
+ * tuple. Any of crm, opc1 and opc2 may be CP_ANY to indicate a
+ * 'wildcard' field -- any value of that field in the MRC/MCR insn
+ * will be decoded to this register. The register read and write
+ * callbacks will be passed an ARMCPRegInfo with the crn/crm/opc1/opc2
+ * used by the program, so it is possible to register a wildcard and
+ * then behave differently on read/write if necessary.
+ * For 64 bit registers, only crm and opc1 are relevant; crn and opc2
+ * must both be zero.
+ * For AArch64-visible registers, opc0 is also used.
+ * Since there are no "coprocessors" in AArch64, cp is purely used as a
+ * way to distinguish (for KVM's benefit) guest-visible system registers
+ * from demuxed ones provided to preserve the "no side effects on
+ * KVM register read/write from QEMU" semantics. cp==0x13 is guest
+ * visible (to match KVM's encoding); cp==0 will be converted to
+ * cp==0x13 when the ARMCPRegInfo is registered, for convenience.
+ */
+ uint8_t cp;
+ uint8_t crn;
+ uint8_t crm;
+ uint8_t opc0;
+ uint8_t opc1;
+ uint8_t opc2;
+ /* Execution state in which this register is visible: ARM_CP_STATE_* */
+ CPState state;
+ /* Register type: ARM_CP_* bits/values */
+ int type;
+ /* Access rights: PL*_[RW] */
+ CPAccessRights access;
+ /* Security state: ARM_CP_SECSTATE_* bits/values */
+ CPSecureState secure;
+ /*
+ * Which fine-grained trap register bit to check, if any. This
+ * value encodes both the trap register and bit within it.
+ */
+ FGTBit fgt;
+
+ /*
+ * Offset from VNCR_EL2 when FEAT_NV2 redirects access to memory;
+ * may include an NV2_REDIR_* flag.
+ */
+ uint32_t nv2_redirect_offset;
+
+ /*
+ * The opaque pointer passed to define_arm_cp_regs_with_opaque() when
+ * this register was defined: can be used to hand data through to the
+ * register read/write functions, since they are passed the ARMCPRegInfo*.
+ */
+ void *opaque;
+ /*
+ * Value of this register, if it is ARM_CP_CONST. Otherwise, if
+ * fieldoffset is non-zero, the reset value of the register.
+ */
+ uint64_t resetvalue;
+ /*
+ * Offset of the field in CPUARMState for this register.
+ * This is not needed if either:
+ * 1. type is ARM_CP_CONST or one of the ARM_CP_SPECIALs
+ * 2. both readfn and writefn are specified
+ */
+ ptrdiff_t fieldoffset; /* offsetof(CPUARMState, field) */
+
+ /*
+ * Offsets of the secure and non-secure fields in CPUARMState for the
+ * register if it is banked. These fields are only used during the static
+ * registration of a register. During hashing the bank associated
+ * with a given security state is copied to fieldoffset which is used from
+ * there on out.
+ *
+ * Register definitions are expected to use either fieldoffset or
+ * bank_fieldoffsets, but not both. A banked register must set both
+ * bank offsets; setting them is what marks the register as banked.
+ */
+ ptrdiff_t bank_fieldoffsets[2];
+
+ /*
+ * Function for making any access checks for this register in addition to
+ * those specified by the 'access' permissions bits. If NULL, no extra
+ * checks required. The access check is performed at runtime, not at
+ * translate time.
+ */
+ CPAccessFn *accessfn;
+ /*
+ * Function for handling reads of this register. If NULL, then reads
+ * will be done by loading from the offset into CPUARMState specified
+ * by fieldoffset.
+ */
+ CPReadFn *readfn;
+ /*
+ * Function for handling writes of this register. If NULL, then writes
+ * will be done by writing to the offset into CPUARMState specified
+ * by fieldoffset.
+ */
+ CPWriteFn *writefn;
+ /*
+ * Function for doing a "raw" read; used when we need to copy
+ * coprocessor state to the kernel for KVM or out for
+ * migration. This only needs to be provided if there is also a
+ * readfn and it has side effects (for instance clear-on-read bits).
+ */
+ CPReadFn *raw_readfn;
+ /*
+ * Function for doing a "raw" write; used when we need to copy KVM
+ * kernel coprocessor state into userspace, or for inbound
+ * migration. This only needs to be provided if there is also a
+ * writefn and it masks out "unwritable" bits or has write-one-to-clear
+ * or similar behaviour.
+ */
+ CPWriteFn *raw_writefn;
+ /*
+ * Function for resetting the register. If NULL, then reset will be done
+ * by writing resetvalue to the field specified in fieldoffset. If
+ * fieldoffset is 0 then no reset will be done.
+ */
+ CPResetFn *resetfn;
+
+ /*
+ * "Original" readfn, writefn, accessfn.
+ * For ARMv8.1-VHE register aliases, we overwrite the read/write
+ * accessor functions of various EL1/EL0 registers to perform the
+ * runtime check for which sysreg should actually be modified, and
+ * then forward the operation. Before overwriting the accessors,
+ * the original function is copied here, so that accesses that
+ * really do go to the EL1/EL0 version proceed normally.
+ * (The corresponding EL2 register is linked via opaque.)
+ */
+ CPReadFn *orig_readfn;
+ CPWriteFn *orig_writefn;
+ CPAccessFn *orig_accessfn;
+};
+
+/*
+ * Macros which are lvalues for the field in CPUARMState for the
+ * ARMCPRegInfo *ri.
+ */
+#define CPREG_FIELD32(env, ri) \
+ (*(uint32_t *)((char *)(env) + (ri)->fieldoffset))
+#define CPREG_FIELD64(env, ri) \
+ (*(uint64_t *)((char *)(env) + (ri)->fieldoffset))
+
+void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu, const ARMCPRegInfo *reg,
+ void *opaque);
+
+static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs)
+{
+ define_one_arm_cp_reg_with_opaque(cpu, regs, NULL);
+}
+
+void define_arm_cp_regs_with_opaque_len(ARMCPU *cpu, const ARMCPRegInfo *regs,
+ void *opaque, size_t len);
+
+#define define_arm_cp_regs_with_opaque(CPU, REGS, OPAQUE) \
+ do { \
+ QEMU_BUILD_BUG_ON(ARRAY_SIZE(REGS) == 0); \
+ define_arm_cp_regs_with_opaque_len(CPU, REGS, OPAQUE, \
+ ARRAY_SIZE(REGS)); \
+ } while (0)
+
+#define define_arm_cp_regs(CPU, REGS) \
+ define_arm_cp_regs_with_opaque(CPU, REGS, NULL)
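+
+/*
+ * Typical usage sketch. The register name, encoding and CPUARMState
+ * field below are hypothetical, purely to illustrate how the struct
+ * and macros above fit together:
+ *
+ *     static const ARMCPRegInfo foo_reginfo[] = {
+ *         { .name = "FOO_EL1", .state = ARM_CP_STATE_AA64,
+ *           .opc0 = 3, .opc1 = 0, .crn = 11, .crm = 0, .opc2 = 0,
+ *           .access = PL1_RW, .resetvalue = 0,
+ *           .fieldoffset = offsetof(CPUARMState, cp15.foo_el1) },
+ *     };
+ *
+ *     define_arm_cp_regs(cpu, foo_reginfo);
+ */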
+
+const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp);
+
+/*
+ * Definition of an ARM co-processor register as viewed from
+ * userspace. This is used for presenting sanitised versions of
+ * registers to userspace when emulating the Linux AArch64 CPU
+ * ID/feature ABI (advertised as HWCAP_CPUID).
+ */
+typedef struct ARMCPRegUserSpaceInfo {
+ /* Name of register */
+ const char *name;
+
+ /* Is the name actually a glob pattern */
+ bool is_glob;
+
+ /* Only some bits are exported to user space */
+ uint64_t exported_bits;
+
+ /* Fixed bits are applied after the mask */
+ uint64_t fixed_bits;
+} ARMCPRegUserSpaceInfo;
+
+void modify_arm_cp_regs_with_len(ARMCPRegInfo *regs, size_t regs_len,
+ const ARMCPRegUserSpaceInfo *mods,
+ size_t mods_len);
+
+#define modify_arm_cp_regs(REGS, MODS) \
+ do { \
+ QEMU_BUILD_BUG_ON(ARRAY_SIZE(REGS) == 0); \
+ QEMU_BUILD_BUG_ON(ARRAY_SIZE(MODS) == 0); \
+ modify_arm_cp_regs_with_len(REGS, ARRAY_SIZE(REGS), \
+ MODS, ARRAY_SIZE(MODS)); \
+ } while (0)
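+
+/*
+ * Sketch of a modification table. The register chosen and the bits
+ * exported are illustrative only, not a statement of what QEMU
+ * actually exposes; id_reginfo is a hypothetical (non-const) array
+ * of ARMCPRegInfo:
+ *
+ *     static const ARMCPRegUserSpaceInfo user_mods[] = {
+ *         { .name = "ID_AA64PFR0_EL1",
+ *           .exported_bits = R_ID_AA64PFR0_FP_MASK },
+ *         { .name = "ID_AA64ZFR0_EL1*", .is_glob = true },
+ *     };
+ *
+ *     modify_arm_cp_regs(id_reginfo, user_mods);
+ */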
+
+/* CPWriteFn that can be used to implement writes-ignored behaviour */
+void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value);
+/* CPReadFn that can be used for read-as-zero behaviour */
+uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri);
+
+/* CPWriteFn that just writes the value to ri->fieldoffset */
+void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value);
+
+/*
+ * CPResetFn that does nothing, for use if no reset is required even
+ * if fieldoffset is non-zero.
+ */
+void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque);
+
+/*
+ * Return true if this reginfo struct's field in the cpu state struct
+ * is 64 bits wide.
+ */
+static inline bool cpreg_field_is_64bit(const ARMCPRegInfo *ri)
+{
+ return (ri->state == ARM_CP_STATE_AA64) || (ri->type & ARM_CP_64BIT);
+}
+
+static inline bool cp_access_ok(int current_el,
+ const ARMCPRegInfo *ri, int isread)
+{
+ return (ri->access >> ((current_el * 2) + isread)) & 1;
+}
+
+/* Raw read of a coprocessor register (as needed for migration, etc) */
+uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri);
+
+/*
+ * Return true if the cp register encoding is in the "feature ID space" as
+ * defined by FEAT_IDST (and thus should be reported with ESR_ELx.EC
+ * as EC_SYSTEMREGISTERTRAP rather than EC_UNCATEGORIZED).
+ */
+static inline bool arm_cpreg_encoding_in_idspace(uint8_t opc0, uint8_t opc1,
+ uint8_t opc2,
+ uint8_t crn, uint8_t crm)
+{
+ return opc0 == 3 && (opc1 == 0 || opc1 == 1 || opc1 == 3) &&
+ crn == 0 && crm < 8;
+}
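+
+/*
+ * For instance, ID_AA64PFR0_EL1 (op0=3, op1=0, CRn=0, CRm=4, op2=0) and
+ * CTR_EL0 (op0=3, op1=3, CRn=0, CRm=0, op2=1) are both in the feature
+ * ID space, while SCTLR_EL1 (CRn=1) is not.
+ */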
+
+/*
+ * As arm_cpreg_encoding_in_idspace(), but take the encoding from an
+ * ARMCPRegInfo.
+ */
+static inline bool arm_cpreg_in_idspace(const ARMCPRegInfo *ri)
+{
+ return ri->state == ARM_CP_STATE_AA64 &&
+ arm_cpreg_encoding_in_idspace(ri->opc0, ri->opc1, ri->opc2,
+ ri->crn, ri->crm);
+}
+
+#ifdef CONFIG_USER_ONLY
+static inline void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) { }
+#else
+void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu);
+#endif
+
+CPAccessResult access_tvm_trvm(CPUARMState *, const ARMCPRegInfo *, bool);
+
+/**
+ * arm_cpreg_traps_in_nv: Return true if cpreg traps in nested virtualization
+ *
+ * Return true if this cpreg is one which should be trapped to EL2 if
+ * it is executed at EL1 when nested virtualization is enabled via HCR_EL2.NV.
+ */
+static inline bool arm_cpreg_traps_in_nv(const ARMCPRegInfo *ri)
+{
+ /*
+ * The Arm ARM defines the registers to be trapped in terms of
+ * their names (I_TZTZL). However the underlying principle is "if
+ * it would UNDEF at EL1 but work at EL2 then it should trap", and
+ * the way the encoding of sysregs and system instructions is done
+ * means that the right set of registers is exactly those where
+ * the opc1 field is 4 or 5. (You can see this also in the assert
+ * we do that the opc1 field and the permissions mask line up in
+ * define_one_arm_cp_reg_with_opaque().)
+ * Checking the opc1 field is easier for us and avoids the problem
+ * that we do not consistently use the right architectural names
+ * for all sysregs, since we treat the name field as largely for debug.
+ *
+ * Whichever way we do this check it is going to be somewhat fragile
+ * in the face of future new sysregs, but checking opc1 seems the
+ * approach least likely to break.
+ *
+ * In particular, note that the released sysreg XML defines that
+ * the FEAT_MEC sysregs and instructions do not follow this FEAT_NV
+ * trapping rule, so we will need to add an ARM_CP_* flag to indicate
+ * "register does not trap on NV" to handle those if/when we implement
+ * FEAT_MEC.
+ */
+ return ri->opc1 == 4 || ri->opc1 == 5;
+}
+
+#endif /* TARGET_ARM_CPREGS_H */
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
new file mode 100644
index 0000000000..e5758d9fbc
--- /dev/null
+++ b/target/arm/cpu-features.h
@@ -0,0 +1,1021 @@
+/*
+ * QEMU Arm CPU -- feature test functions
+ *
+ * Copyright (c) 2023 Linaro Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TARGET_ARM_FEATURES_H
+#define TARGET_ARM_FEATURES_H
+
+#include "hw/registerfields.h"
+
+/*
+ * Naming convention for isar_feature functions:
+ * Functions which test 32-bit ID registers should have _aa32_ in
+ * their name. Functions which test 64-bit ID registers should have
+ * _aa64_ in their name. These must only be used in code where we
+ * know for certain that the CPU has AArch32 or AArch64 respectively
+ * or where the correct answer for a CPU which doesn't implement that
+ * CPU state is "false" (eg when generating A32 or A64 code, if adding
+ * system registers that are specific to that CPU state, for "should
+ * we let this system register bit be set" tests where the 32-bit
+ * flavour of the register doesn't have the bit, and so on).
+ * Functions which simply ask "does this feature exist at all" have
+ * _any_ in their name, and always return the logical OR of the _aa64_
+ * and the _aa32_ function.
+ */
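+
+/*
+ * Callers normally reach these predicates through the cpu_isar_feature()
+ * macro rather than calling them directly, e.g. (sketch):
+ *
+ *     if (cpu_isar_feature(aa64_sve, cpu)) {
+ *         ... SVE is implemented on this CPU ...
+ *     }
+ */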
+
+/*
+ * 32-bit feature tests via id registers.
+ */
+static inline bool isar_feature_aa32_thumb_div(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0;
+}
+
+static inline bool isar_feature_aa32_arm_div(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1;
+}
+
+static inline bool isar_feature_aa32_lob(const ARMISARegisters *id)
+{
+ /* (M-profile) low-overhead loops and branch future */
+ return FIELD_EX32(id->id_isar0, ID_ISAR0, CMPBRANCH) >= 3;
+}
+
+static inline bool isar_feature_aa32_jazelle(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0;
+}
+
+static inline bool isar_feature_aa32_aes(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) != 0;
+}
+
+static inline bool isar_feature_aa32_pmull(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) > 1;
+}
+
+static inline bool isar_feature_aa32_sha1(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA1) != 0;
+}
+
+static inline bool isar_feature_aa32_sha2(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA2) != 0;
+}
+
+static inline bool isar_feature_aa32_crc32(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar5, ID_ISAR5, CRC32) != 0;
+}
+
+static inline bool isar_feature_aa32_rdm(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar5, ID_ISAR5, RDM) != 0;
+}
+
+static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0;
+}
+
+static inline bool isar_feature_aa32_jscvt(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar6, ID_ISAR6, JSCVT) != 0;
+}
+
+static inline bool isar_feature_aa32_dp(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0;
+}
+
+static inline bool isar_feature_aa32_fhm(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar6, ID_ISAR6, FHM) != 0;
+}
+
+static inline bool isar_feature_aa32_sb(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar6, ID_ISAR6, SB) != 0;
+}
+
+static inline bool isar_feature_aa32_predinv(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar6, ID_ISAR6, SPECRES) != 0;
+}
+
+static inline bool isar_feature_aa32_bf16(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar6, ID_ISAR6, BF16) != 0;
+}
+
+static inline bool isar_feature_aa32_i8mm(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_isar6, ID_ISAR6, I8MM) != 0;
+}
+
+static inline bool isar_feature_aa32_ras(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_pfr0, ID_PFR0, RAS) != 0;
+}
+
+static inline bool isar_feature_aa32_mprofile(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_pfr1, ID_PFR1, MPROGMOD) != 0;
+}
+
+static inline bool isar_feature_aa32_m_sec_state(const ARMISARegisters *id)
+{
+ /*
+ * Return true if M-profile state handling insns
+ * (VSCCLRM, CLRM, FPCTX access insns) are implemented
+ */
+ return FIELD_EX32(id->id_pfr1, ID_PFR1, SECURITY) >= 3;
+}
+
+static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id)
+{
+ /* Sadly this is encoded differently for A-profile and M-profile */
+ if (isar_feature_aa32_mprofile(id)) {
+ return FIELD_EX32(id->mvfr1, MVFR1, FP16) > 0;
+ } else {
+ return FIELD_EX32(id->mvfr1, MVFR1, FPHP) >= 3;
+ }
+}
+
+static inline bool isar_feature_aa32_mve(const ARMISARegisters *id)
+{
+ /*
+ * Return true if MVE is supported (either integer or floating point).
+ * We must check for M-profile as the MVFR1 field means something
+ * else for A-profile.
+ */
+ return isar_feature_aa32_mprofile(id) &&
+ FIELD_EX32(id->mvfr1, MVFR1, MVE) > 0;
+}
+
+static inline bool isar_feature_aa32_mve_fp(const ARMISARegisters *id)
+{
+ /*
+ * Return true if MVE with floating point is supported.
+ * We must check for M-profile as the MVFR1 field means something
+ * else for A-profile.
+ */
+ return isar_feature_aa32_mprofile(id) &&
+ FIELD_EX32(id->mvfr1, MVFR1, MVE) >= 2;
+}
+
+static inline bool isar_feature_aa32_vfp_simd(const ARMISARegisters *id)
+{
+ /*
+ * Return true if either VFP or SIMD is implemented.
+ * In this case, a minimum of VFP w/ D0-D15.
+ */
+ return FIELD_EX32(id->mvfr0, MVFR0, SIMDREG) > 0;
+}
+
+static inline bool isar_feature_aa32_simd_r32(const ARMISARegisters *id)
+{
+ /* Return true if D16-D31 are implemented */
+ return FIELD_EX32(id->mvfr0, MVFR0, SIMDREG) >= 2;
+}
+
+static inline bool isar_feature_aa32_fpshvec(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->mvfr0, MVFR0, FPSHVEC) > 0;
+}
+
+static inline bool isar_feature_aa32_fpsp_v2(const ARMISARegisters *id)
+{
+ /* Return true if CPU supports single precision floating point, VFPv2 */
+ return FIELD_EX32(id->mvfr0, MVFR0, FPSP) > 0;
+}
+
+static inline bool isar_feature_aa32_fpsp_v3(const ARMISARegisters *id)
+{
+ /* Return true if CPU supports single precision floating point, VFPv3 */
+ return FIELD_EX32(id->mvfr0, MVFR0, FPSP) >= 2;
+}
+
+static inline bool isar_feature_aa32_fpdp_v2(const ARMISARegisters *id)
+{
+ /* Return true if CPU supports double precision floating point, VFPv2 */
+ return FIELD_EX32(id->mvfr0, MVFR0, FPDP) > 0;
+}
+
+static inline bool isar_feature_aa32_fpdp_v3(const ARMISARegisters *id)
+{
+ /* Return true if CPU supports double precision floating point, VFPv3 */
+ return FIELD_EX32(id->mvfr0, MVFR0, FPDP) >= 2;
+}
+
+static inline bool isar_feature_aa32_vfp(const ARMISARegisters *id)
+{
+ return isar_feature_aa32_fpsp_v2(id) || isar_feature_aa32_fpdp_v2(id);
+}
+
+/*
+ * We always set the FP and SIMD FP16 fields to indicate identical
+ * levels of support (assuming SIMD is implemented at all), so
+ * we only need one set of accessors.
+ */
+static inline bool isar_feature_aa32_fp16_spconv(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 0;
+}
+
+static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->mvfr1, MVFR1, FPHP) > 1;
+}
+
+/*
+ * Note that this ID register field covers both VFP and Neon FMAC,
+ * so should usually be tested in combination with some other
+ * check that confirms the presence of whichever of VFP or Neon is
+ * relevant, to avoid accidentally enabling a Neon feature on
+ * a VFP-no-Neon core or vice-versa.
+ */
+static inline bool isar_feature_aa32_simdfmac(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->mvfr1, MVFR1, SIMDFMAC) != 0;
+}
+
+static inline bool isar_feature_aa32_vsel(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 1;
+}
+
+static inline bool isar_feature_aa32_vcvt_dr(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 2;
+}
+
+static inline bool isar_feature_aa32_vrint(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 3;
+}
+
+static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->mvfr2, MVFR2, FPMISC) >= 4;
+}
+
+static inline bool isar_feature_aa32_pxn(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_mmfr0, ID_MMFR0, VMSA) >= 4;
+}
+
+static inline bool isar_feature_aa32_pan(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) != 0;
+}
+
+static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) >= 2;
+}
+
+static inline bool isar_feature_aa32_pmuv3p1(const ARMISARegisters *id)
+{
+ /* 0xf means "non-standard IMPDEF PMU" */
+ return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 4 &&
+ FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf;
+}
+
+static inline bool isar_feature_aa32_pmuv3p4(const ARMISARegisters *id)
+{
+ /* 0xf means "non-standard IMPDEF PMU" */
+ return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 5 &&
+ FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf;
+}
+
+static inline bool isar_feature_aa32_pmuv3p5(const ARMISARegisters *id)
+{
+ /* 0xf means "non-standard IMPDEF PMU" */
+ return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 6 &&
+ FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf;
+}
+
+static inline bool isar_feature_aa32_hpd(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_mmfr4, ID_MMFR4, HPDS) != 0;
+}
+
+static inline bool isar_feature_aa32_ac2(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_mmfr4, ID_MMFR4, AC2) != 0;
+}
+
+static inline bool isar_feature_aa32_ccidx(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_mmfr4, ID_MMFR4, CCIDX) != 0;
+}
+
+static inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0;
+}
+
+static inline bool isar_feature_aa32_half_evt(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_mmfr4, ID_MMFR4, EVT) >= 1;
+}
+
+static inline bool isar_feature_aa32_evt(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_mmfr4, ID_MMFR4, EVT) >= 2;
+}
+
+static inline bool isar_feature_aa32_dit(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_pfr0, ID_PFR0, DIT) != 0;
+}
+
+static inline bool isar_feature_aa32_ssbs(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_pfr2, ID_PFR2, SSBS) != 0;
+}
+
+static inline bool isar_feature_aa32_debugv7p1(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_dfr0, ID_DFR0, COPDBG) >= 5;
+}
+
+static inline bool isar_feature_aa32_debugv8p2(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->id_dfr0, ID_DFR0, COPDBG) >= 8;
+}
+
+static inline bool isar_feature_aa32_doublelock(const ARMISARegisters *id)
+{
+ return FIELD_EX32(id->dbgdevid, DBGDEVID, DOUBLELOCK) > 0;
+}
+
+/*
+ * 64-bit feature tests via id registers.
+ */
+static inline bool isar_feature_aa64_aes(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) != 0;
+}
+
+static inline bool isar_feature_aa64_pmull(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) > 1;
+}
+
+static inline bool isar_feature_aa64_sha1(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA1) != 0;
+}
+
+static inline bool isar_feature_aa64_sha256(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) != 0;
+}
+
+static inline bool isar_feature_aa64_sha512(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) > 1;
+}
+
+static inline bool isar_feature_aa64_crc32(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, CRC32) != 0;
+}
+
+static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, ATOMIC) != 0;
+}
+
+static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RDM) != 0;
+}
+
+static inline bool isar_feature_aa64_sha3(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA3) != 0;
+}
+
+static inline bool isar_feature_aa64_sm3(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM3) != 0;
+}
+
+static inline bool isar_feature_aa64_sm4(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM4) != 0;
+}
+
+static inline bool isar_feature_aa64_dp(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0;
+}
+
+static inline bool isar_feature_aa64_fhm(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, FHM) != 0;
+}
+
+static inline bool isar_feature_aa64_condm_4(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) != 0;
+}
+
+static inline bool isar_feature_aa64_condm_5(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) >= 2;
+}
+
+static inline bool isar_feature_aa64_rndr(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RNDR) != 0;
+}
+
+static inline bool isar_feature_aa64_tlbirange(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) == 2;
+}
+
+static inline bool isar_feature_aa64_tlbios(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) != 0;
+}
+
+static inline bool isar_feature_aa64_jscvt(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, JSCVT) != 0;
+}
+
+static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0;
+}
+
+/*
+ * These are the values from APA/API/APA3.
+ * In general these must be compared '>=', per the normal Arm ARM
+ * treatment of fields in ID registers.
+ */
+typedef enum {
+ PauthFeat_None = 0,
+ PauthFeat_1 = 1,
+ PauthFeat_EPAC = 2,
+ PauthFeat_2 = 3,
+ PauthFeat_FPAC = 4,
+ PauthFeat_FPACCOMBINED = 5,
+} ARMPauthFeature;
+
+static inline ARMPauthFeature
+isar_feature_pauth_feature(const ARMISARegisters *id)
+{
+ /*
+ * Architecturally, only one of {APA,API,APA3} may be active (non-zero)
+ * and the other two must be zero. Thus we may avoid conditionals.
+ */
+ return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) |
+ FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, API) |
+ FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3));
+}
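+
+/*
+ * Per the '>=' rule above, a caller needing at least FEAT_FPAC
+ * behaviour (faulting authentication) would test, for example:
+ *
+ *     if (isar_feature_pauth_feature(id) >= PauthFeat_FPAC) {
+ *         ... authentication failures fault ...
+ *     }
+ */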
+
+static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id)
+{
+ /*
+ * Return true if any form of pauth is enabled, as this
+ * predicate controls migration of the 128-bit keys.
+ */
+ return isar_feature_pauth_feature(id) != PauthFeat_None;
+}
+
+static inline bool isar_feature_aa64_pauth_qarma5(const ARMISARegisters *id)
+{
+ /*
+ * Return true if pauth is enabled with the architected QARMA5 algorithm.
+ * QEMU will always enable or disable both APA and GPA.
+ */
+ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) != 0;
+}
+
+static inline bool isar_feature_aa64_pauth_qarma3(const ARMISARegisters *id)
+{
+ /*
+ * Return true if pauth is enabled with the architected QARMA3 algorithm.
+ * QEMU will always enable or disable both APA3 and GPA3.
+ */
+ return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3) != 0;
+}
+
+static inline bool isar_feature_aa64_sb(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SB) != 0;
+}
+
+static inline bool isar_feature_aa64_predinv(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SPECRES) != 0;
+}
+
+static inline bool isar_feature_aa64_frint(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FRINTTS) != 0;
+}
+
+static inline bool isar_feature_aa64_dcpop(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) != 0;
+}
+
+static inline bool isar_feature_aa64_dcpodp(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, DPB) >= 2;
+}
+
+static inline bool isar_feature_aa64_bf16(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, BF16) != 0;
+}
+
+static inline bool isar_feature_aa64_rcpc_8_3(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) != 0;
+}
+
+static inline bool isar_feature_aa64_rcpc_8_4(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) >= 2;
+}
+
+static inline bool isar_feature_aa64_i8mm(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, I8MM) != 0;
+}
+
+static inline bool isar_feature_aa64_hbc(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, BC) != 0;
+}
+
+static inline bool isar_feature_aa64_mops(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS);
+}
+
+static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id)
+{
+ /* We always set the AdvSIMD and FP fields identically. */
+ return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) != 0xf;
+}
+
+static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id)
+{
+ /* We always set the AdvSIMD and FP fields identically wrt FP16. */
+ return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1;
+}
+
+static inline bool isar_feature_aa64_aa32(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL0) >= 2;
+}
+
+static inline bool isar_feature_aa64_aa32_el1(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL1) >= 2;
+}
+
+static inline bool isar_feature_aa64_aa32_el2(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL2) >= 2;
+}
+
+static inline bool isar_feature_aa64_ras(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) != 0;
+}
+
+static inline bool isar_feature_aa64_doublefault(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RAS) >= 2;
+}
+
+static inline bool isar_feature_aa64_sve(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0;
+}
+
+static inline bool isar_feature_aa64_sel2(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SEL2) != 0;
+}
+
+static inline bool isar_feature_aa64_rme(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, RME) != 0;
+}
+
+static inline bool isar_feature_aa64_dit(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, DIT) != 0;
+}
+
+static inline bool isar_feature_aa64_scxtnum(const ARMISARegisters *id)
+{
+ int key = FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, CSV2);
+ if (key >= 2) {
+ return true; /* FEAT_CSV2_2 */
+ }
+ if (key == 1) {
+ key = FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, CSV2_FRAC);
+ return key >= 2; /* FEAT_CSV2_1p2 */
+ }
+ return false;
+}
+
+static inline bool isar_feature_aa64_ssbs(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SSBS) != 0;
+}
+
+static inline bool isar_feature_aa64_bti(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0;
+}
+
+static inline bool isar_feature_aa64_mte_insn_reg(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) != 0;
+}
+
+static inline bool isar_feature_aa64_mte(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2;
+}
+
+static inline bool isar_feature_aa64_mte3(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 3;
+}
+
+static inline bool isar_feature_aa64_sme(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, SME) != 0;
+}
+
+static inline bool isar_feature_aa64_tgran4_lpa2(const ARMISARegisters *id)
+{
+ return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 1;
+}
+
+static inline bool isar_feature_aa64_tgran4_2_lpa2(const ARMISARegisters *id)
+{
+ unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4_2);
+ return t >= 3 || (t == 0 && isar_feature_aa64_tgran4_lpa2(id));
+}
+
+static inline bool isar_feature_aa64_tgran16_lpa2(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 2;
+}
+
+static inline bool isar_feature_aa64_tgran16_2_lpa2(const ARMISARegisters *id)
+{
+ unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16_2);
+ return t >= 3 || (t == 0 && isar_feature_aa64_tgran16_lpa2(id));
+}
+
+static inline bool isar_feature_aa64_tgran4(const ARMISARegisters *id)
+{
+ return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 0;
+}
+
+static inline bool isar_feature_aa64_tgran16(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16) >= 1;
+}
+
+static inline bool isar_feature_aa64_tgran64(const ARMISARegisters *id)
+{
+ return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN64) >= 0;
+}
+
+static inline bool isar_feature_aa64_tgran4_2(const ARMISARegisters *id)
+{
+ unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4_2);
+ return t >= 2 || (t == 0 && isar_feature_aa64_tgran4(id));
+}
+
+static inline bool isar_feature_aa64_tgran16_2(const ARMISARegisters *id)
+{
+ unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN16_2);
+ return t >= 2 || (t == 0 && isar_feature_aa64_tgran16(id));
+}
+
+static inline bool isar_feature_aa64_tgran64_2(const ARMISARegisters *id)
+{
+ unsigned t = FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN64_2);
+ return t >= 2 || (t == 0 && isar_feature_aa64_tgran64(id));
+}
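+
+/*
+ * In the three *_2 helpers above, a stage-2 granule field value of 0
+ * means "support is as indicated by the corresponding stage-1 field",
+ * hence the fallback to the stage-1 test; values of 2 or more indicate
+ * that the granule is supported at stage 2 directly.
+ */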
+
+static inline bool isar_feature_aa64_fgt(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, FGT) != 0;
+}
+
+static inline bool isar_feature_aa64_ecv_traps(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 0;
+}
+
+static inline bool isar_feature_aa64_ecv(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr0, ID_AA64MMFR0, ECV) > 1;
+}
+
+static inline bool isar_feature_aa64_vh(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, VH) != 0;
+}
+
+static inline bool isar_feature_aa64_lor(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, LO) != 0;
+}
+
+static inline bool isar_feature_aa64_pan(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) != 0;
+}
+
+static inline bool isar_feature_aa64_ats1e1(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 2;
+}
+
+static inline bool isar_feature_aa64_pan3(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 3;
+}
+
+static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0;
+}
+
+static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0;
+}
+
+static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) != 0;
+}
+
+static inline bool isar_feature_aa64_hdbs(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) >= 2;
+}
+
+static inline bool isar_feature_aa64_tts2uxn(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0;
+}
+
+static inline bool isar_feature_aa64_uao(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, UAO) != 0;
+}
+
+static inline bool isar_feature_aa64_st(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, ST) != 0;
+}
+
+static inline bool isar_feature_aa64_lse2(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, AT) != 0;
+}
+
+static inline bool isar_feature_aa64_fwb(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, FWB) != 0;
+}
+
+static inline bool isar_feature_aa64_ids(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, IDS) != 0;
+}
+
+static inline bool isar_feature_aa64_half_evt(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, EVT) >= 1;
+}
+
+static inline bool isar_feature_aa64_evt(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, EVT) >= 2;
+}
+
+static inline bool isar_feature_aa64_ccidx(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, CCIDX) != 0;
+}
+
+static inline bool isar_feature_aa64_lva(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, VARANGE) != 0;
+}
+
+static inline bool isar_feature_aa64_e0pd(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, E0PD) != 0;
+}
+
+static inline bool isar_feature_aa64_nv(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) != 0;
+}
+
+static inline bool isar_feature_aa64_nv2(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, NV) >= 2;
+}
+
+static inline bool isar_feature_aa64_pmuv3p1(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 &&
+ FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf;
+}
+
+static inline bool isar_feature_aa64_pmuv3p4(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 5 &&
+ FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf;
+}
+
+static inline bool isar_feature_aa64_pmuv3p5(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 6 &&
+ FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf;
+}
+
+static inline bool isar_feature_aa64_debugv8p2(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, DEBUGVER) >= 8;
+}
+
+static inline bool isar_feature_aa64_doublelock(const ARMISARegisters *id)
+{
+ return FIELD_SEX64(id->id_aa64dfr0, ID_AA64DFR0, DOUBLELOCK) >= 0;
+}
+
+static inline bool isar_feature_aa64_sve2(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SVEVER) != 0;
+}
+
+static inline bool isar_feature_aa64_sve2_aes(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, AES) != 0;
+}
+
+static inline bool isar_feature_aa64_sve2_pmull128(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, AES) >= 2;
+}
+
+static inline bool isar_feature_aa64_sve2_bitperm(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BITPERM) != 0;
+}
+
+static inline bool isar_feature_aa64_sve_bf16(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, BFLOAT16) != 0;
+}
+
+static inline bool isar_feature_aa64_sve2_sha3(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SHA3) != 0;
+}
+
+static inline bool isar_feature_aa64_sve2_sm4(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, SM4) != 0;
+}
+
+static inline bool isar_feature_aa64_sve_i8mm(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, I8MM) != 0;
+}
+
+static inline bool isar_feature_aa64_sve_f32mm(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F32MM) != 0;
+}
+
+static inline bool isar_feature_aa64_sve_f64mm(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64zfr0, ID_AA64ZFR0, F64MM) != 0;
+}
+
+static inline bool isar_feature_aa64_sme_f64f64(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, F64F64);
+}
+
+static inline bool isar_feature_aa64_sme_i16i64(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, I16I64) == 0xf;
+}
+
+static inline bool isar_feature_aa64_sme_fa64(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64smfr0, ID_AA64SMFR0, FA64);
+}
+
+/*
+ * Feature tests for "does this exist in either 32-bit or 64-bit?"
+ */
+static inline bool isar_feature_any_fp16(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_fp16(id) || isar_feature_aa32_fp16_arith(id);
+}
+
+static inline bool isar_feature_any_predinv(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_predinv(id) || isar_feature_aa32_predinv(id);
+}
+
+static inline bool isar_feature_any_pmuv3p1(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_pmuv3p1(id) || isar_feature_aa32_pmuv3p1(id);
+}
+
+static inline bool isar_feature_any_pmuv3p4(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_pmuv3p4(id) || isar_feature_aa32_pmuv3p4(id);
+}
+
+static inline bool isar_feature_any_pmuv3p5(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_pmuv3p5(id) || isar_feature_aa32_pmuv3p5(id);
+}
+
+static inline bool isar_feature_any_ccidx(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_ccidx(id) || isar_feature_aa32_ccidx(id);
+}
+
+static inline bool isar_feature_any_tts2uxn(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_tts2uxn(id) || isar_feature_aa32_tts2uxn(id);
+}
+
+static inline bool isar_feature_any_debugv8p2(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_debugv8p2(id) || isar_feature_aa32_debugv8p2(id);
+}
+
+static inline bool isar_feature_any_ras(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_ras(id) || isar_feature_aa32_ras(id);
+}
+
+static inline bool isar_feature_any_half_evt(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_half_evt(id) || isar_feature_aa32_half_evt(id);
+}
+
+static inline bool isar_feature_any_evt(const ARMISARegisters *id)
+{
+ return isar_feature_aa64_evt(id) || isar_feature_aa32_evt(id);
+}
+
+/*
+ * Forward to the above feature tests given an ARMCPU pointer.
+ */
+#define cpu_isar_feature(name, cpu) \
+ ({ ARMCPU *cpu_ = (cpu); isar_feature_##name(&cpu_->isar); })
+
+#endif
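
All of the predicates above share one shape: extract a narrow field from a 64-bit ID register and compare it against the minimum value the architecture assigns to that feature. The following stand-alone sketch re-implements that extraction by hand, purely as an illustration of the arithmetic behind FIELD_EX64(); the PAN field is assumed to occupy bits [23:20] of ID_AA64MMFR1_EL1, matching the ">= 3" test used for FEAT_PAN3 above.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed layout: ID_AA64MMFR1_EL1.PAN occupies bits [23:20]. */
#define PAN_SHIFT  20
#define PAN_LENGTH 4

/* Extract an unsigned bitfield, the way FIELD_EX64() does for named fields. */
static uint64_t extract_field(uint64_t reg, unsigned shift, unsigned length)
{
    return (reg >> shift) & ((1ULL << length) - 1);
}

/* FEAT_PAN3 is reported when the PAN field reads as 3 or more. */
static bool example_feature_pan3(uint64_t id_aa64mmfr1)
{
    return extract_field(id_aa64mmfr1, PAN_SHIFT, PAN_LENGTH) >= 3;
}

int main(void)
{
    uint64_t id = 3ULL << PAN_SHIFT;                  /* PAN == 3 */
    printf("pan3: %d\n", example_feature_pan3(id));   /* prints "pan3: 1" */
    printf("pan3: %d\n", example_feature_pan3(0));    /* prints "pan3: 0" */
    return 0;
}
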
diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h
new file mode 100644
index 0000000000..da3243ab21
--- /dev/null
+++ b/target/arm/cpu-param.h
@@ -0,0 +1,40 @@
+/*
+ * ARM cpu parameters for qemu.
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * SPDX-License-Identifier: LGPL-2.0+
+ */
+
+#ifndef ARM_CPU_PARAM_H
+#define ARM_CPU_PARAM_H
+
+#ifdef TARGET_AARCH64
+# define TARGET_LONG_BITS 64
+# define TARGET_PHYS_ADDR_SPACE_BITS 52
+# define TARGET_VIRT_ADDR_SPACE_BITS 52
+#else
+# define TARGET_LONG_BITS 32
+# define TARGET_PHYS_ADDR_SPACE_BITS 40
+# define TARGET_VIRT_ADDR_SPACE_BITS 32
+#endif
+
+#ifdef CONFIG_USER_ONLY
+# ifdef TARGET_AARCH64
+# define TARGET_TAGGED_ADDRESSES
+/* Allow user-only to vary page size from 4k */
+# define TARGET_PAGE_BITS_VARY
+# define TARGET_PAGE_BITS_MIN 12
+# else
+# define TARGET_PAGE_BITS 12
+# endif
+#else
+/*
+ * ARMv7 and later CPUs have 4K pages minimum, but ARMv5 and v6
+ * have to support 1K tiny pages.
+ */
+# define TARGET_PAGE_BITS_VARY
+# define TARGET_PAGE_BITS_MIN 10
+
+#endif
+
+#endif
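
TARGET_PAGE_BITS is the log2 of the softmmu page size, so 12 means 4 KiB pages, while the minimum of 10 used above covers the 1 KiB tiny pages of ARMv5/v6. A small self-contained sketch of the quantities derived from it (the macro names here are illustrative, not QEMU's):

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_PAGE_BITS_MIN 10   /* 1 KiB tiny pages (ARMv5/v6) */
#define EXAMPLE_PAGE_BITS     12   /* 4 KiB pages (ARMv7 and later) */

int main(void)
{
    uint64_t page_size = 1ULL << EXAMPLE_PAGE_BITS;
    uint64_t page_mask = ~(page_size - 1);
    uint64_t addr = 0x40001234;

    printf("page size: %llu bytes\n", (unsigned long long)page_size);        /* 4096 */
    printf("page base: 0x%llx\n", (unsigned long long)(addr & page_mask));   /* 0x40001000 */
    printf("min page:  %u bytes\n", 1u << EXAMPLE_PAGE_BITS_MIN);            /* 1024 */
    return 0;
}
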
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h
index d135ff8e06..8e032691db 100644
--- a/target/arm/cpu-qom.h
+++ b/target/arm/cpu-qom.h
@@ -1,5 +1,5 @@
/*
- * QEMU ARM CPU
+ * QEMU ARM CPU QOM header (target agnostic)
*
* Copyright (c) 2012 SUSE LINUX Products GmbH
*
@@ -20,73 +20,41 @@
#ifndef QEMU_ARM_CPU_QOM_H
#define QEMU_ARM_CPU_QOM_H
-#include "qom/cpu.h"
-
-struct arm_boot_info;
+#include "hw/core/cpu.h"
#define TYPE_ARM_CPU "arm-cpu"
-#define ARM_CPU_CLASS(klass) \
- OBJECT_CLASS_CHECK(ARMCPUClass, (klass), TYPE_ARM_CPU)
-#define ARM_CPU(obj) \
- OBJECT_CHECK(ARMCPU, (obj), TYPE_ARM_CPU)
-#define ARM_CPU_GET_CLASS(obj) \
- OBJECT_GET_CLASS(ARMCPUClass, (obj), TYPE_ARM_CPU)
+OBJECT_DECLARE_CPU_TYPE(ARMCPU, ARMCPUClass, ARM_CPU)
#define TYPE_ARM_MAX_CPU "max-" TYPE_ARM_CPU
-/**
- * ARMCPUClass:
- * @parent_realize: The parent class' realize handler.
- * @parent_reset: The parent class' reset handler.
- *
- * An ARM CPU model.
- */
-typedef struct ARMCPUClass {
- /*< private >*/
- CPUClass parent_class;
- /*< public >*/
-
- DeviceRealize parent_realize;
- void (*parent_reset)(CPUState *cpu);
-} ARMCPUClass;
-
-typedef struct ARMCPU ARMCPU;
-
#define TYPE_AARCH64_CPU "aarch64-cpu"
-#define AARCH64_CPU_CLASS(klass) \
- OBJECT_CLASS_CHECK(AArch64CPUClass, (klass), TYPE_AARCH64_CPU)
-#define AARCH64_CPU_GET_CLASS(obj) \
- OBJECT_GET_CLASS(AArch64CPUClass, (obj), TYPE_AArch64_CPU)
-
-typedef struct AArch64CPUClass {
- /*< private >*/
- ARMCPUClass parent_class;
- /*< public >*/
-} AArch64CPUClass;
-
-void register_cp_regs_for_features(ARMCPU *cpu);
-void init_cpreg_list(ARMCPU *cpu);
-
-/* Callback functions for the generic timer's timers. */
-void arm_gt_ptimer_cb(void *opaque);
-void arm_gt_vtimer_cb(void *opaque);
-void arm_gt_htimer_cb(void *opaque);
-void arm_gt_stimer_cb(void *opaque);
-
-#define ARM_AFF0_SHIFT 0
-#define ARM_AFF0_MASK (0xFFULL << ARM_AFF0_SHIFT)
-#define ARM_AFF1_SHIFT 8
-#define ARM_AFF1_MASK (0xFFULL << ARM_AFF1_SHIFT)
-#define ARM_AFF2_SHIFT 16
-#define ARM_AFF2_MASK (0xFFULL << ARM_AFF2_SHIFT)
-#define ARM_AFF3_SHIFT 32
-#define ARM_AFF3_MASK (0xFFULL << ARM_AFF3_SHIFT)
-#define ARM_DEFAULT_CPUS_PER_CLUSTER 8
-
-#define ARM32_AFFINITY_MASK (ARM_AFF0_MASK|ARM_AFF1_MASK|ARM_AFF2_MASK)
-#define ARM64_AFFINITY_MASK \
- (ARM_AFF0_MASK|ARM_AFF1_MASK|ARM_AFF2_MASK|ARM_AFF3_MASK)
-#define ARM64_AFFINITY_INVALID (~ARM64_AFFINITY_MASK)
+typedef struct AArch64CPUClass AArch64CPUClass;
+DECLARE_CLASS_CHECKERS(AArch64CPUClass, AARCH64_CPU,
+ TYPE_AARCH64_CPU)
+
+#define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU
+#define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX)
+
+/* Meanings of the ARMCPU object's four inbound GPIO lines */
+#define ARM_CPU_IRQ 0
+#define ARM_CPU_FIQ 1
+#define ARM_CPU_VIRQ 2
+#define ARM_CPU_VFIQ 3
+
+/* For M profile, some registers are banked secure vs non-secure;
+ * these are represented as a 2-element array where the first element
+ * is the non-secure copy and the second is the secure copy.
+ * When the CPU does not have implement the security extension then
+ * When the CPU does not implement the Security Extension then
+ * only the first element is used.
+ * This means that the copy for the current security state can be
+ * accessed via env->registerfield[env->v7m.secure] (whether the security
+ * extension is implemented or not).
+ */
+enum {
+ M_REG_NS = 0,
+ M_REG_S = 1,
+ M_REG_NUM_BANKS = 2,
+};
#endif
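
The banking scheme described in the comment above can be modelled in isolation: a two-element array indexed by the current Secure flag, where index 0 (M_REG_NS) is the only element used on cores without the Security Extension. A minimal sketch, with struct and field names invented for illustration only:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { EX_M_REG_NS = 0, EX_M_REG_S = 1, EX_M_REG_NUM_BANKS = 2 };

/* Toy model of a banked M-profile register, e.g. VTOR. */
struct toy_v7m_state {
    uint32_t vecbase[EX_M_REG_NUM_BANKS];
    bool secure;                    /* current security state */
};

/* Access the copy for the current security state, as the comment describes. */
static uint32_t current_vecbase(const struct toy_v7m_state *s)
{
    return s->vecbase[s->secure];   /* index 0 when NS, 1 when Secure */
}

int main(void)
{
    struct toy_v7m_state s = {
        .vecbase = { [EX_M_REG_NS] = 0x00000000, [EX_M_REG_S] = 0x10000000 },
        .secure = true,
    };
    printf("VTOR = 0x%08x\n", (unsigned)current_vecbase(&s));  /* 0x10000000 */
    s.secure = false;
    printf("VTOR = 0x%08x\n", (unsigned)current_vecbase(&s));  /* 0x00000000 */
    return 0;
}
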
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index cd48ad42d8..ab8d007a86 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -19,31 +19,109 @@
*/
#include "qemu/osdep.h"
+#include "qemu/qemu-print.h"
+#include "qemu/timer.h"
+#include "qemu/log.h"
+#include "exec/page-vary.h"
#include "target/arm/idau.h"
-#include "qemu/error-report.h"
+#include "qemu/module.h"
#include "qapi/error.h"
#include "cpu.h"
+#ifdef CONFIG_TCG
+#include "hw/core/tcg-cpu-ops.h"
+#endif /* CONFIG_TCG */
#include "internals.h"
-#include "qemu-common.h"
+#include "cpu-features.h"
#include "exec/exec-all.h"
#include "hw/qdev-properties.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/loader.h"
-#endif
-#include "hw/arm/arm.h"
-#include "sysemu/sysemu.h"
+#include "hw/boards.h"
+#ifdef CONFIG_TCG
+#include "hw/intc/armv7m_nvic.h"
+#endif /* CONFIG_TCG */
+#endif /* !CONFIG_USER_ONLY */
+#include "sysemu/tcg.h"
+#include "sysemu/qtest.h"
#include "sysemu/hw_accel.h"
#include "kvm_arm.h"
#include "disas/capstone.h"
#include "fpu/softfloat.h"
+#include "cpregs.h"
+#include "target/arm/cpu-qom.h"
+#include "target/arm/gtimer.h"
static void arm_cpu_set_pc(CPUState *cs, vaddr value)
{
ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
- cpu->env.regs[15] = value;
+ if (is_a64(env)) {
+ env->pc = value;
+ env->thumb = false;
+ } else {
+ env->regs[15] = value & ~1;
+ env->thumb = value & 1;
+ }
}
+static vaddr arm_cpu_get_pc(CPUState *cs)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ if (is_a64(env)) {
+ return env->pc;
+ } else {
+ return env->regs[15];
+ }
+}
+
+#ifdef CONFIG_TCG
+void arm_cpu_synchronize_from_tb(CPUState *cs,
+ const TranslationBlock *tb)
+{
+ /* The program counter is always up to date with CF_PCREL. */
+ if (!(tb_cflags(tb) & CF_PCREL)) {
+ CPUARMState *env = cpu_env(cs);
+ /*
+ * It's OK to look at env for the current mode here, because it's
+ * never possible for an AArch64 TB to chain to an AArch32 TB.
+ */
+ if (is_a64(env)) {
+ env->pc = tb->pc;
+ } else {
+ env->regs[15] = tb->pc;
+ }
+ }
+}
+
+void arm_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data)
+{
+ CPUARMState *env = cpu_env(cs);
+
+ if (is_a64(env)) {
+ if (tb_cflags(tb) & CF_PCREL) {
+ env->pc = (env->pc & TARGET_PAGE_MASK) | data[0];
+ } else {
+ env->pc = data[0];
+ }
+ env->condexec_bits = 0;
+ env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
+ } else {
+ if (tb_cflags(tb) & CF_PCREL) {
+ env->regs[15] = (env->regs[15] & TARGET_PAGE_MASK) | data[0];
+ } else {
+ env->regs[15] = data[0];
+ }
+ env->condexec_bits = data[1];
+ env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
+ }
+}
+#endif /* CONFIG_TCG */
+
static bool arm_cpu_has_work(CPUState *cs)
{
ARMCPU *cpu = ARM_CPU(cs);
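
arm_cpu_set_pc() relies on the AArch32 interworking convention that bit 0 of a branch target selects the instruction set: the bit is stripped from the PC and copied into the Thumb flag, while AArch64 PCs are used unchanged. A standalone sketch of just that decode step, with the state structure simplified for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_pc_state {
    uint64_t pc;      /* program counter */
    bool thumb;       /* Thumb/T32 state (AArch32 only) */
    bool aarch64;     /* currently executing AArch64? */
};

/* Mirror of the logic in arm_cpu_set_pc(): in AArch32, bit 0 selects Thumb. */
static void toy_set_pc(struct toy_pc_state *s, uint64_t value)
{
    if (s->aarch64) {
        s->pc = value;
        s->thumb = false;
    } else {
        s->pc = value & ~(uint64_t)1;
        s->thumb = value & 1;
    }
}

int main(void)
{
    struct toy_pc_state s = { .aarch64 = false };
    toy_set_pc(&s, 0x00008001);   /* odd address: Thumb entry point */
    printf("pc=0x%llx thumb=%d\n", (unsigned long long)s.pc, s.thumb);
    /* prints pc=0x8000 thumb=1 */
    return 0;
}
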
@@ -51,10 +129,15 @@ static bool arm_cpu_has_work(CPUState *cs)
return (cpu->power_state != PSCI_OFF)
&& cs->interrupt_request &
(CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD
- | CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ
+ | CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ | CPU_INTERRUPT_VSERR
| CPU_INTERRUPT_EXITTB);
}
+static int arm_cpu_mmu_index(CPUState *cs, bool ifetch)
+{
+ return arm_env_mmu_index(cpu_env(cs));
+}
+
void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
void *opaque)
{
@@ -83,7 +166,7 @@ static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
ARMCPRegInfo *ri = value;
ARMCPU *cpu = opaque;
- if (ri->type & (ARM_CP_SPECIAL | ARM_CP_ALIAS)) {
+ if (ri->type & (ARM_CP_SPECIAL_MASK | ARM_CP_ALIAS)) {
return;
}
@@ -119,7 +202,7 @@ static void cp_reg_check_reset(gpointer key, gpointer value, gpointer opaque)
ARMCPU *cpu = opaque;
uint64_t oldvalue, newvalue;
- if (ri->type & (ARM_CP_SPECIAL | ARM_CP_ALIAS | ARM_CP_NO_RAW)) {
+ if (ri->type & (ARM_CP_SPECIAL_MASK | ARM_CP_ALIAS | ARM_CP_NO_RAW)) {
return;
}
@@ -129,14 +212,16 @@ static void cp_reg_check_reset(gpointer key, gpointer value, gpointer opaque)
assert(oldvalue == newvalue);
}
-/* CPUClass::reset() */
-static void arm_cpu_reset(CPUState *s)
+static void arm_cpu_reset_hold(Object *obj)
{
- ARMCPU *cpu = ARM_CPU(s);
- ARMCPUClass *acc = ARM_CPU_GET_CLASS(cpu);
+ CPUState *cs = CPU(obj);
+ ARMCPU *cpu = ARM_CPU(cs);
+ ARMCPUClass *acc = ARM_CPU_GET_CLASS(obj);
CPUARMState *env = &cpu->env;
- acc->parent_reset(s);
+ if (acc->parent_phases.hold) {
+ acc->parent_phases.hold(obj);
+ }
memset(env, 0, offsetof(CPUARMState, end_reset_fields));
@@ -144,12 +229,11 @@ static void arm_cpu_reset(CPUState *s)
g_hash_table_foreach(cpu->cp_regs, cp_reg_check_reset, cpu);
env->vfp.xregs[ARM_VFP_FPSID] = cpu->reset_fpsid;
- env->vfp.xregs[ARM_VFP_MVFR0] = cpu->mvfr0;
- env->vfp.xregs[ARM_VFP_MVFR1] = cpu->mvfr1;
- env->vfp.xregs[ARM_VFP_MVFR2] = cpu->mvfr2;
+ env->vfp.xregs[ARM_VFP_MVFR0] = cpu->isar.mvfr0;
+ env->vfp.xregs[ARM_VFP_MVFR1] = cpu->isar.mvfr1;
+ env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.mvfr2;
- cpu->power_state = cpu->start_powered_off ? PSCI_OFF : PSCI_ON;
- s->halted = cpu->start_powered_off;
+ cpu->power_state = cs->start_powered_off ? PSCI_OFF : PSCI_ON;
if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q';
@@ -157,20 +241,70 @@ static void arm_cpu_reset(CPUState *s)
if (arm_feature(env, ARM_FEATURE_AARCH64)) {
/* 64 bit CPUs always start in 64 bit mode */
- env->aarch64 = 1;
+ env->aarch64 = true;
#if defined(CONFIG_USER_ONLY)
env->pstate = PSTATE_MODE_EL0t;
 /* Userspace expects access to DC ZVA, CTR_EL0 and the cache ops */
env->cp15.sctlr_el[1] |= SCTLR_UCT | SCTLR_UCI | SCTLR_DZE;
+ /* Enable all PAC keys. */
+ env->cp15.sctlr_el[1] |= (SCTLR_EnIA | SCTLR_EnIB |
+ SCTLR_EnDA | SCTLR_EnDB);
+ /* Trap on btype=3 for PACIxSP. */
+ env->cp15.sctlr_el[1] |= SCTLR_BT0;
+ /* Trap on implementation defined registers. */
+ if (cpu_isar_feature(aa64_tidcp1, cpu)) {
+ env->cp15.sctlr_el[1] |= SCTLR_TIDCP;
+ }
/* and to the FP/Neon instructions */
- env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 20, 2, 3);
- /* and to the SVE instructions */
- env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 16, 2, 3);
- env->cp15.cptr_el[3] |= CPTR_EZ;
- /* with maximum vector length */
- env->vfp.zcr_el[1] = cpu->sve_max_vq - 1;
- env->vfp.zcr_el[2] = env->vfp.zcr_el[1];
- env->vfp.zcr_el[3] = env->vfp.zcr_el[1];
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
+ CPACR_EL1, FPEN, 3);
+ /* and to the SVE instructions, with default vector length */
+ if (cpu_isar_feature(aa64_sve, cpu)) {
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
+ CPACR_EL1, ZEN, 3);
+ env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
+ }
+ /* and for SME instructions, with default vector length, and TPIDR2 */
+ if (cpu_isar_feature(aa64_sme, cpu)) {
+ env->cp15.sctlr_el[1] |= SCTLR_EnTP2;
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
+ CPACR_EL1, SMEN, 3);
+ env->vfp.smcr_el[1] = cpu->sme_default_vq - 1;
+ if (cpu_isar_feature(aa64_sme_fa64, cpu)) {
+ env->vfp.smcr_el[1] = FIELD_DP64(env->vfp.smcr_el[1],
+ SMCR, FA64, 1);
+ }
+ }
+ /*
+ * Enable 48-bit address space (TODO: take reserved_va into account).
+ * Enable TBI0 but not TBI1.
+ * Note that this must match useronly_clean_ptr.
+ */
+ env->cp15.tcr_el[1] = 5 | (1ULL << 37);
+
+ /* Enable MTE */
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ /* Enable tag access, but leave TCF0 as No Effect (0). */
+ env->cp15.sctlr_el[1] |= SCTLR_ATA0;
+ /*
+ * Exclude all tags, so that tag 0 is always used.
+ * This corresponds to Linux current->thread.gcr_incl = 0.
+ *
+ * Set RRND, so that helper_irg() will generate a seed later.
+ * Here in cpu_reset(), the crypto subsystem has not yet been
+ * initialized.
+ */
+ env->cp15.gcr_el1 = 0x1ffff;
+ }
+ /*
+ * Disable access to SCXTNUM_EL0 from CSV2_1p2.
+ * This is not yet exposed from the Linux kernel in any way.
+ */
+ env->cp15.sctlr_el[1] |= SCTLR_TSCXT;
+ /* Disable access to Debug Communication Channel (DCC). */
+ env->cp15.mdscr_el1 |= 1 << 12;
+ /* Enable FEAT_MOPS */
+ env->cp15.sctlr_el[1] |= SCTLR_MSCEN;
#else
/* Reset into the highest available EL */
if (arm_feature(env, ARM_FEATURE_EL3)) {
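
The reset code above repeatedly uses FIELD_DP64() to deposit a small value into a named field of a 64-bit system register, for example setting CPACR_EL1.FPEN to 3 so EL0/EL1 can use FP/Neon. A hand-rolled sketch of that deposit step (the FPEN field is assumed to sit at bits [21:20]; this is not QEMU's macro, just the same arithmetic):

#include <stdint.h>
#include <stdio.h>

/* Assumed layout: CPACR_EL1.FPEN occupies bits [21:20]. */
#define FPEN_SHIFT  20
#define FPEN_LENGTH 2

/* Deposit 'value' into the field, the way FIELD_DP64() does for named fields. */
static uint64_t deposit_field(uint64_t reg, unsigned shift, unsigned length,
                              uint64_t value)
{
    uint64_t mask = ((1ULL << length) - 1) << shift;
    return (reg & ~mask) | ((value << shift) & mask);
}

int main(void)
{
    uint64_t cpacr = 0;
    cpacr = deposit_field(cpacr, FPEN_SHIFT, FPEN_LENGTH, 3);       /* FPEN = 0b11 */
    printf("CPACR_EL1 = 0x%llx\n", (unsigned long long)cpacr);      /* 0x300000 */
    return 0;
}
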
@@ -180,13 +314,23 @@ static void arm_cpu_reset(CPUState *s)
} else {
env->pstate = PSTATE_MODE_EL1h;
}
- env->pc = cpu->rvbar;
+
+ /* Sample rvbar at reset. */
+ env->cp15.rvbar = cpu->rvbar_prop;
+ env->pc = env->cp15.rvbar;
#endif
} else {
#if defined(CONFIG_USER_ONLY)
/* Userspace expects access to cp10 and cp11 for FP/Neon */
- env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 20, 4, 0xf);
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
+ CPACR, CP10, 3);
+ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
+ CPACR, CP11, 3);
#endif
+ if (arm_feature(env, ARM_FEATURE_V8)) {
+ env->cp15.rvbar = cpu->rvbar_prop;
+ env->regs[15] = cpu->rvbar_prop;
+ }
}
#if defined(CONFIG_USER_ONLY)
@@ -213,11 +357,36 @@ static void arm_cpu_reset(CPUState *s)
}
env->daif = PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F;
+ /* AArch32 has a hard highvec setting of 0xFFFF0000. If we are currently
+ * executing as AArch32 then check if highvecs are enabled and
+ * adjust the PC accordingly.
+ */
+ if (A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_V) {
+ env->regs[15] = 0xFFFF0000;
+ }
+
+ env->vfp.xregs[ARM_VFP_FPEXC] = 0;
+#endif
+
if (arm_feature(env, ARM_FEATURE_M)) {
+#ifndef CONFIG_USER_ONLY
uint32_t initial_msp; /* Loaded from 0x0 */
uint32_t initial_pc; /* Loaded from 0x4 */
uint8_t *rom;
uint32_t vecbase;
+#endif
+
+ if (cpu_isar_feature(aa32_lob, cpu)) {
+ /*
+ * LTPSIZE is constant 4 if MVE not implemented, and resets
+ * to an UNKNOWN value if MVE is implemented. We choose to
+ * always reset to 4.
+ */
+ env->v7m.ltpsize = 4;
+ /* The LTPSIZE field in FPDSCR is constant and reads as 4. */
+ env->v7m.fpdscr[M_REG_NS] = 4 << FPCR_LTPSIZE_SHIFT;
+ env->v7m.fpdscr[M_REG_S] = 4 << FPCR_LTPSIZE_SHIFT;
+ }
if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
env->v7m.secure = true;
@@ -228,6 +397,14 @@ static void arm_cpu_reset(CPUState *s)
* on ARM_FEATURE_V8 (we don't let the guest see the bit).
*/
env->v7m.aircr = R_V7M_AIRCR_BFHFNMINS_MASK;
+ /*
+ * Set NSACR to indicate "NS access permitted to everything";
+ * this avoids having to have all the tests of it being
+ * conditional on ARM_FEATURE_M_SECURITY. Note also that from
+ * v8.1M the guest-visible value of NSACR in a CPU without the
+ * Security Extension is 0xcff.
+ */
+ env->v7m.nsacr = 0xcff;
}
/* In v7M the reset value of this bit is IMPDEF, but ARM recommends
@@ -246,14 +423,22 @@ static void arm_cpu_reset(CPUState *s)
env->v7m.ccr[M_REG_S] |= R_V7M_CCR_UNALIGN_TRP_MASK;
}
+ if (cpu_isar_feature(aa32_vfp_simd, cpu)) {
+ env->v7m.fpccr[M_REG_NS] = R_V7M_FPCCR_ASPEN_MASK;
+ env->v7m.fpccr[M_REG_S] = R_V7M_FPCCR_ASPEN_MASK |
+ R_V7M_FPCCR_LSPEN_MASK | R_V7M_FPCCR_S_MASK;
+ }
+
+#ifndef CONFIG_USER_ONLY
/* Unlike A/R profile, M profile defines the reset LR value */
env->regs[14] = 0xffffffff;
env->v7m.vecbase[M_REG_S] = cpu->init_svtor & 0xffffff80;
+ env->v7m.vecbase[M_REG_NS] = cpu->init_nsvtor & 0xffffff80;
/* Load the initial SP and PC from offset 0 and 4 in the vector table */
vecbase = env->v7m.vecbase[env->v7m.secure];
- rom = rom_ptr(vecbase, 8);
+ rom = rom_ptr_for_as(cs->as, vecbase, 8);
if (rom) {
/* Address zero is covered by ROM which hasn't yet been
* copied into physical memory.
@@ -266,21 +451,30 @@ static void arm_cpu_reset(CPUState *s)
* it got copied into memory. In the latter case, rom_ptr
* will return a NULL pointer and we should use ldl_phys instead.
*/
- initial_msp = ldl_phys(s->as, vecbase);
- initial_pc = ldl_phys(s->as, vecbase + 4);
+ initial_msp = ldl_phys(cs->as, vecbase);
+ initial_pc = ldl_phys(cs->as, vecbase + 4);
}
+ qemu_log_mask(CPU_LOG_INT,
+ "Loaded reset SP 0x%x PC 0x%x from vector table\n",
+ initial_msp, initial_pc);
+
env->regs[13] = initial_msp & 0xFFFFFFFC;
env->regs[15] = initial_pc & ~1;
env->thumb = initial_pc & 1;
- }
-
- /* AArch32 has a hard highvec setting of 0xFFFF0000. If we are currently
- * executing as AArch32 then check if highvecs are enabled and
- * adjust the PC accordingly.
- */
- if (A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_V) {
- env->regs[15] = 0xFFFF0000;
+#else
+ /*
+ * For user mode we run non-secure and with access to the FPU.
+ * The FPU context is active (i.e. does not need further setup)
+ * and is owned by non-secure.
+ */
+ env->v7m.secure = false;
+ env->v7m.nsacr = 0xcff;
+ env->v7m.cpacr[M_REG_NS] = 0xf0ffff;
+ env->v7m.fpccr[M_REG_S] &=
+ ~(R_V7M_FPCCR_LSPEN_MASK | R_V7M_FPCCR_S_MASK);
+ env->v7m.control[M_REG_S] |= R_V7M_CONTROL_FPCA_MASK;
+#endif
}
/* M profile requires that reset clears the exclusive monitor;
@@ -289,9 +483,6 @@ static void arm_cpu_reset(CPUState *s)
*/
arm_clear_exclusive(env);
- env->vfp.xregs[ARM_VFP_FPEXC] = 0;
-#endif
-
if (arm_feature(env, ARM_FEATURE_PMSA)) {
if (cpu->pmsav7_dregion > 0) {
if (arm_feature(env, ARM_FEATURE_V8)) {
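
At reset an M-profile core loads its initial MSP from offset 0 of the vector table and its initial PC from offset 4, with bit 0 of that PC again selecting Thumb state (the reset vector is expected to have it set). A self-contained sketch of that fetch, using a fake 8-byte vector table in host memory rather than guest ROM, and assuming a little-endian host:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    /* Fake little-endian vector table: SP at offset 0, reset PC at offset 4. */
    uint8_t rom[8] = {
        0x00, 0x80, 0x00, 0x20,   /* initial MSP = 0x20008000 */
        0x01, 0x01, 0x00, 0x00,   /* initial PC  = 0x00000101 (Thumb bit set) */
    };
    uint32_t initial_msp, initial_pc;

    memcpy(&initial_msp, rom + 0, 4);   /* assumes a little-endian host */
    memcpy(&initial_pc,  rom + 4, 4);

    uint32_t sp    = initial_msp & 0xFFFFFFFCu;  /* SP is kept 4-byte aligned */
    uint32_t pc    = initial_pc & ~1u;           /* strip the Thumb bit */
    int      thumb = initial_pc & 1;

    printf("SP=0x%08x PC=0x%08x thumb=%d\n", (unsigned)sp, (unsigned)pc, thumb);
    /* prints SP=0x20008000 PC=0x00000100 thumb=1 */
    return 0;
}
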
@@ -318,6 +509,14 @@ static void arm_cpu_reset(CPUState *s)
sizeof(*env->pmsav7.dracr) * cpu->pmsav7_dregion);
}
}
+
+ if (cpu->pmsav8r_hdregion > 0) {
+ memset(env->pmsav8.hprbar, 0,
+ sizeof(*env->pmsav8.hprbar) * cpu->pmsav8r_hdregion);
+ memset(env->pmsav8.hprlar, 0,
+ sizeof(*env->pmsav8.hprlar) * cpu->pmsav8r_hdregion);
+ }
+
env->pmsav7.rnr[M_REG_NS] = 0;
env->pmsav7.rnr[M_REG_S] = 0;
env->pmsav8.mair0[M_REG_NS] = 0;
@@ -341,100 +540,384 @@ static void arm_cpu_reset(CPUState *s)
set_flush_to_zero(1, &env->vfp.standard_fp_status);
set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
set_default_nan_mode(1, &env->vfp.standard_fp_status);
+ set_default_nan_mode(1, &env->vfp.standard_fp_status_f16);
set_float_detect_tininess(float_tininess_before_rounding,
&env->vfp.fp_status);
set_float_detect_tininess(float_tininess_before_rounding,
&env->vfp.standard_fp_status);
set_float_detect_tininess(float_tininess_before_rounding,
&env->vfp.fp_status_f16);
+ set_float_detect_tininess(float_tininess_before_rounding,
+ &env->vfp.standard_fp_status_f16);
#ifndef CONFIG_USER_ONLY
if (kvm_enabled()) {
kvm_arm_reset_vcpu(cpu);
}
#endif
- hw_breakpoint_update_all(cpu);
- hw_watchpoint_update_all(cpu);
+ if (tcg_enabled()) {
+ hw_breakpoint_update_all(cpu);
+ hw_watchpoint_update_all(cpu);
+
+ arm_rebuild_hflags(env);
+ }
}
-bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
+void arm_emulate_firmware_reset(CPUState *cpustate, int target_el)
+{
+ ARMCPU *cpu = ARM_CPU(cpustate);
+ CPUARMState *env = &cpu->env;
+ bool have_el3 = arm_feature(env, ARM_FEATURE_EL3);
+ bool have_el2 = arm_feature(env, ARM_FEATURE_EL2);
+
+ /*
+ * Check we have the EL we're aiming for. If that is the
+ * highest implemented EL, then cpu_reset has already done
+ * all the work.
+ */
+ switch (target_el) {
+ case 3:
+ assert(have_el3);
+ return;
+ case 2:
+ assert(have_el2);
+ if (!have_el3) {
+ return;
+ }
+ break;
+ case 1:
+ if (!have_el3 && !have_el2) {
+ return;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (have_el3) {
+ /*
+ * Set the EL3 state so code can run at EL2. This should match
+ * the requirements set by Linux in its booting spec.
+ */
+ if (env->aarch64) {
+ env->cp15.scr_el3 |= SCR_RW;
+ if (cpu_isar_feature(aa64_pauth, cpu)) {
+ env->cp15.scr_el3 |= SCR_API | SCR_APK;
+ }
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ env->cp15.scr_el3 |= SCR_ATA;
+ }
+ if (cpu_isar_feature(aa64_sve, cpu)) {
+ env->cp15.cptr_el[3] |= R_CPTR_EL3_EZ_MASK;
+ env->vfp.zcr_el[3] = 0xf;
+ }
+ if (cpu_isar_feature(aa64_sme, cpu)) {
+ env->cp15.cptr_el[3] |= R_CPTR_EL3_ESM_MASK;
+ env->cp15.scr_el3 |= SCR_ENTP2;
+ env->vfp.smcr_el[3] = 0xf;
+ }
+ if (cpu_isar_feature(aa64_hcx, cpu)) {
+ env->cp15.scr_el3 |= SCR_HXEN;
+ }
+ if (cpu_isar_feature(aa64_fgt, cpu)) {
+ env->cp15.scr_el3 |= SCR_FGTEN;
+ }
+ }
+
+ if (target_el == 2) {
+ /* If the guest is at EL2 then Linux expects the HVC insn to work */
+ env->cp15.scr_el3 |= SCR_HCE;
+ }
+
+ /* Put CPU into non-secure state */
+ env->cp15.scr_el3 |= SCR_NS;
+ /* Set NSACR.{CP11,CP10} so NS can access the FPU */
+ env->cp15.nsacr |= 3 << 10;
+ }
+
+ if (have_el2 && target_el < 2) {
+ /* Set EL2 state so code can run at EL1. */
+ if (env->aarch64) {
+ env->cp15.hcr_el2 |= HCR_RW;
+ }
+ }
+
+ /* Set the CPU to the desired state */
+ if (env->aarch64) {
+ env->pstate = aarch64_pstate_mode(target_el, true);
+ } else {
+ static const uint32_t mode_for_el[] = {
+ 0,
+ ARM_CPU_MODE_SVC,
+ ARM_CPU_MODE_HYP,
+ ARM_CPU_MODE_SVC,
+ };
+
+ cpsr_write(env, mode_for_el[target_el], CPSR_M, CPSRWriteRaw);
+ }
+}
+
+
+#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY)
+
+static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
+ unsigned int target_el,
+ unsigned int cur_el, bool secure,
+ uint64_t hcr_el2)
+{
+ CPUARMState *env = cpu_env(cs);
+ bool pstate_unmasked;
+ bool unmasked = false;
+
+ /*
+ * Don't take exceptions if they target a lower EL.
+ * This check should catch any exceptions that would not be taken
+ * but left pending.
+ */
+ if (cur_el > target_el) {
+ return false;
+ }
+
+ switch (excp_idx) {
+ case EXCP_FIQ:
+ pstate_unmasked = !(env->daif & PSTATE_F);
+ break;
+
+ case EXCP_IRQ:
+ pstate_unmasked = !(env->daif & PSTATE_I);
+ break;
+
+ case EXCP_VFIQ:
+ if (!(hcr_el2 & HCR_FMO) || (hcr_el2 & HCR_TGE)) {
+ /* VFIQs are only taken when hypervized. */
+ return false;
+ }
+ return !(env->daif & PSTATE_F);
+ case EXCP_VIRQ:
+ if (!(hcr_el2 & HCR_IMO) || (hcr_el2 & HCR_TGE)) {
+ /* VIRQs are only taken when hypervized. */
+ return false;
+ }
+ return !(env->daif & PSTATE_I);
+ case EXCP_VSERR:
+ if (!(hcr_el2 & HCR_AMO) || (hcr_el2 & HCR_TGE)) {
+ /* VSErrs are only taken when hypervized. */
+ return false;
+ }
+ return !(env->daif & PSTATE_A);
+ default:
+ g_assert_not_reached();
+ }
+
+ /*
+ * Use the target EL, current execution state and SCR/HCR settings to
+ * determine whether the corresponding CPSR bit is used to mask the
+ * interrupt.
+ */
+ if ((target_el > cur_el) && (target_el != 1)) {
+ /* Exceptions targeting a higher EL may not be maskable */
+ if (arm_feature(env, ARM_FEATURE_AARCH64)) {
+ switch (target_el) {
+ case 2:
+ /*
+ * According to ARM DDI 0487H.a, an interrupt can be masked
+ * when HCR_E2H and HCR_TGE are both set regardless of the
+ * current Security state. Note that we need to revisit this
+ * part again once we need to support NMI.
+ */
+ if ((hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
+ unmasked = true;
+ }
+ break;
+ case 3:
+ /* Interrupt cannot be masked when the target EL is 3 */
+ unmasked = true;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ } else {
+ /*
+ * The old 32-bit-only environment has a more complicated
+ * masking setup. HCR and SCR bits not only affect interrupt
+ * routing but also change the behaviour of masking.
+ */
+ bool hcr, scr;
+
+ switch (excp_idx) {
+ case EXCP_FIQ:
+ /*
+ * If FIQs are routed to EL3 or EL2 then there are cases where
+ * we override the CPSR.F in determining if the exception is
+ * masked or not. If neither of these are set then we fall back
+ * to the CPSR.F setting otherwise we further assess the state
+ * below.
+ */
+ hcr = hcr_el2 & HCR_FMO;
+ scr = (env->cp15.scr_el3 & SCR_FIQ);
+
+ /*
+ * When EL3 is 32-bit, the SCR.FW bit controls whether the
+ * CPSR.F bit masks FIQ interrupts when taken in non-secure
+ * state. If SCR.FW is set then FIQs can be masked by CPSR.F
+ * when non-secure but only when FIQs are only routed to EL3.
+ */
+ scr = scr && !((env->cp15.scr_el3 & SCR_FW) && !hcr);
+ break;
+ case EXCP_IRQ:
+ /*
+ * When EL3 execution state is 32-bit, if HCR.IMO is set then
+ * we may override the CPSR.I masking when in non-secure state.
+ * The SCR.IRQ setting has already been taken into consideration
+ * when setting the target EL, so it does not have a further
+ * effect here.
+ */
+ hcr = hcr_el2 & HCR_IMO;
+ scr = false;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if ((scr || hcr) && !secure) {
+ unmasked = true;
+ }
+ }
+ }
+
+ /*
+ * The PSTATE bits only mask the interrupt if we have not overridden the
+ * ability above.
+ */
+ return unmasked || pstate_unmasked;
+}
+
+static bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
{
CPUClass *cc = CPU_GET_CLASS(cs);
- CPUARMState *env = cs->env_ptr;
+ CPUARMState *env = cpu_env(cs);
uint32_t cur_el = arm_current_el(env);
bool secure = arm_is_secure(env);
+ uint64_t hcr_el2 = arm_hcr_el2_eff(env);
uint32_t target_el;
uint32_t excp_idx;
- bool ret = false;
+
+ /* The prioritization of interrupts is IMPLEMENTATION DEFINED. */
if (interrupt_request & CPU_INTERRUPT_FIQ) {
excp_idx = EXCP_FIQ;
target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure);
- if (arm_excp_unmasked(cs, excp_idx, target_el)) {
- cs->exception_index = excp_idx;
- env->exception.target_el = target_el;
- cc->do_interrupt(cs);
- ret = true;
+ if (arm_excp_unmasked(cs, excp_idx, target_el,
+ cur_el, secure, hcr_el2)) {
+ goto found;
}
}
if (interrupt_request & CPU_INTERRUPT_HARD) {
excp_idx = EXCP_IRQ;
target_el = arm_phys_excp_target_el(cs, excp_idx, cur_el, secure);
- if (arm_excp_unmasked(cs, excp_idx, target_el)) {
- cs->exception_index = excp_idx;
- env->exception.target_el = target_el;
- cc->do_interrupt(cs);
- ret = true;
+ if (arm_excp_unmasked(cs, excp_idx, target_el,
+ cur_el, secure, hcr_el2)) {
+ goto found;
}
}
if (interrupt_request & CPU_INTERRUPT_VIRQ) {
excp_idx = EXCP_VIRQ;
target_el = 1;
- if (arm_excp_unmasked(cs, excp_idx, target_el)) {
- cs->exception_index = excp_idx;
- env->exception.target_el = target_el;
- cc->do_interrupt(cs);
- ret = true;
+ if (arm_excp_unmasked(cs, excp_idx, target_el,
+ cur_el, secure, hcr_el2)) {
+ goto found;
}
}
if (interrupt_request & CPU_INTERRUPT_VFIQ) {
excp_idx = EXCP_VFIQ;
target_el = 1;
- if (arm_excp_unmasked(cs, excp_idx, target_el)) {
- cs->exception_index = excp_idx;
- env->exception.target_el = target_el;
- cc->do_interrupt(cs);
- ret = true;
+ if (arm_excp_unmasked(cs, excp_idx, target_el,
+ cur_el, secure, hcr_el2)) {
+ goto found;
+ }
+ }
+ if (interrupt_request & CPU_INTERRUPT_VSERR) {
+ excp_idx = EXCP_VSERR;
+ target_el = 1;
+ if (arm_excp_unmasked(cs, excp_idx, target_el,
+ cur_el, secure, hcr_el2)) {
+ /* Taking a virtual abort clears HCR_EL2.VSE */
+ env->cp15.hcr_el2 &= ~HCR_VSE;
+ cpu_reset_interrupt(cs, CPU_INTERRUPT_VSERR);
+ goto found;
}
}
+ return false;
- return ret;
+ found:
+ cs->exception_index = excp_idx;
+ env->exception.target_el = target_el;
+ cc->tcg_ops->do_interrupt(cs);
+ return true;
}
-#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
-static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
+#endif /* CONFIG_TCG && !CONFIG_USER_ONLY */
+
+void arm_cpu_update_virq(ARMCPU *cpu)
{
- CPUClass *cc = CPU_GET_CLASS(cs);
- ARMCPU *cpu = ARM_CPU(cs);
+ /*
+ * Update the interrupt level for VIRQ, which is the logical OR of
+ * the HCR_EL2.VI bit and the input line level from the GIC.
+ */
CPUARMState *env = &cpu->env;
- bool ret = false;
-
- /* ARMv7-M interrupt masking works differently than -A or -R.
- * There is no FIQ/IRQ distinction. Instead of I and F bits
- * masking FIQ and IRQ interrupts, an exception is taken only
- * if it is higher priority than the current execution priority
- * (which depends on state like BASEPRI, FAULTMASK and the
- * currently active exception).
+ CPUState *cs = CPU(cpu);
+
+ bool new_state = (env->cp15.hcr_el2 & HCR_VI) ||
+ (env->irq_line_state & CPU_INTERRUPT_VIRQ);
+
+ if (new_state != ((cs->interrupt_request & CPU_INTERRUPT_VIRQ) != 0)) {
+ if (new_state) {
+ cpu_interrupt(cs, CPU_INTERRUPT_VIRQ);
+ } else {
+ cpu_reset_interrupt(cs, CPU_INTERRUPT_VIRQ);
+ }
+ }
+}
+
+void arm_cpu_update_vfiq(ARMCPU *cpu)
+{
+ /*
+ * Update the interrupt level for VFIQ, which is the logical OR of
+ * the HCR_EL2.VF bit and the input line level from the GIC.
*/
- if (interrupt_request & CPU_INTERRUPT_HARD
- && (armv7m_nvic_can_take_pending_exception(env->nvic))) {
- cs->exception_index = EXCP_IRQ;
- cc->do_interrupt(cs);
- ret = true;
+ CPUARMState *env = &cpu->env;
+ CPUState *cs = CPU(cpu);
+
+ bool new_state = (env->cp15.hcr_el2 & HCR_VF) ||
+ (env->irq_line_state & CPU_INTERRUPT_VFIQ);
+
+ if (new_state != ((cs->interrupt_request & CPU_INTERRUPT_VFIQ) != 0)) {
+ if (new_state) {
+ cpu_interrupt(cs, CPU_INTERRUPT_VFIQ);
+ } else {
+ cpu_reset_interrupt(cs, CPU_INTERRUPT_VFIQ);
+ }
+ }
+}
+
+void arm_cpu_update_vserr(ARMCPU *cpu)
+{
+ /*
+ * Update the interrupt level for VSERR, which is the HCR_EL2.VSE bit.
+ */
+ CPUARMState *env = &cpu->env;
+ CPUState *cs = CPU(cpu);
+
+ bool new_state = env->cp15.hcr_el2 & HCR_VSE;
+
+ if (new_state != ((cs->interrupt_request & CPU_INTERRUPT_VSERR) != 0)) {
+ if (new_state) {
+ cpu_interrupt(cs, CPU_INTERRUPT_VSERR);
+ } else {
+ cpu_reset_interrupt(cs, CPU_INTERRUPT_VSERR);
+ }
}
- return ret;
}
-#endif
#ifndef CONFIG_USER_ONLY
static void arm_cpu_set_irq(void *opaque, int irq, int level)
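
arm_cpu_update_virq() and arm_cpu_update_vfiq() both compute the new virtual-interrupt level as the logical OR of an HCR_EL2 bit and the corresponding input line from the GIC, and only raise or lower the CPU interrupt when that level actually changes. A minimal stand-alone model of that edge-triggered update, where the callback names are placeholders rather than QEMU APIs:

#include <stdbool.h>
#include <stdio.h>

/* Placeholders standing in for cpu_interrupt()/cpu_reset_interrupt(). */
static void raise_virq(void) { puts("VIRQ raised"); }
static void lower_virq(void) { puts("VIRQ lowered"); }

struct toy_virq_state {
    bool hcr_vi;        /* HCR_EL2.VI: software-injected virtual IRQ */
    bool gic_virq_line; /* level of the VIRQ input line from the GIC */
    bool pending;       /* currently latched VIRQ level */
};

/* Mirror of the update logic: OR the two sources, act only on a change. */
static void toy_update_virq(struct toy_virq_state *s)
{
    bool new_state = s->hcr_vi || s->gic_virq_line;

    if (new_state != s->pending) {
        s->pending = new_state;
        if (new_state) {
            raise_virq();
        } else {
            lower_virq();
        }
    }
}

int main(void)
{
    struct toy_virq_state s = { 0 };
    s.hcr_vi = true;         toy_update_virq(&s);   /* VIRQ raised */
    s.gic_virq_line = true;  toy_update_virq(&s);   /* no change: stays raised */
    s.hcr_vi = false;        toy_update_virq(&s);   /* still raised via GIC line */
    s.gic_virq_line = false; toy_update_virq(&s);   /* VIRQ lowered */
    return 0;
}
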
@@ -449,11 +932,29 @@ static void arm_cpu_set_irq(void *opaque, int irq, int level)
[ARM_CPU_VFIQ] = CPU_INTERRUPT_VFIQ
};
+ if (!arm_feature(env, ARM_FEATURE_EL2) &&
+ (irq == ARM_CPU_VIRQ || irq == ARM_CPU_VFIQ)) {
+ /*
+ * The GIC might tell us about VIRQ and VFIQ state, but if we don't
+ * have EL2 support we don't care. (Unless the guest is doing something
+ * silly this will only be calls saying "level is still 0".)
+ */
+ return;
+ }
+
+ if (level) {
+ env->irq_line_state |= mask[irq];
+ } else {
+ env->irq_line_state &= ~mask[irq];
+ }
+
switch (irq) {
case ARM_CPU_VIRQ:
+ arm_cpu_update_virq(cpu);
+ break;
case ARM_CPU_VFIQ:
- assert(arm_feature(env, ARM_FEATURE_EL2));
- /* fall through */
+ arm_cpu_update_vfiq(cpu);
+ break;
case ARM_CPU_IRQ:
case ARM_CPU_FIQ:
if (level) {
@@ -471,21 +972,30 @@ static void arm_cpu_kvm_set_irq(void *opaque, int irq, int level)
{
#ifdef CONFIG_KVM
ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
CPUState *cs = CPU(cpu);
- int kvm_irq = KVM_ARM_IRQ_TYPE_CPU << KVM_ARM_IRQ_TYPE_SHIFT;
+ uint32_t linestate_bit;
+ int irq_id;
switch (irq) {
case ARM_CPU_IRQ:
- kvm_irq |= KVM_ARM_IRQ_CPU_IRQ;
+ irq_id = KVM_ARM_IRQ_CPU_IRQ;
+ linestate_bit = CPU_INTERRUPT_HARD;
break;
case ARM_CPU_FIQ:
- kvm_irq |= KVM_ARM_IRQ_CPU_FIQ;
+ irq_id = KVM_ARM_IRQ_CPU_FIQ;
+ linestate_bit = CPU_INTERRUPT_FIQ;
break;
default:
g_assert_not_reached();
}
- kvm_irq |= cs->cpu_index << KVM_ARM_IRQ_VCPU_SHIFT;
- kvm_set_irq(kvm_state, kvm_irq, level ? 1 : 0);
+
+ if (level) {
+ env->irq_line_state |= linestate_bit;
+ } else {
+ env->irq_line_state &= ~linestate_bit;
+ }
+ kvm_arm_set_irq(cs->cpu_index, KVM_ARM_IRQ_TYPE_CPU, irq_id, !!level);
#endif
}
@@ -500,22 +1010,6 @@ static bool arm_cpu_virtio_is_big_endian(CPUState *cs)
#endif
-static inline void set_feature(CPUARMState *env, int feature)
-{
- env->features |= 1ULL << feature;
-}
-
-static inline void unset_feature(CPUARMState *env, int feature)
-{
- env->features &= ~(1ULL << feature);
-}
-
-static int
-print_insn_thumb1(bfd_vma pc, disassemble_info *info)
-{
- return print_insn_arm(pc | 1, info);
-}
-
static void arm_disas_set_info(CPUState *cpu, disassemble_info *info)
{
ARMCPU *ac = ARM_CPU(cpu);
@@ -523,25 +1017,16 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info)
bool sctlr_b;
if (is_a64(env)) {
- /* We might not be compiled with the A64 disassembler
- * because it needs a C++ compiler. Leave print_insn
- * unset in this case to use the caller default behaviour.
- */
-#if defined(CONFIG_ARM_A64_DIS)
- info->print_insn = print_insn_arm_a64;
-#endif
info->cap_arch = CS_ARCH_ARM64;
info->cap_insn_unit = 4;
info->cap_insn_split = 4;
} else {
int cap_mode;
if (env->thumb) {
- info->print_insn = print_insn_thumb1;
info->cap_insn_unit = 2;
info->cap_insn_split = 4;
cap_mode = CS_MODE_THUMB;
} else {
- info->print_insn = print_insn_arm;
info->cap_insn_unit = 4;
info->cap_insn_split = 4;
cap_mode = CS_MODE_ARM;
@@ -558,7 +1043,7 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info)
sctlr_b = arm_sctlr_b(env);
if (bswap_code(sctlr_b)) {
-#ifdef TARGET_WORDS_BIGENDIAN
+#if TARGET_BIG_ENDIAN
info->endian = BFD_ENDIAN_LITTLE;
#else
info->endian = BFD_ENDIAN_BIG;
@@ -572,26 +1057,297 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info)
#endif
}
-uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz)
+#ifdef TARGET_AARCH64
+
+static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ uint32_t psr = pstate_read(env);
+ int i, j;
+ int el = arm_current_el(env);
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ const char *ns_status;
+ bool sve;
+
+ qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc);
+ for (i = 0; i < 32; i++) {
+ if (i == 31) {
+ qemu_fprintf(f, " SP=%016" PRIx64 "\n", env->xregs[i]);
+ } else {
+ qemu_fprintf(f, "X%02d=%016" PRIx64 "%s", i, env->xregs[i],
+ (i + 2) % 3 ? " " : "\n");
+ }
+ }
+
+ if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
+ ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
+ } else {
+ ns_status = "";
+ }
+ qemu_fprintf(f, "PSTATE=%08x %c%c%c%c %sEL%d%c",
+ psr,
+ psr & PSTATE_N ? 'N' : '-',
+ psr & PSTATE_Z ? 'Z' : '-',
+ psr & PSTATE_C ? 'C' : '-',
+ psr & PSTATE_V ? 'V' : '-',
+ ns_status,
+ el,
+ psr & PSTATE_SP ? 'h' : 't');
+
+ if (cpu_isar_feature(aa64_sme, cpu)) {
+ qemu_fprintf(f, " SVCR=%08" PRIx64 " %c%c",
+ env->svcr,
+ (FIELD_EX64(env->svcr, SVCR, ZA) ? 'Z' : '-'),
+ (FIELD_EX64(env->svcr, SVCR, SM) ? 'S' : '-'));
+ }
+ if (cpu_isar_feature(aa64_bti, cpu)) {
+ qemu_fprintf(f, " BTYPE=%d", (psr & PSTATE_BTYPE) >> 10);
+ }
+ qemu_fprintf(f, "%s%s%s",
+ (hcr & HCR_NV) ? " NV" : "",
+ (hcr & HCR_NV1) ? " NV1" : "",
+ (hcr & HCR_NV2) ? " NV2" : "");
+ if (!(flags & CPU_DUMP_FPU)) {
+ qemu_fprintf(f, "\n");
+ return;
+ }
+ if (fp_exception_el(env, el) != 0) {
+ qemu_fprintf(f, " FPU disabled\n");
+ return;
+ }
+ qemu_fprintf(f, " FPCR=%08x FPSR=%08x\n",
+ vfp_get_fpcr(env), vfp_get_fpsr(env));
+
+ if (cpu_isar_feature(aa64_sme, cpu) && FIELD_EX64(env->svcr, SVCR, SM)) {
+ sve = sme_exception_el(env, el) == 0;
+ } else if (cpu_isar_feature(aa64_sve, cpu)) {
+ sve = sve_exception_el(env, el) == 0;
+ } else {
+ sve = false;
+ }
+
+ if (sve) {
+ int zcr_len = sve_vqm1_for_el(env, el);
+
+ for (i = 0; i <= FFR_PRED_NUM; i++) {
+ bool eol;
+ if (i == FFR_PRED_NUM) {
+ qemu_fprintf(f, "FFR=");
+ /* It's last, so end the line. */
+ eol = true;
+ } else {
+ qemu_fprintf(f, "P%02d=", i);
+ switch (zcr_len) {
+ case 0:
+ eol = i % 8 == 7;
+ break;
+ case 1:
+ eol = i % 6 == 5;
+ break;
+ case 2:
+ case 3:
+ eol = i % 3 == 2;
+ break;
+ default:
+ /* More than one quadword per predicate. */
+ eol = true;
+ break;
+ }
+ }
+ for (j = zcr_len / 4; j >= 0; j--) {
+ int digits;
+ if (j * 4 + 4 <= zcr_len + 1) {
+ digits = 16;
+ } else {
+ digits = (zcr_len % 4 + 1) * 4;
+ }
+ qemu_fprintf(f, "%0*" PRIx64 "%s", digits,
+ env->vfp.pregs[i].p[j],
+ j ? ":" : eol ? "\n" : " ");
+ }
+ }
+
+ if (zcr_len == 0) {
+ /*
+ * With vl=16, there are only 37 columns per register,
+ * so output two registers per line.
+ */
+ for (i = 0; i < 32; i++) {
+ qemu_fprintf(f, "Z%02d=%016" PRIx64 ":%016" PRIx64 "%s",
+ i, env->vfp.zregs[i].d[1],
+ env->vfp.zregs[i].d[0], i & 1 ? "\n" : " ");
+ }
+ } else {
+ for (i = 0; i < 32; i++) {
+ qemu_fprintf(f, "Z%02d=", i);
+ for (j = zcr_len; j >= 0; j--) {
+ qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%s",
+ env->vfp.zregs[i].d[j * 2 + 1],
+ env->vfp.zregs[i].d[j * 2 + 0],
+ j ? ":" : "\n");
+ }
+ }
+ }
+ } else {
+ for (i = 0; i < 32; i++) {
+ uint64_t *q = aa64_vfp_qreg(env, i);
+ qemu_fprintf(f, "Q%02d=%016" PRIx64 ":%016" PRIx64 "%s",
+ i, q[1], q[0], (i & 1 ? "\n" : " "));
+ }
+ }
+
+ if (cpu_isar_feature(aa64_sme, cpu) &&
+ FIELD_EX64(env->svcr, SVCR, ZA) &&
+ sme_exception_el(env, el) == 0) {
+ int zcr_len = sve_vqm1_for_el_sm(env, el, true);
+ int svl = (zcr_len + 1) * 16;
+ int svl_lg10 = svl < 100 ? 2 : 3;
+
+ for (i = 0; i < svl; i++) {
+ qemu_fprintf(f, "ZA[%0*d]=", svl_lg10, i);
+ for (j = zcr_len; j >= 0; --j) {
+ qemu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%c",
+ env->zarray[i].d[2 * j + 1],
+ env->zarray[i].d[2 * j],
+ j ? ':' : '\n');
+ }
+ }
+ }
+}
+
+#else
+
+static inline void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
+{
+ g_assert_not_reached();
+}
+
+#endif
+
+static void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ int i;
+
+ if (is_a64(env)) {
+ aarch64_cpu_dump_state(cs, f, flags);
+ return;
+ }
+
+ for (i = 0; i < 16; i++) {
+ qemu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
+ if ((i % 4) == 3) {
+ qemu_fprintf(f, "\n");
+ } else {
+ qemu_fprintf(f, " ");
+ }
+ }
+
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ uint32_t xpsr = xpsr_read(env);
+ const char *mode;
+ const char *ns_status = "";
+
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ ns_status = env->v7m.secure ? "S " : "NS ";
+ }
+
+ if (xpsr & XPSR_EXCP) {
+ mode = "handler";
+ } else {
+ if (env->v7m.control[env->v7m.secure] & R_V7M_CONTROL_NPRIV_MASK) {
+ mode = "unpriv-thread";
+ } else {
+ mode = "priv-thread";
+ }
+ }
+
+ qemu_fprintf(f, "XPSR=%08x %c%c%c%c %c %s%s\n",
+ xpsr,
+ xpsr & XPSR_N ? 'N' : '-',
+ xpsr & XPSR_Z ? 'Z' : '-',
+ xpsr & XPSR_C ? 'C' : '-',
+ xpsr & XPSR_V ? 'V' : '-',
+ xpsr & XPSR_T ? 'T' : 'A',
+ ns_status,
+ mode);
+ } else {
+ uint32_t psr = cpsr_read(env);
+ const char *ns_status = "";
+
+ if (arm_feature(env, ARM_FEATURE_EL3) &&
+ (psr & CPSR_M) != ARM_CPU_MODE_MON) {
+ ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
+ }
+
+ qemu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%s%d\n",
+ psr,
+ psr & CPSR_N ? 'N' : '-',
+ psr & CPSR_Z ? 'Z' : '-',
+ psr & CPSR_C ? 'C' : '-',
+ psr & CPSR_V ? 'V' : '-',
+ psr & CPSR_T ? 'T' : 'A',
+ ns_status,
+ aarch32_mode_name(psr), (psr & 0x10) ? 32 : 26);
+ }
+
+ if (flags & CPU_DUMP_FPU) {
+ int numvfpregs = 0;
+ if (cpu_isar_feature(aa32_simd_r32, cpu)) {
+ numvfpregs = 32;
+ } else if (cpu_isar_feature(aa32_vfp_simd, cpu)) {
+ numvfpregs = 16;
+ }
+ for (i = 0; i < numvfpregs; i++) {
+ uint64_t v = *aa32_vfp_dreg(env, i);
+ qemu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
+ i * 2, (uint32_t)v,
+ i * 2 + 1, (uint32_t)(v >> 32),
+ i, v);
+ }
+ qemu_fprintf(f, "FPSCR: %08x\n", vfp_get_fpscr(env));
+ if (cpu_isar_feature(aa32_mve, cpu)) {
+ qemu_fprintf(f, "VPR: %08x\n", env->v7m.vpr);
+ }
+ }
+}
+
+uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz)
{
uint32_t Aff1 = idx / clustersz;
uint32_t Aff0 = idx % clustersz;
return (Aff1 << ARM_AFF1_SHIFT) | Aff0;
}
+uint64_t arm_cpu_mp_affinity(ARMCPU *cpu)
+{
+ return cpu->mp_affinity;
+}
+
static void arm_cpu_initfn(Object *obj)
{
- CPUState *cs = CPU(obj);
ARMCPU *cpu = ARM_CPU(obj);
- cs->env_ptr = &cpu->env;
- cpu->cp_regs = g_hash_table_new_full(g_int_hash, g_int_equal,
- g_free, g_free);
+ cpu->cp_regs = g_hash_table_new_full(g_direct_hash, g_direct_equal,
+ NULL, g_free);
QLIST_INIT(&cpu->pre_el_change_hooks);
QLIST_INIT(&cpu->el_change_hooks);
-#ifndef CONFIG_USER_ONLY
+#ifdef CONFIG_USER_ONLY
+# ifdef TARGET_AARCH64
+ /*
+ * The linux kernel defaults to 512-bit for SVE, and 256-bit for SME.
+ * These values were chosen to fit within the default signal frame.
+ * See documentation for /proc/sys/abi/{sve,sme}_default_vector_length,
+ * and our corresponding cpu property.
+ */
+ cpu->sve_default_vq = 4;
+ cpu->sme_default_vq = 2;
+# endif
+#else
/* Our inbound IRQ and FIQ lines */
if (kvm_enabled()) {
/* VIRQ and VFIQ are unused with KVM but we add them to maintain
@@ -602,14 +1358,6 @@ static void arm_cpu_initfn(Object *obj)
qdev_init_gpio_in(DEVICE(cpu), arm_cpu_set_irq, 4);
}
- cpu->gt_timer[GTIMER_PHYS] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE,
- arm_gt_ptimer_cb, cpu);
- cpu->gt_timer[GTIMER_VIRT] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE,
- arm_gt_vtimer_cb, cpu);
- cpu->gt_timer[GTIMER_HYP] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE,
- arm_gt_htimer_cb, cpu);
- cpu->gt_timer[GTIMER_SEC] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE,
- arm_gt_stimer_cb, cpu);
qdev_init_gpio_out(DEVICE(cpu), cpu->gt_timer_outputs,
ARRAY_SIZE(cpu->gt_timer_outputs));
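
arm_build_mp_affinity(), introduced in the hunk above, packs a linear CPU index into MPIDR affinity fields: Aff0 is the index within a cluster and Aff1 the cluster number, shifted up by 8 bits. A worked, self-contained version of the same arithmetic (ARM_AFF1_SHIFT assumed to be 8, as in the constants removed from cpu-qom.h earlier in this diff):

#include <stdint.h>
#include <stdio.h>

#define EX_AFF1_SHIFT 8   /* assumed, matching ARM_AFF1_SHIFT */

/* Same computation as arm_build_mp_affinity(): cluster in Aff1, index in Aff0. */
static uint64_t toy_mp_affinity(int idx, uint8_t clustersz)
{
    uint32_t aff1 = idx / clustersz;
    uint32_t aff0 = idx % clustersz;
    return ((uint64_t)aff1 << EX_AFF1_SHIFT) | aff0;
}

int main(void)
{
    /* With 8 CPUs per cluster, CPU 10 is CPU 2 of cluster 1: affinity 0x102. */
    printf("cpu 10 -> 0x%llx\n", (unsigned long long)toy_mp_affinity(10, 8));
    printf("cpu  3 -> 0x%llx\n", (unsigned long long)toy_mp_affinity(3, 8));
    return 0;
}
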
@@ -624,35 +1372,47 @@ static void arm_cpu_initfn(Object *obj)
* picky DTB consumer will also provide a helpful error message.
*/
cpu->dtb_compatible = "qemu,unknown";
- cpu->psci_version = 1; /* By default assume PSCI v0.1 */
+ cpu->psci_version = QEMU_PSCI_VERSION_0_1; /* By default assume PSCI v0.1 */
cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE;
- if (tcg_enabled()) {
- cpu->psci_version = 2; /* TCG implements PSCI 0.2 */
+ if (tcg_enabled() || hvf_enabled()) {
+ /* TCG and HVF implement PSCI 1.1 */
+ cpu->psci_version = QEMU_PSCI_VERSION_1_1;
}
}
+static Property arm_cpu_gt_cntfrq_property =
+ DEFINE_PROP_UINT64("cntfrq", ARMCPU, gt_cntfrq_hz,
+ NANOSECONDS_PER_SECOND / GTIMER_SCALE);
+
static Property arm_cpu_reset_cbar_property =
DEFINE_PROP_UINT64("reset-cbar", ARMCPU, reset_cbar, 0);
static Property arm_cpu_reset_hivecs_property =
DEFINE_PROP_BOOL("reset-hivecs", ARMCPU, reset_hivecs, false);
-static Property arm_cpu_rvbar_property =
- DEFINE_PROP_UINT64("rvbar", ARMCPU, rvbar, 0);
-
+#ifndef CONFIG_USER_ONLY
static Property arm_cpu_has_el2_property =
DEFINE_PROP_BOOL("has_el2", ARMCPU, has_el2, true);
static Property arm_cpu_has_el3_property =
DEFINE_PROP_BOOL("has_el3", ARMCPU, has_el3, true);
+#endif
static Property arm_cpu_cfgend_property =
DEFINE_PROP_BOOL("cfgend", ARMCPU, cfgend, false);
-/* use property name "pmu" to match other archs and virt tools */
-static Property arm_cpu_has_pmu_property =
- DEFINE_PROP_BOOL("pmu", ARMCPU, has_pmu, true);
+static Property arm_cpu_has_vfp_property =
+ DEFINE_PROP_BOOL("vfp", ARMCPU, has_vfp, true);
+
+static Property arm_cpu_has_vfp_d32_property =
+ DEFINE_PROP_BOOL("vfp-d32", ARMCPU, has_vfp_d32, true);
+
+static Property arm_cpu_has_neon_property =
+ DEFINE_PROP_BOOL("neon", ARMCPU, has_neon, true);
+
+static Property arm_cpu_has_dsp_property =
+ DEFINE_PROP_BOOL("dsp", ARMCPU, has_dsp, true);
static Property arm_cpu_has_mpu_property =
DEFINE_PROP_BOOL("has-mpu", ARMCPU, has_mpu, true);
@@ -667,86 +1427,307 @@ static Property arm_cpu_pmsav7_dregion_property =
pmsav7_dregion,
qdev_prop_uint32, uint32_t);
-/* M profile: initial value of the Secure VTOR */
-static Property arm_cpu_initsvtor_property =
- DEFINE_PROP_UINT32("init-svtor", ARMCPU, init_svtor, 0);
+static bool arm_get_pmu(Object *obj, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ return cpu->has_pmu;
+}
-static void arm_cpu_post_init(Object *obj)
+static void arm_set_pmu(Object *obj, bool value, Error **errp)
{
ARMCPU *cpu = ARM_CPU(obj);
- /* M profile implies PMSA. We have to do this here rather than
- * in realize with the other feature-implication checks because
- * we look at the PMSA bit to see if we should add some properties.
+ if (value) {
+ if (kvm_enabled() && !kvm_arm_pmu_supported()) {
+ error_setg(errp, "'pmu' feature not supported by KVM on this host");
+ return;
+ }
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+ } else {
+ unset_feature(&cpu->env, ARM_FEATURE_PMU);
+ }
+ cpu->has_pmu = value;
+}
+
+unsigned int gt_cntfrq_period_ns(ARMCPU *cpu)
+{
+ /*
+ * The exact approach to calculating guest ticks is:
+ *
+ * muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), cpu->gt_cntfrq_hz,
+ * NANOSECONDS_PER_SECOND);
+ *
+ * We don't do that. Rather we intentionally use integer division
+ * truncation below and in the caller for the conversion of host monotonic
+ * time to guest ticks to provide the exact inverse for the semantics of
+ * the QEMUTimer scale factor. QEMUTimer's scale factor is an integer, so
+ * it loses precision when representing frequencies where
+ * `(NANOSECONDS_PER_SECOND % cpu->gt_cntfrq_hz) > 0` holds. Failing to
+ * provide an exact inverse leads to scheduling timers with negative
+ * periods, which in turn leads to sticky behaviour in the guest.
+ *
+ * Finally, CNTFRQ is effectively capped at 1GHz to ensure our scale factor
+ * cannot become zero.
*/
- if (arm_feature(&cpu->env, ARM_FEATURE_M)) {
- set_feature(&cpu->env, ARM_FEATURE_PMSA);
+ return NANOSECONDS_PER_SECOND > cpu->gt_cntfrq_hz ?
+ NANOSECONDS_PER_SECOND / cpu->gt_cntfrq_hz : 1;
+}
+
+static void arm_cpu_propagate_feature_implications(ARMCPU *cpu)
+{
+ CPUARMState *env = &cpu->env;
+ bool no_aa32 = false;
+
+ /*
+ * Some features automatically imply others: set the feature
+ * bits explicitly for these cases.
+ */
+
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ set_feature(env, ARM_FEATURE_PMSA);
}
+ if (arm_feature(env, ARM_FEATURE_V8)) {
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ set_feature(env, ARM_FEATURE_V7);
+ } else {
+ set_feature(env, ARM_FEATURE_V7VE);
+ }
+ }
+
+ /*
+ * There exist AArch64 cpus without AArch32 support. When KVM
+ * queries ID_ISAR0_EL1 on such a host, the value is UNKNOWN.
+ * Similarly, we cannot check ID_AA64PFR0 without AArch64 support.
+ * As a general principle, we also do not make ID register
+ * consistency checks anywhere unless using TCG, because only
+ * for TCG would a consistency-check failure be a QEMU bug.
+ */
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ no_aa32 = !cpu_isar_feature(aa64_aa32, cpu);
+ }
+
+ if (arm_feature(env, ARM_FEATURE_V7VE)) {
+ /*
+ * v7 Virtualization Extensions. In real hardware this implies
+ * EL2 and also the presence of the Security Extensions.
+ * For QEMU, for backwards-compatibility we implement some
+ * CPUs or CPU configs which have no actual EL2 or EL3 but do
+ * include the various other features that V7VE implies.
+ * Presence of EL2 itself is ARM_FEATURE_EL2, and of the
+ * Security Extensions is ARM_FEATURE_EL3.
+ */
+ assert(!tcg_enabled() || no_aa32 ||
+ cpu_isar_feature(aa32_arm_div, cpu));
+ set_feature(env, ARM_FEATURE_LPAE);
+ set_feature(env, ARM_FEATURE_V7);
+ }
+ if (arm_feature(env, ARM_FEATURE_V7)) {
+ set_feature(env, ARM_FEATURE_VAPA);
+ set_feature(env, ARM_FEATURE_THUMB2);
+ set_feature(env, ARM_FEATURE_MPIDR);
+ if (!arm_feature(env, ARM_FEATURE_M)) {
+ set_feature(env, ARM_FEATURE_V6K);
+ } else {
+ set_feature(env, ARM_FEATURE_V6);
+ }
+
+ /*
+ * Always define VBAR for V7 CPUs even if it doesn't exist in
+ * non-EL3 configs. This is needed by some legacy boards.
+ */
+ set_feature(env, ARM_FEATURE_VBAR);
+ }
+ if (arm_feature(env, ARM_FEATURE_V6K)) {
+ set_feature(env, ARM_FEATURE_V6);
+ set_feature(env, ARM_FEATURE_MVFR);
+ }
+ if (arm_feature(env, ARM_FEATURE_V6)) {
+ set_feature(env, ARM_FEATURE_V5);
+ if (!arm_feature(env, ARM_FEATURE_M)) {
+ assert(!tcg_enabled() || no_aa32 ||
+ cpu_isar_feature(aa32_jazelle, cpu));
+ set_feature(env, ARM_FEATURE_AUXCR);
+ }
+ }
+ if (arm_feature(env, ARM_FEATURE_V5)) {
+ set_feature(env, ARM_FEATURE_V4T);
+ }
+ if (arm_feature(env, ARM_FEATURE_LPAE)) {
+ set_feature(env, ARM_FEATURE_V7MP);
+ }
+ if (arm_feature(env, ARM_FEATURE_CBAR_RO)) {
+ set_feature(env, ARM_FEATURE_CBAR);
+ }
+ if (arm_feature(env, ARM_FEATURE_THUMB2) &&
+ !arm_feature(env, ARM_FEATURE_M)) {
+ set_feature(env, ARM_FEATURE_THUMB_DSP);
+ }
+}
+
+void arm_cpu_post_init(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ /*
+ * Some features imply others. Figure this out now, because we
+ * are going to look at the feature bits in deciding which
+ * properties to add.
+ */
+ arm_cpu_propagate_feature_implications(cpu);
+
if (arm_feature(&cpu->env, ARM_FEATURE_CBAR) ||
arm_feature(&cpu->env, ARM_FEATURE_CBAR_RO)) {
- qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_cbar_property,
- &error_abort);
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_cbar_property);
}
if (!arm_feature(&cpu->env, ARM_FEATURE_M)) {
- qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_hivecs_property,
- &error_abort);
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_hivecs_property);
}
- if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
- qdev_property_add_static(DEVICE(obj), &arm_cpu_rvbar_property,
- &error_abort);
+ if (arm_feature(&cpu->env, ARM_FEATURE_V8)) {
+ object_property_add_uint64_ptr(obj, "rvbar",
+ &cpu->rvbar_prop,
+ OBJ_PROP_FLAG_READWRITE);
}
+#ifndef CONFIG_USER_ONLY
if (arm_feature(&cpu->env, ARM_FEATURE_EL3)) {
/* Add the has_el3 state CPU property only if EL3 is allowed. This will
* prevent "has_el3" from existing on CPUs which cannot support EL3.
*/
- qdev_property_add_static(DEVICE(obj), &arm_cpu_has_el3_property,
- &error_abort);
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_has_el3_property);
-#ifndef CONFIG_USER_ONLY
object_property_add_link(obj, "secure-memory",
TYPE_MEMORY_REGION,
(Object **)&cpu->secure_memory,
qdev_prop_allow_set_link_before_realize,
- OBJ_PROP_LINK_STRONG,
- &error_abort);
-#endif
+ OBJ_PROP_LINK_STRONG);
}
if (arm_feature(&cpu->env, ARM_FEATURE_EL2)) {
- qdev_property_add_static(DEVICE(obj), &arm_cpu_has_el2_property,
- &error_abort);
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_has_el2_property);
}
+#endif
if (arm_feature(&cpu->env, ARM_FEATURE_PMU)) {
- qdev_property_add_static(DEVICE(obj), &arm_cpu_has_pmu_property,
- &error_abort);
+ cpu->has_pmu = true;
+ object_property_add_bool(obj, "pmu", arm_get_pmu, arm_set_pmu);
+ }
+
+ /*
+ * Allow user to turn off VFP and Neon support, but only for TCG --
+ * KVM does not currently allow us to lie to the guest about its
+ * ID/feature registers, so the guest always sees what the host has.
+ */
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ if (cpu_isar_feature(aa64_fp_simd, cpu)) {
+ cpu->has_vfp = true;
+ cpu->has_vfp_d32 = true;
+ if (tcg_enabled() || qtest_enabled()) {
+ qdev_property_add_static(DEVICE(obj),
+ &arm_cpu_has_vfp_property);
+ }
+ }
+ } else if (cpu_isar_feature(aa32_vfp, cpu)) {
+ cpu->has_vfp = true;
+ if (tcg_enabled() || qtest_enabled()) {
+ qdev_property_add_static(DEVICE(obj),
+ &arm_cpu_has_vfp_property);
+ }
+ if (cpu_isar_feature(aa32_simd_r32, cpu)) {
+ cpu->has_vfp_d32 = true;
+ /*
+ * The permitted values of the SIMDReg bits [3:0] on
+ * Armv8-A are either 0b0000 or 0b0010. On such CPUs,
+ * make sure that has_vfp_d32 cannot be set to false.
+ */
+ if ((tcg_enabled() || qtest_enabled())
+ && !(arm_feature(&cpu->env, ARM_FEATURE_V8)
+ && !arm_feature(&cpu->env, ARM_FEATURE_M))) {
+ qdev_property_add_static(DEVICE(obj),
+ &arm_cpu_has_vfp_d32_property);
+ }
+ }
+ }
+
+ if (arm_feature(&cpu->env, ARM_FEATURE_NEON)) {
+ cpu->has_neon = true;
+ if (!kvm_enabled()) {
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_has_neon_property);
+ }
+ }
+
+ if (arm_feature(&cpu->env, ARM_FEATURE_M) &&
+ arm_feature(&cpu->env, ARM_FEATURE_THUMB_DSP)) {
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_has_dsp_property);
}
if (arm_feature(&cpu->env, ARM_FEATURE_PMSA)) {
- qdev_property_add_static(DEVICE(obj), &arm_cpu_has_mpu_property,
- &error_abort);
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_has_mpu_property);
if (arm_feature(&cpu->env, ARM_FEATURE_V7)) {
qdev_property_add_static(DEVICE(obj),
- &arm_cpu_pmsav7_dregion_property,
- &error_abort);
+ &arm_cpu_pmsav7_dregion_property);
}
}
if (arm_feature(&cpu->env, ARM_FEATURE_M_SECURITY)) {
object_property_add_link(obj, "idau", TYPE_IDAU_INTERFACE, &cpu->idau,
qdev_prop_allow_set_link_before_realize,
- OBJ_PROP_LINK_STRONG,
- &error_abort);
- qdev_property_add_static(DEVICE(obj), &arm_cpu_initsvtor_property,
- &error_abort);
+ OBJ_PROP_LINK_STRONG);
+ /*
+ * M profile: initial value of the Secure VTOR. We can't just use
+ * a simple DEFINE_PROP_UINT32 for this because we want to permit
+ * the property to be set after realize.
+ */
+ object_property_add_uint32_ptr(obj, "init-svtor",
+ &cpu->init_svtor,
+ OBJ_PROP_FLAG_READWRITE);
+ }
+ if (arm_feature(&cpu->env, ARM_FEATURE_M)) {
+ /*
+ * Initial value of the NS VTOR (for cores without the Security
+ * extension, this is the only VTOR)
+ */
+ object_property_add_uint32_ptr(obj, "init-nsvtor",
+ &cpu->init_nsvtor,
+ OBJ_PROP_FLAG_READWRITE);
+ }
+
+ /* Not DEFINE_PROP_UINT32: we want this to be settable after realize */
+ object_property_add_uint32_ptr(obj, "psci-conduit",
+ &cpu->psci_conduit,
+ OBJ_PROP_FLAG_READWRITE);
+
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property);
+
+ if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) {
+ qdev_property_add_static(DEVICE(cpu), &arm_cpu_gt_cntfrq_property);
+ }
+
+ if (kvm_enabled()) {
+ kvm_arm_add_vcpu_properties(cpu);
}
- qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property,
- &error_abort);
+#ifndef CONFIG_USER_ONLY
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) &&
+ cpu_isar_feature(aa64_mte, cpu)) {
+ object_property_add_link(obj, "tag-memory",
+ TYPE_MEMORY_REGION,
+ (Object **)&cpu->tag_memory,
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_STRONG);
+
+ if (arm_feature(&cpu->env, ARM_FEATURE_EL3)) {
+ object_property_add_link(obj, "secure-tag-memory",
+ TYPE_MEMORY_REGION,
+ (Object **)&cpu->secure_tag_memory,
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_STRONG);
+ }
+ }
+#endif
}
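For reference, the arm_cpu_has_*_property descriptors registered throughout this function are plain static Property declarations that live earlier in cpu.c; the sketch below shows their general shape (the "vfp" example mirrors the usual declaration, but treat the details as illustrative). qdev_property_add_static() no longer takes an Error ** because registration cannot fail, which is what the dropped &error_abort arguments reflect.

    /* Illustrative shape of the static descriptors used above. */
    static Property arm_cpu_has_vfp_property =
        DEFINE_PROP_BOOL("vfp", ARMCPU, has_vfp, true);

    /* Registration is now infallible:
     *     qdev_property_add_static(DEVICE(obj), &arm_cpu_has_vfp_property);
     */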
static void arm_cpu_finalizefn(Object *obj)
@@ -764,6 +1745,62 @@ static void arm_cpu_finalizefn(Object *obj)
QLIST_REMOVE(hook, node);
g_free(hook);
}
+#ifndef CONFIG_USER_ONLY
+ if (cpu->pmu_timer) {
+ timer_free(cpu->pmu_timer);
+ }
+#endif
+}
+
+void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp)
+{
+ Error *local_err = NULL;
+
+#ifdef TARGET_AARCH64
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ arm_cpu_sve_finalize(cpu, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /*
+ * FEAT_SME is not architecturally dependent on FEAT_SVE (unless
+     * FEAT_SME_FA64 is present). However, our implementation currently
+ * assumes it, so if the user asked for sve=off then turn off SME also.
+ * (KVM doesn't currently support SME at all.)
+ */
+ if (cpu_isar_feature(aa64_sme, cpu) && !cpu_isar_feature(aa64_sve, cpu)) {
+ object_property_set_bool(OBJECT(cpu), "sme", false, &error_abort);
+ }
+
+ arm_cpu_sme_finalize(cpu, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ arm_cpu_pauth_finalize(cpu, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ arm_cpu_lpa2_finalize(cpu, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+#endif
+
+ if (kvm_enabled()) {
+ kvm_arm_steal_time_finalize(cpu, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
}
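A sketch of the calling convention this function follows (hypothetical caller, not part of this patch): each finalize step writes into a local Error * and the function propagates the first failure, so the caller only has to check its own errp once.

    /* Hypothetical caller: finalize feature properties before realize. */
    Error *err = NULL;

    arm_cpu_finalize_features(cpu, &err);
    if (err) {
        error_report_err(err);   /* report and free the Error */
        exit(1);
    }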
static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
@@ -772,16 +1809,20 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
ARMCPU *cpu = ARM_CPU(dev);
ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev);
CPUARMState *env = &cpu->env;
- int pagebits;
Error *local_err = NULL;
+#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY)
+ /* Use pc-relative instructions in system-mode */
+ cs->tcg_cflags |= CF_PCREL;
+#endif
+
/* If we needed to query the host kernel for the CPU features
 * then it's possible that the probe might have failed in the initfn, but
* this is the first point where we can report it.
*/
if (cpu->host_cpu_probe_failed) {
- if (!kvm_enabled()) {
- error_setg(errp, "The 'host' CPU type can only be used with KVM");
+ if (!kvm_enabled() && !hvf_enabled()) {
+ error_setg(errp, "The 'host' CPU type can only be used with KVM or HVF");
} else {
error_setg(errp, "Failed to retrieve host CPU features");
}
@@ -804,6 +1845,62 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
return;
}
}
+
+ if (!tcg_enabled() && !qtest_enabled()) {
+ /*
+ * We assume that no accelerator except TCG (and the "not really an
+ * accelerator" qtest) can handle these features, because Arm hardware
+ * virtualization can't virtualize them.
+ *
+ * Catch all the cases which might cause us to create more than one
+ * address space for the CPU (otherwise we will assert() later in
+ * cpu_address_space_init()).
+ */
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ error_setg(errp,
+ "Cannot enable %s when using an M-profile guest CPU",
+ current_accel_name());
+ return;
+ }
+ if (cpu->has_el3) {
+ error_setg(errp,
+ "Cannot enable %s when guest CPU has EL3 enabled",
+ current_accel_name());
+ return;
+ }
+ if (cpu->tag_memory) {
+ error_setg(errp,
+                       "Cannot enable %s when guest CPU has MTE enabled",
+ current_accel_name());
+ return;
+ }
+ }
+
+ {
+ uint64_t scale;
+
+ if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) {
+ if (!cpu->gt_cntfrq_hz) {
+ error_setg(errp, "Invalid CNTFRQ: %"PRId64"Hz",
+ cpu->gt_cntfrq_hz);
+ return;
+ }
+ scale = gt_cntfrq_period_ns(cpu);
+ } else {
+ scale = GTIMER_SCALE;
+ }
+
+ cpu->gt_timer[GTIMER_PHYS] = timer_new(QEMU_CLOCK_VIRTUAL, scale,
+ arm_gt_ptimer_cb, cpu);
+ cpu->gt_timer[GTIMER_VIRT] = timer_new(QEMU_CLOCK_VIRTUAL, scale,
+ arm_gt_vtimer_cb, cpu);
+ cpu->gt_timer[GTIMER_HYP] = timer_new(QEMU_CLOCK_VIRTUAL, scale,
+ arm_gt_htimer_cb, cpu);
+ cpu->gt_timer[GTIMER_SEC] = timer_new(QEMU_CLOCK_VIRTUAL, scale,
+ arm_gt_stimer_cb, cpu);
+ cpu->gt_timer[GTIMER_HYPVIRT] = timer_new(QEMU_CLOCK_VIRTUAL, scale,
+ arm_gt_hvtimer_cb, cpu);
+ }
#endif
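The scale passed to timer_new() above is the counter period in nanoseconds. As a rough worked example, assuming the traditional 62.5 MHz default frequency (an assumption; boards can override gt-cntfrq-hz):

    /* Sketch only: 1e9 ns / 62,500,000 Hz = 16 ns per tick, matching the
     * legacy GTIMER_SCALE fallback used when there is no generic timer.
     */
    static uint64_t period_ns(uint64_t cntfrq_hz)
    {
        return NANOSECONDS_PER_SECOND / cntfrq_hz;
    }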
cpu_exec_realizefn(cs, &local_err);
@@ -812,99 +1909,226 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
return;
}
- /* Some features automatically imply others: */
- if (arm_feature(env, ARM_FEATURE_V8)) {
- set_feature(env, ARM_FEATURE_V7VE);
+ arm_cpu_finalize_features(cpu, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ return;
}
- if (arm_feature(env, ARM_FEATURE_V7VE)) {
- /* v7 Virtualization Extensions. In real hardware this implies
- * EL2 and also the presence of the Security Extensions.
- * For QEMU, for backwards-compatibility we implement some
- * CPUs or CPU configs which have no actual EL2 or EL3 but do
- * include the various other features that V7VE implies.
- * Presence of EL2 itself is ARM_FEATURE_EL2, and of the
- * Security Extensions is ARM_FEATURE_EL3.
+
+#ifdef CONFIG_USER_ONLY
+ /*
+ * User mode relies on IC IVAU instructions to catch modification of
+ * dual-mapped code.
+ *
+ * Clear CTR_EL0.DIC to ensure that software that honors these flags uses
+ * IC IVAU even if the emulated processor does not normally require it.
+ */
+ cpu->ctr = FIELD_DP64(cpu->ctr, CTR_EL0, DIC, 0);
+#endif
+
+ if (arm_feature(env, ARM_FEATURE_AARCH64) &&
+ cpu->has_vfp != cpu->has_neon) {
+ /*
+ * This is an architectural requirement for AArch64; AArch32 is
+ * more flexible and permits VFP-no-Neon and Neon-no-VFP.
*/
- set_feature(env, ARM_FEATURE_ARM_DIV);
- set_feature(env, ARM_FEATURE_LPAE);
- set_feature(env, ARM_FEATURE_V7);
+ error_setg(errp,
+ "AArch64 CPUs must have both VFP and Neon or neither");
+ return;
}
- if (arm_feature(env, ARM_FEATURE_V7)) {
- set_feature(env, ARM_FEATURE_VAPA);
- set_feature(env, ARM_FEATURE_THUMB2);
- set_feature(env, ARM_FEATURE_MPIDR);
- if (!arm_feature(env, ARM_FEATURE_M)) {
- set_feature(env, ARM_FEATURE_V6K);
- } else {
- set_feature(env, ARM_FEATURE_V6);
- }
- /* Always define VBAR for V7 CPUs even if it doesn't exist in
- * non-EL3 configs. This is needed by some legacy boards.
- */
- set_feature(env, ARM_FEATURE_VBAR);
+ if (cpu->has_vfp_d32 != cpu->has_neon) {
+ error_setg(errp, "ARM CPUs must have both VFP-D32 and Neon or neither");
+ return;
}
- if (arm_feature(env, ARM_FEATURE_V6K)) {
- set_feature(env, ARM_FEATURE_V6);
- set_feature(env, ARM_FEATURE_MVFR);
+
+ if (!cpu->has_vfp_d32) {
+ uint32_t u;
+
+ u = cpu->isar.mvfr0;
+ u = FIELD_DP32(u, MVFR0, SIMDREG, 1); /* 16 registers */
+ cpu->isar.mvfr0 = u;
}
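The FIELD_DP32()/FIELD_DP64() calls used through the rest of this function come from hw/registerfields.h: they deposit a value into one named bitfield and leave the rest of the word untouched. A self-contained sketch using a hypothetical DEMO register rather than a real ID register:

    #include <stdint.h>
    #include "hw/registerfields.h"

    FIELD(DEMO, LO, 0, 4)   /* hypothetical field occupying bits [3:0] */
    FIELD(DEMO, HI, 4, 4)   /* hypothetical field occupying bits [7:4] */

    static uint32_t demo_clear_hi(uint32_t r)
    {
        /* Same read-modify-write pattern as the ISAR/MVFR updates here. */
        return FIELD_DP32(r, DEMO, HI, 0);
    }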
- if (arm_feature(env, ARM_FEATURE_V6)) {
- set_feature(env, ARM_FEATURE_V5);
- set_feature(env, ARM_FEATURE_JAZELLE);
+
+ if (!cpu->has_vfp) {
+ uint64_t t;
+ uint32_t u;
+
+ t = cpu->isar.id_aa64isar1;
+ t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 0);
+ cpu->isar.id_aa64isar1 = t;
+
+ t = cpu->isar.id_aa64pfr0;
+ t = FIELD_DP64(t, ID_AA64PFR0, FP, 0xf);
+ cpu->isar.id_aa64pfr0 = t;
+
+ u = cpu->isar.id_isar6;
+ u = FIELD_DP32(u, ID_ISAR6, JSCVT, 0);
+ u = FIELD_DP32(u, ID_ISAR6, BF16, 0);
+ cpu->isar.id_isar6 = u;
+
+ u = cpu->isar.mvfr0;
+ u = FIELD_DP32(u, MVFR0, FPSP, 0);
+ u = FIELD_DP32(u, MVFR0, FPDP, 0);
+ u = FIELD_DP32(u, MVFR0, FPDIVIDE, 0);
+ u = FIELD_DP32(u, MVFR0, FPSQRT, 0);
+ u = FIELD_DP32(u, MVFR0, FPROUND, 0);
if (!arm_feature(env, ARM_FEATURE_M)) {
- set_feature(env, ARM_FEATURE_AUXCR);
+ u = FIELD_DP32(u, MVFR0, FPTRAP, 0);
+ u = FIELD_DP32(u, MVFR0, FPSHVEC, 0);
+ }
+ cpu->isar.mvfr0 = u;
+
+ u = cpu->isar.mvfr1;
+ u = FIELD_DP32(u, MVFR1, FPFTZ, 0);
+ u = FIELD_DP32(u, MVFR1, FPDNAN, 0);
+ u = FIELD_DP32(u, MVFR1, FPHP, 0);
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ u = FIELD_DP32(u, MVFR1, FP16, 0);
+ }
+ cpu->isar.mvfr1 = u;
+
+ u = cpu->isar.mvfr2;
+ u = FIELD_DP32(u, MVFR2, FPMISC, 0);
+ cpu->isar.mvfr2 = u;
+ }
+
+ if (!cpu->has_neon) {
+ uint64_t t;
+ uint32_t u;
+
+ unset_feature(env, ARM_FEATURE_NEON);
+
+ t = cpu->isar.id_aa64isar0;
+ t = FIELD_DP64(t, ID_AA64ISAR0, AES, 0);
+ t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 0);
+ t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 0);
+ t = FIELD_DP64(t, ID_AA64ISAR0, SHA3, 0);
+ t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 0);
+ t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 0);
+ t = FIELD_DP64(t, ID_AA64ISAR0, DP, 0);
+ cpu->isar.id_aa64isar0 = t;
+
+ t = cpu->isar.id_aa64isar1;
+ t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 0);
+ t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 0);
+ t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 0);
+ cpu->isar.id_aa64isar1 = t;
+
+ t = cpu->isar.id_aa64pfr0;
+ t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 0xf);
+ cpu->isar.id_aa64pfr0 = t;
+
+ u = cpu->isar.id_isar5;
+ u = FIELD_DP32(u, ID_ISAR5, AES, 0);
+ u = FIELD_DP32(u, ID_ISAR5, SHA1, 0);
+ u = FIELD_DP32(u, ID_ISAR5, SHA2, 0);
+ u = FIELD_DP32(u, ID_ISAR5, RDM, 0);
+ u = FIELD_DP32(u, ID_ISAR5, VCMA, 0);
+ cpu->isar.id_isar5 = u;
+
+ u = cpu->isar.id_isar6;
+ u = FIELD_DP32(u, ID_ISAR6, DP, 0);
+ u = FIELD_DP32(u, ID_ISAR6, FHM, 0);
+ u = FIELD_DP32(u, ID_ISAR6, BF16, 0);
+ u = FIELD_DP32(u, ID_ISAR6, I8MM, 0);
+ cpu->isar.id_isar6 = u;
+
+ if (!arm_feature(env, ARM_FEATURE_M)) {
+ u = cpu->isar.mvfr1;
+ u = FIELD_DP32(u, MVFR1, SIMDLS, 0);
+ u = FIELD_DP32(u, MVFR1, SIMDINT, 0);
+ u = FIELD_DP32(u, MVFR1, SIMDSP, 0);
+ u = FIELD_DP32(u, MVFR1, SIMDHP, 0);
+ cpu->isar.mvfr1 = u;
+
+ u = cpu->isar.mvfr2;
+ u = FIELD_DP32(u, MVFR2, SIMDMISC, 0);
+ cpu->isar.mvfr2 = u;
}
}
- if (arm_feature(env, ARM_FEATURE_V5)) {
- set_feature(env, ARM_FEATURE_V4T);
- }
- if (arm_feature(env, ARM_FEATURE_M)) {
- set_feature(env, ARM_FEATURE_THUMB_DIV);
- }
- if (arm_feature(env, ARM_FEATURE_ARM_DIV)) {
- set_feature(env, ARM_FEATURE_THUMB_DIV);
- }
- if (arm_feature(env, ARM_FEATURE_VFP4)) {
- set_feature(env, ARM_FEATURE_VFP3);
- set_feature(env, ARM_FEATURE_VFP_FP16);
- }
- if (arm_feature(env, ARM_FEATURE_VFP3)) {
- set_feature(env, ARM_FEATURE_VFP);
- }
- if (arm_feature(env, ARM_FEATURE_LPAE)) {
- set_feature(env, ARM_FEATURE_V7MP);
- set_feature(env, ARM_FEATURE_PXN);
- }
- if (arm_feature(env, ARM_FEATURE_CBAR_RO)) {
- set_feature(env, ARM_FEATURE_CBAR);
- }
- if (arm_feature(env, ARM_FEATURE_THUMB2) &&
- !arm_feature(env, ARM_FEATURE_M)) {
- set_feature(env, ARM_FEATURE_THUMB_DSP);
+
+ if (!cpu->has_neon && !cpu->has_vfp) {
+ uint64_t t;
+ uint32_t u;
+
+ t = cpu->isar.id_aa64isar0;
+ t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 0);
+ cpu->isar.id_aa64isar0 = t;
+
+ t = cpu->isar.id_aa64isar1;
+ t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 0);
+ cpu->isar.id_aa64isar1 = t;
+
+ u = cpu->isar.mvfr0;
+ u = FIELD_DP32(u, MVFR0, SIMDREG, 0);
+ cpu->isar.mvfr0 = u;
+
+ /* Despite the name, this field covers both VFP and Neon */
+ u = cpu->isar.mvfr1;
+ u = FIELD_DP32(u, MVFR1, SIMDFMAC, 0);
+ cpu->isar.mvfr1 = u;
}
- if (arm_feature(env, ARM_FEATURE_V7) &&
- !arm_feature(env, ARM_FEATURE_M) &&
- !arm_feature(env, ARM_FEATURE_PMSA)) {
- /* v7VMSA drops support for the old ARMv5 tiny pages, so we
- * can use 4K pages.
- */
- pagebits = 12;
- } else {
- /* For CPUs which might have tiny 1K pages, or which have an
- * MPU and might have small region sizes, stick with 1K pages.
- */
- pagebits = 10;
+ if (arm_feature(env, ARM_FEATURE_M) && !cpu->has_dsp) {
+ uint32_t u;
+
+ unset_feature(env, ARM_FEATURE_THUMB_DSP);
+
+ u = cpu->isar.id_isar1;
+ u = FIELD_DP32(u, ID_ISAR1, EXTEND, 1);
+ cpu->isar.id_isar1 = u;
+
+ u = cpu->isar.id_isar2;
+ u = FIELD_DP32(u, ID_ISAR2, MULTU, 1);
+ u = FIELD_DP32(u, ID_ISAR2, MULTS, 1);
+ cpu->isar.id_isar2 = u;
+
+ u = cpu->isar.id_isar3;
+ u = FIELD_DP32(u, ID_ISAR3, SIMD, 1);
+ u = FIELD_DP32(u, ID_ISAR3, SATURATE, 0);
+ cpu->isar.id_isar3 = u;
}
- if (!set_preferred_target_page_bits(pagebits)) {
- /* This can only ever happen for hotplugging a CPU, or if
- * the board code incorrectly creates a CPU which it has
- * promised via minimum_page_size that it will not.
- */
- error_setg(errp, "This CPU requires a smaller page size than the "
- "system is using");
- return;
+
+
+ /*
+ * We rely on no XScale CPU having VFP so we can use the same bits in the
+ * TB flags field for VECSTRIDE and XSCALE_CPAR.
+ */
+ assert(arm_feature(env, ARM_FEATURE_AARCH64) ||
+ !cpu_isar_feature(aa32_vfp_simd, cpu) ||
+ !arm_feature(env, ARM_FEATURE_XSCALE));
+
+#ifndef CONFIG_USER_ONLY
+ {
+ int pagebits;
+ if (arm_feature(env, ARM_FEATURE_V7) &&
+ !arm_feature(env, ARM_FEATURE_M) &&
+ !arm_feature(env, ARM_FEATURE_PMSA)) {
+ /*
+ * v7VMSA drops support for the old ARMv5 tiny pages,
+ * so we can use 4K pages.
+ */
+ pagebits = 12;
+ } else {
+ /*
+ * For CPUs which might have tiny 1K pages, or which have an
+ * MPU and might have small region sizes, stick with 1K pages.
+ */
+ pagebits = 10;
+ }
+ if (!set_preferred_target_page_bits(pagebits)) {
+ /*
+ * This can only ever happen for hotplugging a CPU, or if
+ * the board code incorrectly creates a CPU which it has
+ * promised via minimum_page_size that it will not.
+ */
+ error_setg(errp, "This CPU requires a smaller page size "
+ "than the system is using");
+ return;
+ }
}
+#endif
/* This cpu-id-to-MPIDR affinity is used only for TCG; KVM will override it.
* We don't support setting cluster ID ([16..23]) (known as Aff2
@@ -912,8 +2136,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
* so these bits always RAZ.
*/
if (cpu->mp_affinity == ARM64_AFFINITY_INVALID) {
- cpu->mp_affinity = arm_cpu_mp_affinity(cs->cpu_index,
- ARM_DEFAULT_CPUS_PER_CLUSTER);
+ cpu->mp_affinity = arm_build_mp_affinity(cs->cpu_index,
+ ARM_DEFAULT_CPUS_PER_CLUSTER);
}
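For context, the default affinity packing used when mp-affinity is left unset places the core index within a cluster in Aff0 and the cluster number in Aff1. A hedged sketch of the arithmetic (hypothetical helper; MPIDR field positions per the Arm ARM, Aff0 in bits [7:0] and Aff1 in bits [15:8]):

    static uint64_t sketch_mp_affinity(int idx, int cpus_per_cluster)
    {
        uint64_t aff0 = idx % cpus_per_cluster;   /* core within cluster */
        uint64_t aff1 = idx / cpus_per_cluster;   /* cluster number */
        return (aff1 << 8) | aff0;
    }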
if (cpu->reset_hivecs) {
@@ -921,24 +2145,31 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
}
if (cpu->cfgend) {
- if (arm_feature(&cpu->env, ARM_FEATURE_V7)) {
+ if (arm_feature(env, ARM_FEATURE_V7)) {
cpu->reset_sctlr |= SCTLR_EE;
} else {
cpu->reset_sctlr |= SCTLR_B;
}
}
- if (!cpu->has_el3) {
+ if (!arm_feature(env, ARM_FEATURE_M) && !cpu->has_el3) {
/* If the has_el3 CPU property is disabled then we need to disable the
* feature.
*/
unset_feature(env, ARM_FEATURE_EL3);
- /* Disable the security extension feature bits in the processor feature
- * registers as well. These are id_pfr1[7:4] and id_aa64pfr0[15:12].
+ /*
+ * Disable the security extension feature bits in the processor
+ * feature registers as well.
*/
- cpu->id_pfr1 &= ~0xf0;
- cpu->id_aa64pfr0 &= ~0xf000;
+ cpu->isar.id_pfr1 = FIELD_DP32(cpu->isar.id_pfr1, ID_PFR1, SECURITY, 0);
+ cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPSDBG, 0);
+ cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0,
+ ID_AA64PFR0, EL3, 0);
+
+ /* Disable the realm management extension, which requires EL3. */
+ cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0,
+ ID_AA64PFR0, RME, 0);
}
if (!cpu->has_el2) {
@@ -947,26 +2178,103 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
if (!cpu->has_pmu) {
unset_feature(env, ARM_FEATURE_PMU);
- cpu->id_aa64dfr0 &= ~0xf00;
+ }
+ if (arm_feature(env, ARM_FEATURE_PMU)) {
+ pmu_init(cpu);
+
+ if (!kvm_enabled()) {
+ arm_register_pre_el_change_hook(cpu, &pmu_pre_el_change, 0);
+ arm_register_el_change_hook(cpu, &pmu_post_el_change, 0);
+ }
+
+#ifndef CONFIG_USER_ONLY
+ cpu->pmu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, arm_pmu_timer_cb,
+ cpu);
+#endif
+ } else {
+ cpu->isar.id_aa64dfr0 =
+ FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMUVER, 0);
+ cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, PERFMON, 0);
+ cpu->pmceid0 = 0;
+ cpu->pmceid1 = 0;
}
if (!arm_feature(env, ARM_FEATURE_EL2)) {
- /* Disable the hypervisor feature bits in the processor feature
- * registers if we don't have EL2. These are id_pfr1[15:12] and
- * id_aa64pfr0_el1[11:8].
+ /*
+ * Disable the hypervisor feature bits in the processor feature
+ * registers if we don't have EL2.
+ */
+ cpu->isar.id_aa64pfr0 = FIELD_DP64(cpu->isar.id_aa64pfr0,
+ ID_AA64PFR0, EL2, 0);
+ cpu->isar.id_pfr1 = FIELD_DP32(cpu->isar.id_pfr1,
+ ID_PFR1, VIRTUALIZATION, 0);
+ }
+
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ /*
+         * The architectural range of GM blocksize is 2-6; however, QEMU
+         * does not support a blocksize of 2 (see HELPER(ldgm)).
*/
- cpu->id_aa64pfr0 &= ~0xf00;
- cpu->id_pfr1 &= ~0xf000;
+ if (tcg_enabled()) {
+ assert(cpu->gm_blocksize >= 3 && cpu->gm_blocksize <= 6);
+ }
+
+#ifndef CONFIG_USER_ONLY
+ /*
+ * If we do not have tag-memory provided by the machine,
+ * reduce MTE support to instructions enabled at EL0.
+ * This matches Cortex-A710 BROADCASTMTE input being LOW.
+ */
+ if (cpu->tag_memory == NULL) {
+ cpu->isar.id_aa64pfr1 =
+ FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
+ }
+#endif
+ }
+
+ if (tcg_enabled()) {
+ /*
+ * Don't report some architectural features in the ID registers
+         * where TCG does not yet implement them (not even a minimal
+ * stub version). This avoids guests falling over when they
+ * try to access the non-existent system registers for them.
+ */
+ /* FEAT_SPE (Statistical Profiling Extension) */
+ cpu->isar.id_aa64dfr0 =
+ FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMSVER, 0);
+ /* FEAT_TRBE (Trace Buffer Extension) */
+ cpu->isar.id_aa64dfr0 =
+ FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEBUFFER, 0);
+ /* FEAT_TRF (Self-hosted Trace Extension) */
+ cpu->isar.id_aa64dfr0 =
+ FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEFILT, 0);
+ cpu->isar.id_dfr0 =
+ FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, TRACEFILT, 0);
+ /* Trace Macrocell system register access */
+ cpu->isar.id_aa64dfr0 =
+ FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, TRACEVER, 0);
+ cpu->isar.id_dfr0 =
+ FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, COPTRC, 0);
+ /* Memory mapped trace */
+ cpu->isar.id_dfr0 =
+ FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, MMAPTRC, 0);
+ /* FEAT_AMU (Activity Monitors Extension) */
+ cpu->isar.id_aa64pfr0 =
+ FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, AMU, 0);
+ cpu->isar.id_pfr0 =
+ FIELD_DP32(cpu->isar.id_pfr0, ID_PFR0, AMU, 0);
+ /* FEAT_MPAM (Memory Partitioning and Monitoring Extension) */
+ cpu->isar.id_aa64pfr0 =
+ FIELD_DP64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, MPAM, 0);
}
/* MPU can be configured out of a PMSA CPU either by setting has-mpu
* to false or by setting pmsav7-dregion to 0.
*/
- if (!cpu->has_mpu) {
- cpu->pmsav7_dregion = 0;
- }
- if (cpu->pmsav7_dregion == 0) {
+ if (!cpu->has_mpu || cpu->pmsav7_dregion == 0) {
cpu->has_mpu = false;
+ cpu->pmsav7_dregion = 0;
+ cpu->pmsav8r_hdregion = 0;
}
if (arm_feature(env, ARM_FEATURE_PMSA) &&
@@ -993,6 +2301,19 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
env->pmsav7.dracr = g_new0(uint32_t, nr);
}
}
+
+ if (cpu->pmsav8r_hdregion > 0xff) {
+ error_setg(errp, "PMSAv8 MPU EL2 #regions invalid %" PRIu32,
+ cpu->pmsav8r_hdregion);
+ return;
+ }
+
+ if (cpu->pmsav8r_hdregion) {
+ env->pmsav8.hprbar = g_new0(uint32_t,
+ cpu->pmsav8r_hdregion);
+ env->pmsav8.hprlar = g_new0(uint32_t,
+ cpu->pmsav8r_hdregion);
+ }
}
if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
@@ -1013,23 +2334,49 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
set_feature(env, ARM_FEATURE_VBAR);
}
+#ifndef CONFIG_USER_ONLY
+ if (tcg_enabled() && cpu_isar_feature(aa64_rme, cpu)) {
+ arm_register_el_change_hook(cpu, &gt_rme_post_el_change, 0);
+ }
+#endif
+
register_cp_regs_for_features(cpu);
arm_cpu_register_gdb_regs_for_features(cpu);
init_cpreg_list(cpu);
#ifndef CONFIG_USER_ONLY
- if (cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY)) {
- cs->num_ases = 2;
+ MachineState *ms = MACHINE(qdev_get_machine());
+ unsigned int smp_cpus = ms->smp.cpus;
+ bool has_secure = cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY);
+ /*
+ * We must set cs->num_ases to the final value before
+ * the first call to cpu_address_space_init.
+ */
+ if (cpu->tag_memory != NULL) {
+ cs->num_ases = 3 + has_secure;
+ } else {
+ cs->num_ases = 1 + has_secure;
+ }
+
+ if (has_secure) {
if (!cpu->secure_memory) {
cpu->secure_memory = cs->memory;
}
cpu_address_space_init(cs, ARMASIdx_S, "cpu-secure-memory",
cpu->secure_memory);
- } else {
- cs->num_ases = 1;
}
+
+ if (cpu->tag_memory != NULL) {
+ cpu_address_space_init(cs, ARMASIdx_TagNS, "cpu-tag-memory",
+ cpu->tag_memory);
+ if (has_secure) {
+ cpu_address_space_init(cs, ARMASIdx_TagS, "cpu-tag-memory",
+ cpu->secure_tag_memory);
+ }
+ }
+
cpu_address_space_init(cs, ARMASIdx_NS, "cpu-memory", cs->memory);
/* No core_count specified, default to smp_cpus. */
@@ -1038,6 +2385,30 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
}
#endif
+ if (tcg_enabled()) {
+ int dcz_blocklen = 4 << cpu->dcz_blocksize;
+
+ /*
+ * We only support DCZ blocklen that fits on one page.
+ *
+         * Architecturally this is always true. However, TARGET_PAGE_SIZE
+ * is variable and, for compatibility with -machine virt-2.7,
+ * is only 1KiB, as an artifact of legacy ARMv5 subpage support.
+ * But even then, while the largest architectural DCZ blocklen
+ * is 2KiB, no cpu actually uses such a large blocklen.
+ */
+ assert(dcz_blocklen <= TARGET_PAGE_SIZE);
+
+ /*
+ * We only support DCZ blocksize >= 2*TAG_GRANULE, which is to say
+ * both nibbles of each byte storing tag data may be written at once.
+ * Since TAG_GRANULE is 16, this means that blocklen must be >= 32.
+ */
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ assert(dcz_blocklen >= 2 * TAG_GRANULE);
+ }
+ }
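A quick worked example of the two assertions above, using a dcz_blocksize value that is common for Arm CPU models (an assumption, not something this hunk states):

    #include <assert.h>

    static void dcz_worked_example(void)
    {
        unsigned dcz_blocksize = 4;          /* typical encoding for 64 bytes */
        int blocklen = 4 << dcz_blocksize;   /* = 64 bytes per DC ZVA */

        assert(blocklen <= 1024);            /* smallest TARGET_PAGE_SIZE here is 1 KiB */
        assert(blocklen >= 2 * 16);          /* 2 * TAG_GRANULE, with TAG_GRANULE == 16 */
    }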
+
qemu_init_vcpu(cs);
cpu_reset(cs);
@@ -1065,852 +2436,12 @@ static ObjectClass *arm_cpu_class_by_name(const char *cpu_model)
oc = object_class_by_name(typename);
g_strfreev(cpuname);
g_free(typename);
- if (!oc || !object_class_dynamic_cast(oc, TYPE_ARM_CPU) ||
- object_class_is_abstract(oc)) {
- return NULL;
- }
- return oc;
-}
-
-/* CPU models. These are not needed for the AArch64 linux-user build. */
-#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
-
-static void arm926_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "arm,arm926";
- set_feature(&cpu->env, ARM_FEATURE_V5);
- set_feature(&cpu->env, ARM_FEATURE_VFP);
- set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
- set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN);
- set_feature(&cpu->env, ARM_FEATURE_JAZELLE);
- cpu->midr = 0x41069265;
- cpu->reset_fpsid = 0x41011090;
- cpu->ctr = 0x1dd20d2;
- cpu->reset_sctlr = 0x00090078;
-}
-
-static void arm946_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "arm,arm946";
- set_feature(&cpu->env, ARM_FEATURE_V5);
- set_feature(&cpu->env, ARM_FEATURE_PMSA);
- set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
- cpu->midr = 0x41059461;
- cpu->ctr = 0x0f004006;
- cpu->reset_sctlr = 0x00000078;
-}
-
-static void arm1026_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "arm,arm1026";
- set_feature(&cpu->env, ARM_FEATURE_V5);
- set_feature(&cpu->env, ARM_FEATURE_VFP);
- set_feature(&cpu->env, ARM_FEATURE_AUXCR);
- set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
- set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN);
- set_feature(&cpu->env, ARM_FEATURE_JAZELLE);
- cpu->midr = 0x4106a262;
- cpu->reset_fpsid = 0x410110a0;
- cpu->ctr = 0x1dd20d2;
- cpu->reset_sctlr = 0x00090078;
- cpu->reset_auxcr = 1;
- {
- /* The 1026 had an IFAR at c6,c0,0,1 rather than the ARMv6 c6,c0,0,2 */
- ARMCPRegInfo ifar = {
- .name = "IFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1,
- .access = PL1_RW,
- .fieldoffset = offsetof(CPUARMState, cp15.ifar_ns),
- .resetvalue = 0
- };
- define_one_arm_cp_reg(cpu, &ifar);
- }
-}
-
-static void arm1136_r2_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
- /* What qemu calls "arm1136_r2" is actually the 1136 r0p2, ie an
- * older core than plain "arm1136". In particular this does not
- * have the v6K features.
- * These ID register values are correct for 1136 but may be wrong
- * for 1136_r2 (in particular r0p2 does not actually implement most
- * of the ID registers).
- */
-
- cpu->dtb_compatible = "arm,arm1136";
- set_feature(&cpu->env, ARM_FEATURE_V6);
- set_feature(&cpu->env, ARM_FEATURE_VFP);
- set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
- set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG);
- set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS);
- cpu->midr = 0x4107b362;
- cpu->reset_fpsid = 0x410120b4;
- cpu->mvfr0 = 0x11111111;
- cpu->mvfr1 = 0x00000000;
- cpu->ctr = 0x1dd20d2;
- cpu->reset_sctlr = 0x00050078;
- cpu->id_pfr0 = 0x111;
- cpu->id_pfr1 = 0x1;
- cpu->id_dfr0 = 0x2;
- cpu->id_afr0 = 0x3;
- cpu->id_mmfr0 = 0x01130003;
- cpu->id_mmfr1 = 0x10030302;
- cpu->id_mmfr2 = 0x01222110;
- cpu->id_isar0 = 0x00140011;
- cpu->id_isar1 = 0x12002111;
- cpu->id_isar2 = 0x11231111;
- cpu->id_isar3 = 0x01102131;
- cpu->id_isar4 = 0x141;
- cpu->reset_auxcr = 7;
-}
-
-static void arm1136_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "arm,arm1136";
- set_feature(&cpu->env, ARM_FEATURE_V6K);
- set_feature(&cpu->env, ARM_FEATURE_V6);
- set_feature(&cpu->env, ARM_FEATURE_VFP);
- set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
- set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG);
- set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS);
- cpu->midr = 0x4117b363;
- cpu->reset_fpsid = 0x410120b4;
- cpu->mvfr0 = 0x11111111;
- cpu->mvfr1 = 0x00000000;
- cpu->ctr = 0x1dd20d2;
- cpu->reset_sctlr = 0x00050078;
- cpu->id_pfr0 = 0x111;
- cpu->id_pfr1 = 0x1;
- cpu->id_dfr0 = 0x2;
- cpu->id_afr0 = 0x3;
- cpu->id_mmfr0 = 0x01130003;
- cpu->id_mmfr1 = 0x10030302;
- cpu->id_mmfr2 = 0x01222110;
- cpu->id_isar0 = 0x00140011;
- cpu->id_isar1 = 0x12002111;
- cpu->id_isar2 = 0x11231111;
- cpu->id_isar3 = 0x01102131;
- cpu->id_isar4 = 0x141;
- cpu->reset_auxcr = 7;
-}
-
-static void arm1176_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "arm,arm1176";
- set_feature(&cpu->env, ARM_FEATURE_V6K);
- set_feature(&cpu->env, ARM_FEATURE_VFP);
- set_feature(&cpu->env, ARM_FEATURE_VAPA);
- set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
- set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG);
- set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS);
- set_feature(&cpu->env, ARM_FEATURE_EL3);
- cpu->midr = 0x410fb767;
- cpu->reset_fpsid = 0x410120b5;
- cpu->mvfr0 = 0x11111111;
- cpu->mvfr1 = 0x00000000;
- cpu->ctr = 0x1dd20d2;
- cpu->reset_sctlr = 0x00050078;
- cpu->id_pfr0 = 0x111;
- cpu->id_pfr1 = 0x11;
- cpu->id_dfr0 = 0x33;
- cpu->id_afr0 = 0;
- cpu->id_mmfr0 = 0x01130003;
- cpu->id_mmfr1 = 0x10030302;
- cpu->id_mmfr2 = 0x01222100;
- cpu->id_isar0 = 0x0140011;
- cpu->id_isar1 = 0x12002111;
- cpu->id_isar2 = 0x11231121;
- cpu->id_isar3 = 0x01102131;
- cpu->id_isar4 = 0x01141;
- cpu->reset_auxcr = 7;
-}
-
-static void arm11mpcore_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "arm,arm11mpcore";
- set_feature(&cpu->env, ARM_FEATURE_V6K);
- set_feature(&cpu->env, ARM_FEATURE_VFP);
- set_feature(&cpu->env, ARM_FEATURE_VAPA);
- set_feature(&cpu->env, ARM_FEATURE_MPIDR);
- set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
- cpu->midr = 0x410fb022;
- cpu->reset_fpsid = 0x410120b4;
- cpu->mvfr0 = 0x11111111;
- cpu->mvfr1 = 0x00000000;
- cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */
- cpu->id_pfr0 = 0x111;
- cpu->id_pfr1 = 0x1;
- cpu->id_dfr0 = 0;
- cpu->id_afr0 = 0x2;
- cpu->id_mmfr0 = 0x01100103;
- cpu->id_mmfr1 = 0x10020302;
- cpu->id_mmfr2 = 0x01222000;
- cpu->id_isar0 = 0x00100011;
- cpu->id_isar1 = 0x12002111;
- cpu->id_isar2 = 0x11221011;
- cpu->id_isar3 = 0x01102131;
- cpu->id_isar4 = 0x141;
- cpu->reset_auxcr = 1;
-}
-
-static void cortex_m0_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
- set_feature(&cpu->env, ARM_FEATURE_V6);
- set_feature(&cpu->env, ARM_FEATURE_M);
-
- cpu->midr = 0x410cc200;
-}
-
-static void cortex_m3_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
- set_feature(&cpu->env, ARM_FEATURE_V7);
- set_feature(&cpu->env, ARM_FEATURE_M);
- set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
- cpu->midr = 0x410fc231;
- cpu->pmsav7_dregion = 8;
- cpu->id_pfr0 = 0x00000030;
- cpu->id_pfr1 = 0x00000200;
- cpu->id_dfr0 = 0x00100000;
- cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x00000030;
- cpu->id_mmfr1 = 0x00000000;
- cpu->id_mmfr2 = 0x00000000;
- cpu->id_mmfr3 = 0x00000000;
- cpu->id_isar0 = 0x01141110;
- cpu->id_isar1 = 0x02111000;
- cpu->id_isar2 = 0x21112231;
- cpu->id_isar3 = 0x01111110;
- cpu->id_isar4 = 0x01310102;
- cpu->id_isar5 = 0x00000000;
- cpu->id_isar6 = 0x00000000;
-}
-
-static void cortex_m4_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- set_feature(&cpu->env, ARM_FEATURE_V7);
- set_feature(&cpu->env, ARM_FEATURE_M);
- set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
- set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP);
- cpu->midr = 0x410fc240; /* r0p0 */
- cpu->pmsav7_dregion = 8;
- cpu->id_pfr0 = 0x00000030;
- cpu->id_pfr1 = 0x00000200;
- cpu->id_dfr0 = 0x00100000;
- cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x00000030;
- cpu->id_mmfr1 = 0x00000000;
- cpu->id_mmfr2 = 0x00000000;
- cpu->id_mmfr3 = 0x00000000;
- cpu->id_isar0 = 0x01141110;
- cpu->id_isar1 = 0x02111000;
- cpu->id_isar2 = 0x21112231;
- cpu->id_isar3 = 0x01111110;
- cpu->id_isar4 = 0x01310102;
- cpu->id_isar5 = 0x00000000;
- cpu->id_isar6 = 0x00000000;
-}
-
-static void cortex_m33_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- set_feature(&cpu->env, ARM_FEATURE_V8);
- set_feature(&cpu->env, ARM_FEATURE_M);
- set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
- set_feature(&cpu->env, ARM_FEATURE_M_SECURITY);
- set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP);
- cpu->midr = 0x410fd213; /* r0p3 */
- cpu->pmsav7_dregion = 16;
- cpu->sau_sregion = 8;
- cpu->id_pfr0 = 0x00000030;
- cpu->id_pfr1 = 0x00000210;
- cpu->id_dfr0 = 0x00200000;
- cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x00101F40;
- cpu->id_mmfr1 = 0x00000000;
- cpu->id_mmfr2 = 0x01000000;
- cpu->id_mmfr3 = 0x00000000;
- cpu->id_isar0 = 0x01101110;
- cpu->id_isar1 = 0x02212000;
- cpu->id_isar2 = 0x20232232;
- cpu->id_isar3 = 0x01111131;
- cpu->id_isar4 = 0x01310132;
- cpu->id_isar5 = 0x00000000;
- cpu->id_isar6 = 0x00000000;
- cpu->clidr = 0x00000000;
- cpu->ctr = 0x8000c000;
-}
-
-static void arm_v7m_class_init(ObjectClass *oc, void *data)
-{
- CPUClass *cc = CPU_CLASS(oc);
-
-#ifndef CONFIG_USER_ONLY
- cc->do_interrupt = arm_v7m_cpu_do_interrupt;
-#endif
-
- cc->cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt;
-}
-
-static const ARMCPRegInfo cortexr5_cp_reginfo[] = {
- /* Dummy the TCM region regs for the moment */
- { .name = "ATCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0,
- .access = PL1_RW, .type = ARM_CP_CONST },
- { .name = "BTCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1,
- .access = PL1_RW, .type = ARM_CP_CONST },
- { .name = "DCACHE_INVAL", .cp = 15, .opc1 = 0, .crn = 15, .crm = 5,
- .opc2 = 0, .access = PL1_W, .type = ARM_CP_NOP },
- REGINFO_SENTINEL
-};
-
-static void cortex_r5_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- set_feature(&cpu->env, ARM_FEATURE_V7);
- set_feature(&cpu->env, ARM_FEATURE_THUMB_DIV);
- set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
- set_feature(&cpu->env, ARM_FEATURE_V7MP);
- set_feature(&cpu->env, ARM_FEATURE_PMSA);
- cpu->midr = 0x411fc153; /* r1p3 */
- cpu->id_pfr0 = 0x0131;
- cpu->id_pfr1 = 0x001;
- cpu->id_dfr0 = 0x010400;
- cpu->id_afr0 = 0x0;
- cpu->id_mmfr0 = 0x0210030;
- cpu->id_mmfr1 = 0x00000000;
- cpu->id_mmfr2 = 0x01200000;
- cpu->id_mmfr3 = 0x0211;
- cpu->id_isar0 = 0x02101111;
- cpu->id_isar1 = 0x13112111;
- cpu->id_isar2 = 0x21232141;
- cpu->id_isar3 = 0x01112131;
- cpu->id_isar4 = 0x0010142;
- cpu->id_isar5 = 0x0;
- cpu->id_isar6 = 0x0;
- cpu->mp_is_up = true;
- cpu->pmsav7_dregion = 16;
- define_arm_cp_regs(cpu, cortexr5_cp_reginfo);
-}
-
-static void cortex_r5f_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cortex_r5_initfn(obj);
- set_feature(&cpu->env, ARM_FEATURE_VFP3);
-}
-static const ARMCPRegInfo cortexa8_cp_reginfo[] = {
- { .name = "L2LOCKDOWN", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 0,
- .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "L2AUXCR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 2,
- .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- REGINFO_SENTINEL
-};
-
-static void cortex_a8_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "arm,cortex-a8";
- set_feature(&cpu->env, ARM_FEATURE_V7);
- set_feature(&cpu->env, ARM_FEATURE_VFP3);
- set_feature(&cpu->env, ARM_FEATURE_NEON);
- set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
- set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
- set_feature(&cpu->env, ARM_FEATURE_EL3);
- cpu->midr = 0x410fc080;
- cpu->reset_fpsid = 0x410330c0;
- cpu->mvfr0 = 0x11110222;
- cpu->mvfr1 = 0x00011111;
- cpu->ctr = 0x82048004;
- cpu->reset_sctlr = 0x00c50078;
- cpu->id_pfr0 = 0x1031;
- cpu->id_pfr1 = 0x11;
- cpu->id_dfr0 = 0x400;
- cpu->id_afr0 = 0;
- cpu->id_mmfr0 = 0x31100003;
- cpu->id_mmfr1 = 0x20000000;
- cpu->id_mmfr2 = 0x01202000;
- cpu->id_mmfr3 = 0x11;
- cpu->id_isar0 = 0x00101111;
- cpu->id_isar1 = 0x12112111;
- cpu->id_isar2 = 0x21232031;
- cpu->id_isar3 = 0x11112131;
- cpu->id_isar4 = 0x00111142;
- cpu->dbgdidr = 0x15141000;
- cpu->clidr = (1 << 27) | (2 << 24) | 3;
- cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */
- cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */
- cpu->ccsidr[2] = 0xf0000000; /* No L2 icache. */
- cpu->reset_auxcr = 2;
- define_arm_cp_regs(cpu, cortexa8_cp_reginfo);
-}
-
-static const ARMCPRegInfo cortexa9_cp_reginfo[] = {
- /* power_control should be set to maximum latency. Again,
- * default to 0 and set by private hook
- */
- { .name = "A9_PWRCTL", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 0,
- .access = PL1_RW, .resetvalue = 0,
- .fieldoffset = offsetof(CPUARMState, cp15.c15_power_control) },
- { .name = "A9_DIAG", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 1,
- .access = PL1_RW, .resetvalue = 0,
- .fieldoffset = offsetof(CPUARMState, cp15.c15_diagnostic) },
- { .name = "A9_PWRDIAG", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 2,
- .access = PL1_RW, .resetvalue = 0,
- .fieldoffset = offsetof(CPUARMState, cp15.c15_power_diagnostic) },
- { .name = "NEONBUSY", .cp = 15, .crn = 15, .crm = 1, .opc1 = 0, .opc2 = 0,
- .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
- /* TLB lockdown control */
- { .name = "TLB_LOCKR", .cp = 15, .crn = 15, .crm = 4, .opc1 = 5, .opc2 = 2,
- .access = PL1_W, .resetvalue = 0, .type = ARM_CP_NOP },
- { .name = "TLB_LOCKW", .cp = 15, .crn = 15, .crm = 4, .opc1 = 5, .opc2 = 4,
- .access = PL1_W, .resetvalue = 0, .type = ARM_CP_NOP },
- { .name = "TLB_VA", .cp = 15, .crn = 15, .crm = 5, .opc1 = 5, .opc2 = 2,
- .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
- { .name = "TLB_PA", .cp = 15, .crn = 15, .crm = 6, .opc1 = 5, .opc2 = 2,
- .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
- { .name = "TLB_ATTR", .cp = 15, .crn = 15, .crm = 7, .opc1 = 5, .opc2 = 2,
- .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
- REGINFO_SENTINEL
-};
-
-static void cortex_a9_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "arm,cortex-a9";
- set_feature(&cpu->env, ARM_FEATURE_V7);
- set_feature(&cpu->env, ARM_FEATURE_VFP3);
- set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
- set_feature(&cpu->env, ARM_FEATURE_NEON);
- set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
- set_feature(&cpu->env, ARM_FEATURE_EL3);
- /* Note that A9 supports the MP extensions even for
- * A9UP and single-core A9MP (which are both different
- * and valid configurations; we don't model A9UP).
- */
- set_feature(&cpu->env, ARM_FEATURE_V7MP);
- set_feature(&cpu->env, ARM_FEATURE_CBAR);
- cpu->midr = 0x410fc090;
- cpu->reset_fpsid = 0x41033090;
- cpu->mvfr0 = 0x11110222;
- cpu->mvfr1 = 0x01111111;
- cpu->ctr = 0x80038003;
- cpu->reset_sctlr = 0x00c50078;
- cpu->id_pfr0 = 0x1031;
- cpu->id_pfr1 = 0x11;
- cpu->id_dfr0 = 0x000;
- cpu->id_afr0 = 0;
- cpu->id_mmfr0 = 0x00100103;
- cpu->id_mmfr1 = 0x20000000;
- cpu->id_mmfr2 = 0x01230000;
- cpu->id_mmfr3 = 0x00002111;
- cpu->id_isar0 = 0x00101111;
- cpu->id_isar1 = 0x13112111;
- cpu->id_isar2 = 0x21232041;
- cpu->id_isar3 = 0x11112131;
- cpu->id_isar4 = 0x00111142;
- cpu->dbgdidr = 0x35141000;
- cpu->clidr = (1 << 27) | (1 << 24) | 3;
- cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */
- cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */
- define_arm_cp_regs(cpu, cortexa9_cp_reginfo);
-}
-
-#ifndef CONFIG_USER_ONLY
-static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
-{
- /* Linux wants the number of processors from here.
- * Might as well set the interrupt-controller bit too.
- */
- return ((smp_cpus - 1) << 24) | (1 << 23);
-}
-#endif
-
-static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
-#ifndef CONFIG_USER_ONLY
- { .name = "L2CTLR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 2,
- .access = PL1_RW, .resetvalue = 0, .readfn = a15_l2ctlr_read,
- .writefn = arm_cp_write_ignore, },
-#endif
- { .name = "L2ECTLR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 3,
- .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- REGINFO_SENTINEL
-};
-
-static void cortex_a7_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "arm,cortex-a7";
- set_feature(&cpu->env, ARM_FEATURE_V7VE);
- set_feature(&cpu->env, ARM_FEATURE_VFP4);
- set_feature(&cpu->env, ARM_FEATURE_NEON);
- set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
- set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
- set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
- set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
- set_feature(&cpu->env, ARM_FEATURE_EL3);
- cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A7;
- cpu->midr = 0x410fc075;
- cpu->reset_fpsid = 0x41023075;
- cpu->mvfr0 = 0x10110222;
- cpu->mvfr1 = 0x11111111;
- cpu->ctr = 0x84448003;
- cpu->reset_sctlr = 0x00c50078;
- cpu->id_pfr0 = 0x00001131;
- cpu->id_pfr1 = 0x00011011;
- cpu->id_dfr0 = 0x02010555;
- cpu->pmceid0 = 0x00000000;
- cpu->pmceid1 = 0x00000000;
- cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x10101105;
- cpu->id_mmfr1 = 0x40000000;
- cpu->id_mmfr2 = 0x01240000;
- cpu->id_mmfr3 = 0x02102211;
- /* a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but
- * table 4-41 gives 0x02101110, which includes the arm div insns.
- */
- cpu->id_isar0 = 0x02101110;
- cpu->id_isar1 = 0x13112111;
- cpu->id_isar2 = 0x21232041;
- cpu->id_isar3 = 0x11112131;
- cpu->id_isar4 = 0x10011142;
- cpu->dbgdidr = 0x3515f005;
- cpu->clidr = 0x0a200023;
- cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
- cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */
- cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */
- define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */
-}
-
-static void cortex_a15_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "arm,cortex-a15";
- set_feature(&cpu->env, ARM_FEATURE_V7VE);
- set_feature(&cpu->env, ARM_FEATURE_VFP4);
- set_feature(&cpu->env, ARM_FEATURE_NEON);
- set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
- set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
- set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
- set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
- set_feature(&cpu->env, ARM_FEATURE_EL3);
- cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15;
- cpu->midr = 0x412fc0f1;
- cpu->reset_fpsid = 0x410430f0;
- cpu->mvfr0 = 0x10110222;
- cpu->mvfr1 = 0x11111111;
- cpu->ctr = 0x8444c004;
- cpu->reset_sctlr = 0x00c50078;
- cpu->id_pfr0 = 0x00001131;
- cpu->id_pfr1 = 0x00011011;
- cpu->id_dfr0 = 0x02010555;
- cpu->pmceid0 = 0x0000000;
- cpu->pmceid1 = 0x00000000;
- cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x10201105;
- cpu->id_mmfr1 = 0x20000000;
- cpu->id_mmfr2 = 0x01240000;
- cpu->id_mmfr3 = 0x02102211;
- cpu->id_isar0 = 0x02101110;
- cpu->id_isar1 = 0x13112111;
- cpu->id_isar2 = 0x21232041;
- cpu->id_isar3 = 0x11112131;
- cpu->id_isar4 = 0x10011142;
- cpu->dbgdidr = 0x3515f021;
- cpu->clidr = 0x0a200023;
- cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
- cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */
- cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */
- define_arm_cp_regs(cpu, cortexa15_cp_reginfo);
-}
-
-static void ti925t_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
- set_feature(&cpu->env, ARM_FEATURE_V4T);
- set_feature(&cpu->env, ARM_FEATURE_OMAPCP);
- cpu->midr = ARM_CPUID_TI925T;
- cpu->ctr = 0x5109149;
- cpu->reset_sctlr = 0x00000070;
-}
-
-static void sa1100_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "intel,sa1100";
- set_feature(&cpu->env, ARM_FEATURE_STRONGARM);
- set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
- cpu->midr = 0x4401A11B;
- cpu->reset_sctlr = 0x00000070;
-}
-
-static void sa1110_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
- set_feature(&cpu->env, ARM_FEATURE_STRONGARM);
- set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
- cpu->midr = 0x6901B119;
- cpu->reset_sctlr = 0x00000070;
-}
-
-static void pxa250_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "marvell,xscale";
- set_feature(&cpu->env, ARM_FEATURE_V5);
- set_feature(&cpu->env, ARM_FEATURE_XSCALE);
- cpu->midr = 0x69052100;
- cpu->ctr = 0xd172172;
- cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa255_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "marvell,xscale";
- set_feature(&cpu->env, ARM_FEATURE_V5);
- set_feature(&cpu->env, ARM_FEATURE_XSCALE);
- cpu->midr = 0x69052d00;
- cpu->ctr = 0xd172172;
- cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa260_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "marvell,xscale";
- set_feature(&cpu->env, ARM_FEATURE_V5);
- set_feature(&cpu->env, ARM_FEATURE_XSCALE);
- cpu->midr = 0x69052903;
- cpu->ctr = 0xd172172;
- cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa261_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "marvell,xscale";
- set_feature(&cpu->env, ARM_FEATURE_V5);
- set_feature(&cpu->env, ARM_FEATURE_XSCALE);
- cpu->midr = 0x69052d05;
- cpu->ctr = 0xd172172;
- cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa262_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "marvell,xscale";
- set_feature(&cpu->env, ARM_FEATURE_V5);
- set_feature(&cpu->env, ARM_FEATURE_XSCALE);
- cpu->midr = 0x69052d06;
- cpu->ctr = 0xd172172;
- cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa270a0_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "marvell,xscale";
- set_feature(&cpu->env, ARM_FEATURE_V5);
- set_feature(&cpu->env, ARM_FEATURE_XSCALE);
- set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
- cpu->midr = 0x69054110;
- cpu->ctr = 0xd172172;
- cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa270a1_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "marvell,xscale";
- set_feature(&cpu->env, ARM_FEATURE_V5);
- set_feature(&cpu->env, ARM_FEATURE_XSCALE);
- set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
- cpu->midr = 0x69054111;
- cpu->ctr = 0xd172172;
- cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa270b0_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "marvell,xscale";
- set_feature(&cpu->env, ARM_FEATURE_V5);
- set_feature(&cpu->env, ARM_FEATURE_XSCALE);
- set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
- cpu->midr = 0x69054112;
- cpu->ctr = 0xd172172;
- cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa270b1_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "marvell,xscale";
- set_feature(&cpu->env, ARM_FEATURE_V5);
- set_feature(&cpu->env, ARM_FEATURE_XSCALE);
- set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
- cpu->midr = 0x69054113;
- cpu->ctr = 0xd172172;
- cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa270c0_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "marvell,xscale";
- set_feature(&cpu->env, ARM_FEATURE_V5);
- set_feature(&cpu->env, ARM_FEATURE_XSCALE);
- set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
- cpu->midr = 0x69054114;
- cpu->ctr = 0xd172172;
- cpu->reset_sctlr = 0x00000078;
-}
-
-static void pxa270c5_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "marvell,xscale";
- set_feature(&cpu->env, ARM_FEATURE_V5);
- set_feature(&cpu->env, ARM_FEATURE_XSCALE);
- set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
- cpu->midr = 0x69054117;
- cpu->ctr = 0xd172172;
- cpu->reset_sctlr = 0x00000078;
-}
-
-#ifndef TARGET_AARCH64
-/* -cpu max: if KVM is enabled, like -cpu host (best possible with this host);
- * otherwise, a CPU with as many features enabled as our emulation supports.
- * The version of '-cpu max' for qemu-system-aarch64 is defined in cpu64.c;
- * this only needs to handle 32 bits.
- */
-static void arm_max_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- if (kvm_enabled()) {
- kvm_arm_set_cpu_features_from_host(cpu);
- } else {
- cortex_a15_initfn(obj);
-#ifdef CONFIG_USER_ONLY
- /* We don't set these in system emulation mode for the moment,
- * since we don't correctly set the ID registers to advertise them,
- */
- set_feature(&cpu->env, ARM_FEATURE_V8);
- set_feature(&cpu->env, ARM_FEATURE_V8_AES);
- set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
- set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
- set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
- set_feature(&cpu->env, ARM_FEATURE_CRC);
- set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
- set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD);
- set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
-#endif
- }
+ return oc;
}
-#endif
-
-#endif /* !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) */
-
-typedef struct ARMCPUInfo {
- const char *name;
- void (*initfn)(Object *obj);
- void (*class_init)(ObjectClass *oc, void *data);
-} ARMCPUInfo;
-
-static const ARMCPUInfo arm_cpus[] = {
-#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
- { .name = "arm926", .initfn = arm926_initfn },
- { .name = "arm946", .initfn = arm946_initfn },
- { .name = "arm1026", .initfn = arm1026_initfn },
- /* What QEMU calls "arm1136-r2" is actually the 1136 r0p2, i.e. an
- * older core than plain "arm1136". In particular this does not
- * have the v6K features.
- */
- { .name = "arm1136-r2", .initfn = arm1136_r2_initfn },
- { .name = "arm1136", .initfn = arm1136_initfn },
- { .name = "arm1176", .initfn = arm1176_initfn },
- { .name = "arm11mpcore", .initfn = arm11mpcore_initfn },
- { .name = "cortex-m0", .initfn = cortex_m0_initfn,
- .class_init = arm_v7m_class_init },
- { .name = "cortex-m3", .initfn = cortex_m3_initfn,
- .class_init = arm_v7m_class_init },
- { .name = "cortex-m4", .initfn = cortex_m4_initfn,
- .class_init = arm_v7m_class_init },
- { .name = "cortex-m33", .initfn = cortex_m33_initfn,
- .class_init = arm_v7m_class_init },
- { .name = "cortex-r5", .initfn = cortex_r5_initfn },
- { .name = "cortex-r5f", .initfn = cortex_r5f_initfn },
- { .name = "cortex-a7", .initfn = cortex_a7_initfn },
- { .name = "cortex-a8", .initfn = cortex_a8_initfn },
- { .name = "cortex-a9", .initfn = cortex_a9_initfn },
- { .name = "cortex-a15", .initfn = cortex_a15_initfn },
- { .name = "ti925t", .initfn = ti925t_initfn },
- { .name = "sa1100", .initfn = sa1100_initfn },
- { .name = "sa1110", .initfn = sa1110_initfn },
- { .name = "pxa250", .initfn = pxa250_initfn },
- { .name = "pxa255", .initfn = pxa255_initfn },
- { .name = "pxa260", .initfn = pxa260_initfn },
- { .name = "pxa261", .initfn = pxa261_initfn },
- { .name = "pxa262", .initfn = pxa262_initfn },
- /* "pxa270" is an alias for "pxa270-a0" */
- { .name = "pxa270", .initfn = pxa270a0_initfn },
- { .name = "pxa270-a0", .initfn = pxa270a0_initfn },
- { .name = "pxa270-a1", .initfn = pxa270a1_initfn },
- { .name = "pxa270-b0", .initfn = pxa270b0_initfn },
- { .name = "pxa270-b1", .initfn = pxa270b1_initfn },
- { .name = "pxa270-c0", .initfn = pxa270c0_initfn },
- { .name = "pxa270-c5", .initfn = pxa270c5_initfn },
-#ifndef TARGET_AARCH64
- { .name = "max", .initfn = arm_max_initfn },
-#endif
-#ifdef CONFIG_USER_ONLY
- { .name = "any", .initfn = arm_max_initfn },
-#endif
-#endif
- { .name = NULL }
-};
static Property arm_cpu_properties[] = {
- DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false),
- DEFINE_PROP_UINT32("psci-conduit", ARMCPU, psci_conduit, 0),
- DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0),
+ DEFINE_PROP_UINT64("midr", ARMCPU, midr, 0),
DEFINE_PROP_UINT64("mp-affinity", ARMCPU,
mp_affinity, ARM64_AFFINITY_INVALID),
DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID),
@@ -1918,112 +2449,112 @@ static Property arm_cpu_properties[] = {
DEFINE_PROP_END_OF_LIST()
};
-#ifdef CONFIG_USER_ONLY
-static int arm_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int size,
- int rw, int mmu_idx)
+static const gchar *arm_gdb_arch_name(CPUState *cs)
{
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
- env->exception.vaddress = address;
- if (rw == 2) {
- cs->exception_index = EXCP_PREFETCH_ABORT;
- } else {
- cs->exception_index = EXCP_DATA_ABORT;
+ if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
+ return "iwmmxt";
}
- return 1;
+ return "arm";
}
+
+#ifndef CONFIG_USER_ONLY
+#include "hw/core/sysemu-cpu-ops.h"
+
+static const struct SysemuCPUOps arm_sysemu_ops = {
+ .get_phys_page_attrs_debug = arm_cpu_get_phys_page_attrs_debug,
+ .asidx_from_attrs = arm_asidx_from_attrs,
+ .write_elf32_note = arm_cpu_write_elf32_note,
+ .write_elf64_note = arm_cpu_write_elf64_note,
+ .virtio_is_big_endian = arm_cpu_virtio_is_big_endian,
+ .legacy_vmsd = &vmstate_arm_cpu,
+};
#endif
-static gchar *arm_gdb_arch_name(CPUState *cs)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
+#ifdef CONFIG_TCG
+static const TCGCPUOps arm_tcg_ops = {
+ .initialize = arm_translate_init,
+ .synchronize_from_tb = arm_cpu_synchronize_from_tb,
+ .debug_excp_handler = arm_debug_excp_handler,
+ .restore_state_to_opc = arm_restore_state_to_opc,
- if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
- return g_strdup("iwmmxt");
- }
- return g_strdup("arm");
-}
+#ifdef CONFIG_USER_ONLY
+ .record_sigsegv = arm_cpu_record_sigsegv,
+ .record_sigbus = arm_cpu_record_sigbus,
+#else
+ .tlb_fill = arm_cpu_tlb_fill,
+ .cpu_exec_interrupt = arm_cpu_exec_interrupt,
+ .do_interrupt = arm_cpu_do_interrupt,
+ .do_transaction_failed = arm_cpu_do_transaction_failed,
+ .do_unaligned_access = arm_cpu_do_unaligned_access,
+ .adjust_watchpoint_address = arm_adjust_watchpoint_address,
+ .debug_check_watchpoint = arm_debug_check_watchpoint,
+ .debug_check_breakpoint = arm_debug_check_breakpoint,
+#endif /* !CONFIG_USER_ONLY */
+};
+#endif /* CONFIG_TCG */
static void arm_cpu_class_init(ObjectClass *oc, void *data)
{
ARMCPUClass *acc = ARM_CPU_CLASS(oc);
CPUClass *cc = CPU_CLASS(acc);
DeviceClass *dc = DEVICE_CLASS(oc);
+ ResettableClass *rc = RESETTABLE_CLASS(oc);
device_class_set_parent_realize(dc, arm_cpu_realizefn,
&acc->parent_realize);
- dc->props = arm_cpu_properties;
- acc->parent_reset = cc->reset;
- cc->reset = arm_cpu_reset;
+ device_class_set_props(dc, arm_cpu_properties);
+
+ resettable_class_set_parent_phases(rc, NULL, arm_cpu_reset_hold, NULL,
+ &acc->parent_phases);
cc->class_by_name = arm_cpu_class_by_name;
cc->has_work = arm_cpu_has_work;
- cc->cpu_exec_interrupt = arm_cpu_exec_interrupt;
+ cc->mmu_index = arm_cpu_mmu_index;
cc->dump_state = arm_cpu_dump_state;
cc->set_pc = arm_cpu_set_pc;
+ cc->get_pc = arm_cpu_get_pc;
cc->gdb_read_register = arm_cpu_gdb_read_register;
cc->gdb_write_register = arm_cpu_gdb_write_register;
-#ifdef CONFIG_USER_ONLY
- cc->handle_mmu_fault = arm_cpu_handle_mmu_fault;
-#else
- cc->do_interrupt = arm_cpu_do_interrupt;
- cc->do_unaligned_access = arm_cpu_do_unaligned_access;
- cc->do_transaction_failed = arm_cpu_do_transaction_failed;
- cc->get_phys_page_attrs_debug = arm_cpu_get_phys_page_attrs_debug;
- cc->asidx_from_attrs = arm_asidx_from_attrs;
- cc->vmsd = &vmstate_arm_cpu;
- cc->virtio_is_big_endian = arm_cpu_virtio_is_big_endian;
- cc->write_elf64_note = arm_cpu_write_elf64_note;
- cc->write_elf32_note = arm_cpu_write_elf32_note;
+#ifndef CONFIG_USER_ONLY
+ cc->sysemu_ops = &arm_sysemu_ops;
#endif
- cc->gdb_num_core_regs = 26;
- cc->gdb_core_xml_file = "arm-core.xml";
cc->gdb_arch_name = arm_gdb_arch_name;
- cc->gdb_get_dynamic_xml = arm_gdb_get_dynamic_xml;
cc->gdb_stop_before_watchpoint = true;
- cc->debug_excp_handler = arm_debug_excp_handler;
- cc->debug_check_watchpoint = arm_debug_check_watchpoint;
-#if !defined(CONFIG_USER_ONLY)
- cc->adjust_watchpoint_address = arm_adjust_watchpoint_address;
-#endif
-
cc->disas_set_info = arm_disas_set_info;
+
#ifdef CONFIG_TCG
- cc->tcg_initialize = arm_translate_init;
-#endif
+ cc->tcg_ops = &arm_tcg_ops;
+#endif /* CONFIG_TCG */
}
-#ifdef CONFIG_KVM
-static void arm_host_initfn(Object *obj)
+static void arm_cpu_instance_init(Object *obj)
{
- ARMCPU *cpu = ARM_CPU(obj);
+ ARMCPUClass *acc = ARM_CPU_GET_CLASS(obj);
- kvm_arm_set_cpu_features_from_host(cpu);
+ acc->info->initfn(obj);
+ arm_cpu_post_init(obj);
}
-static const TypeInfo host_arm_cpu_type_info = {
- .name = TYPE_ARM_HOST_CPU,
-#ifdef TARGET_AARCH64
- .parent = TYPE_AARCH64_CPU,
-#else
- .parent = TYPE_ARM_CPU,
-#endif
- .instance_init = arm_host_initfn,
-};
+static void cpu_register_class_init(ObjectClass *oc, void *data)
+{
+ ARMCPUClass *acc = ARM_CPU_CLASS(oc);
+ CPUClass *cc = CPU_CLASS(acc);
-#endif
+ acc->info = data;
+ cc->gdb_core_xml_file = "arm-core.xml";
+}
-static void cpu_register(const ARMCPUInfo *info)
+void arm_cpu_register(const ARMCPUInfo *info)
{
TypeInfo type_info = {
.parent = TYPE_ARM_CPU,
- .instance_size = sizeof(ARMCPU),
- .instance_init = info->initfn,
- .class_size = sizeof(ARMCPUClass),
- .class_init = info->class_init,
+ .instance_init = arm_cpu_instance_init,
+ .class_init = info->class_init ?: cpu_register_class_init,
+ .class_data = (void *)info,
};
type_info.name = g_strdup_printf("%s-" TYPE_ARM_CPU, info->name);
@@ -2035,35 +2566,17 @@ static const TypeInfo arm_cpu_type_info = {
.name = TYPE_ARM_CPU,
.parent = TYPE_CPU,
.instance_size = sizeof(ARMCPU),
+ .instance_align = __alignof__(ARMCPU),
.instance_init = arm_cpu_initfn,
- .instance_post_init = arm_cpu_post_init,
.instance_finalize = arm_cpu_finalizefn,
.abstract = true,
.class_size = sizeof(ARMCPUClass),
.class_init = arm_cpu_class_init,
};
-static const TypeInfo idau_interface_type_info = {
- .name = TYPE_IDAU_INTERFACE,
- .parent = TYPE_INTERFACE,
- .class_size = sizeof(IDAUInterfaceClass),
-};
-
static void arm_cpu_register_types(void)
{
- const ARMCPUInfo *info = arm_cpus;
-
type_register_static(&arm_cpu_type_info);
- type_register_static(&idau_interface_type_info);
-
- while (info->name) {
- cpu_register(info);
- info++;
- }
-
-#ifdef CONFIG_KVM
- type_register_static(&host_arm_cpu_type_info);
-#endif
}
type_init(arm_cpu_register_types)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index f00c0444c4..bc0c84873f 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -6,7 +6,7 @@
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -21,23 +21,21 @@
#define ARM_CPU_H
#include "kvm-consts.h"
+#include "qemu/cpu-float.h"
#include "hw/registerfields.h"
-
-#if defined(TARGET_AARCH64)
- /* AArch64 definitions */
-# define TARGET_LONG_BITS 64
-#else
-# define TARGET_LONG_BITS 32
-#endif
+#include "cpu-qom.h"
+#include "exec/cpu-defs.h"
+#include "exec/gdbstub.h"
+#include "qapi/qapi-types-common.h"
+#include "target/arm/multiprocessing.h"
+#include "target/arm/gtimer.h"
/* ARM processors have a weak memory model */
#define TCG_GUEST_DEFAULT_MO (0)
-#define CPUArchState struct CPUARMState
-
-#include "qemu-common.h"
-#include "cpu-qom.h"
-#include "exec/cpu-defs.h"
+#ifdef TARGET_AARCH64
+#define KVM_HAVE_MCE_INJECTION 1
+#endif
#define EXCP_UDEF 1 /* undefined instruction */
#define EXCP_SWI 2 /* software interrupt */
@@ -57,6 +55,12 @@
#define EXCP_NOCP 17 /* v7M NOCP UsageFault */
#define EXCP_INVSTATE 18 /* v7M INVSTATE UsageFault */
#define EXCP_STKOF 19 /* v8M STKOF UsageFault */
+#define EXCP_LAZYFP 20 /* v7M fault during lazy FP stacking */
+#define EXCP_LSERR 21 /* v8M LSERR SecureFault */
+#define EXCP_UNALIGNED 22 /* v7M UNALIGNED UsageFault */
+#define EXCP_DIVBYZERO 23 /* v7M DIVBYZERO UsageFault */
+#define EXCP_VSERR 24
+#define EXCP_GPC 25 /* v9 Granule Protection Check Fault */
/* NB: add new EXCP_ defines to the array in arm_log_exception() too */
#define ARMV7M_EXCP_RESET 1
@@ -71,25 +75,11 @@
#define ARMV7M_EXCP_PENDSV 14
#define ARMV7M_EXCP_SYSTICK 15
-/* For M profile, some registers are banked secure vs non-secure;
- * these are represented as a 2-element array where the first element
- * is the non-secure copy and the second is the secure copy.
- * When the CPU does not implement the security extension then
- * only the first element is used.
- * This means that the copy for the current security state can be
- * accessed via env->registerfield[env->v7m.secure] (whether the security
- * extension is implemented or not).
- */
-enum {
- M_REG_NS = 0,
- M_REG_S = 1,
- M_REG_NUM_BANKS = 2,
-};
-
/* ARM-specific interrupt pending bits. */
#define CPU_INTERRUPT_FIQ CPU_INTERRUPT_TGT_EXT_1
#define CPU_INTERRUPT_VIRQ CPU_INTERRUPT_TGT_EXT_2
#define CPU_INTERRUPT_VFIQ CPU_INTERRUPT_TGT_EXT_3
+#define CPU_INTERRUPT_VSERR CPU_INTERRUPT_TGT_INT_0
/* The usual mapping for an AArch64 system register to its AArch32
* counterpart is for the 32 bit world to have access to the lower
@@ -97,7 +87,7 @@ enum {
* therefore useful to be able to pass TCG the offset of the least
* significant half of a uint64_t struct member.
*/
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
#define offsetoflow32(S, M) (offsetof(S, M) + sizeof(uint32_t))
#define offsetofhigh32(S, M) offsetof(S, M)
#else
@@ -105,13 +95,6 @@ enum {
#define offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t))
#endif
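As a standalone illustration (not QEMU code) of why the host-endianness switch above is needed when aliasing the 32-bit halves of a 64-bit register field:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>

/* Same idea as offsetoflow32()/offsetofhigh32(): pick the byte offset of
 * the least/most significant 32 bits of a uint64_t member. Shown here for
 * a little-endian host only; a big-endian host needs the offsets swapped.
 */
struct regs { uint64_t ttbr0; };

#define demo_offsetoflow32(S, M)  offsetof(S, M)                      /* LE host */
#define demo_offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t)) /* LE host */

int main(void)
{
    struct regs r = { .ttbr0 = 0x1122334455667788ULL };
    uint32_t lo, hi;

    memcpy(&lo, (char *)&r + demo_offsetoflow32(struct regs, ttbr0), sizeof(lo));
    memcpy(&hi, (char *)&r + demo_offsetofhigh32(struct regs, ttbr0), sizeof(hi));
    /* prints low32=0x55667788 high32=0x11223344 on a little-endian host */
    printf("low32=0x%08" PRIx32 " high32=0x%08" PRIx32 "\n", lo, hi);
    return 0;
}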
-/* Meanings of the ARMCPU object's four inbound GPIO lines */
-#define ARM_CPU_IRQ 0
-#define ARM_CPU_FIQ 1
-#define ARM_CPU_VIRQ 2
-#define ARM_CPU_VFIQ 3
-
-#define NB_MMU_MODES 8
/* ARM-specific extra insn start words:
* 1: Conditional execution bits
* 2: Partial exception syndrome for data aborts
@@ -119,12 +102,12 @@ enum {
#define TARGET_INSN_START_EXTRA_WORDS 2
/* The 2nd extra word holding syndrome info for data aborts does not use
- * the upper 6 bits nor the lower 14 bits. We mask and shift it down to
+ * the upper 6 bits nor the lower 13 bits. We mask and shift it down to
* help the sleb128 encoder do a better job.
* When restoring the CPU state, we shift it back up.
*/
#define ARM_INSN_START_WORD2_MASK ((1 << 26) - 1)
-#define ARM_INSN_START_WORD2_SHIFT 14
+#define ARM_INSN_START_WORD2_SHIFT 13
/* We currently assume float and double are IEEE single and double
precision respectively.
@@ -135,17 +118,21 @@ enum {
*/
/**
- * DynamicGDBXMLInfo:
- * @desc: Contains the XML descriptions.
- * @num_cpregs: Number of the Coprocessor registers seen by GDB.
- * @cpregs_keys: Array that contains the corresponding Key of
- * a given cpreg with the same order of the cpreg in the XML description.
- */
-typedef struct DynamicGDBXMLInfo {
- char *desc;
- int num_cpregs;
- uint32_t *cpregs_keys;
-} DynamicGDBXMLInfo;
+ * DynamicGDBFeatureInfo:
+ * @desc: Contains the feature descriptions.
+ * @data: A union with data specific to the set of registers
+ * @cpregs_keys: Array that contains the corresponding Key of
+ * a given cpreg with the same order of the cpreg
+ * in the XML description.
+ */
+typedef struct DynamicGDBFeatureInfo {
+ GDBFeature desc;
+ union {
+ struct {
+ uint32_t *keys;
+ } cpregs;
+ } data;
+} DynamicGDBFeatureInfo;
/* CPU state for each instance of a generic timer (in cp15 c14) */
typedef struct ARMGenericTimer {
@@ -153,18 +140,6 @@ typedef struct ARMGenericTimer {
uint64_t ctl; /* Timer Control register */
} ARMGenericTimer;
-#define GTIMER_PHYS 0
-#define GTIMER_VIRT 1
-#define GTIMER_HYP 2
-#define GTIMER_SEC 3
-#define NUM_GTIMERS 4
-
-typedef struct {
- uint64_t raw_tcr;
- uint32_t mask;
- uint32_t base_mask;
-} TCR;
-
/* Define a maximum sized vector register.
* For 32-bit, this is a 128-bit NEON/AdvSIMD register.
* For 64-bit, this is a 2048-bit SVE register.
@@ -201,15 +176,29 @@ typedef struct ARMVectorReg {
uint64_t d[2 * ARM_MAX_VQ] QEMU_ALIGNED(16);
} ARMVectorReg;
-/* In AArch32 mode, predicate registers do not exist at all. */
#ifdef TARGET_AARCH64
+/* In AArch32 mode, predicate registers do not exist at all. */
typedef struct ARMPredicateReg {
- uint64_t p[2 * ARM_MAX_VQ / 8] QEMU_ALIGNED(16);
+ uint64_t p[DIV_ROUND_UP(2 * ARM_MAX_VQ, 8)] QEMU_ALIGNED(16);
} ARMPredicateReg;
+
+/* In AArch32 mode, PAC keys do not exist at all. */
+typedef struct ARMPACKey {
+ uint64_t lo, hi;
+} ARMPACKey;
#endif
+/* See the commentary above the TBFLAG field definitions. */
+typedef struct CPUARMTBFlags {
+ uint32_t flags;
+ target_ulong flags2;
+} CPUARMTBFlags;
+
+typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
-typedef struct CPUARMState {
+typedef struct NVICState NVICState;
+
+typedef struct CPUArchState {
/* Regs for current mode. */
uint32_t regs[16];
@@ -229,10 +218,16 @@ typedef struct CPUARMState {
* semantics as for AArch32, as described in the comments on each field)
* nRW (also known as M[4]) is kept, inverted, in env->aarch64
* DAIF (exception masks) are kept in env->daif
+ * BTYPE is kept in env->btype
+ * SM and ZA are kept in env->svcr
* all other bits are stored in their correct places in env->pstate
*/
uint32_t pstate;
- uint32_t aarch64; /* 1 if CPU is in aarch64 state; inverse of PSTATE.nRW */
+ bool aarch64; /* True if CPU is in aarch64 state; inverse of PSTATE.nRW */
+ bool thumb; /* True if CPU is in thumb mode; cpsr[5] */
+
+ /* Cached TBFLAGS state. See below for which bits are included. */
+ CPUARMTBFlags hflags;
/* Frequently accessed CPSR bits are stored separately for efficiency.
This contains all the other bits. Use cpsr_{read,write} to access
@@ -256,9 +251,10 @@ typedef struct CPUARMState {
uint32_t ZF; /* Z set if zero. */
uint32_t QF; /* 0 or 1 */
uint32_t GE; /* cpsr[19:16] */
- uint32_t thumb; /* cpsr[5]. 0 = arm mode, 1 = thumb mode. */
uint32_t condexec_bits; /* IT bits. cpsr[15:10,26:25]. */
+ uint32_t btype; /* BTI branch type. spsr[11:10]. */
uint64_t daif; /* exception masks, in the bits they are in PSTATE */
+ uint64_t svcr; /* PSTATE.{SM,ZA} in the bits they are in SVCR */
uint64_t elr_el[4]; /* AArch64 exception link regs */
uint64_t sp_el[4]; /* AArch64 banked stack pointers */
@@ -284,6 +280,7 @@ typedef struct CPUARMState {
};
uint64_t sctlr_el[4];
};
+ uint64_t vsctlr; /* Virtualization System control register. */
uint64_t cpacr_el1; /* Architectural feature access control register */
uint64_t cptr_el[4]; /* ARMv8 feature trap registers */
uint32_t c1_xscaleauxcr; /* XScale auxiliary control register. */
@@ -308,9 +305,11 @@ typedef struct CPUARMState {
uint64_t ttbr1_el[4];
};
uint64_t vttbr_el2; /* Virtualization Translation Table Base. */
+ uint64_t vsttbr_el2; /* Secure Virtualization Translation Table. */
/* MMU translation table base control. */
- TCR tcr_el[4];
- TCR vtcr_el2; /* Virtualization Translation Control. */
+ uint64_t tcr_el[4];
+ uint64_t vtcr_el2; /* Virtualization Translation Control. */
+ uint64_t vstcr_el2; /* Secure Virtualization Translation Control. */
uint32_t c2_data; /* MPU data cacheable bits. */
uint32_t c2_insn; /* MPU instruction cacheable bits. */
union { /* MMU domain access control register
@@ -327,6 +326,7 @@ typedef struct CPUARMState {
uint32_t pmsav5_data_ap; /* PMSAv5 MPU data access permissions */
uint32_t pmsav5_insn_ap; /* PMSAv5 MPU insn access permissions */
uint64_t hcr_el2; /* Hypervisor configuration register */
+ uint64_t hcrx_el2; /* Extended Hypervisor configuration register */
uint64_t scr_el3; /* Secure configuration register. */
union { /* Fault status registers. */
struct {
@@ -350,7 +350,7 @@ typedef struct CPUARMState {
union { /* Fault address registers. */
struct {
uint64_t _unused_far0;
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
uint32_t ifar_ns;
uint32_t dfar_ns;
uint32_t ifar_s;
@@ -387,7 +387,7 @@ typedef struct CPUARMState {
uint64_t c9_pminten; /* perf monitor interrupt enables */
union { /* Memory attribute redirection */
struct {
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
uint64_t _unused_mair_0;
uint32_t mair1_ns;
uint32_t mair0_ns;
@@ -415,6 +415,7 @@ typedef struct CPUARMState {
uint64_t vbar_el[4];
};
uint32_t mvbar; /* (monitor) vector base address register */
+ uint64_t rvbar; /* rvbar sampled from rvbar property at reset */
struct { /* FCSE PID. */
uint32_t fcseidr_ns;
uint32_t fcseidr_s;
@@ -437,6 +438,7 @@ typedef struct CPUARMState {
};
uint64_t tpidr_el[4];
};
+ uint64_t tpidr2_el0;
/* The secure banks of these registers don't map anywhere */
uint64_t tpidrurw_s;
uint64_t tpidrprw_s;
@@ -448,8 +450,9 @@ typedef struct CPUARMState {
};
uint64_t c14_cntfrq; /* Counter Frequency register */
uint64_t c14_cntkctl; /* Timer Control register */
- uint32_t cnthctl_el2; /* Counter/Timer Hyp Control register */
+ uint64_t cnthctl_el2; /* Counter/Timer Hyp Control register */
uint64_t cntvoff_el2; /* Counter Virtual Offset register */
+ uint64_t cntpoff_el2; /* Counter Physical Offset register */
ARMGenericTimer c14_timer[NUM_GTIMERS];
uint32_t c15_cpar; /* XScale Coprocessor Access Register */
uint32_t c15_ticonfig; /* TI925T configuration byte. */
@@ -464,17 +467,58 @@ typedef struct CPUARMState {
uint64_t dbgbcr[16]; /* breakpoint control registers */
uint64_t dbgwvr[16]; /* watchpoint value registers */
uint64_t dbgwcr[16]; /* watchpoint control registers */
+ uint64_t dbgclaim; /* DBGCLAIM bits */
uint64_t mdscr_el1;
uint64_t oslsr_el1; /* OS Lock Status */
+ uint64_t osdlr_el1; /* OS DoubleLock status */
uint64_t mdcr_el2;
uint64_t mdcr_el3;
- /* If the counter is enabled, this stores the last time the counter
- * was reset. Otherwise it stores the counter value
+ /* Stores the architectural value of the counter *the last time it was
+ * updated* by pmccntr_op_start. Accesses should always be surrounded
+ * by pmccntr_op_start/pmccntr_op_finish to guarantee the latest
+ * architecturally-correct value is being read/set.
*/
uint64_t c15_ccnt;
+ /* Stores the delta between the architectural value and the underlying
+ * cycle count during normal operation. It is used to update c15_ccnt
+ * to be the correct architectural value before accesses. During
+ * accesses, c15_ccnt_delta contains the underlying count being used
+ * for the access, after which it reverts to the delta value in
+ * pmccntr_op_finish.
+ */
+ uint64_t c15_ccnt_delta;
+ uint64_t c14_pmevcntr[31];
+ uint64_t c14_pmevcntr_delta[31];
+ uint64_t c14_pmevtyper[31];
uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
uint64_t vpidr_el2; /* Virtualization Processor ID Register */
uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
+ uint64_t tfsr_el[4]; /* tfsre0_el1 is index 0. */
+ uint64_t gcr_el1;
+ uint64_t rgsr_el1;
+
+ /* Minimal RAS registers */
+ uint64_t disr_el1;
+ uint64_t vdisr_el2;
+ uint64_t vsesr_el2;
+
+ /*
+ * Fine-Grained Trap registers. We store these as arrays so the
+ * access checking code doesn't have to manually select
+ * HFGRTR_EL2 vs HDFGRTR_EL2 etc when looking up the bit to test.
+ * FEAT_FGT2 will add more elements to these arrays.
+ */
+ uint64_t fgt_read[2]; /* HFGRTR, HDFGRTR */
+ uint64_t fgt_write[2]; /* HFGWTR, HDFGWTR */
+ uint64_t fgt_exec[1]; /* HFGITR */
+
+ /* RME registers */
+ uint64_t gpccr_el3;
+ uint64_t gptbr_el3;
+ uint64_t mfar_el3;
+
+ /* NV2 register */
+ uint64_t vncr_el2;
} cp15;
struct {
@@ -513,6 +557,13 @@ typedef struct CPUARMState {
uint32_t scr[M_REG_NUM_BANKS];
uint32_t msplim[M_REG_NUM_BANKS];
uint32_t psplim[M_REG_NUM_BANKS];
+ uint32_t fpcar[M_REG_NUM_BANKS];
+ uint32_t fpccr[M_REG_NUM_BANKS];
+ uint32_t fpdscr[M_REG_NUM_BANKS];
+ uint32_t cpacr[M_REG_NUM_BANKS];
+ uint32_t nsacr;
+ uint32_t ltpsize;
+ uint32_t vpr;
} v7m;
/* Information associated with an exception about to be taken:
@@ -531,6 +582,18 @@ typedef struct CPUARMState {
*/
} exception;
+ /* Information associated with an SError */
+ struct {
+ uint8_t pending;
+ uint8_t has_esr;
+ uint64_t esr;
+ } serror;
+
+ uint8_t ext_dabt_raised; /* Tracking/verifying injection of ext DABT */
+
+ /* State of our input IRQ/FIQ/VIRQ/VFIQ lines */
+ uint32_t irq_line_state;
+
/* Thumb-2 EE state. */
uint32_t teecr;
uint32_t teehbr;
@@ -547,11 +610,13 @@ typedef struct CPUARMState {
ARMPredicateReg preg_tmp;
#endif
- uint32_t xregs[16];
/* We store these fpcsr fields separately for convenience. */
+ uint32_t qc[4] QEMU_ALIGNED(16);
int vec_len;
int vec_stride;
+ uint32_t xregs[16];
+
/* Scratch space for aa32 neon expansion. */
uint32_t scratch[8];
@@ -560,6 +625,8 @@ typedef struct CPUARMState {
* fp_status: is the "normal" fp status.
* fp_status_fp16: used for half-precision calculations
* standard_fp_status : the ARM "Standard FPSCR Value"
+ * standard_fp_status_fp16 : used for half-precision
+ * calculations with the ARM "Standard FPSCR Value"
*
* Half-precision operations are governed by a separate
* flush-to-zero control bit in FPSCR:FZ16. We pass a separate
@@ -570,21 +637,34 @@ typedef struct CPUARMState {
* Neon) which the architecture defines as controlled by the
* standard FPSCR value rather than the FPSCR.
*
+ * The "standard FPSCR but for fp16 ops" is needed because
+ * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
+ * using a fixed value for it.
+ *
* To avoid having to transfer exception bits around, we simply
* say that the FPSCR cumulative exception flags are the logical
- * OR of the flags in the three fp statuses. This relies on the
+ * OR of the flags in the four fp statuses. This relies on the
* only thing which needs to read the exception flags being
* an explicit FPSCR read.
*/
float_status fp_status;
float_status fp_status_f16;
float_status standard_fp_status;
+ float_status standard_fp_status_f16;
- /* ZCR_EL[1-3] */
- uint64_t zcr_el[4];
+ uint64_t zcr_el[4]; /* ZCR_EL[1-3] */
+ uint64_t smcr_el[4]; /* SMCR_EL[1-3] */
} vfp;
+
uint64_t exclusive_addr;
uint64_t exclusive_val;
+ /*
+ * Contains the 'val' for the second 64-bit register of LDXP, which comes
+ * from the higher address, not the high part of a complete 128-bit value.
+ * In some ways it might be more convenient to record the exclusive value
+ * as the low and high halves of a 128 bit data value, but the current
+ * semantics of these fields are baked into the migration format.
+ */
uint64_t exclusive_high;
/* iwMMXt coprocessor state. */
@@ -595,20 +675,50 @@ typedef struct CPUARMState {
uint32_t cregs[16];
} iwmmxt;
-#if defined(CONFIG_USER_ONLY)
- /* For usermode syscall translation. */
- int eabi;
+#ifdef TARGET_AARCH64
+ struct {
+ ARMPACKey apia;
+ ARMPACKey apib;
+ ARMPACKey apda;
+ ARMPACKey apdb;
+ ARMPACKey apga;
+ } keys;
+
+ uint64_t scxtnum_el[4];
+
+ /*
+ * SME ZA storage -- 256 x 256 byte array, with bytes in host word order,
+ * as we do with vfp.zregs[]. This corresponds to the architectural ZA
+ * array, where ZA[N] is in the least-significant bytes of env->zarray[N].
+ * When SVL is less than the architectural maximum, the accessible
+ * storage is restricted, such that if the SVL is X bytes the guest can
+ * see only the bottom X elements of zarray[], and only the least
+ * significant X bytes of each element of the array. (In other words,
+ * the observable part is always square.)
+ *
+ * The ZA storage can also be considered as a set of square tiles of
+ * elements of different sizes. The mapping from tiles to the ZA array
+ * is architecturally defined, such that for tiles of elements of esz
+ * bytes, the Nth row (or "horizontal slice") of tile T is in
+ * ZA[T + N * esz]. Note that this means that each tile is not contiguous
+ * in the ZA storage, because its rows are striped through the ZA array.
+ *
+ * Because this is so large, keep this toward the end of the reset area,
+ * to keep the offsets into the rest of the structure smaller.
+ */
+ ARMVectorReg zarray[ARM_MAX_VQ * 16];
#endif
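A small standalone worked example of the striping rule described above (row N of a tile of esz-byte elements lives in ZA row T + N * esz):

#include <stdio.h>

/* For a tile of esz-byte elements, the Nth horizontal slice of tile T
 * is stored in ZA array row (T + N * esz), per the comment above.
 */
static int za_row_for_tile_slice(int tile, int row, int esz)
{
    return tile + row * esz;
}

int main(void)
{
    /* 32-bit elements (esz = 4): tile ZA2, slice 3 -> zarray[2 + 3*4] = zarray[14] */
    printf("ZA2 (esz=4), slice 3 -> zarray[%d]\n", za_row_for_tile_slice(2, 3, 4));
    return 0;
}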
struct CPUBreakpoint *cpu_breakpoint[16];
struct CPUWatchpoint *cpu_watchpoint[16];
+ /* Optional fault info across tlb lookup. */
+ ARMMMUFaultInfo *tlb_fi;
+
/* Fields up to this point are cleared by a CPU reset */
struct {} end_reset_fields;
- CPU_COMMON
-
- /* Fields after CPU_COMMON are preserved across CPU reset. */
+ /* Fields after this point are preserved across CPU reset. */
/* Internal CPU feature flags. */
uint64_t features;
@@ -630,8 +740,11 @@ typedef struct CPUARMState {
*/
uint32_t *rbar[M_REG_NUM_BANKS];
uint32_t *rlar[M_REG_NUM_BANKS];
+ uint32_t *hprbar;
+ uint32_t *hprlar;
uint32_t mair0[M_REG_NUM_BANKS];
uint32_t mair1[M_REG_NUM_BANKS];
+ uint32_t hprselr;
} pmsav8;
/* v8M SAU */
@@ -642,12 +755,32 @@ typedef struct CPUARMState {
uint32_t ctrl;
} sau;
- void *nvic;
+#if !defined(CONFIG_USER_ONLY)
+ NVICState *nvic;
const struct arm_boot_info *boot_info;
/* Store GICv3CPUState to access from this struct */
void *gicv3state;
+#else /* CONFIG_USER_ONLY */
+ /* For usermode syscall translation. */
+ bool eabi;
+#endif /* CONFIG_USER_ONLY */
+
+#ifdef TARGET_TAGGED_ADDRESSES
+ /* Linux syscall tagged address support */
+ bool tagged_addr_enable;
+#endif
} CPUARMState;
+static inline void set_feature(CPUARMState *env, int feature)
+{
+ env->features |= 1ULL << feature;
+}
+
+static inline void unset_feature(CPUARMState *env, int feature)
+{
+ env->features &= ~(1ULL << feature);
+}
+
/**
* ARMELChangeHookFn:
* type of a function which can be registered via arm_register_el_change_hook()
@@ -669,16 +802,29 @@ typedef enum ARMPSCIState {
PSCI_ON_PENDING = 2
} ARMPSCIState;
+typedef struct ARMISARegisters ARMISARegisters;
+
+/*
+ * In map, each set bit is a supported vector length of (bit-number + 1) * 16
+ * bytes, i.e. each bit number + 1 is the vector length in quadwords.
+ *
+ * While processing properties during initialization, corresponding init bits
+ * are set for bits in sve_vq_map that have been set by properties.
+ *
+ * Bits set in supported represent valid vector lengths for the CPU type.
+ */
+typedef struct {
+ uint32_t map, init, supported;
+} ARMVQMap;
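Decoding the map encoding is plain bit arithmetic; a standalone sketch that finds the largest supported vector length:

#include <stdint.h>
#include <stdio.h>

/* Bit n set in 'map' means a vector length of (n + 1) quadwords,
 * i.e. (n + 1) * 16 bytes, is supported.
 */
static unsigned max_vq_from_map(uint32_t map)
{
    unsigned vq = 0;

    while (map) {
        vq++;
        map >>= 1;
    }
    return vq; /* 0 means no lengths supported */
}

int main(void)
{
    uint32_t map = 0x0b; /* bits 0, 1 and 3 set: VQ 1, 2 and 4 supported */
    unsigned vq = max_vq_from_map(map);

    printf("max VQ %u -> %u-byte vectors\n", vq, vq * 16); /* max VQ 4 -> 64-byte vectors */
    return 0;
}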
+
/**
* ARMCPU:
* @env: #CPUARMState
*
* An ARM CPU core.
*/
-struct ARMCPU {
- /*< private >*/
+struct ArchCPU {
CPUState parent_obj;
- /*< public >*/
CPUARMState env;
@@ -704,10 +850,18 @@ struct ARMCPU {
uint64_t *cpreg_vmstate_values;
int32_t cpreg_vmstate_array_len;
- DynamicGDBXMLInfo dyn_xml;
+ DynamicGDBFeatureInfo dyn_sysreg_feature;
+ DynamicGDBFeatureInfo dyn_svereg_feature;
+ DynamicGDBFeatureInfo dyn_m_systemreg_feature;
+ DynamicGDBFeatureInfo dyn_m_secextreg_feature;
/* Timers used by the generic (architected) timer */
QEMUTimer *gt_timer[NUM_GTIMERS];
+ /*
+ * Timer used by the PMU. Its state is restored after migration by
+ * pmu_op_finish() - it does not need other handling during migration
+ */
+ QEMUTimer *pmu_timer;
/* GPIO outputs for generic timer */
qemu_irq gt_timer_outputs[NUM_GTIMERS];
/* GPIO output for GICv3 maintenance interrupt signal */
@@ -718,6 +872,10 @@ struct ARMCPU {
/* MemoryRegion to use for secure physical accesses */
MemoryRegion *secure_memory;
+ /* MemoryRegion to use for allocation tag accesses */
+ MemoryRegion *tag_memory;
+ MemoryRegion *secure_tag_memory;
+
/* For v8M, pointer to the IDAU interface provided by board/SoC */
Object *idau;
@@ -730,9 +888,6 @@ struct ARMCPU {
*/
uint32_t psci_version;
- /* Should CPU start in PSCI powered-off state? */
- bool start_powered_off;
-
/* Current power state, access guarded by BQL */
ARMPSCIState power_state;
@@ -742,11 +897,21 @@ struct ARMCPU {
bool has_el3;
/* CPU has PMU (Performance Monitor Unit) */
bool has_pmu;
+ /* CPU has VFP */
+ bool has_vfp;
+ /* CPU has 32 VFP registers */
+ bool has_vfp_d32;
+ /* CPU has Neon */
+ bool has_neon;
+ /* CPU has M-profile DSP extension */
+ bool has_dsp;
/* CPU has memory protection unit */
bool has_mpu;
/* PMSAv7 MPU number of supported regions */
uint32_t pmsav7_dregion;
+ /* PMSAv8 MPU number of supported hyp regions */
+ uint32_t pmsav8r_hdregion;
/* v8M SAU number of supported regions */
uint32_t sau_sregion;
@@ -757,15 +922,29 @@ struct ARMCPU {
/* For v8M, initial value of the Secure VTOR */
uint32_t init_svtor;
+ /* For v8M, initial value of the Non-secure VTOR */
+ uint32_t init_nsvtor;
/* [QEMU_]KVM_ARM_TARGET_* constant for this CPU, or
* QEMU_KVM_ARM_TARGET_NONE if the kernel doesn't support this CPU type.
*/
uint32_t kvm_target;
+#ifdef CONFIG_KVM
/* KVM init features for this CPU */
uint32_t kvm_init_features[7];
+ /* KVM CPU state */
+
+ /* KVM virtual time adjustment */
+ bool kvm_adjvtime;
+ bool kvm_vtime_dirty;
+ uint64_t kvm_vtime;
+
+ /* KVM steal time */
+ OnOffAuto kvm_steal_time;
+#endif /* CONFIG_KVM */
+
/* Uniprocessor system with MP extensions */
bool mp_is_up;
@@ -788,61 +967,95 @@ struct ARMCPU {
* ARMv7AR ARM Architecture Reference Manual. A reset_ prefix
* is used for reset values of non-constant registers; no reset_
* prefix means a constant register.
+ * Some of these registers are split out into a substructure that
+ * is shared with the translators to control the ISA.
+ *
+ * Note that if you add an ID register to the ARMISARegisters struct
+ * you need to also update the 32-bit and 64-bit versions of the
+ * kvm_arm_get_host_cpu_features() function to correctly populate the
+ * field by reading the value from the KVM vCPU.
*/
- uint32_t midr;
+ struct ARMISARegisters {
+ uint32_t id_isar0;
+ uint32_t id_isar1;
+ uint32_t id_isar2;
+ uint32_t id_isar3;
+ uint32_t id_isar4;
+ uint32_t id_isar5;
+ uint32_t id_isar6;
+ uint32_t id_mmfr0;
+ uint32_t id_mmfr1;
+ uint32_t id_mmfr2;
+ uint32_t id_mmfr3;
+ uint32_t id_mmfr4;
+ uint32_t id_mmfr5;
+ uint32_t id_pfr0;
+ uint32_t id_pfr1;
+ uint32_t id_pfr2;
+ uint32_t mvfr0;
+ uint32_t mvfr1;
+ uint32_t mvfr2;
+ uint32_t id_dfr0;
+ uint32_t id_dfr1;
+ uint32_t dbgdidr;
+ uint32_t dbgdevid;
+ uint32_t dbgdevid1;
+ uint64_t id_aa64isar0;
+ uint64_t id_aa64isar1;
+ uint64_t id_aa64isar2;
+ uint64_t id_aa64pfr0;
+ uint64_t id_aa64pfr1;
+ uint64_t id_aa64mmfr0;
+ uint64_t id_aa64mmfr1;
+ uint64_t id_aa64mmfr2;
+ uint64_t id_aa64dfr0;
+ uint64_t id_aa64dfr1;
+ uint64_t id_aa64zfr0;
+ uint64_t id_aa64smfr0;
+ uint64_t reset_pmcr_el0;
+ } isar;
+ uint64_t midr;
uint32_t revidr;
uint32_t reset_fpsid;
- uint32_t mvfr0;
- uint32_t mvfr1;
- uint32_t mvfr2;
- uint32_t ctr;
+ uint64_t ctr;
uint32_t reset_sctlr;
- uint32_t id_pfr0;
- uint32_t id_pfr1;
- uint32_t id_dfr0;
- uint32_t pmceid0;
- uint32_t pmceid1;
+ uint64_t pmceid0;
+ uint64_t pmceid1;
uint32_t id_afr0;
- uint32_t id_mmfr0;
- uint32_t id_mmfr1;
- uint32_t id_mmfr2;
- uint32_t id_mmfr3;
- uint32_t id_mmfr4;
- uint32_t id_isar0;
- uint32_t id_isar1;
- uint32_t id_isar2;
- uint32_t id_isar3;
- uint32_t id_isar4;
- uint32_t id_isar5;
- uint32_t id_isar6;
- uint64_t id_aa64pfr0;
- uint64_t id_aa64pfr1;
- uint64_t id_aa64dfr0;
- uint64_t id_aa64dfr1;
uint64_t id_aa64afr0;
uint64_t id_aa64afr1;
- uint64_t id_aa64isar0;
- uint64_t id_aa64isar1;
- uint64_t id_aa64mmfr0;
- uint64_t id_aa64mmfr1;
- uint32_t dbgdidr;
- uint32_t clidr;
+ uint64_t clidr;
uint64_t mp_affinity; /* MP ID without feature bits */
/* The elements of this array are the CCSIDR values for each cache,
* in the order L1DCache, L1ICache, L2DCache, L2ICache, etc.
*/
- uint32_t ccsidr[16];
+ uint64_t ccsidr[16];
uint64_t reset_cbar;
uint32_t reset_auxcr;
bool reset_hivecs;
+ uint8_t reset_l0gptsz;
+
+ /*
+ * Intermediate values used during property parsing.
+ * Once finalized, the values should be read from ID_AA64*.
+ */
+ bool prop_pauth;
+ bool prop_pauth_impdef;
+ bool prop_pauth_qarma3;
+ bool prop_lpa2;
+
/* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */
- uint32_t dcz_blocksize;
- uint64_t rvbar;
+ uint8_t dcz_blocksize;
+ /* GM blocksize, in log_2(words), ie low 4 bits of GMID_EL0 */
+ uint8_t gm_blocksize;
+
+ uint64_t rvbar_prop; /* Property/input signals. */
/* Configurable aspects of GIC cpu interface (which is part of the CPU) */
int gic_num_lrs; /* number of list registers */
int gic_vpribits; /* number of virtual priority bits */
int gic_vprebits; /* number of virtual preemption bits */
+ int gic_pribits; /* number of physical priority bits */
/* Whether the cfgend input is high (i.e. this CPU should reset into
* big-endian mode). This setting isn't used directly: instead it modifies
@@ -861,58 +1074,146 @@ struct ARMCPU {
/* Used to set the maximum vector length the cpu will support. */
uint32_t sve_max_vq;
+
+#ifdef CONFIG_USER_ONLY
+ /* Used to set the default vector length at process start. */
+ uint32_t sve_default_vq;
+ uint32_t sme_default_vq;
+#endif
+
+ ARMVQMap sve_vq;
+ ARMVQMap sme_vq;
+
+ /* Generic timer counter frequency, in Hz */
+ uint64_t gt_cntfrq_hz;
};
-static inline ARMCPU *arm_env_get_cpu(CPUARMState *env)
-{
- return container_of(env, ARMCPU, env);
-}
+typedef struct ARMCPUInfo {
+ const char *name;
+ void (*initfn)(Object *obj);
+ void (*class_init)(ObjectClass *oc, void *data);
+} ARMCPUInfo;
+
+/**
+ * ARMCPUClass:
+ * @parent_realize: The parent class' realize handler.
+ * @parent_phases: The parent class' reset phase handlers.
+ *
+ * An ARM CPU model.
+ */
+struct ARMCPUClass {
+ CPUClass parent_class;
+
+ const ARMCPUInfo *info;
+ DeviceRealize parent_realize;
+ ResettablePhases parent_phases;
+};
-uint64_t arm_cpu_mp_affinity(int idx, uint8_t clustersz);
+struct AArch64CPUClass {
+ ARMCPUClass parent_class;
+};
+
+/* Callback functions for the generic timer's timers. */
+void arm_gt_ptimer_cb(void *opaque);
+void arm_gt_vtimer_cb(void *opaque);
+void arm_gt_htimer_cb(void *opaque);
+void arm_gt_stimer_cb(void *opaque);
+void arm_gt_hvtimer_cb(void *opaque);
+
+unsigned int gt_cntfrq_period_ns(ARMCPU *cpu);
+void gt_rme_post_el_change(ARMCPU *cpu, void *opaque);
+
+void arm_cpu_post_init(Object *obj);
+
+#define ARM_AFF0_SHIFT 0
+#define ARM_AFF0_MASK (0xFFULL << ARM_AFF0_SHIFT)
+#define ARM_AFF1_SHIFT 8
+#define ARM_AFF1_MASK (0xFFULL << ARM_AFF1_SHIFT)
+#define ARM_AFF2_SHIFT 16
+#define ARM_AFF2_MASK (0xFFULL << ARM_AFF2_SHIFT)
+#define ARM_AFF3_SHIFT 32
+#define ARM_AFF3_MASK (0xFFULL << ARM_AFF3_SHIFT)
+#define ARM_DEFAULT_CPUS_PER_CLUSTER 8
-#define ENV_GET_CPU(e) CPU(arm_env_get_cpu(e))
+#define ARM32_AFFINITY_MASK (ARM_AFF0_MASK | ARM_AFF1_MASK | ARM_AFF2_MASK)
+#define ARM64_AFFINITY_MASK \
+ (ARM_AFF0_MASK | ARM_AFF1_MASK | ARM_AFF2_MASK | ARM_AFF3_MASK)
+#define ARM64_AFFINITY_INVALID (~ARM64_AFFINITY_MASK)
-#define ENV_OFFSET offsetof(ARMCPU, env)
+uint64_t arm_build_mp_affinity(int idx, uint8_t clustersz);
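A plausible standalone sketch of packing a linear CPU index into Aff1/Aff0 with these shifts, assuming the simple index/cluster-size split; the exact policy is whatever arm_build_mp_affinity() implements:

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

#define DEMO_AFF0_SHIFT 0
#define DEMO_AFF1_SHIFT 8

/* Assumed packing: cluster number in Aff1, CPU-within-cluster in Aff0. */
static uint64_t demo_mp_affinity(int idx, uint8_t clustersz)
{
    uint64_t aff1 = idx / clustersz;
    uint64_t aff0 = idx % clustersz;

    return (aff1 << DEMO_AFF1_SHIFT) | (aff0 << DEMO_AFF0_SHIFT);
}

int main(void)
{
    /* cpu 10 with 8 CPUs per cluster -> Aff1 = 1, Aff0 = 2 -> 0x102 */
    printf("0x%" PRIx64 "\n", demo_mp_affinity(10, 8));
    return 0;
}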
#ifndef CONFIG_USER_ONLY
-extern const struct VMStateDescription vmstate_arm_cpu;
-#endif
+extern const VMStateDescription vmstate_arm_cpu;
void arm_cpu_do_interrupt(CPUState *cpu);
void arm_v7m_cpu_do_interrupt(CPUState *cpu);
-bool arm_cpu_exec_interrupt(CPUState *cpu, int int_req);
-
-void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
- int flags);
hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr,
MemTxAttrs *attrs);
+#endif /* !CONFIG_USER_ONLY */
-int arm_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
+int arm_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int arm_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
-/* Dynamically generates for gdb stub an XML description of the sysregs from
- * the cp_regs hashtable. Returns the registered sysregs number.
- */
-int arm_gen_dynamic_xml(CPUState *cpu);
-
-/* Returns the dynamically generated XML for the gdb stub.
- * Returns a pointer to the XML contents for the specified XML file or NULL
- * if the XML name doesn't match the predefined one.
- */
-const char *arm_gdb_get_dynamic_xml(CPUState *cpu, const char *xmlname);
-
int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque);
+ int cpuid, DumpState *s);
int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
- int cpuid, void *opaque);
+ int cpuid, DumpState *s);
+
+/**
+ * arm_emulate_firmware_reset: Emulate firmware CPU reset handling
+ * @cpustate: CPU (which must have been freshly reset)
+ * @target_el: exception level to put the CPU into
+ *
+ * When QEMU is directly running a guest kernel at a lower level than
+ * EL3 it implicitly emulates some aspects of the guest firmware.
+ * This includes that on reset we need to configure the parts of the
+ * CPU corresponding to EL3 so that the real guest code can run at its
+ * lower exception level. This function does that post-reset CPU setup,
+ * for when we do direct boot of a guest kernel, and for when we
+ * emulate PSCI and similar firmware interfaces starting a CPU at a
+ * lower exception level.
+ *
+ * @target_el must be an EL implemented by the CPU between 1 and 3.
+ * We do not support dropping into a Secure EL other than 3.
+ *
+ * It is the responsibility of the caller to call arm_rebuild_hflags().
+ */
+void arm_emulate_firmware_reset(CPUState *cpustate, int target_el);
#ifdef TARGET_AARCH64
-int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
+int aarch64_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
void aarch64_sve_change_el(CPUARMState *env, int old_el,
int new_el, bool el0_a64);
+void aarch64_set_svcr(CPUARMState *env, uint64_t new, uint64_t mask);
+
+/*
+ * SVE registers are encoded in KVM's memory in an endianness-invariant format.
+ * The byte at offset i from the start of the in-memory representation contains
+ * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the
+ * lowest offsets are stored in the lowest memory addresses, then that nearly
+ * matches QEMU's representation, which is to use an array of host-endian
+ * uint64_t's, where the lower offsets are at the lower indices. To complete
+ * the translation we just need to byte swap the uint64_t's on big-endian hosts.
+ */
+static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
+{
+#if HOST_BIG_ENDIAN
+ int i;
+
+ for (i = 0; i < nr; ++i) {
+ dst[i] = bswap64(src[i]);
+ }
+
+ return dst;
+#else
+ return src;
+#endif
+}
+
#else
static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
static inline void aarch64_sve_change_el(CPUARMState *env, int o,
@@ -920,35 +1221,63 @@ static inline void aarch64_sve_change_el(CPUARMState *env, int o,
{ }
#endif
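A standalone demonstration of the endianness-invariant KVM byte layout that sve_bswap64() above compensates for:

#include <stdint.h>
#include <stdio.h>

/* The KVM format is endianness-invariant: byte i holds bits [8*i+7 : 8*i].
 * Assembling a uint64_t from those bytes is equivalent to a plain load on
 * little-endian hosts and to a bswap64 on big-endian hosts -- which is
 * exactly the distinction sve_bswap64() handles.
 */
int main(void)
{
    uint8_t kvm_bytes[8] = { 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11 };
    uint64_t val = 0;
    int i;

    for (i = 0; i < 8; i++) {
        val |= (uint64_t)kvm_bytes[i] << (8 * i);
    }
    printf("0x%016llx\n", (unsigned long long)val); /* 0x1122334455667788 */
    return 0;
}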
-target_ulong do_arm_semihosting(CPUARMState *env);
void aarch64_sync_32_to_64(CPUARMState *env);
void aarch64_sync_64_to_32(CPUARMState *env);
int fp_exception_el(CPUARMState *env, int cur_el);
int sve_exception_el(CPUARMState *env, int cur_el);
-uint32_t sve_zcr_len_for_el(CPUARMState *env, int el);
+int sme_exception_el(CPUARMState *env, int cur_el);
+
+/**
+ * sve_vqm1_for_el_sm:
+ * @env: CPUARMState
+ * @el: exception level
+ * @sm: streaming mode
+ *
+ * Compute the current vector length for @el & @sm, in units of
+ * Quadwords Minus 1 -- the same scale used for ZCR_ELx.LEN.
+ * If @sm, compute for SVL, otherwise NVL.
+ */
+uint32_t sve_vqm1_for_el_sm(CPUARMState *env, int el, bool sm);
+
+/* Likewise, but using @sm = PSTATE.SM. */
+uint32_t sve_vqm1_for_el(CPUARMState *env, int el);
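Converting the quadwords-minus-1 value returned here into a byte length is a one-liner, shown standalone:

#include <stdio.h>

/* vq_minus_1 is in the same units as ZCR_ELx.LEN: the vector length is
 * (vq_minus_1 + 1) quadwords, and a quadword is 16 bytes.
 */
static unsigned vl_bytes_from_vqm1(unsigned vq_minus_1)
{
    return (vq_minus_1 + 1) * 16;
}

int main(void)
{
    printf("%u\n", vl_bytes_from_vqm1(3)); /* LEN=3 -> 64-byte (512-bit) vectors */
    return 0;
}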
static inline bool is_a64(CPUARMState *env)
{
return env->aarch64;
}
-/* you can call this signal handler from your SIGBUS and SIGSEGV
- signal handlers to inform the virtual CPU of exceptions. non zero
- is returned if the signal was handled by the virtual CPU. */
-int cpu_arm_signal_handler(int host_signum, void *pinfo,
- void *puc);
-
/**
- * pmccntr_sync
+ * pmu_op_start/finish
* @env: CPUARMState
*
- * Synchronises the counter in the PMCCNTR. This must always be called twice,
- * once before any action that might affect the timer and again afterwards.
- * The function is used to swap the state of the register if required.
- * This only happens when not in user mode (!CONFIG_USER_ONLY)
+ * Convert all PMU counters between their delta form (the typical mode when
+ * they are enabled) and the guest-visible values. These two calls must
+ * surround any action which might affect the counters.
*/
-void pmccntr_sync(CPUARMState *env);
+void pmu_op_start(CPUARMState *env);
+void pmu_op_finish(CPUARMState *env);
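A simplified standalone sketch of the delta-form bookkeeping that pmu_op_start()/pmu_op_finish() bracket (one counter, a fake cycle source, no event filtering):

#include <stdint.h>
#include <stdio.h>

/* While the counter runs, 'delta' holds (raw cycles - guest-visible value).
 * op_start converts to the guest-visible value for the duration of an
 * access; op_finish converts back to delta form so counting resumes from
 * whatever value the access left behind.
 */
static uint64_t raw_cycles;   /* stand-in for the host cycle source */
static uint64_t ccnt;         /* guest-visible value (valid between start/finish) */
static uint64_t ccnt_delta;   /* delta form (valid while counting) */

static void demo_op_start(void)  { ccnt = raw_cycles - ccnt_delta; }
static void demo_op_finish(void) { ccnt_delta = raw_cycles - ccnt; }

int main(void)
{
    raw_cycles = 1000;
    ccnt_delta = raw_cycles;      /* counter reads 0 at this point */

    raw_cycles += 500;            /* time passes */
    demo_op_start();
    printf("read CCNT = %llu\n", (unsigned long long)ccnt);   /* 500 */
    ccnt = 0;                     /* guest writes 0 to the counter */
    demo_op_finish();

    raw_cycles += 250;            /* more time passes */
    demo_op_start();
    printf("read CCNT = %llu\n", (unsigned long long)ccnt);   /* 250 */
    demo_op_finish();
    return 0;
}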
+
+/*
+ * Called when a PMU counter is due to overflow
+ */
+void arm_pmu_timer_cb(void *opaque);
+
+/**
+ * Functions to register as EL change hooks for PMU mode filtering
+ */
+void pmu_pre_el_change(ARMCPU *cpu, void *ignored);
+void pmu_post_el_change(ARMCPU *cpu, void *ignored);
+
+/*
+ * pmu_init
+ * @cpu: ARMCPU
+ *
+ * Initialize the CPU's PMCEID[01]_EL0 registers and associated internal state
+ * for the current configuration
+ */
+void pmu_init(ARMCPU *cpu);
/* SCTLR bit meanings. Several bits have been reused in newer
* versions of the architecture; in that case we define constants
@@ -960,12 +1289,15 @@ void pmccntr_sync(CPUARMState *env);
#define SCTLR_A (1U << 1)
#define SCTLR_C (1U << 2)
#define SCTLR_W (1U << 3) /* up to v6; RAO in v7 */
-#define SCTLR_SA (1U << 3)
+#define SCTLR_nTLSMD_32 (1U << 3) /* v8.2-LSMAOC, AArch32 only */
+#define SCTLR_SA (1U << 3) /* AArch64 only */
#define SCTLR_P (1U << 4) /* up to v5; RAO in v6 and v7 */
+#define SCTLR_LSMAOE_32 (1U << 4) /* v8.2-LSMAOC, AArch32 only */
#define SCTLR_SA0 (1U << 4) /* v8 onward, AArch64 only */
#define SCTLR_D (1U << 5) /* up to v5; RAO in v6 */
#define SCTLR_CP15BEN (1U << 5) /* v7 onward */
#define SCTLR_L (1U << 6) /* up to v5; RAO in v6 and v7; RAZ in v8 */
+#define SCTLR_nAA (1U << 6) /* when FEAT_LSE2 is implemented */
#define SCTLR_B (1U << 7) /* up to v6; RAZ in v7 */
#define SCTLR_ITD (1U << 7) /* v8 onward */
#define SCTLR_S (1U << 8) /* up to v6; RAZ in v7 */
@@ -973,57 +1305,71 @@ void pmccntr_sync(CPUARMState *env);
#define SCTLR_R (1U << 9) /* up to v6; RAZ in v7 */
#define SCTLR_UMA (1U << 9) /* v8 onward, AArch64 only */
#define SCTLR_F (1U << 10) /* up to v6 */
-#define SCTLR_SW (1U << 10) /* v7 onward */
-#define SCTLR_Z (1U << 11)
+#define SCTLR_SW (1U << 10) /* v7 */
+#define SCTLR_EnRCTX (1U << 10) /* in v8.0-PredInv */
+#define SCTLR_Z (1U << 11) /* in v7, RES1 in v8 */
+#define SCTLR_EOS (1U << 11) /* v8.5-ExS */
#define SCTLR_I (1U << 12)
-#define SCTLR_V (1U << 13)
+#define SCTLR_V (1U << 13) /* AArch32 only */
+#define SCTLR_EnDB (1U << 13) /* v8.3, AArch64 only */
#define SCTLR_RR (1U << 14) /* up to v7 */
#define SCTLR_DZE (1U << 14) /* v8 onward, AArch64 only */
#define SCTLR_L4 (1U << 15) /* up to v6; RAZ in v7 */
#define SCTLR_UCT (1U << 15) /* v8 onward, AArch64 only */
#define SCTLR_DT (1U << 16) /* up to ??, RAO in v6 and v7 */
#define SCTLR_nTWI (1U << 16) /* v8 onward */
-#define SCTLR_HA (1U << 17)
+#define SCTLR_HA (1U << 17) /* up to v7, RES0 in v8 */
#define SCTLR_BR (1U << 17) /* PMSA only */
#define SCTLR_IT (1U << 18) /* up to ??, RAO in v6 and v7 */
#define SCTLR_nTWE (1U << 18) /* v8 onward */
#define SCTLR_WXN (1U << 19)
#define SCTLR_ST (1U << 20) /* up to ??, RAZ in v6 */
-#define SCTLR_UWXN (1U << 20) /* v7 onward */
-#define SCTLR_FI (1U << 21)
-#define SCTLR_U (1U << 22)
+#define SCTLR_UWXN (1U << 20) /* v7 onward, AArch32 only */
+#define SCTLR_TSCXT (1U << 20) /* FEAT_CSV2_1p2, AArch64 only */
+#define SCTLR_FI (1U << 21) /* up to v7, v8 RES0 */
+#define SCTLR_IESB (1U << 21) /* v8.2-IESB, AArch64 only */
+#define SCTLR_U (1U << 22) /* up to v6, RAO in v7 */
+#define SCTLR_EIS (1U << 22) /* v8.5-ExS */
#define SCTLR_XP (1U << 23) /* up to v6; v7 onward RAO */
+#define SCTLR_SPAN (1U << 23) /* v8.1-PAN */
#define SCTLR_VE (1U << 24) /* up to v7 */
#define SCTLR_E0E (1U << 24) /* v8 onward, AArch64 only */
#define SCTLR_EE (1U << 25)
#define SCTLR_L2 (1U << 26) /* up to v6, RAZ in v7 */
#define SCTLR_UCI (1U << 26) /* v8 onward, AArch64 only */
-#define SCTLR_NMFI (1U << 27)
-#define SCTLR_TRE (1U << 28)
-#define SCTLR_AFE (1U << 29)
-#define SCTLR_TE (1U << 30)
-
-#define CPTR_TCPAC (1U << 31)
-#define CPTR_TTA (1U << 20)
-#define CPTR_TFP (1U << 10)
-#define CPTR_TZ (1U << 8) /* CPTR_EL2 */
-#define CPTR_EZ (1U << 8) /* CPTR_EL3 */
-
-#define MDCR_EPMAD (1U << 21)
-#define MDCR_EDAD (1U << 20)
-#define MDCR_SPME (1U << 17)
-#define MDCR_SDD (1U << 16)
-#define MDCR_SPD (3U << 14)
-#define MDCR_TDRA (1U << 11)
-#define MDCR_TDOSA (1U << 10)
-#define MDCR_TDA (1U << 9)
-#define MDCR_TDE (1U << 8)
-#define MDCR_HPME (1U << 7)
-#define MDCR_TPM (1U << 6)
-#define MDCR_TPMCR (1U << 5)
-
-/* Not all of the MDCR_EL3 bits are present in the 32-bit SDCR */
-#define SDCR_VALID_MASK (MDCR_EPMAD | MDCR_EDAD | MDCR_SPME | MDCR_SPD)
+#define SCTLR_NMFI (1U << 27) /* up to v7, RAZ in v7VE and v8 */
+#define SCTLR_EnDA (1U << 27) /* v8.3, AArch64 only */
+#define SCTLR_TRE (1U << 28) /* AArch32 only */
+#define SCTLR_nTLSMD_64 (1U << 28) /* v8.2-LSMAOC, AArch64 only */
+#define SCTLR_AFE (1U << 29) /* AArch32 only */
+#define SCTLR_LSMAOE_64 (1U << 29) /* v8.2-LSMAOC, AArch64 only */
+#define SCTLR_TE (1U << 30) /* AArch32 only */
+#define SCTLR_EnIB (1U << 30) /* v8.3, AArch64 only */
+#define SCTLR_EnIA (1U << 31) /* v8.3, AArch64 only */
+#define SCTLR_DSSBS_32 (1U << 31) /* v8.5, AArch32 only */
+#define SCTLR_MSCEN (1ULL << 33) /* FEAT_MOPS */
+#define SCTLR_BT0 (1ULL << 35) /* v8.5-BTI */
+#define SCTLR_BT1 (1ULL << 36) /* v8.5-BTI */
+#define SCTLR_ITFSB (1ULL << 37) /* v8.5-MemTag */
+#define SCTLR_TCF0 (3ULL << 38) /* v8.5-MemTag */
+#define SCTLR_TCF (3ULL << 40) /* v8.5-MemTag */
+#define SCTLR_ATA0 (1ULL << 42) /* v8.5-MemTag */
+#define SCTLR_ATA (1ULL << 43) /* v8.5-MemTag */
+#define SCTLR_DSSBS_64 (1ULL << 44) /* v8.5, AArch64 only */
+#define SCTLR_TWEDEn (1ULL << 45) /* FEAT_TWED */
+#define SCTLR_TWEDEL MAKE_64BIT_MASK(46, 4) /* FEAT_TWED */
+#define SCTLR_TMT0 (1ULL << 50) /* FEAT_TME */
+#define SCTLR_TMT (1ULL << 51) /* FEAT_TME */
+#define SCTLR_TME0 (1ULL << 52) /* FEAT_TME */
+#define SCTLR_TME (1ULL << 53) /* FEAT_TME */
+#define SCTLR_EnASR (1ULL << 54) /* FEAT_LS64_V */
+#define SCTLR_EnAS0 (1ULL << 55) /* FEAT_LS64_ACCDATA */
+#define SCTLR_EnALS (1ULL << 56) /* FEAT_LS64 */
+#define SCTLR_EPAN (1ULL << 57) /* FEAT_PAN3 */
+#define SCTLR_EnTP2 (1ULL << 60) /* FEAT_SME */
+#define SCTLR_NMI (1ULL << 61) /* FEAT_NMI */
+#define SCTLR_SPINTMASK (1ULL << 62) /* FEAT_NMI */
+#define SCTLR_TIDCP (1ULL << 63) /* FEAT_TIDCP1 */
#define CPSR_M (0x1fU)
#define CPSR_T (1U << 5)
@@ -1034,12 +1380,9 @@ void pmccntr_sync(CPUARMState *env);
#define CPSR_IT_2_7 (0xfc00U)
#define CPSR_GE (0xfU << 16)
#define CPSR_IL (1U << 20)
-/* Note that the RESERVED bits include bit 21, which is PSTATE_SS in
- * an AArch64 SPSR but RES0 in AArch32 SPSR and CPSR. In QEMU we use
- * env->uncached_cpsr bit 21 to store PSTATE.SS when executing in AArch32,
- * where it is live state but not accessible to the AArch32 code.
- */
-#define CPSR_RESERVED (0x7U << 21)
+#define CPSR_DIT (1U << 21)
+#define CPSR_PAN (1U << 22)
+#define CPSR_SSBS (1U << 23)
#define CPSR_J (1U << 24)
#define CPSR_IT_0_1 (3U << 25)
#define CPSR_Q (1U << 27)
@@ -1054,11 +1397,9 @@ void pmccntr_sync(CPUARMState *env);
#define CACHED_CPSR_BITS (CPSR_T | CPSR_AIF | CPSR_GE | CPSR_IT | CPSR_Q \
| CPSR_NZCV)
/* Bits writable in user mode. */
-#define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE)
+#define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE | CPSR_E)
/* Execution state bits. MRS read as zero, MSR writes ignored. */
#define CPSR_EXEC (CPSR_T | CPSR_IT | CPSR_J | CPSR_IL)
-/* Mask of bits which may be set by exception return copying them from SPSR */
-#define CPSR_ERET_MASK (~CPSR_RESERVED)
/* Bit definitions for M profile XPSR. Most are the same as CPSR. */
#define XPSR_EXCP 0x1ffU
@@ -1076,22 +1417,6 @@ void pmccntr_sync(CPUARMState *env);
#define XPSR_NZCV CPSR_NZCV
#define XPSR_IT CPSR_IT
-#define TTBCR_N (7U << 0) /* TTBCR.EAE==0 */
-#define TTBCR_T0SZ (7U << 0) /* TTBCR.EAE==1 */
-#define TTBCR_PD0 (1U << 4)
-#define TTBCR_PD1 (1U << 5)
-#define TTBCR_EPD0 (1U << 7)
-#define TTBCR_IRGN0 (3U << 8)
-#define TTBCR_ORGN0 (3U << 10)
-#define TTBCR_SH0 (3U << 12)
-#define TTBCR_T1SZ (3U << 16)
-#define TTBCR_A1 (1U << 22)
-#define TTBCR_EPD1 (1U << 23)
-#define TTBCR_IRGN1 (3U << 24)
-#define TTBCR_ORGN1 (3U << 26)
-#define TTBCR_SH1 (1U << 28)
-#define TTBCR_EAE (1U << 31)
-
/* Bit definitions for ARMv8 SPSR (PSTATE) format.
* Only these are valid when in AArch64 mode; in
* AArch32 mode SPSRs are basically CPSR-format.
@@ -1103,15 +1428,21 @@ void pmccntr_sync(CPUARMState *env);
#define PSTATE_I (1U << 7)
#define PSTATE_A (1U << 8)
#define PSTATE_D (1U << 9)
+#define PSTATE_BTYPE (3U << 10)
+#define PSTATE_SSBS (1U << 12)
#define PSTATE_IL (1U << 20)
#define PSTATE_SS (1U << 21)
+#define PSTATE_PAN (1U << 22)
+#define PSTATE_UAO (1U << 23)
+#define PSTATE_DIT (1U << 24)
+#define PSTATE_TCO (1U << 25)
#define PSTATE_V (1U << 28)
#define PSTATE_C (1U << 29)
#define PSTATE_Z (1U << 30)
#define PSTATE_N (1U << 31)
#define PSTATE_NZCV (PSTATE_N | PSTATE_Z | PSTATE_C | PSTATE_V)
#define PSTATE_DAIF (PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F)
-#define CACHED_PSTATE_BITS (PSTATE_NZCV | PSTATE_DAIF)
+#define CACHED_PSTATE_BITS (PSTATE_NZCV | PSTATE_DAIF | PSTATE_BTYPE)
/* Mode values for AArch64 */
#define PSTATE_MODE_EL3h 13
#define PSTATE_MODE_EL3t 12
@@ -1121,6 +1452,14 @@ void pmccntr_sync(CPUARMState *env);
#define PSTATE_MODE_EL1t 4
#define PSTATE_MODE_EL0t 0
+/* PSTATE bits that are accessed via SVCR and not stored in SPSR_ELx. */
+FIELD(SVCR, SM, 0, 1)
+FIELD(SVCR, ZA, 1, 1)
+
+/* Fields for SMCR_ELx. */
+FIELD(SMCR, LEN, 0, 4)
+FIELD(SMCR, FA64, 31, 1)
+
/* Write a new value to v7m.exception, thus transitioning into or out
* of Handler mode; this may result in a change of active stack pointer.
*/
@@ -1143,7 +1482,7 @@ static inline uint32_t pstate_read(CPUARMState *env)
ZF = (env->ZF == 0);
return (env->NF & 0x80000000) | (ZF << 30)
| (env->CF << 29) | ((env->VF & 0x80000000) >> 3)
- | env->pstate | env->daif;
+ | env->pstate | env->daif | (env->btype << 10);
}
static inline void pstate_write(CPUARMState *env, uint32_t val)
@@ -1153,6 +1492,7 @@ static inline void pstate_write(CPUARMState *env, uint32_t val)
env->CF = (val >> 29) & 1;
env->VF = (val << 3) & 0x80000000;
env->daif = val & PSTATE_DAIF;
+ env->btype = (val >> 10) & 3;
env->pstate = val & ~CACHED_PSTATE_BITS;
}
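A standalone round trip of the BTYPE caching that pstate_write()/pstate_read() now perform; the NZCV and DAIF pieces are split out of env->pstate the same way:

#include <stdint.h>
#include <stdio.h>

#define DEMO_PSTATE_BTYPE (3U << 10)

int main(void)
{
    uint32_t spsr = 0x60000800;          /* Z and C set, BTYPE = 2 */
    uint32_t btype = (spsr >> 10) & 3;   /* cached separately, as in pstate_write() */
    uint32_t rest  = spsr & ~DEMO_PSTATE_BTYPE;

    /* pstate_read() reassembles the full value from the cached pieces */
    uint32_t reassembled = rest | (btype << 10);

    printf("btype=%u reassembled=0x%08x\n", btype, reassembled); /* btype=2, 0x60000800 */
    return 0;
}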
@@ -1162,11 +1502,17 @@ uint32_t cpsr_read(CPUARMState *env);
typedef enum CPSRWriteType {
CPSRWriteByInstr = 0, /* from guest MSR or CPS */
CPSRWriteExceptionReturn = 1, /* from guest exception return insn */
- CPSRWriteRaw = 2, /* trust values, do not switch reg banks */
+ CPSRWriteRaw = 2,
+ /* trust values, no reg bank switch, no hflags rebuild */
CPSRWriteByGDBStub = 3, /* from the GDB stub */
} CPSRWriteType;
-/* Set the CPSR. Note that some bits of mask must be all-set or all-clear.*/
+/*
+ * Set the CPSR. Note that some bits of mask must be all-set or all-clear.
+ * This will do an arm_rebuild_hflags() if any of the bits in @mask
+ * correspond to TB flags bits cached in the hflags, unless @write_type
+ * is CPSRWriteRaw.
+ */
void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask,
CPSRWriteType write_type);
@@ -1179,6 +1525,7 @@ static inline uint32_t xpsr_read(CPUARMState *env)
| (env->CF << 29) | ((env->VF & 0x80000000) >> 3) | (env->QF << 27)
| (env->thumb << 24) | ((env->condexec_bits & 3) << 25)
| ((env->condexec_bits & 0xfc) << 8)
+ | (env->GE << 16)
| env->v7m.exception;
}
@@ -1194,6 +1541,10 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
if (mask & XPSR_Q) {
env->QF = ((val & XPSR_Q) != 0);
}
+ if (mask & XPSR_GE) {
+ env->GE = (val & XPSR_GE) >> 16;
+ }
+#ifndef CONFIG_USER_ONLY
if (mask & XPSR_T) {
env->thumb = ((val & XPSR_T) != 0);
}
@@ -1209,6 +1560,7 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
/* Note that this only happens on exception exit */
write_v7m_exception(env, val & XPSR_EXCP);
}
+#endif
}
#define HCR_VM (1ULL << 0)
@@ -1233,7 +1585,7 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
#define HCR_TIDCP (1ULL << 20)
#define HCR_TACR (1ULL << 21)
#define HCR_TSW (1ULL << 22)
-#define HCR_TPC (1ULL << 23)
+#define HCR_TPCP (1ULL << 23)
#define HCR_TPU (1ULL << 24)
#define HCR_TTLB (1ULL << 25)
#define HCR_TVM (1ULL << 26)
@@ -1245,29 +1597,70 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
#define HCR_CD (1ULL << 32)
#define HCR_ID (1ULL << 33)
#define HCR_E2H (1ULL << 34)
-/*
- * When we actually implement ARMv8.1-VHE we should add HCR_E2H to
- * HCR_MASK and then clear it again if the feature bit is not set in
- * hcr_write().
- */
-#define HCR_MASK ((1ULL << 34) - 1)
-
-#define SCR_NS (1U << 0)
-#define SCR_IRQ (1U << 1)
-#define SCR_FIQ (1U << 2)
-#define SCR_EA (1U << 3)
-#define SCR_FW (1U << 4)
-#define SCR_AW (1U << 5)
-#define SCR_NET (1U << 6)
-#define SCR_SMD (1U << 7)
-#define SCR_HCE (1U << 8)
-#define SCR_SIF (1U << 9)
-#define SCR_RW (1U << 10)
-#define SCR_ST (1U << 11)
-#define SCR_TWI (1U << 12)
-#define SCR_TWE (1U << 13)
-#define SCR_AARCH32_MASK (0x3fff & ~(SCR_RW | SCR_ST))
-#define SCR_AARCH64_MASK (0x3fff & ~SCR_NET)
+#define HCR_TLOR (1ULL << 35)
+#define HCR_TERR (1ULL << 36)
+#define HCR_TEA (1ULL << 37)
+#define HCR_MIOCNCE (1ULL << 38)
+#define HCR_TME (1ULL << 39)
+#define HCR_APK (1ULL << 40)
+#define HCR_API (1ULL << 41)
+#define HCR_NV (1ULL << 42)
+#define HCR_NV1 (1ULL << 43)
+#define HCR_AT (1ULL << 44)
+#define HCR_NV2 (1ULL << 45)
+#define HCR_FWB (1ULL << 46)
+#define HCR_FIEN (1ULL << 47)
+#define HCR_GPF (1ULL << 48)
+#define HCR_TID4 (1ULL << 49)
+#define HCR_TICAB (1ULL << 50)
+#define HCR_AMVOFFEN (1ULL << 51)
+#define HCR_TOCU (1ULL << 52)
+#define HCR_ENSCXT (1ULL << 53)
+#define HCR_TTLBIS (1ULL << 54)
+#define HCR_TTLBOS (1ULL << 55)
+#define HCR_ATA (1ULL << 56)
+#define HCR_DCT (1ULL << 57)
+#define HCR_TID5 (1ULL << 58)
+#define HCR_TWEDEN (1ULL << 59)
+#define HCR_TWEDEL MAKE_64BIT_MASK(60, 4)
+
+#define SCR_NS (1ULL << 0)
+#define SCR_IRQ (1ULL << 1)
+#define SCR_FIQ (1ULL << 2)
+#define SCR_EA (1ULL << 3)
+#define SCR_FW (1ULL << 4)
+#define SCR_AW (1ULL << 5)
+#define SCR_NET (1ULL << 6)
+#define SCR_SMD (1ULL << 7)
+#define SCR_HCE (1ULL << 8)
+#define SCR_SIF (1ULL << 9)
+#define SCR_RW (1ULL << 10)
+#define SCR_ST (1ULL << 11)
+#define SCR_TWI (1ULL << 12)
+#define SCR_TWE (1ULL << 13)
+#define SCR_TLOR (1ULL << 14)
+#define SCR_TERR (1ULL << 15)
+#define SCR_APK (1ULL << 16)
+#define SCR_API (1ULL << 17)
+#define SCR_EEL2 (1ULL << 18)
+#define SCR_EASE (1ULL << 19)
+#define SCR_NMEA (1ULL << 20)
+#define SCR_FIEN (1ULL << 21)
+#define SCR_ENSCXT (1ULL << 25)
+#define SCR_ATA (1ULL << 26)
+#define SCR_FGTEN (1ULL << 27)
+#define SCR_ECVEN (1ULL << 28)
+#define SCR_TWEDEN (1ULL << 29)
+#define SCR_TWEDEL MAKE_64BIT_MASK(30, 4)
+#define SCR_TME (1ULL << 34)
+#define SCR_AMVOFFEN (1ULL << 35)
+#define SCR_ENAS0 (1ULL << 36)
+#define SCR_ADEN (1ULL << 37)
+#define SCR_HXEN (1ULL << 38)
+#define SCR_TRNDR (1ULL << 40)
+#define SCR_ENTP2 (1ULL << 41)
+#define SCR_GPF (1ULL << 48)
+#define SCR_NSE (1ULL << 62)
/* Return the current FPSCR value. */
uint32_t vfp_get_fpscr(CPUARMState *env);
@@ -1283,9 +1676,29 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val);
#define FPSR_MASK 0xf800009f
#define FPCR_MASK 0x07ff9f00
+#define FPCR_IOE (1 << 8) /* Invalid Operation exception trap enable */
+#define FPCR_DZE (1 << 9) /* Divide by Zero exception trap enable */
+#define FPCR_OFE (1 << 10) /* Overflow exception trap enable */
+#define FPCR_UFE (1 << 11) /* Underflow exception trap enable */
+#define FPCR_IXE (1 << 12) /* Inexact exception trap enable */
+#define FPCR_IDE (1 << 15) /* Input Denormal exception trap enable */
#define FPCR_FZ16 (1 << 19) /* ARMv8.2+, FP16 flush-to-zero */
+#define FPCR_RMODE_MASK (3 << 22) /* Rounding mode */
#define FPCR_FZ (1 << 24) /* Flush-to-zero enable bit */
#define FPCR_DN (1 << 25) /* Default NaN enable bit */
+#define FPCR_AHP (1 << 26) /* Alternative half-precision */
+#define FPCR_QC (1 << 27) /* Cumulative saturation bit */
+#define FPCR_V (1 << 28) /* FP overflow flag */
+#define FPCR_C (1 << 29) /* FP carry flag */
+#define FPCR_Z (1 << 30) /* FP zero flag */
+#define FPCR_N (1 << 31) /* FP negative flag */
+
+#define FPCR_LTPSIZE_SHIFT 16 /* LTPSIZE, M-profile only */
+#define FPCR_LTPSIZE_MASK (7 << FPCR_LTPSIZE_SHIFT)
+#define FPCR_LTPSIZE_LENGTH 3
+
+#define FPCR_NZCV_MASK (FPCR_N | FPCR_Z | FPCR_C | FPCR_V)
+#define FPCR_NZCVQC_MASK (FPCR_NZCV_MASK | FPCR_QC)
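Extracting these fields is plain shift-and-mask; for example, standalone:

#include <stdint.h>
#include <stdio.h>

#define DEMO_FPCR_RMODE_MASK     (3u << 22)
#define DEMO_FPCR_LTPSIZE_SHIFT  16
#define DEMO_FPCR_LTPSIZE_MASK   (7u << DEMO_FPCR_LTPSIZE_SHIFT)

int main(void)
{
    uint32_t fpcr = (2u << 22) | (4u << DEMO_FPCR_LTPSIZE_SHIFT); /* RMode = 2, LTPSIZE = 4 */
    uint32_t rmode = (fpcr & DEMO_FPCR_RMODE_MASK) >> 22;
    uint32_t ltpsize = (fpcr & DEMO_FPCR_LTPSIZE_MASK) >> DEMO_FPCR_LTPSIZE_SHIFT;

    printf("rmode=%u ltpsize=%u\n", rmode, ltpsize); /* rmode=2 ltpsize=4 */
    return 0;
}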
static inline uint32_t vfp_get_fpsr(CPUARMState *env)
{
@@ -1330,6 +1743,15 @@ enum arm_cpu_mode {
#define ARM_VFP_FPEXC 8
#define ARM_VFP_FPINST 9
#define ARM_VFP_FPINST2 10
+/* These ones are M-profile only */
+#define ARM_VFP_FPSCR_NZCVQC 2
+#define ARM_VFP_VPR 12
+#define ARM_VFP_P0 13
+#define ARM_VFP_FPCXT_NS 14
+#define ARM_VFP_FPCXT_S 15
+
+/* QEMU-internal value meaning "FPSCR, but we care only about NZCV" */
+#define QEMU_VFP_FPSCR_NZCV 0xffff
/* iwMMXt coprocessor control registers. */
#define ARM_IWMMXT_wCID 0
@@ -1352,6 +1774,8 @@ FIELD(V7M_CCR, STKOFHFNMIGN, 10, 1)
FIELD(V7M_CCR, DC, 16, 1)
FIELD(V7M_CCR, IC, 17, 1)
FIELD(V7M_CCR, BP, 18, 1)
+FIELD(V7M_CCR, LOB, 19, 1)
+FIELD(V7M_CCR, TRD, 20, 1)
/* V7M SCR bits */
FIELD(V7M_SCR, SLEEPONEXIT, 1, 1)
@@ -1443,9 +1867,80 @@ FIELD(V7M_CSSELR, LEVEL, 1, 3)
*/
FIELD(V7M_CSSELR, INDEX, 0, 4)
+/* v7M FPCCR bits */
+FIELD(V7M_FPCCR, LSPACT, 0, 1)
+FIELD(V7M_FPCCR, USER, 1, 1)
+FIELD(V7M_FPCCR, S, 2, 1)
+FIELD(V7M_FPCCR, THREAD, 3, 1)
+FIELD(V7M_FPCCR, HFRDY, 4, 1)
+FIELD(V7M_FPCCR, MMRDY, 5, 1)
+FIELD(V7M_FPCCR, BFRDY, 6, 1)
+FIELD(V7M_FPCCR, SFRDY, 7, 1)
+FIELD(V7M_FPCCR, MONRDY, 8, 1)
+FIELD(V7M_FPCCR, SPLIMVIOL, 9, 1)
+FIELD(V7M_FPCCR, UFRDY, 10, 1)
+FIELD(V7M_FPCCR, RES0, 11, 15)
+FIELD(V7M_FPCCR, TS, 26, 1)
+FIELD(V7M_FPCCR, CLRONRETS, 27, 1)
+FIELD(V7M_FPCCR, CLRONRET, 28, 1)
+FIELD(V7M_FPCCR, LSPENS, 29, 1)
+FIELD(V7M_FPCCR, LSPEN, 30, 1)
+FIELD(V7M_FPCCR, ASPEN, 31, 1)
+/* These bits are banked. Others are non-banked and live in the M_REG_S bank */
+#define R_V7M_FPCCR_BANKED_MASK \
+ (R_V7M_FPCCR_LSPACT_MASK | \
+ R_V7M_FPCCR_USER_MASK | \
+ R_V7M_FPCCR_THREAD_MASK | \
+ R_V7M_FPCCR_MMRDY_MASK | \
+ R_V7M_FPCCR_SPLIMVIOL_MASK | \
+ R_V7M_FPCCR_UFRDY_MASK | \
+ R_V7M_FPCCR_ASPEN_MASK)
+
+/* v7M VPR bits */
+FIELD(V7M_VPR, P0, 0, 16)
+FIELD(V7M_VPR, MASK01, 16, 4)
+FIELD(V7M_VPR, MASK23, 20, 4)
+
/*
* System register ID fields.
*/
+FIELD(CLIDR_EL1, CTYPE1, 0, 3)
+FIELD(CLIDR_EL1, CTYPE2, 3, 3)
+FIELD(CLIDR_EL1, CTYPE3, 6, 3)
+FIELD(CLIDR_EL1, CTYPE4, 9, 3)
+FIELD(CLIDR_EL1, CTYPE5, 12, 3)
+FIELD(CLIDR_EL1, CTYPE6, 15, 3)
+FIELD(CLIDR_EL1, CTYPE7, 18, 3)
+FIELD(CLIDR_EL1, LOUIS, 21, 3)
+FIELD(CLIDR_EL1, LOC, 24, 3)
+FIELD(CLIDR_EL1, LOUU, 27, 3)
+FIELD(CLIDR_EL1, ICB, 30, 3)
+
+/* When FEAT_CCIDX is implemented */
+FIELD(CCSIDR_EL1, CCIDX_LINESIZE, 0, 3)
+FIELD(CCSIDR_EL1, CCIDX_ASSOCIATIVITY, 3, 21)
+FIELD(CCSIDR_EL1, CCIDX_NUMSETS, 32, 24)
+
+/* When FEAT_CCIDX is not implemented */
+FIELD(CCSIDR_EL1, LINESIZE, 0, 3)
+FIELD(CCSIDR_EL1, ASSOCIATIVITY, 3, 10)
+FIELD(CCSIDR_EL1, NUMSETS, 13, 15)
+
+FIELD(CTR_EL0, IMINLINE, 0, 4)
+FIELD(CTR_EL0, L1IP, 14, 2)
+FIELD(CTR_EL0, DMINLINE, 16, 4)
+FIELD(CTR_EL0, ERG, 20, 4)
+FIELD(CTR_EL0, CWG, 24, 4)
+FIELD(CTR_EL0, IDC, 28, 1)
+FIELD(CTR_EL0, DIC, 29, 1)
+FIELD(CTR_EL0, TMINLINE, 32, 6)
+
+FIELD(MIDR_EL1, REVISION, 0, 4)
+FIELD(MIDR_EL1, PARTNUM, 4, 12)
+FIELD(MIDR_EL1, ARCHITECTURE, 16, 4)
+FIELD(MIDR_EL1, VARIANT, 20, 4)
+FIELD(MIDR_EL1, IMPLEMENTER, 24, 8)
+
FIELD(ID_ISAR0, SWAP, 0, 4)
FIELD(ID_ISAR0, BITCOUNT, 4, 4)
FIELD(ID_ISAR0, BITFIELD, 8, 4)
@@ -1503,12 +1998,85 @@ FIELD(ID_ISAR6, DP, 4, 4)
FIELD(ID_ISAR6, FHM, 8, 4)
FIELD(ID_ISAR6, SB, 12, 4)
FIELD(ID_ISAR6, SPECRES, 16, 4)
+FIELD(ID_ISAR6, BF16, 20, 4)
+FIELD(ID_ISAR6, I8MM, 24, 4)
+
+FIELD(ID_MMFR0, VMSA, 0, 4)
+FIELD(ID_MMFR0, PMSA, 4, 4)
+FIELD(ID_MMFR0, OUTERSHR, 8, 4)
+FIELD(ID_MMFR0, SHARELVL, 12, 4)
+FIELD(ID_MMFR0, TCM, 16, 4)
+FIELD(ID_MMFR0, AUXREG, 20, 4)
+FIELD(ID_MMFR0, FCSE, 24, 4)
+FIELD(ID_MMFR0, INNERSHR, 28, 4)
+
+FIELD(ID_MMFR1, L1HVDVA, 0, 4)
+FIELD(ID_MMFR1, L1UNIVA, 4, 4)
+FIELD(ID_MMFR1, L1HVDSW, 8, 4)
+FIELD(ID_MMFR1, L1UNISW, 12, 4)
+FIELD(ID_MMFR1, L1HVD, 16, 4)
+FIELD(ID_MMFR1, L1UNI, 20, 4)
+FIELD(ID_MMFR1, L1TSTCLN, 24, 4)
+FIELD(ID_MMFR1, BPRED, 28, 4)
+
+FIELD(ID_MMFR2, L1HVDFG, 0, 4)
+FIELD(ID_MMFR2, L1HVDBG, 4, 4)
+FIELD(ID_MMFR2, L1HVDRNG, 8, 4)
+FIELD(ID_MMFR2, HVDTLB, 12, 4)
+FIELD(ID_MMFR2, UNITLB, 16, 4)
+FIELD(ID_MMFR2, MEMBARR, 20, 4)
+FIELD(ID_MMFR2, WFISTALL, 24, 4)
+FIELD(ID_MMFR2, HWACCFLG, 28, 4)
+
+FIELD(ID_MMFR3, CMAINTVA, 0, 4)
+FIELD(ID_MMFR3, CMAINTSW, 4, 4)
+FIELD(ID_MMFR3, BPMAINT, 8, 4)
+FIELD(ID_MMFR3, MAINTBCST, 12, 4)
+FIELD(ID_MMFR3, PAN, 16, 4)
+FIELD(ID_MMFR3, COHWALK, 20, 4)
+FIELD(ID_MMFR3, CMEMSZ, 24, 4)
+FIELD(ID_MMFR3, SUPERSEC, 28, 4)
+
+FIELD(ID_MMFR4, SPECSEI, 0, 4)
+FIELD(ID_MMFR4, AC2, 4, 4)
+FIELD(ID_MMFR4, XNX, 8, 4)
+FIELD(ID_MMFR4, CNP, 12, 4)
+FIELD(ID_MMFR4, HPDS, 16, 4)
+FIELD(ID_MMFR4, LSM, 20, 4)
+FIELD(ID_MMFR4, CCIDX, 24, 4)
+FIELD(ID_MMFR4, EVT, 28, 4)
+
+FIELD(ID_MMFR5, ETS, 0, 4)
+FIELD(ID_MMFR5, NTLBPA, 4, 4)
+
+FIELD(ID_PFR0, STATE0, 0, 4)
+FIELD(ID_PFR0, STATE1, 4, 4)
+FIELD(ID_PFR0, STATE2, 8, 4)
+FIELD(ID_PFR0, STATE3, 12, 4)
+FIELD(ID_PFR0, CSV2, 16, 4)
+FIELD(ID_PFR0, AMU, 20, 4)
+FIELD(ID_PFR0, DIT, 24, 4)
+FIELD(ID_PFR0, RAS, 28, 4)
+
+FIELD(ID_PFR1, PROGMOD, 0, 4)
+FIELD(ID_PFR1, SECURITY, 4, 4)
+FIELD(ID_PFR1, MPROGMOD, 8, 4)
+FIELD(ID_PFR1, VIRTUALIZATION, 12, 4)
+FIELD(ID_PFR1, GENTIMER, 16, 4)
+FIELD(ID_PFR1, SEC_FRAC, 20, 4)
+FIELD(ID_PFR1, VIRT_FRAC, 24, 4)
+FIELD(ID_PFR1, GIC, 28, 4)
+
+FIELD(ID_PFR2, CSV3, 0, 4)
+FIELD(ID_PFR2, SSBS, 4, 4)
+FIELD(ID_PFR2, RAS_FRAC, 8, 4)
FIELD(ID_AA64ISAR0, AES, 4, 4)
FIELD(ID_AA64ISAR0, SHA1, 8, 4)
FIELD(ID_AA64ISAR0, SHA2, 12, 4)
FIELD(ID_AA64ISAR0, CRC32, 16, 4)
FIELD(ID_AA64ISAR0, ATOMIC, 20, 4)
+FIELD(ID_AA64ISAR0, TME, 24, 4)
FIELD(ID_AA64ISAR0, RDM, 28, 4)
FIELD(ID_AA64ISAR0, SHA3, 32, 4)
FIELD(ID_AA64ISAR0, SM3, 36, 4)
@@ -1530,6 +2098,212 @@ FIELD(ID_AA64ISAR1, GPI, 28, 4)
FIELD(ID_AA64ISAR1, FRINTTS, 32, 4)
FIELD(ID_AA64ISAR1, SB, 36, 4)
FIELD(ID_AA64ISAR1, SPECRES, 40, 4)
+FIELD(ID_AA64ISAR1, BF16, 44, 4)
+FIELD(ID_AA64ISAR1, DGH, 48, 4)
+FIELD(ID_AA64ISAR1, I8MM, 52, 4)
+FIELD(ID_AA64ISAR1, XS, 56, 4)
+FIELD(ID_AA64ISAR1, LS64, 60, 4)
+
+FIELD(ID_AA64ISAR2, WFXT, 0, 4)
+FIELD(ID_AA64ISAR2, RPRES, 4, 4)
+FIELD(ID_AA64ISAR2, GPA3, 8, 4)
+FIELD(ID_AA64ISAR2, APA3, 12, 4)
+FIELD(ID_AA64ISAR2, MOPS, 16, 4)
+FIELD(ID_AA64ISAR2, BC, 20, 4)
+FIELD(ID_AA64ISAR2, PAC_FRAC, 24, 4)
+FIELD(ID_AA64ISAR2, CLRBHB, 28, 4)
+FIELD(ID_AA64ISAR2, SYSREG_128, 32, 4)
+FIELD(ID_AA64ISAR2, SYSINSTR_128, 36, 4)
+FIELD(ID_AA64ISAR2, PRFMSLC, 40, 4)
+FIELD(ID_AA64ISAR2, RPRFM, 48, 4)
+FIELD(ID_AA64ISAR2, CSSC, 52, 4)
+FIELD(ID_AA64ISAR2, ATS1A, 60, 4)
+
+FIELD(ID_AA64PFR0, EL0, 0, 4)
+FIELD(ID_AA64PFR0, EL1, 4, 4)
+FIELD(ID_AA64PFR0, EL2, 8, 4)
+FIELD(ID_AA64PFR0, EL3, 12, 4)
+FIELD(ID_AA64PFR0, FP, 16, 4)
+FIELD(ID_AA64PFR0, ADVSIMD, 20, 4)
+FIELD(ID_AA64PFR0, GIC, 24, 4)
+FIELD(ID_AA64PFR0, RAS, 28, 4)
+FIELD(ID_AA64PFR0, SVE, 32, 4)
+FIELD(ID_AA64PFR0, SEL2, 36, 4)
+FIELD(ID_AA64PFR0, MPAM, 40, 4)
+FIELD(ID_AA64PFR0, AMU, 44, 4)
+FIELD(ID_AA64PFR0, DIT, 48, 4)
+FIELD(ID_AA64PFR0, RME, 52, 4)
+FIELD(ID_AA64PFR0, CSV2, 56, 4)
+FIELD(ID_AA64PFR0, CSV3, 60, 4)
+
+FIELD(ID_AA64PFR1, BT, 0, 4)
+FIELD(ID_AA64PFR1, SSBS, 4, 4)
+FIELD(ID_AA64PFR1, MTE, 8, 4)
+FIELD(ID_AA64PFR1, RAS_FRAC, 12, 4)
+FIELD(ID_AA64PFR1, MPAM_FRAC, 16, 4)
+FIELD(ID_AA64PFR1, SME, 24, 4)
+FIELD(ID_AA64PFR1, RNDR_TRAP, 28, 4)
+FIELD(ID_AA64PFR1, CSV2_FRAC, 32, 4)
+FIELD(ID_AA64PFR1, NMI, 36, 4)
+FIELD(ID_AA64PFR1, MTE_FRAC, 40, 4)
+FIELD(ID_AA64PFR1, GCS, 44, 4)
+FIELD(ID_AA64PFR1, THE, 48, 4)
+FIELD(ID_AA64PFR1, MTEX, 52, 4)
+FIELD(ID_AA64PFR1, DF2, 56, 4)
+FIELD(ID_AA64PFR1, PFAR, 60, 4)
+
+FIELD(ID_AA64MMFR0, PARANGE, 0, 4)
+FIELD(ID_AA64MMFR0, ASIDBITS, 4, 4)
+FIELD(ID_AA64MMFR0, BIGEND, 8, 4)
+FIELD(ID_AA64MMFR0, SNSMEM, 12, 4)
+FIELD(ID_AA64MMFR0, BIGENDEL0, 16, 4)
+FIELD(ID_AA64MMFR0, TGRAN16, 20, 4)
+FIELD(ID_AA64MMFR0, TGRAN64, 24, 4)
+FIELD(ID_AA64MMFR0, TGRAN4, 28, 4)
+FIELD(ID_AA64MMFR0, TGRAN16_2, 32, 4)
+FIELD(ID_AA64MMFR0, TGRAN64_2, 36, 4)
+FIELD(ID_AA64MMFR0, TGRAN4_2, 40, 4)
+FIELD(ID_AA64MMFR0, EXS, 44, 4)
+FIELD(ID_AA64MMFR0, FGT, 56, 4)
+FIELD(ID_AA64MMFR0, ECV, 60, 4)
+
+FIELD(ID_AA64MMFR1, HAFDBS, 0, 4)
+FIELD(ID_AA64MMFR1, VMIDBITS, 4, 4)
+FIELD(ID_AA64MMFR1, VH, 8, 4)
+FIELD(ID_AA64MMFR1, HPDS, 12, 4)
+FIELD(ID_AA64MMFR1, LO, 16, 4)
+FIELD(ID_AA64MMFR1, PAN, 20, 4)
+FIELD(ID_AA64MMFR1, SPECSEI, 24, 4)
+FIELD(ID_AA64MMFR1, XNX, 28, 4)
+FIELD(ID_AA64MMFR1, TWED, 32, 4)
+FIELD(ID_AA64MMFR1, ETS, 36, 4)
+FIELD(ID_AA64MMFR1, HCX, 40, 4)
+FIELD(ID_AA64MMFR1, AFP, 44, 4)
+FIELD(ID_AA64MMFR1, NTLBPA, 48, 4)
+FIELD(ID_AA64MMFR1, TIDCP1, 52, 4)
+FIELD(ID_AA64MMFR1, CMOW, 56, 4)
+FIELD(ID_AA64MMFR1, ECBHB, 60, 4)
+
+FIELD(ID_AA64MMFR2, CNP, 0, 4)
+FIELD(ID_AA64MMFR2, UAO, 4, 4)
+FIELD(ID_AA64MMFR2, LSM, 8, 4)
+FIELD(ID_AA64MMFR2, IESB, 12, 4)
+FIELD(ID_AA64MMFR2, VARANGE, 16, 4)
+FIELD(ID_AA64MMFR2, CCIDX, 20, 4)
+FIELD(ID_AA64MMFR2, NV, 24, 4)
+FIELD(ID_AA64MMFR2, ST, 28, 4)
+FIELD(ID_AA64MMFR2, AT, 32, 4)
+FIELD(ID_AA64MMFR2, IDS, 36, 4)
+FIELD(ID_AA64MMFR2, FWB, 40, 4)
+FIELD(ID_AA64MMFR2, TTL, 48, 4)
+FIELD(ID_AA64MMFR2, BBM, 52, 4)
+FIELD(ID_AA64MMFR2, EVT, 56, 4)
+FIELD(ID_AA64MMFR2, E0PD, 60, 4)
+
+FIELD(ID_AA64DFR0, DEBUGVER, 0, 4)
+FIELD(ID_AA64DFR0, TRACEVER, 4, 4)
+FIELD(ID_AA64DFR0, PMUVER, 8, 4)
+FIELD(ID_AA64DFR0, BRPS, 12, 4)
+FIELD(ID_AA64DFR0, PMSS, 16, 4)
+FIELD(ID_AA64DFR0, WRPS, 20, 4)
+FIELD(ID_AA64DFR0, SEBEP, 24, 4)
+FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4)
+FIELD(ID_AA64DFR0, PMSVER, 32, 4)
+FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4)
+FIELD(ID_AA64DFR0, TRACEFILT, 40, 4)
+FIELD(ID_AA64DFR0, TRACEBUFFER, 44, 4)
+FIELD(ID_AA64DFR0, MTPMU, 48, 4)
+FIELD(ID_AA64DFR0, BRBE, 52, 4)
+FIELD(ID_AA64DFR0, EXTTRCBUFF, 56, 4)
+FIELD(ID_AA64DFR0, HPMN0, 60, 4)
+
+FIELD(ID_AA64ZFR0, SVEVER, 0, 4)
+FIELD(ID_AA64ZFR0, AES, 4, 4)
+FIELD(ID_AA64ZFR0, BITPERM, 16, 4)
+FIELD(ID_AA64ZFR0, BFLOAT16, 20, 4)
+FIELD(ID_AA64ZFR0, B16B16, 24, 4)
+FIELD(ID_AA64ZFR0, SHA3, 32, 4)
+FIELD(ID_AA64ZFR0, SM4, 40, 4)
+FIELD(ID_AA64ZFR0, I8MM, 44, 4)
+FIELD(ID_AA64ZFR0, F32MM, 52, 4)
+FIELD(ID_AA64ZFR0, F64MM, 56, 4)
+
+FIELD(ID_AA64SMFR0, F32F32, 32, 1)
+FIELD(ID_AA64SMFR0, BI32I32, 33, 1)
+FIELD(ID_AA64SMFR0, B16F32, 34, 1)
+FIELD(ID_AA64SMFR0, F16F32, 35, 1)
+FIELD(ID_AA64SMFR0, I8I32, 36, 4)
+FIELD(ID_AA64SMFR0, F16F16, 42, 1)
+FIELD(ID_AA64SMFR0, B16B16, 43, 1)
+FIELD(ID_AA64SMFR0, I16I32, 44, 4)
+FIELD(ID_AA64SMFR0, F64F64, 48, 1)
+FIELD(ID_AA64SMFR0, I16I64, 52, 4)
+FIELD(ID_AA64SMFR0, SMEVER, 56, 4)
+FIELD(ID_AA64SMFR0, FA64, 63, 1)
+
+FIELD(ID_DFR0, COPDBG, 0, 4)
+FIELD(ID_DFR0, COPSDBG, 4, 4)
+FIELD(ID_DFR0, MMAPDBG, 8, 4)
+FIELD(ID_DFR0, COPTRC, 12, 4)
+FIELD(ID_DFR0, MMAPTRC, 16, 4)
+FIELD(ID_DFR0, MPROFDBG, 20, 4)
+FIELD(ID_DFR0, PERFMON, 24, 4)
+FIELD(ID_DFR0, TRACEFILT, 28, 4)
+
+FIELD(ID_DFR1, MTPMU, 0, 4)
+FIELD(ID_DFR1, HPMN0, 4, 4)
+
+FIELD(DBGDIDR, SE_IMP, 12, 1)
+FIELD(DBGDIDR, NSUHD_IMP, 14, 1)
+FIELD(DBGDIDR, VERSION, 16, 4)
+FIELD(DBGDIDR, CTX_CMPS, 20, 4)
+FIELD(DBGDIDR, BRPS, 24, 4)
+FIELD(DBGDIDR, WRPS, 28, 4)
+
+FIELD(DBGDEVID, PCSAMPLE, 0, 4)
+FIELD(DBGDEVID, WPADDRMASK, 4, 4)
+FIELD(DBGDEVID, BPADDRMASK, 8, 4)
+FIELD(DBGDEVID, VECTORCATCH, 12, 4)
+FIELD(DBGDEVID, VIRTEXTNS, 16, 4)
+FIELD(DBGDEVID, DOUBLELOCK, 20, 4)
+FIELD(DBGDEVID, AUXREGS, 24, 4)
+FIELD(DBGDEVID, CIDMASK, 28, 4)
+
+FIELD(MVFR0, SIMDREG, 0, 4)
+FIELD(MVFR0, FPSP, 4, 4)
+FIELD(MVFR0, FPDP, 8, 4)
+FIELD(MVFR0, FPTRAP, 12, 4)
+FIELD(MVFR0, FPDIVIDE, 16, 4)
+FIELD(MVFR0, FPSQRT, 20, 4)
+FIELD(MVFR0, FPSHVEC, 24, 4)
+FIELD(MVFR0, FPROUND, 28, 4)
+
+FIELD(MVFR1, FPFTZ, 0, 4)
+FIELD(MVFR1, FPDNAN, 4, 4)
+FIELD(MVFR1, SIMDLS, 8, 4) /* A-profile only */
+FIELD(MVFR1, SIMDINT, 12, 4) /* A-profile only */
+FIELD(MVFR1, SIMDSP, 16, 4) /* A-profile only */
+FIELD(MVFR1, SIMDHP, 20, 4) /* A-profile only */
+FIELD(MVFR1, MVE, 8, 4) /* M-profile only */
+FIELD(MVFR1, FP16, 20, 4) /* M-profile only */
+FIELD(MVFR1, FPHP, 24, 4)
+FIELD(MVFR1, SIMDFMAC, 28, 4)
+
+FIELD(MVFR2, SIMDMISC, 0, 4)
+FIELD(MVFR2, FPMISC, 4, 4)
+
+FIELD(GPCCR, PPS, 0, 3)
+FIELD(GPCCR, IRGN, 8, 2)
+FIELD(GPCCR, ORGN, 10, 2)
+FIELD(GPCCR, SH, 12, 2)
+FIELD(GPCCR, PGS, 14, 2)
+FIELD(GPCCR, GPC, 16, 1)
+FIELD(GPCCR, GPCP, 17, 1)
+FIELD(GPCCR, L0GPTSZ, 20, 4)
+
+FIELD(MFAR, FPA, 12, 40)
+FIELD(MFAR, NSE, 62, 1)
+FIELD(MFAR, NS, 63, 1)
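/*
 * Illustrative sketch, not part of the patch: each FIELD() above expands
 * (via hw/registerfields.h) into R_<REG>_<FIELD>_SHIFT/_LENGTH/_MASK
 * constants, so ID register checks can extract a field with FIELD_EX64().
 * The example_* helper name is hypothetical.
 */
static inline bool example_sve_present(const ARMCPU *cpu)
{
    /* ID_AA64PFR0_EL1.SVE (bits [35:32] per the FIELD above) is nonzero
     * when the Scalable Vector Extension is implemented. */
    return FIELD_EX64(cpu->isar.id_aa64pfr0, ID_AA64PFR0, SVE) != 0;
}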
QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK);
@@ -1538,7 +2312,6 @@ QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK);
* mapping in linux-user/elfload.c:get_elf_hwcap().
*/
enum arm_features {
- ARM_FEATURE_VFP,
ARM_FEATURE_AUXCR, /* ARM1026 Auxiliary control register. */
ARM_FEATURE_XSCALE, /* Intel XScale extensions. */
ARM_FEATURE_IWMMXT, /* Intel iwMMXt extension. */
@@ -1547,10 +2320,7 @@ enum arm_features {
ARM_FEATURE_V7,
ARM_FEATURE_THUMB2,
ARM_FEATURE_PMSA, /* no MMU; may have Memory Protection Unit */
- ARM_FEATURE_VFP3,
- ARM_FEATURE_VFP_FP16,
ARM_FEATURE_NEON,
- ARM_FEATURE_THUMB_DIV, /* divide supported in Thumb encoding */
ARM_FEATURE_M, /* Microcontroller profile. */
ARM_FEATURE_OMAPCP, /* OMAP specific CP15 ops handling. */
ARM_FEATURE_THUMB2EE,
@@ -1560,8 +2330,6 @@ enum arm_features {
ARM_FEATURE_V5,
ARM_FEATURE_STRONGARM,
ARM_FEATURE_VAPA, /* cp15 VA to PA lookups */
- ARM_FEATURE_ARM_DIV, /* divide supported in ARM encoding */
- ARM_FEATURE_VFP4, /* VFPv4 (implies that NEON is v2) */
ARM_FEATURE_GENERIC_TIMER,
ARM_FEATURE_MVFR, /* Media and VFP Feature Registers 0 and 1 */
ARM_FEATURE_DUMMY_C15_REGS, /* RAZ/WI all of cp15 crn=15 */
@@ -1569,35 +2337,19 @@ enum arm_features {
ARM_FEATURE_CACHE_DIRTY_REG, /* 1136/1176 cache dirty status register */
ARM_FEATURE_CACHE_BLOCK_OPS, /* v6 optional cache block operations */
ARM_FEATURE_MPIDR, /* has cp15 MPIDR */
- ARM_FEATURE_PXN, /* has Privileged Execute Never bit */
ARM_FEATURE_LPAE, /* has Large Physical Address Extension */
ARM_FEATURE_V8,
ARM_FEATURE_AARCH64, /* supports 64 bit mode */
- ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */
ARM_FEATURE_CBAR, /* has cp15 CBAR */
- ARM_FEATURE_CRC, /* ARMv8 CRC instructions */
ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */
ARM_FEATURE_EL2, /* has EL2 Virtualization support */
ARM_FEATURE_EL3, /* has EL3 Secure monitor support */
- ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */
- ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */
- ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */
ARM_FEATURE_THUMB_DSP, /* DSP insns supported in the Thumb encodings */
ARM_FEATURE_PMU, /* has PMU support */
ARM_FEATURE_VBAR, /* has cp15 VBAR */
ARM_FEATURE_M_SECURITY, /* M profile Security Extension */
- ARM_FEATURE_JAZELLE, /* has (trivial) Jazelle implementation */
- ARM_FEATURE_SVE, /* has Scalable Vector Extension */
- ARM_FEATURE_V8_SHA512, /* implements SHA512 part of v8 Crypto Extensions */
- ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */
- ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */
- ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
- ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */
- ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */
- ARM_FEATURE_V8_DOTPROD, /* implements v8.2 simd dot product */
- ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */
- ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions. */
ARM_FEATURE_M_MAIN, /* M profile Main Extension */
+ ARM_FEATURE_V8_1M, /* M profile extras only in v8.1M and later */
};
static inline int arm_feature(CPUARMState *env, int feature)
@@ -1605,28 +2357,61 @@ static inline int arm_feature(CPUARMState *env, int feature)
return (env->features & (1ULL << feature)) != 0;
}
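/*
 * Illustrative use, not part of the patch: the features that remain in
 * this enum are still tested with arm_feature(); the entries removed
 * above (VFP, crypto, SVE, ...) are instead derived from the ID register
 * fields via the cpu_isar_feature() helpers.
 */
static inline bool example_has_el2(CPUARMState *env)
{
    return arm_feature(env, ARM_FEATURE_EL2);
}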
+void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp);
+
+/*
+ * ARM v9 security states.
+ * The ordering of the enumeration corresponds to the low 2 bits
+ * of the GPI value, and (except for Root) the concat of NSE:NS.
+ */
+
+typedef enum ARMSecuritySpace {
+ ARMSS_Secure = 0,
+ ARMSS_NonSecure = 1,
+ ARMSS_Root = 2,
+ ARMSS_Realm = 3,
+} ARMSecuritySpace;
+
+/* Return true if @space is secure, in the pre-v9 sense. */
+static inline bool arm_space_is_secure(ARMSecuritySpace space)
+{
+ return space == ARMSS_Secure || space == ARMSS_Root;
+}
+
+/* Return the ARMSecuritySpace for @secure, assuming !RME or EL[0-2]. */
+static inline ARMSecuritySpace arm_secure_to_space(bool secure)
+{
+ return secure ? ARMSS_Secure : ARMSS_NonSecure;
+}
+
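/*
 * Illustrative sketch, not part of the patch: with the encoding above a
 * security space can be rebuilt from the NSE and NS bits; the only caveat
 * is that {NSE,NS} = {1,0} (Root) is reachable only at EL3.
 */
static inline ARMSecuritySpace example_space_from_nse_ns(bool nse, bool ns)
{
    return (ARMSecuritySpace)((nse << 1) | ns);
}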
#if !defined(CONFIG_USER_ONLY)
-/* Return true if exception levels below EL3 are in secure state,
- * or would be following an exception return to that level.
- * Unlike arm_is_secure() (which is always a question about the
- * _current_ state of the CPU) this doesn't care about the current
- * EL or mode.
+/**
+ * arm_security_space_below_el3:
+ * @env: cpu context
+ *
+ * Return the security space of exception levels below EL3, following
+ * an exception return to those levels. Unlike arm_security_space,
+ * this doesn't care about the current EL.
+ */
+ARMSecuritySpace arm_security_space_below_el3(CPUARMState *env);
+
+/**
+ * arm_is_secure_below_el3:
+ * @env: cpu context
+ *
+ * Return true if exception levels below EL3 are in secure state,
+ * or would be following an exception return to those levels.
*/
static inline bool arm_is_secure_below_el3(CPUARMState *env)
{
- if (arm_feature(env, ARM_FEATURE_EL3)) {
- return !(env->cp15.scr_el3 & SCR_NS);
- } else {
- /* If EL3 is not supported then the secure state is implementation
- * defined, in which case QEMU defaults to non-secure.
- */
- return false;
- }
+ ARMSecuritySpace ss = arm_security_space_below_el3(env);
+ return ss == ARMSS_Secure;
}
/* Return true if the CPU is AArch64 EL3 or AArch32 Mon */
static inline bool arm_is_el3_or_mon(CPUARMState *env)
{
+ assert(!arm_feature(env, ARM_FEATURE_M));
if (arm_feature(env, ARM_FEATURE_EL3)) {
if (is_a64(env) && extract32(env->pstate, 2, 2) == 3) {
/* CPU currently in AArch64 state and EL3 */
@@ -1640,27 +2425,85 @@ static inline bool arm_is_el3_or_mon(CPUARMState *env)
return false;
}
-/* Return true if the processor is in secure state */
+/**
+ * arm_security_space:
+ * @env: cpu context
+ *
+ * Return the current security space of the cpu.
+ */
+ARMSecuritySpace arm_security_space(CPUARMState *env);
+
+/**
+ * arm_is_secure:
+ * @env: cpu context
+ *
+ * Return true if the processor is in secure state.
+ */
static inline bool arm_is_secure(CPUARMState *env)
{
- if (arm_is_el3_or_mon(env)) {
- return true;
- }
- return arm_is_secure_below_el3(env);
+ return arm_space_is_secure(arm_security_space(env));
+}
+
+/*
+ * Return true if the current security state has AArch64 EL2 or AArch32 Hyp.
+ * This corresponds to the pseudocode EL2Enabled().
+ */
+static inline bool arm_is_el2_enabled_secstate(CPUARMState *env,
+ ARMSecuritySpace space)
+{
+ assert(space != ARMSS_Root);
+ return arm_feature(env, ARM_FEATURE_EL2)
+ && (space != ARMSS_Secure || (env->cp15.scr_el3 & SCR_EEL2));
+}
+
+static inline bool arm_is_el2_enabled(CPUARMState *env)
+{
+ return arm_is_el2_enabled_secstate(env, arm_security_space_below_el3(env));
}
#else
+static inline ARMSecuritySpace arm_security_space_below_el3(CPUARMState *env)
+{
+ return ARMSS_NonSecure;
+}
+
static inline bool arm_is_secure_below_el3(CPUARMState *env)
{
return false;
}
+static inline ARMSecuritySpace arm_security_space(CPUARMState *env)
+{
+ return ARMSS_NonSecure;
+}
+
static inline bool arm_is_secure(CPUARMState *env)
{
return false;
}
+
+static inline bool arm_is_el2_enabled_secstate(CPUARMState *env,
+ ARMSecuritySpace space)
+{
+ return false;
+}
+
+static inline bool arm_is_el2_enabled(CPUARMState *env)
+{
+ return false;
+}
#endif
+/**
+ * arm_hcr_el2_eff(): Return the effective value of HCR_EL2.
+ * E.g. when in secure state, fields in HCR_EL2 are suppressed,
+ * "for all purposes other than a direct read or write access of HCR_EL2."
+ * Not included here is HCR_RW.
+ */
+uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, ARMSecuritySpace space);
+uint64_t arm_hcr_el2_eff(CPUARMState *env);
+uint64_t arm_hcrx_el2_eff(CPUARMState *env);
+
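/*
 * Illustrative use, not part of the patch: trap checks should consult the
 * effective value rather than the raw register, since HCR_EL2.E2H/TGE and
 * the current security space can suppress individual fields.
 */
static inline bool example_hcr_trap_bit_set(CPUARMState *env, uint64_t bit)
{
    return (arm_hcr_el2_eff(env) & bit) != 0;
}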
/* Return true if the specified exception level is running in AArch64 state. */
static inline bool arm_el_is_aa64(CPUARMState *env, int el)
{
@@ -1678,7 +2521,8 @@ static inline bool arm_el_is_aa64(CPUARMState *env, int el)
return aa64;
}
- if (arm_feature(env, ARM_FEATURE_EL3)) {
+ if (arm_feature(env, ARM_FEATURE_EL3) &&
+ ((env->cp15.scr_el3 & SCR_NS) || !(env->cp15.scr_el3 & SCR_EEL2))) {
aa64 = aa64 && (env->cp15.scr_el3 & SCR_RW);
}
@@ -1686,14 +2530,14 @@ static inline bool arm_el_is_aa64(CPUARMState *env, int el)
return aa64;
}
- if (arm_feature(env, ARM_FEATURE_EL2) && !arm_is_secure_below_el3(env)) {
+ if (arm_is_el2_enabled(env)) {
aa64 = aa64 && (env->cp15.hcr_el2 & HCR_RW);
}
return aa64;
}
-/* Function for determing whether guest cp register reads and writes should
+/* Function for determining whether guest cp register reads and writes should
* access the secure or non-secure bank of a cp register. When EL3 is
* operating in AArch32 state, the NS-bit determines whether the secure
* instance of a cp register should be used. When EL3 is AArch64 (or if
@@ -1736,314 +2580,9 @@ static inline bool access_secure_reg(CPUARMState *env)
(arm_is_secure(_env) && !arm_el_is_aa64((_env), 3)), \
(_val))
-void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf);
uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
uint32_t cur_el, bool secure);
-/* Interface between CPU and Interrupt controller. */
-#ifndef CONFIG_USER_ONLY
-bool armv7m_nvic_can_take_pending_exception(void *opaque);
-#else
-static inline bool armv7m_nvic_can_take_pending_exception(void *opaque)
-{
- return true;
-}
-#endif
-/**
- * armv7m_nvic_set_pending: mark the specified exception as pending
- * @opaque: the NVIC
- * @irq: the exception number to mark pending
- * @secure: false for non-banked exceptions or for the nonsecure
- * version of a banked exception, true for the secure version of a banked
- * exception.
- *
- * Marks the specified exception as pending. Note that we will assert()
- * if @secure is true and @irq does not specify one of the fixed set
- * of architecturally banked exceptions.
- */
-void armv7m_nvic_set_pending(void *opaque, int irq, bool secure);
-/**
- * armv7m_nvic_set_pending_derived: mark this derived exception as pending
- * @opaque: the NVIC
- * @irq: the exception number to mark pending
- * @secure: false for non-banked exceptions or for the nonsecure
- * version of a banked exception, true for the secure version of a banked
- * exception.
- *
- * Similar to armv7m_nvic_set_pending(), but specifically for derived
- * exceptions (exceptions generated in the course of trying to take
- * a different exception).
- */
-void armv7m_nvic_set_pending_derived(void *opaque, int irq, bool secure);
-/**
- * armv7m_nvic_get_pending_irq_info: return highest priority pending
- * exception, and whether it targets Secure state
- * @opaque: the NVIC
- * @pirq: set to pending exception number
- * @ptargets_secure: set to whether pending exception targets Secure
- *
- * This function writes the number of the highest priority pending
- * exception (the one which would be made active by
- * armv7m_nvic_acknowledge_irq()) to @pirq, and sets @ptargets_secure
- * to true if the current highest priority pending exception should
- * be taken to Secure state, false for NS.
- */
-void armv7m_nvic_get_pending_irq_info(void *opaque, int *pirq,
- bool *ptargets_secure);
-/**
- * armv7m_nvic_acknowledge_irq: make highest priority pending exception active
- * @opaque: the NVIC
- *
- * Move the current highest priority pending exception from the pending
- * state to the active state, and update v7m.exception to indicate that
- * it is the exception currently being handled.
- */
-void armv7m_nvic_acknowledge_irq(void *opaque);
-/**
- * armv7m_nvic_complete_irq: complete specified interrupt or exception
- * @opaque: the NVIC
- * @irq: the exception number to complete
- * @secure: true if this exception was secure
- *
- * Returns: -1 if the irq was not active
- * 1 if completing this irq brought us back to base (no active irqs)
- * 0 if there is still an irq active after this one was completed
- * (Ignoring -1, this is the same as the RETTOBASE value before completion.)
- */
-int armv7m_nvic_complete_irq(void *opaque, int irq, bool secure);
-/**
- * armv7m_nvic_raw_execution_priority: return the raw execution priority
- * @opaque: the NVIC
- *
- * Returns: the raw execution priority as defined by the v8M architecture.
- * This is the execution priority minus the effects of AIRCR.PRIS,
- * and minus any PRIMASK/FAULTMASK/BASEPRI priority boosting.
- * (v8M ARM ARM I_PKLD.)
- */
-int armv7m_nvic_raw_execution_priority(void *opaque);
-/**
- * armv7m_nvic_neg_prio_requested: return true if the requested execution
- * priority is negative for the specified security state.
- * @opaque: the NVIC
- * @secure: the security state to test
- * This corresponds to the pseudocode IsReqExecPriNeg().
- */
-#ifndef CONFIG_USER_ONLY
-bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure);
-#else
-static inline bool armv7m_nvic_neg_prio_requested(void *opaque, bool secure)
-{
- return false;
-}
-#endif
-
-/* Interface for defining coprocessor registers.
- * Registers are defined in tables of arm_cp_reginfo structs
- * which are passed to define_arm_cp_regs().
- */
-
-/* When looking up a coprocessor register we look for it
- * via an integer which encodes all of:
- * coprocessor number
- * Crn, Crm, opc1, opc2 fields
- * 32 or 64 bit register (ie is it accessed via MRC/MCR
- * or via MRRC/MCRR?)
- * non-secure/secure bank (AArch32 only)
- * We allow 4 bits for opc1 because MRRC/MCRR have a 4 bit field.
- * (In this case crn and opc2 should be zero.)
- * For AArch64, there is no 32/64 bit size distinction;
- * instead all registers have a 2 bit op0, 3 bit op1 and op2,
- * and 4 bit CRn and CRm. The encoding patterns are chosen
- * to be easy to convert to and from the KVM encodings, and also
- * so that the hashtable can contain both AArch32 and AArch64
- * registers (to allow for interprocessing where we might run
- * 32 bit code on a 64 bit core).
- */
-/* This bit is private to our hashtable cpreg; in KVM register
- * IDs the AArch64/32 distinction is the KVM_REG_ARM/ARM64
- * in the upper bits of the 64 bit ID.
- */
-#define CP_REG_AA64_SHIFT 28
-#define CP_REG_AA64_MASK (1 << CP_REG_AA64_SHIFT)
-
-/* To enable banking of coprocessor registers depending on ns-bit we
- * add a bit to distinguish between secure and non-secure cpregs in the
- * hashtable.
- */
-#define CP_REG_NS_SHIFT 29
-#define CP_REG_NS_MASK (1 << CP_REG_NS_SHIFT)
-
-#define ENCODE_CP_REG(cp, is64, ns, crn, crm, opc1, opc2) \
- ((ns) << CP_REG_NS_SHIFT | ((cp) << 16) | ((is64) << 15) | \
- ((crn) << 11) | ((crm) << 7) | ((opc1) << 3) | (opc2))
-
-#define ENCODE_AA64_CP_REG(cp, crn, crm, op0, op1, op2) \
- (CP_REG_AA64_MASK | \
- ((cp) << CP_REG_ARM_COPROC_SHIFT) | \
- ((op0) << CP_REG_ARM64_SYSREG_OP0_SHIFT) | \
- ((op1) << CP_REG_ARM64_SYSREG_OP1_SHIFT) | \
- ((crn) << CP_REG_ARM64_SYSREG_CRN_SHIFT) | \
- ((crm) << CP_REG_ARM64_SYSREG_CRM_SHIFT) | \
- ((op2) << CP_REG_ARM64_SYSREG_OP2_SHIFT))
-
-/* Convert a full 64 bit KVM register ID to the truncated 32 bit
- * version used as a key for the coprocessor register hashtable
- */
-static inline uint32_t kvm_to_cpreg_id(uint64_t kvmid)
-{
- uint32_t cpregid = kvmid;
- if ((kvmid & CP_REG_ARCH_MASK) == CP_REG_ARM64) {
- cpregid |= CP_REG_AA64_MASK;
- } else {
- if ((kvmid & CP_REG_SIZE_MASK) == CP_REG_SIZE_U64) {
- cpregid |= (1 << 15);
- }
-
- /* KVM is always non-secure so add the NS flag on AArch32 register
- * entries.
- */
- cpregid |= 1 << CP_REG_NS_SHIFT;
- }
- return cpregid;
-}
-
-/* Convert a truncated 32 bit hashtable key into the full
- * 64 bit KVM register ID.
- */
-static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
-{
- uint64_t kvmid;
-
- if (cpregid & CP_REG_AA64_MASK) {
- kvmid = cpregid & ~CP_REG_AA64_MASK;
- kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM64;
- } else {
- kvmid = cpregid & ~(1 << 15);
- if (cpregid & (1 << 15)) {
- kvmid |= CP_REG_SIZE_U64 | CP_REG_ARM;
- } else {
- kvmid |= CP_REG_SIZE_U32 | CP_REG_ARM;
- }
- }
- return kvmid;
-}
-
-/* ARMCPRegInfo type field bits. If the SPECIAL bit is set this is a
- * special-behaviour cp reg and bits [11..8] indicate what behaviour
- * it has. Otherwise it is a simple cp reg, where CONST indicates that
- * TCG can assume the value to be constant (ie load at translate time)
- * and 64BIT indicates a 64 bit wide coprocessor register. SUPPRESS_TB_END
- * indicates that the TB should not be ended after a write to this register
- * (the default is that the TB ends after cp writes). OVERRIDE permits
- * a register definition to override a previous definition for the
- * same (cp, is64, crn, crm, opc1, opc2) tuple: either the new or the
- * old must have the OVERRIDE bit set.
- * ALIAS indicates that this register is an alias view of some underlying
- * state which is also visible via another register, and that the other
- * register is handling migration and reset; registers marked ALIAS will not be
- * migrated but may have their state set by syncing of register state from KVM.
- * NO_RAW indicates that this register has no underlying state and does not
- * support raw access for state saving/loading; it will not be used for either
- * migration or KVM state synchronization. (Typically this is for "registers"
- * which are actually used as instructions for cache maintenance and so on.)
- * IO indicates that this register does I/O and therefore its accesses
- * need to be surrounded by gen_io_start()/gen_io_end(). In particular,
- * registers which implement clocks or timers require this.
- */
-#define ARM_CP_SPECIAL 0x0001
-#define ARM_CP_CONST 0x0002
-#define ARM_CP_64BIT 0x0004
-#define ARM_CP_SUPPRESS_TB_END 0x0008
-#define ARM_CP_OVERRIDE 0x0010
-#define ARM_CP_ALIAS 0x0020
-#define ARM_CP_IO 0x0040
-#define ARM_CP_NO_RAW 0x0080
-#define ARM_CP_NOP (ARM_CP_SPECIAL | 0x0100)
-#define ARM_CP_WFI (ARM_CP_SPECIAL | 0x0200)
-#define ARM_CP_NZCV (ARM_CP_SPECIAL | 0x0300)
-#define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | 0x0400)
-#define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | 0x0500)
-#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA
-#define ARM_CP_FPU 0x1000
-#define ARM_CP_SVE 0x2000
-#define ARM_CP_NO_GDB 0x4000
-/* Used only as a terminator for ARMCPRegInfo lists */
-#define ARM_CP_SENTINEL 0xffff
-/* Mask of only the flag bits in a type field */
-#define ARM_CP_FLAG_MASK 0x70ff
-
-/* Valid values for ARMCPRegInfo state field, indicating which of
- * the AArch32 and AArch64 execution states this register is visible in.
- * If the reginfo doesn't explicitly specify then it is AArch32 only.
- * If the reginfo is declared to be visible in both states then a second
- * reginfo is synthesised for the AArch32 view of the AArch64 register,
- * such that the AArch32 view is the lower 32 bits of the AArch64 one.
- * Note that we rely on the values of these enums as we iterate through
- * the various states in some places.
- */
-enum {
- ARM_CP_STATE_AA32 = 0,
- ARM_CP_STATE_AA64 = 1,
- ARM_CP_STATE_BOTH = 2,
-};
-
-/* ARM CP register secure state flags. These flags identify security state
- * attributes for a given CP register entry.
- * The existence of both or neither secure and non-secure flags indicates that
- * the register has both a secure and non-secure hash entry. A single one of
- * these flags causes the register to only be hashed for the specified
- * security state.
- * Although definitions may have any combination of the S/NS bits, each
- * registered entry will only have one to identify whether the entry is secure
- * or non-secure.
- */
-enum {
- ARM_CP_SECSTATE_S = (1 << 0), /* bit[0]: Secure state register */
- ARM_CP_SECSTATE_NS = (1 << 1), /* bit[1]: Non-secure state register */
-};
-
-/* Return true if cptype is a valid type field. This is used to try to
- * catch errors where the sentinel has been accidentally left off the end
- * of a list of registers.
- */
-static inline bool cptype_valid(int cptype)
-{
- return ((cptype & ~ARM_CP_FLAG_MASK) == 0)
- || ((cptype & ARM_CP_SPECIAL) &&
- ((cptype & ~ARM_CP_FLAG_MASK) <= ARM_LAST_SPECIAL));
-}
-
-/* Access rights:
- * We define bits for Read and Write access for what rev C of the v7-AR ARM ARM
- * defines as PL0 (user), PL1 (fiq/irq/svc/abt/und/sys, ie privileged), and
- * PL2 (hyp). The other level which has Read and Write bits is Secure PL1
- * (ie any of the privileged modes in Secure state, or Monitor mode).
- * If a register is accessible in one privilege level it's always accessible
- * in higher privilege levels too. Since "Secure PL1" also follows this rule
- * (ie anything visible in PL2 is visible in S-PL1, some things are only
- * visible in S-PL1) but "Secure PL1" is a bit of a mouthful, we bend the
- * terminology a little and call this PL3.
- * In AArch64 things are somewhat simpler as the PLx bits line up exactly
- * with the ELx exception levels.
- *
- * If access permissions for a register are more complex than can be
- * described with these bits, then use a laxer set of restrictions, and
- * do the more restrictive/complex check inside a helper function.
- */
-#define PL3_R 0x80
-#define PL3_W 0x40
-#define PL2_R (0x20 | PL3_R)
-#define PL2_W (0x10 | PL3_W)
-#define PL1_R (0x08 | PL2_R)
-#define PL1_W (0x04 | PL2_W)
-#define PL0_R (0x02 | PL1_R)
-#define PL0_W (0x01 | PL1_W)
-
-#define PL3_RW (PL3_R | PL3_W)
-#define PL2_RW (PL2_R | PL2_W)
-#define PL1_RW (PL1_R | PL1_W)
-#define PL0_RW (PL0_R | PL0_W)
-
/* Return the highest implemented Exception Level */
static inline int arm_highest_el(CPUARMState *env)
{
@@ -2095,204 +2634,6 @@ static inline int arm_current_el(CPUARMState *env)
}
}
-typedef struct ARMCPRegInfo ARMCPRegInfo;
-
-typedef enum CPAccessResult {
- /* Access is permitted */
- CP_ACCESS_OK = 0,
- /* Access fails due to a configurable trap or enable which would
- * result in a categorized exception syndrome giving information about
- * the failing instruction (ie syndrome category 0x3, 0x4, 0x5, 0x6,
- * 0xc or 0x18). The exception is taken to the usual target EL (EL1 or
- * PL1 if in EL0, otherwise to the current EL).
- */
- CP_ACCESS_TRAP = 1,
- /* Access fails and results in an exception syndrome 0x0 ("uncategorized").
- * Note that this is not a catch-all case -- the set of cases which may
- * result in this failure is specifically defined by the architecture.
- */
- CP_ACCESS_TRAP_UNCATEGORIZED = 2,
- /* As CP_ACCESS_TRAP, but for traps directly to EL2 or EL3 */
- CP_ACCESS_TRAP_EL2 = 3,
- CP_ACCESS_TRAP_EL3 = 4,
- /* As CP_ACCESS_UNCATEGORIZED, but for traps directly to EL2 or EL3 */
- CP_ACCESS_TRAP_UNCATEGORIZED_EL2 = 5,
- CP_ACCESS_TRAP_UNCATEGORIZED_EL3 = 6,
- /* Access fails and results in an exception syndrome for an FP access,
- * trapped directly to EL2 or EL3
- */
- CP_ACCESS_TRAP_FP_EL2 = 7,
- CP_ACCESS_TRAP_FP_EL3 = 8,
-} CPAccessResult;
-
-/* Access functions for coprocessor registers. These cannot fail and
- * may not raise exceptions.
- */
-typedef uint64_t CPReadFn(CPUARMState *env, const ARMCPRegInfo *opaque);
-typedef void CPWriteFn(CPUARMState *env, const ARMCPRegInfo *opaque,
- uint64_t value);
-/* Access permission check functions for coprocessor registers. */
-typedef CPAccessResult CPAccessFn(CPUARMState *env,
- const ARMCPRegInfo *opaque,
- bool isread);
-/* Hook function for register reset */
-typedef void CPResetFn(CPUARMState *env, const ARMCPRegInfo *opaque);
-
-#define CP_ANY 0xff
-
-/* Definition of an ARM coprocessor register */
-struct ARMCPRegInfo {
- /* Name of register (useful mainly for debugging, need not be unique) */
- const char *name;
- /* Location of register: coprocessor number and (crn,crm,opc1,opc2)
- * tuple. Any of crm, opc1 and opc2 may be CP_ANY to indicate a
- * 'wildcard' field -- any value of that field in the MRC/MCR insn
- * will be decoded to this register. The register read and write
- * callbacks will be passed an ARMCPRegInfo with the crn/crm/opc1/opc2
- * used by the program, so it is possible to register a wildcard and
- * then behave differently on read/write if necessary.
- * For 64 bit registers, only crm and opc1 are relevant; crn and opc2
- * must both be zero.
- * For AArch64-visible registers, opc0 is also used.
- * Since there are no "coprocessors" in AArch64, cp is purely used as a
- * way to distinguish (for KVM's benefit) guest-visible system registers
- * from demuxed ones provided to preserve the "no side effects on
- * KVM register read/write from QEMU" semantics. cp==0x13 is guest
- * visible (to match KVM's encoding); cp==0 will be converted to
- * cp==0x13 when the ARMCPRegInfo is registered, for convenience.
- */
- uint8_t cp;
- uint8_t crn;
- uint8_t crm;
- uint8_t opc0;
- uint8_t opc1;
- uint8_t opc2;
- /* Execution state in which this register is visible: ARM_CP_STATE_* */
- int state;
- /* Register type: ARM_CP_* bits/values */
- int type;
- /* Access rights: PL*_[RW] */
- int access;
- /* Security state: ARM_CP_SECSTATE_* bits/values */
- int secure;
- /* The opaque pointer passed to define_arm_cp_regs_with_opaque() when
- * this register was defined: can be used to hand data through to the
- * register read/write functions, since they are passed the ARMCPRegInfo*.
- */
- void *opaque;
- /* Value of this register, if it is ARM_CP_CONST. Otherwise, if
- * fieldoffset is non-zero, the reset value of the register.
- */
- uint64_t resetvalue;
- /* Offset of the field in CPUARMState for this register.
- *
- * This is not needed if either:
- * 1. type is ARM_CP_CONST or one of the ARM_CP_SPECIALs
- * 2. both readfn and writefn are specified
- */
- ptrdiff_t fieldoffset; /* offsetof(CPUARMState, field) */
-
- /* Offsets of the secure and non-secure fields in CPUARMState for the
- * register if it is banked. These fields are only used during the static
- * registration of a register. During hashing the bank associated
- * with a given security state is copied to fieldoffset which is used from
- * there on out.
- *
- * It is expected that register definitions use either fieldoffset or
- * bank_fieldoffsets in the definition but not both. It is also expected
- * that both bank offsets are set when defining a banked register. This
- * use indicates that a register is banked.
- */
- ptrdiff_t bank_fieldoffsets[2];
-
- /* Function for making any access checks for this register in addition to
- * those specified by the 'access' permissions bits. If NULL, no extra
- * checks required. The access check is performed at runtime, not at
- * translate time.
- */
- CPAccessFn *accessfn;
- /* Function for handling reads of this register. If NULL, then reads
- * will be done by loading from the offset into CPUARMState specified
- * by fieldoffset.
- */
- CPReadFn *readfn;
- /* Function for handling writes of this register. If NULL, then writes
- * will be done by writing to the offset into CPUARMState specified
- * by fieldoffset.
- */
- CPWriteFn *writefn;
- /* Function for doing a "raw" read; used when we need to copy
- * coprocessor state to the kernel for KVM or out for
- * migration. This only needs to be provided if there is also a
- * readfn and it has side effects (for instance clear-on-read bits).
- */
- CPReadFn *raw_readfn;
- /* Function for doing a "raw" write; used when we need to copy KVM
- * kernel coprocessor state into userspace, or for inbound
- * migration. This only needs to be provided if there is also a
- * writefn and it masks out "unwritable" bits or has write-one-to-clear
- * or similar behaviour.
- */
- CPWriteFn *raw_writefn;
- /* Function for resetting the register. If NULL, then reset will be done
- * by writing resetvalue to the field specified in fieldoffset. If
- * fieldoffset is 0 then no reset will be done.
- */
- CPResetFn *resetfn;
-};
-
-/* Macros which are lvalues for the field in CPUARMState for the
- * ARMCPRegInfo *ri.
- */
-#define CPREG_FIELD32(env, ri) \
- (*(uint32_t *)((char *)(env) + (ri)->fieldoffset))
-#define CPREG_FIELD64(env, ri) \
- (*(uint64_t *)((char *)(env) + (ri)->fieldoffset))
-
-#define REGINFO_SENTINEL { .type = ARM_CP_SENTINEL }
-
-void define_arm_cp_regs_with_opaque(ARMCPU *cpu,
- const ARMCPRegInfo *regs, void *opaque);
-void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
- const ARMCPRegInfo *regs, void *opaque);
-static inline void define_arm_cp_regs(ARMCPU *cpu, const ARMCPRegInfo *regs)
-{
- define_arm_cp_regs_with_opaque(cpu, regs, 0);
-}
-static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs)
-{
- define_one_arm_cp_reg_with_opaque(cpu, regs, 0);
-}
-const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp);
-
-/* CPWriteFn that can be used to implement writes-ignored behaviour */
-void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value);
-/* CPReadFn that can be used for read-as-zero behaviour */
-uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri);
-
-/* CPResetFn that does nothing, for use if no reset is required even
- * if fieldoffset is non zero.
- */
-void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque);
-
-/* Return true if this reginfo struct's field in the cpu state struct
- * is 64 bits wide.
- */
-static inline bool cpreg_field_is_64bit(const ARMCPRegInfo *ri)
-{
- return (ri->state == ARM_CP_STATE_AA64) || (ri->type & ARM_CP_64BIT);
-}
-
-static inline bool cp_access_ok(int current_el,
- const ARMCPRegInfo *ri, int isread)
-{
- return (ri->access >> ((current_el * 2) + isread)) & 1;
-}
-
-/* Raw read of a coprocessor register (as needed for migration, etc) */
-uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri);
-
/**
* write_list_to_cpustate
* @cpu: ARMCPU
@@ -2312,202 +2653,32 @@ bool write_list_to_cpustate(ARMCPU *cpu);
/**
* write_cpustate_to_list:
* @cpu: ARMCPU
+ * @kvm_sync: true if this is for syncing back to KVM
*
* For each register listed in the ARMCPU cpreg_indexes list, write
* its value from the ARMCPUState structure into the cpreg_values list.
* This is used to copy info from TCG's working data structures into
* KVM or for outbound migration.
*
+ * @kvm_sync is true if we are doing this in order to sync the
+ * register state back to KVM. In this case we will only update
+ * values in the list if the previous list->cpustate sync actually
+ * successfully wrote the CPU state. Otherwise we will keep the value
+ * that is in the list.
+ *
* Returns: true if all register values were read correctly,
* false if some register was unknown or could not be read.
* Note that we do not stop early on failure -- we will attempt
* reading all registers in the list.
*/
-bool write_cpustate_to_list(ARMCPU *cpu);
+bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
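/*
 * Illustrative call site, not part of the patch: outbound migration
 * passes kvm_sync=false, while the KVM sync path passes true so that a
 * value is only refreshed when the previous list->cpustate write of that
 * register succeeded.
 */
static void example_prepare_outbound_migration(ARMCPU *cpu)
{
    if (!write_cpustate_to_list(cpu, false)) {
        /* Some register was unknown or unreadable; it keeps its old
         * value in cpreg_values. */
    }
}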
#define ARM_CPUID_TI915T 0x54029152
#define ARM_CPUID_TI925T 0x54029252
-#if defined(CONFIG_USER_ONLY)
-#define TARGET_PAGE_BITS 12
-#else
-/* ARMv7 and later CPUs have 4K pages minimum, but ARMv5 and v6
- * have to support 1K tiny pages.
- */
-#define TARGET_PAGE_BITS_VARY
-#define TARGET_PAGE_BITS_MIN 10
-#endif
-
-#if defined(TARGET_AARCH64)
-# define TARGET_PHYS_ADDR_SPACE_BITS 48
-# define TARGET_VIRT_ADDR_SPACE_BITS 64
-#else
-# define TARGET_PHYS_ADDR_SPACE_BITS 40
-# define TARGET_VIRT_ADDR_SPACE_BITS 32
-#endif
-
-/**
- * arm_hcr_el2_imo(): Return the effective value of HCR_EL2.IMO.
- * Depending on the values of HCR_EL2.E2H and TGE, this may be
- * "behaves as 1 for all purposes other than direct read/write" or
- * "behaves as 0 for all purposes other than direct read/write"
- */
-static inline bool arm_hcr_el2_imo(CPUARMState *env)
-{
- switch (env->cp15.hcr_el2 & (HCR_TGE | HCR_E2H)) {
- case HCR_TGE:
- return true;
- case HCR_TGE | HCR_E2H:
- return false;
- default:
- return env->cp15.hcr_el2 & HCR_IMO;
- }
-}
-
-/**
- * arm_hcr_el2_fmo(): Return the effective value of HCR_EL2.FMO.
- */
-static inline bool arm_hcr_el2_fmo(CPUARMState *env)
-{
- switch (env->cp15.hcr_el2 & (HCR_TGE | HCR_E2H)) {
- case HCR_TGE:
- return true;
- case HCR_TGE | HCR_E2H:
- return false;
- default:
- return env->cp15.hcr_el2 & HCR_FMO;
- }
-}
-
-/**
- * arm_hcr_el2_amo(): Return the effective value of HCR_EL2.AMO.
- */
-static inline bool arm_hcr_el2_amo(CPUARMState *env)
-{
- switch (env->cp15.hcr_el2 & (HCR_TGE | HCR_E2H)) {
- case HCR_TGE:
- return true;
- case HCR_TGE | HCR_E2H:
- return false;
- default:
- return env->cp15.hcr_el2 & HCR_AMO;
- }
-}
-
-static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
- unsigned int target_el)
-{
- CPUARMState *env = cs->env_ptr;
- unsigned int cur_el = arm_current_el(env);
- bool secure = arm_is_secure(env);
- bool pstate_unmasked;
- int8_t unmasked = 0;
-
- /* Don't take exceptions if they target a lower EL.
- * This check should catch any exceptions that would not be taken but left
- * pending.
- */
- if (cur_el > target_el) {
- return false;
- }
-
- switch (excp_idx) {
- case EXCP_FIQ:
- pstate_unmasked = !(env->daif & PSTATE_F);
- break;
-
- case EXCP_IRQ:
- pstate_unmasked = !(env->daif & PSTATE_I);
- break;
-
- case EXCP_VFIQ:
- if (secure || !arm_hcr_el2_fmo(env) || (env->cp15.hcr_el2 & HCR_TGE)) {
- /* VFIQs are only taken when hypervized and non-secure. */
- return false;
- }
- return !(env->daif & PSTATE_F);
- case EXCP_VIRQ:
- if (secure || !arm_hcr_el2_imo(env) || (env->cp15.hcr_el2 & HCR_TGE)) {
- /* VIRQs are only taken when hypervized and non-secure. */
- return false;
- }
- return !(env->daif & PSTATE_I);
- default:
- g_assert_not_reached();
- }
-
- /* Use the target EL, current execution state and SCR/HCR settings to
- * determine whether the corresponding CPSR bit is used to mask the
- * interrupt.
- */
- if ((target_el > cur_el) && (target_el != 1)) {
- /* Exceptions targeting a higher EL may not be maskable */
- if (arm_feature(env, ARM_FEATURE_AARCH64)) {
- /* 64-bit masking rules are simple: exceptions to EL3
- * can't be masked, and exceptions to EL2 can only be
- * masked from Secure state. The HCR and SCR settings
- * don't affect the masking logic, only the interrupt routing.
- */
- if (target_el == 3 || !secure) {
- unmasked = 1;
- }
- } else {
- /* The old 32-bit-only environment has a more complicated
- * masking setup. HCR and SCR bits not only affect interrupt
- * routing but also change the behaviour of masking.
- */
- bool hcr, scr;
-
- switch (excp_idx) {
- case EXCP_FIQ:
- /* If FIQs are routed to EL3 or EL2 then there are cases where
- * we override the CPSR.F in determining if the exception is
- * masked or not. If neither of these are set then we fall back
- * to the CPSR.F setting otherwise we further assess the state
- * below.
- */
- hcr = arm_hcr_el2_fmo(env);
- scr = (env->cp15.scr_el3 & SCR_FIQ);
-
- /* When EL3 is 32-bit, the SCR.FW bit controls whether the
- * CPSR.F bit masks FIQ interrupts when taken in non-secure
- * state. If SCR.FW is set then FIQs can be masked by CPSR.F
- * when non-secure but only when FIQs are only routed to EL3.
- */
- scr = scr && !((env->cp15.scr_el3 & SCR_FW) && !hcr);
- break;
- case EXCP_IRQ:
- /* When EL3 execution state is 32-bit, if HCR.IMO is set then
- * we may override the CPSR.I masking when in non-secure state.
- * The SCR.IRQ setting has already been taken into consideration
- * when setting the target EL, so it does not have a further
- * affect here.
- */
- hcr = arm_hcr_el2_imo(env);
- scr = false;
- break;
- default:
- g_assert_not_reached();
- }
-
- if ((scr || hcr) && !secure) {
- unmasked = 1;
- }
- }
- }
-
- /* The PSTATE bits only mask the interrupt if we have not overriden the
- * ability above.
- */
- return unmasked || pstate_unmasked;
-}
-
-#define ARM_CPU_TYPE_SUFFIX "-" TYPE_ARM_CPU
-#define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX)
#define CPU_RESOLVING_TYPE TYPE_ARM_CPU
-#define cpu_signal_handler cpu_arm_signal_handler
-#define cpu_list arm_cpu_list
+#define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU
/* ARM has the following "translation regimes" (as the ARM ARM calls them):
*
@@ -2515,18 +2686,21 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
* + NonSecure EL1 & 0 stage 1
* + NonSecure EL1 & 0 stage 2
* + NonSecure EL2
- * + Secure EL1 & EL0
+ * + NonSecure EL2 & 0 (ARMv8.1-VHE)
+ * + Secure EL1 & 0
* + Secure EL3
* If EL3 is 32-bit:
* + NonSecure PL1 & 0 stage 1
* + NonSecure PL1 & 0 stage 2
* + NonSecure PL2
- * + Secure PL0 & PL1
+ * + Secure PL0
+ * + Secure PL1
* (reminder: for 32 bit EL3, Secure PL1 is *EL3*, not EL1.)
*
* For QEMU, an mmu_idx is not quite the same as a translation regime because:
- * 1. we need to split the "EL1 & 0" regimes into two mmu_idxes, because they
- * may differ in access permissions even if the VA->PA map is the same
+ * 1. we need to split the "EL1 & 0" and "EL2 & 0" regimes into two mmu_idxes,
+ * because they may differ in access permissions even if the VA->PA map is
+ * the same
* 2. we want to cache in our TLB the full VA->IPA->PA lookup for a stage 1+2
* translation, which means that we have one mmu_idx that deals with two
* concatenated translation regimes [this sort of combined s1+2 TLB is
@@ -2535,26 +2709,40 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
* handling via the TLB. The only way to do a stage 1 translation without
* the immediate stage 2 translation is via the ATS or AT system insns,
* which can be slow-pathed and always do a page table walk.
+ * The only use of stage 2 translations is either as part of an s1+2
+ * lookup or when loading the descriptors during a stage 1 page table walk,
+ * and in both those cases we don't use the TLB.
* 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3"
* translation regimes, because they map reasonably well to each other
* and they can't both be active at the same time.
- * This gives us the following list of mmu_idx values:
+ * 5. we want to be able to use the TLB for accesses done as part of a
+ * stage1 page table walk, rather than having to walk the stage2 page
+ * table over and over.
+ * 6. we need separate EL1/EL2 mmu_idx for handling the Privileged Access
+ * Never (PAN) bit within PSTATE.
+ * 7. we fold together the secure and non-secure regimes for A-profile,
+ * because there are no banked system registers for aarch64, so the
+ * process of switching between secure and non-secure is
+ * already heavyweight.
*
- * NS EL0 (aka NS PL0) stage 1+2
- * NS EL1 (aka NS PL1) stage 1+2
- * NS EL2 (aka NS PL2)
- * S EL3 (aka S PL1)
- * S EL0 (aka S PL0)
- * S EL1 (not used if EL3 is 32 bit)
- * NS EL0+1 stage 2
+ * This gives us the following list of cases:
*
- * (The last of these is an mmu_idx because we want to be able to use the TLB
- * for the accesses done as part of a stage 1 page table walk, rather than
- * having to walk the stage 2 page table over and over.)
+ * EL0 EL1&0 stage 1+2 (aka NS PL0)
+ * EL1 EL1&0 stage 1+2 (aka NS PL1)
+ * EL1 EL1&0 stage 1+2 +PAN
+ * EL0 EL2&0
+ * EL2 EL2&0
+ * EL2 EL2&0 +PAN
+ * EL2 (aka NS PL2)
+ * EL3 (aka S PL1)
+ * Physical (S, NS, Root, Realm)
+ * Stage2 (S, NS)
+ *
+ * for a total of 14 different mmu_idx.
*
* R profile CPUs have an MPU, but can use the same set of MMU indexes
- * as A profile. They only need to distinguish NS EL0 and NS EL1 (and
- * NS EL2 if we ever model a Cortex-R52).
+ * as A profile. They only need to distinguish EL0 and EL1 (and
+ * EL2 if we ever model a Cortex-R52).
*
* M profile CPUs are rather different as they do not have a true MMU.
* They have the following different MMU indexes:
@@ -2572,7 +2760,8 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
* are not quite the same -- different CPU types (most notably M profile
* vs A/R profile) would like to use MMU indexes with different semantics,
* but since we don't ever need to use all of those in a single CPU we
- * can avoid setting NB_MMU_MODES to more than 8. The lower bits of
+ * can avoid having to set NB_MMU_MODES to "total number of A profile MMU
+ * modes + total number of M profile MMU modes". The lower bits of
* ARMMMUIdx are the core TLB mmu index, and the higher bits are always
* the same for any particular CPU.
* Variables of type ARMMUIdx are always full values, and the core
@@ -2588,166 +2777,124 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
* For M profile we arrange them to have a bit for priv, a bit for negpri
* and a bit for secure.
*/
-#define ARM_MMU_IDX_A 0x10 /* A profile */
-#define ARM_MMU_IDX_NOTLB 0x20 /* does not have a TLB */
-#define ARM_MMU_IDX_M 0x40 /* M profile */
+#define ARM_MMU_IDX_A 0x10 /* A profile */
+#define ARM_MMU_IDX_NOTLB 0x20 /* does not have a TLB */
+#define ARM_MMU_IDX_M 0x40 /* M profile */
-/* meanings of the bits for M profile mmu idx values */
-#define ARM_MMU_IDX_M_PRIV 0x1
+/* Meanings of the bits for M profile mmu idx values */
+#define ARM_MMU_IDX_M_PRIV 0x1
#define ARM_MMU_IDX_M_NEGPRI 0x2
-#define ARM_MMU_IDX_M_S 0x4
+#define ARM_MMU_IDX_M_S 0x4 /* Secure */
-#define ARM_MMU_IDX_TYPE_MASK (~0x7)
-#define ARM_MMU_IDX_COREIDX_MASK 0x7
+#define ARM_MMU_IDX_TYPE_MASK \
+ (ARM_MMU_IDX_A | ARM_MMU_IDX_M | ARM_MMU_IDX_NOTLB)
+#define ARM_MMU_IDX_COREIDX_MASK 0xf
typedef enum ARMMMUIdx {
- ARMMMUIdx_S12NSE0 = 0 | ARM_MMU_IDX_A,
- ARMMMUIdx_S12NSE1 = 1 | ARM_MMU_IDX_A,
- ARMMMUIdx_S1E2 = 2 | ARM_MMU_IDX_A,
- ARMMMUIdx_S1E3 = 3 | ARM_MMU_IDX_A,
- ARMMMUIdx_S1SE0 = 4 | ARM_MMU_IDX_A,
- ARMMMUIdx_S1SE1 = 5 | ARM_MMU_IDX_A,
- ARMMMUIdx_S2NS = 6 | ARM_MMU_IDX_A,
- ARMMMUIdx_MUser = 0 | ARM_MMU_IDX_M,
- ARMMMUIdx_MPriv = 1 | ARM_MMU_IDX_M,
- ARMMMUIdx_MUserNegPri = 2 | ARM_MMU_IDX_M,
- ARMMMUIdx_MPrivNegPri = 3 | ARM_MMU_IDX_M,
- ARMMMUIdx_MSUser = 4 | ARM_MMU_IDX_M,
- ARMMMUIdx_MSPriv = 5 | ARM_MMU_IDX_M,
- ARMMMUIdx_MSUserNegPri = 6 | ARM_MMU_IDX_M,
- ARMMMUIdx_MSPrivNegPri = 7 | ARM_MMU_IDX_M,
- /* Indexes below here don't have TLBs and are used only for AT system
+ /*
+ * A-profile.
+ */
+ ARMMMUIdx_E10_0 = 0 | ARM_MMU_IDX_A,
+ ARMMMUIdx_E20_0 = 1 | ARM_MMU_IDX_A,
+ ARMMMUIdx_E10_1 = 2 | ARM_MMU_IDX_A,
+ ARMMMUIdx_E20_2 = 3 | ARM_MMU_IDX_A,
+ ARMMMUIdx_E10_1_PAN = 4 | ARM_MMU_IDX_A,
+ ARMMMUIdx_E20_2_PAN = 5 | ARM_MMU_IDX_A,
+ ARMMMUIdx_E2 = 6 | ARM_MMU_IDX_A,
+ ARMMMUIdx_E3 = 7 | ARM_MMU_IDX_A,
+
+ /*
+ * Used for second stage of an S12 page table walk, or for descriptor
+ * loads during first stage of an S1 page table walk. Note that both
+ * are in use simultaneously for SecureEL2: the security state for
+ * the S2 ptw is selected by the NS bit from the S1 ptw.
+ */
+ ARMMMUIdx_Stage2_S = 8 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Stage2 = 9 | ARM_MMU_IDX_A,
+
+ /* TLBs with 1-1 mapping to the physical address spaces. */
+ ARMMMUIdx_Phys_S = 10 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Phys_NS = 11 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Phys_Root = 12 | ARM_MMU_IDX_A,
+ ARMMMUIdx_Phys_Realm = 13 | ARM_MMU_IDX_A,
+
+ /*
+ * These are not allocated TLBs and are used only for AT system
* instructions or for the first stage of an S12 page table walk.
*/
- ARMMMUIdx_S1NSE0 = 0 | ARM_MMU_IDX_NOTLB,
- ARMMMUIdx_S1NSE1 = 1 | ARM_MMU_IDX_NOTLB,
+ ARMMMUIdx_Stage1_E0 = 0 | ARM_MMU_IDX_NOTLB,
+ ARMMMUIdx_Stage1_E1 = 1 | ARM_MMU_IDX_NOTLB,
+ ARMMMUIdx_Stage1_E1_PAN = 2 | ARM_MMU_IDX_NOTLB,
+
+ /*
+ * M-profile.
+ */
+ ARMMMUIdx_MUser = ARM_MMU_IDX_M,
+ ARMMMUIdx_MPriv = ARM_MMU_IDX_M | ARM_MMU_IDX_M_PRIV,
+ ARMMMUIdx_MUserNegPri = ARMMMUIdx_MUser | ARM_MMU_IDX_M_NEGPRI,
+ ARMMMUIdx_MPrivNegPri = ARMMMUIdx_MPriv | ARM_MMU_IDX_M_NEGPRI,
+ ARMMMUIdx_MSUser = ARMMMUIdx_MUser | ARM_MMU_IDX_M_S,
+ ARMMMUIdx_MSPriv = ARMMMUIdx_MPriv | ARM_MMU_IDX_M_S,
+ ARMMMUIdx_MSUserNegPri = ARMMMUIdx_MUserNegPri | ARM_MMU_IDX_M_S,
+ ARMMMUIdx_MSPrivNegPri = ARMMMUIdx_MPrivNegPri | ARM_MMU_IDX_M_S,
} ARMMMUIdx;
-/* Bit macros for the core-mmu-index values for each index,
+/*
+ * Bit macros for the core-mmu-index values for each index,
* for use when calling tlb_flush_by_mmuidx() and friends.
*/
+#define TO_CORE_BIT(NAME) \
+ ARMMMUIdxBit_##NAME = 1 << (ARMMMUIdx_##NAME & ARM_MMU_IDX_COREIDX_MASK)
+
typedef enum ARMMMUIdxBit {
- ARMMMUIdxBit_S12NSE0 = 1 << 0,
- ARMMMUIdxBit_S12NSE1 = 1 << 1,
- ARMMMUIdxBit_S1E2 = 1 << 2,
- ARMMMUIdxBit_S1E3 = 1 << 3,
- ARMMMUIdxBit_S1SE0 = 1 << 4,
- ARMMMUIdxBit_S1SE1 = 1 << 5,
- ARMMMUIdxBit_S2NS = 1 << 6,
- ARMMMUIdxBit_MUser = 1 << 0,
- ARMMMUIdxBit_MPriv = 1 << 1,
- ARMMMUIdxBit_MUserNegPri = 1 << 2,
- ARMMMUIdxBit_MPrivNegPri = 1 << 3,
- ARMMMUIdxBit_MSUser = 1 << 4,
- ARMMMUIdxBit_MSPriv = 1 << 5,
- ARMMMUIdxBit_MSUserNegPri = 1 << 6,
- ARMMMUIdxBit_MSPrivNegPri = 1 << 7,
+ TO_CORE_BIT(E10_0),
+ TO_CORE_BIT(E20_0),
+ TO_CORE_BIT(E10_1),
+ TO_CORE_BIT(E10_1_PAN),
+ TO_CORE_BIT(E2),
+ TO_CORE_BIT(E20_2),
+ TO_CORE_BIT(E20_2_PAN),
+ TO_CORE_BIT(E3),
+ TO_CORE_BIT(Stage2),
+ TO_CORE_BIT(Stage2_S),
+
+ TO_CORE_BIT(MUser),
+ TO_CORE_BIT(MPriv),
+ TO_CORE_BIT(MUserNegPri),
+ TO_CORE_BIT(MPrivNegPri),
+ TO_CORE_BIT(MSUser),
+ TO_CORE_BIT(MSPriv),
+ TO_CORE_BIT(MSUserNegPri),
+ TO_CORE_BIT(MSPrivNegPri),
} ARMMMUIdxBit;
-#define MMU_USER_IDX 0
+#undef TO_CORE_BIT
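/*
 * Illustrative expansion, not part of the patch: TO_CORE_BIT(E10_0)
 * produced
 *   ARMMMUIdxBit_E10_0 = 1 << (ARMMMUIdx_E10_0 & ARM_MMU_IDX_COREIDX_MASK)
 * i.e. 1 << 0, so each bit tracks the core TLB index of its mmu_idx and
 * the values can be passed straight to tlb_flush_by_mmuidx() and friends.
 */
static inline int example_core_index_bit(ARMMMUIdx mmu_idx)
{
    return 1 << (mmu_idx & ARM_MMU_IDX_COREIDX_MASK);
}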
-static inline int arm_to_core_mmu_idx(ARMMMUIdx mmu_idx)
-{
- return mmu_idx & ARM_MMU_IDX_COREIDX_MASK;
-}
-
-static inline ARMMMUIdx core_to_arm_mmu_idx(CPUARMState *env, int mmu_idx)
-{
- if (arm_feature(env, ARM_FEATURE_M)) {
- return mmu_idx | ARM_MMU_IDX_M;
- } else {
- return mmu_idx | ARM_MMU_IDX_A;
- }
-}
-
-/* Return the exception level we're running at if this is our mmu_idx */
-static inline int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx)
-{
- switch (mmu_idx & ARM_MMU_IDX_TYPE_MASK) {
- case ARM_MMU_IDX_A:
- return mmu_idx & 3;
- case ARM_MMU_IDX_M:
- return mmu_idx & ARM_MMU_IDX_M_PRIV;
- default:
- g_assert_not_reached();
- }
-}
-
-/* Return the MMU index for a v7M CPU in the specified security and
- * privilege state
- */
-static inline ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
- bool secstate,
- bool priv)
-{
- ARMMMUIdx mmu_idx = ARM_MMU_IDX_M;
-
- if (priv) {
- mmu_idx |= ARM_MMU_IDX_M_PRIV;
- }
-
- if (armv7m_nvic_neg_prio_requested(env->nvic, secstate)) {
- mmu_idx |= ARM_MMU_IDX_M_NEGPRI;
- }
-
- if (secstate) {
- mmu_idx |= ARM_MMU_IDX_M_S;
- }
-
- return mmu_idx;
-}
-
-/* Return the MMU index for a v7M CPU in the specified security state */
-static inline ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env,
- bool secstate)
-{
- bool priv = arm_current_el(env) != 0;
-
- return arm_v7m_mmu_idx_for_secstate_and_priv(env, secstate, priv);
-}
-
-/* Determine the current mmu_idx to use for normal loads/stores */
-static inline int cpu_mmu_index(CPUARMState *env, bool ifetch)
-{
- int el = arm_current_el(env);
-
- if (arm_feature(env, ARM_FEATURE_M)) {
- ARMMMUIdx mmu_idx = arm_v7m_mmu_idx_for_secstate(env, env->v7m.secure);
-
- return arm_to_core_mmu_idx(mmu_idx);
- }
-
- if (el < 2 && arm_is_secure_below_el3(env)) {
- return arm_to_core_mmu_idx(ARMMMUIdx_S1SE0 + el);
- }
- return el;
-}
+#define MMU_USER_IDX 0
/* Indexes used when registering address spaces with cpu_address_space_init */
typedef enum ARMASIdx {
ARMASIdx_NS = 0,
ARMASIdx_S = 1,
+ ARMASIdx_TagNS = 2,
+ ARMASIdx_TagS = 3,
} ARMASIdx;
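/*
 * Illustrative mapping, not part of the patch: the two new indexes are
 * the MTE allocation-tag companions of the normal address spaces. The
 * example_* helper name is hypothetical.
 */
static inline ARMASIdx example_tag_asidx_for(ARMASIdx normal)
{
    return normal == ARMASIdx_S ? ARMASIdx_TagS : ARMASIdx_TagNS;
}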
-/* Return the Exception Level targeted by debug exceptions. */
-static inline int arm_debug_target_el(CPUARMState *env)
+static inline ARMMMUIdx arm_space_to_phys(ARMSecuritySpace space)
{
- bool secure = arm_is_secure(env);
- bool route_to_el2 = false;
+ /* Assert the relative order of the physical mmu indexes. */
+ QEMU_BUILD_BUG_ON(ARMSS_Secure != 0);
+ QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_NS != ARMMMUIdx_Phys_S + ARMSS_NonSecure);
+ QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_Root != ARMMMUIdx_Phys_S + ARMSS_Root);
+ QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_Realm != ARMMMUIdx_Phys_S + ARMSS_Realm);
- if (arm_feature(env, ARM_FEATURE_EL2) && !secure) {
- route_to_el2 = env->cp15.hcr_el2 & HCR_TGE ||
- env->cp15.mdcr_el2 & (1 << 8);
- }
+ return ARMMMUIdx_Phys_S + space;
+}
- if (route_to_el2) {
- return 2;
- } else if (arm_feature(env, ARM_FEATURE_EL3) &&
- !arm_el_is_aa64(env, 3) && secure) {
- return 3;
- } else {
- return 1;
- }
+static inline ARMSecuritySpace arm_phys_to_space(ARMMMUIdx idx)
+{
+ assert(idx >= ARMMMUIdx_Phys_S && idx <= ARMMMUIdx_Phys_Realm);
+ return idx - ARMMMUIdx_Phys_S;
}
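/*
 * Illustrative round trip, not part of the patch: the conversions above
 * only work because the QEMU_BUILD_BUG_ON checks pin the Phys_* indexes
 * to ARMSecuritySpace order.
 */
static inline bool example_phys_space_roundtrip(ARMSecuritySpace space)
{
    return arm_phys_to_space(arm_space_to_phys(space)) == space;
}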
static inline bool arm_v7m_csselr_razwi(ARMCPU *cpu)
@@ -2758,98 +2905,6 @@ static inline bool arm_v7m_csselr_razwi(ARMCPU *cpu)
return (cpu->clidr & R_V7M_CLIDR_CTYPE_ALL_MASK) != 0;
}
-static inline bool aa64_generate_debug_exceptions(CPUARMState *env)
-{
- if (arm_is_secure(env)) {
- /* MDCR_EL3.SDD disables debug events from Secure state */
- if (extract32(env->cp15.mdcr_el3, 16, 1) != 0
- || arm_current_el(env) == 3) {
- return false;
- }
- }
-
- if (arm_current_el(env) == arm_debug_target_el(env)) {
- if ((extract32(env->cp15.mdscr_el1, 13, 1) == 0)
- || (env->daif & PSTATE_D)) {
- return false;
- }
- }
- return true;
-}
-
-static inline bool aa32_generate_debug_exceptions(CPUARMState *env)
-{
- int el = arm_current_el(env);
-
- if (el == 0 && arm_el_is_aa64(env, 1)) {
- return aa64_generate_debug_exceptions(env);
- }
-
- if (arm_is_secure(env)) {
- int spd;
-
- if (el == 0 && (env->cp15.sder & 1)) {
- /* SDER.SUIDEN means debug exceptions from Secure EL0
- * are always enabled. Otherwise they are controlled by
- * SDCR.SPD like those from other Secure ELs.
- */
- return true;
- }
-
- spd = extract32(env->cp15.mdcr_el3, 14, 2);
- switch (spd) {
- case 1:
- /* SPD == 0b01 is reserved, but behaves as 0b00. */
- case 0:
- /* For 0b00 we return true if external secure invasive debug
- * is enabled. On real hardware this is controlled by external
- * signals to the core. QEMU always permits debug, and behaves
- * as if DBGEN, SPIDEN, NIDEN and SPNIDEN are all tied high.
- */
- return true;
- case 2:
- return false;
- case 3:
- return true;
- }
- }
-
- return el != 2;
-}
-
-/* Return true if debugging exceptions are currently enabled.
- * This corresponds to what in ARM ARM pseudocode would be
- * if UsingAArch32() then
- * return AArch32.GenerateDebugExceptions()
- * else
- * return AArch64.GenerateDebugExceptions()
- * We choose to push the if() down into this function for clarity,
- * since the pseudocode has it at all callsites except for the one in
- * CheckSoftwareStep(), where it is elided because both branches would
- * always return the same value.
- *
- * Parts of the pseudocode relating to EL2 and EL3 are omitted because we
- * don't yet implement those exception levels or their associated trap bits.
- */
-static inline bool arm_generate_debug_exceptions(CPUARMState *env)
-{
- if (env->aarch64) {
- return aa64_generate_debug_exceptions(env);
- } else {
- return aa32_generate_debug_exceptions(env);
- }
-}
-
-/* Is single-stepping active? (Note that the "is EL_D AArch64?" check
- * implicitly means this always returns false in pre-v8 CPUs.)
- */
-static inline bool arm_singlestep_active(CPUARMState *env)
-{
- return extract32(env->cp15.mdscr_el1, 0, 1)
- && arm_el_is_aa64(env, arm_debug_target_el(env))
- && arm_generate_debug_exceptions(env);
-}
-
static inline bool arm_sctlr_b(CPUARMState *env)
{
return
@@ -2863,156 +2918,224 @@ static inline bool arm_sctlr_b(CPUARMState *env)
(env->cp15.sctlr_el[1] & SCTLR_B) != 0;
}
-/* Return true if the processor is in big-endian mode. */
-static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
-{
- int cur_el;
+uint64_t arm_sctlr(CPUARMState *env, int el);
- /* In 32bit endianness is determined by looking at CPSR's E bit */
- if (!is_a64(env)) {
- return
+static inline bool arm_cpu_data_is_big_endian_a32(CPUARMState *env,
+ bool sctlr_b)
+{
#ifdef CONFIG_USER_ONLY
- /* In system mode, BE32 is modelled in line with the
- * architecture (as word-invariant big-endianness), where loads
- * and stores are done little endian but from addresses which
- * are adjusted by XORing with the appropriate constant. So the
- * endianness to use for the raw data access is not affected by
- * SCTLR.B.
- * In user mode, however, we model BE32 as byte-invariant
- * big-endianness (because user-only code cannot tell the
- * difference), and so we need to use a data access endianness
- * that depends on SCTLR.B.
- */
- arm_sctlr_b(env) ||
-#endif
- ((env->uncached_cpsr & CPSR_E) ? 1 : 0);
+ /*
+ * In system mode, BE32 is modelled in line with the
+ * architecture (as word-invariant big-endianness), where loads
+ * and stores are done little endian but from addresses which
+ * are adjusted by XORing with the appropriate constant. So the
+ * endianness to use for the raw data access is not affected by
+ * SCTLR.B.
+ * In user mode, however, we model BE32 as byte-invariant
+ * big-endianness (because user-only code cannot tell the
+ * difference), and so we need to use a data access endianness
+ * that depends on SCTLR.B.
+ */
+ if (sctlr_b) {
+ return true;
}
+#endif
+ /* In 32bit endianness is determined by looking at CPSR's E bit */
+ return env->uncached_cpsr & CPSR_E;
+}
- cur_el = arm_current_el(env);
+static inline bool arm_cpu_data_is_big_endian_a64(int el, uint64_t sctlr)
+{
+ return sctlr & (el ? SCTLR_EE : SCTLR_E0E);
+}
- if (cur_el == 0) {
- return (env->cp15.sctlr_el[1] & SCTLR_E0E) != 0;
+/* Return true if the processor is in big-endian mode. */
+static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
+{
+ if (!is_a64(env)) {
+ return arm_cpu_data_is_big_endian_a32(env, arm_sctlr_b(env));
+ } else {
+ int cur_el = arm_current_el(env);
+ uint64_t sctlr = arm_sctlr(env, cur_el);
+ return arm_cpu_data_is_big_endian_a64(cur_el, sctlr);
}
-
- return (env->cp15.sctlr_el[cur_el] & SCTLR_EE) != 0;
}
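
The refactor above splits the endianness query: AArch32 looks at CPSR.E (plus SCTLR.B in the user-only model), while AArch64 uses SCTLR_ELx.EE for EL1 and above and SCTLR_EL1.E0E for EL0. A minimal standalone sketch of the AArch64 rule, with the two SCTLR bits defined locally for illustration (the bit positions are an assumption of this sketch, not something stated in the patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative SCTLR bit positions (assumed here): EE at bit 25, E0E at bit 24. */
#define SCTLR_EE  (1ULL << 25)
#define SCTLR_E0E (1ULL << 24)

/* Same rule as arm_cpu_data_is_big_endian_a64() above: EL0 data endianness
 * comes from SCTLR.E0E, every other EL uses SCTLR.EE. */
static bool data_is_big_endian_a64(int el, uint64_t sctlr)
{
    return sctlr & (el ? SCTLR_EE : SCTLR_E0E);
}

int main(void)
{
    printf("%d\n", data_is_big_endian_a64(0, SCTLR_E0E)); /* 1: EL0, E0E set */
    printf("%d\n", data_is_big_endian_a64(1, SCTLR_E0E)); /* 0: EL1 ignores E0E */
    printf("%d\n", data_is_big_endian_a64(1, SCTLR_EE));  /* 1: EL1, EE set */
    return 0;
}
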
#include "exec/cpu-all.h"
-/* Bit usage in the TB flags field: bit 31 indicates whether we are
- * in 32 or 64 bit mode. The meaning of the other bits depends on that.
- * We put flags which are shared between 32 and 64 bit mode at the top
- * of the word, and flags which apply to only one mode at the bottom.
+/*
+ * We have more than 32-bits worth of state per TB, so we split the data
+ * between tb->flags and tb->cs_base, which is otherwise unused for ARM.
+ * We collect these two parts in CPUARMTBFlags where they are named
+ * flags and flags2 respectively.
+ *
+ * The flags that are shared between all execution modes, TBFLAG_ANY,
+ * are stored in flags. The flags that are specific to a given mode
+ * are stored in flags2. Since cs_base is sized on the configured
+ * address size, flags2 always has 64-bits for A64, and a minimum of
+ * 32-bits for A32 and M32.
+ *
+ * The bits for 32-bit A-profile and M-profile partially overlap:
+ *
+ *  31         23         11 10             0
+ * +-------------+----------+----------------+
+ * |             |          |   TBFLAG_A32   |
+ * | TBFLAG_AM32 |          +-----+----------+
+ * |             |                |TBFLAG_M32|
+ * +-------------+----------------+----------+
+ *  31         23                6  5       0
+ *
+ * Unless otherwise noted, these bits are cached in env->hflags.
*/
-#define ARM_TBFLAG_AARCH64_STATE_SHIFT 31
-#define ARM_TBFLAG_AARCH64_STATE_MASK (1U << ARM_TBFLAG_AARCH64_STATE_SHIFT)
-#define ARM_TBFLAG_MMUIDX_SHIFT 28
-#define ARM_TBFLAG_MMUIDX_MASK (0x7 << ARM_TBFLAG_MMUIDX_SHIFT)
-#define ARM_TBFLAG_SS_ACTIVE_SHIFT 27
-#define ARM_TBFLAG_SS_ACTIVE_MASK (1 << ARM_TBFLAG_SS_ACTIVE_SHIFT)
-#define ARM_TBFLAG_PSTATE_SS_SHIFT 26
-#define ARM_TBFLAG_PSTATE_SS_MASK (1 << ARM_TBFLAG_PSTATE_SS_SHIFT)
+FIELD(TBFLAG_ANY, AARCH64_STATE, 0, 1)
+FIELD(TBFLAG_ANY, SS_ACTIVE, 1, 1)
+FIELD(TBFLAG_ANY, PSTATE__SS, 2, 1) /* Not cached. */
+FIELD(TBFLAG_ANY, BE_DATA, 3, 1)
+FIELD(TBFLAG_ANY, MMUIDX, 4, 4)
/* Target EL if we take a floating-point-disabled exception */
-#define ARM_TBFLAG_FPEXC_EL_SHIFT 24
-#define ARM_TBFLAG_FPEXC_EL_MASK (0x3 << ARM_TBFLAG_FPEXC_EL_SHIFT)
-
-/* Bit usage when in AArch32 state: */
-#define ARM_TBFLAG_THUMB_SHIFT 0
-#define ARM_TBFLAG_THUMB_MASK (1 << ARM_TBFLAG_THUMB_SHIFT)
-#define ARM_TBFLAG_VECLEN_SHIFT 1
-#define ARM_TBFLAG_VECLEN_MASK (0x7 << ARM_TBFLAG_VECLEN_SHIFT)
-#define ARM_TBFLAG_VECSTRIDE_SHIFT 4
-#define ARM_TBFLAG_VECSTRIDE_MASK (0x3 << ARM_TBFLAG_VECSTRIDE_SHIFT)
-#define ARM_TBFLAG_VFPEN_SHIFT 7
-#define ARM_TBFLAG_VFPEN_MASK (1 << ARM_TBFLAG_VFPEN_SHIFT)
-#define ARM_TBFLAG_CONDEXEC_SHIFT 8
-#define ARM_TBFLAG_CONDEXEC_MASK (0xff << ARM_TBFLAG_CONDEXEC_SHIFT)
-#define ARM_TBFLAG_SCTLR_B_SHIFT 16
-#define ARM_TBFLAG_SCTLR_B_MASK (1 << ARM_TBFLAG_SCTLR_B_SHIFT)
-/* We store the bottom two bits of the CPAR as TB flags and handle
- * checks on the other bits at runtime
+FIELD(TBFLAG_ANY, FPEXC_EL, 8, 2)
+/* Memory operations require alignment: SCTLR_ELx.A or CCR.UNALIGN_TRP */
+FIELD(TBFLAG_ANY, ALIGN_MEM, 10, 1)
+FIELD(TBFLAG_ANY, PSTATE__IL, 11, 1)
+FIELD(TBFLAG_ANY, FGT_ACTIVE, 12, 1)
+FIELD(TBFLAG_ANY, FGT_SVC, 13, 1)
+
+/*
+ * Bit usage when in AArch32 state, both A- and M-profile.
*/
-#define ARM_TBFLAG_XSCALE_CPAR_SHIFT 17
-#define ARM_TBFLAG_XSCALE_CPAR_MASK (3 << ARM_TBFLAG_XSCALE_CPAR_SHIFT)
-/* Indicates whether cp register reads and writes by guest code should access
+FIELD(TBFLAG_AM32, CONDEXEC, 24, 8) /* Not cached. */
+FIELD(TBFLAG_AM32, THUMB, 23, 1) /* Not cached. */
+
+/*
+ * Bit usage when in AArch32 state, for A-profile only.
+ */
+FIELD(TBFLAG_A32, VECLEN, 0, 3) /* Not cached. */
+FIELD(TBFLAG_A32, VECSTRIDE, 3, 2) /* Not cached. */
+/*
+ * We store the bottom two bits of the CPAR as TB flags and handle
+ * checks on the other bits at runtime. This shares the same bits as
+ * VECSTRIDE, which is OK as no XScale CPU has VFP.
+ * Not cached, because VECLEN+VECSTRIDE are not cached.
+ */
+FIELD(TBFLAG_A32, XSCALE_CPAR, 5, 2)
+FIELD(TBFLAG_A32, VFPEN, 7, 1) /* Partially cached, minus FPEXC. */
+FIELD(TBFLAG_A32, SCTLR__B, 8, 1) /* Cannot overlap with SCTLR_B */
+FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1)
+/*
+ * Indicates whether cp register reads and writes by guest code should access
* the secure or nonsecure bank of banked registers; note that this is not
* the same thing as the current security state of the processor!
*/
-#define ARM_TBFLAG_NS_SHIFT 19
-#define ARM_TBFLAG_NS_MASK (1 << ARM_TBFLAG_NS_SHIFT)
-#define ARM_TBFLAG_BE_DATA_SHIFT 20
-#define ARM_TBFLAG_BE_DATA_MASK (1 << ARM_TBFLAG_BE_DATA_SHIFT)
-/* For M profile only, Handler (ie not Thread) mode */
-#define ARM_TBFLAG_HANDLER_SHIFT 21
-#define ARM_TBFLAG_HANDLER_MASK (1 << ARM_TBFLAG_HANDLER_SHIFT)
-/* For M profile only, whether we should generate stack-limit checks */
-#define ARM_TBFLAG_STACKCHECK_SHIFT 22
-#define ARM_TBFLAG_STACKCHECK_MASK (1 << ARM_TBFLAG_STACKCHECK_SHIFT)
-
-/* Bit usage when in AArch64 state */
-#define ARM_TBFLAG_TBI0_SHIFT 0 /* TBI0 for EL0/1 or TBI for EL2/3 */
-#define ARM_TBFLAG_TBI0_MASK (0x1ull << ARM_TBFLAG_TBI0_SHIFT)
-#define ARM_TBFLAG_TBI1_SHIFT 1 /* TBI1 for EL0/1 */
-#define ARM_TBFLAG_TBI1_MASK (0x1ull << ARM_TBFLAG_TBI1_SHIFT)
-#define ARM_TBFLAG_SVEEXC_EL_SHIFT 2
-#define ARM_TBFLAG_SVEEXC_EL_MASK (0x3 << ARM_TBFLAG_SVEEXC_EL_SHIFT)
-#define ARM_TBFLAG_ZCR_LEN_SHIFT 4
-#define ARM_TBFLAG_ZCR_LEN_MASK (0xf << ARM_TBFLAG_ZCR_LEN_SHIFT)
-
-/* some convenience accessor macros */
-#define ARM_TBFLAG_AARCH64_STATE(F) \
- (((F) & ARM_TBFLAG_AARCH64_STATE_MASK) >> ARM_TBFLAG_AARCH64_STATE_SHIFT)
-#define ARM_TBFLAG_MMUIDX(F) \
- (((F) & ARM_TBFLAG_MMUIDX_MASK) >> ARM_TBFLAG_MMUIDX_SHIFT)
-#define ARM_TBFLAG_SS_ACTIVE(F) \
- (((F) & ARM_TBFLAG_SS_ACTIVE_MASK) >> ARM_TBFLAG_SS_ACTIVE_SHIFT)
-#define ARM_TBFLAG_PSTATE_SS(F) \
- (((F) & ARM_TBFLAG_PSTATE_SS_MASK) >> ARM_TBFLAG_PSTATE_SS_SHIFT)
-#define ARM_TBFLAG_FPEXC_EL(F) \
- (((F) & ARM_TBFLAG_FPEXC_EL_MASK) >> ARM_TBFLAG_FPEXC_EL_SHIFT)
-#define ARM_TBFLAG_THUMB(F) \
- (((F) & ARM_TBFLAG_THUMB_MASK) >> ARM_TBFLAG_THUMB_SHIFT)
-#define ARM_TBFLAG_VECLEN(F) \
- (((F) & ARM_TBFLAG_VECLEN_MASK) >> ARM_TBFLAG_VECLEN_SHIFT)
-#define ARM_TBFLAG_VECSTRIDE(F) \
- (((F) & ARM_TBFLAG_VECSTRIDE_MASK) >> ARM_TBFLAG_VECSTRIDE_SHIFT)
-#define ARM_TBFLAG_VFPEN(F) \
- (((F) & ARM_TBFLAG_VFPEN_MASK) >> ARM_TBFLAG_VFPEN_SHIFT)
-#define ARM_TBFLAG_CONDEXEC(F) \
- (((F) & ARM_TBFLAG_CONDEXEC_MASK) >> ARM_TBFLAG_CONDEXEC_SHIFT)
-#define ARM_TBFLAG_SCTLR_B(F) \
- (((F) & ARM_TBFLAG_SCTLR_B_MASK) >> ARM_TBFLAG_SCTLR_B_SHIFT)
-#define ARM_TBFLAG_XSCALE_CPAR(F) \
- (((F) & ARM_TBFLAG_XSCALE_CPAR_MASK) >> ARM_TBFLAG_XSCALE_CPAR_SHIFT)
-#define ARM_TBFLAG_NS(F) \
- (((F) & ARM_TBFLAG_NS_MASK) >> ARM_TBFLAG_NS_SHIFT)
-#define ARM_TBFLAG_BE_DATA(F) \
- (((F) & ARM_TBFLAG_BE_DATA_MASK) >> ARM_TBFLAG_BE_DATA_SHIFT)
-#define ARM_TBFLAG_HANDLER(F) \
- (((F) & ARM_TBFLAG_HANDLER_MASK) >> ARM_TBFLAG_HANDLER_SHIFT)
-#define ARM_TBFLAG_STACKCHECK(F) \
- (((F) & ARM_TBFLAG_STACKCHECK_MASK) >> ARM_TBFLAG_STACKCHECK_SHIFT)
-#define ARM_TBFLAG_TBI0(F) \
- (((F) & ARM_TBFLAG_TBI0_MASK) >> ARM_TBFLAG_TBI0_SHIFT)
-#define ARM_TBFLAG_TBI1(F) \
- (((F) & ARM_TBFLAG_TBI1_MASK) >> ARM_TBFLAG_TBI1_SHIFT)
-#define ARM_TBFLAG_SVEEXC_EL(F) \
- (((F) & ARM_TBFLAG_SVEEXC_EL_MASK) >> ARM_TBFLAG_SVEEXC_EL_SHIFT)
-#define ARM_TBFLAG_ZCR_LEN(F) \
- (((F) & ARM_TBFLAG_ZCR_LEN_MASK) >> ARM_TBFLAG_ZCR_LEN_SHIFT)
+FIELD(TBFLAG_A32, NS, 10, 1)
+/*
+ * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not.
+ * This requires an SME trap from AArch32 mode when using NEON.
+ */
+FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1)
+
+/*
+ * Bit usage when in AArch32 state, for M-profile only.
+ */
+/* Handler (ie not Thread) mode */
+FIELD(TBFLAG_M32, HANDLER, 0, 1)
+/* Whether we should generate stack-limit checks */
+FIELD(TBFLAG_M32, STACKCHECK, 1, 1)
+/* Set if FPCCR.LSPACT is set */
+FIELD(TBFLAG_M32, LSPACT, 2, 1) /* Not cached. */
+/* Set if we must create a new FP context */
+FIELD(TBFLAG_M32, NEW_FP_CTXT_NEEDED, 3, 1) /* Not cached. */
+/* Set if FPCCR.S does not match current security state */
+FIELD(TBFLAG_M32, FPCCR_S_WRONG, 4, 1) /* Not cached. */
+/* Set if MVE insns are definitely not predicated by VPR or LTPSIZE */
+FIELD(TBFLAG_M32, MVE_NO_PRED, 5, 1) /* Not cached. */
+/* Set if in secure mode */
+FIELD(TBFLAG_M32, SECURE, 6, 1)
+
+/*
+ * Bit usage when in AArch64 state
+ */
+FIELD(TBFLAG_A64, TBII, 0, 2)
+FIELD(TBFLAG_A64, SVEEXC_EL, 2, 2)
+/* The current vector length, either NVL or SVL. */
+FIELD(TBFLAG_A64, VL, 4, 4)
+FIELD(TBFLAG_A64, PAUTH_ACTIVE, 8, 1)
+FIELD(TBFLAG_A64, BT, 9, 1)
+FIELD(TBFLAG_A64, BTYPE, 10, 2) /* Not cached. */
+FIELD(TBFLAG_A64, TBID, 12, 2)
+FIELD(TBFLAG_A64, UNPRIV, 14, 1)
+FIELD(TBFLAG_A64, ATA, 15, 1)
+FIELD(TBFLAG_A64, TCMA, 16, 2)
+FIELD(TBFLAG_A64, MTE_ACTIVE, 18, 1)
+FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1)
+FIELD(TBFLAG_A64, SMEEXC_EL, 20, 2)
+FIELD(TBFLAG_A64, PSTATE_SM, 22, 1)
+FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1)
+FIELD(TBFLAG_A64, SVL, 24, 4)
+/* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */
+FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1)
+FIELD(TBFLAG_A64, TRAP_ERET, 29, 1)
+FIELD(TBFLAG_A64, NAA, 30, 1)
+FIELD(TBFLAG_A64, ATA0, 31, 1)
+FIELD(TBFLAG_A64, NV, 32, 1)
+FIELD(TBFLAG_A64, NV1, 33, 1)
+FIELD(TBFLAG_A64, NV2, 34, 1)
+/* Set if FEAT_NV2 RAM accesses use the EL2&0 translation regime */
+FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1)
+/* Set if FEAT_NV2 RAM accesses are big-endian */
+FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1)
+
+/*
+ * Helpers for using the above. Note that only the A64 accessors use
+ * FIELD_DP64() and FIELD_EX64(), because in the other cases the flags
+ * word either is or might be 32 bits only.
+ */
+#define DP_TBFLAG_ANY(DST, WHICH, VAL) \
+ (DST.flags = FIELD_DP32(DST.flags, TBFLAG_ANY, WHICH, VAL))
+#define DP_TBFLAG_A64(DST, WHICH, VAL) \
+ (DST.flags2 = FIELD_DP64(DST.flags2, TBFLAG_A64, WHICH, VAL))
+#define DP_TBFLAG_A32(DST, WHICH, VAL) \
+ (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_A32, WHICH, VAL))
+#define DP_TBFLAG_M32(DST, WHICH, VAL) \
+ (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_M32, WHICH, VAL))
+#define DP_TBFLAG_AM32(DST, WHICH, VAL) \
+ (DST.flags2 = FIELD_DP32(DST.flags2, TBFLAG_AM32, WHICH, VAL))
+
+#define EX_TBFLAG_ANY(IN, WHICH) FIELD_EX32(IN.flags, TBFLAG_ANY, WHICH)
+#define EX_TBFLAG_A64(IN, WHICH) FIELD_EX64(IN.flags2, TBFLAG_A64, WHICH)
+#define EX_TBFLAG_A32(IN, WHICH) FIELD_EX32(IN.flags2, TBFLAG_A32, WHICH)
+#define EX_TBFLAG_M32(IN, WHICH) FIELD_EX32(IN.flags2, TBFLAG_M32, WHICH)
+#define EX_TBFLAG_AM32(IN, WHICH) FIELD_EX32(IN.flags2, TBFLAG_AM32, WHICH)
+
+/**
+ * sve_vq
+ * @env: the cpu context
+ *
+ * Return the VL cached within env->hflags, in units of quadwords.
+ */
+static inline int sve_vq(CPUARMState *env)
+{
+ return EX_TBFLAG_A64(env->hflags, VL) + 1;
+}
+
+/**
+ * sme_vq
+ * @env: the cpu context
+ *
+ * Return the SVL cached within env->hflags, in units of quadwords.
+ */
+static inline int sme_vq(CPUARMState *env)
+{
+ return EX_TBFLAG_A64(env->hflags, SVL) + 1;
+}
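
Taken together, the FIELD() definitions, the DP_/EX_ macros and the two helpers above describe a simple two-word bitfield scheme: shared flags are deposited into one 32-bit word, mode-specific flags into a second word, and the cached vector length is stored biased by one so that a 4-bit field can express 1..16 quadwords. A self-contained sketch of that pattern, using local deposit/extract helpers in place of QEMU's FIELD_DP32/FIELD_EX32:

#include <stdint.h>
#include <stdio.h>

/* Local stand-ins for FIELD_DP32/FIELD_EX32: insert or extract a
 * <len>-bit field starting at bit <pos>. */
static uint32_t dp32(uint32_t word, int pos, int len, uint32_t val)
{
    uint32_t mask = ((1u << len) - 1) << pos;
    return (word & ~mask) | ((val << pos) & mask);
}

static uint32_t ex32(uint32_t word, int pos, int len)
{
    return (word >> pos) & ((1u << len) - 1);
}

int main(void)
{
    /* Two-word TB flags, like CPUARMTBFlags.flags / .flags2. */
    uint32_t flags = 0, flags2 = 0;

    /* TBFLAG_ANY.MMUIDX occupies bits [7:4] of the shared word. */
    flags = dp32(flags, 4, 4, 5);

    /* TBFLAG_A64.VL occupies bits [7:4] of the mode-specific word and is
     * stored as (vector length in quadwords) - 1, as sve_vq() expects. */
    int vq = 4;                                  /* 512-bit vectors */
    flags2 = dp32(flags2, 4, 4, vq - 1);

    printf("mmu_idx = %u\n", ex32(flags, 4, 4));       /* 5 */
    printf("sve vq  = %u\n", ex32(flags2, 4, 4) + 1);  /* 4 */
    return 0;
}
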
static inline bool bswap_code(bool sctlr_b)
{
#ifdef CONFIG_USER_ONLY
- /* BE8 (SCTLR.B = 0, TARGET_WORDS_BIGENDIAN = 1) is mixed endian.
- * The invalid combination SCTLR.B=1/CPSR.E=1/TARGET_WORDS_BIGENDIAN=0
+ /* BE8 (SCTLR.B = 0, TARGET_BIG_ENDIAN = 1) is mixed endian.
+ * The invalid combination SCTLR.B=1/CPSR.E=1/TARGET_BIG_ENDIAN=0
* would also end up as a mixed-endian mode with BE code, LE data.
*/
- return
-#ifdef TARGET_WORDS_BIGENDIAN
- 1 ^
-#endif
- sctlr_b;
+ return TARGET_BIG_ENDIAN ^ sctlr_b;
#else
/* All code access in ARM is little endian, and there are no loaders
* doing swaps that need to be reversed
@@ -3024,51 +3147,12 @@ static inline bool bswap_code(bool sctlr_b)
#ifdef CONFIG_USER_ONLY
static inline bool arm_cpu_bswap_data(CPUARMState *env)
{
- return
-#ifdef TARGET_WORDS_BIGENDIAN
- 1 ^
-#endif
- arm_cpu_data_is_big_endian(env);
-}
-#endif
-
-#ifndef CONFIG_USER_ONLY
-/**
- * arm_regime_tbi0:
- * @env: CPUARMState
- * @mmu_idx: MMU index indicating required translation regime
- *
- * Extracts the TBI0 value from the appropriate TCR for the current EL
- *
- * Returns: the TBI0 value.
- */
-uint32_t arm_regime_tbi0(CPUARMState *env, ARMMMUIdx mmu_idx);
-
-/**
- * arm_regime_tbi1:
- * @env: CPUARMState
- * @mmu_idx: MMU index indicating required translation regime
- *
- * Extracts the TBI1 value from the appropriate TCR for the current EL
- *
- * Returns: the TBI1 value.
- */
-uint32_t arm_regime_tbi1(CPUARMState *env, ARMMMUIdx mmu_idx);
-#else
-/* We can't handle tagged addresses properly in user-only mode */
-static inline uint32_t arm_regime_tbi0(CPUARMState *env, ARMMMUIdx mmu_idx)
-{
- return 0;
-}
-
-static inline uint32_t arm_regime_tbi1(CPUARMState *env, ARMMMUIdx mmu_idx)
-{
- return 0;
+ return TARGET_BIG_ENDIAN ^ arm_cpu_data_is_big_endian(env);
}
#endif
-void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
- target_ulong *cs_base, uint32_t *flags);
+void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc,
+ uint64_t *cs_base, uint32_t *flags);
enum {
QEMU_PSCI_CONDUIT_DISABLED = 0,
@@ -3119,6 +3203,12 @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, void
*opaque);
/**
+ * arm_rebuild_hflags:
+ * Rebuild the cached TBFLAGS for arbitrary changed processor state.
+ */
+void arm_rebuild_hflags(CPUARMState *env);
+
+/**
* aa32_vfp_dreg:
* Return a pointer to the Dn register within env in 32-bit mode.
*/
@@ -3146,6 +3236,49 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno)
}
/* Shared between translate-sve.c and sve_helper.c. */
-extern const uint64_t pred_esz_masks[4];
+extern const uint64_t pred_esz_masks[5];
+
+/*
+ * AArch64 usage of the PAGE_TARGET_* bits for linux-user.
+ * Note that with the Linux kernel, PROT_MTE may not be cleared by
+ * mprotect, but PROT_BTI may be cleared. C.f. the kernel's VM_ARCH_CLEAR.
+ */
+#define PAGE_BTI PAGE_TARGET_1
+#define PAGE_MTE PAGE_TARGET_2
+#define PAGE_TARGET_STICKY PAGE_MTE
+
+/* We associate one allocation tag per 16 bytes, the minimum. */
+#define LOG2_TAG_GRANULE 4
+#define TAG_GRANULE (1 << LOG2_TAG_GRANULE)
+
+#ifdef CONFIG_USER_ONLY
+#define TARGET_PAGE_DATA_SIZE (TARGET_PAGE_SIZE >> (LOG2_TAG_GRANULE + 1))
+#endif
+
+#ifdef TARGET_TAGGED_ADDRESSES
+/**
+ * cpu_untagged_addr:
+ * @cs: CPU context
+ * @x: tagged address
+ *
+ * Remove any address tag from @x. This is explicitly related to the
+ * linux syscall TIF_TAGGED_ADDR setting, not TBI in general.
+ *
+ * There should be a better place to put this, but we need this in
+ * include/exec/cpu_ldst.h, and not some place linux-user specific.
+ */
+static inline target_ulong cpu_untagged_addr(CPUState *cs, target_ulong x)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ if (cpu->env.tagged_addr_enable) {
+ /*
+ * TBI is enabled for userspace but not kernelspace addresses.
+ * Only clear the tag if bit 55 is clear.
+ */
+ x &= sextract64(x, 0, 56);
+ }
+ return x;
+}
+#endif
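
The tag-stripping trick above is worth spelling out: sextract64(x, 0, 56) keeps bits [55:0] and replicates bit 55 into bits [63:56], so ANDing the result back into x clears a top-byte tag on userspace addresses (bit 55 clear) while leaving kernel-half addresses untouched. A standalone sketch, with a local sign-extract standing in for QEMU's sextract64():

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

/* Local stand-in for sextract64(value, start, length). */
static int64_t sext64(uint64_t value, int start, int length)
{
    return ((int64_t)(value << (64 - length - start))) >> (64 - length);
}

static uint64_t untag(uint64_t x)
{
    /* Mirror of the logic above: keep bits [55:0] and sign-extend bit 55
     * through bits [63:56], so a tag on a user address (bit 55 == 0) is
     * cleared while a kernel-style address (bit 55 == 1) is left alone. */
    return x & sext64(x, 0, 56);
}

int main(void)
{
    printf("%016" PRIx64 "\n", untag(0x3c00123456789abcULL)); /* tag 0x3c dropped */
    printf("%016" PRIx64 "\n", untag(0xffffabcdef012345ULL)); /* unchanged */
    return 0;
}
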
#endif
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 44fdf0f6fa..985b1efe16 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -21,82 +21,575 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "cpu.h"
-#include "qemu-common.h"
-#if !defined(CONFIG_USER_ONLY)
-#include "hw/loader.h"
-#endif
-#include "hw/arm/arm.h"
-#include "sysemu/sysemu.h"
+#include "cpregs.h"
+#include "qemu/module.h"
#include "sysemu/kvm.h"
+#include "sysemu/hvf.h"
+#include "sysemu/qtest.h"
+#include "sysemu/tcg.h"
#include "kvm_arm.h"
+#include "hvf_arm.h"
#include "qapi/visitor.h"
+#include "hw/qdev-properties.h"
+#include "internals.h"
+#include "cpu-features.h"
+#include "cpregs.h"
+
+void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
+{
+ /*
+ * If any vector lengths are explicitly enabled with sve<N> properties,
+ * then all other lengths are implicitly disabled. If sve-max-vq is
+ * specified then it is the same as explicitly enabling all lengths
+ * up to and including the specified maximum, which means all larger
+ * lengths will be implicitly disabled. If no sve<N> properties
+ * are enabled and sve-max-vq is not specified, then all lengths not
+ * explicitly disabled will be enabled. Additionally, all power-of-two
+ * vector lengths less than the maximum enabled length will be
+ * automatically enabled and all vector lengths larger than the largest
+ * disabled power-of-two vector length will be automatically disabled.
+ * Errors are generated if the user provided input that interferes with
+ * any of the above. Finally, if SVE is not disabled, then at least one
+ * vector length must be enabled.
+ */
+ uint32_t vq_map = cpu->sve_vq.map;
+ uint32_t vq_init = cpu->sve_vq.init;
+ uint32_t vq_supported;
+ uint32_t vq_mask = 0;
+ uint32_t tmp, vq, max_vq = 0;
+
+ /*
+ * CPU models specify a set of supported vector lengths which are
+ * enabled by default. Attempting to enable any vector length not set
+ * in the supported bitmap results in an error. When KVM is enabled we
+ * fetch the supported bitmap from the host.
+ */
+ if (kvm_enabled()) {
+ if (kvm_arm_sve_supported()) {
+ cpu->sve_vq.supported = kvm_arm_sve_get_vls(cpu);
+ vq_supported = cpu->sve_vq.supported;
+ } else {
+ assert(!cpu_isar_feature(aa64_sve, cpu));
+ vq_supported = 0;
+ }
+ } else {
+ vq_supported = cpu->sve_vq.supported;
+ }
+
+ /*
+ * Process explicit sve<N> properties.
+ * From the properties, sve_vq_map<N> implies sve_vq_init<N>.
+ * Check first for any sve<N> enabled.
+ */
+ if (vq_map != 0) {
+ max_vq = 32 - clz32(vq_map);
+ vq_mask = MAKE_64BIT_MASK(0, max_vq);
+
+ if (cpu->sve_max_vq && max_vq > cpu->sve_max_vq) {
+ error_setg(errp, "cannot enable sve%d", max_vq * 128);
+ error_append_hint(errp, "sve%d is larger than the maximum vector "
+ "length, sve-max-vq=%d (%d bits)\n",
+ max_vq * 128, cpu->sve_max_vq,
+ cpu->sve_max_vq * 128);
+ return;
+ }
+
+ if (kvm_enabled()) {
+ /*
+ * For KVM we have to automatically enable all supported uninitialized
+ * lengths, even when the smaller lengths are not all powers-of-two.
+ */
+ vq_map |= vq_supported & ~vq_init & vq_mask;
+ } else {
+ /* Propagate enabled bits down through required powers-of-two. */
+ vq_map |= SVE_VQ_POW2_MAP & ~vq_init & vq_mask;
+ }
+ } else if (cpu->sve_max_vq == 0) {
+ /*
+ * No explicit bits enabled, and no implicit bits from sve-max-vq.
+ */
+ if (!cpu_isar_feature(aa64_sve, cpu)) {
+ /* SVE is disabled and so are all vector lengths. Good. */
+ return;
+ }
+
+ if (kvm_enabled()) {
+ /* Disabling a supported length disables all larger lengths. */
+ tmp = vq_init & vq_supported;
+ } else {
+ /* Disabling a power-of-two disables all larger lengths. */
+ tmp = vq_init & SVE_VQ_POW2_MAP;
+ }
+ vq = ctz32(tmp) + 1;
+
+ max_vq = vq <= ARM_MAX_VQ ? vq - 1 : ARM_MAX_VQ;
+ vq_mask = max_vq > 0 ? MAKE_64BIT_MASK(0, max_vq) : 0;
+ vq_map = vq_supported & ~vq_init & vq_mask;
+
+ if (vq_map == 0) {
+ error_setg(errp, "cannot disable sve%d", vq * 128);
+ error_append_hint(errp, "Disabling sve%d results in all "
+ "vector lengths being disabled.\n",
+ vq * 128);
+ error_append_hint(errp, "With SVE enabled, at least one "
+ "vector length must be enabled.\n");
+ return;
+ }
+
+ max_vq = 32 - clz32(vq_map);
+ vq_mask = MAKE_64BIT_MASK(0, max_vq);
+ }
+
+ /*
+ * Process the sve-max-vq property.
+ * Note that we know from the above that no bit above
+ * sve-max-vq is currently set.
+ */
+ if (cpu->sve_max_vq != 0) {
+ max_vq = cpu->sve_max_vq;
+ vq_mask = MAKE_64BIT_MASK(0, max_vq);
+
+ if (vq_init & ~vq_map & (1 << (max_vq - 1))) {
+ error_setg(errp, "cannot disable sve%d", max_vq * 128);
+ error_append_hint(errp, "The maximum vector length must be "
+ "enabled, sve-max-vq=%d (%d bits)\n",
+ max_vq, max_vq * 128);
+ return;
+ }
+
+ /* Set all bits not explicitly set within sve-max-vq. */
+ vq_map |= ~vq_init & vq_mask;
+ }
+
+ /*
+ * We should know what max-vq is now. Also, as we're done
+ * manipulating sve-vq-map, we ensure any bits above max-vq
+ * are clear, just in case anybody looks.
+ */
+ assert(max_vq != 0);
+ assert(vq_mask != 0);
+ vq_map &= vq_mask;
+
+ /* Ensure the set of lengths matches what is supported. */
+ tmp = vq_map ^ (vq_supported & vq_mask);
+ if (tmp) {
+ vq = 32 - clz32(tmp);
+ if (vq_map & (1 << (vq - 1))) {
+ if (cpu->sve_max_vq) {
+ error_setg(errp, "cannot set sve-max-vq=%d", cpu->sve_max_vq);
+ error_append_hint(errp, "This CPU does not support "
+ "the vector length %d-bits.\n", vq * 128);
+ error_append_hint(errp, "It may not be possible to use "
+ "sve-max-vq with this CPU. Try "
+ "using only sve<N> properties.\n");
+ } else {
+ error_setg(errp, "cannot enable sve%d", vq * 128);
+ if (vq_supported) {
+ error_append_hint(errp, "This CPU does not support "
+ "the vector length %d-bits.\n", vq * 128);
+ } else {
+ error_append_hint(errp, "SVE not supported by KVM "
+ "on this host\n");
+ }
+ }
+ return;
+ } else {
+ if (kvm_enabled()) {
+ error_setg(errp, "cannot disable sve%d", vq * 128);
+ error_append_hint(errp, "The KVM host requires all "
+ "supported vector lengths smaller "
+ "than %d bits to also be enabled.\n",
+ max_vq * 128);
+ return;
+ } else {
+ /* Ensure all required powers-of-two are enabled. */
+ tmp = SVE_VQ_POW2_MAP & vq_mask & ~vq_map;
+ if (tmp) {
+ vq = 32 - clz32(tmp);
+ error_setg(errp, "cannot disable sve%d", vq * 128);
+ error_append_hint(errp, "sve%d is required as it "
+ "is a power-of-two length smaller "
+ "than the maximum, sve%d\n",
+ vq * 128, max_vq * 128);
+ return;
+ }
+ }
+ }
+ }
+
+ /*
+ * Now that we validated all our vector lengths, the only question
+ * left to answer is if we even want SVE at all.
+ */
+ if (!cpu_isar_feature(aa64_sve, cpu)) {
+ error_setg(errp, "cannot enable sve%d", max_vq * 128);
+ error_append_hint(errp, "SVE must be enabled to enable vector "
+ "lengths.\n");
+ error_append_hint(errp, "Add sve=on to the CPU property list.\n");
+ return;
+ }
+
+ /* From now on sve_max_vq is the actual maximum supported length. */
+ cpu->sve_max_vq = max_vq;
+ cpu->sve_vq.map = vq_map;
+}
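
The finalize logic above treats cpu->sve_vq.map as a bitmap in which bit N-1 stands for the N*128-bit vector length; for TCG, enabling any length implicitly enables every smaller power-of-two length via SVE_VQ_POW2_MAP, and the maximum is recovered with 32 - clz32(map). A small sketch of just that propagation step, with the power-of-two map written out locally (ARM_MAX_VQ == 16 is assumed here):

#include <stdint.h>
#include <stdio.h>

/* Bits for vq = 1, 2, 4, 8, 16 -> bit positions 0, 1, 3, 7, 15
 * (assumed equivalent of SVE_VQ_POW2_MAP for a 16-entry map). */
#define VQ_POW2_MAP ((1u << 0) | (1u << 1) | (1u << 3) | (1u << 7) | (1u << 15))

static int max_vq_of(uint32_t map)
{
    return map ? 32 - __builtin_clz(map) : 0;   /* clz32() equivalent */
}

int main(void)
{
    /* User asks for sve512 only: bit 3 (vq = 4). */
    uint32_t vq_map  = 1u << 3;
    uint32_t vq_init = 1u << 3;     /* which sve<N> properties were touched */

    int max_vq = max_vq_of(vq_map);             /* 4 */
    uint32_t vq_mask = (1u << max_vq) - 1;

    /* TCG rule from the patch: propagate enabled bits down through the
     * required powers of two that the user did not explicitly set. */
    vq_map |= VQ_POW2_MAP & ~vq_init & vq_mask;

    printf("max_vq = %d, map = 0x%04x\n", max_vq, vq_map);
    /* Prints: max_vq = 4, map = 0x000b -> sve128, sve256, sve512 enabled. */
    return 0;
}
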
+
+/*
+ * Note that cpu_arm_{get,set}_vq cannot use the simpler
+ * object_property_add_bool interface because they make use of the
+ * contents of "name" to determine which bit on which to operate.
+ */
+static void cpu_arm_get_vq(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ ARMVQMap *vq_map = opaque;
+ uint32_t vq = atoi(&name[3]) / 128;
+ bool sve = vq_map == &cpu->sve_vq;
+ bool value;
+
+ /* All vector lengths are disabled when feature is off. */
+ if (sve
+ ? !cpu_isar_feature(aa64_sve, cpu)
+ : !cpu_isar_feature(aa64_sme, cpu)) {
+ value = false;
+ } else {
+ value = extract32(vq_map->map, vq - 1, 1);
+ }
+ visit_type_bool(v, name, &value, errp);
+}
+
+static void cpu_arm_set_vq(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ ARMVQMap *vq_map = opaque;
+ uint32_t vq = atoi(&name[3]) / 128;
+ bool value;
+
+ if (!visit_type_bool(v, name, &value, errp)) {
+ return;
+ }
+
+ vq_map->map = deposit32(vq_map->map, vq - 1, 1, value);
+ vq_map->init |= 1 << (vq - 1);
+}
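
The two accessors above derive the bit to operate on from the property name itself: the three-character "sve"/"sme" prefix is skipped, the remaining digits are the vector length in bits, and dividing by 128 yields the 1-based quadword count whose bit lives at position vq - 1. A quick standalone check of that round trip:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Same trick as cpu_arm_{get,set}_vq: skip the 3-char "sve"/"sme" prefix,
 * parse the width in bits, and convert to a 1-based quadword count. */
static int name_to_vq(const char *name)
{
    return atoi(&name[3]) / 128;
}

int main(void)
{
    const char *props[] = { "sve128", "sve256", "sve2048", "sme512" };
    uint32_t map = 0;

    for (int i = 0; i < 4; i++) {
        int vq = name_to_vq(props[i]);
        map |= 1u << (vq - 1);          /* like deposit32(map, vq - 1, 1, true) */
        printf("%-8s -> vq %2d (bit %d)\n", props[i], vq, vq - 1);
    }
    printf("map = 0x%04x\n", map);      /* 0x800b */
    return 0;
}
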
+
+static bool cpu_arm_get_sve(Object *obj, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ return cpu_isar_feature(aa64_sve, cpu);
+}
+
+static void cpu_arm_set_sve(Object *obj, bool value, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ uint64_t t;
+
+ if (value && kvm_enabled() && !kvm_arm_sve_supported()) {
+ error_setg(errp, "'sve' feature not supported by KVM on this host");
+ return;
+ }
-static inline void set_feature(CPUARMState *env, int feature)
+ t = cpu->isar.id_aa64pfr0;
+ t = FIELD_DP64(t, ID_AA64PFR0, SVE, value);
+ cpu->isar.id_aa64pfr0 = t;
+}
+
+void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp)
{
- env->features |= 1ULL << feature;
+ uint32_t vq_map = cpu->sme_vq.map;
+ uint32_t vq_init = cpu->sme_vq.init;
+ uint32_t vq_supported = cpu->sme_vq.supported;
+ uint32_t vq;
+
+ if (vq_map == 0) {
+ if (!cpu_isar_feature(aa64_sme, cpu)) {
+ cpu->isar.id_aa64smfr0 = 0;
+ return;
+ }
+
+ /* TODO: KVM will require limitations via SMCR_EL2. */
+ vq_map = vq_supported & ~vq_init;
+
+ if (vq_map == 0) {
+ vq = ctz32(vq_supported) + 1;
+ error_setg(errp, "cannot disable sme%d", vq * 128);
+ error_append_hint(errp, "All SME vector lengths are disabled.\n");
+ error_append_hint(errp, "With SME enabled, at least one "
+ "vector length must be enabled.\n");
+ return;
+ }
+ } else {
+ if (!cpu_isar_feature(aa64_sme, cpu)) {
+ vq = 32 - clz32(vq_map);
+ error_setg(errp, "cannot enable sme%d", vq * 128);
+ error_append_hint(errp, "SME must be enabled to enable "
+ "vector lengths.\n");
+ error_append_hint(errp, "Add sme=on to the CPU property list.\n");
+ return;
+ }
+ /* TODO: KVM will require limitations via SMCR_EL2. */
+ }
+
+ cpu->sme_vq.map = vq_map;
}
-static inline void unset_feature(CPUARMState *env, int feature)
+static bool cpu_arm_get_sme(Object *obj, Error **errp)
{
- env->features &= ~(1ULL << feature);
+ ARMCPU *cpu = ARM_CPU(obj);
+ return cpu_isar_feature(aa64_sme, cpu);
}
-#ifndef CONFIG_USER_ONLY
-static uint64_t a57_a53_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+static void cpu_arm_set_sme(Object *obj, bool value, Error **errp)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = ARM_CPU(obj);
+ uint64_t t;
- /* Number of cores is in [25:24]; otherwise we RAZ */
- return (cpu->core_count - 1) << 24;
+ t = cpu->isar.id_aa64pfr1;
+ t = FIELD_DP64(t, ID_AA64PFR1, SME, value);
+ cpu->isar.id_aa64pfr1 = t;
+}
+
+static bool cpu_arm_get_sme_fa64(Object *obj, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ return cpu_isar_feature(aa64_sme, cpu) &&
+ cpu_isar_feature(aa64_sme_fa64, cpu);
+}
+
+static void cpu_arm_set_sme_fa64(Object *obj, bool value, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ uint64_t t;
+
+ t = cpu->isar.id_aa64smfr0;
+ t = FIELD_DP64(t, ID_AA64SMFR0, FA64, value);
+ cpu->isar.id_aa64smfr0 = t;
+}
+
+#ifdef CONFIG_USER_ONLY
+/* Mirror linux /proc/sys/abi/{sve,sme}_default_vector_length. */
+static void cpu_arm_set_default_vec_len(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ uint32_t *ptr_default_vq = opaque;
+ int32_t default_len, default_vq, remainder;
+
+ if (!visit_type_int32(v, name, &default_len, errp)) {
+ return;
+ }
+
+ /* Undocumented, but the kernel allows -1 to indicate "maximum". */
+ if (default_len == -1) {
+ *ptr_default_vq = ARM_MAX_VQ;
+ return;
+ }
+
+ default_vq = default_len / 16;
+ remainder = default_len % 16;
+
+ /*
+ * Note that the 512 max comes from include/uapi/asm/sve_context.h
+ * and is the maximum architectural width of ZCR_ELx.LEN.
+ */
+ if (remainder || default_vq < 1 || default_vq > 512) {
+ ARMCPU *cpu = ARM_CPU(obj);
+ const char *which =
+ (ptr_default_vq == &cpu->sve_default_vq ? "sve" : "sme");
+
+ error_setg(errp, "cannot set %s-default-vector-length", which);
+ if (remainder) {
+ error_append_hint(errp, "Vector length not a multiple of 16\n");
+ } else if (default_vq < 1) {
+ error_append_hint(errp, "Vector length smaller than 16\n");
+ } else {
+ error_append_hint(errp, "Vector length larger than %d\n",
+ 512 * 16);
+ }
+ return;
+ }
+
+ *ptr_default_vq = default_vq;
+}
+
+static void cpu_arm_get_default_vec_len(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ uint32_t *ptr_default_vq = opaque;
+ int32_t value = *ptr_default_vq * 16;
+
+ visit_type_int32(v, name, &value, errp);
}
#endif
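
The setter above accepts a byte length, treats -1 as "use the maximum", and otherwise requires a non-zero multiple of 16 no larger than 512 * 16; the value actually stored is the length in quadwords. A minimal sketch of that validation (ARM_MAX_VQ == 16 is an assumption of this sketch):

#include <stdint.h>
#include <stdio.h>

#define ARM_MAX_VQ 16   /* assumed to match QEMU's ARM_MAX_VQ */

/* Return the default vq for a requested byte length, or -1 on error.
 * Mirrors the checks in cpu_arm_set_default_vec_len(): -1 means "maximum",
 * otherwise the length must be a non-zero multiple of 16 no larger than
 * 512 * 16 bytes (the architectural ZCR_ELx.LEN limit). */
static int default_len_to_vq(int32_t default_len)
{
    if (default_len == -1) {
        return ARM_MAX_VQ;
    }
    if (default_len <= 0 || default_len % 16 || default_len / 16 > 512) {
        return -1;
    }
    return default_len / 16;
}

int main(void)
{
    printf("%d\n", default_len_to_vq(64));   /* 4  (512-bit default) */
    printf("%d\n", default_len_to_vq(-1));   /* 16 (maximum) */
    printf("%d\n", default_len_to_vq(24));   /* -1 (not a multiple of 16) */
    return 0;
}
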
-static const ARMCPRegInfo cortex_a72_a57_a53_cp_reginfo[] = {
-#ifndef CONFIG_USER_ONLY
- { .name = "L2CTLR_EL1", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 2,
- .access = PL1_RW, .readfn = a57_a53_l2ctlr_read,
- .writefn = arm_cp_write_ignore },
- { .name = "L2CTLR",
- .cp = 15, .opc1 = 1, .crn = 9, .crm = 0, .opc2 = 2,
- .access = PL1_RW, .readfn = a57_a53_l2ctlr_read,
- .writefn = arm_cp_write_ignore },
+void aarch64_add_sve_properties(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ uint32_t vq;
+
+ object_property_add_bool(obj, "sve", cpu_arm_get_sve, cpu_arm_set_sve);
+
+ for (vq = 1; vq <= ARM_MAX_VQ; ++vq) {
+ char name[8];
+ sprintf(name, "sve%d", vq * 128);
+ object_property_add(obj, name, "bool", cpu_arm_get_vq,
+ cpu_arm_set_vq, NULL, &cpu->sve_vq);
+ }
+
+#ifdef CONFIG_USER_ONLY
+ /* Mirror linux /proc/sys/abi/sve_default_vector_length. */
+ object_property_add(obj, "sve-default-vector-length", "int32",
+ cpu_arm_get_default_vec_len,
+ cpu_arm_set_default_vec_len, NULL,
+ &cpu->sve_default_vq);
#endif
- { .name = "L2ECTLR_EL1", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 3,
- .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "L2ECTLR",
- .cp = 15, .opc1 = 1, .crn = 9, .crm = 0, .opc2 = 3,
- .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "L2ACTLR", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 0, .opc2 = 0,
- .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "CPUACTLR_EL1", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 0,
- .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "CPUACTLR",
- .cp = 15, .opc1 = 0, .crm = 15,
- .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
- { .name = "CPUECTLR_EL1", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 1,
- .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "CPUECTLR",
- .cp = 15, .opc1 = 1, .crm = 15,
- .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
- { .name = "CPUMERRSR_EL1", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 2,
- .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "CPUMERRSR",
- .cp = 15, .opc1 = 2, .crm = 15,
- .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
- { .name = "L2MERRSR_EL1", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 3,
- .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "L2MERRSR",
- .cp = 15, .opc1 = 3, .crm = 15,
- .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
- REGINFO_SENTINEL
-};
+}
+
+void aarch64_add_sme_properties(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ uint32_t vq;
+
+ object_property_add_bool(obj, "sme", cpu_arm_get_sme, cpu_arm_set_sme);
+ object_property_add_bool(obj, "sme_fa64", cpu_arm_get_sme_fa64,
+ cpu_arm_set_sme_fa64);
+
+ for (vq = 1; vq <= ARM_MAX_VQ; vq <<= 1) {
+ char name[8];
+ sprintf(name, "sme%d", vq * 128);
+ object_property_add(obj, name, "bool", cpu_arm_get_vq,
+ cpu_arm_set_vq, NULL, &cpu->sme_vq);
+ }
+
+#ifdef CONFIG_USER_ONLY
+ /* Mirror linux /proc/sys/abi/sme_default_vector_length. */
+ object_property_add(obj, "sme-default-vector-length", "int32",
+ cpu_arm_get_default_vec_len,
+ cpu_arm_set_default_vec_len, NULL,
+ &cpu->sme_default_vq);
+#endif
+}
+
+void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp)
+{
+ ARMPauthFeature features = cpu_isar_feature(pauth_feature, cpu);
+ uint64_t isar1, isar2;
+
+ /*
+ * These properties enable or disable Pauth as a whole, or change
+ * the pauth algorithm, but do not change the set of features that
+ * are present. We have saved a copy of those features above and
+ * will now place it into the field that chooses the algorithm.
+ *
+ * Begin by disabling all fields.
+ */
+ isar1 = cpu->isar.id_aa64isar1;
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, 0);
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 0);
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, 0);
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 0);
+
+ isar2 = cpu->isar.id_aa64isar2;
+ isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, 0);
+ isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 0);
+
+ if (kvm_enabled() || hvf_enabled()) {
+ /*
+ * Exit early if PAuth is enabled and fall through to disable it.
+ * The algorithm selection properties are not present.
+ */
+ if (cpu->prop_pauth) {
+ if (features == 0) {
+ error_setg(errp, "'pauth' feature not supported by "
+ "%s on this host", current_accel_name());
+ }
+ return;
+ }
+ } else {
+ /* Pauth properties are only present when the model supports it. */
+ if (features == 0) {
+ assert(!cpu->prop_pauth);
+ return;
+ }
+
+ if (cpu->prop_pauth) {
+ if (cpu->prop_pauth_impdef && cpu->prop_pauth_qarma3) {
+ error_setg(errp,
+ "cannot enable both pauth-impdef and pauth-qarma3");
+ return;
+ }
+
+ if (cpu->prop_pauth_impdef) {
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, features);
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 1);
+ } else if (cpu->prop_pauth_qarma3) {
+ isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, features);
+ isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 1);
+ } else {
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features);
+ isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1);
+ }
+ } else if (cpu->prop_pauth_impdef || cpu->prop_pauth_qarma3) {
+ error_setg(errp, "cannot enable pauth-impdef or "
+ "pauth-qarma3 without pauth");
+ error_append_hint(errp, "Add pauth=on to the CPU property list.\n");
+ }
+ }
+
+ cpu->isar.id_aa64isar1 = isar1;
+ cpu->isar.id_aa64isar2 = isar2;
+}
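
arm_cpu_pauth_finalize() zeroes all six algorithm ID fields and then re-enables exactly one of three mutually exclusive choices: the architected QARMA5 algorithm (APA/GPA), the IMPDEF algorithm (API/GPI), or QARMA3 (APA3/GPA3). Leaving the ID-register plumbing aside, the selection policy of the TCG branch can be sketched roughly as follows (the enum and error strings are inventions of this sketch):

#include <stdbool.h>
#include <stdio.h>

typedef enum { PAUTH_OFF, PAUTH_QARMA5, PAUTH_IMPDEF, PAUTH_QARMA3 } PauthAlgo;

/* At most one of the impdef/qarma3 sub-properties may be set, and neither
 * makes sense with pauth itself disabled. Returns the chosen algorithm,
 * or PAUTH_OFF with *err set when the combination is rejected. */
static PauthAlgo pick_pauth(bool pauth, bool impdef, bool qarma3, const char **err)
{
    *err = NULL;
    if (!pauth) {
        if (impdef || qarma3) {
            *err = "cannot enable pauth-impdef or pauth-qarma3 without pauth";
        }
        return PAUTH_OFF;
    }
    if (impdef && qarma3) {
        *err = "cannot enable both pauth-impdef and pauth-qarma3";
        return PAUTH_OFF;
    }
    return impdef ? PAUTH_IMPDEF : qarma3 ? PAUTH_QARMA3 : PAUTH_QARMA5;
}

int main(void)
{
    const char *err;
    printf("%d\n", pick_pauth(true, false, false, &err)); /* 1: QARMA5 default */
    printf("%d\n", pick_pauth(true, false, true, &err));  /* 3: QARMA3 */
    pick_pauth(false, true, false, &err);
    printf("%s\n", err);                                   /* rejected */
    return 0;
}
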
+
+static Property arm_cpu_pauth_property =
+ DEFINE_PROP_BOOL("pauth", ARMCPU, prop_pauth, true);
+static Property arm_cpu_pauth_impdef_property =
+ DEFINE_PROP_BOOL("pauth-impdef", ARMCPU, prop_pauth_impdef, false);
+static Property arm_cpu_pauth_qarma3_property =
+ DEFINE_PROP_BOOL("pauth-qarma3", ARMCPU, prop_pauth_qarma3, false);
+
+void aarch64_add_pauth_properties(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ /* Default to PAUTH on, with the architected algorithm on TCG. */
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_property);
+ if (kvm_enabled() || hvf_enabled()) {
+ /*
+ * Mirror PAuth support from the probed sysregs back into the
+ * property for KVM or hvf. Is it just a bit backward? Yes it is!
+ * Note that prop_pauth is true whether the host CPU supports the
+ * architected QARMA5 algorithm or the IMPDEF one. We don't
+ * provide the separate pauth-impdef property for KVM or hvf,
+ * only for TCG.
+ */
+ cpu->prop_pauth = cpu_isar_feature(aa64_pauth, cpu);
+ } else {
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_impdef_property);
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_qarma3_property);
+ }
+}
+
+void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp)
+{
+ uint64_t t;
+
+ /*
+ * We only install the property for tcg -cpu max; this is the
+ * only situation in which the cpu field can be true.
+ */
+ if (!cpu->prop_lpa2) {
+ return;
+ }
+
+ t = cpu->isar.id_aa64mmfr0;
+ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 2); /* 16k pages w/ LPA2 */
+ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4, 1); /* 4k pages w/ LPA2 */
+ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16_2, 3); /* 16k stage2 w/ LPA2 */
+ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 3); /* 4k stage2 w/ LPA2 */
+ cpu->isar.id_aa64mmfr0 = t;
+}
static void aarch64_a57_initfn(Object *obj)
{
@@ -104,16 +597,10 @@ static void aarch64_a57_initfn(Object *obj)
cpu->dtb_compatible = "arm,cortex-a57";
set_feature(&cpu->env, ARM_FEATURE_V8);
- set_feature(&cpu->env, ARM_FEATURE_VFP4);
set_feature(&cpu->env, ARM_FEATURE_NEON);
set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
set_feature(&cpu->env, ARM_FEATURE_AARCH64);
set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
- set_feature(&cpu->env, ARM_FEATURE_V8_AES);
- set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
- set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
- set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
- set_feature(&cpu->env, ARM_FEATURE_CRC);
set_feature(&cpu->env, ARM_FEATURE_EL2);
set_feature(&cpu->env, ARM_FEATURE_EL3);
set_feature(&cpu->env, ARM_FEATURE_PMU);
@@ -121,33 +608,34 @@ static void aarch64_a57_initfn(Object *obj)
cpu->midr = 0x411fd070;
cpu->revidr = 0x00000000;
cpu->reset_fpsid = 0x41034070;
- cpu->mvfr0 = 0x10110222;
- cpu->mvfr1 = 0x12111111;
- cpu->mvfr2 = 0x00000043;
+ cpu->isar.mvfr0 = 0x10110222;
+ cpu->isar.mvfr1 = 0x12111111;
+ cpu->isar.mvfr2 = 0x00000043;
cpu->ctr = 0x8444c004;
cpu->reset_sctlr = 0x00c50838;
- cpu->id_pfr0 = 0x00000131;
- cpu->id_pfr1 = 0x00011011;
- cpu->id_dfr0 = 0x03010066;
+ cpu->isar.id_pfr0 = 0x00000131;
+ cpu->isar.id_pfr1 = 0x00011011;
+ cpu->isar.id_dfr0 = 0x03010066;
cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x10101105;
- cpu->id_mmfr1 = 0x40000000;
- cpu->id_mmfr2 = 0x01260000;
- cpu->id_mmfr3 = 0x02102211;
- cpu->id_isar0 = 0x02101110;
- cpu->id_isar1 = 0x13112111;
- cpu->id_isar2 = 0x21232042;
- cpu->id_isar3 = 0x01112131;
- cpu->id_isar4 = 0x00011142;
- cpu->id_isar5 = 0x00011121;
- cpu->id_isar6 = 0;
- cpu->id_aa64pfr0 = 0x00002222;
- cpu->id_aa64dfr0 = 0x10305106;
- cpu->pmceid0 = 0x00000000;
- cpu->pmceid1 = 0x00000000;
- cpu->id_aa64isar0 = 0x00011120;
- cpu->id_aa64mmfr0 = 0x00001124;
- cpu->dbgdidr = 0x3516d000;
+ cpu->isar.id_mmfr0 = 0x10101105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01260000;
+ cpu->isar.id_mmfr3 = 0x02102211;
+ cpu->isar.id_isar0 = 0x02101110;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232042;
+ cpu->isar.id_isar3 = 0x01112131;
+ cpu->isar.id_isar4 = 0x00011142;
+ cpu->isar.id_isar5 = 0x00011121;
+ cpu->isar.id_isar6 = 0;
+ cpu->isar.id_aa64pfr0 = 0x00002222;
+ cpu->isar.id_aa64dfr0 = 0x10305106;
+ cpu->isar.id_aa64isar0 = 0x00011120;
+ cpu->isar.id_aa64mmfr0 = 0x00001124;
+ cpu->isar.dbgdidr = 0x3516d000;
+ cpu->isar.dbgdevid = 0x01110f13;
+ cpu->isar.dbgdevid1 = 0x2;
+ cpu->isar.reset_pmcr_el0 = 0x41013000;
cpu->clidr = 0x0a200023;
cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
@@ -156,7 +644,8 @@ static void aarch64_a57_initfn(Object *obj)
cpu->gic_num_lrs = 4;
cpu->gic_vpribits = 5;
cpu->gic_vprebits = 5;
- define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
+ cpu->gic_pribits = 5;
+ define_cortex_a72_a57_a53_cp_reginfo(cpu);
}
static void aarch64_a53_initfn(Object *obj)
@@ -165,48 +654,45 @@ static void aarch64_a53_initfn(Object *obj)
cpu->dtb_compatible = "arm,cortex-a53";
set_feature(&cpu->env, ARM_FEATURE_V8);
- set_feature(&cpu->env, ARM_FEATURE_VFP4);
set_feature(&cpu->env, ARM_FEATURE_NEON);
set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
set_feature(&cpu->env, ARM_FEATURE_AARCH64);
set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
- set_feature(&cpu->env, ARM_FEATURE_V8_AES);
- set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
- set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
- set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
- set_feature(&cpu->env, ARM_FEATURE_CRC);
set_feature(&cpu->env, ARM_FEATURE_EL2);
set_feature(&cpu->env, ARM_FEATURE_EL3);
set_feature(&cpu->env, ARM_FEATURE_PMU);
cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A53;
cpu->midr = 0x410fd034;
- cpu->revidr = 0x00000000;
+ cpu->revidr = 0x00000100;
cpu->reset_fpsid = 0x41034070;
- cpu->mvfr0 = 0x10110222;
- cpu->mvfr1 = 0x12111111;
- cpu->mvfr2 = 0x00000043;
+ cpu->isar.mvfr0 = 0x10110222;
+ cpu->isar.mvfr1 = 0x12111111;
+ cpu->isar.mvfr2 = 0x00000043;
cpu->ctr = 0x84448004; /* L1Ip = VIPT */
cpu->reset_sctlr = 0x00c50838;
- cpu->id_pfr0 = 0x00000131;
- cpu->id_pfr1 = 0x00011011;
- cpu->id_dfr0 = 0x03010066;
+ cpu->isar.id_pfr0 = 0x00000131;
+ cpu->isar.id_pfr1 = 0x00011011;
+ cpu->isar.id_dfr0 = 0x03010066;
cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x10101105;
- cpu->id_mmfr1 = 0x40000000;
- cpu->id_mmfr2 = 0x01260000;
- cpu->id_mmfr3 = 0x02102211;
- cpu->id_isar0 = 0x02101110;
- cpu->id_isar1 = 0x13112111;
- cpu->id_isar2 = 0x21232042;
- cpu->id_isar3 = 0x01112131;
- cpu->id_isar4 = 0x00011142;
- cpu->id_isar5 = 0x00011121;
- cpu->id_isar6 = 0;
- cpu->id_aa64pfr0 = 0x00002222;
- cpu->id_aa64dfr0 = 0x10305106;
- cpu->id_aa64isar0 = 0x00011120;
- cpu->id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */
- cpu->dbgdidr = 0x3516d000;
+ cpu->isar.id_mmfr0 = 0x10101105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01260000;
+ cpu->isar.id_mmfr3 = 0x02102211;
+ cpu->isar.id_isar0 = 0x02101110;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232042;
+ cpu->isar.id_isar3 = 0x01112131;
+ cpu->isar.id_isar4 = 0x00011142;
+ cpu->isar.id_isar5 = 0x00011121;
+ cpu->isar.id_isar6 = 0;
+ cpu->isar.id_aa64pfr0 = 0x00002222;
+ cpu->isar.id_aa64dfr0 = 0x10305106;
+ cpu->isar.id_aa64isar0 = 0x00011120;
+ cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */
+ cpu->isar.dbgdidr = 0x3516d000;
+ cpu->isar.dbgdevid = 0x00110f13;
+ cpu->isar.dbgdevid1 = 0x1;
+ cpu->isar.reset_pmcr_el0 = 0x41033000;
cpu->clidr = 0x0a200023;
cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */
cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */
@@ -215,146 +701,53 @@ static void aarch64_a53_initfn(Object *obj)
cpu->gic_num_lrs = 4;
cpu->gic_vpribits = 5;
cpu->gic_vprebits = 5;
- define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
-}
-
-static void aarch64_a72_initfn(Object *obj)
-{
- ARMCPU *cpu = ARM_CPU(obj);
-
- cpu->dtb_compatible = "arm,cortex-a72";
- set_feature(&cpu->env, ARM_FEATURE_V8);
- set_feature(&cpu->env, ARM_FEATURE_VFP4);
- set_feature(&cpu->env, ARM_FEATURE_NEON);
- set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
- set_feature(&cpu->env, ARM_FEATURE_AARCH64);
- set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
- set_feature(&cpu->env, ARM_FEATURE_V8_AES);
- set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
- set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
- set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
- set_feature(&cpu->env, ARM_FEATURE_CRC);
- set_feature(&cpu->env, ARM_FEATURE_EL2);
- set_feature(&cpu->env, ARM_FEATURE_EL3);
- set_feature(&cpu->env, ARM_FEATURE_PMU);
- cpu->midr = 0x410fd083;
- cpu->revidr = 0x00000000;
- cpu->reset_fpsid = 0x41034080;
- cpu->mvfr0 = 0x10110222;
- cpu->mvfr1 = 0x12111111;
- cpu->mvfr2 = 0x00000043;
- cpu->ctr = 0x8444c004;
- cpu->reset_sctlr = 0x00c50838;
- cpu->id_pfr0 = 0x00000131;
- cpu->id_pfr1 = 0x00011011;
- cpu->id_dfr0 = 0x03010066;
- cpu->id_afr0 = 0x00000000;
- cpu->id_mmfr0 = 0x10201105;
- cpu->id_mmfr1 = 0x40000000;
- cpu->id_mmfr2 = 0x01260000;
- cpu->id_mmfr3 = 0x02102211;
- cpu->id_isar0 = 0x02101110;
- cpu->id_isar1 = 0x13112111;
- cpu->id_isar2 = 0x21232042;
- cpu->id_isar3 = 0x01112131;
- cpu->id_isar4 = 0x00011142;
- cpu->id_isar5 = 0x00011121;
- cpu->id_aa64pfr0 = 0x00002222;
- cpu->id_aa64dfr0 = 0x10305106;
- cpu->pmceid0 = 0x00000000;
- cpu->pmceid1 = 0x00000000;
- cpu->id_aa64isar0 = 0x00011120;
- cpu->id_aa64mmfr0 = 0x00001124;
- cpu->dbgdidr = 0x3516d000;
- cpu->clidr = 0x0a200023;
- cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
- cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
- cpu->ccsidr[2] = 0x707fe07a; /* 1MB L2 cache */
- cpu->dcz_blocksize = 4; /* 64 bytes */
- cpu->gic_num_lrs = 4;
- cpu->gic_vpribits = 5;
- cpu->gic_vprebits = 5;
- define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
+ cpu->gic_pribits = 5;
+ define_cortex_a72_a57_a53_cp_reginfo(cpu);
}
-static void cpu_max_get_sve_vq(Object *obj, Visitor *v, const char *name,
- void *opaque, Error **errp)
+static void aarch64_host_initfn(Object *obj)
{
+#if defined(CONFIG_KVM)
ARMCPU *cpu = ARM_CPU(obj);
- visit_type_uint32(v, name, &cpu->sve_max_vq, errp);
-}
-
-static void cpu_max_set_sve_vq(Object *obj, Visitor *v, const char *name,
- void *opaque, Error **errp)
-{
- ARMCPU *cpu = ARM_CPU(obj);
- Error *err = NULL;
-
- visit_type_uint32(v, name, &cpu->sve_max_vq, &err);
-
- if (!err && (cpu->sve_max_vq == 0 || cpu->sve_max_vq > ARM_MAX_VQ)) {
- error_setg(&err, "unsupported SVE vector length");
- error_append_hint(&err, "Valid sve-max-vq in range [1-%d]\n",
- ARM_MAX_VQ);
+ kvm_arm_set_cpu_features_from_host(cpu);
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ aarch64_add_sve_properties(obj);
+ aarch64_add_pauth_properties(obj);
}
- error_propagate(errp, err);
+#elif defined(CONFIG_HVF)
+ ARMCPU *cpu = ARM_CPU(obj);
+ hvf_arm_set_cpu_features_from_host(cpu);
+ aarch64_add_pauth_properties(obj);
+#else
+ g_assert_not_reached();
+#endif
}
-/* -cpu max: if KVM is enabled, like -cpu host (best possible with this host);
- * otherwise, a CPU with as many features enabled as our emulation supports.
- * The version of '-cpu max' for qemu-system-arm is defined in cpu.c;
- * this only needs to handle 64 bits.
- */
static void aarch64_max_initfn(Object *obj)
{
- ARMCPU *cpu = ARM_CPU(obj);
+ if (kvm_enabled() || hvf_enabled()) {
+ /* With KVM or HVF, '-cpu max' is identical to '-cpu host' */
+ aarch64_host_initfn(obj);
+ return;
+ }
- if (kvm_enabled()) {
- kvm_arm_set_cpu_features_from_host(cpu);
- } else {
+ if (tcg_enabled() || qtest_enabled()) {
aarch64_a57_initfn(obj);
-#ifdef CONFIG_USER_ONLY
- /* We don't set these in system emulation mode for the moment,
- * since we don't correctly set the ID registers to advertise them,
- * and in some cases they're only available in AArch64 and not AArch32,
- * whereas the architecture requires them to be present in both if
- * present in either.
- */
- set_feature(&cpu->env, ARM_FEATURE_V8_SHA512);
- set_feature(&cpu->env, ARM_FEATURE_V8_SHA3);
- set_feature(&cpu->env, ARM_FEATURE_V8_SM3);
- set_feature(&cpu->env, ARM_FEATURE_V8_SM4);
- set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS);
- set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
- set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD);
- set_feature(&cpu->env, ARM_FEATURE_V8_FP16);
- set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
- set_feature(&cpu->env, ARM_FEATURE_SVE);
- /* For usermode -cpu max we can use a larger and more efficient DCZ
- * blocksize since we don't have to follow what the hardware does.
- */
- cpu->ctr = 0x80038003; /* 32 byte I and D cacheline size, VIPT icache */
- cpu->dcz_blocksize = 7; /* 512 bytes */
-#endif
+ }
- cpu->sve_max_vq = ARM_MAX_VQ;
- object_property_add(obj, "sve-max-vq", "uint32", cpu_max_get_sve_vq,
- cpu_max_set_sve_vq, NULL, NULL, &error_fatal);
+ /* '-cpu max' for TCG: we currently do this as "A57 with extra things" */
+ if (tcg_enabled()) {
+ aarch64_max_tcg_initfn(obj);
}
}
-typedef struct ARMCPUInfo {
- const char *name;
- void (*initfn)(Object *obj);
- void (*class_init)(ObjectClass *oc, void *data);
-} ARMCPUInfo;
-
static const ARMCPUInfo aarch64_cpus[] = {
{ .name = "cortex-a57", .initfn = aarch64_a57_initfn },
{ .name = "cortex-a53", .initfn = aarch64_a53_initfn },
- { .name = "cortex-a72", .initfn = aarch64_a72_initfn },
{ .name = "max", .initfn = aarch64_max_initfn },
- { .name = NULL }
+#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
+ { .name = "host", .initfn = aarch64_host_initfn },
+#endif
};
static bool aarch64_cpu_get_aarch64(Object *obj, Error **errp)
@@ -372,73 +765,66 @@ static void aarch64_cpu_set_aarch64(Object *obj, bool value, Error **errp)
* restriction allows us to avoid fixing up functionality that assumes a
* uniform execution state like do_interrupt.
*/
- if (!kvm_enabled()) {
- error_setg(errp, "'aarch64' feature cannot be disabled "
- "unless KVM is enabled");
- return;
- }
-
if (value == false) {
+ if (!kvm_enabled() || !kvm_arm_aarch32_supported()) {
+ error_setg(errp, "'aarch64' feature cannot be disabled "
+ "unless KVM is enabled and 32-bit EL1 "
+ "is supported");
+ return;
+ }
unset_feature(&cpu->env, ARM_FEATURE_AARCH64);
} else {
set_feature(&cpu->env, ARM_FEATURE_AARCH64);
}
}
-static void aarch64_cpu_initfn(Object *obj)
-{
- object_property_add_bool(obj, "aarch64", aarch64_cpu_get_aarch64,
- aarch64_cpu_set_aarch64, NULL);
- object_property_set_description(obj, "aarch64",
- "Set on/off to enable/disable aarch64 "
- "execution state ",
- NULL);
-}
-
static void aarch64_cpu_finalizefn(Object *obj)
{
}
-static void aarch64_cpu_set_pc(CPUState *cs, vaddr value)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- /* It's OK to look at env for the current mode here, because it's
- * never possible for an AArch64 TB to chain to an AArch32 TB.
- * (Otherwise we would need to use synchronize_from_tb instead.)
- */
- if (is_a64(&cpu->env)) {
- cpu->env.pc = value;
- } else {
- cpu->env.regs[15] = value;
- }
-}
-
-static gchar *aarch64_gdb_arch_name(CPUState *cs)
+static const gchar *aarch64_gdb_arch_name(CPUState *cs)
{
- return g_strdup("aarch64");
+ return "aarch64";
}
static void aarch64_cpu_class_init(ObjectClass *oc, void *data)
{
CPUClass *cc = CPU_CLASS(oc);
- cc->cpu_exec_interrupt = arm_cpu_exec_interrupt;
- cc->set_pc = aarch64_cpu_set_pc;
cc->gdb_read_register = aarch64_cpu_gdb_read_register;
cc->gdb_write_register = aarch64_cpu_gdb_write_register;
- cc->gdb_num_core_regs = 34;
cc->gdb_core_xml_file = "aarch64-core.xml";
cc->gdb_arch_name = aarch64_gdb_arch_name;
+
+ object_class_property_add_bool(oc, "aarch64", aarch64_cpu_get_aarch64,
+ aarch64_cpu_set_aarch64);
+ object_class_property_set_description(oc, "aarch64",
+ "Set on/off to enable/disable aarch64 "
+ "execution state ");
+}
+
+static void aarch64_cpu_instance_init(Object *obj)
+{
+ ARMCPUClass *acc = ARM_CPU_GET_CLASS(obj);
+
+ acc->info->initfn(obj);
+ arm_cpu_post_init(obj);
+}
+
+static void cpu_register_class_init(ObjectClass *oc, void *data)
+{
+ ARMCPUClass *acc = ARM_CPU_CLASS(oc);
+
+ acc->info = data;
}
-static void aarch64_cpu_register(const ARMCPUInfo *info)
+void aarch64_cpu_register(const ARMCPUInfo *info)
{
TypeInfo type_info = {
.parent = TYPE_AARCH64_CPU,
- .instance_size = sizeof(ARMCPU),
- .instance_init = info->initfn,
- .class_size = sizeof(ARMCPUClass),
- .class_init = info->class_init,
+ .instance_init = aarch64_cpu_instance_init,
+ .class_init = info->class_init ?: cpu_register_class_init,
+ .class_data = (void *)info,
};
type_info.name = g_strdup_printf("%s-" TYPE_ARM_CPU, info->name);
@@ -449,23 +835,19 @@ static void aarch64_cpu_register(const ARMCPUInfo *info)
static const TypeInfo aarch64_cpu_type_info = {
.name = TYPE_AARCH64_CPU,
.parent = TYPE_ARM_CPU,
- .instance_size = sizeof(ARMCPU),
- .instance_init = aarch64_cpu_initfn,
.instance_finalize = aarch64_cpu_finalizefn,
.abstract = true,
- .class_size = sizeof(AArch64CPUClass),
.class_init = aarch64_cpu_class_init,
};
static void aarch64_cpu_register_types(void)
{
- const ARMCPUInfo *info = aarch64_cpus;
+ size_t i;
type_register_static(&aarch64_cpu_type_info);
- while (info->name) {
- aarch64_cpu_register(info);
- info++;
+ for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) {
+ aarch64_cpu_register(&aarch64_cpus[i]);
}
}
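
The registration loop now walks a fixed-size table rather than a NULL-terminated list, and each entry's initfn is reached through class_data instead of being wired up as instance_init directly. Stripped of the QOM machinery, the table-driven shape is roughly the following (names and output are purely illustrative):

#include <stdio.h>

typedef struct {
    const char *name;
    void (*initfn)(void);
} CPUInfo;

static void init_a57(void) { puts("init cortex-a57"); }
static void init_a53(void) { puts("init cortex-a53"); }
static void init_max(void) { puts("init max"); }

/* Stand-in for the real table; the QEMU version stashes each entry in the
 * class's class_data and calls initfn from the shared instance_init. */
static const CPUInfo cpus[] = {
    { "cortex-a57", init_a57 },
    { "cortex-a53", init_a53 },
    { "max",        init_max },
};

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

int main(void)
{
    /* Same shape as aarch64_cpu_register_types(): no sentinel entry,
     * just iterate ARRAY_SIZE() elements. */
    for (size_t i = 0; i < ARRAY_SIZE(cpus); i++) {
        printf("registering %s-arm-cpu\n", cpus[i].name);
        cpus[i].initfn();
    }
    return 0;
}
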
diff --git a/target/arm/crypto_helper.c b/target/arm/crypto_helper.c
deleted file mode 100644
index f800266727..0000000000
--- a/target/arm/crypto_helper.c
+++ /dev/null
@@ -1,695 +0,0 @@
-/*
- * crypto_helper.c - emulate v8 Crypto Extensions instructions
- *
- * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- */
-
-#include "qemu/osdep.h"
-
-#include "cpu.h"
-#include "exec/helper-proto.h"
-#include "crypto/aes.h"
-
-union CRYPTO_STATE {
- uint8_t bytes[16];
- uint32_t words[4];
- uint64_t l[2];
-};
-
-#ifdef HOST_WORDS_BIGENDIAN
-#define CR_ST_BYTE(state, i) (state.bytes[(15 - (i)) ^ 8])
-#define CR_ST_WORD(state, i) (state.words[(3 - (i)) ^ 2])
-#else
-#define CR_ST_BYTE(state, i) (state.bytes[i])
-#define CR_ST_WORD(state, i) (state.words[i])
-#endif
-
-void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt)
-{
- static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
- static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
- uint64_t *rd = vd;
- uint64_t *rm = vm;
- union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
- union CRYPTO_STATE st = { .l = { rd[0], rd[1] } };
- int i;
-
- assert(decrypt < 2);
-
- /* xor state vector with round key */
- rk.l[0] ^= st.l[0];
- rk.l[1] ^= st.l[1];
-
- /* combine ShiftRows operation and sbox substitution */
- for (i = 0; i < 16; i++) {
- CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
- }
-
- rd[0] = st.l[0];
- rd[1] = st.l[1];
-}
-
-void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt)
-{
- static uint32_t const mc[][256] = { {
- /* MixColumns lookup table */
- 0x00000000, 0x03010102, 0x06020204, 0x05030306,
- 0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
- 0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
- 0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
- 0x30101020, 0x33111122, 0x36121224, 0x35131326,
- 0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
- 0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
- 0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
- 0x60202040, 0x63212142, 0x66222244, 0x65232346,
- 0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
- 0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
- 0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
- 0x50303060, 0x53313162, 0x56323264, 0x55333366,
- 0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
- 0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
- 0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
- 0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
- 0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
- 0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
- 0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
- 0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
- 0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
- 0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
- 0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
- 0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
- 0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
- 0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
- 0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
- 0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
- 0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
- 0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
- 0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
- 0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
- 0x97848413, 0x94858511, 0x91868617, 0x92878715,
- 0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
- 0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
- 0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
- 0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
- 0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
- 0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
- 0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
- 0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
- 0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
- 0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
- 0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
- 0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
- 0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
- 0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
- 0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
- 0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
- 0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
- 0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
- 0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
- 0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
- 0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
- 0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
- 0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
- 0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
- 0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
- 0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
- 0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
- 0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
- 0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
- 0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
- }, {
- /* Inverse MixColumns lookup table */
- 0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
- 0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
- 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
- 0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
- 0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
- 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
- 0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
- 0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
- 0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
- 0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
- 0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
- 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
- 0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
- 0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
- 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
- 0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
- 0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
- 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
- 0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
- 0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
- 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
- 0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
- 0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
- 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
- 0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
- 0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
- 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
- 0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
- 0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
- 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
- 0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
- 0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
- 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
- 0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
- 0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
- 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
- 0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
- 0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
- 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
- 0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
- 0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
- 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
- 0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
- 0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
- 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
- 0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
- 0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
- 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
- 0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
- 0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
- 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
- 0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
- 0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
- 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
- 0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
- 0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
- 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
- 0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
- 0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
- 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
- 0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
- 0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
- 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
- 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
- } };
-
- uint64_t *rd = vd;
- uint64_t *rm = vm;
- union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
- int i;
-
- assert(decrypt < 2);
-
- for (i = 0; i < 16; i += 4) {
- CR_ST_WORD(st, i >> 2) =
- mc[decrypt][CR_ST_BYTE(st, i)] ^
- rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
- rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
- rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
- }
-
- rd[0] = st.l[0];
- rd[1] = st.l[1];
-}
-
-/*
- * SHA-1 logical functions
- */
-
-static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
-{
- return (x & (y ^ z)) ^ z;
-}
-
-static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
-{
- return x ^ y ^ z;
-}
-
-static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
-{
- return (x & y) | ((x | y) & z);
-}
-
-void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op)
-{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
- union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
- union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
- union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
-
- if (op == 3) { /* sha1su0 */
- d.l[0] ^= d.l[1] ^ m.l[0];
- d.l[1] ^= n.l[0] ^ m.l[1];
- } else {
- int i;
-
- for (i = 0; i < 4; i++) {
- uint32_t t;
-
- switch (op) {
- case 0: /* sha1c */
- t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
- break;
- case 1: /* sha1p */
- t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
- break;
- case 2: /* sha1m */
- t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
- break;
- default:
- g_assert_not_reached();
- }
- t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
- + CR_ST_WORD(m, i);
-
- CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
- CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
- CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
- CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
- CR_ST_WORD(d, 0) = t;
- }
- }
- rd[0] = d.l[0];
- rd[1] = d.l[1];
-}
-
-void HELPER(crypto_sha1h)(void *vd, void *vm)
-{
- uint64_t *rd = vd;
- uint64_t *rm = vm;
- union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
-
- CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
- CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
-
- rd[0] = m.l[0];
- rd[1] = m.l[1];
-}
-
-void HELPER(crypto_sha1su1)(void *vd, void *vm)
-{
- uint64_t *rd = vd;
- uint64_t *rm = vm;
- union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
- union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
-
- CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
- CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
- CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
- CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
-
- rd[0] = d.l[0];
- rd[1] = d.l[1];
-}
-
-/*
- * The SHA-256 logical functions, according to
- * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
- */
-
-static uint32_t S0(uint32_t x)
-{
- return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
-}
-
-static uint32_t S1(uint32_t x)
-{
- return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
-}
-
-static uint32_t s0(uint32_t x)
-{
- return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
-}
-
-static uint32_t s1(uint32_t x)
-{
- return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
-}
-
-void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm)
-{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
- union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
- union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
- union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
- int i;
-
- for (i = 0; i < 4; i++) {
- uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
- + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
- + CR_ST_WORD(m, i);
-
- CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
- CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
- CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
- CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
-
- t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
- + S0(CR_ST_WORD(d, 0));
-
- CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
- CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
- CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
- CR_ST_WORD(d, 0) = t;
- }
-
- rd[0] = d.l[0];
- rd[1] = d.l[1];
-}
-
-void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm)
-{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
- union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
- union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
- union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
- int i;
-
- for (i = 0; i < 4; i++) {
- uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
- + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
- + CR_ST_WORD(m, i);
-
- CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
- CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
- CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
- CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
- }
-
- rd[0] = d.l[0];
- rd[1] = d.l[1];
-}
-
-void HELPER(crypto_sha256su0)(void *vd, void *vm)
-{
- uint64_t *rd = vd;
- uint64_t *rm = vm;
- union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
- union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
-
- CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
- CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
- CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
- CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
-
- rd[0] = d.l[0];
- rd[1] = d.l[1];
-}
-
-void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm)
-{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
- union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
- union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
- union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
-
- CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
- CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
- CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
- CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
-
- rd[0] = d.l[0];
- rd[1] = d.l[1];
-}
-
-/*
- * The SHA-512 logical functions (same as above but using 64-bit operands)
- */
-
-static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
-{
- return (x & (y ^ z)) ^ z;
-}
-
-static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
-{
- return (x & y) | ((x | y) & z);
-}
-
-static uint64_t S0_512(uint64_t x)
-{
- return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
-}
-
-static uint64_t S1_512(uint64_t x)
-{
- return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
-}
-
-static uint64_t s0_512(uint64_t x)
-{
- return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
-}
-
-static uint64_t s1_512(uint64_t x)
-{
- return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
-}
-
-void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm)
-{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
- uint64_t d0 = rd[0];
- uint64_t d1 = rd[1];
-
- d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
- d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
-
- rd[0] = d0;
- rd[1] = d1;
-}
-
-void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm)
-{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
- uint64_t d0 = rd[0];
- uint64_t d1 = rd[1];
-
- d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
- d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
-
- rd[0] = d0;
- rd[1] = d1;
-}
-
-void HELPER(crypto_sha512su0)(void *vd, void *vn)
-{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t d0 = rd[0];
- uint64_t d1 = rd[1];
-
- d0 += s0_512(rd[1]);
- d1 += s0_512(rn[0]);
-
- rd[0] = d0;
- rd[1] = d1;
-}
-
-void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm)
-{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
-
- rd[0] += s1_512(rn[0]) + rm[0];
- rd[1] += s1_512(rn[1]) + rm[1];
-}
-
-void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm)
-{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
- union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
- union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
- union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
- uint32_t t;
-
- t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
- CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
-
- t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
- CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
-
- t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
- CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
-
- t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
- CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
-
- rd[0] = d.l[0];
- rd[1] = d.l[1];
-}
-
-void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm)
-{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
- union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
- union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
- union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
- uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
-
- CR_ST_WORD(d, 0) ^= t;
- CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
- CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
- CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
- ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
-
- rd[0] = d.l[0];
- rd[1] = d.l[1];
-}
-
-void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2,
- uint32_t opcode)
-{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
- union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
- union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
- union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
- uint32_t t;
-
- assert(imm2 < 4);
-
- if (opcode == 0 || opcode == 2) {
- /* SM3TT1A, SM3TT2A */
- t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
- } else if (opcode == 1) {
- /* SM3TT1B */
- t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
- } else if (opcode == 3) {
- /* SM3TT2B */
- t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
- } else {
- g_assert_not_reached();
- }
-
- t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
-
- CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
-
- if (opcode < 2) {
- /* SM3TT1A, SM3TT1B */
- t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
-
- CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
- } else {
- /* SM3TT2A, SM3TT2B */
- t += CR_ST_WORD(n, 3);
- t ^= rol32(t, 9) ^ rol32(t, 17);
-
- CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
- }
-
- CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
- CR_ST_WORD(d, 3) = t;
-
- rd[0] = d.l[0];
- rd[1] = d.l[1];
-}
-
-static uint8_t const sm4_sbox[] = {
- 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
- 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
- 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
- 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
- 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
- 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
- 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
- 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
- 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
- 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
- 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
- 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
- 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
- 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
- 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
- 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
- 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
- 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
- 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
- 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
- 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
- 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
- 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
- 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
- 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
- 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
- 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
- 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
- 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
- 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
- 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
- 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
-};
-
-void HELPER(crypto_sm4e)(void *vd, void *vn)
-{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
- union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
- uint32_t t, i;
-
- for (i = 0; i < 4; i++) {
- t = CR_ST_WORD(d, (i + 1) % 4) ^
- CR_ST_WORD(d, (i + 2) % 4) ^
- CR_ST_WORD(d, (i + 3) % 4) ^
- CR_ST_WORD(n, i);
-
- t = sm4_sbox[t & 0xff] |
- sm4_sbox[(t >> 8) & 0xff] << 8 |
- sm4_sbox[(t >> 16) & 0xff] << 16 |
- sm4_sbox[(t >> 24) & 0xff] << 24;
-
- CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
- rol32(t, 24);
- }
-
- rd[0] = d.l[0];
- rd[1] = d.l[1];
-}
-
-void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm)
-{
- uint64_t *rd = vd;
- uint64_t *rn = vn;
- uint64_t *rm = vm;
- union CRYPTO_STATE d;
- union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
- union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
- uint32_t t, i;
-
- d = n;
- for (i = 0; i < 4; i++) {
- t = CR_ST_WORD(d, (i + 1) % 4) ^
- CR_ST_WORD(d, (i + 2) % 4) ^
- CR_ST_WORD(d, (i + 3) % 4) ^
- CR_ST_WORD(m, i);
-
- t = sm4_sbox[t & 0xff] |
- sm4_sbox[(t >> 8) & 0xff] << 8 |
- sm4_sbox[(t >> 16) & 0xff] << 16 |
- sm4_sbox[(t >> 24) & 0xff] << 24;
-
- CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
- }
-
- rd[0] = d.l[0];
- rd[1] = d.l[1];
-}
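
For reference, the s0()/s1() helpers and the crypto_sha256su0/su1 routines in the deleted lines above implement the FIPS 180-4 SHA-256 message-schedule recurrence, which the SHA256SU0/SU1 instruction pair evaluates four schedule words at a time across the 128-bit vector. A minimal scalar sketch of the same recurrence follows; it is self-contained, and the names used here (ror32, sigma0, sigma1, sha256_schedule) are local to the sketch rather than QEMU helpers.

#include <stdint.h>
#include <stdio.h>

/* Rotate right; stands in for QEMU's ror32() in this sketch. */
static uint32_t ror32(uint32_t x, unsigned n)
{
    return (x >> n) | (x << (32 - n));
}

/* FIPS 180-4 small sigma functions, matching s0()/s1() above. */
static uint32_t sigma0(uint32_t x)
{
    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
}

static uint32_t sigma1(uint32_t x)
{
    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
}

/*
 * Scalar message-schedule expansion: W[16..63] from W[0..15].
 * The SHA256SU0/SU1 pair (crypto_sha256su0/su1 above) produces four
 * of these W[t] values per invocation pair, one per vector lane.
 */
static void sha256_schedule(uint32_t W[64])
{
    for (int t = 16; t < 64; t++) {
        W[t] = sigma1(W[t - 2]) + W[t - 7] + sigma0(W[t - 15]) + W[t - 16];
    }
}

int main(void)
{
    uint32_t W[64] = { 0x61626380 };  /* padded block for "abc"; rest zero */
    W[15] = 24;                       /* message length in bits */
    sha256_schedule(W);
    printf("W[16] = %08x\n", W[16]);  /* first expanded schedule word */
    return 0;
}

Running the sketch on the padded block for "abc" prints the first expanded schedule word, which is the scalar equivalent of one lane of a SHA256SU0/SU1 step.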
diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c
new file mode 100644
index 0000000000..7d856acddf
--- /dev/null
+++ b/target/arm/debug_helper.c
@@ -0,0 +1,1280 @@
+/*
+ * ARM debug helpers.
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "cpu.h"
+#include "internals.h"
+#include "cpu-features.h"
+#include "cpregs.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "sysemu/tcg.h"
+
+#ifdef CONFIG_TCG
+/* Return the Exception Level targeted by debug exceptions. */
+static int arm_debug_target_el(CPUARMState *env)
+{
+ bool secure = arm_is_secure(env);
+ bool route_to_el2 = false;
+
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ return 1;
+ }
+
+ if (arm_is_el2_enabled(env)) {
+ route_to_el2 = env->cp15.hcr_el2 & HCR_TGE ||
+ env->cp15.mdcr_el2 & MDCR_TDE;
+ }
+
+ if (route_to_el2) {
+ return 2;
+ } else if (arm_feature(env, ARM_FEATURE_EL3) &&
+ !arm_el_is_aa64(env, 3) && secure) {
+ return 3;
+ } else {
+ return 1;
+ }
+}
+
+/*
+ * Raise an exception to the debug target el.
+ * Modify syndrome to indicate when origin and target EL are the same.
+ */
+G_NORETURN static void
+raise_exception_debug(CPUARMState *env, uint32_t excp, uint32_t syndrome)
+{
+ int debug_el = arm_debug_target_el(env);
+ int cur_el = arm_current_el(env);
+
+ /*
+ * If singlestep is targeting a lower EL than the current one, then
+ * DisasContext.ss_active must be false and we can never get here.
+ * Similarly for watchpoint and breakpoint matches.
+ */
+ assert(debug_el >= cur_el);
+ syndrome |= (debug_el == cur_el) << ARM_EL_EC_SHIFT;
+ raise_exception(env, excp, syndrome, debug_el);
+}
+
+/* See AArch64.GenerateDebugExceptionsFrom() in ARM ARM pseudocode */
+static bool aa64_generate_debug_exceptions(CPUARMState *env)
+{
+ int cur_el = arm_current_el(env);
+ int debug_el;
+
+ if (cur_el == 3) {
+ return false;
+ }
+
+ /* MDCR_EL3.SDD disables debug events from Secure state */
+ if (arm_is_secure_below_el3(env)
+ && extract32(env->cp15.mdcr_el3, 16, 1)) {
+ return false;
+ }
+
+ /*
+ * Same EL to same EL debug exceptions need MDSCR_KDE enabled
+ * while not masking the (D)ebug bit in DAIF.
+ */
+ debug_el = arm_debug_target_el(env);
+
+ if (cur_el == debug_el) {
+ return extract32(env->cp15.mdscr_el1, 13, 1)
+ && !(env->daif & PSTATE_D);
+ }
+
+ /* Otherwise the debug target needs to be a higher EL */
+ return debug_el > cur_el;
+}
+
+static bool aa32_generate_debug_exceptions(CPUARMState *env)
+{
+ int el = arm_current_el(env);
+
+ if (el == 0 && arm_el_is_aa64(env, 1)) {
+ return aa64_generate_debug_exceptions(env);
+ }
+
+ if (arm_is_secure(env)) {
+ int spd;
+
+ if (el == 0 && (env->cp15.sder & 1)) {
+ /*
+ * SDER.SUIDEN means debug exceptions from Secure EL0
+ * are always enabled. Otherwise they are controlled by
+ * SDCR.SPD like those from other Secure ELs.
+ */
+ return true;
+ }
+
+ spd = extract32(env->cp15.mdcr_el3, 14, 2);
+ switch (spd) {
+ case 1:
+ /* SPD == 0b01 is reserved, but behaves as 0b00. */
+ case 0:
+ /*
+ * For 0b00 we return true if external secure invasive debug
+ * is enabled. On real hardware this is controlled by external
+ * signals to the core. QEMU always permits debug, and behaves
+ * as if DBGEN, SPIDEN, NIDEN and SPNIDEN are all tied high.
+ */
+ return true;
+ case 2:
+ return false;
+ case 3:
+ return true;
+ }
+ }
+
+ return el != 2;
+}
+
+/*
+ * Return true if debugging exceptions are currently enabled.
+ * This corresponds to what in ARM ARM pseudocode would be
+ * if UsingAArch32() then
+ * return AArch32.GenerateDebugExceptions()
+ * else
+ * return AArch64.GenerateDebugExceptions()
+ * We choose to push the if() down into this function for clarity,
+ * since the pseudocode has it at all callsites except for the one in
+ * CheckSoftwareStep(), where it is elided because both branches would
+ * always return the same value.
+ */
+bool arm_generate_debug_exceptions(CPUARMState *env)
+{
+ if ((env->cp15.oslsr_el1 & 1) || (env->cp15.osdlr_el1 & 1)) {
+ return false;
+ }
+ if (is_a64(env)) {
+ return aa64_generate_debug_exceptions(env);
+ } else {
+ return aa32_generate_debug_exceptions(env);
+ }
+}
+
+/*
+ * Is single-stepping active? (Note that the "is EL_D AArch64?" check
+ * implicitly means this always returns false in pre-v8 CPUs.)
+ */
+bool arm_singlestep_active(CPUARMState *env)
+{
+ return extract32(env->cp15.mdscr_el1, 0, 1)
+ && arm_el_is_aa64(env, arm_debug_target_el(env))
+ && arm_generate_debug_exceptions(env);
+}
+
+/* Return true if the linked breakpoint entry lbn passes its checks */
+static bool linked_bp_matches(ARMCPU *cpu, int lbn)
+{
+ CPUARMState *env = &cpu->env;
+ uint64_t bcr = env->cp15.dbgbcr[lbn];
+ int brps = arm_num_brps(cpu);
+ int ctx_cmps = arm_num_ctx_cmps(cpu);
+ int bt;
+ uint32_t contextidr;
+ uint64_t hcr_el2;
+
+ /*
+ * Links to unimplemented or non-context aware breakpoints are
+ * CONSTRAINED UNPREDICTABLE: either behave as if disabled, or
+ * as if linked to an UNKNOWN context-aware breakpoint (in which
+ * case DBGWCR<n>_EL1.LBN must indicate that breakpoint).
+ * We choose the former.
+ */
+ if (lbn >= brps || lbn < (brps - ctx_cmps)) {
+ return false;
+ }
+
+ bcr = env->cp15.dbgbcr[lbn];
+
+ if (extract64(bcr, 0, 1) == 0) {
+ /* Linked breakpoint disabled : generate no events */
+ return false;
+ }
+
+ bt = extract64(bcr, 20, 4);
+ hcr_el2 = arm_hcr_el2_eff(env);
+
+ switch (bt) {
+ case 3: /* linked context ID match */
+ switch (arm_current_el(env)) {
+ default:
+ /* Context matches never fire in AArch64 EL3 */
+ return false;
+ case 2:
+ if (!(hcr_el2 & HCR_E2H)) {
+ /* Context matches never fire in EL2 without E2H enabled. */
+ return false;
+ }
+ contextidr = env->cp15.contextidr_el[2];
+ break;
+ case 1:
+ contextidr = env->cp15.contextidr_el[1];
+ break;
+ case 0:
+ if ((hcr_el2 & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+ contextidr = env->cp15.contextidr_el[2];
+ } else {
+ contextidr = env->cp15.contextidr_el[1];
+ }
+ break;
+ }
+ break;
+
+ case 7: /* linked contextidr_el1 match */
+ contextidr = env->cp15.contextidr_el[1];
+ break;
+ case 13: /* linked contextidr_el2 match */
+ contextidr = env->cp15.contextidr_el[2];
+ break;
+
+ case 9: /* linked VMID match (reserved if no EL2) */
+ case 11: /* linked context ID and VMID match (reserved if no EL2) */
+ case 15: /* linked full context ID match */
+ default:
+ /*
+ * Links to Unlinked context breakpoints must generate no
+ * events; we choose to do the same for reserved values too.
+ */
+ return false;
+ }
+
+ /*
+ * We match the whole register even if this is AArch32 using the
+ * short descriptor format (in which case it holds both PROCID and ASID),
+ * since we don't implement the optional v7 context ID masking.
+ */
+ return contextidr == (uint32_t)env->cp15.dbgbvr[lbn];
+}
+
+static bool bp_wp_matches(ARMCPU *cpu, int n, bool is_wp)
+{
+ CPUARMState *env = &cpu->env;
+ uint64_t cr;
+ int pac, hmc, ssc, wt, lbn;
+ /*
+ * Note that for watchpoints the check is against the CPU security
+ * state, not the S/NS attribute on the offending data access.
+ */
+ bool is_secure = arm_is_secure(env);
+ int access_el = arm_current_el(env);
+
+ if (is_wp) {
+ CPUWatchpoint *wp = env->cpu_watchpoint[n];
+
+ if (!wp || !(wp->flags & BP_WATCHPOINT_HIT)) {
+ return false;
+ }
+ cr = env->cp15.dbgwcr[n];
+ if (wp->hitattrs.user) {
+ /*
+ * The LDRT/STRT/LDT/STT "unprivileged access" instructions should
+ * match watchpoints as if they were accesses done at EL0, even if
+ * the CPU is at EL1 or higher.
+ */
+ access_el = 0;
+ }
+ } else {
+ uint64_t pc = is_a64(env) ? env->pc : env->regs[15];
+
+ if (!env->cpu_breakpoint[n] || env->cpu_breakpoint[n]->pc != pc) {
+ return false;
+ }
+ cr = env->cp15.dbgbcr[n];
+ }
+ /*
+ * The WATCHPOINT_HIT flag guarantees us that the watchpoint is
+ * enabled and that the address and access type match; for breakpoints
+ * we know the address matched; check the remaining fields, including
+ * linked breakpoints. We rely on WCR and BCR having the same layout
+ * for the LBN, SSC, HMC, PAC/PMC and is-linked fields.
+ * Note that some combinations of {PAC, HMC, SSC} are reserved and
+ * must act either like some valid combination or as if the watchpoint
+ * were disabled. We choose the former, and use this together with
+ * the fact that EL3 must always be Secure and EL2 must always be
+ * Non-Secure to simplify the code slightly compared to the full
+ * table in the ARM ARM.
+ */
+ pac = FIELD_EX64(cr, DBGWCR, PAC);
+ hmc = FIELD_EX64(cr, DBGWCR, HMC);
+ ssc = FIELD_EX64(cr, DBGWCR, SSC);
+
+ switch (ssc) {
+ case 0:
+ break;
+ case 1:
+ case 3:
+ if (is_secure) {
+ return false;
+ }
+ break;
+ case 2:
+ if (!is_secure) {
+ return false;
+ }
+ break;
+ }
+
+ switch (access_el) {
+ case 3:
+ case 2:
+ if (!hmc) {
+ return false;
+ }
+ break;
+ case 1:
+ if (extract32(pac, 0, 1) == 0) {
+ return false;
+ }
+ break;
+ case 0:
+ if (extract32(pac, 1, 1) == 0) {
+ return false;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ wt = FIELD_EX64(cr, DBGWCR, WT);
+ lbn = FIELD_EX64(cr, DBGWCR, LBN);
+
+ if (wt && !linked_bp_matches(cpu, lbn)) {
+ return false;
+ }
+
+ return true;
+}
+
+static bool check_watchpoints(ARMCPU *cpu)
+{
+ CPUARMState *env = &cpu->env;
+ int n;
+
+ /*
+ * If watchpoints are disabled globally or we can't take debug
+ * exceptions here then watchpoint firings are ignored.
+ */
+ if (extract32(env->cp15.mdscr_el1, 15, 1) == 0
+ || !arm_generate_debug_exceptions(env)) {
+ return false;
+ }
+
+ for (n = 0; n < ARRAY_SIZE(env->cpu_watchpoint); n++) {
+ if (bp_wp_matches(cpu, n, true)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool arm_debug_check_breakpoint(CPUState *cs)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ target_ulong pc;
+ int n;
+
+ /*
+ * If breakpoints are disabled globally or we can't take debug
+ * exceptions here then breakpoint firings are ignored.
+ */
+ if (extract32(env->cp15.mdscr_el1, 15, 1) == 0
+ || !arm_generate_debug_exceptions(env)) {
+ return false;
+ }
+
+ /*
+ * Single-step exceptions have priority over breakpoint exceptions.
+ * If single-step state is active-pending, suppress the bp.
+ */
+ if (arm_singlestep_active(env) && !(env->pstate & PSTATE_SS)) {
+ return false;
+ }
+
+ /*
+ * PC alignment faults have priority over breakpoint exceptions.
+ */
+ pc = is_a64(env) ? env->pc : env->regs[15];
+ if ((is_a64(env) || !env->thumb) && (pc & 3) != 0) {
+ return false;
+ }
+
+ /*
+ * Instruction aborts have priority over breakpoint exceptions.
+ * TODO: We would need to look up the page for PC and verify that
+ * it is present and executable.
+ */
+
+ for (n = 0; n < ARRAY_SIZE(env->cpu_breakpoint); n++) {
+ if (bp_wp_matches(cpu, n, false)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool arm_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp)
+{
+ /*
+ * Called by core code when a CPU watchpoint fires; need to check if this
+ * is also an architectural watchpoint match.
+ */
+ ARMCPU *cpu = ARM_CPU(cs);
+
+ return check_watchpoints(cpu);
+}
+
+/*
+ * Return the FSR value for a debug exception (watchpoint, hardware
+ * breakpoint or BKPT insn) targeting the specified exception level.
+ */
+static uint32_t arm_debug_exception_fsr(CPUARMState *env)
+{
+ ARMMMUFaultInfo fi = { .type = ARMFault_Debug };
+ int target_el = arm_debug_target_el(env);
+ bool using_lpae;
+
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ using_lpae = false;
+ } else if (target_el == 2 || arm_el_is_aa64(env, target_el)) {
+ using_lpae = true;
+ } else if (arm_feature(env, ARM_FEATURE_PMSA) &&
+ arm_feature(env, ARM_FEATURE_V8)) {
+ using_lpae = true;
+ } else if (arm_feature(env, ARM_FEATURE_LPAE) &&
+ (env->cp15.tcr_el[target_el] & TTBCR_EAE)) {
+ using_lpae = true;
+ } else {
+ using_lpae = false;
+ }
+
+ if (using_lpae) {
+ return arm_fi_to_lfsc(&fi);
+ } else {
+ return arm_fi_to_sfsc(&fi);
+ }
+}
+
+void arm_debug_excp_handler(CPUState *cs)
+{
+ /*
+ * Called by core code when a watchpoint or breakpoint fires;
+ * need to check which one and raise the appropriate exception.
+ */
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ CPUWatchpoint *wp_hit = cs->watchpoint_hit;
+
+ if (wp_hit) {
+ if (wp_hit->flags & BP_CPU) {
+ bool wnr = (wp_hit->flags & BP_WATCHPOINT_HIT_WRITE) != 0;
+
+ cs->watchpoint_hit = NULL;
+
+ env->exception.fsr = arm_debug_exception_fsr(env);
+ env->exception.vaddress = wp_hit->hitaddr;
+ raise_exception_debug(env, EXCP_DATA_ABORT,
+ syn_watchpoint(0, 0, wnr));
+ }
+ } else {
+ uint64_t pc = is_a64(env) ? env->pc : env->regs[15];
+
+ /*
+ * (1) GDB breakpoints should be handled first.
+ * (2) Do not raise a CPU exception if no CPU breakpoint has fired,
+ * since singlestep is also done by generating a debug internal
+ * exception.
+ */
+ if (cpu_breakpoint_test(cs, pc, BP_GDB)
+ || !cpu_breakpoint_test(cs, pc, BP_CPU)) {
+ return;
+ }
+
+ env->exception.fsr = arm_debug_exception_fsr(env);
+ /*
+ * FAR is UNKNOWN: clear vaddress to avoid potentially exposing
+ * values to the guest that it shouldn't be able to see at its
+ * exception/security level.
+ */
+ env->exception.vaddress = 0;
+ raise_exception_debug(env, EXCP_PREFETCH_ABORT, syn_breakpoint(0));
+ }
+}
+
+/*
+ * Raise an EXCP_BKPT with the specified syndrome register value,
+ * targeting the correct exception level for debug exceptions.
+ */
+void HELPER(exception_bkpt_insn)(CPUARMState *env, uint32_t syndrome)
+{
+ int debug_el = arm_debug_target_el(env);
+ int cur_el = arm_current_el(env);
+
+ /* FSR will only be used if the debug target EL is AArch32. */
+ env->exception.fsr = arm_debug_exception_fsr(env);
+ /*
+ * FAR is UNKNOWN: clear vaddress to avoid potentially exposing
+ * values to the guest that it shouldn't be able to see at its
+ * exception/security level.
+ */
+ env->exception.vaddress = 0;
+ /*
+ * Other kinds of architectural debug exception are ignored if
+ * they target an exception level below the current one (in QEMU
+ * this is checked by arm_generate_debug_exceptions()). Breakpoint
+ * instructions are special because they always generate an exception
+ * to somewhere: if they can't go to the configured debug exception
+ * level they are taken to the current exception level.
+ */
+ if (debug_el < cur_el) {
+ debug_el = cur_el;
+ }
+ raise_exception(env, EXCP_BKPT, syndrome, debug_el);
+}
+
+void HELPER(exception_swstep)(CPUARMState *env, uint32_t syndrome)
+{
+ raise_exception_debug(env, EXCP_UDEF, syndrome);
+}
+
+void hw_watchpoint_update(ARMCPU *cpu, int n)
+{
+ CPUARMState *env = &cpu->env;
+ vaddr len = 0;
+ vaddr wvr = env->cp15.dbgwvr[n];
+ uint64_t wcr = env->cp15.dbgwcr[n];
+ int mask;
+ int flags = BP_CPU | BP_STOP_BEFORE_ACCESS;
+
+ if (env->cpu_watchpoint[n]) {
+ cpu_watchpoint_remove_by_ref(CPU(cpu), env->cpu_watchpoint[n]);
+ env->cpu_watchpoint[n] = NULL;
+ }
+
+ if (!FIELD_EX64(wcr, DBGWCR, E)) {
+ /* E bit clear : watchpoint disabled */
+ return;
+ }
+
+ switch (FIELD_EX64(wcr, DBGWCR, LSC)) {
+ case 0:
+ /* LSC 00 is reserved and must behave as if the wp is disabled */
+ return;
+ case 1:
+ flags |= BP_MEM_READ;
+ break;
+ case 2:
+ flags |= BP_MEM_WRITE;
+ break;
+ case 3:
+ flags |= BP_MEM_ACCESS;
+ break;
+ }
+
+ /*
+ * Attempts to use both MASK and BAS fields simultaneously are
+ * CONSTRAINED UNPREDICTABLE; we opt to ignore BAS in this case,
+ * thus generating a watchpoint for every byte in the masked region.
+ */
+ mask = FIELD_EX64(wcr, DBGWCR, MASK);
+ if (mask == 1 || mask == 2) {
+ /*
+ * Reserved values of MASK; we must act as if the mask value was
+ * some non-reserved value, or as if the watchpoint were disabled.
+ * We choose the latter.
+ */
+ return;
+ } else if (mask) {
+ /* Watchpoint covers an aligned area up to 2GB in size */
+ len = 1ULL << mask;
+ /*
+ * If masked bits in WVR are not zero it's CONSTRAINED UNPREDICTABLE
+ * whether the watchpoint fires when the unmasked bits match; we opt
+ * to generate the exceptions.
+ */
+ wvr &= ~(len - 1);
+ } else {
+ /* Watchpoint covers bytes defined by the byte address select bits */
+ int bas = FIELD_EX64(wcr, DBGWCR, BAS);
+ int basstart;
+
+ if (extract64(wvr, 2, 1)) {
+ /*
+ * Deprecated case of an address that is only 4-byte aligned.
+ * BAS[7:4] are ignored, and BAS[3:0] define which bytes to watch.
+ */
+ bas &= 0xf;
+ }
+
+ if (bas == 0) {
+ /* This must act as if the watchpoint is disabled */
+ return;
+ }
+
+ /*
+ * The BAS bits are supposed to be programmed to indicate a contiguous
+ * range of bytes. Otherwise it is CONSTRAINED UNPREDICTABLE whether
+ * we fire for each byte in the word/doubleword addressed by the WVR.
+ * We choose to ignore any non-zero bits after the first range of 1s.
+ */
+ basstart = ctz32(bas);
+ len = cto32(bas >> basstart);
+ wvr += basstart;
+ }
+
+ cpu_watchpoint_insert(CPU(cpu), wvr, len, flags,
+ &env->cpu_watchpoint[n]);
+}
+
+void hw_watchpoint_update_all(ARMCPU *cpu)
+{
+ int i;
+ CPUARMState *env = &cpu->env;
+
+ /*
+ * Completely clear out existing QEMU watchpoints and our array, to
+ * avoid possible stale entries following migration load.
+ */
+ cpu_watchpoint_remove_all(CPU(cpu), BP_CPU);
+ memset(env->cpu_watchpoint, 0, sizeof(env->cpu_watchpoint));
+
+ for (i = 0; i < ARRAY_SIZE(cpu->env.cpu_watchpoint); i++) {
+ hw_watchpoint_update(cpu, i);
+ }
+}
+
+void hw_breakpoint_update(ARMCPU *cpu, int n)
+{
+ CPUARMState *env = &cpu->env;
+ uint64_t bvr = env->cp15.dbgbvr[n];
+ uint64_t bcr = env->cp15.dbgbcr[n];
+ vaddr addr;
+ int bt;
+ int flags = BP_CPU;
+
+ if (env->cpu_breakpoint[n]) {
+ cpu_breakpoint_remove_by_ref(CPU(cpu), env->cpu_breakpoint[n]);
+ env->cpu_breakpoint[n] = NULL;
+ }
+
+ if (!extract64(bcr, 0, 1)) {
+ /* E bit clear : breakpoint disabled */
+ return;
+ }
+
+ bt = extract64(bcr, 20, 4);
+
+ switch (bt) {
+ case 4: /* unlinked address mismatch (reserved if AArch64) */
+ case 5: /* linked address mismatch (reserved if AArch64) */
+ qemu_log_mask(LOG_UNIMP,
+ "arm: address mismatch breakpoint types not implemented\n");
+ return;
+ case 0: /* unlinked address match */
+ case 1: /* linked address match */
+ {
+ /*
+ * Bits [1:0] are RES0.
+ *
+ * It is IMPLEMENTATION DEFINED whether bits [63:49]
+ * ([63:53] for FEAT_LVA) are hardwired to a copy of the sign bit
+ * of the VA field ([48] or [52] for FEAT_LVA), or whether the
+ * value is read as written. It is CONSTRAINED UNPREDICTABLE
+ * whether the RESS bits are ignored when comparing an address.
+ * Therefore we are allowed to compare the entire register, which
+ * lets us avoid considering whether FEAT_LVA is actually enabled.
+ *
+ * The BAS field is used to allow setting breakpoints on 16-bit
+ * wide instructions; it is CONSTRAINED UNPREDICTABLE whether
+ * a bp will fire if the addresses covered by the bp and the addresses
+ * covered by the insn overlap but the insn doesn't start at the
+ * start of the bp address range. We choose to require the insn and
+ * the bp to have the same address. The constraints on writing to
+ * BAS enforced in dbgbcr_write mean we have only four cases:
+ * 0b0000 => no breakpoint
+ * 0b0011 => breakpoint on addr
+ * 0b1100 => breakpoint on addr + 2
+ * 0b1111 => breakpoint on addr
+ * See also figure D2-3 in the v8 ARM ARM (DDI0487A.c).
+ */
+ int bas = extract64(bcr, 5, 4);
+ addr = bvr & ~3ULL;
+ if (bas == 0) {
+ return;
+ }
+ if (bas == 0xc) {
+ addr += 2;
+ }
+ break;
+ }
+ case 2: /* unlinked context ID match */
+ case 8: /* unlinked VMID match (reserved if no EL2) */
+ case 10: /* unlinked context ID and VMID match (reserved if no EL2) */
+ qemu_log_mask(LOG_UNIMP,
+ "arm: unlinked context breakpoint types not implemented\n");
+ return;
+ case 9: /* linked VMID match (reserved if no EL2) */
+ case 11: /* linked context ID and VMID match (reserved if no EL2) */
+ case 3: /* linked context ID match */
+ default:
+ /*
+ * We must generate no events for Linked context matches (unless
+ * they are linked to by some other bp/wp, which is handled in
+ * updates for the linking bp/wp). We choose to also generate no events
+ * for reserved values.
+ */
+ return;
+ }
+
+ cpu_breakpoint_insert(CPU(cpu), addr, flags, &env->cpu_breakpoint[n]);
+}
+
+void hw_breakpoint_update_all(ARMCPU *cpu)
+{
+ int i;
+ CPUARMState *env = &cpu->env;
+
+ /*
+ * Completely clear out existing QEMU breakpoints and our array, to
+ * avoid possible stale entries following migration load.
+ */
+ cpu_breakpoint_remove_all(CPU(cpu), BP_CPU);
+ memset(env->cpu_breakpoint, 0, sizeof(env->cpu_breakpoint));
+
+ for (i = 0; i < ARRAY_SIZE(cpu->env.cpu_breakpoint); i++) {
+ hw_breakpoint_update(cpu, i);
+ }
+}
+
+#if !defined(CONFIG_USER_ONLY)
+
+vaddr arm_adjust_watchpoint_address(CPUState *cs, vaddr addr, int len)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ /*
+ * In BE32 system mode, target memory is stored byteswapped (on a
+ * little-endian host system), and by the time we reach here (via an
+ * opcode helper) the addresses of subword accesses have been adjusted
+ * to account for that, which means that watchpoints will not match.
+ * Undo the adjustment here.
+ */
+ if (arm_sctlr_b(env)) {
+ if (len == 1) {
+ addr ^= 3;
+ } else if (len == 2) {
+ addr ^= 2;
+ }
+ }
+
+ return addr;
+}
+
+#endif /* !CONFIG_USER_ONLY */
+#endif /* CONFIG_TCG */
+
+/*
+ * Check for traps to "powerdown debug" registers, which are controlled
+ * by MDCR.TDOSA
+ */
+static CPAccessResult access_tdosa(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ int el = arm_current_el(env);
+ uint64_t mdcr_el2 = arm_mdcr_el2_eff(env);
+ bool mdcr_el2_tdosa = (mdcr_el2 & MDCR_TDOSA) || (mdcr_el2 & MDCR_TDE) ||
+ (arm_hcr_el2_eff(env) & HCR_TGE);
+
+ if (el < 2 && mdcr_el2_tdosa) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDOSA)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
+
+/*
+ * Check for traps to "debug ROM" registers, which are controlled
+ * by MDCR_EL2.TDRA for EL2 but by the more general MDCR_EL3.TDA for EL3.
+ */
+static CPAccessResult access_tdra(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ int el = arm_current_el(env);
+ uint64_t mdcr_el2 = arm_mdcr_el2_eff(env);
+ bool mdcr_el2_tdra = (mdcr_el2 & MDCR_TDRA) || (mdcr_el2 & MDCR_TDE) ||
+ (arm_hcr_el2_eff(env) & HCR_TGE);
+
+ if (el < 2 && mdcr_el2_tdra) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDA)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
+
+/*
+ * Check for traps to general debug registers, which are controlled
+ * by MDCR_EL2.TDA for EL2 and MDCR_EL3.TDA for EL3.
+ */
+static CPAccessResult access_tda(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ int el = arm_current_el(env);
+ uint64_t mdcr_el2 = arm_mdcr_el2_eff(env);
+ bool mdcr_el2_tda = (mdcr_el2 & MDCR_TDA) || (mdcr_el2 & MDCR_TDE) ||
+ (arm_hcr_el2_eff(env) & HCR_TGE);
+
+ if (el < 2 && mdcr_el2_tda) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDA)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
+
+static CPAccessResult access_dbgvcr32(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ /* MDCR_EL3.TDA doesn't apply for FEAT_NV traps */
+ if (arm_current_el(env) == 2 && (env->cp15.mdcr_el3 & MDCR_TDA)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
+
+/*
+ * Check for traps to Debug Comms Channel registers. If FEAT_FGT
+ * is implemented then these are controlled by MDCR_EL2.TDCC for
+ * EL2 and MDCR_EL3.TDCC for EL3. They are also controlled by
+ * the general debug access trap bits MDCR_EL2.TDA and MDCR_EL3.TDA.
+ * For EL0, they are also controlled by MDSCR_EL1.TDCC.
+ */
+static CPAccessResult access_tdcc(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ int el = arm_current_el(env);
+ uint64_t mdcr_el2 = arm_mdcr_el2_eff(env);
+ bool mdscr_el1_tdcc = extract32(env->cp15.mdscr_el1, 12, 1);
+ bool mdcr_el2_tda = (mdcr_el2 & MDCR_TDA) || (mdcr_el2 & MDCR_TDE) ||
+ (arm_hcr_el2_eff(env) & HCR_TGE);
+ bool mdcr_el2_tdcc = cpu_isar_feature(aa64_fgt, env_archcpu(env)) &&
+ (mdcr_el2 & MDCR_TDCC);
+ bool mdcr_el3_tdcc = cpu_isar_feature(aa64_fgt, env_archcpu(env)) &&
+ (env->cp15.mdcr_el3 & MDCR_TDCC);
+
+ if (el < 1 && mdscr_el1_tdcc) {
+ return CP_ACCESS_TRAP;
+ }
+ if (el < 2 && (mdcr_el2_tda || mdcr_el2_tdcc)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ if (el < 3 && ((env->cp15.mdcr_el3 & MDCR_TDA) || mdcr_el3_tdcc)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
+
+static void oslar_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Writes to OSLAR_EL1 may update the OS lock status, which can be
+ * read via a bit in OSLSR_EL1.
+ */
+ int oslock;
+
+ if (ri->state == ARM_CP_STATE_AA32) {
+ oslock = (value == 0xC5ACCE55);
+ } else {
+ oslock = value & 1;
+ }
+
+ env->cp15.oslsr_el1 = deposit32(env->cp15.oslsr_el1, 1, 1, oslock);
+}
+
+static void osdlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ /*
+ * Only defined bit is bit 0 (DLK); if Feat_DoubleLock is not
+ * implemented this is RAZ/WI.
+ */
+ if (arm_feature(env, ARM_FEATURE_AARCH64)
+ ? cpu_isar_feature(aa64_doublelock, cpu)
+ : cpu_isar_feature(aa32_doublelock, cpu)) {
+ env->cp15.osdlr_el1 = value & 1;
+ }
+}
+
+static void dbgclaimset_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ env->cp15.dbgclaim |= (value & 0xFF);
+}
+
+static uint64_t dbgclaimset_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ /* CLAIM bits are RAO */
+ return 0xFF;
+}
+
+static void dbgclaimclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ env->cp15.dbgclaim &= ~(value & 0xFF);
+}
+
+static const ARMCPRegInfo debug_cp_reginfo[] = {
+ /*
+ * DBGDRAR, DBGDSAR: always RAZ since we don't implement memory mapped
+ * debug components. The AArch64 version of DBGDRAR is named MDRAR_EL1;
+ * unlike DBGDRAR it is never accessible from EL0.
+ * DBGDSAR is deprecated and must RAZ from v8 anyway, so it has no AArch64
+ * accessor.
+ */
+ { .name = "DBGDRAR", .cp = 14, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
+ .access = PL0_R, .accessfn = access_tdra,
+ .type = ARM_CP_CONST | ARM_CP_NO_GDB, .resetvalue = 0 },
+ { .name = "MDRAR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 0,
+ .access = PL1_R, .accessfn = access_tdra,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "DBGDSAR", .cp = 14, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
+ .access = PL0_R, .accessfn = access_tdra,
+ .type = ARM_CP_CONST | ARM_CP_NO_GDB, .resetvalue = 0 },
+ /* Monitor debug system control register; the 32-bit alias is DBGDSCRext. */
+ { .name = "MDSCR_EL1", .state = ARM_CP_STATE_BOTH,
+ .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
+ .access = PL1_RW, .accessfn = access_tda,
+ .fgt = FGT_MDSCR_EL1,
+ .nv2_redirect_offset = 0x158,
+ .fieldoffset = offsetof(CPUARMState, cp15.mdscr_el1),
+ .resetvalue = 0 },
+ /*
+ * MDCCSR_EL0[30:29] map to EDSCR[30:29]. Simply RAZ as the external
+ * Debug Communication Channel is not implemented.
+ */
+ { .name = "MDCCSR_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 2, .opc1 = 3, .crn = 0, .crm = 1, .opc2 = 0,
+ .access = PL0_R, .accessfn = access_tdcc,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ /*
+ * These registers belong to the Debug Communications Channel,
+ * which is not implemented. However we implement RAZ/WI behaviour
+ * with trapping to prevent spurious SIGILLs if the guest OS does
+ * access them as the support cannot be probed for.
+ */
+ { .name = "OSDTRRX_EL1", .state = ARM_CP_STATE_BOTH, .cp = 14,
+ .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 2,
+ .access = PL1_RW, .accessfn = access_tdcc,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "OSDTRTX_EL1", .state = ARM_CP_STATE_BOTH, .cp = 14,
+ .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2,
+ .access = PL1_RW, .accessfn = access_tdcc,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ /* DBGDTRTX_EL0/DBGDTRRX_EL0 depend on direction */
+ { .name = "DBGDTR_EL0", .state = ARM_CP_STATE_BOTH, .cp = 14,
+ .opc0 = 2, .opc1 = 3, .crn = 0, .crm = 5, .opc2 = 0,
+ .access = PL0_RW, .accessfn = access_tdcc,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ /*
+ * OSECCR_EL1 provides a mechanism for an operating system
+ * to access the contents of EDECCR. EDECCR is not implemented though,
+ * as is the rest of external device mechanism.
+ */
+ { .name = "OSECCR_EL1", .state = ARM_CP_STATE_BOTH, .cp = 14,
+ .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2,
+ .access = PL1_RW, .accessfn = access_tda,
+ .fgt = FGT_OSECCR_EL1,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ /*
+ * DBGDSCRint[15,12,5:2] map to MDSCR_EL1[15,12,5:2]. Map all bits as
+ * it is unlikely a guest will care.
+ * We don't implement the configurable EL0 access.
+ */
+ { .name = "DBGDSCRint", .state = ARM_CP_STATE_AA32,
+ .cp = 14, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0,
+ .type = ARM_CP_ALIAS,
+ .access = PL1_R, .accessfn = access_tda,
+ .fieldoffset = offsetof(CPUARMState, cp15.mdscr_el1), },
+ { .name = "OSLAR_EL1", .state = ARM_CP_STATE_BOTH,
+ .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 4,
+ .access = PL1_W, .type = ARM_CP_NO_RAW,
+ .accessfn = access_tdosa,
+ .fgt = FGT_OSLAR_EL1,
+ .writefn = oslar_write },
+ { .name = "OSLSR_EL1", .state = ARM_CP_STATE_BOTH,
+ .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 4,
+ .access = PL1_R, .resetvalue = 10,
+ .accessfn = access_tdosa,
+ .fgt = FGT_OSLSR_EL1,
+ .fieldoffset = offsetof(CPUARMState, cp15.oslsr_el1) },
+ /* Dummy OSDLR_EL1: 32-bit Linux will read this */
+ { .name = "OSDLR_EL1", .state = ARM_CP_STATE_BOTH,
+ .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 3, .opc2 = 4,
+ .access = PL1_RW, .accessfn = access_tdosa,
+ .fgt = FGT_OSDLR_EL1,
+ .writefn = osdlr_write,
+ .fieldoffset = offsetof(CPUARMState, cp15.osdlr_el1) },
+ /*
+ * Dummy DBGVCR: Linux wants to clear this on startup, but we don't
+ * implement vector catch debug events yet.
+ */
+ { .name = "DBGVCR",
+ .cp = 14, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0,
+ .access = PL1_RW, .accessfn = access_tda,
+ .type = ARM_CP_NOP },
+ /*
+ * Dummy MDCCINT_EL1, since we don't implement the Debug Communications
+ * Channel but Linux may try to access this register. The 32-bit
+ * alias is DBGDCCINT.
+ */
+ { .name = "MDCCINT_EL1", .state = ARM_CP_STATE_BOTH,
+ .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0,
+ .access = PL1_RW, .accessfn = access_tdcc,
+ .type = ARM_CP_NOP },
+ /*
+ * Dummy DBGCLAIM registers.
+ * "The architecture does not define any functionality for the CLAIM tag bits.",
+ * so we only keep the raw bits
+ */
+ { .name = "DBGCLAIMSET_EL1", .state = ARM_CP_STATE_BOTH,
+ .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 6,
+ .type = ARM_CP_ALIAS,
+ .access = PL1_RW, .accessfn = access_tda,
+ .fgt = FGT_DBGCLAIM,
+ .writefn = dbgclaimset_write, .readfn = dbgclaimset_read },
+ { .name = "DBGCLAIMCLR_EL1", .state = ARM_CP_STATE_BOTH,
+ .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 6,
+ .access = PL1_RW, .accessfn = access_tda,
+ .fgt = FGT_DBGCLAIM,
+ .writefn = dbgclaimclr_write, .raw_writefn = raw_write,
+ .fieldoffset = offsetof(CPUARMState, cp15.dbgclaim) },
+};
+
+/* These are present only when EL1 supports AArch32 */
+static const ARMCPRegInfo debug_aa32_el1_reginfo[] = {
+ /*
+ * Dummy DBGVCR32_EL2 (which is only for a 64-bit hypervisor
+ * to save and restore a 32-bit guest's DBGVCR)
+ */
+ { .name = "DBGVCR32_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 2, .opc1 = 4, .crn = 0, .crm = 7, .opc2 = 0,
+ .access = PL2_RW, .accessfn = access_dbgvcr32,
+ .type = ARM_CP_NOP | ARM_CP_EL3_NO_EL2_KEEP },
+};
+
+static const ARMCPRegInfo debug_lpae_cp_reginfo[] = {
+ /* 64 bit access versions of the (dummy) debug registers */
+ { .name = "DBGDRAR", .cp = 14, .crm = 1, .opc1 = 0,
+ .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_64BIT | ARM_CP_NO_GDB,
+ .resetvalue = 0 },
+ { .name = "DBGDSAR", .cp = 14, .crm = 2, .opc1 = 0,
+ .access = PL0_R, .type = ARM_CP_CONST | ARM_CP_64BIT | ARM_CP_NO_GDB,
+ .resetvalue = 0 },
+};
+
+static void dbgwvr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ int i = ri->crm;
+
+ /*
+ * Bits [1:0] are RES0.
+ *
+ * It is IMPLEMENTATION DEFINED whether [63:49] ([63:53] with FEAT_LVA)
+ * are hardwired to the value of bit [48] ([52] with FEAT_LVA), or if
+ * they contain the value written. It is CONSTRAINED UNPREDICTABLE
+ * whether the RESS bits are ignored when comparing an address.
+ *
+ * Therefore we are allowed to compare the entire register, which lets
+ * us avoid considering whether or not FEAT_LVA is actually enabled.
+ */
+ value &= ~3ULL;
+
+ raw_write(env, ri, value);
+ if (tcg_enabled()) {
+ hw_watchpoint_update(cpu, i);
+ }
+}
+
+static void dbgwcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ int i = ri->crm;
+
+ raw_write(env, ri, value);
+ if (tcg_enabled()) {
+ hw_watchpoint_update(cpu, i);
+ }
+}
+
+static void dbgbvr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ int i = ri->crm;
+
+ raw_write(env, ri, value);
+ if (tcg_enabled()) {
+ hw_breakpoint_update(cpu, i);
+ }
+}
+
+static void dbgbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ int i = ri->crm;
+
+ /*
+ * BAS[3] is a read-only copy of BAS[2], and BAS[1] a read-only
+ * copy of BAS[0].
+ */
+ value = deposit64(value, 6, 1, extract64(value, 5, 1));
+ value = deposit64(value, 8, 1, extract64(value, 7, 1));
+
+ raw_write(env, ri, value);
+ if (tcg_enabled()) {
+ hw_breakpoint_update(cpu, i);
+ }
+}
+
+void define_debug_regs(ARMCPU *cpu)
+{
+ /*
+ * Define v7 and v8 architectural debug registers.
+ * These are just dummy implementations for now.
+ */
+ int i;
+ int wrps, brps, ctx_cmps;
+
+ /*
+ * The Arm ARM says DBGDIDR is optional and deprecated if EL1 cannot
+ * use AArch32. Given that bit 15 is RES1, if the value is 0 then
+ * the register must not exist for this cpu.
+ */
+ if (cpu->isar.dbgdidr != 0) {
+ ARMCPRegInfo dbgdidr = {
+ .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0,
+ .opc1 = 0, .opc2 = 0,
+ .access = PL0_R, .accessfn = access_tda,
+ .type = ARM_CP_CONST, .resetvalue = cpu->isar.dbgdidr,
+ };
+ define_one_arm_cp_reg(cpu, &dbgdidr);
+ }
+
+ /*
+ * DBGDEVID is present in the v7 debug architecture if
+ * DBGDIDR.DEVID_imp is 1 (bit 15); from v7.1 and on it is
+ * mandatory (and bit 15 is RES1). DBGDEVID1 and DBGDEVID2 exist
+ * from v7.1 of the debug architecture. Because no fields have yet
+ * been defined in DBGDEVID2 (and quite possibly none will ever
+ * be) we don't define an ARMISARegisters field for it.
+ * These registers exist only if EL1 can use AArch32, but that
+ * happens naturally because they are only PL1 accessible anyway.
+ */
+ if (extract32(cpu->isar.dbgdidr, 15, 1)) {
+ ARMCPRegInfo dbgdevid = {
+ .name = "DBGDEVID",
+ .cp = 14, .opc1 = 0, .crn = 7, .crm = 2, .opc2 = 7,
+ .access = PL1_R, .accessfn = access_tda,
+ .type = ARM_CP_CONST, .resetvalue = cpu->isar.dbgdevid,
+ };
+ define_one_arm_cp_reg(cpu, &dbgdevid);
+ }
+ if (cpu_isar_feature(aa32_debugv7p1, cpu)) {
+ ARMCPRegInfo dbgdevid12[] = {
+ {
+ .name = "DBGDEVID1",
+ .cp = 14, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 7,
+ .access = PL1_R, .accessfn = access_tda,
+ .type = ARM_CP_CONST, .resetvalue = cpu->isar.dbgdevid1,
+ }, {
+ .name = "DBGDEVID2",
+ .cp = 14, .opc1 = 0, .crn = 7, .crm = 0, .opc2 = 7,
+ .access = PL1_R, .accessfn = access_tda,
+ .type = ARM_CP_CONST, .resetvalue = 0,
+ },
+ };
+ define_arm_cp_regs(cpu, dbgdevid12);
+ }
+
+ brps = arm_num_brps(cpu);
+ wrps = arm_num_wrps(cpu);
+ ctx_cmps = arm_num_ctx_cmps(cpu);
+
+ assert(ctx_cmps <= brps);
+
+ define_arm_cp_regs(cpu, debug_cp_reginfo);
+ if (cpu_isar_feature(aa64_aa32_el1, cpu)) {
+ define_arm_cp_regs(cpu, debug_aa32_el1_reginfo);
+ }
+
+ if (arm_feature(&cpu->env, ARM_FEATURE_LPAE)) {
+ define_arm_cp_regs(cpu, debug_lpae_cp_reginfo);
+ }
+
+ for (i = 0; i < brps; i++) {
+ char *dbgbvr_el1_name = g_strdup_printf("DBGBVR%d_EL1", i);
+ char *dbgbcr_el1_name = g_strdup_printf("DBGBCR%d_EL1", i);
+ ARMCPRegInfo dbgregs[] = {
+ { .name = dbgbvr_el1_name, .state = ARM_CP_STATE_BOTH,
+ .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4,
+ .access = PL1_RW, .accessfn = access_tda,
+ .fgt = FGT_DBGBVRN_EL1,
+ .fieldoffset = offsetof(CPUARMState, cp15.dbgbvr[i]),
+ .writefn = dbgbvr_write, .raw_writefn = raw_write
+ },
+ { .name = dbgbcr_el1_name, .state = ARM_CP_STATE_BOTH,
+ .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 5,
+ .access = PL1_RW, .accessfn = access_tda,
+ .fgt = FGT_DBGBCRN_EL1,
+ .fieldoffset = offsetof(CPUARMState, cp15.dbgbcr[i]),
+ .writefn = dbgbcr_write, .raw_writefn = raw_write
+ },
+ };
+ define_arm_cp_regs(cpu, dbgregs);
+ g_free(dbgbvr_el1_name);
+ g_free(dbgbcr_el1_name);
+ }
+
+ for (i = 0; i < wrps; i++) {
+ char *dbgwvr_el1_name = g_strdup_printf("DBGWVR%d_EL1", i);
+ char *dbgwcr_el1_name = g_strdup_printf("DBGWCR%d_EL1", i);
+ ARMCPRegInfo dbgregs[] = {
+ { .name = dbgwvr_el1_name, .state = ARM_CP_STATE_BOTH,
+ .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6,
+ .access = PL1_RW, .accessfn = access_tda,
+ .fgt = FGT_DBGWVRN_EL1,
+ .fieldoffset = offsetof(CPUARMState, cp15.dbgwvr[i]),
+ .writefn = dbgwvr_write, .raw_writefn = raw_write
+ },
+ { .name = dbgwcr_el1_name, .state = ARM_CP_STATE_BOTH,
+ .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 7,
+ .access = PL1_RW, .accessfn = access_tda,
+ .fgt = FGT_DBGWCRN_EL1,
+ .fieldoffset = offsetof(CPUARMState, cp15.dbgwcr[i]),
+ .writefn = dbgwcr_write, .raw_writefn = raw_write
+ },
+ };
+ define_arm_cp_regs(cpu, dbgregs);
+ g_free(dbgwvr_el1_name);
+ g_free(dbgwcr_el1_name);
+ }
+}
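
The BAS decoding in hw_watchpoint_update() above is easier to follow with a worked example. The sketch below is self-contained and only mirrors the non-MASK path; bas_to_range() and the counting helpers are local names for illustration, not QEMU APIs.

#include <stdint.h>
#include <stdio.h>

/*
 * Count trailing zeros / trailing ones of an 8-bit BAS value.
 * Plain loops are used so the sketch does not depend on QEMU's
 * ctz32()/cto32() helpers.
 */
static int count_trailing_zeros(uint32_t x)
{
    int n = 0;
    while (n < 8 && !(x & (1u << n))) {
        n++;
    }
    return n;
}

static int count_trailing_ones(uint32_t x)
{
    int n = 0;
    while (n < 8 && (x & (1u << n))) {
        n++;
    }
    return n;
}

/*
 * Derive the watched byte range from DBGWVR and DBGWCR.BAS, following
 * the non-MASK path of hw_watchpoint_update() above: the first
 * contiguous run of 1s in BAS selects bytes
 * [wvr + basstart, wvr + basstart + len).
 * The real helper additionally returns early for BAS == 0
 * (watchpoint disabled); here that simply yields len == 0.
 */
static void bas_to_range(uint64_t wvr, uint32_t bas,
                         uint64_t *start, uint64_t *len)
{
    if (wvr & 4) {
        bas &= 0xf;   /* deprecated 4-aligned case: BAS[7:4] ignored */
    }
    int basstart = count_trailing_zeros(bas);
    *len = count_trailing_ones(bas >> basstart);
    *start = wvr + basstart;
}

int main(void)
{
    uint64_t start, len;

    bas_to_range(0x1000, 0x0c, &start, &len);   /* BAS = 0b00001100 */
    printf("watch %llu bytes at 0x%llx\n",
           (unsigned long long)len, (unsigned long long)start);
    return 0;
}

For DBGWVR = 0x1000 and BAS = 0b00001100 this reports a 2-byte watchpoint at 0x1002, matching the ctz32()/cto32() computation in the helper above.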
diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
index 0c64c0292e..a3bb73cfa7 100644
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -7,7 +7,7 @@
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -18,14 +18,19 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
-#include "qemu-common.h"
#include "cpu.h"
#include "exec/gdbstub.h"
+#include "gdbstub/helpers.h"
+#include "sysemu/tcg.h"
+#include "internals.h"
+#include "cpu-features.h"
+#include "cpregs.h"
-typedef struct RegisterSysregXmlParam {
+typedef struct RegisterSysregFeatureParam {
CPUState *cs;
- GString *s;
-} RegisterSysregXmlParam;
+ GDBFeatureBuilder builder;
+ int n;
+} RegisterSysregFeatureParam;
/* Old gdb always expect FPA registers. Newer (xml-aware) gdb only expect
whatever the target description contains. Due to a historical mishap
@@ -33,7 +38,7 @@ typedef struct RegisterSysregXmlParam {
We hack round this by giving the FPA regs zero size when talking to a
newer gdb. */
-int arm_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
+int arm_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
{
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
@@ -42,24 +47,13 @@ int arm_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
/* Core integer register. */
return gdb_get_reg32(mem_buf, env->regs[n]);
}
- if (n < 24) {
- /* FPA registers. */
- if (gdb_has_xml) {
- return 0;
- }
- memset(mem_buf, 0, 12);
- return 12;
- }
- switch (n) {
- case 24:
- /* FPA status register. */
- if (gdb_has_xml) {
- return 0;
+ if (n == 25) {
+ /* CPSR, or XPSR for M-profile */
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ return gdb_get_reg32(mem_buf, xpsr_read(env));
+ } else {
+ return gdb_get_reg32(mem_buf, cpsr_read(env));
}
- return gdb_get_reg32(mem_buf, 0);
- case 25:
- /* CPSR */
- return gdb_get_reg32(mem_buf, cpsr_read(env));
}
/* Unknown register. */
return 0;
@@ -73,66 +67,213 @@ int arm_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
tmp = ldl_p(mem_buf);
- /* Mask out low bit of PC to workaround gdb bugs. This will probably
- cause problems if we ever implement the Jazelle DBX extensions. */
+ /*
+ * Mask out low bits of PC to workaround gdb bugs.
+ * This avoids an assert in thumb_tr_translate_insn, because it is
+ * architecturally impossible to misalign the pc.
+ * This will probably cause problems if we ever implement the
+ * Jazelle DBX extensions.
+ */
if (n == 15) {
tmp &= ~1;
}
if (n < 16) {
/* Core integer register. */
+ if (n == 13 && arm_feature(env, ARM_FEATURE_M)) {
+ /* M profile SP low bits are always 0 */
+ tmp &= ~3;
+ }
env->regs[n] = tmp;
return 4;
}
- if (n < 24) { /* 16-23 */
- /* FPA registers (ignored). */
- if (gdb_has_xml) {
- return 0;
+ if (n == 25) {
+ /* CPSR, or XPSR for M-profile */
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ /*
+ * Don't allow writing to XPSR.Exception as it can cause
+ * a transition into or out of handler mode (it's not
+ * writable via the MSR insn so this is a reasonable
+ * restriction). Other fields are safe to update.
+ */
+ xpsr_write(env, tmp, ~XPSR_EXCP);
+ } else {
+ cpsr_write(env, tmp, 0xffffffff, CPSRWriteByGDBStub);
}
- return 12;
+ return 4;
+ }
+ /* Unknown register. */
+ return 0;
+}
+
+static int vfp_gdb_get_reg(CPUState *cs, GByteArray *buf, int reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ int nregs = cpu_isar_feature(aa32_simd_r32, cpu) ? 32 : 16;
+
+ /* VFP data registers are always little-endian. */
+ if (reg < nregs) {
+ return gdb_get_reg64(buf, *aa32_vfp_dreg(env, reg));
}
- switch (n) {
- case 24:
- /* FPA status register (ignored). */
- if (gdb_has_xml) {
- return 0;
+ if (arm_feature(env, ARM_FEATURE_NEON)) {
+ /* Aliases for Q regs. */
+ nregs += 16;
+ if (reg < nregs) {
+ uint64_t *q = aa32_vfp_qreg(env, reg - 32);
+ return gdb_get_reg128(buf, q[0], q[1]);
}
+ }
+ switch (reg - nregs) {
+ case 0:
+ return gdb_get_reg32(buf, vfp_get_fpscr(env));
+ }
+ return 0;
+}
+
+static int vfp_gdb_set_reg(CPUState *cs, uint8_t *buf, int reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ int nregs = cpu_isar_feature(aa32_simd_r32, cpu) ? 32 : 16;
+
+ if (reg < nregs) {
+ *aa32_vfp_dreg(env, reg) = ldq_le_p(buf);
+ return 8;
+ }
+ if (arm_feature(env, ARM_FEATURE_NEON)) {
+ nregs += 16;
+ if (reg < nregs) {
+ uint64_t *q = aa32_vfp_qreg(env, reg - 32);
+ q[0] = ldq_le_p(buf);
+ q[1] = ldq_le_p(buf + 8);
+ return 16;
+ }
+ }
+ switch (reg - nregs) {
+ case 0:
+ vfp_set_fpscr(env, ldl_p(buf));
+ return 4;
+ }
+ return 0;
+}
+
+static int vfp_gdb_get_sysreg(CPUState *cs, GByteArray *buf, int reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ switch (reg) {
+ case 0:
+ return gdb_get_reg32(buf, env->vfp.xregs[ARM_VFP_FPSID]);
+ case 1:
+ return gdb_get_reg32(buf, env->vfp.xregs[ARM_VFP_FPEXC]);
+ }
+ return 0;
+}
+
+static int vfp_gdb_set_sysreg(CPUState *cs, uint8_t *buf, int reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ switch (reg) {
+ case 0:
+ env->vfp.xregs[ARM_VFP_FPSID] = ldl_p(buf);
return 4;
- case 25:
- /* CPSR */
- cpsr_write(env, tmp, 0xffffffff, CPSRWriteByGDBStub);
+ case 1:
+ env->vfp.xregs[ARM_VFP_FPEXC] = ldl_p(buf) & (1 << 30);
return 4;
}
- /* Unknown register. */
return 0;
}
-static void arm_gen_one_xml_reg_tag(GString *s, DynamicGDBXMLInfo *dyn_xml,
- ARMCPRegInfo *ri, uint32_t ri_key,
- int bitsize)
+static int mve_gdb_get_reg(CPUState *cs, GByteArray *buf, int reg)
{
- g_string_append_printf(s, "<reg name=\"%s\"", ri->name);
- g_string_append_printf(s, " bitsize=\"%d\"", bitsize);
- g_string_append_printf(s, " group=\"cp_regs\"/>");
- dyn_xml->num_cpregs++;
- dyn_xml->cpregs_keys[dyn_xml->num_cpregs - 1] = ri_key;
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ switch (reg) {
+ case 0:
+ return gdb_get_reg32(buf, env->v7m.vpr);
+ default:
+ return 0;
+ }
}
-static void arm_register_sysreg_for_xml(gpointer key, gpointer value,
- gpointer p)
+static int mve_gdb_set_reg(CPUState *cs, uint8_t *buf, int reg)
{
- uint32_t ri_key = *(uint32_t *)key;
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ switch (reg) {
+ case 0:
+ env->v7m.vpr = ldl_p(buf);
+ return 4;
+ default:
+ return 0;
+ }
+}
+
+/**
+ * arm_get/set_gdb_*: get/set a gdb register
+ * @cs: the CPU state
+ * @buf: a buffer to copy to/from
+ * @reg: register number (offset from start of group)
+ *
+ * We return the number of bytes copied
+ */
+
+static int arm_gdb_get_sysreg(CPUState *cs, GByteArray *buf, int reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ const ARMCPRegInfo *ri;
+ uint32_t key;
+
+ key = cpu->dyn_sysreg_feature.data.cpregs.keys[reg];
+ ri = get_arm_cp_reginfo(cpu->cp_regs, key);
+ if (ri) {
+ if (cpreg_field_is_64bit(ri)) {
+ return gdb_get_reg64(buf, (uint64_t)read_raw_cp_reg(env, ri));
+ } else {
+ return gdb_get_reg32(buf, (uint32_t)read_raw_cp_reg(env, ri));
+ }
+ }
+ return 0;
+}
+
+static int arm_gdb_set_sysreg(CPUState *cs, uint8_t *buf, int reg)
+{
+ return 0;
+}
+
+static void arm_gen_one_feature_sysreg(GDBFeatureBuilder *builder,
+ DynamicGDBFeatureInfo *dyn_feature,
+ ARMCPRegInfo *ri, uint32_t ri_key,
+ int bitsize, int n)
+{
+ gdb_feature_builder_append_reg(builder, ri->name, bitsize, n,
+ "int", "cp_regs");
+
+ dyn_feature->data.cpregs.keys[n] = ri_key;
+}
+
+static void arm_register_sysreg_for_feature(gpointer key, gpointer value,
+ gpointer p)
+{
+ uint32_t ri_key = (uintptr_t)key;
ARMCPRegInfo *ri = value;
- RegisterSysregXmlParam *param = (RegisterSysregXmlParam *)p;
- GString *s = param->s;
+ RegisterSysregFeatureParam *param = p;
ARMCPU *cpu = ARM_CPU(param->cs);
CPUARMState *env = &cpu->env;
- DynamicGDBXMLInfo *dyn_xml = &cpu->dyn_xml;
+ DynamicGDBFeatureInfo *dyn_feature = &cpu->dyn_sysreg_feature;
if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_NO_GDB))) {
if (arm_feature(env, ARM_FEATURE_AARCH64)) {
if (ri->state == ARM_CP_STATE_AA64) {
- arm_gen_one_xml_reg_tag(s , dyn_xml, ri, ri_key, 64);
+ arm_gen_one_feature_sysreg(&param->builder, dyn_feature,
+ ri, ri_key, 64, param->n++);
}
} else {
if (ri->state == ARM_CP_STATE_AA32) {
@@ -141,38 +282,276 @@ static void arm_register_sysreg_for_xml(gpointer key, gpointer value,
return;
}
if (ri->type & ARM_CP_64BIT) {
- arm_gen_one_xml_reg_tag(s , dyn_xml, ri, ri_key, 64);
+ arm_gen_one_feature_sysreg(&param->builder, dyn_feature,
+ ri, ri_key, 64, param->n++);
} else {
- arm_gen_one_xml_reg_tag(s , dyn_xml, ri, ri_key, 32);
+ arm_gen_one_feature_sysreg(&param->builder, dyn_feature,
+ ri, ri_key, 32, param->n++);
}
}
}
}
}
-int arm_gen_dynamic_xml(CPUState *cs)
+static GDBFeature *arm_gen_dynamic_sysreg_feature(CPUState *cs, int base_reg)
{
ARMCPU *cpu = ARM_CPU(cs);
- GString *s = g_string_new(NULL);
- RegisterSysregXmlParam param = {cs, s};
+ RegisterSysregFeatureParam param = {cs};
+ gsize num_regs = g_hash_table_size(cpu->cp_regs);
+
+ gdb_feature_builder_init(&param.builder,
+ &cpu->dyn_sysreg_feature.desc,
+ "org.qemu.gdb.arm.sys.regs",
+ "system-registers.xml",
+ base_reg);
+ cpu->dyn_sysreg_feature.data.cpregs.keys = g_new(uint32_t, num_regs);
+ g_hash_table_foreach(cpu->cp_regs, arm_register_sysreg_for_feature, &param);
+ gdb_feature_builder_end(&param.builder);
+ return &cpu->dyn_sysreg_feature.desc;
+}
- cpu->dyn_xml.num_cpregs = 0;
- cpu->dyn_xml.cpregs_keys = g_new(uint32_t, g_hash_table_size(cpu->cp_regs));
- g_string_printf(s, "<?xml version=\"1.0\"?>");
- g_string_append_printf(s, "<!DOCTYPE target SYSTEM \"gdb-target.dtd\">");
- g_string_append_printf(s, "<feature name=\"org.qemu.gdb.arm.sys.regs\">");
- g_hash_table_foreach(cpu->cp_regs, arm_register_sysreg_for_xml, &param);
- g_string_append_printf(s, "</feature>");
- cpu->dyn_xml.desc = g_string_free(s, false);
- return cpu->dyn_xml.num_cpregs;
+#ifdef CONFIG_TCG
+typedef enum {
+ M_SYSREG_MSP,
+ M_SYSREG_PSP,
+ M_SYSREG_PRIMASK,
+ M_SYSREG_CONTROL,
+ M_SYSREG_BASEPRI,
+ M_SYSREG_FAULTMASK,
+ M_SYSREG_MSPLIM,
+ M_SYSREG_PSPLIM,
+} MProfileSysreg;
+
+static const struct {
+ const char *name;
+ int feature;
+} m_sysreg_def[] = {
+ [M_SYSREG_MSP] = { "msp", ARM_FEATURE_M },
+ [M_SYSREG_PSP] = { "psp", ARM_FEATURE_M },
+ [M_SYSREG_PRIMASK] = { "primask", ARM_FEATURE_M },
+ [M_SYSREG_CONTROL] = { "control", ARM_FEATURE_M },
+ [M_SYSREG_BASEPRI] = { "basepri", ARM_FEATURE_M_MAIN },
+ [M_SYSREG_FAULTMASK] = { "faultmask", ARM_FEATURE_M_MAIN },
+ [M_SYSREG_MSPLIM] = { "msplim", ARM_FEATURE_V8 },
+ [M_SYSREG_PSPLIM] = { "psplim", ARM_FEATURE_V8 },
+};
+
+static uint32_t *m_sysreg_ptr(CPUARMState *env, MProfileSysreg reg, bool sec)
+{
+ uint32_t *ptr;
+
+ switch (reg) {
+ case M_SYSREG_MSP:
+ ptr = arm_v7m_get_sp_ptr(env, sec, false, true);
+ break;
+ case M_SYSREG_PSP:
+ ptr = arm_v7m_get_sp_ptr(env, sec, true, true);
+ break;
+ case M_SYSREG_MSPLIM:
+ ptr = &env->v7m.msplim[sec];
+ break;
+ case M_SYSREG_PSPLIM:
+ ptr = &env->v7m.psplim[sec];
+ break;
+ case M_SYSREG_PRIMASK:
+ ptr = &env->v7m.primask[sec];
+ break;
+ case M_SYSREG_BASEPRI:
+ ptr = &env->v7m.basepri[sec];
+ break;
+ case M_SYSREG_FAULTMASK:
+ ptr = &env->v7m.faultmask[sec];
+ break;
+ case M_SYSREG_CONTROL:
+ ptr = &env->v7m.control[sec];
+ break;
+ default:
+ return NULL;
+ }
+ return arm_feature(env, m_sysreg_def[reg].feature) ? ptr : NULL;
}
-const char *arm_gdb_get_dynamic_xml(CPUState *cs, const char *xmlname)
+static int m_sysreg_get(CPUARMState *env, GByteArray *buf,
+ MProfileSysreg reg, bool secure)
+{
+ uint32_t *ptr = m_sysreg_ptr(env, reg, secure);
+
+ if (ptr == NULL) {
+ return 0;
+ }
+ return gdb_get_reg32(buf, *ptr);
+}
+
+static int arm_gdb_get_m_systemreg(CPUState *cs, GByteArray *buf, int reg)
{
ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ /*
+ * Here, we emulate the MRS instruction, where CONTROL has a mix of
+ * banked and non-banked bits.
+ */
+ if (reg == M_SYSREG_CONTROL) {
+ return gdb_get_reg32(buf, arm_v7m_mrs_control(env, env->v7m.secure));
+ }
+ return m_sysreg_get(env, buf, reg, env->v7m.secure);
+}
+
+static int arm_gdb_set_m_systemreg(CPUState *cs, uint8_t *buf, int reg)
+{
+ return 0; /* TODO */
+}
+
+static GDBFeature *arm_gen_dynamic_m_systemreg_feature(CPUState *cs,
+ int base_reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ GDBFeatureBuilder builder;
+ int reg = 0;
+ int i;
+
+ gdb_feature_builder_init(&builder, &cpu->dyn_m_systemreg_feature.desc,
+ "org.gnu.gdb.arm.m-system", "arm-m-system.xml",
+ base_reg);
+
+ for (i = 0; i < ARRAY_SIZE(m_sysreg_def); i++) {
+ if (arm_feature(env, m_sysreg_def[i].feature)) {
+ gdb_feature_builder_append_reg(&builder, m_sysreg_def[i].name, 32,
+ reg++, "int", NULL);
+ }
+ }
+
+ gdb_feature_builder_end(&builder);
- if (strcmp(xmlname, "system-registers.xml") == 0) {
- return cpu->dyn_xml.desc;
+ return &cpu->dyn_m_systemreg_feature.desc;
+}
+
+#ifndef CONFIG_USER_ONLY
+/*
+ * For user-only, we see the non-secure registers via m_systemreg above.
+ * For secext, encode the non-secure view as even and secure view as odd.
+ */
+static int arm_gdb_get_m_secextreg(CPUState *cs, GByteArray *buf, int reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ return m_sysreg_get(env, buf, reg >> 1, reg & 1);
+}
+
+static int arm_gdb_set_m_secextreg(CPUState *cs, uint8_t *buf, int reg)
+{
+ return 0; /* TODO */
+}
+
+static GDBFeature *arm_gen_dynamic_m_secextreg_feature(CPUState *cs,
+ int base_reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ GDBFeatureBuilder builder;
+ char *name;
+ int reg = 0;
+ int i;
+
+ gdb_feature_builder_init(&builder, &cpu->dyn_m_secextreg_feature.desc,
+ "org.gnu.gdb.arm.secext", "arm-m-secext.xml",
+ base_reg);
+
+ for (i = 0; i < ARRAY_SIZE(m_sysreg_def); i++) {
+ name = g_strconcat(m_sysreg_def[i].name, "_ns", NULL);
+ gdb_feature_builder_append_reg(&builder, name, 32, reg++,
+ "int", NULL);
+ name = g_strconcat(m_sysreg_def[i].name, "_s", NULL);
+ gdb_feature_builder_append_reg(&builder, name, 32, reg++,
+ "int", NULL);
+ }
+
+ gdb_feature_builder_end(&builder);
+
+ return &cpu->dyn_m_secextreg_feature.desc;
+}
+#endif
+#endif /* CONFIG_TCG */
+
+void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
+{
+ CPUState *cs = CPU(cpu);
+ CPUARMState *env = &cpu->env;
+
+ if (arm_feature(env, ARM_FEATURE_AARCH64)) {
+ /*
+ * The lower part of each SVE register aliases to the FPU
+ * registers so we don't need to include both.
+ */
+#ifdef TARGET_AARCH64
+ if (isar_feature_aa64_sve(&cpu->isar)) {
+ GDBFeature *feature = arm_gen_dynamic_svereg_feature(cs, cs->gdb_num_regs);
+ gdb_register_coprocessor(cs, aarch64_gdb_get_sve_reg,
+ aarch64_gdb_set_sve_reg, feature, 0);
+ } else {
+ gdb_register_coprocessor(cs, aarch64_gdb_get_fpu_reg,
+ aarch64_gdb_set_fpu_reg,
+ gdb_find_static_feature("aarch64-fpu.xml"),
+ 0);
+ }
+ /*
+ * Note that we report pauth information via the feature name
+ * org.gnu.gdb.aarch64.pauth_v2, not org.gnu.gdb.aarch64.pauth.
+ * GDB versions 9 through 12 have a bug where they will crash
+ * if they see the latter XML from QEMU.
+ */
+ if (isar_feature_aa64_pauth(&cpu->isar)) {
+ gdb_register_coprocessor(cs, aarch64_gdb_get_pauth_reg,
+ aarch64_gdb_set_pauth_reg,
+ gdb_find_static_feature("aarch64-pauth.xml"),
+ 0);
+ }
+#endif
+ } else {
+ if (arm_feature(env, ARM_FEATURE_NEON)) {
+ gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg,
+ gdb_find_static_feature("arm-neon.xml"),
+ 0);
+ } else if (cpu_isar_feature(aa32_simd_r32, cpu)) {
+ gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg,
+ gdb_find_static_feature("arm-vfp3.xml"),
+ 0);
+ } else if (cpu_isar_feature(aa32_vfp_simd, cpu)) {
+ gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg,
+ gdb_find_static_feature("arm-vfp.xml"), 0);
+ }
+ if (!arm_feature(env, ARM_FEATURE_M)) {
+ /*
+ * A and R profile have FP sysregs FPEXC and FPSID that we
+ * expose to gdb.
+ */
+ gdb_register_coprocessor(cs, vfp_gdb_get_sysreg, vfp_gdb_set_sysreg,
+ gdb_find_static_feature("arm-vfp-sysregs.xml"),
+ 0);
+ }
+ }
+ if (cpu_isar_feature(aa32_mve, cpu) && tcg_enabled()) {
+ gdb_register_coprocessor(cs, mve_gdb_get_reg, mve_gdb_set_reg,
+ gdb_find_static_feature("arm-m-profile-mve.xml"),
+ 0);
+ }
+ gdb_register_coprocessor(cs, arm_gdb_get_sysreg, arm_gdb_set_sysreg,
+ arm_gen_dynamic_sysreg_feature(cs, cs->gdb_num_regs),
+ 0);
+
+#ifdef CONFIG_TCG
+ if (arm_feature(env, ARM_FEATURE_M) && tcg_enabled()) {
+ gdb_register_coprocessor(cs,
+ arm_gdb_get_m_systemreg, arm_gdb_set_m_systemreg,
+ arm_gen_dynamic_m_systemreg_feature(cs, cs->gdb_num_regs), 0);
+#ifndef CONFIG_USER_ONLY
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ gdb_register_coprocessor(cs,
+ arm_gdb_get_m_secextreg, arm_gdb_set_m_secextreg,
+ arm_gen_dynamic_m_secextreg_feature(cs, cs->gdb_num_regs), 0);
+ }
+#endif
}
- return NULL;
+#endif /* CONFIG_TCG */
}
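For reference, the new interface used throughout the file above pairs a GByteArray-based getter with a raw-byte setter, registered against a GDBFeature. A minimal, hypothetical sketch of that pattern follows (the function names, the register chosen and the feature XML name are placeholders, not part of this series):

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/gdbstub.h"
#include "gdbstub/helpers.h"

static int demo_gdb_get_reg(CPUState *cs, GByteArray *buf, int reg)
{
    ARMCPU *cpu = ARM_CPU(cs);

    /* Getters append to the GByteArray and return the number of bytes. */
    return reg == 0 ? gdb_get_reg32(buf, cpu->env.regs[0]) : 0;
}

static int demo_gdb_set_reg(CPUState *cs, uint8_t *buf, int reg)
{
    ARMCPU *cpu = ARM_CPU(cs);

    /* Setters still consume raw bytes and return how many they used. */
    if (reg == 0) {
        cpu->env.regs[0] = ldl_p(buf);
        return 4;
    }
    return 0;
}

static void demo_register(CPUState *cs)
{
    /* Registered the same way as the real features above. */
    gdb_register_coprocessor(cs, demo_gdb_get_reg, demo_gdb_set_reg,
                             gdb_find_static_feature("demo-feature.xml"), 0);
}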
diff --git a/target/arm/gdbstub64.c b/target/arm/gdbstub64.c
index 49bc3fc521..caa31ff3fa 100644
--- a/target/arm/gdbstub64.c
+++ b/target/arm/gdbstub64.c
@@ -6,7 +6,7 @@
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -17,11 +17,12 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qemu/log.h"
#include "cpu.h"
-#include "exec/gdbstub.h"
+#include "internals.h"
+#include "gdbstub/helpers.h"
-int aarch64_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
+int aarch64_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
{
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
@@ -70,3 +71,313 @@ int aarch64_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
/* Unknown register. */
return 0;
}
+
+int aarch64_gdb_get_fpu_reg(CPUState *cs, GByteArray *buf, int reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ switch (reg) {
+ case 0 ... 31:
+ {
+ /* 128 bit FP register - quads are in LE order */
+ uint64_t *q = aa64_vfp_qreg(env, reg);
+ return gdb_get_reg128(buf, q[1], q[0]);
+ }
+ case 32:
+ /* FPSR */
+ return gdb_get_reg32(buf, vfp_get_fpsr(env));
+ case 33:
+ /* FPCR */
+ return gdb_get_reg32(buf, vfp_get_fpcr(env));
+ default:
+ return 0;
+ }
+}
+
+int aarch64_gdb_set_fpu_reg(CPUState *cs, uint8_t *buf, int reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ switch (reg) {
+ case 0 ... 31:
+ /* 128 bit FP register */
+ {
+ uint64_t *q = aa64_vfp_qreg(env, reg);
+ q[0] = ldq_le_p(buf);
+ q[1] = ldq_le_p(buf + 8);
+ return 16;
+ }
+ case 32:
+ /* FPSR */
+ vfp_set_fpsr(env, ldl_p(buf));
+ return 4;
+ case 33:
+ /* FPCR */
+ vfp_set_fpcr(env, ldl_p(buf));
+ return 4;
+ default:
+ return 0;
+ }
+}
+
+int aarch64_gdb_get_sve_reg(CPUState *cs, GByteArray *buf, int reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ switch (reg) {
+ /* The first 32 registers are the zregs */
+ case 0 ... 31:
+ {
+ int vq, len = 0;
+ for (vq = 0; vq < cpu->sve_max_vq; vq++) {
+ len += gdb_get_reg128(buf,
+ env->vfp.zregs[reg].d[vq * 2 + 1],
+ env->vfp.zregs[reg].d[vq * 2]);
+ }
+ return len;
+ }
+ case 32:
+ return gdb_get_reg32(buf, vfp_get_fpsr(env));
+ case 33:
+ return gdb_get_reg32(buf, vfp_get_fpcr(env));
+ /* then 16 predicates and the ffr */
+ case 34 ... 50:
+ {
+ int preg = reg - 34;
+ int vq, len = 0;
+ for (vq = 0; vq < cpu->sve_max_vq; vq = vq + 4) {
+ len += gdb_get_reg64(buf, env->vfp.pregs[preg].p[vq / 4]);
+ }
+ return len;
+ }
+ case 51:
+ {
+ /*
+ * We report the vector length in Vector Granules (VG), i.e. 64-bit
+ * units within a Z reg, while ZCR works in Vector Quads (VQ), i.e. 128-bit chunks.
+ */
+ int vq = sve_vqm1_for_el(env, arm_current_el(env)) + 1;
+ return gdb_get_reg64(buf, vq * 2);
+ }
+ default:
+ /* gdbstub asked for something out of our range */
+ qemu_log_mask(LOG_UNIMP, "%s: out of range register %d", __func__, reg);
+ break;
+ }
+
+ return 0;
+}
+
+int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ switch (reg) {
+ /* The first 32 registers are the zregs */
+ case 0 ... 31:
+ {
+ int vq, len = 0;
+ uint64_t *p = (uint64_t *) buf;
+ for (vq = 0; vq < cpu->sve_max_vq; vq++) {
+ env->vfp.zregs[reg].d[vq * 2 + 1] = *p++;
+ env->vfp.zregs[reg].d[vq * 2] = *p++;
+ len += 16;
+ }
+ return len;
+ }
+ case 32:
+ vfp_set_fpsr(env, *(uint32_t *)buf);
+ return 4;
+ case 33:
+ vfp_set_fpcr(env, *(uint32_t *)buf);
+ return 4;
+ case 34 ... 50:
+ {
+ int preg = reg - 34;
+ int vq, len = 0;
+ uint64_t *p = (uint64_t *) buf;
+ for (vq = 0; vq < cpu->sve_max_vq; vq = vq + 4) {
+ env->vfp.pregs[preg].p[vq / 4] = *p++;
+ len += 8;
+ }
+ return len;
+ }
+ case 51:
+ /* cannot set vg via gdbstub */
+ return 0;
+ default:
+ /* gdbstub asked for something out of our range */
+ break;
+ }
+
+ return 0;
+}
+
+int aarch64_gdb_get_pauth_reg(CPUState *cs, GByteArray *buf, int reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ switch (reg) {
+ case 0: /* pauth_dmask */
+ case 1: /* pauth_cmask */
+ case 2: /* pauth_dmask_high */
+ case 3: /* pauth_cmask_high */
+ /*
+ * Note that older versions of this feature only contained
+ * pauth_{d,c}mask, for use with Linux user processes, and
+ * thus exclusively in the low half of the address space.
+ *
+ * To support system mode, and to debug kernels, two new regs
+ * were added to cover the high half of the address space.
+ * For the purpose of pauth_ptr_mask, we can use any well-formed
+ * address within the address space half -- here, 0 and -1.
+ */
+ {
+ bool is_data = !(reg & 1);
+ bool is_high = reg & 2;
+ ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env);
+ ARMVAParameters param;
+
+ param = aa64_va_parameters(env, -is_high, mmu_idx, is_data, false);
+ return gdb_get_reg64(buf, pauth_ptr_mask(param));
+ }
+ default:
+ return 0;
+ }
+}
+
+int aarch64_gdb_set_pauth_reg(CPUState *cs, uint8_t *buf, int reg)
+{
+ /* All pseudo registers are read-only. */
+ return 0;
+}
+
+static void output_vector_union_type(GDBFeatureBuilder *builder, int reg_width,
+ const char *name)
+{
+ struct TypeSize {
+ const char *gdb_type;
+ short size;
+ char sz, suffix;
+ };
+
+ static const struct TypeSize vec_lanes[] = {
+ /* quads */
+ { "uint128", 128, 'q', 'u' },
+ { "int128", 128, 'q', 's' },
+ /* 64 bit */
+ { "ieee_double", 64, 'd', 'f' },
+ { "uint64", 64, 'd', 'u' },
+ { "int64", 64, 'd', 's' },
+ /* 32 bit */
+ { "ieee_single", 32, 's', 'f' },
+ { "uint32", 32, 's', 'u' },
+ { "int32", 32, 's', 's' },
+ /* 16 bit */
+ { "ieee_half", 16, 'h', 'f' },
+ { "uint16", 16, 'h', 'u' },
+ { "int16", 16, 'h', 's' },
+ /* bytes */
+ { "uint8", 8, 'b', 'u' },
+ { "int8", 8, 'b', 's' },
+ };
+
+ static const char suf[] = { 'b', 'h', 's', 'd', 'q' };
+ int i, j;
+
+ /* First define types and totals in a whole VL */
+ for (i = 0; i < ARRAY_SIZE(vec_lanes); i++) {
+ gdb_feature_builder_append_tag(
+ builder, "<vector id=\"%s%c%c\" type=\"%s\" count=\"%d\"/>",
+ name, vec_lanes[i].sz, vec_lanes[i].suffix,
+ vec_lanes[i].gdb_type, reg_width / vec_lanes[i].size);
+ }
+
+ /*
+ * Now define a union for each size group containing unsigned and
+ * signed and potentially float versions of each size from 128 to
+ * 8 bits.
+ */
+ for (i = 0; i < ARRAY_SIZE(suf); i++) {
+ int bits = 8 << i;
+
+ gdb_feature_builder_append_tag(builder, "<union id=\"%sn%c\">",
+ name, suf[i]);
+ for (j = 0; j < ARRAY_SIZE(vec_lanes); j++) {
+ if (vec_lanes[j].size == bits) {
+ gdb_feature_builder_append_tag(
+ builder, "<field name=\"%c\" type=\"%s%c%c\"/>",
+ vec_lanes[j].suffix, name,
+ vec_lanes[j].sz, vec_lanes[j].suffix);
+ }
+ }
+ gdb_feature_builder_append_tag(builder, "</union>");
+ }
+
+ /* And now the final union of unions */
+ gdb_feature_builder_append_tag(builder, "<union id=\"%s\">", name);
+ for (i = ARRAY_SIZE(suf) - 1; i >= 0; i--) {
+ gdb_feature_builder_append_tag(builder,
+ "<field name=\"%c\" type=\"%sn%c\"/>",
+ suf[i], name, suf[i]);
+ }
+ gdb_feature_builder_append_tag(builder, "</union>");
+}
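With name "svev" and a 512-bit vector length, the helper above emits target-description fragments along these lines (abbreviated, shown only to illustrate the builder output, not literal patch content):

  <vector id="svevqu" type="uint128" count="4"/>
  <vector id="svevdf" type="ieee_double" count="8"/>
  ...
  <union id="svevnq">
    <field name="u" type="svevqu"/>
    <field name="s" type="svevqs"/>
  </union>
  ...
  <union id="svev">
    <field name="q" type="svevnq"/>
    <field name="d" type="svevnd"/>
    <field name="s" type="svevns"/>
    <field name="h" type="svevnh"/>
    <field name="b" type="svevnb"/>
  </union>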
+
+GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cs, int base_reg)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ int reg_width = cpu->sve_max_vq * 128;
+ int pred_width = cpu->sve_max_vq * 16;
+ GDBFeatureBuilder builder;
+ char *name;
+ int reg = 0;
+ int i;
+
+ gdb_feature_builder_init(&builder, &cpu->dyn_svereg_feature.desc,
+ "org.gnu.gdb.aarch64.sve", "sve-registers.xml",
+ base_reg);
+
+ /* Create the vector union type. */
+ output_vector_union_type(&builder, reg_width, "svev");
+
+ /* Create the predicate vector type. */
+ gdb_feature_builder_append_tag(
+ &builder, "<vector id=\"svep\" type=\"uint8\" count=\"%d\"/>",
+ pred_width / 8);
+
+ /* Define the vector registers. */
+ for (i = 0; i < 32; i++) {
+ name = g_strdup_printf("z%d", i);
+ gdb_feature_builder_append_reg(&builder, name, reg_width, reg++,
+ "svev", NULL);
+ }
+
+ /* fpscr & status registers */
+ gdb_feature_builder_append_reg(&builder, "fpsr", 32, reg++,
+ "int", "float");
+ gdb_feature_builder_append_reg(&builder, "fpcr", 32, reg++,
+ "int", "float");
+
+ /* Define the predicate registers. */
+ for (i = 0; i < 16; i++) {
+ name = g_strdup_printf("p%d", i);
+ gdb_feature_builder_append_reg(&builder, name, pred_width, reg++,
+ "svep", NULL);
+ }
+ gdb_feature_builder_append_reg(&builder, "ffr", pred_width, reg++,
+ "svep", "vector");
+
+ /* Define the vector length pseudo-register. */
+ gdb_feature_builder_append_reg(&builder, "vg", 64, reg++, "int", NULL);
+
+ gdb_feature_builder_end(&builder);
+
+ return &cpu->dyn_svereg_feature.desc;
+}
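As a quick worked example of the widths used above, assuming a hypothetical sve_max_vq of 4 (512-bit Z registers); the numbers follow directly from the expressions in the function:

int sve_max_vq = 4;                 /* hypothetical maximum vector quads */
int reg_width  = sve_max_vq * 128;  /* 512 bits per Z register */
int pred_width = sve_max_vq * 16;   /* 64 bits per P register and FFR */
int vg         = sve_max_vq * 2;    /* "vg" pseudo-register: 64-bit granules */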
diff --git a/target/arm/gtimer.h b/target/arm/gtimer.h
new file mode 100644
index 0000000000..b992941bef
--- /dev/null
+++ b/target/arm/gtimer.h
@@ -0,0 +1,21 @@
+/*
+ * ARM generic timer definitions for Arm A-class CPU
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef TARGET_ARM_GTIMER_H
+#define TARGET_ARM_GTIMER_H
+
+enum {
+ GTIMER_PHYS = 0,
+ GTIMER_VIRT = 1,
+ GTIMER_HYP = 2,
+ GTIMER_SEC = 3,
+ GTIMER_HYPVIRT = 4,
+#define NUM_GTIMERS 5
+};
+
+#endif
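The enum values are used as indexes into the per-CPU generic timer state; a sketch of typical usage (the c14_timer field layout is assumed from QEMU's cpu.h of this era, so treat the names as an assumption rather than part of the patch):

/* Assumed: env->cp15.c14_timer[] is indexed by the GTIMER_* values. */
uint64_t cval = env->cp15.c14_timer[GTIMER_VIRT].cval; /* compare value */
uint64_t ctl  = env->cp15.c14_timer[GTIMER_VIRT].ctl;  /* CNTx_CTL bits */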
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
deleted file mode 100644
index 7f6ad3000b..0000000000
--- a/target/arm/helper-a64.c
+++ /dev/null
@@ -1,902 +0,0 @@
-/*
- * AArch64 specific helpers
- *
- * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "exec/gdbstub.h"
-#include "exec/helper-proto.h"
-#include "qemu/host-utils.h"
-#include "qemu/log.h"
-#include "sysemu/sysemu.h"
-#include "qemu/bitops.h"
-#include "internals.h"
-#include "qemu/crc32c.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
-#include "qemu/int128.h"
-#include "tcg.h"
-#include "fpu/softfloat.h"
-#include <zlib.h> /* For crc32 */
-
-/* C2.4.7 Multiply and divide */
-/* special cases for 0 and LLONG_MIN are mandated by the standard */
-uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
-{
- if (den == 0) {
- return 0;
- }
- return num / den;
-}
-
-int64_t HELPER(sdiv64)(int64_t num, int64_t den)
-{
- if (den == 0) {
- return 0;
- }
- if (num == LLONG_MIN && den == -1) {
- return LLONG_MIN;
- }
- return num / den;
-}
-
-uint64_t HELPER(rbit64)(uint64_t x)
-{
- return revbit64(x);
-}
-
-/* Convert a softfloat float_relation_ (as returned by
- * the float*_compare functions) to the correct ARM
- * NZCV flag state.
- */
-static inline uint32_t float_rel_to_flags(int res)
-{
- uint64_t flags;
- switch (res) {
- case float_relation_equal:
- flags = PSTATE_Z | PSTATE_C;
- break;
- case float_relation_less:
- flags = PSTATE_N;
- break;
- case float_relation_greater:
- flags = PSTATE_C;
- break;
- case float_relation_unordered:
- default:
- flags = PSTATE_C | PSTATE_V;
- break;
- }
- return flags;
-}
-
-uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status)
-{
- return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
-}
-
-uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status)
-{
- return float_rel_to_flags(float16_compare(x, y, fp_status));
-}
-
-uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
-{
- return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
-}
-
-uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
-{
- return float_rel_to_flags(float32_compare(x, y, fp_status));
-}
-
-uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
-{
- return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
-}
-
-uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
-{
- return float_rel_to_flags(float64_compare(x, y, fp_status));
-}
-
-float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float32_squash_input_denormal(a, fpst);
- b = float32_squash_input_denormal(b, fpst);
-
- if ((float32_is_zero(a) && float32_is_infinity(b)) ||
- (float32_is_infinity(a) && float32_is_zero(b))) {
- /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
- return make_float32((1U << 30) |
- ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
- }
- return float32_mul(a, b, fpst);
-}
-
-float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float64_squash_input_denormal(a, fpst);
- b = float64_squash_input_denormal(b, fpst);
-
- if ((float64_is_zero(a) && float64_is_infinity(b)) ||
- (float64_is_infinity(a) && float64_is_zero(b))) {
- /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
- return make_float64((1ULL << 62) |
- ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
- }
- return float64_mul(a, b, fpst);
-}
-
-uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
- uint32_t rn, uint32_t numregs)
-{
- /* Helper function for SIMD TBL and TBX. We have to do the table
- * lookup part for the 64 bits worth of indices we're passed in.
- * result is the initial results vector (either zeroes for TBL
- * or some guest values for TBX), rn the register number where
- * the table starts, and numregs the number of registers in the table.
- * We return the results of the lookups.
- */
- int shift;
-
- for (shift = 0; shift < 64; shift += 8) {
- int index = extract64(indices, shift, 8);
- if (index < 16 * numregs) {
- /* Convert index (a byte offset into the virtual table
- * which is a series of 128-bit vectors concatenated)
- * into the correct register element plus a bit offset
- * into that element, bearing in mind that the table
- * can wrap around from V31 to V0.
- */
- int elt = (rn * 2 + (index >> 3)) % 64;
- int bitidx = (index & 7) * 8;
- uint64_t *q = aa64_vfp_qreg(env, elt >> 1);
- uint64_t val = extract64(q[elt & 1], bitidx, 8);
-
- result = deposit64(result, shift, 8, val);
- }
- }
- return result;
-}
-
-/* 64bit/double versions of the neon float compare functions */
-uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
-{
- float_status *fpst = fpstp;
- return -float64_eq_quiet(a, b, fpst);
-}
-
-uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
-{
- float_status *fpst = fpstp;
- return -float64_le(b, a, fpst);
-}
-
-uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
-{
- float_status *fpst = fpstp;
- return -float64_lt(b, a, fpst);
-}
-
-/* Reciprocal step and sqrt step. Note that unlike the A32/T32
- * versions, these do a fully fused multiply-add or
- * multiply-add-and-halve.
- */
-#define float16_two make_float16(0x4000)
-#define float16_three make_float16(0x4200)
-#define float16_one_point_five make_float16(0x3e00)
-
-#define float32_two make_float32(0x40000000)
-#define float32_three make_float32(0x40400000)
-#define float32_one_point_five make_float32(0x3fc00000)
-
-#define float64_two make_float64(0x4000000000000000ULL)
-#define float64_three make_float64(0x4008000000000000ULL)
-#define float64_one_point_five make_float64(0x3FF8000000000000ULL)
-
-uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float16_squash_input_denormal(a, fpst);
- b = float16_squash_input_denormal(b, fpst);
-
- a = float16_chs(a);
- if ((float16_is_infinity(a) && float16_is_zero(b)) ||
- (float16_is_infinity(b) && float16_is_zero(a))) {
- return float16_two;
- }
- return float16_muladd(a, b, float16_two, 0, fpst);
-}
-
-float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float32_squash_input_denormal(a, fpst);
- b = float32_squash_input_denormal(b, fpst);
-
- a = float32_chs(a);
- if ((float32_is_infinity(a) && float32_is_zero(b)) ||
- (float32_is_infinity(b) && float32_is_zero(a))) {
- return float32_two;
- }
- return float32_muladd(a, b, float32_two, 0, fpst);
-}
-
-float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float64_squash_input_denormal(a, fpst);
- b = float64_squash_input_denormal(b, fpst);
-
- a = float64_chs(a);
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
- (float64_is_infinity(b) && float64_is_zero(a))) {
- return float64_two;
- }
- return float64_muladd(a, b, float64_two, 0, fpst);
-}
-
-uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float16_squash_input_denormal(a, fpst);
- b = float16_squash_input_denormal(b, fpst);
-
- a = float16_chs(a);
- if ((float16_is_infinity(a) && float16_is_zero(b)) ||
- (float16_is_infinity(b) && float16_is_zero(a))) {
- return float16_one_point_five;
- }
- return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
-}
-
-float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float32_squash_input_denormal(a, fpst);
- b = float32_squash_input_denormal(b, fpst);
-
- a = float32_chs(a);
- if ((float32_is_infinity(a) && float32_is_zero(b)) ||
- (float32_is_infinity(b) && float32_is_zero(a))) {
- return float32_one_point_five;
- }
- return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
-}
-
-float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float64_squash_input_denormal(a, fpst);
- b = float64_squash_input_denormal(b, fpst);
-
- a = float64_chs(a);
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
- (float64_is_infinity(b) && float64_is_zero(a))) {
- return float64_one_point_five;
- }
- return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
-}
-
-/* Pairwise long add: add pairs of adjacent elements into
- * double-width elements in the result (eg _s8 is an 8x8->16 op)
- */
-uint64_t HELPER(neon_addlp_s8)(uint64_t a)
-{
- uint64_t nsignmask = 0x0080008000800080ULL;
- uint64_t wsignmask = 0x8000800080008000ULL;
- uint64_t elementmask = 0x00ff00ff00ff00ffULL;
- uint64_t tmp1, tmp2;
- uint64_t res, signres;
-
- /* Extract odd elements, sign extend each to a 16 bit field */
- tmp1 = a & elementmask;
- tmp1 ^= nsignmask;
- tmp1 |= wsignmask;
- tmp1 = (tmp1 - nsignmask) ^ wsignmask;
- /* Ditto for the even elements */
- tmp2 = (a >> 8) & elementmask;
- tmp2 ^= nsignmask;
- tmp2 |= wsignmask;
- tmp2 = (tmp2 - nsignmask) ^ wsignmask;
-
- /* calculate the result by summing bits 0..14, 16..22, etc,
- * and then adjusting the sign bits 15, 23, etc manually.
- * This ensures the addition can't overflow the 16 bit field.
- */
- signres = (tmp1 ^ tmp2) & wsignmask;
- res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
- res ^= signres;
-
- return res;
-}
-
-uint64_t HELPER(neon_addlp_u8)(uint64_t a)
-{
- uint64_t tmp;
-
- tmp = a & 0x00ff00ff00ff00ffULL;
- tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
- return tmp;
-}
-
-uint64_t HELPER(neon_addlp_s16)(uint64_t a)
-{
- int32_t reslo, reshi;
-
- reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
- reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
-
- return (uint32_t)reslo | (((uint64_t)reshi) << 32);
-}
-
-uint64_t HELPER(neon_addlp_u16)(uint64_t a)
-{
- uint64_t tmp;
-
- tmp = a & 0x0000ffff0000ffffULL;
- tmp += (a >> 16) & 0x0000ffff0000ffffULL;
- return tmp;
-}
-
-/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
-uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
-{
- float_status *fpst = fpstp;
- uint16_t val16, sbit;
- int16_t exp;
-
- if (float16_is_any_nan(a)) {
- float16 nan = a;
- if (float16_is_signaling_nan(a, fpst)) {
- float_raise(float_flag_invalid, fpst);
- nan = float16_silence_nan(a, fpst);
- }
- if (fpst->default_nan_mode) {
- nan = float16_default_nan(fpst);
- }
- return nan;
- }
-
- a = float16_squash_input_denormal(a, fpst);
-
- val16 = float16_val(a);
- sbit = 0x8000 & val16;
- exp = extract32(val16, 10, 5);
-
- if (exp == 0) {
- return make_float16(deposit32(sbit, 10, 5, 0x1e));
- } else {
- return make_float16(deposit32(sbit, 10, 5, ~exp));
- }
-}
-
-float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
-{
- float_status *fpst = fpstp;
- uint32_t val32, sbit;
- int32_t exp;
-
- if (float32_is_any_nan(a)) {
- float32 nan = a;
- if (float32_is_signaling_nan(a, fpst)) {
- float_raise(float_flag_invalid, fpst);
- nan = float32_silence_nan(a, fpst);
- }
- if (fpst->default_nan_mode) {
- nan = float32_default_nan(fpst);
- }
- return nan;
- }
-
- a = float32_squash_input_denormal(a, fpst);
-
- val32 = float32_val(a);
- sbit = 0x80000000ULL & val32;
- exp = extract32(val32, 23, 8);
-
- if (exp == 0) {
- return make_float32(sbit | (0xfe << 23));
- } else {
- return make_float32(sbit | (~exp & 0xff) << 23);
- }
-}
-
-float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
-{
- float_status *fpst = fpstp;
- uint64_t val64, sbit;
- int64_t exp;
-
- if (float64_is_any_nan(a)) {
- float64 nan = a;
- if (float64_is_signaling_nan(a, fpst)) {
- float_raise(float_flag_invalid, fpst);
- nan = float64_silence_nan(a, fpst);
- }
- if (fpst->default_nan_mode) {
- nan = float64_default_nan(fpst);
- }
- return nan;
- }
-
- a = float64_squash_input_denormal(a, fpst);
-
- val64 = float64_val(a);
- sbit = 0x8000000000000000ULL & val64;
- exp = extract64(float64_val(a), 52, 11);
-
- if (exp == 0) {
- return make_float64(sbit | (0x7feULL << 52));
- } else {
- return make_float64(sbit | (~exp & 0x7ffULL) << 52);
- }
-}
-
-float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
-{
- /* Von Neumann rounding is implemented by using round-to-zero
- * and then setting the LSB of the result if Inexact was raised.
- */
- float32 r;
- float_status *fpst = &env->vfp.fp_status;
- float_status tstat = *fpst;
- int exflags;
-
- set_float_rounding_mode(float_round_to_zero, &tstat);
- set_float_exception_flags(0, &tstat);
- r = float64_to_float32(a, &tstat);
- exflags = get_float_exception_flags(&tstat);
- if (exflags & float_flag_inexact) {
- r = make_float32(float32_val(r) | 1);
- }
- exflags |= get_float_exception_flags(fpst);
- set_float_exception_flags(exflags, fpst);
- return r;
-}
-
-/* 64-bit versions of the CRC helpers. Note that although the operation
- * (and the prototypes of crc32c() and crc32() mean that only the bottom
- * 32 bits of the accumulator and result are used, we pass and return
- * uint64_t for convenience of the generated code. Unlike the 32-bit
- * instruction set versions, val may genuinely have 64 bits of data in it.
- * The upper bytes of val (above the number specified by 'bytes') must have
- * been zeroed out by the caller.
- */
-uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
-{
- uint8_t buf[8];
-
- stq_le_p(buf, val);
-
- /* zlib crc32 converts the accumulator and output to one's complement. */
- return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
-}
-
-uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
-{
- uint8_t buf[8];
-
- stq_le_p(buf, val);
-
- /* Linux crc32c converts the output to one's complement. */
- return crc32c(acc, buf, bytes) ^ 0xffffffff;
-}
-
-/* Returns 0 on success; 1 otherwise. */
-static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr,
- uint64_t new_lo, uint64_t new_hi,
- bool parallel, uintptr_t ra)
-{
- Int128 oldv, cmpv, newv;
- bool success;
-
- cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
- newv = int128_make128(new_lo, new_hi);
-
- if (parallel) {
-#ifndef CONFIG_ATOMIC128
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
- int mem_idx = cpu_mmu_index(env, false);
- TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
- oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
- success = int128_eq(oldv, cmpv);
-#endif
- } else {
- uint64_t o0, o1;
-
-#ifdef CONFIG_USER_ONLY
- /* ??? Enforce alignment. */
- uint64_t *haddr = g2h(addr);
-
- helper_retaddr = ra;
- o0 = ldq_le_p(haddr + 0);
- o1 = ldq_le_p(haddr + 1);
- oldv = int128_make128(o0, o1);
-
- success = int128_eq(oldv, cmpv);
- if (success) {
- stq_le_p(haddr + 0, int128_getlo(newv));
- stq_le_p(haddr + 1, int128_gethi(newv));
- }
- helper_retaddr = 0;
-#else
- int mem_idx = cpu_mmu_index(env, false);
- TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
- TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
-
- o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
- o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
- oldv = int128_make128(o0, o1);
-
- success = int128_eq(oldv, cmpv);
- if (success) {
- helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
- helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
- }
-#endif
- }
-
- return !success;
-}
-
-uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
- uint64_t new_lo, uint64_t new_hi)
-{
- return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC());
-}
-
-uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
- uint64_t new_lo, uint64_t new_hi)
-{
- return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC());
-}
-
-static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr,
- uint64_t new_lo, uint64_t new_hi,
- bool parallel, uintptr_t ra)
-{
- Int128 oldv, cmpv, newv;
- bool success;
-
- /* high and low need to be switched here because this is not actually a
- * 128bit store but two doublewords stored consecutively
- */
- cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
- newv = int128_make128(new_hi, new_lo);
-
- if (parallel) {
-#ifndef CONFIG_ATOMIC128
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
- int mem_idx = cpu_mmu_index(env, false);
- TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
- oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
- success = int128_eq(oldv, cmpv);
-#endif
- } else {
- uint64_t o0, o1;
-
-#ifdef CONFIG_USER_ONLY
- /* ??? Enforce alignment. */
- uint64_t *haddr = g2h(addr);
-
- helper_retaddr = ra;
- o1 = ldq_be_p(haddr + 0);
- o0 = ldq_be_p(haddr + 1);
- oldv = int128_make128(o0, o1);
-
- success = int128_eq(oldv, cmpv);
- if (success) {
- stq_be_p(haddr + 0, int128_gethi(newv));
- stq_be_p(haddr + 1, int128_getlo(newv));
- }
- helper_retaddr = 0;
-#else
- int mem_idx = cpu_mmu_index(env, false);
- TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
- TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);
-
- o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
- o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
- oldv = int128_make128(o0, o1);
-
- success = int128_eq(oldv, cmpv);
- if (success) {
- helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
- helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
- }
-#endif
- }
-
- return !success;
-}
-
-uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
- uint64_t new_lo, uint64_t new_hi)
-{
- return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC());
-}
-
-uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
- uint64_t new_lo, uint64_t new_hi)
-{
- return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
-}
-
-/* Writes back the old data into Rs. */
-void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
- uint64_t new_lo, uint64_t new_hi)
-{
- uintptr_t ra = GETPC();
-#ifndef CONFIG_ATOMIC128
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
- Int128 oldv, cmpv, newv;
-
- cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
- newv = int128_make128(new_lo, new_hi);
-
- int mem_idx = cpu_mmu_index(env, false);
- TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
- oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
-
- env->xregs[rs] = int128_getlo(oldv);
- env->xregs[rs + 1] = int128_gethi(oldv);
-#endif
-}
-
-void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
- uint64_t new_hi, uint64_t new_lo)
-{
- uintptr_t ra = GETPC();
-#ifndef CONFIG_ATOMIC128
- cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
-#else
- Int128 oldv, cmpv, newv;
-
- cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
- newv = int128_make128(new_lo, new_hi);
-
- int mem_idx = cpu_mmu_index(env, false);
- TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
- oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
-
- env->xregs[rs + 1] = int128_getlo(oldv);
- env->xregs[rs] = int128_gethi(oldv);
-#endif
-}
-
-/*
- * AdvSIMD half-precision
- */
-
-#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))
-
-#define ADVSIMD_HALFOP(name) \
-uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \
-{ \
- float_status *fpst = fpstp; \
- return float16_ ## name(a, b, fpst); \
-}
-
-ADVSIMD_HALFOP(add)
-ADVSIMD_HALFOP(sub)
-ADVSIMD_HALFOP(mul)
-ADVSIMD_HALFOP(div)
-ADVSIMD_HALFOP(min)
-ADVSIMD_HALFOP(max)
-ADVSIMD_HALFOP(minnum)
-ADVSIMD_HALFOP(maxnum)
-
-#define ADVSIMD_TWOHALFOP(name) \
-uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
-{ \
- float16 a1, a2, b1, b2; \
- uint32_t r1, r2; \
- float_status *fpst = fpstp; \
- a1 = extract32(two_a, 0, 16); \
- a2 = extract32(two_a, 16, 16); \
- b1 = extract32(two_b, 0, 16); \
- b2 = extract32(two_b, 16, 16); \
- r1 = float16_ ## name(a1, b1, fpst); \
- r2 = float16_ ## name(a2, b2, fpst); \
- return deposit32(r1, 16, 16, r2); \
-}
-
-ADVSIMD_TWOHALFOP(add)
-ADVSIMD_TWOHALFOP(sub)
-ADVSIMD_TWOHALFOP(mul)
-ADVSIMD_TWOHALFOP(div)
-ADVSIMD_TWOHALFOP(min)
-ADVSIMD_TWOHALFOP(max)
-ADVSIMD_TWOHALFOP(minnum)
-ADVSIMD_TWOHALFOP(maxnum)
-
-/* Data processing - scalar floating-point and advanced SIMD */
-static float16 float16_mulx(float16 a, float16 b, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- a = float16_squash_input_denormal(a, fpst);
- b = float16_squash_input_denormal(b, fpst);
-
- if ((float16_is_zero(a) && float16_is_infinity(b)) ||
- (float16_is_infinity(a) && float16_is_zero(b))) {
- /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
- return make_float16((1U << 14) |
- ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
- }
- return float16_mul(a, b, fpst);
-}
-
-ADVSIMD_HALFOP(mulx)
-ADVSIMD_TWOHALFOP(mulx)
-
-/* fused multiply-accumulate */
-uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
- void *fpstp)
-{
- float_status *fpst = fpstp;
- return float16_muladd(a, b, c, 0, fpst);
-}
-
-uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
- uint32_t two_c, void *fpstp)
-{
- float_status *fpst = fpstp;
- float16 a1, a2, b1, b2, c1, c2;
- uint32_t r1, r2;
- a1 = extract32(two_a, 0, 16);
- a2 = extract32(two_a, 16, 16);
- b1 = extract32(two_b, 0, 16);
- b2 = extract32(two_b, 16, 16);
- c1 = extract32(two_c, 0, 16);
- c2 = extract32(two_c, 16, 16);
- r1 = float16_muladd(a1, b1, c1, 0, fpst);
- r2 = float16_muladd(a2, b2, c2, 0, fpst);
- return deposit32(r1, 16, 16, r2);
-}
-
-/*
- * Floating point comparisons produce an integer result. Softfloat
- * routines return float_relation types which we convert to the 0/-1
- * Neon requires.
- */
-
-#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0
-
-uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp)
-{
- float_status *fpst = fpstp;
- int compare = float16_compare_quiet(a, b, fpst);
- return ADVSIMD_CMPRES(compare == float_relation_equal);
-}
-
-uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp)
-{
- float_status *fpst = fpstp;
- int compare = float16_compare(a, b, fpst);
- return ADVSIMD_CMPRES(compare == float_relation_greater ||
- compare == float_relation_equal);
-}
-
-uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp)
-{
- float_status *fpst = fpstp;
- int compare = float16_compare(a, b, fpst);
- return ADVSIMD_CMPRES(compare == float_relation_greater);
-}
-
-uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
-{
- float_status *fpst = fpstp;
- float16 f0 = float16_abs(a);
- float16 f1 = float16_abs(b);
- int compare = float16_compare(f0, f1, fpst);
- return ADVSIMD_CMPRES(compare == float_relation_greater ||
- compare == float_relation_equal);
-}
-
-uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
-{
- float_status *fpst = fpstp;
- float16 f0 = float16_abs(a);
- float16 f1 = float16_abs(b);
- int compare = float16_compare(f0, f1, fpst);
- return ADVSIMD_CMPRES(compare == float_relation_greater);
-}
-
-/* round to integral */
-uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status)
-{
- return float16_round_to_int(x, fp_status);
-}
-
-uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
-{
- int old_flags = get_float_exception_flags(fp_status), new_flags;
- float16 ret;
-
- ret = float16_round_to_int(x, fp_status);
-
- /* Suppress any inexact exceptions the conversion produced */
- if (!(old_flags & float_flag_inexact)) {
- new_flags = get_float_exception_flags(fp_status);
- set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
- }
-
- return ret;
-}
-
-/*
- * Half-precision floating point conversion functions
- *
- * There are a multitude of conversion functions with various
- * different rounding modes. This is dealt with by the calling code
- * setting the mode appropriately before calling the helper.
- */
-
-uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- /* Invalid if we are passed a NaN */
- if (float16_is_any_nan(a)) {
- float_raise(float_flag_invalid, fpst);
- return 0;
- }
- return float16_to_int16(a, fpst);
-}
-
-uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
-{
- float_status *fpst = fpstp;
-
- /* Invalid if we are passed a NaN */
- if (float16_is_any_nan(a)) {
- float_raise(float_flag_invalid, fpst);
- return 0;
- }
- return float16_to_uint16(a, fpst);
-}
-
-/*
- * Square Root and Reciprocal square root
- */
-
-uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
-{
- float_status *s = fpstp;
-
- return float16_sqrt(a, s);
-}
-
-
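The pairwise long-add helpers being removed here (re-homed under tcg/ elsewhere in this series, per the diffstat) rely on masking adjacent bytes into separate 16-bit lanes so the sums cannot carry across lanes. A standalone sketch of the unsigned byte case, with illustrative values only:

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    /* Sum adjacent byte pairs into 16-bit lanes, as neon_addlp_u8 did. */
    uint64_t a   = 0x01020304050607ffULL;
    uint64_t lo  = a & 0x00ff00ff00ff00ffULL;        /* bytes 0,2,4,6 */
    uint64_t hi  = (a >> 8) & 0x00ff00ff00ff00ffULL; /* bytes 1,3,5,7 */
    uint64_t res = lo + hi;          /* lane 0: 0xff + 0x07 = 0x0106 */

    printf("%016" PRIx64 "\n", res); /* prints 00030007000b0106 */
    return 0;
}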
diff --git a/target/arm/helper.c b/target/arm/helper.c
index e3946562aa..a620481d7c 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -1,162 +1,40 @@
+/*
+ * ARM generic helpers.
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
#include "qemu/osdep.h"
-#include "target/arm/idau.h"
+#include "qemu/log.h"
#include "trace.h"
#include "cpu.h"
#include "internals.h"
-#include "exec/gdbstub.h"
+#include "cpu-features.h"
#include "exec/helper-proto.h"
-#include "qemu/host-utils.h"
-#include "sysemu/arch_init.h"
-#include "sysemu/sysemu.h"
+#include "qemu/main-loop.h"
+#include "qemu/timer.h"
#include "qemu/bitops.h"
#include "qemu/crc32c.h"
+#include "qemu/qemu-print.h"
#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
-#include "arm_ldst.h"
#include <zlib.h> /* For crc32 */
-#include "exec/semihost.h"
+#include "hw/irq.h"
+#include "sysemu/cpu-timers.h"
#include "sysemu/kvm.h"
-#include "fpu/softfloat.h"
-#include "qemu/range.h"
-
-#define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */
-
-#ifndef CONFIG_USER_ONLY
-/* Cacheability and shareability attributes for a memory access */
-typedef struct ARMCacheAttrs {
- unsigned int attrs:8; /* as in the MAIR register encoding */
- unsigned int shareability:2; /* as in the SH field of the VMSAv8-64 PTEs */
-} ARMCacheAttrs;
-
-static bool get_phys_addr(CPUARMState *env, target_ulong address,
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
- hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
- target_ulong *page_size,
- ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs);
-
-static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
- hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
- target_ulong *page_size_ptr,
- ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs);
-
-/* Security attributes for an address, as returned by v8m_security_lookup. */
-typedef struct V8M_SAttributes {
- bool subpage; /* true if these attrs don't cover the whole TARGET_PAGE */
- bool ns;
- bool nsc;
- uint8_t sregion;
- bool srvalid;
- uint8_t iregion;
- bool irvalid;
-} V8M_SAttributes;
-
-static void v8m_security_lookup(CPUARMState *env, uint32_t address,
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
- V8M_SAttributes *sattrs);
+#include "sysemu/tcg.h"
+#include "qapi/error.h"
+#include "qemu/guest-random.h"
+#ifdef CONFIG_TCG
+#include "semihosting/common-semi.h"
#endif
+#include "cpregs.h"
+#include "target/arm/gtimer.h"
-static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg)
-{
- int nregs;
-
- /* VFP data registers are always little-endian. */
- nregs = arm_feature(env, ARM_FEATURE_VFP3) ? 32 : 16;
- if (reg < nregs) {
- stq_le_p(buf, *aa32_vfp_dreg(env, reg));
- return 8;
- }
- if (arm_feature(env, ARM_FEATURE_NEON)) {
- /* Aliases for Q regs. */
- nregs += 16;
- if (reg < nregs) {
- uint64_t *q = aa32_vfp_qreg(env, reg - 32);
- stq_le_p(buf, q[0]);
- stq_le_p(buf + 8, q[1]);
- return 16;
- }
- }
- switch (reg - nregs) {
- case 0: stl_p(buf, env->vfp.xregs[ARM_VFP_FPSID]); return 4;
- case 1: stl_p(buf, env->vfp.xregs[ARM_VFP_FPSCR]); return 4;
- case 2: stl_p(buf, env->vfp.xregs[ARM_VFP_FPEXC]); return 4;
- }
- return 0;
-}
-
-static int vfp_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg)
-{
- int nregs;
-
- nregs = arm_feature(env, ARM_FEATURE_VFP3) ? 32 : 16;
- if (reg < nregs) {
- *aa32_vfp_dreg(env, reg) = ldq_le_p(buf);
- return 8;
- }
- if (arm_feature(env, ARM_FEATURE_NEON)) {
- nregs += 16;
- if (reg < nregs) {
- uint64_t *q = aa32_vfp_qreg(env, reg - 32);
- q[0] = ldq_le_p(buf);
- q[1] = ldq_le_p(buf + 8);
- return 16;
- }
- }
- switch (reg - nregs) {
- case 0: env->vfp.xregs[ARM_VFP_FPSID] = ldl_p(buf); return 4;
- case 1: env->vfp.xregs[ARM_VFP_FPSCR] = ldl_p(buf); return 4;
- case 2: env->vfp.xregs[ARM_VFP_FPEXC] = ldl_p(buf) & (1 << 30); return 4;
- }
- return 0;
-}
-
-static int aarch64_fpu_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg)
-{
- switch (reg) {
- case 0 ... 31:
- /* 128 bit FP register */
- {
- uint64_t *q = aa64_vfp_qreg(env, reg);
- stq_le_p(buf, q[0]);
- stq_le_p(buf + 8, q[1]);
- return 16;
- }
- case 32:
- /* FPSR */
- stl_p(buf, vfp_get_fpsr(env));
- return 4;
- case 33:
- /* FPCR */
- stl_p(buf, vfp_get_fpcr(env));
- return 4;
- default:
- return 0;
- }
-}
+#define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */
-static int aarch64_fpu_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg)
-{
- switch (reg) {
- case 0 ... 31:
- /* 128 bit FP register */
- {
- uint64_t *q = aa64_vfp_qreg(env, reg);
- q[0] = ldq_le_p(buf);
- q[1] = ldq_le_p(buf + 8);
- return 16;
- }
- case 32:
- /* FPSR */
- vfp_set_fpsr(env, ldl_p(buf));
- return 4;
- case 33:
- /* FPCR */
- vfp_set_fpcr(env, ldl_p(buf));
- return 4;
- default:
- return 0;
- }
-}
+static void switch_mode(CPUARMState *env, int mode);
static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
@@ -168,8 +46,7 @@ static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri)
}
}
-static void raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+void raw_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
assert(ri->fieldoffset);
if (cpreg_field_is_64bit(ri)) {
@@ -201,7 +78,8 @@ uint64_t read_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri)
static void write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t v)
{
- /* Raw write of a coprocessor register (as needed for migration, etc).
+ /*
+ * Raw write of a coprocessor register (as needed for migration, etc).
* Note that constant registers are treated as write-ignored; the
* caller should check for success by whether a readback gives the
* value written.
@@ -217,32 +95,10 @@ static void write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
}
}
-static int arm_gdb_get_sysreg(CPUARMState *env, uint8_t *buf, int reg)
-{
- ARMCPU *cpu = arm_env_get_cpu(env);
- const ARMCPRegInfo *ri;
- uint32_t key;
-
- key = cpu->dyn_xml.cpregs_keys[reg];
- ri = get_arm_cp_reginfo(cpu->cp_regs, key);
- if (ri) {
- if (cpreg_field_is_64bit(ri)) {
- return gdb_get_reg64(buf, (uint64_t)read_raw_cp_reg(env, ri));
- } else {
- return gdb_get_reg32(buf, (uint32_t)read_raw_cp_reg(env, ri));
- }
- }
- return 0;
-}
-
-static int arm_gdb_set_sysreg(CPUARMState *env, uint8_t *buf, int reg)
-{
- return 0;
-}
-
static bool raw_accessors_invalid(const ARMCPRegInfo *ri)
{
- /* Return true if the regdef would cause an assertion if you called
+ /*
+ * Return true if the regdef would cause an assertion if you called
* read_raw_cp_reg() or write_raw_cp_reg() on it (ie if it is a
* program bug for it not to have the NO_RAW flag).
* NB that returning false here doesn't necessarily mean that calling
@@ -261,7 +117,7 @@ static bool raw_accessors_invalid(const ARMCPRegInfo *ri)
return true;
}
-bool write_cpustate_to_list(ARMCPU *cpu)
+bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync)
{
/* Write the coprocessor state from cpu->env to the (index,value) list. */
int i;
@@ -270,6 +126,7 @@ bool write_cpustate_to_list(ARMCPU *cpu)
for (i = 0; i < cpu->cpreg_array_len; i++) {
uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]);
const ARMCPRegInfo *ri;
+ uint64_t newval;
ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
if (!ri) {
@@ -279,7 +136,29 @@ bool write_cpustate_to_list(ARMCPU *cpu)
if (ri->type & ARM_CP_NO_RAW) {
continue;
}
- cpu->cpreg_values[i] = read_raw_cp_reg(&cpu->env, ri);
+
+ newval = read_raw_cp_reg(&cpu->env, ri);
+ if (kvm_sync) {
+ /*
+ * Only sync if the previous list->cpustate sync succeeded.
+ * Rather than tracking the success/failure state for every
+ * item in the list, we just recheck "does the raw write we must
+ * have made in write_list_to_cpustate() read back OK" here.
+ */
+ uint64_t oldval = cpu->cpreg_values[i];
+
+ if (oldval == newval) {
+ continue;
+ }
+
+ write_raw_cp_reg(&cpu->env, ri, oldval);
+ if (read_raw_cp_reg(&cpu->env, ri) != oldval) {
+ continue;
+ }
+
+ write_raw_cp_reg(&cpu->env, ri, newval);
+ }
+ cpu->cpreg_values[i] = newval;
}
return ok;
}
@@ -302,7 +181,8 @@ bool write_list_to_cpustate(ARMCPU *cpu)
if (ri->type & ARM_CP_NO_RAW) {
continue;
}
- /* Write value and confirm it reads back as written
+ /*
+ * Write value and confirm it reads back as written
* (to catch read-only registers and partially read-only
* registers where the incoming migration value doesn't match)
*/
@@ -317,13 +197,10 @@ bool write_list_to_cpustate(ARMCPU *cpu)
static void add_cpreg_to_list(gpointer key, gpointer opaque)
{
ARMCPU *cpu = opaque;
- uint64_t regidx;
- const ARMCPRegInfo *ri;
-
- regidx = *(uint32_t *)key;
- ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
+ uint32_t regidx = (uintptr_t)key;
+ const ARMCPRegInfo *ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
- if (!(ri->type & (ARM_CP_NO_RAW|ARM_CP_ALIAS))) {
+ if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_ALIAS))) {
cpu->cpreg_indexes[cpu->cpreg_array_len] = cpreg_to_kvm_id(regidx);
/* The value array need not be initialized at this point */
cpu->cpreg_array_len++;
@@ -333,21 +210,19 @@ static void add_cpreg_to_list(gpointer key, gpointer opaque)
static void count_cpreg(gpointer key, gpointer opaque)
{
ARMCPU *cpu = opaque;
- uint64_t regidx;
const ARMCPRegInfo *ri;
- regidx = *(uint32_t *)key;
- ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
+ ri = g_hash_table_lookup(cpu->cp_regs, key);
- if (!(ri->type & (ARM_CP_NO_RAW|ARM_CP_ALIAS))) {
+ if (!(ri->type & (ARM_CP_NO_RAW | ARM_CP_ALIAS))) {
cpu->cpreg_array_len++;
}
}
static gint cpreg_key_compare(gconstpointer a, gconstpointer b)
{
- uint64_t aidx = cpreg_to_kvm_id(*(uint32_t *)a);
- uint64_t bidx = cpreg_to_kvm_id(*(uint32_t *)b);
+ uint64_t aidx = cpreg_to_kvm_id((uintptr_t)a);
+ uint64_t bidx = cpreg_to_kvm_id((uintptr_t)b);
if (aidx > bidx) {
return 1;
@@ -360,7 +235,8 @@ static gint cpreg_key_compare(gconstpointer a, gconstpointer b)
void init_cpreg_list(ARMCPU *cpu)
{
- /* Initialise the cpreg_tuples[] array based on the cp_regs hash.
+ /*
+ * Initialise the cpreg_tuples[] array based on the cp_regs hash.
* Note that we require cpreg_tuples[] to be sorted by key ID.
*/
GList *keys;
@@ -388,37 +264,34 @@ void init_cpreg_list(ARMCPU *cpu)
g_list_free(keys);
}
+static bool arm_pan_enabled(CPUARMState *env)
+{
+ if (is_a64(env)) {
+ if ((arm_hcr_el2_eff(env) & (HCR_NV | HCR_NV1)) == (HCR_NV | HCR_NV1)) {
+ return false;
+ }
+ return env->pstate & PSTATE_PAN;
+ } else {
+ return env->uncached_cpsr & CPSR_PAN;
+ }
+}
+
/*
- * Some registers are not accessible if EL3.NS=0 and EL3 is using AArch32 but
- * they are accessible when EL3 is using AArch64 regardless of EL3.NS.
- *
- * access_el3_aa32ns: Used to check AArch32 register views.
- * access_el3_aa32ns_aa64any: Used to check both AArch32/64 register views.
+ * Some registers are not accessible from AArch32 EL3 if SCR.NS == 0.
*/
static CPAccessResult access_el3_aa32ns(CPUARMState *env,
const ARMCPRegInfo *ri,
bool isread)
{
- bool secure = arm_is_secure_below_el3(env);
-
- assert(!arm_el_is_aa64(env, 3));
- if (secure) {
+ if (!is_a64(env) && arm_current_el(env) == 3 &&
+ arm_is_secure_below_el3(env)) {
return CP_ACCESS_TRAP_UNCATEGORIZED;
}
return CP_ACCESS_OK;
}
-static CPAccessResult access_el3_aa32ns_aa64any(CPUARMState *env,
- const ARMCPRegInfo *ri,
- bool isread)
-{
- if (!arm_el_is_aa64(env, 3)) {
- return access_el3_aa32ns(env, ri, isread);
- }
- return CP_ACCESS_OK;
-}
-
-/* Some secure-only AArch32 registers trap to EL3 if used from
+/*
+ * Some secure-only AArch32 registers trap to EL3 if used from
* Secure EL1 (but are just ordinary UNDEF in other non-EL3 contexts).
* Note that an access from Secure EL1 can only happen if EL3 is AArch64.
* We assume that the .access field is set to PL1_RW.
@@ -431,93 +304,104 @@ static CPAccessResult access_trap_aa32s_el1(CPUARMState *env,
return CP_ACCESS_OK;
}
if (arm_is_secure_below_el3(env)) {
+ if (env->cp15.scr_el3 & SCR_EEL2) {
+ return CP_ACCESS_TRAP_EL2;
+ }
return CP_ACCESS_TRAP_EL3;
}
/* This will be EL1 NS and EL2 NS, which just UNDEF */
return CP_ACCESS_TRAP_UNCATEGORIZED;
}
-/* Check for traps to "powerdown debug" registers, which are controlled
- * by MDCR.TDOSA
+/*
+ * Check for traps to performance monitor registers, which are controlled
+ * by MDCR_EL2.TPM for EL2 and MDCR_EL3.TPM for EL3.
*/
-static CPAccessResult access_tdosa(CPUARMState *env, const ARMCPRegInfo *ri,
- bool isread)
+static CPAccessResult access_tpm(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
{
int el = arm_current_el(env);
- bool mdcr_el2_tdosa = (env->cp15.mdcr_el2 & MDCR_TDOSA) ||
- (env->cp15.mdcr_el2 & MDCR_TDE) ||
- (env->cp15.hcr_el2 & HCR_TGE);
+ uint64_t mdcr_el2 = arm_mdcr_el2_eff(env);
- if (el < 2 && mdcr_el2_tdosa && !arm_is_secure_below_el3(env)) {
+ if (el < 2 && (mdcr_el2 & MDCR_TPM)) {
return CP_ACCESS_TRAP_EL2;
}
- if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDOSA)) {
+ if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TPM)) {
return CP_ACCESS_TRAP_EL3;
}
return CP_ACCESS_OK;
}
-/* Check for traps to "debug ROM" registers, which are controlled
- * by MDCR_EL2.TDRA for EL2 but by the more general MDCR_EL3.TDA for EL3.
- */
-static CPAccessResult access_tdra(CPUARMState *env, const ARMCPRegInfo *ri,
- bool isread)
+/* Check for traps from EL1 due to HCR_EL2.TVM and HCR_EL2.TRVM. */
+CPAccessResult access_tvm_trvm(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
{
- int el = arm_current_el(env);
- bool mdcr_el2_tdra = (env->cp15.mdcr_el2 & MDCR_TDRA) ||
- (env->cp15.mdcr_el2 & MDCR_TDE) ||
- (env->cp15.hcr_el2 & HCR_TGE);
+ if (arm_current_el(env) == 1) {
+ uint64_t trap = isread ? HCR_TRVM : HCR_TVM;
+ if (arm_hcr_el2_eff(env) & trap) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ }
+ return CP_ACCESS_OK;
+}
- if (el < 2 && mdcr_el2_tdra && !arm_is_secure_below_el3(env)) {
+/* Check for traps from EL1 due to HCR_EL2.TSW. */
+static CPAccessResult access_tsw(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_TSW)) {
return CP_ACCESS_TRAP_EL2;
}
- if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDA)) {
- return CP_ACCESS_TRAP_EL3;
- }
return CP_ACCESS_OK;
}
-/* Check for traps to general debug registers, which are controlled
- * by MDCR_EL2.TDA for EL2 and MDCR_EL3.TDA for EL3.
- */
-static CPAccessResult access_tda(CPUARMState *env, const ARMCPRegInfo *ri,
+/* Check for traps from EL1 due to HCR_EL2.TACR. */
+static CPAccessResult access_tacr(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
- int el = arm_current_el(env);
- bool mdcr_el2_tda = (env->cp15.mdcr_el2 & MDCR_TDA) ||
- (env->cp15.mdcr_el2 & MDCR_TDE) ||
- (env->cp15.hcr_el2 & HCR_TGE);
-
- if (el < 2 && mdcr_el2_tda && !arm_is_secure_below_el3(env)) {
+ if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_TACR)) {
return CP_ACCESS_TRAP_EL2;
}
- if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDA)) {
- return CP_ACCESS_TRAP_EL3;
- }
return CP_ACCESS_OK;
}
-/* Check for traps to performance monitor registers, which are controlled
- * by MDCR_EL2.TPM for EL2 and MDCR_EL3.TPM for EL3.
- */
-static CPAccessResult access_tpm(CPUARMState *env, const ARMCPRegInfo *ri,
- bool isread)
+/* Check for traps from EL1 due to HCR_EL2.TTLB. */
+static CPAccessResult access_ttlb(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
{
- int el = arm_current_el(env);
+ if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_TTLB)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ return CP_ACCESS_OK;
+}
- if (el < 2 && (env->cp15.mdcr_el2 & MDCR_TPM)
- && !arm_is_secure_below_el3(env)) {
+/* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBIS. */
+static CPAccessResult access_ttlbis(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 1 &&
+ (arm_hcr_el2_eff(env) & (HCR_TTLB | HCR_TTLBIS))) {
return CP_ACCESS_TRAP_EL2;
}
- if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TPM)) {
- return CP_ACCESS_TRAP_EL3;
+ return CP_ACCESS_OK;
+}
+
+#ifdef TARGET_AARCH64
+/* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBOS. */
+static CPAccessResult access_ttlbos(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 1 &&
+ (arm_hcr_el2_eff(env) & (HCR_TTLB | HCR_TTLBOS))) {
+ return CP_ACCESS_TRAP_EL2;
}
return CP_ACCESS_OK;
}
+#endif
static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
raw_write(env, ri, value);
tlb_flush(CPU(cpu)); /* Flush TLB as domain not tracked in TLB */
@@ -525,10 +409,11 @@ static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
if (raw_read(env, ri) != value) {
- /* Unlike real hardware the qemu TLB uses virtual addresses,
+ /*
+ * Unlike real hardware the qemu TLB uses virtual addresses,
* not modified virtual addresses, so this causes a TLB flush.
*/
tlb_flush(CPU(cpu));
@@ -539,11 +424,12 @@ static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
if (raw_read(env, ri) != value && !arm_feature(env, ARM_FEATURE_PMSA)
&& !extended_addresses_enabled(env)) {
- /* For VMSA (when not using the LPAE long descriptor page table
+ /*
+ * For VMSA (when not using the LPAE long descriptor page table
* format) this register includes the ASID, so do a TLB flush.
* For PMSA it is purely a process ID and no action is needed.
*/
@@ -552,47 +438,26 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
raw_write(env, ri, value);
}
-static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static int alle1_tlbmask(CPUARMState *env)
{
- /* Invalidate all (TLBIALL) */
- ARMCPU *cpu = arm_env_get_cpu(env);
-
- tlb_flush(CPU(cpu));
-}
-
-static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */
- ARMCPU *cpu = arm_env_get_cpu(env);
-
- tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK);
-}
-
-static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /* Invalidate by ASID (TLBIASID) */
- ARMCPU *cpu = arm_env_get_cpu(env);
-
- tlb_flush(CPU(cpu));
+ /*
+ * Note that the 'ALL' scope must invalidate both stage 1 and
+ * stage 2 translations, whereas most other scopes only invalidate
+ * stage 1 translations.
+ */
+ return (ARMMMUIdxBit_E10_1 |
+ ARMMMUIdxBit_E10_1_PAN |
+ ARMMMUIdxBit_E10_0 |
+ ARMMMUIdxBit_Stage2 |
+ ARMMMUIdxBit_Stage2_S);
}
-static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */
- ARMCPU *cpu = arm_env_get_cpu(env);
-
- tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK);
-}
/* IS variants of TLB operations must affect all cores */
static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
+ CPUState *cs = env_cpu(env);
tlb_flush_all_cpus_synced(cs);
}
@@ -600,7 +465,7 @@ static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
+ CPUState *cs = env_cpu(env);
tlb_flush_all_cpus_synced(cs);
}
@@ -608,7 +473,7 @@ static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
+ CPUState *cs = env_cpu(env);
tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
}
@@ -616,107 +481,148 @@ static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
+ CPUState *cs = env_cpu(env);
tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
}
-static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+/*
+ * Non-IS variants of TLB operations are upgraded to
+ * IS versions if we are at EL1 and HCR_EL2.FB is effectively set to
+ * force broadcast of these operations.
+ */
+static bool tlb_force_broadcast(CPUARMState *env)
{
- CPUState *cs = ENV_GET_CPU(env);
+ return arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_FB);
+}
+
+static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /* Invalidate all (TLBIALL) */
+ CPUState *cs = env_cpu(env);
- tlb_flush_by_mmuidx(cs,
- ARMMMUIdxBit_S12NSE1 |
- ARMMMUIdxBit_S12NSE0 |
- ARMMMUIdxBit_S2NS);
+ if (tlb_force_broadcast(env)) {
+ tlb_flush_all_cpus_synced(cs);
+ } else {
+ tlb_flush(cs);
+ }
}
-static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
+ /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */
+ CPUState *cs = env_cpu(env);
- tlb_flush_by_mmuidx_all_cpus_synced(cs,
- ARMMMUIdxBit_S12NSE1 |
- ARMMMUIdxBit_S12NSE0 |
- ARMMMUIdxBit_S2NS);
+ value &= TARGET_PAGE_MASK;
+ if (tlb_force_broadcast(env)) {
+ tlb_flush_page_all_cpus_synced(cs, value);
+ } else {
+ tlb_flush_page(cs, value);
+ }
}
-static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
- /* Invalidate by IPA. This has to invalidate any structures that
- * contain only stage 2 translation information, but does not need
- * to apply to structures that contain combined stage 1 and stage 2
- * translation information.
- * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero.
- */
- CPUState *cs = ENV_GET_CPU(env);
- uint64_t pageaddr;
+ /* Invalidate by ASID (TLBIASID) */
+ CPUState *cs = env_cpu(env);
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
- return;
+ if (tlb_force_broadcast(env)) {
+ tlb_flush_all_cpus_synced(cs);
+ } else {
+ tlb_flush(cs);
}
+}
- pageaddr = sextract64(value << 12, 0, 40);
+static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */
+ CPUState *cs = env_cpu(env);
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_S2NS);
+ value &= TARGET_PAGE_MASK;
+ if (tlb_force_broadcast(env)) {
+ tlb_flush_page_all_cpus_synced(cs, value);
+ } else {
+ tlb_flush_page(cs, value);
+ }
}
-static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
- uint64_t pageaddr;
+ CPUState *cs = env_cpu(env);
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
- return;
- }
+ tlb_flush_by_mmuidx(cs, alle1_tlbmask(env));
+}
- pageaddr = sextract64(value << 12, 0, 40);
+static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
- ARMMMUIdxBit_S2NS);
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, alle1_tlbmask(env));
}
+
static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
+ CPUState *cs = env_cpu(env);
- tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_S1E2);
+ tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E2);
}
static void tlbiall_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
+ CPUState *cs = env_cpu(env);
- tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_S1E2);
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E2);
}
static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
+ CPUState *cs = env_cpu(env);
uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12);
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_S1E2);
+ tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E2);
}
static void tlbimva_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
+ CPUState *cs = env_cpu(env);
uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12);
tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
- ARMMMUIdxBit_S1E2);
+ ARMMMUIdxBit_E2);
+}
+
+static void tlbiipas2_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ uint64_t pageaddr = (value & MAKE_64BIT_MASK(0, 28)) << 12;
+
+ tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_Stage2);
+}
+
+static void tlbiipas2is_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ uint64_t pageaddr = (value & MAKE_64BIT_MASK(0, 28)) << 12;
+
+ tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, ARMMMUIdxBit_Stage2);
}
static const ARMCPRegInfo cp_reginfo[] = {
- /* Define the secure and non-secure FCSE identifier CP registers
+ /*
+ * Define the secure and non-secure FCSE identifier CP registers
* separately because there is no secure bank in V8 (no _EL3). This allows
* the secure register to be properly reset and migrated. There is also no
* v8 EL1 version of the register so the non-secure instance stands alone.
@@ -731,7 +637,8 @@ static const ARMCPRegInfo cp_reginfo[] = {
.access = PL1_RW, .secure = ARM_CP_SECSTATE_S,
.fieldoffset = offsetof(CPUARMState, cp15.fcseidr_s),
.resetvalue = 0, .writefn = fcse_write, .raw_writefn = raw_write, },
- /* Define the secure and non-secure context identifier CP registers
+ /*
+ * Define the secure and non-secure context identifier CP registers
* separately because there is no secure bank in V8 (no _EL3). This allows
* the secure register to be properly reset and migrated. In the
* non-secure case, the 32-bit register will have reset and migration
@@ -739,29 +646,34 @@ static const ARMCPRegInfo cp_reginfo[] = {
*/
{ .name = "CONTEXTIDR_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 1,
- .access = PL1_RW, .secure = ARM_CP_SECSTATE_NS,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .fgt = FGT_CONTEXTIDR_EL1,
+ .nv2_redirect_offset = 0x108 | NV2_REDIR_NV1,
+ .secure = ARM_CP_SECSTATE_NS,
.fieldoffset = offsetof(CPUARMState, cp15.contextidr_el[1]),
.resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, },
{ .name = "CONTEXTIDR_S", .state = ARM_CP_STATE_AA32,
.cp = 15, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 1,
- .access = PL1_RW, .secure = ARM_CP_SECSTATE_S,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .secure = ARM_CP_SECSTATE_S,
.fieldoffset = offsetof(CPUARMState, cp15.contextidr_s),
.resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, },
- REGINFO_SENTINEL
};
static const ARMCPRegInfo not_v8_cp_reginfo[] = {
- /* NB: Some of these registers exist in v8 but with more precise
+ /*
+ * NB: Some of these registers exist in v8 but with more precise
* definitions that don't use CP_ANY wildcards (mostly in v8_cp_reginfo[]).
*/
/* MMU Domain access control / MPU write buffer control */
{ .name = "DACR",
.cp = 15, .opc1 = CP_ANY, .crn = 3, .crm = CP_ANY, .opc2 = CP_ANY,
- .access = PL1_RW, .resetvalue = 0,
+ .access = PL1_RW, .accessfn = access_tvm_trvm, .resetvalue = 0,
.writefn = dacr_write, .raw_writefn = raw_write,
.bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.dacr_s),
offsetoflow32(CPUARMState, cp15.dacr_ns) } },
- /* ARMv7 allocates a range of implementation defined TLB LOCKDOWN regs.
+ /*
+ * ARMv7 allocates a range of implementation defined TLB LOCKDOWN regs.
* For v6 and v5, these mappings are overly broad.
*/
{ .name = "TLB_LOCKDOWN", .cp = 15, .crn = 10, .crm = 0,
@@ -776,25 +688,26 @@ static const ARMCPRegInfo not_v8_cp_reginfo[] = {
{ .name = "CACHEMAINT", .cp = 15, .crn = 7, .crm = CP_ANY,
.opc1 = 0, .opc2 = CP_ANY, .access = PL1_W,
.type = ARM_CP_NOP | ARM_CP_OVERRIDE },
- REGINFO_SENTINEL
};
static const ARMCPRegInfo not_v6_cp_reginfo[] = {
- /* Not all pre-v6 cores implemented this WFI, so this is slightly
+ /*
+ * Not all pre-v6 cores implemented this WFI, so this is slightly
* over-broad.
*/
{ .name = "WFI_v5", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = 2,
.access = PL1_W, .type = ARM_CP_WFI },
- REGINFO_SENTINEL
};
static const ARMCPRegInfo not_v7_cp_reginfo[] = {
- /* Standard v6 WFI (also used in some pre-v6 cores); not in v7 (which
+ /*
+ * Standard v6 WFI (also used in some pre-v6 cores); not in v7 (which
* is UNPREDICTABLE; we choose to NOP as most implementations do).
*/
{ .name = "WFI_v6", .cp = 15, .crn = 7, .crm = 0, .opc1 = 0, .opc2 = 4,
.access = PL1_W, .type = ARM_CP_WFI },
- /* L1 cache lockdown. Not architectural in v6 and earlier but in practice
+ /*
+ * L1 cache lockdown. Not architectural in v6 and earlier but in practice
* implemented in 926, 946, 1026, 1136, 1176 and 11MPCore. StrongARM and
* OMAPCP will override this space.
*/
@@ -808,14 +721,16 @@ static const ARMCPRegInfo not_v7_cp_reginfo[] = {
{ .name = "DUMMY", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = CP_ANY,
.access = PL1_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW,
.resetvalue = 0 },
- /* We don't implement pre-v7 debug but most CPUs had at least a DBGDIDR;
+ /*
+ * We don't implement pre-v7 debug but most CPUs had at least a DBGDIDR;
* implementing it as RAZ means the "debug architecture version" bits
* will read as a reserved value, which should cause Linux to not try
* to use the debug hardware.
*/
{ .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0,
.access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 },
- /* MMU TLB control. Note that the wildcarding means we cover not just
+ /*
+ * MMU TLB control. Note that the wildcarding means we cover not just
* the unified TLB ops but also the dside/iside/inner-shareable variants.
*/
{ .name = "TLBIALL", .cp = 15, .crn = 8, .crm = CP_ANY,
@@ -834,7 +749,6 @@ static const ARMCPRegInfo not_v7_cp_reginfo[] = {
.opc1 = 0, .opc2 = 0, .access = PL1_RW, .type = ARM_CP_NOP },
{ .name = "NMRR", .cp = 15, .crn = 10, .crm = 2,
.opc1 = 0, .opc2 = 1, .access = PL1_RW, .type = ARM_CP_NOP },
- REGINFO_SENTINEL
};
static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -844,36 +758,68 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
/* In ARMv8 most bits of CPACR_EL1 are RES0. */
if (!arm_feature(env, ARM_FEATURE_V8)) {
- /* ARMv7 defines bits for unimplemented coprocessors as RAZ/WI.
+ /*
+ * ARMv7 defines bits for unimplemented coprocessors as RAZ/WI.
* ASEDIS [31] and D32DIS [30] are both UNK/SBZP without VFP.
* TRCDIS [28] is RAZ/WI since we do not implement a trace macrocell.
*/
- if (arm_feature(env, ARM_FEATURE_VFP)) {
+ if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))) {
/* VFP coprocessor: cp10 & cp11 [23:20] */
- mask |= (1 << 31) | (1 << 30) | (0xf << 20);
+ mask |= R_CPACR_ASEDIS_MASK |
+ R_CPACR_D32DIS_MASK |
+ R_CPACR_CP11_MASK |
+ R_CPACR_CP10_MASK;
if (!arm_feature(env, ARM_FEATURE_NEON)) {
/* ASEDIS [31] bit is RAO/WI */
- value |= (1 << 31);
+ value |= R_CPACR_ASEDIS_MASK;
}
- /* VFPv3 and upwards with NEON implement 32 double precision
+ /*
+ * VFPv3 and upwards with NEON implement 32 double precision
* registers (D0-D31).
*/
- if (!arm_feature(env, ARM_FEATURE_NEON) ||
- !arm_feature(env, ARM_FEATURE_VFP3)) {
+ if (!cpu_isar_feature(aa32_simd_r32, env_archcpu(env))) {
/* D32DIS [30] is RAO/WI if D16-31 are not implemented. */
- value |= (1 << 30);
+ value |= R_CPACR_D32DIS_MASK;
}
}
value &= mask;
}
+
+ /*
+ * For A-profile AArch32 EL3 (but not M-profile secure mode), if NSACR.CP10
+ * is 0 then CPACR.{CP11,CP10} ignore writes and read as 0b00.
+ */
+ if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+ !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
+ mask = R_CPACR_CP11_MASK | R_CPACR_CP10_MASK;
+ value = (value & ~mask) | (env->cp15.cpacr_el1 & mask);
+ }
+
env->cp15.cpacr_el1 = value;
}
+static uint64_t cpacr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ /*
+ * For A-profile AArch32 EL3 (but not M-profile secure mode), if NSACR.CP10
+ * is 0 then CPACR.{CP11,CP10} ignore writes and read as 0b00.
+ */
+ uint64_t value = env->cp15.cpacr_el1;
+
+ if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+ !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
+ value &= ~(R_CPACR_CP11_MASK | R_CPACR_CP10_MASK);
+ }
+ return value;
+}
+
static void cpacr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
{
- /* Call cpacr_write() so that we reset with the correct RAO bits set
+ /*
+ * Call cpacr_write() so that we reset with the correct RAO bits set
* for our CPU features.
*/
cpacr_write(env, ri, 0);
@@ -884,12 +830,12 @@ static CPAccessResult cpacr_access(CPUARMState *env, const ARMCPRegInfo *ri,
{
if (arm_feature(env, ARM_FEATURE_V8)) {
/* Check if CPACR accesses are to be trapped to EL2 */
- if (arm_current_el(env) == 1 &&
- (env->cp15.cptr_el[2] & CPTR_TCPAC) && !arm_is_secure(env)) {
+ if (arm_current_el(env) == 1 && arm_is_el2_enabled(env) &&
+ FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TCPAC)) {
return CP_ACCESS_TRAP_EL2;
/* Check if CPACR accesses are to be trapped to EL3 */
} else if (arm_current_el(env) < 3 &&
- (env->cp15.cptr_el[3] & CPTR_TCPAC)) {
+ FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, TCPAC)) {
return CP_ACCESS_TRAP_EL3;
}
}
@@ -901,7 +847,8 @@ static CPAccessResult cptr_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
/* Check if CPTR accesses are set to trap to EL3 */
- if (arm_current_el(env) == 2 && (env->cp15.cptr_el[3] & CPTR_TCPAC)) {
+ if (arm_current_el(env) == 2 &&
+ FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, TCPAC)) {
return CP_ACCESS_TRAP_EL3;
}
@@ -913,7 +860,8 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
{ .name = "MVA_prefetch",
.cp = 15, .crn = 7, .crm = 13, .opc1 = 0, .opc2 = 1,
.access = PL1_W, .type = ARM_CP_NOP },
- /* We need to break the TB after ISB to execute self-modifying code
+ /*
+ * We need to break the TB after ISB to execute self-modifying code
* correctly and also to take any pending interrupts immediately.
* So use arm_cp_write_ignore() function instead of ARM_CP_NOP flag.
*/
@@ -924,54 +872,233 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
{ .name = "DMB", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 5,
.access = PL0_W, .type = ARM_CP_NOP },
{ .name = "IFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 2,
- .access = PL1_RW,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.ifar_s),
offsetof(CPUARMState, cp15.ifar_ns) },
.resetvalue = 0, },
- /* Watchpoint Fault Address Register : should actually only be present
+ /*
+ * Watchpoint Fault Address Register : should actually only be present
* for 1136, 1176, 11MPCore.
*/
{ .name = "WFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1,
.access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, },
{ .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3,
.crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2, .accessfn = cpacr_access,
+ .fgt = FGT_CPACR_EL1,
+ .nv2_redirect_offset = 0x100 | NV2_REDIR_NV1,
.access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.cpacr_el1),
- .resetfn = cpacr_reset, .writefn = cpacr_write },
- REGINFO_SENTINEL
+ .resetfn = cpacr_reset, .writefn = cpacr_write, .readfn = cpacr_read },
+};
+
+typedef struct pm_event {
+ uint16_t number; /* PMEVTYPER.evtCount is 16 bits wide */
+ /* If the event is supported on this CPU (used to generate PMCEID[01]) */
+ bool (*supported)(CPUARMState *);
+ /*
+ * Retrieve the current count of the underlying event. The programmed
+ * counters hold a difference from the return value from this function
+ */
+ uint64_t (*get_count)(CPUARMState *);
+ /*
+ * Return how many nanoseconds it will take (at a minimum) for count events
+ * to occur. A negative value indicates the counter will never overflow, or
+ * that the counter has otherwise arranged for the overflow bit to be set
+ * and the PMU interrupt to be raised on overflow.
+ */
+ int64_t (*ns_per_count)(uint64_t);
+} pm_event;
+
+static bool event_always_supported(CPUARMState *env)
+{
+ return true;
+}
+
+static uint64_t swinc_get_count(CPUARMState *env)
+{
+ /*
+ * SW_INCR events are written directly to the pmevcntr's by writes to
+ * PMSWINC, so there is no underlying count maintained by the PMU itself
+ */
+ return 0;
+}
+
+static int64_t swinc_ns_per(uint64_t ignored)
+{
+ return -1;
+}
+
+/*
+ * Return the underlying cycle count for the PMU cycle counters. For
+ * user-mode emulation this falls back to the host tick counter.
+ */
+static uint64_t cycles_get_count(CPUARMState *env)
+{
+#ifndef CONFIG_USER_ONLY
+ return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+ ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
+#else
+ return cpu_get_host_ticks();
+#endif
+}
+
+#ifndef CONFIG_USER_ONLY
+static int64_t cycles_ns_per(uint64_t cycles)
+{
+ return (ARM_CPU_FREQ / NANOSECONDS_PER_SECOND) * cycles;
+}
+
+static bool instructions_supported(CPUARMState *env)
+{
+ /* Precise instruction counting */
+ return icount_enabled() == ICOUNT_PRECISE;
+}
+
+static uint64_t instructions_get_count(CPUARMState *env)
+{
+ assert(icount_enabled() == ICOUNT_PRECISE);
+ return (uint64_t)icount_get_raw();
+}
+
+static int64_t instructions_ns_per(uint64_t icount)
+{
+ assert(icount_enabled() == ICOUNT_PRECISE);
+ return icount_to_ns((int64_t)icount);
+}
+#endif
+
+static bool pmuv3p1_events_supported(CPUARMState *env)
+{
+ /* For events which are supported in any v8.1 PMU */
+ return cpu_isar_feature(any_pmuv3p1, env_archcpu(env));
+}
+
+static bool pmuv3p4_events_supported(CPUARMState *env)
+{
+ /* For events which are supported in any v8.4 PMU */
+ return cpu_isar_feature(any_pmuv3p4, env_archcpu(env));
+}
+
+static uint64_t zero_event_get_count(CPUARMState *env)
+{
+ /* For events which never fire on QEMU, so their count is always zero */
+ return 0;
+}
+
+static int64_t zero_event_ns_per(uint64_t cycles)
+{
+ /* An event which never fires can never overflow */
+ return -1;
+}
+
+static const pm_event pm_events[] = {
+ { .number = 0x000, /* SW_INCR */
+ .supported = event_always_supported,
+ .get_count = swinc_get_count,
+ .ns_per_count = swinc_ns_per,
+ },
+#ifndef CONFIG_USER_ONLY
+ { .number = 0x008, /* INST_RETIRED, Instruction architecturally executed */
+ .supported = instructions_supported,
+ .get_count = instructions_get_count,
+ .ns_per_count = instructions_ns_per,
+ },
+ { .number = 0x011, /* CPU_CYCLES, Cycle */
+ .supported = event_always_supported,
+ .get_count = cycles_get_count,
+ .ns_per_count = cycles_ns_per,
+ },
+#endif
+ { .number = 0x023, /* STALL_FRONTEND */
+ .supported = pmuv3p1_events_supported,
+ .get_count = zero_event_get_count,
+ .ns_per_count = zero_event_ns_per,
+ },
+ { .number = 0x024, /* STALL_BACKEND */
+ .supported = pmuv3p1_events_supported,
+ .get_count = zero_event_get_count,
+ .ns_per_count = zero_event_ns_per,
+ },
+ { .number = 0x03c, /* STALL */
+ .supported = pmuv3p4_events_supported,
+ .get_count = zero_event_get_count,
+ .ns_per_count = zero_event_ns_per,
+ },
};
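Extending this table follows the same shape; a purely hypothetical entry (the event number and the choice of helpers are invented for illustration, and MAX_EVENT_ID below would have to grow to cover it) would look like:

    { .number = 0x03f, /* hypothetical architected event, not implemented here */
      .supported = pmuv3p4_events_supported,
      .get_count = zero_event_get_count,   /* never counts under emulation */
      .ns_per_count = zero_event_ns_per,   /* and therefore never overflows */
    },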
-/* Definitions for the PMU registers */
-#define PMCRN_MASK 0xf800
-#define PMCRN_SHIFT 11
-#define PMCRD 0x8
-#define PMCRC 0x4
-#define PMCRE 0x1
+/*
+ * Note: Before increasing MAX_EVENT_ID beyond 0x3f into the 0x40xx range of
+ * events (i.e. the statistical profiling extension), this implementation
+ * should first be updated to something sparse instead of the current
+ * supported_event_map[] array.
+ */
+#define MAX_EVENT_ID 0x3c
+#define UNSUPPORTED_EVENT UINT16_MAX
+static uint16_t supported_event_map[MAX_EVENT_ID + 1];
-static inline uint32_t pmu_num_counters(CPUARMState *env)
+/*
+ * Called upon CPU initialization to initialize PMCEID[01]_EL0 and build a map
+ * of ARM event numbers to indices in our pm_events array.
+ *
+ * Note: Events in the 0x40XX range are not currently supported.
+ */
+void pmu_init(ARMCPU *cpu)
{
- return (env->cp15.c9_pmcr & PMCRN_MASK) >> PMCRN_SHIFT;
+ unsigned int i;
+
+ /*
+ * Empty supported_event_map and cpu->pmceid[01] before adding supported
+ * events to them
+ */
+ for (i = 0; i < ARRAY_SIZE(supported_event_map); i++) {
+ supported_event_map[i] = UNSUPPORTED_EVENT;
+ }
+ cpu->pmceid0 = 0;
+ cpu->pmceid1 = 0;
+
+ for (i = 0; i < ARRAY_SIZE(pm_events); i++) {
+ const pm_event *cnt = &pm_events[i];
+ assert(cnt->number <= MAX_EVENT_ID);
+ /* We do not currently support events in the 0x40xx range */
+ assert(cnt->number <= 0x3f);
+
+ if (cnt->supported(&cpu->env)) {
+ supported_event_map[cnt->number] = i;
+ uint64_t event_mask = 1ULL << (cnt->number & 0x1f);
+ if (cnt->number & 0x20) {
+ cpu->pmceid1 |= event_mask;
+ } else {
+ cpu->pmceid0 |= event_mask;
+ }
+ }
+ }
}
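As a worked, compilable check of the PMCEID0/PMCEID1 split computed above (plain arithmetic, no QEMU types involved):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint16_t number = 0x023;                           /* STALL_FRONTEND */
    uint64_t event_mask = 1ULL << (number & 0x1f);     /* bit 3 */
    assert(number & 0x20);                             /* high half: lands in PMCEID1 */
    assert(event_mask == 0x8);

    number = 0x011;                                    /* CPU_CYCLES */
    assert(!(number & 0x20));                          /* low half: lands in PMCEID0 */
    assert((1ULL << (number & 0x1f)) == (1ULL << 17));
    return 0;
}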
-/* Bits allowed to be set/cleared for PMCNTEN* and PMINTEN* */
-static inline uint64_t pmu_counter_mask(CPUARMState *env)
+/*
+ * Check at runtime whether a PMU event is supported for the current machine
+ */
+static bool event_supported(uint16_t number)
{
- return (1 << 31) | ((1 << pmu_num_counters(env)) - 1);
+ if (number > MAX_EVENT_ID) {
+ return false;
+ }
+ return supported_event_map[number] != UNSUPPORTED_EVENT;
}
static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
- /* Performance monitor registers user accessibility is controlled
+ /*
+ * Performance monitor registers user accessibility is controlled
* by PMUSERENR. MDCR_EL2.TPM and MDCR_EL3.TPM allow configurable
* trapping to EL2 or EL3 for other accesses.
*/
int el = arm_current_el(env);
+ uint64_t mdcr_el2 = arm_mdcr_el2_eff(env);
if (el == 0 && !(env->cp15.c9_pmuserenr & 1)) {
return CP_ACCESS_TRAP;
}
- if (el < 2 && (env->cp15.mdcr_el2 & MDCR_TPM)
- && !arm_is_secure_below_el3(env)) {
+ if (el < 2 && (mdcr_el2 & MDCR_TPM)) {
return CP_ACCESS_TRAP_EL2;
}
if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TPM)) {
@@ -1011,8 +1138,6 @@ static CPAccessResult pmreg_access_swinc(CPUARMState *env,
return pmreg_access(env, ri, isread);
}
-#ifndef CONFIG_USER_ONLY
-
static CPAccessResult pmreg_access_selr(CPUARMState *env,
const ARMCPRegInfo *ri,
bool isread)
@@ -1042,74 +1167,406 @@ static CPAccessResult pmreg_access_ccntr(CPUARMState *env,
return pmreg_access(env, ri, isread);
}
-static inline bool arm_ccnt_enabled(CPUARMState *env)
+/*
+ * Bits in MDCR_EL2 and MDCR_EL3 which pmu_counter_enabled() looks at.
+ * We use these to decide whether we need to wrap a write to MDCR_EL2
+ * or MDCR_EL3 in pmu_op_start()/pmu_op_finish() calls.
+ */
+#define MDCR_EL2_PMU_ENABLE_BITS \
+ (MDCR_HPME | MDCR_HPMD | MDCR_HPMN | MDCR_HCCD | MDCR_HLP)
+#define MDCR_EL3_PMU_ENABLE_BITS (MDCR_SPME | MDCR_SCCD)
+
+/*
+ * Returns true if the counter (pass 31 for PMCCNTR) should count events using
+ * the current EL, security state, and register configuration.
+ */
+static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter)
{
- /* This does not support checking PMCCFILTR_EL0 register */
+ uint64_t filter;
+ bool e, p, u, nsk, nsu, nsh, m;
+ bool enabled, prohibited = false, filtered;
+ bool secure = arm_is_secure(env);
+ int el = arm_current_el(env);
+ uint64_t mdcr_el2;
+ uint8_t hpmn;
- if (!(env->cp15.c9_pmcr & PMCRE) || !(env->cp15.c9_pmcnten & (1 << 31))) {
+ /*
+ * We might be called for M-profile cores where MDCR_EL2 doesn't
+ * exist and arm_mdcr_el2_eff() will assert, so this early-exit check
+ * must be before we read that value.
+ */
+ if (!arm_feature(env, ARM_FEATURE_PMU)) {
return false;
}
- return true;
+ mdcr_el2 = arm_mdcr_el2_eff(env);
+ hpmn = mdcr_el2 & MDCR_HPMN;
+
+ if (!arm_feature(env, ARM_FEATURE_EL2) ||
+ (counter < hpmn || counter == 31)) {
+ e = env->cp15.c9_pmcr & PMCRE;
+ } else {
+ e = mdcr_el2 & MDCR_HPME;
+ }
+ enabled = e && (env->cp15.c9_pmcnten & (1 << counter));
+
+ /* Is event counting prohibited? */
+ if (el == 2 && (counter < hpmn || counter == 31)) {
+ prohibited = mdcr_el2 & MDCR_HPMD;
+ }
+ if (secure) {
+ prohibited = prohibited || !(env->cp15.mdcr_el3 & MDCR_SPME);
+ }
+
+ if (counter == 31) {
+ /*
+ * The cycle counter defaults to running. PMCR.DP says "disable
+ * the cycle counter when event counting is prohibited".
+ * Some MDCR bits disable the cycle counter specifically.
+ */
+ prohibited = prohibited && env->cp15.c9_pmcr & PMCRDP;
+ if (cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) {
+ if (secure) {
+ prohibited = prohibited || (env->cp15.mdcr_el3 & MDCR_SCCD);
+ }
+ if (el == 2) {
+ prohibited = prohibited || (mdcr_el2 & MDCR_HCCD);
+ }
+ }
+ }
+
+ if (counter == 31) {
+ filter = env->cp15.pmccfiltr_el0;
+ } else {
+ filter = env->cp15.c14_pmevtyper[counter];
+ }
+
+ p = filter & PMXEVTYPER_P;
+ u = filter & PMXEVTYPER_U;
+ nsk = arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_NSK);
+ nsu = arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_NSU);
+ nsh = arm_feature(env, ARM_FEATURE_EL2) && (filter & PMXEVTYPER_NSH);
+ m = arm_el_is_aa64(env, 1) &&
+ arm_feature(env, ARM_FEATURE_EL3) && (filter & PMXEVTYPER_M);
+
+ if (el == 0) {
+ filtered = secure ? u : u != nsu;
+ } else if (el == 1) {
+ filtered = secure ? p : p != nsk;
+ } else if (el == 2) {
+ filtered = !nsh;
+ } else { /* EL3 */
+ filtered = m != p;
+ }
+
+ if (counter != 31) {
+ /*
+ * If not checking PMCCNTR, ensure the counter is setup to an event we
+ * support
+ */
+ uint16_t event = filter & PMXEVTYPER_EVTCOUNT;
+ if (!event_supported(event)) {
+ return false;
+ }
+ }
+
+ return enabled && !prohibited && !filtered;
}
-void pmccntr_sync(CPUARMState *env)
+static void pmu_update_irq(CPUARMState *env)
{
- uint64_t temp_ticks;
+ ARMCPU *cpu = env_archcpu(env);
+ qemu_set_irq(cpu->pmu_interrupt, (env->cp15.c9_pmcr & PMCRE) &&
+ (env->cp15.c9_pminten & env->cp15.c9_pmovsr));
+}
- temp_ticks = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
- ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
+static bool pmccntr_clockdiv_enabled(CPUARMState *env)
+{
+ /*
+ * Return true if the clock divider is enabled and the cycle counter
+ * is supposed to tick only once every 64 clock cycles. This is
+ * controlled by PMCR.D, but if PMCR.LC is set to enable the long
+ * (64-bit) cycle counter PMCR.D has no effect.
+ */
+ return (env->cp15.c9_pmcr & (PMCRD | PMCRLC)) == PMCRD;
+}
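A compilable check of the divider rule above, with the architectural PMCR bit positions (D is bit 3, LC is bit 6) defined locally for the example:

#include <assert.h>
#include <stdint.h>

#define PMCRD  (1U << 3)   /* cycle counter clock divider */
#define PMCRLC (1U << 6)   /* long (64-bit) cycle counter mode */

static int clockdiv_enabled(uint32_t pmcr)
{
    return (pmcr & (PMCRD | PMCRLC)) == PMCRD;
}

int main(void)
{
    assert(clockdiv_enabled(PMCRD));            /* D=1, LC=0: tick once per 64 cycles */
    assert(!clockdiv_enabled(PMCRD | PMCRLC));  /* LC=1 makes D irrelevant */
    assert(!clockdiv_enabled(0));               /* D=0: full-rate counting */
    return 0;
}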
- if (env->cp15.c9_pmcr & PMCRD) {
- /* Increment once every 64 processor clock cycles */
- temp_ticks /= 64;
+static bool pmevcntr_is_64_bit(CPUARMState *env, int counter)
+{
+ /* Return true if the specified event counter is configured to be 64 bit */
+
+ /* This isn't intended to be used with the cycle counter */
+ assert(counter < 31);
+
+ if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) {
+ return false;
}
- if (arm_ccnt_enabled(env)) {
- env->cp15.c15_ccnt = temp_ticks - env->cp15.c15_ccnt;
+ if (arm_feature(env, ARM_FEATURE_EL2)) {
+ /*
+ * MDCR_EL2.HLP still applies even when EL2 is disabled in the
+ * current security state, so we don't use arm_mdcr_el2_eff() here.
+ */
+ bool hlp = env->cp15.mdcr_el2 & MDCR_HLP;
+ int hpmn = env->cp15.mdcr_el2 & MDCR_HPMN;
+
+ if (counter >= hpmn) {
+ return hlp;
+ }
}
+ return env->cp15.c9_pmcr & PMCRLP;
+}
+
+/*
+ * Ensure c15_ccnt is the guest-visible count so that operations such as
+ * enabling/disabling the counter or filtering, modifying the count itself,
+ * etc. can be done logically. This is essentially a no-op if the counter is
+ * not enabled at the time of the call.
+ */
+static void pmccntr_op_start(CPUARMState *env)
+{
+ uint64_t cycles = cycles_get_count(env);
+
+ if (pmu_counter_enabled(env, 31)) {
+ uint64_t eff_cycles = cycles;
+ if (pmccntr_clockdiv_enabled(env)) {
+ eff_cycles /= 64;
+ }
+
+ uint64_t new_pmccntr = eff_cycles - env->cp15.c15_ccnt_delta;
+
+ uint64_t overflow_mask = env->cp15.c9_pmcr & PMCRLC ? \
+ 1ull << 63 : 1ull << 31;
+ if (env->cp15.c15_ccnt & ~new_pmccntr & overflow_mask) {
+ env->cp15.c9_pmovsr |= (1ULL << 31);
+ pmu_update_irq(env);
+ }
+
+ env->cp15.c15_ccnt = new_pmccntr;
+ }
+ env->cp15.c15_ccnt_delta = cycles;
+}
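The delta bookkeeping above can be sanity-checked with invented numbers: the guest-visible count is the raw cycle count minus the stored delta, and op_finish re-derives the delta so a matched start/finish pair is transparent:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t raw = 1000;             /* cycles_get_count() at op_start */
    uint64_t delta = 900;            /* c15_ccnt_delta left by the last finish */
    uint64_t guest = raw - delta;    /* what the guest reads in PMCCNTR */
    assert(guest == 100);

    delta = raw - guest;             /* op_finish re-bases the delta... */
    assert(raw - delta == 100);      /* ...so a later op_start sees the same value */
    return 0;
}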
+
+/*
+ * If PMCCNTR is enabled, recalculate the delta between the clock and the
+ * guest-visible count. A call to pmccntr_op_finish should follow every call to
+ * pmccntr_op_start.
+ */
+static void pmccntr_op_finish(CPUARMState *env)
+{
+ if (pmu_counter_enabled(env, 31)) {
+#ifndef CONFIG_USER_ONLY
+ /* Calculate when the counter will next overflow */
+ uint64_t remaining_cycles = -env->cp15.c15_ccnt;
+ if (!(env->cp15.c9_pmcr & PMCRLC)) {
+ remaining_cycles = (uint32_t)remaining_cycles;
+ }
+ int64_t overflow_in = cycles_ns_per(remaining_cycles);
+
+ if (overflow_in > 0) {
+ int64_t overflow_at;
+
+ if (!sadd64_overflow(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+ overflow_in, &overflow_at)) {
+ ARMCPU *cpu = env_archcpu(env);
+ timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at);
+ }
+ }
+#endif
+
+ uint64_t prev_cycles = env->cp15.c15_ccnt_delta;
+ if (pmccntr_clockdiv_enabled(env)) {
+ prev_cycles /= 64;
+ }
+ env->cp15.c15_ccnt_delta = prev_cycles - env->cp15.c15_ccnt;
+ }
+}
+
+static void pmevcntr_op_start(CPUARMState *env, uint8_t counter)
+{
+ uint16_t event = env->cp15.c14_pmevtyper[counter] & PMXEVTYPER_EVTCOUNT;
+ uint64_t count = 0;
+ if (event_supported(event)) {
+ uint16_t event_idx = supported_event_map[event];
+ count = pm_events[event_idx].get_count(env);
+ }
+
+ if (pmu_counter_enabled(env, counter)) {
+ uint64_t new_pmevcntr = count - env->cp15.c14_pmevcntr_delta[counter];
+ uint64_t overflow_mask = pmevcntr_is_64_bit(env, counter) ?
+ 1ULL << 63 : 1ULL << 31;
+
+ if (env->cp15.c14_pmevcntr[counter] & ~new_pmevcntr & overflow_mask) {
+ env->cp15.c9_pmovsr |= (1 << counter);
+ pmu_update_irq(env);
+ }
+ env->cp15.c14_pmevcntr[counter] = new_pmevcntr;
+ }
+ env->cp15.c14_pmevcntr_delta[counter] = count;
+}
+
+static void pmevcntr_op_finish(CPUARMState *env, uint8_t counter)
+{
+ if (pmu_counter_enabled(env, counter)) {
+#ifndef CONFIG_USER_ONLY
+ uint16_t event = env->cp15.c14_pmevtyper[counter] & PMXEVTYPER_EVTCOUNT;
+ uint16_t event_idx = supported_event_map[event];
+ uint64_t delta = -(env->cp15.c14_pmevcntr[counter] + 1);
+ int64_t overflow_in;
+
+ if (!pmevcntr_is_64_bit(env, counter)) {
+ delta = (uint32_t)delta;
+ }
+ overflow_in = pm_events[event_idx].ns_per_count(delta);
+
+ if (overflow_in > 0) {
+ int64_t overflow_at;
+
+ if (!sadd64_overflow(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+ overflow_in, &overflow_at)) {
+ ARMCPU *cpu = env_archcpu(env);
+ timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at);
+ }
+ }
+#endif
+
+ env->cp15.c14_pmevcntr_delta[counter] -=
+ env->cp15.c14_pmevcntr[counter];
+ }
+}
+
+void pmu_op_start(CPUARMState *env)
+{
+ unsigned int i;
+ pmccntr_op_start(env);
+ for (i = 0; i < pmu_num_counters(env); i++) {
+ pmevcntr_op_start(env, i);
+ }
+}
+
+void pmu_op_finish(CPUARMState *env)
+{
+ unsigned int i;
+ pmccntr_op_finish(env);
+ for (i = 0; i < pmu_num_counters(env); i++) {
+ pmevcntr_op_finish(env, i);
+ }
+}
+
+void pmu_pre_el_change(ARMCPU *cpu, void *ignored)
+{
+ pmu_op_start(&cpu->env);
+}
+
+void pmu_post_el_change(ARMCPU *cpu, void *ignored)
+{
+ pmu_op_finish(&cpu->env);
+}
+
+void arm_pmu_timer_cb(void *opaque)
+{
+ ARMCPU *cpu = opaque;
+
+ /*
+ * Update all the counter values based on the current underlying counts,
+ * triggering interrupts to be raised, if necessary. pmu_op_finish() also
+ * has the effect of setting the cpu->pmu_timer to the next earliest time a
+ * counter may expire.
+ */
+ pmu_op_start(&cpu->env);
+ pmu_op_finish(&cpu->env);
}
static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- pmccntr_sync(env);
+ pmu_op_start(env);
if (value & PMCRC) {
/* The counter has been reset */
env->cp15.c15_ccnt = 0;
}
- /* only the DP, X, D and E bits are writable */
- env->cp15.c9_pmcr &= ~0x39;
- env->cp15.c9_pmcr |= (value & 0x39);
+ if (value & PMCRP) {
+ unsigned int i;
+ for (i = 0; i < pmu_num_counters(env); i++) {
+ env->cp15.c14_pmevcntr[i] = 0;
+ }
+ }
- pmccntr_sync(env);
+ env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK;
+ env->cp15.c9_pmcr |= (value & PMCR_WRITABLE_MASK);
+
+ pmu_op_finish(env);
}
-static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+static uint64_t pmcr_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- uint64_t total_ticks;
+ uint64_t pmcr = env->cp15.c9_pmcr;
- if (!arm_ccnt_enabled(env)) {
- /* Counter is disabled, do not change value */
- return env->cp15.c15_ccnt;
+ /*
+ * If EL2 is implemented and enabled for the current security state, reads
+ * of PMCR.N from EL1 or EL0 return the value of MDCR_EL2.HPMN or HDCR.HPMN.
+ */
+ if (arm_current_el(env) <= 1 && arm_is_el2_enabled(env)) {
+ pmcr &= ~PMCRN_MASK;
+ pmcr |= (env->cp15.mdcr_el2 & MDCR_HPMN) << PMCRN_SHIFT;
}
- total_ticks = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
- ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
+ return pmcr;
+}
- if (env->cp15.c9_pmcr & PMCRD) {
- /* Increment once every 64 processor clock cycles */
- total_ticks /= 64;
+static void pmswinc_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ unsigned int i;
+ uint64_t overflow_mask, new_pmswinc;
+
+ for (i = 0; i < pmu_num_counters(env); i++) {
+ /* Increment a counter's count iff: */
+ if ((value & (1 << i)) && /* counter's bit is set */
+ /* counter is enabled and not filtered */
+ pmu_counter_enabled(env, i) &&
+ /* counter is SW_INCR */
+ (env->cp15.c14_pmevtyper[i] & PMXEVTYPER_EVTCOUNT) == 0x0) {
+ pmevcntr_op_start(env, i);
+
+ /*
+ * Detect if this write causes an overflow since we can't predict
+ * PMSWINC overflows like we can for other events
+ */
+ new_pmswinc = env->cp15.c14_pmevcntr[i] + 1;
+
+ overflow_mask = pmevcntr_is_64_bit(env, i) ?
+ 1ULL << 63 : 1ULL << 31;
+
+ if (env->cp15.c14_pmevcntr[i] & ~new_pmswinc & overflow_mask) {
+ env->cp15.c9_pmovsr |= (1 << i);
+ pmu_update_irq(env);
+ }
+
+ env->cp15.c14_pmevcntr[i] = new_pmswinc;
+
+ pmevcntr_op_finish(env, i);
+ }
}
- return total_ticks - env->cp15.c15_ccnt;
+}
+
+static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ uint64_t ret;
+ pmccntr_op_start(env);
+ ret = env->cp15.c15_ccnt;
+ pmccntr_op_finish(env);
+ return ret;
}
static void pmselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- /* The value of PMSELR.SEL affects the behavior of PMXEVTYPER and
+ /*
+ * The value of PMSELR.SEL affects the behavior of PMXEVTYPER and
* PMXEVCNTR. We allow [0..31] to be written to PMSELR here; in the
* meanwhile, we check PMSELR.SEL when PMXEVTYPER and PMXEVCNTR are
* accessed.
@@ -1120,22 +1577,9 @@ static void pmselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- uint64_t total_ticks;
-
- if (!arm_ccnt_enabled(env)) {
- /* Counter is disabled, set the absolute value */
- env->cp15.c15_ccnt = value;
- return;
- }
-
- total_ticks = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
- ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
-
- if (env->cp15.c9_pmcr & PMCRD) {
- /* Increment once every 64 processor clock cycles */
- total_ticks /= 64;
- }
- env->cp15.c15_ccnt = total_ticks - value;
+ pmccntr_op_start(env);
+ env->cp15.c15_ccnt = value;
+ pmccntr_op_finish(env);
}
static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -1146,34 +1590,46 @@ static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri,
pmccntr_write(env, ri, deposit64(cur_val, 0, 32, value));
}
-#else /* CONFIG_USER_ONLY */
-
-void pmccntr_sync(CPUARMState *env)
+static void pmccfiltr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
+ pmccntr_op_start(env);
+ env->cp15.pmccfiltr_el0 = value & PMCCFILTR_EL0;
+ pmccntr_op_finish(env);
}
-#endif
-
-static void pmccfiltr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+static void pmccfiltr_write_a32(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- pmccntr_sync(env);
- env->cp15.pmccfiltr_el0 = value & 0xfc000000;
- pmccntr_sync(env);
+ pmccntr_op_start(env);
+ /* M is not accessible from AArch32 */
+ env->cp15.pmccfiltr_el0 = (env->cp15.pmccfiltr_el0 & PMCCFILTR_M) |
+ (value & PMCCFILTR);
+ pmccntr_op_finish(env);
+}
+
+static uint64_t pmccfiltr_read_a32(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ /* M is not visible in AArch32 */
+ return env->cp15.pmccfiltr_el0 & PMCCFILTR;
}
static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
+ pmu_op_start(env);
value &= pmu_counter_mask(env);
env->cp15.c9_pmcnten |= value;
+ pmu_op_finish(env);
}
static void pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
+ pmu_op_start(env);
value &= pmu_counter_mask(env);
env->cp15.c9_pmcnten &= ~value;
+ pmu_op_finish(env);
}
static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -1181,32 +1637,196 @@ static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
{
value &= pmu_counter_mask(env);
env->cp15.c9_pmovsr &= ~value;
+ pmu_update_irq(env);
}
-static void pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void pmovsset_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
- /* Attempts to access PMXEVTYPER are CONSTRAINED UNPREDICTABLE when
+ value &= pmu_counter_mask(env);
+ env->cp15.c9_pmovsr |= value;
+ pmu_update_irq(env);
+}
+
+static void pmevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value, const uint8_t counter)
+{
+ if (counter == 31) {
+ pmccfiltr_write(env, ri, value);
+ } else if (counter < pmu_num_counters(env)) {
+ pmevcntr_op_start(env, counter);
+
+ /*
+ * If this counter's event type is changing, store the current
+ * underlying count for the new type in c14_pmevcntr_delta[counter] so
+ * pmevcntr_op_finish has the correct baseline when it converts back to
+ * a delta.
+ */
+ uint16_t old_event = env->cp15.c14_pmevtyper[counter] &
+ PMXEVTYPER_EVTCOUNT;
+ uint16_t new_event = value & PMXEVTYPER_EVTCOUNT;
+ if (old_event != new_event) {
+ uint64_t count = 0;
+ if (event_supported(new_event)) {
+ uint16_t event_idx = supported_event_map[new_event];
+ count = pm_events[event_idx].get_count(env);
+ }
+ env->cp15.c14_pmevcntr_delta[counter] = count;
+ }
+
+ env->cp15.c14_pmevtyper[counter] = value & PMXEVTYPER_MASK;
+ pmevcntr_op_finish(env, counter);
+ }
+ /*
+ * Attempts to access PMXEVTYPER are CONSTRAINED UNPREDICTABLE when
* PMSELR value is equal to or greater than the number of implemented
* counters, but not equal to 0x1f. We opt to behave as a RAZ/WI.
*/
- if (env->cp15.c9_pmselr == 0x1f) {
- pmccfiltr_write(env, ri, value);
+}
+
+static uint64_t pmevtyper_read(CPUARMState *env, const ARMCPRegInfo *ri,
+ const uint8_t counter)
+{
+ if (counter == 31) {
+ return env->cp15.pmccfiltr_el0;
+ } else if (counter < pmu_num_counters(env)) {
+ return env->cp15.c14_pmevtyper[counter];
+ } else {
+ /*
+ * We opt to behave as a RAZ/WI when attempts to access PMXEVTYPER
+ * are CONSTRAINED UNPREDICTABLE. See comments in pmevtyper_write().
+ */
+ return 0;
+ }
+}
+
+static void pmevtyper_writefn(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7);
+ pmevtyper_write(env, ri, value, counter);
+}
+
+static void pmevtyper_rawwrite(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7);
+ env->cp15.c14_pmevtyper[counter] = value;
+
+ /*
+ * pmevtyper_rawwrite is called between a pair of pmu_op_start and
+ * pmu_op_finish calls when loading saved state for a migration. Because
+ * we're potentially updating the type of event here, the value written to
+ * c14_pmevcntr_delta by the preceding pmu_op_start call may be for a
+ * different counter type. Therefore, we need to set this value to the
+ * current count for the counter type we're writing so that pmu_op_finish
+ * has the correct count for its calculation.
+ */
+ uint16_t event = value & PMXEVTYPER_EVTCOUNT;
+ if (event_supported(event)) {
+ uint16_t event_idx = supported_event_map[event];
+ env->cp15.c14_pmevcntr_delta[counter] =
+ pm_events[event_idx].get_count(env);
}
}
+static uint64_t pmevtyper_readfn(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7);
+ return pmevtyper_read(env, ri, counter);
+}
+
+static void pmxevtyper_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ pmevtyper_write(env, ri, value, env->cp15.c9_pmselr & 31);
+}
+
static uint64_t pmxevtyper_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- /* We opt to behave as a RAZ/WI when attempts to access PMXEVTYPER
- * are CONSTRAINED UNPREDICTABLE. See comments in pmxevtyper_write().
+ return pmevtyper_read(env, ri, env->cp15.c9_pmselr & 31);
+}
+
+static void pmevcntr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value, uint8_t counter)
+{
+ if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) {
+ /* Before FEAT_PMUv3p5, top 32 bits of event counters are RES0 */
+ value &= MAKE_64BIT_MASK(0, 32);
+ }
+ if (counter < pmu_num_counters(env)) {
+ pmevcntr_op_start(env, counter);
+ env->cp15.c14_pmevcntr[counter] = value;
+ pmevcntr_op_finish(env, counter);
+ }
+ /*
+ * We opt to behave as a RAZ/WI when attempts to access PM[X]EVCNTR
+ * are CONSTRAINED UNPREDICTABLE.
*/
- if (env->cp15.c9_pmselr == 0x1f) {
- return env->cp15.pmccfiltr_el0;
+}
+
+static uint64_t pmevcntr_read(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint8_t counter)
+{
+ if (counter < pmu_num_counters(env)) {
+ uint64_t ret;
+ pmevcntr_op_start(env, counter);
+ ret = env->cp15.c14_pmevcntr[counter];
+ pmevcntr_op_finish(env, counter);
+ if (!cpu_isar_feature(any_pmuv3p5, env_archcpu(env))) {
+ /* Before FEAT_PMUv3p5, top 32 bits of event counters are RES0 */
+ ret &= MAKE_64BIT_MASK(0, 32);
+ }
+ return ret;
} else {
+ /*
+ * We opt to behave as a RAZ/WI when attempts to access PM[X]EVCNTR
+ * are CONSTRAINED UNPREDICTABLE.
+ */
return 0;
}
}
+static void pmevcntr_writefn(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7);
+ pmevcntr_write(env, ri, value, counter);
+}
+
+static uint64_t pmevcntr_readfn(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7);
+ return pmevcntr_read(env, ri, counter);
+}
+
+static void pmevcntr_rawwrite(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7);
+ assert(counter < pmu_num_counters(env));
+ env->cp15.c14_pmevcntr[counter] = value;
+ pmevcntr_write(env, ri, value, counter);
+}
+
+static uint64_t pmevcntr_rawread(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ uint8_t counter = ((ri->crm & 3) << 3) | (ri->opc2 & 7);
+ assert(counter < pmu_num_counters(env));
+ return env->cp15.c14_pmevcntr[counter];
+}
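The ((crm & 3) << 3) | (opc2 & 7) decode used by these accessors recovers <n> from the architected PMEVCNTR<n>/PMEVTYPER<n> encodings, where crm carries n[4:3] and opc2 carries n[2:0]; a standalone check:

#include <assert.h>
#include <stdint.h>

static uint8_t counter_of(uint8_t crm, uint8_t opc2)
{
    return ((crm & 3) << 3) | (opc2 & 7);
}

int main(void)
{
    assert(counter_of(0x8, 5) == 5);    /* PMEVCNTR5:  crm = 0b1000, opc2 = 5 */
    assert(counter_of(0x9, 2) == 10);   /* PMEVCNTR10: crm = 0b1001, opc2 = 2 */
    assert(counter_of(0xb, 6) == 30);   /* PMEVCNTR30: crm = 0b1011, opc2 = 6 */
    return 0;
}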
+
+static void pmxevcntr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ pmevcntr_write(env, ri, value, env->cp15.c9_pmselr & 31);
+}
+
+static uint64_t pmxevcntr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return pmevcntr_read(env, ri, env->cp15.c9_pmselr & 31);
+}
+
static void pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
@@ -1223,6 +1843,7 @@ static void pmintenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
/* We have no event counters so only the C bit can be changed */
value &= pmu_counter_mask(env);
env->cp15.c9_pminten |= value;
+ pmu_update_irq(env);
}
static void pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -1230,12 +1851,14 @@ static void pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
{
value &= pmu_counter_mask(env);
env->cp15.c9_pminten &= ~value;
+ pmu_update_irq(env);
}
static void vbar_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- /* Note that even though the AArch64 view of this register has bits
+ /*
+ * Note that even though the AArch64 view of this register has bits
* [10:0] all RES0 we can only mask the bottom 5, to comply with the
* architectural requirements for bits which are RES0 only in some
* contexts. (ARMv8 would permit us to do no masking at all, but ARMv7
@@ -1246,16 +1869,75 @@ static void vbar_write(CPUARMState *env, const ARMCPRegInfo *ri,
static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
- /* We only mask off bits that are RES0 both for AArch64 and AArch32.
- * For bits that vary between AArch32/64, code needs to check the
- * current execution mode before directly using the feature bit.
+ /* Begin with base v8.0 state. */
+ uint64_t valid_mask = 0x3fff;
+ ARMCPU *cpu = env_archcpu(env);
+ uint64_t changed;
+
+ /*
+ * Because SCR_EL3 is the "real" cpreg and SCR is the alias, reset always
+ * passes the reginfo for SCR_EL3, which has type ARM_CP_STATE_AA64.
+ * Instead, choose the format based on the mode of EL3.
*/
- uint32_t valid_mask = SCR_AARCH64_MASK | SCR_AARCH32_MASK;
+ if (arm_el_is_aa64(env, 3)) {
+ value |= SCR_FW | SCR_AW; /* RES1 */
+ valid_mask &= ~SCR_NET; /* RES0 */
+
+ if (!cpu_isar_feature(aa64_aa32_el1, cpu) &&
+ !cpu_isar_feature(aa64_aa32_el2, cpu)) {
+ value |= SCR_RW; /* RAO/WI */
+ }
+ if (cpu_isar_feature(aa64_ras, cpu)) {
+ valid_mask |= SCR_TERR;
+ }
+ if (cpu_isar_feature(aa64_lor, cpu)) {
+ valid_mask |= SCR_TLOR;
+ }
+ if (cpu_isar_feature(aa64_pauth, cpu)) {
+ valid_mask |= SCR_API | SCR_APK;
+ }
+ if (cpu_isar_feature(aa64_sel2, cpu)) {
+ valid_mask |= SCR_EEL2;
+ } else if (cpu_isar_feature(aa64_rme, cpu)) {
+ /* With RME and without SEL2, NS is RES1 (R_GSWWH, I_DJJQJ). */
+ value |= SCR_NS;
+ }
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ valid_mask |= SCR_ATA;
+ }
+ if (cpu_isar_feature(aa64_scxtnum, cpu)) {
+ valid_mask |= SCR_ENSCXT;
+ }
+ if (cpu_isar_feature(aa64_doublefault, cpu)) {
+ valid_mask |= SCR_EASE | SCR_NMEA;
+ }
+ if (cpu_isar_feature(aa64_sme, cpu)) {
+ valid_mask |= SCR_ENTP2;
+ }
+ if (cpu_isar_feature(aa64_hcx, cpu)) {
+ valid_mask |= SCR_HXEN;
+ }
+ if (cpu_isar_feature(aa64_fgt, cpu)) {
+ valid_mask |= SCR_FGTEN;
+ }
+ if (cpu_isar_feature(aa64_rme, cpu)) {
+ valid_mask |= SCR_NSE | SCR_GPF;
+ }
+ if (cpu_isar_feature(aa64_ecv, cpu)) {
+ valid_mask |= SCR_ECVEN;
+ }
+ } else {
+ valid_mask &= ~(SCR_RW | SCR_ST);
+ if (cpu_isar_feature(aa32_ras, cpu)) {
+ valid_mask |= SCR_TERR;
+ }
+ }
if (!arm_feature(env, ARM_FEATURE_EL2)) {
valid_mask &= ~SCR_HCE;
- /* On ARMv7, SMD (or SCD as it is called in v7) is only
+ /*
+ * On ARMv7, SMD (or SCD as it is called in v7) is only
* supported if EL2 exists. The bit is UNK/SBZP when
* EL2 is unavailable. In QEMU ARMv7, we force it to always zero
* when EL2 is unavailable.
@@ -1269,14 +1951,51 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
/* Clear all-context RES0 bits. */
value &= valid_mask;
- raw_write(env, ri, value);
+ changed = env->cp15.scr_el3 ^ value;
+ env->cp15.scr_el3 = value;
+
+ /*
+ * If SCR_EL3.{NS,NSE} changes, i.e. change of security state,
+ * we must invalidate all TLBs below EL3.
+ */
+ if (changed & (SCR_NS | SCR_NSE)) {
+ tlb_flush_by_mmuidx(env_cpu(env), (ARMMMUIdxBit_E10_0 |
+ ARMMMUIdxBit_E20_0 |
+ ARMMMUIdxBit_E10_1 |
+ ARMMMUIdxBit_E20_2 |
+ ARMMMUIdxBit_E10_1_PAN |
+ ARMMMUIdxBit_E20_2_PAN |
+ ARMMMUIdxBit_E2));
+ }
+}
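The TLB-flush decision above is a plain change-detection pattern: XOR the old and new register values and test the bits whose change switches security state. A minimal standalone sketch of that pattern follows; the bit positions are stand-ins for illustration, not QEMU's SCR_* macros:

#include <stdbool.h>
#include <stdint.h>

#define EX_SCR_NS   (1ULL << 0)    /* illustrative stand-in */
#define EX_SCR_NSE  (1ULL << 62)   /* illustrative stand-in */

/* True if a write changes any bit that selects the security space. */
static bool security_space_changed(uint64_t old_scr, uint64_t new_scr)
{
    uint64_t changed = old_scr ^ new_scr;   /* bits that differ */
    return (changed & (EX_SCR_NS | EX_SCR_NSE)) != 0;
}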
+
+static void scr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ /*
+ * scr_write will set the RES1 bits on an AArch64-only CPU.
+ * The reset value will be 0x30 on an AArch64-only CPU and 0 otherwise.
+ */
+ scr_write(env, ri, 0);
+}
+
+static CPAccessResult access_tid4(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 1 &&
+ (arm_hcr_el2_eff(env) & (HCR_TID2 | HCR_TID4))) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+
+ return CP_ACCESS_OK;
}
static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
- /* Acquire the CSSELR index from the bank corresponding to the CCSIDR
+ /*
+ * Acquire the CSSELR index from the bank corresponding to the CCSIDR
* bank
*/
uint32_t index = A32_BANKED_REG_GET(env, csselr,
@@ -1293,26 +2012,68 @@ static void csselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- CPUState *cs = ENV_GET_CPU(env);
+ CPUState *cs = env_cpu(env);
+ bool el1 = arm_current_el(env) == 1;
+ uint64_t hcr_el2 = el1 ? arm_hcr_el2_eff(env) : 0;
uint64_t ret = 0;
- if (cs->interrupt_request & CPU_INTERRUPT_HARD) {
- ret |= CPSR_I;
+ if (hcr_el2 & HCR_IMO) {
+ if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) {
+ ret |= CPSR_I;
+ }
+ } else {
+ if (cs->interrupt_request & CPU_INTERRUPT_HARD) {
+ ret |= CPSR_I;
+ }
}
- if (cs->interrupt_request & CPU_INTERRUPT_FIQ) {
- ret |= CPSR_F;
+
+ if (hcr_el2 & HCR_FMO) {
+ if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) {
+ ret |= CPSR_F;
+ }
+ } else {
+ if (cs->interrupt_request & CPU_INTERRUPT_FIQ) {
+ ret |= CPSR_F;
+ }
+ }
+
+ if (hcr_el2 & HCR_AMO) {
+ if (cs->interrupt_request & CPU_INTERRUPT_VSERR) {
+ ret |= CPSR_A;
+ }
}
- /* External aborts are not possible in QEMU so A bit is always clear */
+
return ret;
}
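The routing above boils down to: at EL1, HCR_EL2.{IMO,FMO,AMO} select whether ISR reports the virtual or the physical pending state for IRQ, FIQ and SError respectively. A condensed sketch of that selection, with illustrative stand-ins rather than QEMU's CPU_INTERRUPT_*/CPSR_* definitions:

#include <stdbool.h>
#include <stdint.h>

struct pending { bool irq, fiq, virq, vfiq, vserr; };   /* stand-in state */

static uint32_t isr_bits(const struct pending *p, bool imo, bool fmo, bool amo)
{
    uint32_t ret = 0;

    if (imo ? p->virq : p->irq) {   /* I: virtual IRQ if IMO routes to EL2 */
        ret |= 1u << 7;             /* bit position illustrative */
    }
    if (fmo ? p->vfiq : p->fiq) {   /* F: likewise selected by FMO */
        ret |= 1u << 6;
    }
    if (amo && p->vserr) {          /* A: only the virtual SError, gated by AMO */
        ret |= 1u << 8;
    }
    return ret;
}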
+static CPAccessResult access_aa64_tid1(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_TID1)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+
+ return CP_ACCESS_OK;
+}
+
+static CPAccessResult access_aa32_tid1(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_feature(env, ARM_FEATURE_V8)) {
+ return access_aa64_tid1(env, ri, isread);
+ }
+
+ return CP_ACCESS_OK;
+}
+
static const ARMCPRegInfo v7_cp_reginfo[] = {
/* the old v6 WFI, UNPREDICTABLE in v7 but we choose to NOP */
{ .name = "NOP", .cp = 15, .crn = 7, .crm = 0, .opc1 = 0, .opc2 = 4,
.access = PL1_W, .type = ARM_CP_NOP },
- /* Performance monitors are implementation defined in v7,
+ /*
+ * Performance monitors are implementation defined in v7,
* but with an ARM recommended set of registers, which we
- * follow (although we don't actually implement any counters)
+ * follow.
*
* Performance registers fall into three categories:
* (a) always UNDEF in PL0, RW in PL1 (PMINTENSET, PMINTENCLR)
@@ -1322,83 +2083,119 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
* or PL0_RO as appropriate and then check PMUSERENR in the helper fn.
*/
{ .name = "PMCNTENSET", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 1,
- .access = PL0_RW, .type = ARM_CP_ALIAS,
+ .access = PL0_RW, .type = ARM_CP_ALIAS | ARM_CP_IO,
.fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten),
.writefn = pmcntenset_write,
.accessfn = pmreg_access,
+ .fgt = FGT_PMCNTEN,
.raw_writefn = raw_write },
- { .name = "PMCNTENSET_EL0", .state = ARM_CP_STATE_AA64,
+ { .name = "PMCNTENSET_EL0", .state = ARM_CP_STATE_AA64, .type = ARM_CP_IO,
.opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 1,
.access = PL0_RW, .accessfn = pmreg_access,
+ .fgt = FGT_PMCNTEN,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), .resetvalue = 0,
.writefn = pmcntenset_write, .raw_writefn = raw_write },
{ .name = "PMCNTENCLR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 2,
.access = PL0_RW,
.fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten),
.accessfn = pmreg_access,
+ .fgt = FGT_PMCNTEN,
.writefn = pmcntenclr_write,
- .type = ARM_CP_ALIAS },
+ .type = ARM_CP_ALIAS | ARM_CP_IO },
{ .name = "PMCNTENCLR_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 2,
.access = PL0_RW, .accessfn = pmreg_access,
- .type = ARM_CP_ALIAS,
+ .fgt = FGT_PMCNTEN,
+ .type = ARM_CP_ALIAS | ARM_CP_IO,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten),
.writefn = pmcntenclr_write },
{ .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3,
- .access = PL0_RW,
+ .access = PL0_RW, .type = ARM_CP_IO,
.fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr),
.accessfn = pmreg_access,
+ .fgt = FGT_PMOVS,
.writefn = pmovsr_write,
.raw_writefn = raw_write },
{ .name = "PMOVSCLR_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 3,
.access = PL0_RW, .accessfn = pmreg_access,
- .type = ARM_CP_ALIAS,
+ .fgt = FGT_PMOVS,
+ .type = ARM_CP_ALIAS | ARM_CP_IO,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr),
.writefn = pmovsr_write,
.raw_writefn = raw_write },
- /* Unimplemented so WI. */
{ .name = "PMSWINC", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 4,
- .access = PL0_W, .accessfn = pmreg_access_swinc, .type = ARM_CP_NOP },
-#ifndef CONFIG_USER_ONLY
+ .access = PL0_W, .accessfn = pmreg_access_swinc,
+ .fgt = FGT_PMSWINC_EL0,
+ .type = ARM_CP_NO_RAW | ARM_CP_IO,
+ .writefn = pmswinc_write },
+ { .name = "PMSWINC_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 4,
+ .access = PL0_W, .accessfn = pmreg_access_swinc,
+ .fgt = FGT_PMSWINC_EL0,
+ .type = ARM_CP_NO_RAW | ARM_CP_IO,
+ .writefn = pmswinc_write },
{ .name = "PMSELR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 5,
.access = PL0_RW, .type = ARM_CP_ALIAS,
+ .fgt = FGT_PMSELR_EL0,
.fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmselr),
.accessfn = pmreg_access_selr, .writefn = pmselr_write,
.raw_writefn = raw_write},
{ .name = "PMSELR_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 5,
.access = PL0_RW, .accessfn = pmreg_access_selr,
+ .fgt = FGT_PMSELR_EL0,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pmselr),
.writefn = pmselr_write, .raw_writefn = raw_write, },
{ .name = "PMCCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0,
.access = PL0_RW, .resetvalue = 0, .type = ARM_CP_ALIAS | ARM_CP_IO,
+ .fgt = FGT_PMCCNTR_EL0,
.readfn = pmccntr_read, .writefn = pmccntr_write32,
.accessfn = pmreg_access_ccntr },
{ .name = "PMCCNTR_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 0,
.access = PL0_RW, .accessfn = pmreg_access_ccntr,
+ .fgt = FGT_PMCCNTR_EL0,
.type = ARM_CP_IO,
- .readfn = pmccntr_read, .writefn = pmccntr_write, },
-#endif
+ .fieldoffset = offsetof(CPUARMState, cp15.c15_ccnt),
+ .readfn = pmccntr_read, .writefn = pmccntr_write,
+ .raw_readfn = raw_read, .raw_writefn = raw_write, },
+ { .name = "PMCCFILTR", .cp = 15, .opc1 = 0, .crn = 14, .crm = 15, .opc2 = 7,
+ .writefn = pmccfiltr_write_a32, .readfn = pmccfiltr_read_a32,
+ .access = PL0_RW, .accessfn = pmreg_access,
+ .fgt = FGT_PMCCFILTR_EL0,
+ .type = ARM_CP_ALIAS | ARM_CP_IO,
+ .resetvalue = 0, },
{ .name = "PMCCFILTR_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 14, .crm = 15, .opc2 = 7,
- .writefn = pmccfiltr_write,
+ .writefn = pmccfiltr_write, .raw_writefn = raw_write,
.access = PL0_RW, .accessfn = pmreg_access,
+ .fgt = FGT_PMCCFILTR_EL0,
.type = ARM_CP_IO,
.fieldoffset = offsetof(CPUARMState, cp15.pmccfiltr_el0),
.resetvalue = 0, },
{ .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1,
- .access = PL0_RW, .type = ARM_CP_NO_RAW, .accessfn = pmreg_access,
+ .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO,
+ .accessfn = pmreg_access,
+ .fgt = FGT_PMEVTYPERN_EL0,
.writefn = pmxevtyper_write, .readfn = pmxevtyper_read },
{ .name = "PMXEVTYPER_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 1,
- .access = PL0_RW, .type = ARM_CP_NO_RAW, .accessfn = pmreg_access,
+ .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO,
+ .accessfn = pmreg_access,
+ .fgt = FGT_PMEVTYPERN_EL0,
.writefn = pmxevtyper_write, .readfn = pmxevtyper_read },
- /* Unimplemented, RAZ/WI. */
{ .name = "PMXEVCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 2,
- .access = PL0_RW, .type = ARM_CP_CONST, .resetvalue = 0,
- .accessfn = pmreg_access_xevcntr },
+ .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO,
+ .accessfn = pmreg_access_xevcntr,
+ .fgt = FGT_PMEVCNTRN_EL0,
+ .writefn = pmxevcntr_write, .readfn = pmxevcntr_read },
+ { .name = "PMXEVCNTR_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 2,
+ .access = PL0_RW, .type = ARM_CP_NO_RAW | ARM_CP_IO,
+ .accessfn = pmreg_access_xevcntr,
+ .fgt = FGT_PMEVCNTRN_EL0,
+ .writefn = pmxevcntr_write, .readfn = pmxevcntr_read },
{ .name = "PMUSERENR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 0,
.access = PL0_R | PL1_RW, .accessfn = access_tpm,
.fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmuserenr),
@@ -1412,6 +2209,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
.writefn = pmuserenr_write, .raw_writefn = raw_write },
{ .name = "PMINTENSET", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 1,
.access = PL1_RW, .accessfn = access_tpm,
+ .fgt = FGT_PMINTEN,
.type = ARM_CP_ALIAS | ARM_CP_IO,
.fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pminten),
.resetvalue = 0,
@@ -1419,114 +2217,172 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
{ .name = "PMINTENSET_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 1,
.access = PL1_RW, .accessfn = access_tpm,
+ .fgt = FGT_PMINTEN,
.type = ARM_CP_IO,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
.writefn = pmintenset_write, .raw_writefn = raw_write,
.resetvalue = 0x0 },
{ .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2,
.access = PL1_RW, .accessfn = access_tpm,
- .type = ARM_CP_ALIAS | ARM_CP_IO,
+ .fgt = FGT_PMINTEN,
+ .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
.writefn = pmintenclr_write, },
{ .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2,
.access = PL1_RW, .accessfn = access_tpm,
- .type = ARM_CP_ALIAS | ARM_CP_IO,
+ .fgt = FGT_PMINTEN,
+ .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW,
.fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
.writefn = pmintenclr_write },
{ .name = "CCSIDR", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0,
- .access = PL1_R, .readfn = ccsidr_read, .type = ARM_CP_NO_RAW },
+ .access = PL1_R,
+ .accessfn = access_tid4,
+ .fgt = FGT_CCSIDR_EL1,
+ .readfn = ccsidr_read, .type = ARM_CP_NO_RAW },
{ .name = "CSSELR", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .crn = 0, .crm = 0, .opc1 = 2, .opc2 = 0,
- .access = PL1_RW, .writefn = csselr_write, .resetvalue = 0,
+ .access = PL1_RW,
+ .accessfn = access_tid4,
+ .fgt = FGT_CSSELR_EL1,
+ .writefn = csselr_write, .resetvalue = 0,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.csselr_s),
offsetof(CPUARMState, cp15.csselr_ns) } },
- /* Auxiliary ID register: this actually has an IMPDEF value but for now
+ /*
+ * Auxiliary ID register: this actually has an IMPDEF value but for now
* just RAZ for all cores:
*/
{ .name = "AIDR", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 7,
- .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
- /* Auxiliary fault status registers: these also are IMPDEF, and we
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid1,
+ .fgt = FGT_AIDR_EL1,
+ .resetvalue = 0 },
+ /*
+ * Auxiliary fault status registers: these also are IMPDEF, and we
* choose to RAZ/WI for all cores.
*/
{ .name = "AFSR0_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 0,
- .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .fgt = FGT_AFSR0_EL1,
+ .nv2_redirect_offset = 0x128 | NV2_REDIR_NV1,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
{ .name = "AFSR1_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 1,
- .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- /* MAIR can just read-as-written because we don't implement caches
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .fgt = FGT_AFSR1_EL1,
+ .nv2_redirect_offset = 0x130 | NV2_REDIR_NV1,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ /*
+ * MAIR can just read-as-written because we don't implement caches
* and so don't need to care about memory attributes.
*/
{ .name = "MAIR_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0,
- .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el[1]),
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .fgt = FGT_MAIR_EL1,
+ .nv2_redirect_offset = 0x140 | NV2_REDIR_NV1,
+ .fieldoffset = offsetof(CPUARMState, cp15.mair_el[1]),
.resetvalue = 0 },
{ .name = "MAIR_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 10, .crm = 2, .opc2 = 0,
.access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el[3]),
.resetvalue = 0 },
- /* For non-long-descriptor page tables these are PRRR and NMRR;
+ /*
+ * For non-long-descriptor page tables these are PRRR and NMRR;
* regardless they still act as reads-as-written for QEMU.
*/
- /* MAIR0/1 are defined separately from their 64-bit counterpart which
+ /*
+ * MAIR0/1 are defined separately from their 64-bit counterpart which
* allows them to assign the correct fieldoffset based on the endianness
* handled in the field definitions.
*/
{ .name = "MAIR0", .state = ARM_CP_STATE_AA32,
- .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0, .access = PL1_RW,
+ .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.mair0_s),
offsetof(CPUARMState, cp15.mair0_ns) },
.resetfn = arm_cp_reset_ignore },
{ .name = "MAIR1", .state = ARM_CP_STATE_AA32,
- .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 1, .access = PL1_RW,
+ .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 1,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.mair1_s),
offsetof(CPUARMState, cp15.mair1_ns) },
.resetfn = arm_cp_reset_ignore },
{ .name = "ISR_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 12, .crm = 1, .opc2 = 0,
+ .fgt = FGT_ISR_EL1,
.type = ARM_CP_NO_RAW, .access = PL1_R, .readfn = isr_read },
/* 32 bit ITLB invalidates */
{ .name = "ITLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 0,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiall_write },
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbiall_write },
{ .name = "ITLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_write },
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbimva_write },
{ .name = "ITLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 2,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiasid_write },
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbiasid_write },
/* 32 bit DTLB invalidates */
{ .name = "DTLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 0,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiall_write },
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbiall_write },
{ .name = "DTLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_write },
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbimva_write },
{ .name = "DTLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 2,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiasid_write },
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbiasid_write },
/* 32 bit TLB invalidates */
{ .name = "TLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiall_write },
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbiall_write },
{ .name = "TLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_write },
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbimva_write },
{ .name = "TLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiasid_write },
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbiasid_write },
{ .name = "TLBIMVAA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimvaa_write },
- REGINFO_SENTINEL
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbimvaa_write },
};
static const ARMCPRegInfo v7mp_cp_reginfo[] = {
/* 32 bit TLB invalidates, Inner Shareable */
{ .name = "TLBIALLIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbiall_is_write },
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
+ .writefn = tlbiall_is_write },
{ .name = "TLBIMVAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_is_write },
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
+ .writefn = tlbimva_is_write },
{ .name = "TLBIASIDIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2,
- .type = ARM_CP_NO_RAW, .access = PL1_W,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
.writefn = tlbiasid_is_write },
{ .name = "TLBIMVAAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3,
- .type = ARM_CP_NO_RAW, .access = PL1_W,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
.writefn = tlbimvaa_is_write },
- REGINFO_SENTINEL
+};
+
+static const ARMCPRegInfo pmovsset_cp_reginfo[] = {
+ /* PMOVSSET is not implemented in v7 before v7ve */
+ { .name = "PMOVSSET", .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 3,
+ .access = PL0_RW, .accessfn = pmreg_access,
+ .fgt = FGT_PMOVS,
+ .type = ARM_CP_ALIAS | ARM_CP_IO,
+ .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr),
+ .writefn = pmovsset_write,
+ .raw_writefn = raw_write },
+ { .name = "PMOVSSET_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 14, .opc2 = 3,
+ .access = PL0_RW, .accessfn = pmreg_access,
+ .fgt = FGT_PMOVS,
+ .type = ARM_CP_ALIAS | ARM_CP_IO,
+ .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr),
+ .writefn = pmovsset_write,
+ .raw_writefn = raw_write },
};
static void teecr_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -1536,56 +2392,73 @@ static void teecr_write(CPUARMState *env, const ARMCPRegInfo *ri,
env->teecr = value;
}
+static CPAccessResult teecr_access(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ /*
+ * HSTR.TTEE only exists in v7A, not v8A, but v8A doesn't have T2EE
+ * at all, so we don't need to check whether we're v8A.
+ */
+ if (arm_current_el(env) < 2 && !arm_is_secure_below_el3(env) &&
+ (env->cp15.hstr_el2 & HSTR_TTEE)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ return CP_ACCESS_OK;
+}
+
static CPAccessResult teehbr_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
if (arm_current_el(env) == 0 && (env->teecr & 1)) {
return CP_ACCESS_TRAP;
}
- return CP_ACCESS_OK;
+ return teecr_access(env, ri, isread);
}
static const ARMCPRegInfo t2ee_cp_reginfo[] = {
{ .name = "TEECR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 6, .opc2 = 0,
.access = PL1_RW, .fieldoffset = offsetof(CPUARMState, teecr),
.resetvalue = 0,
- .writefn = teecr_write },
+ .writefn = teecr_write, .accessfn = teecr_access },
{ .name = "TEEHBR", .cp = 14, .crn = 1, .crm = 0, .opc1 = 6, .opc2 = 0,
.access = PL0_RW, .fieldoffset = offsetof(CPUARMState, teehbr),
.accessfn = teehbr_access, .resetvalue = 0 },
- REGINFO_SENTINEL
};
static const ARMCPRegInfo v6k_cp_reginfo[] = {
{ .name = "TPIDR_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .opc2 = 2, .crn = 13, .crm = 0,
.access = PL0_RW,
+ .fgt = FGT_TPIDR_EL0,
.fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[0]), .resetvalue = 0 },
{ .name = "TPIDRURW", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 2,
.access = PL0_RW,
+ .fgt = FGT_TPIDR_EL0,
.bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tpidrurw_s),
offsetoflow32(CPUARMState, cp15.tpidrurw_ns) },
.resetfn = arm_cp_reset_ignore },
{ .name = "TPIDRRO_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .opc2 = 3, .crn = 13, .crm = 0,
- .access = PL0_R|PL1_W,
+ .access = PL0_R | PL1_W,
+ .fgt = FGT_TPIDRRO_EL0,
.fieldoffset = offsetof(CPUARMState, cp15.tpidrro_el[0]),
.resetvalue = 0},
{ .name = "TPIDRURO", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 3,
- .access = PL0_R|PL1_W,
+ .access = PL0_R | PL1_W,
+ .fgt = FGT_TPIDRRO_EL0,
.bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tpidruro_s),
offsetoflow32(CPUARMState, cp15.tpidruro_ns) },
.resetfn = arm_cp_reset_ignore },
{ .name = "TPIDR_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .opc2 = 4, .crn = 13, .crm = 0,
.access = PL1_RW,
+ .fgt = FGT_TPIDR_EL1,
.fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[1]), .resetvalue = 0 },
{ .name = "TPIDRPRW", .opc1 = 0, .cp = 15, .crn = 13, .crm = 0, .opc2 = 4,
.access = PL1_RW,
.bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tpidrprw_s),
offsetoflow32(CPUARMState, cp15.tpidrprw_ns) },
.resetvalue = 0 },
- REGINFO_SENTINEL
};
#ifndef CONFIG_USER_ONLY
@@ -1593,14 +2466,23 @@ static const ARMCPRegInfo v6k_cp_reginfo[] = {
static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
- /* CNTFRQ: not visible from PL0 if both PL0PCTEN and PL0VCTEN are zero.
+ /*
+ * CNTFRQ: not visible from PL0 if both PL0PCTEN and PL0VCTEN are zero.
* Writable only at the highest implemented exception level.
*/
int el = arm_current_el(env);
+ uint64_t hcr;
+ uint32_t cntkctl;
switch (el) {
case 0:
- if (!extract32(env->cp15.c14_cntkctl, 0, 2)) {
+ hcr = arm_hcr_el2_eff(env);
+ if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+ cntkctl = env->cp15.cnthctl_el2;
+ } else {
+ cntkctl = env->cp15.c14_cntkctl;
+ }
+ if (!extract32(cntkctl, 0, 2)) {
return CP_ACCESS_TRAP;
}
break;
@@ -1627,18 +2509,36 @@ static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx,
bool isread)
{
unsigned int cur_el = arm_current_el(env);
- bool secure = arm_is_secure(env);
+ bool has_el2 = arm_is_el2_enabled(env);
+ uint64_t hcr = arm_hcr_el2_eff(env);
- /* CNT[PV]CT: not visible from PL0 if ELO[PV]CTEN is zero */
- if (cur_el == 0 &&
- !extract32(env->cp15.c14_cntkctl, timeridx, 1)) {
- return CP_ACCESS_TRAP;
- }
+ switch (cur_el) {
+ case 0:
+ /* If HCR_EL2.<E2H,TGE> == '11': check CNTHCTL_EL2.EL0[PV]CTEN. */
+ if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+ return (extract32(env->cp15.cnthctl_el2, timeridx, 1)
+ ? CP_ACCESS_OK : CP_ACCESS_TRAP_EL2);
+ }
- if (arm_feature(env, ARM_FEATURE_EL2) &&
- timeridx == GTIMER_PHYS && !secure && cur_el < 2 &&
- !extract32(env->cp15.cnthctl_el2, 0, 1)) {
- return CP_ACCESS_TRAP_EL2;
+ /* CNT[PV]CT: not visible from PL0 if EL0[PV]CTEN is zero */
+ if (!extract32(env->cp15.c14_cntkctl, timeridx, 1)) {
+ return CP_ACCESS_TRAP;
+ }
+ /* fall through */
+ case 1:
+ /* Check CNTHCTL_EL2.EL1PCTEN, which changes location based on E2H. */
+ if (has_el2 && timeridx == GTIMER_PHYS &&
+ (hcr & HCR_E2H
+ ? !extract32(env->cp15.cnthctl_el2, 10, 1)
+ : !extract32(env->cp15.cnthctl_el2, 0, 1))) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ if (has_el2 && timeridx == GTIMER_VIRT) {
+ if (FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, EL1TVCT)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ }
+ break;
}
return CP_ACCESS_OK;
}
@@ -1647,20 +2547,46 @@ static CPAccessResult gt_timer_access(CPUARMState *env, int timeridx,
bool isread)
{
unsigned int cur_el = arm_current_el(env);
- bool secure = arm_is_secure(env);
+ bool has_el2 = arm_is_el2_enabled(env);
+ uint64_t hcr = arm_hcr_el2_eff(env);
- /* CNT[PV]_CVAL, CNT[PV]_CTL, CNT[PV]_TVAL: not visible from PL0 if
- * EL0[PV]TEN is zero.
- */
- if (cur_el == 0 &&
- !extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) {
- return CP_ACCESS_TRAP;
- }
+ switch (cur_el) {
+ case 0:
+ if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+ /* If HCR_EL2.<E2H,TGE> == '11': check CNTHCTL_EL2.EL0[PV]TEN. */
+ return (extract32(env->cp15.cnthctl_el2, 9 - timeridx, 1)
+ ? CP_ACCESS_OK : CP_ACCESS_TRAP_EL2);
+ }
- if (arm_feature(env, ARM_FEATURE_EL2) &&
- timeridx == GTIMER_PHYS && !secure && cur_el < 2 &&
- !extract32(env->cp15.cnthctl_el2, 1, 1)) {
- return CP_ACCESS_TRAP_EL2;
+ /*
+ * CNT[PV]_CVAL, CNT[PV]_CTL, CNT[PV]_TVAL: not visible from
+ * EL0 if EL0[PV]TEN is zero.
+ */
+ if (!extract32(env->cp15.c14_cntkctl, 9 - timeridx, 1)) {
+ return CP_ACCESS_TRAP;
+ }
+ /* fall through */
+
+ case 1:
+ if (has_el2 && timeridx == GTIMER_PHYS) {
+ if (hcr & HCR_E2H) {
+ /* If HCR_EL2.<E2H,TGE> == '10': check CNTHCTL_EL2.EL1PTEN. */
+ if (!extract32(env->cp15.cnthctl_el2, 11, 1)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ } else {
+ /* If HCR_EL2.<E2H> == 0: check CNTHCTL_EL2.EL1PCEN. */
+ if (!extract32(env->cp15.cnthctl_el2, 1, 1)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ }
+ }
+ if (has_el2 && timeridx == GTIMER_VIRT) {
+ if (FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, EL1TVT)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ }
+ break;
}
return CP_ACCESS_OK;
}
@@ -1695,7 +2621,8 @@ static CPAccessResult gt_stimer_access(CPUARMState *env,
const ARMCPRegInfo *ri,
bool isread)
{
- /* The AArch64 register view of the secure physical timer is
+ /*
+ * The AArch64 register view of the secure physical timer is
* always accessible from EL3, and configurably accessible from
* Secure EL1.
*/
@@ -1720,7 +2647,61 @@ static CPAccessResult gt_stimer_access(CPUARMState *env,
static uint64_t gt_get_countervalue(CPUARMState *env)
{
- return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / GTIMER_SCALE;
+ ARMCPU *cpu = env_archcpu(env);
+
+ return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / gt_cntfrq_period_ns(cpu);
+}
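gt_get_countervalue() divides the virtual clock (in nanoseconds) by the per-tick period now derived from the CPU's gt_cntfrq_hz rather than the fixed GTIMER_SCALE. A standalone sketch of the conversion, assuming the 62.5 MHz frequency QEMU has traditionally used by default (period 16 ns):

#include <stdint.h>

/* Sketch: nanoseconds of virtual time -> timer ticks.
 * period_ns = NANOSECONDS_PER_SECOND / cntfrq_hz; 62.5 MHz gives 16 ns.
 */
static uint64_t ticks_from_ns(uint64_t ns, uint64_t period_ns)
{
    return ns / period_ns;
}

/* ticks_from_ns(1000000, 16) == 62500: 1 ms of virtual time at 62.5 MHz */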
+
+static void gt_update_irq(ARMCPU *cpu, int timeridx)
+{
+ CPUARMState *env = &cpu->env;
+ uint64_t cnthctl = env->cp15.cnthctl_el2;
+ ARMSecuritySpace ss = arm_security_space(env);
+ /* ISTATUS && !IMASK */
+ int irqstate = (env->cp15.c14_timer[timeridx].ctl & 6) == 4;
+
+ /*
+ * If bit CNTHCTL_EL2.CNT[VP]MASK is set, it overrides IMASK.
+ * It is RES0 in Secure and NonSecure state.
+ */
+ if ((ss == ARMSS_Root || ss == ARMSS_Realm) &&
+ ((timeridx == GTIMER_VIRT && (cnthctl & R_CNTHCTL_CNTVMASK_MASK)) ||
+ (timeridx == GTIMER_PHYS && (cnthctl & R_CNTHCTL_CNTPMASK_MASK)))) {
+ irqstate = 0;
+ }
+
+ qemu_set_irq(cpu->gt_timer_outputs[timeridx], irqstate);
+ trace_arm_gt_update_irq(timeridx, irqstate);
+}
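The "(ctl & 6) == 4" test above relies on the architected CNT*_CTL layout: ENABLE is bit 0, IMASK bit 1, ISTATUS bit 2, so the expression is true exactly when ISTATUS is set and IMASK is clear. A small sketch of the same check:

#include <stdint.h>

#define CTL_ENABLE   (1u << 0)
#define CTL_IMASK    (1u << 1)
#define CTL_ISTATUS  (1u << 2)

/* Equivalent to (ctl & 6) == 4: interrupt asserted when ISTATUS && !IMASK. */
static int timer_irq_asserted(uint32_t ctl)
{
    return (ctl & (CTL_IMASK | CTL_ISTATUS)) == CTL_ISTATUS;
}

/* e.g. ENABLE|ISTATUS asserts the output; adding IMASK suppresses it. */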
+
+void gt_rme_post_el_change(ARMCPU *cpu, void *ignored)
+{
+ /*
+ * Changing security state between Root and Secure/NonSecure, which may
+ * happen when switching EL, can change the effective value of CNTHCTL_EL2
+ * mask bits. Update the IRQ state accordingly.
+ */
+ gt_update_irq(cpu, GTIMER_VIRT);
+ gt_update_irq(cpu, GTIMER_PHYS);
+}
+
+static uint64_t gt_phys_raw_cnt_offset(CPUARMState *env)
+{
+ if ((env->cp15.scr_el3 & SCR_ECVEN) &&
+ FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, ECV) &&
+ arm_is_el2_enabled(env) &&
+ (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
+ return env->cp15.cntpoff_el2;
+ }
+ return 0;
+}
+
+static uint64_t gt_phys_cnt_offset(CPUARMState *env)
+{
+ if (arm_current_el(env) >= 2) {
+ return 0;
+ }
+ return gt_phys_raw_cnt_offset(env);
}
static void gt_recalc_timer(ARMCPU *cpu, int timeridx)
@@ -1728,64 +2709,102 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx)
ARMGenericTimer *gt = &cpu->env.cp15.c14_timer[timeridx];
if (gt->ctl & 1) {
- /* Timer enabled: calculate and set current ISTATUS, irq, and
+ /*
+ * Timer enabled: calculate and set current ISTATUS, irq, and
* reset timer to when ISTATUS next has to change
*/
uint64_t offset = timeridx == GTIMER_VIRT ?
- cpu->env.cp15.cntvoff_el2 : 0;
+ cpu->env.cp15.cntvoff_el2 : gt_phys_raw_cnt_offset(&cpu->env);
uint64_t count = gt_get_countervalue(&cpu->env);
/* Note that this must be unsigned 64 bit arithmetic: */
int istatus = count - offset >= gt->cval;
uint64_t nexttick;
- int irqstate;
gt->ctl = deposit32(gt->ctl, 2, 1, istatus);
- irqstate = (istatus && !(gt->ctl & 2));
- qemu_set_irq(cpu->gt_timer_outputs[timeridx], irqstate);
-
if (istatus) {
- /* Next transition is when count rolls back over to zero */
- nexttick = UINT64_MAX;
+ /*
+ * Next transition is when (count - offset) rolls back over to 0.
+ * If offset > count then this is when count == offset;
+ * if offset <= count then this is when count == offset + 2^64.
+ * For the latter case we set nexttick to an "as far in the
+ * future as possible" value and let the code below handle it.
+ */
+ if (offset > count) {
+ nexttick = offset;
+ } else {
+ nexttick = UINT64_MAX;
+ }
} else {
- /* Next transition is when we hit cval */
- nexttick = gt->cval + offset;
+ /*
+ * Next transition is when (count - offset) == cval, i.e.
+ * when count == (cval + offset).
+ * If that would overflow, then again we set up the next interrupt
+ * for "as far in the future as possible" for the code below.
+ */
+ if (uadd64_overflow(gt->cval, offset, &nexttick)) {
+ nexttick = UINT64_MAX;
+ }
}
- /* Note that the desired next expiry time might be beyond the
+ /*
+ * Note that the desired next expiry time might be beyond the
* signed-64-bit range of a QEMUTimer -- in this case we just
* set the timer for as far in the future as possible. When the
* timer expires we will reset the timer for any remaining period.
*/
- if (nexttick > INT64_MAX / GTIMER_SCALE) {
- nexttick = INT64_MAX / GTIMER_SCALE;
+ if (nexttick > INT64_MAX / gt_cntfrq_period_ns(cpu)) {
+ timer_mod_ns(cpu->gt_timer[timeridx], INT64_MAX);
+ } else {
+ timer_mod(cpu->gt_timer[timeridx], nexttick);
}
- timer_mod(cpu->gt_timer[timeridx], nexttick);
- trace_arm_gt_recalc(timeridx, irqstate, nexttick);
+ trace_arm_gt_recalc(timeridx, nexttick);
} else {
/* Timer disabled: ISTATUS and timer output always clear */
gt->ctl &= ~4;
- qemu_set_irq(cpu->gt_timer_outputs[timeridx], 0);
timer_del(cpu->gt_timer[timeridx]);
trace_arm_gt_recalc_disabled(timeridx);
}
+ gt_update_irq(cpu, timeridx);
}
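The overflow handling added above clamps the next expiry to "as far in the future as possible" when cval + offset wraps. A standalone sketch of that step, using the compiler builtin as a stand-in for QEMU's uadd64_overflow() from host-utils:

#include <stdint.h>

static uint64_t next_tick(uint64_t cval, uint64_t offset)
{
    uint64_t nexttick;

    if (__builtin_add_overflow(cval, offset, &nexttick)) {
        nexttick = UINT64_MAX;   /* "as far in the future as possible" */
    }
    return nexttick;
}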
static void gt_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri,
int timeridx)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
timer_del(cpu->gt_timer[timeridx]);
}
static uint64_t gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- return gt_get_countervalue(env);
+ return gt_get_countervalue(env) - gt_phys_cnt_offset(env);
+}
+
+static uint64_t gt_virt_cnt_offset(CPUARMState *env)
+{
+ uint64_t hcr;
+
+ switch (arm_current_el(env)) {
+ case 2:
+ hcr = arm_hcr_el2_eff(env);
+ if (hcr & HCR_E2H) {
+ return 0;
+ }
+ break;
+ case 0:
+ hcr = arm_hcr_el2_eff(env);
+ if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+ return 0;
+ }
+ break;
+ }
+
+ return env->cp15.cntvoff_el2;
}
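gt_virt_cnt_offset() encodes the rule that CNTVOFF_EL2 is not applied when the access is made from the EL2&0 regime (EL2 with E2H set, or EL0 with both E2H and TGE set). A condensed sketch of that decision; parameter names are illustrative:

#include <stdbool.h>
#include <stdint.h>

static uint64_t effective_virt_offset(int el, bool e2h, bool tge,
                                      uint64_t cntvoff_el2)
{
    if (el == 2 && e2h) {
        return 0;                 /* EL2&0 regime: no virtual offset */
    }
    if (el == 0 && e2h && tge) {
        return 0;                 /* EL0 under HCR_EL2.{E2H,TGE} == '11' */
    }
    return cntvoff_el2;           /* EL1, EL3 and remaining cases */
}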
static uint64_t gt_virt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- return gt_get_countervalue(env) - env->cp15.cntvoff_el2;
+ return gt_get_countervalue(env) - gt_virt_cnt_offset(env);
}
static void gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -1794,13 +2813,23 @@ static void gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
{
trace_arm_gt_cval_write(timeridx, value);
env->cp15.c14_timer[timeridx].cval = value;
- gt_recalc_timer(arm_env_get_cpu(env), timeridx);
+ gt_recalc_timer(env_archcpu(env), timeridx);
}
static uint64_t gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri,
int timeridx)
{
- uint64_t offset = timeridx == GTIMER_VIRT ? env->cp15.cntvoff_el2 : 0;
+ uint64_t offset = 0;
+
+ switch (timeridx) {
+ case GTIMER_VIRT:
+ case GTIMER_HYPVIRT:
+ offset = gt_virt_cnt_offset(env);
+ break;
+ case GTIMER_PHYS:
+ offset = gt_phys_cnt_offset(env);
+ break;
+ }
return (uint32_t)(env->cp15.c14_timer[timeridx].cval -
(gt_get_countervalue(env) - offset));
@@ -1810,19 +2839,29 @@ static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
int timeridx,
uint64_t value)
{
- uint64_t offset = timeridx == GTIMER_VIRT ? env->cp15.cntvoff_el2 : 0;
+ uint64_t offset = 0;
+
+ switch (timeridx) {
+ case GTIMER_VIRT:
+ case GTIMER_HYPVIRT:
+ offset = gt_virt_cnt_offset(env);
+ break;
+ case GTIMER_PHYS:
+ offset = gt_phys_cnt_offset(env);
+ break;
+ }
trace_arm_gt_tval_write(timeridx, value);
env->cp15.c14_timer[timeridx].cval = gt_get_countervalue(env) - offset +
sextract64(value, 0, 32);
- gt_recalc_timer(arm_env_get_cpu(env), timeridx);
+ gt_recalc_timer(env_archcpu(env), timeridx);
}
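TVAL is a signed 32-bit downcounting view derived from CVAL: reading returns (uint32_t)(CVAL - (count - offset)); writing sets CVAL = (count - offset) + sign-extended value, as the two functions above do. A worked standalone sketch:

#include <assert.h>
#include <stdint.h>

static uint32_t tval_read(uint64_t cval, uint64_t count, uint64_t offset)
{
    return (uint32_t)(cval - (count - offset));
}

static uint64_t tval_write(uint32_t value, uint64_t count, uint64_t offset)
{
    /* sign-extend the 32-bit TVAL, as sextract64(value, 0, 32) does */
    return count - offset + (int64_t)(int32_t)value;
}

int main(void)
{
    /* Writing TVAL=100 and reading it back at the same count returns 100. */
    uint64_t cval = tval_write(100, 5000, 0);
    assert(tval_read(cval, 5000, 0) == 100);
    return 0;
}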
static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
int timeridx,
uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
uint32_t oldval = env->cp15.c14_timer[timeridx].ctl;
trace_arm_gt_ctl_write(timeridx, value);
@@ -1831,13 +2870,12 @@ static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
/* Enable toggled */
gt_recalc_timer(cpu, timeridx);
} else if ((oldval ^ value) & 2) {
- /* IMASK toggled: don't need to recalculate,
+ /*
+ * IMASK toggled: don't need to recalculate,
* just set the interrupt line based on ISTATUS
*/
- int irqstate = (oldval & 4) && !(value & 2);
-
- trace_arm_gt_imask_toggle(timeridx, irqstate);
- qemu_set_irq(cpu->gt_timer_outputs[timeridx], irqstate);
+ trace_arm_gt_imask_toggle(timeridx);
+ gt_update_irq(cpu, timeridx);
}
}
@@ -1869,6 +2907,72 @@ static void gt_phys_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
gt_ctl_write(env, ri, GTIMER_PHYS, value);
}
+static int gt_phys_redir_timeridx(CPUARMState *env)
+{
+ switch (arm_mmu_idx(env)) {
+ case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_E20_2:
+ case ARMMMUIdx_E20_2_PAN:
+ return GTIMER_HYP;
+ default:
+ return GTIMER_PHYS;
+ }
+}
+
+static int gt_virt_redir_timeridx(CPUARMState *env)
+{
+ switch (arm_mmu_idx(env)) {
+ case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_E20_2:
+ case ARMMMUIdx_E20_2_PAN:
+ return GTIMER_HYPVIRT;
+ default:
+ return GTIMER_VIRT;
+ }
+}
+
+static uint64_t gt_phys_redir_cval_read(CPUARMState *env,
+ const ARMCPRegInfo *ri)
+{
+ int timeridx = gt_phys_redir_timeridx(env);
+ return env->cp15.c14_timer[timeridx].cval;
+}
+
+static void gt_phys_redir_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ int timeridx = gt_phys_redir_timeridx(env);
+ gt_cval_write(env, ri, timeridx, value);
+}
+
+static uint64_t gt_phys_redir_tval_read(CPUARMState *env,
+ const ARMCPRegInfo *ri)
+{
+ int timeridx = gt_phys_redir_timeridx(env);
+ return gt_tval_read(env, ri, timeridx);
+}
+
+static void gt_phys_redir_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ int timeridx = gt_phys_redir_timeridx(env);
+ gt_tval_write(env, ri, timeridx, value);
+}
+
+static uint64_t gt_phys_redir_ctl_read(CPUARMState *env,
+ const ARMCPRegInfo *ri)
+{
+ int timeridx = gt_phys_redir_timeridx(env);
+ return env->cp15.c14_timer[timeridx].ctl;
+}
+
+static void gt_phys_redir_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ int timeridx = gt_phys_redir_timeridx(env);
+ gt_ctl_write(env, ri, timeridx, value);
+}
+
static void gt_virt_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri)
{
gt_timer_reset(env, ri, GTIMER_VIRT);
@@ -1897,16 +3001,101 @@ static void gt_virt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
gt_ctl_write(env, ri, GTIMER_VIRT, value);
}
+static void gt_cnthctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ uint32_t oldval = env->cp15.cnthctl_el2;
+ uint32_t valid_mask =
+ R_CNTHCTL_EL0PCTEN_E2H1_MASK |
+ R_CNTHCTL_EL0VCTEN_E2H1_MASK |
+ R_CNTHCTL_EVNTEN_MASK |
+ R_CNTHCTL_EVNTDIR_MASK |
+ R_CNTHCTL_EVNTI_MASK |
+ R_CNTHCTL_EL0VTEN_MASK |
+ R_CNTHCTL_EL0PTEN_MASK |
+ R_CNTHCTL_EL1PCTEN_E2H1_MASK |
+ R_CNTHCTL_EL1PTEN_MASK;
+
+ if (cpu_isar_feature(aa64_rme, cpu)) {
+ valid_mask |= R_CNTHCTL_CNTVMASK_MASK | R_CNTHCTL_CNTPMASK_MASK;
+ }
+ if (cpu_isar_feature(aa64_ecv_traps, cpu)) {
+ valid_mask |=
+ R_CNTHCTL_EL1TVT_MASK |
+ R_CNTHCTL_EL1TVCT_MASK |
+ R_CNTHCTL_EL1NVPCT_MASK |
+ R_CNTHCTL_EL1NVVCT_MASK |
+ R_CNTHCTL_EVNTIS_MASK;
+ }
+ if (cpu_isar_feature(aa64_ecv, cpu)) {
+ valid_mask |= R_CNTHCTL_ECV_MASK;
+ }
+
+ /* Clear RES0 bits */
+ value &= valid_mask;
+
+ raw_write(env, ri, value);
+
+ if ((oldval ^ value) & R_CNTHCTL_CNTVMASK_MASK) {
+ gt_update_irq(cpu, GTIMER_VIRT);
+ } else if ((oldval ^ value) & R_CNTHCTL_CNTPMASK_MASK) {
+ gt_update_irq(cpu, GTIMER_PHYS);
+ }
+}
+
static void gt_cntvoff_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
trace_arm_gt_cntvoff_write(value);
raw_write(env, ri, value);
gt_recalc_timer(cpu, GTIMER_VIRT);
}
+static uint64_t gt_virt_redir_cval_read(CPUARMState *env,
+ const ARMCPRegInfo *ri)
+{
+ int timeridx = gt_virt_redir_timeridx(env);
+ return env->cp15.c14_timer[timeridx].cval;
+}
+
+static void gt_virt_redir_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ int timeridx = gt_virt_redir_timeridx(env);
+ gt_cval_write(env, ri, timeridx, value);
+}
+
+static uint64_t gt_virt_redir_tval_read(CPUARMState *env,
+ const ARMCPRegInfo *ri)
+{
+ int timeridx = gt_virt_redir_timeridx(env);
+ return gt_tval_read(env, ri, timeridx);
+}
+
+static void gt_virt_redir_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ int timeridx = gt_virt_redir_timeridx(env);
+ gt_tval_write(env, ri, timeridx, value);
+}
+
+static uint64_t gt_virt_redir_ctl_read(CPUARMState *env,
+ const ARMCPRegInfo *ri)
+{
+ int timeridx = gt_virt_redir_timeridx(env);
+ return env->cp15.c14_timer[timeridx].ctl;
+}
+
+static void gt_virt_redir_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ int timeridx = gt_virt_redir_timeridx(env);
+ gt_ctl_write(env, ri, timeridx, value);
+}
+
static void gt_hyp_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri)
{
gt_timer_reset(env, ri, GTIMER_HYP);
@@ -1963,6 +3152,34 @@ static void gt_sec_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
gt_ctl_write(env, ri, GTIMER_SEC, value);
}
+static void gt_hv_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ gt_timer_reset(env, ri, GTIMER_HYPVIRT);
+}
+
+static void gt_hv_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ gt_cval_write(env, ri, GTIMER_HYPVIRT, value);
+}
+
+static uint64_t gt_hv_tval_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return gt_tval_read(env, ri, GTIMER_HYPVIRT);
+}
+
+static void gt_hv_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ gt_tval_write(env, ri, GTIMER_HYPVIRT, value);
+}
+
+static void gt_hv_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ gt_ctl_write(env, ri, GTIMER_HYPVIRT, value);
+}
+
void arm_gt_ptimer_cb(void *opaque)
{
ARMCPU *cpu = opaque;
@@ -1991,8 +3208,23 @@ void arm_gt_stimer_cb(void *opaque)
gt_recalc_timer(cpu, GTIMER_SEC);
}
+void arm_gt_hvtimer_cb(void *opaque)
+{
+ ARMCPU *cpu = opaque;
+
+ gt_recalc_timer(cpu, GTIMER_HYPVIRT);
+}
+
+static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *opaque)
+{
+ ARMCPU *cpu = env_archcpu(env);
+
+ cpu->env.cp15.c14_cntfrq = cpu->gt_cntfrq_hz;
+}
+
static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
- /* Note that CNTFRQ is purely reads-as-written for the benefit
+ /*
+ * Note that CNTFRQ is purely reads-as-written for the benefit
* of software; writing it doesn't actually change the timer frequency.
* Our reset value matches the fixed frequency we implement the timer at.
*/
@@ -2005,7 +3237,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
.opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0,
.access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access,
.fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq),
- .resetvalue = (1000 * 1000 * 1000) / GTIMER_SCALE,
+ .resetfn = arm_gt_cntfrq_reset,
},
/* overall control: mostly access permissions */
{ .name = "CNTKCTL", .state = ARM_CP_STATE_BOTH,
@@ -2017,16 +3249,17 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
/* per-timer control */
{ .name = "CNTP_CTL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 1,
.secure = ARM_CP_SECSTATE_NS,
- .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL1_RW | PL0_R,
+ .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL0_RW,
.accessfn = gt_ptimer_access,
.fieldoffset = offsetoflow32(CPUARMState,
cp15.c14_timer[GTIMER_PHYS].ctl),
- .writefn = gt_phys_ctl_write, .raw_writefn = raw_write,
+ .readfn = gt_phys_redir_ctl_read, .raw_readfn = raw_read,
+ .writefn = gt_phys_redir_ctl_write, .raw_writefn = raw_write,
},
{ .name = "CNTP_CTL_S",
.cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 1,
.secure = ARM_CP_SECSTATE_S,
- .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL1_RW | PL0_R,
+ .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL0_RW,
.accessfn = gt_ptimer_access,
.fieldoffset = offsetoflow32(CPUARMState,
cp15.c14_timer[GTIMER_SEC].ctl),
@@ -2034,57 +3267,62 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
},
{ .name = "CNTP_CTL_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 1,
- .type = ARM_CP_IO, .access = PL1_RW | PL0_R,
+ .type = ARM_CP_IO, .access = PL0_RW,
.accessfn = gt_ptimer_access,
+ .nv2_redirect_offset = 0x180 | NV2_REDIR_NV1,
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].ctl),
.resetvalue = 0,
- .writefn = gt_phys_ctl_write, .raw_writefn = raw_write,
+ .readfn = gt_phys_redir_ctl_read, .raw_readfn = raw_read,
+ .writefn = gt_phys_redir_ctl_write, .raw_writefn = raw_write,
},
{ .name = "CNTV_CTL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 1,
- .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL1_RW | PL0_R,
+ .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL0_RW,
.accessfn = gt_vtimer_access,
.fieldoffset = offsetoflow32(CPUARMState,
cp15.c14_timer[GTIMER_VIRT].ctl),
- .writefn = gt_virt_ctl_write, .raw_writefn = raw_write,
+ .readfn = gt_virt_redir_ctl_read, .raw_readfn = raw_read,
+ .writefn = gt_virt_redir_ctl_write, .raw_writefn = raw_write,
},
{ .name = "CNTV_CTL_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 1,
- .type = ARM_CP_IO, .access = PL1_RW | PL0_R,
+ .type = ARM_CP_IO, .access = PL0_RW,
.accessfn = gt_vtimer_access,
+ .nv2_redirect_offset = 0x170 | NV2_REDIR_NV1,
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].ctl),
.resetvalue = 0,
- .writefn = gt_virt_ctl_write, .raw_writefn = raw_write,
+ .readfn = gt_virt_redir_ctl_read, .raw_readfn = raw_read,
+ .writefn = gt_virt_redir_ctl_write, .raw_writefn = raw_write,
},
/* TimerValue views: a 32 bit downcounting view of the underlying state */
{ .name = "CNTP_TVAL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 0,
.secure = ARM_CP_SECSTATE_NS,
- .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R,
+ .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL0_RW,
.accessfn = gt_ptimer_access,
- .readfn = gt_phys_tval_read, .writefn = gt_phys_tval_write,
+ .readfn = gt_phys_redir_tval_read, .writefn = gt_phys_redir_tval_write,
},
{ .name = "CNTP_TVAL_S",
.cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 0,
.secure = ARM_CP_SECSTATE_S,
- .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R,
+ .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL0_RW,
.accessfn = gt_ptimer_access,
.readfn = gt_sec_tval_read, .writefn = gt_sec_tval_write,
},
{ .name = "CNTP_TVAL_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 0,
- .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R,
+ .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL0_RW,
.accessfn = gt_ptimer_access, .resetfn = gt_phys_timer_reset,
- .readfn = gt_phys_tval_read, .writefn = gt_phys_tval_write,
+ .readfn = gt_phys_redir_tval_read, .writefn = gt_phys_redir_tval_write,
},
{ .name = "CNTV_TVAL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 0,
- .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R,
+ .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL0_RW,
.accessfn = gt_vtimer_access,
- .readfn = gt_virt_tval_read, .writefn = gt_virt_tval_write,
+ .readfn = gt_virt_redir_tval_read, .writefn = gt_virt_redir_tval_write,
},
{ .name = "CNTV_TVAL_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 0,
- .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R,
+ .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL0_RW,
.accessfn = gt_vtimer_access, .resetfn = gt_virt_timer_reset,
- .readfn = gt_virt_tval_read, .writefn = gt_virt_tval_write,
+ .readfn = gt_virt_redir_tval_read, .writefn = gt_virt_redir_tval_write,
},
/* The counter itself */
{ .name = "CNTPCT", .cp = 15, .crm = 14, .opc1 = 0,
@@ -2110,15 +3348,16 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
/* Comparison value, indicating when the timer goes off */
{ .name = "CNTP_CVAL", .cp = 15, .crm = 14, .opc1 = 2,
.secure = ARM_CP_SECSTATE_NS,
- .access = PL1_RW | PL0_R,
+ .access = PL0_RW,
.type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_ALIAS,
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval),
.accessfn = gt_ptimer_access,
- .writefn = gt_phys_cval_write, .raw_writefn = raw_write,
+ .readfn = gt_phys_redir_cval_read, .raw_readfn = raw_read,
+ .writefn = gt_phys_redir_cval_write, .raw_writefn = raw_write,
},
{ .name = "CNTP_CVAL_S", .cp = 15, .crm = 14, .opc1 = 2,
.secure = ARM_CP_SECSTATE_S,
- .access = PL1_RW | PL0_R,
+ .access = PL0_RW,
.type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_ALIAS,
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_SEC].cval),
.accessfn = gt_ptimer_access,
@@ -2126,28 +3365,34 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
},
{ .name = "CNTP_CVAL_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 2,
- .access = PL1_RW | PL0_R,
+ .access = PL0_RW,
.type = ARM_CP_IO,
+ .nv2_redirect_offset = 0x178 | NV2_REDIR_NV1,
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval),
.resetvalue = 0, .accessfn = gt_ptimer_access,
- .writefn = gt_phys_cval_write, .raw_writefn = raw_write,
+ .readfn = gt_phys_redir_cval_read, .raw_readfn = raw_read,
+ .writefn = gt_phys_redir_cval_write, .raw_writefn = raw_write,
},
{ .name = "CNTV_CVAL", .cp = 15, .crm = 14, .opc1 = 3,
- .access = PL1_RW | PL0_R,
+ .access = PL0_RW,
.type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_ALIAS,
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval),
.accessfn = gt_vtimer_access,
- .writefn = gt_virt_cval_write, .raw_writefn = raw_write,
+ .readfn = gt_virt_redir_cval_read, .raw_readfn = raw_read,
+ .writefn = gt_virt_redir_cval_write, .raw_writefn = raw_write,
},
{ .name = "CNTV_CVAL_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 2,
- .access = PL1_RW | PL0_R,
+ .access = PL0_RW,
.type = ARM_CP_IO,
+ .nv2_redirect_offset = 0x168 | NV2_REDIR_NV1,
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval),
.resetvalue = 0, .accessfn = gt_vtimer_access,
- .writefn = gt_virt_cval_write, .raw_writefn = raw_write,
+ .readfn = gt_virt_redir_cval_read, .raw_readfn = raw_read,
+ .writefn = gt_virt_redir_cval_write, .raw_writefn = raw_write,
},
- /* Secure timer -- this is actually restricted to only EL3
+ /*
+ * Secure timer -- this is actually restricted to only EL3
* and configurably Secure-EL1 via the accessfn.
*/
{ .name = "CNTPS_TVAL_EL1", .state = ARM_CP_STATE_AA64,
@@ -2173,22 +3418,82 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
.fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_SEC].cval),
.writefn = gt_sec_cval_write, .raw_writefn = raw_write,
},
- REGINFO_SENTINEL
};
+/*
+ * FEAT_ECV adds extra views of CNTVCT_EL0 and CNTPCT_EL0 which
+ * are "self-synchronizing". For QEMU all sysregs are self-synchronizing,
+ * so our implementations here are identical to the normal registers.
+ */
+static const ARMCPRegInfo gen_timer_ecv_cp_reginfo[] = {
+ { .name = "CNTVCTSS", .cp = 15, .crm = 14, .opc1 = 9,
+ .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_RAW | ARM_CP_IO,
+ .accessfn = gt_vct_access,
+ .readfn = gt_virt_cnt_read, .resetfn = arm_cp_reset_ignore,
+ },
+ { .name = "CNTVCTSS_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 6,
+ .access = PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO,
+ .accessfn = gt_vct_access, .readfn = gt_virt_cnt_read,
+ },
+ { .name = "CNTPCTSS", .cp = 15, .crm = 14, .opc1 = 8,
+ .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_RAW | ARM_CP_IO,
+ .accessfn = gt_pct_access,
+ .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore,
+ },
+ { .name = "CNTPCTSS_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 5,
+ .access = PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO,
+ .accessfn = gt_pct_access, .readfn = gt_cnt_read,
+ },
+};
+
+static CPAccessResult gt_cntpoff_access(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 2 && arm_feature(env, ARM_FEATURE_EL3) &&
+ !(env->cp15.scr_el3 & SCR_ECVEN)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
+
+static void gt_cntpoff_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+
+ trace_arm_gt_cntpoff_write(value);
+ raw_write(env, ri, value);
+ gt_recalc_timer(cpu, GTIMER_PHYS);
+}
+
+static const ARMCPRegInfo gen_timer_cntpoff_reginfo = {
+ .name = "CNTPOFF_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 0, .opc2 = 6,
+ .access = PL2_RW, .type = ARM_CP_IO, .resetvalue = 0,
+ .accessfn = gt_cntpoff_access, .writefn = gt_cntpoff_write,
+ .nv2_redirect_offset = 0x1a8,
+ .fieldoffset = offsetof(CPUARMState, cp15.cntpoff_el2),
+};
#else
-/* In user-mode most of the generic timer registers are inaccessible
+/*
+ * In user-mode most of the generic timer registers are inaccessible;
+ * however, modern kernels (4.12+) allow access to cntvct_el0.
*/
static uint64_t gt_virt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- /* Currently we have no support for QEMUTimer in linux-user so we
+ ARMCPU *cpu = env_archcpu(env);
+
+ /*
+ * Currently we have no support for QEMUTimer in linux-user so we
* can't call gt_get_countervalue(env), instead we directly
* call the lower level functions.
*/
- return cpu_get_clock() / GTIMER_SCALE;
+ return cpu_get_clock() / gt_cntfrq_period_ns(cpu);
}
static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
@@ -2203,7 +3508,18 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
.access = PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO,
.readfn = gt_virt_cnt_read,
},
- REGINFO_SENTINEL
+};
+
+/*
+ * CNTVCTSS_EL0 has the same trap conditions as CNTVCT_EL0, so it is
+ * also exposed to userspace by Linux.
+ */
+static const ARMCPRegInfo gen_timer_ecv_cp_reginfo[] = {
+ { .name = "CNTVCTSS_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 6,
+ .access = PL0_R, .type = ARM_CP_NO_RAW | ARM_CP_IO,
+ .readfn = gt_virt_cnt_read,
+ },
};
#endif
@@ -2226,14 +3542,18 @@ static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
if (ri->opc2 & 4) {
- /* The ATS12NSO* operations must trap to EL3 if executed in
+ /*
+ * The ATS12NSO* operations must trap to EL3 or EL2 if executed in
* Secure EL1 (which can only happen if EL3 is AArch64).
* They are simply UNDEF if executed from NS EL1.
* They function normally from EL2 or EL3.
*/
if (arm_current_el(env) == 1) {
if (arm_is_secure_below_el3(env)) {
- return CP_ACCESS_TRAP_UNCATEGORIZED_EL3;
+ if (env->cp15.scr_el3 & SCR_EEL2) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ return CP_ACCESS_TRAP_EL3;
}
return CP_ACCESS_TRAP_UNCATEGORIZED;
}
@@ -2241,21 +3561,112 @@ static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri,
return CP_ACCESS_OK;
}
+#ifdef CONFIG_TCG
+static int par_el1_shareability(GetPhysAddrResult *res)
+{
+ /*
+ * The PAR_EL1.SH field must be 0b10 for Device or Normal-NC
+ * memory -- see pseudocode PAREncodeShareability().
+ */
+ if (((res->cacheattrs.attrs & 0xf0) == 0) ||
+ res->cacheattrs.attrs == 0x44 || res->cacheattrs.attrs == 0x40) {
+ return 2;
+ }
+ return res->cacheattrs.shareability;
+}
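par_el1_shareability() forces SH to 0b10 whenever the combined attributes denote Device memory (high nibble zero) or Normal Non-cacheable (0x44/0x40); otherwise the S1/S2-combined shareability is reported. A standalone sketch of the same classification, using the attribute-byte encodings referenced in the comment above:

static int par_sh(unsigned attrs, int shareability)
{
    if ((attrs & 0xf0) == 0 || attrs == 0x44 || attrs == 0x40) {
        return 2;               /* Device or Normal-NC: SH reads as 0b10 */
    }
    return shareability;        /* otherwise report the combined SH field */
}

/* par_sh(0x04, 0) == 2 (Device-nGnRE); par_sh(0xff, 3) == 3 (Normal WB) */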
+
static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
- MMUAccessType access_type, ARMMMUIdx mmu_idx)
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
+ ARMSecuritySpace ss)
{
- hwaddr phys_addr;
- target_ulong page_size;
- int prot;
bool ret;
uint64_t par64;
bool format64 = false;
- MemTxAttrs attrs = {};
ARMMMUFaultInfo fi = {};
- ARMCacheAttrs cacheattrs = {};
+ GetPhysAddrResult res = {};
- ret = get_phys_addr(env, value, access_type, mmu_idx, &phys_addr, &attrs,
- &prot, &page_size, &fi, &cacheattrs);
+ /*
+ * I_MXTJT: Granule protection checks are not performed on the final address
+ * of a successful translation.
+ */
+ ret = get_phys_addr_with_space_nogpc(env, value, access_type, mmu_idx, ss,
+ &res, &fi);
+
+ /*
+ * ATS operations only do S1 or S1+S2 translations, so we never
+ * have to deal with the ARMCacheAttrs format for S2 only.
+ */
+ assert(!res.cacheattrs.is_s2_format);
+
+ if (ret) {
+ /*
+ * Some kinds of translation fault must cause exceptions rather
+ * than being reported in the PAR.
+ */
+ int current_el = arm_current_el(env);
+ int target_el;
+ uint32_t syn, fsr, fsc;
+ bool take_exc = false;
+
+ if (fi.s1ptw && current_el == 1
+ && arm_mmu_idx_is_stage1_of_2(mmu_idx)) {
+ /*
+ * Synchronous stage 2 fault on an access made as part of the
+ * translation table walk for AT S1E0* or AT S1E1* insn
+ * executed from NS EL1. If this is a synchronous external abort
+ * and SCR_EL3.EA == 1, then we take a synchronous external abort
+ * to EL3. Otherwise the fault is taken as an exception to EL2,
+ * and HPFAR_EL2 holds the faulting IPA.
+ */
+ if (fi.type == ARMFault_SyncExternalOnWalk &&
+ (env->cp15.scr_el3 & SCR_EA)) {
+ target_el = 3;
+ } else {
+ env->cp15.hpfar_el2 = extract64(fi.s2addr, 12, 47) << 4;
+ if (arm_is_secure_below_el3(env) && fi.s1ns) {
+ env->cp15.hpfar_el2 |= HPFAR_NS;
+ }
+ target_el = 2;
+ }
+ take_exc = true;
+ } else if (fi.type == ARMFault_SyncExternalOnWalk) {
+ /*
+ * Synchronous external aborts during a translation table walk
+ * are taken as Data Abort exceptions.
+ */
+ if (fi.stage2) {
+ if (current_el == 3) {
+ target_el = 3;
+ } else {
+ target_el = 2;
+ }
+ } else {
+ target_el = exception_target_el(env);
+ }
+ take_exc = true;
+ }
+
+ if (take_exc) {
+ /* Construct FSR and FSC using same logic as arm_deliver_fault() */
+ if (target_el == 2 || arm_el_is_aa64(env, target_el) ||
+ arm_s1_regime_using_lpae_format(env, mmu_idx)) {
+ fsr = arm_fi_to_lfsc(&fi);
+ fsc = extract32(fsr, 0, 6);
+ } else {
+ fsr = arm_fi_to_sfsc(&fi);
+ fsc = 0x3f;
+ }
+ /*
+ * Report exception with ESR indicating a fault due to a
+ * translation table walk for a cache maintenance instruction.
+ */
+ syn = syn_data_abort_no_iss(current_el == target_el, 0,
+ fi.ea, 1, fi.s1ptw, 1, fsc);
+ env->exception.vaddress = value;
+ env->exception.fsr = fsr;
+ raise_exception(env, EXCP_DATA_ABORT, syn, target_el);
+ }
+ }
if (is_a64(env)) {
format64 = true;
@@ -2270,13 +3681,17 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
* * The Non-secure TTBCR.EAE bit is set to 1
* * The implementation includes EL2, and the value of HCR.VM is 1
*
- * ATS1Hx always uses the 64bit format (not supported yet).
+ * (Note that HCR.DC makes HCR.VM behave as if it is 1.)
+ *
+ * ATS1Hx always uses the 64bit format.
*/
format64 = arm_s1_regime_using_lpae_format(env, mmu_idx);
if (arm_feature(env, ARM_FEATURE_EL2)) {
- if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) {
- format64 |= env->cp15.hcr_el2 & HCR_VM;
+ if (mmu_idx == ARMMMUIdx_E10_0 ||
+ mmu_idx == ARMMMUIdx_E10_1 ||
+ mmu_idx == ARMMMUIdx_E10_1_PAN) {
+ format64 |= env->cp15.hcr_el2 & (HCR_VM | HCR_DC);
} else {
format64 |= arm_current_el(env) == 2;
}
@@ -2287,36 +3702,39 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
/* Create a 64-bit PAR */
par64 = (1 << 11); /* LPAE bit always set */
if (!ret) {
- par64 |= phys_addr & ~0xfffULL;
- if (!attrs.secure) {
+ par64 |= res.f.phys_addr & ~0xfffULL;
+ if (!res.f.attrs.secure) {
par64 |= (1 << 9); /* NS */
}
- par64 |= (uint64_t)cacheattrs.attrs << 56; /* ATTR */
- par64 |= cacheattrs.shareability << 7; /* SH */
+ par64 |= (uint64_t)res.cacheattrs.attrs << 56; /* ATTR */
+ par64 |= par_el1_shareability(&res) << 7; /* SH */
} else {
uint32_t fsr = arm_fi_to_lfsc(&fi);
par64 |= 1; /* F */
par64 |= (fsr & 0x3f) << 1; /* FS */
- /* Note that S2WLK and FSTAGE are always zero, because we don't
- * implement virtualization and therefore there can't be a stage 2
- * fault.
- */
+ if (fi.stage2) {
+ par64 |= (1 << 9); /* S */
+ }
+ if (fi.s1ptw) {
+ par64 |= (1 << 8); /* PTW */
+ }
}
} else {
- /* fsr is a DFSR/IFSR value for the short descriptor
+ /*
+ * fsr is a DFSR/IFSR value for the short descriptor
* translation table format (with WnR always clear).
* Convert it to a 32-bit PAR.
*/
if (!ret) {
/* We do not set any attribute bits in the PAR */
- if (page_size == (1 << 24)
+ if (res.f.lg_page_size == 24
&& arm_feature(env, ARM_FEATURE_V7)) {
- par64 = (phys_addr & 0xff000000) | (1 << 1);
+ par64 = (res.f.phys_addr & 0xff000000) | (1 << 1);
} else {
- par64 = phys_addr & 0xfffff000;
+ par64 = res.f.phys_addr & 0xfffff000;
}
- if (!attrs.secure) {
+ if (!res.f.attrs.secure) {
par64 |= (1 << 9); /* NS */
}
} else {
@@ -2328,27 +3746,33 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
}
return par64;
}
+#endif /* CONFIG_TCG */
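For reference, a minimal standalone sketch (not part of this patch) of how the successful-translation case of the 64-bit PAR is assembled above; the function and parameter names are illustrative only.

#include <stdint.h>

/*
 * Illustrative sketch only: pack a successful AArch64 AT result into
 * the 64-bit PAR layout used above -- bit 11 (format) always set,
 * NS at bit 9, SH at bits [8:7], ATTR at bits [63:56], and the
 * page-aligned physical address in the low half.
 */
uint64_t par64_success_sketch(uint64_t phys_addr, int non_secure,
                              int shareability, uint8_t memattr)
{
    uint64_t par = 1ull << 11;                 /* LPAE/format bit */
    par |= phys_addr & ~0xfffull;              /* PA, 4K aligned */
    if (non_secure) {
        par |= 1ull << 9;                      /* NS */
    }
    par |= (uint64_t)(shareability & 3) << 7;  /* SH */
    par |= (uint64_t)memattr << 56;            /* ATTR */
    return par;
}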
static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
+#ifdef CONFIG_TCG
MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD;
uint64_t par64;
ARMMMUIdx mmu_idx;
int el = arm_current_el(env);
- bool secure = arm_is_secure_below_el3(env);
+ ARMSecuritySpace ss = arm_security_space(env);
switch (ri->opc2 & 6) {
case 0:
- /* stage 1 current state PL1: ATS1CPR, ATS1CPW */
+ /* stage 1 current state PL1: ATS1CPR, ATS1CPW, ATS1CPRP, ATS1CPWP */
switch (el) {
case 3:
- mmu_idx = ARMMMUIdx_S1E3;
+ mmu_idx = ARMMMUIdx_E3;
break;
case 2:
- mmu_idx = ARMMMUIdx_S1NSE1;
- break;
+ g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */
+ /* fall through */
case 1:
- mmu_idx = secure ? ARMMMUIdx_S1SE1 : ARMMMUIdx_S1NSE1;
+ if (ri->crm == 9 && arm_pan_enabled(env)) {
+ mmu_idx = ARMMMUIdx_Stage1_E1_PAN;
+ } else {
+ mmu_idx = ARMMMUIdx_Stage1_E1;
+ }
break;
default:
g_assert_not_reached();
@@ -2358,13 +3782,14 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
/* stage 1 current state PL0: ATS1CUR, ATS1CUW */
switch (el) {
case 3:
- mmu_idx = ARMMMUIdx_S1SE0;
+ mmu_idx = ARMMMUIdx_E10_0;
break;
case 2:
- mmu_idx = ARMMMUIdx_S1NSE0;
+ g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */
+ mmu_idx = ARMMMUIdx_Stage1_E0;
break;
case 1:
- mmu_idx = secure ? ARMMMUIdx_S1SE0 : ARMMMUIdx_S1NSE0;
+ mmu_idx = ARMMMUIdx_Stage1_E0;
break;
default:
g_assert_not_reached();
@@ -2372,96 +3797,135 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
break;
case 4:
/* stage 1+2 NonSecure PL1: ATS12NSOPR, ATS12NSOPW */
- mmu_idx = ARMMMUIdx_S12NSE1;
+ mmu_idx = ARMMMUIdx_E10_1;
+ ss = ARMSS_NonSecure;
break;
case 6:
/* stage 1+2 NonSecure PL0: ATS12NSOUR, ATS12NSOUW */
- mmu_idx = ARMMMUIdx_S12NSE0;
+ mmu_idx = ARMMMUIdx_E10_0;
+ ss = ARMSS_NonSecure;
break;
default:
g_assert_not_reached();
}
- par64 = do_ats_write(env, value, access_type, mmu_idx);
+ par64 = do_ats_write(env, value, access_type, mmu_idx, ss);
A32_BANKED_CURRENT_REG_SET(env, par, par64);
+#else
+ /* Handled by hardware accelerator. */
+ g_assert_not_reached();
+#endif /* CONFIG_TCG */
}
static void ats1h_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
+#ifdef CONFIG_TCG
MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD;
uint64_t par64;
- par64 = do_ats_write(env, value, access_type, ARMMMUIdx_S2NS);
+ /* There is no SecureEL2 for AArch32. */
+ par64 = do_ats_write(env, value, access_type, ARMMMUIdx_E2,
+ ARMSS_NonSecure);
A32_BANKED_CURRENT_REG_SET(env, par, par64);
+#else
+ /* Handled by hardware accelerator. */
+ g_assert_not_reached();
+#endif /* CONFIG_TCG */
}
-static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri,
+static CPAccessResult at_e012_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
- if (arm_current_el(env) == 3 && !(env->cp15.scr_el3 & SCR_NS)) {
+ /*
+ * R_NYXTL: instruction is UNDEFINED if it applies to an Exception level
+ * lower than EL3 and the combination SCR_EL3.{NSE,NS} is reserved. This can
+ * only happen when executing at EL3 because that combination also causes an
+ * illegal exception return. We don't need to check FEAT_RME either, because
+ * scr_write() ensures that the NSE bit is not set otherwise.
+ */
+ if ((env->cp15.scr_el3 & (SCR_NSE | SCR_NS)) == SCR_NSE) {
return CP_ACCESS_TRAP;
}
return CP_ACCESS_OK;
}
+static CPAccessResult at_s1e2_access(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 3 &&
+ !(env->cp15.scr_el3 & (SCR_NS | SCR_EEL2))) {
+ return CP_ACCESS_TRAP;
+ }
+ return at_e012_access(env, ri, isread);
+}
+
+static CPAccessResult at_s1e01_access(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_AT)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ return at_e012_access(env, ri, isread);
+}
+
static void ats_write64(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
+#ifdef CONFIG_TCG
MMUAccessType access_type = ri->opc2 & 1 ? MMU_DATA_STORE : MMU_DATA_LOAD;
ARMMMUIdx mmu_idx;
- int secure = arm_is_secure_below_el3(env);
+ uint64_t hcr_el2 = arm_hcr_el2_eff(env);
+ bool regime_e20 = (hcr_el2 & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE);
+ bool for_el3 = false;
+ ARMSecuritySpace ss;
switch (ri->opc2 & 6) {
case 0:
switch (ri->opc1) {
- case 0: /* AT S1E1R, AT S1E1W */
- mmu_idx = secure ? ARMMMUIdx_S1SE1 : ARMMMUIdx_S1NSE1;
+ case 0: /* AT S1E1R, AT S1E1W, AT S1E1RP, AT S1E1WP */
+ if (ri->crm == 9 && arm_pan_enabled(env)) {
+ mmu_idx = regime_e20 ?
+ ARMMMUIdx_E20_2_PAN : ARMMMUIdx_Stage1_E1_PAN;
+ } else {
+ mmu_idx = regime_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_Stage1_E1;
+ }
break;
case 4: /* AT S1E2R, AT S1E2W */
- mmu_idx = ARMMMUIdx_S1E2;
+ mmu_idx = hcr_el2 & HCR_E2H ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
break;
case 6: /* AT S1E3R, AT S1E3W */
- mmu_idx = ARMMMUIdx_S1E3;
+ mmu_idx = ARMMMUIdx_E3;
+ for_el3 = true;
break;
default:
g_assert_not_reached();
}
break;
case 2: /* AT S1E0R, AT S1E0W */
- mmu_idx = secure ? ARMMMUIdx_S1SE0 : ARMMMUIdx_S1NSE0;
+ mmu_idx = regime_e20 ? ARMMMUIdx_E20_0 : ARMMMUIdx_Stage1_E0;
break;
case 4: /* AT S12E1R, AT S12E1W */
- mmu_idx = secure ? ARMMMUIdx_S1SE1 : ARMMMUIdx_S12NSE1;
+ mmu_idx = regime_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E10_1;
break;
case 6: /* AT S12E0R, AT S12E0W */
- mmu_idx = secure ? ARMMMUIdx_S1SE0 : ARMMMUIdx_S12NSE0;
+ mmu_idx = regime_e20 ? ARMMMUIdx_E20_0 : ARMMMUIdx_E10_0;
break;
default:
g_assert_not_reached();
}
- env->cp15.par_el[1] = do_ats_write(env, value, access_type, mmu_idx);
+ ss = for_el3 ? arm_security_space(env) : arm_security_space_below_el3(env);
+ env->cp15.par_el[1] = do_ats_write(env, value, access_type, mmu_idx, ss);
+#else
+ /* Handled by hardware accelerator. */
+ g_assert_not_reached();
+#endif /* CONFIG_TCG */
}
#endif
-static const ARMCPRegInfo vapa_cp_reginfo[] = {
- { .name = "PAR", .cp = 15, .crn = 7, .crm = 4, .opc1 = 0, .opc2 = 0,
- .access = PL1_RW, .resetvalue = 0,
- .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.par_s),
- offsetoflow32(CPUARMState, cp15.par_ns) },
- .writefn = par_write },
-#ifndef CONFIG_USER_ONLY
- /* This underdecoding is safe because the reginfo is NO_RAW. */
- { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY,
- .access = PL1_W, .accessfn = ats_access,
- .writefn = ats_write, .type = ARM_CP_NO_RAW },
-#endif
- REGINFO_SENTINEL
-};
-
/* Return basic MPU access permission bits. */
static uint32_t simple_mpu_ap_bits(uint32_t val)
{
@@ -2529,7 +3993,7 @@ static uint64_t pmsav7_read(CPUARMState *env, const ARMCPRegInfo *ri)
static void pmsav7_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
uint32_t *u32p = *(uint32_t **)raw_ptr(env, ri);
if (!u32p) {
@@ -2544,7 +4008,7 @@ static void pmsav7_write(CPUARMState *env, const ARMCPRegInfo *ri,
static void pmsav7_rgnr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
uint32_t nrgs = cpu->pmsav7_dregion;
if (value >= nrgs) {
@@ -2557,8 +4021,225 @@ static void pmsav7_rgnr_write(CPUARMState *env, const ARMCPRegInfo *ri,
raw_write(env, ri, value);
}
+static void prbar_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+
+ tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */
+ env->pmsav8.rbar[M_REG_NS][env->pmsav7.rnr[M_REG_NS]] = value;
+}
+
+static uint64_t prbar_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return env->pmsav8.rbar[M_REG_NS][env->pmsav7.rnr[M_REG_NS]];
+}
+
+static void prlar_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+
+ tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */
+ env->pmsav8.rlar[M_REG_NS][env->pmsav7.rnr[M_REG_NS]] = value;
+}
+
+static uint64_t prlar_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return env->pmsav8.rlar[M_REG_NS][env->pmsav7.rnr[M_REG_NS]];
+}
+
+static void prselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+
+ /*
+ * Ignore writes that would select a region that is not implemented.
+ * This is architecturally UNPREDICTABLE.
+ */
+ if (value >= cpu->pmsav7_dregion) {
+ return;
+ }
+
+ env->pmsav7.rnr[M_REG_NS] = value;
+}
+
+static void hprbar_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+
+ tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */
+ env->pmsav8.hprbar[env->pmsav8.hprselr] = value;
+}
+
+static uint64_t hprbar_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return env->pmsav8.hprbar[env->pmsav8.hprselr];
+}
+
+static void hprlar_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+
+ tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */
+ env->pmsav8.hprlar[env->pmsav8.hprselr] = value;
+}
+
+static uint64_t hprlar_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return env->pmsav8.hprlar[env->pmsav8.hprselr];
+}
+
+static void hprenr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ uint32_t n;
+ uint32_t bit;
+ ARMCPU *cpu = env_archcpu(env);
+
+ /* Ignore writes to unimplemented regions */
+ int rmax = MIN(cpu->pmsav8r_hdregion, 32);
+ value &= MAKE_64BIT_MASK(0, rmax);
+
+ tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */
+
+ /* Register alias is only valid for first 32 indexes */
+ for (n = 0; n < rmax; ++n) {
+ bit = extract32(value, n, 1);
+ env->pmsav8.hprlar[n] = deposit32(
+ env->pmsav8.hprlar[n], 0, 1, bit);
+ }
+}
+
+static uint64_t hprenr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ uint32_t n;
+ uint32_t result = 0x0;
+ ARMCPU *cpu = env_archcpu(env);
+
+ /* Register alias is only valid for first 32 indexes */
+ for (n = 0; n < MIN(cpu->pmsav8r_hdregion, 32); ++n) {
+ if (env->pmsav8.hprlar[n] & 0x1) {
+ result |= (0x1 << n);
+ }
+ }
+ return result;
+}
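The HPRENR accessors above treat the register as a bit-per-region view of HPRLAR<n>.EN; a standalone sketch of that packing, with placeholder names for the emulated state, looks like this.

#include <stdint.h>

/*
 * Illustrative sketch only: bit n of the HPRENR alias mirrors bit 0
 * (the EN bit) of HPRLAR<n>, and the alias only covers the first 32
 * regions. 'hprlar' and 'nregions' stand in for the emulated state.
 */
uint32_t hprenr_pack_sketch(const uint32_t *hprlar, unsigned nregions)
{
    unsigned max = nregions < 32 ? nregions : 32;
    uint32_t enr = 0;

    for (unsigned n = 0; n < max; n++) {
        enr |= (hprlar[n] & 1u) << n;          /* bit n <- HPRLAR<n>.EN */
    }
    return enr;
}

void hprenr_unpack_sketch(uint32_t enr, uint32_t *hprlar, unsigned nregions)
{
    unsigned max = nregions < 32 ? nregions : 32;

    for (unsigned n = 0; n < max; n++) {
        hprlar[n] = (hprlar[n] & ~1u) | ((enr >> n) & 1u);
    }
}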
+
+static void hprselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+
+ /*
+ * Ignore writes that would select a region that is not implemented.
+ * This is architecturally UNPREDICTABLE.
+ */
+ if (value >= cpu->pmsav8r_hdregion) {
+ return;
+ }
+
+ env->pmsav8.hprselr = value;
+}
+
+static void pmsav8r_regn_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ uint8_t index = (extract32(ri->opc0, 0, 1) << 4) |
+ (extract32(ri->crm, 0, 3) << 1) | extract32(ri->opc2, 2, 1);
+
+ tlb_flush(CPU(cpu)); /* Mappings may have changed - purge! */
+
+ if (ri->opc1 & 4) {
+ if (index >= cpu->pmsav8r_hdregion) {
+ return;
+ }
+ if (ri->opc2 & 0x1) {
+ env->pmsav8.hprlar[index] = value;
+ } else {
+ env->pmsav8.hprbar[index] = value;
+ }
+ } else {
+ if (index >= cpu->pmsav7_dregion) {
+ return;
+ }
+ if (ri->opc2 & 0x1) {
+ env->pmsav8.rlar[M_REG_NS][index] = value;
+ } else {
+ env->pmsav8.rbar[M_REG_NS][index] = value;
+ }
+ }
+}
+
+static uint64_t pmsav8r_regn_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ uint8_t index = (extract32(ri->opc0, 0, 1) << 4) |
+ (extract32(ri->crm, 0, 3) << 1) | extract32(ri->opc2, 2, 1);
+
+ if (ri->opc1 & 4) {
+ if (index >= cpu->pmsav8r_hdregion) {
+ return 0x0;
+ }
+ if (ri->opc2 & 0x1) {
+ return env->pmsav8.hprlar[index];
+ } else {
+ return env->pmsav8.hprbar[index];
+ }
+ } else {
+ if (index >= cpu->pmsav7_dregion) {
+ return 0x0;
+ }
+ if (ri->opc2 & 0x1) {
+ return env->pmsav8.rlar[M_REG_NS][index];
+ } else {
+ return env->pmsav8.rbar[M_REG_NS][index];
+ }
+ }
+}
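Both accessors above rebuild the MPU region index from the system-register encoding; a standalone sketch of that 5-bit reconstruction, with illustrative names:

#include <stdint.h>

/*
 * Illustrative sketch only: the region index is opc0[0]:crm[2:0]:opc2[2],
 * giving at most 32 directly addressable regions per PRBAR/PRLAR bank.
 */
unsigned pmsav8r_region_index_sketch(unsigned opc0, unsigned crm,
                                     unsigned opc2)
{
    return ((opc0 & 1u) << 4) | ((crm & 7u) << 1) | ((opc2 >> 2) & 1u);
}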
+
+static const ARMCPRegInfo pmsav8r_cp_reginfo[] = {
+ { .name = "PRBAR",
+ .cp = 15, .opc1 = 0, .crn = 6, .crm = 3, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_NO_RAW,
+ .accessfn = access_tvm_trvm,
+ .readfn = prbar_read, .writefn = prbar_write },
+ { .name = "PRLAR",
+ .cp = 15, .opc1 = 0, .crn = 6, .crm = 3, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_NO_RAW,
+ .accessfn = access_tvm_trvm,
+ .readfn = prlar_read, .writefn = prlar_write },
+ { .name = "PRSELR", .resetvalue = 0,
+ .cp = 15, .opc1 = 0, .crn = 6, .crm = 2, .opc2 = 1,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .writefn = prselr_write,
+ .fieldoffset = offsetof(CPUARMState, pmsav7.rnr[M_REG_NS]) },
+ { .name = "HPRBAR", .resetvalue = 0,
+ .cp = 15, .opc1 = 4, .crn = 6, .crm = 3, .opc2 = 0,
+ .access = PL2_RW, .type = ARM_CP_NO_RAW,
+ .readfn = hprbar_read, .writefn = hprbar_write },
+ { .name = "HPRLAR",
+ .cp = 15, .opc1 = 4, .crn = 6, .crm = 3, .opc2 = 1,
+ .access = PL2_RW, .type = ARM_CP_NO_RAW,
+ .readfn = hprlar_read, .writefn = hprlar_write },
+ { .name = "HPRSELR", .resetvalue = 0,
+ .cp = 15, .opc1 = 4, .crn = 6, .crm = 2, .opc2 = 1,
+ .access = PL2_RW,
+ .writefn = hprselr_write,
+ .fieldoffset = offsetof(CPUARMState, pmsav8.hprselr) },
+ { .name = "HPRENR",
+ .cp = 15, .opc1 = 4, .crn = 6, .crm = 1, .opc2 = 1,
+ .access = PL2_RW, .type = ARM_CP_NO_RAW,
+ .readfn = hprenr_read, .writefn = hprenr_write },
+};
+
static const ARMCPRegInfo pmsav7_cp_reginfo[] = {
- /* Reset for all these registers is handled in arm_cpu_reset(),
+ /*
+ * Reset for all these registers is handled in arm_cpu_reset(),
* because the PMSAv7 is also used by M-profile CPUs, which do
* not register cpregs but still need the state to be reset.
*/
@@ -2582,7 +4263,6 @@ static const ARMCPRegInfo pmsav7_cp_reginfo[] = {
.fieldoffset = offsetof(CPUARMState, pmsav7.rnr[M_REG_NS]),
.writefn = pmsav7_rgnr_write,
.resetfn = arm_cp_reset_ignore },
- REGINFO_SENTINEL
};
static const ARMCPRegInfo pmsav5_cp_reginfo[] = {
@@ -2633,22 +4313,23 @@ static const ARMCPRegInfo pmsav5_cp_reginfo[] = {
{ .name = "946_PRBS7", .cp = 15, .crn = 6, .crm = 7, .opc1 = 0,
.opc2 = CP_ANY, .access = PL1_RW, .resetvalue = 0,
.fieldoffset = offsetof(CPUARMState, cp15.c6_region[7]) },
- REGINFO_SENTINEL
};
-static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
- TCR *tcr = raw_ptr(env, ri);
- int maskshift = extract32(value, 0, 3);
+ ARMCPU *cpu = env_archcpu(env);
if (!arm_feature(env, ARM_FEATURE_V8)) {
if (arm_feature(env, ARM_FEATURE_LPAE) && (value & TTBCR_EAE)) {
- /* Pre ARMv8 bits [21:19], [15:14] and [6:3] are UNK/SBZP when
- * using Long-desciptor translation table format */
+ /*
+ * Pre ARMv8 bits [21:19], [15:14] and [6:3] are UNK/SBZP when
+ * using Long-descriptor translation table format
+ */
value &= ~((7 << 19) | (3 << 14) | (0xf << 3));
} else if (arm_feature(env, ARM_FEATURE_EL3)) {
- /* In an implementation that includes the Security Extensions
+ /*
+ * In an implementation that includes the Security Extensions
* TTBCR has additional fields PD0 [4] and PD1 [5] for
* Short-descriptor translation table format.
*/
@@ -2658,130 +4339,147 @@ static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
}
}
- /* Update the masks corresponding to the TCR bank being written
- * Note that we always calculate mask and base_mask, but
- * they are only used for short-descriptor tables (ie if EAE is 0);
- * for long-descriptor tables the TCR fields are used differently
- * and the mask and base_mask values are meaningless.
- */
- tcr->raw_tcr = value;
- tcr->mask = ~(((uint32_t)0xffffffffu) >> maskshift);
- tcr->base_mask = ~((uint32_t)0x3fffu >> maskshift);
-}
-
-static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- ARMCPU *cpu = arm_env_get_cpu(env);
-
if (arm_feature(env, ARM_FEATURE_LPAE)) {
- /* With LPAE the TTBCR could result in a change of ASID
+ /*
+ * With LPAE the TTBCR could result in a change of ASID
* via the TTBCR.A1 bit, so do a TLB flush.
*/
tlb_flush(CPU(cpu));
}
- vmsa_ttbcr_raw_write(env, ri, value);
-}
-
-static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
-{
- TCR *tcr = raw_ptr(env, ri);
-
- /* Reset both the TCR as well as the masks corresponding to the bank of
- * the TCR being reset.
- */
- tcr->raw_tcr = 0;
- tcr->mask = 0;
- tcr->base_mask = 0xffffc000u;
+ raw_write(env, ri, value);
}
-static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+static void vmsa_tcr_el12_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
- TCR *tcr = raw_ptr(env, ri);
+ ARMCPU *cpu = env_archcpu(env);
/* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */
tlb_flush(CPU(cpu));
- tcr->raw_tcr = value;
+ raw_write(env, ri, value);
}
static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- /* 64 bit accesses to the TTBRs can change the ASID and so we
- * must flush the TLB.
- */
- if (cpreg_field_is_64bit(ri)) {
- ARMCPU *cpu = arm_env_get_cpu(env);
-
+ /* If the ASID changes (with a 64-bit write), we must flush the TLB. */
+ if (cpreg_field_is_64bit(ri) &&
+ extract64(raw_read(env, ri) ^ value, 48, 16) != 0) {
+ ARMCPU *cpu = env_archcpu(env);
tlb_flush(CPU(cpu));
}
raw_write(env, ri, value);
}
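A standalone sketch of the flush condition above: a 64-bit TTBR write only needs to flush the QEMU TLB when the ASID field in bits [63:48] actually changes. Names are illustrative.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative sketch only: does a TTBR update change the ASID? */
bool ttbr_asid_changed_sketch(uint64_t old_ttbr, uint64_t new_ttbr)
{
    return (((old_ttbr ^ new_ttbr) >> 48) & 0xffff) != 0;
}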
+static void vmsa_tcr_ttbr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * If we are running with E2&0 regime, then an ASID is active.
+ * Flush if that might be changing. Note we're not checking
+ * TCR_EL2.A1 to know if this is really the TTBRx_EL2 that
+ * holds the active ASID, only checking the field that might.
+ */
+ if (extract64(raw_read(env, ri) ^ value, 48, 16) &&
+ (arm_hcr_el2_eff(env) & HCR_E2H)) {
+ uint16_t mask = ARMMMUIdxBit_E20_2 |
+ ARMMMUIdxBit_E20_2_PAN |
+ ARMMMUIdxBit_E20_0;
+ tlb_flush_by_mmuidx(env_cpu(env), mask);
+ }
+ raw_write(env, ri, value);
+}
+
static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
CPUState *cs = CPU(cpu);
- /* Accesses to VTTBR may change the VMID so we must flush the TLB. */
- if (raw_read(env, ri) != value) {
- tlb_flush_by_mmuidx(cs,
- ARMMMUIdxBit_S12NSE1 |
- ARMMMUIdxBit_S12NSE0 |
- ARMMMUIdxBit_S2NS);
- raw_write(env, ri, value);
+ /*
+ * A change in VMID to the stage2 page table (Stage2) invalidates
+ * the stage2 and combined stage 1&2 tlbs (EL10_1 and EL10_0).
+ */
+ if (extract64(raw_read(env, ri) ^ value, 48, 16) != 0) {
+ tlb_flush_by_mmuidx(cs, alle1_tlbmask(env));
}
+ raw_write(env, ri, value);
}
static const ARMCPRegInfo vmsa_pmsa_cp_reginfo[] = {
{ .name = "DFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0,
- .access = PL1_RW, .type = ARM_CP_ALIAS,
+ .access = PL1_RW, .accessfn = access_tvm_trvm, .type = ARM_CP_ALIAS,
.bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.dfsr_s),
offsetoflow32(CPUARMState, cp15.dfsr_ns) }, },
{ .name = "IFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 1,
- .access = PL1_RW, .resetvalue = 0,
+ .access = PL1_RW, .accessfn = access_tvm_trvm, .resetvalue = 0,
.bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.ifsr_s),
offsetoflow32(CPUARMState, cp15.ifsr_ns) } },
{ .name = "DFAR", .cp = 15, .opc1 = 0, .crn = 6, .crm = 0, .opc2 = 0,
- .access = PL1_RW, .resetvalue = 0,
+ .access = PL1_RW, .accessfn = access_tvm_trvm, .resetvalue = 0,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.dfar_s),
offsetof(CPUARMState, cp15.dfar_ns) } },
{ .name = "FAR_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0,
- .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el[1]),
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .fgt = FGT_FAR_EL1,
+ .nv2_redirect_offset = 0x220 | NV2_REDIR_NV1,
+ .fieldoffset = offsetof(CPUARMState, cp15.far_el[1]),
.resetvalue = 0, },
- REGINFO_SENTINEL
};
static const ARMCPRegInfo vmsa_cp_reginfo[] = {
{ .name = "ESR_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .crn = 5, .crm = 2, .opc1 = 0, .opc2 = 0,
- .access = PL1_RW,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .fgt = FGT_ESR_EL1,
+ .nv2_redirect_offset = 0x138 | NV2_REDIR_NV1,
.fieldoffset = offsetof(CPUARMState, cp15.esr_el[1]), .resetvalue = 0, },
{ .name = "TTBR0_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 0,
- .access = PL1_RW, .writefn = vmsa_ttbr_write, .resetvalue = 0,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .fgt = FGT_TTBR0_EL1,
+ .nv2_redirect_offset = 0x200 | NV2_REDIR_NV1,
+ .writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s),
offsetof(CPUARMState, cp15.ttbr0_ns) } },
{ .name = "TTBR1_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 1,
- .access = PL1_RW, .writefn = vmsa_ttbr_write, .resetvalue = 0,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .fgt = FGT_TTBR1_EL1,
+ .nv2_redirect_offset = 0x210 | NV2_REDIR_NV1,
+ .writefn = vmsa_ttbr_write, .resetvalue = 0, .raw_writefn = raw_write,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s),
offsetof(CPUARMState, cp15.ttbr1_ns) } },
{ .name = "TCR_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
- .access = PL1_RW, .writefn = vmsa_tcr_el1_write,
- .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .fgt = FGT_TCR_EL1,
+ .nv2_redirect_offset = 0x120 | NV2_REDIR_NV1,
+ .writefn = vmsa_tcr_el12_write,
+ .raw_writefn = raw_write,
+ .resetvalue = 0,
.fieldoffset = offsetof(CPUARMState, cp15.tcr_el[1]) },
{ .name = "TTBCR", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
- .access = PL1_RW, .type = ARM_CP_ALIAS, .writefn = vmsa_ttbcr_write,
- .raw_writefn = vmsa_ttbcr_raw_write,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .type = ARM_CP_ALIAS, .writefn = vmsa_ttbcr_write,
+ .raw_writefn = raw_write,
.bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.tcr_el[3]),
offsetoflow32(CPUARMState, cp15.tcr_el[1])} },
- REGINFO_SENTINEL
+};
+
+/*
+ * Note that, unlike TTBCR, writing to TTBCR2 does not require flushing
+ * QEMU's TLBs or adjusting cached masks.
+ */
+static const ARMCPRegInfo ttbcr2_reginfo = {
+ .name = "TTBCR2", .cp = 15, .opc1 = 0, .crn = 2, .crm = 0, .opc2 = 3,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .type = ARM_CP_ALIAS,
+ .bank_fieldoffsets = {
+ offsetofhigh32(CPUARMState, cp15.tcr_el[3]),
+ offsetofhigh32(CPUARMState, cp15.tcr_el[1]),
+ },
};
static void omap_ticonfig_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -2803,13 +4501,14 @@ static void omap_wfi_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
/* Wait-for-interrupt (deprecated) */
- cpu_interrupt(CPU(arm_env_get_cpu(env)), CPU_INTERRUPT_HALT);
+ cpu_interrupt(env_cpu(env), CPU_INTERRUPT_HALT);
}
static void omap_cachemaint_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- /* On OMAP there are registers indicating the max/min index of dcache lines
+ /*
+ * On OMAP there are registers indicating the max/min index of dcache lines
* containing a dirty line; cache flush operations have to reset these.
*/
env->cp15.c15_i_max = 0x000;
@@ -2841,7 +4540,8 @@ static const ARMCPRegInfo omap_cp_reginfo[] = {
.crm = 8, .opc1 = 0, .opc2 = 0, .access = PL1_RW,
.type = ARM_CP_NO_RAW,
.readfn = arm_cp_read_zero, .writefn = omap_wfi_write, },
- /* TODO: Peripheral port remap register:
+ /*
+ * TODO: Peripheral port remap register:
* On OMAP2 mcr p15, 0, rn, c15, c2, 4 sets up the interrupt controller
* base address at $rn & ~0xfff and map size of 0x200 << ($rn & 0xfff),
* when MMU is off.
@@ -2853,7 +4553,6 @@ static const ARMCPRegInfo omap_cp_reginfo[] = {
{ .name = "C9", .cp = 15, .crn = 9,
.crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW,
.type = ARM_CP_CONST | ARM_CP_OVERRIDE, .resetvalue = 0 },
- REGINFO_SENTINEL
};
static void xscale_cpar_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -2871,7 +4570,8 @@ static const ARMCPRegInfo xscale_cp_reginfo[] = {
.cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 1, .access = PL1_RW,
.fieldoffset = offsetof(CPUARMState, cp15.c1_xscaleauxcr),
.resetvalue = 0, },
- /* XScale specific cache-lockdown: since we have no cache we NOP these
+ /*
+ * XScale specific cache-lockdown: since we have no cache we NOP these
* and hope the guest does not really rely on cache behaviour.
*/
{ .name = "XSCALE_LOCK_ICACHE_LINE",
@@ -2886,11 +4586,11 @@ static const ARMCPRegInfo xscale_cp_reginfo[] = {
{ .name = "XSCALE_UNLOCK_DCACHE",
.cp = 15, .opc1 = 0, .crn = 9, .crm = 2, .opc2 = 1,
.access = PL1_W, .type = ARM_CP_NOP },
- REGINFO_SENTINEL
};
static const ARMCPRegInfo dummy_c15_cp_reginfo[] = {
- /* RAZ/WI the whole crn=15 space, when we don't have a more specific
+ /*
+ * RAZ/WI the whole crn=15 space, when we don't have a more specific
* implementation of this implementation-defined space.
* Ideally this should eventually disappear in favour of actually
* implementing the correct behaviour for all cores.
@@ -2900,7 +4600,6 @@ static const ARMCPRegInfo dummy_c15_cp_reginfo[] = {
.access = PL1_RW,
.type = ARM_CP_CONST | ARM_CP_NO_RAW | ARM_CP_OVERRIDE,
.resetvalue = 0 },
- REGINFO_SENTINEL
};
static const ARMCPRegInfo cache_dirty_status_cp_reginfo[] = {
@@ -2908,32 +4607,31 @@ static const ARMCPRegInfo cache_dirty_status_cp_reginfo[] = {
{ .name = "CDSR", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 6,
.access = PL1_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW,
.resetvalue = 0 },
- REGINFO_SENTINEL
};
static const ARMCPRegInfo cache_block_ops_cp_reginfo[] = {
- /* We never have a a block transfer operation in progress */
+ /* We never have a block transfer operation in progress */
{ .name = "BXSR", .cp = 15, .crn = 7, .crm = 12, .opc1 = 0, .opc2 = 4,
.access = PL0_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW,
.resetvalue = 0 },
/* The cache ops themselves: these all NOP for QEMU */
{ .name = "IICR", .cp = 15, .crm = 5, .opc1 = 0,
- .access = PL1_W, .type = ARM_CP_NOP|ARM_CP_64BIT },
+ .access = PL1_W, .type = ARM_CP_NOP | ARM_CP_64BIT },
{ .name = "IDCR", .cp = 15, .crm = 6, .opc1 = 0,
- .access = PL1_W, .type = ARM_CP_NOP|ARM_CP_64BIT },
+ .access = PL1_W, .type = ARM_CP_NOP | ARM_CP_64BIT },
{ .name = "CDCR", .cp = 15, .crm = 12, .opc1 = 0,
- .access = PL0_W, .type = ARM_CP_NOP|ARM_CP_64BIT },
+ .access = PL0_W, .type = ARM_CP_NOP | ARM_CP_64BIT },
{ .name = "PIR", .cp = 15, .crm = 12, .opc1 = 1,
- .access = PL0_W, .type = ARM_CP_NOP|ARM_CP_64BIT },
+ .access = PL0_W, .type = ARM_CP_NOP | ARM_CP_64BIT },
{ .name = "PDR", .cp = 15, .crm = 12, .opc1 = 2,
- .access = PL0_W, .type = ARM_CP_NOP|ARM_CP_64BIT },
+ .access = PL0_W, .type = ARM_CP_NOP | ARM_CP_64BIT },
{ .name = "CIDCR", .cp = 15, .crm = 14, .opc1 = 0,
- .access = PL1_W, .type = ARM_CP_NOP|ARM_CP_64BIT },
- REGINFO_SENTINEL
+ .access = PL1_W, .type = ARM_CP_NOP | ARM_CP_64BIT },
};
static const ARMCPRegInfo cache_test_clean_cp_reginfo[] = {
- /* The cache test-and-clean instructions always return (1 << 30)
+ /*
+ * The cache test-and-clean instructions always return (1 << 30)
* to indicate that there are no dirty cache lines.
*/
{ .name = "TC_DCACHE", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 3,
@@ -2942,7 +4640,6 @@ static const ARMCPRegInfo cache_test_clean_cp_reginfo[] = {
{ .name = "TCI_DCACHE", .cp = 15, .crn = 7, .crm = 14, .opc1 = 0, .opc2 = 3,
.access = PL0_R, .type = ARM_CP_CONST | ARM_CP_NO_RAW,
.resetvalue = (1 << 30) },
- REGINFO_SENTINEL
};
static const ARMCPRegInfo strongarm_cp_reginfo[] = {
@@ -2951,16 +4648,13 @@ static const ARMCPRegInfo strongarm_cp_reginfo[] = {
.crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY,
.access = PL1_RW, .resetvalue = 0,
.type = ARM_CP_CONST | ARM_CP_OVERRIDE | ARM_CP_NO_RAW },
- REGINFO_SENTINEL
};
static uint64_t midr_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
unsigned int cur_el = arm_current_el(env);
- bool secure = arm_is_secure(env);
- if (arm_feature(&cpu->env, ARM_FEATURE_EL2) && !secure && cur_el == 1) {
+ if (arm_is_el2_enabled(env) && cur_el == 1) {
return env->cp15.vpidr_el2;
}
return raw_read(env, ri);
@@ -2968,12 +4662,13 @@ static uint64_t midr_read(CPUARMState *env, const ARMCPRegInfo *ri)
static uint64_t mpidr_read_val(CPUARMState *env)
{
- ARMCPU *cpu = ARM_CPU(arm_env_get_cpu(env));
+ ARMCPU *cpu = env_archcpu(env);
uint64_t mpidr = cpu->mp_affinity;
if (arm_feature(env, ARM_FEATURE_V7MP)) {
mpidr |= (1U << 31);
- /* Cores which are uniprocessor (non-coherent)
+ /*
+ * Cores which are uniprocessor (non-coherent)
* but still implement the MP extensions set
* bit 30. (For instance, Cortex-R5).
*/
@@ -2987,46 +4682,41 @@ static uint64_t mpidr_read_val(CPUARMState *env)
static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
unsigned int cur_el = arm_current_el(env);
- bool secure = arm_is_secure(env);
- if (arm_feature(env, ARM_FEATURE_EL2) && !secure && cur_el == 1) {
+ if (arm_is_el2_enabled(env) && cur_el == 1) {
return env->cp15.vmpidr_el2;
}
return mpidr_read_val(env);
}
-static const ARMCPRegInfo mpidr_cp_reginfo[] = {
- { .name = "MPIDR", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
- .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW },
- REGINFO_SENTINEL
-};
-
static const ARMCPRegInfo lpae_cp_reginfo[] = {
/* NOP AMAIR0/1 */
{ .name = "AMAIR0", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0,
- .access = PL1_RW, .type = ARM_CP_CONST,
- .resetvalue = 0 },
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .fgt = FGT_AMAIR_EL1,
+ .nv2_redirect_offset = 0x148 | NV2_REDIR_NV1,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
/* AMAIR1 is mapped to AMAIR_EL1[63:32] */
{ .name = "AMAIR1", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 1,
- .access = PL1_RW, .type = ARM_CP_CONST,
- .resetvalue = 0 },
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
{ .name = "PAR", .cp = 15, .crm = 7, .opc1 = 0,
.access = PL1_RW, .type = ARM_CP_64BIT, .resetvalue = 0,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.par_s),
offsetof(CPUARMState, cp15.par_ns)} },
{ .name = "TTBR0", .cp = 15, .crm = 2, .opc1 = 0,
- .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .type = ARM_CP_64BIT | ARM_CP_ALIAS,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr0_s),
offsetof(CPUARMState, cp15.ttbr0_ns) },
- .writefn = vmsa_ttbr_write, },
+ .writefn = vmsa_ttbr_write, .raw_writefn = raw_write },
{ .name = "TTBR1", .cp = 15, .crm = 2, .opc1 = 1,
- .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .type = ARM_CP_64BIT | ARM_CP_ALIAS,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.ttbr1_s),
offsetof(CPUARMState, cp15.ttbr1_ns) },
- .writefn = vmsa_ttbr_write, },
- REGINFO_SENTINEL
+ .writefn = vmsa_ttbr_write, .raw_writefn = raw_write },
};
static uint64_t aa64_fpcr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -3054,7 +4744,7 @@ static void aa64_fpsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
static CPAccessResult aa64_daif_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
- if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UMA)) {
+ if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
return CP_ACCESS_TRAP;
}
return CP_ACCESS_OK;
@@ -3066,288 +4756,646 @@ static void aa64_daif_write(CPUARMState *env, const ARMCPRegInfo *ri,
env->daif = value & PSTATE_DAIF;
}
-static CPAccessResult aa64_cacheop_access(CPUARMState *env,
- const ARMCPRegInfo *ri,
- bool isread)
+static uint64_t aa64_pan_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- /* Cache invalidate/clean: NOP, but EL0 must UNDEF unless
- * SCTLR_EL1.UCI is set.
- */
- if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UCI)) {
- return CP_ACCESS_TRAP;
+ return env->pstate & PSTATE_PAN;
+}
+
+static void aa64_pan_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ env->pstate = (env->pstate & ~PSTATE_PAN) | (value & PSTATE_PAN);
+}
+
+static const ARMCPRegInfo pan_reginfo = {
+ .name = "PAN", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 2, .opc2 = 3,
+ .type = ARM_CP_NO_RAW, .access = PL1_RW,
+ .readfn = aa64_pan_read, .writefn = aa64_pan_write
+};
+
+static uint64_t aa64_uao_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return env->pstate & PSTATE_UAO;
+}
+
+static void aa64_uao_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ env->pstate = (env->pstate & ~PSTATE_UAO) | (value & PSTATE_UAO);
+}
+
+static const ARMCPRegInfo uao_reginfo = {
+ .name = "UAO", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 2, .opc2 = 4,
+ .type = ARM_CP_NO_RAW, .access = PL1_RW,
+ .readfn = aa64_uao_read, .writefn = aa64_uao_write
+};
+
+static uint64_t aa64_dit_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return env->pstate & PSTATE_DIT;
+}
+
+static void aa64_dit_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ env->pstate = (env->pstate & ~PSTATE_DIT) | (value & PSTATE_DIT);
+}
+
+static const ARMCPRegInfo dit_reginfo = {
+ .name = "DIT", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 5,
+ .type = ARM_CP_NO_RAW, .access = PL0_RW,
+ .readfn = aa64_dit_read, .writefn = aa64_dit_write
+};
+
+static uint64_t aa64_ssbs_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return env->pstate & PSTATE_SSBS;
+}
+
+static void aa64_ssbs_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ env->pstate = (env->pstate & ~PSTATE_SSBS) | (value & PSTATE_SSBS);
+}
+
+static const ARMCPRegInfo ssbs_reginfo = {
+ .name = "SSBS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 6,
+ .type = ARM_CP_NO_RAW, .access = PL0_RW,
+ .readfn = aa64_ssbs_read, .writefn = aa64_ssbs_write
+};
+
+static CPAccessResult aa64_cacheop_poc_access(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ bool isread)
+{
+ /* Cache invalidate/clean to Point of Coherency or Persistence... */
+ switch (arm_current_el(env)) {
+ case 0:
+ /* ... EL0 must UNDEF unless SCTLR_EL1.UCI is set. */
+ if (!(arm_sctlr(env, 0) & SCTLR_UCI)) {
+ return CP_ACCESS_TRAP;
+ }
+ /* fall through */
+ case 1:
+ /* ... EL1 must trap to EL2 if HCR_EL2.TPCP is set. */
+ if (arm_hcr_el2_eff(env) & HCR_TPCP) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ break;
+ }
+ return CP_ACCESS_OK;
+}
+
+static CPAccessResult do_cacheop_pou_access(CPUARMState *env, uint64_t hcrflags)
+{
+ /* Cache invalidate/clean to Point of Unification... */
+ switch (arm_current_el(env)) {
+ case 0:
+ /* ... EL0 must UNDEF unless SCTLR_EL1.UCI is set. */
+ if (!(arm_sctlr(env, 0) & SCTLR_UCI)) {
+ return CP_ACCESS_TRAP;
+ }
+ /* fall through */
+ case 1:
+ /* ... EL1 must trap to EL2 if relevant HCR_EL2 flags are set. */
+ if (arm_hcr_el2_eff(env) & hcrflags) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ break;
}
return CP_ACCESS_OK;
}
-/* See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions
+static CPAccessResult access_ticab(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ return do_cacheop_pou_access(env, HCR_TICAB | HCR_TPU);
+}
+
+static CPAccessResult access_tocu(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ return do_cacheop_pou_access(env, HCR_TOCU | HCR_TPU);
+}
+
+/*
+ * See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions
* Page D4-1736 (DDI0487A.b)
*/
-static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static int vae1_tlbmask(CPUARMState *env)
{
- CPUState *cs = ENV_GET_CPU(env);
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ uint16_t mask;
- if (arm_is_secure_below_el3(env)) {
- tlb_flush_by_mmuidx(cs,
- ARMMMUIdxBit_S1SE1 |
- ARMMMUIdxBit_S1SE0);
+ if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+ mask = ARMMMUIdxBit_E20_2 |
+ ARMMMUIdxBit_E20_2_PAN |
+ ARMMMUIdxBit_E20_0;
} else {
- tlb_flush_by_mmuidx(cs,
- ARMMMUIdxBit_S12NSE1 |
- ARMMMUIdxBit_S12NSE0);
+ mask = ARMMMUIdxBit_E10_1 |
+ ARMMMUIdxBit_E10_1_PAN |
+ ARMMMUIdxBit_E10_0;
}
+ return mask;
+}
+
+static int vae2_tlbmask(CPUARMState *env)
+{
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ uint16_t mask;
+
+ if (hcr & HCR_E2H) {
+ mask = ARMMMUIdxBit_E20_2 |
+ ARMMMUIdxBit_E20_2_PAN |
+ ARMMMUIdxBit_E20_0;
+ } else {
+ mask = ARMMMUIdxBit_E2;
+ }
+ return mask;
+}
+
+/* Return 56 if TBI is enabled, 64 otherwise. */
+static int tlbbits_for_regime(CPUARMState *env, ARMMMUIdx mmu_idx,
+ uint64_t addr)
+{
+ uint64_t tcr = regime_tcr(env, mmu_idx);
+ int tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
+ int select = extract64(addr, 55, 1);
+
+ return (tbi >> select) & 1 ? 56 : 64;
+}
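A standalone sketch of the 56-vs-64 decision above, assuming the two-bit TBI field has already been pulled out of the regime's TCR; bit 55 of the address picks between the TTBR0 and TTBR1 halves of the VA space.

#include <stdint.h>

/* Illustrative sketch only: significant VA bits for a TLB invalidate. */
int tlbi_significant_bits_sketch(unsigned tbi_field, uint64_t addr)
{
    int select = (addr >> 55) & 1;             /* TTBR0 vs TTBR1 space */
    return ((tbi_field >> select) & 1) ? 56 : 64;
}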
+
+static int vae1_tlbbits(CPUARMState *env, uint64_t addr)
+{
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ ARMMMUIdx mmu_idx;
+
+ /* Only the regime of the mmu_idx below is significant. */
+ if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+ mmu_idx = ARMMMUIdx_E20_0;
+ } else {
+ mmu_idx = ARMMMUIdx_E10_0;
+ }
+
+ return tlbbits_for_regime(env, mmu_idx, addr);
+}
+
+static int vae2_tlbbits(CPUARMState *env, uint64_t addr)
+{
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ ARMMMUIdx mmu_idx;
+
+ /*
+ * Only the regime of the mmu_idx below is significant.
+ * Regime EL2&0 has two ranges with separate TBI configuration, while EL2
+ * only has one.
+ */
+ if (hcr & HCR_E2H) {
+ mmu_idx = ARMMMUIdx_E20_2;
+ } else {
+ mmu_idx = ARMMMUIdx_E2;
+ }
+
+ return tlbbits_for_regime(env, mmu_idx, addr);
}
static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
- bool sec = arm_is_secure_below_el3(env);
+ CPUState *cs = env_cpu(env);
+ int mask = vae1_tlbmask(env);
- if (sec) {
- tlb_flush_by_mmuidx_all_cpus_synced(cs,
- ARMMMUIdxBit_S1SE1 |
- ARMMMUIdxBit_S1SE0);
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, mask);
+}
+
+static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ int mask = vae1_tlbmask(env);
+
+ if (tlb_force_broadcast(env)) {
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, mask);
} else {
- tlb_flush_by_mmuidx_all_cpus_synced(cs,
- ARMMMUIdxBit_S12NSE1 |
- ARMMMUIdxBit_S12NSE0);
+ tlb_flush_by_mmuidx(cs, mask);
}
}
+static int e2_tlbmask(CPUARMState *env)
+{
+ return (ARMMMUIdxBit_E20_0 |
+ ARMMMUIdxBit_E20_2 |
+ ARMMMUIdxBit_E20_2_PAN |
+ ARMMMUIdxBit_E2);
+}
+
static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- /* Note that the 'ALL' scope must invalidate both stage 1 and
- * stage 2 translations, whereas most other scopes only invalidate
- * stage 1 translations.
- */
- ARMCPU *cpu = arm_env_get_cpu(env);
- CPUState *cs = CPU(cpu);
+ CPUState *cs = env_cpu(env);
+ int mask = alle1_tlbmask(env);
- if (arm_is_secure_below_el3(env)) {
- tlb_flush_by_mmuidx(cs,
- ARMMMUIdxBit_S1SE1 |
- ARMMMUIdxBit_S1SE0);
- } else {
- if (arm_feature(env, ARM_FEATURE_EL2)) {
- tlb_flush_by_mmuidx(cs,
- ARMMMUIdxBit_S12NSE1 |
- ARMMMUIdxBit_S12NSE0 |
- ARMMMUIdxBit_S2NS);
- } else {
- tlb_flush_by_mmuidx(cs,
- ARMMMUIdxBit_S12NSE1 |
- ARMMMUIdxBit_S12NSE0);
- }
- }
+ tlb_flush_by_mmuidx(cs, mask);
}
static void tlbi_aa64_alle2_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
- CPUState *cs = CPU(cpu);
+ CPUState *cs = env_cpu(env);
+ int mask = e2_tlbmask(env);
- tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_S1E2);
+ tlb_flush_by_mmuidx(cs, mask);
}
static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
CPUState *cs = CPU(cpu);
- tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_S1E3);
+ tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_E3);
}
static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- /* Note that the 'ALL' scope must invalidate both stage 1 and
- * stage 2 translations, whereas most other scopes only invalidate
- * stage 1 translations.
- */
- CPUState *cs = ENV_GET_CPU(env);
- bool sec = arm_is_secure_below_el3(env);
- bool has_el2 = arm_feature(env, ARM_FEATURE_EL2);
-
- if (sec) {
- tlb_flush_by_mmuidx_all_cpus_synced(cs,
- ARMMMUIdxBit_S1SE1 |
- ARMMMUIdxBit_S1SE0);
- } else if (has_el2) {
- tlb_flush_by_mmuidx_all_cpus_synced(cs,
- ARMMMUIdxBit_S12NSE1 |
- ARMMMUIdxBit_S12NSE0 |
- ARMMMUIdxBit_S2NS);
- } else {
- tlb_flush_by_mmuidx_all_cpus_synced(cs,
- ARMMMUIdxBit_S12NSE1 |
- ARMMMUIdxBit_S12NSE0);
- }
+ CPUState *cs = env_cpu(env);
+ int mask = alle1_tlbmask(env);
+
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, mask);
}
static void tlbi_aa64_alle2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
+ CPUState *cs = env_cpu(env);
+ int mask = e2_tlbmask(env);
- tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_S1E2);
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, mask);
}
static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
+ CPUState *cs = env_cpu(env);
- tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_S1E3);
-}
-
-static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
-{
- /* Invalidate by VA, EL1&0 (AArch64 version).
- * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1,
- * since we don't support flush-for-specific-ASID-only or
- * flush-last-level-only.
- */
- ARMCPU *cpu = arm_env_get_cpu(env);
- CPUState *cs = CPU(cpu);
- uint64_t pageaddr = sextract64(value << 12, 0, 56);
-
- if (arm_is_secure_below_el3(env)) {
- tlb_flush_page_by_mmuidx(cs, pageaddr,
- ARMMMUIdxBit_S1SE1 |
- ARMMMUIdxBit_S1SE0);
- } else {
- tlb_flush_page_by_mmuidx(cs, pageaddr,
- ARMMMUIdxBit_S12NSE1 |
- ARMMMUIdxBit_S12NSE0);
- }
+ tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_E3);
}
static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- /* Invalidate by VA, EL2
+ /*
+ * Invalidate by VA, EL2
* Currently handles both VAE2 and VALE2, since we don't support
* flush-last-level-only.
*/
- ARMCPU *cpu = arm_env_get_cpu(env);
- CPUState *cs = CPU(cpu);
+ CPUState *cs = env_cpu(env);
+ int mask = vae2_tlbmask(env);
uint64_t pageaddr = sextract64(value << 12, 0, 56);
+ int bits = vae2_tlbbits(env, pageaddr);
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_S1E2);
+ tlb_flush_page_bits_by_mmuidx(cs, pageaddr, mask, bits);
}
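The TLBI-by-VA writes above and below all recover the target page address the same way; a standalone sketch of that step, with an illustrative name. It relies on the usual arithmetic-right-shift behaviour of signed values that GCC and Clang provide.

#include <stdint.h>

/*
 * Illustrative sketch only: the TLBI payload carries VA[55:12] in its
 * low 44 bits. Shifting left by 12 and sign-extending from bit 55
 * yields the canonical page-aligned virtual address.
 */
uint64_t tlbi_pageaddr_sketch(uint64_t value)
{
    uint64_t x = value << 12;                    /* VA[55:12] into place */
    return (uint64_t)(((int64_t)(x << 8)) >> 8); /* sign-extend bit 55 */
}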
static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- /* Invalidate by VA, EL3
+ /*
+ * Invalidate by VA, EL3
* Currently handles both VAE3 and VALE3, since we don't support
* flush-last-level-only.
*/
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
CPUState *cs = CPU(cpu);
uint64_t pageaddr = sextract64(value << 12, 0, 56);
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_S1E3);
+ tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_E3);
}
static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
- CPUState *cs = CPU(cpu);
- bool sec = arm_is_secure_below_el3(env);
+ CPUState *cs = env_cpu(env);
+ int mask = vae1_tlbmask(env);
uint64_t pageaddr = sextract64(value << 12, 0, 56);
+ int bits = vae1_tlbbits(env, pageaddr);
+
+ tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits);
+}
- if (sec) {
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
- ARMMMUIdxBit_S1SE1 |
- ARMMMUIdxBit_S1SE0);
+static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA, EL1&0 (AArch64 version).
+ * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1,
+ * since we don't support flush-for-specific-ASID-only or
+ * flush-last-level-only.
+ */
+ CPUState *cs = env_cpu(env);
+ int mask = vae1_tlbmask(env);
+ uint64_t pageaddr = sextract64(value << 12, 0, 56);
+ int bits = vae1_tlbbits(env, pageaddr);
+
+ if (tlb_force_broadcast(env)) {
+ tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits);
} else {
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
- ARMMMUIdxBit_S12NSE1 |
- ARMMMUIdxBit_S12NSE0);
+ tlb_flush_page_bits_by_mmuidx(cs, pageaddr, mask, bits);
}
}
static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
+ CPUState *cs = env_cpu(env);
+ int mask = vae2_tlbmask(env);
uint64_t pageaddr = sextract64(value << 12, 0, 56);
+ int bits = vae2_tlbbits(env, pageaddr);
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
- ARMMMUIdxBit_S1E2);
+ tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr, mask, bits);
}
static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- CPUState *cs = ENV_GET_CPU(env);
+ CPUState *cs = env_cpu(env);
uint64_t pageaddr = sextract64(value << 12, 0, 56);
+ int bits = tlbbits_for_regime(env, ARMMMUIdx_E3, pageaddr);
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
- ARMMMUIdxBit_S1E3);
+ tlb_flush_page_bits_by_mmuidx_all_cpus_synced(cs, pageaddr,
+ ARMMMUIdxBit_E3, bits);
+}
+
+static int ipas2e1_tlbmask(CPUARMState *env, int64_t value)
+{
+ /*
+ * The MSB of value is the NS field, which only applies if SEL2
+ * is implemented and SCR_EL3.NS is not set (i.e. in secure mode).
+ */
+ return (value >= 0
+ && cpu_isar_feature(aa64_sel2, env_archcpu(env))
+ && arm_is_secure_below_el3(env)
+ ? ARMMMUIdxBit_Stage2_S
+ : ARMMMUIdxBit_Stage2);
}
static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- /* Invalidate by IPA. This has to invalidate any structures that
- * contain only stage 2 translation information, but does not need
- * to apply to structures that contain combined stage 1 and stage 2
- * translation information.
- * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero.
+ CPUState *cs = env_cpu(env);
+ int mask = ipas2e1_tlbmask(env, value);
+ uint64_t pageaddr = sextract64(value << 12, 0, 56);
+
+ if (tlb_force_broadcast(env)) {
+ tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask);
+ } else {
+ tlb_flush_page_by_mmuidx(cs, pageaddr, mask);
+ }
+}
+
+static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+ int mask = ipas2e1_tlbmask(env, value);
+ uint64_t pageaddr = sextract64(value << 12, 0, 56);
+
+ tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, mask);
+}
+
+#ifdef TARGET_AARCH64
+typedef struct {
+ uint64_t base;
+ uint64_t length;
+} TLBIRange;
+
+static ARMGranuleSize tlbi_range_tg_to_gran_size(int tg)
+{
+ /*
+ * Note that the TLBI range TG field encoding differs from both
+ * TG0 and TG1 encodings.
*/
- ARMCPU *cpu = arm_env_get_cpu(env);
- CPUState *cs = CPU(cpu);
- uint64_t pageaddr;
+ switch (tg) {
+ case 1:
+ return Gran4K;
+ case 2:
+ return Gran16K;
+ case 3:
+ return Gran64K;
+ default:
+ return GranInvalid;
+ }
+}
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
- return;
+static TLBIRange tlbi_aa64_get_range(CPUARMState *env, ARMMMUIdx mmuidx,
+ uint64_t value)
+{
+ unsigned int page_size_granule, page_shift, num, scale, exponent;
+ /* Extract one bit to represent the va selector in use. */
+ uint64_t select = sextract64(value, 36, 1);
+ ARMVAParameters param = aa64_va_parameters(env, select, mmuidx, true, false);
+ TLBIRange ret = { };
+ ARMGranuleSize gran;
+
+ page_size_granule = extract64(value, 46, 2);
+ gran = tlbi_range_tg_to_gran_size(page_size_granule);
+
+ /* The granule encoded in value must match the granule in use. */
+ if (gran != param.gran) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Invalid tlbi page size granule %d\n",
+ page_size_granule);
+ return ret;
}
- pageaddr = sextract64(value << 12, 0, 48);
+ page_shift = arm_granule_bits(gran);
+ num = extract64(value, 39, 5);
+ scale = extract64(value, 44, 2);
+ exponent = (5 * scale) + 1;
+
+ ret.length = (num + 1) << (exponent + page_shift);
+
+ if (param.select) {
+ ret.base = sextract64(value, 0, 37);
+ } else {
+ ret.base = extract64(value, 0, 37);
+ }
+ if (param.ds) {
+ /*
+ * With DS=1, BaseADDR is always shifted 16 so that it is able
+ * to address all 52 va bits. The input address is perforce
+ * aligned on a 64k boundary regardless of translation granule.
+ */
+ page_shift = 16;
+ }
+ ret.base <<= page_shift;
- tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_S2NS);
+ return ret;
}
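The range-length arithmetic above follows the architected formula: (NUM + 1) pages scaled by 2^(5*SCALE + 1). A standalone sketch, with NUM and SCALE taken from bits [43:39] and [45:44] of the TLBI payload and page_shift of 12/14/16 for the 4K/16K/64K granules:

#include <stdint.h>

/* Illustrative sketch only: byte length covered by a ranged TLBI. */
uint64_t tlbi_range_length_sketch(unsigned num, unsigned scale,
                                  unsigned page_shift)
{
    unsigned exponent = 5 * scale + 1;
    return (uint64_t)(num + 1) << (exponent + page_shift);
}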
-static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static void do_rvae_write(CPUARMState *env, uint64_t value,
+ int idxmap, bool synced)
{
- CPUState *cs = ENV_GET_CPU(env);
- uint64_t pageaddr;
+ ARMMMUIdx one_idx = ARM_MMU_IDX_A | ctz32(idxmap);
+ TLBIRange range;
+ int bits;
- if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
- return;
+ range = tlbi_aa64_get_range(env, one_idx, value);
+ bits = tlbbits_for_regime(env, one_idx, range.base);
+
+ if (synced) {
+ tlb_flush_range_by_mmuidx_all_cpus_synced(env_cpu(env),
+ range.base,
+ range.length,
+ idxmap,
+ bits);
+ } else {
+ tlb_flush_range_by_mmuidx(env_cpu(env), range.base,
+ range.length, idxmap, bits);
}
+}
- pageaddr = sextract64(value << 12, 0, 48);
+static void tlbi_aa64_rvae1_write(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA range, EL1&0.
+ * Currently handles all of RVAE1, RVAAE1, RVAALE1 and RVALE1,
+ * since we don't support flush-for-specific-ASID-only or
+ * flush-last-level-only.
+ */
- tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
- ARMMMUIdxBit_S2NS);
+ do_rvae_write(env, value, vae1_tlbmask(env),
+ tlb_force_broadcast(env));
+}
+
+static void tlbi_aa64_rvae1is_write(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA range, Inner/Outer Shareable EL1&0.
+ * Currently handles all of RVAE1IS, RVAE1OS, RVAAE1IS, RVAAE1OS,
+ * RVAALE1IS, RVAALE1OS, RVALE1IS and RVALE1OS, since we don't support
+ * flush-for-specific-ASID-only, flush-last-level-only or inner/outer
+ * shareable specific flushes.
+ */
+
+ do_rvae_write(env, value, vae1_tlbmask(env), true);
+}
+
+static void tlbi_aa64_rvae2_write(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA range, EL2.
+ * Currently handles all of RVAE2 and RVALE2,
+ * since we don't support flush-for-specific-ASID-only or
+ * flush-last-level-only.
+ */
+
+ do_rvae_write(env, value, vae2_tlbmask(env),
+ tlb_force_broadcast(env));
+}
+
+static void tlbi_aa64_rvae2is_write(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA range, Inner/Outer Shareable, EL2.
+ * Currently handles all of RVAE2IS, RVAE2OS, RVALE2IS and RVALE2OS,
+ * since we don't support flush-for-specific-ASID-only,
+ * flush-last-level-only or inner/outer shareable specific flushes.
+ */
+
+ do_rvae_write(env, value, vae2_tlbmask(env), true);
+}
+
+static void tlbi_aa64_rvae3_write(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA range, EL3.
+ * Currently handles all of RVAE3 and RVALE3,
+ * since we don't support flush-for-specific-ASID-only or
+ * flush-last-level-only.
+ */
+
+ do_rvae_write(env, value, ARMMMUIdxBit_E3, tlb_force_broadcast(env));
+}
+
+static void tlbi_aa64_rvae3is_write(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Invalidate by VA range, EL3, Inner/Outer Shareable.
+ * Currently handles all of RVAE3IS, RVAE3OS, RVALE3IS and RVALE3OS,
+ * since we don't support flush-for-specific-ASID-only,
+ * flush-last-level-only or inner/outer specific flushes.
+ */
+
+ do_rvae_write(env, value, ARMMMUIdxBit_E3, true);
+}
+
+static void tlbi_aa64_ripas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ do_rvae_write(env, value, ipas2e1_tlbmask(env, value),
+ tlb_force_broadcast(env));
}
+static void tlbi_aa64_ripas2e1is_write(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ do_rvae_write(env, value, ipas2e1_tlbmask(env, value), true);
+}
+#endif
+
static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
- /* We don't implement EL2, so the only control on DC ZVA is the
- * bit in the SCTLR which can prohibit access for EL0.
- */
- if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_DZE)) {
- return CP_ACCESS_TRAP;
+ int cur_el = arm_current_el(env);
+
+ if (cur_el < 2) {
+ uint64_t hcr = arm_hcr_el2_eff(env);
+
+ if (cur_el == 0) {
+ if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+ if (!(env->cp15.sctlr_el[2] & SCTLR_DZE)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ } else {
+ if (!(env->cp15.sctlr_el[1] & SCTLR_DZE)) {
+ return CP_ACCESS_TRAP;
+ }
+ if (hcr & HCR_TDZ) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ }
+ } else if (hcr & HCR_TDZ) {
+ return CP_ACCESS_TRAP_EL2;
+ }
}
return CP_ACCESS_OK;
}
static uint64_t aa64_dczid_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
int dzp_bit = 1 << 4;
/* DZP indicates whether DC ZVA access is allowed */
@@ -3361,7 +5409,8 @@ static CPAccessResult sp_el0_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
if (!(env->pstate & PSTATE_SP)) {
- /* Access to SP_EL0 is undefined if it's being used as
+ /*
+ * Access to SP_EL0 is undefined if it's being used as
* the stack pointer.
*/
return CP_ACCESS_TRAP_UNCATEGORIZED;
@@ -3382,46 +5431,139 @@ static void spsel_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val)
static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
+
+ if (arm_feature(env, ARM_FEATURE_PMSA) && !cpu->has_mpu) {
+ /* M bit is RAZ/WI for PMSA with no MPU implemented */
+ value &= ~SCTLR_M;
+ }
+
+ /* ??? Lots of these bits are not implemented. */
+
+ if (ri->state == ARM_CP_STATE_AA64 && !cpu_isar_feature(aa64_mte, cpu)) {
+ if (ri->opc1 == 6) { /* SCTLR_EL3 */
+ value &= ~(SCTLR_ITFSB | SCTLR_TCF | SCTLR_ATA);
+ } else {
+ value &= ~(SCTLR_ITFSB | SCTLR_TCF0 | SCTLR_TCF |
+ SCTLR_ATA0 | SCTLR_ATA);
+ }
+ }
if (raw_read(env, ri) == value) {
- /* Skip the TLB flush if nothing actually changed; Linux likes
+ /*
+ * Skip the TLB flush if nothing actually changed; Linux likes
* to do a lot of pointless SCTLR writes.
*/
return;
}
- if (arm_feature(env, ARM_FEATURE_PMSA) && !cpu->has_mpu) {
- /* M bit is RAZ/WI for PMSA with no MPU implemented */
- value &= ~SCTLR_M;
- }
-
raw_write(env, ri, value);
- /* ??? Lots of these bits are not implemented. */
+
/* This may enable/disable the MMU, so do a TLB flush. */
tlb_flush(CPU(cpu));
+
+ if (tcg_enabled() && ri->type & ARM_CP_SUPPRESS_TB_END) {
+ /*
+ * Normally we would always end the TB on an SCTLR write; see the
+ * comment in ARMCPRegInfo sctlr initialization below for why Xscale
+ * is special. Setting ARM_CP_SUPPRESS_TB_END also stops the rebuild
+ * of hflags from the translator, so do it here.
+ */
+ arm_rebuild_hflags(env);
+ }
}
-static CPAccessResult fpexc32_access(CPUARMState *env, const ARMCPRegInfo *ri,
- bool isread)
+static void mdcr_el3_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
- if ((env->cp15.cptr_el[2] & CPTR_TFP) && arm_current_el(env) == 2) {
- return CP_ACCESS_TRAP_FP_EL2;
+ /*
+ * Some MDCR_EL3 bits affect whether PMU counters are running:
+ * if we are trying to change any of those then we must
+ * bracket this update with PMU start/finish calls.
+ */
+ bool pmu_op = (env->cp15.mdcr_el3 ^ value) & MDCR_EL3_PMU_ENABLE_BITS;
+
+ if (pmu_op) {
+ pmu_op_start(env);
}
- if (env->cp15.cptr_el[3] & CPTR_TFP) {
- return CP_ACCESS_TRAP_FP_EL3;
+ env->cp15.mdcr_el3 = value;
+ if (pmu_op) {
+ pmu_op_finish(env);
}
- return CP_ACCESS_OK;
}
static void sdcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- env->cp15.mdcr_el3 = value & SDCR_VALID_MASK;
+ /* Not all bits defined for MDCR_EL3 exist in the AArch32 SDCR */
+ mdcr_el3_write(env, ri, value & SDCR_VALID_MASK);
}
+static void mdcr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * Some MDCR_EL2 bits affect whether PMU counters are running:
+ * if we are trying to change any of those then we must
+ * bracket this update with PMU start/finish calls.
+ */
+ bool pmu_op = (env->cp15.mdcr_el2 ^ value) & MDCR_EL2_PMU_ENABLE_BITS;
+
+ if (pmu_op) {
+ pmu_op_start(env);
+ }
+ env->cp15.mdcr_el2 = value;
+ if (pmu_op) {
+ pmu_op_finish(env);
+ }
+}
+
+static CPAccessResult access_nv1(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 1) {
+ uint64_t hcr_nv = arm_hcr_el2_eff(env) & (HCR_NV | HCR_NV1 | HCR_NV2);
+
+ if (hcr_nv == (HCR_NV | HCR_NV1)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ }
+ return CP_ACCESS_OK;
+}
+
+#ifdef CONFIG_USER_ONLY
+/*
+ * `IC IVAU` is handled to improve compatibility with JITs that dual-map their
+ * code to get around W^X restrictions, where one region is writable and the
+ * other is executable.
+ *
+ * Since the executable region is never written to we cannot detect code
+ * changes when running in user mode, and rely on the emulated JIT telling us
+ * that the code has changed by executing this instruction.
+ */
+static void ic_ivau_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ uint64_t icache_line_mask, start_address, end_address;
+ const ARMCPU *cpu;
+
+ cpu = env_archcpu(env);
+
+ icache_line_mask = (4 << extract32(cpu->ctr, 0, 4)) - 1;
+ start_address = value & ~icache_line_mask;
+ end_address = value | icache_line_mask;
+
+ mmap_lock();
+
+ tb_invalidate_phys_range(start_address, end_address);
+
+ mmap_unlock();
+}
+#endif
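
A quick worked example of the line-size arithmetic in ic_ivau_write() above (an illustrative sketch, not part of the patch): CTR_EL0.IminLine, bits [3:0] of cpu->ctr, encodes log2 of the I-cache line size in 4-byte words, so the line size in bytes is 4 << IminLine.

    uint32_t iminline  = extract32(cpu->ctr, 0, 4);  /* e.g. 4                  */
    uint64_t line_mask = (4 << iminline) - 1;        /* 64-byte line -> 0x3f    */
    uint64_t start     = value & ~line_mask;         /* round down to the line  */
    uint64_t end       = value | line_mask;          /* last byte of that line  */
    /* [start, end] is then passed to tb_invalidate_phys_range().               */
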
+
static const ARMCPRegInfo v8_cp_reginfo[] = {
- /* Minimal set of EL0-visible registers. This will need to be expanded
+ /*
+ * Minimal set of EL0-visible registers. This will need to be expanded
* significantly for system emulation of AArch64 CPUs.
*/
{ .name = "NZCV", .state = ARM_CP_STATE_AA64,
@@ -3444,6 +5586,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
{ .name = "DCZID_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0,
.access = PL0_R, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_DCZID_EL0,
.readfn = aa64_dczid_read },
{ .name = "DC_ZVA", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 1,
@@ -3451,94 +5594,131 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
#ifndef CONFIG_USER_ONLY
/* Avoid overhead of an access check that always passes in user-mode */
.accessfn = aa64_zva_access,
+ .fgt = FGT_DCZVA,
#endif
},
{ .name = "CURRENTEL", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 4, .crm = 2,
.access = PL1_R, .type = ARM_CP_CURRENTEL },
- /* Cache ops: all NOPs since we don't emulate caches */
+ /*
+ * Instruction cache ops. All of these except `IC IVAU` NOP because we
+ * don't emulate caches.
+ */
{ .name = "IC_IALLUIS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0,
- .access = PL1_W, .type = ARM_CP_NOP },
+ .access = PL1_W, .type = ARM_CP_NOP,
+ .fgt = FGT_ICIALLUIS,
+ .accessfn = access_ticab },
{ .name = "IC_IALLU", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 0,
- .access = PL1_W, .type = ARM_CP_NOP },
+ .access = PL1_W, .type = ARM_CP_NOP,
+ .fgt = FGT_ICIALLU,
+ .accessfn = access_tocu },
{ .name = "IC_IVAU", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 3, .crn = 7, .crm = 5, .opc2 = 1,
- .access = PL0_W, .type = ARM_CP_NOP,
- .accessfn = aa64_cacheop_access },
+ .access = PL0_W,
+ .fgt = FGT_ICIVAU,
+ .accessfn = access_tocu,
+#ifdef CONFIG_USER_ONLY
+ .type = ARM_CP_NO_RAW,
+ .writefn = ic_ivau_write
+#else
+ .type = ARM_CP_NOP
+#endif
+ },
+ /* Cache ops: all NOPs since we don't emulate caches */
{ .name = "DC_IVAC", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1,
- .access = PL1_W, .type = ARM_CP_NOP },
+ .access = PL1_W, .accessfn = aa64_cacheop_poc_access,
+ .fgt = FGT_DCIVAC,
+ .type = ARM_CP_NOP },
{ .name = "DC_ISW", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 2,
- .access = PL1_W, .type = ARM_CP_NOP },
+ .fgt = FGT_DCISW,
+ .access = PL1_W, .accessfn = access_tsw, .type = ARM_CP_NOP },
{ .name = "DC_CVAC", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 1,
.access = PL0_W, .type = ARM_CP_NOP,
- .accessfn = aa64_cacheop_access },
+ .fgt = FGT_DCCVAC,
+ .accessfn = aa64_cacheop_poc_access },
{ .name = "DC_CSW", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2,
- .access = PL1_W, .type = ARM_CP_NOP },
+ .fgt = FGT_DCCSW,
+ .access = PL1_W, .accessfn = access_tsw, .type = ARM_CP_NOP },
{ .name = "DC_CVAU", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 3, .crn = 7, .crm = 11, .opc2 = 1,
.access = PL0_W, .type = ARM_CP_NOP,
- .accessfn = aa64_cacheop_access },
+ .fgt = FGT_DCCVAU,
+ .accessfn = access_tocu },
{ .name = "DC_CIVAC", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 1,
.access = PL0_W, .type = ARM_CP_NOP,
- .accessfn = aa64_cacheop_access },
+ .fgt = FGT_DCCIVAC,
+ .accessfn = aa64_cacheop_poc_access },
{ .name = "DC_CISW", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2,
- .access = PL1_W, .type = ARM_CP_NOP },
+ .fgt = FGT_DCCISW,
+ .access = PL1_W, .accessfn = access_tsw, .type = ARM_CP_NOP },
/* TLBI operations */
{ .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
- .access = PL1_W, .type = ARM_CP_NO_RAW,
+ .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIVMALLE1IS,
.writefn = tlbi_aa64_vmalle1is_write },
{ .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1,
- .access = PL1_W, .type = ARM_CP_NO_RAW,
+ .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIVAE1IS,
.writefn = tlbi_aa64_vae1is_write },
{ .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2,
- .access = PL1_W, .type = ARM_CP_NO_RAW,
+ .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIASIDE1IS,
.writefn = tlbi_aa64_vmalle1is_write },
{ .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3,
- .access = PL1_W, .type = ARM_CP_NO_RAW,
+ .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIVAAE1IS,
.writefn = tlbi_aa64_vae1is_write },
{ .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5,
- .access = PL1_W, .type = ARM_CP_NO_RAW,
+ .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIVALE1IS,
.writefn = tlbi_aa64_vae1is_write },
{ .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7,
- .access = PL1_W, .type = ARM_CP_NO_RAW,
+ .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIVAALE1IS,
.writefn = tlbi_aa64_vae1is_write },
{ .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0,
- .access = PL1_W, .type = ARM_CP_NO_RAW,
+ .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIVMALLE1,
.writefn = tlbi_aa64_vmalle1_write },
{ .name = "TLBI_VAE1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1,
- .access = PL1_W, .type = ARM_CP_NO_RAW,
+ .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIVAE1,
.writefn = tlbi_aa64_vae1_write },
{ .name = "TLBI_ASIDE1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2,
- .access = PL1_W, .type = ARM_CP_NO_RAW,
+ .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIASIDE1,
.writefn = tlbi_aa64_vmalle1_write },
{ .name = "TLBI_VAAE1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3,
- .access = PL1_W, .type = ARM_CP_NO_RAW,
+ .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIVAAE1,
.writefn = tlbi_aa64_vae1_write },
{ .name = "TLBI_VALE1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5,
- .access = PL1_W, .type = ARM_CP_NO_RAW,
+ .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIVALE1,
.writefn = tlbi_aa64_vae1_write },
{ .name = "TLBI_VAALE1", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7,
- .access = PL1_W, .type = ARM_CP_NO_RAW,
+ .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIVAALE1,
.writefn = tlbi_aa64_vae1_write },
{ .name = "TLBI_IPAS2E1IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1,
@@ -3576,52 +5756,70 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
/* 64 bit address translation operations */
{ .name = "AT_S1E1R", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 0,
- .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
+ .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+ .fgt = FGT_ATS1E1R,
+ .accessfn = at_s1e01_access, .writefn = ats_write64 },
{ .name = "AT_S1E1W", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 1,
- .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
+ .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+ .fgt = FGT_ATS1E1W,
+ .accessfn = at_s1e01_access, .writefn = ats_write64 },
{ .name = "AT_S1E0R", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 2,
- .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
+ .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+ .fgt = FGT_ATS1E0R,
+ .accessfn = at_s1e01_access, .writefn = ats_write64 },
{ .name = "AT_S1E0W", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 3,
- .access = PL1_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
+ .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+ .fgt = FGT_ATS1E0W,
+ .accessfn = at_s1e01_access, .writefn = ats_write64 },
{ .name = "AT_S12E1R", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 4,
- .access = PL2_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+ .accessfn = at_e012_access, .writefn = ats_write64 },
{ .name = "AT_S12E1W", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+ .accessfn = at_e012_access, .writefn = ats_write64 },
{ .name = "AT_S12E0R", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 6,
- .access = PL2_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+ .accessfn = at_e012_access, .writefn = ats_write64 },
{ .name = "AT_S12E0W", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 7,
- .access = PL2_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+ .accessfn = at_e012_access, .writefn = ats_write64 },
/* AT S1E2* are elsewhere as they UNDEF from EL3 if EL2 is not present */
{ .name = "AT_S1E3R", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 6, .crn = 7, .crm = 8, .opc2 = 0,
- .access = PL3_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+ .writefn = ats_write64 },
{ .name = "AT_S1E3W", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 6, .crn = 7, .crm = 8, .opc2 = 1,
- .access = PL3_W, .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
+ .access = PL3_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+ .writefn = ats_write64 },
{ .name = "PAR_EL1", .state = ARM_CP_STATE_AA64,
.type = ARM_CP_ALIAS,
.opc0 = 3, .opc1 = 0, .crn = 7, .crm = 4, .opc2 = 0,
.access = PL1_RW, .resetvalue = 0,
+ .fgt = FGT_PAR_EL1,
.fieldoffset = offsetof(CPUARMState, cp15.par_el[1]),
.writefn = par_write },
#endif
/* TLB invalidate last level of translation table walk */
{ .name = "TLBIMVALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_is_write },
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
+ .writefn = tlbimva_is_write },
{ .name = "TLBIMVAALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7,
- .type = ARM_CP_NO_RAW, .access = PL1_W,
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis,
.writefn = tlbimvaa_is_write },
{ .name = "TLBIMVAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimva_write },
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbimva_write },
{ .name = "TLBIMVAAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7,
- .type = ARM_CP_NO_RAW, .access = PL1_W, .writefn = tlbimvaa_write },
+ .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb,
+ .writefn = tlbimvaa_write },
{ .name = "TLBIMVALH", .cp = 15, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 5,
.type = ARM_CP_NO_RAW, .access = PL2_W,
.writefn = tlbimva_hyp_write },
@@ -3632,63 +5830,66 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
{ .name = "TLBIIPAS2",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 1,
.type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_write },
+ .writefn = tlbiipas2_hyp_write },
{ .name = "TLBIIPAS2IS",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 1,
.type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_is_write },
+ .writefn = tlbiipas2is_hyp_write },
{ .name = "TLBIIPAS2L",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 5,
.type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_write },
+ .writefn = tlbiipas2_hyp_write },
{ .name = "TLBIIPAS2LIS",
.cp = 15, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 5,
.type = ARM_CP_NO_RAW, .access = PL2_W,
- .writefn = tlbiipas2_is_write },
+ .writefn = tlbiipas2is_hyp_write },
/* 32 bit cache operations */
{ .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0,
- .type = ARM_CP_NOP, .access = PL1_W },
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_ticab },
{ .name = "BPIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 6,
.type = ARM_CP_NOP, .access = PL1_W },
{ .name = "ICIALLU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 0,
- .type = ARM_CP_NOP, .access = PL1_W },
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tocu },
{ .name = "ICIMVAU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 1,
- .type = ARM_CP_NOP, .access = PL1_W },
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tocu },
{ .name = "BPIALL", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 6,
.type = ARM_CP_NOP, .access = PL1_W },
{ .name = "BPIMVA", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 7,
.type = ARM_CP_NOP, .access = PL1_W },
{ .name = "DCIMVAC", .cp = 15, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1,
- .type = ARM_CP_NOP, .access = PL1_W },
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_poc_access },
{ .name = "DCISW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 2,
- .type = ARM_CP_NOP, .access = PL1_W },
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
{ .name = "DCCMVAC", .cp = 15, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 1,
- .type = ARM_CP_NOP, .access = PL1_W },
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_poc_access },
{ .name = "DCCSW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2,
- .type = ARM_CP_NOP, .access = PL1_W },
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
{ .name = "DCCMVAU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 11, .opc2 = 1,
- .type = ARM_CP_NOP, .access = PL1_W },
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tocu },
{ .name = "DCCIMVAC", .cp = 15, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 1,
- .type = ARM_CP_NOP, .access = PL1_W },
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_poc_access },
{ .name = "DCCISW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2,
- .type = ARM_CP_NOP, .access = PL1_W },
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
/* MMU Domain access control / MPU write buffer control */
{ .name = "DACR", .cp = 15, .opc1 = 0, .crn = 3, .crm = 0, .opc2 = 0,
- .access = PL1_RW, .resetvalue = 0,
+ .access = PL1_RW, .accessfn = access_tvm_trvm, .resetvalue = 0,
.writefn = dacr_write, .raw_writefn = raw_write,
.bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.dacr_s),
offsetoflow32(CPUARMState, cp15.dacr_ns) } },
{ .name = "ELR_EL1", .state = ARM_CP_STATE_AA64,
.type = ARM_CP_ALIAS,
.opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 1,
- .access = PL1_RW,
+ .access = PL1_RW, .accessfn = access_nv1,
+ .nv2_redirect_offset = 0x230 | NV2_REDIR_NV1,
.fieldoffset = offsetof(CPUARMState, elr_el[1]) },
{ .name = "SPSR_EL1", .state = ARM_CP_STATE_AA64,
.type = ARM_CP_ALIAS,
.opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0,
- .access = PL1_RW,
+ .access = PL1_RW, .accessfn = access_nv1,
+ .nv2_redirect_offset = 0x160 | NV2_REDIR_NV1,
.fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_SVC]) },
- /* We rely on the access checks not allowing the guest to write to the
+ /*
+ * We rely on the access checks not allowing the guest to write to the
* state field when SPSel indicates that it's being used as the stack
* pointer.
*/
@@ -3699,26 +5900,13 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.fieldoffset = offsetof(CPUARMState, sp_el[0]) },
{ .name = "SP_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 4, .crm = 1, .opc2 = 0,
- .access = PL2_RW, .type = ARM_CP_ALIAS,
+ .nv2_redirect_offset = 0x240,
+ .access = PL2_RW, .type = ARM_CP_ALIAS | ARM_CP_EL3_NO_EL2_KEEP,
.fieldoffset = offsetof(CPUARMState, sp_el[1]) },
{ .name = "SPSel", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 4, .crm = 2, .opc2 = 0,
.type = ARM_CP_NO_RAW,
.access = PL1_RW, .readfn = spsel_read, .writefn = spsel_write },
- { .name = "FPEXC32_EL2", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 3, .opc2 = 0,
- .type = ARM_CP_ALIAS,
- .fieldoffset = offsetof(CPUARMState, vfp.xregs[ARM_VFP_FPEXC]),
- .access = PL2_RW, .accessfn = fpexc32_access },
- { .name = "DACR32_EL2", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 4, .crn = 3, .crm = 0, .opc2 = 0,
- .access = PL2_RW, .resetvalue = 0,
- .writefn = dacr_write, .raw_writefn = raw_write,
- .fieldoffset = offsetof(CPUARMState, cp15.dacr32_el2) },
- { .name = "IFSR32_EL2", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 0, .opc2 = 1,
- .access = PL2_RW, .resetvalue = 0,
- .fieldoffset = offsetof(CPUARMState, cp15.ifsr32_el2) },
{ .name = "SPSR_IRQ", .state = ARM_CP_STATE_AA64,
.type = ARM_CP_ALIAS,
.opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 0,
@@ -3740,144 +5928,52 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
.access = PL2_RW,
.fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_FIQ]) },
{ .name = "MDCR_EL3", .state = ARM_CP_STATE_AA64,
+ .type = ARM_CP_IO,
.opc0 = 3, .opc1 = 6, .crn = 1, .crm = 3, .opc2 = 1,
.resetvalue = 0,
- .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.mdcr_el3) },
- { .name = "SDCR", .type = ARM_CP_ALIAS,
+ .access = PL3_RW,
+ .writefn = mdcr_el3_write,
+ .fieldoffset = offsetof(CPUARMState, cp15.mdcr_el3) },
+ { .name = "SDCR", .type = ARM_CP_ALIAS | ARM_CP_IO,
.cp = 15, .opc1 = 0, .crn = 1, .crm = 3, .opc2 = 1,
.access = PL1_RW, .accessfn = access_trap_aa32s_el1,
.writefn = sdcr_write,
.fieldoffset = offsetoflow32(CPUARMState, cp15.mdcr_el3) },
- REGINFO_SENTINEL
};
-/* Used to describe the behaviour of EL2 regs when EL2 does not exist. */
-static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
- { .name = "VBAR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 0,
- .access = PL2_RW,
- .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore },
- { .name = "HCR_EL2", .state = ARM_CP_STATE_BOTH,
- .type = ARM_CP_NO_RAW,
- .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
- .access = PL2_RW,
- .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0,
- .access = PL2_RW,
- .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "CPTR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 2,
- .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "MAIR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 0,
- .access = PL2_RW, .type = ARM_CP_CONST,
- .resetvalue = 0 },
- { .name = "HMAIR1", .state = ARM_CP_STATE_AA32,
- .cp = 15, .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 1,
- .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "AMAIR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 10, .crm = 3, .opc2 = 0,
- .access = PL2_RW, .type = ARM_CP_CONST,
- .resetvalue = 0 },
- { .name = "HAMAIR1", .state = ARM_CP_STATE_AA32,
- .cp = 15, .opc1 = 4, .crn = 10, .crm = 3, .opc2 = 1,
- .access = PL2_RW, .type = ARM_CP_CONST,
- .resetvalue = 0 },
- { .name = "AFSR0_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 1, .opc2 = 0,
- .access = PL2_RW, .type = ARM_CP_CONST,
- .resetvalue = 0 },
- { .name = "AFSR1_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 1, .opc2 = 1,
- .access = PL2_RW, .type = ARM_CP_CONST,
- .resetvalue = 0 },
- { .name = "TCR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 2,
- .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "VTCR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
- .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
- .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "VTTBR", .state = ARM_CP_STATE_AA32,
- .cp = 15, .opc1 = 6, .crm = 2,
- .access = PL2_RW, .accessfn = access_el3_aa32ns,
- .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
- { .name = "VTTBR_EL2", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 0,
- .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0,
- .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "TPIDR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 2,
- .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "TTBR0_EL2", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 0,
- .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "HTTBR", .cp = 15, .opc1 = 4, .crm = 2,
- .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_CONST,
- .resetvalue = 0 },
- { .name = "CNTHCTL_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 1, .opc2 = 0,
- .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "CNTVOFF_EL2", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 0, .opc2 = 3,
- .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "CNTVOFF", .cp = 15, .opc1 = 4, .crm = 14,
- .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_CONST,
- .resetvalue = 0 },
- { .name = "CNTHP_CVAL_EL2", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 2, .opc2 = 2,
- .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "CNTHP_CVAL", .cp = 15, .opc1 = 6, .crm = 14,
- .access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_CONST,
- .resetvalue = 0 },
- { .name = "CNTHP_TVAL_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 2, .opc2 = 0,
- .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "CNTHP_CTL_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 2, .opc2 = 1,
- .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "MDCR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 1,
- .access = PL2_RW, .accessfn = access_tda,
- .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "HPFAR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4,
- .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
- .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "HSTR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 3,
- .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "FAR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 0,
- .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "HIFAR", .state = ARM_CP_STATE_AA32,
- .type = ARM_CP_CONST,
- .cp = 15, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 2,
- .access = PL2_RW, .resetvalue = 0 },
- REGINFO_SENTINEL
-};
-
-/* Ditto, but for registers which exist in ARMv8 but not v7 */
-static const ARMCPRegInfo el3_no_el2_v8_cp_reginfo[] = {
- { .name = "HCR2", .state = ARM_CP_STATE_AA32,
- .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 4,
+/* These are present only when EL1 supports AArch32 */
+static const ARMCPRegInfo v8_aa32_el1_reginfo[] = {
+ { .name = "FPEXC32_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 3, .opc2 = 0,
.access = PL2_RW,
- .type = ARM_CP_CONST, .resetvalue = 0 },
- REGINFO_SENTINEL
+ .type = ARM_CP_ALIAS | ARM_CP_FPU | ARM_CP_EL3_NO_EL2_KEEP,
+ .fieldoffset = offsetof(CPUARMState, vfp.xregs[ARM_VFP_FPEXC]) },
+ { .name = "DACR32_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 3, .crm = 0, .opc2 = 0,
+ .access = PL2_RW, .resetvalue = 0, .type = ARM_CP_EL3_NO_EL2_KEEP,
+ .writefn = dacr_write, .raw_writefn = raw_write,
+ .fieldoffset = offsetof(CPUARMState, cp15.dacr32_el2) },
+ { .name = "IFSR32_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 0, .opc2 = 1,
+ .access = PL2_RW, .resetvalue = 0, .type = ARM_CP_EL3_NO_EL2_KEEP,
+ .fieldoffset = offsetof(CPUARMState, cp15.ifsr32_el2) },
};
-static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
- uint64_t valid_mask = HCR_MASK;
+ ARMCPU *cpu = env_archcpu(env);
+
+ if (arm_feature(env, ARM_FEATURE_V8)) {
+ valid_mask |= MAKE_64BIT_MASK(0, 34); /* ARMv8.0 */
+ } else {
+ valid_mask |= MAKE_64BIT_MASK(0, 28); /* ARMv7VE */
+ }
if (arm_feature(env, ARM_FEATURE_EL3)) {
valid_mask &= ~HCR_HCD;
} else if (cpu->psci_conduit != QEMU_PSCI_CONDUIT_SMC) {
- /* Architecturally HCR.TSC is RES0 if EL3 is not implemented.
+ /*
+ * Architecturally HCR.TSC is RES0 if EL3 is not implemented.
* However, if we're using the SMC PSCI conduit then QEMU is
* effectively acting like EL3 firmware and so the guest at
* EL2 should retain the ability to prevent EL1 from being
@@ -3887,18 +5983,83 @@ static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
valid_mask &= ~HCR_TSC;
}
+ if (arm_feature(env, ARM_FEATURE_AARCH64)) {
+ if (cpu_isar_feature(aa64_vh, cpu)) {
+ valid_mask |= HCR_E2H;
+ }
+ if (cpu_isar_feature(aa64_ras, cpu)) {
+ valid_mask |= HCR_TERR | HCR_TEA;
+ }
+ if (cpu_isar_feature(aa64_lor, cpu)) {
+ valid_mask |= HCR_TLOR;
+ }
+ if (cpu_isar_feature(aa64_pauth, cpu)) {
+ valid_mask |= HCR_API | HCR_APK;
+ }
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ valid_mask |= HCR_ATA | HCR_DCT | HCR_TID5;
+ }
+ if (cpu_isar_feature(aa64_scxtnum, cpu)) {
+ valid_mask |= HCR_ENSCXT;
+ }
+ if (cpu_isar_feature(aa64_fwb, cpu)) {
+ valid_mask |= HCR_FWB;
+ }
+ if (cpu_isar_feature(aa64_rme, cpu)) {
+ valid_mask |= HCR_GPF;
+ }
+ if (cpu_isar_feature(aa64_nv, cpu)) {
+ valid_mask |= HCR_NV | HCR_NV1 | HCR_AT;
+ }
+ if (cpu_isar_feature(aa64_nv2, cpu)) {
+ valid_mask |= HCR_NV2;
+ }
+ }
+
+ if (cpu_isar_feature(any_evt, cpu)) {
+ valid_mask |= HCR_TTLBIS | HCR_TTLBOS | HCR_TICAB | HCR_TOCU | HCR_TID4;
+ } else if (cpu_isar_feature(any_half_evt, cpu)) {
+ valid_mask |= HCR_TICAB | HCR_TOCU | HCR_TID4;
+ }
+
/* Clear RES0 bits. */
value &= valid_mask;
- /* These bits change the MMU setup:
+ /*
+ * These bits change the MMU setup:
* HCR_VM enables stage 2 translation
* HCR_PTW forbids certain page-table setups
- * HCR_DC Disables stage1 and enables stage2 translation
+ * HCR_DC disables stage1 and enables stage2 translation
+ * HCR_DCT enables tagging on (disabled) stage1 translation
+ * HCR_FWB changes the interpretation of stage2 descriptor bits
+ * HCR_NV and HCR_NV1 affect interpretation of descriptor bits
*/
- if ((env->cp15.hcr_el2 ^ value) & (HCR_VM | HCR_PTW | HCR_DC)) {
+ if ((env->cp15.hcr_el2 ^ value) &
+ (HCR_VM | HCR_PTW | HCR_DC | HCR_DCT | HCR_FWB | HCR_NV | HCR_NV1)) {
tlb_flush(CPU(cpu));
}
env->cp15.hcr_el2 = value;
+
+ /*
+ * Updates to VI and VF require us to update the status of
+ * virtual interrupts, which are the logical OR of these bits
+ * and the state of the input lines from the GIC. (This requires
+ * that we have the BQL, which is done by marking the
+ * reginfo structs as ARM_CP_IO.)
+ * Note that if a write to HCR pends a VIRQ or VFIQ it is never
+ * possible for it to be taken immediately, because VIRQ and
+ * VFIQ are masked unless running at EL0 or EL1, and HCR
+ * can only be written at EL2.
+ */
+ g_assert(bql_locked());
+ arm_cpu_update_virq(cpu);
+ arm_cpu_update_vfiq(cpu);
+ arm_cpu_update_vserr(cpu);
+}
+
+static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
+{
+ do_hcr_write(env, value, 0);
}
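
As an illustration of the valid_mask handling in do_hcr_write() (a hypothetical sketch, not part of the patch): on a CPU without FEAT_RAS, HCR_TEA is never added to valid_mask, so a write that sets it is silently dropped by the "value &= valid_mask" step.

    /* Sketch: v8.0 CPU, FEAT_RAS absent; the ri argument is unused here. */
    hcr_write(env, NULL, HCR_VM | HCR_TEA);
    /* HCR_VM (bit 0) is inside the ARMv8.0 mask and survives;           */
    /* HCR_TEA is RES0 for this CPU, so hcr_el2 ends up as just HCR_VM.  */
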
static void hcr_writehigh(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -3906,7 +6067,7 @@ static void hcr_writehigh(CPUARMState *env, const ARMCPRegInfo *ri,
{
/* Handle HCR2 write, i.e. write to high half of HCR_EL2 */
value = deposit64(env->cp15.hcr_el2, 32, 32, value);
- hcr_write(env, NULL, value);
+ do_hcr_write(env, value, MAKE_64BIT_MASK(0, 32));
}
static void hcr_writelow(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -3914,28 +6075,232 @@ static void hcr_writelow(CPUARMState *env, const ARMCPRegInfo *ri,
{
/* Handle HCR write, i.e. write to low half of HCR_EL2 */
value = deposit64(env->cp15.hcr_el2, 0, 32, value);
- hcr_write(env, NULL, value);
+ do_hcr_write(env, value, MAKE_64BIT_MASK(32, 32));
+}
+
+/*
+ * Return the effective value of HCR_EL2, at the given security state.
+ * Bits that are not included here:
+ * RW (read from SCR_EL3.RW as needed)
+ */
+uint64_t arm_hcr_el2_eff_secstate(CPUARMState *env, ARMSecuritySpace space)
+{
+ uint64_t ret = env->cp15.hcr_el2;
+
+ assert(space != ARMSS_Root);
+
+ if (!arm_is_el2_enabled_secstate(env, space)) {
+ /*
+ * "This register has no effect if EL2 is not enabled in the
+ * current Security state". This is ARMv8.4-SecEL2 speak for
+ * !(SCR_EL3.NS==1 || SCR_EL3.EEL2==1).
+ *
+ * Prior to that, the language was "In an implementation that
+ * includes EL3, when the value of SCR_EL3.NS is 0 the PE behaves
+ * as if this field is 0 for all purposes other than a direct
+ * read or write access of HCR_EL2". With lots of enumeration
+     * on a per-field basis. In current QEMU, this condition
+     * is arm_is_secure_below_el3.
+ *
+ * Since the v8.4 language applies to the entire register, and
+ * appears to be backward compatible, use that.
+ */
+ return 0;
+ }
+
+ /*
+ * For a cpu that supports both aarch64 and aarch32, we can set bits
+ * in HCR_EL2 (e.g. via EL3) that are RES0 when we enter EL2 as aa32.
+ * Ignore all of the bits in HCR+HCR2 that are not valid for aarch32.
+ */
+ if (!arm_el_is_aa64(env, 2)) {
+ uint64_t aa32_valid;
+
+ /*
+ * These bits are up-to-date as of ARMv8.6.
+ * For HCR, it's easiest to list just the 2 bits that are invalid.
+ * For HCR2, list those that are valid.
+ */
+ aa32_valid = MAKE_64BIT_MASK(0, 32) & ~(HCR_RW | HCR_TDZ);
+ aa32_valid |= (HCR_CD | HCR_ID | HCR_TERR | HCR_TEA | HCR_MIOCNCE |
+ HCR_TID4 | HCR_TICAB | HCR_TOCU | HCR_TTLBIS);
+ ret &= aa32_valid;
+ }
+
+ if (ret & HCR_TGE) {
+ /* These bits are up-to-date as of ARMv8.6. */
+ if (ret & HCR_E2H) {
+ ret &= ~(HCR_VM | HCR_FMO | HCR_IMO | HCR_AMO |
+ HCR_BSU_MASK | HCR_DC | HCR_TWI | HCR_TWE |
+ HCR_TID0 | HCR_TID2 | HCR_TPCP | HCR_TPU |
+ HCR_TDZ | HCR_CD | HCR_ID | HCR_MIOCNCE |
+ HCR_TID4 | HCR_TICAB | HCR_TOCU | HCR_ENSCXT |
+ HCR_TTLBIS | HCR_TTLBOS | HCR_TID5);
+ } else {
+ ret |= HCR_FMO | HCR_IMO | HCR_AMO;
+ }
+ ret &= ~(HCR_SWIO | HCR_PTW | HCR_VF | HCR_VI | HCR_VSE |
+ HCR_FB | HCR_TID1 | HCR_TID3 | HCR_TSC | HCR_TACR |
+ HCR_TSW | HCR_TTLB | HCR_TVM | HCR_HCD | HCR_TRVM |
+ HCR_TLOR);
+ }
+
+ return ret;
+}
+
+uint64_t arm_hcr_el2_eff(CPUARMState *env)
+{
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ return 0;
+ }
+ return arm_hcr_el2_eff_secstate(env, arm_security_space_below_el3(env));
+}
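
Callers are expected to consult the effective value rather than the raw hcr_el2 field, so that the EL2-disabled and TGE/E2H special cases above are honoured. A minimal usage sketch (not part of the patch, mirroring the HCR_TDZ check in aa64_zva_access() earlier in this patch):

    /* Sketch: trap a DC ZVA-style access to EL2 when HCR_EL2.TDZ is set. */
    if (arm_current_el(env) < 2 && (arm_hcr_el2_eff(env) & HCR_TDZ)) {
        return CP_ACCESS_TRAP_EL2;
    }
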
+
+/*
+ * Corresponds to ARM pseudocode function ELIsInHost().
+ */
+bool el_is_in_host(CPUARMState *env, int el)
+{
+ uint64_t mask;
+
+ /*
+ * Since we only care about E2H and TGE, we can skip arm_hcr_el2_eff().
+ * Perform the simplest bit tests first, and validate EL2 afterward.
+ */
+ if (el & 1) {
+ return false; /* EL1 or EL3 */
+ }
+
+ /*
+ * Note that hcr_write() checks isar_feature_aa64_vh(),
+ * aka HaveVirtHostExt(), in allowing HCR_E2H to be set.
+ */
+ mask = el ? HCR_E2H : HCR_E2H | HCR_TGE;
+ if ((env->cp15.hcr_el2 & mask) != mask) {
+ return false;
+ }
+
+ /* TGE and/or E2H set: double check those bits are currently legal. */
+ return arm_is_el2_enabled(env) && arm_el_is_aa64(env, 2);
+}
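
Restating the checks above informally (not part of the patch), assuming EL2 is enabled and executing AArch64:

    /*
     * el_is_in_host(env, 0) : true only when HCR_EL2.E2H and HCR_EL2.TGE are both set
     * el_is_in_host(env, 2) : true only when HCR_EL2.E2H is set
     * el_is_in_host(env, 1) and el_is_in_host(env, 3) : always false (odd ELs)
     */
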
+
+static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ uint64_t valid_mask = 0;
+
+ /* FEAT_MOPS adds MSCEn and MCE2 */
+ if (cpu_isar_feature(aa64_mops, env_archcpu(env))) {
+ valid_mask |= HCRX_MSCEN | HCRX_MCE2;
+ }
+
+ /* Clear RES0 bits. */
+ env->cp15.hcrx_el2 = value & valid_mask;
+}
+
+static CPAccessResult access_hxen(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 2
+ && arm_feature(env, ARM_FEATURE_EL3)
+ && !(env->cp15.scr_el3 & SCR_HXEN)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
+
+static const ARMCPRegInfo hcrx_el2_reginfo = {
+ .name = "HCRX_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 2,
+ .access = PL2_RW, .writefn = hcrx_write, .accessfn = access_hxen,
+ .nv2_redirect_offset = 0xa0,
+ .fieldoffset = offsetof(CPUARMState, cp15.hcrx_el2),
+};
+
+/* Return the effective value of HCRX_EL2. */
+uint64_t arm_hcrx_el2_eff(CPUARMState *env)
+{
+ /*
+ * The bits in this register behave as 0 for all purposes other than
+ * direct reads of the register if SCR_EL3.HXEn is 0.
+ * If EL2 is not enabled in the current security state, then the
+ * bit may behave as if 0, or as if 1, depending on the bit.
+ * For the moment, we treat the EL2-disabled case as taking
+ * priority over the HXEn-disabled case. This is true for the only
+ * bit for a feature which we implement where the answer is different
+ * for the two cases (MSCEn for FEAT_MOPS).
+ * This may need to be revisited for future bits.
+ */
+ if (!arm_is_el2_enabled(env)) {
+ uint64_t hcrx = 0;
+ if (cpu_isar_feature(aa64_mops, env_archcpu(env))) {
+ /* MSCEn behaves as 1 if EL2 is not enabled */
+ hcrx |= HCRX_MSCEN;
+ }
+ return hcrx;
+ }
+ if (arm_feature(env, ARM_FEATURE_EL3) && !(env->cp15.scr_el3 & SCR_HXEN)) {
+ return 0;
+ }
+ return env->cp15.hcrx_el2;
+}
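
Because MSCEn behaves as 1 when EL2 is disabled, code that cares about FEAT_MOPS semantics should go through this helper rather than reading cp15.hcrx_el2 directly. A hedged usage sketch (illustrative only, not part of the patch):

    bool mops_enabled = (arm_hcrx_el2_eff(env) & HCRX_MSCEN) != 0;
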
+
+static void cptr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /*
+ * For A-profile AArch32 EL3, if NSACR.CP10
+ * is 0 then HCPTR.{TCP11,TCP10} ignore writes and read as 1.
+ */
+ if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+ !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
+ uint64_t mask = R_HCPTR_TCP11_MASK | R_HCPTR_TCP10_MASK;
+ value = (value & ~mask) | (env->cp15.cptr_el[2] & mask);
+ }
+ env->cp15.cptr_el[2] = value;
+}
+
+static uint64_t cptr_el2_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ /*
+ * For A-profile AArch32 EL3, if NSACR.CP10
+ * is 0 then HCPTR.{TCP11,TCP10} ignore writes and read as 1.
+ */
+ uint64_t value = env->cp15.cptr_el[2];
+
+ if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+ !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
+ value |= R_HCPTR_TCP11_MASK | R_HCPTR_TCP10_MASK;
+ }
+ return value;
}
static const ARMCPRegInfo el2_cp_reginfo[] = {
{ .name = "HCR_EL2", .state = ARM_CP_STATE_AA64,
+ .type = ARM_CP_IO,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
.access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
- .writefn = hcr_write },
+ .nv2_redirect_offset = 0x78,
+ .writefn = hcr_write, .raw_writefn = raw_write },
{ .name = "HCR", .state = ARM_CP_STATE_AA32,
- .type = ARM_CP_ALIAS,
+ .type = ARM_CP_ALIAS | ARM_CP_IO,
.cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
.access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
.writefn = hcr_writelow },
+ { .name = "HACR_EL2", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 7,
+ .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
{ .name = "ELR_EL2", .state = ARM_CP_STATE_AA64,
- .type = ARM_CP_ALIAS,
+ .type = ARM_CP_ALIAS | ARM_CP_NV2_REDIRECT,
.opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1,
.access = PL2_RW,
.fieldoffset = offsetof(CPUARMState, elr_el[2]) },
{ .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH,
+ .type = ARM_CP_NV2_REDIRECT,
.opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0,
.access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.esr_el[2]) },
{ .name = "FAR_EL2", .state = ARM_CP_STATE_BOTH,
+ .type = ARM_CP_NV2_REDIRECT,
.opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 0,
.access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el[2]) },
{ .name = "HIFAR", .state = ARM_CP_STATE_AA32,
@@ -3944,7 +6309,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
.access = PL2_RW,
.fieldoffset = offsetofhigh32(CPUARMState, cp15.far_el[2]) },
{ .name = "SPSR_EL2", .state = ARM_CP_STATE_AA64,
- .type = ARM_CP_ALIAS,
+ .type = ARM_CP_ALIAS | ARM_CP_NV2_REDIRECT,
.opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 0,
.access = PL2_RW,
.fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_HYP]) },
@@ -3960,7 +6325,8 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
{ .name = "CPTR_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 2,
.access = PL2_RW, .accessfn = cptr_access, .resetvalue = 0,
- .fieldoffset = offsetof(CPUARMState, cp15.cptr_el[2]) },
+ .fieldoffset = offsetof(CPUARMState, cp15.cptr_el[2]),
+ .readfn = cptr_el2_read, .writefn = cptr_el2_write },
{ .name = "MAIR_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 0,
.access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el[2]),
@@ -3988,32 +6354,30 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
.resetvalue = 0 },
{ .name = "TCR_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 2,
- .access = PL2_RW,
- /* no .writefn needed as this can't cause an ASID change;
- * no .raw_writefn or .resetfn needed as we never use mask/base_mask
- */
+ .access = PL2_RW, .writefn = vmsa_tcr_el12_write,
+ .raw_writefn = raw_write,
.fieldoffset = offsetof(CPUARMState, cp15.tcr_el[2]) },
{ .name = "VTCR", .state = ARM_CP_STATE_AA32,
.cp = 15, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
.type = ARM_CP_ALIAS,
.access = PL2_RW, .accessfn = access_el3_aa32ns,
- .fieldoffset = offsetof(CPUARMState, cp15.vtcr_el2) },
+ .fieldoffset = offsetoflow32(CPUARMState, cp15.vtcr_el2) },
{ .name = "VTCR_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
.access = PL2_RW,
- /* no .writefn needed as this can't cause an ASID change;
- * no .raw_writefn or .resetfn needed as we never use mask/base_mask
- */
+ .nv2_redirect_offset = 0x40,
+ /* no .writefn needed as this can't cause an ASID change */
.fieldoffset = offsetof(CPUARMState, cp15.vtcr_el2) },
{ .name = "VTTBR", .state = ARM_CP_STATE_AA32,
.cp = 15, .opc1 = 6, .crm = 2,
.type = ARM_CP_64BIT | ARM_CP_ALIAS,
.access = PL2_RW, .accessfn = access_el3_aa32ns,
.fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2),
- .writefn = vttbr_write },
+ .writefn = vttbr_write, .raw_writefn = raw_write },
{ .name = "VTTBR_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 0,
- .access = PL2_RW, .writefn = vttbr_write,
+ .access = PL2_RW, .writefn = vttbr_write, .raw_writefn = raw_write,
+ .nv2_redirect_offset = 0x20,
.fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2) },
{ .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0,
@@ -4022,10 +6386,12 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
{ .name = "TPIDR_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 2,
.access = PL2_RW, .resetvalue = 0,
+ .nv2_redirect_offset = 0x90,
.fieldoffset = offsetof(CPUARMState, cp15.tpidr_el[2]) },
{ .name = "TTBR0_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 0,
.access = PL2_RW, .resetvalue = 0,
+ .writefn = vmsa_tcr_ttbr_el2_write, .raw_writefn = raw_write,
.fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[2]) },
{ .name = "HTTBR", .cp = 15, .opc1 = 4, .crm = 2,
.access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS,
@@ -4052,64 +6418,71 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
.writefn = tlbimva_hyp_is_write },
{ .name = "TLBI_ALLE2", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 0,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
.writefn = tlbi_aa64_alle2_write },
{ .name = "TLBI_VAE2", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
.writefn = tlbi_aa64_vae2_write },
{ .name = "TLBI_VALE2", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 7, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
.writefn = tlbi_aa64_vae2_write },
{ .name = "TLBI_ALLE2IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 0,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
.writefn = tlbi_aa64_alle2is_write },
{ .name = "TLBI_VAE2IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 1,
- .type = ARM_CP_NO_RAW, .access = PL2_W,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
.writefn = tlbi_aa64_vae2is_write },
{ .name = "TLBI_VALE2IS", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 8, .crm = 3, .opc2 = 5,
- .access = PL2_W, .type = ARM_CP_NO_RAW,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
.writefn = tlbi_aa64_vae2is_write },
#ifndef CONFIG_USER_ONLY
- /* Unlike the other EL2-related AT operations, these must
+ /*
+ * Unlike the other EL2-related AT operations, these must
* UNDEF from EL3 if EL2 is not implemented, which is why we
* define them here rather than with the rest of the AT ops.
*/
{ .name = "AT_S1E2R", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 0,
.access = PL2_W, .accessfn = at_s1e2_access,
- .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
+ .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = ats_write64 },
{ .name = "AT_S1E2W", .state = ARM_CP_STATE_AA64,
.opc0 = 1, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 1,
.access = PL2_W, .accessfn = at_s1e2_access,
- .type = ARM_CP_NO_RAW, .writefn = ats_write64 },
- /* The AArch32 ATS1H* operations are CONSTRAINED UNPREDICTABLE
+ .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = ats_write64 },
+ /*
+ * The AArch32 ATS1H* operations are CONSTRAINED UNPREDICTABLE
* if EL2 is not implemented; we choose to UNDEF. Behaviour at EL3
* with SCR.NS == 0 outside Monitor mode is UNPREDICTABLE; we choose
* to behave as if SCR.NS was 1.
*/
{ .name = "ATS1HR", .cp = 15, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 0,
.access = PL2_W,
- .writefn = ats1h_write, .type = ARM_CP_NO_RAW },
+ .writefn = ats1h_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC },
{ .name = "ATS1HW", .cp = 15, .opc1 = 4, .crn = 7, .crm = 8, .opc2 = 1,
.access = PL2_W,
- .writefn = ats1h_write, .type = ARM_CP_NO_RAW },
+ .writefn = ats1h_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC },
{ .name = "CNTHCTL_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 14, .crm = 1, .opc2 = 0,
- /* ARMv7 requires bit 0 and 1 to reset to 1. ARMv8 defines the
+ /*
+ * ARMv7 requires bit 0 and 1 to reset to 1. ARMv8 defines the
* reset values as IMPDEF. We choose to reset to 3 to comply with
* both ARMv7 and ARMv8.
*/
- .access = PL2_RW, .resetvalue = 3,
+ .access = PL2_RW, .type = ARM_CP_IO, .resetvalue = 3,
+ .writefn = gt_cnthctl_write, .raw_writefn = raw_write,
.fieldoffset = offsetof(CPUARMState, cp15.cnthctl_el2) },
{ .name = "CNTVOFF_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 14, .crm = 0, .opc2 = 3,
.access = PL2_RW, .type = ARM_CP_IO, .resetvalue = 0,
.writefn = gt_cntvoff_write,
+ .nv2_redirect_offset = 0x60,
.fieldoffset = offsetof(CPUARMState, cp15.cntvoff_el2) },
{ .name = "CNTVOFF", .cp = 15, .opc1 = 4, .crm = 14,
.access = PL2_RW, .type = ARM_CP_64BIT | ARM_CP_ALIAS | ARM_CP_IO,
@@ -4137,15 +6510,6 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
.resetvalue = 0,
.writefn = gt_hyp_ctl_write, .raw_writefn = raw_write },
#endif
- /* The only field of MDCR_EL2 that has a defined architectural reset value
- * is MDCR_EL2.HPMN which should reset to the value of PMCR_EL0.N; but we
- * don't impelment any PMU event counters, so using zero as a reset
- * value for MDCR_EL2 is okay
- */
- { .name = "MDCR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 1,
- .access = PL2_RW, .resetvalue = 0,
- .fieldoffset = offsetof(CPUARMState, cp15.mdcr_el2), },
{ .name = "HPFAR", .state = ARM_CP_STATE_AA32,
.cp = 15, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4,
.access = PL2_RW, .accessfn = access_el3_aa32ns,
@@ -4157,30 +6521,55 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
{ .name = "HSTR_EL2", .state = ARM_CP_STATE_BOTH,
.cp = 15, .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 3,
.access = PL2_RW,
+ .nv2_redirect_offset = 0x80,
.fieldoffset = offsetof(CPUARMState, cp15.hstr_el2) },
- REGINFO_SENTINEL
};
static const ARMCPRegInfo el2_v8_cp_reginfo[] = {
{ .name = "HCR2", .state = ARM_CP_STATE_AA32,
- .type = ARM_CP_ALIAS,
+ .type = ARM_CP_ALIAS | ARM_CP_IO,
.cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 4,
.access = PL2_RW,
.fieldoffset = offsetofhigh32(CPUARMState, cp15.hcr_el2),
.writefn = hcr_writehigh },
- REGINFO_SENTINEL
+};
+
+static CPAccessResult sel2_access(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 3 || arm_is_secure_below_el3(env)) {
+ return CP_ACCESS_OK;
+ }
+ return CP_ACCESS_TRAP_UNCATEGORIZED;
+}
+
+static const ARMCPRegInfo el2_sec_cp_reginfo[] = {
+ { .name = "VSTTBR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 6, .opc2 = 0,
+ .access = PL2_RW, .accessfn = sel2_access,
+ .nv2_redirect_offset = 0x30,
+ .fieldoffset = offsetof(CPUARMState, cp15.vsttbr_el2) },
+ { .name = "VSTCR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 6, .opc2 = 2,
+ .access = PL2_RW, .accessfn = sel2_access,
+ .nv2_redirect_offset = 0x48,
+ .fieldoffset = offsetof(CPUARMState, cp15.vstcr_el2) },
};
static CPAccessResult nsacr_access(CPUARMState *env, const ARMCPRegInfo *ri,
bool isread)
{
- /* The NSACR is RW at EL3, and RO for NS EL1 and NS EL2.
- * At Secure EL1 it traps to EL3.
+ /*
+ * The NSACR is RW at EL3, and RO for NS EL1 and NS EL2.
+ * At Secure EL1 it traps to EL3 or EL2.
*/
if (arm_current_el(env) == 3) {
return CP_ACCESS_OK;
}
if (arm_is_secure_below_el3(env)) {
+ if (env->cp15.scr_el3 & SCR_EEL2) {
+ return CP_ACCESS_TRAP_EL2;
+ }
return CP_ACCESS_TRAP_EL3;
}
/* Accesses from EL1 NS and EL2 NS are UNDEF for write but allow reads. */
@@ -4194,12 +6583,12 @@ static const ARMCPRegInfo el3_cp_reginfo[] = {
{ .name = "SCR_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 0,
.access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.scr_el3),
- .resetvalue = 0, .writefn = scr_write },
- { .name = "SCR", .type = ARM_CP_ALIAS,
+ .resetfn = scr_reset, .writefn = scr_write, .raw_writefn = raw_write },
+ { .name = "SCR", .type = ARM_CP_ALIAS | ARM_CP_NEWEL,
.cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 0,
.access = PL1_RW, .accessfn = access_trap_aa32s_el1,
.fieldoffset = offsetoflow32(CPUARMState, cp15.scr_el3),
- .writefn = scr_write },
+ .writefn = scr_write, .raw_writefn = raw_write },
{ .name = "SDER32_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 1, .crm = 1, .opc2 = 1,
.access = PL3_RW, .resetvalue = 0,
@@ -4214,17 +6603,13 @@ static const ARMCPRegInfo el3_cp_reginfo[] = {
.fieldoffset = offsetof(CPUARMState, cp15.mvbar) },
{ .name = "TTBR0_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 0,
- .access = PL3_RW, .writefn = vmsa_ttbr_write, .resetvalue = 0,
+ .access = PL3_RW, .resetvalue = 0,
.fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[3]) },
{ .name = "TCR_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 2,
.access = PL3_RW,
- /* no .writefn needed as this can't cause an ASID change;
- * we must provide a .raw_writefn and .resetfn because we handle
- * reset and migration for the AArch32 TTBCR(S), which might be
- * using mask and base_mask.
- */
- .resetfn = vmsa_ttbcr_reset, .raw_writefn = vmsa_ttbcr_raw_write,
+ /* no .writefn needed as this can't cause an ASID change */
+ .resetvalue = 0,
.fieldoffset = offsetof(CPUARMState, cp15.tcr_el[3]) },
{ .name = "ELR_EL3", .state = ARM_CP_STATE_AA64,
.type = ARM_CP_ALIAS,
@@ -4291,173 +6676,497 @@ static const ARMCPRegInfo el3_cp_reginfo[] = {
.opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 5,
.access = PL3_W, .type = ARM_CP_NO_RAW,
.writefn = tlbi_aa64_vae3_write },
- REGINFO_SENTINEL
};
-static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri,
- bool isread)
+#ifndef CONFIG_USER_ONLY
+
+static CPAccessResult e2h_access(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
{
- /* Only accessible in EL0 if SCTLR.UCT is set (and only in AArch64,
- * but the AArch32 CTR has its own reginfo struct)
- */
- if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UCT)) {
- return CP_ACCESS_TRAP;
+ if (arm_current_el(env) == 1) {
+ /* This must be a FEAT_NV access */
+ return CP_ACCESS_OK;
+ }
+ if (!(arm_hcr_el2_eff(env) & HCR_E2H)) {
+ return CP_ACCESS_TRAP_UNCATEGORIZED;
}
return CP_ACCESS_OK;
}
-static void oslar_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static CPAccessResult access_el1nvpct(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
{
- /* Writes to OSLAR_EL1 may update the OS lock status, which can be
- * read via a bit in OSLSR_EL1.
- */
- int oslock;
+ if (arm_current_el(env) == 1) {
+ /* This must be a FEAT_NV access with NVx == 101 */
+ if (FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, EL1NVPCT)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ }
+ return e2h_access(env, ri, isread);
+}
- if (ri->state == ARM_CP_STATE_AA32) {
- oslock = (value == 0xC5ACCE55);
+static CPAccessResult access_el1nvvct(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 1) {
+ /* This must be a FEAT_NV access with NVx == 101 */
+ if (FIELD_EX64(env->cp15.cnthctl_el2, CNTHCTL, EL1NVVCT)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ }
+ return e2h_access(env, ri, isread);
+}
+
+/* Test if system register redirection is to occur in the current state. */
+static bool redirect_for_e2h(CPUARMState *env)
+{
+ return arm_current_el(env) == 2 && (arm_hcr_el2_eff(env) & HCR_E2H);
+}
+
+static uint64_t el2_e2h_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ CPReadFn *readfn;
+
+ if (redirect_for_e2h(env)) {
+ /* Switch to the saved EL2 version of the register. */
+ ri = ri->opaque;
+ readfn = ri->readfn;
} else {
- oslock = value & 1;
+ readfn = ri->orig_readfn;
}
+ if (readfn == NULL) {
+ readfn = raw_read;
+ }
+ return readfn(env, ri);
+}
- env->cp15.oslsr_el1 = deposit32(env->cp15.oslsr_el1, 1, 1, oslock);
+static void el2_e2h_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPWriteFn *writefn;
+
+ if (redirect_for_e2h(env)) {
+ /* Switch to the saved EL2 version of the register. */
+ ri = ri->opaque;
+ writefn = ri->writefn;
+ } else {
+ writefn = ri->orig_writefn;
+ }
+ if (writefn == NULL) {
+ writefn = raw_write;
+ }
+ writefn(env, ri, value);
}
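
The hooks above assume the register definition carries the EL1 accessors in orig_readfn/orig_writefn and a pointer to its EL2 twin in opaque. A minimal sketch of how such a redirecting alias could be wired up (the names el1_reg/el2_reg are hypothetical; the real registration happens elsewhere in this file and is not shown in this hunk):

    ARMCPRegInfo wrapped = *el1_reg;          /* start from the EL1 definition   */
    wrapped.orig_readfn  = el1_reg->readfn;   /* keep the EL1 accessors          */
    wrapped.orig_writefn = el1_reg->writefn;
    wrapped.opaque  = (void *)el2_reg;        /* remember the EL2 counterpart    */
    wrapped.readfn  = el2_e2h_read;           /* dispatch on redirect_for_e2h()  */
    wrapped.writefn = el2_e2h_write;
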
-static const ARMCPRegInfo debug_cp_reginfo[] = {
- /* DBGDRAR, DBGDSAR: always RAZ since we don't implement memory mapped
- * debug components. The AArch64 version of DBGDRAR is named MDRAR_EL1;
- * unlike DBGDRAR it is never accessible from EL0.
- * DBGDSAR is deprecated and must RAZ from v8 anyway, so it has no AArch64
- * accessor.
- */
- { .name = "DBGDRAR", .cp = 14, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
- .access = PL0_R, .accessfn = access_tdra,
- .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "MDRAR_EL1", .state = ARM_CP_STATE_AA64,
- .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 0,
- .access = PL1_R, .accessfn = access_tdra,
- .type = ARM_CP_CONST, .resetvalue = 0 },
- { .name = "DBGDSAR", .cp = 14, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
- .access = PL0_R, .accessfn = access_tdra,
- .type = ARM_CP_CONST, .resetvalue = 0 },
- /* Monitor debug system control register; the 32-bit alias is DBGDSCRext. */
- { .name = "MDSCR_EL1", .state = ARM_CP_STATE_BOTH,
- .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
- .access = PL1_RW, .accessfn = access_tda,
- .fieldoffset = offsetof(CPUARMState, cp15.mdscr_el1),
- .resetvalue = 0 },
- /* MDCCSR_EL0, aka DBGDSCRint. This is a read-only mirror of MDSCR_EL1.
- * We don't implement the configurable EL0 access.
- */
- { .name = "MDCCSR_EL0", .state = ARM_CP_STATE_BOTH,
- .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0,
- .type = ARM_CP_ALIAS,
- .access = PL1_R, .accessfn = access_tda,
- .fieldoffset = offsetof(CPUARMState, cp15.mdscr_el1), },
- { .name = "OSLAR_EL1", .state = ARM_CP_STATE_BOTH,
- .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 4,
- .access = PL1_W, .type = ARM_CP_NO_RAW,
- .accessfn = access_tdosa,
- .writefn = oslar_write },
- { .name = "OSLSR_EL1", .state = ARM_CP_STATE_BOTH,
- .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 4,
- .access = PL1_R, .resetvalue = 10,
- .accessfn = access_tdosa,
- .fieldoffset = offsetof(CPUARMState, cp15.oslsr_el1) },
- /* Dummy OSDLR_EL1: 32-bit Linux will read this */
- { .name = "OSDLR_EL1", .state = ARM_CP_STATE_BOTH,
- .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 3, .opc2 = 4,
- .access = PL1_RW, .accessfn = access_tdosa,
- .type = ARM_CP_NOP },
- /* Dummy DBGVCR: Linux wants to clear this on startup, but we don't
- * implement vector catch debug events yet.
- */
- { .name = "DBGVCR",
- .cp = 14, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0,
- .access = PL1_RW, .accessfn = access_tda,
- .type = ARM_CP_NOP },
- /* Dummy DBGVCR32_EL2 (which is only for a 64-bit hypervisor
- * to save and restore a 32-bit guest's DBGVCR)
- */
- { .name = "DBGVCR32_EL2", .state = ARM_CP_STATE_AA64,
- .opc0 = 2, .opc1 = 4, .crn = 0, .crm = 7, .opc2 = 0,
- .access = PL2_RW, .accessfn = access_tda,
- .type = ARM_CP_NOP },
- /* Dummy MDCCINT_EL1, since we don't implement the Debug Communications
- * Channel but Linux may try to access this register. The 32-bit
- * alias is DBGDCCINT.
- */
- { .name = "MDCCINT_EL1", .state = ARM_CP_STATE_BOTH,
- .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0,
- .access = PL1_RW, .accessfn = access_tda,
- .type = ARM_CP_NOP },
- REGINFO_SENTINEL
-};
+static uint64_t el2_e2h_e12_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ /* Pass the EL1 register accessor its ri, not the EL12 alias ri */
+ return ri->orig_readfn(env, ri->opaque);
+}
-static const ARMCPRegInfo debug_lpae_cp_reginfo[] = {
- /* 64 bit access versions of the (dummy) debug registers */
- { .name = "DBGDRAR", .cp = 14, .crm = 1, .opc1 = 0,
- .access = PL0_R, .type = ARM_CP_CONST|ARM_CP_64BIT, .resetvalue = 0 },
- { .name = "DBGDSAR", .cp = 14, .crm = 2, .opc1 = 0,
- .access = PL0_R, .type = ARM_CP_CONST|ARM_CP_64BIT, .resetvalue = 0 },
- REGINFO_SENTINEL
-};
+static void el2_e2h_e12_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /* Pass the EL1 register accessor its ri, not the EL12 alias ri */
+ return ri->orig_writefn(env, ri->opaque, value);
+}
-/* Return the exception level to which exceptions should be taken
- * via SVEAccessTrap. If an exception should be routed through
- * AArch64.AdvSIMDFPAccessTrap, return 0; fp_exception_el should
- * take care of raising that exception.
- * C.f. the ARM pseudocode function CheckSVEEnabled.
- */
-int sve_exception_el(CPUARMState *env, int el)
+static CPAccessResult el2_e2h_e12_access(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ bool isread)
{
-#ifndef CONFIG_USER_ONLY
- if (el <= 1) {
- bool disabled = false;
+ if (arm_current_el(env) == 1) {
+ /*
+ * This must be a FEAT_NV access (will either trap or redirect
+ * to memory). None of the registers with _EL12 aliases want to
+ * apply their trap controls for this kind of access, so don't
+ * call the orig_accessfn or do the "UNDEF when E2H is 0" check.
+ */
+ return CP_ACCESS_OK;
+ }
+ /* FOO_EL12 aliases only exist when E2H is 1; otherwise they UNDEF */
+ if (!(arm_hcr_el2_eff(env) & HCR_E2H)) {
+ return CP_ACCESS_TRAP_UNCATEGORIZED;
+ }
+ if (ri->orig_accessfn) {
+ return ri->orig_accessfn(env, ri->opaque, isread);
+ }
+ return CP_ACCESS_OK;
+}
+
+static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu)
+{
+ struct E2HAlias {
+ uint32_t src_key, dst_key, new_key;
+ const char *src_name, *dst_name, *new_name;
+ bool (*feature)(const ARMISARegisters *id);
+ };
+
+#define K(op0, op1, crn, crm, op2) \
+ ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2)
+
+ static const struct E2HAlias aliases[] = {
+ { K(3, 0, 1, 0, 0), K(3, 4, 1, 0, 0), K(3, 5, 1, 0, 0),
+ "SCTLR", "SCTLR_EL2", "SCTLR_EL12" },
+ { K(3, 0, 1, 0, 2), K(3, 4, 1, 1, 2), K(3, 5, 1, 0, 2),
+ "CPACR", "CPTR_EL2", "CPACR_EL12" },
+ { K(3, 0, 2, 0, 0), K(3, 4, 2, 0, 0), K(3, 5, 2, 0, 0),
+ "TTBR0_EL1", "TTBR0_EL2", "TTBR0_EL12" },
+ { K(3, 0, 2, 0, 1), K(3, 4, 2, 0, 1), K(3, 5, 2, 0, 1),
+ "TTBR1_EL1", "TTBR1_EL2", "TTBR1_EL12" },
+ { K(3, 0, 2, 0, 2), K(3, 4, 2, 0, 2), K(3, 5, 2, 0, 2),
+ "TCR_EL1", "TCR_EL2", "TCR_EL12" },
+ { K(3, 0, 4, 0, 0), K(3, 4, 4, 0, 0), K(3, 5, 4, 0, 0),
+ "SPSR_EL1", "SPSR_EL2", "SPSR_EL12" },
+ { K(3, 0, 4, 0, 1), K(3, 4, 4, 0, 1), K(3, 5, 4, 0, 1),
+ "ELR_EL1", "ELR_EL2", "ELR_EL12" },
+ { K(3, 0, 5, 1, 0), K(3, 4, 5, 1, 0), K(3, 5, 5, 1, 0),
+ "AFSR0_EL1", "AFSR0_EL2", "AFSR0_EL12" },
+ { K(3, 0, 5, 1, 1), K(3, 4, 5, 1, 1), K(3, 5, 5, 1, 1),
+ "AFSR1_EL1", "AFSR1_EL2", "AFSR1_EL12" },
+ { K(3, 0, 5, 2, 0), K(3, 4, 5, 2, 0), K(3, 5, 5, 2, 0),
+ "ESR_EL1", "ESR_EL2", "ESR_EL12" },
+ { K(3, 0, 6, 0, 0), K(3, 4, 6, 0, 0), K(3, 5, 6, 0, 0),
+ "FAR_EL1", "FAR_EL2", "FAR_EL12" },
+ { K(3, 0, 10, 2, 0), K(3, 4, 10, 2, 0), K(3, 5, 10, 2, 0),
+ "MAIR_EL1", "MAIR_EL2", "MAIR_EL12" },
+ { K(3, 0, 10, 3, 0), K(3, 4, 10, 3, 0), K(3, 5, 10, 3, 0),
+ "AMAIR0", "AMAIR_EL2", "AMAIR_EL12" },
+ { K(3, 0, 12, 0, 0), K(3, 4, 12, 0, 0), K(3, 5, 12, 0, 0),
+ "VBAR", "VBAR_EL2", "VBAR_EL12" },
+ { K(3, 0, 13, 0, 1), K(3, 4, 13, 0, 1), K(3, 5, 13, 0, 1),
+ "CONTEXTIDR_EL1", "CONTEXTIDR_EL2", "CONTEXTIDR_EL12" },
+ { K(3, 0, 14, 1, 0), K(3, 4, 14, 1, 0), K(3, 5, 14, 1, 0),
+ "CNTKCTL", "CNTHCTL_EL2", "CNTKCTL_EL12" },
- /* The CPACR.ZEN controls traps to EL1:
- * 0, 2 : trap EL0 and EL1 accesses
- * 1 : trap only EL0 accesses
- * 3 : trap no accesses
+ /*
+ * Note that redirection of ZCR is mentioned in the description
+ * of ZCR_EL2, and aliasing in the description of ZCR_EL1, but
+ * not in the summary table.
*/
- if (!extract32(env->cp15.cpacr_el1, 16, 1)) {
- disabled = true;
- } else if (!extract32(env->cp15.cpacr_el1, 17, 1)) {
- disabled = el == 0;
+ { K(3, 0, 1, 2, 0), K(3, 4, 1, 2, 0), K(3, 5, 1, 2, 0),
+ "ZCR_EL1", "ZCR_EL2", "ZCR_EL12", isar_feature_aa64_sve },
+ { K(3, 0, 1, 2, 6), K(3, 4, 1, 2, 6), K(3, 5, 1, 2, 6),
+ "SMCR_EL1", "SMCR_EL2", "SMCR_EL12", isar_feature_aa64_sme },
+
+ { K(3, 0, 5, 6, 0), K(3, 4, 5, 6, 0), K(3, 5, 5, 6, 0),
+ "TFSR_EL1", "TFSR_EL2", "TFSR_EL12", isar_feature_aa64_mte },
+
+ { K(3, 0, 13, 0, 7), K(3, 4, 13, 0, 7), K(3, 5, 13, 0, 7),
+ "SCXTNUM_EL1", "SCXTNUM_EL2", "SCXTNUM_EL12",
+ isar_feature_aa64_scxtnum },
+
+ /* TODO: ARMv8.2-SPE -- PMSCR_EL2 */
+ /* TODO: ARMv8.4-Trace -- TRFCR_EL2 */
+ };
+#undef K
+
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(aliases); i++) {
+ const struct E2HAlias *a = &aliases[i];
+ ARMCPRegInfo *src_reg, *dst_reg, *new_reg;
+ bool ok;
+
+ if (a->feature && !a->feature(&cpu->isar)) {
+ continue;
}
- if (disabled) {
- /* route_to_el2 */
- return (arm_feature(env, ARM_FEATURE_EL2)
- && !arm_is_secure(env)
- && (env->cp15.hcr_el2 & HCR_TGE) ? 2 : 1);
+
+ src_reg = g_hash_table_lookup(cpu->cp_regs,
+ (gpointer)(uintptr_t)a->src_key);
+ dst_reg = g_hash_table_lookup(cpu->cp_regs,
+ (gpointer)(uintptr_t)a->dst_key);
+ g_assert(src_reg != NULL);
+ g_assert(dst_reg != NULL);
+
+ /* Cross-compare names to detect typos in the keys. */
+ g_assert(strcmp(src_reg->name, a->src_name) == 0);
+ g_assert(strcmp(dst_reg->name, a->dst_name) == 0);
+
+ /* None of the core system registers use opaque; we will. */
+ g_assert(src_reg->opaque == NULL);
+
+ /* Create alias before redirection so we dup the right data. */
+ new_reg = g_memdup(src_reg, sizeof(ARMCPRegInfo));
+
+ new_reg->name = a->new_name;
+ new_reg->type |= ARM_CP_ALIAS;
+ /* Remove PL1/PL0 access, leaving PL2/PL3 R/W in place. */
+ new_reg->access &= PL2_RW | PL3_RW;
+ /* The new_reg op fields are as per new_key, not the target reg */
+ new_reg->crn = (a->new_key & CP_REG_ARM64_SYSREG_CRN_MASK)
+ >> CP_REG_ARM64_SYSREG_CRN_SHIFT;
+ new_reg->crm = (a->new_key & CP_REG_ARM64_SYSREG_CRM_MASK)
+ >> CP_REG_ARM64_SYSREG_CRM_SHIFT;
+ new_reg->opc0 = (a->new_key & CP_REG_ARM64_SYSREG_OP0_MASK)
+ >> CP_REG_ARM64_SYSREG_OP0_SHIFT;
+ new_reg->opc1 = (a->new_key & CP_REG_ARM64_SYSREG_OP1_MASK)
+ >> CP_REG_ARM64_SYSREG_OP1_SHIFT;
+ new_reg->opc2 = (a->new_key & CP_REG_ARM64_SYSREG_OP2_MASK)
+ >> CP_REG_ARM64_SYSREG_OP2_SHIFT;
+ new_reg->opaque = src_reg;
+ new_reg->orig_readfn = src_reg->readfn ?: raw_read;
+ new_reg->orig_writefn = src_reg->writefn ?: raw_write;
+ new_reg->orig_accessfn = src_reg->accessfn;
+ if (!new_reg->raw_readfn) {
+ new_reg->raw_readfn = raw_read;
+ }
+ if (!new_reg->raw_writefn) {
+ new_reg->raw_writefn = raw_write;
+ }
+ new_reg->readfn = el2_e2h_e12_read;
+ new_reg->writefn = el2_e2h_e12_write;
+ new_reg->accessfn = el2_e2h_e12_access;
+
+ /*
+ * If the _EL1 register is redirected to memory by FEAT_NV2,
+ * then it shares the offset with the _EL12 register,
+ * and which one is redirected depends on HCR_EL2.NV1.
+ */
+ if (new_reg->nv2_redirect_offset) {
+ assert(new_reg->nv2_redirect_offset & NV2_REDIR_NV1);
+ new_reg->nv2_redirect_offset &= ~NV2_REDIR_NV1;
+ new_reg->nv2_redirect_offset |= NV2_REDIR_NO_NV1;
}
- /* Check CPACR.FPEN. */
- if (!extract32(env->cp15.cpacr_el1, 20, 1)) {
- disabled = true;
- } else if (!extract32(env->cp15.cpacr_el1, 21, 1)) {
- disabled = el == 0;
+ ok = g_hash_table_insert(cpu->cp_regs,
+ (gpointer)(uintptr_t)a->new_key, new_reg);
+ g_assert(ok);
+
+ src_reg->opaque = dst_reg;
+ src_reg->orig_readfn = src_reg->readfn ?: raw_read;
+ src_reg->orig_writefn = src_reg->writefn ?: raw_write;
+ if (!src_reg->raw_readfn) {
+ src_reg->raw_readfn = raw_read;
}
- if (disabled) {
- return 0;
+ if (!src_reg->raw_writefn) {
+ src_reg->raw_writefn = raw_write;
}
+ src_reg->readfn = el2_e2h_read;
+ src_reg->writefn = el2_e2h_write;
}
+}
+#endif
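
In the alias table above, each K() key packs the usual (op0, op1, CRn, CRm, op2) system-register encoding; a FOO_EL12 alias differs from FOO_EL1 only in op1 (5 instead of 0), while op1 = 4 selects FOO_EL2. A hedged sketch of that packing, using a hypothetical pack_sysreg() helper rather than QEMU's ENCODE_AA64_CP_REG macro:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical packing of an AArch64 sysreg encoding into one key;
 * the field layout here is chosen only for this illustration. */
static uint32_t pack_sysreg(unsigned op0, unsigned op1, unsigned crn,
                            unsigned crm, unsigned op2)
{
    return (op0 << 16) | (op1 << 12) | (crn << 8) | (crm << 4) | op2;
}

int main(void)
{
    /* SCTLR_EL1 is op0=3 op1=0 crn=1 crm=0 op2=0; the EL2 register uses
     * op1=4 and the EL12 alias op1=5, as in the table above. */
    printf("SCTLR_EL1  key = %#x\n", pack_sysreg(3, 0, 1, 0, 0));
    printf("SCTLR_EL2  key = %#x\n", pack_sysreg(3, 4, 1, 0, 0));
    printf("SCTLR_EL12 key = %#x\n", pack_sysreg(3, 5, 1, 0, 0));
    return 0;
}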
- /* CPTR_EL2. Since TZ and TFP are positive,
- * they will be zero when EL2 is not present.
- */
- if (el <= 2 && !arm_is_secure_below_el3(env)) {
- if (env->cp15.cptr_el[2] & CPTR_TZ) {
- return 2;
+static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ int cur_el = arm_current_el(env);
+
+ if (cur_el < 2) {
+ uint64_t hcr = arm_hcr_el2_eff(env);
+
+ if (cur_el == 0) {
+ if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+ if (!(env->cp15.sctlr_el[2] & SCTLR_UCT)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ } else {
+ if (!(env->cp15.sctlr_el[1] & SCTLR_UCT)) {
+ return CP_ACCESS_TRAP;
+ }
+ if (hcr & HCR_TID2) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ }
+ } else if (hcr & HCR_TID2) {
+ return CP_ACCESS_TRAP_EL2;
}
- if (env->cp15.cptr_el[2] & CPTR_TFP) {
- return 0;
+ }
+
+ return CP_ACCESS_OK;
+}
+
+/*
+ * Check for traps to RAS registers, which are controlled
+ * by HCR_EL2.TERR and SCR_EL3.TERR.
+ */
+static CPAccessResult access_terr(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ int el = arm_current_el(env);
+
+ if (el < 2 && (arm_hcr_el2_eff(env) & HCR_TERR)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ if (el < 3 && (env->cp15.scr_el3 & SCR_TERR)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
+
+static uint64_t disr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ int el = arm_current_el(env);
+
+ if (el < 2 && (arm_hcr_el2_eff(env) & HCR_AMO)) {
+ return env->cp15.vdisr_el2;
+ }
+ if (el < 3 && (env->cp15.scr_el3 & SCR_EA)) {
+ return 0; /* RAZ/WI */
+ }
+ return env->cp15.disr_el1;
+}
+
+static void disr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val)
+{
+ int el = arm_current_el(env);
+
+ if (el < 2 && (arm_hcr_el2_eff(env) & HCR_AMO)) {
+ env->cp15.vdisr_el2 = val;
+ return;
+ }
+ if (el < 3 && (env->cp15.scr_el3 & SCR_EA)) {
+ return; /* RAZ/WI */
+ }
+ env->cp15.disr_el1 = val;
+}
+
+/*
+ * Minimal RAS implementation with no Error Records.
+ * Which means that all of the Error Record registers:
+ * ERXADDR_EL1
+ * ERXCTLR_EL1
+ * ERXFR_EL1
+ * ERXMISC0_EL1
+ * ERXMISC1_EL1
+ * ERXMISC2_EL1
+ * ERXMISC3_EL1
+ * ERXPFGCDN_EL1 (RASv1p1)
+ * ERXPFGCTL_EL1 (RASv1p1)
+ * ERXPFGF_EL1 (RASv1p1)
+ * ERXSTATUS_EL1
+ * and
+ * ERRSELR_EL1
+ * may generate UNDEFINED, which is the effect we get by not
+ * listing them at all.
+ *
+ * These registers have fine-grained trap bits, but UNDEF-to-EL1
+ * is higher priority than FGT-to-EL2 so we do not need to list them
+ * in order to check for an FGT.
+ */
+static const ARMCPRegInfo minimal_ras_reginfo[] = {
+ { .name = "DISR_EL1", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 1, .opc2 = 1,
+ .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.disr_el1),
+ .readfn = disr_read, .writefn = disr_write, .raw_writefn = raw_write },
+ { .name = "ERRIDR_EL1", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 3, .opc2 = 0,
+ .access = PL1_R, .accessfn = access_terr,
+ .fgt = FGT_ERRIDR_EL1,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "VDISR_EL2", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 1, .opc2 = 1,
+ .nv2_redirect_offset = 0x500,
+ .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.vdisr_el2) },
+ { .name = "VSESR_EL2", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 3,
+ .nv2_redirect_offset = 0x508,
+ .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.vsesr_el2) },
+};
+
+/*
+ * Return the exception level to which exceptions should be taken
+ * via SVEAccessTrap. This excludes the check for whether the exception
+ * should be routed through AArch64.AdvSIMDFPAccessTrap. That can easily
+ * be found by testing 0 < fp_exception_el < sve_exception_el.
+ *
+ * C.f. the ARM pseudocode function CheckSVEEnabled. Note that the
+ * pseudocode does *not* separate out the FP trap checks, but has them
+ * all in one function.
+ */
+int sve_exception_el(CPUARMState *env, int el)
+{
+#ifndef CONFIG_USER_ONLY
+ if (el <= 1 && !el_is_in_host(env, el)) {
+ switch (FIELD_EX64(env->cp15.cpacr_el1, CPACR_EL1, ZEN)) {
+ case 1:
+ if (el != 0) {
+ break;
+ }
+ /* fall through */
+ case 0:
+ case 2:
+ return 1;
+ }
+ }
+
+ if (el <= 2 && arm_is_el2_enabled(env)) {
+ /* CPTR_EL2 changes format with HCR_EL2.E2H (regardless of TGE). */
+ if (env->cp15.hcr_el2 & HCR_E2H) {
+ switch (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, ZEN)) {
+ case 1:
+ if (el != 0 || !(env->cp15.hcr_el2 & HCR_TGE)) {
+ break;
+ }
+ /* fall through */
+ case 0:
+ case 2:
+ return 2;
+ }
+ } else {
+ if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TZ)) {
+ return 2;
+ }
}
}
/* CPTR_EL3. Since EZ is negative we must check for EL3. */
if (arm_feature(env, ARM_FEATURE_EL3)
- && !(env->cp15.cptr_el[3] & CPTR_EZ)) {
+ && !FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, EZ)) {
+ return 3;
+ }
+#endif
+ return 0;
+}
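
The ZEN switch above encodes the architectural trap rules for CPACR_EL1.ZEN (bits [17:16]): 0b00 and 0b10 trap both EL0 and EL1 accesses, 0b01 traps only EL0, and 0b11 traps neither. A small standalone sketch of the same decode, without QEMU's FIELD_EX64 machinery:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Returns true if an SVE access from the given EL (0 or 1) is trapped by
 * CPACR_EL1.ZEN; mirrors the switch in sve_exception_el above. */
static bool zen_traps(uint64_t cpacr_el1, int el)
{
    unsigned zen = (cpacr_el1 >> 16) & 3;

    switch (zen) {
    case 1:                 /* 0b01: trap EL0 accesses only */
        return el == 0;
    case 0:
    case 2:                 /* 0b00, 0b10: trap EL0 and EL1 */
        return true;
    default:                /* 0b11: no trap */
        return false;
    }
}

int main(void)
{
    printf("ZEN=0b01, EL1: %s\n", zen_traps(1ULL << 16, 1) ? "trap" : "ok");
    printf("ZEN=0b01, EL0: %s\n", zen_traps(1ULL << 16, 0) ? "trap" : "ok");
    printf("ZEN=0b11, EL0: %s\n", zen_traps(3ULL << 16, 0) ? "trap" : "ok");
    return 0;
}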
+
+/*
+ * Return the exception level to which exceptions should be taken for SME.
+ * C.f. the ARM pseudocode function CheckSMEAccess.
+ */
+int sme_exception_el(CPUARMState *env, int el)
+{
+#ifndef CONFIG_USER_ONLY
+ if (el <= 1 && !el_is_in_host(env, el)) {
+ switch (FIELD_EX64(env->cp15.cpacr_el1, CPACR_EL1, SMEN)) {
+ case 1:
+ if (el != 0) {
+ break;
+ }
+ /* fall through */
+ case 0:
+ case 2:
+ return 1;
+ }
+ }
+
+ if (el <= 2 && arm_is_el2_enabled(env)) {
+ /* CPTR_EL2 changes format with HCR_EL2.E2H (regardless of TGE). */
+ if (env->cp15.hcr_el2 & HCR_E2H) {
+ switch (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, SMEN)) {
+ case 1:
+ if (el != 0 || !(env->cp15.hcr_el2 & HCR_TGE)) {
+ break;
+ }
+ /* fall through */
+ case 0:
+ case 2:
+ return 2;
+ }
+ } else {
+ if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TSM)) {
+ return 2;
+ }
+ }
+ }
+
+ /* CPTR_EL3. Since ESM is negative we must check for EL3. */
+ if (arm_feature(env, ARM_FEATURE_EL3)
+ && !FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, ESM)) {
return 3;
}
#endif
@@ -4467,420 +7176,1456 @@ int sve_exception_el(CPUARMState *env, int el)
/*
* Given that SVE is enabled, return the vector length for EL.
*/
-uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
+uint32_t sve_vqm1_for_el_sm(CPUARMState *env, int el, bool sm)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
- uint32_t zcr_len = cpu->sve_max_vq - 1;
+ ARMCPU *cpu = env_archcpu(env);
+ uint64_t *cr = env->vfp.zcr_el;
+ uint32_t map = cpu->sve_vq.map;
+ uint32_t len = ARM_MAX_VQ - 1;
- if (el <= 1) {
- zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
+ if (sm) {
+ cr = env->vfp.smcr_el;
+ map = cpu->sme_vq.map;
}
- if (el < 2 && arm_feature(env, ARM_FEATURE_EL2)) {
- zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
+
+ if (el <= 1 && !el_is_in_host(env, el)) {
+ len = MIN(len, 0xf & (uint32_t)cr[1]);
}
- if (el < 3 && arm_feature(env, ARM_FEATURE_EL3)) {
- zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
+ if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) {
+ len = MIN(len, 0xf & (uint32_t)cr[2]);
}
- return zcr_len;
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
+ len = MIN(len, 0xf & (uint32_t)cr[3]);
+ }
+
+ map &= MAKE_64BIT_MASK(0, len + 1);
+ if (map != 0) {
+ return 31 - clz32(map);
+ }
+
+ /* Bit 0 is always set for Normal SVE -- not so for Streaming SVE. */
+ assert(sm);
+ return ctz32(cpu->sme_vq.map);
+}
+
+uint32_t sve_vqm1_for_el(CPUARMState *env, int el)
+{
+ return sve_vqm1_for_el_sm(env, el, FIELD_EX64(env->svcr, SVCR, SM));
}
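
sve_vqm1_for_el_sm() first clamps the length by the minimum of the applicable ZCR/SMCR LEN fields and then picks the highest bit still set in the CPU's supported-VQ bitmap at or below that clamp. A self-contained sketch of that selection step, with illustrative values only:

#include <stdint.h>
#include <stdio.h>

/* Highest set bit index <= len_clamp in a bitmap of supported vector
 * quadword counts (bit n set => VL of (n+1)*128 bits supported); mirrors
 * the map handling in sve_vqm1_for_el_sm. Assumes len_clamp < 31.
 * Returns -1 for an empty masked map, which the real code only reaches
 * for Streaming SVE and handles via the sme_vq fallback. */
static int effective_vq_m1(uint32_t vq_map, unsigned len_clamp)
{
    uint32_t masked = vq_map & ((1u << (len_clamp + 1)) - 1);
    int vq_m1 = 31;

    while (vq_m1 >= 0 && !(masked & (1u << vq_m1))) {
        vq_m1--;
    }
    return vq_m1;
}

int main(void)
{
    uint32_t map = 0xb;  /* VQ 1, 2 and 4 supported: VLs 128, 256, 512 bits */

    /* LEN fields allow up to 512 bits (len=3): VQ 4 is supported, vq-1 = 3. */
    printf("clamp 3 -> vq-1 = %d\n", effective_vq_m1(map, 3));
    /* Clamped to 384 bits (len=2): VQ 3 unsupported, fall back to 256 bits. */
    printf("clamp 2 -> vq-1 = %d\n", effective_vq_m1(map, 2));
    return 0;
}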
static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
int cur_el = arm_current_el(env);
- int old_len = sve_zcr_len_for_el(env, cur_el);
+ int old_len = sve_vqm1_for_el(env, cur_el);
int new_len;
/* Bits other than [3:0] are RAZ/WI. */
+ QEMU_BUILD_BUG_ON(ARM_MAX_VQ > 16);
raw_write(env, ri, value & 0xf);
/*
* Because we arrived here, we know both FP and SVE are enabled;
* otherwise we would have trapped access to the ZCR_ELn register.
*/
- new_len = sve_zcr_len_for_el(env, cur_el);
+ new_len = sve_vqm1_for_el(env, cur_el);
if (new_len < old_len) {
aarch64_sve_narrow_vq(env, new_len + 1);
}
}
-static const ARMCPRegInfo zcr_el1_reginfo = {
- .name = "ZCR_EL1", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 0,
- .access = PL1_RW, .type = ARM_CP_SVE,
- .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[1]),
- .writefn = zcr_write, .raw_writefn = raw_write
-};
-
-static const ARMCPRegInfo zcr_el2_reginfo = {
- .name = "ZCR_EL2", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 0,
- .access = PL2_RW, .type = ARM_CP_SVE,
- .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[2]),
- .writefn = zcr_write, .raw_writefn = raw_write
+static const ARMCPRegInfo zcr_reginfo[] = {
+ { .name = "ZCR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 0,
+ .nv2_redirect_offset = 0x1e0 | NV2_REDIR_NV1,
+ .access = PL1_RW, .type = ARM_CP_SVE,
+ .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[1]),
+ .writefn = zcr_write, .raw_writefn = raw_write },
+ { .name = "ZCR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 0,
+ .access = PL2_RW, .type = ARM_CP_SVE,
+ .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[2]),
+ .writefn = zcr_write, .raw_writefn = raw_write },
+ { .name = "ZCR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 2, .opc2 = 0,
+ .access = PL3_RW, .type = ARM_CP_SVE,
+ .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[3]),
+ .writefn = zcr_write, .raw_writefn = raw_write },
};
-static const ARMCPRegInfo zcr_no_el2_reginfo = {
- .name = "ZCR_EL2", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 0,
- .access = PL2_RW, .type = ARM_CP_SVE,
- .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore
-};
+#ifdef TARGET_AARCH64
+static CPAccessResult access_tpidr2(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ int el = arm_current_el(env);
-static const ARMCPRegInfo zcr_el3_reginfo = {
- .name = "ZCR_EL3", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 2, .opc2 = 0,
- .access = PL3_RW, .type = ARM_CP_SVE,
- .fieldoffset = offsetof(CPUARMState, vfp.zcr_el[3]),
- .writefn = zcr_write, .raw_writefn = raw_write
-};
+ if (el == 0) {
+ uint64_t sctlr = arm_sctlr(env, el);
+ if (!(sctlr & SCTLR_EnTP2)) {
+ return CP_ACCESS_TRAP;
+ }
+ }
+ /* TODO: FEAT_FGT */
+ if (el < 3
+ && arm_feature(env, ARM_FEATURE_EL3)
+ && !(env->cp15.scr_el3 & SCR_ENTP2)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
-void hw_watchpoint_update(ARMCPU *cpu, int n)
+static CPAccessResult access_smprimap(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
{
- CPUARMState *env = &cpu->env;
- vaddr len = 0;
- vaddr wvr = env->cp15.dbgwvr[n];
- uint64_t wcr = env->cp15.dbgwcr[n];
- int mask;
- int flags = BP_CPU | BP_STOP_BEFORE_ACCESS;
+ /* If EL1 this is a FEAT_NV access and CPTR_EL3.ESM doesn't apply */
+ if (arm_current_el(env) == 2
+ && arm_feature(env, ARM_FEATURE_EL3)
+ && !FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, ESM)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
- if (env->cpu_watchpoint[n]) {
- cpu_watchpoint_remove_by_ref(CPU(cpu), env->cpu_watchpoint[n]);
- env->cpu_watchpoint[n] = NULL;
+static CPAccessResult access_smpri(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) < 3
+ && arm_feature(env, ARM_FEATURE_EL3)
+ && !FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, ESM)) {
+ return CP_ACCESS_TRAP_EL3;
}
+ return CP_ACCESS_OK;
+}
- if (!extract64(wcr, 0, 1)) {
- /* E bit clear : watchpoint disabled */
+/* ResetSVEState */
+static void arm_reset_sve_state(CPUARMState *env)
+{
+ memset(env->vfp.zregs, 0, sizeof(env->vfp.zregs));
+ /* Recall that FFR is stored as pregs[16]. */
+ memset(env->vfp.pregs, 0, sizeof(env->vfp.pregs));
+ vfp_set_fpcr(env, 0x0800009f);
+}
+
+void aarch64_set_svcr(CPUARMState *env, uint64_t new, uint64_t mask)
+{
+ uint64_t change = (env->svcr ^ new) & mask;
+
+ if (change == 0) {
return;
}
+ env->svcr ^= change;
- switch (extract64(wcr, 3, 2)) {
- case 0:
- /* LSC 00 is reserved and must behave as if the wp is disabled */
- return;
- case 1:
- flags |= BP_MEM_READ;
- break;
- case 2:
- flags |= BP_MEM_WRITE;
- break;
- case 3:
- flags |= BP_MEM_ACCESS;
- break;
+ if (change & R_SVCR_SM_MASK) {
+ arm_reset_sve_state(env);
}
- /* Attempts to use both MASK and BAS fields simultaneously are
- * CONSTRAINED UNPREDICTABLE; we opt to ignore BAS in this case,
- * thus generating a watchpoint for every byte in the masked region.
+ /*
+ * ResetSMEState.
+ *
+ * SetPSTATE_ZA zeros on enable and disable. We can zero this only
+ * on enable: while disabled, the storage is inaccessible and the
+ * value does not matter. We're not saving the storage in vmstate
+ * when disabled either.
*/
- mask = extract64(wcr, 24, 4);
- if (mask == 1 || mask == 2) {
- /* Reserved values of MASK; we must act as if the mask value was
- * some non-reserved value, or as if the watchpoint were disabled.
- * We choose the latter.
- */
- return;
- } else if (mask) {
- /* Watchpoint covers an aligned area up to 2GB in size */
- len = 1ULL << mask;
- /* If masked bits in WVR are not zero it's CONSTRAINED UNPREDICTABLE
- * whether the watchpoint fires when the unmasked bits match; we opt
- * to generate the exceptions.
- */
- wvr &= ~(len - 1);
- } else {
- /* Watchpoint covers bytes defined by the byte address select bits */
- int bas = extract64(wcr, 5, 8);
- int basstart;
-
- if (bas == 0) {
- /* This must act as if the watchpoint is disabled */
- return;
- }
+ if (change & new & R_SVCR_ZA_MASK) {
+ memset(env->zarray, 0, sizeof(env->zarray));
+ }
- if (extract64(wvr, 2, 1)) {
- /* Deprecated case of an only 4-aligned address. BAS[7:4] are
- * ignored, and BAS[3:0] define which bytes to watch.
- */
- bas &= 0xf;
- }
- /* The BAS bits are supposed to be programmed to indicate a contiguous
- * range of bytes. Otherwise it is CONSTRAINED UNPREDICTABLE whether
- * we fire for each byte in the word/doubleword addressed by the WVR.
- * We choose to ignore any non-zero bits after the first range of 1s.
- */
- basstart = ctz32(bas);
- len = cto32(bas >> basstart);
- wvr += basstart;
+ if (tcg_enabled()) {
+ arm_rebuild_hflags(env);
}
+}
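
aarch64_set_svcr() uses an XOR change mask so it knows exactly which of SVCR.SM and SVCR.ZA flipped, and it zeroes ZA storage only on a 0-to-1 transition of ZA. A toy version of the idiom, with SM and ZA at bits 0 and 1 as in the architecture but everything else simplified:

#include <stdint.h>
#include <stdio.h>

#define SVCR_SM_DEMO (1ULL << 0)
#define SVCR_ZA_DEMO (1ULL << 1)

/* Mirrors the change-detection idiom in aarch64_set_svcr: only bits selected
 * by 'mask' may change, and we can tell exactly which ones flipped. */
static uint64_t apply_svcr(uint64_t old, uint64_t newval, uint64_t mask,
                           int *sm_flipped, int *za_newly_enabled)
{
    uint64_t change = (old ^ newval) & mask;

    *sm_flipped = !!(change & SVCR_SM_DEMO);
    /* ZA storage only needs zeroing when ZA goes from 0 to 1. */
    *za_newly_enabled = !!(change & newval & SVCR_ZA_DEMO);
    return old ^ change;
}

int main(void)
{
    int sm, za;
    uint64_t v = apply_svcr(0, SVCR_SM_DEMO | SVCR_ZA_DEMO, -1, &sm, &za);

    printf("svcr=%#llx sm_flipped=%d za_newly_enabled=%d\n",
           (unsigned long long)v, sm, za);
    return 0;
}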
- cpu_watchpoint_insert(CPU(cpu), wvr, len, flags,
- &env->cpu_watchpoint[n]);
+static void svcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ aarch64_set_svcr(env, value, -1);
}
-void hw_watchpoint_update_all(ARMCPU *cpu)
+static void smcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
- int i;
- CPUARMState *env = &cpu->env;
+ int cur_el = arm_current_el(env);
+ int old_len = sve_vqm1_for_el(env, cur_el);
+ int new_len;
- /* Completely clear out existing QEMU watchpoints and our array, to
- * avoid possible stale entries following migration load.
- */
- cpu_watchpoint_remove_all(CPU(cpu), BP_CPU);
- memset(env->cpu_watchpoint, 0, sizeof(env->cpu_watchpoint));
+ QEMU_BUILD_BUG_ON(ARM_MAX_VQ > R_SMCR_LEN_MASK + 1);
+ value &= R_SMCR_LEN_MASK | R_SMCR_FA64_MASK;
+ raw_write(env, ri, value);
- for (i = 0; i < ARRAY_SIZE(cpu->env.cpu_watchpoint); i++) {
- hw_watchpoint_update(cpu, i);
+ /*
+ * Note that it is CONSTRAINED UNPREDICTABLE what happens to ZA storage
+ * when SVL is widened (old values kept, or zeros). Choose to keep the
+ * current values for simplicity. But for QEMU internals, we must still
+ * apply the narrower SVL to the Zregs and Pregs -- see the comment
+ * above aarch64_sve_narrow_vq.
+ */
+ new_len = sve_vqm1_for_el(env, cur_el);
+ if (new_len < old_len) {
+ aarch64_sve_narrow_vq(env, new_len + 1);
}
}
-static void dbgwvr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static const ARMCPRegInfo sme_reginfo[] = {
+ { .name = "TPIDR2_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 13, .crm = 0, .opc2 = 5,
+ .access = PL0_RW, .accessfn = access_tpidr2,
+ .fgt = FGT_NTPIDR2_EL0,
+ .fieldoffset = offsetof(CPUARMState, cp15.tpidr2_el0) },
+ { .name = "SVCR", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 2,
+ .access = PL0_RW, .type = ARM_CP_SME,
+ .fieldoffset = offsetof(CPUARMState, svcr),
+ .writefn = svcr_write, .raw_writefn = raw_write },
+ { .name = "SMCR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 6,
+ .nv2_redirect_offset = 0x1f0 | NV2_REDIR_NV1,
+ .access = PL1_RW, .type = ARM_CP_SME,
+ .fieldoffset = offsetof(CPUARMState, vfp.smcr_el[1]),
+ .writefn = smcr_write, .raw_writefn = raw_write },
+ { .name = "SMCR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 6,
+ .access = PL2_RW, .type = ARM_CP_SME,
+ .fieldoffset = offsetof(CPUARMState, vfp.smcr_el[2]),
+ .writefn = smcr_write, .raw_writefn = raw_write },
+ { .name = "SMCR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 2, .opc2 = 6,
+ .access = PL3_RW, .type = ARM_CP_SME,
+ .fieldoffset = offsetof(CPUARMState, vfp.smcr_el[3]),
+ .writefn = smcr_write, .raw_writefn = raw_write },
+ { .name = "SMIDR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 6,
+ .access = PL1_R, .accessfn = access_aa64_tid1,
+ /*
+ * IMPLEMENTOR = 0 (software)
+ * REVISION = 0 (implementation defined)
+ * SMPS = 0 (no streaming execution priority in QEMU)
+ * AFFINITY = 0 (streaming sve mode not shared with other PEs)
+ */
+ .type = ARM_CP_CONST, .resetvalue = 0, },
+ /*
+ * Because SMIDR_EL1.SMPS is 0, SMPRI_EL1 and SMPRIMAP_EL2 are RES 0.
+ */
+ { .name = "SMPRI_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 2, .opc2 = 4,
+ .access = PL1_RW, .accessfn = access_smpri,
+ .fgt = FGT_NSMPRI_EL1,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "SMPRIMAP_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 5,
+ .nv2_redirect_offset = 0x1f8,
+ .access = PL2_RW, .accessfn = access_smprimap,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+};
+
+static void tlbi_aa64_paall_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+
+ tlb_flush(cs);
+}
+
+static void gpccr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
- int i = ri->crm;
+ /* L0GPTSZ is RO; other bits not mentioned are RES0. */
+ uint64_t rw_mask = R_GPCCR_PPS_MASK | R_GPCCR_IRGN_MASK |
+ R_GPCCR_ORGN_MASK | R_GPCCR_SH_MASK | R_GPCCR_PGS_MASK |
+ R_GPCCR_GPC_MASK | R_GPCCR_GPCP_MASK;
- /* Bits [63:49] are hardwired to the value of bit [48]; that is, the
- * register reads and behaves as if values written are sign extended.
- * Bits [1:0] are RES0.
+ env->cp15.gpccr_el3 = (value & rw_mask) | (env->cp15.gpccr_el3 & ~rw_mask);
+}
+
+static void gpccr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ env->cp15.gpccr_el3 = FIELD_DP64(0, GPCCR, L0GPTSZ,
+ env_archcpu(env)->reset_l0gptsz);
+}
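
gpccr_write() keeps the read-only L0GPTSZ field and the unnamed RES0 bits intact by merging only the writable bits: new = (value & rw_mask) | (old & ~rw_mask). A tiny demonstration of that merge idiom with made-up field positions:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Made-up layout: bits [7:0] writable, bits [15:8] read-only. */
    uint64_t rw_mask = 0x00ffULL;
    uint64_t reg = 0xab42ULL;       /* RO field 0xab, RW field 0x42 */
    uint64_t value = 0xffffULL;     /* guest tries to set everything */

    reg = (value & rw_mask) | (reg & ~rw_mask);
    printf("reg = %#llx\n", (unsigned long long)reg);  /* 0xabff: RO preserved */
    return 0;
}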
+
+static void tlbi_aa64_paallos_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+
+ tlb_flush_all_cpus_synced(cs);
+}
+
+static const ARMCPRegInfo rme_reginfo[] = {
+ { .name = "GPCCR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 1, .opc2 = 6,
+ .access = PL3_RW, .writefn = gpccr_write, .resetfn = gpccr_reset,
+ .fieldoffset = offsetof(CPUARMState, cp15.gpccr_el3) },
+ { .name = "GPTBR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 2, .crm = 1, .opc2 = 4,
+ .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.gptbr_el3) },
+ { .name = "MFAR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 6, .crm = 0, .opc2 = 5,
+ .access = PL3_RW, .fieldoffset = offsetof(CPUARMState, cp15.mfar_el3) },
+ { .name = "TLBI_PAALL", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 7, .opc2 = 4,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_paall_write },
+ { .name = "TLBI_PAALLOS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 4,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_paallos_write },
+ /*
+ * QEMU does not have a way to invalidate by physical address, thus
+ * invalidating a range of physical addresses is accomplished by
+ * flushing all tlb entries in the outer shareable domain,
+ * just like PAALLOS.
*/
- value = sextract64(value, 0, 49) & ~3ULL;
+ { .name = "TLBI_RPALOS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 4, .opc2 = 7,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_paallos_write },
+ { .name = "TLBI_RPAOS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 4, .opc2 = 3,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_paallos_write },
+ { .name = "DC_CIPAPA", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 14, .opc2 = 1,
+ .access = PL3_W, .type = ARM_CP_NOP },
+};
- raw_write(env, ri, value);
- hw_watchpoint_update(cpu, i);
+static const ARMCPRegInfo rme_mte_reginfo[] = {
+ { .name = "DC_CIGDPAPA", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 7, .crm = 14, .opc2 = 5,
+ .access = PL3_W, .type = ARM_CP_NOP },
+};
+#endif /* TARGET_AARCH64 */
+
+static void define_pmu_regs(ARMCPU *cpu)
+{
+ /*
+ * v7 performance monitor control register: same implementor
+ * field as main ID register, and we implement four counters in
+ * addition to the cycle count register.
+ */
+ unsigned int i, pmcrn = pmu_num_counters(&cpu->env);
+ ARMCPRegInfo pmcr = {
+ .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0,
+ .access = PL0_RW,
+ .fgt = FGT_PMCR_EL0,
+ .type = ARM_CP_IO | ARM_CP_ALIAS,
+ .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr),
+ .accessfn = pmreg_access,
+ .readfn = pmcr_read, .raw_readfn = raw_read,
+ .writefn = pmcr_write, .raw_writefn = raw_write,
+ };
+ ARMCPRegInfo pmcr64 = {
+ .name = "PMCR_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 0,
+ .access = PL0_RW, .accessfn = pmreg_access,
+ .fgt = FGT_PMCR_EL0,
+ .type = ARM_CP_IO,
+ .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr),
+ .resetvalue = cpu->isar.reset_pmcr_el0,
+ .readfn = pmcr_read, .raw_readfn = raw_read,
+ .writefn = pmcr_write, .raw_writefn = raw_write,
+ };
+
+ define_one_arm_cp_reg(cpu, &pmcr);
+ define_one_arm_cp_reg(cpu, &pmcr64);
+ for (i = 0; i < pmcrn; i++) {
+ char *pmevcntr_name = g_strdup_printf("PMEVCNTR%d", i);
+ char *pmevcntr_el0_name = g_strdup_printf("PMEVCNTR%d_EL0", i);
+ char *pmevtyper_name = g_strdup_printf("PMEVTYPER%d", i);
+ char *pmevtyper_el0_name = g_strdup_printf("PMEVTYPER%d_EL0", i);
+ ARMCPRegInfo pmev_regs[] = {
+ { .name = pmevcntr_name, .cp = 15, .crn = 14,
+ .crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7,
+ .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS,
+ .fgt = FGT_PMEVCNTRN_EL0,
+ .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn,
+ .accessfn = pmreg_access_xevcntr },
+ { .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)),
+ .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access_xevcntr,
+ .type = ARM_CP_IO,
+ .fgt = FGT_PMEVCNTRN_EL0,
+ .readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn,
+ .raw_readfn = pmevcntr_rawread,
+ .raw_writefn = pmevcntr_rawwrite },
+ { .name = pmevtyper_name, .cp = 15, .crn = 14,
+ .crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7,
+ .access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS,
+ .fgt = FGT_PMEVTYPERN_EL0,
+ .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn,
+ .accessfn = pmreg_access },
+ { .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)),
+ .opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access,
+ .fgt = FGT_PMEVTYPERN_EL0,
+ .type = ARM_CP_IO,
+ .readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn,
+ .raw_writefn = pmevtyper_rawwrite },
+ };
+ define_arm_cp_regs(cpu, pmev_regs);
+ g_free(pmevcntr_name);
+ g_free(pmevcntr_el0_name);
+ g_free(pmevtyper_name);
+ g_free(pmevtyper_el0_name);
+ }
+ if (cpu_isar_feature(aa32_pmuv3p1, cpu)) {
+ ARMCPRegInfo v81_pmu_regs[] = {
+ { .name = "PMCEID2", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 4,
+ .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST,
+ .fgt = FGT_PMCEIDN_EL0,
+ .resetvalue = extract64(cpu->pmceid0, 32, 32) },
+ { .name = "PMCEID3", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 5,
+ .access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST,
+ .fgt = FGT_PMCEIDN_EL0,
+ .resetvalue = extract64(cpu->pmceid1, 32, 32) },
+ };
+ define_arm_cp_regs(cpu, v81_pmu_regs);
+ }
+ if (cpu_isar_feature(any_pmuv3p4, cpu)) {
+ static const ARMCPRegInfo v84_pmmir = {
+ .name = "PMMIR_EL1", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 6,
+ .access = PL1_R, .accessfn = pmreg_access, .type = ARM_CP_CONST,
+ .fgt = FGT_PMMIR_EL1,
+ .resetvalue = 0
+ };
+ define_one_arm_cp_reg(cpu, &v84_pmmir);
+ }
}
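
The per-counter loop above splits the event-counter index n across the encoding: PMEVCNTR<n>/PMEVTYPER<n> place n[4:3] in the low bits of CRm (on top of base 8 or 12) and n[2:0] in op2, which is what the expressions 8 | (3 & (i >> 3)) and i & 7 compute. A quick standalone check of that split:

#include <stdio.h>

int main(void)
{
    /* PMEVCNTR<n>_EL0: CRm = 0b10:n[4:3], op2 = n[2:0], matching the
     * expressions used in define_pmu_regs above. */
    for (int n = 0; n < 31; n += 5) {
        int crm = 8 | (3 & (n >> 3));
        int op2 = n & 7;
        printf("PMEVCNTR%-2d -> CRm=%d op2=%d\n", n, crm, op2);
    }
    return 0;
}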
-static void dbgwcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+#ifndef CONFIG_USER_ONLY
+/*
+ * We don't know until after realize whether there's a GICv3
+ * attached, and that is what registers the gicv3 sysregs.
+ * So we have to fill in the GIC fields in ID_PFR1/ID_PFR1_EL1/ID_AA64PFR0_EL1
+ * at runtime.
+ */
+static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
- int i = ri->crm;
+ ARMCPU *cpu = env_archcpu(env);
+ uint64_t pfr1 = cpu->isar.id_pfr1;
- raw_write(env, ri, value);
- hw_watchpoint_update(cpu, i);
+ if (env->gicv3state) {
+ pfr1 |= 1 << 28;
+ }
+ return pfr1;
+}
+
+static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ uint64_t pfr0 = cpu->isar.id_aa64pfr0;
+
+ if (env->gicv3state) {
+ pfr0 |= 1 << 24;
+ }
+ return pfr0;
}
+#endif
-void hw_breakpoint_update(ARMCPU *cpu, int n)
+/*
+ * Shared logic between LORID and the rest of the LOR* registers.
+ * Secure state exclusion has already been dealt with.
+ */
+static CPAccessResult access_lor_ns(CPUARMState *env,
+ const ARMCPRegInfo *ri, bool isread)
{
- CPUARMState *env = &cpu->env;
- uint64_t bvr = env->cp15.dbgbvr[n];
- uint64_t bcr = env->cp15.dbgbcr[n];
- vaddr addr;
- int bt;
- int flags = BP_CPU;
+ int el = arm_current_el(env);
- if (env->cpu_breakpoint[n]) {
- cpu_breakpoint_remove_by_ref(CPU(cpu), env->cpu_breakpoint[n]);
- env->cpu_breakpoint[n] = NULL;
+ if (el < 2 && (arm_hcr_el2_eff(env) & HCR_TLOR)) {
+ return CP_ACCESS_TRAP_EL2;
}
+ if (el < 3 && (env->cp15.scr_el3 & SCR_TLOR)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
- if (!extract64(bcr, 0, 1)) {
- /* E bit clear : watchpoint disabled */
- return;
+static CPAccessResult access_lor_other(CPUARMState *env,
+ const ARMCPRegInfo *ri, bool isread)
+{
+ if (arm_is_secure_below_el3(env)) {
+ /* Access denied in secure mode. */
+ return CP_ACCESS_TRAP;
}
+ return access_lor_ns(env, ri, isread);
+}
- bt = extract64(bcr, 20, 4);
+/*
+ * A trivial implementation of ARMv8.1-LOR leaves all of these
+ * registers fixed at 0, which indicates that there are zero
+ * supported Limited Ordering regions.
+ */
+static const ARMCPRegInfo lor_reginfo[] = {
+ { .name = "LORSA_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 0,
+ .access = PL1_RW, .accessfn = access_lor_other,
+ .fgt = FGT_LORSA_EL1,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "LOREA_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 1,
+ .access = PL1_RW, .accessfn = access_lor_other,
+ .fgt = FGT_LOREA_EL1,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "LORN_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 2,
+ .access = PL1_RW, .accessfn = access_lor_other,
+ .fgt = FGT_LORN_EL1,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "LORC_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 3,
+ .access = PL1_RW, .accessfn = access_lor_other,
+ .fgt = FGT_LORC_EL1,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "LORID_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 7,
+ .access = PL1_R, .accessfn = access_lor_ns,
+ .fgt = FGT_LORID_EL1,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+};
- switch (bt) {
- case 4: /* unlinked address mismatch (reserved if AArch64) */
- case 5: /* linked address mismatch (reserved if AArch64) */
- qemu_log_mask(LOG_UNIMP,
- "arm: address mismatch breakpoint types not implemented\n");
- return;
- case 0: /* unlinked address match */
- case 1: /* linked address match */
- {
- /* Bits [63:49] are hardwired to the value of bit [48]; that is,
- * we behave as if the register was sign extended. Bits [1:0] are
- * RES0. The BAS field is used to allow setting breakpoints on 16
- * bit wide instructions; it is CONSTRAINED UNPREDICTABLE whether
- * a bp will fire if the addresses covered by the bp and the addresses
- * covered by the insn overlap but the insn doesn't start at the
- * start of the bp address range. We choose to require the insn and
- * the bp to have the same address. The constraints on writing to
- * BAS enforced in dbgbcr_write mean we have only four cases:
- * 0b0000 => no breakpoint
- * 0b0011 => breakpoint on addr
- * 0b1100 => breakpoint on addr + 2
- * 0b1111 => breakpoint on addr
- * See also figure D2-3 in the v8 ARM ARM (DDI0487A.c).
+#ifdef TARGET_AARCH64
+static CPAccessResult access_pauth(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ int el = arm_current_el(env);
+
+ if (el < 2 &&
+ arm_is_el2_enabled(env) &&
+ !(arm_hcr_el2_eff(env) & HCR_APK)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ if (el < 3 &&
+ arm_feature(env, ARM_FEATURE_EL3) &&
+ !(env->cp15.scr_el3 & SCR_APK)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
+
+static const ARMCPRegInfo pauth_reginfo[] = {
+ { .name = "APDAKEYLO_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 2, .opc2 = 0,
+ .access = PL1_RW, .accessfn = access_pauth,
+ .fgt = FGT_APDAKEY,
+ .fieldoffset = offsetof(CPUARMState, keys.apda.lo) },
+ { .name = "APDAKEYHI_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 2, .opc2 = 1,
+ .access = PL1_RW, .accessfn = access_pauth,
+ .fgt = FGT_APDAKEY,
+ .fieldoffset = offsetof(CPUARMState, keys.apda.hi) },
+ { .name = "APDBKEYLO_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 2, .opc2 = 2,
+ .access = PL1_RW, .accessfn = access_pauth,
+ .fgt = FGT_APDBKEY,
+ .fieldoffset = offsetof(CPUARMState, keys.apdb.lo) },
+ { .name = "APDBKEYHI_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 2, .opc2 = 3,
+ .access = PL1_RW, .accessfn = access_pauth,
+ .fgt = FGT_APDBKEY,
+ .fieldoffset = offsetof(CPUARMState, keys.apdb.hi) },
+ { .name = "APGAKEYLO_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 3, .opc2 = 0,
+ .access = PL1_RW, .accessfn = access_pauth,
+ .fgt = FGT_APGAKEY,
+ .fieldoffset = offsetof(CPUARMState, keys.apga.lo) },
+ { .name = "APGAKEYHI_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 3, .opc2 = 1,
+ .access = PL1_RW, .accessfn = access_pauth,
+ .fgt = FGT_APGAKEY,
+ .fieldoffset = offsetof(CPUARMState, keys.apga.hi) },
+ { .name = "APIAKEYLO_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 1, .opc2 = 0,
+ .access = PL1_RW, .accessfn = access_pauth,
+ .fgt = FGT_APIAKEY,
+ .fieldoffset = offsetof(CPUARMState, keys.apia.lo) },
+ { .name = "APIAKEYHI_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 1, .opc2 = 1,
+ .access = PL1_RW, .accessfn = access_pauth,
+ .fgt = FGT_APIAKEY,
+ .fieldoffset = offsetof(CPUARMState, keys.apia.hi) },
+ { .name = "APIBKEYLO_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 1, .opc2 = 2,
+ .access = PL1_RW, .accessfn = access_pauth,
+ .fgt = FGT_APIBKEY,
+ .fieldoffset = offsetof(CPUARMState, keys.apib.lo) },
+ { .name = "APIBKEYHI_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 1, .opc2 = 3,
+ .access = PL1_RW, .accessfn = access_pauth,
+ .fgt = FGT_APIBKEY,
+ .fieldoffset = offsetof(CPUARMState, keys.apib.hi) },
+};
+
+static const ARMCPRegInfo tlbirange_reginfo[] = {
+ { .name = "TLBI_RVAE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 1,
+ .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIRVAE1IS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVAAE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 3,
+ .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIRVAAE1IS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVALE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 5,
+ .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIRVALE1IS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVAALE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 7,
+ .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIRVAALE1IS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVAE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1,
+ .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIRVAE1OS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVAAE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 3,
+ .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIRVAAE1OS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVALE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 5,
+ .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIRVALE1OS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVAALE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 7,
+ .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIRVAALE1OS,
+ .writefn = tlbi_aa64_rvae1is_write },
+ { .name = "TLBI_RVAE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1,
+ .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIRVAE1,
+ .writefn = tlbi_aa64_rvae1_write },
+ { .name = "TLBI_RVAAE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 3,
+ .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIRVAAE1,
+ .writefn = tlbi_aa64_rvae1_write },
+ { .name = "TLBI_RVALE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 5,
+ .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIRVALE1,
+ .writefn = tlbi_aa64_rvae1_write },
+ { .name = "TLBI_RVAALE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 7,
+ .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIRVAALE1,
+ .writefn = tlbi_aa64_rvae1_write },
+ { .name = "TLBI_RIPAS2E1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 2,
+ .access = PL2_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_ripas2e1is_write },
+ { .name = "TLBI_RIPAS2LE1IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 0, .opc2 = 6,
+ .access = PL2_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_ripas2e1is_write },
+ { .name = "TLBI_RVAE2IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 2, .opc2 = 1,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_rvae2is_write },
+ { .name = "TLBI_RVALE2IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 2, .opc2 = 5,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_rvae2is_write },
+ { .name = "TLBI_RIPAS2E1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 2,
+ .access = PL2_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_ripas2e1_write },
+ { .name = "TLBI_RIPAS2LE1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 6,
+ .access = PL2_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_ripas2e1_write },
+ { .name = "TLBI_RVAE2OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 5, .opc2 = 1,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_rvae2is_write },
+ { .name = "TLBI_RVALE2OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 5, .opc2 = 5,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_rvae2is_write },
+ { .name = "TLBI_RVAE2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 6, .opc2 = 1,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_rvae2_write },
+ { .name = "TLBI_RVALE2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 6, .opc2 = 5,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_rvae2_write },
+ { .name = "TLBI_RVAE3IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 2, .opc2 = 1,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_rvae3is_write },
+ { .name = "TLBI_RVALE3IS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 2, .opc2 = 5,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_rvae3is_write },
+ { .name = "TLBI_RVAE3OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 5, .opc2 = 1,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_rvae3is_write },
+ { .name = "TLBI_RVALE3OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 5, .opc2 = 5,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_rvae3is_write },
+ { .name = "TLBI_RVAE3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 6, .opc2 = 1,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_rvae3_write },
+ { .name = "TLBI_RVALE3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 6, .opc2 = 5,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_rvae3_write },
+};
+
+static const ARMCPRegInfo tlbios_reginfo[] = {
+ { .name = "TLBI_VMALLE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 0,
+ .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIVMALLE1OS,
+ .writefn = tlbi_aa64_vmalle1is_write },
+ { .name = "TLBI_VAE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 1,
+ .fgt = FGT_TLBIVAE1OS,
+ .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_vae1is_write },
+ { .name = "TLBI_ASIDE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 2,
+ .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIASIDE1OS,
+ .writefn = tlbi_aa64_vmalle1is_write },
+ { .name = "TLBI_VAAE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 3,
+ .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIVAAE1OS,
+ .writefn = tlbi_aa64_vae1is_write },
+ { .name = "TLBI_VALE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 5,
+ .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIVALE1OS,
+ .writefn = tlbi_aa64_vae1is_write },
+ { .name = "TLBI_VAALE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 7,
+ .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW,
+ .fgt = FGT_TLBIVAALE1OS,
+ .writefn = tlbi_aa64_vae1is_write },
+ { .name = "TLBI_ALLE2OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 0,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_alle2is_write },
+ { .name = "TLBI_VAE2OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 1,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_vae2is_write },
+ { .name = "TLBI_ALLE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 4,
+ .access = PL2_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_alle1is_write },
+ { .name = "TLBI_VALE2OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 5,
+ .access = PL2_W, .type = ARM_CP_NO_RAW | ARM_CP_EL3_NO_EL2_UNDEF,
+ .writefn = tlbi_aa64_vae2is_write },
+ { .name = "TLBI_VMALLS12E1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 6,
+ .access = PL2_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_alle1is_write },
+ { .name = "TLBI_IPAS2E1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 0,
+ .access = PL2_W, .type = ARM_CP_NOP },
+ { .name = "TLBI_RIPAS2E1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 3,
+ .access = PL2_W, .type = ARM_CP_NOP },
+ { .name = "TLBI_IPAS2LE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 4,
+ .access = PL2_W, .type = ARM_CP_NOP },
+ { .name = "TLBI_RIPAS2LE1OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 4, .opc2 = 7,
+ .access = PL2_W, .type = ARM_CP_NOP },
+ { .name = "TLBI_ALLE3OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 0,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_alle3is_write },
+ { .name = "TLBI_VAE3OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 1,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_vae3is_write },
+ { .name = "TLBI_VALE3OS", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 8, .crm = 1, .opc2 = 5,
+ .access = PL3_W, .type = ARM_CP_NO_RAW,
+ .writefn = tlbi_aa64_vae3is_write },
+};
+
+static uint64_t rndr_readfn(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ Error *err = NULL;
+ uint64_t ret;
+
+ /* Success sets NZCV = 0000. */
+ env->NF = env->CF = env->VF = 0, env->ZF = 1;
+
+ if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
+ /*
+ * ??? Failed, for unknown reasons in the crypto subsystem.
+ * The best we can do is log the reason and return the
+ * timed-out indication to the guest. There is no reason
+ * we know to expect this failure to be transitory, so the
+ * guest may well hang retrying the operation.
*/
- int bas = extract64(bcr, 5, 4);
- addr = sextract64(bvr, 0, 49) & ~3ULL;
- if (bas == 0) {
- return;
- }
- if (bas == 0xc) {
- addr += 2;
+ qemu_log_mask(LOG_UNIMP, "%s: Crypto failure: %s",
+ ri->name, error_get_pretty(err));
+ error_free(err);
+
+        env->ZF = 0; /* NZCV = 0100 */
+ return 0;
+ }
+ return ret;
+}
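
rndr_readfn() reports NZCV through QEMU's de-constructed flag fields: ZF holds a value that is zero exactly when the Z flag is set, NF and VF carry their flag in bit 31, and CF holds C directly. A simplified sketch, covering only the two cases used here, of how the assignments above map to NZCV:

#include <stdint.h>
#include <stdio.h>

/* Simplified model of the flag fields: Z is set iff zf == 0, N and V live
 * in bit 31 of nf/vf, C is the low bit of cf. */
static unsigned nzcv(uint32_t nf, uint32_t zf, uint32_t cf, uint32_t vf)
{
    return ((nf >> 31) << 3) | ((zf == 0) << 2) | ((cf & 1) << 1) | (vf >> 31);
}

int main(void)
{
    unsigned ok = nzcv(0, 1, 0, 0);   /* success path: NF=CF=VF=0, ZF=1 */
    unsigned fail = nzcv(0, 0, 0, 0); /* failure path: additionally ZF=0 */

    printf("success: NZCV = %u%u%u%u\n",
           ok >> 3, (ok >> 2) & 1, (ok >> 1) & 1, ok & 1);
    printf("failure: NZCV = %u%u%u%u\n",
           fail >> 3, (fail >> 2) & 1, (fail >> 1) & 1, fail & 1);
    return 0;
}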
+
+/* We do not support re-seeding, so the two registers operate the same. */
+static const ARMCPRegInfo rndr_reginfo[] = {
+ { .name = "RNDR", .state = ARM_CP_STATE_AA64,
+ .type = ARM_CP_NO_RAW | ARM_CP_SUPPRESS_TB_END | ARM_CP_IO,
+ .opc0 = 3, .opc1 = 3, .crn = 2, .crm = 4, .opc2 = 0,
+ .access = PL0_R, .readfn = rndr_readfn },
+ { .name = "RNDRRS", .state = ARM_CP_STATE_AA64,
+ .type = ARM_CP_NO_RAW | ARM_CP_SUPPRESS_TB_END | ARM_CP_IO,
+ .opc0 = 3, .opc1 = 3, .crn = 2, .crm = 4, .opc2 = 1,
+ .access = PL0_R, .readfn = rndr_readfn },
+};
+
+static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *opaque,
+ uint64_t value)
+{
+#ifdef CONFIG_TCG
+ ARMCPU *cpu = env_archcpu(env);
+ /* CTR_EL0 System register -> DminLine, bits [19:16] */
+ uint64_t dline_size = 4 << ((cpu->ctr >> 16) & 0xF);
+ uint64_t vaddr_in = (uint64_t) value;
+ uint64_t vaddr = vaddr_in & ~(dline_size - 1);
+ void *haddr;
+ int mem_idx = arm_env_mmu_index(env);
+
+ /* This won't be crossing page boundaries */
+ haddr = probe_read(env, vaddr, dline_size, mem_idx, GETPC());
+ if (haddr) {
+#ifndef CONFIG_USER_ONLY
+
+ ram_addr_t offset;
+ MemoryRegion *mr;
+
+ /* RCU lock is already being held */
+ mr = memory_region_from_host(haddr, &offset);
+
+ if (mr) {
+ memory_region_writeback(mr, offset, dline_size);
}
- break;
+#endif /*CONFIG_USER_ONLY*/
}
- case 2: /* unlinked context ID match */
- case 8: /* unlinked VMID match (reserved if no EL2) */
- case 10: /* unlinked context ID and VMID match (reserved if no EL2) */
- qemu_log_mask(LOG_UNIMP,
- "arm: unlinked context breakpoint types not implemented\n");
- return;
- case 9: /* linked VMID match (reserved if no EL2) */
- case 11: /* linked context ID and VMID match (reserved if no EL2) */
- case 3: /* linked context ID match */
- default:
- /* We must generate no events for Linked context matches (unless
- * they are linked to by some other bp/wp, which is handled in
- * updates for the linking bp/wp). We choose to also generate no events
- * for reserved values.
- */
- return;
+#else
+ /* Handled by hardware accelerator. */
+ g_assert_not_reached();
+#endif /* CONFIG_TCG */
+}
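
dccvap_writefn() derives the data cache line size from CTR_EL0.DminLine (bits [19:16]), which is log2 of the line length in words, so 4 << DminLine gives the length in bytes and the input VA is then aligned down to a line boundary. A worked example assuming DminLine = 4:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* CTR_EL0.DminLine (bits [19:16]) is log2(words per line);
     * 4 << DminLine converts that to bytes, as dccvap_writefn does. */
    uint64_t ctr_el0 = 0x4ULL << 16;            /* DminLine = 4: 64-byte lines */
    uint64_t dline_size = 4 << ((ctr_el0 >> 16) & 0xF);
    uint64_t vaddr_in = 0x123456789abcdefULL;
    uint64_t vaddr = vaddr_in & ~(dline_size - 1);

    printf("line size = %llu bytes, aligned vaddr = %#llx\n",
           (unsigned long long)dline_size, (unsigned long long)vaddr);
    return 0;
}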
+
+static const ARMCPRegInfo dcpop_reg[] = {
+ { .name = "DC_CVAP", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 1,
+ .access = PL0_W, .type = ARM_CP_NO_RAW | ARM_CP_SUPPRESS_TB_END,
+ .fgt = FGT_DCCVAP,
+ .accessfn = aa64_cacheop_poc_access, .writefn = dccvap_writefn },
+};
+
+static const ARMCPRegInfo dcpodp_reg[] = {
+ { .name = "DC_CVADP", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 1,
+ .access = PL0_W, .type = ARM_CP_NO_RAW | ARM_CP_SUPPRESS_TB_END,
+ .fgt = FGT_DCCVADP,
+ .accessfn = aa64_cacheop_poc_access, .writefn = dccvap_writefn },
+};
+
+static CPAccessResult access_aa64_tid5(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if ((arm_current_el(env) < 2) && (arm_hcr_el2_eff(env) & HCR_TID5)) {
+ return CP_ACCESS_TRAP_EL2;
}
- cpu_breakpoint_insert(CPU(cpu), addr, flags, &env->cpu_breakpoint[n]);
+ return CP_ACCESS_OK;
}
-void hw_breakpoint_update_all(ARMCPU *cpu)
+static CPAccessResult access_mte(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
{
- int i;
- CPUARMState *env = &cpu->env;
+ int el = arm_current_el(env);
+ if (el < 2 && arm_is_el2_enabled(env)) {
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ if (!(hcr & HCR_ATA) && (!(hcr & HCR_E2H) || !(hcr & HCR_TGE))) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ }
+ if (el < 3 &&
+ arm_feature(env, ARM_FEATURE_EL3) &&
+ !(env->cp15.scr_el3 & SCR_ATA)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
+
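Read as a decision table, the gating in access_mte() comes out roughly as follows (informal summary, not normative text):

/*
 *   EL0/EL1, EL2 enabled, HCR_EL2.ATA == 0, not (E2H && TGE)  -> trap to EL2
 *   below EL3, EL3 present, SCR_EL3.ATA == 0                  -> trap to EL3
 *   otherwise                                                 -> CP_ACCESS_OK
 */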
+static CPAccessResult access_tfsr_el1(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ CPAccessResult nv1 = access_nv1(env, ri, isread);
+
+ if (nv1 != CP_ACCESS_OK) {
+ return nv1;
+ }
+ return access_mte(env, ri, isread);
+}
- /* Completely clear out existing QEMU breakpoints and our array, to
- * avoid possible stale entries following migration load.
+static CPAccessResult access_tfsr_el2(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ /*
+ * TFSR_EL2: similar to generic access_mte(), but we need to
+ * account for FEAT_NV. At EL1 this must be a FEAT_NV access;
+ * if NV2 is enabled then we will redirect this to TFSR_EL1
+ * after doing the HCR and SCR ATA traps; otherwise this will
+ * be a trap to EL2 and the HCR/SCR traps do not apply.
*/
- cpu_breakpoint_remove_all(CPU(cpu), BP_CPU);
- memset(env->cpu_breakpoint, 0, sizeof(env->cpu_breakpoint));
+ int el = arm_current_el(env);
- for (i = 0; i < ARRAY_SIZE(cpu->env.cpu_breakpoint); i++) {
- hw_breakpoint_update(cpu, i);
+ if (el == 1 && (arm_hcr_el2_eff(env) & HCR_NV2)) {
+ return CP_ACCESS_OK;
+ }
+ if (el < 2 && arm_is_el2_enabled(env)) {
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ if (!(hcr & HCR_ATA) && (!(hcr & HCR_E2H) || !(hcr & HCR_TGE))) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ }
+ if (el < 3 &&
+ arm_feature(env, ARM_FEATURE_EL3) &&
+ !(env->cp15.scr_el3 & SCR_ATA)) {
+ return CP_ACCESS_TRAP_EL3;
}
+ return CP_ACCESS_OK;
}
-static void dbgbvr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static uint64_t tco_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
- int i = ri->crm;
+ return env->pstate & PSTATE_TCO;
+}
- raw_write(env, ri, value);
- hw_breakpoint_update(cpu, i);
+static void tco_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val)
+{
+ env->pstate = (env->pstate & ~PSTATE_TCO) | (val & PSTATE_TCO);
}
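Guest-visible behaviour of the TCO accessors above, as a brief sketch:

/*
 *   MSR TCO, #1  ->  tco_write() sets PSTATE_TCO, suppressing MTE tag
 *                    checks until the bit is cleared again;
 *   MRS x0, TCO  ->  tco_read() returns only the PSTATE_TCO bit, with all
 *                    other PSTATE bits masked off.
 */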
-static void dbgbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
- uint64_t value)
+static const ARMCPRegInfo mte_reginfo[] = {
+ { .name = "TFSRE0_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 1,
+ .access = PL1_RW, .accessfn = access_mte,
+ .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[0]) },
+ { .name = "TFSR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 0,
+ .access = PL1_RW, .accessfn = access_tfsr_el1,
+ .nv2_redirect_offset = 0x190 | NV2_REDIR_NV1,
+ .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) },
+ { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64,
+ .type = ARM_CP_NV2_REDIRECT,
+ .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 6, .opc2 = 0,
+ .access = PL2_RW, .accessfn = access_tfsr_el2,
+ .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[2]) },
+ { .name = "TFSR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 5, .crm = 6, .opc2 = 0,
+ .access = PL3_RW,
+ .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[3]) },
+ { .name = "RGSR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 5,
+ .access = PL1_RW, .accessfn = access_mte,
+ .fieldoffset = offsetof(CPUARMState, cp15.rgsr_el1) },
+ { .name = "GCR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 6,
+ .access = PL1_RW, .accessfn = access_mte,
+ .fieldoffset = offsetof(CPUARMState, cp15.gcr_el1) },
+ { .name = "TCO", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7,
+ .type = ARM_CP_NO_RAW,
+ .access = PL0_RW, .readfn = tco_read, .writefn = tco_write },
+ { .name = "DC_IGVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 3,
+ .type = ARM_CP_NOP, .access = PL1_W,
+ .fgt = FGT_DCIVAC,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_IGSW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 4,
+ .fgt = FGT_DCISW,
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
+ { .name = "DC_IGDVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 5,
+ .type = ARM_CP_NOP, .access = PL1_W,
+ .fgt = FGT_DCIVAC,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_IGDSW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 6,
+ .fgt = FGT_DCISW,
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
+ { .name = "DC_CGSW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 4,
+ .fgt = FGT_DCCSW,
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
+ { .name = "DC_CGDSW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 6,
+ .fgt = FGT_DCCSW,
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
+ { .name = "DC_CIGSW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 4,
+ .fgt = FGT_DCCISW,
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
+ { .name = "DC_CIGDSW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 6,
+ .fgt = FGT_DCCISW,
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
+};
+
+static const ARMCPRegInfo mte_tco_ro_reginfo[] = {
+ { .name = "TCO", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7,
+ .type = ARM_CP_CONST, .access = PL0_RW, },
+};
+
+static const ARMCPRegInfo mte_el0_cacheop_reginfo[] = {
+ { .name = "DC_CGVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 3,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .fgt = FGT_DCCVAC,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_CGDVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 5,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .fgt = FGT_DCCVAC,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_CGVAP", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 3,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .fgt = FGT_DCCVAP,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_CGDVAP", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 5,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .fgt = FGT_DCCVAP,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_CGVADP", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 3,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .fgt = FGT_DCCVADP,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_CGDVADP", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 5,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .fgt = FGT_DCCVADP,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_CIGVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 3,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .fgt = FGT_DCCIVAC,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_CIGDVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 5,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .fgt = FGT_DCCIVAC,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_GVA", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 3,
+ .access = PL0_W, .type = ARM_CP_DC_GVA,
+#ifndef CONFIG_USER_ONLY
+ /* Avoid overhead of an access check that always passes in user-mode */
+ .accessfn = aa64_zva_access,
+ .fgt = FGT_DCZVA,
+#endif
+ },
+ { .name = "DC_GZVA", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 4,
+ .access = PL0_W, .type = ARM_CP_DC_GZVA,
+#ifndef CONFIG_USER_ONLY
+ /* Avoid overhead of an access check that always passes in user-mode */
+ .accessfn = aa64_zva_access,
+ .fgt = FGT_DCZVA,
+#endif
+ },
+};
+
+static CPAccessResult access_scxtnum(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
- int i = ri->crm;
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ int el = arm_current_el(env);
- /* BAS[3] is a read-only copy of BAS[2], and BAS[1] a read-only
- * copy of BAS[0].
- */
- value = deposit64(value, 6, 1, extract64(value, 5, 1));
- value = deposit64(value, 8, 1, extract64(value, 7, 1));
+ if (el == 0 && !((hcr & HCR_E2H) && (hcr & HCR_TGE))) {
+ if (env->cp15.sctlr_el[1] & SCTLR_TSCXT) {
+ if (hcr & HCR_TGE) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ return CP_ACCESS_TRAP;
+ }
+ } else if (el < 2 && (env->cp15.sctlr_el[2] & SCTLR_TSCXT)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ if (el < 2 && arm_is_el2_enabled(env) && !(hcr & HCR_ENSCXT)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ if (el < 3
+ && arm_feature(env, ARM_FEATURE_EL3)
+ && !(env->cp15.scr_el3 & SCR_ENSCXT)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
- raw_write(env, ri, value);
- hw_breakpoint_update(cpu, i);
+static CPAccessResult access_scxtnum_el1(CPUARMState *env,
+ const ARMCPRegInfo *ri,
+ bool isread)
+{
+ CPAccessResult nv1 = access_nv1(env, ri, isread);
+
+ if (nv1 != CP_ACCESS_OK) {
+ return nv1;
+ }
+ return access_scxtnum(env, ri, isread);
+}
+
+static const ARMCPRegInfo scxtnum_reginfo[] = {
+ { .name = "SCXTNUM_EL0", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 13, .crm = 0, .opc2 = 7,
+ .access = PL0_RW, .accessfn = access_scxtnum,
+ .fgt = FGT_SCXTNUM_EL0,
+ .fieldoffset = offsetof(CPUARMState, scxtnum_el[0]) },
+ { .name = "SCXTNUM_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 7,
+ .access = PL1_RW, .accessfn = access_scxtnum_el1,
+ .fgt = FGT_SCXTNUM_EL1,
+ .nv2_redirect_offset = 0x188 | NV2_REDIR_NV1,
+ .fieldoffset = offsetof(CPUARMState, scxtnum_el[1]) },
+ { .name = "SCXTNUM_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 7,
+ .access = PL2_RW, .accessfn = access_scxtnum,
+ .fieldoffset = offsetof(CPUARMState, scxtnum_el[2]) },
+ { .name = "SCXTNUM_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 13, .crm = 0, .opc2 = 7,
+ .access = PL3_RW,
+ .fieldoffset = offsetof(CPUARMState, scxtnum_el[3]) },
+};
+
+static CPAccessResult access_fgt(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if (arm_current_el(env) == 2 &&
+ arm_feature(env, ARM_FEATURE_EL3) && !(env->cp15.scr_el3 & SCR_FGTEN)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
}
-static void define_debug_regs(ARMCPU *cpu)
+static const ARMCPRegInfo fgt_reginfo[] = {
+ { .name = "HFGRTR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 4,
+ .nv2_redirect_offset = 0x1b8,
+ .access = PL2_RW, .accessfn = access_fgt,
+ .fieldoffset = offsetof(CPUARMState, cp15.fgt_read[FGTREG_HFGRTR]) },
+ { .name = "HFGWTR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 5,
+ .nv2_redirect_offset = 0x1c0,
+ .access = PL2_RW, .accessfn = access_fgt,
+ .fieldoffset = offsetof(CPUARMState, cp15.fgt_write[FGTREG_HFGWTR]) },
+ { .name = "HDFGRTR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 3, .crm = 1, .opc2 = 4,
+ .nv2_redirect_offset = 0x1d0,
+ .access = PL2_RW, .accessfn = access_fgt,
+ .fieldoffset = offsetof(CPUARMState, cp15.fgt_read[FGTREG_HDFGRTR]) },
+ { .name = "HDFGWTR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 3, .crm = 1, .opc2 = 5,
+ .nv2_redirect_offset = 0x1d8,
+ .access = PL2_RW, .accessfn = access_fgt,
+ .fieldoffset = offsetof(CPUARMState, cp15.fgt_write[FGTREG_HDFGWTR]) },
+ { .name = "HFGITR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 6,
+ .nv2_redirect_offset = 0x1c8,
+ .access = PL2_RW, .accessfn = access_fgt,
+ .fieldoffset = offsetof(CPUARMState, cp15.fgt_exec[FGTREG_HFGITR]) },
+};
+
+static void vncr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
{
- /* Define v7 and v8 architectural debug registers.
- * These are just dummy implementations for now.
+ /*
+ * Clear the RES0 bottom 12 bits; this means at runtime we can guarantee
+ * that VNCR_EL2 + offset is 64-bit aligned. We don't need to do anything
+ * about the RESS bits at the top -- we choose the "generate an EL2
+ * translation abort on use" CONSTRAINED UNPREDICTABLE option (i.e. let
+ * the ptw.c code detect the resulting invalid address).
*/
- int i;
- int wrps, brps, ctx_cmps;
- ARMCPRegInfo dbgdidr = {
- .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0,
- .access = PL0_R, .accessfn = access_tda,
- .type = ARM_CP_CONST, .resetvalue = cpu->dbgdidr,
- };
+ env->cp15.vncr_el2 = value & ~0xfffULL;
+}
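A hedged illustration of the masking above, with an arbitrary example value:

/*
 * vncr_write() stores value & ~0xfff, so a guest write of
 * 0x0000000080000fff leaves cp15.vncr_el2 == 0x0000000080000000 and
 * VNCR_EL2 plus any 8-byte-aligned redirect offset stays 64-bit aligned.
 */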
- /* Note that all these register fields hold "number of Xs minus 1". */
- brps = extract32(cpu->dbgdidr, 24, 4);
- wrps = extract32(cpu->dbgdidr, 28, 4);
- ctx_cmps = extract32(cpu->dbgdidr, 20, 4);
+static const ARMCPRegInfo nv2_reginfo[] = {
+ { .name = "VNCR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 2, .opc2 = 0,
+ .access = PL2_RW,
+ .writefn = vncr_write,
+ .nv2_redirect_offset = 0xb0,
+ .fieldoffset = offsetof(CPUARMState, cp15.vncr_el2) },
+};
- assert(ctx_cmps <= brps);
+#endif /* TARGET_AARCH64 */
- /* The DBGDIDR and ID_AA64DFR0_EL1 define various properties
- * of the debug registers such as number of breakpoints;
- * check that if they both exist then they agree.
- */
- if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
- assert(extract32(cpu->id_aa64dfr0, 12, 4) == brps);
- assert(extract32(cpu->id_aa64dfr0, 20, 4) == wrps);
- assert(extract32(cpu->id_aa64dfr0, 28, 4) == ctx_cmps);
+static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ int el = arm_current_el(env);
+
+ if (el == 0) {
+ uint64_t sctlr = arm_sctlr(env, el);
+ if (!(sctlr & SCTLR_EnRCTX)) {
+ return CP_ACCESS_TRAP;
+ }
+ } else if (el == 1) {
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ if (hcr & HCR_NV) {
+ return CP_ACCESS_TRAP_EL2;
+ }
}
+ return CP_ACCESS_OK;
+}
- define_one_arm_cp_reg(cpu, &dbgdidr);
- define_arm_cp_regs(cpu, debug_cp_reginfo);
+static const ARMCPRegInfo predinv_reginfo[] = {
+ { .name = "CFP_RCTX", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 3, .opc2 = 4,
+ .fgt = FGT_CFPRCTX,
+ .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv },
+ { .name = "DVP_RCTX", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 3, .opc2 = 5,
+ .fgt = FGT_DVPRCTX,
+ .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv },
+ { .name = "CPP_RCTX", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 3, .opc2 = 7,
+ .fgt = FGT_CPPRCTX,
+ .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv },
+ /*
+ * Note the AArch32 opcodes have a different OPC1.
+ */
+ { .name = "CFPRCTX", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 0, .crn = 7, .crm = 3, .opc2 = 4,
+ .fgt = FGT_CFPRCTX,
+ .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv },
+ { .name = "DVPRCTX", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 0, .crn = 7, .crm = 3, .opc2 = 5,
+ .fgt = FGT_DVPRCTX,
+ .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv },
+ { .name = "CPPRCTX", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 0, .crn = 7, .crm = 3, .opc2 = 7,
+ .fgt = FGT_CPPRCTX,
+ .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv },
+};
- if (arm_feature(&cpu->env, ARM_FEATURE_LPAE)) {
- define_arm_cp_regs(cpu, debug_lpae_cp_reginfo);
- }
+static uint64_t ccsidr2_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ /* Read the high 32 bits of the current CCSIDR */
+ return extract64(ccsidr_read(env, ri), 32, 32);
+}
- for (i = 0; i < brps + 1; i++) {
- ARMCPRegInfo dbgregs[] = {
- { .name = "DBGBVR", .state = ARM_CP_STATE_BOTH,
- .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4,
- .access = PL1_RW, .accessfn = access_tda,
- .fieldoffset = offsetof(CPUARMState, cp15.dbgbvr[i]),
- .writefn = dbgbvr_write, .raw_writefn = raw_write
- },
- { .name = "DBGBCR", .state = ARM_CP_STATE_BOTH,
- .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 5,
- .access = PL1_RW, .accessfn = access_tda,
- .fieldoffset = offsetof(CPUARMState, cp15.dbgbcr[i]),
- .writefn = dbgbcr_write, .raw_writefn = raw_write
- },
- REGINFO_SENTINEL
- };
- define_arm_cp_regs(cpu, dbgregs);
- }
+static const ARMCPRegInfo ccsidr2_reginfo[] = {
+ { .name = "CCSIDR2", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 2,
+ .access = PL1_R,
+ .accessfn = access_tid4,
+ .readfn = ccsidr2_read, .type = ARM_CP_NO_RAW },
+};
- for (i = 0; i < wrps + 1; i++) {
- ARMCPRegInfo dbgregs[] = {
- { .name = "DBGWVR", .state = ARM_CP_STATE_BOTH,
- .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6,
- .access = PL1_RW, .accessfn = access_tda,
- .fieldoffset = offsetof(CPUARMState, cp15.dbgwvr[i]),
- .writefn = dbgwvr_write, .raw_writefn = raw_write
- },
- { .name = "DBGWCR", .state = ARM_CP_STATE_BOTH,
- .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 7,
- .access = PL1_RW, .accessfn = access_tda,
- .fieldoffset = offsetof(CPUARMState, cp15.dbgwcr[i]),
- .writefn = dbgwcr_write, .raw_writefn = raw_write
- },
- REGINFO_SENTINEL
- };
- define_arm_cp_regs(cpu, dbgregs);
+static CPAccessResult access_aa64_tid3(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if ((arm_current_el(env) < 2) && (arm_hcr_el2_eff(env) & HCR_TID3)) {
+ return CP_ACCESS_TRAP_EL2;
}
+
+ return CP_ACCESS_OK;
}
-/* We don't know until after realize whether there's a GICv3
- * attached, and that is what registers the gicv3 sysregs.
- * So we have to fill in the GIC fields in ID_PFR/ID_PFR1_EL1/ID_AA64PFR0_EL1
- * at runtime.
- */
-static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri)
+static CPAccessResult access_aa32_tid3(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
- uint64_t pfr1 = cpu->id_pfr1;
-
- if (env->gicv3state) {
- pfr1 |= 1 << 28;
+ if (arm_feature(env, ARM_FEATURE_V8)) {
+ return access_aa64_tid3(env, ri, isread);
}
- return pfr1;
+
+ return CP_ACCESS_OK;
}
-static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri)
+static CPAccessResult access_jazelle(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
- uint64_t pfr0 = cpu->id_aa64pfr0;
+ if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_TID0)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
- if (env->gicv3state) {
- pfr0 |= 1 << 24;
+ return CP_ACCESS_OK;
+}
+
+static CPAccessResult access_joscr_jmcr(CPUARMState *env,
+ const ARMCPRegInfo *ri, bool isread)
+{
+ /*
+ * HSTR.TJDBX traps JOSCR and JMCR accesses, but it exists only
+ * in v7A, not in v8A.
+ */
+ if (!arm_feature(env, ARM_FEATURE_V8) &&
+ arm_current_el(env) < 2 && !arm_is_secure_below_el3(env) &&
+ (env->cp15.hstr_el2 & HSTR_TJDBX)) {
+ return CP_ACCESS_TRAP_EL2;
}
- return pfr0;
+ return CP_ACCESS_OK;
}
+static const ARMCPRegInfo jazelle_regs[] = {
+ { .name = "JIDR",
+ .cp = 14, .crn = 0, .crm = 0, .opc1 = 7, .opc2 = 0,
+ .access = PL1_R, .accessfn = access_jazelle,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "JOSCR",
+ .cp = 14, .crn = 1, .crm = 0, .opc1 = 7, .opc2 = 0,
+ .accessfn = access_joscr_jmcr,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "JMCR",
+ .cp = 14, .crn = 2, .crm = 0, .opc1 = 7, .opc2 = 0,
+ .accessfn = access_joscr_jmcr,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+};
+
+static const ARMCPRegInfo contextidr_el2 = {
+ .name = "CONTEXTIDR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 13, .crm = 0, .opc2 = 1,
+ .access = PL2_RW,
+ .fieldoffset = offsetof(CPUARMState, cp15.contextidr_el[2])
+};
+
+static const ARMCPRegInfo vhe_reginfo[] = {
+ { .name = "TTBR1_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 1,
+ .access = PL2_RW, .writefn = vmsa_tcr_ttbr_el2_write,
+ .raw_writefn = raw_write,
+ .fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el[2]) },
+#ifndef CONFIG_USER_ONLY
+ { .name = "CNTHV_CVAL_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 3, .opc2 = 2,
+ .fieldoffset =
+ offsetof(CPUARMState, cp15.c14_timer[GTIMER_HYPVIRT].cval),
+ .type = ARM_CP_IO, .access = PL2_RW,
+ .writefn = gt_hv_cval_write, .raw_writefn = raw_write },
+ { .name = "CNTHV_TVAL_EL2", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 3, .opc2 = 0,
+ .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL2_RW,
+ .resetfn = gt_hv_timer_reset,
+ .readfn = gt_hv_tval_read, .writefn = gt_hv_tval_write },
+ { .name = "CNTHV_CTL_EL2", .state = ARM_CP_STATE_BOTH,
+ .type = ARM_CP_IO,
+ .opc0 = 3, .opc1 = 4, .crn = 14, .crm = 3, .opc2 = 1,
+ .access = PL2_RW,
+ .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_HYPVIRT].ctl),
+ .writefn = gt_hv_ctl_write, .raw_writefn = raw_write },
+ { .name = "CNTP_CTL_EL02", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 5, .crn = 14, .crm = 2, .opc2 = 1,
+ .type = ARM_CP_IO | ARM_CP_ALIAS,
+ .access = PL2_RW, .accessfn = access_el1nvpct,
+ .nv2_redirect_offset = 0x180 | NV2_REDIR_NO_NV1,
+ .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].ctl),
+ .writefn = gt_phys_ctl_write, .raw_writefn = raw_write },
+ { .name = "CNTV_CTL_EL02", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 5, .crn = 14, .crm = 3, .opc2 = 1,
+ .type = ARM_CP_IO | ARM_CP_ALIAS,
+ .access = PL2_RW, .accessfn = access_el1nvvct,
+ .nv2_redirect_offset = 0x170 | NV2_REDIR_NO_NV1,
+ .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].ctl),
+ .writefn = gt_virt_ctl_write, .raw_writefn = raw_write },
+ { .name = "CNTP_TVAL_EL02", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 5, .crn = 14, .crm = 2, .opc2 = 0,
+ .type = ARM_CP_NO_RAW | ARM_CP_IO | ARM_CP_ALIAS,
+ .access = PL2_RW, .accessfn = e2h_access,
+ .readfn = gt_phys_tval_read, .writefn = gt_phys_tval_write },
+ { .name = "CNTV_TVAL_EL02", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 5, .crn = 14, .crm = 3, .opc2 = 0,
+ .type = ARM_CP_NO_RAW | ARM_CP_IO | ARM_CP_ALIAS,
+ .access = PL2_RW, .accessfn = e2h_access,
+ .readfn = gt_virt_tval_read, .writefn = gt_virt_tval_write },
+ { .name = "CNTP_CVAL_EL02", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 5, .crn = 14, .crm = 2, .opc2 = 2,
+ .type = ARM_CP_IO | ARM_CP_ALIAS,
+ .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval),
+ .nv2_redirect_offset = 0x178 | NV2_REDIR_NO_NV1,
+ .access = PL2_RW, .accessfn = access_el1nvpct,
+ .writefn = gt_phys_cval_write, .raw_writefn = raw_write },
+ { .name = "CNTV_CVAL_EL02", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 5, .crn = 14, .crm = 3, .opc2 = 2,
+ .type = ARM_CP_IO | ARM_CP_ALIAS,
+ .nv2_redirect_offset = 0x168 | NV2_REDIR_NO_NV1,
+ .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval),
+ .access = PL2_RW, .accessfn = access_el1nvvct,
+ .writefn = gt_virt_cval_write, .raw_writefn = raw_write },
+#endif
+};
+
+#ifndef CONFIG_USER_ONLY
+static const ARMCPRegInfo ats1e1_reginfo[] = {
+ { .name = "AT_S1E1RP", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 0,
+ .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+ .fgt = FGT_ATS1E1RP,
+ .accessfn = at_s1e01_access, .writefn = ats_write64 },
+ { .name = "AT_S1E1WP", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 1,
+ .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+ .fgt = FGT_ATS1E1WP,
+ .accessfn = at_s1e01_access, .writefn = ats_write64 },
+};
+
+static const ARMCPRegInfo ats1cp_reginfo[] = {
+ { .name = "ATS1CPRP",
+ .cp = 15, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 0,
+ .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+ .writefn = ats_write },
+ { .name = "ATS1CPWP",
+ .cp = 15, .opc1 = 0, .crn = 7, .crm = 9, .opc2 = 1,
+ .access = PL1_W, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC,
+ .writefn = ats_write },
+};
+#endif
+
+/*
+ * ACTLR2 and HACTLR2 map to ACTLR_EL1[63:32] and
+ * ACTLR_EL2[63:32]. They exist only if the ID_MMFR4.AC2 field
+ * is non-zero, which is never the case for ARMv7, optional in ARMv8
+ * and mandatory for ARMv8.2 and up.
+ * ACTLR2 is banked for S and NS if EL3 is AArch32. Since QEMU's
+ * implementation is RAZ/WI we can ignore this detail, as we
+ * do for ACTLR.
+ */
+static const ARMCPRegInfo actlr2_hactlr2_reginfo[] = {
+ { .name = "ACTLR2", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 3,
+ .access = PL1_RW, .accessfn = access_tacr,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "HACTLR2", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 3,
+ .access = PL2_RW, .type = ARM_CP_CONST,
+ .resetvalue = 0 },
+};
+
void register_cp_regs_for_features(ARMCPU *cpu)
{
/* Register all the coprocessor registers based on feature bits */
@@ -4892,7 +8637,8 @@ void register_cp_regs_for_features(ARMCPU *cpu)
define_arm_cp_regs(cpu, cp_reginfo);
if (!arm_feature(env, ARM_FEATURE_V8)) {
- /* Must go early as it is full of wildcards that may be
+ /*
+ * Must go early as it is full of wildcards that may be
* overridden by later definitions.
*/
define_arm_cp_regs(cpu, not_v8_cp_reginfo);
@@ -4904,72 +8650,96 @@ void register_cp_regs_for_features(ARMCPU *cpu)
{ .name = "ID_PFR0", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_pfr0 },
- /* ID_PFR1 is not a plain ARM_CP_CONST because we don't know
+ .accessfn = access_aa32_tid3,
+ .resetvalue = cpu->isar.id_pfr0 },
+ /*
+ * ID_PFR1 is not a plain ARM_CP_CONST because we don't know
* the value of the GIC field until after we define these regs.
*/
{ .name = "ID_PFR1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_NO_RAW,
+ .accessfn = access_aa32_tid3,
+#ifdef CONFIG_USER_ONLY
+ .type = ARM_CP_CONST,
+ .resetvalue = cpu->isar.id_pfr1,
+#else
+ .type = ARM_CP_NO_RAW,
+ .accessfn = access_aa32_tid3,
.readfn = id_pfr1_read,
- .writefn = arm_cp_write_ignore },
+ .writefn = arm_cp_write_ignore
+#endif
+ },
{ .name = "ID_DFR0", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_dfr0 },
+ .accessfn = access_aa32_tid3,
+ .resetvalue = cpu->isar.id_dfr0 },
{ .name = "ID_AFR0", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 3,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa32_tid3,
.resetvalue = cpu->id_afr0 },
{ .name = "ID_MMFR0", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 4,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_mmfr0 },
+ .accessfn = access_aa32_tid3,
+ .resetvalue = cpu->isar.id_mmfr0 },
{ .name = "ID_MMFR1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 5,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_mmfr1 },
+ .accessfn = access_aa32_tid3,
+ .resetvalue = cpu->isar.id_mmfr1 },
{ .name = "ID_MMFR2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 6,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_mmfr2 },
+ .accessfn = access_aa32_tid3,
+ .resetvalue = cpu->isar.id_mmfr2 },
{ .name = "ID_MMFR3", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 7,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_mmfr3 },
+ .accessfn = access_aa32_tid3,
+ .resetvalue = cpu->isar.id_mmfr3 },
{ .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_isar0 },
+ .accessfn = access_aa32_tid3,
+ .resetvalue = cpu->isar.id_isar0 },
{ .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_isar1 },
+ .accessfn = access_aa32_tid3,
+ .resetvalue = cpu->isar.id_isar1 },
{ .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_isar2 },
+ .accessfn = access_aa32_tid3,
+ .resetvalue = cpu->isar.id_isar2 },
{ .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_isar3 },
+ .accessfn = access_aa32_tid3,
+ .resetvalue = cpu->isar.id_isar3 },
{ .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_isar4 },
+ .accessfn = access_aa32_tid3,
+ .resetvalue = cpu->isar.id_isar4 },
{ .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_isar5 },
+ .accessfn = access_aa32_tid3,
+ .resetvalue = cpu->isar.id_isar5 },
{ .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_mmfr4 },
+ .accessfn = access_aa32_tid3,
+ .resetvalue = cpu->isar.id_mmfr4 },
{ .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_isar6 },
- REGINFO_SENTINEL
+ .accessfn = access_aa32_tid3,
+ .resetvalue = cpu->isar.id_isar6 },
};
define_arm_cp_regs(cpu, v6_idregs);
define_arm_cp_regs(cpu, v6_cp_reginfo);
@@ -4983,331 +8753,553 @@ void register_cp_regs_for_features(ARMCPU *cpu)
!arm_feature(env, ARM_FEATURE_PMSA)) {
define_arm_cp_regs(cpu, v7mp_cp_reginfo);
}
+ if (arm_feature(env, ARM_FEATURE_V7VE)) {
+ define_arm_cp_regs(cpu, pmovsset_cp_reginfo);
+ }
if (arm_feature(env, ARM_FEATURE_V7)) {
- /* v7 performance monitor control register: same implementor
- * field as main ID register, and we implement only the cycle
- * count register.
- */
-#ifndef CONFIG_USER_ONLY
- ARMCPRegInfo pmcr = {
- .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0,
- .access = PL0_RW,
- .type = ARM_CP_IO | ARM_CP_ALIAS,
- .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr),
- .accessfn = pmreg_access, .writefn = pmcr_write,
- .raw_writefn = raw_write,
- };
- ARMCPRegInfo pmcr64 = {
- .name = "PMCR_EL0", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 0,
- .access = PL0_RW, .accessfn = pmreg_access,
- .type = ARM_CP_IO,
- .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr),
- .resetvalue = cpu->midr & 0xff000000,
- .writefn = pmcr_write, .raw_writefn = raw_write,
- };
- define_one_arm_cp_reg(cpu, &pmcr);
- define_one_arm_cp_reg(cpu, &pmcr64);
-#endif
ARMCPRegInfo clidr = {
.name = "CLIDR", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1,
- .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->clidr
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_tid4,
+ .fgt = FGT_CLIDR_EL1,
+ .resetvalue = cpu->clidr
};
define_one_arm_cp_reg(cpu, &clidr);
define_arm_cp_regs(cpu, v7_cp_reginfo);
define_debug_regs(cpu);
+ define_pmu_regs(cpu);
} else {
define_arm_cp_regs(cpu, not_v7_cp_reginfo);
}
if (arm_feature(env, ARM_FEATURE_V8)) {
- /* AArch64 ID registers, which all have impdef reset values.
+ /*
+ * v8 ID registers, which all have impdef reset values.
* Note that within the ID register ranges the unused slots
* must all RAZ, not UNDEF; future architecture versions may
* define new registers here.
+ * ID registers which are AArch64 views of the AArch32 ID registers
+ * which already existed in v6 and v7 are handled elsewhere,
+ * in v6_idregs[].
*/
+ int i;
ARMCPRegInfo v8_idregs[] = {
- /* ID_AA64PFR0_EL1 is not a plain ARM_CP_CONST because we don't
- * know the right value for the GIC field until after we
- * define these regs.
+ /*
+ * ID_AA64PFR0_EL1 is not a plain ARM_CP_CONST in system
+ * emulation because we don't know the right value for the
+ * GIC field until after we define these regs.
*/
{ .name = "ID_AA64PFR0_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 0,
- .access = PL1_R, .type = ARM_CP_NO_RAW,
+ .access = PL1_R,
+#ifdef CONFIG_USER_ONLY
+ .type = ARM_CP_CONST,
+ .resetvalue = cpu->isar.id_aa64pfr0
+#else
+ .type = ARM_CP_NO_RAW,
+ .accessfn = access_aa64_tid3,
.readfn = id_aa64pfr0_read,
- .writefn = arm_cp_write_ignore },
+ .writefn = arm_cp_write_ignore
+#endif
+ },
{ .name = "ID_AA64PFR1_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_aa64pfr1},
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.id_aa64pfr1},
{ .name = "ID_AA64PFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64PFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 3,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64ZFR0_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 4,
.access = PL1_R, .type = ARM_CP_CONST,
- /* At present, only SVEver == 0 is defined anyway. */
- .resetvalue = 0 },
- { .name = "ID_AA64PFR5_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.id_aa64zfr0 },
+ { .name = "ID_AA64SMFR0_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 5,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = 0 },
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.id_aa64smfr0 },
{ .name = "ID_AA64PFR6_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 6,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64PFR7_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 7,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64DFR0_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_aa64dfr0 },
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.id_aa64dfr0 },
{ .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_aa64dfr1 },
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.id_aa64dfr1 },
{ .name = "ID_AA64DFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64DFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 3,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64AFR0_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 4,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = cpu->id_aa64afr0 },
{ .name = "ID_AA64AFR1_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 5,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = cpu->id_aa64afr1 },
{ .name = "ID_AA64AFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 6,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64AFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 7,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64ISAR0_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_aa64isar0 },
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.id_aa64isar0 },
{ .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_aa64isar1 },
- { .name = "ID_AA64ISAR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.id_aa64isar1 },
+ { .name = "ID_AA64ISAR2_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = 0 },
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.id_aa64isar2 },
{ .name = "ID_AA64ISAR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 3,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64ISAR4_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 4,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64ISAR5_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 5,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64ISAR6_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 6,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64ISAR7_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 7,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64MMFR0_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_aa64mmfr0 },
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.id_aa64mmfr0 },
{ .name = "ID_AA64MMFR1_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->id_aa64mmfr1 },
- { .name = "ID_AA64MMFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.id_aa64mmfr1 },
+ { .name = "ID_AA64MMFR2_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = 0 },
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.id_aa64mmfr2 },
{ .name = "ID_AA64MMFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 3,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64MMFR4_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 4,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64MMFR5_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 5,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64MMFR6_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 6,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "ID_AA64MMFR7_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 7,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "MVFR0_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 0,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->mvfr0 },
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.mvfr0 },
{ .name = "MVFR1_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 1,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->mvfr1 },
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.mvfr1 },
{ .name = "MVFR2_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = cpu->mvfr2 },
- { .name = "MVFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.mvfr2 },
+ /*
+ * "0, c0, c3, {0,1,2}" are the encodings corresponding to
+ * AArch64 MVFR[012]_EL1. Define the STATE_AA32 encoding
+ * as RAZ, since it is in the "reserved for future ID
+ * registers, RAZ" part of the AArch32 encoding space.
+ */
+ { .name = "RES_0_C0_C3_0", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 0,
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
+ .resetvalue = 0 },
+ { .name = "RES_0_C0_C3_1", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 1,
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
+ .resetvalue = 0 },
+ { .name = "RES_0_C0_C3_2", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2,
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
+ .resetvalue = 0 },
+ /*
+ * Other encodings in "0, c0, c3, ..." are STATE_BOTH because
+ * they're also RAZ for AArch64, and in v8 are gradually
+ * being filled in with the AArch64 views of newly
+ * added AArch32 ID registers.
+ */
+ { .name = "RES_0_C0_C3_3", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 3,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
- { .name = "MVFR4_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
+ { .name = "ID_PFR2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 4,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = 0 },
- { .name = "MVFR5_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.id_pfr2 },
+ { .name = "ID_DFR1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 5,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = 0 },
- { .name = "MVFR6_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.id_dfr1 },
+ { .name = "ID_MMFR5", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 6,
.access = PL1_R, .type = ARM_CP_CONST,
- .resetvalue = 0 },
- { .name = "MVFR7_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
+ .accessfn = access_aa64_tid3,
+ .resetvalue = cpu->isar.id_mmfr5 },
+ { .name = "RES_0_C0_C3_7", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 7,
.access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
.resetvalue = 0 },
{ .name = "PMCEID0", .state = ARM_CP_STATE_AA32,
.cp = 15, .opc1 = 0, .crn = 9, .crm = 12, .opc2 = 6,
.access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST,
- .resetvalue = cpu->pmceid0 },
+ .fgt = FGT_PMCEIDN_EL0,
+ .resetvalue = extract64(cpu->pmceid0, 0, 32) },
{ .name = "PMCEID0_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 6,
.access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST,
+ .fgt = FGT_PMCEIDN_EL0,
.resetvalue = cpu->pmceid0 },
{ .name = "PMCEID1", .state = ARM_CP_STATE_AA32,
.cp = 15, .opc1 = 0, .crn = 9, .crm = 12, .opc2 = 7,
.access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST,
- .resetvalue = cpu->pmceid1 },
+ .fgt = FGT_PMCEIDN_EL0,
+ .resetvalue = extract64(cpu->pmceid1, 0, 32) },
{ .name = "PMCEID1_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 7,
.access = PL0_R, .accessfn = pmreg_access, .type = ARM_CP_CONST,
+ .fgt = FGT_PMCEIDN_EL0,
.resetvalue = cpu->pmceid1 },
- REGINFO_SENTINEL
};
- /* RVBAR_EL1 is only implemented if EL1 is the highest EL */
+#ifdef CONFIG_USER_ONLY
+ static const ARMCPRegUserSpaceInfo v8_user_idregs[] = {
+ { .name = "ID_AA64PFR0_EL1",
+ .exported_bits = R_ID_AA64PFR0_FP_MASK |
+ R_ID_AA64PFR0_ADVSIMD_MASK |
+ R_ID_AA64PFR0_SVE_MASK |
+ R_ID_AA64PFR0_DIT_MASK,
+ .fixed_bits = (0x1u << R_ID_AA64PFR0_EL0_SHIFT) |
+ (0x1u << R_ID_AA64PFR0_EL1_SHIFT) },
+ { .name = "ID_AA64PFR1_EL1",
+ .exported_bits = R_ID_AA64PFR1_BT_MASK |
+ R_ID_AA64PFR1_SSBS_MASK |
+ R_ID_AA64PFR1_MTE_MASK |
+ R_ID_AA64PFR1_SME_MASK },
+ { .name = "ID_AA64PFR*_EL1_RESERVED",
+ .is_glob = true },
+ { .name = "ID_AA64ZFR0_EL1",
+ .exported_bits = R_ID_AA64ZFR0_SVEVER_MASK |
+ R_ID_AA64ZFR0_AES_MASK |
+ R_ID_AA64ZFR0_BITPERM_MASK |
+ R_ID_AA64ZFR0_BFLOAT16_MASK |
+ R_ID_AA64ZFR0_B16B16_MASK |
+ R_ID_AA64ZFR0_SHA3_MASK |
+ R_ID_AA64ZFR0_SM4_MASK |
+ R_ID_AA64ZFR0_I8MM_MASK |
+ R_ID_AA64ZFR0_F32MM_MASK |
+ R_ID_AA64ZFR0_F64MM_MASK },
+ { .name = "ID_AA64SMFR0_EL1",
+ .exported_bits = R_ID_AA64SMFR0_F32F32_MASK |
+ R_ID_AA64SMFR0_BI32I32_MASK |
+ R_ID_AA64SMFR0_B16F32_MASK |
+ R_ID_AA64SMFR0_F16F32_MASK |
+ R_ID_AA64SMFR0_I8I32_MASK |
+ R_ID_AA64SMFR0_F16F16_MASK |
+ R_ID_AA64SMFR0_B16B16_MASK |
+ R_ID_AA64SMFR0_I16I32_MASK |
+ R_ID_AA64SMFR0_F64F64_MASK |
+ R_ID_AA64SMFR0_I16I64_MASK |
+ R_ID_AA64SMFR0_SMEVER_MASK |
+ R_ID_AA64SMFR0_FA64_MASK },
+ { .name = "ID_AA64MMFR0_EL1",
+ .exported_bits = R_ID_AA64MMFR0_ECV_MASK,
+ .fixed_bits = (0xfu << R_ID_AA64MMFR0_TGRAN64_SHIFT) |
+ (0xfu << R_ID_AA64MMFR0_TGRAN4_SHIFT) },
+ { .name = "ID_AA64MMFR1_EL1",
+ .exported_bits = R_ID_AA64MMFR1_AFP_MASK },
+ { .name = "ID_AA64MMFR2_EL1",
+ .exported_bits = R_ID_AA64MMFR2_AT_MASK },
+ { .name = "ID_AA64MMFR*_EL1_RESERVED",
+ .is_glob = true },
+ { .name = "ID_AA64DFR0_EL1",
+ .fixed_bits = (0x6u << R_ID_AA64DFR0_DEBUGVER_SHIFT) },
+ { .name = "ID_AA64DFR1_EL1" },
+ { .name = "ID_AA64DFR*_EL1_RESERVED",
+ .is_glob = true },
+ { .name = "ID_AA64AFR*",
+ .is_glob = true },
+ { .name = "ID_AA64ISAR0_EL1",
+ .exported_bits = R_ID_AA64ISAR0_AES_MASK |
+ R_ID_AA64ISAR0_SHA1_MASK |
+ R_ID_AA64ISAR0_SHA2_MASK |
+ R_ID_AA64ISAR0_CRC32_MASK |
+ R_ID_AA64ISAR0_ATOMIC_MASK |
+ R_ID_AA64ISAR0_RDM_MASK |
+ R_ID_AA64ISAR0_SHA3_MASK |
+ R_ID_AA64ISAR0_SM3_MASK |
+ R_ID_AA64ISAR0_SM4_MASK |
+ R_ID_AA64ISAR0_DP_MASK |
+ R_ID_AA64ISAR0_FHM_MASK |
+ R_ID_AA64ISAR0_TS_MASK |
+ R_ID_AA64ISAR0_RNDR_MASK },
+ { .name = "ID_AA64ISAR1_EL1",
+ .exported_bits = R_ID_AA64ISAR1_DPB_MASK |
+ R_ID_AA64ISAR1_APA_MASK |
+ R_ID_AA64ISAR1_API_MASK |
+ R_ID_AA64ISAR1_JSCVT_MASK |
+ R_ID_AA64ISAR1_FCMA_MASK |
+ R_ID_AA64ISAR1_LRCPC_MASK |
+ R_ID_AA64ISAR1_GPA_MASK |
+ R_ID_AA64ISAR1_GPI_MASK |
+ R_ID_AA64ISAR1_FRINTTS_MASK |
+ R_ID_AA64ISAR1_SB_MASK |
+ R_ID_AA64ISAR1_BF16_MASK |
+ R_ID_AA64ISAR1_DGH_MASK |
+ R_ID_AA64ISAR1_I8MM_MASK },
+ { .name = "ID_AA64ISAR2_EL1",
+ .exported_bits = R_ID_AA64ISAR2_WFXT_MASK |
+ R_ID_AA64ISAR2_RPRES_MASK |
+ R_ID_AA64ISAR2_GPA3_MASK |
+ R_ID_AA64ISAR2_APA3_MASK |
+ R_ID_AA64ISAR2_MOPS_MASK |
+ R_ID_AA64ISAR2_BC_MASK |
+ R_ID_AA64ISAR2_RPRFM_MASK |
+ R_ID_AA64ISAR2_CSSC_MASK },
+ { .name = "ID_AA64ISAR*_EL1_RESERVED",
+ .is_glob = true },
+ };
+ modify_arm_cp_regs(v8_idregs, v8_user_idregs);
+#endif
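The effect of the user-only masking above, under the assumed semantics of modify_arm_cp_regs() (a sketch, not a statement of the helper's actual contract):

/*
 * For each reginfo matched by name or glob, the reset value visible to
 * linux-user mode becomes roughly
 *     (resetvalue & exported_bits) | fixed_bits
 * so ID fields that are not explicitly exported read as zero.
 */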
+ /*
+ * RVBAR_EL1 and RMR_EL1 only implemented if EL1 is the highest EL.
+ * TODO: For RMR, a write with bit 1 set should do something with
+ * cpu_reset(). In the meantime, "the bit is strictly a request",
+ * so we remain within the spec by simply ignoring writes.
+ */
if (!arm_feature(env, ARM_FEATURE_EL3) &&
!arm_feature(env, ARM_FEATURE_EL2)) {
- ARMCPRegInfo rvbar = {
- .name = "RVBAR_EL1", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1,
- .type = ARM_CP_CONST, .access = PL1_R, .resetvalue = cpu->rvbar
+ ARMCPRegInfo el1_reset_regs[] = {
+ { .name = "RVBAR_EL1", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1,
+ .access = PL1_R,
+ .fieldoffset = offsetof(CPUARMState, cp15.rvbar) },
+ { .name = "RMR_EL1", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 2,
+ .access = PL1_RW, .type = ARM_CP_CONST,
+ .resetvalue = arm_feature(env, ARM_FEATURE_AARCH64) }
};
- define_one_arm_cp_reg(cpu, &rvbar);
+ define_arm_cp_regs(cpu, el1_reset_regs);
}
define_arm_cp_regs(cpu, v8_idregs);
define_arm_cp_regs(cpu, v8_cp_reginfo);
+ if (cpu_isar_feature(aa64_aa32_el1, cpu)) {
+ define_arm_cp_regs(cpu, v8_aa32_el1_reginfo);
+ }
+
+ for (i = 4; i < 16; i++) {
+ /*
+ * Encodings in "0, c0, {c4-c7}, {0-7}" are RAZ for AArch32.
+ * For pre-v8 cores there are RAZ patterns for these in
+ * id_pre_v8_midr_cp_reginfo[]; for v8 we do that here.
+ * v8 extends the "must RAZ" part of the ID register space
+ * to also cover c0, 0, c{8-15}, {0-7}.
+ * These are STATE_AA32 because in the AArch64 sysreg space
+ * c4-c7 is where the AArch64 ID registers live (and we've
+ * already defined those in v8_idregs[]), and c8-c15 are not
+ * "must RAZ" for AArch64.
+ */
+ g_autofree char *name = g_strdup_printf("RES_0_C0_C%d_X", i);
+ ARMCPRegInfo v8_aa32_raz_idregs = {
+ .name = name,
+ .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 0, .crn = 0, .crm = i, .opc2 = CP_ANY,
+ .access = PL1_R, .type = ARM_CP_CONST,
+ .accessfn = access_aa64_tid3,
+ .resetvalue = 0 };
+ define_one_arm_cp_reg(cpu, &v8_aa32_raz_idregs);
+ }
}
- if (arm_feature(env, ARM_FEATURE_EL2)) {
+
+ /*
+ * Register the base EL2 cpregs.
+ * Pre v8, these registers are implemented only as part of the
+ * Virtualization Extensions (EL2 present). Beginning with v8,
+ * if EL2 is missing but EL3 is enabled, mostly these become
+ * RES0 from EL3, with some specific exceptions.
+ */
+ if (arm_feature(env, ARM_FEATURE_EL2)
+ || (arm_feature(env, ARM_FEATURE_EL3)
+ && arm_feature(env, ARM_FEATURE_V8))) {
uint64_t vmpidr_def = mpidr_read_val(env);
ARMCPRegInfo vpidr_regs[] = {
{ .name = "VPIDR", .state = ARM_CP_STATE_AA32,
.cp = 15, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
.access = PL2_RW, .accessfn = access_el3_aa32ns,
- .resetvalue = cpu->midr, .type = ARM_CP_ALIAS,
+ .resetvalue = cpu->midr,
+ .type = ARM_CP_ALIAS | ARM_CP_EL3_NO_EL2_C_NZ,
.fieldoffset = offsetoflow32(CPUARMState, cp15.vpidr_el2) },
{ .name = "VPIDR_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
.access = PL2_RW, .resetvalue = cpu->midr,
+ .type = ARM_CP_EL3_NO_EL2_C_NZ,
+ .nv2_redirect_offset = 0x88,
.fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) },
{ .name = "VMPIDR", .state = ARM_CP_STATE_AA32,
.cp = 15, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
.access = PL2_RW, .accessfn = access_el3_aa32ns,
- .resetvalue = vmpidr_def, .type = ARM_CP_ALIAS,
+ .resetvalue = vmpidr_def,
+ .type = ARM_CP_ALIAS | ARM_CP_EL3_NO_EL2_C_NZ,
.fieldoffset = offsetoflow32(CPUARMState, cp15.vmpidr_el2) },
{ .name = "VMPIDR_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
- .access = PL2_RW,
- .resetvalue = vmpidr_def,
+ .access = PL2_RW, .resetvalue = vmpidr_def,
+ .type = ARM_CP_EL3_NO_EL2_C_NZ,
+ .nv2_redirect_offset = 0x50,
.fieldoffset = offsetof(CPUARMState, cp15.vmpidr_el2) },
- REGINFO_SENTINEL
};
+ /*
+ * The only field of MDCR_EL2 that has a defined architectural reset
+ * value is MDCR_EL2.HPMN which should reset to the value of PMCR_EL0.N.
+ */
+ ARMCPRegInfo mdcr_el2 = {
+ .name = "MDCR_EL2", .state = ARM_CP_STATE_BOTH, .type = ARM_CP_IO,
+ .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 1,
+ .writefn = mdcr_el2_write,
+ .access = PL2_RW, .resetvalue = pmu_num_counters(env),
+ .fieldoffset = offsetof(CPUARMState, cp15.mdcr_el2),
+ };
+ define_one_arm_cp_reg(cpu, &mdcr_el2);
define_arm_cp_regs(cpu, vpidr_regs);
define_arm_cp_regs(cpu, el2_cp_reginfo);
if (arm_feature(env, ARM_FEATURE_V8)) {
define_arm_cp_regs(cpu, el2_v8_cp_reginfo);
}
- /* RVBAR_EL2 is only implemented if EL2 is the highest EL */
- if (!arm_feature(env, ARM_FEATURE_EL3)) {
- ARMCPRegInfo rvbar = {
- .name = "RVBAR_EL2", .state = ARM_CP_STATE_AA64,
- .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 1,
- .type = ARM_CP_CONST, .access = PL2_R, .resetvalue = cpu->rvbar
- };
- define_one_arm_cp_reg(cpu, &rvbar);
+ if (cpu_isar_feature(aa64_sel2, cpu)) {
+ define_arm_cp_regs(cpu, el2_sec_cp_reginfo);
}
- } else {
- /* If EL2 is missing but higher ELs are enabled, we need to
- * register the no_el2 reginfos.
+ /*
+ * RVBAR_EL2 and RMR_EL2 only implemented if EL2 is the highest EL.
+ * See commentary near RMR_EL1.
*/
- if (arm_feature(env, ARM_FEATURE_EL3)) {
- /* When EL3 exists but not EL2, VPIDR and VMPIDR take the value
- * of MIDR_EL1 and MPIDR_EL1.
- */
- ARMCPRegInfo vpidr_regs[] = {
- { .name = "VPIDR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
- .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
- .type = ARM_CP_CONST, .resetvalue = cpu->midr,
- .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) },
- { .name = "VMPIDR_EL2", .state = ARM_CP_STATE_BOTH,
- .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
- .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
- .type = ARM_CP_NO_RAW,
- .writefn = arm_cp_write_ignore, .readfn = mpidr_read },
- REGINFO_SENTINEL
+ if (!arm_feature(env, ARM_FEATURE_EL3)) {
+ static const ARMCPRegInfo el2_reset_regs[] = {
+ { .name = "RVBAR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 1,
+ .access = PL2_R,
+ .fieldoffset = offsetof(CPUARMState, cp15.rvbar) },
+ { .name = "RVBAR", .type = ARM_CP_ALIAS,
+ .cp = 15, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1,
+ .access = PL2_R,
+ .fieldoffset = offsetof(CPUARMState, cp15.rvbar) },
+ { .name = "RMR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 2,
+ .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 1 },
};
- define_arm_cp_regs(cpu, vpidr_regs);
- define_arm_cp_regs(cpu, el3_no_el2_cp_reginfo);
- if (arm_feature(env, ARM_FEATURE_V8)) {
- define_arm_cp_regs(cpu, el3_no_el2_v8_cp_reginfo);
- }
+ define_arm_cp_regs(cpu, el2_reset_regs);
}
}
+
+ /* Register the base EL3 cpregs. */
if (arm_feature(env, ARM_FEATURE_EL3)) {
define_arm_cp_regs(cpu, el3_cp_reginfo);
ARMCPRegInfo el3_regs[] = {
{ .name = "RVBAR_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 12, .crm = 0, .opc2 = 1,
- .type = ARM_CP_CONST, .access = PL3_R, .resetvalue = cpu->rvbar },
+ .access = PL3_R,
+ .fieldoffset = offsetof(CPUARMState, cp15.rvbar), },
+ { .name = "RMR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 12, .crm = 0, .opc2 = 2,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 1 },
+ { .name = "RMR", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 2,
+ .access = PL3_RW, .type = ARM_CP_CONST,
+ .resetvalue = arm_feature(env, ARM_FEATURE_AARCH64) },
{ .name = "SCTLR_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 1, .crm = 0, .opc2 = 0,
.access = PL3_RW,
.raw_writefn = raw_write, .writefn = sctlr_write,
.fieldoffset = offsetof(CPUARMState, cp15.sctlr_el[3]),
.resetvalue = cpu->reset_sctlr },
- REGINFO_SENTINEL
};
define_arm_cp_regs(cpu, el3_regs);
}
- /* The behaviour of NSACR is sufficiently various that we don't
+ /*
+ * The behaviour of NSACR is sufficiently various that we don't
* try to describe it in a single reginfo:
* if EL3 is 64 bit, then trap to EL3 from S EL1,
* reads as constant 0xc00 from NS EL1 and NS EL2
@@ -5317,7 +9309,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
*/
if (arm_feature(env, ARM_FEATURE_EL3)) {
if (arm_feature(env, ARM_FEATURE_AARCH64)) {
- ARMCPRegInfo nsacr = {
+ static const ARMCPRegInfo nsacr = {
.name = "NSACR", .type = ARM_CP_CONST,
.cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 2,
.access = PL1_RW, .accessfn = nsacr_access,
@@ -5325,7 +9317,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
};
define_one_arm_cp_reg(cpu, &nsacr);
} else {
- ARMCPRegInfo nsacr = {
+ static const ARMCPRegInfo nsacr = {
.name = "NSACR",
.cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 2,
.access = PL3_RW | PL1_R,
@@ -5336,7 +9328,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
}
} else {
if (arm_feature(env, ARM_FEATURE_V8)) {
- ARMCPRegInfo nsacr = {
+ static const ARMCPRegInfo nsacr = {
.name = "NSACR", .type = ARM_CP_CONST,
.cp = 15, .opc1 = 0, .crn = 1, .crm = 1, .opc2 = 2,
.access = PL1_R,
@@ -5358,6 +9350,10 @@ void register_cp_regs_for_features(ARMCPU *cpu)
} else {
define_arm_cp_regs(cpu, vmsa_pmsa_cp_reginfo);
define_arm_cp_regs(cpu, vmsa_cp_reginfo);
+ /* TTBCR2 is introduced with ARMv8.2-AA32HPD. */
+ if (cpu_isar_feature(aa32_hpd, cpu)) {
+ define_one_arm_cp_reg(cpu, &ttbcr2_reginfo);
+ }
}
if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
define_arm_cp_regs(cpu, t2ee_cp_reginfo);
@@ -5365,7 +9361,36 @@ void register_cp_regs_for_features(ARMCPU *cpu)
if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) {
define_arm_cp_regs(cpu, generic_timer_cp_reginfo);
}
+ if (cpu_isar_feature(aa64_ecv_traps, cpu)) {
+ define_arm_cp_regs(cpu, gen_timer_ecv_cp_reginfo);
+ }
+#ifndef CONFIG_USER_ONLY
+ if (cpu_isar_feature(aa64_ecv, cpu)) {
+ define_one_arm_cp_reg(cpu, &gen_timer_cntpoff_reginfo);
+ }
+#endif
if (arm_feature(env, ARM_FEATURE_VAPA)) {
+ ARMCPRegInfo vapa_cp_reginfo[] = {
+ { .name = "PAR", .cp = 15, .crn = 7, .crm = 4, .opc1 = 0, .opc2 = 0,
+ .access = PL1_RW, .resetvalue = 0,
+ .bank_fieldoffsets = { offsetoflow32(CPUARMState, cp15.par_s),
+ offsetoflow32(CPUARMState, cp15.par_ns) },
+ .writefn = par_write},
+#ifndef CONFIG_USER_ONLY
+ /* This underdecoding is safe because the reginfo is NO_RAW. */
+ { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY,
+ .access = PL1_W, .accessfn = ats_access,
+ .writefn = ats_write, .type = ARM_CP_NO_RAW | ARM_CP_RAISES_EXC },
+#endif
+ };
+
+ /*
+ * When LPAE exists, this 32-bit PAR register is an alias of the
+ * 64-bit AArch32 PAR register defined in lpae_cp_reginfo[].
+ */
+ if (arm_feature(env, ARM_FEATURE_LPAE)) {
+ vapa_cp_reginfo[0].type = ARM_CP_ALIAS | ARM_CP_NO_GDB;
+ }
define_arm_cp_regs(cpu, vapa_cp_reginfo);
}
if (arm_feature(env, ARM_FEATURE_CACHE_TEST_CLEAN)) {
@@ -5392,13 +9417,18 @@ void register_cp_regs_for_features(ARMCPU *cpu)
if (arm_feature(env, ARM_FEATURE_LPAE)) {
define_arm_cp_regs(cpu, lpae_cp_reginfo);
}
- /* Slightly awkwardly, the OMAP and StrongARM cores need all of
+ if (cpu_isar_feature(aa32_jazelle, cpu)) {
+ define_arm_cp_regs(cpu, jazelle_regs);
+ }
+ /*
+ * Slightly awkwardly, the OMAP and StrongARM cores need all of
* cp15 crn=0 to be writes-ignored, whereas for other cores they should
* be read-only (ie write causes UNDEF exception).
*/
{
ARMCPRegInfo id_pre_v8_midr_cp_reginfo[] = {
- /* Pre-v8 MIDR space.
+ /*
+ * Pre-v8 MIDR space.
* Note that the MIDR isn't a simple constant register because
* of the TI925 behaviour where writes to another register can
* cause the MIDR value to change.
@@ -5430,46 +9460,55 @@ void register_cp_regs_for_features(ARMCPU *cpu)
{ .name = "DUMMY",
.cp = 15, .crn = 0, .crm = 7, .opc1 = 0, .opc2 = CP_ANY,
.access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
- REGINFO_SENTINEL
};
ARMCPRegInfo id_v8_midr_cp_reginfo[] = {
{ .name = "MIDR_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 0,
.access = PL1_R, .type = ARM_CP_NO_RAW, .resetvalue = cpu->midr,
+ .fgt = FGT_MIDR_EL1,
.fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid),
.readfn = midr_read },
- /* crn = 0 op1 = 0 crm = 0 op2 = 4,7 : AArch32 aliases of MIDR */
- { .name = "MIDR", .type = ARM_CP_ALIAS | ARM_CP_CONST,
- .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 4,
- .access = PL1_R, .resetvalue = cpu->midr },
+ /* crn = 0 op1 = 0 crm = 0 op2 = 7 : AArch32 aliases of MIDR */
{ .name = "MIDR", .type = ARM_CP_ALIAS | ARM_CP_CONST,
.cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 7,
.access = PL1_R, .resetvalue = cpu->midr },
{ .name = "REVIDR_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 6,
- .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->revidr },
- REGINFO_SENTINEL
+ .access = PL1_R,
+ .accessfn = access_aa64_tid1,
+ .fgt = FGT_REVIDR_EL1,
+ .type = ARM_CP_CONST, .resetvalue = cpu->revidr },
+ };
+ ARMCPRegInfo id_v8_midr_alias_cp_reginfo = {
+ .name = "MIDR", .type = ARM_CP_ALIAS | ARM_CP_CONST | ARM_CP_NO_GDB,
+ .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 4,
+ .access = PL1_R, .resetvalue = cpu->midr
};
ARMCPRegInfo id_cp_reginfo[] = {
/* These are common to v8 and pre-v8 */
{ .name = "CTR",
.cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 1,
- .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
+ .access = PL1_R, .accessfn = ctr_el0_access,
+ .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
{ .name = "CTR_EL0", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0,
.access = PL0_R, .accessfn = ctr_el0_access,
+ .fgt = FGT_CTR_EL0,
.type = ARM_CP_CONST, .resetvalue = cpu->ctr },
/* TCMTR and TLBTR exist in v8 but have no 64-bit versions */
{ .name = "TCMTR",
.cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 2,
- .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
- REGINFO_SENTINEL
+ .access = PL1_R,
+ .accessfn = access_aa32_tid1,
+ .type = ARM_CP_CONST, .resetvalue = 0 },
};
/* TLBTR is specific to VMSA */
ARMCPRegInfo id_tlbtr_reginfo = {
.name = "TLBTR",
.cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 3,
- .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0,
+ .access = PL1_R,
+ .accessfn = access_aa32_tid1,
+ .type = ARM_CP_CONST, .resetvalue = 0,
};
/* MPUIR is specific to PMSA V6+ */
ARMCPRegInfo id_mpuir_reginfo = {
@@ -5478,44 +9517,147 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.access = PL1_R, .type = ARM_CP_CONST,
.resetvalue = cpu->pmsav7_dregion << 8
};
- ARMCPRegInfo crn0_wi_reginfo = {
+ /* HMPUIR is specific to PMSA V8 */
+ ARMCPRegInfo id_hmpuir_reginfo = {
+ .name = "HMPUIR",
+ .cp = 15, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 4,
+ .access = PL2_R, .type = ARM_CP_CONST,
+ .resetvalue = cpu->pmsav8r_hdregion
+ };
+ static const ARMCPRegInfo crn0_wi_reginfo = {
.name = "CRN0_WI", .cp = 15, .crn = 0, .crm = CP_ANY,
.opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_W,
.type = ARM_CP_NOP | ARM_CP_OVERRIDE
};
+#ifdef CONFIG_USER_ONLY
+ static const ARMCPRegUserSpaceInfo id_v8_user_midr_cp_reginfo[] = {
+ { .name = "MIDR_EL1",
+ .exported_bits = R_MIDR_EL1_REVISION_MASK |
+ R_MIDR_EL1_PARTNUM_MASK |
+ R_MIDR_EL1_ARCHITECTURE_MASK |
+ R_MIDR_EL1_VARIANT_MASK |
+ R_MIDR_EL1_IMPLEMENTER_MASK },
+ { .name = "REVIDR_EL1" },
+ };
+ modify_arm_cp_regs(id_v8_midr_cp_reginfo, id_v8_user_midr_cp_reginfo);
+#endif
if (arm_feature(env, ARM_FEATURE_OMAPCP) ||
arm_feature(env, ARM_FEATURE_STRONGARM)) {
- ARMCPRegInfo *r;
- /* Register the blanket "writes ignored" value first to cover the
+ size_t i;
+ /*
+ * Register the blanket "writes ignored" value first to cover the
* whole space. Then update the specific ID registers to allow write
* access, so that they ignore writes rather than causing them to
* UNDEF.
*/
define_one_arm_cp_reg(cpu, &crn0_wi_reginfo);
- for (r = id_pre_v8_midr_cp_reginfo;
- r->type != ARM_CP_SENTINEL; r++) {
- r->access = PL1_RW;
+ for (i = 0; i < ARRAY_SIZE(id_pre_v8_midr_cp_reginfo); ++i) {
+ id_pre_v8_midr_cp_reginfo[i].access = PL1_RW;
}
- for (r = id_cp_reginfo; r->type != ARM_CP_SENTINEL; r++) {
- r->access = PL1_RW;
+ for (i = 0; i < ARRAY_SIZE(id_cp_reginfo); ++i) {
+ id_cp_reginfo[i].access = PL1_RW;
}
id_mpuir_reginfo.access = PL1_RW;
id_tlbtr_reginfo.access = PL1_RW;
}
if (arm_feature(env, ARM_FEATURE_V8)) {
define_arm_cp_regs(cpu, id_v8_midr_cp_reginfo);
+ if (!arm_feature(env, ARM_FEATURE_PMSA)) {
+ define_one_arm_cp_reg(cpu, &id_v8_midr_alias_cp_reginfo);
+ }
} else {
define_arm_cp_regs(cpu, id_pre_v8_midr_cp_reginfo);
}
define_arm_cp_regs(cpu, id_cp_reginfo);
if (!arm_feature(env, ARM_FEATURE_PMSA)) {
define_one_arm_cp_reg(cpu, &id_tlbtr_reginfo);
+ } else if (arm_feature(env, ARM_FEATURE_PMSA) &&
+ arm_feature(env, ARM_FEATURE_V8)) {
+ uint32_t i = 0;
+ char *tmp_string;
+
+ define_one_arm_cp_reg(cpu, &id_mpuir_reginfo);
+ define_one_arm_cp_reg(cpu, &id_hmpuir_reginfo);
+ define_arm_cp_regs(cpu, pmsav8r_cp_reginfo);
+
+ /* Register aliases are only valid for the first 32 indexes */
+ for (i = 0; i < MIN(cpu->pmsav7_dregion, 32); ++i) {
+ uint8_t crm = 0b1000 | extract32(i, 1, 3);
+ uint8_t opc1 = extract32(i, 4, 1);
+ uint8_t opc2 = extract32(i, 0, 1) << 2;
+
+ tmp_string = g_strdup_printf("PRBAR%u", i);
+ ARMCPRegInfo tmp_prbarn_reginfo = {
+ .name = tmp_string, .type = ARM_CP_ALIAS | ARM_CP_NO_RAW,
+ .cp = 15, .opc1 = opc1, .crn = 6, .crm = crm, .opc2 = opc2,
+ .access = PL1_RW, .resetvalue = 0,
+ .accessfn = access_tvm_trvm,
+ .writefn = pmsav8r_regn_write, .readfn = pmsav8r_regn_read
+ };
+ define_one_arm_cp_reg(cpu, &tmp_prbarn_reginfo);
+ g_free(tmp_string);
+
+ opc2 = extract32(i, 0, 1) << 2 | 0x1;
+ tmp_string = g_strdup_printf("PRLAR%u", i);
+ ARMCPRegInfo tmp_prlarn_reginfo = {
+ .name = tmp_string, .type = ARM_CP_ALIAS | ARM_CP_NO_RAW,
+ .cp = 15, .opc1 = opc1, .crn = 6, .crm = crm, .opc2 = opc2,
+ .access = PL1_RW, .resetvalue = 0,
+ .accessfn = access_tvm_trvm,
+ .writefn = pmsav8r_regn_write, .readfn = pmsav8r_regn_read
+ };
+ define_one_arm_cp_reg(cpu, &tmp_prlarn_reginfo);
+ g_free(tmp_string);
+ }
+
+ /* Register aliases are only valid for the first 32 indexes */
+ for (i = 0; i < MIN(cpu->pmsav8r_hdregion, 32); ++i) {
+ uint8_t crm = 0b1000 | extract32(i, 1, 3);
+ uint8_t opc1 = 0b100 | extract32(i, 4, 1);
+ uint8_t opc2 = extract32(i, 0, 1) << 2;
+
+ tmp_string = g_strdup_printf("HPRBAR%u", i);
+ ARMCPRegInfo tmp_hprbarn_reginfo = {
+ .name = tmp_string,
+ .type = ARM_CP_NO_RAW,
+ .cp = 15, .opc1 = opc1, .crn = 6, .crm = crm, .opc2 = opc2,
+ .access = PL2_RW, .resetvalue = 0,
+ .writefn = pmsav8r_regn_write, .readfn = pmsav8r_regn_read
+ };
+ define_one_arm_cp_reg(cpu, &tmp_hprbarn_reginfo);
+ g_free(tmp_string);
+
+ opc2 = extract32(i, 0, 1) << 2 | 0x1;
+ tmp_string = g_strdup_printf("HPRLAR%u", i);
+ ARMCPRegInfo tmp_hprlarn_reginfo = {
+ .name = tmp_string,
+ .type = ARM_CP_NO_RAW,
+ .cp = 15, .opc1 = opc1, .crn = 6, .crm = crm, .opc2 = opc2,
+ .access = PL2_RW, .resetvalue = 0,
+ .writefn = pmsav8r_regn_write, .readfn = pmsav8r_regn_read
+ };
+ define_one_arm_cp_reg(cpu, &tmp_hprlarn_reginfo);
+ g_free(tmp_string);
+ }
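As a quick sanity check of the encoding arithmetic in the two loops above, here is a minimal standalone sketch that mirrors it; extract32() is re-implemented locally in simplified form (no range checks) because QEMU's helper is assumed rather than included, and the printed format is purely illustrative:

#include <stdint.h>
#include <stdio.h>

/* simplified local stand-in for QEMU's extract32() */
static unsigned extract32(uint32_t value, int start, int length)
{
    return (value >> start) & (~0u >> (32 - length));
}

int main(void)
{
    for (unsigned i = 0; i < 20; i += 5) {
        unsigned crm  = 0x8 | extract32(i, 1, 3);   /* c8..c15 */
        unsigned opc1 = extract32(i, 4, 1);         /* HPRBARn ORs in 0b100 */
        unsigned opc2 = extract32(i, 0, 1) << 2;    /* PRLARn also sets bit 0 */

        printf("PRBAR%-2u -> p15, opc1=%u, c6, c%u, opc2=%u"
               " (PRLAR%u: opc2=%u)\n",
               i, opc1, crm, opc2, i, opc2 | 1);
    }
    return 0;
}

For example index 5 comes out as p15, opc1=0, c6, c10, opc2=4 for PRBAR5 and opc2=5 for PRLAR5, matching what the loops register.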
} else if (arm_feature(env, ARM_FEATURE_V7)) {
define_one_arm_cp_reg(cpu, &id_mpuir_reginfo);
}
}
if (arm_feature(env, ARM_FEATURE_MPIDR)) {
+ ARMCPRegInfo mpidr_cp_reginfo[] = {
+ { .name = "MPIDR_EL1", .state = ARM_CP_STATE_BOTH,
+ .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
+ .fgt = FGT_MPIDR_EL1,
+ .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW },
+ };
+#ifdef CONFIG_USER_ONLY
+ static const ARMCPRegUserSpaceInfo mpidr_user_cp_reginfo[] = {
+ { .name = "MPIDR_EL1",
+ .fixed_bits = 0x0000000080000000 },
+ };
+ modify_arm_cp_regs(mpidr_cp_reginfo, mpidr_user_cp_reginfo);
+#endif
define_arm_cp_regs(cpu, mpidr_cp_reginfo);
}
@@ -5523,8 +9665,9 @@ void register_cp_regs_for_features(ARMCPU *cpu)
ARMCPRegInfo auxcr_reginfo[] = {
{ .name = "ACTLR_EL1", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 1,
- .access = PL1_RW, .type = ARM_CP_CONST,
- .resetvalue = cpu->reset_auxcr },
+ .access = PL1_RW, .accessfn = access_tacr,
+ .nv2_redirect_offset = 0x118,
+ .type = ARM_CP_CONST, .resetvalue = cpu->reset_auxcr },
{ .name = "ACTLR_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 1,
.access = PL2_RW, .type = ARM_CP_CONST,
@@ -5533,36 +9676,40 @@ void register_cp_regs_for_features(ARMCPU *cpu)
.opc0 = 3, .opc1 = 6, .crn = 1, .crm = 0, .opc2 = 1,
.access = PL3_RW, .type = ARM_CP_CONST,
.resetvalue = 0 },
- REGINFO_SENTINEL
};
define_arm_cp_regs(cpu, auxcr_reginfo);
- if (arm_feature(env, ARM_FEATURE_V8)) {
- /* HACTLR2 maps to ACTLR_EL2[63:32] and is not in ARMv7 */
- ARMCPRegInfo hactlr2_reginfo = {
- .name = "HACTLR2", .state = ARM_CP_STATE_AA32,
- .cp = 15, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 3,
- .access = PL2_RW, .type = ARM_CP_CONST,
- .resetvalue = 0
- };
- define_one_arm_cp_reg(cpu, &hactlr2_reginfo);
+ if (cpu_isar_feature(aa32_ac2, cpu)) {
+ define_arm_cp_regs(cpu, actlr2_hactlr2_reginfo);
}
}
if (arm_feature(env, ARM_FEATURE_CBAR)) {
- if (arm_feature(env, ARM_FEATURE_AARCH64)) {
+ /*
+ * CBAR is IMPDEF, but common on Arm Cortex-A implementations.
+ * There are two flavours:
+ * (1) older 32-bit only cores have a simple 32-bit CBAR
+ * (2) 64-bit cores have a 64-bit CBAR visible to AArch64, plus a
+ * 32-bit register visible to AArch32 at a different encoding
+ * to the "flavour 1" register and with the bits rearranged to
+ * be able to squash a 64-bit address into the 32-bit view.
+ * We distinguish the two via the ARM_FEATURE_AARCH64 flag, but
+ * in future if we support AArch32-only configs of some of the
+ * AArch64 cores we might need to add a specific feature flag
+ * to indicate cores with "flavour 2" CBAR.
+ */
+ if (arm_feature(env, ARM_FEATURE_V8)) {
/* 32 bit view is [31:18] 0...0 [43:32]. */
uint32_t cbar32 = (extract64(cpu->reset_cbar, 18, 14) << 18)
| extract64(cpu->reset_cbar, 32, 12);
ARMCPRegInfo cbar_reginfo[] = {
{ .name = "CBAR",
.type = ARM_CP_CONST,
- .cp = 15, .crn = 15, .crm = 0, .opc1 = 4, .opc2 = 0,
- .access = PL1_R, .resetvalue = cpu->reset_cbar },
+ .cp = 15, .crn = 15, .crm = 3, .opc1 = 1, .opc2 = 0,
+ .access = PL1_R, .resetvalue = cbar32 },
{ .name = "CBAR_EL1", .state = ARM_CP_STATE_AA64,
.type = ARM_CP_CONST,
.opc0 = 3, .opc1 = 1, .crn = 15, .crm = 3, .opc2 = 0,
- .access = PL1_R, .resetvalue = cbar32 },
- REGINFO_SENTINEL
+ .access = PL1_R, .resetvalue = cpu->reset_cbar },
};
/* We don't implement a r/w 64 bit CBAR currently */
assert(arm_feature(env, ARM_FEATURE_CBAR_RO));
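A small standalone sketch of the "flavour 2" 32-bit view computed above, using a made-up reset_cbar value (not taken from any real CPU model) and a simplified local extract64():

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* simplified local stand-in for QEMU's extract64() */
static uint64_t extract64(uint64_t value, int start, int length)
{
    return (value >> start) & (~0ull >> (64 - length));
}

int main(void)
{
    uint64_t reset_cbar = 0x000000302f000000ull;    /* hypothetical */
    uint32_t cbar32 = (extract64(reset_cbar, 18, 14) << 18)
                      | extract64(reset_cbar, 32, 12);

    /* prints CBAR_EL1=0x000000302f000000 CBAR(AArch32)=0x2f000030 */
    printf("CBAR_EL1=0x%016" PRIx64 " CBAR(AArch32)=0x%08" PRIx32 "\n",
           reset_cbar, cbar32);
    return 0;
}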
@@ -5571,7 +9718,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
ARMCPRegInfo cbar = {
.name = "CBAR",
.cp = 15, .crn = 15, .crm = 0, .opc1 = 4, .opc2 = 0,
- .access = PL1_R|PL3_W, .resetvalue = cpu->reset_cbar,
+ .access = PL1_R | PL3_W, .resetvalue = cpu->reset_cbar,
.fieldoffset = offsetof(CPUARMState,
cp15.c15_config_base_address)
};
@@ -5585,14 +9732,16 @@ void register_cp_regs_for_features(ARMCPU *cpu)
}
if (arm_feature(env, ARM_FEATURE_VBAR)) {
- ARMCPRegInfo vbar_cp_reginfo[] = {
+ static const ARMCPRegInfo vbar_cp_reginfo[] = {
{ .name = "VBAR", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0,
.access = PL1_RW, .writefn = vbar_write,
+ .accessfn = access_nv1,
+ .fgt = FGT_VBAR_EL1,
+ .nv2_redirect_offset = 0x250 | NV2_REDIR_NV1,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.vbar_s),
offsetof(CPUARMState, cp15.vbar_ns) },
.resetvalue = 0 },
- REGINFO_SENTINEL
};
define_arm_cp_regs(cpu, vbar_cp_reginfo);
}
@@ -5602,238 +9751,344 @@ void register_cp_regs_for_features(ARMCPU *cpu)
ARMCPRegInfo sctlr = {
.name = "SCTLR", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 0,
- .access = PL1_RW,
+ .access = PL1_RW, .accessfn = access_tvm_trvm,
+ .fgt = FGT_SCTLR_EL1,
+ .nv2_redirect_offset = 0x110 | NV2_REDIR_NV1,
.bank_fieldoffsets = { offsetof(CPUARMState, cp15.sctlr_s),
offsetof(CPUARMState, cp15.sctlr_ns) },
.writefn = sctlr_write, .resetvalue = cpu->reset_sctlr,
.raw_writefn = raw_write,
};
if (arm_feature(env, ARM_FEATURE_XSCALE)) {
- /* Normally we would always end the TB on an SCTLR write, but Linux
+ /*
+ * Normally we would always end the TB on an SCTLR write, but Linux
* arch/arm/mach-pxa/sleep.S expects two instructions following
* an MMU enable to execute from cache. Imitate this behaviour.
*/
sctlr.type |= ARM_CP_SUPPRESS_TB_END;
}
define_one_arm_cp_reg(cpu, &sctlr);
- }
- if (arm_feature(env, ARM_FEATURE_SVE)) {
- define_one_arm_cp_reg(cpu, &zcr_el1_reginfo);
- if (arm_feature(env, ARM_FEATURE_EL2)) {
- define_one_arm_cp_reg(cpu, &zcr_el2_reginfo);
- } else {
- define_one_arm_cp_reg(cpu, &zcr_no_el2_reginfo);
- }
- if (arm_feature(env, ARM_FEATURE_EL3)) {
- define_one_arm_cp_reg(cpu, &zcr_el3_reginfo);
+ if (arm_feature(env, ARM_FEATURE_PMSA) &&
+ arm_feature(env, ARM_FEATURE_V8)) {
+ ARMCPRegInfo vsctlr = {
+ .name = "VSCTLR", .state = ARM_CP_STATE_AA32,
+ .cp = 15, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 0,
+ .access = PL2_RW, .resetvalue = 0x0,
+ .fieldoffset = offsetoflow32(CPUARMState, cp15.vsctlr),
+ };
+ define_one_arm_cp_reg(cpu, &vsctlr);
}
}
-}
-void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
-{
- CPUState *cs = CPU(cpu);
- CPUARMState *env = &cpu->env;
+ if (cpu_isar_feature(aa64_lor, cpu)) {
+ define_arm_cp_regs(cpu, lor_reginfo);
+ }
+ if (cpu_isar_feature(aa64_pan, cpu)) {
+ define_one_arm_cp_reg(cpu, &pan_reginfo);
+ }
+#ifndef CONFIG_USER_ONLY
+ if (cpu_isar_feature(aa64_ats1e1, cpu)) {
+ define_arm_cp_regs(cpu, ats1e1_reginfo);
+ }
+ if (cpu_isar_feature(aa32_ats1e1, cpu)) {
+ define_arm_cp_regs(cpu, ats1cp_reginfo);
+ }
+#endif
+ if (cpu_isar_feature(aa64_uao, cpu)) {
+ define_one_arm_cp_reg(cpu, &uao_reginfo);
+ }
- if (arm_feature(env, ARM_FEATURE_AARCH64)) {
- gdb_register_coprocessor(cs, aarch64_fpu_gdb_get_reg,
- aarch64_fpu_gdb_set_reg,
- 34, "aarch64-fpu.xml", 0);
- } else if (arm_feature(env, ARM_FEATURE_NEON)) {
- gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg,
- 51, "arm-neon.xml", 0);
- } else if (arm_feature(env, ARM_FEATURE_VFP3)) {
- gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg,
- 35, "arm-vfp3.xml", 0);
- } else if (arm_feature(env, ARM_FEATURE_VFP)) {
- gdb_register_coprocessor(cs, vfp_gdb_get_reg, vfp_gdb_set_reg,
- 19, "arm-vfp.xml", 0);
- }
- gdb_register_coprocessor(cs, arm_gdb_get_sysreg, arm_gdb_set_sysreg,
- arm_gen_dynamic_xml(cs),
- "system-registers.xml", 0);
-}
-
-/* Sort alphabetically by type name, except for "any". */
-static gint arm_cpu_list_compare(gconstpointer a, gconstpointer b)
-{
- ObjectClass *class_a = (ObjectClass *)a;
- ObjectClass *class_b = (ObjectClass *)b;
- const char *name_a, *name_b;
-
- name_a = object_class_get_name(class_a);
- name_b = object_class_get_name(class_b);
- if (strcmp(name_a, "any-" TYPE_ARM_CPU) == 0) {
- return 1;
- } else if (strcmp(name_b, "any-" TYPE_ARM_CPU) == 0) {
- return -1;
- } else {
- return strcmp(name_a, name_b);
+ if (cpu_isar_feature(aa64_dit, cpu)) {
+ define_one_arm_cp_reg(cpu, &dit_reginfo);
+ }
+ if (cpu_isar_feature(aa64_ssbs, cpu)) {
+ define_one_arm_cp_reg(cpu, &ssbs_reginfo);
+ }
+ if (cpu_isar_feature(any_ras, cpu)) {
+ define_arm_cp_regs(cpu, minimal_ras_reginfo);
}
-}
-static void arm_cpu_list_entry(gpointer data, gpointer user_data)
-{
- ObjectClass *oc = data;
- CPUListState *s = user_data;
- const char *typename;
- char *name;
+ if (cpu_isar_feature(aa64_vh, cpu) ||
+ cpu_isar_feature(aa64_debugv8p2, cpu)) {
+ define_one_arm_cp_reg(cpu, &contextidr_el2);
+ }
+ if (arm_feature(env, ARM_FEATURE_EL2) && cpu_isar_feature(aa64_vh, cpu)) {
+ define_arm_cp_regs(cpu, vhe_reginfo);
+ }
- typename = object_class_get_name(oc);
- name = g_strndup(typename, strlen(typename) - strlen("-" TYPE_ARM_CPU));
- (*s->cpu_fprintf)(s->file, " %s\n",
- name);
- g_free(name);
-}
+ if (cpu_isar_feature(aa64_sve, cpu)) {
+ define_arm_cp_regs(cpu, zcr_reginfo);
+ }
-void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf)
-{
- CPUListState s = {
- .file = f,
- .cpu_fprintf = cpu_fprintf,
- };
- GSList *list;
+ if (cpu_isar_feature(aa64_hcx, cpu)) {
+ define_one_arm_cp_reg(cpu, &hcrx_el2_reginfo);
+ }
- list = object_class_get_list(TYPE_ARM_CPU, false);
- list = g_slist_sort(list, arm_cpu_list_compare);
- (*cpu_fprintf)(f, "Available CPUs:\n");
- g_slist_foreach(list, arm_cpu_list_entry, &s);
- g_slist_free(list);
-}
+#ifdef TARGET_AARCH64
+ if (cpu_isar_feature(aa64_sme, cpu)) {
+ define_arm_cp_regs(cpu, sme_reginfo);
+ }
+ if (cpu_isar_feature(aa64_pauth, cpu)) {
+ define_arm_cp_regs(cpu, pauth_reginfo);
+ }
+ if (cpu_isar_feature(aa64_rndr, cpu)) {
+ define_arm_cp_regs(cpu, rndr_reginfo);
+ }
+ if (cpu_isar_feature(aa64_tlbirange, cpu)) {
+ define_arm_cp_regs(cpu, tlbirange_reginfo);
+ }
+ if (cpu_isar_feature(aa64_tlbios, cpu)) {
+ define_arm_cp_regs(cpu, tlbios_reginfo);
+ }
+ /* Data Cache clean instructions up to PoP */
+ if (cpu_isar_feature(aa64_dcpop, cpu)) {
+ define_one_arm_cp_reg(cpu, dcpop_reg);
-static void arm_cpu_add_definition(gpointer data, gpointer user_data)
-{
- ObjectClass *oc = data;
- CpuDefinitionInfoList **cpu_list = user_data;
- CpuDefinitionInfoList *entry;
- CpuDefinitionInfo *info;
- const char *typename;
+ if (cpu_isar_feature(aa64_dcpodp, cpu)) {
+ define_one_arm_cp_reg(cpu, dcpodp_reg);
+ }
+ }
- typename = object_class_get_name(oc);
- info = g_malloc0(sizeof(*info));
- info->name = g_strndup(typename,
- strlen(typename) - strlen("-" TYPE_ARM_CPU));
- info->q_typename = g_strdup(typename);
+ /*
+ * If full MTE is enabled, add all of the system registers.
+ * If only "instructions available at EL0" are enabled,
+ * then define only a RAZ/WI version of PSTATE.TCO.
+ */
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ ARMCPRegInfo gmid_reginfo = {
+ .name = "GMID_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 4,
+ .access = PL1_R, .accessfn = access_aa64_tid5,
+ .type = ARM_CP_CONST, .resetvalue = cpu->gm_blocksize,
+ };
+ define_one_arm_cp_reg(cpu, &gmid_reginfo);
+ define_arm_cp_regs(cpu, mte_reginfo);
+ define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo);
+ } else if (cpu_isar_feature(aa64_mte_insn_reg, cpu)) {
+ define_arm_cp_regs(cpu, mte_tco_ro_reginfo);
+ define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo);
+ }
- entry = g_malloc0(sizeof(*entry));
- entry->value = info;
- entry->next = *cpu_list;
- *cpu_list = entry;
-}
+ if (cpu_isar_feature(aa64_scxtnum, cpu)) {
+ define_arm_cp_regs(cpu, scxtnum_reginfo);
+ }
-CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp)
-{
- CpuDefinitionInfoList *cpu_list = NULL;
- GSList *list;
+ if (cpu_isar_feature(aa64_fgt, cpu)) {
+ define_arm_cp_regs(cpu, fgt_reginfo);
+ }
- list = object_class_get_list(TYPE_ARM_CPU, false);
- g_slist_foreach(list, arm_cpu_add_definition, &cpu_list);
- g_slist_free(list);
+ if (cpu_isar_feature(aa64_rme, cpu)) {
+ define_arm_cp_regs(cpu, rme_reginfo);
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ define_arm_cp_regs(cpu, rme_mte_reginfo);
+ }
+ }
+
+ if (cpu_isar_feature(aa64_nv2, cpu)) {
+ define_arm_cp_regs(cpu, nv2_reginfo);
+ }
+#endif
+
+ if (cpu_isar_feature(any_predinv, cpu)) {
+ define_arm_cp_regs(cpu, predinv_reginfo);
+ }
+
+ if (cpu_isar_feature(any_ccidx, cpu)) {
+ define_arm_cp_regs(cpu, ccsidr2_reginfo);
+ }
- return cpu_list;
+#ifndef CONFIG_USER_ONLY
+ /*
+ * Register redirections and aliases must be done last,
+ * after the registers from the other extensions have been defined.
+ */
+ if (arm_feature(env, ARM_FEATURE_EL2) && cpu_isar_feature(aa64_vh, cpu)) {
+ define_arm_vh_e2h_redirects_aliases(cpu);
+ }
+#endif
}
+/*
+ * Private utility function for define_one_arm_cp_reg_with_opaque():
+ * add a single reginfo struct to the hash table.
+ */
static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
- void *opaque, int state, int secstate,
+ void *opaque, CPState state,
+ CPSecureState secstate,
int crm, int opc1, int opc2,
const char *name)
{
- /* Private utility function for define_one_arm_cp_reg_with_opaque():
- * add a single reginfo struct to the hash table.
+ CPUARMState *env = &cpu->env;
+ uint32_t key;
+ ARMCPRegInfo *r2;
+ bool is64 = r->type & ARM_CP_64BIT;
+ bool ns = secstate & ARM_CP_SECSTATE_NS;
+ int cp = r->cp;
+ size_t name_len;
+ bool make_const;
+
+ switch (state) {
+ case ARM_CP_STATE_AA32:
+ /* We assume it is a cp15 register if the .cp field is left unset. */
+ if (cp == 0 && r->state == ARM_CP_STATE_BOTH) {
+ cp = 15;
+ }
+ key = ENCODE_CP_REG(cp, is64, ns, r->crn, crm, opc1, opc2);
+ break;
+ case ARM_CP_STATE_AA64:
+ /*
+ * To allow abbreviation of ARMCPRegInfo definitions, we treat
+ * cp == 0 as equivalent to the value for "standard guest-visible
+ * sysreg". STATE_BOTH definitions are also always "standard sysreg"
+ * in their AArch64 view (the .cp value may be non-zero for the
+ * benefit of the AArch32 view).
+ */
+ if (cp == 0 || r->state == ARM_CP_STATE_BOTH) {
+ cp = CP_REG_ARM64_SYSREG_CP;
+ }
+ key = ENCODE_AA64_CP_REG(cp, r->crn, crm, r->opc0, opc1, opc2);
+ break;
+ default:
+ g_assert_not_reached();
+ }
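A register declared with .state = ARM_CP_STATE_BOTH reaches this switch twice, once per state, so it ends up in cpu->cp_regs under two keys: an AArch32 cp15 key built by ENCODE_CP_REG() and an AArch64 system-register key built by ENCODE_AA64_CP_REG() with cp forced to CP_REG_ARM64_SYSREG_CP.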
+
+ /* Overriding of an existing definition must be explicitly requested. */
+ if (!(r->type & ARM_CP_OVERRIDE)) {
+ const ARMCPRegInfo *oldreg = get_arm_cp_reginfo(cpu->cp_regs, key);
+ if (oldreg) {
+ assert(oldreg->type & ARM_CP_OVERRIDE);
+ }
+ }
+
+ /*
+ * Eliminate registers that are not present because the EL is missing.
+ * Doing this here makes it easier to put all registers for a given
+ * feature into the same ARMCPRegInfo array and define them all at once.
*/
- uint32_t *key = g_new(uint32_t, 1);
- ARMCPRegInfo *r2 = g_memdup(r, sizeof(ARMCPRegInfo));
- int is64 = (r->type & ARM_CP_64BIT) ? 1 : 0;
- int ns = (secstate & ARM_CP_SECSTATE_NS) ? 1 : 0;
+ make_const = false;
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
+ /*
+ * An EL2 register without EL2 but with EL3 is (usually) RES0.
+ * See rule RJFFP in section D1.1.3 of DDI0487H.a.
+ */
+ int min_el = ctz32(r->access) / 2;
+ if (min_el == 2 && !arm_feature(env, ARM_FEATURE_EL2)) {
+ if (r->type & ARM_CP_EL3_NO_EL2_UNDEF) {
+ return;
+ }
+ make_const = !(r->type & ARM_CP_EL3_NO_EL2_KEEP);
+ }
+ } else {
+ CPAccessRights max_el = (arm_feature(env, ARM_FEATURE_EL2)
+ ? PL2_RW : PL1_RW);
+ if ((r->access & max_el) == 0) {
+ return;
+ }
+ }
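A minimal standalone sketch of the ctz32(r->access) / 2 trick above. It assumes the cpregs.h convention that the access mask carries one write bit and one read bit per exception level, with EL0 in the least-significant pair; the PLx_* values below mirror that assumed layout rather than being copied from the header:

#include <stdint.h>
#include <stdio.h>

#define PL3_W 0x40
#define PL3_R 0x80
#define PL2_W (0x10 | PL3_W)
#define PL2_R (0x20 | PL3_R)
#define PL1_W (0x04 | PL2_W)
#define PL1_R (0x08 | PL2_R)

/* local ctz32(); the caller guarantees a non-zero argument */
static int ctz32(uint32_t v)
{
    int n = 0;
    while (!(v & 1)) {
        v >>= 1;
        n++;
    }
    return n;
}

int main(void)
{
    printf("PL1_RW -> min_el %d\n", ctz32(PL1_R | PL1_W) / 2);  /* 1 */
    printf("PL2_RW -> min_el %d\n", ctz32(PL2_R | PL2_W) / 2);  /* 2 */
    printf("PL3_RW -> min_el %d\n", ctz32(PL3_R | PL3_W) / 2);  /* 3 */
    return 0;
}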
- r2->name = g_strdup(name);
- /* Reset the secure state to the specific incoming state. This is
- * necessary as the register may have been defined with both states.
+ /* Combine cpreg and name into one allocation. */
+ name_len = strlen(name) + 1;
+ r2 = g_malloc(sizeof(*r2) + name_len);
+ *r2 = *r;
+ r2->name = memcpy(r2 + 1, name, name_len);
+
+ /*
+ * Update fields to match the instantiation, overwriting wildcards
+ * such as CP_ANY, ARM_CP_STATE_BOTH, or ARM_CP_SECSTATE_BOTH.
*/
+ r2->cp = cp;
+ r2->crm = crm;
+ r2->opc1 = opc1;
+ r2->opc2 = opc2;
+ r2->state = state;
r2->secure = secstate;
-
- if (r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1]) {
- /* Register is banked (using both entries in array).
- * Overwriting fieldoffset as the array is only used to define
- * banked registers but later only fieldoffset is used.
- */
- r2->fieldoffset = r->bank_fieldoffsets[ns];
+ if (opaque) {
+ r2->opaque = opaque;
}
- if (state == ARM_CP_STATE_AA32) {
- if (r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1]) {
- /* If the register is banked then we don't need to migrate or
- * reset the 32-bit instance in certain cases:
- *
- * 1) If the register has both 32-bit and 64-bit instances then we
- * can count on the 64-bit instance taking care of the
- * non-secure bank.
- * 2) If ARMv8 is enabled then we can count on a 64-bit version
- * taking care of the secure bank. This requires that separate
- * 32 and 64-bit definitions are provided.
- */
- if ((r->state == ARM_CP_STATE_BOTH && ns) ||
- (arm_feature(&cpu->env, ARM_FEATURE_V8) && !ns)) {
- r2->type |= ARM_CP_ALIAS;
- }
- } else if ((secstate != r->secure) && !ns) {
- /* The register is not banked so we only want to allow migration of
- * the non-secure instance.
- */
- r2->type |= ARM_CP_ALIAS;
+ if (make_const) {
+ /* This should not have been a very special register to begin with. */
+ int old_special = r2->type & ARM_CP_SPECIAL_MASK;
+ assert(old_special == 0 || old_special == ARM_CP_NOP);
+ /*
+ * Set the special function to CONST, retaining the other flags.
+ * This is important for e.g. ARM_CP_SVE so that we still
+ * take the SVE trap if CPTR_EL3.EZ == 0.
+ */
+ r2->type = (r2->type & ~ARM_CP_SPECIAL_MASK) | ARM_CP_CONST;
+ /*
+ * Usually, these registers become RES0, but there are a few
+ * special cases like VPIDR_EL2 which have a constant non-zero
+ * value with writes ignored.
+ */
+ if (!(r->type & ARM_CP_EL3_NO_EL2_C_NZ)) {
+ r2->resetvalue = 0;
}
+ /*
+ * ARM_CP_CONST has precedence, so removing the callbacks and
+ * offsets is not strictly necessary, but it is potentially
+ * less confusing to debug later.
+ */
+ r2->readfn = NULL;
+ r2->writefn = NULL;
+ r2->raw_readfn = NULL;
+ r2->raw_writefn = NULL;
+ r2->resetfn = NULL;
+ r2->fieldoffset = 0;
+ r2->bank_fieldoffsets[0] = 0;
+ r2->bank_fieldoffsets[1] = 0;
+ } else {
+ bool isbanked = r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1];
- if (r->state == ARM_CP_STATE_BOTH) {
- /* We assume it is a cp15 register if the .cp field is left unset.
+ if (isbanked) {
+ /*
+ * Register is banked (using both entries in array).
+ * Overwriting fieldoffset as the array is only used to define
+ * banked registers but later only fieldoffset is used.
*/
- if (r2->cp == 0) {
- r2->cp = 15;
+ r2->fieldoffset = r->bank_fieldoffsets[ns];
+ }
+ if (state == ARM_CP_STATE_AA32) {
+ if (isbanked) {
+ /*
+ * If the register is banked then we don't need to migrate or
+ * reset the 32-bit instance in certain cases:
+ *
+ * 1) If the register has both 32-bit and 64-bit instances
+ * then we can count on the 64-bit instance taking care
+ * of the non-secure bank.
+ * 2) If ARMv8 is enabled then we can count on a 64-bit
+ * version taking care of the secure bank. This requires
+ * that separate 32 and 64-bit definitions are provided.
+ */
+ if ((r->state == ARM_CP_STATE_BOTH && ns) ||
+ (arm_feature(env, ARM_FEATURE_V8) && !ns)) {
+ r2->type |= ARM_CP_ALIAS;
+ }
+ } else if ((secstate != r->secure) && !ns) {
+ /*
+ * The register is not banked so we only want to allow
+ * migration of the non-secure instance.
+ */
+ r2->type |= ARM_CP_ALIAS;
}
-#ifdef HOST_WORDS_BIGENDIAN
- if (r2->fieldoffset) {
+ if (HOST_BIG_ENDIAN &&
+ r->state == ARM_CP_STATE_BOTH && r2->fieldoffset) {
r2->fieldoffset += sizeof(uint32_t);
}
-#endif
}
}
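The HOST_BIG_ENDIAN adjustment above exists because a STATE_BOTH register is backed by a 64-bit CPUARMState field while its AArch32 view only reads the low 32 bits; on a big-endian host that low word lives at fieldoffset + 4, so the offset is bumped for the 32-bit instance.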
- if (state == ARM_CP_STATE_AA64) {
- /* To allow abbreviation of ARMCPRegInfo
- * definitions, we treat cp == 0 as equivalent to
- * the value for "standard guest-visible sysreg".
- * STATE_BOTH definitions are also always "standard
- * sysreg" in their AArch64 view (the .cp value may
- * be non-zero for the benefit of the AArch32 view).
- */
- if (r->cp == 0 || r->state == ARM_CP_STATE_BOTH) {
- r2->cp = CP_REG_ARM64_SYSREG_CP;
- }
- *key = ENCODE_AA64_CP_REG(r2->cp, r2->crn, crm,
- r2->opc0, opc1, opc2);
- } else {
- *key = ENCODE_CP_REG(r2->cp, is64, ns, r2->crn, crm, opc1, opc2);
- }
- if (opaque) {
- r2->opaque = opaque;
- }
- /* reginfo passed to helpers is correct for the actual access,
- * and is never ARM_CP_STATE_BOTH:
- */
- r2->state = state;
- /* Make sure reginfo passed to helpers for wildcarded regs
- * has the correct crm/opc1/opc2 for this reg, not CP_ANY:
- */
- r2->crm = crm;
- r2->opc1 = opc1;
- r2->opc2 = opc2;
- /* By convention, for wildcarded registers only the first
+
+ /*
+ * By convention, for wildcarded registers only the first
* entry is used for migration; the others are marked as
* ALIAS so we don't try to transfer the register
* multiple times. Special registers (ie NOP/WFI) are
* never migratable and not even raw-accessible.
*/
- if ((r->type & ARM_CP_SPECIAL)) {
+ if (r2->type & ARM_CP_SPECIAL_MASK) {
r2->type |= ARM_CP_NO_RAW;
}
if (((r->crm == CP_ANY) && crm != 0) ||
@@ -5842,7 +10097,8 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
r2->type |= ARM_CP_ALIAS | ARM_CP_NO_GDB;
}
- /* Check that raw accesses are either forbidden or handled. Note that
+ /*
+ * Check that raw accesses are either forbidden or handled. Note that
* we can't assert this earlier because the setup of fieldoffset for
* banked registers has to be done first.
*/
@@ -5850,29 +10106,15 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
assert(!raw_accessors_invalid(r2));
}
- /* Overriding of an existing definition must be explicitly
- * requested.
- */
- if (!(r->type & ARM_CP_OVERRIDE)) {
- ARMCPRegInfo *oldreg;
- oldreg = g_hash_table_lookup(cpu->cp_regs, key);
- if (oldreg && !(oldreg->type & ARM_CP_OVERRIDE)) {
- fprintf(stderr, "Register redefined: cp=%d %d bit "
- "crn=%d crm=%d opc1=%d opc2=%d, "
- "was %s, now %s\n", r2->cp, 32 + 32 * is64,
- r2->crn, r2->crm, r2->opc1, r2->opc2,
- oldreg->name, r2->name);
- g_assert_not_reached();
- }
- }
- g_hash_table_insert(cpu->cp_regs, key, r2);
+ g_hash_table_insert(cpu->cp_regs, (gpointer)(uintptr_t)key, r2);
}
void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
const ARMCPRegInfo *r, void *opaque)
{
- /* Define implementations of coprocessor registers.
+ /*
+ * Define implementations of coprocessor registers.
* We store these in a hashtable because typically
 * there are fewer than 150 registers in a space which
* is 16*16*16*8*8 = 262144 in size.
@@ -5895,29 +10137,65 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
* bits; the ARM_CP_64BIT* flag applies only to the AArch32 view of
* the register, if any.
*/
- int crm, opc1, opc2, state;
+ int crm, opc1, opc2;
int crmmin = (r->crm == CP_ANY) ? 0 : r->crm;
int crmmax = (r->crm == CP_ANY) ? 15 : r->crm;
int opc1min = (r->opc1 == CP_ANY) ? 0 : r->opc1;
int opc1max = (r->opc1 == CP_ANY) ? 7 : r->opc1;
int opc2min = (r->opc2 == CP_ANY) ? 0 : r->opc2;
int opc2max = (r->opc2 == CP_ANY) ? 7 : r->opc2;
+ CPState state;
+
/* 64 bit registers have only CRm and Opc1 fields */
assert(!((r->type & ARM_CP_64BIT) && (r->opc2 || r->crn)));
/* op0 only exists in the AArch64 encodings */
assert((r->state != ARM_CP_STATE_AA32) || (r->opc0 == 0));
/* AArch64 regs are all 64 bit so ARM_CP_64BIT is meaningless */
assert((r->state != ARM_CP_STATE_AA64) || !(r->type & ARM_CP_64BIT));
- /* The AArch64 pseudocode CheckSystemAccess() specifies that op1
+ /*
+ * This API is only for Arm's system coprocessors (14 and 15) or
+ * (M-profile or v7A-and-earlier only) for implementation defined
+ * coprocessors in the range 0..7. Our decode assumes this, since
+ * 8..13 can be used for other insns including VFP and Neon. See
+ * valid_cp() in translate.c. Assert here that we haven't tried
+ * to use an invalid coprocessor number.
+ */
+ switch (r->state) {
+ case ARM_CP_STATE_BOTH:
+ /* 0 has a special meaning, but otherwise the same rules as AA32. */
+ if (r->cp == 0) {
+ break;
+ }
+ /* fall through */
+ case ARM_CP_STATE_AA32:
+ if (arm_feature(&cpu->env, ARM_FEATURE_V8) &&
+ !arm_feature(&cpu->env, ARM_FEATURE_M)) {
+ assert(r->cp >= 14 && r->cp <= 15);
+ } else {
+ assert(r->cp < 8 || (r->cp >= 14 && r->cp <= 15));
+ }
+ break;
+ case ARM_CP_STATE_AA64:
+ assert(r->cp == 0 || r->cp == CP_REG_ARM64_SYSREG_CP);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ /*
+ * The AArch64 pseudocode CheckSystemAccess() specifies that op1
* encodes a minimum access level for the register. We roll this
* runtime check into our general permission check code, so check
* here that the reginfo's specified permissions are strict enough
* to encompass the generic architectural permission check.
*/
if (r->state != ARM_CP_STATE_AA32) {
- int mask = 0;
+ CPAccessRights mask;
switch (r->opc1) {
- case 0: case 1: case 2:
+ case 0:
+ /* min_EL EL1, but some accessible to EL0 via kernel ABI */
+ mask = PL0U_R | PL1_RW;
+ break;
+ case 1: case 2:
/* min_EL EL1 */
mask = PL1_RW;
break;
@@ -5926,13 +10204,10 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
mask = PL0_RW;
break;
case 4:
+ case 5:
/* min_EL EL2 */
mask = PL2_RW;
break;
- case 5:
- /* unallocated encoding, so not possible */
- assert(false);
- break;
case 6:
/* min_EL EL3 */
mask = PL3_RW;
@@ -5943,17 +10218,17 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
break;
default:
/* broken reginfo with out-of-range opc1 */
- assert(false);
- break;
+ g_assert_not_reached();
}
/* assert our permissions are not too lax (stricter is fine) */
assert((r->access & ~mask) == 0);
}
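As a concrete reading of the switch above: an AArch64 register with opc1 == 4 (for example ZCR_EL2, which is opc0 3, opc1 4, crn 1, crm 2, opc2 0) gets mask = PL2_RW, so the assert fires if its reginfo mistakenly grants PL1 or PL0 access on top of that.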
- /* Check that the register definition has enough info to handle
+ /*
+ * Check that the register definition has enough info to handle
* reads and writes if they are permitted.
*/
- if (!(r->type & (ARM_CP_SPECIAL|ARM_CP_CONST))) {
+ if (!(r->type & (ARM_CP_SPECIAL_MASK | ARM_CP_CONST))) {
if (r->access & PL3_R) {
assert((r->fieldoffset ||
(r->bank_fieldoffsets[0] && r->bank_fieldoffsets[1])) ||
@@ -5965,8 +10240,7 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
r->writefn);
}
}
- /* Bad type field probably means missing sentinel at end of reg list */
- assert(cptype_valid(r->type));
+
for (crm = crmmin; crm <= crmmax; crm++) {
for (opc1 = opc1min; opc1 <= opc1max; opc1++) {
for (opc2 = opc2min; opc2 <= opc2max; opc2++) {
@@ -5976,7 +10250,8 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
continue;
}
if (state == ARM_CP_STATE_AA32) {
- /* Under AArch32 CP registers can be common
+ /*
+ * Under AArch32 CP registers can be common
* (same for secure and non-secure world) or banked.
*/
char *name;
@@ -5988,7 +10263,7 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
r->secure, crm, opc1, opc2,
r->name);
break;
- default:
+ case ARM_CP_SECSTATE_BOTH:
name = g_strdup_printf("%s_S", r->name);
add_cpreg_to_hashtable(cpu, r, opaque, state,
ARM_CP_SECSTATE_S,
@@ -5998,10 +10273,14 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
ARM_CP_SECSTATE_NS,
crm, opc1, opc2, r->name);
break;
+ default:
+ g_assert_not_reached();
}
} else {
- /* AArch64 registers get mapped to non-secure instance
- * of AArch32 */
+ /*
+ * AArch64 registers get mapped to non-secure instance
+ * of AArch32
+ */
add_cpreg_to_hashtable(cpu, r, opaque, state,
ARM_CP_SECSTATE_NS,
crm, opc1, opc2, r->name);
@@ -6012,19 +10291,60 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
}
}
-void define_arm_cp_regs_with_opaque(ARMCPU *cpu,
- const ARMCPRegInfo *regs, void *opaque)
+/* Define a whole list of registers */
+void define_arm_cp_regs_with_opaque_len(ARMCPU *cpu, const ARMCPRegInfo *regs,
+ void *opaque, size_t len)
{
- /* Define a whole list of registers */
- const ARMCPRegInfo *r;
- for (r = regs; r->type != ARM_CP_SENTINEL; r++) {
- define_one_arm_cp_reg_with_opaque(cpu, r, opaque);
+ size_t i;
+ for (i = 0; i < len; ++i) {
+ define_one_arm_cp_reg_with_opaque(cpu, regs + i, opaque);
+ }
+}
+
+/*
+ * Modify ARMCPRegInfo for access from userspace.
+ *
+ * This is a data driven modification directed by
+ * ARMCPRegUserSpaceInfo. All registers become ARM_CP_CONST, since
+ * user-space cannot alter any values and dynamic values pertaining to
+ * execution state are hidden from the user-space view anyway.
+ */
+void modify_arm_cp_regs_with_len(ARMCPRegInfo *regs, size_t regs_len,
+ const ARMCPRegUserSpaceInfo *mods,
+ size_t mods_len)
+{
+ for (size_t mi = 0; mi < mods_len; ++mi) {
+ const ARMCPRegUserSpaceInfo *m = mods + mi;
+ GPatternSpec *pat = NULL;
+
+ if (m->is_glob) {
+ pat = g_pattern_spec_new(m->name);
+ }
+ for (size_t ri = 0; ri < regs_len; ++ri) {
+ ARMCPRegInfo *r = regs + ri;
+
+ if (pat && g_pattern_match_string(pat, r->name)) {
+ r->type = ARM_CP_CONST;
+ r->access = PL0U_R;
+ r->resetvalue = 0;
+ /* continue */
+ } else if (strcmp(r->name, m->name) == 0) {
+ r->type = ARM_CP_CONST;
+ r->access = PL0U_R;
+ r->resetvalue &= m->exported_bits;
+ r->resetvalue |= m->fixed_bits;
+ break;
+ }
+ }
+ if (pat) {
+ g_pattern_spec_free(pat);
+ }
}
}
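A usage sketch for the helper above, written in QEMU-source context rather than as a standalone program; the array names, the ID_AA64PFR0_EL1 reset value and the exported_bits mask are invented for illustration, and the sketch is not wired into any build:

#ifdef CONFIG_USER_ONLY
/* hypothetical register list; only the name matters for the mods below */
static ARMCPRegInfo sample_id_reginfo[] = {
    { .name = "ID_AA64PFR0_EL1", .state = ARM_CP_STATE_AA64,
      .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 0,
      .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0x11 },
};

static const ARMCPRegUserSpaceInfo sample_user_mods[] = {
    /* exact match: only the masked fields keep their reset value */
    { .name = "ID_AA64PFR0_EL1",
      .exported_bits = 0x00000000000000ffull },
    /* glob match: everything it names becomes a RAZ constant */
    { .name = "ID_AA64*_EL1_RESERVED", .is_glob = true },
};

static void sample_apply_user_mods(void)
{
    modify_arm_cp_regs(sample_id_reginfo, sample_user_mods);
}
#endif

Note that a glob entry rewrites every register it matches and zeroes its reset value, so glob patterns are kept disjoint from the exact-match names.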
const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp)
{
- return g_hash_table_lookup(cpregs, &encoded_cp);
+ return g_hash_table_lookup(cpregs, (gpointer)(uintptr_t)encoded_cp);
}
void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -6046,7 +10366,8 @@ void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque)
static int bad_mode_switch(CPUARMState *env, int mode, CPSRWriteType write_type)
{
- /* Return true if it is not valid for us to switch to
+ /*
+ * Return true if it is not valid for us to switch to
* this CPU mode (ie all the UNPREDICTABLE cases in
* the ARM ARM CPSRWriteByInstr pseudocode).
*/
@@ -6067,22 +10388,22 @@ static int bad_mode_switch(CPUARMState *env, int mode, CPSRWriteType write_type)
case ARM_CPU_MODE_UND:
case ARM_CPU_MODE_IRQ:
case ARM_CPU_MODE_FIQ:
- /* Note that we don't implement the IMPDEF NSACR.RFR which in v7
+ /*
+ * Note that we don't implement the IMPDEF NSACR.RFR which in v7
* allows FIQ mode to be Secure-only. (In v8 this doesn't exist.)
*/
- /* If HCR.TGE is set then changes from Monitor to NS PL1 via MSR
+ /*
+ * If HCR.TGE is set then changes from Monitor to NS PL1 via MSR
* and CPS are treated as illegal mode changes.
*/
if (write_type == CPSRWriteByInstr &&
- (env->cp15.hcr_el2 & HCR_TGE) &&
(env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_MON &&
- !arm_is_secure_below_el3(env)) {
+ (arm_hcr_el2_eff(env) & HCR_TGE)) {
return 1;
}
return 0;
case ARM_CPU_MODE_HYP:
- return !arm_feature(env, ARM_FEATURE_EL2)
- || arm_current_el(env) < 2 || arm_is_secure(env);
+ return !arm_is_el2_enabled(env) || arm_current_el(env) < 2;
case ARM_CPU_MODE_MON:
return arm_current_el(env) < 3;
default:
@@ -6105,6 +10426,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask,
CPSRWriteType write_type)
{
uint32_t changed_daif;
+ bool rebuild_hflags = (write_type != CPSRWriteRaw) &&
+ (mask & (CPSR_M | CPSR_E | CPSR_IL));
if (mask & CPSR_NZCV) {
env->ZF = (~val) & CPSR_Z;
@@ -6112,10 +10435,12 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask,
env->CF = (val >> 29) & 1;
env->VF = (val << 3) & 0x80000000;
}
- if (mask & CPSR_Q)
+ if (mask & CPSR_Q) {
env->QF = ((val & CPSR_Q) != 0);
- if (mask & CPSR_T)
+ }
+ if (mask & CPSR_T) {
env->thumb = ((val & CPSR_T) != 0);
+ }
if (mask & CPSR_IT_0_1) {
env->condexec_bits &= ~3;
env->condexec_bits |= (val >> 25) & 3;
@@ -6128,7 +10453,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask,
env->GE = (val >> 16) & 0xf;
}
- /* In a V7 implementation that includes the security extensions but does
+ /*
+ * In a V7 implementation that includes the security extensions but does
* not include Virtualization Extensions the SCR.FW and SCR.AW bits control
* whether non-secure software is allowed to change the CPSR_F and CPSR_A
* bits respectively.
@@ -6144,7 +10470,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask,
changed_daif = (env->daif ^ val) & mask;
if (changed_daif & CPSR_A) {
- /* Check to see if we are allowed to change the masking of async
+ /*
+ * Check to see if we are allowed to change the masking of async
* abort exceptions from a non-secure state.
*/
if (!(env->cp15.scr_el3 & SCR_AW)) {
@@ -6156,7 +10483,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask,
}
if (changed_daif & CPSR_F) {
- /* Check to see if we are allowed to change the masking of FIQ
+ /*
+ * Check to see if we are allowed to change the masking of FIQ
* exceptions from a non-secure state.
*/
if (!(env->cp15.scr_el3 & SCR_FW)) {
@@ -6166,7 +10494,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask,
mask &= ~CPSR_F;
}
- /* Check whether non-maskable FIQ (NMFI) support is enabled.
+ /*
+ * Check whether non-maskable FIQ (NMFI) support is enabled.
* If this bit is set software is not allowed to mask
* FIQs, but is allowed to set CPSR_F to 0.
*/
@@ -6186,7 +10515,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask,
if (write_type != CPSRWriteRaw &&
((env->uncached_cpsr ^ val) & mask & CPSR_M)) {
if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR) {
- /* Note that we can only get here in USR mode if this is a
+ /*
+ * Note that we can only get here in USR mode if this is a
* gdb stub write; for this case we follow the architectural
* behaviour for guest writes in USR mode of ignoring an attempt
* to switch mode. (Those are caught by translate.c for writes
@@ -6194,7 +10524,8 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask,
*/
mask &= ~CPSR_M;
} else if (bad_mode_switch(env, val & CPSR_M, write_type)) {
- /* Attempt to switch to an invalid mode: this is UNPREDICTABLE in
+ /*
+ * Attempt to switch to an invalid mode: this is UNPREDICTABLE in
* v7, and has defined behaviour in v8:
* + leave CPSR.M untouched
* + allow changes to the other CPSR fields
@@ -6208,107 +10539,32 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask,
mask |= CPSR_IL;
val |= CPSR_IL;
}
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Illegal AArch32 mode switch attempt from %s to %s\n",
+ aarch32_mode_name(env->uncached_cpsr),
+ aarch32_mode_name(val));
} else {
+ qemu_log_mask(CPU_LOG_INT, "%s %s to %s PC 0x%" PRIx32 "\n",
+ write_type == CPSRWriteExceptionReturn ?
+ "Exception return from AArch32" :
+ "AArch32 mode switch from",
+ aarch32_mode_name(env->uncached_cpsr),
+ aarch32_mode_name(val), env->regs[15]);
switch_mode(env, val & CPSR_M);
}
}
mask &= ~CACHED_CPSR_BITS;
env->uncached_cpsr = (env->uncached_cpsr & ~mask) | (val & mask);
+ if (tcg_enabled() && rebuild_hflags) {
+ arm_rebuild_hflags(env);
+ }
}
-/* Sign/zero extend */
-uint32_t HELPER(sxtb16)(uint32_t x)
-{
- uint32_t res;
- res = (uint16_t)(int8_t)x;
- res |= (uint32_t)(int8_t)(x >> 16) << 16;
- return res;
-}
-
-uint32_t HELPER(uxtb16)(uint32_t x)
-{
- uint32_t res;
- res = (uint16_t)(uint8_t)x;
- res |= (uint32_t)(uint8_t)(x >> 16) << 16;
- return res;
-}
-
-int32_t HELPER(sdiv)(int32_t num, int32_t den)
-{
- if (den == 0)
- return 0;
- if (num == INT_MIN && den == -1)
- return INT_MIN;
- return num / den;
-}
-
-uint32_t HELPER(udiv)(uint32_t num, uint32_t den)
-{
- if (den == 0)
- return 0;
- return num / den;
-}
-
-uint32_t HELPER(rbit)(uint32_t x)
-{
- return revbit32(x);
-}
-
-#if defined(CONFIG_USER_ONLY)
-
-/* These should probably raise undefined insn exceptions. */
-void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val)
-{
- ARMCPU *cpu = arm_env_get_cpu(env);
-
- cpu_abort(CPU(cpu), "v7m_msr %d\n", reg);
-}
-
-uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg)
-{
- ARMCPU *cpu = arm_env_get_cpu(env);
-
- cpu_abort(CPU(cpu), "v7m_mrs %d\n", reg);
- return 0;
-}
-
-void HELPER(v7m_bxns)(CPUARMState *env, uint32_t dest)
-{
- /* translate.c should never generate calls here in user-only mode */
- g_assert_not_reached();
-}
-
-void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest)
-{
- /* translate.c should never generate calls here in user-only mode */
- g_assert_not_reached();
-}
-
-uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op)
-{
- /* The TT instructions can be used by unprivileged code, but in
- * user-only emulation we don't have the MPU.
- * Luckily since we know we are NonSecure unprivileged (and that in
- * turn means that the A flag wasn't specified), all the bits in the
- * register must be zero:
- * IREGION: 0 because IRVALID is 0
- * IRVALID: 0 because NS
- * S: 0 because NS
- * NSRW: 0 because NS
- * NSR: 0 because NS
- * RW: 0 because unpriv and A flag not set
- * R: 0 because unpriv and A flag not set
- * SRVALID: 0 because NS
- * MRVALID: 0 because unpriv and A flag not set
- * SREGION: 0 becaus SRVALID is 0
- * MREGION: 0 because MRVALID is 0
- */
- return 0;
-}
+#ifdef CONFIG_USER_ONLY
-void switch_mode(CPUARMState *env, int mode)
+static void switch_mode(CPUARMState *env, int mode)
{
- ARMCPU *cpu = arm_env_get_cpu(env);
+ ARMCPU *cpu = env_archcpu(env);
if (mode != ARM_CPU_MODE_USR) {
cpu_abort(CPU(cpu), "Tried to switch out of user mode\n");
@@ -6328,35 +10584,38 @@ void aarch64_sync_64_to_32(CPUARMState *env)
#else
-void switch_mode(CPUARMState *env, int mode)
+static void switch_mode(CPUARMState *env, int mode)
{
int old_mode;
int i;
old_mode = env->uncached_cpsr & CPSR_M;
- if (mode == old_mode)
+ if (mode == old_mode) {
return;
+ }
if (old_mode == ARM_CPU_MODE_FIQ) {
- memcpy (env->fiq_regs, env->regs + 8, 5 * sizeof(uint32_t));
- memcpy (env->regs + 8, env->usr_regs, 5 * sizeof(uint32_t));
+ memcpy(env->fiq_regs, env->regs + 8, 5 * sizeof(uint32_t));
+ memcpy(env->regs + 8, env->usr_regs, 5 * sizeof(uint32_t));
} else if (mode == ARM_CPU_MODE_FIQ) {
- memcpy (env->usr_regs, env->regs + 8, 5 * sizeof(uint32_t));
- memcpy (env->regs + 8, env->fiq_regs, 5 * sizeof(uint32_t));
+ memcpy(env->usr_regs, env->regs + 8, 5 * sizeof(uint32_t));
+ memcpy(env->regs + 8, env->fiq_regs, 5 * sizeof(uint32_t));
}
i = bank_number(old_mode);
env->banked_r13[i] = env->regs[13];
- env->banked_r14[i] = env->regs[14];
env->banked_spsr[i] = env->spsr;
i = bank_number(mode);
env->regs[13] = env->banked_r13[i];
- env->regs[14] = env->banked_r14[i];
env->spsr = env->banked_spsr[i];
+
+ env->banked_r14[r14_bank_number(old_mode)] = env->regs[14];
+ env->regs[14] = env->banked_r14[r14_bank_number(mode)];
}
-/* Physical Interrupt Target EL Lookup Table
+/*
+ * Physical Interrupt Target EL Lookup Table
*
* [ From ARM ARM section G1.13.4 (Table G1-15) ]
*
@@ -6403,13 +10662,13 @@ static const int8_t target_el_table[2][2][2][2][2][4] = {
{{/* 0 1 1 0 */{ 3, 3, 3, -1 },{ 3, -1, -1, 3 },},
{/* 0 1 1 1 */{ 3, 3, 3, -1 },{ 3, -1, -1, 3 },},},},},
{{{{/* 1 0 0 0 */{ 1, 1, 2, -1 },{ 1, 1, -1, 1 },},
- {/* 1 0 0 1 */{ 2, 2, 2, -1 },{ 1, 1, -1, 1 },},},
- {{/* 1 0 1 0 */{ 1, 1, 1, -1 },{ 1, 1, -1, 1 },},
- {/* 1 0 1 1 */{ 2, 2, 2, -1 },{ 1, 1, -1, 1 },},},},
+ {/* 1 0 0 1 */{ 2, 2, 2, -1 },{ 2, 2, -1, 1 },},},
+ {{/* 1 0 1 0 */{ 1, 1, 1, -1 },{ 1, 1, 1, 1 },},
+ {/* 1 0 1 1 */{ 2, 2, 2, -1 },{ 2, 2, 2, 1 },},},},
{{{/* 1 1 0 0 */{ 3, 3, 3, -1 },{ 3, 3, -1, 3 },},
{/* 1 1 0 1 */{ 3, 3, 3, -1 },{ 3, 3, -1, 3 },},},
- {{/* 1 1 1 0 */{ 3, 3, 3, -1 },{ 3, 3, -1, 3 },},
- {/* 1 1 1 1 */{ 3, 3, 3, -1 },{ 3, 3, -1, 3 },},},},},
+ {{/* 1 1 1 0 */{ 3, 3, 3, -1 },{ 3, 3, 3, 3 },},
+ {/* 1 1 1 1 */{ 3, 3, 3, -1 },{ 3, 3, 3, 3 },},},},},
};
/*
@@ -6418,41 +10677,47 @@ static const int8_t target_el_table[2][2][2][2][2][4] = {
uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
uint32_t cur_el, bool secure)
{
- CPUARMState *env = cs->env_ptr;
- int rw;
- int scr;
- int hcr;
+ CPUARMState *env = cpu_env(cs);
+ bool rw;
+ bool scr;
+ bool hcr;
int target_el;
/* Is the highest EL AArch64? */
- int is64 = arm_feature(env, ARM_FEATURE_AARCH64);
+ bool is64 = arm_feature(env, ARM_FEATURE_AARCH64);
+ uint64_t hcr_el2;
if (arm_feature(env, ARM_FEATURE_EL3)) {
rw = ((env->cp15.scr_el3 & SCR_RW) == SCR_RW);
} else {
- /* Either EL2 is the highest EL (and so the EL2 register width
+ /*
+ * Either EL2 is the highest EL (and so the EL2 register width
* is given by is64); or there is no EL2 or EL3, in which case
* the value of 'rw' does not affect the table lookup anyway.
*/
rw = is64;
}
+ hcr_el2 = arm_hcr_el2_eff(env);
switch (excp_idx) {
case EXCP_IRQ:
scr = ((env->cp15.scr_el3 & SCR_IRQ) == SCR_IRQ);
- hcr = arm_hcr_el2_imo(env);
+ hcr = hcr_el2 & HCR_IMO;
break;
case EXCP_FIQ:
scr = ((env->cp15.scr_el3 & SCR_FIQ) == SCR_FIQ);
- hcr = arm_hcr_el2_fmo(env);
+ hcr = hcr_el2 & HCR_FMO;
break;
default:
scr = ((env->cp15.scr_el3 & SCR_EA) == SCR_EA);
- hcr = arm_hcr_el2_amo(env);
+ hcr = hcr_el2 & HCR_AMO;
break;
};
- /* If HCR.TGE is set then HCR is treated as being 1 */
- hcr |= ((env->cp15.hcr_el2 & HCR_TGE) == HCR_TGE);
+ /*
+ * For these purposes, TGE and AMO/IMO/FMO both force the
+ * interrupt to EL2. Fold TGE into the bit extracted above.
+ */
+ hcr |= (hcr_el2 & HCR_TGE) != 0;
/* Perform a table-lookup for the target EL given the current state */
target_el = target_el_table[is64][scr][rw][hcr][secure][cur_el];
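One worked lookup, assuming the four digits in the table comments are is64/scr/rw/hcr in that order: an IRQ taken from non-secure EL0 on a system whose EL3 is AArch64 (is64 = 1), with SCR_EL3.IRQ clear (scr = 0), lower ELs AArch64 (rw = 1) and HCR_EL2.IMO set (hcr = 1) indexes the "1 0 1 1" row and yields target_el = 2, i.e. the interrupt is routed to EL2.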
@@ -6462,1096 +10727,10 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
return target_el;
}
-static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value,
- ARMMMUIdx mmu_idx, bool ignfault)
+void arm_log_exception(CPUState *cs)
{
- CPUState *cs = CPU(cpu);
- CPUARMState *env = &cpu->env;
- MemTxAttrs attrs = {};
- MemTxResult txres;
- target_ulong page_size;
- hwaddr physaddr;
- int prot;
- ARMMMUFaultInfo fi = {};
- bool secure = mmu_idx & ARM_MMU_IDX_M_S;
- int exc;
- bool exc_secure;
-
- if (get_phys_addr(env, addr, MMU_DATA_STORE, mmu_idx, &physaddr,
- &attrs, &prot, &page_size, &fi, NULL)) {
- /* MPU/SAU lookup failed */
- if (fi.type == ARMFault_QEMU_SFault) {
- qemu_log_mask(CPU_LOG_INT,
- "...SecureFault with SFSR.AUVIOL during stacking\n");
- env->v7m.sfsr |= R_V7M_SFSR_AUVIOL_MASK | R_V7M_SFSR_SFARVALID_MASK;
- env->v7m.sfar = addr;
- exc = ARMV7M_EXCP_SECURE;
- exc_secure = false;
- } else {
- qemu_log_mask(CPU_LOG_INT, "...MemManageFault with CFSR.MSTKERR\n");
- env->v7m.cfsr[secure] |= R_V7M_CFSR_MSTKERR_MASK;
- exc = ARMV7M_EXCP_MEM;
- exc_secure = secure;
- }
- goto pend_fault;
- }
- address_space_stl_le(arm_addressspace(cs, attrs), physaddr, value,
- attrs, &txres);
- if (txres != MEMTX_OK) {
- /* BusFault trying to write the data */
- qemu_log_mask(CPU_LOG_INT, "...BusFault with BFSR.STKERR\n");
- env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_STKERR_MASK;
- exc = ARMV7M_EXCP_BUS;
- exc_secure = false;
- goto pend_fault;
- }
- return true;
-
-pend_fault:
- /* By pending the exception at this point we are making
- * the IMPDEF choice "overridden exceptions pended" (see the
- * MergeExcInfo() pseudocode). The other choice would be to not
- * pend them now and then make a choice about which to throw away
- * later if we have two derived exceptions.
- * The only case when we must not pend the exception but instead
- * throw it away is if we are doing the push of the callee registers
- * and we've already generated a derived exception. Even in this
- * case we will still update the fault status registers.
- */
- if (!ignfault) {
- armv7m_nvic_set_pending_derived(env->nvic, exc, exc_secure);
- }
- return false;
-}
+ int idx = cs->exception_index;
-static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr,
- ARMMMUIdx mmu_idx)
-{
- CPUState *cs = CPU(cpu);
- CPUARMState *env = &cpu->env;
- MemTxAttrs attrs = {};
- MemTxResult txres;
- target_ulong page_size;
- hwaddr physaddr;
- int prot;
- ARMMMUFaultInfo fi = {};
- bool secure = mmu_idx & ARM_MMU_IDX_M_S;
- int exc;
- bool exc_secure;
- uint32_t value;
-
- if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &physaddr,
- &attrs, &prot, &page_size, &fi, NULL)) {
- /* MPU/SAU lookup failed */
- if (fi.type == ARMFault_QEMU_SFault) {
- qemu_log_mask(CPU_LOG_INT,
- "...SecureFault with SFSR.AUVIOL during unstack\n");
- env->v7m.sfsr |= R_V7M_SFSR_AUVIOL_MASK | R_V7M_SFSR_SFARVALID_MASK;
- env->v7m.sfar = addr;
- exc = ARMV7M_EXCP_SECURE;
- exc_secure = false;
- } else {
- qemu_log_mask(CPU_LOG_INT,
- "...MemManageFault with CFSR.MUNSTKERR\n");
- env->v7m.cfsr[secure] |= R_V7M_CFSR_MUNSTKERR_MASK;
- exc = ARMV7M_EXCP_MEM;
- exc_secure = secure;
- }
- goto pend_fault;
- }
-
- value = address_space_ldl(arm_addressspace(cs, attrs), physaddr,
- attrs, &txres);
- if (txres != MEMTX_OK) {
- /* BusFault trying to read the data */
- qemu_log_mask(CPU_LOG_INT, "...BusFault with BFSR.UNSTKERR\n");
- env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_UNSTKERR_MASK;
- exc = ARMV7M_EXCP_BUS;
- exc_secure = false;
- goto pend_fault;
- }
-
- *dest = value;
- return true;
-
-pend_fault:
- /* By pending the exception at this point we are making
- * the IMPDEF choice "overridden exceptions pended" (see the
- * MergeExcInfo() pseudocode). The other choice would be to not
- * pend them now and then make a choice about which to throw away
- * later if we have two derived exceptions.
- */
- armv7m_nvic_set_pending(env->nvic, exc, exc_secure);
- return false;
-}
-
-/* Write to v7M CONTROL.SPSEL bit for the specified security bank.
- * This may change the current stack pointer between Main and Process
- * stack pointers if it is done for the CONTROL register for the current
- * security state.
- */
-static void write_v7m_control_spsel_for_secstate(CPUARMState *env,
- bool new_spsel,
- bool secstate)
-{
- bool old_is_psp = v7m_using_psp(env);
-
- env->v7m.control[secstate] =
- deposit32(env->v7m.control[secstate],
- R_V7M_CONTROL_SPSEL_SHIFT,
- R_V7M_CONTROL_SPSEL_LENGTH, new_spsel);
-
- if (secstate == env->v7m.secure) {
- bool new_is_psp = v7m_using_psp(env);
- uint32_t tmp;
-
- if (old_is_psp != new_is_psp) {
- tmp = env->v7m.other_sp;
- env->v7m.other_sp = env->regs[13];
- env->regs[13] = tmp;
- }
- }
-}
-
-/* Write to v7M CONTROL.SPSEL bit. This may change the current
- * stack pointer between Main and Process stack pointers.
- */
-static void write_v7m_control_spsel(CPUARMState *env, bool new_spsel)
-{
- write_v7m_control_spsel_for_secstate(env, new_spsel, env->v7m.secure);
-}
-
-void write_v7m_exception(CPUARMState *env, uint32_t new_exc)
-{
- /* Write a new value to v7m.exception, thus transitioning into or out
- * of Handler mode; this may result in a change of active stack pointer.
- */
- bool new_is_psp, old_is_psp = v7m_using_psp(env);
- uint32_t tmp;
-
- env->v7m.exception = new_exc;
-
- new_is_psp = v7m_using_psp(env);
-
- if (old_is_psp != new_is_psp) {
- tmp = env->v7m.other_sp;
- env->v7m.other_sp = env->regs[13];
- env->regs[13] = tmp;
- }
-}
-
-/* Switch M profile security state between NS and S */
-static void switch_v7m_security_state(CPUARMState *env, bool new_secstate)
-{
- uint32_t new_ss_msp, new_ss_psp;
-
- if (env->v7m.secure == new_secstate) {
- return;
- }
-
- /* All the banked state is accessed by looking at env->v7m.secure
- * except for the stack pointer; rearrange the SP appropriately.
- */
- new_ss_msp = env->v7m.other_ss_msp;
- new_ss_psp = env->v7m.other_ss_psp;
-
- if (v7m_using_psp(env)) {
- env->v7m.other_ss_psp = env->regs[13];
- env->v7m.other_ss_msp = env->v7m.other_sp;
- } else {
- env->v7m.other_ss_msp = env->regs[13];
- env->v7m.other_ss_psp = env->v7m.other_sp;
- }
-
- env->v7m.secure = new_secstate;
-
- if (v7m_using_psp(env)) {
- env->regs[13] = new_ss_psp;
- env->v7m.other_sp = new_ss_msp;
- } else {
- env->regs[13] = new_ss_msp;
- env->v7m.other_sp = new_ss_psp;
- }
-}
-
-void HELPER(v7m_bxns)(CPUARMState *env, uint32_t dest)
-{
- /* Handle v7M BXNS:
- * - if the return value is a magic value, do exception return (like BX)
- * - otherwise bit 0 of the return value is the target security state
- */
- uint32_t min_magic;
-
- if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
- /* Covers FNC_RETURN and EXC_RETURN magic */
- min_magic = FNC_RETURN_MIN_MAGIC;
- } else {
- /* EXC_RETURN magic only */
- min_magic = EXC_RETURN_MIN_MAGIC;
- }
-
- if (dest >= min_magic) {
- /* This is an exception return magic value; put it where
- * do_v7m_exception_exit() expects and raise EXCEPTION_EXIT.
- * Note that if we ever add gen_ss_advance() singlestep support to
- * M profile this should count as an "instruction execution complete"
- * event (compare gen_bx_excret_final_code()).
- */
- env->regs[15] = dest & ~1;
- env->thumb = dest & 1;
- HELPER(exception_internal)(env, EXCP_EXCEPTION_EXIT);
- /* notreached */
- }
-
- /* translate.c should have made BXNS UNDEF unless we're secure */
- assert(env->v7m.secure);
-
- switch_v7m_security_state(env, dest & 1);
- env->thumb = 1;
- env->regs[15] = dest & ~1;
-}
-
-void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest)
-{
- /* Handle v7M BLXNS:
- * - bit 0 of the destination address is the target security state
- */
-
- /* At this point regs[15] is the address just after the BLXNS */
- uint32_t nextinst = env->regs[15] | 1;
- uint32_t sp = env->regs[13] - 8;
- uint32_t saved_psr;
-
- /* translate.c will have made BLXNS UNDEF unless we're secure */
- assert(env->v7m.secure);
-
- if (dest & 1) {
- /* target is Secure, so this is just a normal BLX,
- * except that the low bit doesn't indicate Thumb/not.
- */
- env->regs[14] = nextinst;
- env->thumb = 1;
- env->regs[15] = dest & ~1;
- return;
- }
-
- /* Target is non-secure: first push a stack frame */
- if (!QEMU_IS_ALIGNED(sp, 8)) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "BLXNS with misaligned SP is UNPREDICTABLE\n");
- }
-
- if (sp < v7m_sp_limit(env)) {
- raise_exception(env, EXCP_STKOF, 0, 1);
- }
-
- saved_psr = env->v7m.exception;
- if (env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK) {
- saved_psr |= XPSR_SFPA;
- }
-
- /* Note that these stores can throw exceptions on MPU faults */
- cpu_stl_data(env, sp, nextinst);
- cpu_stl_data(env, sp + 4, saved_psr);
-
- env->regs[13] = sp;
- env->regs[14] = 0xfeffffff;
- if (arm_v7m_is_handler_mode(env)) {
- /* Write a dummy value to IPSR, to avoid leaking the current secure
- * exception number to non-secure code. This is guaranteed not
- * to cause write_v7m_exception() to actually change stacks.
- */
- write_v7m_exception(env, 1);
- }
- switch_v7m_security_state(env, 0);
- env->thumb = 1;
- env->regs[15] = dest;
-}
-
-static uint32_t *get_v7m_sp_ptr(CPUARMState *env, bool secure, bool threadmode,
- bool spsel)
-{
- /* Return a pointer to the location where we currently store the
- * stack pointer for the requested security state and thread mode.
- * This pointer will become invalid if the CPU state is updated
- * such that the stack pointers are switched around (eg changing
- * the SPSEL control bit).
- * Compare the v8M ARM ARM pseudocode LookUpSP_with_security_mode().
- * Unlike that pseudocode, we require the caller to pass us in the
- * SPSEL control bit value; this is because we also use this
- * function in handling of pushing of the callee-saves registers
- * part of the v8M stack frame (pseudocode PushCalleeStack()),
- * and in the tailchain codepath the SPSEL bit comes from the exception
- * return magic LR value from the previous exception. The pseudocode
- * opencodes the stack-selection in PushCalleeStack(), but we prefer
- * to make this utility function generic enough to do the job.
- */
- bool want_psp = threadmode && spsel;
-
- if (secure == env->v7m.secure) {
- if (want_psp == v7m_using_psp(env)) {
- return &env->regs[13];
- } else {
- return &env->v7m.other_sp;
- }
- } else {
- if (want_psp) {
- return &env->v7m.other_ss_psp;
- } else {
- return &env->v7m.other_ss_msp;
- }
- }
-}
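/*
 * Illustrative sketch (hypothetical helper, not from the original sources):
 * how the Secure stack pointer for a tailchained callee-saves push would be
 * chosen from an EXC_RETURN value, mirroring v7m_push_callee_stack() below.
 */
static uint32_t *sketch_tailchain_frame_sp(CPUARMState *env, uint32_t lr)
{
    bool mode = lr & R_V7M_EXCRET_MODE_MASK;    /* background was Thread mode */
    bool spsel = lr & R_V7M_EXCRET_SPSEL_MASK;  /* Thread mode was using PSP */

    /* Secure bank; Thread/Handler and MSP/PSP selection come from EXC_RETURN */
    return get_v7m_sp_ptr(env, M_REG_S, mode, spsel);
}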
-
-static bool arm_v7m_load_vector(ARMCPU *cpu, int exc, bool targets_secure,
- uint32_t *pvec)
-{
- CPUState *cs = CPU(cpu);
- CPUARMState *env = &cpu->env;
- MemTxResult result;
- uint32_t addr = env->v7m.vecbase[targets_secure] + exc * 4;
- uint32_t vector_entry;
- MemTxAttrs attrs = {};
- ARMMMUIdx mmu_idx;
- bool exc_secure;
-
- mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, targets_secure, true);
-
- /* We don't do a get_phys_addr() here because the rules for vector
- * loads are special: they always use the default memory map, and
- * the default memory map permits reads from all addresses.
- * Since there's no easy way to pass through to pmsav8_mpu_lookup()
- * that we want this special case which would always say "yes",
- * we just do the SAU lookup here followed by a direct physical load.
- */
- attrs.secure = targets_secure;
- attrs.user = false;
-
- if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
- V8M_SAttributes sattrs = {};
-
- v8m_security_lookup(env, addr, MMU_DATA_LOAD, mmu_idx, &sattrs);
- if (sattrs.ns) {
- attrs.secure = false;
- } else if (!targets_secure) {
- /* NS access to S memory */
- goto load_fail;
- }
- }
-
- vector_entry = address_space_ldl(arm_addressspace(cs, attrs), addr,
- attrs, &result);
- if (result != MEMTX_OK) {
- goto load_fail;
- }
- *pvec = vector_entry;
- return true;
-
-load_fail:
- /* All vector table fetch fails are reported as HardFault, with
- * HFSR.VECTTBL and .FORCED set. (FORCED is set because
- * technically the underlying exception is a MemManage or BusFault
- * that is escalated to HardFault.) This is a terminal exception,
- * so we will either take the HardFault immediately or else enter
- * lockup (the latter case is handled in armv7m_nvic_set_pending_derived()).
- */
- exc_secure = targets_secure ||
- !(cpu->env.v7m.aircr & R_V7M_AIRCR_BFHFNMINS_MASK);
- env->v7m.hfsr |= R_V7M_HFSR_VECTTBL_MASK | R_V7M_HFSR_FORCED_MASK;
- armv7m_nvic_set_pending_derived(env->nvic, ARMV7M_EXCP_HARD, exc_secure);
- return false;
-}
-
-static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain,
- bool ignore_faults)
-{
- /* For v8M, push the callee-saves register part of the stack frame.
- * Compare the v8M pseudocode PushCalleeStack().
- * In the tailchaining case this may not be the current stack.
- */
- CPUARMState *env = &cpu->env;
- uint32_t *frame_sp_p;
- uint32_t frameptr;
- ARMMMUIdx mmu_idx;
- bool stacked_ok;
- uint32_t limit;
- bool want_psp;
-
- if (dotailchain) {
- bool mode = lr & R_V7M_EXCRET_MODE_MASK;
- bool priv = !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_NPRIV_MASK) ||
- !mode;
-
- mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, M_REG_S, priv);
- frame_sp_p = get_v7m_sp_ptr(env, M_REG_S, mode,
- lr & R_V7M_EXCRET_SPSEL_MASK);
- want_psp = mode && (lr & R_V7M_EXCRET_SPSEL_MASK);
- if (want_psp) {
- limit = env->v7m.psplim[M_REG_S];
- } else {
- limit = env->v7m.msplim[M_REG_S];
- }
- } else {
- mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false));
- frame_sp_p = &env->regs[13];
- limit = v7m_sp_limit(env);
- }
-
- frameptr = *frame_sp_p - 0x28;
- if (frameptr < limit) {
- /*
- * Stack limit failure: set SP to the limit value, and generate
- * STKOF UsageFault. Stack pushes below the limit must not be
- * performed. It is IMPDEF whether pushes above the limit are
- * performed; we choose not to.
- */
- qemu_log_mask(CPU_LOG_INT,
- "...STKOF during callee-saves register stacking\n");
- env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
- env->v7m.secure);
- *frame_sp_p = limit;
- return true;
- }
-
- /* Write as much of the stack frame as we can. A write failure may
- * cause us to pend a derived exception.
- */
- stacked_ok =
- v7m_stack_write(cpu, frameptr, 0xfefa125b, mmu_idx, ignore_faults) &&
- v7m_stack_write(cpu, frameptr + 0x8, env->regs[4], mmu_idx,
- ignore_faults) &&
- v7m_stack_write(cpu, frameptr + 0xc, env->regs[5], mmu_idx,
- ignore_faults) &&
- v7m_stack_write(cpu, frameptr + 0x10, env->regs[6], mmu_idx,
- ignore_faults) &&
- v7m_stack_write(cpu, frameptr + 0x14, env->regs[7], mmu_idx,
- ignore_faults) &&
- v7m_stack_write(cpu, frameptr + 0x18, env->regs[8], mmu_idx,
- ignore_faults) &&
- v7m_stack_write(cpu, frameptr + 0x1c, env->regs[9], mmu_idx,
- ignore_faults) &&
- v7m_stack_write(cpu, frameptr + 0x20, env->regs[10], mmu_idx,
- ignore_faults) &&
- v7m_stack_write(cpu, frameptr + 0x24, env->regs[11], mmu_idx,
- ignore_faults);
-
- /* Update SP regardless of whether any of the stack accesses failed. */
- *frame_sp_p = frameptr;
-
- return !stacked_ok;
-}
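/*
 * Illustrative layout (derived from the writes above, not from the original
 * sources) of the 0x28-byte callee-saves frame, relative to the final SP:
 *
 *   SP+0x00           integrity signature 0xfefa125b
 *   SP+0x04           (left unwritten here)
 *   SP+0x08..SP+0x24  r4, r5, r6, r7, r8, r9, r10, r11
 */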
-
-static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain,
- bool ignore_stackfaults)
-{
- /* Do the "take the exception" parts of exception entry,
- * but not the pushing of state to the stack. This is
- * similar to the pseudocode ExceptionTaken() function.
- */
- CPUARMState *env = &cpu->env;
- uint32_t addr;
- bool targets_secure;
- int exc;
- bool push_failed = false;
-
- armv7m_nvic_get_pending_irq_info(env->nvic, &exc, &targets_secure);
- qemu_log_mask(CPU_LOG_INT, "...taking pending %s exception %d\n",
- targets_secure ? "secure" : "nonsecure", exc);
-
- if (arm_feature(env, ARM_FEATURE_V8)) {
- if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
- (lr & R_V7M_EXCRET_S_MASK)) {
- /* The background code (the owner of the registers in the
- * exception frame) is Secure. This means it may either already
- * have or now needs to push callee-saves registers.
- */
- if (targets_secure) {
- if (dotailchain && !(lr & R_V7M_EXCRET_ES_MASK)) {
- /* We took an exception from Secure to NonSecure
- * (which means the callee-saved registers got stacked)
- * and are now tailchaining to a Secure exception.
- * Clear DCRS so eventual return from this Secure
- * exception unstacks the callee-saved registers.
- */
- lr &= ~R_V7M_EXCRET_DCRS_MASK;
- }
- } else {
- /* We're going to a non-secure exception; push the
- * callee-saves registers to the stack now, if they're
- * not already saved.
- */
- if (lr & R_V7M_EXCRET_DCRS_MASK &&
- !(dotailchain && !(lr & R_V7M_EXCRET_ES_MASK))) {
- push_failed = v7m_push_callee_stack(cpu, lr, dotailchain,
- ignore_stackfaults);
- }
- lr |= R_V7M_EXCRET_DCRS_MASK;
- }
- }
-
- lr &= ~R_V7M_EXCRET_ES_MASK;
- if (targets_secure || !arm_feature(env, ARM_FEATURE_M_SECURITY)) {
- lr |= R_V7M_EXCRET_ES_MASK;
- }
- lr &= ~R_V7M_EXCRET_SPSEL_MASK;
- if (env->v7m.control[targets_secure] & R_V7M_CONTROL_SPSEL_MASK) {
- lr |= R_V7M_EXCRET_SPSEL_MASK;
- }
-
- /* Clear registers if necessary to prevent non-secure exception
- * code being able to see register values from secure code.
- * Where register values become architecturally UNKNOWN we leave
- * them with their previous values.
- */
- if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
- if (!targets_secure) {
- /* Always clear the caller-saved registers (they have been
- * pushed to the stack earlier in v7m_push_stack()).
- * Clear callee-saved registers if the background code is
- * Secure (in which case these regs were saved in
- * v7m_push_callee_stack()).
- */
- int i;
-
- for (i = 0; i < 13; i++) {
- /* r4..r11 are callee-saves, zero only if EXCRET.S == 1 */
- if (i < 4 || i > 11 || (lr & R_V7M_EXCRET_S_MASK)) {
- env->regs[i] = 0;
- }
- }
- /* Clear EAPSR */
- xpsr_write(env, 0, XPSR_NZCV | XPSR_Q | XPSR_GE | XPSR_IT);
- }
- }
- }
-
- if (push_failed && !ignore_stackfaults) {
- /* Derived exception on callee-saves register stacking:
- * we might now want to take a different exception which
- * targets a different security state, so try again from the top.
- */
- qemu_log_mask(CPU_LOG_INT,
- "...derived exception on callee-saves register stacking");
- v7m_exception_taken(cpu, lr, true, true);
- return;
- }
-
- if (!arm_v7m_load_vector(cpu, exc, targets_secure, &addr)) {
- /* Vector load failed: derived exception */
- qemu_log_mask(CPU_LOG_INT, "...derived exception on vector table load");
- v7m_exception_taken(cpu, lr, true, true);
- return;
- }
-
- /* Now we've done everything that might cause a derived exception
- * we can go ahead and activate whichever exception we're going to
- * take (which might now be the derived exception).
- */
- armv7m_nvic_acknowledge_irq(env->nvic);
-
- /* Switch to target security state -- must do this before writing SPSEL */
- switch_v7m_security_state(env, targets_secure);
- write_v7m_control_spsel(env, 0);
- arm_clear_exclusive(env);
- /* Clear IT bits */
- env->condexec_bits = 0;
- env->regs[14] = lr;
- env->regs[15] = addr & 0xfffffffe;
- env->thumb = addr & 1;
-}
-
-static bool v7m_push_stack(ARMCPU *cpu)
-{
- /* Do the "set up stack frame" part of exception entry,
- * similar to pseudocode PushStack().
- * Return true if we generate a derived exception (and so
- * should ignore further stack faults trying to process
- * that derived exception.)
- */
- bool stacked_ok;
- CPUARMState *env = &cpu->env;
- uint32_t xpsr = xpsr_read(env);
- uint32_t frameptr = env->regs[13];
- ARMMMUIdx mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false));
-
- /* Align stack pointer if the guest wants that */
- if ((frameptr & 4) &&
- (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKALIGN_MASK)) {
- frameptr -= 4;
- xpsr |= XPSR_SPREALIGN;
- }
-
- frameptr -= 0x20;
-
- if (arm_feature(env, ARM_FEATURE_V8)) {
- uint32_t limit = v7m_sp_limit(env);
-
- if (frameptr < limit) {
- /*
- * Stack limit failure: set SP to the limit value, and generate
- * STKOF UsageFault. Stack pushes below the limit must not be
- * performed. It is IMPDEF whether pushes above the limit are
- * performed; we choose not to.
- */
- qemu_log_mask(CPU_LOG_INT,
- "...STKOF during stacking\n");
- env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
- env->v7m.secure);
- env->regs[13] = limit;
- return true;
- }
- }
-
- /* Write as much of the stack frame as we can. If we fail a stack
- * write this will result in a derived exception being pended
- * (which may be taken in preference to the one we started with
- * if it has higher priority).
- */
- stacked_ok =
- v7m_stack_write(cpu, frameptr, env->regs[0], mmu_idx, false) &&
- v7m_stack_write(cpu, frameptr + 4, env->regs[1], mmu_idx, false) &&
- v7m_stack_write(cpu, frameptr + 8, env->regs[2], mmu_idx, false) &&
- v7m_stack_write(cpu, frameptr + 12, env->regs[3], mmu_idx, false) &&
- v7m_stack_write(cpu, frameptr + 16, env->regs[12], mmu_idx, false) &&
- v7m_stack_write(cpu, frameptr + 20, env->regs[14], mmu_idx, false) &&
- v7m_stack_write(cpu, frameptr + 24, env->regs[15], mmu_idx, false) &&
- v7m_stack_write(cpu, frameptr + 28, xpsr, mmu_idx, false);
-
- /* Update SP regardless of whether any of the stack accesses failed. */
- env->regs[13] = frameptr;
-
- return !stacked_ok;
-}
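/*
 * Illustrative layout (derived from the writes above, not from the original
 * sources) of the 0x20-byte basic exception frame, relative to the final SP:
 *
 *   SP+0x00  r0        SP+0x10  r12
 *   SP+0x04  r1        SP+0x14  lr (r14)
 *   SP+0x08  r2        SP+0x18  return address (r15)
 *   SP+0x0c  r3        SP+0x1c  xPSR (SPREALIGN set if a pad word was added)
 */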
-
-static void do_v7m_exception_exit(ARMCPU *cpu)
-{
- CPUARMState *env = &cpu->env;
- uint32_t excret;
- uint32_t xpsr;
- bool ufault = false;
- bool sfault = false;
- bool return_to_sp_process;
- bool return_to_handler;
- bool rettobase = false;
- bool exc_secure = false;
- bool return_to_secure;
-
- /* If we're not in Handler mode then jumps to magic exception-exit
- * addresses don't have magic behaviour. However for the v8M
- * security extensions the magic secure-function-return has to
- * work in thread mode too, so to avoid doing an extra check in
- * the generated code we allow exception-exit magic to also cause the
- * internal exception and bring us here in thread mode. Correct code
- * will never try to do this (the following insn fetch will always
- * fault) so the overhead of having taken an unnecessary exception
- * doesn't matter.
- */
- if (!arm_v7m_is_handler_mode(env)) {
- return;
- }
-
- /* In the spec pseudocode ExceptionReturn() is called directly
- * from BXWritePC() and gets the full target PC value including
- * bit zero. In QEMU's implementation we treat it as a normal
- * jump-to-register (which is then caught later on), and so split
- * the target value up between env->regs[15] and env->thumb in
- * gen_bx(). Reconstitute it.
- */
- excret = env->regs[15];
- if (env->thumb) {
- excret |= 1;
- }
-
- qemu_log_mask(CPU_LOG_INT, "Exception return: magic PC %" PRIx32
- " previous exception %d\n",
- excret, env->v7m.exception);
-
- if ((excret & R_V7M_EXCRET_RES1_MASK) != R_V7M_EXCRET_RES1_MASK) {
- qemu_log_mask(LOG_GUEST_ERROR, "M profile: zero high bits in exception "
- "exit PC value 0x%" PRIx32 " are UNPREDICTABLE\n",
- excret);
- }
-
- if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
- /* EXC_RETURN.ES validation check (R_SMFL). We must do this before
- * we pick which FAULTMASK to clear.
- */
- if (!env->v7m.secure &&
- ((excret & R_V7M_EXCRET_ES_MASK) ||
- !(excret & R_V7M_EXCRET_DCRS_MASK))) {
- sfault = 1;
- /* For all other purposes, treat ES as 0 (R_HXSR) */
- excret &= ~R_V7M_EXCRET_ES_MASK;
- }
- exc_secure = excret & R_V7M_EXCRET_ES_MASK;
- }
-
- if (env->v7m.exception != ARMV7M_EXCP_NMI) {
- /* Auto-clear FAULTMASK on return from other than NMI.
- * If the security extension is implemented then this only
- * happens if the raw execution priority is >= 0; the
- * value of the ES bit in the exception return value indicates
- * which security state's faultmask to clear. (v8M ARM ARM R_KBNF.)
- */
- if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
- if (armv7m_nvic_raw_execution_priority(env->nvic) >= 0) {
- env->v7m.faultmask[exc_secure] = 0;
- }
- } else {
- env->v7m.faultmask[M_REG_NS] = 0;
- }
- }
-
- switch (armv7m_nvic_complete_irq(env->nvic, env->v7m.exception,
- exc_secure)) {
- case -1:
- /* attempt to exit an exception that isn't active */
- ufault = true;
- break;
- case 0:
- /* still an irq active now */
- break;
- case 1:
- /* we returned to base exception level, no nesting.
- * (In the pseudocode this is written using "NestedActivation != 1"
- * where we have 'rettobase == false'.)
- */
- rettobase = true;
- break;
- default:
- g_assert_not_reached();
- }
-
- return_to_handler = !(excret & R_V7M_EXCRET_MODE_MASK);
- return_to_sp_process = excret & R_V7M_EXCRET_SPSEL_MASK;
- return_to_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
- (excret & R_V7M_EXCRET_S_MASK);
-
- if (arm_feature(env, ARM_FEATURE_V8)) {
- if (!arm_feature(env, ARM_FEATURE_M_SECURITY)) {
- /* UNPREDICTABLE if S == 1 or DCRS == 0 or ES == 1 (R_XLCP);
- * we choose to take the UsageFault.
- */
- if ((excret & R_V7M_EXCRET_S_MASK) ||
- (excret & R_V7M_EXCRET_ES_MASK) ||
- !(excret & R_V7M_EXCRET_DCRS_MASK)) {
- ufault = true;
- }
- }
- if (excret & R_V7M_EXCRET_RES0_MASK) {
- ufault = true;
- }
- } else {
- /* For v7M we only recognize certain combinations of the low bits */
- switch (excret & 0xf) {
- case 1: /* Return to Handler */
- break;
- case 13: /* Return to Thread using Process stack */
- case 9: /* Return to Thread using Main stack */
- /* We only need to check NONBASETHRDENA for v7M, because in
- * v8M this bit does not exist (it is RES1).
- */
- if (!rettobase &&
- !(env->v7m.ccr[env->v7m.secure] &
- R_V7M_CCR_NONBASETHRDENA_MASK)) {
- ufault = true;
- }
- break;
- default:
- ufault = true;
- }
- }
-
- /*
- * Set CONTROL.SPSEL from excret.SPSEL. Since we're still in
- * Handler mode (and will be until we write the new XPSR.Interrupt
- * field) this does not switch around the current stack pointer.
- * We must do this before we do any kind of tailchaining, including
- * for the derived exceptions on integrity check failures, or we will
- * give the guest an incorrect EXCRET.SPSEL value on exception entry.
- */
- write_v7m_control_spsel_for_secstate(env, return_to_sp_process, exc_secure);
-
- if (sfault) {
- env->v7m.sfsr |= R_V7M_SFSR_INVER_MASK;
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
- qemu_log_mask(CPU_LOG_INT, "...taking SecureFault on existing "
- "stackframe: failed EXC_RETURN.ES validity check\n");
- v7m_exception_taken(cpu, excret, true, false);
- return;
- }
-
- if (ufault) {
- /* Bad exception return: instead of popping the exception
- * stack, directly take a usage fault on the current stack.
- */
- env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK;
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
- qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing "
- "stackframe: failed exception return integrity check\n");
- v7m_exception_taken(cpu, excret, true, false);
- return;
- }
-
- /*
- * Tailchaining: if there is currently a pending exception that
- * is high enough priority to preempt execution at the level we're
- * about to return to, then just directly take that exception now,
- * avoiding an unstack-and-then-stack. Note that now we have
- * deactivated the previous exception by calling armv7m_nvic_complete_irq()
- * our current execution priority is already the execution priority we are
- * returning to -- none of the state we would unstack or set based on
- * the EXCRET value affects it.
- */
- if (armv7m_nvic_can_take_pending_exception(env->nvic)) {
- qemu_log_mask(CPU_LOG_INT, "...tailchaining to pending exception\n");
- v7m_exception_taken(cpu, excret, true, false);
- return;
- }
-
- switch_v7m_security_state(env, return_to_secure);
-
- {
- /* The stack pointer we should be reading the exception frame from
- * depends on bits in the magic exception return type value (and
- * for v8M isn't necessarily the stack pointer we will eventually
- * end up resuming execution with). Get a pointer to the location
- * in the CPU state struct where the SP we need is currently being
- * stored; we will use and modify it in place.
- * We use this limited C variable scope so we don't accidentally
- * use 'frame_sp_p' after we do something that makes it invalid.
- */
- uint32_t *frame_sp_p = get_v7m_sp_ptr(env,
- return_to_secure,
- !return_to_handler,
- return_to_sp_process);
- uint32_t frameptr = *frame_sp_p;
- bool pop_ok = true;
- ARMMMUIdx mmu_idx;
- bool return_to_priv = return_to_handler ||
- !(env->v7m.control[return_to_secure] & R_V7M_CONTROL_NPRIV_MASK);
-
- mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, return_to_secure,
- return_to_priv);
-
- if (!QEMU_IS_ALIGNED(frameptr, 8) &&
- arm_feature(env, ARM_FEATURE_V8)) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "M profile exception return with non-8-aligned SP "
- "for destination state is UNPREDICTABLE\n");
- }
-
- /* Do we need to pop callee-saved registers? */
- if (return_to_secure &&
- ((excret & R_V7M_EXCRET_ES_MASK) == 0 ||
- (excret & R_V7M_EXCRET_DCRS_MASK) == 0)) {
- uint32_t expected_sig = 0xfefa125b;
- uint32_t actual_sig;
-
- pop_ok = v7m_stack_read(cpu, &actual_sig, frameptr, mmu_idx);
-
- if (pop_ok && expected_sig != actual_sig) {
- /* Take a SecureFault on the current stack */
- env->v7m.sfsr |= R_V7M_SFSR_INVIS_MASK;
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
- qemu_log_mask(CPU_LOG_INT, "...taking SecureFault on existing "
- "stackframe: failed exception return integrity "
- "signature check\n");
- v7m_exception_taken(cpu, excret, true, false);
- return;
- }
-
- pop_ok = pop_ok &&
- v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) &&
- v7m_stack_read(cpu, &env->regs[5], frameptr + 0xc, mmu_idx) &&
- v7m_stack_read(cpu, &env->regs[6], frameptr + 0x10, mmu_idx) &&
- v7m_stack_read(cpu, &env->regs[7], frameptr + 0x14, mmu_idx) &&
- v7m_stack_read(cpu, &env->regs[8], frameptr + 0x18, mmu_idx) &&
- v7m_stack_read(cpu, &env->regs[9], frameptr + 0x1c, mmu_idx) &&
- v7m_stack_read(cpu, &env->regs[10], frameptr + 0x20, mmu_idx) &&
- v7m_stack_read(cpu, &env->regs[11], frameptr + 0x24, mmu_idx);
-
- frameptr += 0x28;
- }
-
- /* Pop registers */
- pop_ok = pop_ok &&
- v7m_stack_read(cpu, &env->regs[0], frameptr, mmu_idx) &&
- v7m_stack_read(cpu, &env->regs[1], frameptr + 0x4, mmu_idx) &&
- v7m_stack_read(cpu, &env->regs[2], frameptr + 0x8, mmu_idx) &&
- v7m_stack_read(cpu, &env->regs[3], frameptr + 0xc, mmu_idx) &&
- v7m_stack_read(cpu, &env->regs[12], frameptr + 0x10, mmu_idx) &&
- v7m_stack_read(cpu, &env->regs[14], frameptr + 0x14, mmu_idx) &&
- v7m_stack_read(cpu, &env->regs[15], frameptr + 0x18, mmu_idx) &&
- v7m_stack_read(cpu, &xpsr, frameptr + 0x1c, mmu_idx);
-
- if (!pop_ok) {
- /* v7m_stack_read() pended a fault, so take it (as a tail
- * chained exception on the same stack frame)
- */
- qemu_log_mask(CPU_LOG_INT, "...derived exception on unstacking\n");
- v7m_exception_taken(cpu, excret, true, false);
- return;
- }
-
- /* Returning from an exception with a PC with bit 0 set is defined
- * behaviour on v8M (bit 0 is ignored), but for v7M it was specified
- * to be UNPREDICTABLE. In practice actual v7M hardware seems to ignore
- * the lsbit, and there are several RTOSes out there which incorrectly
- * assume the r15 in the stack frame should be a Thumb-style "lsbit
- * indicates ARM/Thumb" value, so ignore the bit on v7M as well, but
- * complain about the badly behaved guest.
- */
- if (env->regs[15] & 1) {
- env->regs[15] &= ~1U;
- if (!arm_feature(env, ARM_FEATURE_V8)) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "M profile return from interrupt with misaligned "
- "PC is UNPREDICTABLE on v7M\n");
- }
- }
-
- if (arm_feature(env, ARM_FEATURE_V8)) {
- /* For v8M we have to check whether the xPSR exception field
- * matches the EXCRET value for return to handler/thread
- * before we commit to changing the SP and xPSR.
- */
- bool will_be_handler = (xpsr & XPSR_EXCP) != 0;
- if (return_to_handler != will_be_handler) {
- /* Take an INVPC UsageFault on the current stack.
- * By this point we will have switched to the security state
- * for the background state, so this UsageFault will target
- * that state.
- */
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
- env->v7m.secure);
- env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK;
- qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing "
- "stackframe: failed exception return integrity "
- "check\n");
- v7m_exception_taken(cpu, excret, true, false);
- return;
- }
- }
-
- /* Commit to consuming the stack frame */
- frameptr += 0x20;
- /* Undo stack alignment (the SPREALIGN bit indicates that the original
- * pre-exception SP was not 8-aligned and we added a padding word to
- * align it, so we undo this by ORing in the bit that increases it
- * from the current 8-aligned value to the 8-unaligned value. (Adding 4
- * would work too but a logical OR is how the pseudocode specifies it.)
- */
- if (xpsr & XPSR_SPREALIGN) {
- frameptr |= 4;
- }
- *frame_sp_p = frameptr;
- }
- /* This xpsr_write() will invalidate frame_sp_p as it may switch stack */
- xpsr_write(env, xpsr, ~XPSR_SPREALIGN);
-
- /* The restored xPSR exception field will be zero if we're
- * resuming in Thread mode. If that doesn't match what the
- * exception return excret specified then this is a UsageFault.
- * v7M requires we make this check here; v8M did it earlier.
- */
- if (return_to_handler != arm_v7m_is_handler_mode(env)) {
- /* Take an INVPC UsageFault by pushing the stack again;
- * we know we're v7M so this is never a Secure UsageFault.
- */
- bool ignore_stackfaults;
-
- assert(!arm_feature(env, ARM_FEATURE_V8));
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, false);
- env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK;
- ignore_stackfaults = v7m_push_stack(cpu);
- qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on new stackframe: "
- "failed exception return integrity check\n");
- v7m_exception_taken(cpu, excret, false, ignore_stackfaults);
- return;
- }
-
- /* Otherwise, we have a successful exception exit. */
- arm_clear_exclusive(env);
- qemu_log_mask(CPU_LOG_INT, "...successful exception return\n");
-}
-
-static bool do_v7m_function_return(ARMCPU *cpu)
-{
- /* v8M security extensions magic function return.
- * We may either:
- * (1) throw an exception (longjump)
- * (2) return true if we successfully handled the function return
- * (3) return false if we failed a consistency check and have
- * pended a UsageFault that needs to be taken now
- *
- * At this point the magic return value is split between env->regs[15]
- * and env->thumb. We don't bother to reconstitute it because we don't
- * need it (all values are handled the same way).
- */
- CPUARMState *env = &cpu->env;
- uint32_t newpc, newpsr, newpsr_exc;
-
- qemu_log_mask(CPU_LOG_INT, "...really v7M secure function return\n");
-
- {
- bool threadmode, spsel;
- TCGMemOpIdx oi;
- ARMMMUIdx mmu_idx;
- uint32_t *frame_sp_p;
- uint32_t frameptr;
-
- /* Pull the return address and IPSR from the Secure stack */
- threadmode = !arm_v7m_is_handler_mode(env);
- spsel = env->v7m.control[M_REG_S] & R_V7M_CONTROL_SPSEL_MASK;
-
- frame_sp_p = get_v7m_sp_ptr(env, true, threadmode, spsel);
- frameptr = *frame_sp_p;
-
- /* These loads may throw an exception (for MPU faults). We want to
- * do them as secure, so work out what MMU index that is.
- */
- mmu_idx = arm_v7m_mmu_idx_for_secstate(env, true);
- oi = make_memop_idx(MO_LE, arm_to_core_mmu_idx(mmu_idx));
- newpc = helper_le_ldul_mmu(env, frameptr, oi, 0);
- newpsr = helper_le_ldul_mmu(env, frameptr + 4, oi, 0);
-
- /* Consistency checks on new IPSR */
- newpsr_exc = newpsr & XPSR_EXCP;
- if (!((env->v7m.exception == 0 && newpsr_exc == 0) ||
- (env->v7m.exception == 1 && newpsr_exc != 0))) {
- /* Pend the fault and tell our caller to take it */
- env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK;
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
- env->v7m.secure);
- qemu_log_mask(CPU_LOG_INT,
- "...taking INVPC UsageFault: "
- "IPSR consistency check failed\n");
- return false;
- }
-
- *frame_sp_p = frameptr + 8;
- }
-
- /* This invalidates frame_sp_p */
- switch_v7m_security_state(env, true);
- env->v7m.exception = newpsr_exc;
- env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_SFPA_MASK;
- if (newpsr & XPSR_SFPA) {
- env->v7m.control[M_REG_S] |= R_V7M_CONTROL_SFPA_MASK;
- }
- xpsr_write(env, 0, XPSR_IT);
- env->thumb = newpc & 1;
- env->regs[15] = newpc & ~1;
-
- qemu_log_mask(CPU_LOG_INT, "...function return successful\n");
- return true;
-}
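/*
 * Illustrative note (not from the original sources): the two words popped
 * here are the ones pushed by HELPER(v7m_blxns) above, i.e.
 *
 *   [frameptr + 0]  return address (bit 0 set, since the caller is Thumb)
 *   [frameptr + 4]  partial xPSR: the saved IPSR exception number, plus
 *                   XPSR_SFPA mirroring CONTROL_S.SFPA
 */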
-
-static void arm_log_exception(int idx)
-{
if (qemu_loglevel_mask(CPU_LOG_INT)) {
const char *exc = NULL;
static const char * const excnames[] = {
@@ -7573,6 +10752,12 @@ static void arm_log_exception(int idx)
[EXCP_NOCP] = "v7M NOCP UsageFault",
[EXCP_INVSTATE] = "v7M INVSTATE UsageFault",
[EXCP_STKOF] = "v8M STKOF UsageFault",
+ [EXCP_LAZYFP] = "v7M exception during lazy FP stacking",
+ [EXCP_LSERR] = "v8M LSERR UsageFault",
+ [EXCP_UNALIGNED] = "v7M UNALIGNED UsageFault",
+ [EXCP_DIVBYZERO] = "v7M DIVBYZERO UsageFault",
+ [EXCP_VSERR] = "Virtual SERR",
+ [EXCP_GPC] = "Granule Protection Check",
};
if (idx >= 0 && idx < ARRAY_SIZE(excnames)) {
@@ -7581,318 +10766,13 @@ static void arm_log_exception(int idx)
if (!exc) {
exc = "unknown";
}
- qemu_log_mask(CPU_LOG_INT, "Taking exception %d [%s]\n", idx, exc);
+ qemu_log_mask(CPU_LOG_INT, "Taking exception %d [%s] on CPU %d\n",
+ idx, exc, cs->cpu_index);
}
}
-static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx,
- uint32_t addr, uint16_t *insn)
-{
- /* Load a 16-bit portion of a v7M instruction, returning true on success,
- * or false on failure (in which case we will have pended the appropriate
- * exception).
- * We need to do the instruction fetch's MPU and SAU checks
- * like this because there is no MMU index that would allow
- * doing the load with a single function call. Instead we must
- * first check that the security attributes permit the load
- * and that they don't mismatch on the two halves of the instruction,
- * and then we do the load as a secure load (ie using the security
- * attributes of the address, not the CPU, as architecturally required).
- */
- CPUState *cs = CPU(cpu);
- CPUARMState *env = &cpu->env;
- V8M_SAttributes sattrs = {};
- MemTxAttrs attrs = {};
- ARMMMUFaultInfo fi = {};
- MemTxResult txres;
- target_ulong page_size;
- hwaddr physaddr;
- int prot;
-
- v8m_security_lookup(env, addr, MMU_INST_FETCH, mmu_idx, &sattrs);
- if (!sattrs.nsc || sattrs.ns) {
- /* This must be the second half of the insn, and it straddles a
- * region boundary with the second half not being S&NSC.
- */
- env->v7m.sfsr |= R_V7M_SFSR_INVEP_MASK;
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
- qemu_log_mask(CPU_LOG_INT,
- "...really SecureFault with SFSR.INVEP\n");
- return false;
- }
- if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx,
- &physaddr, &attrs, &prot, &page_size, &fi, NULL)) {
- /* the MPU lookup failed */
- env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_IACCVIOL_MASK;
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM, env->v7m.secure);
- qemu_log_mask(CPU_LOG_INT, "...really MemManage with CFSR.IACCVIOL\n");
- return false;
- }
- *insn = address_space_lduw_le(arm_addressspace(cs, attrs), physaddr,
- attrs, &txres);
- if (txres != MEMTX_OK) {
- env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_IBUSERR_MASK;
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_BUS, false);
- qemu_log_mask(CPU_LOG_INT, "...really BusFault with CFSR.IBUSERR\n");
- return false;
- }
- return true;
-}
-
-static bool v7m_handle_execute_nsc(ARMCPU *cpu)
-{
- /* Check whether this attempt to execute code in a Secure & NS-Callable
- * memory region is for an SG instruction; if so, then emulate the
- * effect of the SG instruction and return true. Otherwise pend
- * the correct kind of exception and return false.
- */
- CPUARMState *env = &cpu->env;
- ARMMMUIdx mmu_idx;
- uint16_t insn;
-
- /* We should never get here unless get_phys_addr_pmsav8() caused
- * an exception for NS executing in S&NSC memory.
- */
- assert(!env->v7m.secure);
- assert(arm_feature(env, ARM_FEATURE_M_SECURITY));
-
- /* We want to do the MPU lookup as secure; work out what mmu_idx that is */
- mmu_idx = arm_v7m_mmu_idx_for_secstate(env, true);
-
- if (!v7m_read_half_insn(cpu, mmu_idx, env->regs[15], &insn)) {
- return false;
- }
-
- if (!env->thumb) {
- goto gen_invep;
- }
-
- if (insn != 0xe97f) {
- /* Not an SG instruction first half (we choose the IMPDEF
- * early-SG-check option).
- */
- goto gen_invep;
- }
-
- if (!v7m_read_half_insn(cpu, mmu_idx, env->regs[15] + 2, &insn)) {
- return false;
- }
-
- if (insn != 0xe97f) {
- /* Not an SG instruction second half (yes, both halves of the SG
- * insn have the same hex value)
- */
- goto gen_invep;
- }
-
- /* OK, we have confirmed that we really have an SG instruction.
- * We know we're NS in S memory so don't need to repeat those checks.
- */
- qemu_log_mask(CPU_LOG_INT, "...really an SG instruction at 0x%08" PRIx32
- ", executing it\n", env->regs[15]);
- env->regs[14] &= ~1;
- switch_v7m_security_state(env, true);
- xpsr_write(env, 0, XPSR_IT);
- env->regs[15] += 4;
- return true;
-
-gen_invep:
- env->v7m.sfsr |= R_V7M_SFSR_INVEP_MASK;
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
- qemu_log_mask(CPU_LOG_INT,
- "...really SecureFault with SFSR.INVEP\n");
- return false;
-}
-
-void arm_v7m_cpu_do_interrupt(CPUState *cs)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
- uint32_t lr;
- bool ignore_stackfaults;
-
- arm_log_exception(cs->exception_index);
-
- /* For exceptions we just mark as pending on the NVIC, and let that
- handle it. */
- switch (cs->exception_index) {
- case EXCP_UDEF:
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
- env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_UNDEFINSTR_MASK;
- break;
- case EXCP_NOCP:
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
- env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_NOCP_MASK;
- break;
- case EXCP_INVSTATE:
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
- env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVSTATE_MASK;
- break;
- case EXCP_STKOF:
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
- env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
- break;
- case EXCP_SWI:
- /* The PC already points to the next instruction. */
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SVC, env->v7m.secure);
- break;
- case EXCP_PREFETCH_ABORT:
- case EXCP_DATA_ABORT:
- /* Note that for M profile we don't have a guest facing FSR, but
- * the env->exception.fsr will be populated by the code that
- * raises the fault, in the A profile short-descriptor format.
- */
- switch (env->exception.fsr & 0xf) {
- case M_FAKE_FSR_NSC_EXEC:
- /* Exception generated when we try to execute code at an address
- * which is marked as Secure & Non-Secure Callable and the CPU
- * is in the Non-Secure state. The only instruction which can
- * be executed like this is SG (and that only if both halves of
- * the SG instruction have the same security attributes.)
- * Everything else must generate an INVEP SecureFault, so we
- * emulate the SG instruction here.
- */
- if (v7m_handle_execute_nsc(cpu)) {
- return;
- }
- break;
- case M_FAKE_FSR_SFAULT:
- /* Various flavours of SecureFault for attempts to execute or
- * access data in the wrong security state.
- */
- switch (cs->exception_index) {
- case EXCP_PREFETCH_ABORT:
- if (env->v7m.secure) {
- env->v7m.sfsr |= R_V7M_SFSR_INVTRAN_MASK;
- qemu_log_mask(CPU_LOG_INT,
- "...really SecureFault with SFSR.INVTRAN\n");
- } else {
- env->v7m.sfsr |= R_V7M_SFSR_INVEP_MASK;
- qemu_log_mask(CPU_LOG_INT,
- "...really SecureFault with SFSR.INVEP\n");
- }
- break;
- case EXCP_DATA_ABORT:
- /* This must be an NS access to S memory */
- env->v7m.sfsr |= R_V7M_SFSR_AUVIOL_MASK;
- qemu_log_mask(CPU_LOG_INT,
- "...really SecureFault with SFSR.AUVIOL\n");
- break;
- }
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
- break;
- case 0x8: /* External Abort */
- switch (cs->exception_index) {
- case EXCP_PREFETCH_ABORT:
- env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_IBUSERR_MASK;
- qemu_log_mask(CPU_LOG_INT, "...with CFSR.IBUSERR\n");
- break;
- case EXCP_DATA_ABORT:
- env->v7m.cfsr[M_REG_NS] |=
- (R_V7M_CFSR_PRECISERR_MASK | R_V7M_CFSR_BFARVALID_MASK);
- env->v7m.bfar = env->exception.vaddress;
- qemu_log_mask(CPU_LOG_INT,
- "...with CFSR.PRECISERR and BFAR 0x%x\n",
- env->v7m.bfar);
- break;
- }
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_BUS, false);
- break;
- default:
- /* All other FSR values are either MPU faults or "can't happen
- * for M profile" cases.
- */
- switch (cs->exception_index) {
- case EXCP_PREFETCH_ABORT:
- env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_IACCVIOL_MASK;
- qemu_log_mask(CPU_LOG_INT, "...with CFSR.IACCVIOL\n");
- break;
- case EXCP_DATA_ABORT:
- env->v7m.cfsr[env->v7m.secure] |=
- (R_V7M_CFSR_DACCVIOL_MASK | R_V7M_CFSR_MMARVALID_MASK);
- env->v7m.mmfar[env->v7m.secure] = env->exception.vaddress;
- qemu_log_mask(CPU_LOG_INT,
- "...with CFSR.DACCVIOL and MMFAR 0x%x\n",
- env->v7m.mmfar[env->v7m.secure]);
- break;
- }
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM,
- env->v7m.secure);
- break;
- }
- break;
- case EXCP_BKPT:
- if (semihosting_enabled()) {
- int nr;
- nr = arm_lduw_code(env, env->regs[15], arm_sctlr_b(env)) & 0xff;
- if (nr == 0xab) {
- env->regs[15] += 2;
- qemu_log_mask(CPU_LOG_INT,
- "...handling as semihosting call 0x%x\n",
- env->regs[0]);
- env->regs[0] = do_arm_semihosting(env);
- return;
- }
- }
- armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_DEBUG, false);
- break;
- case EXCP_IRQ:
- break;
- case EXCP_EXCEPTION_EXIT:
- if (env->regs[15] < EXC_RETURN_MIN_MAGIC) {
- /* Must be v8M security extension function return */
- assert(env->regs[15] >= FNC_RETURN_MIN_MAGIC);
- assert(arm_feature(env, ARM_FEATURE_M_SECURITY));
- if (do_v7m_function_return(cpu)) {
- return;
- }
- } else {
- do_v7m_exception_exit(cpu);
- return;
- }
- break;
- default:
- cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
- return; /* Never happens. Keep compiler happy. */
- }
-
- if (arm_feature(env, ARM_FEATURE_V8)) {
- lr = R_V7M_EXCRET_RES1_MASK |
- R_V7M_EXCRET_DCRS_MASK |
- R_V7M_EXCRET_FTYPE_MASK;
- /* The S bit indicates whether we should return to Secure
- * or NonSecure (ie our current state).
- * The ES bit indicates whether we're taking this exception
- * to Secure or NonSecure (ie our target state). We set it
- * later, in v7m_exception_taken().
- * The SPSEL bit is also set in v7m_exception_taken() for v8M.
- * This corresponds to the ARM ARM pseudocode for v8M setting
- * some LR bits in PushStack() and some in ExceptionTaken();
- * the distinction matters for the tailchain cases where we
- * can take an exception without pushing the stack.
- */
- if (env->v7m.secure) {
- lr |= R_V7M_EXCRET_S_MASK;
- }
- } else {
- lr = R_V7M_EXCRET_RES1_MASK |
- R_V7M_EXCRET_S_MASK |
- R_V7M_EXCRET_DCRS_MASK |
- R_V7M_EXCRET_FTYPE_MASK |
- R_V7M_EXCRET_ES_MASK;
- if (env->v7m.control[M_REG_NS] & R_V7M_CONTROL_SPSEL_MASK) {
- lr |= R_V7M_EXCRET_SPSEL_MASK;
- }
- }
- if (!arm_v7m_is_handler_mode(env)) {
- lr |= R_V7M_EXCRET_MODE_MASK;
- }
-
- ignore_stackfaults = v7m_push_stack(cpu);
- v7m_exception_taken(cpu, lr, false, ignore_stackfaults);
-}
-
-/* Function used to synchronize QEMU's AArch64 register set with AArch32
+/*
+ * Function used to synchronize QEMU's AArch64 register set with AArch32
* register set. This is necessary when switching between AArch32 and AArch64
* execution state.
*/
@@ -7906,7 +10786,8 @@ void aarch64_sync_32_to_64(CPUARMState *env)
env->xregs[i] = env->regs[i];
}
- /* Unless we are in FIQ mode, x8-x12 come from the user registers r8-r12.
+ /*
+ * Unless we are in FIQ mode, x8-x12 come from the user registers r8-r12.
* Otherwise, they come from the banked user regs.
*/
if (mode == ARM_CPU_MODE_FIQ) {
@@ -7919,7 +10800,8 @@ void aarch64_sync_32_to_64(CPUARMState *env)
}
}
- /* Registers x13-x23 are the various mode SP and FP registers. Registers
+ /*
+ * Registers x13-x23 are the various mode SP and FP registers. Registers
* r13 and r14 are only copied if we are in that mode, otherwise we copy
* from the mode banked register.
*/
@@ -7932,7 +10814,7 @@ void aarch64_sync_32_to_64(CPUARMState *env)
if (mode == ARM_CPU_MODE_HYP) {
env->xregs[14] = env->regs[14];
} else {
- env->xregs[14] = env->banked_r14[bank_number(ARM_CPU_MODE_USR)];
+ env->xregs[14] = env->banked_r14[r14_bank_number(ARM_CPU_MODE_USR)];
}
}
@@ -7946,7 +10828,7 @@ void aarch64_sync_32_to_64(CPUARMState *env)
env->xregs[16] = env->regs[14];
env->xregs[17] = env->regs[13];
} else {
- env->xregs[16] = env->banked_r14[bank_number(ARM_CPU_MODE_IRQ)];
+ env->xregs[16] = env->banked_r14[r14_bank_number(ARM_CPU_MODE_IRQ)];
env->xregs[17] = env->banked_r13[bank_number(ARM_CPU_MODE_IRQ)];
}
@@ -7954,7 +10836,7 @@ void aarch64_sync_32_to_64(CPUARMState *env)
env->xregs[18] = env->regs[14];
env->xregs[19] = env->regs[13];
} else {
- env->xregs[18] = env->banked_r14[bank_number(ARM_CPU_MODE_SVC)];
+ env->xregs[18] = env->banked_r14[r14_bank_number(ARM_CPU_MODE_SVC)];
env->xregs[19] = env->banked_r13[bank_number(ARM_CPU_MODE_SVC)];
}
@@ -7962,7 +10844,7 @@ void aarch64_sync_32_to_64(CPUARMState *env)
env->xregs[20] = env->regs[14];
env->xregs[21] = env->regs[13];
} else {
- env->xregs[20] = env->banked_r14[bank_number(ARM_CPU_MODE_ABT)];
+ env->xregs[20] = env->banked_r14[r14_bank_number(ARM_CPU_MODE_ABT)];
env->xregs[21] = env->banked_r13[bank_number(ARM_CPU_MODE_ABT)];
}
@@ -7970,11 +10852,12 @@ void aarch64_sync_32_to_64(CPUARMState *env)
env->xregs[22] = env->regs[14];
env->xregs[23] = env->regs[13];
} else {
- env->xregs[22] = env->banked_r14[bank_number(ARM_CPU_MODE_UND)];
+ env->xregs[22] = env->banked_r14[r14_bank_number(ARM_CPU_MODE_UND)];
env->xregs[23] = env->banked_r13[bank_number(ARM_CPU_MODE_UND)];
}
- /* Registers x24-x30 are mapped to r8-r14 in FIQ mode. If we are in FIQ
+ /*
+ * Registers x24-x30 are mapped to r8-r14 in FIQ mode. If we are in FIQ
* mode, then we can copy from r8-r14. Otherwise, we copy from the
* FIQ bank for r8-r14.
*/
@@ -7987,13 +10870,14 @@ void aarch64_sync_32_to_64(CPUARMState *env)
env->xregs[i] = env->fiq_regs[i - 24];
}
env->xregs[29] = env->banked_r13[bank_number(ARM_CPU_MODE_FIQ)];
- env->xregs[30] = env->banked_r14[bank_number(ARM_CPU_MODE_FIQ)];
+ env->xregs[30] = env->banked_r14[r14_bank_number(ARM_CPU_MODE_FIQ)];
}
env->pc = env->regs[15];
}
-/* Function used to synchronize QEMU's AArch32 register set with AArch64
+/*
+ * Function used to synchronize QEMU's AArch32 register set with AArch64
* register set. This is necessary when switching between AArch32 and AArch64
* execution state.
*/
@@ -8007,7 +10891,8 @@ void aarch64_sync_64_to_32(CPUARMState *env)
env->regs[i] = env->xregs[i];
}
- /* Unless we are in FIQ mode, r8-r12 come from the user registers x8-x12.
+ /*
+ * Unless we are in FIQ mode, r8-r12 come from the user registers x8-x12.
* Otherwise, we copy x8-x12 into the banked user regs.
*/
if (mode == ARM_CPU_MODE_FIQ) {
@@ -8020,7 +10905,8 @@ void aarch64_sync_64_to_32(CPUARMState *env)
}
}
- /* Registers r13 & r14 depend on the current mode.
+ /*
+ * Registers r13 & r14 depend on the current mode.
* If we are in a given mode, we copy the corresponding x registers to r13
* and r14. Otherwise, we copy the x register to the banked r13 and r14
* for the mode.
@@ -8031,13 +10917,14 @@ void aarch64_sync_64_to_32(CPUARMState *env)
} else {
env->banked_r13[bank_number(ARM_CPU_MODE_USR)] = env->xregs[13];
- /* HYP is an exception in that it does not have its own banked r14 but
+ /*
+ * HYP is an exception in that it does not have its own banked r14 but
* shares the USR r14
*/
if (mode == ARM_CPU_MODE_HYP) {
env->regs[14] = env->xregs[14];
} else {
- env->banked_r14[bank_number(ARM_CPU_MODE_USR)] = env->xregs[14];
+ env->banked_r14[r14_bank_number(ARM_CPU_MODE_USR)] = env->xregs[14];
}
}
@@ -8051,7 +10938,7 @@ void aarch64_sync_64_to_32(CPUARMState *env)
env->regs[14] = env->xregs[16];
env->regs[13] = env->xregs[17];
} else {
- env->banked_r14[bank_number(ARM_CPU_MODE_IRQ)] = env->xregs[16];
+ env->banked_r14[r14_bank_number(ARM_CPU_MODE_IRQ)] = env->xregs[16];
env->banked_r13[bank_number(ARM_CPU_MODE_IRQ)] = env->xregs[17];
}
@@ -8059,7 +10946,7 @@ void aarch64_sync_64_to_32(CPUARMState *env)
env->regs[14] = env->xregs[18];
env->regs[13] = env->xregs[19];
} else {
- env->banked_r14[bank_number(ARM_CPU_MODE_SVC)] = env->xregs[18];
+ env->banked_r14[r14_bank_number(ARM_CPU_MODE_SVC)] = env->xregs[18];
env->banked_r13[bank_number(ARM_CPU_MODE_SVC)] = env->xregs[19];
}
@@ -8067,7 +10954,7 @@ void aarch64_sync_64_to_32(CPUARMState *env)
env->regs[14] = env->xregs[20];
env->regs[13] = env->xregs[21];
} else {
- env->banked_r14[bank_number(ARM_CPU_MODE_ABT)] = env->xregs[20];
+ env->banked_r14[r14_bank_number(ARM_CPU_MODE_ABT)] = env->xregs[20];
env->banked_r13[bank_number(ARM_CPU_MODE_ABT)] = env->xregs[21];
}
@@ -8075,11 +10962,12 @@ void aarch64_sync_64_to_32(CPUARMState *env)
env->regs[14] = env->xregs[22];
env->regs[13] = env->xregs[23];
} else {
- env->banked_r14[bank_number(ARM_CPU_MODE_UND)] = env->xregs[22];
+ env->banked_r14[r14_bank_number(ARM_CPU_MODE_UND)] = env->xregs[22];
env->banked_r13[bank_number(ARM_CPU_MODE_UND)] = env->xregs[23];
}
- /* Registers x24-x30 are mapped to r8-r14 in FIQ mode. If we are in FIQ
+ /*
+ * Registers x24-x30 are mapped to r8-r14 in FIQ mode. If we are in FIQ
* mode, then we can copy to r8-r14. Otherwise, we copy to the
* FIQ bank for r8-r14.
*/
@@ -8092,7 +10980,7 @@ void aarch64_sync_64_to_32(CPUARMState *env)
env->fiq_regs[i - 24] = env->xregs[i];
}
env->banked_r13[bank_number(ARM_CPU_MODE_FIQ)] = env->xregs[29];
- env->banked_r14[bank_number(ARM_CPU_MODE_FIQ)] = env->xregs[30];
+ env->banked_r14[r14_bank_number(ARM_CPU_MODE_FIQ)] = env->xregs[30];
}
env->regs[15] = env->pc;
@@ -8102,31 +10990,65 @@ static void take_aarch32_exception(CPUARMState *env, int new_mode,
uint32_t mask, uint32_t offset,
uint32_t newpc)
{
+ int new_el;
+
/* Change the CPU state so as to actually take the exception. */
switch_mode(env, new_mode);
+
/*
* For exceptions taken to AArch32 we must clear the SS bit in both
* PSTATE and in the old-state value we save to SPSR_<mode>, so zero it now.
*/
- env->uncached_cpsr &= ~PSTATE_SS;
+ env->pstate &= ~PSTATE_SS;
env->spsr = cpsr_read(env);
/* Clear IT bits. */
env->condexec_bits = 0;
/* Switch to the new mode, and to the correct instruction set. */
env->uncached_cpsr = (env->uncached_cpsr & ~CPSR_M) | new_mode;
+
+ /* This must be after mode switching. */
+ new_el = arm_current_el(env);
+
/* Set new mode endianness */
env->uncached_cpsr &= ~CPSR_E;
- if (env->cp15.sctlr_el[arm_current_el(env)] & SCTLR_EE) {
+ if (env->cp15.sctlr_el[new_el] & SCTLR_EE) {
env->uncached_cpsr |= CPSR_E;
}
/* J and IL must always be cleared for exception entry */
env->uncached_cpsr &= ~(CPSR_IL | CPSR_J);
env->daif |= mask;
+ if (cpu_isar_feature(aa32_ssbs, env_archcpu(env))) {
+ if (env->cp15.sctlr_el[new_el] & SCTLR_DSSBS_32) {
+ env->uncached_cpsr |= CPSR_SSBS;
+ } else {
+ env->uncached_cpsr &= ~CPSR_SSBS;
+ }
+ }
+
if (new_mode == ARM_CPU_MODE_HYP) {
env->thumb = (env->cp15.sctlr_el[2] & SCTLR_TE) != 0;
env->elr_el[2] = env->regs[15];
} else {
+ /* CPSR.PAN is normally preserved unless... */
+ if (cpu_isar_feature(aa32_pan, env_archcpu(env))) {
+ switch (new_el) {
+ case 3:
+ if (!arm_is_secure_below_el3(env)) {
+ /* ... the target is EL3, from non-secure state. */
+ env->uncached_cpsr &= ~CPSR_PAN;
+ break;
+ }
+ /* ... the target is EL3, from secure state ... */
+ /* fall through */
+ case 1:
+ /* ... the target is EL1 and SCTLR.SPAN is 0. */
+ if (!(env->cp15.sctlr_el[new_el] & SCTLR_SPAN)) {
+ env->uncached_cpsr |= CPSR_PAN;
+ }
+ break;
+ }
+ }
/*
* this is a lie, as there was no c1_sys on V4T/V5, but who cares
* and we should just guard the thumb mode on V4
@@ -8138,6 +11060,10 @@ static void take_aarch32_exception(CPUARMState *env, int new_mode,
env->regs[14] = env->regs[15] + offset;
}
env->regs[15] = newpc;
+
+ if (tcg_enabled()) {
+ arm_rebuild_hflags(env);
+ }
}
static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs)
@@ -8148,7 +11074,7 @@ static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs)
* separately here.
*
* The vector table entry used is always the 0x14 Hyp mode entry point,
- * unless this is an UNDEF/HVC/abort taken from Hyp to Hyp.
+ * unless this is an UNDEF/SVC/HVC/abort taken from Hyp to Hyp.
* The offset applied to the preferred return address is always zero
* (see DDI0487C.a section G1.12.3).
* PSTATE A/I/F masks are set based only on the SCR.EA/IRQ/FIQ values.
@@ -8162,7 +11088,7 @@ static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs)
addr = 0x04;
break;
case EXCP_SWI:
- addr = 0x14;
+ addr = 0x08;
break;
case EXCP_BKPT:
/* Fall through to prefetch abort. */
@@ -8189,11 +11115,25 @@ static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs)
break;
case EXCP_HYP_TRAP:
addr = 0x14;
+ break;
default:
cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
}
if (cs->exception_index != EXCP_IRQ && cs->exception_index != EXCP_FIQ) {
+ if (!arm_feature(env, ARM_FEATURE_V8)) {
+ /*
+ * QEMU syndrome values are v8-style. v7 has the IL bit
+ * UNK/SBZP for "field not valid" cases, where v8 uses RES1.
+ * If this is a v7 CPU, squash the IL bit in those cases.
+ */
+ if (cs->exception_index == EXCP_PREFETCH_ABORT ||
+ (cs->exception_index == EXCP_DATA_ABORT &&
+ !(env->exception.syndrome & ARM_EL_ISV)) ||
+ syn_get_ec(env->exception.syndrome) == EC_UNCATEGORIZED) {
+ env->exception.syndrome &= ~ARM_EL_IL;
+ }
+ }
env->cp15.esr_el[2] = env->exception.syndrome;
}
@@ -8228,7 +11168,7 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs)
uint32_t moe;
/* If this is a debug exception we must update the DBGDSCR.MOE bits */
- switch (env->exception.syndrome >> ARM_EL_EC_SHIFT) {
+ switch (syn_get_ec(env->exception.syndrome)) {
case EC_BREAKPOINT:
case EC_BREAKPOINT_SAME_EL:
moe = 1;
@@ -8253,20 +11193,38 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs)
}
if (env->exception.target_el == 2) {
+ /* Debug exceptions are reported differently on AArch32 */
+ switch (syn_get_ec(env->exception.syndrome)) {
+ case EC_BREAKPOINT:
+ case EC_BREAKPOINT_SAME_EL:
+ case EC_AA32_BKPT:
+ case EC_VECTORCATCH:
+ env->exception.syndrome = syn_insn_abort(arm_current_el(env) == 2,
+ 0, 0, 0x22);
+ break;
+ case EC_WATCHPOINT:
+ env->exception.syndrome = syn_set_ec(env->exception.syndrome,
+ EC_DATAABORT);
+ break;
+ case EC_WATCHPOINT_SAME_EL:
+ env->exception.syndrome = syn_set_ec(env->exception.syndrome,
+ EC_DATAABORT_SAME_EL);
+ break;
+ }
arm_cpu_do_interrupt_aarch32_hyp(cs);
return;
}
- /* TODO: Vectored interrupt controller. */
switch (cs->exception_index) {
case EXCP_UDEF:
new_mode = ARM_CPU_MODE_UND;
addr = 0x04;
mask = CPSR_I;
- if (env->thumb)
+ if (env->thumb) {
offset = 2;
- else
+ } else {
offset = 4;
+ }
break;
case EXCP_SWI:
new_mode = ARM_CPU_MODE_SVC;
@@ -8335,6 +11293,31 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs)
mask = CPSR_A | CPSR_I | CPSR_F;
offset = 4;
break;
+ case EXCP_VSERR:
+ {
+ /*
+ * Note that this is reported as a data abort, but the DFAR
+ * has an UNKNOWN value. Construct the SError syndrome from
+ * AET and ExT fields.
+ */
+ ARMMMUFaultInfo fi = { .type = ARMFault_AsyncExternal, };
+
+ if (extended_addresses_enabled(env)) {
+ env->exception.fsr = arm_fi_to_lfsc(&fi);
+ } else {
+ env->exception.fsr = arm_fi_to_sfsc(&fi);
+ }
+ env->exception.fsr |= env->cp15.vsesr_el2 & 0xd000;
+ A32_BANKED_CURRENT_REG_SET(env, dfsr, env->exception.fsr);
+ qemu_log_mask(CPU_LOG_INT, "...with IFSR 0x%x\n",
+ env->exception.fsr);
+
+ new_mode = ARM_CPU_MODE_ABT;
+ addr = 0x10;
+ mask = CPSR_A | CPSR_I;
+ offset = 8;
+ }
+ break;
case EXCP_SMC:
new_mode = ARM_CPU_MODE_MON;
addr = 0x08;
@@ -8352,7 +11335,8 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs)
/* High vectors. When enabled, base address cannot be remapped. */
addr += 0xffff0000;
} else {
- /* ARM v7 architectures provide a vector base address register to remap
+ /*
+ * ARM v7 architectures provide a vector base address register to remap
* the interrupt vector table.
* This register is only followed in non-monitor mode, and is banked.
* Note: only bits 31:5 are valid.
@@ -8367,6 +11351,106 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs)
take_aarch32_exception(env, new_mode, mask, offset, addr);
}
+static int aarch64_regnum(CPUARMState *env, int aarch32_reg)
+{
+ /*
+ * Return the register number of the AArch64 view of the AArch32
+ * register @aarch32_reg. The CPUARMState CPSR is assumed to still
+ * be that of the AArch32 mode the exception came from.
+ */
+ int mode = env->uncached_cpsr & CPSR_M;
+
+ switch (aarch32_reg) {
+ case 0 ... 7:
+ return aarch32_reg;
+ case 8 ... 12:
+ return mode == ARM_CPU_MODE_FIQ ? aarch32_reg + 16 : aarch32_reg;
+ case 13:
+ switch (mode) {
+ case ARM_CPU_MODE_USR:
+ case ARM_CPU_MODE_SYS:
+ return 13;
+ case ARM_CPU_MODE_HYP:
+ return 15;
+ case ARM_CPU_MODE_IRQ:
+ return 17;
+ case ARM_CPU_MODE_SVC:
+ return 19;
+ case ARM_CPU_MODE_ABT:
+ return 21;
+ case ARM_CPU_MODE_UND:
+ return 23;
+ case ARM_CPU_MODE_FIQ:
+ return 29;
+ default:
+ g_assert_not_reached();
+ }
+ case 14:
+ switch (mode) {
+ case ARM_CPU_MODE_USR:
+ case ARM_CPU_MODE_SYS:
+ case ARM_CPU_MODE_HYP:
+ return 14;
+ case ARM_CPU_MODE_IRQ:
+ return 16;
+ case ARM_CPU_MODE_SVC:
+ return 18;
+ case ARM_CPU_MODE_ABT:
+ return 20;
+ case ARM_CPU_MODE_UND:
+ return 22;
+ case ARM_CPU_MODE_FIQ:
+ return 30;
+ default:
+ g_assert_not_reached();
+ }
+ case 15:
+ return 31;
+ default:
+ g_assert_not_reached();
+ }
+}
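/*
 * Illustrative examples (not from the original sources) of the mapping
 * implemented above, assuming the trap was taken from the named mode:
 *
 *   aarch32 r13 in SVC mode  ->  x19
 *   aarch32 r14 in IRQ mode  ->  x16
 *   aarch32 r8  in FIQ mode  ->  x24
 *   aarch32 r15 (PC)         ->  31 (the XZR/SP register number in the syndrome)
 */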
+
+static uint32_t cpsr_read_for_spsr_elx(CPUARMState *env)
+{
+ uint32_t ret = cpsr_read(env);
+
+ /* Move DIT to the correct location for SPSR_ELx */
+ if (ret & CPSR_DIT) {
+ ret &= ~CPSR_DIT;
+ ret |= PSTATE_DIT;
+ }
+ /* Merge PSTATE.SS into SPSR_ELx */
+ ret |= env->pstate & PSTATE_SS;
+
+ return ret;
+}
+
+static bool syndrome_is_sync_extabt(uint32_t syndrome)
+{
+ /* Return true if this syndrome value is a synchronous external abort */
+ switch (syn_get_ec(syndrome)) {
+ case EC_INSNABORT:
+ case EC_INSNABORT_SAME_EL:
+ case EC_DATAABORT:
+ case EC_DATAABORT_SAME_EL:
+ /* Look at fault status code for all the synchronous ext abort cases */
+ switch (syndrome & 0x3f) {
+ case 0x10:
+ case 0x13:
+ case 0x14:
+ case 0x15:
+ case 0x16:
+ case 0x17:
+ return true;
+ default:
+ return false;
+ }
+ default:
+ return false;
+ }
+}
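/*
 * Illustrative note (based on the AArch64 fault status code encodings, not
 * taken from the original sources): the DFSC/IFSC values accepted above are
 * the synchronous external abort codes, i.e. 0x10 (SEA not on a translation
 * table walk) and 0x13-0x17 (SEA on a translation table walk, levels -1 to 3).
 */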
+
/* Handle exception entry to a target EL which is using AArch64 */
static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
{
@@ -8375,27 +11459,37 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
unsigned int new_el = env->exception.target_el;
target_ulong addr = env->cp15.vbar_el[new_el];
unsigned int new_mode = aarch64_pstate_mode(new_el, true);
+ unsigned int old_mode;
unsigned int cur_el = arm_current_el(env);
+ int rt;
- /*
- * Note that new_el can never be 0. If cur_el is 0, then
- * el0_a64 is is_a64(), else el0_a64 is ignored.
- */
- aarch64_sve_change_el(env, cur_el, new_el, is_a64(env));
+ if (tcg_enabled()) {
+ /*
+ * Note that new_el can never be 0. If cur_el is 0, then
+ * el0_a64 is is_a64(), else el0_a64 is ignored.
+ */
+ aarch64_sve_change_el(env, cur_el, new_el, is_a64(env));
+ }
if (cur_el < new_el) {
- /* Entry vector offset depends on whether the implemented EL
+ /*
+ * Entry vector offset depends on whether the implemented EL
* immediately lower than the target level is using AArch32 or AArch64
*/
bool is_aa64;
+ uint64_t hcr;
switch (new_el) {
case 3:
is_aa64 = (env->cp15.scr_el3 & SCR_RW) != 0;
break;
case 2:
- is_aa64 = (env->cp15.hcr_el2 & HCR_RW) != 0;
- break;
+ hcr = arm_hcr_el2_eff(env);
+ if ((hcr & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
+ is_aa64 = (hcr & HCR_RW) != 0;
+ break;
+ }
+ /* fall through */
case 1:
is_aa64 = is_a64(env);
break;
@@ -8413,8 +11507,20 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
}
switch (cs->exception_index) {
+ case EXCP_GPC:
+ qemu_log_mask(CPU_LOG_INT, "...with MFAR 0x%" PRIx64 "\n",
+ env->cp15.mfar_el3);
+ /* fall through */
case EXCP_PREFETCH_ABORT:
case EXCP_DATA_ABORT:
+ /*
+ * FEAT_DoubleFault allows synchronous external aborts taken to EL3
+ * to be taken to the SError vector entrypoint.
+ */
+ if (new_el == 3 && (env->cp15.scr_el3 & SCR_EASE) &&
+ syndrome_is_sync_extabt(env->exception.syndrome)) {
+ addr += 0x180;
+ }
env->cp15.far_el[new_el] = env->exception.vaddress;
qemu_log_mask(CPU_LOG_INT, "...with FAR 0x%" PRIx64 "\n",
env->cp15.far_el[new_el]);
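The "addr += 0x180" above relies on the fixed AArch64 vector table layout: each group of entries is 0x200 bytes and the SError slot sits 0x180 above the synchronous slot of the same group, so re-routing a synchronous external abort under SCR_EL3.EASE is a single offset add. A small sketch of that arithmetic (standalone C; the VBAR value is hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Architectural vector table offsets, not QEMU code. */
enum vec_group { CUR_EL_SP0 = 0x000, CUR_EL_SPX = 0x200,
                 LOWER_EL_A64 = 0x400, LOWER_EL_A32 = 0x600 };
enum vec_kind  { VEC_SYNC = 0x000, VEC_IRQ = 0x080,
                 VEC_FIQ = 0x100, VEC_SERROR = 0x180 };

static uint64_t vector_entry(uint64_t vbar, enum vec_group g, enum vec_kind k)
{
    return vbar + g + k;
}

int main(void)
{
    uint64_t vbar_el3 = 0x40001000;   /* hypothetical VBAR_EL3 */

    printf("sync abort from lower AArch64 EL: 0x%llx\n",
           (unsigned long long)vector_entry(vbar_el3, LOWER_EL_A64, VEC_SYNC));
    printf("same abort with SCR_EL3.EASE set: 0x%llx\n",
           (unsigned long long)vector_entry(vbar_el3, LOWER_EL_A64, VEC_SERROR));
    return 0;
}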
@@ -8425,6 +11531,44 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
case EXCP_HVC:
case EXCP_HYP_TRAP:
case EXCP_SMC:
+ switch (syn_get_ec(env->exception.syndrome)) {
+ case EC_ADVSIMDFPACCESSTRAP:
+ /*
+ * QEMU internal FP/SIMD syndromes from AArch32 include the
+ * TA and coproc fields which are only exposed if the exception
+ * is taken to AArch32 Hyp mode. Mask them out to get a valid
+ * AArch64 format syndrome.
+ */
+ env->exception.syndrome &= ~MAKE_64BIT_MASK(0, 20);
+ break;
+ case EC_CP14RTTRAP:
+ case EC_CP15RTTRAP:
+ case EC_CP14DTTRAP:
+ /*
+ * For a trap on AArch32 MRC/MCR/LDC/STC the Rt field is currently
+ * the raw register field from the insn; when taking this to
+ * AArch64 we must convert it to the AArch64 view of the register
+ * number. Notice that we read a 4-bit AArch32 register number and
+ * write back a 5-bit AArch64 one.
+ */
+ rt = extract32(env->exception.syndrome, 5, 4);
+ rt = aarch64_regnum(env, rt);
+ env->exception.syndrome = deposit32(env->exception.syndrome,
+ 5, 5, rt);
+ break;
+ case EC_CP15RRTTRAP:
+ case EC_CP14RRTTRAP:
+ /* Similarly for MRRC/MCRR traps for Rt and Rt2 fields */
+ rt = extract32(env->exception.syndrome, 5, 4);
+ rt = aarch64_regnum(env, rt);
+ env->exception.syndrome = deposit32(env->exception.syndrome,
+ 5, 5, rt);
+ rt = extract32(env->exception.syndrome, 10, 4);
+ rt = aarch64_regnum(env, rt);
+ env->exception.syndrome = deposit32(env->exception.syndrome,
+ 10, 5, rt);
+ break;
+ }
env->cp15.esr_el[new_el] = env->exception.syndrome;
break;
case EXCP_IRQ:
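The Rt rewrites in the hunk above read a 4-bit AArch32 register field and write back a 5-bit AArch64 one. A worked example of that widening (standalone C; extract32()/deposit32() below are minimal stand-ins assuming the usual (value, start, length) semantics of QEMU's bitops.h, and the syndrome value is hypothetical):

#include <stdint.h>
#include <stdio.h>

static uint32_t extract32(uint32_t value, int start, int length)
{
    return (value >> start) & (~0u >> (32 - length));
}

static uint32_t deposit32(uint32_t value, int start, int length,
                          uint32_t fieldval)
{
    uint32_t mask = (~0u >> (32 - length)) << start;

    return (value & ~mask) | ((fieldval << start) & mask);
}

int main(void)
{
    uint32_t syn = 13u << 5;              /* Rt = 13 in bits [8:5] */

    /* In SVC mode, AArch32 r13 is x19 of the AArch64 view (see
     * aarch64_regnum() earlier), so the field grows to bits [9:5]. */
    uint32_t rt = extract32(syn, 5, 4);   /* 13 */
    syn = deposit32(syn, 5, 5, 19);

    printf("old Rt %u, new Rt %u\n", rt, extract32(syn, 5, 5));
    return 0;
}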
@@ -8435,124 +11579,132 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
case EXCP_VFIQ:
addr += 0x100;
break;
- case EXCP_SEMIHOST:
- qemu_log_mask(CPU_LOG_INT,
- "...handling as semihosting call 0x%" PRIx64 "\n",
- env->xregs[0]);
- env->xregs[0] = do_arm_semihosting(env);
- return;
+ case EXCP_VSERR:
+ addr += 0x180;
+ /* Construct the SError syndrome from IDS and ISS fields. */
+ env->exception.syndrome = syn_serror(env->cp15.vsesr_el2 & 0x1ffffff);
+ env->cp15.esr_el[new_el] = env->exception.syndrome;
+ break;
default:
cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
}
if (is_a64(env)) {
- env->banked_spsr[aarch64_banked_spsr_index(new_el)] = pstate_read(env);
+ old_mode = pstate_read(env);
aarch64_save_sp(env, arm_current_el(env));
env->elr_el[new_el] = env->pc;
+
+ if (cur_el == 1 && new_el == 1) {
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ if ((hcr & (HCR_NV | HCR_NV1 | HCR_NV2)) == HCR_NV ||
+ (hcr & (HCR_NV | HCR_NV2)) == (HCR_NV | HCR_NV2)) {
+ /*
+ * FEAT_NV, FEAT_NV2 may need to report EL2 in the SPSR
+ * by setting M[3:2] to 0b10.
+ * If NV2 is disabled, change SPSR when NV,NV1 == 1,0 (I_ZJRNN)
+ * If NV2 is enabled, change SPSR when NV is 1 (I_DBTLM)
+ */
+ old_mode = deposit32(old_mode, 2, 2, 2);
+ }
+ }
} else {
- env->banked_spsr[aarch64_banked_spsr_index(new_el)] = cpsr_read(env);
+ old_mode = cpsr_read_for_spsr_elx(env);
env->elr_el[new_el] = env->regs[15];
aarch64_sync_32_to_64(env);
env->condexec_bits = 0;
}
+ env->banked_spsr[aarch64_banked_spsr_index(new_el)] = old_mode;
+
+ qemu_log_mask(CPU_LOG_INT, "...with SPSR 0x%x\n", old_mode);
qemu_log_mask(CPU_LOG_INT, "...with ELR 0x%" PRIx64 "\n",
env->elr_el[new_el]);
+ if (cpu_isar_feature(aa64_pan, cpu)) {
+ /* The value of PSTATE.PAN is normally preserved, except when ... */
+ new_mode |= old_mode & PSTATE_PAN;
+ switch (new_el) {
+ case 2:
+ /* ... the target is EL2 with HCR_EL2.{E2H,TGE} == '11' ... */
+ if ((arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE))
+ != (HCR_E2H | HCR_TGE)) {
+ break;
+ }
+ /* fall through */
+ case 1:
+ /* ... the target is EL1 ... */
+ /* ... and SCTLR_ELx.SPAN == 0, then set to 1. */
+ if ((env->cp15.sctlr_el[new_el] & SCTLR_SPAN) == 0) {
+ new_mode |= PSTATE_PAN;
+ }
+ break;
+ }
+ }
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ new_mode |= PSTATE_TCO;
+ }
+
+ if (cpu_isar_feature(aa64_ssbs, cpu)) {
+ if (env->cp15.sctlr_el[new_el] & SCTLR_DSSBS_64) {
+ new_mode |= PSTATE_SSBS;
+ } else {
+ new_mode &= ~PSTATE_SSBS;
+ }
+ }
+
pstate_write(env, PSTATE_DAIF | new_mode);
- env->aarch64 = 1;
+ env->aarch64 = true;
aarch64_restore_sp(env, new_el);
+ if (tcg_enabled()) {
+ helper_rebuild_hflags_a64(env, new_el);
+ }
+
env->pc = addr;
qemu_log_mask(CPU_LOG_INT, "...to EL%d PC 0x%" PRIx64 " PSTATE 0x%x\n",
new_el, env->pc, pstate_read(env));
}
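A compact sketch of the PSTATE.PAN rule applied during the entry above (standalone C with local stand-in names, assuming FEAT_PAN): PAN is carried over from the old state, and additionally forced to 1 on entry to EL1, or to EL2 with HCR_EL2.{E2H,TGE} == '11', whenever SCTLR_ELx.SPAN is 0.

#include <stdbool.h>
#include <stdio.h>

static bool pan_after_entry(bool old_pan, int new_el, bool e2h_tge,
                            bool sctlr_span)
{
    bool pan = old_pan;   /* normally preserved */

    if ((new_el == 1 || (new_el == 2 && e2h_tge)) && !sctlr_span) {
        pan = true;       /* SPAN == 0 sets PAN on exception entry */
    }
    return pan;
}

int main(void)
{
    printf("%d\n", pan_after_entry(false, 1, false, false));  /* 1 */
    printf("%d\n", pan_after_entry(false, 1, false, true));   /* 0 */
    printf("%d\n", pan_after_entry(true, 3, false, true));    /* 1: preserved */
    return 0;
}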
-static inline bool check_for_semihosting(CPUState *cs)
+/*
+ * Do semihosting call and set the appropriate return value. All the
+ * permission and validity checks have been done at translate time.
+ *
+ * We only see semihosting exceptions in TCG, as they are not
+ * trapped to the hypervisor in KVM.
+ */
+#ifdef CONFIG_TCG
+static void tcg_handle_semihosting(CPUState *cs)
{
- /* Check whether this exception is a semihosting call; if so
- * then handle it and return true; otherwise return false.
- */
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
if (is_a64(env)) {
- if (cs->exception_index == EXCP_SEMIHOST) {
- /* This is always the 64-bit semihosting exception.
- * The "is this usermode" and "is semihosting enabled"
- * checks have been done at translate time.
- */
- qemu_log_mask(CPU_LOG_INT,
- "...handling as semihosting call 0x%" PRIx64 "\n",
- env->xregs[0]);
- env->xregs[0] = do_arm_semihosting(env);
- return true;
- }
- return false;
+ qemu_log_mask(CPU_LOG_INT,
+ "...handling as semihosting call 0x%" PRIx64 "\n",
+ env->xregs[0]);
+ do_common_semihosting(cs);
+ env->pc += 4;
} else {
- uint32_t imm;
-
- /* Only intercept calls from privileged modes, to provide some
- * semblance of security.
- */
- if (cs->exception_index != EXCP_SEMIHOST &&
- (!semihosting_enabled() ||
- ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR))) {
- return false;
- }
-
- switch (cs->exception_index) {
- case EXCP_SEMIHOST:
- /* This is always a semihosting call; the "is this usermode"
- * and "is semihosting enabled" checks have been done at
- * translate time.
- */
- break;
- case EXCP_SWI:
- /* Check for semihosting interrupt. */
- if (env->thumb) {
- imm = arm_lduw_code(env, env->regs[15] - 2, arm_sctlr_b(env))
- & 0xff;
- if (imm == 0xab) {
- break;
- }
- } else {
- imm = arm_ldl_code(env, env->regs[15] - 4, arm_sctlr_b(env))
- & 0xffffff;
- if (imm == 0x123456) {
- break;
- }
- }
- return false;
- case EXCP_BKPT:
- /* See if this is a semihosting syscall. */
- if (env->thumb) {
- imm = arm_lduw_code(env, env->regs[15], arm_sctlr_b(env))
- & 0xff;
- if (imm == 0xab) {
- env->regs[15] += 2;
- break;
- }
- }
- return false;
- default:
- return false;
- }
-
qemu_log_mask(CPU_LOG_INT,
"...handling as semihosting call 0x%x\n",
env->regs[0]);
- env->regs[0] = do_arm_semihosting(env);
- return true;
+ do_common_semihosting(cs);
+ env->regs[15] += env->thumb ? 2 : 4;
}
}
+#endif
-/* Handle a CPU exception for A and R profile CPUs.
+/*
+ * Handle a CPU exception for A and R profile CPUs.
* Do any appropriate logging, handle PSCI calls, and then hand off
* to the AArch64-entry or AArch32-entry function depending on the
* target exception level's register width.
+ *
+ * Note: this is used both by TCG (as the do_interrupt tcg op)
+ *       and by KVM, to re-inject guest debug exceptions and to
+ *       inject a Synchronous External Abort.
*/
void arm_cpu_do_interrupt(CPUState *cs)
{
@@ -8562,35 +11714,40 @@ void arm_cpu_do_interrupt(CPUState *cs)
assert(!arm_feature(env, ARM_FEATURE_M));
- arm_log_exception(cs->exception_index);
+ arm_log_exception(cs);
qemu_log_mask(CPU_LOG_INT, "...from EL%d to EL%d\n", arm_current_el(env),
new_el);
if (qemu_loglevel_mask(CPU_LOG_INT)
&& !excp_is_internal(cs->exception_index)) {
qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%x/0x%" PRIx32 "\n",
- env->exception.syndrome >> ARM_EL_EC_SHIFT,
+ syn_get_ec(env->exception.syndrome),
env->exception.syndrome);
}
- if (arm_is_psci_call(cpu, cs->exception_index)) {
+ if (tcg_enabled() && arm_is_psci_call(cpu, cs->exception_index)) {
arm_handle_psci_call(cpu);
qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n");
return;
}
- /* Semihosting semantics depend on the register width of the
- * code that caused the exception, not the target exception level,
- * so must be handled here.
+ /*
+ * Semihosting semantics depend on the register width of the code
+ * that caused the exception, not the target exception level, so
+ * must be handled here.
*/
- if (check_for_semihosting(cs)) {
+#ifdef CONFIG_TCG
+ if (cs->exception_index == EXCP_SEMIHOST) {
+ tcg_handle_semihosting(cs);
return;
}
+#endif
- /* Hooks may change global state so BQL should be held, also the
+ /*
+ * Hooks may change global state so BQL should be held, also the
* BQL needs to be held for any modification of
* cs->interrupt_request.
*/
- g_assert(qemu_mutex_iothread_locked());
+ g_assert(bql_locked());
arm_call_pre_el_change_hook(cpu);
@@ -8607,2696 +11764,282 @@ void arm_cpu_do_interrupt(CPUState *cs)
cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
}
}
+#endif /* !CONFIG_USER_ONLY */
-/* Return the exception level which controls this address translation regime */
-static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx)
-{
- switch (mmu_idx) {
- case ARMMMUIdx_S2NS:
- case ARMMMUIdx_S1E2:
- return 2;
- case ARMMMUIdx_S1E3:
- return 3;
- case ARMMMUIdx_S1SE0:
- return arm_el_is_aa64(env, 3) ? 1 : 3;
- case ARMMMUIdx_S1SE1:
- case ARMMMUIdx_S1NSE0:
- case ARMMMUIdx_S1NSE1:
- case ARMMMUIdx_MPrivNegPri:
- case ARMMMUIdx_MUserNegPri:
- case ARMMMUIdx_MPriv:
- case ARMMMUIdx_MUser:
- case ARMMMUIdx_MSPrivNegPri:
- case ARMMMUIdx_MSUserNegPri:
- case ARMMMUIdx_MSPriv:
- case ARMMMUIdx_MSUser:
- return 1;
- default:
- g_assert_not_reached();
- }
-}
-
-/* Return the SCTLR value which controls this address translation regime */
-static inline uint32_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx)
-{
- return env->cp15.sctlr_el[regime_el(env, mmu_idx)];
-}
-
-/* Return true if the specified stage of address translation is disabled */
-static inline bool regime_translation_disabled(CPUARMState *env,
- ARMMMUIdx mmu_idx)
+uint64_t arm_sctlr(CPUARMState *env, int el)
{
- if (arm_feature(env, ARM_FEATURE_M)) {
- switch (env->v7m.mpu_ctrl[regime_is_secure(env, mmu_idx)] &
- (R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK)) {
- case R_V7M_MPU_CTRL_ENABLE_MASK:
- /* Enabled, but not for HardFault and NMI */
- return mmu_idx & ARM_MMU_IDX_M_NEGPRI;
- case R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK:
- /* Enabled for all cases */
- return false;
- case 0:
- default:
- /* HFNMIENA set and ENABLE clear is UNPREDICTABLE, but
- * we warned about that in armv7m_nvic.c when the guest set it.
- */
- return true;
- }
- }
-
- if (mmu_idx == ARMMMUIdx_S2NS) {
- return (env->cp15.hcr_el2 & HCR_VM) == 0;
- }
-
- if (env->cp15.hcr_el2 & HCR_TGE) {
- /* TGE means that NS EL0/1 act as if SCTLR_EL1.M is zero */
- if (!regime_is_secure(env, mmu_idx) && regime_el(env, mmu_idx) == 1) {
- return true;
- }
- }
-
- return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0;
-}
-
-static inline bool regime_translation_big_endian(CPUARMState *env,
- ARMMMUIdx mmu_idx)
-{
- return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0;
-}
-
-/* Return the TCR controlling this translation regime */
-static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx)
-{
- if (mmu_idx == ARMMMUIdx_S2NS) {
- return &env->cp15.vtcr_el2;
- }
- return &env->cp15.tcr_el[regime_el(env, mmu_idx)];
-}
-
-/* Convert a possible stage1+2 MMU index into the appropriate
- * stage 1 MMU index
- */
-static inline ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx)
-{
- if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) {
- mmu_idx += (ARMMMUIdx_S1NSE0 - ARMMMUIdx_S12NSE0);
+ /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */
+ if (el == 0) {
+ ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0);
+ el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1;
}
- return mmu_idx;
+ return env->cp15.sctlr_el[el];
}
-/* Returns TBI0 value for current regime el */
-uint32_t arm_regime_tbi0(CPUARMState *env, ARMMMUIdx mmu_idx)
+int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx)
{
- TCR *tcr;
- uint32_t el;
-
- /* For EL0 and EL1, TBI is controlled by stage 1's TCR, so convert
- * a stage 1+2 mmu index into the appropriate stage 1 mmu index.
- */
- mmu_idx = stage_1_mmu_idx(mmu_idx);
-
- tcr = regime_tcr(env, mmu_idx);
- el = regime_el(env, mmu_idx);
-
- if (el > 1) {
- return extract64(tcr->raw_tcr, 20, 1);
+ if (regime_has_2_ranges(mmu_idx)) {
+ return extract64(tcr, 37, 2);
+ } else if (regime_is_stage2(mmu_idx)) {
+ return 0; /* VTCR_EL2 */
} else {
- return extract64(tcr->raw_tcr, 37, 1);
+ /* Replicate the single TBI bit so we always have 2 bits. */
+ return extract32(tcr, 20, 1) * 3;
}
}
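The "* 3" in the single-range case above widens one control bit into the same 2-bit shape that the two-range regimes return, so callers can always test bit 0 for the lower half and bit 1 for the upper half of the address space. A tiny demonstration (standalone C; the bit position matches the single-range TBI bit used above, and the register value is hypothetical):

#include <stdint.h>
#include <stdio.h>

/* 0 -> 0b00, 1 -> 0b11: both halves report the same TBI setting. */
static int replicate_bit(uint64_t tcr, int bitpos)
{
    return (int)((tcr >> bitpos) & 1) * 3;
}

int main(void)
{
    uint64_t tcr = 1ull << 20;   /* hypothetical TCR with the single TBI bit set */
    int tbi = replicate_bit(tcr, 20);

    printf("tbi = %d (low half %d, high half %d)\n",
           tbi, tbi & 1, (tbi >> 1) & 1);
    return 0;
}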
-/* Returns TBI1 value for current regime el */
-uint32_t arm_regime_tbi1(CPUARMState *env, ARMMMUIdx mmu_idx)
+int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx)
{
- TCR *tcr;
- uint32_t el;
-
- /* For EL0 and EL1, TBI is controlled by stage 1's TCR, so convert
- * a stage 1+2 mmu index into the appropriate stage 1 mmu index.
- */
- mmu_idx = stage_1_mmu_idx(mmu_idx);
-
- tcr = regime_tcr(env, mmu_idx);
- el = regime_el(env, mmu_idx);
-
- if (el > 1) {
- return 0;
+ if (regime_has_2_ranges(mmu_idx)) {
+ return extract64(tcr, 51, 2);
+ } else if (regime_is_stage2(mmu_idx)) {
+ return 0; /* VTCR_EL2 */
} else {
- return extract64(tcr->raw_tcr, 38, 1);
+ /* Replicate the single TBID bit so we always have 2 bits. */
+ return extract32(tcr, 29, 1) * 3;
}
}
-/* Return the TTBR associated with this translation regime */
-static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx,
- int ttbrn)
+int aa64_va_parameter_tcma(uint64_t tcr, ARMMMUIdx mmu_idx)
{
- if (mmu_idx == ARMMMUIdx_S2NS) {
- return env->cp15.vttbr_el2;
- }
- if (ttbrn == 0) {
- return env->cp15.ttbr0_el[regime_el(env, mmu_idx)];
+ if (regime_has_2_ranges(mmu_idx)) {
+ return extract64(tcr, 57, 2);
} else {
- return env->cp15.ttbr1_el[regime_el(env, mmu_idx)];
- }
-}
-
-/* Return true if the translation regime is using LPAE format page tables */
-static inline bool regime_using_lpae_format(CPUARMState *env,
- ARMMMUIdx mmu_idx)
-{
- int el = regime_el(env, mmu_idx);
- if (el == 2 || arm_el_is_aa64(env, el)) {
- return true;
- }
- if (arm_feature(env, ARM_FEATURE_LPAE)
- && (regime_tcr(env, mmu_idx)->raw_tcr & TTBCR_EAE)) {
- return true;
- }
- return false;
-}
-
-/* Returns true if the stage 1 translation regime is using LPAE format page
- * tables. Used when raising alignment exceptions, whose FSR changes depending
- * on whether the long or short descriptor format is in use. */
-bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx)
-{
- mmu_idx = stage_1_mmu_idx(mmu_idx);
-
- return regime_using_lpae_format(env, mmu_idx);
-}
-
-static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
-{
- switch (mmu_idx) {
- case ARMMMUIdx_S1SE0:
- case ARMMMUIdx_S1NSE0:
- case ARMMMUIdx_MUser:
- case ARMMMUIdx_MSUser:
- case ARMMMUIdx_MUserNegPri:
- case ARMMMUIdx_MSUserNegPri:
- return true;
- default:
- return false;
- case ARMMMUIdx_S12NSE0:
- case ARMMMUIdx_S12NSE1:
- g_assert_not_reached();
+ /* Replicate the single TCMA bit so we always have 2 bits. */
+ return extract32(tcr, 30, 1) * 3;
}
}
-/* Translate section/page access permissions to page
- * R/W protection flags
- *
- * @env: CPUARMState
- * @mmu_idx: MMU index indicating required translation regime
- * @ap: The 3-bit access permissions (AP[2:0])
- * @domain_prot: The 2-bit domain access permissions
- */
-static inline int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx,
- int ap, int domain_prot)
+static ARMGranuleSize tg0_to_gran_size(int tg)
{
- bool is_user = regime_is_user(env, mmu_idx);
-
- if (domain_prot == 3) {
- return PAGE_READ | PAGE_WRITE;
- }
-
- switch (ap) {
+ switch (tg) {
case 0:
- if (arm_feature(env, ARM_FEATURE_V7)) {
- return 0;
- }
- switch (regime_sctlr(env, mmu_idx) & (SCTLR_S | SCTLR_R)) {
- case SCTLR_S:
- return is_user ? 0 : PAGE_READ;
- case SCTLR_R:
- return PAGE_READ;
- default:
- return 0;
- }
+ return Gran4K;
case 1:
- return is_user ? 0 : PAGE_READ | PAGE_WRITE;
+ return Gran64K;
case 2:
- if (is_user) {
- return PAGE_READ;
- } else {
- return PAGE_READ | PAGE_WRITE;
- }
- case 3:
- return PAGE_READ | PAGE_WRITE;
- case 4: /* Reserved. */
- return 0;
- case 5:
- return is_user ? 0 : PAGE_READ;
- case 6:
- return PAGE_READ;
- case 7:
- if (!arm_feature(env, ARM_FEATURE_V6K)) {
- return 0;
- }
- return PAGE_READ;
+ return Gran16K;
default:
- g_assert_not_reached();
+ return GranInvalid;
}
}
-/* Translate section/page access permissions to page
- * R/W protection flags.
- *
- * @ap: The 2-bit simple AP (AP[2:1])
- * @is_user: TRUE if accessing from PL0
- */
-static inline int simple_ap_to_rw_prot_is_user(int ap, bool is_user)
+static ARMGranuleSize tg1_to_gran_size(int tg)
{
- switch (ap) {
- case 0:
- return is_user ? 0 : PAGE_READ | PAGE_WRITE;
+ switch (tg) {
case 1:
- return PAGE_READ | PAGE_WRITE;
+ return Gran16K;
case 2:
- return is_user ? 0 : PAGE_READ;
+ return Gran4K;
case 3:
- return PAGE_READ;
+ return Gran64K;
default:
- g_assert_not_reached();
- }
-}
-
-static inline int
-simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
-{
- return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
-}
-
-/* Translate S2 section/page access permissions to protection flags
- *
- * @env: CPUARMState
- * @s2ap: The 2-bit stage2 access permissions (S2AP)
- * @xn: XN (execute-never) bit
- */
-static int get_S2prot(CPUARMState *env, int s2ap, int xn)
-{
- int prot = 0;
-
- if (s2ap & 1) {
- prot |= PAGE_READ;
- }
- if (s2ap & 2) {
- prot |= PAGE_WRITE;
- }
- if (!xn) {
- if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) {
- prot |= PAGE_EXEC;
- }
- }
- return prot;
-}
-
-/* Translate section/page access permissions to protection flags
- *
- * @env: CPUARMState
- * @mmu_idx: MMU index indicating required translation regime
- * @is_aa64: TRUE if AArch64
- * @ap: The 2-bit simple AP (AP[2:1])
- * @ns: NS (non-secure) bit
- * @xn: XN (execute-never) bit
- * @pxn: PXN (privileged execute-never) bit
- */
-static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
- int ap, int ns, int xn, int pxn)
-{
- bool is_user = regime_is_user(env, mmu_idx);
- int prot_rw, user_rw;
- bool have_wxn;
- int wxn = 0;
-
- assert(mmu_idx != ARMMMUIdx_S2NS);
-
- user_rw = simple_ap_to_rw_prot_is_user(ap, true);
- if (is_user) {
- prot_rw = user_rw;
- } else {
- prot_rw = simple_ap_to_rw_prot_is_user(ap, false);
- }
-
- if (ns && arm_is_secure(env) && (env->cp15.scr_el3 & SCR_SIF)) {
- return prot_rw;
- }
-
- /* TODO have_wxn should be replaced with
- * ARM_FEATURE_V8 || (ARM_FEATURE_V7 && ARM_FEATURE_EL2)
- * when ARM_FEATURE_EL2 starts getting set. For now we assume all LPAE
- * compatible processors have EL2, which is required for [U]WXN.
- */
- have_wxn = arm_feature(env, ARM_FEATURE_LPAE);
-
- if (have_wxn) {
- wxn = regime_sctlr(env, mmu_idx) & SCTLR_WXN;
- }
-
- if (is_aa64) {
- switch (regime_el(env, mmu_idx)) {
- case 1:
- if (!is_user) {
- xn = pxn || (user_rw & PAGE_WRITE);
- }
- break;
- case 2:
- case 3:
- break;
- }
- } else if (arm_feature(env, ARM_FEATURE_V7)) {
- switch (regime_el(env, mmu_idx)) {
- case 1:
- case 3:
- if (is_user) {
- xn = xn || !(user_rw & PAGE_READ);
- } else {
- int uwxn = 0;
- if (have_wxn) {
- uwxn = regime_sctlr(env, mmu_idx) & SCTLR_UWXN;
- }
- xn = xn || !(prot_rw & PAGE_READ) || pxn ||
- (uwxn && (user_rw & PAGE_WRITE));
- }
- break;
- case 2:
- break;
- }
- } else {
- xn = wxn = 0;
- }
-
- if (xn || (wxn && (prot_rw & PAGE_WRITE))) {
- return prot_rw;
- }
- return prot_rw | PAGE_EXEC;
-}
-
-static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
- uint32_t *table, uint32_t address)
-{
- /* Note that we can only get here for an AArch32 PL0/PL1 lookup */
- TCR *tcr = regime_tcr(env, mmu_idx);
-
- if (address & tcr->mask) {
- if (tcr->raw_tcr & TTBCR_PD1) {
- /* Translation table walk disabled for TTBR1 */
- return false;
- }
- *table = regime_ttbr(env, mmu_idx, 1) & 0xffffc000;
- } else {
- if (tcr->raw_tcr & TTBCR_PD0) {
- /* Translation table walk disabled for TTBR0 */
- return false;
- }
- *table = regime_ttbr(env, mmu_idx, 0) & tcr->base_mask;
- }
- *table |= (address >> 18) & 0x3ffc;
- return true;
-}
-
-/* Translate a S1 pagetable walk through S2 if needed. */
-static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx,
- hwaddr addr, MemTxAttrs txattrs,
- ARMMMUFaultInfo *fi)
-{
- if ((mmu_idx == ARMMMUIdx_S1NSE0 || mmu_idx == ARMMMUIdx_S1NSE1) &&
- !regime_translation_disabled(env, ARMMMUIdx_S2NS)) {
- target_ulong s2size;
- hwaddr s2pa;
- int s2prot;
- int ret;
-
- ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_S2NS, &s2pa,
- &txattrs, &s2prot, &s2size, fi, NULL);
- if (ret) {
- assert(fi->type != ARMFault_None);
- fi->s2addr = addr;
- fi->stage2 = true;
- fi->s1ptw = true;
- return ~0;
- }
- addr = s2pa;
- }
- return addr;
-}
-
-/* All loads done in the course of a page table walk go through here. */
-static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
- ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
- MemTxAttrs attrs = {};
- MemTxResult result = MEMTX_OK;
- AddressSpace *as;
- uint32_t data;
-
- attrs.secure = is_secure;
- as = arm_addressspace(cs, attrs);
- addr = S1_ptw_translate(env, mmu_idx, addr, attrs, fi);
- if (fi->s1ptw) {
- return 0;
- }
- if (regime_translation_big_endian(env, mmu_idx)) {
- data = address_space_ldl_be(as, addr, attrs, &result);
- } else {
- data = address_space_ldl_le(as, addr, attrs, &result);
- }
- if (result == MEMTX_OK) {
- return data;
- }
- fi->type = ARMFault_SyncExternalOnWalk;
- fi->ea = arm_extabort_type(result);
- return 0;
-}
-
-static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
- ARMMMUIdx mmu_idx, ARMMMUFaultInfo *fi)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
- MemTxAttrs attrs = {};
- MemTxResult result = MEMTX_OK;
- AddressSpace *as;
- uint64_t data;
-
- attrs.secure = is_secure;
- as = arm_addressspace(cs, attrs);
- addr = S1_ptw_translate(env, mmu_idx, addr, attrs, fi);
- if (fi->s1ptw) {
- return 0;
- }
- if (regime_translation_big_endian(env, mmu_idx)) {
- data = address_space_ldq_be(as, addr, attrs, &result);
- } else {
- data = address_space_ldq_le(as, addr, attrs, &result);
- }
- if (result == MEMTX_OK) {
- return data;
- }
- fi->type = ARMFault_SyncExternalOnWalk;
- fi->ea = arm_extabort_type(result);
- return 0;
-}
-
-static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
- hwaddr *phys_ptr, int *prot,
- target_ulong *page_size,
- ARMMMUFaultInfo *fi)
-{
- CPUState *cs = CPU(arm_env_get_cpu(env));
- int level = 1;
- uint32_t table;
- uint32_t desc;
- int type;
- int ap;
- int domain = 0;
- int domain_prot;
- hwaddr phys_addr;
- uint32_t dacr;
-
- /* Pagetable walk. */
- /* Lookup l1 descriptor. */
- if (!get_level1_table_address(env, mmu_idx, &table, address)) {
- /* Section translation fault if page walk is disabled by PD0 or PD1 */
- fi->type = ARMFault_Translation;
- goto do_fault;
- }
- desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
- mmu_idx, fi);
- if (fi->type != ARMFault_None) {
- goto do_fault;
- }
- type = (desc & 3);
- domain = (desc >> 5) & 0x0f;
- if (regime_el(env, mmu_idx) == 1) {
- dacr = env->cp15.dacr_ns;
- } else {
- dacr = env->cp15.dacr_s;
- }
- domain_prot = (dacr >> (domain * 2)) & 3;
- if (type == 0) {
- /* Section translation fault. */
- fi->type = ARMFault_Translation;
- goto do_fault;
- }
- if (type != 2) {
- level = 2;
- }
- if (domain_prot == 0 || domain_prot == 2) {
- fi->type = ARMFault_Domain;
- goto do_fault;
- }
- if (type == 2) {
- /* 1Mb section. */
- phys_addr = (desc & 0xfff00000) | (address & 0x000fffff);
- ap = (desc >> 10) & 3;
- *page_size = 1024 * 1024;
- } else {
- /* Lookup l2 entry. */
- if (type == 1) {
- /* Coarse pagetable. */
- table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
- } else {
- /* Fine pagetable. */
- table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
- }
- desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
- mmu_idx, fi);
- if (fi->type != ARMFault_None) {
- goto do_fault;
- }
- switch (desc & 3) {
- case 0: /* Page translation fault. */
- fi->type = ARMFault_Translation;
- goto do_fault;
- case 1: /* 64k page. */
- phys_addr = (desc & 0xffff0000) | (address & 0xffff);
- ap = (desc >> (4 + ((address >> 13) & 6))) & 3;
- *page_size = 0x10000;
- break;
- case 2: /* 4k page. */
- phys_addr = (desc & 0xfffff000) | (address & 0xfff);
- ap = (desc >> (4 + ((address >> 9) & 6))) & 3;
- *page_size = 0x1000;
- break;
- case 3: /* 1k page, or ARMv6/XScale "extended small (4k) page" */
- if (type == 1) {
- /* ARMv6/XScale extended small page format */
- if (arm_feature(env, ARM_FEATURE_XSCALE)
- || arm_feature(env, ARM_FEATURE_V6)) {
- phys_addr = (desc & 0xfffff000) | (address & 0xfff);
- *page_size = 0x1000;
- } else {
- /* UNPREDICTABLE in ARMv5; we choose to take a
- * page translation fault.
- */
- fi->type = ARMFault_Translation;
- goto do_fault;
- }
- } else {
- phys_addr = (desc & 0xfffffc00) | (address & 0x3ff);
- *page_size = 0x400;
- }
- ap = (desc >> 4) & 3;
- break;
- default:
- /* Never happens, but compiler isn't smart enough to tell. */
- abort();
- }
- }
- *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot);
- *prot |= *prot ? PAGE_EXEC : 0;
- if (!(*prot & (1 << access_type))) {
- /* Access permission fault. */
- fi->type = ARMFault_Permission;
- goto do_fault;
- }
- *phys_ptr = phys_addr;
- return false;
-do_fault:
- fi->domain = domain;
- fi->level = level;
- return true;
-}
-
-static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
- hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
- target_ulong *page_size, ARMMMUFaultInfo *fi)
-{
- CPUState *cs = CPU(arm_env_get_cpu(env));
- int level = 1;
- uint32_t table;
- uint32_t desc;
- uint32_t xn;
- uint32_t pxn = 0;
- int type;
- int ap;
- int domain = 0;
- int domain_prot;
- hwaddr phys_addr;
- uint32_t dacr;
- bool ns;
-
- /* Pagetable walk. */
- /* Lookup l1 descriptor. */
- if (!get_level1_table_address(env, mmu_idx, &table, address)) {
- /* Section translation fault if page walk is disabled by PD0 or PD1 */
- fi->type = ARMFault_Translation;
- goto do_fault;
- }
- desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
- mmu_idx, fi);
- if (fi->type != ARMFault_None) {
- goto do_fault;
- }
- type = (desc & 3);
- if (type == 0 || (type == 3 && !arm_feature(env, ARM_FEATURE_PXN))) {
- /* Section translation fault, or attempt to use the encoding
- * which is Reserved on implementations without PXN.
- */
- fi->type = ARMFault_Translation;
- goto do_fault;
- }
- if ((type == 1) || !(desc & (1 << 18))) {
- /* Page or Section. */
- domain = (desc >> 5) & 0x0f;
- }
- if (regime_el(env, mmu_idx) == 1) {
- dacr = env->cp15.dacr_ns;
- } else {
- dacr = env->cp15.dacr_s;
- }
- if (type == 1) {
- level = 2;
- }
- domain_prot = (dacr >> (domain * 2)) & 3;
- if (domain_prot == 0 || domain_prot == 2) {
- /* Section or Page domain fault */
- fi->type = ARMFault_Domain;
- goto do_fault;
- }
- if (type != 1) {
- if (desc & (1 << 18)) {
- /* Supersection. */
- phys_addr = (desc & 0xff000000) | (address & 0x00ffffff);
- phys_addr |= (uint64_t)extract32(desc, 20, 4) << 32;
- phys_addr |= (uint64_t)extract32(desc, 5, 4) << 36;
- *page_size = 0x1000000;
- } else {
- /* Section. */
- phys_addr = (desc & 0xfff00000) | (address & 0x000fffff);
- *page_size = 0x100000;
- }
- ap = ((desc >> 10) & 3) | ((desc >> 13) & 4);
- xn = desc & (1 << 4);
- pxn = desc & 1;
- ns = extract32(desc, 19, 1);
- } else {
- if (arm_feature(env, ARM_FEATURE_PXN)) {
- pxn = (desc >> 2) & 1;
- }
- ns = extract32(desc, 3, 1);
- /* Lookup l2 entry. */
- table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
- desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
- mmu_idx, fi);
- if (fi->type != ARMFault_None) {
- goto do_fault;
- }
- ap = ((desc >> 4) & 3) | ((desc >> 7) & 4);
- switch (desc & 3) {
- case 0: /* Page translation fault. */
- fi->type = ARMFault_Translation;
- goto do_fault;
- case 1: /* 64k page. */
- phys_addr = (desc & 0xffff0000) | (address & 0xffff);
- xn = desc & (1 << 15);
- *page_size = 0x10000;
- break;
- case 2: case 3: /* 4k page. */
- phys_addr = (desc & 0xfffff000) | (address & 0xfff);
- xn = desc & 1;
- *page_size = 0x1000;
- break;
- default:
- /* Never happens, but compiler isn't smart enough to tell. */
- abort();
- }
- }
- if (domain_prot == 3) {
- *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
- } else {
- if (pxn && !regime_is_user(env, mmu_idx)) {
- xn = 1;
- }
- if (xn && access_type == MMU_INST_FETCH) {
- fi->type = ARMFault_Permission;
- goto do_fault;
- }
-
- if (arm_feature(env, ARM_FEATURE_V6K) &&
- (regime_sctlr(env, mmu_idx) & SCTLR_AFE)) {
- /* The simplified model uses AP[0] as an access control bit. */
- if ((ap & 1) == 0) {
- /* Access flag fault. */
- fi->type = ARMFault_AccessFlag;
- goto do_fault;
- }
- *prot = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1);
- } else {
- *prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot);
- }
- if (*prot && !xn) {
- *prot |= PAGE_EXEC;
- }
- if (!(*prot & (1 << access_type))) {
- /* Access permission fault. */
- fi->type = ARMFault_Permission;
- goto do_fault;
- }
- }
- if (ns) {
- /* The NS bit will (as required by the architecture) have no effect if
- * the CPU doesn't support TZ or this is a non-secure translation
- * regime, because the attribute will already be non-secure.
- */
- attrs->secure = false;
- }
- *phys_ptr = phys_addr;
- return false;
-do_fault:
- fi->domain = domain;
- fi->level = level;
- return true;
-}
-
-/*
- * check_s2_mmu_setup
- * @cpu: ARMCPU
- * @is_aa64: True if the translation regime is in AArch64 state
- * @startlevel: Suggested starting level
- * @inputsize: Bitsize of IPAs
- * @stride: Page-table stride (See the ARM ARM)
- *
- * Returns true if the suggested S2 translation parameters are OK and
- * false otherwise.
- */
-static bool check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, int level,
- int inputsize, int stride)
-{
- const int grainsize = stride + 3;
- int startsizecheck;
-
- /* Negative levels are never allowed. */
- if (level < 0) {
- return false;
- }
-
- startsizecheck = inputsize - ((3 - level) * stride + grainsize);
- if (startsizecheck < 1 || startsizecheck > stride + 4) {
- return false;
- }
-
- if (is_aa64) {
- CPUARMState *env = &cpu->env;
- unsigned int pamax = arm_pamax(cpu);
-
- switch (stride) {
- case 13: /* 64KB Pages. */
- if (level == 0 || (level == 1 && pamax <= 42)) {
- return false;
- }
- break;
- case 11: /* 16KB Pages. */
- if (level == 0 || (level == 1 && pamax <= 40)) {
- return false;
- }
- break;
- case 9: /* 4KB Pages. */
- if (level == 0 && pamax <= 42) {
- return false;
- }
- break;
- default:
- g_assert_not_reached();
- }
-
- /* Inputsize checks. */
- if (inputsize > pamax &&
- (arm_el_is_aa64(env, 1) || inputsize > 40)) {
- /* This is CONSTRAINED UNPREDICTABLE and we choose to fault. */
- return false;
- }
- } else {
- /* AArch32 only supports 4KB pages. Assert on that. */
- assert(stride == 9);
-
- if (level == 0) {
- return false;
- }
- }
- return true;
-}
-
-/* Translate from the 4-bit stage 2 representation of
- * memory attributes (without cache-allocation hints) to
- * the 8-bit representation of the stage 1 MAIR registers
- * (which includes allocation hints).
- *
- * ref: shared/translation/attrs/S2AttrDecode()
- * .../S2ConvertAttrsHints()
- */
-static uint8_t convert_stage2_attrs(CPUARMState *env, uint8_t s2attrs)
-{
- uint8_t hiattr = extract32(s2attrs, 2, 2);
- uint8_t loattr = extract32(s2attrs, 0, 2);
- uint8_t hihint = 0, lohint = 0;
-
- if (hiattr != 0) { /* normal memory */
- if ((env->cp15.hcr_el2 & HCR_CD) != 0) { /* cache disabled */
- hiattr = loattr = 1; /* non-cacheable */
- } else {
- if (hiattr != 1) { /* Write-through or write-back */
- hihint = 3; /* RW allocate */
- }
- if (loattr != 1) { /* Write-through or write-back */
- lohint = 3; /* RW allocate */
- }
- }
- }
-
- return (hiattr << 6) | (hihint << 4) | (loattr << 2) | lohint;
-}
-
-static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
- hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
- target_ulong *page_size_ptr,
- ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
-{
- ARMCPU *cpu = arm_env_get_cpu(env);
- CPUState *cs = CPU(cpu);
- /* Read an LPAE long-descriptor translation table. */
- ARMFaultType fault_type = ARMFault_Translation;
- uint32_t level;
- uint32_t epd = 0;
- int32_t t0sz, t1sz;
- uint32_t tg;
- uint64_t ttbr;
- int ttbr_select;
- hwaddr descaddr, indexmask, indexmask_grainsize;
- uint32_t tableattrs;
- target_ulong page_size;
- uint32_t attrs;
- int32_t stride = 9;
- int32_t addrsize;
- int inputsize;
- int32_t tbi = 0;
- TCR *tcr = regime_tcr(env, mmu_idx);
- int ap, ns, xn, pxn;
- uint32_t el = regime_el(env, mmu_idx);
- bool ttbr1_valid = true;
- uint64_t descaddrmask;
- bool aarch64 = arm_el_is_aa64(env, el);
-
- /* TODO:
- * This code does not handle the different format TCR for VTCR_EL2.
- * This code also does not support shareability levels.
- * Attribute and permission bit handling should also be checked when adding
- * support for those page table walks.
- */
- if (aarch64) {
- level = 0;
- addrsize = 64;
- if (el > 1) {
- if (mmu_idx != ARMMMUIdx_S2NS) {
- tbi = extract64(tcr->raw_tcr, 20, 1);
- }
- } else {
- if (extract64(address, 55, 1)) {
- tbi = extract64(tcr->raw_tcr, 38, 1);
- } else {
- tbi = extract64(tcr->raw_tcr, 37, 1);
- }
- }
- tbi *= 8;
-
- /* If we are in 64-bit EL2 or EL3 then there is no TTBR1, so mark it
- * invalid.
- */
- if (el > 1) {
- ttbr1_valid = false;
- }
- } else {
- level = 1;
- addrsize = 32;
- /* There is no TTBR1 for EL2 */
- if (el == 2) {
- ttbr1_valid = false;
- }
- }
-
- /* Determine whether this address is in the region controlled by
- * TTBR0 or TTBR1 (or if it is in neither region and should fault).
- * This is a Non-secure PL0/1 stage 1 translation, so controlled by
- * TTBCR/TTBR0/TTBR1 in accordance with ARM ARM DDI0406C table B-32:
- */
- if (aarch64) {
- /* AArch64 translation. */
- t0sz = extract32(tcr->raw_tcr, 0, 6);
- t0sz = MIN(t0sz, 39);
- t0sz = MAX(t0sz, 16);
- } else if (mmu_idx != ARMMMUIdx_S2NS) {
- /* AArch32 stage 1 translation. */
- t0sz = extract32(tcr->raw_tcr, 0, 3);
- } else {
- /* AArch32 stage 2 translation. */
- bool sext = extract32(tcr->raw_tcr, 4, 1);
- bool sign = extract32(tcr->raw_tcr, 3, 1);
- /* Address size is 40-bit for a stage 2 translation,
- * and t0sz can be negative (from -8 to 7),
- * so we need to adjust it to use the TTBR selecting logic below.
- */
- addrsize = 40;
- t0sz = sextract32(tcr->raw_tcr, 0, 4) + 8;
-
- /* If the sign-extend bit is not the same as t0sz[3], the result
- * is unpredictable. Flag this as a guest error. */
- if (sign != sext) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "AArch32: VTCR.S / VTCR.T0SZ[3] mismatch\n");
- }
- }
- t1sz = extract32(tcr->raw_tcr, 16, 6);
- if (aarch64) {
- t1sz = MIN(t1sz, 39);
- t1sz = MAX(t1sz, 16);
- }
- if (t0sz && !extract64(address, addrsize - t0sz, t0sz - tbi)) {
- /* there is a ttbr0 region and we are in it (high bits all zero) */
- ttbr_select = 0;
- } else if (ttbr1_valid && t1sz &&
- !extract64(~address, addrsize - t1sz, t1sz - tbi)) {
- /* there is a ttbr1 region and we are in it (high bits all one) */
- ttbr_select = 1;
- } else if (!t0sz) {
- /* ttbr0 region is "everything not in the ttbr1 region" */
- ttbr_select = 0;
- } else if (!t1sz && ttbr1_valid) {
- /* ttbr1 region is "everything not in the ttbr0 region" */
- ttbr_select = 1;
- } else {
- /* in the gap between the two regions, this is a Translation fault */
- fault_type = ARMFault_Translation;
- goto do_fault;
- }
-
- /* Note that QEMU ignores shareability and cacheability attributes,
- * so we don't need to do anything with the SH, ORGN, IRGN fields
- * in the TTBCR. Similarly, TTBCR:A1 selects whether we get the
- * ASID from TTBR0 or TTBR1, but QEMU's TLB doesn't currently
- * implement any ASID-like capability so we can ignore it (instead
- * we will always flush the TLB any time the ASID is changed).
- */
- if (ttbr_select == 0) {
- ttbr = regime_ttbr(env, mmu_idx, 0);
- if (el < 2) {
- epd = extract32(tcr->raw_tcr, 7, 1);
- }
- inputsize = addrsize - t0sz;
-
- tg = extract32(tcr->raw_tcr, 14, 2);
- if (tg == 1) { /* 64KB pages */
- stride = 13;
- }
- if (tg == 2) { /* 16KB pages */
- stride = 11;
- }
- } else {
- /* We should only be here if TTBR1 is valid */
- assert(ttbr1_valid);
-
- ttbr = regime_ttbr(env, mmu_idx, 1);
- epd = extract32(tcr->raw_tcr, 23, 1);
- inputsize = addrsize - t1sz;
-
- tg = extract32(tcr->raw_tcr, 30, 2);
- if (tg == 3) { /* 64KB pages */
- stride = 13;
- }
- if (tg == 1) { /* 16KB pages */
- stride = 11;
- }
- }
-
- /* Here we should have set up all the parameters for the translation:
- * inputsize, ttbr, epd, stride, tbi
- */
-
- if (epd) {
- /* Translation table walk disabled => Translation fault on TLB miss
- * Note: This is always 0 on 64-bit EL2 and EL3.
- */
- goto do_fault;
- }
-
- if (mmu_idx != ARMMMUIdx_S2NS) {
- /* The starting level depends on the virtual address size (which can
- * be up to 48 bits) and the translation granule size. It indicates
- * the number of strides (stride bits at a time) needed to
- * consume the bits of the input address. In the pseudocode this is:
- * level = 4 - RoundUp((inputsize - grainsize) / stride)
- * where their 'inputsize' is our 'inputsize', 'grainsize' is
- * our 'stride + 3' and 'stride' is our 'stride'.
- * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying:
- * = 4 - (inputsize - stride - 3 + stride - 1) / stride
- * = 4 - (inputsize - 4) / stride;
- */
- level = 4 - (inputsize - 4) / stride;
- } else {
- /* For stage 2 translations the starting level is specified by the
- * VTCR_EL2.SL0 field (whose interpretation depends on the page size)
- */
- uint32_t sl0 = extract32(tcr->raw_tcr, 6, 2);
- uint32_t startlevel;
- bool ok;
-
- if (!aarch64 || stride == 9) {
- /* AArch32 or 4KB pages */
- startlevel = 2 - sl0;
- } else {
- /* 16KB or 64KB pages */
- startlevel = 3 - sl0;
- }
-
- /* Check that the starting level is valid. */
- ok = check_s2_mmu_setup(cpu, aarch64, startlevel,
- inputsize, stride);
- if (!ok) {
- fault_type = ARMFault_Translation;
- goto do_fault;
- }
- level = startlevel;
- }
-
- indexmask_grainsize = (1ULL << (stride + 3)) - 1;
- indexmask = (1ULL << (inputsize - (stride * (4 - level)))) - 1;
-
- /* Now we can extract the actual base address from the TTBR */
- descaddr = extract64(ttbr, 0, 48);
- descaddr &= ~indexmask;
-
- /* The address field in the descriptor goes up to bit 39 for ARMv7
- * but up to bit 47 for ARMv8, but we use the descaddrmask
- * up to bit 39 for AArch32, because we don't need other bits in that case
- * to construct next descriptor address (anyway they should be all zeroes).
- */
- descaddrmask = ((1ull << (aarch64 ? 48 : 40)) - 1) &
- ~indexmask_grainsize;
-
- /* Secure accesses start with the page table in secure memory and
- * can be downgraded to non-secure at any step. Non-secure accesses
- * remain non-secure. We implement this by just ORing in the NSTable/NS
- * bits at each step.
- */
- tableattrs = regime_is_secure(env, mmu_idx) ? 0 : (1 << 4);
- for (;;) {
- uint64_t descriptor;
- bool nstable;
-
- descaddr |= (address >> (stride * (4 - level))) & indexmask;
- descaddr &= ~7ULL;
- nstable = extract32(tableattrs, 4, 1);
- descriptor = arm_ldq_ptw(cs, descaddr, !nstable, mmu_idx, fi);
- if (fi->type != ARMFault_None) {
- goto do_fault;
- }
-
- if (!(descriptor & 1) ||
- (!(descriptor & 2) && (level == 3))) {
- /* Invalid, or the Reserved level 3 encoding */
- goto do_fault;
- }
- descaddr = descriptor & descaddrmask;
-
- if ((descriptor & 2) && (level < 3)) {
- /* Table entry. The top five bits are attributes which may
- * propagate down through lower levels of the table (and
- * which are all arranged so that 0 means "no effect", so
- * we can gather them up by ORing in the bits at each level).
- */
- tableattrs |= extract64(descriptor, 59, 5);
- level++;
- indexmask = indexmask_grainsize;
- continue;
- }
- /* Block entry at level 1 or 2, or page entry at level 3.
- * These are basically the same thing, although the number
- * of bits we pull in from the vaddr varies.
- */
- page_size = (1ULL << ((stride * (4 - level)) + 3));
- descaddr |= (address & (page_size - 1));
- /* Extract attributes from the descriptor */
- attrs = extract64(descriptor, 2, 10)
- | (extract64(descriptor, 52, 12) << 10);
-
- if (mmu_idx == ARMMMUIdx_S2NS) {
- /* Stage 2 table descriptors do not include any attribute fields */
- break;
- }
- /* Merge in attributes from table descriptors */
- attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */
- attrs |= extract32(tableattrs, 3, 1) << 5; /* APTable[1] => AP[2] */
- /* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
- * means "force PL1 access only", which means forcing AP[1] to 0.
- */
- if (extract32(tableattrs, 2, 1)) {
- attrs &= ~(1 << 4);
- }
- attrs |= nstable << 3; /* NS */
- break;
- }
- /* Here descaddr is the final physical address, and attributes
- * are all in attrs.
- */
- fault_type = ARMFault_AccessFlag;
- if ((attrs & (1 << 8)) == 0) {
- /* Access flag */
- goto do_fault;
- }
-
- ap = extract32(attrs, 4, 2);
- xn = extract32(attrs, 12, 1);
-
- if (mmu_idx == ARMMMUIdx_S2NS) {
- ns = true;
- *prot = get_S2prot(env, ap, xn);
- } else {
- ns = extract32(attrs, 3, 1);
- pxn = extract32(attrs, 11, 1);
- *prot = get_S1prot(env, mmu_idx, aarch64, ap, ns, xn, pxn);
- }
-
- fault_type = ARMFault_Permission;
- if (!(*prot & (1 << access_type))) {
- goto do_fault;
- }
-
- if (ns) {
- /* The NS bit will (as required by the architecture) have no effect if
- * the CPU doesn't support TZ or this is a non-secure translation
- * regime, because the attribute will already be non-secure.
- */
- txattrs->secure = false;
- }
-
- if (cacheattrs != NULL) {
- if (mmu_idx == ARMMMUIdx_S2NS) {
- cacheattrs->attrs = convert_stage2_attrs(env,
- extract32(attrs, 0, 4));
- } else {
- /* Index into MAIR registers for cache attributes */
- uint8_t attrindx = extract32(attrs, 0, 3);
- uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)];
- assert(attrindx <= 7);
- cacheattrs->attrs = extract64(mair, attrindx * 8, 8);
- }
- cacheattrs->shareability = extract32(attrs, 6, 2);
- }
-
- *phys_ptr = descaddr;
- *page_size_ptr = page_size;
- return false;
-
-do_fault:
- fi->type = fault_type;
- fi->level = level;
- /* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2. */
- fi->stage2 = fi->s1ptw || (mmu_idx == ARMMMUIdx_S2NS);
- return true;
-}
-
-static inline void get_phys_addr_pmsav7_default(CPUARMState *env,
- ARMMMUIdx mmu_idx,
- int32_t address, int *prot)
-{
- if (!arm_feature(env, ARM_FEATURE_M)) {
- *prot = PAGE_READ | PAGE_WRITE;
- switch (address) {
- case 0xF0000000 ... 0xFFFFFFFF:
- if (regime_sctlr(env, mmu_idx) & SCTLR_V) {
- /* hivecs execing is ok */
- *prot |= PAGE_EXEC;
- }
- break;
- case 0x00000000 ... 0x7FFFFFFF:
- *prot |= PAGE_EXEC;
- break;
- }
- } else {
- /* Default system address map for M profile cores.
- * The architecture specifies which regions are execute-never;
- * at the MPU level no other checks are defined.
- */
- switch (address) {
- case 0x00000000 ... 0x1fffffff: /* ROM */
- case 0x20000000 ... 0x3fffffff: /* SRAM */
- case 0x60000000 ... 0x7fffffff: /* RAM */
- case 0x80000000 ... 0x9fffffff: /* RAM */
- *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
- break;
- case 0x40000000 ... 0x5fffffff: /* Peripheral */
- case 0xa0000000 ... 0xbfffffff: /* Device */
- case 0xc0000000 ... 0xdfffffff: /* Device */
- case 0xe0000000 ... 0xffffffff: /* System */
- *prot = PAGE_READ | PAGE_WRITE;
- break;
- default:
- g_assert_not_reached();
- }
- }
-}
-
-static bool pmsav7_use_background_region(ARMCPU *cpu,
- ARMMMUIdx mmu_idx, bool is_user)
-{
- /* Return true if we should use the default memory map as a
- * "background" region if there are no hits against any MPU regions.
- */
- CPUARMState *env = &cpu->env;
-
- if (is_user) {
- return false;
- }
-
- if (arm_feature(env, ARM_FEATURE_M)) {
- return env->v7m.mpu_ctrl[regime_is_secure(env, mmu_idx)]
- & R_V7M_MPU_CTRL_PRIVDEFENA_MASK;
- } else {
- return regime_sctlr(env, mmu_idx) & SCTLR_BR;
- }
-}
-
-static inline bool m_is_ppb_region(CPUARMState *env, uint32_t address)
-{
- /* True if address is in the M profile PPB region 0xe0000000 - 0xe00fffff */
- return arm_feature(env, ARM_FEATURE_M) &&
- extract32(address, 20, 12) == 0xe00;
-}
-
-static inline bool m_is_system_region(CPUARMState *env, uint32_t address)
-{
- /* True if address is in the M profile system region
- * 0xe0000000 - 0xffffffff
- */
- return arm_feature(env, ARM_FEATURE_M) && extract32(address, 29, 3) == 0x7;
-}
-
-static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
- hwaddr *phys_ptr, int *prot,
- target_ulong *page_size,
- ARMMMUFaultInfo *fi)
-{
- ARMCPU *cpu = arm_env_get_cpu(env);
- int n;
- bool is_user = regime_is_user(env, mmu_idx);
-
- *phys_ptr = address;
- *page_size = TARGET_PAGE_SIZE;
- *prot = 0;
-
- if (regime_translation_disabled(env, mmu_idx) ||
- m_is_ppb_region(env, address)) {
- /* MPU disabled or M profile PPB access: use default memory map.
- * The other case which uses the default memory map in the
- * v7M ARM ARM pseudocode is exception vector reads from the vector
- * table. In QEMU those accesses are done in arm_v7m_load_vector(),
- * which always does a direct read using address_space_ldl(), rather
- * than going via this function, so we don't need to check that here.
- */
- get_phys_addr_pmsav7_default(env, mmu_idx, address, prot);
- } else { /* MPU enabled */
- for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) {
- /* region search */
- uint32_t base = env->pmsav7.drbar[n];
- uint32_t rsize = extract32(env->pmsav7.drsr[n], 1, 5);
- uint32_t rmask;
- bool srdis = false;
-
- if (!(env->pmsav7.drsr[n] & 0x1)) {
- continue;
- }
-
- if (!rsize) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "DRSR[%d]: Rsize field cannot be 0\n", n);
- continue;
- }
- rsize++;
- rmask = (1ull << rsize) - 1;
-
- if (base & rmask) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "DRBAR[%d]: 0x%" PRIx32 " misaligned "
- "to DRSR region size, mask = 0x%" PRIx32 "\n",
- n, base, rmask);
- continue;
- }
-
- if (address < base || address > base + rmask) {
- /*
- * Address not in this region. We must check whether the
- * region covers addresses in the same page as our address.
- * In that case we must not report a size that covers the
- * whole page for a subsequent hit against a different MPU
- * region or the background region, because it would result in
- * incorrect TLB hits for subsequent accesses to addresses that
- * are in this MPU region.
- */
- if (ranges_overlap(base, rmask,
- address & TARGET_PAGE_MASK,
- TARGET_PAGE_SIZE)) {
- *page_size = 1;
- }
- continue;
- }
-
- /* Region matched */
-
- if (rsize >= 8) { /* no subregions for regions < 256 bytes */
- int i, snd;
- uint32_t srdis_mask;
-
- rsize -= 3; /* sub region size (power of 2) */
- snd = ((address - base) >> rsize) & 0x7;
- srdis = extract32(env->pmsav7.drsr[n], snd + 8, 1);
-
- srdis_mask = srdis ? 0x3 : 0x0;
- for (i = 2; i <= 8 && rsize < TARGET_PAGE_BITS; i *= 2) {
- /* This will check in groups of 2, 4 and then 8, whether
- * the subregion bits are consistent. rsize is incremented
- * back up to give the region size, considering consistent
- * adjacent subregions as one region. Stop testing if rsize
- * is already big enough for an entire QEMU page.
- */
- int snd_rounded = snd & ~(i - 1);
- uint32_t srdis_multi = extract32(env->pmsav7.drsr[n],
- snd_rounded + 8, i);
- if (srdis_mask ^ srdis_multi) {
- break;
- }
- srdis_mask = (srdis_mask << i) | srdis_mask;
- rsize++;
- }
- }
- if (srdis) {
- continue;
- }
- if (rsize < TARGET_PAGE_BITS) {
- *page_size = 1 << rsize;
- }
- break;
- }
-
- if (n == -1) { /* no hits */
- if (!pmsav7_use_background_region(cpu, mmu_idx, is_user)) {
- /* background fault */
- fi->type = ARMFault_Background;
- return true;
- }
- get_phys_addr_pmsav7_default(env, mmu_idx, address, prot);
- } else { /* a MPU hit! */
- uint32_t ap = extract32(env->pmsav7.dracr[n], 8, 3);
- uint32_t xn = extract32(env->pmsav7.dracr[n], 12, 1);
-
- if (m_is_system_region(env, address)) {
- /* System space is always execute never */
- xn = 1;
- }
-
- if (is_user) { /* User mode AP bit decoding */
- switch (ap) {
- case 0:
- case 1:
- case 5:
- break; /* no access */
- case 3:
- *prot |= PAGE_WRITE;
- /* fall through */
- case 2:
- case 6:
- *prot |= PAGE_READ | PAGE_EXEC;
- break;
- case 7:
- /* for v7M, same as 6; for R profile a reserved value */
- if (arm_feature(env, ARM_FEATURE_M)) {
- *prot |= PAGE_READ | PAGE_EXEC;
- break;
- }
- /* fall through */
- default:
- qemu_log_mask(LOG_GUEST_ERROR,
- "DRACR[%d]: Bad value for AP bits: 0x%"
- PRIx32 "\n", n, ap);
- }
- } else { /* Priv. mode AP bits decoding */
- switch (ap) {
- case 0:
- break; /* no access */
- case 1:
- case 2:
- case 3:
- *prot |= PAGE_WRITE;
- /* fall through */
- case 5:
- case 6:
- *prot |= PAGE_READ | PAGE_EXEC;
- break;
- case 7:
- /* for v7M, same as 6; for R profile a reserved value */
- if (arm_feature(env, ARM_FEATURE_M)) {
- *prot |= PAGE_READ | PAGE_EXEC;
- break;
- }
- /* fall through */
- default:
- qemu_log_mask(LOG_GUEST_ERROR,
- "DRACR[%d]: Bad value for AP bits: 0x%"
- PRIx32 "\n", n, ap);
- }
- }
-
- /* execute never */
- if (xn) {
- *prot &= ~PAGE_EXEC;
- }
- }
+ return GranInvalid;
}
-
- fi->type = ARMFault_Permission;
- fi->level = 1;
- return !(*prot & (1 << access_type));
}
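TG0 and TG1 deliberately get separate decoders above because the two fields use different encodings: TG0 is 0=4K, 1=64K, 2=16K while TG1 is 1=16K, 2=4K, 3=64K. A throwaway table makes the asymmetry visible (standalone C, names local to this sketch):

#include <stdio.h>

static const char *tg0_name[4] = { "4K", "64K", "16K", "invalid" };
static const char *tg1_name[4] = { "invalid", "16K", "4K", "64K" };

int main(void)
{
    for (int tg = 0; tg < 4; tg++) {
        printf("TG0=%d -> %-7s TG1=%d -> %s\n",
               tg, tg0_name[tg], tg, tg1_name[tg]);
    }
    return 0;
}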
-static bool v8m_is_sau_exempt(CPUARMState *env,
- uint32_t address, MMUAccessType access_type)
+static inline bool have4k(ARMCPU *cpu, bool stage2)
{
- /* The architecture specifies that certain address ranges are
- * exempt from v8M SAU/IDAU checks.
- */
- return
- (access_type == MMU_INST_FETCH && m_is_system_region(env, address)) ||
- (address >= 0xe0000000 && address <= 0xe0002fff) ||
- (address >= 0xe000e000 && address <= 0xe000efff) ||
- (address >= 0xe002e000 && address <= 0xe002efff) ||
- (address >= 0xe0040000 && address <= 0xe0041fff) ||
- (address >= 0xe00ff000 && address <= 0xe00fffff);
+ return stage2 ? cpu_isar_feature(aa64_tgran4_2, cpu)
+ : cpu_isar_feature(aa64_tgran4, cpu);
}
-static void v8m_security_lookup(CPUARMState *env, uint32_t address,
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
- V8M_SAttributes *sattrs)
+static inline bool have16k(ARMCPU *cpu, bool stage2)
{
- /* Look up the security attributes for this address. Compare the
- * pseudocode SecurityCheck() function.
- * We assume the caller has zero-initialized *sattrs.
- */
- ARMCPU *cpu = arm_env_get_cpu(env);
- int r;
- bool idau_exempt = false, idau_ns = true, idau_nsc = true;
- int idau_region = IREGION_NOTVALID;
- uint32_t addr_page_base = address & TARGET_PAGE_MASK;
- uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1);
-
- if (cpu->idau) {
- IDAUInterfaceClass *iic = IDAU_INTERFACE_GET_CLASS(cpu->idau);
- IDAUInterface *ii = IDAU_INTERFACE(cpu->idau);
-
- iic->check(ii, address, &idau_region, &idau_exempt, &idau_ns,
- &idau_nsc);
- }
-
- if (access_type == MMU_INST_FETCH && extract32(address, 28, 4) == 0xf) {
- /* 0xf0000000..0xffffffff is always S for insn fetches */
- return;
- }
-
- if (idau_exempt || v8m_is_sau_exempt(env, address, access_type)) {
- sattrs->ns = !regime_is_secure(env, mmu_idx);
- return;
- }
-
- if (idau_region != IREGION_NOTVALID) {
- sattrs->irvalid = true;
- sattrs->iregion = idau_region;
- }
-
- switch (env->sau.ctrl & 3) {
- case 0: /* SAU.ENABLE == 0, SAU.ALLNS == 0 */
- break;
- case 2: /* SAU.ENABLE == 0, SAU.ALLNS == 1 */
- sattrs->ns = true;
- break;
- default: /* SAU.ENABLE == 1 */
- for (r = 0; r < cpu->sau_sregion; r++) {
- if (env->sau.rlar[r] & 1) {
- uint32_t base = env->sau.rbar[r] & ~0x1f;
- uint32_t limit = env->sau.rlar[r] | 0x1f;
-
- if (base <= address && limit >= address) {
- if (base > addr_page_base || limit < addr_page_limit) {
- sattrs->subpage = true;
- }
- if (sattrs->srvalid) {
- /* If we hit in more than one region then we must report
- * as Secure, not NS-Callable, with no valid region
- * number info.
- */
- sattrs->ns = false;
- sattrs->nsc = false;
- sattrs->sregion = 0;
- sattrs->srvalid = false;
- break;
- } else {
- if (env->sau.rlar[r] & 2) {
- sattrs->nsc = true;
- } else {
- sattrs->ns = true;
- }
- sattrs->srvalid = true;
- sattrs->sregion = r;
- }
- } else {
- /*
- * Address not in this region. We must check whether the
- * region covers addresses in the same page as our address.
- * In that case we must not report a size that covers the
- * whole page for a subsequent hit against a different MPU
- * region or the background region, because it would result
- * in incorrect TLB hits for subsequent accesses to
- * addresses that are in this MPU region.
- */
- if (limit >= base &&
- ranges_overlap(base, limit - base + 1,
- addr_page_base,
- TARGET_PAGE_SIZE)) {
- sattrs->subpage = true;
- }
- }
- }
- }
-
- /* The IDAU will override the SAU lookup results if it specifies
- * higher security than the SAU does.
- */
- if (!idau_ns) {
- if (sattrs->ns || (!idau_nsc && sattrs->nsc)) {
- sattrs->ns = false;
- sattrs->nsc = idau_nsc;
- }
- }
- break;
- }
-}
-
-static bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
- hwaddr *phys_ptr, MemTxAttrs *txattrs,
- int *prot, bool *is_subpage,
- ARMMMUFaultInfo *fi, uint32_t *mregion)
-{
- /* Perform a PMSAv8 MPU lookup (without also doing the SAU check
- * that a full phys-to-virt translation does).
- * mregion is (if not NULL) set to the region number which matched,
- * or -1 if no region number is returned (MPU off, address did not
- * hit a region, address hit in multiple regions).
- * We set is_subpage to true if the region hit doesn't cover the
- * entire TARGET_PAGE the address is within.
- */
- ARMCPU *cpu = arm_env_get_cpu(env);
- bool is_user = regime_is_user(env, mmu_idx);
- uint32_t secure = regime_is_secure(env, mmu_idx);
- int n;
- int matchregion = -1;
- bool hit = false;
- uint32_t addr_page_base = address & TARGET_PAGE_MASK;
- uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1);
-
- *is_subpage = false;
- *phys_ptr = address;
- *prot = 0;
- if (mregion) {
- *mregion = -1;
- }
-
- /* Unlike the ARM ARM pseudocode, we don't need to check whether this
- * was an exception vector read from the vector table (which is always
- * done using the default system address map), because those accesses
- * are done in arm_v7m_load_vector(), which always does a direct
- * read using address_space_ldl(), rather than going via this function.
- */
- if (regime_translation_disabled(env, mmu_idx)) { /* MPU disabled */
- hit = true;
- } else if (m_is_ppb_region(env, address)) {
- hit = true;
- } else if (pmsav7_use_background_region(cpu, mmu_idx, is_user)) {
- hit = true;
- } else {
- for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) {
- /* region search */
- /* Note that the base address is bits [31:5] from the register
- * with bits [4:0] all zeroes, but the limit address is bits
- * [31:5] from the register with bits [4:0] all ones.
- */
- uint32_t base = env->pmsav8.rbar[secure][n] & ~0x1f;
- uint32_t limit = env->pmsav8.rlar[secure][n] | 0x1f;
-
- if (!(env->pmsav8.rlar[secure][n] & 0x1)) {
- /* Region disabled */
- continue;
- }
-
- if (address < base || address > limit) {
- /*
- * Address not in this region. We must check whether the
- * region covers addresses in the same page as our address.
- * In that case we must not report a size that covers the
- * whole page for a subsequent hit against a different MPU
- * region or the background region, because it would result in
- * incorrect TLB hits for subsequent accesses to addresses that
- * are in this MPU region.
- */
- if (limit >= base &&
- ranges_overlap(base, limit - base + 1,
- addr_page_base,
- TARGET_PAGE_SIZE)) {
- *is_subpage = true;
- }
- continue;
- }
-
- if (base > addr_page_base || limit < addr_page_limit) {
- *is_subpage = true;
- }
-
- if (hit) {
- /* Multiple regions match -- always a failure (unlike
- * PMSAv7 where highest-numbered-region wins)
- */
- fi->type = ARMFault_Permission;
- fi->level = 1;
- return true;
- }
-
- matchregion = n;
- hit = true;
- }
- }
-
- if (!hit) {
- /* background fault */
- fi->type = ARMFault_Background;
- return true;
- }
-
- if (matchregion == -1) {
- /* hit using the background region */
- get_phys_addr_pmsav7_default(env, mmu_idx, address, prot);
- } else {
- uint32_t ap = extract32(env->pmsav8.rbar[secure][matchregion], 1, 2);
- uint32_t xn = extract32(env->pmsav8.rbar[secure][matchregion], 0, 1);
-
- if (m_is_system_region(env, address)) {
- /* System space is always execute never */
- xn = 1;
- }
-
- *prot = simple_ap_to_rw_prot(env, mmu_idx, ap);
- if (*prot && !xn) {
- *prot |= PAGE_EXEC;
- }
- /* We don't need to look the attribute up in the MAIR0/MAIR1
- * registers because that only tells us about cacheability.
- */
- if (mregion) {
- *mregion = matchregion;
- }
- }
-
- fi->type = ARMFault_Permission;
- fi->level = 1;
- return !(*prot & (1 << access_type));
+ return stage2 ? cpu_isar_feature(aa64_tgran16_2, cpu)
+ : cpu_isar_feature(aa64_tgran16, cpu);
}
-
-static bool get_phys_addr_pmsav8(CPUARMState *env, uint32_t address,
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
- hwaddr *phys_ptr, MemTxAttrs *txattrs,
- int *prot, target_ulong *page_size,
- ARMMMUFaultInfo *fi)
+static inline bool have64k(ARMCPU *cpu, bool stage2)
{
- uint32_t secure = regime_is_secure(env, mmu_idx);
- V8M_SAttributes sattrs = {};
- bool ret;
- bool mpu_is_subpage;
-
- if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
- v8m_security_lookup(env, address, access_type, mmu_idx, &sattrs);
- if (access_type == MMU_INST_FETCH) {
- /* Instruction fetches always use the MMU bank and the
- * transaction attribute determined by the fetch address,
- * regardless of CPU state. This is painful for QEMU
- * to handle, because it would mean we need to encode
- * into the mmu_idx not just the (user, negpri) information
- * for the current security state but also that for the
- * other security state, which would balloon the number
- * of mmu_idx values needed alarmingly.
- * Fortunately we can avoid this because it's not actually
- * possible to arbitrarily execute code from memory with
- * the wrong security attribute: it will always generate
- * an exception of some kind or another, apart from the
- * special case of an NS CPU executing an SG instruction
- * in S&NSC memory. So we always just fail the translation
- * here and sort things out in the exception handler
- * (including possibly emulating an SG instruction).
- */
- if (sattrs.ns != !secure) {
- if (sattrs.nsc) {
- fi->type = ARMFault_QEMU_NSCExec;
- } else {
- fi->type = ARMFault_QEMU_SFault;
- }
- *page_size = sattrs.subpage ? 1 : TARGET_PAGE_SIZE;
- *phys_ptr = address;
- *prot = 0;
- return true;
- }
- } else {
- /* For data accesses we always use the MMU bank indicated
- * by the current CPU state, but the security attributes
- * might downgrade a secure access to nonsecure.
- */
- if (sattrs.ns) {
- txattrs->secure = false;
- } else if (!secure) {
- /* NS access to S memory must fault.
- * Architecturally we should first check whether the
- * MPU information for this address indicates that we
- * are doing an unaligned access to Device memory, which
- * should generate a UsageFault instead. QEMU does not
- * currently check for that kind of unaligned access though.
- * If we added it we would need to do so as a special case
- * for M_FAKE_FSR_SFAULT in arm_v7m_cpu_do_interrupt().
- */
- fi->type = ARMFault_QEMU_SFault;
- *page_size = sattrs.subpage ? 1 : TARGET_PAGE_SIZE;
- *phys_ptr = address;
- *prot = 0;
- return true;
- }
- }
- }
-
- ret = pmsav8_mpu_lookup(env, address, access_type, mmu_idx, phys_ptr,
- txattrs, prot, &mpu_is_subpage, fi, NULL);
- /*
- * TODO: this is a temporary hack to ignore the fact that the SAU region
- * is smaller than a page if this is an executable region. We never
- * supported small MPU regions, but we did (accidentally) allow small
- * SAU regions, and if we now made small SAU regions not be executable
- * then this would break previously working guest code. We can't
- * remove this until/unless we implement support for execution from
- * small regions.
- */
- if (*prot & PAGE_EXEC) {
- sattrs.subpage = false;
- }
- *page_size = sattrs.subpage || mpu_is_subpage ? 1 : TARGET_PAGE_SIZE;
- return ret;
+ return stage2 ? cpu_isar_feature(aa64_tgran64_2, cpu)
+ : cpu_isar_feature(aa64_tgran64, cpu);
}
-static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
- hwaddr *phys_ptr, int *prot,
- ARMMMUFaultInfo *fi)
+static ARMGranuleSize sanitize_gran_size(ARMCPU *cpu, ARMGranuleSize gran,
+ bool stage2)
{
- int n;
- uint32_t mask;
- uint32_t base;
- bool is_user = regime_is_user(env, mmu_idx);
-
- if (regime_translation_disabled(env, mmu_idx)) {
- /* MPU disabled. */
- *phys_ptr = address;
- *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
- return false;
- }
-
- *phys_ptr = address;
- for (n = 7; n >= 0; n--) {
- base = env->cp15.c6_region[n];
- if ((base & 1) == 0) {
- continue;
+ switch (gran) {
+ case Gran4K:
+ if (have4k(cpu, stage2)) {
+ return gran;
}
- mask = 1 << ((base >> 1) & 0x1f);
- /* Keep this shift separate from the above to avoid an
- (undefined) << 32. */
- mask = (mask << 1) - 1;
- if (((base ^ address) & ~mask) == 0) {
- break;
- }
- }
- if (n < 0) {
- fi->type = ARMFault_Background;
- return true;
- }
-
- if (access_type == MMU_INST_FETCH) {
- mask = env->cp15.pmsav5_insn_ap;
- } else {
- mask = env->cp15.pmsav5_data_ap;
- }
- mask = (mask >> (n * 4)) & 0xf;
- switch (mask) {
- case 0:
- fi->type = ARMFault_Permission;
- fi->level = 1;
- return true;
- case 1:
- if (is_user) {
- fi->type = ARMFault_Permission;
- fi->level = 1;
- return true;
- }
- *prot = PAGE_READ | PAGE_WRITE;
break;
- case 2:
- *prot = PAGE_READ;
- if (!is_user) {
- *prot |= PAGE_WRITE;
+ case Gran16K:
+ if (have16k(cpu, stage2)) {
+ return gran;
}
break;
- case 3:
- *prot = PAGE_READ | PAGE_WRITE;
- break;
- case 5:
- if (is_user) {
- fi->type = ARMFault_Permission;
- fi->level = 1;
- return true;
+ case Gran64K:
+ if (have64k(cpu, stage2)) {
+ return gran;
}
- *prot = PAGE_READ;
break;
- case 6:
- *prot = PAGE_READ;
+ case GranInvalid:
break;
- default:
- /* Bad permission. */
- fi->type = ARMFault_Permission;
- fi->level = 1;
- return true;
- }
- *prot |= PAGE_EXEC;
- return false;
-}
-
-/* Combine either inner or outer cacheability attributes for normal
- * memory, according to table D4-42 and pseudocode procedure
- * CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM).
- *
- * NB: only stage 1 includes allocation hints (RW bits), leading to
- * some asymmetry.
- */
-static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2)
-{
- if (s1 == 4 || s2 == 4) {
- /* non-cacheable has precedence */
- return 4;
- } else if (extract32(s1, 2, 2) == 0 || extract32(s1, 2, 2) == 2) {
- /* stage 1 write-through takes precedence */
- return s1;
- } else if (extract32(s2, 2, 2) == 2) {
- /* stage 2 write-through takes precedence, but the allocation hint
- * is still taken from stage 1
- */
- return (2 << 2) | extract32(s1, 0, 2);
- } else { /* write-back */
- return s1;
}
-}
-
-/* Combine S1 and S2 cacheability/shareability attributes, per D4.5.4
- * and CombineS1S2Desc()
- *
- * @s1: Attributes from stage 1 walk
- * @s2: Attributes from stage 2 walk
- */
-static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2)
-{
- uint8_t s1lo = extract32(s1.attrs, 0, 4), s2lo = extract32(s2.attrs, 0, 4);
- uint8_t s1hi = extract32(s1.attrs, 4, 4), s2hi = extract32(s2.attrs, 4, 4);
- ARMCacheAttrs ret;
-
- /* Combine shareability attributes (table D4-43) */
- if (s1.shareability == 2 || s2.shareability == 2) {
- /* if either are outer-shareable, the result is outer-shareable */
- ret.shareability = 2;
- } else if (s1.shareability == 3 || s2.shareability == 3) {
- /* if either are inner-shareable, the result is inner-shareable */
- ret.shareability = 3;
- } else {
- /* both non-shareable */
- ret.shareability = 0;
- }
-
- /* Combine memory type and cacheability attributes */
- if (s1hi == 0 || s2hi == 0) {
- /* Device has precedence over normal */
- if (s1lo == 0 || s2lo == 0) {
- /* nGnRnE has precedence over anything */
- ret.attrs = 0;
- } else if (s1lo == 4 || s2lo == 4) {
- /* non-Reordering has precedence over Reordering */
- ret.attrs = 4; /* nGnRE */
- } else if (s1lo == 8 || s2lo == 8) {
- /* non-Gathering has precedence over Gathering */
- ret.attrs = 8; /* nGRE */
+ /*
+ * If the guest selects a granule size that isn't implemented,
+ * the architecture requires that we behave as if it selected one
+ * that is (with an IMPDEF choice of which one to pick). We choose
+ * to implement the smallest supported granule size.
+ */
+ if (have4k(cpu, stage2)) {
+ return Gran4K;
+ }
+ if (have16k(cpu, stage2)) {
+ return Gran16K;
+ }
+ assert(have64k(cpu, stage2));
+ return Gran64K;
+}
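
The fallback order above (4K, then 16K, then 64K) is an IMPDEF choice; the architecture only requires behaving as if some implemented granule had been selected. A minimal standalone sketch of the same policy, using a hypothetical bitmask of implemented granules instead of QEMU's cpu_isar_feature() checks:

    #include <assert.h>
    #include <stdio.h>

    /* Hypothetical bitmask of granule sizes an implementation supports. */
    enum { G4K = 1 << 0, G16K = 1 << 1, G64K = 1 << 2 };

    /*
     * Mirror of the fallback policy above: keep the requested size if it
     * is implemented, otherwise fall back to the smallest implemented one.
     */
    static int pick_granule(int requested, int implemented)
    {
        if (requested & implemented) {
            return requested;
        }
        if (implemented & G4K) {
            return G4K;
        }
        if (implemented & G16K) {
            return G16K;
        }
        assert(implemented & G64K);
        return G64K;
    }

    int main(void)
    {
        /* A CPU implementing only 16K and 64K: a 4K request degrades to 16K. */
        printf("%d\n", pick_granule(G4K, G16K | G64K) == G16K);
        return 0;
    }
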
+
+ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
+ ARMMMUIdx mmu_idx, bool data,
+ bool el1_is_aa32)
+{
+ uint64_t tcr = regime_tcr(env, mmu_idx);
+ bool epd, hpd, tsz_oob, ds, ha, hd;
+ int select, tsz, tbi, max_tsz, min_tsz, ps, sh;
+ ARMGranuleSize gran;
+ ARMCPU *cpu = env_archcpu(env);
+ bool stage2 = regime_is_stage2(mmu_idx);
+
+ if (!regime_has_2_ranges(mmu_idx)) {
+ select = 0;
+ tsz = extract32(tcr, 0, 6);
+ gran = tg0_to_gran_size(extract32(tcr, 14, 2));
+ if (stage2) {
+ /* VTCR_EL2 */
+ hpd = false;
} else {
- ret.attrs = 0xc; /* GRE */
- }
-
- /* Any location for which the resultant memory type is any
- * type of Device memory is always treated as Outer Shareable.
- */
- ret.shareability = 2;
- } else { /* Normal memory */
- /* Outer/inner cacheability combine independently */
- ret.attrs = combine_cacheattr_nibble(s1hi, s2hi) << 4
- | combine_cacheattr_nibble(s1lo, s2lo);
-
- if (ret.attrs == 0x44) {
- /* Any location for which the resultant memory type is Normal
- * Inner Non-cacheable, Outer Non-cacheable is always treated
- * as Outer Shareable.
- */
- ret.shareability = 2;
- }
- }
-
- return ret;
-}
-
+ hpd = extract32(tcr, 24, 1);
+ }
+ epd = false;
+ sh = extract32(tcr, 12, 2);
+ ps = extract32(tcr, 16, 3);
+ ha = extract32(tcr, 21, 1) && cpu_isar_feature(aa64_hafs, cpu);
+ hd = extract32(tcr, 22, 1) && cpu_isar_feature(aa64_hdbs, cpu);
+ ds = extract64(tcr, 32, 1);
+ } else {
+ bool e0pd;
-/* get_phys_addr - get the physical address for this virtual address
- *
- * Find the physical address corresponding to the given virtual address,
- * by doing a translation table walk on MMU based systems or using the
- * MPU state on MPU based systems.
- *
- * Returns false if the translation was successful. Otherwise, phys_ptr, attrs,
- * prot and page_size may not be filled in, and the populated fsr value provides
- * information on why the translation aborted, in the format of a
- * DFSR/IFSR fault register, with the following caveats:
- * * we honour the short vs long DFSR format differences.
- * * the WnR bit is never set (the caller must do this).
- *  * for PMSAv5 based systems we don't bother to return a full FSR format
- * value.
- *
- * @env: CPUARMState
- * @address: virtual address to get physical address for
- * @access_type: 0 for read, 1 for write, 2 for execute
- * @mmu_idx: MMU index indicating required translation regime
- * @phys_ptr: set to the physical address corresponding to the virtual address
- * @attrs: set to the memory transaction attributes to use
- * @prot: set to the permissions for the page containing phys_ptr
- * @page_size: set to the size of the page containing phys_ptr
- * @fi: set to fault info if the translation fails
- * @cacheattrs: (if non-NULL) set to the cacheability/shareability attributes
- */
-static bool get_phys_addr(CPUARMState *env, target_ulong address,
- MMUAccessType access_type, ARMMMUIdx mmu_idx,
- hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
- target_ulong *page_size,
- ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
-{
- if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) {
- /* Call ourselves recursively to do the stage 1 and then stage 2
- * translations.
+ /*
+ * Bit 55 is always between the two regions, and is canonical for
+ * determining if address tagging is enabled.
*/
- if (arm_feature(env, ARM_FEATURE_EL2)) {
- hwaddr ipa;
- int s2_prot;
- int ret;
- ARMCacheAttrs cacheattrs2 = {};
-
- ret = get_phys_addr(env, address, access_type,
- stage_1_mmu_idx(mmu_idx), &ipa, attrs,
- prot, page_size, fi, cacheattrs);
-
- /* If S1 fails or S2 is disabled, return early. */
- if (ret || regime_translation_disabled(env, ARMMMUIdx_S2NS)) {
- *phys_ptr = ipa;
- return ret;
- }
-
- /* S1 is done. Now do S2 translation. */
- ret = get_phys_addr_lpae(env, ipa, access_type, ARMMMUIdx_S2NS,
- phys_ptr, attrs, &s2_prot,
- page_size, fi,
- cacheattrs != NULL ? &cacheattrs2 : NULL);
- fi->s2addr = ipa;
- /* Combine the S1 and S2 perms. */
- *prot &= s2_prot;
-
- /* Combine the S1 and S2 cache attributes, if needed */
- if (!ret && cacheattrs != NULL) {
- *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2);
- }
-
- return ret;
+ select = extract64(va, 55, 1);
+ if (!select) {
+ tsz = extract32(tcr, 0, 6);
+ gran = tg0_to_gran_size(extract32(tcr, 14, 2));
+ epd = extract32(tcr, 7, 1);
+ sh = extract32(tcr, 12, 2);
+ hpd = extract64(tcr, 41, 1);
+ e0pd = extract64(tcr, 55, 1);
} else {
- /*
- * For non-EL2 CPUs a stage1+stage2 translation is just stage 1.
- */
- mmu_idx = stage_1_mmu_idx(mmu_idx);
+ tsz = extract32(tcr, 16, 6);
+ gran = tg1_to_gran_size(extract32(tcr, 30, 2));
+ epd = extract32(tcr, 23, 1);
+ sh = extract32(tcr, 28, 2);
+ hpd = extract64(tcr, 42, 1);
+ e0pd = extract64(tcr, 56, 1);
}
- }
+ ps = extract64(tcr, 32, 3);
+ ha = extract64(tcr, 39, 1) && cpu_isar_feature(aa64_hafs, cpu);
+ hd = extract64(tcr, 40, 1) && cpu_isar_feature(aa64_hdbs, cpu);
+ ds = extract64(tcr, 59, 1);
- /* The page table entries may downgrade secure to non-secure, but
- * cannot upgrade an non-secure translation regime's attributes
- * to secure.
- */
- attrs->secure = regime_is_secure(env, mmu_idx);
- attrs->user = regime_is_user(env, mmu_idx);
-
- /* Fast Context Switch Extension. This doesn't exist at all in v8.
- * In v7 and earlier it affects all stage 1 translations.
- */
- if (address < 0x02000000 && mmu_idx != ARMMMUIdx_S2NS
- && !arm_feature(env, ARM_FEATURE_V8)) {
- if (regime_el(env, mmu_idx) == 3) {
- address += env->cp15.fcseidr_s;
- } else {
- address += env->cp15.fcseidr_ns;
+ if (e0pd && cpu_isar_feature(aa64_e0pd, cpu) &&
+ regime_is_user(env, mmu_idx)) {
+ epd = true;
}
}
- if (arm_feature(env, ARM_FEATURE_PMSA)) {
- bool ret;
- *page_size = TARGET_PAGE_SIZE;
+ gran = sanitize_gran_size(cpu, gran, stage2);
- if (arm_feature(env, ARM_FEATURE_V8)) {
- /* PMSAv8 */
- ret = get_phys_addr_pmsav8(env, address, access_type, mmu_idx,
- phys_ptr, attrs, prot, page_size, fi);
- } else if (arm_feature(env, ARM_FEATURE_V7)) {
- /* PMSAv7 */
- ret = get_phys_addr_pmsav7(env, address, access_type, mmu_idx,
- phys_ptr, prot, page_size, fi);
- } else {
- /* Pre-v7 MPU */
- ret = get_phys_addr_pmsav5(env, address, access_type, mmu_idx,
- phys_ptr, prot, fi);
- }
- qemu_log_mask(CPU_LOG_MMU, "PMSA MPU lookup for %s at 0x%08" PRIx32
- " mmu_idx %u -> %s (prot %c%c%c)\n",
- access_type == MMU_DATA_LOAD ? "reading" :
- (access_type == MMU_DATA_STORE ? "writing" : "execute"),
- (uint32_t)address, mmu_idx,
- ret ? "Miss" : "Hit",
- *prot & PAGE_READ ? 'r' : '-',
- *prot & PAGE_WRITE ? 'w' : '-',
- *prot & PAGE_EXEC ? 'x' : '-');
-
- return ret;
- }
-
- /* Definitely a real MMU, not an MPU */
-
- if (regime_translation_disabled(env, mmu_idx)) {
- /* MMU disabled. */
- *phys_ptr = address;
- *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
- *page_size = TARGET_PAGE_SIZE;
- return 0;
- }
-
- if (regime_using_lpae_format(env, mmu_idx)) {
- return get_phys_addr_lpae(env, address, access_type, mmu_idx,
- phys_ptr, attrs, prot, page_size,
- fi, cacheattrs);
- } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) {
- return get_phys_addr_v6(env, address, access_type, mmu_idx,
- phys_ptr, attrs, prot, page_size, fi);
+ if (cpu_isar_feature(aa64_st, cpu)) {
+ max_tsz = 48 - (gran == Gran64K);
} else {
- return get_phys_addr_v5(env, address, access_type, mmu_idx,
- phys_ptr, prot, page_size, fi);
+ max_tsz = 39;
}
-}
-/* Walk the page table and (if the mapping exists) add the page
- * to the TLB. Return false on success, or true on failure. Populate
- * fsr with ARM DFSR/IFSR fault register format value on failure.
- */
-bool arm_tlb_fill(CPUState *cs, vaddr address,
- MMUAccessType access_type, int mmu_idx,
- ARMMMUFaultInfo *fi)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
- hwaddr phys_addr;
- target_ulong page_size;
- int prot;
- int ret;
- MemTxAttrs attrs = {};
-
- ret = get_phys_addr(env, address, access_type,
- core_to_arm_mmu_idx(env, mmu_idx), &phys_addr,
- &attrs, &prot, &page_size, fi, NULL);
- if (!ret) {
- /*
- * Map a single [sub]page. Regions smaller than our declared
- * target page size are handled specially, so for those we
- * pass in the exact addresses.
- */
- if (page_size >= TARGET_PAGE_SIZE) {
- phys_addr &= TARGET_PAGE_MASK;
- address &= TARGET_PAGE_MASK;
- }
- tlb_set_page_with_attrs(cs, address, phys_addr, attrs,
- prot, mmu_idx, page_size);
- return 0;
- }
-
- return ret;
-}
-
-hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
- MemTxAttrs *attrs)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
- hwaddr phys_addr;
- target_ulong page_size;
- int prot;
- bool ret;
- ARMMMUFaultInfo fi = {};
- ARMMMUIdx mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false));
-
- *attrs = (MemTxAttrs) {};
-
- ret = get_phys_addr(env, addr, 0, mmu_idx, &phys_addr,
- attrs, &prot, &page_size, &fi, NULL);
-
- if (ret) {
- return -1;
- }
- return phys_addr;
-}
-
-uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg)
-{
- uint32_t mask;
- unsigned el = arm_current_el(env);
-
- /* First handle registers which unprivileged can read */
-
- switch (reg) {
- case 0 ... 7: /* xPSR sub-fields */
- mask = 0;
- if ((reg & 1) && el) {
- mask |= XPSR_EXCP; /* IPSR (unpriv. reads as zero) */
- }
- if (!(reg & 4)) {
- mask |= XPSR_NZCV | XPSR_Q; /* APSR */
- }
- /* EPSR reads as zero */
- return xpsr_read(env) & mask;
- break;
- case 20: /* CONTROL */
- return env->v7m.control[env->v7m.secure];
- case 0x94: /* CONTROL_NS */
- /* We have to handle this here because unprivileged Secure code
- * can read the NS CONTROL register.
- */
- if (!env->v7m.secure) {
- return 0;
- }
- return env->v7m.control[M_REG_NS];
- }
-
- if (el == 0) {
- return 0; /* unprivileged reads others as zero */
- }
-
- if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
- switch (reg) {
- case 0x88: /* MSP_NS */
- if (!env->v7m.secure) {
- return 0;
- }
- return env->v7m.other_ss_msp;
- case 0x89: /* PSP_NS */
- if (!env->v7m.secure) {
- return 0;
- }
- return env->v7m.other_ss_psp;
- case 0x8a: /* MSPLIM_NS */
- if (!env->v7m.secure) {
- return 0;
- }
- return env->v7m.msplim[M_REG_NS];
- case 0x8b: /* PSPLIM_NS */
- if (!env->v7m.secure) {
- return 0;
- }
- return env->v7m.psplim[M_REG_NS];
- case 0x90: /* PRIMASK_NS */
- if (!env->v7m.secure) {
- return 0;
- }
- return env->v7m.primask[M_REG_NS];
- case 0x91: /* BASEPRI_NS */
- if (!env->v7m.secure) {
- return 0;
- }
- return env->v7m.basepri[M_REG_NS];
- case 0x93: /* FAULTMASK_NS */
- if (!env->v7m.secure) {
- return 0;
- }
- return env->v7m.faultmask[M_REG_NS];
- case 0x98: /* SP_NS */
- {
- /* This gives the non-secure SP selected based on whether we're
- * currently in handler mode or not, using the NS CONTROL.SPSEL.
- */
- bool spsel = env->v7m.control[M_REG_NS] & R_V7M_CONTROL_SPSEL_MASK;
-
- if (!env->v7m.secure) {
- return 0;
- }
- if (!arm_v7m_is_handler_mode(env) && spsel) {
- return env->v7m.other_ss_psp;
+ /*
+ * DS is RES0 unless FEAT_LPA2 is supported for the given page size;
+ * adjust the effective value of DS, as documented.
+ */
+ min_tsz = 16;
+ if (gran == Gran64K) {
+ if (cpu_isar_feature(aa64_lva, cpu)) {
+ min_tsz = 12;
+ }
+ ds = false;
+ } else if (ds) {
+ if (regime_is_stage2(mmu_idx)) {
+ if (gran == Gran16K) {
+ ds = cpu_isar_feature(aa64_tgran16_2_lpa2, cpu);
} else {
- return env->v7m.other_ss_msp;
- }
- }
- default:
- break;
- }
- }
-
- switch (reg) {
- case 8: /* MSP */
- return v7m_using_psp(env) ? env->v7m.other_sp : env->regs[13];
- case 9: /* PSP */
- return v7m_using_psp(env) ? env->regs[13] : env->v7m.other_sp;
- case 10: /* MSPLIM */
- if (!arm_feature(env, ARM_FEATURE_V8)) {
- goto bad_reg;
- }
- return env->v7m.msplim[env->v7m.secure];
- case 11: /* PSPLIM */
- if (!arm_feature(env, ARM_FEATURE_V8)) {
- goto bad_reg;
- }
- return env->v7m.psplim[env->v7m.secure];
- case 16: /* PRIMASK */
- return env->v7m.primask[env->v7m.secure];
- case 17: /* BASEPRI */
- case 18: /* BASEPRI_MAX */
- return env->v7m.basepri[env->v7m.secure];
- case 19: /* FAULTMASK */
- return env->v7m.faultmask[env->v7m.secure];
- default:
- bad_reg:
- qemu_log_mask(LOG_GUEST_ERROR, "Attempt to read unknown special"
- " register %d\n", reg);
- return 0;
- }
-}
-
-void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
-{
- /* We're passed bits [11..0] of the instruction; extract
- * SYSm and the mask bits.
- * Invalid combinations of SYSm and mask are UNPREDICTABLE;
- * we choose to treat them as if the mask bits were valid.
- * NB that the pseudocode 'mask' variable is bits [11..10],
- * whereas ours is [11..8].
- */
- uint32_t mask = extract32(maskreg, 8, 4);
- uint32_t reg = extract32(maskreg, 0, 8);
-
- if (arm_current_el(env) == 0 && reg > 7) {
- /* only xPSR sub-fields may be written by unprivileged */
- return;
- }
-
- if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
- switch (reg) {
- case 0x88: /* MSP_NS */
- if (!env->v7m.secure) {
- return;
- }
- env->v7m.other_ss_msp = val;
- return;
- case 0x89: /* PSP_NS */
- if (!env->v7m.secure) {
- return;
- }
- env->v7m.other_ss_psp = val;
- return;
- case 0x8a: /* MSPLIM_NS */
- if (!env->v7m.secure) {
- return;
- }
- env->v7m.msplim[M_REG_NS] = val & ~7;
- return;
- case 0x8b: /* PSPLIM_NS */
- if (!env->v7m.secure) {
- return;
- }
- env->v7m.psplim[M_REG_NS] = val & ~7;
- return;
- case 0x90: /* PRIMASK_NS */
- if (!env->v7m.secure) {
- return;
- }
- env->v7m.primask[M_REG_NS] = val & 1;
- return;
- case 0x91: /* BASEPRI_NS */
- if (!env->v7m.secure || !arm_feature(env, ARM_FEATURE_M_MAIN)) {
- return;
- }
- env->v7m.basepri[M_REG_NS] = val & 0xff;
- return;
- case 0x93: /* FAULTMASK_NS */
- if (!env->v7m.secure || !arm_feature(env, ARM_FEATURE_M_MAIN)) {
- return;
- }
- env->v7m.faultmask[M_REG_NS] = val & 1;
- return;
- case 0x94: /* CONTROL_NS */
- if (!env->v7m.secure) {
- return;
- }
- write_v7m_control_spsel_for_secstate(env,
- val & R_V7M_CONTROL_SPSEL_MASK,
- M_REG_NS);
- if (arm_feature(env, ARM_FEATURE_M_MAIN)) {
- env->v7m.control[M_REG_NS] &= ~R_V7M_CONTROL_NPRIV_MASK;
- env->v7m.control[M_REG_NS] |= val & R_V7M_CONTROL_NPRIV_MASK;
- }
- return;
- case 0x98: /* SP_NS */
- {
- /* This gives the non-secure SP selected based on whether we're
- * currently in handler mode or not, using the NS CONTROL.SPSEL.
- */
- bool spsel = env->v7m.control[M_REG_NS] & R_V7M_CONTROL_SPSEL_MASK;
- bool is_psp = !arm_v7m_is_handler_mode(env) && spsel;
- uint32_t limit;
-
- if (!env->v7m.secure) {
- return;
- }
-
- limit = is_psp ? env->v7m.psplim[false] : env->v7m.msplim[false];
-
- if (val < limit) {
- CPUState *cs = CPU(arm_env_get_cpu(env));
-
- cpu_restore_state(cs, GETPC(), true);
- raise_exception(env, EXCP_STKOF, 0, 1);
+ ds = cpu_isar_feature(aa64_tgran4_2_lpa2, cpu);
}
-
- if (is_psp) {
- env->v7m.other_ss_psp = val;
+ } else {
+ if (gran == Gran16K) {
+ ds = cpu_isar_feature(aa64_tgran16_lpa2, cpu);
} else {
- env->v7m.other_ss_msp = val;
+ ds = cpu_isar_feature(aa64_tgran4_lpa2, cpu);
}
- return;
}
- default:
- break;
+ if (ds) {
+ min_tsz = 12;
}
}
- switch (reg) {
- case 0 ... 7: /* xPSR sub-fields */
- /* only APSR is actually writable */
- if (!(reg & 4)) {
- uint32_t apsrmask = 0;
-
- if (mask & 8) {
- apsrmask |= XPSR_NZCV | XPSR_Q;
- }
- if ((mask & 4) && arm_feature(env, ARM_FEATURE_THUMB_DSP)) {
- apsrmask |= XPSR_GE;
- }
- xpsr_write(env, val, apsrmask);
- }
- break;
- case 8: /* MSP */
- if (v7m_using_psp(env)) {
- env->v7m.other_sp = val;
- } else {
- env->regs[13] = val;
- }
- break;
- case 9: /* PSP */
- if (v7m_using_psp(env)) {
- env->regs[13] = val;
- } else {
- env->v7m.other_sp = val;
- }
- break;
- case 10: /* MSPLIM */
- if (!arm_feature(env, ARM_FEATURE_V8)) {
- goto bad_reg;
- }
- env->v7m.msplim[env->v7m.secure] = val & ~7;
- break;
- case 11: /* PSPLIM */
- if (!arm_feature(env, ARM_FEATURE_V8)) {
- goto bad_reg;
- }
- env->v7m.psplim[env->v7m.secure] = val & ~7;
- break;
- case 16: /* PRIMASK */
- env->v7m.primask[env->v7m.secure] = val & 1;
- break;
- case 17: /* BASEPRI */
- if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
- goto bad_reg;
- }
- env->v7m.basepri[env->v7m.secure] = val & 0xff;
- break;
- case 18: /* BASEPRI_MAX */
- if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
- goto bad_reg;
- }
- val &= 0xff;
- if (val != 0 && (val < env->v7m.basepri[env->v7m.secure]
- || env->v7m.basepri[env->v7m.secure] == 0)) {
- env->v7m.basepri[env->v7m.secure] = val;
- }
- break;
- case 19: /* FAULTMASK */
- if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
- goto bad_reg;
- }
- env->v7m.faultmask[env->v7m.secure] = val & 1;
- break;
- case 20: /* CONTROL */
- /* Writing to the SPSEL bit only has an effect if we are in
- * thread mode; other bits can be updated by any privileged code.
- * write_v7m_control_spsel() deals with updating the SPSEL bit in
- * env->v7m.control, so we only need update the others.
- * For v7M, we must just ignore explicit writes to SPSEL in handler
- * mode; for v8M the write is permitted but will have no effect.
+ if (stage2 && el1_is_aa32) {
+ /*
+ * For AArch32 EL1 the min txsz (and thus max IPA size) requirements
+ * are loosened: a configured IPA of 40 bits is permitted even if
+ * the implemented PA is less than that (and so a 40 bit IPA would
+ * fault for an AArch64 EL1). See R_DTLMN.
*/
- if (arm_feature(env, ARM_FEATURE_V8) ||
- !arm_v7m_is_handler_mode(env)) {
- write_v7m_control_spsel(env, (val & R_V7M_CONTROL_SPSEL_MASK) != 0);
- }
- if (arm_feature(env, ARM_FEATURE_M_MAIN)) {
- env->v7m.control[env->v7m.secure] &= ~R_V7M_CONTROL_NPRIV_MASK;
- env->v7m.control[env->v7m.secure] |= val & R_V7M_CONTROL_NPRIV_MASK;
- }
- break;
- default:
- bad_reg:
- qemu_log_mask(LOG_GUEST_ERROR, "Attempt to write unknown special"
- " register %d\n", reg);
- return;
- }
-}
-
-uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op)
-{
- /* Implement the TT instruction. op is bits [7:6] of the insn. */
- bool forceunpriv = op & 1;
- bool alt = op & 2;
- V8M_SAttributes sattrs = {};
- uint32_t tt_resp;
- bool r, rw, nsr, nsrw, mrvalid;
- int prot;
- ARMMMUFaultInfo fi = {};
- MemTxAttrs attrs = {};
- hwaddr phys_addr;
- ARMMMUIdx mmu_idx;
- uint32_t mregion;
- bool targetpriv;
- bool targetsec = env->v7m.secure;
- bool is_subpage;
-
- /* Work out what the security state and privilege level we're
- * interested in is...
- */
- if (alt) {
- targetsec = !targetsec;
+ min_tsz = MIN(min_tsz, 24);
}
- if (forceunpriv) {
- targetpriv = false;
+ if (tsz > max_tsz) {
+ tsz = max_tsz;
+ tsz_oob = true;
+ } else if (tsz < min_tsz) {
+ tsz = min_tsz;
+ tsz_oob = true;
} else {
- targetpriv = arm_v7m_is_handler_mode(env) ||
- !(env->v7m.control[targetsec] & R_V7M_CONTROL_NPRIV_MASK);
- }
-
- /* ...and then figure out which MMU index this is */
- mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, targetsec, targetpriv);
-
- /* We know that the MPU and SAU don't care about the access type
- * for our purposes beyond that we don't want to claim to be
- * an insn fetch, so we arbitrarily call this a read.
- */
-
- /* MPU region info only available for privileged or if
- * inspecting the other MPU state.
- */
- if (arm_current_el(env) != 0 || alt) {
- /* We can ignore the return value as prot is always set */
- pmsav8_mpu_lookup(env, addr, MMU_DATA_LOAD, mmu_idx,
- &phys_addr, &attrs, &prot, &is_subpage,
- &fi, &mregion);
- if (mregion == -1) {
- mrvalid = false;
- mregion = 0;
- } else {
- mrvalid = true;
- }
- r = prot & PAGE_READ;
- rw = prot & PAGE_WRITE;
- } else {
- r = false;
- rw = false;
- mrvalid = false;
- mregion = 0;
- }
-
- if (env->v7m.secure) {
- v8m_security_lookup(env, addr, MMU_DATA_LOAD, mmu_idx, &sattrs);
- nsr = sattrs.ns && r;
- nsrw = sattrs.ns && rw;
- } else {
- sattrs.ns = true;
- nsr = false;
- nsrw = false;
- }
-
- tt_resp = (sattrs.iregion << 24) |
- (sattrs.irvalid << 23) |
- ((!sattrs.ns) << 22) |
- (nsrw << 21) |
- (nsr << 20) |
- (rw << 19) |
- (r << 18) |
- (sattrs.srvalid << 17) |
- (mrvalid << 16) |
- (sattrs.sregion << 8) |
- mregion;
-
- return tt_resp;
-}
-
-#endif
-
-void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
-{
- /* Implement DC ZVA, which zeroes a fixed-length block of memory.
- * Note that we do not implement the (architecturally mandated)
- * alignment fault for attempts to use this on Device memory
- * (which matches the usual QEMU behaviour of not implementing either
- * alignment faults or any memory attribute handling).
- */
-
- ARMCPU *cpu = arm_env_get_cpu(env);
- uint64_t blocklen = 4 << cpu->dcz_blocksize;
- uint64_t vaddr = vaddr_in & ~(blocklen - 1);
-
-#ifndef CONFIG_USER_ONLY
- {
- /* Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
- * the block size so we might have to do more than one TLB lookup.
- * We know that in fact for any v8 CPU the page size is at least 4K
- * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
- * 1K as an artefact of legacy v5 subpage support being present in the
- * same QEMU executable.
- */
- int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
- void *hostaddr[maxidx];
- int try, i;
- unsigned mmu_idx = cpu_mmu_index(env, false);
- TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
-
- for (try = 0; try < 2; try++) {
-
- for (i = 0; i < maxidx; i++) {
- hostaddr[i] = tlb_vaddr_to_host(env,
- vaddr + TARGET_PAGE_SIZE * i,
- 1, mmu_idx);
- if (!hostaddr[i]) {
- break;
- }
- }
- if (i == maxidx) {
- /* If it's all in the TLB it's fair game for just writing to;
- * we know we don't need to update dirty status, etc.
- */
- for (i = 0; i < maxidx - 1; i++) {
- memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
- }
- memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
- return;
- }
- /* OK, try a store and see if we can populate the tlb. This
- * might cause an exception if the memory isn't writable,
- * in which case we will longjmp out of here. We must for
- * this purpose use the actual register value passed to us
- * so that we get the fault address right.
- */
- helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());
- /* Now we can populate the other TLB entries, if any */
- for (i = 0; i < maxidx; i++) {
- uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
- if (va != (vaddr_in & TARGET_PAGE_MASK)) {
- helper_ret_stb_mmu(env, va, 0, oi, GETPC());
- }
- }
- }
-
- /* Slow path (probably attempt to do this to an I/O device or
- * similar, or clearing of a block of code we have translations
- * cached for). Just do a series of byte writes as the architecture
- * demands. It's not worth trying to use a cpu_physical_memory_map(),
- * memset(), unmap() sequence here because:
- * + we'd need to account for the blocksize being larger than a page
- * + the direct-RAM access case is almost always going to be dealt
- * with in the fastpath code above, so there's no speed benefit
- * + we would have to deal with the map returning NULL because the
- * bounce buffer was in use
- */
- for (i = 0; i < blocklen; i++) {
- helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());
- }
- }
-#else
- memset(g2h(vaddr), 0, blocklen);
-#endif
+ tsz_oob = false;
+ }
+
+ /* Present TBI as a composite with TBID. */
+ tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
+ if (!data) {
+ tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx);
+ }
+ tbi = (tbi >> select) & 1;
+
+ return (ARMVAParameters) {
+ .tsz = tsz,
+ .ps = ps,
+ .sh = sh,
+ .select = select,
+ .tbi = tbi,
+ .epd = epd,
+ .hpd = hpd,
+ .tsz_oob = tsz_oob,
+ .ds = ds,
+ .ha = ha,
+ .hd = ha && hd,
+ .gran = gran,
+ };
}
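
For both the single-range and two-range regimes handled above, the effective input address size works out to 64 - TxSZ bits, with TxSZ clamped into [min_tsz, max_tsz] and the clamping reported through tsz_oob; for two-range regimes, VA bit 55 selects which half (and therefore which TxSZ/TG field) applies. A small standalone sketch of just that clamping, with hypothetical helper and parameter names:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * Hypothetical helper mirroring the clamping above: the effective input
     * address size is 64 - TxSZ bits, with TxSZ forced into [min_tsz, max_tsz]
     * and the out-of-bounds condition reported back to the caller.
     */
    static int effective_ia_bits(int tsz, int min_tsz, int max_tsz, bool *oob)
    {
        *oob = false;
        if (tsz > max_tsz) {
            tsz = max_tsz;
            *oob = true;
        } else if (tsz < min_tsz) {
            tsz = min_tsz;
            *oob = true;
        }
        return 64 - tsz;
    }

    int main(void)
    {
        bool oob;
        uint64_t va = 0xffff800000001000ull;

        /* T0SZ = 25 gives a 39-bit range; T0SZ = 10 is clamped to min_tsz = 16. */
        printf("%d %d\n", effective_ia_bits(25, 16, 39, &oob),
               effective_ia_bits(10, 16, 39, &oob));
        /* Bit 55 of the VA picks the TTBR0 (0) or TTBR1 (1) half. */
        printf("select=%d\n", (int)((va >> 55) & 1));
        return 0;
    }
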
-/* Note that signed overflow is undefined in C. The following routines are
- careful to use unsigned types where modulo arithmetic is required.
- Failure to do so _will_ break on newer gcc. */
+/*
+ * Note that signed overflow is undefined in C. The following routines are
+ * careful to use unsigned types where modulo arithmetic is required.
+ * Failure to do so _will_ break on newer gcc.
+ */
/* Signed saturating arithmetic. */
@@ -11307,10 +12050,11 @@ static inline uint16_t add16_sat(uint16_t a, uint16_t b)
res = a + b;
if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) {
- if (a & 0x8000)
+ if (a & 0x8000) {
res = 0x8000;
- else
+ } else {
res = 0x7fff;
+ }
}
return res;
}
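
The changes in these arithmetic hunks only add braces for coding style; the helpers themselves work by holding the sum in an unsigned type (so the wraparound is well defined) and detecting signed overflow from the sign bits, since overflowing a signed type is undefined behaviour in C. A standalone sketch of the same trick for one 16-bit lane, with a hypothetical function name:

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Signed saturating 16-bit add with the sum held in an unsigned type:
     * overflow occurred iff the result's sign differs from a's sign while
     * a and b had the same sign.
     */
    static uint16_t sat_add16(uint16_t a, uint16_t b)
    {
        uint16_t res = a + b;

        if (((res ^ a) & 0x8000) && !((a ^ b) & 0x8000)) {
            res = (a & 0x8000) ? 0x8000 : 0x7fff;
        }
        return res;
    }

    int main(void)
    {
        /* 0x7fff + 1 saturates to 0x7fff; 0x8000 + 0xffff (-1) saturates to 0x8000. */
        printf("%04x %04x\n", sat_add16(0x7fff, 1), sat_add16(0x8000, 0xffff));
        return 0;
    }
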
@@ -11322,10 +12066,11 @@ static inline uint8_t add8_sat(uint8_t a, uint8_t b)
res = a + b;
if (((res ^ a) & 0x80) && !((a ^ b) & 0x80)) {
- if (a & 0x80)
+ if (a & 0x80) {
res = 0x80;
- else
+ } else {
res = 0x7f;
+ }
}
return res;
}
@@ -11337,10 +12082,11 @@ static inline uint16_t sub16_sat(uint16_t a, uint16_t b)
res = a - b;
if (((res ^ a) & 0x8000) && ((a ^ b) & 0x8000)) {
- if (a & 0x8000)
+ if (a & 0x8000) {
res = 0x8000;
- else
+ } else {
res = 0x7fff;
+ }
}
return res;
}
@@ -11352,10 +12098,11 @@ static inline uint8_t sub8_sat(uint8_t a, uint8_t b)
res = a - b;
if (((res ^ a) & 0x80) && ((a ^ b) & 0x80)) {
- if (a & 0x80)
+ if (a & 0x80) {
res = 0x80;
- else
+ } else {
res = 0x7f;
+ }
}
return res;
}
@@ -11373,34 +12120,38 @@ static inline uint16_t add16_usat(uint16_t a, uint16_t b)
{
uint16_t res;
res = a + b;
- if (res < a)
+ if (res < a) {
res = 0xffff;
+ }
return res;
}
static inline uint16_t sub16_usat(uint16_t a, uint16_t b)
{
- if (a > b)
+ if (a > b) {
return a - b;
- else
+ } else {
return 0;
+ }
}
static inline uint8_t add8_usat(uint8_t a, uint8_t b)
{
uint8_t res;
res = a + b;
- if (res < a)
+ if (res < a) {
res = 0xff;
+ }
return res;
}
static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
{
- if (a > b)
+ if (a > b) {
return a - b;
- else
+ } else {
return 0;
+ }
}
#define ADD16(a, b, n) RESULT(add16_usat(a, b), n, 16);
@@ -11418,7 +12169,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
RESULT(sum, n, 16); \
if (sum >= 0) \
ge |= 3 << (n * 2); \
- } while(0)
+ } while (0)
#define SARITH8(a, b, n, op) do { \
int32_t sum; \
@@ -11426,7 +12177,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
RESULT(sum, n, 8); \
if (sum >= 0) \
ge |= 1 << n; \
- } while(0)
+ } while (0)
#define ADD16(a, b, n) SARITH16(a, b, n, +)
@@ -11445,7 +12196,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
RESULT(sum, n, 16); \
if ((sum >> 16) == 1) \
ge |= 3 << (n * 2); \
- } while(0)
+ } while (0)
#define ADD8(a, b, n) do { \
uint32_t sum; \
@@ -11453,7 +12204,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
RESULT(sum, n, 8); \
if ((sum >> 8) == 1) \
ge |= 1 << n; \
- } while(0)
+ } while (0)
#define SUB16(a, b, n) do { \
uint32_t sum; \
@@ -11461,7 +12212,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
RESULT(sum, n, 16); \
if ((sum >> 16) == 0) \
ge |= 3 << (n * 2); \
- } while(0)
+ } while (0)
#define SUB8(a, b, n) do { \
uint32_t sum; \
@@ -11469,7 +12220,7 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
RESULT(sum, n, 8); \
if ((sum >> 8) == 0) \
ge |= 1 << n; \
- } while(0)
+ } while (0)
#define PFX u
#define ARITH_GE
@@ -11504,10 +12255,11 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b)
static inline uint8_t do_usad(uint8_t a, uint8_t b)
{
- if (a > b)
+ if (a > b) {
return a - b;
- else
+ } else {
return b - a;
+ }
}
/* Unsigned sum of absolute byte differences. */
@@ -11516,7 +12268,7 @@ uint32_t HELPER(usad8)(uint32_t a, uint32_t b)
uint32_t sum;
sum = do_usad(a, b);
sum += do_usad(a >> 8, b >> 8);
- sum += do_usad(a >> 16, b >>16);
+ sum += do_usad(a >> 16, b >> 16);
sum += do_usad(a >> 24, b >> 24);
return sum;
}
@@ -11527,1231 +12279,347 @@ uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b)
uint32_t mask;
mask = 0;
- if (flags & 1)
+ if (flags & 1) {
mask |= 0xff;
- if (flags & 2)
+ }
+ if (flags & 2) {
mask |= 0xff00;
- if (flags & 4)
+ }
+ if (flags & 4) {
mask |= 0xff0000;
- if (flags & 8)
+ }
+ if (flags & 8) {
mask |= 0xff000000;
+ }
return (a & mask) | (b & ~mask);
}
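
HELPER(sel_flags) expands the four flag bits into a byte-granular mask, so each result byte comes from a where the corresponding bit is set and from b otherwise. A quick standalone check of that expansion, with a hypothetical helper name:

    #include <stdint.h>
    #include <stdio.h>

    /* Expand the low four flag bits to a byte mask, as the helper above does. */
    static uint32_t ge_to_mask(uint32_t flags)
    {
        uint32_t mask = 0;

        if (flags & 1) {
            mask |= 0x000000ff;
        }
        if (flags & 2) {
            mask |= 0x0000ff00;
        }
        if (flags & 4) {
            mask |= 0x00ff0000;
        }
        if (flags & 8) {
            mask |= 0xff000000;
        }
        return mask;
    }

    int main(void)
    {
        /* flags = 0b0101 selects bytes 0 and 2 from a, bytes 1 and 3 from b. */
        uint32_t a = 0x11223344, b = 0xaabbccdd, mask = ge_to_mask(5);

        printf("%08x\n", (a & mask) | (b & ~mask));   /* prints aa22cc44 */
        return 0;
    }
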
-/* VFP support. We follow the convention used for VFP instructions:
- Single precision routines have a "s" suffix, double precision a
- "d" suffix. */
-
-/* Convert host exception flags to vfp form. */
-static inline int vfp_exceptbits_from_host(int host_bits)
-{
- int target_bits = 0;
-
- if (host_bits & float_flag_invalid)
- target_bits |= 1;
- if (host_bits & float_flag_divbyzero)
- target_bits |= 2;
- if (host_bits & float_flag_overflow)
- target_bits |= 4;
- if (host_bits & (float_flag_underflow | float_flag_output_denormal))
- target_bits |= 8;
- if (host_bits & float_flag_inexact)
- target_bits |= 0x10;
- if (host_bits & float_flag_input_denormal)
- target_bits |= 0x80;
- return target_bits;
-}
-
-uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
+/*
+ * CRC helpers.
+ * The upper bytes of val (above the number specified by 'bytes') must have
+ * been zeroed out by the caller.
+ */
+uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
{
- int i;
- uint32_t fpscr;
-
- fpscr = (env->vfp.xregs[ARM_VFP_FPSCR] & 0xffc8ffff)
- | (env->vfp.vec_len << 16)
- | (env->vfp.vec_stride << 20);
+ uint8_t buf[4];
- i = get_float_exception_flags(&env->vfp.fp_status);
- i |= get_float_exception_flags(&env->vfp.standard_fp_status);
- /* FZ16 does not generate an input denormal exception. */
- i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
- & ~float_flag_input_denormal);
+ stl_le_p(buf, val);
- fpscr |= vfp_exceptbits_from_host(i);
- return fpscr;
+ /* zlib crc32 converts the accumulator and output to one's complement. */
+ return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}
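
The xor-in/xor-out wrapping is needed because zlib's crc32() inverts its accumulator on entry and exit, while the guest-visible accumulator here is the raw, non-inverted CRC state. A standalone sketch against plain zlib (link with -lz; the wrapper name is hypothetical) showing that chunk-wise updates through the same wrapping, seeded with 0xffffffff and inverted at the end, match a one-shot zlib CRC-32:

    #include <assert.h>
    #include <stdint.h>
    #include <zlib.h>

    /* Same wrapping as the helper: feed zlib the raw (non-inverted) accumulator
     * and strip zlib's final inversion again on the way out. */
    static uint32_t crc32_raw_update(uint32_t acc, const uint8_t *buf, unsigned len)
    {
        return crc32(acc ^ 0xffffffff, buf, len) ^ 0xffffffff;
    }

    int main(void)
    {
        static const uint8_t data[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
        uint32_t acc = 0xffffffff;      /* conventional CRC-32 seed */

        /* Update four bytes at a time, as a guest would via CRC32W. */
        for (unsigned i = 0; i < sizeof(data); i += 4) {
            acc = crc32_raw_update(acc, data + i, 4);
        }
        /* The final inversion yields the standard CRC-32 of the whole buffer. */
        assert((acc ^ 0xffffffff) == crc32(0, data, sizeof(data)));
        return 0;
    }
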
-uint32_t vfp_get_fpscr(CPUARMState *env)
+uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
{
- return HELPER(vfp_get_fpscr)(env);
-}
+ uint8_t buf[4];
-/* Convert vfp exception flags to target form. */
-static inline int vfp_exceptbits_to_host(int target_bits)
-{
- int host_bits = 0;
+ stl_le_p(buf, val);
- if (target_bits & 1)
- host_bits |= float_flag_invalid;
- if (target_bits & 2)
- host_bits |= float_flag_divbyzero;
- if (target_bits & 4)
- host_bits |= float_flag_overflow;
- if (target_bits & 8)
- host_bits |= float_flag_underflow;
- if (target_bits & 0x10)
- host_bits |= float_flag_inexact;
- if (target_bits & 0x80)
- host_bits |= float_flag_input_denormal;
- return host_bits;
+ /* Linux crc32c converts the output to one's complement. */
+ return crc32c(acc, buf, bytes) ^ 0xffffffff;
}
-void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
+/*
+ * Return the exception level to which FP-disabled exceptions should
+ * be taken, or 0 if FP is enabled.
+ */
+int fp_exception_el(CPUARMState *env, int cur_el)
{
- int i;
- uint32_t changed;
-
- /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */
- if (!arm_feature(env, ARM_FEATURE_V8_FP16)) {
- val &= ~FPCR_FZ16;
- }
-
- changed = env->vfp.xregs[ARM_VFP_FPSCR];
- env->vfp.xregs[ARM_VFP_FPSCR] = (val & 0xffc8ffff);
- env->vfp.vec_len = (val >> 16) & 7;
- env->vfp.vec_stride = (val >> 20) & 3;
-
- changed ^= val;
- if (changed & (3 << 22)) {
- i = (val >> 22) & 3;
- switch (i) {
- case FPROUNDING_TIEEVEN:
- i = float_round_nearest_even;
- break;
- case FPROUNDING_POSINF:
- i = float_round_up;
- break;
- case FPROUNDING_NEGINF:
- i = float_round_down;
- break;
- case FPROUNDING_ZERO:
- i = float_round_to_zero;
- break;
- }
- set_float_rounding_mode(i, &env->vfp.fp_status);
- set_float_rounding_mode(i, &env->vfp.fp_status_f16);
- }
- if (changed & FPCR_FZ16) {
- bool ftz_enabled = val & FPCR_FZ16;
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
- }
- if (changed & FPCR_FZ) {
- bool ftz_enabled = val & FPCR_FZ;
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status);
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status);
- }
- if (changed & FPCR_DN) {
- bool dnan_enabled = val & FPCR_DN;
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
- }
+#ifndef CONFIG_USER_ONLY
+ uint64_t hcr_el2;
- /* The exception flags are ORed together when we read fpscr so we
- * only need to preserve the current state in one of our
- * float_status values.
+ /*
+ * CPACR and the CPTR registers don't exist before v6, so FP is
+ * always accessible
*/
- i = vfp_exceptbits_to_host(val);
- set_float_exception_flags(i, &env->vfp.fp_status);
- set_float_exception_flags(0, &env->vfp.fp_status_f16);
- set_float_exception_flags(0, &env->vfp.standard_fp_status);
-}
-
-void vfp_set_fpscr(CPUARMState *env, uint32_t val)
-{
- HELPER(vfp_set_fpscr)(env, val);
-}
-
-#define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
-
-#define VFP_BINOP(name) \
-float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
-{ \
- float_status *fpst = fpstp; \
- return float32_ ## name(a, b, fpst); \
-} \
-float64 VFP_HELPER(name, d)(float64 a, float64 b, void *fpstp) \
-{ \
- float_status *fpst = fpstp; \
- return float64_ ## name(a, b, fpst); \
-}
-VFP_BINOP(add)
-VFP_BINOP(sub)
-VFP_BINOP(mul)
-VFP_BINOP(div)
-VFP_BINOP(min)
-VFP_BINOP(max)
-VFP_BINOP(minnum)
-VFP_BINOP(maxnum)
-#undef VFP_BINOP
-
-float32 VFP_HELPER(neg, s)(float32 a)
-{
- return float32_chs(a);
-}
-
-float64 VFP_HELPER(neg, d)(float64 a)
-{
- return float64_chs(a);
-}
-
-float32 VFP_HELPER(abs, s)(float32 a)
-{
- return float32_abs(a);
-}
-
-float64 VFP_HELPER(abs, d)(float64 a)
-{
- return float64_abs(a);
-}
-
-float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env)
-{
- return float32_sqrt(a, &env->vfp.fp_status);
-}
-
-float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
-{
- return float64_sqrt(a, &env->vfp.fp_status);
-}
-
-/* XXX: check quiet/signaling case */
-#define DO_VFP_cmp(p, type) \
-void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \
-{ \
- uint32_t flags; \
- switch(type ## _compare_quiet(a, b, &env->vfp.fp_status)) { \
- case 0: flags = 0x6; break; \
- case -1: flags = 0x8; break; \
- case 1: flags = 0x2; break; \
- default: case 2: flags = 0x3; break; \
- } \
- env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \
- | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \
-} \
-void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \
-{ \
- uint32_t flags; \
- switch(type ## _compare(a, b, &env->vfp.fp_status)) { \
- case 0: flags = 0x6; break; \
- case -1: flags = 0x8; break; \
- case 1: flags = 0x2; break; \
- default: case 2: flags = 0x3; break; \
- } \
- env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \
- | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \
-}
-DO_VFP_cmp(s, float32)
-DO_VFP_cmp(d, float64)
-#undef DO_VFP_cmp
-
-/* Integer to float and float to integer conversions */
-
-#define CONV_ITOF(name, ftype, fsz, sign) \
-ftype HELPER(name)(uint32_t x, void *fpstp) \
-{ \
- float_status *fpst = fpstp; \
- return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \
-}
-
-#define CONV_FTOI(name, ftype, fsz, sign, round) \
-sign##int32_t HELPER(name)(ftype x, void *fpstp) \
-{ \
- float_status *fpst = fpstp; \
- if (float##fsz##_is_any_nan(x)) { \
- float_raise(float_flag_invalid, fpst); \
- return 0; \
- } \
- return float##fsz##_to_##sign##int32##round(x, fpst); \
-}
-
-#define FLOAT_CONVS(name, p, ftype, fsz, sign) \
- CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign) \
- CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, ) \
- CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero)
-
-FLOAT_CONVS(si, h, uint32_t, 16, )
-FLOAT_CONVS(si, s, float32, 32, )
-FLOAT_CONVS(si, d, float64, 64, )
-FLOAT_CONVS(ui, h, uint32_t, 16, u)
-FLOAT_CONVS(ui, s, float32, 32, u)
-FLOAT_CONVS(ui, d, float64, 64, u)
-
-#undef CONV_ITOF
-#undef CONV_FTOI
-#undef FLOAT_CONVS
-
-/* floating point conversion */
-float64 VFP_HELPER(fcvtd, s)(float32 x, CPUARMState *env)
-{
- return float32_to_float64(x, &env->vfp.fp_status);
-}
-
-float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
-{
- return float64_to_float32(x, &env->vfp.fp_status);
-}
-
-/* VFP3 fixed point conversion. */
-#define VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
-float##fsz HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \
- void *fpstp) \
-{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); }
-
-#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, ROUND, suff) \
-uint##isz##_t HELPER(vfp_to##name##p##suff)(float##fsz x, uint32_t shift, \
- void *fpst) \
-{ \
- if (unlikely(float##fsz##_is_any_nan(x))) { \
- float_raise(float_flag_invalid, fpst); \
- return 0; \
- } \
- return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst); \
-}
-
-#define VFP_CONV_FIX(name, p, fsz, isz, itype) \
-VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \
- float_round_to_zero, _round_to_zero) \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \
- get_float_rounding_mode(fpst), )
-
-#define VFP_CONV_FIX_A64(name, p, fsz, isz, itype) \
-VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, \
- get_float_rounding_mode(fpst), )
-
-VFP_CONV_FIX(sh, d, 64, 64, int16)
-VFP_CONV_FIX(sl, d, 64, 64, int32)
-VFP_CONV_FIX_A64(sq, d, 64, 64, int64)
-VFP_CONV_FIX(uh, d, 64, 64, uint16)
-VFP_CONV_FIX(ul, d, 64, 64, uint32)
-VFP_CONV_FIX_A64(uq, d, 64, 64, uint64)
-VFP_CONV_FIX(sh, s, 32, 32, int16)
-VFP_CONV_FIX(sl, s, 32, 32, int32)
-VFP_CONV_FIX_A64(sq, s, 32, 64, int64)
-VFP_CONV_FIX(uh, s, 32, 32, uint16)
-VFP_CONV_FIX(ul, s, 32, 32, uint32)
-VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
-
-#undef VFP_CONV_FIX
-#undef VFP_CONV_FIX_FLOAT
-#undef VFP_CONV_FLOAT_FIX_ROUND
-#undef VFP_CONV_FIX_A64
-
-uint32_t HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
-{
- return int32_to_float16_scalbn(x, -shift, fpst);
-}
-
-uint32_t HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
-{
- return uint32_to_float16_scalbn(x, -shift, fpst);
-}
-
-uint32_t HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
-{
- return int64_to_float16_scalbn(x, -shift, fpst);
-}
-
-uint32_t HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
-{
- return uint64_to_float16_scalbn(x, -shift, fpst);
-}
-
-uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst)
-{
- if (unlikely(float16_is_any_nan(x))) {
- float_raise(float_flag_invalid, fpst);
- return 0;
- }
- return float16_to_int16_scalbn(x, get_float_rounding_mode(fpst),
- shift, fpst);
-}
-
-uint32_t HELPER(vfp_touhh)(uint32_t x, uint32_t shift, void *fpst)
-{
- if (unlikely(float16_is_any_nan(x))) {
- float_raise(float_flag_invalid, fpst);
- return 0;
- }
- return float16_to_uint16_scalbn(x, get_float_rounding_mode(fpst),
- shift, fpst);
-}
-
-uint32_t HELPER(vfp_toslh)(uint32_t x, uint32_t shift, void *fpst)
-{
- if (unlikely(float16_is_any_nan(x))) {
- float_raise(float_flag_invalid, fpst);
+ if (!arm_feature(env, ARM_FEATURE_V6)) {
return 0;
}
- return float16_to_int32_scalbn(x, get_float_rounding_mode(fpst),
- shift, fpst);
-}
-uint32_t HELPER(vfp_toulh)(uint32_t x, uint32_t shift, void *fpst)
-{
- if (unlikely(float16_is_any_nan(x))) {
- float_raise(float_flag_invalid, fpst);
- return 0;
- }
- return float16_to_uint32_scalbn(x, get_float_rounding_mode(fpst),
- shift, fpst);
-}
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ /* CPACR can cause a NOCP UsageFault taken to current security state */
+ if (!v7m_cpacr_pass(env, env->v7m.secure, cur_el != 0)) {
+ return 1;
+ }
-uint64_t HELPER(vfp_tosqh)(uint32_t x, uint32_t shift, void *fpst)
-{
- if (unlikely(float16_is_any_nan(x))) {
- float_raise(float_flag_invalid, fpst);
- return 0;
- }
- return float16_to_int64_scalbn(x, get_float_rounding_mode(fpst),
- shift, fpst);
-}
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY) && !env->v7m.secure) {
+ if (!extract32(env->v7m.nsacr, 10, 1)) {
+ /* FP insns cause a NOCP UsageFault taken to Secure */
+ return 3;
+ }
+ }
-uint64_t HELPER(vfp_touqh)(uint32_t x, uint32_t shift, void *fpst)
-{
- if (unlikely(float16_is_any_nan(x))) {
- float_raise(float_flag_invalid, fpst);
return 0;
}
- return float16_to_uint64_scalbn(x, get_float_rounding_mode(fpst),
- shift, fpst);
-}
-
-/* Set the current fp rounding mode and return the old one.
- * The argument is a softfloat float_round_ value.
- */
-uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp)
-{
- float_status *fp_status = fpstp;
- uint32_t prev_rmode = get_float_rounding_mode(fp_status);
- set_float_rounding_mode(rmode, fp_status);
-
- return prev_rmode;
-}
-
-/* Set the current fp rounding mode in the standard fp status and return
- * the old one. This is for NEON instructions that need to change the
- * rounding mode but wish to use the standard FPSCR values for everything
- * else. Always set the rounding mode back to the correct value after
- * modifying it.
- * The argument is a softfloat float_round_ value.
- */
-uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env)
-{
- float_status *fp_status = &env->vfp.standard_fp_status;
-
- uint32_t prev_rmode = get_float_rounding_mode(fp_status);
- set_float_rounding_mode(rmode, fp_status);
-
- return prev_rmode;
-}
-
-/* Half precision conversions. */
-float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode)
-{
- /* Squash FZ16 to 0 for the duration of conversion. In this case,
- * it would affect flushing input denormals.
- */
- float_status *fpst = fpstp;
- flag save = get_flush_inputs_to_zero(fpst);
- set_flush_inputs_to_zero(false, fpst);
- float32 r = float16_to_float32(a, !ahp_mode, fpst);
- set_flush_inputs_to_zero(save, fpst);
- return r;
-}
-
-uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
-{
- /* Squash FZ16 to 0 for the duration of conversion. In this case,
- * it would affect flushing output denormals.
- */
- float_status *fpst = fpstp;
- flag save = get_flush_to_zero(fpst);
- set_flush_to_zero(false, fpst);
- float16 r = float32_to_float16(a, !ahp_mode, fpst);
- set_flush_to_zero(save, fpst);
- return r;
-}
-
-float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode)
-{
- /* Squash FZ16 to 0 for the duration of conversion. In this case,
- * it would affect flushing input denormals.
- */
- float_status *fpst = fpstp;
- flag save = get_flush_inputs_to_zero(fpst);
- set_flush_inputs_to_zero(false, fpst);
- float64 r = float16_to_float64(a, !ahp_mode, fpst);
- set_flush_inputs_to_zero(save, fpst);
- return r;
-}
+ hcr_el2 = arm_hcr_el2_eff(env);
-uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
-{
- /* Squash FZ16 to 0 for the duration of conversion. In this case,
- * it would affect flushing output denormals.
+ /*
+ * The CPACR controls traps to EL1, or PL1 if we're 32 bit:
+ * 0, 2 : trap EL0 and EL1/PL1 accesses
+ * 1 : trap only EL0 accesses
+ * 3 : trap no accesses
+ * This register is ignored if E2H+TGE are both set.
*/
- float_status *fpst = fpstp;
- flag save = get_flush_to_zero(fpst);
- set_flush_to_zero(false, fpst);
- float16 r = float64_to_float16(a, !ahp_mode, fpst);
- set_flush_to_zero(save, fpst);
- return r;
-}
-
-#define float32_two make_float32(0x40000000)
-#define float32_three make_float32(0x40400000)
-#define float32_one_point_five make_float32(0x3fc00000)
+ if ((hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
+ int fpen = FIELD_EX64(env->cp15.cpacr_el1, CPACR_EL1, FPEN);
-float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env)
-{
- float_status *s = &env->vfp.standard_fp_status;
- if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
- (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
- if (!(float32_is_zero(a) || float32_is_zero(b))) {
- float_raise(float_flag_input_denormal, s);
+ switch (fpen) {
+ case 1:
+ if (cur_el != 0) {
+ break;
+ }
+ /* fall through */
+ case 0:
+ case 2:
+ /* Trap from Secure PL0 or PL1 to Secure PL1. */
+ if (!arm_el_is_aa64(env, 3)
+ && (cur_el == 3 || arm_is_secure_below_el3(env))) {
+ return 3;
+ }
+ if (cur_el <= 1) {
+ return 1;
+ }
+ break;
}
- return float32_two;
}
- return float32_sub(float32_two, float32_mul(a, b, s), s);
-}
-float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env)
-{
- float_status *s = &env->vfp.standard_fp_status;
- float32 product;
- if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
- (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
- if (!(float32_is_zero(a) || float32_is_zero(b))) {
- float_raise(float_flag_input_denormal, s);
+ /*
+ * The NSACR allows A-profile AArch32 EL3 and M-profile secure mode
+ * to control non-secure access to the FPU. It doesn't have any
+ * effect if EL3 is AArch64 or if EL3 doesn't exist at all.
+ */
+ if ((arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+ cur_el <= 2 && !arm_is_secure_below_el3(env))) {
+ if (!extract32(env->cp15.nsacr, 10, 1)) {
+ /* FP insns act as UNDEF */
+ return cur_el == 2 ? 2 : 1;
}
- return float32_one_point_five;
}
- product = float32_mul(a, b, s);
- return float32_div(float32_sub(float32_three, product, s), float32_two, s);
-}
-
-/* NEON helpers. */
-
-/* Constants 256 and 512 are used in some helpers; we avoid relying on
- * int->float conversions at run-time. */
-#define float64_256 make_float64(0x4070000000000000LL)
-#define float64_512 make_float64(0x4080000000000000LL)
-#define float16_maxnorm make_float16(0x7bff)
-#define float32_maxnorm make_float32(0x7f7fffff)
-#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
-
-/* Reciprocal functions
- *
- * The algorithm that must be used to calculate the estimate
- * is specified by the ARM ARM, see FPRecipEstimate()/RecipEstimate
- */
-
-/* See RecipEstimate()
- *
- * input is a 9 bit fixed point number
- * input range 256 .. 511 for a number from 0.5 <= x < 1.0.
- * result range 256 .. 511 for a number from 1.0 to 511/256.
- */
-static int recip_estimate(int input)
-{
- int a, b, r;
- assert(256 <= input && input < 512);
- a = (input * 2) + 1;
- b = (1 << 19) / a;
- r = (b + 1) >> 1;
- assert(256 <= r && r < 512);
- return r;
-}
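
The estimate being removed here works on a 9-bit fixed-point mantissa: inputs in [256, 511] represent 0.5 <= x < 1.0 and results in the same range represent 1.0 up to 511/256. A quick endpoint check, reproducing the same integer arithmetic in a standalone sketch:

    #include <assert.h>

    /* Same arithmetic as the RecipEstimate() helper above. */
    static int recip_estimate(int input)
    {
        int a = (input * 2) + 1;
        int b = (1 << 19) / a;

        return (b + 1) >> 1;
    }

    int main(void)
    {
        /* 0.5 (input 256) estimates to 511/256 (~2.0); 511/512 estimates to 1.0. */
        assert(recip_estimate(256) == 511);
        assert(recip_estimate(511) == 256);
        return 0;
    }
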
-
-/*
- * Common wrapper to call recip_estimate
- *
- * The parameters are exponent and 64 bit fraction (without implicit
- * bit) where the binary point is nominally at bit 52. Returns a
- * float64 which can then be rounded to the appropriate size by the
- * callee.
- */
-
-static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
-{
- uint32_t scaled, estimate;
- uint64_t result_frac;
- int result_exp;
-
- /* Handle sub-normals */
- if (*exp == 0) {
- if (extract64(frac, 51, 1) == 0) {
- *exp = -1;
- frac <<= 2;
- } else {
- frac <<= 1;
+ /*
+ * CPTR_EL2 is present in v7VE or v8, and changes format
+ * with HCR_EL2.E2H (regardless of TGE).
+ */
+ if (cur_el <= 2) {
+ if (hcr_el2 & HCR_E2H) {
+ switch (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, FPEN)) {
+ case 1:
+ if (cur_el != 0 || !(hcr_el2 & HCR_TGE)) {
+ break;
+ }
+ /* fall through */
+ case 0:
+ case 2:
+ return 2;
+ }
+ } else if (arm_is_el2_enabled(env)) {
+ if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TFP)) {
+ return 2;
+ }
}
}
- /* scaled = UInt('1':fraction<51:44>) */
- scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
- estimate = recip_estimate(scaled);
-
- result_exp = exp_off - *exp;
- result_frac = deposit64(0, 44, 8, estimate);
- if (result_exp == 0) {
- result_frac = deposit64(result_frac >> 1, 51, 1, 1);
- } else if (result_exp == -1) {
- result_frac = deposit64(result_frac >> 2, 50, 2, 1);
- result_exp = 0;
+ /* CPTR_EL3 : present in v8 */
+ if (FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, TFP)) {
+ /* Trap all FP ops to EL3 */
+ return 3;
}
-
- *exp = result_exp;
-
- return result_frac;
+#endif
+ return 0;
}
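
The CPACR_EL1.FPEN decode described in the comment above (0 and 2 trap EL0 and EL1, 1 traps only EL0, 3 traps nothing) is the core of the EL0/EL1 case; the EL2/EL3, E2H+TGE and security-state refinements layer on top of it. A standalone sketch of just that table, with a hypothetical function name:

    #include <stdbool.h>
    #include <stdio.h>

    /* Minimal decode of CPACR_EL1.FPEN as described in the comment above,
     * ignoring the EL2/EL3, E2H+TGE and security-state refinements. */
    static bool fpen_traps(int fpen, int cur_el)
    {
        switch (fpen) {
        case 0:
        case 2:
            return cur_el <= 1;     /* trap EL0 and EL1 accesses */
        case 1:
            return cur_el == 0;     /* trap only EL0 accesses */
        default:
            return false;           /* 3: trap nothing */
        }
    }

    int main(void)
    {
        /* FPEN = 1: EL0 is trapped, EL1 is not. */
        printf("%d %d\n", fpen_traps(1, 0), fpen_traps(1, 1));
        return 0;
    }
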
-static bool round_to_inf(float_status *fpst, bool sign_bit)
+/* Return the exception level we're running at if this is our mmu_idx */
+int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx)
{
- switch (fpst->float_rounding_mode) {
- case float_round_nearest_even: /* Round to Nearest */
- return true;
- case float_round_up: /* Round to +Inf */
- return !sign_bit;
- case float_round_down: /* Round to -Inf */
- return sign_bit;
- case float_round_to_zero: /* Round to Zero */
- return false;
- }
-
- g_assert_not_reached();
-}
-
-uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
-{
- float_status *fpst = fpstp;
- float16 f16 = float16_squash_input_denormal(input, fpst);
- uint32_t f16_val = float16_val(f16);
- uint32_t f16_sign = float16_is_neg(f16);
- int f16_exp = extract32(f16_val, 10, 5);
- uint32_t f16_frac = extract32(f16_val, 0, 10);
- uint64_t f64_frac;
-
- if (float16_is_any_nan(f16)) {
- float16 nan = f16;
- if (float16_is_signaling_nan(f16, fpst)) {
- float_raise(float_flag_invalid, fpst);
- nan = float16_silence_nan(f16, fpst);
- }
- if (fpst->default_nan_mode) {
- nan = float16_default_nan(fpst);
- }
- return nan;
- } else if (float16_is_infinity(f16)) {
- return float16_set_sign(float16_zero, float16_is_neg(f16));
- } else if (float16_is_zero(f16)) {
- float_raise(float_flag_divbyzero, fpst);
- return float16_set_sign(float16_infinity, float16_is_neg(f16));
- } else if (float16_abs(f16) < (1 << 8)) {
- /* Abs(value) < 2.0^-16 */
- float_raise(float_flag_overflow | float_flag_inexact, fpst);
- if (round_to_inf(fpst, f16_sign)) {
- return float16_set_sign(float16_infinity, f16_sign);
- } else {
- return float16_set_sign(float16_maxnorm, f16_sign);
- }
- } else if (f16_exp >= 29 && fpst->flush_to_zero) {
- float_raise(float_flag_underflow, fpst);
- return float16_set_sign(float16_zero, float16_is_neg(f16));
- }
-
- f64_frac = call_recip_estimate(&f16_exp, 29,
- ((uint64_t) f16_frac) << (52 - 10));
-
- /* result = sign : result_exp<4:0> : fraction<51:42> */
- f16_val = deposit32(0, 15, 1, f16_sign);
- f16_val = deposit32(f16_val, 10, 5, f16_exp);
- f16_val = deposit32(f16_val, 0, 10, extract64(f64_frac, 52 - 10, 10));
- return make_float16(f16_val);
-}
-
-float32 HELPER(recpe_f32)(float32 input, void *fpstp)
-{
- float_status *fpst = fpstp;
- float32 f32 = float32_squash_input_denormal(input, fpst);
- uint32_t f32_val = float32_val(f32);
- bool f32_sign = float32_is_neg(f32);
- int f32_exp = extract32(f32_val, 23, 8);
- uint32_t f32_frac = extract32(f32_val, 0, 23);
- uint64_t f64_frac;
-
- if (float32_is_any_nan(f32)) {
- float32 nan = f32;
- if (float32_is_signaling_nan(f32, fpst)) {
- float_raise(float_flag_invalid, fpst);
- nan = float32_silence_nan(f32, fpst);
- }
- if (fpst->default_nan_mode) {
- nan = float32_default_nan(fpst);
- }
- return nan;
- } else if (float32_is_infinity(f32)) {
- return float32_set_sign(float32_zero, float32_is_neg(f32));
- } else if (float32_is_zero(f32)) {
- float_raise(float_flag_divbyzero, fpst);
- return float32_set_sign(float32_infinity, float32_is_neg(f32));
- } else if (float32_abs(f32) < (1ULL << 21)) {
- /* Abs(value) < 2.0^-128 */
- float_raise(float_flag_overflow | float_flag_inexact, fpst);
- if (round_to_inf(fpst, f32_sign)) {
- return float32_set_sign(float32_infinity, f32_sign);
- } else {
- return float32_set_sign(float32_maxnorm, f32_sign);
- }
- } else if (f32_exp >= 253 && fpst->flush_to_zero) {
- float_raise(float_flag_underflow, fpst);
- return float32_set_sign(float32_zero, float32_is_neg(f32));
- }
-
- f64_frac = call_recip_estimate(&f32_exp, 253,
- ((uint64_t) f32_frac) << (52 - 23));
-
- /* result = sign : result_exp<7:0> : fraction<51:29> */
- f32_val = deposit32(0, 31, 1, f32_sign);
- f32_val = deposit32(f32_val, 23, 8, f32_exp);
- f32_val = deposit32(f32_val, 0, 23, extract64(f64_frac, 52 - 23, 23));
- return make_float32(f32_val);
-}
-
-float64 HELPER(recpe_f64)(float64 input, void *fpstp)
-{
- float_status *fpst = fpstp;
- float64 f64 = float64_squash_input_denormal(input, fpst);
- uint64_t f64_val = float64_val(f64);
- bool f64_sign = float64_is_neg(f64);
- int f64_exp = extract64(f64_val, 52, 11);
- uint64_t f64_frac = extract64(f64_val, 0, 52);
-
- /* Deal with any special cases */
- if (float64_is_any_nan(f64)) {
- float64 nan = f64;
- if (float64_is_signaling_nan(f64, fpst)) {
- float_raise(float_flag_invalid, fpst);
- nan = float64_silence_nan(f64, fpst);
- }
- if (fpst->default_nan_mode) {
- nan = float64_default_nan(fpst);
- }
- return nan;
- } else if (float64_is_infinity(f64)) {
- return float64_set_sign(float64_zero, float64_is_neg(f64));
- } else if (float64_is_zero(f64)) {
- float_raise(float_flag_divbyzero, fpst);
- return float64_set_sign(float64_infinity, float64_is_neg(f64));
- } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
- /* Abs(value) < 2.0^-1024 */
- float_raise(float_flag_overflow | float_flag_inexact, fpst);
- if (round_to_inf(fpst, f64_sign)) {
- return float64_set_sign(float64_infinity, f64_sign);
- } else {
- return float64_set_sign(float64_maxnorm, f64_sign);
- }
- } else if (f64_exp >= 2045 && fpst->flush_to_zero) {
- float_raise(float_flag_underflow, fpst);
- return float64_set_sign(float64_zero, float64_is_neg(f64));
+ if (mmu_idx & ARM_MMU_IDX_M) {
+ return mmu_idx & ARM_MMU_IDX_M_PRIV;
}
- f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
-
- /* result = sign : result_exp<10:0> : fraction<51:0>; */
- f64_val = deposit64(0, 63, 1, f64_sign);
- f64_val = deposit64(f64_val, 52, 11, f64_exp);
- f64_val = deposit64(f64_val, 0, 52, f64_frac);
- return make_float64(f64_val);
-}
-
-/* The algorithm that must be used to calculate the estimate
- * is specified by the ARM ARM.
- */
-
-static int do_recip_sqrt_estimate(int a)
-{
- int b, estimate;
-
- assert(128 <= a && a < 512);
- if (a < 256) {
- a = a * 2 + 1;
- } else {
- a = (a >> 1) << 1;
- a = (a + 1) * 2;
- }
- b = 512;
- while (a * (b + 1) * (b + 1) < (1 << 28)) {
- b += 1;
+ switch (mmu_idx) {
+ case ARMMMUIdx_E10_0:
+ case ARMMMUIdx_E20_0:
+ return 0;
+ case ARMMMUIdx_E10_1:
+ case ARMMMUIdx_E10_1_PAN:
+ return 1;
+ case ARMMMUIdx_E2:
+ case ARMMMUIdx_E20_2:
+ case ARMMMUIdx_E20_2_PAN:
+ return 2;
+ case ARMMMUIdx_E3:
+ return 3;
+ default:
+ g_assert_not_reached();
}
- estimate = (b + 1) / 2;
- assert(256 <= estimate && estimate < 512);
-
- return estimate;
}
-
-static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
+#ifndef CONFIG_TCG
+ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
{
- int estimate;
- uint32_t scaled;
-
- if (*exp == 0) {
- while (extract64(frac, 51, 1) == 0) {
- frac = frac << 1;
- *exp -= 1;
- }
- frac = extract64(frac, 0, 51) << 1;
- }
-
- if (*exp & 1) {
- /* scaled = UInt('01':fraction<51:45>) */
- scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
- } else {
- /* scaled = UInt('1':fraction<51:44>) */
- scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
- }
- estimate = do_recip_sqrt_estimate(scaled);
-
- *exp = (exp_off - *exp) / 2;
- return extract64(estimate, 0, 8) << 44;
+ g_assert_not_reached();
}
+#endif
-uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
+ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el)
{
- float_status *s = fpstp;
- float16 f16 = float16_squash_input_denormal(input, s);
- uint16_t val = float16_val(f16);
- bool f16_sign = float16_is_neg(f16);
- int f16_exp = extract32(val, 10, 5);
- uint16_t f16_frac = extract32(val, 0, 10);
- uint64_t f64_frac;
+ ARMMMUIdx idx;
+ uint64_t hcr;
- if (float16_is_any_nan(f16)) {
- float16 nan = f16;
- if (float16_is_signaling_nan(f16, s)) {
- float_raise(float_flag_invalid, s);
- nan = float16_silence_nan(f16, s);
- }
- if (s->default_nan_mode) {
- nan = float16_default_nan(s);
- }
- return nan;
- } else if (float16_is_zero(f16)) {
- float_raise(float_flag_divbyzero, s);
- return float16_set_sign(float16_infinity, f16_sign);
- } else if (f16_sign) {
- float_raise(float_flag_invalid, s);
- return float16_default_nan(s);
- } else if (float16_is_infinity(f16)) {
- return float16_zero;
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ return arm_v7m_mmu_idx_for_secstate(env, env->v7m.secure);
}
- /* Scale and normalize to a double-precision value between 0.25 and 1.0,
- * preserving the parity of the exponent. */
-
- f64_frac = ((uint64_t) f16_frac) << (52 - 10);
-
- f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
-
- /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
- val = deposit32(0, 15, 1, f16_sign);
- val = deposit32(val, 10, 5, f16_exp);
- val = deposit32(val, 2, 8, extract64(f64_frac, 52 - 8, 8));
- return make_float16(val);
-}
-
-float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
-{
- float_status *s = fpstp;
- float32 f32 = float32_squash_input_denormal(input, s);
- uint32_t val = float32_val(f32);
- uint32_t f32_sign = float32_is_neg(f32);
- int f32_exp = extract32(val, 23, 8);
- uint32_t f32_frac = extract32(val, 0, 23);
- uint64_t f64_frac;
-
- if (float32_is_any_nan(f32)) {
- float32 nan = f32;
- if (float32_is_signaling_nan(f32, s)) {
- float_raise(float_flag_invalid, s);
- nan = float32_silence_nan(f32, s);
- }
- if (s->default_nan_mode) {
- nan = float32_default_nan(s);
+ /* See ARM pseudo-function ELIsInHost. */
+ switch (el) {
+ case 0:
+ hcr = arm_hcr_el2_eff(env);
+ if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) {
+ idx = ARMMMUIdx_E20_0;
+ } else {
+ idx = ARMMMUIdx_E10_0;
}
- return nan;
- } else if (float32_is_zero(f32)) {
- float_raise(float_flag_divbyzero, s);
- return float32_set_sign(float32_infinity, float32_is_neg(f32));
- } else if (float32_is_neg(f32)) {
- float_raise(float_flag_invalid, s);
- return float32_default_nan(s);
- } else if (float32_is_infinity(f32)) {
- return float32_zero;
- }
-
- /* Scale and normalize to a double-precision value between 0.25 and 1.0,
- * preserving the parity of the exponent. */
-
- f64_frac = ((uint64_t) f32_frac) << 29;
-
- f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
-
- /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */
- val = deposit32(0, 31, 1, f32_sign);
- val = deposit32(val, 23, 8, f32_exp);
- val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
- return make_float32(val);
-}
-
-float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
-{
- float_status *s = fpstp;
- float64 f64 = float64_squash_input_denormal(input, s);
- uint64_t val = float64_val(f64);
- bool f64_sign = float64_is_neg(f64);
- int f64_exp = extract64(val, 52, 11);
- uint64_t f64_frac = extract64(val, 0, 52);
-
- if (float64_is_any_nan(f64)) {
- float64 nan = f64;
- if (float64_is_signaling_nan(f64, s)) {
- float_raise(float_flag_invalid, s);
- nan = float64_silence_nan(f64, s);
+ break;
+ case 1:
+ if (arm_pan_enabled(env)) {
+ idx = ARMMMUIdx_E10_1_PAN;
+ } else {
+ idx = ARMMMUIdx_E10_1;
}
- if (s->default_nan_mode) {
- nan = float64_default_nan(s);
+ break;
+ case 2:
+ /* Note that TGE does not apply at EL2. */
+ if (arm_hcr_el2_eff(env) & HCR_E2H) {
+ if (arm_pan_enabled(env)) {
+ idx = ARMMMUIdx_E20_2_PAN;
+ } else {
+ idx = ARMMMUIdx_E20_2;
+ }
+ } else {
+ idx = ARMMMUIdx_E2;
}
- return nan;
- } else if (float64_is_zero(f64)) {
- float_raise(float_flag_divbyzero, s);
- return float64_set_sign(float64_infinity, float64_is_neg(f64));
- } else if (float64_is_neg(f64)) {
- float_raise(float_flag_invalid, s);
- return float64_default_nan(s);
- } else if (float64_is_infinity(f64)) {
- return float64_zero;
- }
-
- f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
-
- /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */
- val = deposit64(0, 61, 1, f64_sign);
- val = deposit64(val, 52, 11, f64_exp);
- val = deposit64(val, 44, 8, extract64(f64_frac, 52 - 8, 8));
- return make_float64(val);
-}
-
-uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
-{
- /* float_status *s = fpstp; */
- int input, estimate;
-
- if ((a & 0x80000000) == 0) {
- return 0xffffffff;
- }
-
- input = extract32(a, 23, 9);
- estimate = recip_estimate(input);
-
- return deposit32(0, (32 - 9), 9, estimate);
-}
-
-uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
-{
- int estimate;
-
- if ((a & 0xc0000000) == 0) {
- return 0xffffffff;
- }
-
- estimate = do_recip_sqrt_estimate(extract32(a, 23, 9));
-
- return deposit32(0, 23, 9, estimate);
-}
-
-/* VFPv4 fused multiply-accumulate */
-float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp)
-{
- float_status *fpst = fpstp;
- return float32_muladd(a, b, c, 0, fpst);
-}
-
-float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
-{
- float_status *fpst = fpstp;
- return float64_muladd(a, b, c, 0, fpst);
-}
-
-/* ARMv8 round to integral */
-float32 HELPER(rints_exact)(float32 x, void *fp_status)
-{
- return float32_round_to_int(x, fp_status);
-}
-
-float64 HELPER(rintd_exact)(float64 x, void *fp_status)
-{
- return float64_round_to_int(x, fp_status);
-}
-
-float32 HELPER(rints)(float32 x, void *fp_status)
-{
- int old_flags = get_float_exception_flags(fp_status), new_flags;
- float32 ret;
-
- ret = float32_round_to_int(x, fp_status);
-
- /* Suppress any inexact exceptions the conversion produced */
- if (!(old_flags & float_flag_inexact)) {
- new_flags = get_float_exception_flags(fp_status);
- set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
- }
-
- return ret;
-}
-
-float64 HELPER(rintd)(float64 x, void *fp_status)
-{
- int old_flags = get_float_exception_flags(fp_status), new_flags;
- float64 ret;
-
- ret = float64_round_to_int(x, fp_status);
-
- new_flags = get_float_exception_flags(fp_status);
-
- /* Suppress any inexact exceptions the conversion produced */
- if (!(old_flags & float_flag_inexact)) {
- new_flags = get_float_exception_flags(fp_status);
- set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
- }
-
- return ret;
-}
-
-/* Convert ARM rounding mode to softfloat */
-int arm_rmode_to_sf(int rmode)
-{
- switch (rmode) {
- case FPROUNDING_TIEAWAY:
- rmode = float_round_ties_away;
break;
- case FPROUNDING_ODD:
- /* FIXME: add support for TIEAWAY and ODD */
- qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
- rmode);
- /* fall through for now */
- case FPROUNDING_TIEEVEN:
+ case 3:
+ return ARMMMUIdx_E3;
default:
- rmode = float_round_nearest_even;
- break;
- case FPROUNDING_POSINF:
- rmode = float_round_up;
- break;
- case FPROUNDING_NEGINF:
- rmode = float_round_down;
- break;
- case FPROUNDING_ZERO:
- rmode = float_round_to_zero;
- break;
+ g_assert_not_reached();
}
- return rmode;
-}
-
-/* CRC helpers.
- * The upper bytes of val (above the number specified by 'bytes') must have
- * been zeroed out by the caller.
- */
-uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
-{
- uint8_t buf[4];
- stl_le_p(buf, val);
-
- /* zlib crc32 converts the accumulator and output to one's complement. */
- return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
+ return idx;
}
-uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
+ARMMMUIdx arm_mmu_idx(CPUARMState *env)
{
- uint8_t buf[4];
-
- stl_le_p(buf, val);
-
- /* Linux crc32c converts the output to one's complement. */
- return crc32c(acc, buf, bytes) ^ 0xffffffff;
+ return arm_mmu_idx_el(env, arm_current_el(env));
}
-/* Return the exception level to which FP-disabled exceptions should
- * be taken, or 0 if FP is enabled.
- */
-int fp_exception_el(CPUARMState *env, int cur_el)
+static bool mve_no_pred(CPUARMState *env)
{
-#ifndef CONFIG_USER_ONLY
- int fpen;
-
- /* CPACR and the CPTR registers don't exist before v6, so FP is
- * always accessible
- */
- if (!arm_feature(env, ARM_FEATURE_V6)) {
- return 0;
- }
-
- /* The CPACR controls traps to EL1, or PL1 if we're 32 bit:
- * 0, 2 : trap EL0 and EL1/PL1 accesses
- * 1 : trap only EL0 accesses
- * 3 : trap no accesses
+ /*
+ * Return true if there is definitely no predication of MVE
+ * instructions by VPR or LTPSIZE. (Returning false even if there
+ * isn't any predication is OK; generated code will just be
+ * a little worse.)
+ * If the CPU does not implement MVE then this TB flag is always 0.
+ *
+ * NOTE: if you change this logic, the "recalculate s->mve_no_pred"
+ * logic in gen_update_fp_context() needs to be updated to match.
+ *
+ * We do not include the effect of the ECI bits here -- they are
+ * tracked in other TB flags. This simplifies the logic for
+ * "when did we emit code that changes the MVE_NO_PRED TB flag
+ * and thus need to end the TB?".
*/
- fpen = extract32(env->cp15.cpacr_el1, 20, 2);
- switch (fpen) {
- case 0:
- case 2:
- if (cur_el == 0 || cur_el == 1) {
- /* Trap to PL1, which might be EL1 or EL3 */
- if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
- return 3;
- }
- return 1;
- }
- if (cur_el == 3 && !is_a64(env)) {
- /* Secure PL1 running at EL3 */
- return 3;
- }
- break;
- case 1:
- if (cur_el == 0) {
- return 1;
- }
- break;
- case 3:
- break;
+    if (!cpu_isar_feature(aa32_mve, env_archcpu(env))) {
+ return false;
}
-
- /* For the CPTR registers we don't need to guard with an ARM_FEATURE
- * check because zero bits in the registers mean "don't trap".
- */
-
- /* CPTR_EL2 : present in v7VE or v8 */
- if (cur_el <= 2 && extract32(env->cp15.cptr_el[2], 10, 1)
- && !arm_is_secure_below_el3(env)) {
- /* Trap FP ops at EL2, NS-EL1 or NS-EL0 to EL2 */
- return 2;
+ if (env->v7m.vpr) {
+ return false;
}
-
- /* CPTR_EL3 : present in v8 */
- if (extract32(env->cp15.cptr_el[3], 10, 1)) {
- /* Trap all FP ops to EL3 */
- return 3;
+ if (env->v7m.ltpsize < 4) {
+ return false;
}
-#endif
- return 0;
+ return true;
}
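
mve_no_pred() feeds the MVE_NO_PRED TB flag set in cpu_get_tb_cpu_state() below. Restating its three conditions as a standalone predicate can help when reading the translator side (illustration only, not part of the patch; the function name and plain-integer parameters are made up):

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * MVE is definitely unpredicated only when the CPU implements MVE,
     * VPR is all-zeroes and LTPSIZE is >= 4 (no loop tail predication).
     */
    static bool mve_definitely_unpredicated(bool have_mve, uint32_t vpr,
                                            uint32_t ltpsize)
    {
        return have_mve && vpr == 0 && ltpsize >= 4;
    }
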
-void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
- target_ulong *cs_base, uint32_t *pflags)
+void cpu_get_tb_cpu_state(CPUARMState *env, vaddr *pc,
+ uint64_t *cs_base, uint32_t *pflags)
{
- ARMMMUIdx mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false));
- int current_el = arm_current_el(env);
- int fp_el = fp_exception_el(env, current_el);
- uint32_t flags;
+ CPUARMTBFlags flags;
- if (is_a64(env)) {
+ assert_hflags_rebuild_correctly(env);
+ flags = env->hflags;
+
+ if (EX_TBFLAG_ANY(flags, AARCH64_STATE)) {
*pc = env->pc;
- flags = ARM_TBFLAG_AARCH64_STATE_MASK;
- /* Get control bits for tagged addresses */
- flags |= (arm_regime_tbi0(env, mmu_idx) << ARM_TBFLAG_TBI0_SHIFT);
- flags |= (arm_regime_tbi1(env, mmu_idx) << ARM_TBFLAG_TBI1_SHIFT);
+ if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
+ DP_TBFLAG_A64(flags, BTYPE, env->btype);
+ }
+ } else {
+ *pc = env->regs[15];
- if (arm_feature(env, ARM_FEATURE_SVE)) {
- int sve_el = sve_exception_el(env, current_el);
- uint32_t zcr_len;
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
+ FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S)
+ != env->v7m.secure) {
+ DP_TBFLAG_M32(flags, FPCCR_S_WRONG, 1);
+ }
- /* If SVE is disabled, but FP is enabled,
- * then the effective len is 0.
+ if ((env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) &&
+ (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) ||
+ (env->v7m.secure &&
+ !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)))) {
+ /*
+ * ASPEN is set, but FPCA/SFPA indicate that there is no
+ * active FP context; we must create a new FP context before
+ * executing any FP insn.
+ */
+ DP_TBFLAG_M32(flags, NEW_FP_CTXT_NEEDED, 1);
+ }
+
+ bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
+ if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) {
+ DP_TBFLAG_M32(flags, LSPACT, 1);
+ }
+
+ if (mve_no_pred(env)) {
+ DP_TBFLAG_M32(flags, MVE_NO_PRED, 1);
+ }
+ } else {
+ /*
+ * Note that XSCALE_CPAR shares bits with VECSTRIDE.
+ * Note that VECLEN+VECSTRIDE are RES0 for M-profile.
*/
- if (sve_el != 0 && fp_el == 0) {
- zcr_len = 0;
+ if (arm_feature(env, ARM_FEATURE_XSCALE)) {
+ DP_TBFLAG_A32(flags, XSCALE_CPAR, env->cp15.c15_cpar);
} else {
- zcr_len = sve_zcr_len_for_el(env, current_el);
+ DP_TBFLAG_A32(flags, VECLEN, env->vfp.vec_len);
+ DP_TBFLAG_A32(flags, VECSTRIDE, env->vfp.vec_stride);
+ }
+ if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
+ DP_TBFLAG_A32(flags, VFPEN, 1);
}
- flags |= sve_el << ARM_TBFLAG_SVEEXC_EL_SHIFT;
- flags |= zcr_len << ARM_TBFLAG_ZCR_LEN_SHIFT;
- }
- } else {
- *pc = env->regs[15];
- flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT)
- | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT)
- | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT)
- | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT)
- | (arm_sctlr_b(env) << ARM_TBFLAG_SCTLR_B_SHIFT);
- if (!(access_secure_reg(env))) {
- flags |= ARM_TBFLAG_NS_MASK;
- }
- if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)
- || arm_el_is_aa64(env, 1)) {
- flags |= ARM_TBFLAG_VFPEN_MASK;
}
- flags |= (extract32(env->cp15.c15_cpar, 0, 2)
- << ARM_TBFLAG_XSCALE_CPAR_SHIFT);
- }
- flags |= (arm_to_core_mmu_idx(mmu_idx) << ARM_TBFLAG_MMUIDX_SHIFT);
+ DP_TBFLAG_AM32(flags, THUMB, env->thumb);
+ DP_TBFLAG_AM32(flags, CONDEXEC, env->condexec_bits);
+ }
- /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
+ /*
+ * The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
* states defined in the ARM ARM for software singlestep:
* SS_ACTIVE PSTATE.SS State
* 0 x Inactive (the TB flag for SS is always 0)
* 1 0 Active-pending
* 1 1 Active-not-pending
+ * SS_ACTIVE is set in hflags; PSTATE__SS is computed every TB.
*/
- if (arm_singlestep_active(env)) {
- flags |= ARM_TBFLAG_SS_ACTIVE_MASK;
- if (is_a64(env)) {
- if (env->pstate & PSTATE_SS) {
- flags |= ARM_TBFLAG_PSTATE_SS_MASK;
- }
- } else {
- if (env->uncached_cpsr & PSTATE_SS) {
- flags |= ARM_TBFLAG_PSTATE_SS_MASK;
- }
- }
- }
- if (arm_cpu_data_is_big_endian(env)) {
- flags |= ARM_TBFLAG_BE_DATA_MASK;
- }
- flags |= fp_el << ARM_TBFLAG_FPEXC_EL_SHIFT;
-
- if (arm_v7m_is_handler_mode(env)) {
- flags |= ARM_TBFLAG_HANDLER_MASK;
- }
-
- /* v8M always applies stack limit checks unless CCR.STKOFHFNMIGN is
- * suppressing them because the requested execution priority is less than 0.
- */
- if (arm_feature(env, ARM_FEATURE_V8) &&
- arm_feature(env, ARM_FEATURE_M) &&
- !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) &&
- (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKOFHFNMIGN_MASK))) {
- flags |= ARM_TBFLAG_STACKCHECK_MASK;
+ if (EX_TBFLAG_ANY(flags, SS_ACTIVE) && (env->pstate & PSTATE_SS)) {
+ DP_TBFLAG_ANY(flags, PSTATE__SS, 1);
}
- *pflags = flags;
- *cs_base = 0;
+ *pflags = flags.flags;
+ *cs_base = flags.flags2;
}
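
cpu_get_tb_cpu_state() now mostly copies the cached env->hflags and only recomputes the bits that can legitimately change between TBs (BTYPE, PSTATE__SS, and the M-profile FP-context bits). The DP_TBFLAG_*/EX_TBFLAG_* macros used above are, in essence, deposit/extract operations on the packed flag words; a generic sketch of that pattern, with an assumed field position rather than the real TB flag layout:

    #include <stdint.h>

    /* Placeholder field position; not the real TB flag layout. */
    #define FIELD_SHIFT  3
    #define FIELD_WIDTH  2

    /* Deposit 'val' into the field (DP_TBFLAG_* style). */
    static inline uint32_t dp_field(uint32_t flags, uint32_t val)
    {
        uint32_t mask = ((1u << FIELD_WIDTH) - 1) << FIELD_SHIFT;
        return (flags & ~mask) | ((val << FIELD_SHIFT) & mask);
    }

    /* Extract the field (EX_TBFLAG_* style). */
    static inline uint32_t ex_field(uint32_t flags)
    {
        return (flags >> FIELD_SHIFT) & ((1u << FIELD_WIDTH) - 1);
    }
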
#ifdef TARGET_AARCH64
@@ -12775,7 +12643,7 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
uint64_t pmask;
assert(vq >= 1 && vq <= ARM_MAX_VQ);
- assert(vq <= arm_env_get_cpu(env)->sve_max_vq);
+ assert(vq <= env_archcpu(env)->sve_max_vq);
/* Zap the high bits of the zregs. */
for (i = 0; i < 32; i++) {
@@ -12795,17 +12663,33 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
}
}
+static uint32_t sve_vqm1_for_el_sm_ena(CPUARMState *env, int el, bool sm)
+{
+ int exc_el;
+
+ if (sm) {
+ exc_el = sme_exception_el(env, el);
+ } else {
+ exc_el = sve_exception_el(env, el);
+ }
+ if (exc_el) {
+ return 0; /* disabled */
+ }
+ return sve_vqm1_for_el_sm(env, el, sm);
+}
+
/*
* Notice a change in SVE vector size when changing EL.
*/
void aarch64_sve_change_el(CPUARMState *env, int old_el,
int new_el, bool el0_a64)
{
+ ARMCPU *cpu = env_archcpu(env);
int old_len, new_len;
- bool old_a64, new_a64;
+ bool old_a64, new_a64, sm;
/* Nothing to do if no SVE. */
- if (!arm_feature(env, ARM_FEATURE_SVE)) {
+ if (!cpu_isar_feature(aa64_sve, cpu)) {
return;
}
@@ -12814,6 +12698,20 @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
return;
}
+ old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
+ new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
+
+ /*
+ * Both AArch64.TakeException and AArch64.ExceptionReturn
+ * invoke ResetSVEState when taking an exception from, or
+ * returning to, AArch32 state when PSTATE.SM is enabled.
+ */
+ sm = FIELD_EX64(env->svcr, SVCR, SM);
+ if (old_a64 != new_a64 && sm) {
+ arm_reset_sve_state(env);
+ return;
+ }
+
/*
* DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
* at ELx, or not available because the EL is in AArch32 state, then
@@ -12826,12 +12724,13 @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
* we already have the correct register contents when encountering the
* vq0->vq0 transition between EL0->EL1.
*/
- old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
- old_len = (old_a64 && !sve_exception_el(env, old_el)
- ? sve_zcr_len_for_el(env, old_el) : 0);
- new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
- new_len = (new_a64 && !sve_exception_el(env, new_el)
- ? sve_zcr_len_for_el(env, new_el) : 0);
+ old_len = new_len = 0;
+ if (old_a64) {
+ old_len = sve_vqm1_for_el_sm_ena(env, old_el, sm);
+ }
+ if (new_a64) {
+ new_len = sve_vqm1_for_el_sm_ena(env, new_el, sm);
+ }
/* When changing vector length, clear inaccessible state. */
if (new_len < old_len) {
@@ -12839,3 +12738,63 @@ void aarch64_sve_change_el(CPUARMState *env, int old_el,
}
}
#endif
+
+#ifndef CONFIG_USER_ONLY
+ARMSecuritySpace arm_security_space(CPUARMState *env)
+{
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ return arm_secure_to_space(env->v7m.secure);
+ }
+
+ /*
+ * If EL3 is not supported then the secure state is implementation
+ * defined, in which case QEMU defaults to non-secure.
+ */
+ if (!arm_feature(env, ARM_FEATURE_EL3)) {
+ return ARMSS_NonSecure;
+ }
+
+ /* Check for AArch64 EL3 or AArch32 Mon. */
+ if (is_a64(env)) {
+ if (extract32(env->pstate, 2, 2) == 3) {
+ if (cpu_isar_feature(aa64_rme, env_archcpu(env))) {
+ return ARMSS_Root;
+ } else {
+ return ARMSS_Secure;
+ }
+ }
+ } else {
+ if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_MON) {
+ return ARMSS_Secure;
+ }
+ }
+
+ return arm_security_space_below_el3(env);
+}
+
+ARMSecuritySpace arm_security_space_below_el3(CPUARMState *env)
+{
+ assert(!arm_feature(env, ARM_FEATURE_M));
+
+ /*
+ * If EL3 is not supported then the secure state is implementation
+ * defined, in which case QEMU defaults to non-secure.
+ */
+ if (!arm_feature(env, ARM_FEATURE_EL3)) {
+ return ARMSS_NonSecure;
+ }
+
+ /*
+ * Note NSE cannot be set without RME, and NSE & !NS is Reserved.
+ * Ignoring NSE when !NS retains consistency without having to
+ * modify other predicates.
+ */
+ if (!(env->cp15.scr_el3 & SCR_NS)) {
+ return ARMSS_Secure;
+ } else if (env->cp15.scr_el3 & SCR_NSE) {
+ return ARMSS_Realm;
+ } else {
+ return ARMSS_NonSecure;
+ }
+}
+#endif /* !CONFIG_USER_ONLY */
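
The two new functions above reduce to a small decision table over SCR_EL3.NS and SCR_EL3.NSE, plus the EL3/Monitor special case handled in arm_security_space(). Restated as a standalone sketch for reference (illustration only; the enum names mirror the ARMSS_* values used above but are not QEMU types):

    #include <stdbool.h>

    typedef enum { SS_SECURE, SS_NONSECURE, SS_REALM, SS_ROOT } SecSpace;

    /*
     * Security space below EL3, as selected by SCR_EL3.NS and SCR_EL3.NSE:
     *   NS=0        -> Secure     (NSE=1 with NS=0 is Reserved, treated as NS=0)
     *   NS=1, NSE=0 -> NonSecure
     *   NS=1, NSE=1 -> Realm
     * Root is only reachable when executing at EL3 on a CPU with FEAT_RME.
     */
    static SecSpace space_below_el3(bool ns, bool nse)
    {
        if (!ns) {
            return SS_SECURE;
        }
        return nse ? SS_REALM : SS_NONSECURE;
    }
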
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 8c9590091b..2b02733305 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -6,9 +6,8 @@ DEF_HELPER_3(add_saturate, i32, env, i32, i32)
DEF_HELPER_3(sub_saturate, i32, env, i32, i32)
DEF_HELPER_3(add_usaturate, i32, env, i32, i32)
DEF_HELPER_3(sub_usaturate, i32, env, i32, i32)
-DEF_HELPER_2(double_saturate, i32, env, s32)
-DEF_HELPER_FLAGS_2(sdiv, TCG_CALL_NO_RWG_SE, s32, s32, s32)
-DEF_HELPER_FLAGS_2(udiv, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+DEF_HELPER_FLAGS_3(sdiv, TCG_CALL_NO_RWG, s32, env, s32, s32)
+DEF_HELPER_FLAGS_3(udiv, TCG_CALL_NO_RWG, i32, env, i32, i32)
DEF_HELPER_FLAGS_1(rbit, TCG_CALL_NO_RWG_SE, i32, i32)
#define PAS_OP(pfx) \
@@ -45,17 +44,19 @@ DEF_HELPER_FLAGS_2(usad8, TCG_CALL_NO_RWG_SE, i32, i32, i32)
DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE,
i32, i32, i32, i32)
-DEF_HELPER_2(exception_internal, void, env, i32)
-DEF_HELPER_4(exception_with_syndrome, void, env, i32, i32, i32)
-DEF_HELPER_2(exception_bkpt_insn, void, env, i32)
+DEF_HELPER_2(exception_internal, noreturn, env, i32)
+DEF_HELPER_3(exception_with_syndrome, noreturn, env, i32, i32)
+DEF_HELPER_4(exception_with_syndrome_el, noreturn, env, i32, i32, i32)
+DEF_HELPER_2(exception_bkpt_insn, noreturn, env, i32)
+DEF_HELPER_2(exception_swstep, noreturn, env, i32)
+DEF_HELPER_2(exception_pc_alignment, noreturn, env, tl)
DEF_HELPER_1(setend, void, env)
DEF_HELPER_2(wfi, void, env, i32)
DEF_HELPER_1(wfe, void, env)
DEF_HELPER_1(yield, void, env)
DEF_HELPER_1(pre_hvc, void, env)
DEF_HELPER_2(pre_smc, void, env, i32)
-
-DEF_HELPER_1(check_breakpoints, void, env)
+DEF_HELPER_1(vesb, void, env)
DEF_HELPER_3(cpsr_write, void, env, i32, i32)
DEF_HELPER_2(cpsr_write_eret, void, env, i32)
@@ -69,17 +70,23 @@ DEF_HELPER_2(v7m_blxns, void, env, i32)
DEF_HELPER_3(v7m_tt, i32, env, i32, i32)
+DEF_HELPER_1(v7m_preserve_fp_state, void, env)
+
+DEF_HELPER_2(v7m_vlstm, void, env, i32)
+DEF_HELPER_2(v7m_vlldm, void, env, i32)
+
DEF_HELPER_2(v8m_stackcheck, void, env, i32)
-DEF_HELPER_4(access_check_cp_reg, void, env, ptr, i32, i32)
-DEF_HELPER_3(set_cp_reg, void, env, ptr, i32)
-DEF_HELPER_2(get_cp_reg, i32, env, ptr)
-DEF_HELPER_3(set_cp_reg64, void, env, ptr, i64)
-DEF_HELPER_2(get_cp_reg64, i64, env, ptr)
+DEF_HELPER_FLAGS_2(check_bxj_trap, TCG_CALL_NO_WG, void, env, i32)
-DEF_HELPER_3(msr_i_pstate, void, env, i32, i32)
-DEF_HELPER_1(clear_pstate_ss, void, env)
-DEF_HELPER_1(exception_return, void, env)
+DEF_HELPER_4(access_check_cp_reg, cptr, env, i32, i32, i32)
+DEF_HELPER_FLAGS_2(lookup_cp_reg, TCG_CALL_NO_RWG_SE, cptr, env, i32)
+DEF_HELPER_FLAGS_2(tidcp_el0, TCG_CALL_NO_WG, void, env, i32)
+DEF_HELPER_FLAGS_2(tidcp_el1, TCG_CALL_NO_WG, void, env, i32)
+DEF_HELPER_3(set_cp_reg, void, env, cptr, i32)
+DEF_HELPER_2(get_cp_reg, i32, env, cptr)
+DEF_HELPER_3(set_cp_reg64, void, env, cptr, i64)
+DEF_HELPER_2(get_cp_reg64, i64, env, cptr)
DEF_HELPER_2(get_r13_banked, i32, env, i32)
DEF_HELPER_3(set_r13_banked, void, env, i32, i32)
@@ -90,38 +97,61 @@ DEF_HELPER_4(msr_banked, void, env, i32, i32, i32)
DEF_HELPER_2(get_user_reg, i32, env, i32)
DEF_HELPER_3(set_user_reg, void, env, i32, i32)
+DEF_HELPER_FLAGS_1(rebuild_hflags_m32_newel, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_FLAGS_2(rebuild_hflags_m32, TCG_CALL_NO_RWG, void, env, int)
+DEF_HELPER_FLAGS_1(rebuild_hflags_a32_newel, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int)
+DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int)
+
+DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32)
+
DEF_HELPER_1(vfp_get_fpscr, i32, env)
DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
+DEF_HELPER_3(vfp_addh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_adds, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_addd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_subh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_subs, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_subd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_mulh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_muls, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_muld, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_divh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_divs, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_divd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_maxh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_maxs, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_maxd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_minh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_mins, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_mind, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_maxnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_maxnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
+DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
+DEF_HELPER_1(vfp_negh, f16, f16)
DEF_HELPER_1(vfp_negs, f32, f32)
DEF_HELPER_1(vfp_negd, f64, f64)
+DEF_HELPER_1(vfp_absh, f16, f16)
DEF_HELPER_1(vfp_abss, f32, f32)
DEF_HELPER_1(vfp_absd, f64, f64)
+DEF_HELPER_2(vfp_sqrth, f16, f16, env)
DEF_HELPER_2(vfp_sqrts, f32, f32, env)
DEF_HELPER_2(vfp_sqrtd, f64, f64, env)
+DEF_HELPER_3(vfp_cmph, void, f16, f16, env)
DEF_HELPER_3(vfp_cmps, void, f32, f32, env)
DEF_HELPER_3(vfp_cmpd, void, f64, f64, env)
+DEF_HELPER_3(vfp_cmpeh, void, f16, f16, env)
DEF_HELPER_3(vfp_cmpes, void, f32, f32, env)
DEF_HELPER_3(vfp_cmped, void, f64, f64, env)
DEF_HELPER_2(vfp_fcvtds, f64, f32, env)
DEF_HELPER_2(vfp_fcvtsd, f32, f64, env)
+DEF_HELPER_FLAGS_2(bfcvt, TCG_CALL_NO_RWG, i32, f32, ptr)
+DEF_HELPER_FLAGS_2(bfcvt_pair, TCG_CALL_NO_RWG, i32, i64, ptr)
DEF_HELPER_2(vfp_uitoh, f16, i32, ptr)
DEF_HELPER_2(vfp_uitos, f32, i32, ptr)
@@ -143,6 +173,10 @@ DEF_HELPER_2(vfp_tosizh, s32, f16, ptr)
DEF_HELPER_2(vfp_tosizs, s32, f32, ptr)
DEF_HELPER_2(vfp_tosizd, s32, f64, ptr)
+DEF_HELPER_3(vfp_toshh_round_to_zero, i32, f16, i32, ptr)
+DEF_HELPER_3(vfp_toslh_round_to_zero, i32, f16, i32, ptr)
+DEF_HELPER_3(vfp_touhh_round_to_zero, i32, f16, i32, ptr)
+DEF_HELPER_3(vfp_toulh_round_to_zero, i32, f16, i32, ptr)
DEF_HELPER_3(vfp_toshs_round_to_zero, i32, f32, i32, ptr)
DEF_HELPER_3(vfp_tosls_round_to_zero, i32, f32, i32, ptr)
DEF_HELPER_3(vfp_touhs_round_to_zero, i32, f32, i32, ptr)
@@ -181,13 +215,27 @@ DEF_HELPER_3(vfp_sqtod, f64, i64, i32, ptr)
DEF_HELPER_3(vfp_uhtod, f64, i64, i32, ptr)
DEF_HELPER_3(vfp_ultod, f64, i64, i32, ptr)
DEF_HELPER_3(vfp_uqtod, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_shtoh, f16, i32, i32, ptr)
+DEF_HELPER_3(vfp_uhtoh, f16, i32, i32, ptr)
DEF_HELPER_3(vfp_sltoh, f16, i32, i32, ptr)
DEF_HELPER_3(vfp_ultoh, f16, i32, i32, ptr)
DEF_HELPER_3(vfp_sqtoh, f16, i64, i32, ptr)
DEF_HELPER_3(vfp_uqtoh, f16, i64, i32, ptr)
+DEF_HELPER_3(vfp_shtos_round_to_nearest, f32, i32, i32, ptr)
+DEF_HELPER_3(vfp_sltos_round_to_nearest, f32, i32, i32, ptr)
+DEF_HELPER_3(vfp_uhtos_round_to_nearest, f32, i32, i32, ptr)
+DEF_HELPER_3(vfp_ultos_round_to_nearest, f32, i32, i32, ptr)
+DEF_HELPER_3(vfp_shtod_round_to_nearest, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_sltod_round_to_nearest, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_uhtod_round_to_nearest, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_ultod_round_to_nearest, f64, i64, i32, ptr)
+DEF_HELPER_3(vfp_shtoh_round_to_nearest, f16, i32, i32, ptr)
+DEF_HELPER_3(vfp_uhtoh_round_to_nearest, f16, i32, i32, ptr)
+DEF_HELPER_3(vfp_sltoh_round_to_nearest, f16, i32, i32, ptr)
+DEF_HELPER_3(vfp_ultoh_round_to_nearest, f16, i32, i32, ptr)
+
DEF_HELPER_FLAGS_2(set_rmode, TCG_CALL_NO_RWG, i32, i32, ptr)
-DEF_HELPER_FLAGS_2(set_neon_rmode, TCG_CALL_NO_RWG, i32, i32, env)
DEF_HELPER_FLAGS_3(vfp_fcvt_f16_to_f32, TCG_CALL_NO_RWG, f32, f16, ptr, i32)
DEF_HELPER_FLAGS_3(vfp_fcvt_f32_to_f16, TCG_CALL_NO_RWG, f16, f32, ptr, i32)
@@ -196,29 +244,35 @@ DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, ptr, i32)
DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr)
DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
+DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, ptr)
-DEF_HELPER_3(recps_f32, f32, f32, f32, env)
-DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
-DEF_HELPER_2(recpe_u32, i32, i32, ptr)
-DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr)
-DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i32, i32, i32, ptr, i32)
+DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32)
+DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32)
+DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i64, env, i32, i64, i64)
DEF_HELPER_3(shl_cc, i32, env, i32, i32)
DEF_HELPER_3(shr_cc, i32, env, i32, i32)
DEF_HELPER_3(sar_cc, i32, env, i32, i32)
DEF_HELPER_3(ror_cc, i32, env, i32, i32)
+DEF_HELPER_FLAGS_2(rinth_exact, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(rints_exact, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, ptr)
+DEF_HELPER_FLAGS_2(rinth, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr)
+DEF_HELPER_FLAGS_2(vjcvt, TCG_CALL_NO_RWG, i32, f64, env)
+DEF_HELPER_FLAGS_2(fjcvtzs, TCG_CALL_NO_RWG, i64, f64, ptr)
+
+DEF_HELPER_FLAGS_3(check_hcr_el2_trap, TCG_CALL_NO_WG, void, env, i32, i32)
+
/* neon_helper.c */
DEF_HELPER_FLAGS_3(neon_qadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32)
DEF_HELPER_FLAGS_3(neon_qadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32)
@@ -264,31 +318,6 @@ DEF_HELPER_2(neon_hsub_u16, i32, i32, i32)
DEF_HELPER_2(neon_hsub_s32, s32, s32, s32)
DEF_HELPER_2(neon_hsub_u32, i32, i32, i32)
-DEF_HELPER_2(neon_cgt_u8, i32, i32, i32)
-DEF_HELPER_2(neon_cgt_s8, i32, i32, i32)
-DEF_HELPER_2(neon_cgt_u16, i32, i32, i32)
-DEF_HELPER_2(neon_cgt_s16, i32, i32, i32)
-DEF_HELPER_2(neon_cgt_u32, i32, i32, i32)
-DEF_HELPER_2(neon_cgt_s32, i32, i32, i32)
-DEF_HELPER_2(neon_cge_u8, i32, i32, i32)
-DEF_HELPER_2(neon_cge_s8, i32, i32, i32)
-DEF_HELPER_2(neon_cge_u16, i32, i32, i32)
-DEF_HELPER_2(neon_cge_s16, i32, i32, i32)
-DEF_HELPER_2(neon_cge_u32, i32, i32, i32)
-DEF_HELPER_2(neon_cge_s32, i32, i32, i32)
-
-DEF_HELPER_2(neon_min_u8, i32, i32, i32)
-DEF_HELPER_2(neon_min_s8, i32, i32, i32)
-DEF_HELPER_2(neon_min_u16, i32, i32, i32)
-DEF_HELPER_2(neon_min_s16, i32, i32, i32)
-DEF_HELPER_2(neon_min_u32, i32, i32, i32)
-DEF_HELPER_2(neon_min_s32, i32, i32, i32)
-DEF_HELPER_2(neon_max_u8, i32, i32, i32)
-DEF_HELPER_2(neon_max_s8, i32, i32, i32)
-DEF_HELPER_2(neon_max_u16, i32, i32, i32)
-DEF_HELPER_2(neon_max_s16, i32, i32, i32)
-DEF_HELPER_2(neon_max_u32, i32, i32, i32)
-DEF_HELPER_2(neon_max_s32, i32, i32, i32)
DEF_HELPER_2(neon_pmin_u8, i32, i32, i32)
DEF_HELPER_2(neon_pmin_s8, i32, i32, i32)
DEF_HELPER_2(neon_pmin_u16, i32, i32, i32)
@@ -298,21 +327,8 @@ DEF_HELPER_2(neon_pmax_s8, i32, i32, i32)
DEF_HELPER_2(neon_pmax_u16, i32, i32, i32)
DEF_HELPER_2(neon_pmax_s16, i32, i32, i32)
-DEF_HELPER_2(neon_abd_u8, i32, i32, i32)
-DEF_HELPER_2(neon_abd_s8, i32, i32, i32)
-DEF_HELPER_2(neon_abd_u16, i32, i32, i32)
-DEF_HELPER_2(neon_abd_s16, i32, i32, i32)
-DEF_HELPER_2(neon_abd_u32, i32, i32, i32)
-DEF_HELPER_2(neon_abd_s32, i32, i32, i32)
-
-DEF_HELPER_2(neon_shl_u8, i32, i32, i32)
-DEF_HELPER_2(neon_shl_s8, i32, i32, i32)
DEF_HELPER_2(neon_shl_u16, i32, i32, i32)
DEF_HELPER_2(neon_shl_s16, i32, i32, i32)
-DEF_HELPER_2(neon_shl_u32, i32, i32, i32)
-DEF_HELPER_2(neon_shl_s32, i32, i32, i32)
-DEF_HELPER_2(neon_shl_u64, i64, i64, i64)
-DEF_HELPER_2(neon_shl_s64, i64, i64, i64)
DEF_HELPER_2(neon_rshl_u8, i32, i32, i32)
DEF_HELPER_2(neon_rshl_s8, i32, i32, i32)
DEF_HELPER_2(neon_rshl_u16, i32, i32, i32)
@@ -350,18 +366,11 @@ DEF_HELPER_2(neon_sub_u8, i32, i32, i32)
DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
DEF_HELPER_2(neon_mul_u16, i32, i32, i32)
-DEF_HELPER_2(neon_mul_p8, i32, i32, i32)
-DEF_HELPER_2(neon_mull_p8, i64, i32, i32)
DEF_HELPER_2(neon_tst_u8, i32, i32, i32)
DEF_HELPER_2(neon_tst_u16, i32, i32, i32)
DEF_HELPER_2(neon_tst_u32, i32, i32, i32)
-DEF_HELPER_2(neon_ceq_u8, i32, i32, i32)
-DEF_HELPER_2(neon_ceq_u16, i32, i32, i32)
-DEF_HELPER_2(neon_ceq_u32, i32, i32, i32)
-DEF_HELPER_1(neon_abs_s8, i32, i32)
-DEF_HELPER_1(neon_abs_s16, i32, i32)
DEF_HELPER_1(neon_clz_u8, i32, i32)
DEF_HELPER_1(neon_clz_u16, i32, i32)
DEF_HELPER_1(neon_cls_s8, i32, i32)
@@ -430,7 +439,6 @@ DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32)
DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32)
DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64)
-DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr)
DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr)
DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr)
DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr)
@@ -545,36 +553,45 @@ DEF_HELPER_FLAGS_2(neon_qzip8, TCG_CALL_NO_RWG, void, ptr, ptr)
DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr)
DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_aesd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_2(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_2(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr)
-
-DEF_HELPER_FLAGS_3(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-DEF_HELPER_FLAGS_2(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-
-DEF_HELPER_FLAGS_3(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-DEF_HELPER_FLAGS_2(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_sha512su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-
-DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32)
-DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
-
-DEF_HELPER_FLAGS_2(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr)
-DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_3(crypto_aesimc, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(crypto_sha1su0, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha1c, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha1p, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha1m, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(crypto_sm3tt1a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3tt1b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3tt2a, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3tt2b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm3partw2, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(crypto_rax1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
-DEF_HELPER_2(dc_zva, void, env, i64)
-
-DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
@@ -585,34 +602,97 @@ DEF_HELPER_FLAGS_5(gvec_qrdmlah_s32, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s32, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_sdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_udot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_sdot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_udot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlah_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlah_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlah_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlah_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_sdot_idx_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_udot_idx_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_sdot_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_4(gvec_udot_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_udot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sdot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_udot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_usdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fcaddh, TCG_CALL_NO_RWG,
+DEF_HELPER_FLAGS_5(gvec_sdot_idx_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fcadds, TCG_CALL_NO_RWG,
+DEF_HELPER_FLAGS_5(gvec_udot_idx_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fcaddd, TCG_CALL_NO_RWG,
+DEF_HELPER_FLAGS_5(gvec_sdot_idx_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-
-DEF_HELPER_FLAGS_5(gvec_fcmlah, TCG_CALL_NO_RWG,
+DEF_HELPER_FLAGS_5(gvec_udot_idx_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fcmlah_idx, TCG_CALL_NO_RWG,
+DEF_HELPER_FLAGS_5(gvec_sudot_idx_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fcmlas, TCG_CALL_NO_RWG,
+DEF_HELPER_FLAGS_5(gvec_usdot_idx_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fcaddh, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fcmlas_idx, TCG_CALL_NO_RWG,
+DEF_HELPER_FLAGS_5(gvec_fcadds, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(gvec_fcmlad, TCG_CALL_NO_RWG,
+DEF_HELPER_FLAGS_5(gvec_fcaddd, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(gvec_fcmlah, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(gvec_fcmlah_idx, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(gvec_fcmlas, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(gvec_fcmlas_idx, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(gvec_fcmlad, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_paddh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_pmaxh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_pminh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_padds, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_pmaxs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_pmins, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sstoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ustoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_tosszh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_tosizs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_touszh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_touizs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vcvt_sf, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_uf, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_fs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_fu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vcvt_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_uh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_hs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_hu, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vcvt_rm_ss, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rm_us, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rm_sh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vcvt_rm_uh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vrint_rm_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vrint_rm_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_vrintx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -621,6 +701,21 @@ DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_fcgt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_fcge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_fcge0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_fceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_fceq0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_fcle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_fcle0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_fclt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_fclt0_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
@@ -633,6 +728,54 @@ DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fcge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fcge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fcgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fcgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_facge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_facge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_facgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_facgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmaxnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_rsqrts_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmla_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmls_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
@@ -647,6 +790,16 @@ DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(gvec_fmul_idx_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmla_nf_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmls_nf_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_6(gvec_fmla_idx_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
@@ -654,7 +807,245 @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqadd_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqadd_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqadd_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqadd_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqadd_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqadd_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqsub_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqsub_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqsub_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uqsub_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqsub_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqsub_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_fmlal_a32, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmlal_a64, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a32, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmlal_idx_a64, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_2(frint32_s, TCG_CALL_NO_RWG, f32, f32, ptr)
+DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, ptr)
+DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, ptr)
+DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, ptr)
+
+DEF_HELPER_FLAGS_3(gvec_ceq0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ceq0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_clt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_clt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_cle0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_cle0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_cgt0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_cgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_cge0_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_cge0_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_smulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_smulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_umulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_umulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_ssra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ssra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ssra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ssra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_usra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_usra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_usra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_usra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_srshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_urshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_urshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_urshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_urshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_srsra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srsra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srsra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_srsra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_ursra_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ursra_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ursra_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_ursra_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_sri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sri_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sri_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_sli_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sli_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sli_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sli_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_sabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_uabd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_saba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_saba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_saba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_saba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_uaba_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uaba_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uaba_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_uaba_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_mul_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_mul_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_mla_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mla_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mla_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_mls_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmulh_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqrdmulh_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(sve2_fmlal_zzzw_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve2_fmlal_zzxw_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(gvec_xar_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_smmla_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_ummla_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_usmmla_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_bfdot, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_bfdot_idx, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_bfmmla, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
-#include "helper-a64.h"
-#include "helper-sve.h"
+#include "tcg/helper-a64.h"
+#include "tcg/helper-sve.h"
+#include "tcg/helper-sme.h"
#endif
+
+#include "tcg/helper-mve.h"
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
new file mode 100644
index 0000000000..65a5601804
--- /dev/null
+++ b/target/arm/hvf/hvf.c
@@ -0,0 +1,2252 @@
+/*
+ * QEMU Hypervisor.framework support for Apple Silicon
+ *
+ * Copyright 2020 Alexander Graf <agraf@csgraf.de>
+ * Copyright 2020 Google LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+
+#include "sysemu/runstate.h"
+#include "sysemu/hvf.h"
+#include "sysemu/hvf_int.h"
+#include "sysemu/hw_accel.h"
+#include "hvf_arm.h"
+#include "cpregs.h"
+
+#include <mach/mach_time.h>
+
+#include "exec/address-spaces.h"
+#include "hw/irq.h"
+#include "qemu/main-loop.h"
+#include "sysemu/cpus.h"
+#include "arm-powerctl.h"
+#include "target/arm/cpu.h"
+#include "target/arm/internals.h"
+#include "target/arm/multiprocessing.h"
+#include "target/arm/gtimer.h"
+#include "trace/trace-target_arm_hvf.h"
+#include "migration/vmstate.h"
+
+#include "exec/gdbstub.h"
+
+#define MDSCR_EL1_SS_SHIFT 0
+#define MDSCR_EL1_MDE_SHIFT 15
+
+static const uint16_t dbgbcr_regs[] = {
+ HV_SYS_REG_DBGBCR0_EL1,
+ HV_SYS_REG_DBGBCR1_EL1,
+ HV_SYS_REG_DBGBCR2_EL1,
+ HV_SYS_REG_DBGBCR3_EL1,
+ HV_SYS_REG_DBGBCR4_EL1,
+ HV_SYS_REG_DBGBCR5_EL1,
+ HV_SYS_REG_DBGBCR6_EL1,
+ HV_SYS_REG_DBGBCR7_EL1,
+ HV_SYS_REG_DBGBCR8_EL1,
+ HV_SYS_REG_DBGBCR9_EL1,
+ HV_SYS_REG_DBGBCR10_EL1,
+ HV_SYS_REG_DBGBCR11_EL1,
+ HV_SYS_REG_DBGBCR12_EL1,
+ HV_SYS_REG_DBGBCR13_EL1,
+ HV_SYS_REG_DBGBCR14_EL1,
+ HV_SYS_REG_DBGBCR15_EL1,
+};
+
+static const uint16_t dbgbvr_regs[] = {
+ HV_SYS_REG_DBGBVR0_EL1,
+ HV_SYS_REG_DBGBVR1_EL1,
+ HV_SYS_REG_DBGBVR2_EL1,
+ HV_SYS_REG_DBGBVR3_EL1,
+ HV_SYS_REG_DBGBVR4_EL1,
+ HV_SYS_REG_DBGBVR5_EL1,
+ HV_SYS_REG_DBGBVR6_EL1,
+ HV_SYS_REG_DBGBVR7_EL1,
+ HV_SYS_REG_DBGBVR8_EL1,
+ HV_SYS_REG_DBGBVR9_EL1,
+ HV_SYS_REG_DBGBVR10_EL1,
+ HV_SYS_REG_DBGBVR11_EL1,
+ HV_SYS_REG_DBGBVR12_EL1,
+ HV_SYS_REG_DBGBVR13_EL1,
+ HV_SYS_REG_DBGBVR14_EL1,
+ HV_SYS_REG_DBGBVR15_EL1,
+};
+
+static const uint16_t dbgwcr_regs[] = {
+ HV_SYS_REG_DBGWCR0_EL1,
+ HV_SYS_REG_DBGWCR1_EL1,
+ HV_SYS_REG_DBGWCR2_EL1,
+ HV_SYS_REG_DBGWCR3_EL1,
+ HV_SYS_REG_DBGWCR4_EL1,
+ HV_SYS_REG_DBGWCR5_EL1,
+ HV_SYS_REG_DBGWCR6_EL1,
+ HV_SYS_REG_DBGWCR7_EL1,
+ HV_SYS_REG_DBGWCR8_EL1,
+ HV_SYS_REG_DBGWCR9_EL1,
+ HV_SYS_REG_DBGWCR10_EL1,
+ HV_SYS_REG_DBGWCR11_EL1,
+ HV_SYS_REG_DBGWCR12_EL1,
+ HV_SYS_REG_DBGWCR13_EL1,
+ HV_SYS_REG_DBGWCR14_EL1,
+ HV_SYS_REG_DBGWCR15_EL1,
+};
+
+static const uint16_t dbgwvr_regs[] = {
+ HV_SYS_REG_DBGWVR0_EL1,
+ HV_SYS_REG_DBGWVR1_EL1,
+ HV_SYS_REG_DBGWVR2_EL1,
+ HV_SYS_REG_DBGWVR3_EL1,
+ HV_SYS_REG_DBGWVR4_EL1,
+ HV_SYS_REG_DBGWVR5_EL1,
+ HV_SYS_REG_DBGWVR6_EL1,
+ HV_SYS_REG_DBGWVR7_EL1,
+ HV_SYS_REG_DBGWVR8_EL1,
+ HV_SYS_REG_DBGWVR9_EL1,
+ HV_SYS_REG_DBGWVR10_EL1,
+ HV_SYS_REG_DBGWVR11_EL1,
+ HV_SYS_REG_DBGWVR12_EL1,
+ HV_SYS_REG_DBGWVR13_EL1,
+ HV_SYS_REG_DBGWVR14_EL1,
+ HV_SYS_REG_DBGWVR15_EL1,
+};
+
+static inline int hvf_arm_num_brps(hv_vcpu_config_t config)
+{
+ uint64_t val;
+ hv_return_t ret;
+ ret = hv_vcpu_config_get_feature_reg(config, HV_FEATURE_REG_ID_AA64DFR0_EL1,
+ &val);
+ assert_hvf_ok(ret);
+ return FIELD_EX64(val, ID_AA64DFR0, BRPS) + 1;
+}
+
+static inline int hvf_arm_num_wrps(hv_vcpu_config_t config)
+{
+ uint64_t val;
+ hv_return_t ret;
+ ret = hv_vcpu_config_get_feature_reg(config, HV_FEATURE_REG_ID_AA64DFR0_EL1,
+ &val);
+ assert_hvf_ok(ret);
+ return FIELD_EX64(val, ID_AA64DFR0, WRPS) + 1;
+}
+
+void hvf_arm_init_debug(void)
+{
+ hv_vcpu_config_t config;
+ config = hv_vcpu_config_create();
+
+ max_hw_bps = hvf_arm_num_brps(config);
+ hw_breakpoints =
+ g_array_sized_new(true, true, sizeof(HWBreakpoint), max_hw_bps);
+
+ max_hw_wps = hvf_arm_num_wrps(config);
+ hw_watchpoints =
+ g_array_sized_new(true, true, sizeof(HWWatchpoint), max_hw_wps);
+}
+
+#define HVF_SYSREG(crn, crm, op0, op1, op2) \
+ ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2)
+#define PL1_WRITE_MASK 0x4
+
+#define SYSREG_OP0_SHIFT 20
+#define SYSREG_OP0_MASK 0x3
+#define SYSREG_OP0(sysreg) ((sysreg >> SYSREG_OP0_SHIFT) & SYSREG_OP0_MASK)
+#define SYSREG_OP1_SHIFT 14
+#define SYSREG_OP1_MASK 0x7
+#define SYSREG_OP1(sysreg) ((sysreg >> SYSREG_OP1_SHIFT) & SYSREG_OP1_MASK)
+#define SYSREG_CRN_SHIFT 10
+#define SYSREG_CRN_MASK 0xf
+#define SYSREG_CRN(sysreg) ((sysreg >> SYSREG_CRN_SHIFT) & SYSREG_CRN_MASK)
+#define SYSREG_CRM_SHIFT 1
+#define SYSREG_CRM_MASK 0xf
+#define SYSREG_CRM(sysreg) ((sysreg >> SYSREG_CRM_SHIFT) & SYSREG_CRM_MASK)
+#define SYSREG_OP2_SHIFT 17
+#define SYSREG_OP2_MASK 0x7
+#define SYSREG_OP2(sysreg) ((sysreg >> SYSREG_OP2_SHIFT) & SYSREG_OP2_MASK)
+
+#define SYSREG(op0, op1, crn, crm, op2) \
+ ((op0 << SYSREG_OP0_SHIFT) | \
+ (op1 << SYSREG_OP1_SHIFT) | \
+ (crn << SYSREG_CRN_SHIFT) | \
+ (crm << SYSREG_CRM_SHIFT) | \
+ (op2 << SYSREG_OP2_SHIFT))
+#define SYSREG_MASK \
+ SYSREG(SYSREG_OP0_MASK, \
+ SYSREG_OP1_MASK, \
+ SYSREG_CRN_MASK, \
+ SYSREG_CRM_MASK, \
+ SYSREG_OP2_MASK)
+#define SYSREG_OSLAR_EL1 SYSREG(2, 0, 1, 0, 4)
+#define SYSREG_OSLSR_EL1 SYSREG(2, 0, 1, 1, 4)
+#define SYSREG_OSDLR_EL1 SYSREG(2, 0, 1, 3, 4)
+#define SYSREG_CNTPCT_EL0 SYSREG(3, 3, 14, 0, 1)
+#define SYSREG_PMCR_EL0 SYSREG(3, 3, 9, 12, 0)
+#define SYSREG_PMUSERENR_EL0 SYSREG(3, 3, 9, 14, 0)
+#define SYSREG_PMCNTENSET_EL0 SYSREG(3, 3, 9, 12, 1)
+#define SYSREG_PMCNTENCLR_EL0 SYSREG(3, 3, 9, 12, 2)
+#define SYSREG_PMINTENCLR_EL1 SYSREG(3, 0, 9, 14, 2)
+#define SYSREG_PMOVSCLR_EL0 SYSREG(3, 3, 9, 12, 3)
+#define SYSREG_PMSWINC_EL0 SYSREG(3, 3, 9, 12, 4)
+#define SYSREG_PMSELR_EL0 SYSREG(3, 3, 9, 12, 5)
+#define SYSREG_PMCEID0_EL0 SYSREG(3, 3, 9, 12, 6)
+#define SYSREG_PMCEID1_EL0 SYSREG(3, 3, 9, 12, 7)
+#define SYSREG_PMCCNTR_EL0 SYSREG(3, 3, 9, 13, 0)
+#define SYSREG_PMCCFILTR_EL0 SYSREG(3, 3, 14, 15, 7)
+
+#define SYSREG_ICC_AP0R0_EL1 SYSREG(3, 0, 12, 8, 4)
+#define SYSREG_ICC_AP0R1_EL1 SYSREG(3, 0, 12, 8, 5)
+#define SYSREG_ICC_AP0R2_EL1 SYSREG(3, 0, 12, 8, 6)
+#define SYSREG_ICC_AP0R3_EL1 SYSREG(3, 0, 12, 8, 7)
+#define SYSREG_ICC_AP1R0_EL1 SYSREG(3, 0, 12, 9, 0)
+#define SYSREG_ICC_AP1R1_EL1 SYSREG(3, 0, 12, 9, 1)
+#define SYSREG_ICC_AP1R2_EL1 SYSREG(3, 0, 12, 9, 2)
+#define SYSREG_ICC_AP1R3_EL1 SYSREG(3, 0, 12, 9, 3)
+#define SYSREG_ICC_ASGI1R_EL1 SYSREG(3, 0, 12, 11, 6)
+#define SYSREG_ICC_BPR0_EL1 SYSREG(3, 0, 12, 8, 3)
+#define SYSREG_ICC_BPR1_EL1 SYSREG(3, 0, 12, 12, 3)
+#define SYSREG_ICC_CTLR_EL1 SYSREG(3, 0, 12, 12, 4)
+#define SYSREG_ICC_DIR_EL1 SYSREG(3, 0, 12, 11, 1)
+#define SYSREG_ICC_EOIR0_EL1 SYSREG(3, 0, 12, 8, 1)
+#define SYSREG_ICC_EOIR1_EL1 SYSREG(3, 0, 12, 12, 1)
+#define SYSREG_ICC_HPPIR0_EL1 SYSREG(3, 0, 12, 8, 2)
+#define SYSREG_ICC_HPPIR1_EL1 SYSREG(3, 0, 12, 12, 2)
+#define SYSREG_ICC_IAR0_EL1 SYSREG(3, 0, 12, 8, 0)
+#define SYSREG_ICC_IAR1_EL1 SYSREG(3, 0, 12, 12, 0)
+#define SYSREG_ICC_IGRPEN0_EL1 SYSREG(3, 0, 12, 12, 6)
+#define SYSREG_ICC_IGRPEN1_EL1 SYSREG(3, 0, 12, 12, 7)
+#define SYSREG_ICC_PMR_EL1 SYSREG(3, 0, 4, 6, 0)
+#define SYSREG_ICC_RPR_EL1 SYSREG(3, 0, 12, 11, 3)
+#define SYSREG_ICC_SGI0R_EL1 SYSREG(3, 0, 12, 11, 7)
+#define SYSREG_ICC_SGI1R_EL1 SYSREG(3, 0, 12, 11, 5)
+#define SYSREG_ICC_SRE_EL1 SYSREG(3, 0, 12, 12, 5)
+
+#define SYSREG_MDSCR_EL1 SYSREG(2, 0, 0, 2, 2)
+#define SYSREG_DBGBVR0_EL1 SYSREG(2, 0, 0, 0, 4)
+#define SYSREG_DBGBCR0_EL1 SYSREG(2, 0, 0, 0, 5)
+#define SYSREG_DBGWVR0_EL1 SYSREG(2, 0, 0, 0, 6)
+#define SYSREG_DBGWCR0_EL1 SYSREG(2, 0, 0, 0, 7)
+#define SYSREG_DBGBVR1_EL1 SYSREG(2, 0, 0, 1, 4)
+#define SYSREG_DBGBCR1_EL1 SYSREG(2, 0, 0, 1, 5)
+#define SYSREG_DBGWVR1_EL1 SYSREG(2, 0, 0, 1, 6)
+#define SYSREG_DBGWCR1_EL1 SYSREG(2, 0, 0, 1, 7)
+#define SYSREG_DBGBVR2_EL1 SYSREG(2, 0, 0, 2, 4)
+#define SYSREG_DBGBCR2_EL1 SYSREG(2, 0, 0, 2, 5)
+#define SYSREG_DBGWVR2_EL1 SYSREG(2, 0, 0, 2, 6)
+#define SYSREG_DBGWCR2_EL1 SYSREG(2, 0, 0, 2, 7)
+#define SYSREG_DBGBVR3_EL1 SYSREG(2, 0, 0, 3, 4)
+#define SYSREG_DBGBCR3_EL1 SYSREG(2, 0, 0, 3, 5)
+#define SYSREG_DBGWVR3_EL1 SYSREG(2, 0, 0, 3, 6)
+#define SYSREG_DBGWCR3_EL1 SYSREG(2, 0, 0, 3, 7)
+#define SYSREG_DBGBVR4_EL1 SYSREG(2, 0, 0, 4, 4)
+#define SYSREG_DBGBCR4_EL1 SYSREG(2, 0, 0, 4, 5)
+#define SYSREG_DBGWVR4_EL1 SYSREG(2, 0, 0, 4, 6)
+#define SYSREG_DBGWCR4_EL1 SYSREG(2, 0, 0, 4, 7)
+#define SYSREG_DBGBVR5_EL1 SYSREG(2, 0, 0, 5, 4)
+#define SYSREG_DBGBCR5_EL1 SYSREG(2, 0, 0, 5, 5)
+#define SYSREG_DBGWVR5_EL1 SYSREG(2, 0, 0, 5, 6)
+#define SYSREG_DBGWCR5_EL1 SYSREG(2, 0, 0, 5, 7)
+#define SYSREG_DBGBVR6_EL1 SYSREG(2, 0, 0, 6, 4)
+#define SYSREG_DBGBCR6_EL1 SYSREG(2, 0, 0, 6, 5)
+#define SYSREG_DBGWVR6_EL1 SYSREG(2, 0, 0, 6, 6)
+#define SYSREG_DBGWCR6_EL1 SYSREG(2, 0, 0, 6, 7)
+#define SYSREG_DBGBVR7_EL1 SYSREG(2, 0, 0, 7, 4)
+#define SYSREG_DBGBCR7_EL1 SYSREG(2, 0, 0, 7, 5)
+#define SYSREG_DBGWVR7_EL1 SYSREG(2, 0, 0, 7, 6)
+#define SYSREG_DBGWCR7_EL1 SYSREG(2, 0, 0, 7, 7)
+#define SYSREG_DBGBVR8_EL1 SYSREG(2, 0, 0, 8, 4)
+#define SYSREG_DBGBCR8_EL1 SYSREG(2, 0, 0, 8, 5)
+#define SYSREG_DBGWVR8_EL1 SYSREG(2, 0, 0, 8, 6)
+#define SYSREG_DBGWCR8_EL1 SYSREG(2, 0, 0, 8, 7)
+#define SYSREG_DBGBVR9_EL1 SYSREG(2, 0, 0, 9, 4)
+#define SYSREG_DBGBCR9_EL1 SYSREG(2, 0, 0, 9, 5)
+#define SYSREG_DBGWVR9_EL1 SYSREG(2, 0, 0, 9, 6)
+#define SYSREG_DBGWCR9_EL1 SYSREG(2, 0, 0, 9, 7)
+#define SYSREG_DBGBVR10_EL1 SYSREG(2, 0, 0, 10, 4)
+#define SYSREG_DBGBCR10_EL1 SYSREG(2, 0, 0, 10, 5)
+#define SYSREG_DBGWVR10_EL1 SYSREG(2, 0, 0, 10, 6)
+#define SYSREG_DBGWCR10_EL1 SYSREG(2, 0, 0, 10, 7)
+#define SYSREG_DBGBVR11_EL1 SYSREG(2, 0, 0, 11, 4)
+#define SYSREG_DBGBCR11_EL1 SYSREG(2, 0, 0, 11, 5)
+#define SYSREG_DBGWVR11_EL1 SYSREG(2, 0, 0, 11, 6)
+#define SYSREG_DBGWCR11_EL1 SYSREG(2, 0, 0, 11, 7)
+#define SYSREG_DBGBVR12_EL1 SYSREG(2, 0, 0, 12, 4)
+#define SYSREG_DBGBCR12_EL1 SYSREG(2, 0, 0, 12, 5)
+#define SYSREG_DBGWVR12_EL1 SYSREG(2, 0, 0, 12, 6)
+#define SYSREG_DBGWCR12_EL1 SYSREG(2, 0, 0, 12, 7)
+#define SYSREG_DBGBVR13_EL1 SYSREG(2, 0, 0, 13, 4)
+#define SYSREG_DBGBCR13_EL1 SYSREG(2, 0, 0, 13, 5)
+#define SYSREG_DBGWVR13_EL1 SYSREG(2, 0, 0, 13, 6)
+#define SYSREG_DBGWCR13_EL1 SYSREG(2, 0, 0, 13, 7)
+#define SYSREG_DBGBVR14_EL1 SYSREG(2, 0, 0, 14, 4)
+#define SYSREG_DBGBCR14_EL1 SYSREG(2, 0, 0, 14, 5)
+#define SYSREG_DBGWVR14_EL1 SYSREG(2, 0, 0, 14, 6)
+#define SYSREG_DBGWCR14_EL1 SYSREG(2, 0, 0, 14, 7)
+#define SYSREG_DBGBVR15_EL1 SYSREG(2, 0, 0, 15, 4)
+#define SYSREG_DBGBCR15_EL1 SYSREG(2, 0, 0, 15, 5)
+#define SYSREG_DBGWVR15_EL1 SYSREG(2, 0, 0, 15, 6)
+#define SYSREG_DBGWCR15_EL1 SYSREG(2, 0, 0, 15, 7)
+
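As a side note (not part of the patch): the SYSREG() packing above matches the ESR_ELx ISS field layout for trapped MSR/MRS accesses (op0 at bits 21:20, op2 at 19:17, op1 at 16:14, CRn at 13:10, CRm at 4:1), which is why the individual *_SHIFT values look unordered. A minimal standalone sketch, using only the shift and mask values defined above (helper names here are illustrative, nothing is taken from QEMU), round-trips one of the encodings:

    /* Standalone sketch -- mirrors the SYSREG() shifts defined above. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    enum {
        OP0_SHIFT = 20, OP2_SHIFT = 17, OP1_SHIFT = 14,
        CRN_SHIFT = 10, CRM_SHIFT = 1,
    };

    static uint32_t sysreg(uint32_t op0, uint32_t op1, uint32_t crn,
                           uint32_t crm, uint32_t op2)
    {
        return (op0 << OP0_SHIFT) | (op1 << OP1_SHIFT) | (crn << CRN_SHIFT) |
               (crm << CRM_SHIFT) | (op2 << OP2_SHIFT);
    }

    int main(void)
    {
        uint32_t oslar = sysreg(2, 0, 1, 0, 4);         /* SYSREG_OSLAR_EL1 */
        printf("OSLAR_EL1 -> 0x%x\n", (unsigned)oslar); /* prints 0x280400 */
        assert(((oslar >> OP0_SHIFT) & 0x3) == 2);
        assert(((oslar >> OP1_SHIFT) & 0x7) == 0);
        assert(((oslar >> CRN_SHIFT) & 0xf) == 1);
        assert(((oslar >> CRM_SHIFT) & 0xf) == 0);
        assert(((oslar >> OP2_SHIFT) & 0x7) == 4);
        return 0;
    }
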
+#define WFX_IS_WFE (1 << 0)
+
+#define TMR_CTL_ENABLE (1 << 0)
+#define TMR_CTL_IMASK (1 << 1)
+#define TMR_CTL_ISTATUS (1 << 2)
+
+static void hvf_wfi(CPUState *cpu);
+
+typedef struct HVFVTimer {
+ /* Vtimer value during migration and paused state */
+ uint64_t vtimer_val;
+} HVFVTimer;
+
+static HVFVTimer vtimer;
+
+typedef struct ARMHostCPUFeatures {
+ ARMISARegisters isar;
+ uint64_t features;
+ uint64_t midr;
+ uint32_t reset_sctlr;
+ const char *dtb_compatible;
+} ARMHostCPUFeatures;
+
+static ARMHostCPUFeatures arm_host_cpu_features;
+
+struct hvf_reg_match {
+ int reg;
+ uint64_t offset;
+};
+
+static const struct hvf_reg_match hvf_reg_match[] = {
+ { HV_REG_X0, offsetof(CPUARMState, xregs[0]) },
+ { HV_REG_X1, offsetof(CPUARMState, xregs[1]) },
+ { HV_REG_X2, offsetof(CPUARMState, xregs[2]) },
+ { HV_REG_X3, offsetof(CPUARMState, xregs[3]) },
+ { HV_REG_X4, offsetof(CPUARMState, xregs[4]) },
+ { HV_REG_X5, offsetof(CPUARMState, xregs[5]) },
+ { HV_REG_X6, offsetof(CPUARMState, xregs[6]) },
+ { HV_REG_X7, offsetof(CPUARMState, xregs[7]) },
+ { HV_REG_X8, offsetof(CPUARMState, xregs[8]) },
+ { HV_REG_X9, offsetof(CPUARMState, xregs[9]) },
+ { HV_REG_X10, offsetof(CPUARMState, xregs[10]) },
+ { HV_REG_X11, offsetof(CPUARMState, xregs[11]) },
+ { HV_REG_X12, offsetof(CPUARMState, xregs[12]) },
+ { HV_REG_X13, offsetof(CPUARMState, xregs[13]) },
+ { HV_REG_X14, offsetof(CPUARMState, xregs[14]) },
+ { HV_REG_X15, offsetof(CPUARMState, xregs[15]) },
+ { HV_REG_X16, offsetof(CPUARMState, xregs[16]) },
+ { HV_REG_X17, offsetof(CPUARMState, xregs[17]) },
+ { HV_REG_X18, offsetof(CPUARMState, xregs[18]) },
+ { HV_REG_X19, offsetof(CPUARMState, xregs[19]) },
+ { HV_REG_X20, offsetof(CPUARMState, xregs[20]) },
+ { HV_REG_X21, offsetof(CPUARMState, xregs[21]) },
+ { HV_REG_X22, offsetof(CPUARMState, xregs[22]) },
+ { HV_REG_X23, offsetof(CPUARMState, xregs[23]) },
+ { HV_REG_X24, offsetof(CPUARMState, xregs[24]) },
+ { HV_REG_X25, offsetof(CPUARMState, xregs[25]) },
+ { HV_REG_X26, offsetof(CPUARMState, xregs[26]) },
+ { HV_REG_X27, offsetof(CPUARMState, xregs[27]) },
+ { HV_REG_X28, offsetof(CPUARMState, xregs[28]) },
+ { HV_REG_X29, offsetof(CPUARMState, xregs[29]) },
+ { HV_REG_X30, offsetof(CPUARMState, xregs[30]) },
+ { HV_REG_PC, offsetof(CPUARMState, pc) },
+};
+
+static const struct hvf_reg_match hvf_fpreg_match[] = {
+ { HV_SIMD_FP_REG_Q0, offsetof(CPUARMState, vfp.zregs[0]) },
+ { HV_SIMD_FP_REG_Q1, offsetof(CPUARMState, vfp.zregs[1]) },
+ { HV_SIMD_FP_REG_Q2, offsetof(CPUARMState, vfp.zregs[2]) },
+ { HV_SIMD_FP_REG_Q3, offsetof(CPUARMState, vfp.zregs[3]) },
+ { HV_SIMD_FP_REG_Q4, offsetof(CPUARMState, vfp.zregs[4]) },
+ { HV_SIMD_FP_REG_Q5, offsetof(CPUARMState, vfp.zregs[5]) },
+ { HV_SIMD_FP_REG_Q6, offsetof(CPUARMState, vfp.zregs[6]) },
+ { HV_SIMD_FP_REG_Q7, offsetof(CPUARMState, vfp.zregs[7]) },
+ { HV_SIMD_FP_REG_Q8, offsetof(CPUARMState, vfp.zregs[8]) },
+ { HV_SIMD_FP_REG_Q9, offsetof(CPUARMState, vfp.zregs[9]) },
+ { HV_SIMD_FP_REG_Q10, offsetof(CPUARMState, vfp.zregs[10]) },
+ { HV_SIMD_FP_REG_Q11, offsetof(CPUARMState, vfp.zregs[11]) },
+ { HV_SIMD_FP_REG_Q12, offsetof(CPUARMState, vfp.zregs[12]) },
+ { HV_SIMD_FP_REG_Q13, offsetof(CPUARMState, vfp.zregs[13]) },
+ { HV_SIMD_FP_REG_Q14, offsetof(CPUARMState, vfp.zregs[14]) },
+ { HV_SIMD_FP_REG_Q15, offsetof(CPUARMState, vfp.zregs[15]) },
+ { HV_SIMD_FP_REG_Q16, offsetof(CPUARMState, vfp.zregs[16]) },
+ { HV_SIMD_FP_REG_Q17, offsetof(CPUARMState, vfp.zregs[17]) },
+ { HV_SIMD_FP_REG_Q18, offsetof(CPUARMState, vfp.zregs[18]) },
+ { HV_SIMD_FP_REG_Q19, offsetof(CPUARMState, vfp.zregs[19]) },
+ { HV_SIMD_FP_REG_Q20, offsetof(CPUARMState, vfp.zregs[20]) },
+ { HV_SIMD_FP_REG_Q21, offsetof(CPUARMState, vfp.zregs[21]) },
+ { HV_SIMD_FP_REG_Q22, offsetof(CPUARMState, vfp.zregs[22]) },
+ { HV_SIMD_FP_REG_Q23, offsetof(CPUARMState, vfp.zregs[23]) },
+ { HV_SIMD_FP_REG_Q24, offsetof(CPUARMState, vfp.zregs[24]) },
+ { HV_SIMD_FP_REG_Q25, offsetof(CPUARMState, vfp.zregs[25]) },
+ { HV_SIMD_FP_REG_Q26, offsetof(CPUARMState, vfp.zregs[26]) },
+ { HV_SIMD_FP_REG_Q27, offsetof(CPUARMState, vfp.zregs[27]) },
+ { HV_SIMD_FP_REG_Q28, offsetof(CPUARMState, vfp.zregs[28]) },
+ { HV_SIMD_FP_REG_Q29, offsetof(CPUARMState, vfp.zregs[29]) },
+ { HV_SIMD_FP_REG_Q30, offsetof(CPUARMState, vfp.zregs[30]) },
+ { HV_SIMD_FP_REG_Q31, offsetof(CPUARMState, vfp.zregs[31]) },
+};
+
+struct hvf_sreg_match {
+ int reg;
+ uint32_t key;
+ uint32_t cp_idx;
+};
+
+static struct hvf_sreg_match hvf_sreg_match[] = {
+ { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR5_EL1, HVF_SYSREG(0, 5, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR10_EL1, HVF_SYSREG(0, 10, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR13_EL1, HVF_SYSREG(0, 13, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR14_EL1, HVF_SYSREG(0, 14, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 14, 0, 7) },
+
+ { HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 14, 0, 4) },
+ { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 14, 0, 5) },
+ { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 14, 0, 6) },
+ { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 14, 0, 7) },
+
+#ifdef SYNC_NO_RAW_REGS
+ /*
+ * The registers below are manually synced on init because they are
+ * marked as NO_RAW. We still list them to make number space sync easier.
+ */
+ { HV_SYS_REG_MDCCINT_EL1, HVF_SYSREG(0, 2, 2, 0, 0) },
+ { HV_SYS_REG_MIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 0) },
+ { HV_SYS_REG_MPIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 5) },
+ { HV_SYS_REG_ID_AA64PFR0_EL1, HVF_SYSREG(0, 4, 3, 0, 0) },
+#endif
+ { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 2) },
+ { HV_SYS_REG_ID_AA64DFR0_EL1, HVF_SYSREG(0, 5, 3, 0, 0) },
+ { HV_SYS_REG_ID_AA64DFR1_EL1, HVF_SYSREG(0, 5, 3, 0, 1) },
+ { HV_SYS_REG_ID_AA64ISAR0_EL1, HVF_SYSREG(0, 6, 3, 0, 0) },
+ { HV_SYS_REG_ID_AA64ISAR1_EL1, HVF_SYSREG(0, 6, 3, 0, 1) },
+#ifdef SYNC_NO_MMFR0
+ /* We keep the hardware MMFR0 around. HW limits are there anyway */
+ { HV_SYS_REG_ID_AA64MMFR0_EL1, HVF_SYSREG(0, 7, 3, 0, 0) },
+#endif
+ { HV_SYS_REG_ID_AA64MMFR1_EL1, HVF_SYSREG(0, 7, 3, 0, 1) },
+ { HV_SYS_REG_ID_AA64MMFR2_EL1, HVF_SYSREG(0, 7, 3, 0, 2) },
+
+ { HV_SYS_REG_MDSCR_EL1, HVF_SYSREG(0, 2, 2, 0, 2) },
+ { HV_SYS_REG_SCTLR_EL1, HVF_SYSREG(1, 0, 3, 0, 0) },
+ { HV_SYS_REG_CPACR_EL1, HVF_SYSREG(1, 0, 3, 0, 2) },
+ { HV_SYS_REG_TTBR0_EL1, HVF_SYSREG(2, 0, 3, 0, 0) },
+ { HV_SYS_REG_TTBR1_EL1, HVF_SYSREG(2, 0, 3, 0, 1) },
+ { HV_SYS_REG_TCR_EL1, HVF_SYSREG(2, 0, 3, 0, 2) },
+
+ { HV_SYS_REG_APIAKEYLO_EL1, HVF_SYSREG(2, 1, 3, 0, 0) },
+ { HV_SYS_REG_APIAKEYHI_EL1, HVF_SYSREG(2, 1, 3, 0, 1) },
+ { HV_SYS_REG_APIBKEYLO_EL1, HVF_SYSREG(2, 1, 3, 0, 2) },
+ { HV_SYS_REG_APIBKEYHI_EL1, HVF_SYSREG(2, 1, 3, 0, 3) },
+ { HV_SYS_REG_APDAKEYLO_EL1, HVF_SYSREG(2, 2, 3, 0, 0) },
+ { HV_SYS_REG_APDAKEYHI_EL1, HVF_SYSREG(2, 2, 3, 0, 1) },
+ { HV_SYS_REG_APDBKEYLO_EL1, HVF_SYSREG(2, 2, 3, 0, 2) },
+ { HV_SYS_REG_APDBKEYHI_EL1, HVF_SYSREG(2, 2, 3, 0, 3) },
+ { HV_SYS_REG_APGAKEYLO_EL1, HVF_SYSREG(2, 3, 3, 0, 0) },
+ { HV_SYS_REG_APGAKEYHI_EL1, HVF_SYSREG(2, 3, 3, 0, 1) },
+
+ { HV_SYS_REG_SPSR_EL1, HVF_SYSREG(4, 0, 3, 0, 0) },
+ { HV_SYS_REG_ELR_EL1, HVF_SYSREG(4, 0, 3, 0, 1) },
+ { HV_SYS_REG_SP_EL0, HVF_SYSREG(4, 1, 3, 0, 0) },
+ { HV_SYS_REG_AFSR0_EL1, HVF_SYSREG(5, 1, 3, 0, 0) },
+ { HV_SYS_REG_AFSR1_EL1, HVF_SYSREG(5, 1, 3, 0, 1) },
+ { HV_SYS_REG_ESR_EL1, HVF_SYSREG(5, 2, 3, 0, 0) },
+ { HV_SYS_REG_FAR_EL1, HVF_SYSREG(6, 0, 3, 0, 0) },
+ { HV_SYS_REG_PAR_EL1, HVF_SYSREG(7, 4, 3, 0, 0) },
+ { HV_SYS_REG_MAIR_EL1, HVF_SYSREG(10, 2, 3, 0, 0) },
+ { HV_SYS_REG_AMAIR_EL1, HVF_SYSREG(10, 3, 3, 0, 0) },
+ { HV_SYS_REG_VBAR_EL1, HVF_SYSREG(12, 0, 3, 0, 0) },
+ { HV_SYS_REG_CONTEXTIDR_EL1, HVF_SYSREG(13, 0, 3, 0, 1) },
+ { HV_SYS_REG_TPIDR_EL1, HVF_SYSREG(13, 0, 3, 0, 4) },
+ { HV_SYS_REG_CNTKCTL_EL1, HVF_SYSREG(14, 1, 3, 0, 0) },
+ { HV_SYS_REG_CSSELR_EL1, HVF_SYSREG(0, 0, 3, 2, 0) },
+ { HV_SYS_REG_TPIDR_EL0, HVF_SYSREG(13, 0, 3, 3, 2) },
+ { HV_SYS_REG_TPIDRRO_EL0, HVF_SYSREG(13, 0, 3, 3, 3) },
+ { HV_SYS_REG_CNTV_CTL_EL0, HVF_SYSREG(14, 3, 3, 3, 1) },
+ { HV_SYS_REG_CNTV_CVAL_EL0, HVF_SYSREG(14, 3, 3, 3, 2) },
+ { HV_SYS_REG_SP_EL1, HVF_SYSREG(4, 1, 3, 4, 0) },
+};
+
+int hvf_get_registers(CPUState *cpu)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ CPUARMState *env = &arm_cpu->env;
+ hv_return_t ret;
+ uint64_t val;
+ hv_simd_fp_uchar16_t fpval;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(hvf_reg_match); i++) {
+ ret = hv_vcpu_get_reg(cpu->accel->fd, hvf_reg_match[i].reg, &val);
+ *(uint64_t *)((void *)env + hvf_reg_match[i].offset) = val;
+ assert_hvf_ok(ret);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hvf_fpreg_match); i++) {
+ ret = hv_vcpu_get_simd_fp_reg(cpu->accel->fd, hvf_fpreg_match[i].reg,
+ &fpval);
+ memcpy((void *)env + hvf_fpreg_match[i].offset, &fpval, sizeof(fpval));
+ assert_hvf_ok(ret);
+ }
+
+ val = 0;
+ ret = hv_vcpu_get_reg(cpu->accel->fd, HV_REG_FPCR, &val);
+ assert_hvf_ok(ret);
+ vfp_set_fpcr(env, val);
+
+ val = 0;
+ ret = hv_vcpu_get_reg(cpu->accel->fd, HV_REG_FPSR, &val);
+ assert_hvf_ok(ret);
+ vfp_set_fpsr(env, val);
+
+ ret = hv_vcpu_get_reg(cpu->accel->fd, HV_REG_CPSR, &val);
+ assert_hvf_ok(ret);
+ pstate_write(env, val);
+
+ for (i = 0; i < ARRAY_SIZE(hvf_sreg_match); i++) {
+ if (hvf_sreg_match[i].cp_idx == -1) {
+ continue;
+ }
+
+ if (cpu->accel->guest_debug_enabled) {
+ /* Handle debug registers */
+ switch (hvf_sreg_match[i].reg) {
+ case HV_SYS_REG_DBGBVR0_EL1:
+ case HV_SYS_REG_DBGBCR0_EL1:
+ case HV_SYS_REG_DBGWVR0_EL1:
+ case HV_SYS_REG_DBGWCR0_EL1:
+ case HV_SYS_REG_DBGBVR1_EL1:
+ case HV_SYS_REG_DBGBCR1_EL1:
+ case HV_SYS_REG_DBGWVR1_EL1:
+ case HV_SYS_REG_DBGWCR1_EL1:
+ case HV_SYS_REG_DBGBVR2_EL1:
+ case HV_SYS_REG_DBGBCR2_EL1:
+ case HV_SYS_REG_DBGWVR2_EL1:
+ case HV_SYS_REG_DBGWCR2_EL1:
+ case HV_SYS_REG_DBGBVR3_EL1:
+ case HV_SYS_REG_DBGBCR3_EL1:
+ case HV_SYS_REG_DBGWVR3_EL1:
+ case HV_SYS_REG_DBGWCR3_EL1:
+ case HV_SYS_REG_DBGBVR4_EL1:
+ case HV_SYS_REG_DBGBCR4_EL1:
+ case HV_SYS_REG_DBGWVR4_EL1:
+ case HV_SYS_REG_DBGWCR4_EL1:
+ case HV_SYS_REG_DBGBVR5_EL1:
+ case HV_SYS_REG_DBGBCR5_EL1:
+ case HV_SYS_REG_DBGWVR5_EL1:
+ case HV_SYS_REG_DBGWCR5_EL1:
+ case HV_SYS_REG_DBGBVR6_EL1:
+ case HV_SYS_REG_DBGBCR6_EL1:
+ case HV_SYS_REG_DBGWVR6_EL1:
+ case HV_SYS_REG_DBGWCR6_EL1:
+ case HV_SYS_REG_DBGBVR7_EL1:
+ case HV_SYS_REG_DBGBCR7_EL1:
+ case HV_SYS_REG_DBGWVR7_EL1:
+ case HV_SYS_REG_DBGWCR7_EL1:
+ case HV_SYS_REG_DBGBVR8_EL1:
+ case HV_SYS_REG_DBGBCR8_EL1:
+ case HV_SYS_REG_DBGWVR8_EL1:
+ case HV_SYS_REG_DBGWCR8_EL1:
+ case HV_SYS_REG_DBGBVR9_EL1:
+ case HV_SYS_REG_DBGBCR9_EL1:
+ case HV_SYS_REG_DBGWVR9_EL1:
+ case HV_SYS_REG_DBGWCR9_EL1:
+ case HV_SYS_REG_DBGBVR10_EL1:
+ case HV_SYS_REG_DBGBCR10_EL1:
+ case HV_SYS_REG_DBGWVR10_EL1:
+ case HV_SYS_REG_DBGWCR10_EL1:
+ case HV_SYS_REG_DBGBVR11_EL1:
+ case HV_SYS_REG_DBGBCR11_EL1:
+ case HV_SYS_REG_DBGWVR11_EL1:
+ case HV_SYS_REG_DBGWCR11_EL1:
+ case HV_SYS_REG_DBGBVR12_EL1:
+ case HV_SYS_REG_DBGBCR12_EL1:
+ case HV_SYS_REG_DBGWVR12_EL1:
+ case HV_SYS_REG_DBGWCR12_EL1:
+ case HV_SYS_REG_DBGBVR13_EL1:
+ case HV_SYS_REG_DBGBCR13_EL1:
+ case HV_SYS_REG_DBGWVR13_EL1:
+ case HV_SYS_REG_DBGWCR13_EL1:
+ case HV_SYS_REG_DBGBVR14_EL1:
+ case HV_SYS_REG_DBGBCR14_EL1:
+ case HV_SYS_REG_DBGWVR14_EL1:
+ case HV_SYS_REG_DBGWCR14_EL1:
+ case HV_SYS_REG_DBGBVR15_EL1:
+ case HV_SYS_REG_DBGBCR15_EL1:
+ case HV_SYS_REG_DBGWVR15_EL1:
+ case HV_SYS_REG_DBGWCR15_EL1: {
+ /*
+ * If the guest is being debugged, the vCPU's debug registers
+ * are holding the gdbstub's view of the registers (set in
+ * hvf_arch_update_guest_debug()).
+ * Since the environment is used to store only the guest's view
+ * of the registers, don't update it with the values from the
+ * vCPU but simply keep the values from the previous
+ * environment.
+ */
+ const ARMCPRegInfo *ri;
+ ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_sreg_match[i].key);
+ val = read_raw_cp_reg(env, ri);
+
+ arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx] = val;
+ continue;
+ }
+ }
+ }
+
+ ret = hv_vcpu_get_sys_reg(cpu->accel->fd, hvf_sreg_match[i].reg, &val);
+ assert_hvf_ok(ret);
+
+ arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx] = val;
+ }
+ assert(write_list_to_cpustate(arm_cpu));
+
+ aarch64_restore_sp(env, arm_current_el(env));
+
+ return 0;
+}
+
+int hvf_put_registers(CPUState *cpu)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ CPUARMState *env = &arm_cpu->env;
+ hv_return_t ret;
+ uint64_t val;
+ hv_simd_fp_uchar16_t fpval;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(hvf_reg_match); i++) {
+ val = *(uint64_t *)((void *)env + hvf_reg_match[i].offset);
+ ret = hv_vcpu_set_reg(cpu->accel->fd, hvf_reg_match[i].reg, val);
+ assert_hvf_ok(ret);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hvf_fpreg_match); i++) {
+ memcpy(&fpval, (void *)env + hvf_fpreg_match[i].offset, sizeof(fpval));
+ ret = hv_vcpu_set_simd_fp_reg(cpu->accel->fd, hvf_fpreg_match[i].reg,
+ fpval);
+ assert_hvf_ok(ret);
+ }
+
+ ret = hv_vcpu_set_reg(cpu->accel->fd, HV_REG_FPCR, vfp_get_fpcr(env));
+ assert_hvf_ok(ret);
+
+ ret = hv_vcpu_set_reg(cpu->accel->fd, HV_REG_FPSR, vfp_get_fpsr(env));
+ assert_hvf_ok(ret);
+
+ ret = hv_vcpu_set_reg(cpu->accel->fd, HV_REG_CPSR, pstate_read(env));
+ assert_hvf_ok(ret);
+
+ aarch64_save_sp(env, arm_current_el(env));
+
+ assert(write_cpustate_to_list(arm_cpu, false));
+ for (i = 0; i < ARRAY_SIZE(hvf_sreg_match); i++) {
+ if (hvf_sreg_match[i].cp_idx == -1) {
+ continue;
+ }
+
+ if (cpu->accel->guest_debug_enabled) {
+ /* Handle debug registers */
+ switch (hvf_sreg_match[i].reg) {
+ case HV_SYS_REG_DBGBVR0_EL1:
+ case HV_SYS_REG_DBGBCR0_EL1:
+ case HV_SYS_REG_DBGWVR0_EL1:
+ case HV_SYS_REG_DBGWCR0_EL1:
+ case HV_SYS_REG_DBGBVR1_EL1:
+ case HV_SYS_REG_DBGBCR1_EL1:
+ case HV_SYS_REG_DBGWVR1_EL1:
+ case HV_SYS_REG_DBGWCR1_EL1:
+ case HV_SYS_REG_DBGBVR2_EL1:
+ case HV_SYS_REG_DBGBCR2_EL1:
+ case HV_SYS_REG_DBGWVR2_EL1:
+ case HV_SYS_REG_DBGWCR2_EL1:
+ case HV_SYS_REG_DBGBVR3_EL1:
+ case HV_SYS_REG_DBGBCR3_EL1:
+ case HV_SYS_REG_DBGWVR3_EL1:
+ case HV_SYS_REG_DBGWCR3_EL1:
+ case HV_SYS_REG_DBGBVR4_EL1:
+ case HV_SYS_REG_DBGBCR4_EL1:
+ case HV_SYS_REG_DBGWVR4_EL1:
+ case HV_SYS_REG_DBGWCR4_EL1:
+ case HV_SYS_REG_DBGBVR5_EL1:
+ case HV_SYS_REG_DBGBCR5_EL1:
+ case HV_SYS_REG_DBGWVR5_EL1:
+ case HV_SYS_REG_DBGWCR5_EL1:
+ case HV_SYS_REG_DBGBVR6_EL1:
+ case HV_SYS_REG_DBGBCR6_EL1:
+ case HV_SYS_REG_DBGWVR6_EL1:
+ case HV_SYS_REG_DBGWCR6_EL1:
+ case HV_SYS_REG_DBGBVR7_EL1:
+ case HV_SYS_REG_DBGBCR7_EL1:
+ case HV_SYS_REG_DBGWVR7_EL1:
+ case HV_SYS_REG_DBGWCR7_EL1:
+ case HV_SYS_REG_DBGBVR8_EL1:
+ case HV_SYS_REG_DBGBCR8_EL1:
+ case HV_SYS_REG_DBGWVR8_EL1:
+ case HV_SYS_REG_DBGWCR8_EL1:
+ case HV_SYS_REG_DBGBVR9_EL1:
+ case HV_SYS_REG_DBGBCR9_EL1:
+ case HV_SYS_REG_DBGWVR9_EL1:
+ case HV_SYS_REG_DBGWCR9_EL1:
+ case HV_SYS_REG_DBGBVR10_EL1:
+ case HV_SYS_REG_DBGBCR10_EL1:
+ case HV_SYS_REG_DBGWVR10_EL1:
+ case HV_SYS_REG_DBGWCR10_EL1:
+ case HV_SYS_REG_DBGBVR11_EL1:
+ case HV_SYS_REG_DBGBCR11_EL1:
+ case HV_SYS_REG_DBGWVR11_EL1:
+ case HV_SYS_REG_DBGWCR11_EL1:
+ case HV_SYS_REG_DBGBVR12_EL1:
+ case HV_SYS_REG_DBGBCR12_EL1:
+ case HV_SYS_REG_DBGWVR12_EL1:
+ case HV_SYS_REG_DBGWCR12_EL1:
+ case HV_SYS_REG_DBGBVR13_EL1:
+ case HV_SYS_REG_DBGBCR13_EL1:
+ case HV_SYS_REG_DBGWVR13_EL1:
+ case HV_SYS_REG_DBGWCR13_EL1:
+ case HV_SYS_REG_DBGBVR14_EL1:
+ case HV_SYS_REG_DBGBCR14_EL1:
+ case HV_SYS_REG_DBGWVR14_EL1:
+ case HV_SYS_REG_DBGWCR14_EL1:
+ case HV_SYS_REG_DBGBVR15_EL1:
+ case HV_SYS_REG_DBGBCR15_EL1:
+ case HV_SYS_REG_DBGWVR15_EL1:
+ case HV_SYS_REG_DBGWCR15_EL1:
+ /*
+ * If the guest is being debugged, the vCPU's debug registers
+ * are already holding the gdbstub's view of the registers (set
+ * in hvf_arch_update_guest_debug()).
+ */
+ continue;
+ }
+ }
+
+ val = arm_cpu->cpreg_values[hvf_sreg_match[i].cp_idx];
+ ret = hv_vcpu_set_sys_reg(cpu->accel->fd, hvf_sreg_match[i].reg, val);
+ assert_hvf_ok(ret);
+ }
+
+ ret = hv_vcpu_set_vtimer_offset(cpu->accel->fd, hvf_state->vtimer_offset);
+ assert_hvf_ok(ret);
+
+ return 0;
+}
+
+static void flush_cpu_state(CPUState *cpu)
+{
+ if (cpu->vcpu_dirty) {
+ hvf_put_registers(cpu);
+ cpu->vcpu_dirty = false;
+ }
+}
+
+static void hvf_set_reg(CPUState *cpu, int rt, uint64_t val)
+{
+ hv_return_t r;
+
+ flush_cpu_state(cpu);
+
+ if (rt < 31) {
+ r = hv_vcpu_set_reg(cpu->accel->fd, HV_REG_X0 + rt, val);
+ assert_hvf_ok(r);
+ }
+}
+
+static uint64_t hvf_get_reg(CPUState *cpu, int rt)
+{
+ uint64_t val = 0;
+ hv_return_t r;
+
+ flush_cpu_state(cpu);
+
+ if (rt < 31) {
+ r = hv_vcpu_get_reg(cpu->accel->fd, HV_REG_X0 + rt, &val);
+ assert_hvf_ok(r);
+ }
+
+ return val;
+}
+
+static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
+{
+ ARMISARegisters host_isar = {};
+ const struct isar_regs {
+ int reg;
+ uint64_t *val;
+ } regs[] = {
+ { HV_SYS_REG_ID_AA64PFR0_EL1, &host_isar.id_aa64pfr0 },
+ { HV_SYS_REG_ID_AA64PFR1_EL1, &host_isar.id_aa64pfr1 },
+ { HV_SYS_REG_ID_AA64DFR0_EL1, &host_isar.id_aa64dfr0 },
+ { HV_SYS_REG_ID_AA64DFR1_EL1, &host_isar.id_aa64dfr1 },
+ { HV_SYS_REG_ID_AA64ISAR0_EL1, &host_isar.id_aa64isar0 },
+ { HV_SYS_REG_ID_AA64ISAR1_EL1, &host_isar.id_aa64isar1 },
+ /* Add ID_AA64ISAR2_EL1 here when HVF supports it */
+ { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.id_aa64mmfr0 },
+ { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.id_aa64mmfr1 },
+ { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.id_aa64mmfr2 },
+ };
+ hv_vcpu_t fd;
+ hv_return_t r = HV_SUCCESS;
+ hv_vcpu_exit_t *exit;
+ int i;
+
+ ahcf->dtb_compatible = "arm,arm-v8";
+ ahcf->features = (1ULL << ARM_FEATURE_V8) |
+ (1ULL << ARM_FEATURE_NEON) |
+ (1ULL << ARM_FEATURE_AARCH64) |
+ (1ULL << ARM_FEATURE_PMU) |
+ (1ULL << ARM_FEATURE_GENERIC_TIMER);
+
+    /* Set up a scratch vCPU to extract the host's ID registers */
+
+ if (hv_vcpu_create(&fd, &exit, NULL) != HV_SUCCESS) {
+ return false;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ r |= hv_vcpu_get_sys_reg(fd, regs[i].reg, regs[i].val);
+ }
+ r |= hv_vcpu_get_sys_reg(fd, HV_SYS_REG_MIDR_EL1, &ahcf->midr);
+ r |= hv_vcpu_destroy(fd);
+
+ ahcf->isar = host_isar;
+
+ /*
+ * A scratch vCPU returns SCTLR 0, so let's fill our default with the M1
+ * boot SCTLR from https://github.com/AsahiLinux/m1n1/issues/97
+ */
+ ahcf->reset_sctlr = 0x30100180;
+ /*
+     * SCTLR.SPAN works "active low": while SPAN=1 the automatic setting of
+     * PSTATE.PAN on exception entry is disabled. For compatibility, boot with
+     * SPAN=1 and let guest software re-enable the behaviour by clearing it.
+ */
+ ahcf->reset_sctlr |= 0x00800000;
+
+ /* Make sure we don't advertise AArch32 support for EL0/EL1 */
+ if ((host_isar.id_aa64pfr0 & 0xff) != 0x11) {
+ return false;
+ }
+
+ return r == HV_SUCCESS;
+}
+
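The "& 0xff" check at the end of hvf_arm_get_host_cpu_features() looks at the ID_AA64PFR0_EL1.EL0 (bits [3:0]) and .EL1 (bits [7:4]) fields; the value 0x11 means both exception levels are AArch64-only, so any host that could run AArch32 at EL0/EL1 is rejected. A small standalone sketch of the same test (illustrative code, not QEMU's; field positions per the Arm ARM):

    /* Standalone sketch of the AArch64-only EL0/EL1 check used above. */
    #include <assert.h>
    #include <stdint.h>

    static int aarch64_only_el0_el1(uint64_t id_aa64pfr0)
    {
        unsigned el0 = id_aa64pfr0 & 0xf;        /* 1: AArch64 only, 2: +AArch32 */
        unsigned el1 = (id_aa64pfr0 >> 4) & 0xf;
        return el0 == 1 && el1 == 1;             /* same as (reg & 0xff) == 0x11 */
    }

    int main(void)
    {
        assert(aarch64_only_el0_el1(0x11));   /* AArch64-only host: accepted */
        assert(!aarch64_only_el0_el1(0x22));  /* AArch32-capable host: rejected */
        return 0;
    }
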
+void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu)
+{
+ if (!arm_host_cpu_features.dtb_compatible) {
+ if (!hvf_enabled() ||
+ !hvf_arm_get_host_cpu_features(&arm_host_cpu_features)) {
+ /*
+ * We can't report this error yet, so flag that we need to
+ * in arm_cpu_realizefn().
+ */
+ cpu->host_cpu_probe_failed = true;
+ return;
+ }
+ }
+
+ cpu->dtb_compatible = arm_host_cpu_features.dtb_compatible;
+ cpu->isar = arm_host_cpu_features.isar;
+ cpu->env.features = arm_host_cpu_features.features;
+ cpu->midr = arm_host_cpu_features.midr;
+ cpu->reset_sctlr = arm_host_cpu_features.reset_sctlr;
+}
+
+void hvf_arch_vcpu_destroy(CPUState *cpu)
+{
+}
+
+int hvf_arch_init_vcpu(CPUState *cpu)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ CPUARMState *env = &arm_cpu->env;
+ uint32_t sregs_match_len = ARRAY_SIZE(hvf_sreg_match);
+ uint32_t sregs_cnt = 0;
+ uint64_t pfr;
+ hv_return_t ret;
+ int i;
+
+ env->aarch64 = true;
+ asm volatile("mrs %0, cntfrq_el0" : "=r"(arm_cpu->gt_cntfrq_hz));
+
+ /* Allocate enough space for our sysreg sync */
+ arm_cpu->cpreg_indexes = g_renew(uint64_t, arm_cpu->cpreg_indexes,
+ sregs_match_len);
+ arm_cpu->cpreg_values = g_renew(uint64_t, arm_cpu->cpreg_values,
+ sregs_match_len);
+ arm_cpu->cpreg_vmstate_indexes = g_renew(uint64_t,
+ arm_cpu->cpreg_vmstate_indexes,
+ sregs_match_len);
+ arm_cpu->cpreg_vmstate_values = g_renew(uint64_t,
+ arm_cpu->cpreg_vmstate_values,
+ sregs_match_len);
+
+ memset(arm_cpu->cpreg_values, 0, sregs_match_len * sizeof(uint64_t));
+
+ /* Populate cp list for all known sysregs */
+ for (i = 0; i < sregs_match_len; i++) {
+ const ARMCPRegInfo *ri;
+ uint32_t key = hvf_sreg_match[i].key;
+
+ ri = get_arm_cp_reginfo(arm_cpu->cp_regs, key);
+ if (ri) {
+ assert(!(ri->type & ARM_CP_NO_RAW));
+ hvf_sreg_match[i].cp_idx = sregs_cnt;
+ arm_cpu->cpreg_indexes[sregs_cnt++] = cpreg_to_kvm_id(key);
+ } else {
+ hvf_sreg_match[i].cp_idx = -1;
+ }
+ }
+ arm_cpu->cpreg_array_len = sregs_cnt;
+ arm_cpu->cpreg_vmstate_array_len = sregs_cnt;
+
+ assert(write_cpustate_to_list(arm_cpu, false));
+
+ /* Set CP_NO_RAW system registers on init */
+ ret = hv_vcpu_set_sys_reg(cpu->accel->fd, HV_SYS_REG_MIDR_EL1,
+ arm_cpu->midr);
+ assert_hvf_ok(ret);
+
+ ret = hv_vcpu_set_sys_reg(cpu->accel->fd, HV_SYS_REG_MPIDR_EL1,
+ arm_cpu->mp_affinity);
+ assert_hvf_ok(ret);
+
+ ret = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_ID_AA64PFR0_EL1, &pfr);
+ assert_hvf_ok(ret);
+ pfr |= env->gicv3state ? (1 << 24) : 0;
+ ret = hv_vcpu_set_sys_reg(cpu->accel->fd, HV_SYS_REG_ID_AA64PFR0_EL1, pfr);
+ assert_hvf_ok(ret);
+
+    /*
+     * We're limited to the underlying hardware's capabilities; override our
+     * internal versions with what the host actually supports.
+     */
+ ret = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_ID_AA64MMFR0_EL1,
+ &arm_cpu->isar.id_aa64mmfr0);
+ assert_hvf_ok(ret);
+
+ return 0;
+}
+
+void hvf_kick_vcpu_thread(CPUState *cpu)
+{
+ cpus_kick_thread(cpu);
+ hv_vcpus_exit(&cpu->accel->fd, 1);
+}
+
+static void hvf_raise_exception(CPUState *cpu, uint32_t excp,
+ uint32_t syndrome)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ CPUARMState *env = &arm_cpu->env;
+
+ cpu->exception_index = excp;
+ env->exception.target_el = 1;
+ env->exception.syndrome = syndrome;
+
+ arm_cpu_do_interrupt(cpu);
+}
+
+static void hvf_psci_cpu_off(ARMCPU *arm_cpu)
+{
+ int32_t ret = arm_set_cpu_off(arm_cpu_mp_affinity(arm_cpu));
+ assert(ret == QEMU_ARM_POWERCTL_RET_SUCCESS);
+}
+
+/*
+ * Handle a PSCI call.
+ *
+ * Returns true when the call was handled (the PSCI return value is placed
+ * in x0) and false when the PSCI function is unknown.
+ */
+static bool hvf_handle_psci_call(CPUState *cpu)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ CPUARMState *env = &arm_cpu->env;
+ uint64_t param[4] = {
+ env->xregs[0],
+ env->xregs[1],
+ env->xregs[2],
+ env->xregs[3]
+ };
+ uint64_t context_id, mpidr;
+ bool target_aarch64 = true;
+ CPUState *target_cpu_state;
+ ARMCPU *target_cpu;
+ target_ulong entry;
+ int target_el = 1;
+ int32_t ret = 0;
+
+ trace_hvf_psci_call(param[0], param[1], param[2], param[3],
+ arm_cpu_mp_affinity(arm_cpu));
+
+ switch (param[0]) {
+ case QEMU_PSCI_0_2_FN_PSCI_VERSION:
+ ret = QEMU_PSCI_VERSION_1_1;
+ break;
+ case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+ ret = QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED; /* No trusted OS */
+ break;
+ case QEMU_PSCI_0_2_FN_AFFINITY_INFO:
+ case QEMU_PSCI_0_2_FN64_AFFINITY_INFO:
+ mpidr = param[1];
+
+ switch (param[2]) {
+ case 0:
+ target_cpu_state = arm_get_cpu_by_id(mpidr);
+ if (!target_cpu_state) {
+ ret = QEMU_PSCI_RET_INVALID_PARAMS;
+ break;
+ }
+ target_cpu = ARM_CPU(target_cpu_state);
+
+ ret = target_cpu->power_state;
+ break;
+ default:
+ /* Everything above affinity level 0 is always on. */
+ ret = 0;
+ }
+ break;
+ case QEMU_PSCI_0_2_FN_SYSTEM_RESET:
+ qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+ /*
+ * QEMU reset and shutdown are async requests, but PSCI
+ * mandates that we never return from the reset/shutdown
+ * call, so power the CPU off now so it doesn't execute
+ * anything further.
+ */
+ hvf_psci_cpu_off(arm_cpu);
+ break;
+ case QEMU_PSCI_0_2_FN_SYSTEM_OFF:
+ qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+ hvf_psci_cpu_off(arm_cpu);
+ break;
+ case QEMU_PSCI_0_1_FN_CPU_ON:
+ case QEMU_PSCI_0_2_FN_CPU_ON:
+ case QEMU_PSCI_0_2_FN64_CPU_ON:
+ mpidr = param[1];
+ entry = param[2];
+ context_id = param[3];
+ ret = arm_set_cpu_on(mpidr, entry, context_id,
+ target_el, target_aarch64);
+ break;
+ case QEMU_PSCI_0_1_FN_CPU_OFF:
+ case QEMU_PSCI_0_2_FN_CPU_OFF:
+ hvf_psci_cpu_off(arm_cpu);
+ break;
+ case QEMU_PSCI_0_1_FN_CPU_SUSPEND:
+ case QEMU_PSCI_0_2_FN_CPU_SUSPEND:
+ case QEMU_PSCI_0_2_FN64_CPU_SUSPEND:
+ /* Affinity levels are not supported in QEMU */
+ if (param[1] & 0xfffe0000) {
+ ret = QEMU_PSCI_RET_INVALID_PARAMS;
+ break;
+ }
+ /* Powerdown is not supported, we always go into WFI */
+ env->xregs[0] = 0;
+ hvf_wfi(cpu);
+ break;
+ case QEMU_PSCI_0_1_FN_MIGRATE:
+ case QEMU_PSCI_0_2_FN_MIGRATE:
+ ret = QEMU_PSCI_RET_NOT_SUPPORTED;
+ break;
+ case QEMU_PSCI_1_0_FN_PSCI_FEATURES:
+ switch (param[1]) {
+ case QEMU_PSCI_0_2_FN_PSCI_VERSION:
+ case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+ case QEMU_PSCI_0_2_FN_AFFINITY_INFO:
+ case QEMU_PSCI_0_2_FN64_AFFINITY_INFO:
+ case QEMU_PSCI_0_2_FN_SYSTEM_RESET:
+ case QEMU_PSCI_0_2_FN_SYSTEM_OFF:
+ case QEMU_PSCI_0_1_FN_CPU_ON:
+ case QEMU_PSCI_0_2_FN_CPU_ON:
+ case QEMU_PSCI_0_2_FN64_CPU_ON:
+ case QEMU_PSCI_0_1_FN_CPU_OFF:
+ case QEMU_PSCI_0_2_FN_CPU_OFF:
+ case QEMU_PSCI_0_1_FN_CPU_SUSPEND:
+ case QEMU_PSCI_0_2_FN_CPU_SUSPEND:
+ case QEMU_PSCI_0_2_FN64_CPU_SUSPEND:
+ case QEMU_PSCI_1_0_FN_PSCI_FEATURES:
+ ret = 0;
+ break;
+ case QEMU_PSCI_0_1_FN_MIGRATE:
+ case QEMU_PSCI_0_2_FN_MIGRATE:
+ default:
+ ret = QEMU_PSCI_RET_NOT_SUPPORTED;
+ }
+ break;
+ default:
+ return false;
+ }
+
+ env->xregs[0] = ret;
+ return true;
+}
+
+static bool is_id_sysreg(uint32_t reg)
+{
+ return SYSREG_OP0(reg) == 3 &&
+ SYSREG_OP1(reg) == 0 &&
+ SYSREG_CRN(reg) == 0 &&
+ SYSREG_CRM(reg) >= 1 &&
+ SYSREG_CRM(reg) < 8;
+}
+
+static uint32_t hvf_reg2cp_reg(uint32_t reg)
+{
+ return ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
+ (reg >> SYSREG_CRN_SHIFT) & SYSREG_CRN_MASK,
+ (reg >> SYSREG_CRM_SHIFT) & SYSREG_CRM_MASK,
+ (reg >> SYSREG_OP0_SHIFT) & SYSREG_OP0_MASK,
+ (reg >> SYSREG_OP1_SHIFT) & SYSREG_OP1_MASK,
+ (reg >> SYSREG_OP2_SHIFT) & SYSREG_OP2_MASK);
+}
+
+static bool hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg, uint64_t *val)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ CPUARMState *env = &arm_cpu->env;
+ const ARMCPRegInfo *ri;
+
+ ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg));
+ if (ri) {
+ if (ri->accessfn) {
+ if (ri->accessfn(env, ri, true) != CP_ACCESS_OK) {
+ return false;
+ }
+ }
+ if (ri->type & ARM_CP_CONST) {
+ *val = ri->resetvalue;
+ } else if (ri->readfn) {
+ *val = ri->readfn(env, ri);
+ } else {
+ *val = CPREG_FIELD64(env, ri);
+ }
+ trace_hvf_vgic_read(ri->name, *val);
+ return true;
+ }
+
+ return false;
+}
+
+static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ CPUARMState *env = &arm_cpu->env;
+ uint64_t val = 0;
+
+ switch (reg) {
+ case SYSREG_CNTPCT_EL0:
+ val = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) /
+ gt_cntfrq_period_ns(arm_cpu);
+ break;
+ case SYSREG_PMCR_EL0:
+ val = env->cp15.c9_pmcr;
+ break;
+ case SYSREG_PMCCNTR_EL0:
+ pmu_op_start(env);
+ val = env->cp15.c15_ccnt;
+ pmu_op_finish(env);
+ break;
+ case SYSREG_PMCNTENCLR_EL0:
+ val = env->cp15.c9_pmcnten;
+ break;
+ case SYSREG_PMOVSCLR_EL0:
+ val = env->cp15.c9_pmovsr;
+ break;
+ case SYSREG_PMSELR_EL0:
+ val = env->cp15.c9_pmselr;
+ break;
+ case SYSREG_PMINTENCLR_EL1:
+ val = env->cp15.c9_pminten;
+ break;
+ case SYSREG_PMCCFILTR_EL0:
+ val = env->cp15.pmccfiltr_el0;
+ break;
+ case SYSREG_PMCNTENSET_EL0:
+ val = env->cp15.c9_pmcnten;
+ break;
+ case SYSREG_PMUSERENR_EL0:
+ val = env->cp15.c9_pmuserenr;
+ break;
+ case SYSREG_PMCEID0_EL0:
+ case SYSREG_PMCEID1_EL0:
+ /* We can't really count anything yet, declare all events invalid */
+ val = 0;
+ break;
+ case SYSREG_OSLSR_EL1:
+ val = env->cp15.oslsr_el1;
+ break;
+ case SYSREG_OSDLR_EL1:
+ /* Dummy register */
+ break;
+ case SYSREG_ICC_AP0R0_EL1:
+ case SYSREG_ICC_AP0R1_EL1:
+ case SYSREG_ICC_AP0R2_EL1:
+ case SYSREG_ICC_AP0R3_EL1:
+ case SYSREG_ICC_AP1R0_EL1:
+ case SYSREG_ICC_AP1R1_EL1:
+ case SYSREG_ICC_AP1R2_EL1:
+ case SYSREG_ICC_AP1R3_EL1:
+ case SYSREG_ICC_ASGI1R_EL1:
+ case SYSREG_ICC_BPR0_EL1:
+ case SYSREG_ICC_BPR1_EL1:
+ case SYSREG_ICC_DIR_EL1:
+ case SYSREG_ICC_EOIR0_EL1:
+ case SYSREG_ICC_EOIR1_EL1:
+ case SYSREG_ICC_HPPIR0_EL1:
+ case SYSREG_ICC_HPPIR1_EL1:
+ case SYSREG_ICC_IAR0_EL1:
+ case SYSREG_ICC_IAR1_EL1:
+ case SYSREG_ICC_IGRPEN0_EL1:
+ case SYSREG_ICC_IGRPEN1_EL1:
+ case SYSREG_ICC_PMR_EL1:
+ case SYSREG_ICC_SGI0R_EL1:
+ case SYSREG_ICC_SGI1R_EL1:
+ case SYSREG_ICC_SRE_EL1:
+ case SYSREG_ICC_CTLR_EL1:
+ /* Call the TCG sysreg handler. This is only safe for GICv3 regs. */
+ if (!hvf_sysreg_read_cp(cpu, reg, &val)) {
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
+ }
+ break;
+ case SYSREG_DBGBVR0_EL1:
+ case SYSREG_DBGBVR1_EL1:
+ case SYSREG_DBGBVR2_EL1:
+ case SYSREG_DBGBVR3_EL1:
+ case SYSREG_DBGBVR4_EL1:
+ case SYSREG_DBGBVR5_EL1:
+ case SYSREG_DBGBVR6_EL1:
+ case SYSREG_DBGBVR7_EL1:
+ case SYSREG_DBGBVR8_EL1:
+ case SYSREG_DBGBVR9_EL1:
+ case SYSREG_DBGBVR10_EL1:
+ case SYSREG_DBGBVR11_EL1:
+ case SYSREG_DBGBVR12_EL1:
+ case SYSREG_DBGBVR13_EL1:
+ case SYSREG_DBGBVR14_EL1:
+ case SYSREG_DBGBVR15_EL1:
+ val = env->cp15.dbgbvr[SYSREG_CRM(reg)];
+ break;
+ case SYSREG_DBGBCR0_EL1:
+ case SYSREG_DBGBCR1_EL1:
+ case SYSREG_DBGBCR2_EL1:
+ case SYSREG_DBGBCR3_EL1:
+ case SYSREG_DBGBCR4_EL1:
+ case SYSREG_DBGBCR5_EL1:
+ case SYSREG_DBGBCR6_EL1:
+ case SYSREG_DBGBCR7_EL1:
+ case SYSREG_DBGBCR8_EL1:
+ case SYSREG_DBGBCR9_EL1:
+ case SYSREG_DBGBCR10_EL1:
+ case SYSREG_DBGBCR11_EL1:
+ case SYSREG_DBGBCR12_EL1:
+ case SYSREG_DBGBCR13_EL1:
+ case SYSREG_DBGBCR14_EL1:
+ case SYSREG_DBGBCR15_EL1:
+ val = env->cp15.dbgbcr[SYSREG_CRM(reg)];
+ break;
+ case SYSREG_DBGWVR0_EL1:
+ case SYSREG_DBGWVR1_EL1:
+ case SYSREG_DBGWVR2_EL1:
+ case SYSREG_DBGWVR3_EL1:
+ case SYSREG_DBGWVR4_EL1:
+ case SYSREG_DBGWVR5_EL1:
+ case SYSREG_DBGWVR6_EL1:
+ case SYSREG_DBGWVR7_EL1:
+ case SYSREG_DBGWVR8_EL1:
+ case SYSREG_DBGWVR9_EL1:
+ case SYSREG_DBGWVR10_EL1:
+ case SYSREG_DBGWVR11_EL1:
+ case SYSREG_DBGWVR12_EL1:
+ case SYSREG_DBGWVR13_EL1:
+ case SYSREG_DBGWVR14_EL1:
+ case SYSREG_DBGWVR15_EL1:
+ val = env->cp15.dbgwvr[SYSREG_CRM(reg)];
+ break;
+ case SYSREG_DBGWCR0_EL1:
+ case SYSREG_DBGWCR1_EL1:
+ case SYSREG_DBGWCR2_EL1:
+ case SYSREG_DBGWCR3_EL1:
+ case SYSREG_DBGWCR4_EL1:
+ case SYSREG_DBGWCR5_EL1:
+ case SYSREG_DBGWCR6_EL1:
+ case SYSREG_DBGWCR7_EL1:
+ case SYSREG_DBGWCR8_EL1:
+ case SYSREG_DBGWCR9_EL1:
+ case SYSREG_DBGWCR10_EL1:
+ case SYSREG_DBGWCR11_EL1:
+ case SYSREG_DBGWCR12_EL1:
+ case SYSREG_DBGWCR13_EL1:
+ case SYSREG_DBGWCR14_EL1:
+ case SYSREG_DBGWCR15_EL1:
+ val = env->cp15.dbgwcr[SYSREG_CRM(reg)];
+ break;
+ default:
+ if (is_id_sysreg(reg)) {
+ /* ID system registers read as RES0 */
+ val = 0;
+ break;
+ }
+ cpu_synchronize_state(cpu);
+ trace_hvf_unhandled_sysreg_read(env->pc, reg,
+ SYSREG_OP0(reg),
+ SYSREG_OP1(reg),
+ SYSREG_CRN(reg),
+ SYSREG_CRM(reg),
+ SYSREG_OP2(reg));
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
+ return 1;
+ }
+
+ trace_hvf_sysreg_read(reg,
+ SYSREG_OP0(reg),
+ SYSREG_OP1(reg),
+ SYSREG_CRN(reg),
+ SYSREG_CRM(reg),
+ SYSREG_OP2(reg),
+ val);
+ hvf_set_reg(cpu, rt, val);
+
+ return 0;
+}
+
+static void pmu_update_irq(CPUARMState *env)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ qemu_set_irq(cpu->pmu_interrupt, (env->cp15.c9_pmcr & PMCRE) &&
+ (env->cp15.c9_pminten & env->cp15.c9_pmovsr));
+}
+
+static bool pmu_event_supported(uint16_t number)
+{
+ return false;
+}
+
+/*
+ * Returns true if the counter (pass 31 for PMCCNTR) should count events using
+ * the current EL, security state, and register configuration.
+ */
+static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter)
+{
+ uint64_t filter;
+ bool enabled, filtered = true;
+ int el = arm_current_el(env);
+
+ enabled = (env->cp15.c9_pmcr & PMCRE) &&
+ (env->cp15.c9_pmcnten & (1 << counter));
+
+ if (counter == 31) {
+ filter = env->cp15.pmccfiltr_el0;
+ } else {
+ filter = env->cp15.c14_pmevtyper[counter];
+ }
+
+ if (el == 0) {
+ filtered = filter & PMXEVTYPER_U;
+ } else if (el == 1) {
+ filtered = filter & PMXEVTYPER_P;
+ }
+
+ if (counter != 31) {
+ /*
+         * If not checking PMCCNTR, ensure the counter is set up for an
+         * event we support
+ */
+ uint16_t event = filter & PMXEVTYPER_EVTCOUNT;
+ if (!pmu_event_supported(event)) {
+ return false;
+ }
+ }
+
+ return enabled && !filtered;
+}
+
+static void pmswinc_write(CPUARMState *env, uint64_t value)
+{
+ unsigned int i;
+ for (i = 0; i < pmu_num_counters(env); i++) {
+ /* Increment a counter's count iff: */
+ if ((value & (1 << i)) && /* counter's bit is set */
+ /* counter is enabled and not filtered */
+ pmu_counter_enabled(env, i) &&
+ /* counter is SW_INCR */
+ (env->cp15.c14_pmevtyper[i] & PMXEVTYPER_EVTCOUNT) == 0x0) {
+ /*
+ * Detect if this write causes an overflow since we can't predict
+ * PMSWINC overflows like we can for other events
+ */
+ uint32_t new_pmswinc = env->cp15.c14_pmevcntr[i] + 1;
+
+ if (env->cp15.c14_pmevcntr[i] & ~new_pmswinc & INT32_MIN) {
+ env->cp15.c9_pmovsr |= (1 << i);
+ pmu_update_irq(env);
+ }
+
+ env->cp15.c14_pmevcntr[i] = new_pmswinc;
+ }
+ }
+}
+
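The overflow test in pmswinc_write() relies on the event counters being 32 bits wide: a single increment has overflowed exactly when bit 31 was set before and is clear afterwards, which is what "old & ~new & INT32_MIN" detects. A standalone sketch of that check (illustrative helper name, not part of the patch):

    /* Standalone sketch of the bit-31 carry-out test used by pmswinc_write(). */
    #include <assert.h>
    #include <stdint.h>

    static int counter_overflowed(uint32_t old_val, uint32_t new_val)
    {
        /* Overflow iff bit 31 went from 1 to 0 across the increment. */
        return (old_val & ~new_val & INT32_MIN) != 0;
    }

    int main(void)
    {
        assert(counter_overflowed(0xffffffffu, 0x00000000u));  /* wraps to 0 */
        assert(!counter_overflowed(0x7fffffffu, 0x80000000u)); /* no wrap */
        assert(!counter_overflowed(0x00000001u, 0x00000002u));
        return 0;
    }
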
+static bool hvf_sysreg_write_cp(CPUState *cpu, uint32_t reg, uint64_t val)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ CPUARMState *env = &arm_cpu->env;
+ const ARMCPRegInfo *ri;
+
+ ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg));
+
+ if (ri) {
+ if (ri->accessfn) {
+ if (ri->accessfn(env, ri, false) != CP_ACCESS_OK) {
+ return false;
+ }
+ }
+ if (ri->writefn) {
+ ri->writefn(env, ri, val);
+ } else {
+ CPREG_FIELD64(env, ri) = val;
+ }
+
+ trace_hvf_vgic_write(ri->name, val);
+ return true;
+ }
+
+ return false;
+}
+
+static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ CPUARMState *env = &arm_cpu->env;
+
+ trace_hvf_sysreg_write(reg,
+ SYSREG_OP0(reg),
+ SYSREG_OP1(reg),
+ SYSREG_CRN(reg),
+ SYSREG_CRM(reg),
+ SYSREG_OP2(reg),
+ val);
+
+ switch (reg) {
+ case SYSREG_PMCCNTR_EL0:
+ pmu_op_start(env);
+ env->cp15.c15_ccnt = val;
+ pmu_op_finish(env);
+ break;
+ case SYSREG_PMCR_EL0:
+ pmu_op_start(env);
+
+ if (val & PMCRC) {
+ /* The counter has been reset */
+ env->cp15.c15_ccnt = 0;
+ }
+
+ if (val & PMCRP) {
+ unsigned int i;
+ for (i = 0; i < pmu_num_counters(env); i++) {
+ env->cp15.c14_pmevcntr[i] = 0;
+ }
+ }
+
+ env->cp15.c9_pmcr &= ~PMCR_WRITABLE_MASK;
+ env->cp15.c9_pmcr |= (val & PMCR_WRITABLE_MASK);
+
+ pmu_op_finish(env);
+ break;
+ case SYSREG_PMUSERENR_EL0:
+ env->cp15.c9_pmuserenr = val & 0xf;
+ break;
+ case SYSREG_PMCNTENSET_EL0:
+ env->cp15.c9_pmcnten |= (val & pmu_counter_mask(env));
+ break;
+ case SYSREG_PMCNTENCLR_EL0:
+ env->cp15.c9_pmcnten &= ~(val & pmu_counter_mask(env));
+ break;
+ case SYSREG_PMINTENCLR_EL1:
+ pmu_op_start(env);
+        /* PMINTENCLR_EL1: a write of 1 clears the corresponding enable bit */
+        env->cp15.c9_pminten &= ~val;
+ pmu_op_finish(env);
+ break;
+ case SYSREG_PMOVSCLR_EL0:
+ pmu_op_start(env);
+ env->cp15.c9_pmovsr &= ~val;
+ pmu_op_finish(env);
+ break;
+ case SYSREG_PMSWINC_EL0:
+ pmu_op_start(env);
+ pmswinc_write(env, val);
+ pmu_op_finish(env);
+ break;
+ case SYSREG_PMSELR_EL0:
+ env->cp15.c9_pmselr = val & 0x1f;
+ break;
+ case SYSREG_PMCCFILTR_EL0:
+ pmu_op_start(env);
+ env->cp15.pmccfiltr_el0 = val & PMCCFILTR_EL0;
+ pmu_op_finish(env);
+ break;
+ case SYSREG_OSLAR_EL1:
+ env->cp15.oslsr_el1 = val & 1;
+ break;
+ case SYSREG_OSDLR_EL1:
+ /* Dummy register */
+ break;
+ case SYSREG_ICC_AP0R0_EL1:
+ case SYSREG_ICC_AP0R1_EL1:
+ case SYSREG_ICC_AP0R2_EL1:
+ case SYSREG_ICC_AP0R3_EL1:
+ case SYSREG_ICC_AP1R0_EL1:
+ case SYSREG_ICC_AP1R1_EL1:
+ case SYSREG_ICC_AP1R2_EL1:
+ case SYSREG_ICC_AP1R3_EL1:
+ case SYSREG_ICC_ASGI1R_EL1:
+ case SYSREG_ICC_BPR0_EL1:
+ case SYSREG_ICC_BPR1_EL1:
+ case SYSREG_ICC_CTLR_EL1:
+ case SYSREG_ICC_DIR_EL1:
+ case SYSREG_ICC_EOIR0_EL1:
+ case SYSREG_ICC_EOIR1_EL1:
+ case SYSREG_ICC_HPPIR0_EL1:
+ case SYSREG_ICC_HPPIR1_EL1:
+ case SYSREG_ICC_IAR0_EL1:
+ case SYSREG_ICC_IAR1_EL1:
+ case SYSREG_ICC_IGRPEN0_EL1:
+ case SYSREG_ICC_IGRPEN1_EL1:
+ case SYSREG_ICC_PMR_EL1:
+ case SYSREG_ICC_SGI0R_EL1:
+ case SYSREG_ICC_SGI1R_EL1:
+ case SYSREG_ICC_SRE_EL1:
+ /* Call the TCG sysreg handler. This is only safe for GICv3 regs. */
+ if (!hvf_sysreg_write_cp(cpu, reg, val)) {
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
+ }
+ break;
+ case SYSREG_MDSCR_EL1:
+ env->cp15.mdscr_el1 = val;
+ break;
+ case SYSREG_DBGBVR0_EL1:
+ case SYSREG_DBGBVR1_EL1:
+ case SYSREG_DBGBVR2_EL1:
+ case SYSREG_DBGBVR3_EL1:
+ case SYSREG_DBGBVR4_EL1:
+ case SYSREG_DBGBVR5_EL1:
+ case SYSREG_DBGBVR6_EL1:
+ case SYSREG_DBGBVR7_EL1:
+ case SYSREG_DBGBVR8_EL1:
+ case SYSREG_DBGBVR9_EL1:
+ case SYSREG_DBGBVR10_EL1:
+ case SYSREG_DBGBVR11_EL1:
+ case SYSREG_DBGBVR12_EL1:
+ case SYSREG_DBGBVR13_EL1:
+ case SYSREG_DBGBVR14_EL1:
+ case SYSREG_DBGBVR15_EL1:
+ env->cp15.dbgbvr[SYSREG_CRM(reg)] = val;
+ break;
+ case SYSREG_DBGBCR0_EL1:
+ case SYSREG_DBGBCR1_EL1:
+ case SYSREG_DBGBCR2_EL1:
+ case SYSREG_DBGBCR3_EL1:
+ case SYSREG_DBGBCR4_EL1:
+ case SYSREG_DBGBCR5_EL1:
+ case SYSREG_DBGBCR6_EL1:
+ case SYSREG_DBGBCR7_EL1:
+ case SYSREG_DBGBCR8_EL1:
+ case SYSREG_DBGBCR9_EL1:
+ case SYSREG_DBGBCR10_EL1:
+ case SYSREG_DBGBCR11_EL1:
+ case SYSREG_DBGBCR12_EL1:
+ case SYSREG_DBGBCR13_EL1:
+ case SYSREG_DBGBCR14_EL1:
+ case SYSREG_DBGBCR15_EL1:
+ env->cp15.dbgbcr[SYSREG_CRM(reg)] = val;
+ break;
+ case SYSREG_DBGWVR0_EL1:
+ case SYSREG_DBGWVR1_EL1:
+ case SYSREG_DBGWVR2_EL1:
+ case SYSREG_DBGWVR3_EL1:
+ case SYSREG_DBGWVR4_EL1:
+ case SYSREG_DBGWVR5_EL1:
+ case SYSREG_DBGWVR6_EL1:
+ case SYSREG_DBGWVR7_EL1:
+ case SYSREG_DBGWVR8_EL1:
+ case SYSREG_DBGWVR9_EL1:
+ case SYSREG_DBGWVR10_EL1:
+ case SYSREG_DBGWVR11_EL1:
+ case SYSREG_DBGWVR12_EL1:
+ case SYSREG_DBGWVR13_EL1:
+ case SYSREG_DBGWVR14_EL1:
+ case SYSREG_DBGWVR15_EL1:
+ env->cp15.dbgwvr[SYSREG_CRM(reg)] = val;
+ break;
+ case SYSREG_DBGWCR0_EL1:
+ case SYSREG_DBGWCR1_EL1:
+ case SYSREG_DBGWCR2_EL1:
+ case SYSREG_DBGWCR3_EL1:
+ case SYSREG_DBGWCR4_EL1:
+ case SYSREG_DBGWCR5_EL1:
+ case SYSREG_DBGWCR6_EL1:
+ case SYSREG_DBGWCR7_EL1:
+ case SYSREG_DBGWCR8_EL1:
+ case SYSREG_DBGWCR9_EL1:
+ case SYSREG_DBGWCR10_EL1:
+ case SYSREG_DBGWCR11_EL1:
+ case SYSREG_DBGWCR12_EL1:
+ case SYSREG_DBGWCR13_EL1:
+ case SYSREG_DBGWCR14_EL1:
+ case SYSREG_DBGWCR15_EL1:
+ env->cp15.dbgwcr[SYSREG_CRM(reg)] = val;
+ break;
+ default:
+ cpu_synchronize_state(cpu);
+ trace_hvf_unhandled_sysreg_write(env->pc, reg,
+ SYSREG_OP0(reg),
+ SYSREG_OP1(reg),
+ SYSREG_CRN(reg),
+ SYSREG_CRM(reg),
+ SYSREG_OP2(reg));
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
+ return 1;
+ }
+
+ return 0;
+}
+
+static int hvf_inject_interrupts(CPUState *cpu)
+{
+ if (cpu->interrupt_request & CPU_INTERRUPT_FIQ) {
+ trace_hvf_inject_fiq();
+ hv_vcpu_set_pending_interrupt(cpu->accel->fd, HV_INTERRUPT_TYPE_FIQ,
+ true);
+ }
+
+ if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
+ trace_hvf_inject_irq();
+ hv_vcpu_set_pending_interrupt(cpu->accel->fd, HV_INTERRUPT_TYPE_IRQ,
+ true);
+ }
+
+ return 0;
+}
+
+static uint64_t hvf_vtimer_val_raw(void)
+{
+ /*
+     * mach_absolute_time() returns the host vtimer value. Subtract the VM
+     * offset that we define to get the guest's view of the vtimer.
+ */
+ return mach_absolute_time() - hvf_state->vtimer_offset;
+}
+
+static uint64_t hvf_vtimer_val(void)
+{
+ if (!runstate_is_running()) {
+ /* VM is paused, the vtimer value is in vtimer.vtimer_val */
+ return vtimer.vtimer_val;
+ }
+
+ return hvf_vtimer_val_raw();
+}
+
+static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
+{
+ /*
+ * Use pselect to sleep so that other threads can IPI us while we're
+ * sleeping.
+ */
+ qatomic_set_mb(&cpu->thread_kicked, false);
+ bql_unlock();
+ pselect(0, 0, 0, 0, ts, &cpu->accel->unblock_ipi_mask);
+ bql_lock();
+}
+
+static void hvf_wfi(CPUState *cpu)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ struct timespec ts;
+ hv_return_t r;
+ uint64_t ctl;
+ uint64_t cval;
+ int64_t ticks_to_sleep;
+ uint64_t seconds;
+ uint64_t nanos;
+ uint32_t cntfrq;
+
+ if (cpu->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ)) {
+ /* Interrupt pending, no need to wait */
+ return;
+ }
+
+ r = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
+ assert_hvf_ok(r);
+
+ if (!(ctl & 1) || (ctl & 2)) {
+ /* Timer disabled or masked, just wait for an IPI. */
+ hvf_wait_for_ipi(cpu, NULL);
+ return;
+ }
+
+ r = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval);
+ assert_hvf_ok(r);
+
+ ticks_to_sleep = cval - hvf_vtimer_val();
+ if (ticks_to_sleep < 0) {
+ return;
+ }
+
+ cntfrq = gt_cntfrq_period_ns(arm_cpu);
+ seconds = muldiv64(ticks_to_sleep, cntfrq, NANOSECONDS_PER_SECOND);
+ ticks_to_sleep -= muldiv64(seconds, NANOSECONDS_PER_SECOND, cntfrq);
+ nanos = ticks_to_sleep * cntfrq;
+
+ /*
+ * Don't sleep for less than the time a context switch would take,
+ * so that we can satisfy fast timer requests on the same CPU.
+ * Measurements on M1 show the sweet spot to be ~2ms.
+ */
+ if (!seconds && nanos < (2 * SCALE_MS)) {
+ return;
+ }
+
+ ts = (struct timespec) { seconds, nanos };
+ hvf_wait_for_ipi(cpu, &ts);
+}
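
The muldiv64() calls above split the remaining vtimer ticks into whole seconds plus leftover nanoseconds, using the tick period in nanoseconds returned by gt_cntfrq_period_ns(). A simplified standalone sketch of the same arithmetic (plain 64-bit math, so it can overflow where muldiv64() would not; the ~42 ns period is an assumption for the example only):

#include <stdint.h>
#include <stdio.h>

#define NANOSECONDS_PER_SECOND 1000000000ULL

static void ticks_to_timespec(uint64_t ticks, uint64_t period_ns,
                              uint64_t *seconds, uint64_t *nanos)
{
    /* Same split as hvf_wfi(): whole seconds first, then leftover ns. */
    *seconds = ticks * period_ns / NANOSECONDS_PER_SECOND;
    ticks -= *seconds * NANOSECONDS_PER_SECOND / period_ns;
    *nanos = ticks * period_ns;
}

int main(void)
{
    uint64_t s, ns;

    ticks_to_timespec(30000000, 42, &s, &ns);
    printf("sleep for %llus %lluns\n",
           (unsigned long long)s, (unsigned long long)ns); /* 1s 260000034ns */
    return 0;
}
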
+
+static void hvf_sync_vtimer(CPUState *cpu)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ hv_return_t r;
+ uint64_t ctl;
+ bool irq_state;
+
+ if (!cpu->accel->vtimer_masked) {
+ /* We will get notified on vtimer changes by hvf, nothing to do */
+ return;
+ }
+
+ r = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
+ assert_hvf_ok(r);
+
+ irq_state = (ctl & (TMR_CTL_ENABLE | TMR_CTL_IMASK | TMR_CTL_ISTATUS)) ==
+ (TMR_CTL_ENABLE | TMR_CTL_ISTATUS);
+ qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], irq_state);
+
+ if (!irq_state) {
+ /* Timer no longer asserting, we can unmask it */
+ hv_vcpu_set_vtimer_mask(cpu->accel->fd, false);
+ cpu->accel->vtimer_masked = false;
+ }
+}
+
+int hvf_vcpu_exec(CPUState *cpu)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ CPUARMState *env = &arm_cpu->env;
+ int ret;
+ hv_vcpu_exit_t *hvf_exit = cpu->accel->exit;
+ hv_return_t r;
+ bool advance_pc = false;
+
+ if (!(cpu->singlestep_enabled & SSTEP_NOIRQ) &&
+ hvf_inject_interrupts(cpu)) {
+ return EXCP_INTERRUPT;
+ }
+
+ if (cpu->halted) {
+ return EXCP_HLT;
+ }
+
+ flush_cpu_state(cpu);
+
+ bql_unlock();
+ assert_hvf_ok(hv_vcpu_run(cpu->accel->fd));
+
+ /* handle VMEXIT */
+ uint64_t exit_reason = hvf_exit->reason;
+ uint64_t syndrome = hvf_exit->exception.syndrome;
+ uint32_t ec = syn_get_ec(syndrome);
+
+ ret = 0;
+ bql_lock();
+ switch (exit_reason) {
+ case HV_EXIT_REASON_EXCEPTION:
+ /* This is the main one, handle below. */
+ break;
+ case HV_EXIT_REASON_VTIMER_ACTIVATED:
+ qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
+ cpu->accel->vtimer_masked = true;
+ return 0;
+ case HV_EXIT_REASON_CANCELED:
+ /* we got kicked, no exit to process */
+ return 0;
+ default:
+ g_assert_not_reached();
+ }
+
+ hvf_sync_vtimer(cpu);
+
+ switch (ec) {
+ case EC_SOFTWARESTEP: {
+ ret = EXCP_DEBUG;
+
+ if (!cpu->singlestep_enabled) {
+ error_report("EC_SOFTWARESTEP but single-stepping not enabled");
+ }
+ break;
+ }
+ case EC_AA64_BKPT: {
+ ret = EXCP_DEBUG;
+
+ cpu_synchronize_state(cpu);
+
+ if (!hvf_find_sw_breakpoint(cpu, env->pc)) {
+ /* Re-inject into the guest */
+ ret = 0;
+ hvf_raise_exception(cpu, EXCP_BKPT, syn_aa64_bkpt(0));
+ }
+ break;
+ }
+ case EC_BREAKPOINT: {
+ ret = EXCP_DEBUG;
+
+ cpu_synchronize_state(cpu);
+
+ if (!find_hw_breakpoint(cpu, env->pc)) {
+ error_report("EC_BREAKPOINT but unknown hw breakpoint");
+ }
+ break;
+ }
+ case EC_WATCHPOINT: {
+ ret = EXCP_DEBUG;
+
+ cpu_synchronize_state(cpu);
+
+ CPUWatchpoint *wp =
+ find_hw_watchpoint(cpu, hvf_exit->exception.virtual_address);
+ if (!wp) {
+            error_report("EC_WATCHPOINT but unknown hw watchpoint");
+ }
+ cpu->watchpoint_hit = wp;
+ break;
+ }
+ case EC_DATAABORT: {
+ bool isv = syndrome & ARM_EL_ISV;
+ bool iswrite = (syndrome >> 6) & 1;
+ bool s1ptw = (syndrome >> 7) & 1;
+ uint32_t sas = (syndrome >> 22) & 3;
+ uint32_t len = 1 << sas;
+ uint32_t srt = (syndrome >> 16) & 0x1f;
+ uint32_t cm = (syndrome >> 8) & 0x1;
+ uint64_t val = 0;
+
+ trace_hvf_data_abort(env->pc, hvf_exit->exception.virtual_address,
+ hvf_exit->exception.physical_address, isv,
+ iswrite, s1ptw, len, srt);
+
+ if (cm) {
+ /* We don't cache MMIO regions */
+ advance_pc = true;
+ break;
+ }
+
+ assert(isv);
+
+ if (iswrite) {
+ val = hvf_get_reg(cpu, srt);
+ address_space_write(&address_space_memory,
+ hvf_exit->exception.physical_address,
+ MEMTXATTRS_UNSPECIFIED, &val, len);
+ } else {
+ address_space_read(&address_space_memory,
+ hvf_exit->exception.physical_address,
+ MEMTXATTRS_UNSPECIFIED, &val, len);
+ hvf_set_reg(cpu, srt, val);
+ }
+
+ advance_pc = true;
+ break;
+ }
+ case EC_SYSTEMREGISTERTRAP: {
+ bool isread = (syndrome >> 0) & 1;
+ uint32_t rt = (syndrome >> 5) & 0x1f;
+ uint32_t reg = syndrome & SYSREG_MASK;
+ uint64_t val;
+ int sysreg_ret = 0;
+
+ if (isread) {
+ sysreg_ret = hvf_sysreg_read(cpu, reg, rt);
+ } else {
+ val = hvf_get_reg(cpu, rt);
+ sysreg_ret = hvf_sysreg_write(cpu, reg, val);
+ }
+
+ advance_pc = !sysreg_ret;
+ break;
+ }
+ case EC_WFX_TRAP:
+ advance_pc = true;
+ if (!(syndrome & WFX_IS_WFE)) {
+ hvf_wfi(cpu);
+ }
+ break;
+ case EC_AA64_HVC:
+ cpu_synchronize_state(cpu);
+ if (arm_cpu->psci_conduit == QEMU_PSCI_CONDUIT_HVC) {
+ if (!hvf_handle_psci_call(cpu)) {
+ trace_hvf_unknown_hvc(env->xregs[0]);
+ /* SMCCC 1.3 section 5.2 says every unknown SMCCC call returns -1 */
+ env->xregs[0] = -1;
+ }
+ } else {
+ trace_hvf_unknown_hvc(env->xregs[0]);
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
+ }
+ break;
+ case EC_AA64_SMC:
+ cpu_synchronize_state(cpu);
+ if (arm_cpu->psci_conduit == QEMU_PSCI_CONDUIT_SMC) {
+ advance_pc = true;
+
+ if (!hvf_handle_psci_call(cpu)) {
+ trace_hvf_unknown_smc(env->xregs[0]);
+ /* SMCCC 1.3 section 5.2 says every unknown SMCCC call returns -1 */
+ env->xregs[0] = -1;
+ }
+ } else {
+ trace_hvf_unknown_smc(env->xregs[0]);
+ hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized());
+ }
+ break;
+ default:
+ cpu_synchronize_state(cpu);
+ trace_hvf_exit(syndrome, ec, env->pc);
+ error_report("0x%llx: unhandled exception ec=0x%x", env->pc, ec);
+ }
+
+ if (advance_pc) {
+ uint64_t pc;
+
+ flush_cpu_state(cpu);
+
+ r = hv_vcpu_get_reg(cpu->accel->fd, HV_REG_PC, &pc);
+ assert_hvf_ok(r);
+ pc += 4;
+ r = hv_vcpu_set_reg(cpu->accel->fd, HV_REG_PC, pc);
+ assert_hvf_ok(r);
+
+ /* Handle single-stepping over instructions which trigger a VM exit */
+ if (cpu->singlestep_enabled) {
+ ret = EXCP_DEBUG;
+ }
+ }
+
+ return ret;
+}
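
For reference, the ISS field extraction done in the EC_DATAABORT case above, as a standalone sketch; the sample syndrome value is made up for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t syndrome = (1ULL << 24)   /* ISV: valid instruction syndrome */
                      | (2ULL << 22)   /* SAS = 2 -> 4-byte access */
                      | (3ULL << 16)   /* SRT = x3 */
                      | (1ULL << 6);   /* WnR = write */

    int isv = (syndrome >> 24) & 1;
    int iswrite = (syndrome >> 6) & 1;
    unsigned len = 1u << ((syndrome >> 22) & 3);
    unsigned srt = (syndrome >> 16) & 0x1f;

    printf("isv=%d write=%d len=%u srt=x%u\n", isv, iswrite, len, srt);
    return 0;
}
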
+
+static const VMStateDescription vmstate_hvf_vtimer = {
+ .name = "hvf-vtimer",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT64(vtimer_val, HVFVTimer),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static void hvf_vm_state_change(void *opaque, bool running, RunState state)
+{
+ HVFVTimer *s = opaque;
+
+ if (running) {
+ /* Update vtimer offset on all CPUs */
+ hvf_state->vtimer_offset = mach_absolute_time() - s->vtimer_val;
+ cpu_synchronize_all_states();
+ } else {
+ /* Remember vtimer value on every pause */
+ s->vtimer_val = hvf_vtimer_val_raw();
+ }
+}
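
A small standalone sketch (with made-up host tick values) of the pause/resume bookkeeping above: while running, the guest vtimer reads as host ticks minus vtimer_offset; on pause the guest value is saved, and on resume the offset is recomputed so the timer appears frozen for the paused interval:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t offset = 1000;                    /* set at hvf_arch_init() time */
    uint64_t host_at_pause = 5000;
    uint64_t saved = host_at_pause - offset;   /* guest vtimer on pause: 4000 */

    uint64_t host_at_resume = 9000;            /* 4000 host ticks later */
    offset = host_at_resume - saved;           /* new offset: 5000 */

    printf("guest vtimer after resume = %llu\n",
           (unsigned long long)(host_at_resume - offset)); /* still 4000 */
    return 0;
}
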
+
+int hvf_arch_init(void)
+{
+ hvf_state->vtimer_offset = mach_absolute_time();
+ vmstate_register(NULL, 0, &vmstate_hvf_vtimer, &vtimer);
+ qemu_add_vm_change_state_handler(hvf_vm_state_change, &vtimer);
+
+ hvf_arm_init_debug();
+
+ return 0;
+}
+
+static const uint32_t brk_insn = 0xd4200000;
+
+int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp)
+{
+ if (cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) ||
+ cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&brk_insn, 4, 1)) {
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp)
+{
+ static uint32_t brk;
+
+ if (cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&brk, 4, 0) ||
+ brk != brk_insn ||
+ cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) {
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type)
+{
+ switch (type) {
+ case GDB_BREAKPOINT_HW:
+ return insert_hw_breakpoint(addr);
+ case GDB_WATCHPOINT_READ:
+ case GDB_WATCHPOINT_WRITE:
+ case GDB_WATCHPOINT_ACCESS:
+ return insert_hw_watchpoint(addr, len, type);
+ default:
+ return -ENOSYS;
+ }
+}
+
+int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type)
+{
+ switch (type) {
+ case GDB_BREAKPOINT_HW:
+ return delete_hw_breakpoint(addr);
+ case GDB_WATCHPOINT_READ:
+ case GDB_WATCHPOINT_WRITE:
+ case GDB_WATCHPOINT_ACCESS:
+ return delete_hw_watchpoint(addr, len, type);
+ default:
+ return -ENOSYS;
+ }
+}
+
+void hvf_arch_remove_all_hw_breakpoints(void)
+{
+ if (cur_hw_wps > 0) {
+ g_array_remove_range(hw_watchpoints, 0, cur_hw_wps);
+ }
+ if (cur_hw_bps > 0) {
+ g_array_remove_range(hw_breakpoints, 0, cur_hw_bps);
+ }
+}
+
+/*
+ * Update the vCPU with the gdbstub's view of debug registers. This view
+ * consists of all hardware breakpoints and watchpoints inserted so far while
+ * debugging the guest.
+ */
+static void hvf_put_gdbstub_debug_registers(CPUState *cpu)
+{
+ hv_return_t r = HV_SUCCESS;
+ int i;
+
+ for (i = 0; i < cur_hw_bps; i++) {
+ HWBreakpoint *bp = get_hw_bp(i);
+ r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgbcr_regs[i], bp->bcr);
+ assert_hvf_ok(r);
+ r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgbvr_regs[i], bp->bvr);
+ assert_hvf_ok(r);
+ }
+ for (i = cur_hw_bps; i < max_hw_bps; i++) {
+ r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgbcr_regs[i], 0);
+ assert_hvf_ok(r);
+ r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgbvr_regs[i], 0);
+ assert_hvf_ok(r);
+ }
+
+ for (i = 0; i < cur_hw_wps; i++) {
+ HWWatchpoint *wp = get_hw_wp(i);
+ r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgwcr_regs[i], wp->wcr);
+ assert_hvf_ok(r);
+ r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgwvr_regs[i], wp->wvr);
+ assert_hvf_ok(r);
+ }
+ for (i = cur_hw_wps; i < max_hw_wps; i++) {
+ r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgwcr_regs[i], 0);
+ assert_hvf_ok(r);
+ r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgwvr_regs[i], 0);
+ assert_hvf_ok(r);
+ }
+}
+
+/*
+ * Update the vCPU with the guest's view of debug registers. This view is kept
+ * in the environment at all times.
+ */
+static void hvf_put_guest_debug_registers(CPUState *cpu)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ CPUARMState *env = &arm_cpu->env;
+ hv_return_t r = HV_SUCCESS;
+ int i;
+
+ for (i = 0; i < max_hw_bps; i++) {
+ r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgbcr_regs[i],
+ env->cp15.dbgbcr[i]);
+ assert_hvf_ok(r);
+ r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgbvr_regs[i],
+ env->cp15.dbgbvr[i]);
+ assert_hvf_ok(r);
+ }
+
+ for (i = 0; i < max_hw_wps; i++) {
+ r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgwcr_regs[i],
+ env->cp15.dbgwcr[i]);
+ assert_hvf_ok(r);
+ r = hv_vcpu_set_sys_reg(cpu->accel->fd, dbgwvr_regs[i],
+ env->cp15.dbgwvr[i]);
+ assert_hvf_ok(r);
+ }
+}
+
+static inline bool hvf_arm_hw_debug_active(CPUState *cpu)
+{
+ return ((cur_hw_wps > 0) || (cur_hw_bps > 0));
+}
+
+static void hvf_arch_set_traps(void)
+{
+ CPUState *cpu;
+ bool should_enable_traps = false;
+ hv_return_t r = HV_SUCCESS;
+
+    /*
+     * Check whether guest debugging is enabled for at least one vCPU; if it
+     * is, enable exiting the guest on all vCPUs.
+     */
+ CPU_FOREACH(cpu) {
+ should_enable_traps |= cpu->accel->guest_debug_enabled;
+ }
+ CPU_FOREACH(cpu) {
+ /* Set whether debug exceptions exit the guest */
+ r = hv_vcpu_set_trap_debug_exceptions(cpu->accel->fd,
+ should_enable_traps);
+ assert_hvf_ok(r);
+
+ /* Set whether accesses to debug registers exit the guest */
+ r = hv_vcpu_set_trap_debug_reg_accesses(cpu->accel->fd,
+ should_enable_traps);
+ assert_hvf_ok(r);
+ }
+}
+
+void hvf_arch_update_guest_debug(CPUState *cpu)
+{
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ CPUARMState *env = &arm_cpu->env;
+
+ /* Check whether guest debugging is enabled */
+ cpu->accel->guest_debug_enabled = cpu->singlestep_enabled ||
+ hvf_sw_breakpoints_active(cpu) ||
+ hvf_arm_hw_debug_active(cpu);
+
+ /* Update debug registers */
+ if (cpu->accel->guest_debug_enabled) {
+ hvf_put_gdbstub_debug_registers(cpu);
+ } else {
+ hvf_put_guest_debug_registers(cpu);
+ }
+
+ cpu_synchronize_state(cpu);
+
+ /* Enable/disable single-stepping */
+ if (cpu->singlestep_enabled) {
+ env->cp15.mdscr_el1 =
+ deposit64(env->cp15.mdscr_el1, MDSCR_EL1_SS_SHIFT, 1, 1);
+ pstate_write(env, pstate_read(env) | PSTATE_SS);
+ } else {
+ env->cp15.mdscr_el1 =
+ deposit64(env->cp15.mdscr_el1, MDSCR_EL1_SS_SHIFT, 1, 0);
+ }
+
+ /* Enable/disable Breakpoint exceptions */
+ if (hvf_arm_hw_debug_active(cpu)) {
+ env->cp15.mdscr_el1 =
+ deposit64(env->cp15.mdscr_el1, MDSCR_EL1_MDE_SHIFT, 1, 1);
+ } else {
+ env->cp15.mdscr_el1 =
+ deposit64(env->cp15.mdscr_el1, MDSCR_EL1_MDE_SHIFT, 1, 0);
+ }
+
+ hvf_arch_set_traps();
+}
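
The deposit64() calls above flip single bits of MDSCR_EL1 (SS is bit 0, MDE is bit 15). A minimal re-implementation of the deposit pattern for illustration only; QEMU's own helper (in include/qemu/bitops.h) does the same with range assertions:

#include <stdint.h>
#include <stdio.h>

static uint64_t deposit64_sketch(uint64_t value, int start, int length,
                                 uint64_t fieldval)
{
    uint64_t mask = (~0ULL >> (64 - length)) << start;

    return (value & ~mask) | ((fieldval << start) & mask);
}

int main(void)
{
    uint64_t mdscr = 0;

    mdscr = deposit64_sketch(mdscr, 0, 1, 1);   /* a 1-bit field at bit 0 */
    mdscr = deposit64_sketch(mdscr, 15, 1, 1);  /* and another at bit 15 */
    printf("mdscr = 0x%llx\n", (unsigned long long)mdscr); /* 0x8001 */
    return 0;
}
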
+
+bool hvf_arch_supports_guest_debug(void)
+{
+ return true;
+}
diff --git a/target/arm/hvf/meson.build b/target/arm/hvf/meson.build
new file mode 100644
index 0000000000..afc509a470
--- /dev/null
+++ b/target/arm/hvf/meson.build
@@ -0,0 +1,3 @@
+arm_system_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: files(
+ 'hvf.c',
+))
diff --git a/target/arm/hvf/trace-events b/target/arm/hvf/trace-events
new file mode 100644
index 0000000000..4fbbe4b45e
--- /dev/null
+++ b/target/arm/hvf/trace-events
@@ -0,0 +1,13 @@
+hvf_unhandled_sysreg_read(uint64_t pc, uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2) "unhandled sysreg read at pc=0x%"PRIx64": 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d)"
+hvf_unhandled_sysreg_write(uint64_t pc, uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2) "unhandled sysreg write at pc=0x%"PRIx64": 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d)"
+hvf_inject_fiq(void) "injecting FIQ"
+hvf_inject_irq(void) "injecting IRQ"
+hvf_data_abort(uint64_t pc, uint64_t va, uint64_t pa, bool isv, bool iswrite, bool s1ptw, uint32_t len, uint32_t srt) "data abort: [pc=0x%"PRIx64" va=0x%016"PRIx64" pa=0x%016"PRIx64" isv=%d iswrite=%d s1ptw=%d len=%d srt=%d]"
+hvf_sysreg_read(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg read 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d) = 0x%016"PRIx64
+hvf_sysreg_write(uint32_t reg, uint32_t op0, uint32_t op1, uint32_t crn, uint32_t crm, uint32_t op2, uint64_t val) "sysreg write 0x%08x (op0=%d op1=%d crn=%d crm=%d op2=%d, val=0x%016"PRIx64")"
+hvf_unknown_hvc(uint64_t x0) "unknown HVC! 0x%016"PRIx64
+hvf_unknown_smc(uint64_t x0) "unknown SMC! 0x%016"PRIx64
+hvf_exit(uint64_t syndrome, uint32_t ec, uint64_t pc) "exit: 0x%"PRIx64" [ec=0x%x pc=0x%"PRIx64"]"
+hvf_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64" x3=0x%016"PRIx64" cpu=0x%x"
+hvf_vgic_write(const char *name, uint64_t val) "vgic write to %s [val=0x%016"PRIx64"]"
+hvf_vgic_read(const char *name, uint64_t val) "vgic read from %s [val=0x%016"PRIx64"]"
diff --git a/target/arm/hvf_arm.h b/target/arm/hvf_arm.h
new file mode 100644
index 0000000000..e848c1d27d
--- /dev/null
+++ b/target/arm/hvf_arm.h
@@ -0,0 +1,25 @@
+/*
+ * QEMU Hypervisor.framework (HVF) support -- ARM specifics
+ *
+ * Copyright (c) 2021 Alexander Graf
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_HVF_ARM_H
+#define QEMU_HVF_ARM_H
+
+#include "cpu.h"
+
+/**
+ * hvf_arm_init_debug() - initialize guest debug capabilities
+ *
+ * Should be called only once before using guest debug capabilities.
+ */
+void hvf_arm_init_debug(void);
+
+void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu);
+
+#endif
diff --git a/target/arm/hyp_gdbstub.c b/target/arm/hyp_gdbstub.c
new file mode 100644
index 0000000000..ebde2899cd
--- /dev/null
+++ b/target/arm/hyp_gdbstub.c
@@ -0,0 +1,253 @@
+/*
+ * ARM implementation of KVM and HVF hooks, 64 bit specific code
+ *
+ * Copyright Mian-M. Hamayun 2013, Virtual Open Systems
+ * Copyright Alex Bennée 2014, Linaro
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "exec/gdbstub.h"
+
+/* Maximum and current break/watch point counts */
+int max_hw_bps, max_hw_wps;
+GArray *hw_breakpoints, *hw_watchpoints;
+
+/**
+ * insert_hw_breakpoint()
+ * @addr: address of breakpoint
+ *
+ * See ARM ARM D2.9.1 for details, but here we only create simple
+ * unlinked breakpoints (i.e. we don't chain breakpoints together to
+ * match address and context or VMID). The hardware is capable of
+ * fancier matching, but that would require exposing the extra
+ * functionality through GDB's interface.
+ *
+ * DBGBCR<n>_EL1, Debug Breakpoint Control Registers
+ *
+ * 31 24 23 20 19 16 15 14 13 12 9 8 5 4 3 2 1 0
+ * +------+------+-------+-----+----+------+-----+------+-----+---+
+ * | RES0 | BT | LBN | SSC | HMC| RES0 | BAS | RES0 | PMC | E |
+ * +------+------+-------+-----+----+------+-----+------+-----+---+
+ *
+ * BT: Breakpoint type (0 = unlinked address match)
+ * LBN: Linked BP number (0 = unused)
+ * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12)
+ * BAS: Byte Address Select (RES1 for AArch64)
+ * E: Enable bit
+ *
+ * DBGBVR<n>_EL1, Debug Breakpoint Value Registers
+ *
+ * 63 53 52 49 48 2 1 0
+ * +------+-----------+----------+-----+
+ * | RESS | VA[52:49] | VA[48:2] | 0 0 |
+ * +------+-----------+----------+-----+
+ *
+ * Depending on the addressing mode bits, the top bits of the register
+ * are a sign extension of the highest applicable VA bit. Some versions
+ * of GDB don't sign-extend correctly, so we fix the value up here to
+ * make sure future PC comparisons work properly.
+ */
+
+int insert_hw_breakpoint(target_ulong addr)
+{
+ HWBreakpoint brk = {
+ .bcr = 0x1, /* BCR E=1, enable */
+ .bvr = sextract64(addr, 0, 53)
+ };
+
+ if (cur_hw_bps >= max_hw_bps) {
+ return -ENOBUFS;
+ }
+
+ brk.bcr = deposit32(brk.bcr, 1, 2, 0x3); /* PMC = 11 */
+ brk.bcr = deposit32(brk.bcr, 5, 4, 0xf); /* BAS = RES1 */
+
+ g_array_append_val(hw_breakpoints, brk);
+
+ return 0;
+}
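
For a concrete value: with E=1, PMC=0b11 and the four BAS bits set as above, the resulting DBGBCR word is 0x1e7. A standalone sketch (not part of the patch) of the same composition without the deposit32() helper:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t bcr = 0;

    bcr |= 1u << 0;     /* E: enable */
    bcr |= 0x3u << 1;   /* PMC = 0b11 */
    bcr |= 0xfu << 5;   /* low four BAS bits, as the deposit32() above sets */
    printf("DBGBCR = 0x%x\n", bcr); /* 0x1e7 */
    return 0;
}
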
+
+/**
+ * delete_hw_breakpoint()
+ * @pc: address of breakpoint
+ *
+ * Delete a breakpoint and shuffle any above down
+ */
+
+int delete_hw_breakpoint(target_ulong pc)
+{
+ int i;
+ for (i = 0; i < hw_breakpoints->len; i++) {
+ HWBreakpoint *brk = get_hw_bp(i);
+ if (brk->bvr == pc) {
+ g_array_remove_index(hw_breakpoints, i);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+/**
+ * insert_hw_watchpoint()
+ * @addr: address of watchpoint
+ * @len: size of area
+ * @type: type of watchpoint
+ *
+ * See ARM ARM D2.10. As with the breakpoints, more advanced matching
+ * is possible: watchpoints can be linked with the breakpoints above to
+ * make them context aware. However, for simplicity we currently only
+ * handle simple read/write watchpoints.
+ *
+ * D7.3.11 DBGWCR<n>_EL1, Debug Watchpoint Control Registers
+ *
+ * 31 29 28 24 23 21 20 19 16 15 14 13 12 5 4 3 2 1 0
+ * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
+ * | RES0 | MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E |
+ * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
+ *
+ * MASK: num bits addr mask (0=none,01/10=res,11=3 bits (8 bytes))
+ * WT: 0 - unlinked, 1 - linked (not currently used)
+ * LBN: Linked BP number (not currently used)
+ * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11)
+ * BAS: Byte Address Select
+ * LSC: Load/Store control (01: load, 10: store, 11: both)
+ * E: Enable
+ *
+ * The bottom 2 bits of the value register are masked. Therefore to
+ * break on any size smaller than an unaligned word you need to set
+ * MASK=0 and BAS to one bit per byte in question. For larger regions
+ * (which must be a power of 2 in size) you need to mask the address
+ * as required and set BAS=0xff.
+ */
+
+int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type)
+{
+ HWWatchpoint wp = {
+ .wcr = R_DBGWCR_E_MASK, /* E=1, enable */
+ .wvr = addr & (~0x7ULL),
+ .details = { .vaddr = addr, .len = len }
+ };
+
+ if (cur_hw_wps >= max_hw_wps) {
+ return -ENOBUFS;
+ }
+
+ /*
+ * HMC=0 SSC=0 PAC=3 will hit EL0 or EL1, any security state,
+ * valid whether EL3 is implemented or not
+ */
+ wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, PAC, 3);
+
+ switch (type) {
+ case GDB_WATCHPOINT_READ:
+ wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 1);
+ wp.details.flags = BP_MEM_READ;
+ break;
+ case GDB_WATCHPOINT_WRITE:
+ wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 2);
+ wp.details.flags = BP_MEM_WRITE;
+ break;
+ case GDB_WATCHPOINT_ACCESS:
+ wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, LSC, 3);
+ wp.details.flags = BP_MEM_ACCESS;
+ break;
+ default:
+ g_assert_not_reached();
+ break;
+ }
+ if (len <= 8) {
+ /* we align the address and set the bits in BAS */
+ int off = addr & 0x7;
+ int bas = (1 << len) - 1;
+
+ wp.wcr = deposit32(wp.wcr, 5 + off, 8 - off, bas);
+ } else {
+ /* For ranges above 8 bytes we need to be a power of 2 */
+ if (is_power_of_2(len)) {
+ int bits = ctz64(len);
+
+ wp.wvr &= ~((1 << bits) - 1);
+ wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, MASK, bits);
+ wp.wcr = FIELD_DP64(wp.wcr, DBGWCR, BAS, 0xff);
+ } else {
+ return -ENOBUFS;
+ }
+ }
+
+ g_array_append_val(hw_watchpoints, wp);
+ return 0;
+}
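
A standalone sketch of the small-watchpoint BAS computation above (a simplified version: unlike the deposit32() call it does not truncate the field if the watched bytes would spill past the doubleword). The 2-byte watch at 0x1003 is a made-up example:

#include <stdint.h>
#include <stdio.h>

static uint32_t wcr_bas_field(uint64_t addr, unsigned len)
{
    unsigned off = addr & 0x7;           /* byte offset within the doubleword */
    uint32_t bas = (1u << len) - 1;      /* one bit per watched byte */

    return bas << off;                   /* deposited at DBGWCR bits [12:5] */
}

int main(void)
{
    /* Watch 2 bytes at 0x1003: bytes 3 and 4 of the aligned doubleword. */
    printf("BAS = 0x%x\n", wcr_bas_field(0x1003, 2)); /* 0x18 */
    return 0;
}
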
+
+bool check_watchpoint_in_range(int i, target_ulong addr)
+{
+ HWWatchpoint *wp = get_hw_wp(i);
+ uint64_t addr_top, addr_bottom = wp->wvr;
+ int bas = extract32(wp->wcr, 5, 8);
+ int mask = extract32(wp->wcr, 24, 4);
+
+ if (mask) {
+ addr_top = addr_bottom + (1 << mask);
+ } else {
+ /*
+ * BAS must be contiguous but can offset against the base
+ * address in DBGWVR
+ */
+ addr_bottom = addr_bottom + ctz32(bas);
+ addr_top = addr_bottom + clo32(bas);
+ }
+
+ if (addr >= addr_bottom && addr <= addr_top) {
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * delete_hw_watchpoint()
+ * @addr: address of watchpoint
+ *
+ * Delete a watchpoint and shuffle any above down
+ */
+
+int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type)
+{
+ int i;
+ for (i = 0; i < cur_hw_wps; i++) {
+ if (check_watchpoint_in_range(i, addr)) {
+ g_array_remove_index(hw_watchpoints, i);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
+bool find_hw_breakpoint(CPUState *cpu, target_ulong pc)
+{
+ int i;
+
+ for (i = 0; i < cur_hw_bps; i++) {
+ HWBreakpoint *bp = get_hw_bp(i);
+ if (bp->bvr == pc) {
+ return true;
+ }
+ }
+ return false;
+}
+
+CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr)
+{
+ int i;
+
+ for (i = 0; i < cur_hw_wps; i++) {
+ if (check_watchpoint_in_range(i, addr)) {
+ return &get_hw_wp(i)->details;
+ }
+ }
+ return NULL;
+}
diff --git a/target/arm/idau.h b/target/arm/idau.h
index cac27b95fa..0ef5251971 100644
--- a/target/arm/idau.h
+++ b/target/arm/idau.h
@@ -33,18 +33,15 @@
#define TYPE_IDAU_INTERFACE "idau-interface"
#define IDAU_INTERFACE(obj) \
INTERFACE_CHECK(IDAUInterface, (obj), TYPE_IDAU_INTERFACE)
-#define IDAU_INTERFACE_CLASS(class) \
- OBJECT_CLASS_CHECK(IDAUInterfaceClass, (class), TYPE_IDAU_INTERFACE)
-#define IDAU_INTERFACE_GET_CLASS(obj) \
- OBJECT_GET_CLASS(IDAUInterfaceClass, (obj), TYPE_IDAU_INTERFACE)
+typedef struct IDAUInterfaceClass IDAUInterfaceClass;
+DECLARE_CLASS_CHECKERS(IDAUInterfaceClass, IDAU_INTERFACE,
+ TYPE_IDAU_INTERFACE)
-typedef struct IDAUInterface {
- Object parent;
-} IDAUInterface;
+typedef struct IDAUInterface IDAUInterface;
#define IREGION_NOTVALID -1
-typedef struct IDAUInterfaceClass {
+struct IDAUInterfaceClass {
InterfaceClass parent;
/* Check the specified address and return the IDAU security information
@@ -56,6 +53,6 @@ typedef struct IDAUInterfaceClass {
*/
void (*check)(IDAUInterface *ii, uint32_t address, int *iregion,
bool *exempt, bool *ns, bool *nsc);
-} IDAUInterfaceClass;
+};
#endif
diff --git a/target/arm/internals.h b/target/arm/internals.h
index a4fc709bcc..dd3da211a3 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -26,6 +26,9 @@
#define TARGET_ARM_INTERNALS_H
#include "hw/registerfields.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "syndrome.h"
+#include "cpu-features.h"
/* register banks for CPU modes */
#define BANK_USRSYS 0
@@ -37,6 +40,11 @@
#define BANK_HYP 6
#define BANK_MON 7
+static inline int arm_env_mmu_index(CPUARMState *env)
+{
+ return EX_TBFLAG_ANY(env->hflags, MMUIDX);
+}
+
static inline bool excp_is_internal(int excp)
{
/* Return true if this exception number represents a QEMU-internal
@@ -79,6 +87,169 @@ FIELD(V7M_EXCRET, RES1, 7, 25) /* including the must-be-1 prefix */
*/
#define FNC_RETURN_MIN_MAGIC 0xfefffffe
+/* Bit definitions for DBGWCRn and DBGWCRn_EL1 */
+FIELD(DBGWCR, E, 0, 1)
+FIELD(DBGWCR, PAC, 1, 2)
+FIELD(DBGWCR, LSC, 3, 2)
+FIELD(DBGWCR, BAS, 5, 8)
+FIELD(DBGWCR, HMC, 13, 1)
+FIELD(DBGWCR, SSC, 14, 2)
+FIELD(DBGWCR, LBN, 16, 4)
+FIELD(DBGWCR, WT, 20, 1)
+FIELD(DBGWCR, MASK, 24, 5)
+FIELD(DBGWCR, SSCE, 29, 1)
+
+#define VTCR_NSW (1u << 29)
+#define VTCR_NSA (1u << 30)
+#define VSTCR_SW VTCR_NSW
+#define VSTCR_SA VTCR_NSA
+
+/* Bit definitions for CPACR (AArch32 only) */
+FIELD(CPACR, CP10, 20, 2)
+FIELD(CPACR, CP11, 22, 2)
+FIELD(CPACR, TRCDIS, 28, 1) /* matches CPACR_EL1.TTA */
+FIELD(CPACR, D32DIS, 30, 1) /* up to v7; RAZ in v8 */
+FIELD(CPACR, ASEDIS, 31, 1)
+
+/* Bit definitions for CPACR_EL1 (AArch64 only) */
+FIELD(CPACR_EL1, ZEN, 16, 2)
+FIELD(CPACR_EL1, FPEN, 20, 2)
+FIELD(CPACR_EL1, SMEN, 24, 2)
+FIELD(CPACR_EL1, TTA, 28, 1) /* matches CPACR.TRCDIS */
+
+/* Bit definitions for HCPTR (AArch32 only) */
+FIELD(HCPTR, TCP10, 10, 1)
+FIELD(HCPTR, TCP11, 11, 1)
+FIELD(HCPTR, TASE, 15, 1)
+FIELD(HCPTR, TTA, 20, 1)
+FIELD(HCPTR, TAM, 30, 1) /* matches CPTR_EL2.TAM */
+FIELD(HCPTR, TCPAC, 31, 1) /* matches CPTR_EL2.TCPAC */
+
+/* Bit definitions for CPTR_EL2 (AArch64 only) */
+FIELD(CPTR_EL2, TZ, 8, 1) /* !E2H */
+FIELD(CPTR_EL2, TFP, 10, 1) /* !E2H, matches HCPTR.TCP10 */
+FIELD(CPTR_EL2, TSM, 12, 1) /* !E2H */
+FIELD(CPTR_EL2, ZEN, 16, 2) /* E2H */
+FIELD(CPTR_EL2, FPEN, 20, 2) /* E2H */
+FIELD(CPTR_EL2, SMEN, 24, 2) /* E2H */
+FIELD(CPTR_EL2, TTA, 28, 1)
+FIELD(CPTR_EL2, TAM, 30, 1) /* matches HCPTR.TAM */
+FIELD(CPTR_EL2, TCPAC, 31, 1) /* matches HCPTR.TCPAC */
+
+/* Bit definitions for CPTR_EL3 (AArch64 only) */
+FIELD(CPTR_EL3, EZ, 8, 1)
+FIELD(CPTR_EL3, TFP, 10, 1)
+FIELD(CPTR_EL3, ESM, 12, 1)
+FIELD(CPTR_EL3, TTA, 20, 1)
+FIELD(CPTR_EL3, TAM, 30, 1)
+FIELD(CPTR_EL3, TCPAC, 31, 1)
+
+#define MDCR_MTPME (1U << 28)
+#define MDCR_TDCC (1U << 27)
+#define MDCR_HLP (1U << 26) /* MDCR_EL2 */
+#define MDCR_SCCD (1U << 23) /* MDCR_EL3 */
+#define MDCR_HCCD (1U << 23) /* MDCR_EL2 */
+#define MDCR_EPMAD (1U << 21)
+#define MDCR_EDAD (1U << 20)
+#define MDCR_TTRF (1U << 19)
+#define MDCR_STE (1U << 18) /* MDCR_EL3 */
+#define MDCR_SPME (1U << 17) /* MDCR_EL3 */
+#define MDCR_HPMD (1U << 17) /* MDCR_EL2 */
+#define MDCR_SDD (1U << 16)
+#define MDCR_SPD (3U << 14)
+#define MDCR_TDRA (1U << 11)
+#define MDCR_TDOSA (1U << 10)
+#define MDCR_TDA (1U << 9)
+#define MDCR_TDE (1U << 8)
+#define MDCR_HPME (1U << 7)
+#define MDCR_TPM (1U << 6)
+#define MDCR_TPMCR (1U << 5)
+#define MDCR_HPMN (0x1fU)
+
+/* Not all of the MDCR_EL3 bits are present in the 32-bit SDCR */
+#define SDCR_VALID_MASK (MDCR_MTPME | MDCR_TDCC | MDCR_SCCD | \
+ MDCR_EPMAD | MDCR_EDAD | MDCR_TTRF | \
+ MDCR_STE | MDCR_SPME | MDCR_SPD)
+
+#define TTBCR_N (7U << 0) /* TTBCR.EAE==0 */
+#define TTBCR_T0SZ (7U << 0) /* TTBCR.EAE==1 */
+#define TTBCR_PD0 (1U << 4)
+#define TTBCR_PD1 (1U << 5)
+#define TTBCR_EPD0 (1U << 7)
+#define TTBCR_IRGN0 (3U << 8)
+#define TTBCR_ORGN0 (3U << 10)
+#define TTBCR_SH0 (3U << 12)
+#define TTBCR_T1SZ (3U << 16)
+#define TTBCR_A1 (1U << 22)
+#define TTBCR_EPD1 (1U << 23)
+#define TTBCR_IRGN1 (3U << 24)
+#define TTBCR_ORGN1 (3U << 26)
+#define TTBCR_SH1 (1U << 28)
+#define TTBCR_EAE (1U << 31)
+
+FIELD(VTCR, T0SZ, 0, 6)
+FIELD(VTCR, SL0, 6, 2)
+FIELD(VTCR, IRGN0, 8, 2)
+FIELD(VTCR, ORGN0, 10, 2)
+FIELD(VTCR, SH0, 12, 2)
+FIELD(VTCR, TG0, 14, 2)
+FIELD(VTCR, PS, 16, 3)
+FIELD(VTCR, VS, 19, 1)
+FIELD(VTCR, HA, 21, 1)
+FIELD(VTCR, HD, 22, 1)
+FIELD(VTCR, HWU59, 25, 1)
+FIELD(VTCR, HWU60, 26, 1)
+FIELD(VTCR, HWU61, 27, 1)
+FIELD(VTCR, HWU62, 28, 1)
+FIELD(VTCR, NSW, 29, 1)
+FIELD(VTCR, NSA, 30, 1)
+FIELD(VTCR, DS, 32, 1)
+FIELD(VTCR, SL2, 33, 1)
+
+#define HCRX_ENAS0 (1ULL << 0)
+#define HCRX_ENALS (1ULL << 1)
+#define HCRX_ENASR (1ULL << 2)
+#define HCRX_FNXS (1ULL << 3)
+#define HCRX_FGTNXS (1ULL << 4)
+#define HCRX_SMPME (1ULL << 5)
+#define HCRX_TALLINT (1ULL << 6)
+#define HCRX_VINMI (1ULL << 7)
+#define HCRX_VFNMI (1ULL << 8)
+#define HCRX_CMOW (1ULL << 9)
+#define HCRX_MCE2 (1ULL << 10)
+#define HCRX_MSCEN (1ULL << 11)
+
+#define HPFAR_NS (1ULL << 63)
+
+#define HSTR_TTEE (1 << 16)
+#define HSTR_TJDBX (1 << 17)
+
+/*
+ * Depending on the value of HCR_EL2.E2H, bits 0 and 1
+ * have different bit definitions, and EL1PCTEN might be
+ * bit 0 or bit 10. We use _E2H1 and _E2H0 suffixes to
+ * disambiguate if necessary.
+ */
+FIELD(CNTHCTL, EL0PCTEN_E2H1, 0, 1)
+FIELD(CNTHCTL, EL0VCTEN_E2H1, 1, 1)
+FIELD(CNTHCTL, EL1PCTEN_E2H0, 0, 1)
+FIELD(CNTHCTL, EL1PCEN_E2H0, 1, 1)
+FIELD(CNTHCTL, EVNTEN, 2, 1)
+FIELD(CNTHCTL, EVNTDIR, 3, 1)
+FIELD(CNTHCTL, EVNTI, 4, 4)
+FIELD(CNTHCTL, EL0VTEN, 8, 1)
+FIELD(CNTHCTL, EL0PTEN, 9, 1)
+FIELD(CNTHCTL, EL1PCTEN_E2H1, 10, 1)
+FIELD(CNTHCTL, EL1PTEN, 11, 1)
+FIELD(CNTHCTL, ECV, 12, 1)
+FIELD(CNTHCTL, EL1TVT, 13, 1)
+FIELD(CNTHCTL, EL1TVCT, 14, 1)
+FIELD(CNTHCTL, EL1NVPCT, 15, 1)
+FIELD(CNTHCTL, EL1NVVCT, 16, 1)
+FIELD(CNTHCTL, EVNTIS, 17, 1)
+FIELD(CNTHCTL, CNTVMASK, 18, 1)
+FIELD(CNTHCTL, CNTPMASK, 19, 1)
+
/* We use a few fake FSR values for internal purposes in M profile.
* M profile cores don't have A/R format FSRs, but currently our
* get_phys_addr() code assumes A/R profile and reports failures via
@@ -100,8 +271,15 @@ FIELD(V7M_EXCRET, RES1, 7, 25) /* including the must-be-1 prefix */
* and target exception level. This should be called from helper functions,
* and never returns because we will longjump back up to the CPU main loop.
*/
-void QEMU_NORETURN raise_exception(CPUARMState *env, uint32_t excp,
- uint32_t syndrome, uint32_t target_el);
+G_NORETURN void raise_exception(CPUARMState *env, uint32_t excp,
+ uint32_t syndrome, uint32_t target_el);
+
+/*
+ * Similarly, but also use unwinding to restore cpu state.
+ */
+G_NORETURN void raise_exception_ra(CPUARMState *env, uint32_t excp,
+ uint32_t syndrome, uint32_t target_el,
+ uintptr_t ra);
/*
* For AArch64, map a given EL to an index in the banked_spsr array.
@@ -145,20 +323,55 @@ static inline int bank_number(int mode)
g_assert_not_reached();
}
-void switch_mode(CPUARMState *, int);
+/**
+ * r14_bank_number: Map CPU mode onto register bank for r14
+ *
+ * Given an AArch32 CPU mode, return the index into the saved register
+ * banks to use for the R14 (LR) in that mode. This is the same as
+ * bank_number(), except for the special case of Hyp mode, where
+ * R14 is shared with USR and SYS, unlike its R13 and SPSR.
+ * This should be used as the index into env->banked_r14[], and
+ * bank_number() used for the index into env->banked_r13[] and
+ * env->banked_spsr[].
+ */
+static inline int r14_bank_number(int mode)
+{
+ return (mode == ARM_CPU_MODE_HYP) ? BANK_USRSYS : bank_number(mode);
+}
+
+void arm_cpu_register(const ARMCPUInfo *info);
+void aarch64_cpu_register(const ARMCPUInfo *info);
+
+void register_cp_regs_for_features(ARMCPU *cpu);
+void init_cpreg_list(ARMCPU *cpu);
+
void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
void arm_translate_init(void);
-enum arm_fprounding {
+void arm_restore_state_to_opc(CPUState *cs,
+ const TranslationBlock *tb,
+ const uint64_t *data);
+
+#ifdef CONFIG_TCG
+void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
+#endif /* CONFIG_TCG */
+
+typedef enum ARMFPRounding {
FPROUNDING_TIEEVEN,
FPROUNDING_POSINF,
FPROUNDING_NEGINF,
FPROUNDING_ZERO,
FPROUNDING_TIEAWAY,
FPROUNDING_ODD
-};
+} ARMFPRounding;
-int arm_rmode_to_sf(int rmode);
+extern const FloatRoundMode arm_rmode_to_sf_map[6];
+
+static inline FloatRoundMode arm_rmode_to_sf(ARMFPRounding rmode)
+{
+ assert((unsigned)rmode < ARRAY_SIZE(arm_rmode_to_sf_map));
+ return arm_rmode_to_sf_map[rmode];
+}
static inline void aarch64_save_sp(CPUARMState *env, int el)
{
@@ -204,23 +417,7 @@ static inline void update_spsel(CPUARMState *env, uint32_t imm)
* Returns the implementation defined bit-width of physical addresses.
* The ARMv8 reference manuals refer to this as PAMax().
*/
-static inline unsigned int arm_pamax(ARMCPU *cpu)
-{
- static const unsigned int pamax_map[] = {
- [0] = 32,
- [1] = 36,
- [2] = 40,
- [3] = 42,
- [4] = 44,
- [5] = 48,
- };
- unsigned int parange = extract32(cpu->id_aa64mmfr0, 0, 4);
-
- /* id_aa64mmfr0 is a read-only register so values outside of the
- * supported mappings can be considered an implementation error. */
- assert(parange < ARRAY_SIZE(pamax_map));
- return pamax_map[parange];
-}
+unsigned int arm_pamax(ARMCPU *cpu);
/* Return true if extended addresses are enabled.
* This is always the case if our translation regime is 64 bit,
@@ -228,226 +425,13 @@ static inline unsigned int arm_pamax(ARMCPU *cpu)
*/
static inline bool extended_addresses_enabled(CPUARMState *env)
{
- TCR *tcr = &env->cp15.tcr_el[arm_is_secure(env) ? 3 : 1];
+ uint64_t tcr = env->cp15.tcr_el[arm_is_secure(env) ? 3 : 1];
+ if (arm_feature(env, ARM_FEATURE_PMSA) &&
+ arm_feature(env, ARM_FEATURE_V8)) {
+ return true;
+ }
return arm_el_is_aa64(env, 1) ||
- (arm_feature(env, ARM_FEATURE_LPAE) && (tcr->raw_tcr & TTBCR_EAE));
-}
-
-/* Valid Syndrome Register EC field values */
-enum arm_exception_class {
- EC_UNCATEGORIZED = 0x00,
- EC_WFX_TRAP = 0x01,
- EC_CP15RTTRAP = 0x03,
- EC_CP15RRTTRAP = 0x04,
- EC_CP14RTTRAP = 0x05,
- EC_CP14DTTRAP = 0x06,
- EC_ADVSIMDFPACCESSTRAP = 0x07,
- EC_FPIDTRAP = 0x08,
- EC_CP14RRTTRAP = 0x0c,
- EC_ILLEGALSTATE = 0x0e,
- EC_AA32_SVC = 0x11,
- EC_AA32_HVC = 0x12,
- EC_AA32_SMC = 0x13,
- EC_AA64_SVC = 0x15,
- EC_AA64_HVC = 0x16,
- EC_AA64_SMC = 0x17,
- EC_SYSTEMREGISTERTRAP = 0x18,
- EC_SVEACCESSTRAP = 0x19,
- EC_INSNABORT = 0x20,
- EC_INSNABORT_SAME_EL = 0x21,
- EC_PCALIGNMENT = 0x22,
- EC_DATAABORT = 0x24,
- EC_DATAABORT_SAME_EL = 0x25,
- EC_SPALIGNMENT = 0x26,
- EC_AA32_FPTRAP = 0x28,
- EC_AA64_FPTRAP = 0x2c,
- EC_SERROR = 0x2f,
- EC_BREAKPOINT = 0x30,
- EC_BREAKPOINT_SAME_EL = 0x31,
- EC_SOFTWARESTEP = 0x32,
- EC_SOFTWARESTEP_SAME_EL = 0x33,
- EC_WATCHPOINT = 0x34,
- EC_WATCHPOINT_SAME_EL = 0x35,
- EC_AA32_BKPT = 0x38,
- EC_VECTORCATCH = 0x3a,
- EC_AA64_BKPT = 0x3c,
-};
-
-#define ARM_EL_EC_SHIFT 26
-#define ARM_EL_IL_SHIFT 25
-#define ARM_EL_ISV_SHIFT 24
-#define ARM_EL_IL (1 << ARM_EL_IL_SHIFT)
-#define ARM_EL_ISV (1 << ARM_EL_ISV_SHIFT)
-
-/* Utility functions for constructing various kinds of syndrome value.
- * Note that in general we follow the AArch64 syndrome values; in a
- * few cases the value in HSR for exceptions taken to AArch32 Hyp
- * mode differs slightly, so if we ever implemented Hyp mode then the
- * syndrome value would need some massaging on exception entry.
- * (One example of this is that AArch64 defaults to IL bit set for
- * exceptions which don't specifically indicate information about the
- * trapping instruction, whereas AArch32 defaults to IL bit clear.)
- */
-static inline uint32_t syn_uncategorized(void)
-{
- return (EC_UNCATEGORIZED << ARM_EL_EC_SHIFT) | ARM_EL_IL;
-}
-
-static inline uint32_t syn_aa64_svc(uint32_t imm16)
-{
- return (EC_AA64_SVC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
-}
-
-static inline uint32_t syn_aa64_hvc(uint32_t imm16)
-{
- return (EC_AA64_HVC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
-}
-
-static inline uint32_t syn_aa64_smc(uint32_t imm16)
-{
- return (EC_AA64_SMC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
-}
-
-static inline uint32_t syn_aa32_svc(uint32_t imm16, bool is_16bit)
-{
- return (EC_AA32_SVC << ARM_EL_EC_SHIFT) | (imm16 & 0xffff)
- | (is_16bit ? 0 : ARM_EL_IL);
-}
-
-static inline uint32_t syn_aa32_hvc(uint32_t imm16)
-{
- return (EC_AA32_HVC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
-}
-
-static inline uint32_t syn_aa32_smc(void)
-{
- return (EC_AA32_SMC << ARM_EL_EC_SHIFT) | ARM_EL_IL;
-}
-
-static inline uint32_t syn_aa64_bkpt(uint32_t imm16)
-{
- return (EC_AA64_BKPT << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
-}
-
-static inline uint32_t syn_aa32_bkpt(uint32_t imm16, bool is_16bit)
-{
- return (EC_AA32_BKPT << ARM_EL_EC_SHIFT) | (imm16 & 0xffff)
- | (is_16bit ? 0 : ARM_EL_IL);
-}
-
-static inline uint32_t syn_aa64_sysregtrap(int op0, int op1, int op2,
- int crn, int crm, int rt,
- int isread)
-{
- return (EC_SYSTEMREGISTERTRAP << ARM_EL_EC_SHIFT) | ARM_EL_IL
- | (op0 << 20) | (op2 << 17) | (op1 << 14) | (crn << 10) | (rt << 5)
- | (crm << 1) | isread;
-}
-
-static inline uint32_t syn_cp14_rt_trap(int cv, int cond, int opc1, int opc2,
- int crn, int crm, int rt, int isread,
- bool is_16bit)
-{
- return (EC_CP14RTTRAP << ARM_EL_EC_SHIFT)
- | (is_16bit ? 0 : ARM_EL_IL)
- | (cv << 24) | (cond << 20) | (opc2 << 17) | (opc1 << 14)
- | (crn << 10) | (rt << 5) | (crm << 1) | isread;
-}
-
-static inline uint32_t syn_cp15_rt_trap(int cv, int cond, int opc1, int opc2,
- int crn, int crm, int rt, int isread,
- bool is_16bit)
-{
- return (EC_CP15RTTRAP << ARM_EL_EC_SHIFT)
- | (is_16bit ? 0 : ARM_EL_IL)
- | (cv << 24) | (cond << 20) | (opc2 << 17) | (opc1 << 14)
- | (crn << 10) | (rt << 5) | (crm << 1) | isread;
-}
-
-static inline uint32_t syn_cp14_rrt_trap(int cv, int cond, int opc1, int crm,
- int rt, int rt2, int isread,
- bool is_16bit)
-{
- return (EC_CP14RRTTRAP << ARM_EL_EC_SHIFT)
- | (is_16bit ? 0 : ARM_EL_IL)
- | (cv << 24) | (cond << 20) | (opc1 << 16)
- | (rt2 << 10) | (rt << 5) | (crm << 1) | isread;
-}
-
-static inline uint32_t syn_cp15_rrt_trap(int cv, int cond, int opc1, int crm,
- int rt, int rt2, int isread,
- bool is_16bit)
-{
- return (EC_CP15RRTTRAP << ARM_EL_EC_SHIFT)
- | (is_16bit ? 0 : ARM_EL_IL)
- | (cv << 24) | (cond << 20) | (opc1 << 16)
- | (rt2 << 10) | (rt << 5) | (crm << 1) | isread;
-}
-
-static inline uint32_t syn_fp_access_trap(int cv, int cond, bool is_16bit)
-{
- return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT)
- | (is_16bit ? 0 : ARM_EL_IL)
- | (cv << 24) | (cond << 20);
-}
-
-static inline uint32_t syn_sve_access_trap(void)
-{
- return EC_SVEACCESSTRAP << ARM_EL_EC_SHIFT;
-}
-
-static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc)
-{
- return (EC_INSNABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
- | ARM_EL_IL | (ea << 9) | (s1ptw << 7) | fsc;
-}
-
-static inline uint32_t syn_data_abort_no_iss(int same_el,
- int ea, int cm, int s1ptw,
- int wnr, int fsc)
-{
- return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
- | ARM_EL_IL
- | (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc;
-}
-
-static inline uint32_t syn_data_abort_with_iss(int same_el,
- int sas, int sse, int srt,
- int sf, int ar,
- int ea, int cm, int s1ptw,
- int wnr, int fsc,
- bool is_16bit)
-{
- return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
- | (is_16bit ? 0 : ARM_EL_IL)
- | ARM_EL_ISV | (sas << 22) | (sse << 21) | (srt << 16)
- | (sf << 15) | (ar << 14)
- | (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc;
-}
-
-static inline uint32_t syn_swstep(int same_el, int isv, int ex)
-{
- return (EC_SOFTWARESTEP << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
- | ARM_EL_IL | (isv << 24) | (ex << 6) | 0x22;
-}
-
-static inline uint32_t syn_watchpoint(int same_el, int cm, int wnr)
-{
- return (EC_WATCHPOINT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
- | ARM_EL_IL | (cm << 8) | (wnr << 6) | 0x22;
-}
-
-static inline uint32_t syn_breakpoint(int same_el)
-{
- return (EC_BREAKPOINT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
- | ARM_EL_IL | 0x22;
-}
-
-static inline uint32_t syn_wfx(int cv, int cond, int ti, bool is_16bit)
-{
- return (EC_WFX_TRAP << ARM_EL_EC_SHIFT) |
- (is_16bit ? 0 : (1 << ARM_EL_IL_SHIFT)) |
- (cv << 24) | (cond << 20) | ti;
+ (arm_feature(env, ARM_FEATURE_LPAE) && (tcr & TTBCR_EAE));
}
/* Update a QEMU watchpoint based on the information the guest has set in the
@@ -469,6 +453,9 @@ void hw_breakpoint_update(ARMCPU *cpu, int n);
*/
void hw_breakpoint_update_all(ARMCPU *cpu);
+/* Callback function for checking if a breakpoint should trigger. */
+bool arm_debug_check_breakpoint(CPUState *cs);
+
/* Callback function for checking if a watchpoint should trigger. */
bool arm_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp);
@@ -480,11 +467,15 @@ vaddr arm_adjust_watchpoint_address(CPUState *cs, vaddr addr, int len);
/* Callback function for when a watchpoint or breakpoint triggers. */
void arm_debug_excp_handler(CPUState *cs);
-#ifdef CONFIG_USER_ONLY
+#if defined(CONFIG_USER_ONLY) || !defined(CONFIG_TCG)
static inline bool arm_is_psci_call(ARMCPU *cpu, int excp_type)
{
return false;
}
+static inline void arm_handle_psci_call(ARMCPU *cpu)
+{
+ g_assert_not_reached();
+}
#else
/* Return true if the r0/x0 value indicates that this SMC/HVC is a PSCI call. */
bool arm_is_psci_call(ARMCPU *cpu, int excp_type);
@@ -524,31 +515,50 @@ typedef enum ARMFaultType {
ARMFault_AsyncExternal,
ARMFault_Debug,
ARMFault_TLBConflict,
+ ARMFault_UnsuppAtomicUpdate,
ARMFault_Lockdown,
ARMFault_Exclusive,
ARMFault_ICacheMaint,
ARMFault_QEMU_NSCExec, /* v8M: NS executing in S&NSC memory */
ARMFault_QEMU_SFault, /* v8M: SecureFault INVTRAN, INVEP or AUVIOL */
+ ARMFault_GPCFOnWalk,
+ ARMFault_GPCFOnOutput,
} ARMFaultType;
+typedef enum ARMGPCF {
+ GPCF_None,
+ GPCF_AddressSize,
+ GPCF_Walk,
+ GPCF_EABT,
+ GPCF_Fail,
+} ARMGPCF;
+
/**
* ARMMMUFaultInfo: Information describing an ARM MMU Fault
* @type: Type of fault
+ * @gpcf: Subtype of ARMFault_GPCFOn{Walk,Output}.
* @level: Table walk level (for translation, access flag and permission faults)
* @domain: Domain of the fault address (for non-LPAE CPUs only)
* @s2addr: Address that caused a fault at stage 2
+ * @paddr: physical address that caused a fault for gpc
+ * @paddr_space: physical address space that caused a fault for gpc
* @stage2: True if we faulted at stage 2
* @s1ptw: True if we faulted at stage 2 while doing a stage 1 page-table walk
+ * @s1ns: True if we faulted on a non-secure IPA while in secure state
* @ea: True if we should set the EA (external abort type) bit in syndrome
*/
typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
struct ARMMMUFaultInfo {
ARMFaultType type;
+ ARMGPCF gpcf;
target_ulong s2addr;
+ target_ulong paddr;
+ ARMSecuritySpace paddr_space;
int level;
int domain;
bool stage2;
bool s1ptw;
+ bool s1ns;
bool ea;
};
@@ -647,28 +657,51 @@ static inline uint32_t arm_fi_to_lfsc(ARMMMUFaultInfo *fi)
case ARMFault_None:
return 0;
case ARMFault_AddressSize:
- fsc = fi->level & 3;
+ assert(fi->level >= -1 && fi->level <= 3);
+ if (fi->level < 0) {
+ fsc = 0b101001;
+ } else {
+ fsc = fi->level;
+ }
break;
case ARMFault_AccessFlag:
- fsc = (fi->level & 3) | (0x2 << 2);
+ assert(fi->level >= 0 && fi->level <= 3);
+ fsc = 0b001000 | fi->level;
break;
case ARMFault_Permission:
- fsc = (fi->level & 3) | (0x3 << 2);
+ assert(fi->level >= 0 && fi->level <= 3);
+ fsc = 0b001100 | fi->level;
break;
case ARMFault_Translation:
- fsc = (fi->level & 3) | (0x1 << 2);
+ assert(fi->level >= -1 && fi->level <= 3);
+ if (fi->level < 0) {
+ fsc = 0b101011;
+ } else {
+ fsc = 0b000100 | fi->level;
+ }
break;
case ARMFault_SyncExternal:
fsc = 0x10 | (fi->ea << 12);
break;
case ARMFault_SyncExternalOnWalk:
- fsc = (fi->level & 3) | (0x5 << 2) | (fi->ea << 12);
+ assert(fi->level >= -1 && fi->level <= 3);
+ if (fi->level < 0) {
+ fsc = 0b010011;
+ } else {
+ fsc = 0b010100 | fi->level;
+ }
+ fsc |= fi->ea << 12;
break;
case ARMFault_SyncParity:
fsc = 0x18;
break;
case ARMFault_SyncParityOnWalk:
- fsc = (fi->level & 3) | (0x7 << 2);
+ assert(fi->level >= -1 && fi->level <= 3);
+ if (fi->level < 0) {
+ fsc = 0b011011;
+ } else {
+ fsc = 0b011100 | fi->level;
+ }
break;
case ARMFault_AsyncParity:
fsc = 0x19;
@@ -685,12 +718,26 @@ static inline uint32_t arm_fi_to_lfsc(ARMMMUFaultInfo *fi)
case ARMFault_TLBConflict:
fsc = 0x30;
break;
+ case ARMFault_UnsuppAtomicUpdate:
+ fsc = 0x31;
+ break;
case ARMFault_Lockdown:
fsc = 0x34;
break;
case ARMFault_Exclusive:
fsc = 0x35;
break;
+ case ARMFault_GPCFOnWalk:
+ assert(fi->level >= -1 && fi->level <= 3);
+ if (fi->level < 0) {
+ fsc = 0b100011;
+ } else {
+ fsc = 0b100100 | fi->level;
+ }
+ break;
+ case ARMFault_GPCFOnOutput:
+ fsc = 0b101000;
+ break;
default:
/* Other faults can't occur in a context that requires a
* long-format status code.
@@ -712,20 +759,55 @@ static inline bool arm_extabort_type(MemTxResult result)
return result != MEMTX_DECODE_ERROR;
}
-/* Do a page table walk and add page to TLB if possible */
-bool arm_tlb_fill(CPUState *cpu, vaddr address,
- MMUAccessType access_type, int mmu_idx,
- ARMMMUFaultInfo *fi);
+#ifdef CONFIG_USER_ONLY
+void arm_cpu_record_sigsegv(CPUState *cpu, vaddr addr,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t ra);
+void arm_cpu_record_sigbus(CPUState *cpu, vaddr addr,
+ MMUAccessType access_type, uintptr_t ra);
+#else
+bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+ MMUAccessType access_type, int mmu_idx,
+ bool probe, uintptr_t retaddr);
+#endif
-/* Return true if the stage 1 translation regime is using LPAE format page
- * tables */
+static inline int arm_to_core_mmu_idx(ARMMMUIdx mmu_idx)
+{
+ return mmu_idx & ARM_MMU_IDX_COREIDX_MASK;
+}
+
+static inline ARMMMUIdx core_to_arm_mmu_idx(CPUARMState *env, int mmu_idx)
+{
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ return mmu_idx | ARM_MMU_IDX_M;
+ } else {
+ return mmu_idx | ARM_MMU_IDX_A;
+ }
+}
+
+static inline ARMMMUIdx core_to_aa64_mmu_idx(int mmu_idx)
+{
+ /* AArch64 is always a-profile. */
+ return mmu_idx | ARM_MMU_IDX_A;
+}
+
+int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx);
+
+/* Return the MMU index for a v7M CPU in the specified security state */
+ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate);
+
+/*
+ * Return true if the stage 1 translation regime is using LPAE
+ * format page tables
+ */
bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx);
/* Raise a data fault alignment exception for the specified virtual address */
-void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
- MMUAccessType access_type,
- int mmu_idx, uintptr_t retaddr);
+G_NORETURN void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
+ MMUAccessType access_type,
+ int mmu_idx, uintptr_t retaddr);
+#ifndef CONFIG_USER_ONLY
/* arm_cpu_do_transaction_failed: handle a memory system error response
* (eg "no device/memory present at address") by raising an external abort
* exception
@@ -735,6 +817,7 @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
MMUAccessType access_type,
int mmu_idx, MemTxAttrs attrs,
MemTxResult response, uintptr_t retaddr);
+#endif
/* Call any registered EL change hooks */
static inline void arm_call_pre_el_change_hook(ARMCPU *cpu)
@@ -752,63 +835,192 @@ static inline void arm_call_el_change_hook(ARMCPU *cpu)
}
}
-/* Return true if this address translation regime is secure */
-static inline bool regime_is_secure(CPUARMState *env, ARMMMUIdx mmu_idx)
+/* Return true if this address translation regime has two ranges. */
+static inline bool regime_has_2_ranges(ARMMMUIdx mmu_idx)
+{
+ switch (mmu_idx) {
+ case ARMMMUIdx_Stage1_E0:
+ case ARMMMUIdx_Stage1_E1:
+ case ARMMMUIdx_Stage1_E1_PAN:
+ case ARMMMUIdx_E10_0:
+ case ARMMMUIdx_E10_1:
+ case ARMMMUIdx_E10_1_PAN:
+ case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_E20_2:
+ case ARMMMUIdx_E20_2_PAN:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx)
+{
+ switch (mmu_idx) {
+ case ARMMMUIdx_Stage1_E1_PAN:
+ case ARMMMUIdx_E10_1_PAN:
+ case ARMMMUIdx_E20_2_PAN:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool regime_is_stage2(ARMMMUIdx mmu_idx)
+{
+ return mmu_idx == ARMMMUIdx_Stage2 || mmu_idx == ARMMMUIdx_Stage2_S;
+}
+
+/* Return the exception level which controls this address translation regime */
+static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx)
{
switch (mmu_idx) {
- case ARMMMUIdx_S12NSE0:
- case ARMMMUIdx_S12NSE1:
- case ARMMMUIdx_S1NSE0:
- case ARMMMUIdx_S1NSE1:
- case ARMMMUIdx_S1E2:
- case ARMMMUIdx_S2NS:
+ case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_E20_2:
+ case ARMMMUIdx_E20_2_PAN:
+ case ARMMMUIdx_Stage2:
+ case ARMMMUIdx_Stage2_S:
+ case ARMMMUIdx_E2:
+ return 2;
+ case ARMMMUIdx_E3:
+ return 3;
+ case ARMMMUIdx_E10_0:
+ case ARMMMUIdx_Stage1_E0:
+ return arm_el_is_aa64(env, 3) || !arm_is_secure_below_el3(env) ? 1 : 3;
+ case ARMMMUIdx_Stage1_E1:
+ case ARMMMUIdx_Stage1_E1_PAN:
+ case ARMMMUIdx_E10_1:
+ case ARMMMUIdx_E10_1_PAN:
case ARMMMUIdx_MPrivNegPri:
case ARMMMUIdx_MUserNegPri:
case ARMMMUIdx_MPriv:
case ARMMMUIdx_MUser:
- return false;
- case ARMMMUIdx_S1E3:
- case ARMMMUIdx_S1SE0:
- case ARMMMUIdx_S1SE1:
case ARMMMUIdx_MSPrivNegPri:
case ARMMMUIdx_MSUserNegPri:
case ARMMMUIdx_MSPriv:
case ARMMMUIdx_MSUser:
+ return 1;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx)
+{
+ switch (mmu_idx) {
+ case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_Stage1_E0:
+ case ARMMMUIdx_MUser:
+ case ARMMMUIdx_MSUser:
+ case ARMMMUIdx_MUserNegPri:
+ case ARMMMUIdx_MSUserNegPri:
return true;
default:
+ return false;
+ case ARMMMUIdx_E10_0:
+ case ARMMMUIdx_E10_1:
+ case ARMMMUIdx_E10_1_PAN:
g_assert_not_reached();
}
}
-/* Return the FSR value for a debug exception (watchpoint, hardware
- * breakpoint or BKPT insn) targeting the specified exception level.
+/* Return the SCTLR value which controls this address translation regime */
+static inline uint64_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx)
+{
+ return env->cp15.sctlr_el[regime_el(env, mmu_idx)];
+}
+
+/*
+ * These are the fields in VTCR_EL2 which affect both the Secure stage 2
+ * and the Non-Secure stage 2 translation regimes (and hence which are
+ * not present in VSTCR_EL2).
*/
-static inline uint32_t arm_debug_exception_fsr(CPUARMState *env)
+#define VTCR_SHARED_FIELD_MASK \
+ (R_VTCR_IRGN0_MASK | R_VTCR_ORGN0_MASK | R_VTCR_SH0_MASK | \
+ R_VTCR_PS_MASK | R_VTCR_VS_MASK | R_VTCR_HA_MASK | R_VTCR_HD_MASK | \
+ R_VTCR_DS_MASK)
+
+/* Return the value of the TCR controlling this translation regime */
+static inline uint64_t regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx)
{
- ARMMMUFaultInfo fi = { .type = ARMFault_Debug };
- int target_el = arm_debug_target_el(env);
- bool using_lpae = false;
+ if (mmu_idx == ARMMMUIdx_Stage2) {
+ return env->cp15.vtcr_el2;
+ }
+ if (mmu_idx == ARMMMUIdx_Stage2_S) {
+ /*
+ * Secure stage 2 shares fields from VTCR_EL2. We merge those
+ * in with the VSTCR_EL2 value to synthesize a single VTCR_EL2 format
+ * value so the callers don't need to special case this.
+ *
+ * If a future architecture change defines bits in VSTCR_EL2 that
+ * overlap with these VTCR_EL2 fields we may need to revisit this.
+ */
+ uint64_t v = env->cp15.vstcr_el2 & ~VTCR_SHARED_FIELD_MASK;
+ v |= env->cp15.vtcr_el2 & VTCR_SHARED_FIELD_MASK;
+ return v;
+ }
+ return env->cp15.tcr_el[regime_el(env, mmu_idx)];
+}
+
+/* Return true if the translation regime is using LPAE format page tables */
+static inline bool regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx)
+{
+ int el = regime_el(env, mmu_idx);
+ if (el == 2 || arm_el_is_aa64(env, el)) {
+ return true;
+ }
+ if (arm_feature(env, ARM_FEATURE_PMSA) &&
+ arm_feature(env, ARM_FEATURE_V8)) {
+ return true;
+ }
+ if (arm_feature(env, ARM_FEATURE_LPAE)
+ && (regime_tcr(env, mmu_idx) & TTBCR_EAE)) {
+ return true;
+ }
+ return false;
+}
- if (target_el == 2 || arm_el_is_aa64(env, target_el)) {
- using_lpae = true;
+/**
+ * arm_num_brps: Return number of implemented breakpoints.
+ * Note that the ID register BRPS field is "number of bps - 1",
+ * and we return the actual number of breakpoints.
+ */
+static inline int arm_num_brps(ARMCPU *cpu)
+{
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1;
} else {
- if (arm_feature(env, ARM_FEATURE_LPAE) &&
- (env->cp15.tcr_el[target_el].raw_tcr & TTBCR_EAE)) {
- using_lpae = true;
- }
+ return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, BRPS) + 1;
}
+}
- if (using_lpae) {
- return arm_fi_to_lfsc(&fi);
+/**
+ * arm_num_wrps: Return number of implemented watchpoints.
+ * Note that the ID register WRPS field is "number of wps - 1",
+ * and we return the actual number of watchpoints.
+ */
+static inline int arm_num_wrps(ARMCPU *cpu)
+{
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1;
} else {
- return arm_fi_to_sfsc(&fi);
+ return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, WRPS) + 1;
}
}
-/* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3.
- * Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits.
+/**
+ * arm_num_ctx_cmps: Return number of implemented context comparators.
+ * Note that the ID register CTX_CMPS field is "number of cmps - 1",
+ * and we return the actual number of comparators.
*/
-#define MEMOPIDX_SHIFT 8
+static inline int arm_num_ctx_cmps(ARMCPU *cpu)
+{
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) + 1;
+ } else {
+ return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, CTX_CMPS) + 1;
+ }
+}
/**
* v7m_using_psp: Return true if using process stack pointer
@@ -840,4 +1052,691 @@ static inline uint32_t v7m_sp_limit(CPUARMState *env)
}
}
+/**
+ * v7m_cpacr_pass:
+ * Return true if the v7M CPACR permits access to the FPU for the specified
+ * security state and privilege level.
+ */
+static inline bool v7m_cpacr_pass(CPUARMState *env,
+ bool is_secure, bool is_priv)
+{
+ switch (extract32(env->v7m.cpacr[is_secure], 20, 2)) {
+ case 0:
+ case 2: /* UNPREDICTABLE: we treat like 0 */
+ return false;
+ case 1:
+ return is_priv;
+ case 3:
+ return true;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/**
+ * aarch32_mode_name(): Return name of the AArch32 CPU mode
+ * @psr: Program Status Register indicating CPU mode
+ *
+ * Returns, for debug logging purposes, a printable representation
+ * of the AArch32 CPU mode ("svc", "usr", etc) as indicated by
+ * the low bits of the specified PSR.
+ */
+static inline const char *aarch32_mode_name(uint32_t psr)
+{
+ static const char cpu_mode_names[16][4] = {
+ "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt",
+ "???", "???", "hyp", "und", "???", "???", "???", "sys"
+ };
+
+ return cpu_mode_names[psr & 0xf];
+}
+
+/**
+ * arm_cpu_update_virq: Update CPU_INTERRUPT_VIRQ bit in cs->interrupt_request
+ *
+ * Update the CPU_INTERRUPT_VIRQ bit in cs->interrupt_request, following
+ * a change to either the input VIRQ line from the GIC or the HCR_EL2.VI bit.
+ * Must be called with the BQL held.
+ */
+void arm_cpu_update_virq(ARMCPU *cpu);
+
+/**
+ * arm_cpu_update_vfiq: Update CPU_INTERRUPT_VFIQ bit in cs->interrupt_request
+ *
+ * Update the CPU_INTERRUPT_VFIQ bit in cs->interrupt_request, following
+ * a change to either the input VFIQ line from the GIC or the HCR_EL2.VF bit.
+ * Must be called with the BQL held.
+ */
+void arm_cpu_update_vfiq(ARMCPU *cpu);
+
+/**
+ * arm_cpu_update_vserr: Update CPU_INTERRUPT_VSERR bit
+ *
+ * Update the CPU_INTERRUPT_VSERR bit in cs->interrupt_request,
+ * following a change to the HCR_EL2.VSE bit.
+ */
+void arm_cpu_update_vserr(ARMCPU *cpu);
+
+/**
+ * arm_mmu_idx_el:
+ * @env: The cpu environment
+ * @el: The EL to use.
+ *
+ * Return the full ARMMMUIdx for the translation regime for EL.
+ */
+ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el);
+
+/**
+ * arm_mmu_idx:
+ * @env: The cpu environment
+ *
+ * Return the full ARMMMUIdx for the current translation regime.
+ */
+ARMMMUIdx arm_mmu_idx(CPUARMState *env);
+
+/**
+ * arm_stage1_mmu_idx:
+ * @env: The cpu environment
+ *
+ * Return the ARMMMUIdx for the stage1 traversal for the current regime.
+ */
+#ifdef CONFIG_USER_ONLY
+static inline ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx)
+{
+ return ARMMMUIdx_Stage1_E0;
+}
+static inline ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env)
+{
+ return ARMMMUIdx_Stage1_E0;
+}
+#else
+ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx);
+ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env);
+#endif
+
+/**
+ * arm_mmu_idx_is_stage1_of_2:
+ * @mmu_idx: The ARMMMUIdx to test
+ *
+ * Return true if @mmu_idx is a NOTLB mmu_idx that is the
+ * first stage of a two stage regime.
+ */
+static inline bool arm_mmu_idx_is_stage1_of_2(ARMMMUIdx mmu_idx)
+{
+ switch (mmu_idx) {
+ case ARMMMUIdx_Stage1_E0:
+ case ARMMMUIdx_Stage1_E1:
+ case ARMMMUIdx_Stage1_E1_PAN:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline uint32_t aarch32_cpsr_valid_mask(uint64_t features,
+ const ARMISARegisters *id)
+{
+ uint32_t valid = CPSR_M | CPSR_AIF | CPSR_IL | CPSR_NZCV;
+
+ if ((features >> ARM_FEATURE_V4T) & 1) {
+ valid |= CPSR_T;
+ }
+ if ((features >> ARM_FEATURE_V5) & 1) {
+        valid |= CPSR_Q; /* V5TE in reality */
+ }
+ if ((features >> ARM_FEATURE_V6) & 1) {
+ valid |= CPSR_E | CPSR_GE;
+ }
+ if ((features >> ARM_FEATURE_THUMB2) & 1) {
+ valid |= CPSR_IT;
+ }
+ if (isar_feature_aa32_jazelle(id)) {
+ valid |= CPSR_J;
+ }
+ if (isar_feature_aa32_pan(id)) {
+ valid |= CPSR_PAN;
+ }
+ if (isar_feature_aa32_dit(id)) {
+ valid |= CPSR_DIT;
+ }
+ if (isar_feature_aa32_ssbs(id)) {
+ valid |= CPSR_SSBS;
+ }
+
+ return valid;
+}
+
+static inline uint32_t aarch64_pstate_valid_mask(const ARMISARegisters *id)
+{
+ uint32_t valid;
+
+ valid = PSTATE_M | PSTATE_DAIF | PSTATE_IL | PSTATE_SS | PSTATE_NZCV;
+ if (isar_feature_aa64_bti(id)) {
+ valid |= PSTATE_BTYPE;
+ }
+ if (isar_feature_aa64_pan(id)) {
+ valid |= PSTATE_PAN;
+ }
+ if (isar_feature_aa64_uao(id)) {
+ valid |= PSTATE_UAO;
+ }
+ if (isar_feature_aa64_dit(id)) {
+ valid |= PSTATE_DIT;
+ }
+ if (isar_feature_aa64_ssbs(id)) {
+ valid |= PSTATE_SSBS;
+ }
+ if (isar_feature_aa64_mte(id)) {
+ valid |= PSTATE_TCO;
+ }
+
+ return valid;
+}
+
+/* Granule size (i.e. page size) */
+typedef enum ARMGranuleSize {
+ /* Same order as TG0 encoding */
+ Gran4K,
+ Gran64K,
+ Gran16K,
+ GranInvalid,
+} ARMGranuleSize;
+
+/**
+ * arm_granule_bits: Return address size of the granule in bits
+ *
+ * Return the address size of the granule in bits. This corresponds
+ * to the pseudocode TGxGranuleBits().
+ */
+static inline int arm_granule_bits(ARMGranuleSize gran)
+{
+ switch (gran) {
+ case Gran64K:
+ return 16;
+ case Gran16K:
+ return 14;
+ case Gran4K:
+ return 12;
+ default:
+ g_assert_not_reached();
+ }
+}
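
To make the arithmetic concrete, here is a small editorial sketch (not part of the patch; the helper names are hypothetical):

/* Illustrative helpers only: page size and descriptors-per-table for a
 * granule, assuming the usual 8-byte (64-bit) translation descriptors. */
static inline uint64_t example_granule_page_size(ARMGranuleSize gran)
{
    return 1ULL << arm_granule_bits(gran);      /* 4 KiB, 16 KiB or 64 KiB */
}

static inline uint64_t example_granule_table_entries(ARMGranuleSize gran)
{
    return example_granule_page_size(gran) / 8; /* 512, 2048 or 8192 */
}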
+
+/*
+ * Parameters of a given virtual address, as extracted from the
+ * translation control register (TCR) for a given regime.
+ */
+typedef struct ARMVAParameters {
+ unsigned tsz : 8;
+ unsigned ps : 3;
+ unsigned sh : 2;
+ unsigned select : 1;
+ bool tbi : 1;
+ bool epd : 1;
+ bool hpd : 1;
+ bool tsz_oob : 1; /* tsz has been clamped to legal range */
+ bool ds : 1;
+ bool ha : 1;
+ bool hd : 1;
+ ARMGranuleSize gran : 2;
+} ARMVAParameters;
+
+/**
+ * aa64_va_parameters: Return parameters for an AArch64 virtual address
+ * @env: CPU
+ * @va: virtual address to look up
+ * @mmu_idx: determines translation regime to use
+ * @data: true if this is a data access
+ * @el1_is_aa32: true if we are asking about stage 2 when EL1 is AArch32
+ * (ignored if @mmu_idx is for a stage 1 regime; only affects tsz/tsz_oob)
+ */
+ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
+ ARMMMUIdx mmu_idx, bool data,
+ bool el1_is_aa32);
+
+int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx);
+int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx);
+int aa64_va_parameter_tcma(uint64_t tcr, ARMMMUIdx mmu_idx);
+
+/* Determine if allocation tags are available. */
+static inline bool allocation_tag_access_enabled(CPUARMState *env, int el,
+ uint64_t sctlr)
+{
+ if (el < 3
+ && arm_feature(env, ARM_FEATURE_EL3)
+ && !(env->cp15.scr_el3 & SCR_ATA)) {
+ return false;
+ }
+ if (el < 2 && arm_is_el2_enabled(env)) {
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ if (!(hcr & HCR_ATA) && (!(hcr & HCR_E2H) || !(hcr & HCR_TGE))) {
+ return false;
+ }
+ }
+ sctlr &= (el == 0 ? SCTLR_ATA0 : SCTLR_ATA);
+ return sctlr != 0;
+}
+
+#ifndef CONFIG_USER_ONLY
+
+/* Security attributes for an address, as returned by v8m_security_lookup. */
+typedef struct V8M_SAttributes {
+ bool subpage; /* true if these attrs don't cover the whole TARGET_PAGE */
+ bool ns;
+ bool nsc;
+ uint8_t sregion;
+ bool srvalid;
+ uint8_t iregion;
+ bool irvalid;
+} V8M_SAttributes;
+
+void v8m_security_lookup(CPUARMState *env, uint32_t address,
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
+ bool secure, V8M_SAttributes *sattrs);
+
+/* Cacheability and shareability attributes for a memory access */
+typedef struct ARMCacheAttrs {
+ /*
+ * If is_s2_format is true, attrs is the S2 descriptor bits [5:2]
+ * Otherwise, attrs is the same as the MAIR_EL1 8-bit format
+ */
+ unsigned int attrs:8;
+ unsigned int shareability:2; /* as in the SH field of the VMSAv8-64 PTEs */
+ bool is_s2_format:1;
+} ARMCacheAttrs;
+
+/* Fields that are valid upon success. */
+typedef struct GetPhysAddrResult {
+ CPUTLBEntryFull f;
+ ARMCacheAttrs cacheattrs;
+} GetPhysAddrResult;
+
+/**
+ * get_phys_addr: get the physical address for a virtual address
+ * @env: CPUARMState
+ * @address: virtual address to get physical address for
+ * @access_type: 0 for read, 1 for write, 2 for execute
+ * @mmu_idx: MMU index indicating required translation regime
+ * @result: set on translation success.
+ * @fi: set to fault info if the translation fails
+ *
+ * Find the physical address corresponding to the given virtual address,
+ * by doing a translation table walk on MMU based systems or using the
+ * MPU state on MPU based systems.
+ *
+ * Returns false if the translation was successful. Otherwise, the fields of
+ * @result may not be filled in, and @fi is populated with information on why
+ * the translation aborted. The FSR value derived from it is in the format of
+ * a DFSR/IFSR fault register, with the following caveats:
+ *  * we honour the short vs long DFSR format differences.
+ *  * the WnR bit is never set (the caller must do this).
+ *  * for PMSAv5 based systems we don't bother to return a full FSR format
+ *    value.
+ */
+bool get_phys_addr(CPUARMState *env, target_ulong address,
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
+ GetPhysAddrResult *result, ARMMMUFaultInfo *fi)
+ __attribute__((nonnull));
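
For orientation, a hedged usage sketch of this API (the caller below is hypothetical and not part of the patch); note the inverted return convention: true means the walk faulted.

/* Hypothetical caller, for illustration only. */
static hwaddr example_translate(CPUARMState *env, target_ulong va,
                                ARMMMUIdx mmu_idx)
{
    GetPhysAddrResult res = {};
    ARMMMUFaultInfo fi = {};

    if (get_phys_addr(env, va, MMU_DATA_LOAD, mmu_idx, &res, &fi)) {
        return (hwaddr)-1;            /* faulted: fi says why */
    }
    return res.f.phys_addr;           /* res.cacheattrs holds memory attrs */
}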
+
+/**
+ * get_phys_addr_with_space_nogpc: get the physical address for a virtual
+ * address
+ * @env: CPUARMState
+ * @address: virtual address to get physical address for
+ * @access_type: 0 for read, 1 for write, 2 for execute
+ * @mmu_idx: MMU index indicating required translation regime
+ * @space: security space for the access
+ * @result: set on translation success.
+ * @fi: set to fault info if the translation fails
+ *
+ * Similar to get_phys_addr, but use the given security space and don't perform
+ * a Granule Protection Check on the resulting address.
+ */
+bool get_phys_addr_with_space_nogpc(CPUARMState *env, target_ulong address,
+ MMUAccessType access_type,
+ ARMMMUIdx mmu_idx, ARMSecuritySpace space,
+ GetPhysAddrResult *result,
+ ARMMMUFaultInfo *fi)
+ __attribute__((nonnull));
+
+bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
+ bool is_secure, GetPhysAddrResult *result,
+ ARMMMUFaultInfo *fi, uint32_t *mregion);
+
+void arm_log_exception(CPUState *cs);
+
+#endif /* !CONFIG_USER_ONLY */
+
+/*
+ * SVE predicates are 1/8 the size of SVE vectors, and cannot use
+ * the same simd_desc() encoding due to restrictions on size.
+ * Use these instead.
+ */
+FIELD(PREDDESC, OPRSZ, 0, 6)
+FIELD(PREDDESC, ESZ, 6, 2)
+FIELD(PREDDESC, DATA, 8, 24)
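
As a sketch of how such a descriptor is typically packed (editorial illustration; the helper name is hypothetical):

/* Illustration: pack a predicate descriptor; consumers recover the
 * fields with FIELD_EX32(desc, PREDDESC, OPRSZ) and friends. */
static inline uint32_t example_make_pred_desc(unsigned oprsz, unsigned esz)
{
    uint32_t desc = 0;
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, oprsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
    return desc;
}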
+
+/*
+ * The SVE simd_data field, for memory ops, contains either
+ * rd (5 bits) or a shift count (2 bits).
+ */
+#define SVE_MTEDESC_SHIFT 5
+
+/* Bits within a descriptor passed to the helper_mte_check* functions. */
+FIELD(MTEDESC, MIDX, 0, 4)
+FIELD(MTEDESC, TBI, 4, 2)
+FIELD(MTEDESC, TCMA, 6, 2)
+FIELD(MTEDESC, WRITE, 8, 1)
+FIELD(MTEDESC, ALIGN, 9, 3)
+FIELD(MTEDESC, SIZEM1, 12, SIMD_DATA_BITS - SVE_MTEDESC_SHIFT - 12) /* size - 1 */
+
+bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr);
+uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra);
+
+/**
+ * mte_mops_probe: Check where the next MTE failure is for a FEAT_MOPS operation
+ * @env: CPU env
+ * @ptr: start address of memory region (dirty pointer)
+ * @size: length of region (guaranteed not to cross a page boundary)
+ * @desc: MTEDESC descriptor word (0 means no MTE checks)
+ * Returns: the size of the region that can be copied without hitting
+ * an MTE tag failure
+ *
+ * Note that we assume that the caller has already checked the TBI
+ * and TCMA bits with mte_checks_needed() and an MTE check is definitely
+ * required.
+ */
+uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size,
+ uint32_t desc);
+
+/**
+ * mte_mops_probe_rev: Check where the next MTE failure is for a FEAT_MOPS
+ * operation going in the reverse direction
+ * @env: CPU env
+ * @ptr: *end* address of memory region (dirty pointer)
+ * @size: length of region (guaranteed not to cross a page boundary)
+ * @desc: MTEDESC descriptor word (0 means no MTE checks)
+ * Returns: the size of the region that can be copied without hitting
+ * an MTE tag failure
+ *
+ * Note that we assume that the caller has already checked the TBI
+ * and TCMA bits with mte_checks_needed() and an MTE check is definitely
+ * required.
+ */
+uint64_t mte_mops_probe_rev(CPUARMState *env, uint64_t ptr, uint64_t size,
+ uint32_t desc);
+
+/**
+ * mte_check_fail: Record an MTE tag check failure
+ * @env: CPU env
+ * @desc: MTEDESC descriptor word
+ * @dirty_ptr: Failing dirty address
+ * @ra: TCG retaddr
+ *
+ * This may never return (if the MTE tag checks are configured to fault).
+ */
+void mte_check_fail(CPUARMState *env, uint32_t desc,
+ uint64_t dirty_ptr, uintptr_t ra);
+
+/**
+ * mte_mops_set_tags: Set MTE tags for a portion of a FEAT_MOPS operation
+ * @env: CPU env
+ * @dirty_ptr: Start address of memory region (dirty pointer)
+ * @size: length of region (guaranteed not to cross page boundary)
+ * @desc: MTEDESC descriptor word
+ */
+void mte_mops_set_tags(CPUARMState *env, uint64_t dirty_ptr, uint64_t size,
+ uint32_t desc);
+
+static inline int allocation_tag_from_addr(uint64_t ptr)
+{
+ return extract64(ptr, 56, 4);
+}
+
+static inline uint64_t address_with_allocation_tag(uint64_t ptr, int rtag)
+{
+ return deposit64(ptr, 56, 4, rtag);
+}
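
A quick round-trip of the logical tag in bits [59:56] (editorial example; the function name and values are arbitrary):

/* Illustration only. */
static inline void example_tag_round_trip(void)
{
    uint64_t ptr  = 0x0800004000001000ull;            /* logical tag 0x8 */
    int tag       = allocation_tag_from_addr(ptr);    /* == 0x8 */
    uint64_t ptr2 = address_with_allocation_tag(ptr, 0x3);
    /* ptr2 == 0x0300004000001000ull: only bits [59:56] changed */
    (void)tag; (void)ptr2;
}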
+
+/* Return true if tbi bits mean that the access is checked. */
+static inline bool tbi_check(uint32_t desc, int bit55)
+{
+ return (desc >> (R_MTEDESC_TBI_SHIFT + bit55)) & 1;
+}
+
+/* Return true if tcma bits mean that the access is unchecked. */
+static inline bool tcma_check(uint32_t desc, int bit55, int ptr_tag)
+{
+ /*
+ * We had extracted bit55 and ptr_tag for other reasons, so fold
+ * (ptr<59:55> == 00000 || ptr<59:55> == 11111) into a single test.
+ */
+ bool match = ((ptr_tag + bit55) & 0xf) == 0;
+ bool tcma = (desc >> (R_MTEDESC_TCMA_SHIFT + bit55)) & 1;
+ return tcma && match;
+}
+
+/*
+ * For TBI, ideally, we would do nothing. Proper behaviour on fault is
+ * for the tag to be present in the FAR_ELx register. But for user-only
+ * mode, we do not have a TLB with which to implement this, so we must
+ * remove the top byte.
+ */
+static inline uint64_t useronly_clean_ptr(uint64_t ptr)
+{
+#ifdef CONFIG_USER_ONLY
+ /* TBI0 is known to be enabled, while TBI1 is disabled. */
+ ptr &= sextract64(ptr, 0, 56);
+#endif
+ return ptr;
+}
+
+static inline uint64_t useronly_maybe_clean_ptr(uint32_t desc, uint64_t ptr)
+{
+#ifdef CONFIG_USER_ONLY
+ int64_t clean_ptr = sextract64(ptr, 0, 56);
+ if (tbi_check(desc, clean_ptr < 0)) {
+ ptr = clean_ptr;
+ }
+#endif
+ return ptr;
+}
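
A worked example of the user-only cleaning (editorial note; the constants are illustrative):

/*
 * Illustration: with TBI0 enabled and TBI1 disabled,
 *   useronly_clean_ptr(0x3c007fffee001000) == 0x00007fffee001000 (tag stripped)
 *   useronly_clean_ptr(0xffffabcdee001000) == 0xffffabcdee001000 (bit 55 set,
 *                                                                 left unchanged)
 */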
+
+/* Values for M-profile PSR.ECI for MVE insns */
+enum MVEECIState {
+ ECI_NONE = 0, /* No completed beats */
+ ECI_A0 = 1, /* Completed: A0 */
+ ECI_A0A1 = 2, /* Completed: A0, A1 */
+ /* 3 is reserved */
+ ECI_A0A1A2 = 4, /* Completed: A0, A1, A2 */
+ ECI_A0A1A2B0 = 5, /* Completed: A0, A1, A2, B0 */
+ /* All other values reserved */
+};
+
+/* Definitions for the PMU registers */
+#define PMCRN_MASK 0xf800
+#define PMCRN_SHIFT 11
+#define PMCRLP 0x80
+#define PMCRLC 0x40
+#define PMCRDP 0x20
+#define PMCRX 0x10
+#define PMCRD 0x8
+#define PMCRC 0x4
+#define PMCRP 0x2
+#define PMCRE 0x1
+/*
+ * Mask of PMCR bits writable by guest (not including WO bits like C, P,
+ * which can be written as 1 to trigger behaviour but which stay RAZ).
+ */
+#define PMCR_WRITABLE_MASK (PMCRLP | PMCRLC | PMCRDP | PMCRX | PMCRD | PMCRE)
+
+#define PMXEVTYPER_P 0x80000000
+#define PMXEVTYPER_U 0x40000000
+#define PMXEVTYPER_NSK 0x20000000
+#define PMXEVTYPER_NSU 0x10000000
+#define PMXEVTYPER_NSH 0x08000000
+#define PMXEVTYPER_M 0x04000000
+#define PMXEVTYPER_MT 0x02000000
+#define PMXEVTYPER_EVTCOUNT 0x0000ffff
+#define PMXEVTYPER_MASK (PMXEVTYPER_P | PMXEVTYPER_U | PMXEVTYPER_NSK | \
+ PMXEVTYPER_NSU | PMXEVTYPER_NSH | \
+ PMXEVTYPER_M | PMXEVTYPER_MT | \
+ PMXEVTYPER_EVTCOUNT)
+
+#define PMCCFILTR 0xf8000000
+#define PMCCFILTR_M PMXEVTYPER_M
+#define PMCCFILTR_EL0 (PMCCFILTR | PMCCFILTR_M)
+
+static inline uint32_t pmu_num_counters(CPUARMState *env)
+{
+ ARMCPU *cpu = env_archcpu(env);
+
+ return (cpu->isar.reset_pmcr_el0 & PMCRN_MASK) >> PMCRN_SHIFT;
+}
+
+/* Bits allowed to be set/cleared for PMCNTEN* and PMINTEN* */
+static inline uint64_t pmu_counter_mask(CPUARMState *env)
+{
+ return (1ULL << 31) | ((1ULL << pmu_num_counters(env)) - 1);
+}
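
A worked example of the resulting mask (editorial note):

/*
 * Illustration: a PMU with PMCR_EL0.N == 4 event counters gives
 *   pmu_counter_mask(env) == (1ULL << 31) | 0xf == 0x8000000f
 * i.e. the cycle counter (bit 31) plus event counters 0..3.
 */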
+
+#ifdef TARGET_AARCH64
+GDBFeature *arm_gen_dynamic_svereg_feature(CPUState *cpu, int base_reg);
+int aarch64_gdb_get_sve_reg(CPUState *cs, GByteArray *buf, int reg);
+int aarch64_gdb_set_sve_reg(CPUState *cs, uint8_t *buf, int reg);
+int aarch64_gdb_get_fpu_reg(CPUState *cs, GByteArray *buf, int reg);
+int aarch64_gdb_set_fpu_reg(CPUState *cs, uint8_t *buf, int reg);
+int aarch64_gdb_get_pauth_reg(CPUState *cs, GByteArray *buf, int reg);
+int aarch64_gdb_set_pauth_reg(CPUState *cs, uint8_t *buf, int reg);
+void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp);
+void arm_cpu_sme_finalize(ARMCPU *cpu, Error **errp);
+void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp);
+void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp);
+void aarch64_max_tcg_initfn(Object *obj);
+void aarch64_add_pauth_properties(Object *obj);
+void aarch64_add_sve_properties(Object *obj);
+void aarch64_add_sme_properties(Object *obj);
+#endif
+
+/* Read the CONTROL register as the MRS instruction would. */
+uint32_t arm_v7m_mrs_control(CPUARMState *env, uint32_t secure);
+
+/*
+ * Return a pointer to the location where we currently store the
+ * stack pointer for the requested security state and thread mode.
+ * This pointer will become invalid if the CPU state is updated
+ * such that the stack pointers are switched around (eg changing
+ * the SPSEL control bit).
+ */
+uint32_t *arm_v7m_get_sp_ptr(CPUARMState *env, bool secure,
+ bool threadmode, bool spsel);
+
+bool el_is_in_host(CPUARMState *env, int el);
+
+void aa32_max_features(ARMCPU *cpu);
+int exception_target_el(CPUARMState *env);
+bool arm_singlestep_active(CPUARMState *env);
+bool arm_generate_debug_exceptions(CPUARMState *env);
+
+/**
+ * pauth_ptr_mask:
+ * @param: parameters defining the MMU setup
+ *
+ * Return a mask of the address bits that contain the authentication code,
+ * given the MMU config defined by @param.
+ */
+static inline uint64_t pauth_ptr_mask(ARMVAParameters param)
+{
+ int bot_pac_bit = 64 - param.tsz;
+ int top_pac_bit = 64 - 8 * param.tbi;
+
+ return MAKE_64BIT_MASK(bot_pac_bit, top_pac_bit - bot_pac_bit);
+}
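
Two worked values for this mask (editorial note):

/*
 * Illustration: for a 39-bit VA space (tsz == 25):
 *   tbi == 0: PAC occupies bits [63:39] -> mask 0xffffff8000000000
 *   tbi == 1: the top byte is excluded, PAC in bits [55:39]
 *                                       -> mask 0x00ffff8000000000
 */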
+
+/* Add the cpreg definitions for debug related system registers */
+void define_debug_regs(ARMCPU *cpu);
+
+/* Effective value of MDCR_EL2 */
+static inline uint64_t arm_mdcr_el2_eff(CPUARMState *env)
+{
+ return arm_is_el2_enabled(env) ? env->cp15.mdcr_el2 : 0;
+}
+
+/* Powers of 2 for sve_vq_map et al. */
+#define SVE_VQ_POW2_MAP \
+ ((1 << (1 - 1)) | (1 << (2 - 1)) | \
+ (1 << (4 - 1)) | (1 << (8 - 1)) | (1 << (16 - 1)))
+
+/*
+ * Return true if it is possible to take a fine-grained-trap to EL2.
+ */
+static inline bool arm_fgt_active(CPUARMState *env, int el)
+{
+ /*
+ * The Arm ARM only requires the "{E2H,TGE} != {1,1}" test for traps
+ * that can affect EL0, but it is harmless to do the test also for
+ * traps on registers that are only accessible at EL1 because if the test
+ * returns true then we can't be executing at EL1 anyway.
+ * FGT traps only happen when EL2 is enabled and EL1 is AArch64;
+     * traps from AArch32 only happen in the case where EL0 is AArch32.
+ */
+ return cpu_isar_feature(aa64_fgt, env_archcpu(env)) &&
+ el < 2 && arm_is_el2_enabled(env) &&
+ arm_el_is_aa64(env, 1) &&
+ (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) &&
+ (!arm_feature(env, ARM_FEATURE_EL3) || (env->cp15.scr_el3 & SCR_FGTEN));
+}
+
+void assert_hflags_rebuild_correctly(CPUARMState *env);
+
+/*
+ * Although the ARM implementation of hardware assisted debugging
+ * allows for different breakpoints per-core, the current GDB
+ * interface treats them as a global pool of registers (which seems to
+ * be the case for x86, ppc and s390). As a result we store one copy
+ * of registers which is used for all active cores.
+ *
+ * Write access is serialised by virtue of the GDB protocol which
+ * updates things. Read access (i.e. when the values are copied to the
+ * vCPU) is also gated by GDB's run control.
+ *
+ * This is not unreasonable: most of the time when debugging a kernel you
+ * never know which core will eventually execute your function.
+ */
+
+typedef struct {
+ uint64_t bcr;
+ uint64_t bvr;
+} HWBreakpoint;
+
+/*
+ * The watchpoint registers can cover more area than the requested
+ * watchpoint so we need to store the additional information
+ * somewhere. We also need to supply a CPUWatchpoint to the GDB stub
+ * when the watchpoint is hit.
+ */
+typedef struct {
+ uint64_t wcr;
+ uint64_t wvr;
+ CPUWatchpoint details;
+} HWWatchpoint;
+
+/* Maximum and current break/watch point counts */
+extern int max_hw_bps, max_hw_wps;
+extern GArray *hw_breakpoints, *hw_watchpoints;
+
+#define cur_hw_wps (hw_watchpoints->len)
+#define cur_hw_bps (hw_breakpoints->len)
+#define get_hw_bp(i) (&g_array_index(hw_breakpoints, HWBreakpoint, i))
+#define get_hw_wp(i) (&g_array_index(hw_watchpoints, HWWatchpoint, i))
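
A sketch of how the pool macros are typically consumed (editorial example; the helper name is hypothetical, the real lookup is the find_hw_breakpoint() declared just below):

/* Illustration only: scan the GDB-shared pool for a breakpoint at @pc. */
static inline bool example_bp_at(target_ulong pc)
{
    for (unsigned i = 0; i < cur_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        if (bp->bvr == pc) {     /* a real match also decodes bp->bcr */
            return true;
        }
    }
    return false;
}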
+
+bool find_hw_breakpoint(CPUState *cpu, target_ulong pc);
+int insert_hw_breakpoint(target_ulong pc);
+int delete_hw_breakpoint(target_ulong pc);
+
+bool check_watchpoint_in_range(int i, target_ulong addr);
+CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr);
+int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type);
+int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type);
#endif
diff --git a/target/arm/kvm-consts.h b/target/arm/kvm-consts.h
index aad28258a3..7c6adc14f6 100644
--- a/target/arm/kvm-consts.h
+++ b/target/arm/kvm-consts.h
@@ -14,16 +14,16 @@
#ifndef ARM_KVM_CONSTS_H
#define ARM_KVM_CONSTS_H
+#ifdef NEED_CPU_H
#ifdef CONFIG_KVM
#include <linux/kvm.h>
#include <linux/psci.h>
-
#define MISMATCH_CHECK(X, Y) QEMU_BUILD_BUG_ON(X != Y)
+#endif
+#endif
-#else
-
+#ifndef MISMATCH_CHECK
#define MISMATCH_CHECK(X, Y) QEMU_BUILD_BUG_ON(0)
-
#endif
#define CP_REG_SIZE_SHIFT 52
@@ -77,6 +77,8 @@ MISMATCH_CHECK(QEMU_PSCI_0_1_FN_MIGRATE, KVM_PSCI_FN_MIGRATE);
#define QEMU_PSCI_0_2_FN64_AFFINITY_INFO QEMU_PSCI_0_2_FN64(4)
#define QEMU_PSCI_0_2_FN64_MIGRATE QEMU_PSCI_0_2_FN64(5)
+#define QEMU_PSCI_1_0_FN_PSCI_FEATURES QEMU_PSCI_0_2_FN(10)
+
MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_SUSPEND, PSCI_0_2_FN_CPU_SUSPEND);
MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_OFF, PSCI_0_2_FN_CPU_OFF);
MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_ON, PSCI_0_2_FN_CPU_ON);
@@ -84,18 +86,22 @@ MISMATCH_CHECK(QEMU_PSCI_0_2_FN_MIGRATE, PSCI_0_2_FN_MIGRATE);
MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_CPU_SUSPEND, PSCI_0_2_FN64_CPU_SUSPEND);
MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_CPU_ON, PSCI_0_2_FN64_CPU_ON);
MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_MIGRATE, PSCI_0_2_FN64_MIGRATE);
+MISMATCH_CHECK(QEMU_PSCI_1_0_FN_PSCI_FEATURES, PSCI_1_0_FN_PSCI_FEATURES);
/* PSCI v0.2 return values used by TCG emulation of PSCI */
/* No Trusted OS migration to worry about when offlining CPUs */
#define QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED 2
-/* We implement version 0.2 only */
-#define QEMU_PSCI_0_2_RET_VERSION_0_2 2
+#define QEMU_PSCI_VERSION_0_1 0x00001
+#define QEMU_PSCI_VERSION_0_2 0x00002
+#define QEMU_PSCI_VERSION_1_0 0x10000
+#define QEMU_PSCI_VERSION_1_1 0x10001
MISMATCH_CHECK(QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED, PSCI_0_2_TOS_MP);
-MISMATCH_CHECK(QEMU_PSCI_0_2_RET_VERSION_0_2,
- (PSCI_VERSION_MAJOR(0) | PSCI_VERSION_MINOR(2)));
+/* We don't bother to check every possible version value */
+MISMATCH_CHECK(QEMU_PSCI_VERSION_0_2, PSCI_VERSION(0, 2));
+MISMATCH_CHECK(QEMU_PSCI_VERSION_1_1, PSCI_VERSION(1, 1));
/* PSCI return values (inclusive of all PSCI versions) */
#define QEMU_PSCI_RET_SUCCESS 0
@@ -118,13 +124,10 @@ MISMATCH_CHECK(QEMU_PSCI_RET_INTERNAL_FAILURE, PSCI_RET_INTERNAL_FAILURE);
MISMATCH_CHECK(QEMU_PSCI_RET_NOT_PRESENT, PSCI_RET_NOT_PRESENT);
MISMATCH_CHECK(QEMU_PSCI_RET_DISABLED, PSCI_RET_DISABLED);
-/* Note that KVM uses overlapping values for AArch32 and AArch64
- * target CPU numbers. AArch32 targets:
+/*
+ * Note that KVM uses overlapping values for AArch32 and AArch64
+ * target CPU numbers. AArch64 targets:
*/
-#define QEMU_KVM_ARM_TARGET_CORTEX_A15 0
-#define QEMU_KVM_ARM_TARGET_CORTEX_A7 1
-
-/* AArch64 targets: */
#define QEMU_KVM_ARM_TARGET_AEM_V8 0
#define QEMU_KVM_ARM_TARGET_FOUNDATION_V8 1
#define QEMU_KVM_ARM_TARGET_CORTEX_A57 2
@@ -136,16 +139,11 @@ MISMATCH_CHECK(QEMU_PSCI_RET_DISABLED, PSCI_RET_DISABLED);
*/
#define QEMU_KVM_ARM_TARGET_NONE UINT_MAX
-#ifdef TARGET_AARCH64
MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_AEM_V8, KVM_ARM_TARGET_AEM_V8);
MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_FOUNDATION_V8, KVM_ARM_TARGET_FOUNDATION_V8);
MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A57, KVM_ARM_TARGET_CORTEX_A57);
MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_XGENE_POTENZA, KVM_ARM_TARGET_XGENE_POTENZA);
MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A53, KVM_ARM_TARGET_CORTEX_A53);
-#else
-MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A15, KVM_ARM_TARGET_CORTEX_A15);
-MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A7, KVM_ARM_TARGET_CORTEX_A7);
-#endif
#define CP_REG_ARM64 0x6000000000000000ULL
#define CP_REG_ARM_COPROC_MASK 0x000000000FFF0000
@@ -165,7 +163,6 @@ MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A7, KVM_ARM_TARGET_CORTEX_A7);
/* No kernel define but it's useful to QEMU */
#define CP_REG_ARM64_SYSREG_CP (CP_REG_ARM64_SYSREG >> CP_REG_ARM_COPROC_SHIFT)
-#ifdef TARGET_AARCH64
MISMATCH_CHECK(CP_REG_ARM64, KVM_REG_ARM64);
MISMATCH_CHECK(CP_REG_ARM_COPROC_MASK, KVM_REG_ARM_COPROC_MASK);
MISMATCH_CHECK(CP_REG_ARM_COPROC_SHIFT, KVM_REG_ARM_COPROC_SHIFT);
@@ -180,7 +177,6 @@ MISMATCH_CHECK(CP_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRM_MASK);
MISMATCH_CHECK(CP_REG_ARM64_SYSREG_CRM_SHIFT, KVM_REG_ARM64_SYSREG_CRM_SHIFT);
MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP2_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK);
MISMATCH_CHECK(CP_REG_ARM64_SYSREG_OP2_SHIFT, KVM_REG_ARM64_SYSREG_OP2_SHIFT);
-#endif
#undef MISMATCH_CHECK
diff --git a/target/arm/kvm-stub.c b/target/arm/kvm-stub.c
index b2c66df532..965a486b32 100644
--- a/target/arm/kvm-stub.c
+++ b/target/arm/kvm-stub.c
@@ -10,16 +10,15 @@
*
*/
#include "qemu/osdep.h"
-#include "qemu-common.h"
#include "cpu.h"
#include "kvm_arm.h"
bool write_kvmstate_to_list(ARMCPU *cpu)
{
- abort();
+ g_assert_not_reached();
}
bool write_list_to_kvmstate(ARMCPU *cpu, int level)
{
- abort();
+ g_assert_not_reached();
}
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 54ef5f711b..ab85d628a8 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -2,6 +2,8 @@
* ARM implementation of KVM hooks
*
* Copyright Christoffer Dall 2009-2010
+ * Copyright Mian-M. Hamayun 2013, Virtual Open Systems
+ * Copyright Alex Bennée 2014, Linaro
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
@@ -13,52 +15,107 @@
#include <linux/kvm.h>
-#include "qemu-common.h"
#include "qemu/timer.h"
#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qom/object.h"
+#include "qapi/error.h"
#include "sysemu/sysemu.h"
+#include "sysemu/runstate.h"
#include "sysemu/kvm.h"
+#include "sysemu/kvm_int.h"
#include "kvm_arm.h"
#include "cpu.h"
#include "trace.h"
#include "internals.h"
-#include "hw/arm/arm.h"
#include "hw/pci/pci.h"
#include "exec/memattrs.h"
#include "exec/address-spaces.h"
+#include "exec/gdbstub.h"
#include "hw/boards.h"
+#include "hw/irq.h"
+#include "qapi/visitor.h"
#include "qemu/log.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/ghes.h"
+#include "target/arm/gtimer.h"
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
KVM_CAP_LAST_INFO
};
static bool cap_has_mp_state;
+static bool cap_has_inject_serror_esr;
+static bool cap_has_inject_ext_dabt;
+
+/**
+ * ARMHostCPUFeatures: information about the host CPU (identified
+ * by asking the host kernel)
+ */
+typedef struct ARMHostCPUFeatures {
+ ARMISARegisters isar;
+ uint64_t features;
+ uint32_t target;
+ const char *dtb_compatible;
+} ARMHostCPUFeatures;
static ARMHostCPUFeatures arm_host_cpu_features;
-int kvm_arm_vcpu_init(CPUState *cs)
+/**
+ * kvm_arm_vcpu_init:
+ * @cpu: ARMCPU
+ *
+ * Initialize (or reinitialize) the VCPU by invoking the
+ * KVM_ARM_VCPU_INIT ioctl with the CPU type and feature
+ * bitmask specified in the CPUState.
+ *
+ * Returns: 0 if success else < 0 error code
+ */
+static int kvm_arm_vcpu_init(ARMCPU *cpu)
{
- ARMCPU *cpu = ARM_CPU(cs);
struct kvm_vcpu_init init;
init.target = cpu->kvm_target;
memcpy(init.features, cpu->kvm_init_features, sizeof(init.features));
- return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
+ return kvm_vcpu_ioctl(CPU(cpu), KVM_ARM_VCPU_INIT, &init);
+}
+
+/**
+ * kvm_arm_vcpu_finalize:
+ * @cpu: ARMCPU
+ * @feature: feature to finalize
+ *
+ * Finalizes the configuration of the specified VCPU feature by
+ * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring
+ * this are documented in the "KVM_ARM_VCPU_FINALIZE" section of
+ * KVM's API documentation.
+ *
+ * Returns: 0 if success else < 0 error code
+ */
+static int kvm_arm_vcpu_finalize(ARMCPU *cpu, int feature)
+{
+ return kvm_vcpu_ioctl(CPU(cpu), KVM_ARM_VCPU_FINALIZE, &feature);
}
bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
int *fdarray,
struct kvm_vcpu_init *init)
{
- int ret, kvmfd = -1, vmfd = -1, cpufd = -1;
+ int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1;
+ int max_vm_pa_size;
- kvmfd = qemu_open("/dev/kvm", O_RDWR);
+ kvmfd = qemu_open_old("/dev/kvm", O_RDWR);
if (kvmfd < 0) {
goto err;
}
- vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
+ max_vm_pa_size = ioctl(kvmfd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);
+ if (max_vm_pa_size < 0) {
+ max_vm_pa_size = 0;
+ }
+ do {
+ vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size);
+ } while (vmfd == -1 && errno == EINTR);
if (vmfd < 0) {
goto err;
}
@@ -72,7 +129,14 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
goto finish;
}
- ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, init);
+ if (init->target == -1) {
+ struct kvm_vcpu_init preferred;
+
+ ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, &preferred);
+ if (!ret) {
+ init->target = preferred.target;
+ }
+ }
if (ret >= 0) {
ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
if (ret < 0) {
@@ -84,10 +148,12 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
* creating one kind of guest CPU which is its preferred
* CPU type.
*/
+ struct kvm_vcpu_init try;
+
while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) {
- init->target = *cpus_to_try++;
- memset(init->features, 0, sizeof(init->features));
- ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
+ try.target = *cpus_to_try++;
+ memcpy(try.features, init->features, sizeof(init->features));
+ ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, &try);
if (ret >= 0) {
break;
}
@@ -95,6 +161,7 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
if (ret < 0) {
goto err;
}
+ init->target = try.target;
} else {
/* Treat a NULL cpus_to_try argument the same as an empty
* list, which means we will fail the call since this must
@@ -133,6 +200,260 @@ void kvm_arm_destroy_scratch_host_vcpu(int *fdarray)
}
}
+static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id)
+{
+ uint64_t ret;
+ struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret };
+ int err;
+
+ assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
+ err = ioctl(fd, KVM_GET_ONE_REG, &idreg);
+ if (err < 0) {
+ return -1;
+ }
+ *pret = ret;
+ return 0;
+}
+
+static int read_sys_reg64(int fd, uint64_t *pret, uint64_t id)
+{
+ struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret };
+
+ assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
+ return ioctl(fd, KVM_GET_ONE_REG, &idreg);
+}
+
+static bool kvm_arm_pauth_supported(void)
+{
+ return (kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_ADDRESS) &&
+ kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_GENERIC));
+}
+
+static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
+{
+ /* Identify the feature bits corresponding to the host CPU, and
+     * fill out the ARMHostCPUFeatures fields accordingly. To do this
+ * we have to create a scratch VM, create a single CPU inside it,
+ * and then query that CPU for the relevant ID registers.
+ */
+ int fdarray[3];
+ bool sve_supported;
+ bool pmu_supported = false;
+ uint64_t features = 0;
+ int err;
+
+ /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
+ * we know these will only support creating one kind of guest CPU,
+     * namely their preferred CPU type. Fortunately these old kernels
+ * support only a very limited number of CPUs.
+ */
+ static const uint32_t cpus_to_try[] = {
+ KVM_ARM_TARGET_AEM_V8,
+ KVM_ARM_TARGET_FOUNDATION_V8,
+ KVM_ARM_TARGET_CORTEX_A57,
+ QEMU_KVM_ARM_TARGET_NONE
+ };
+ /*
+ * target = -1 informs kvm_arm_create_scratch_host_vcpu()
+ * to use the preferred target
+ */
+ struct kvm_vcpu_init init = { .target = -1, };
+
+ /*
+ * Ask for SVE if supported, so that we can query ID_AA64ZFR0,
+ * which is otherwise RAZ.
+ */
+ sve_supported = kvm_arm_sve_supported();
+ if (sve_supported) {
+ init.features[0] |= 1 << KVM_ARM_VCPU_SVE;
+ }
+
+ /*
+ * Ask for Pointer Authentication if supported, so that we get
+ * the unsanitized field values for AA64ISAR1_EL1.
+ */
+ if (kvm_arm_pauth_supported()) {
+ init.features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS |
+ 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC);
+ }
+
+ if (kvm_arm_pmu_supported()) {
+ init.features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
+ pmu_supported = true;
+ }
+
+ if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
+ return false;
+ }
+
+ ahcf->target = init.target;
+ ahcf->dtb_compatible = "arm,arm-v8";
+
+ err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0,
+ ARM64_SYS_REG(3, 0, 0, 4, 0));
+ if (unlikely(err < 0)) {
+ /*
+ * Before v4.15, the kernel only exposed a limited number of system
+ * registers, not including any of the interesting AArch64 ID regs.
+ * For the most part we could leave these fields as zero with minimal
+ * effect, since this does not affect the values seen by the guest.
+ *
+ * However, it could cause problems down the line for QEMU,
+ * so provide a minimal v8.0 default.
+ *
+ * ??? Could read MIDR and use knowledge from cpu64.c.
+ * ??? Could map a page of memory into our temp guest and
+ * run the tiniest of hand-crafted kernels to extract
+ * the values seen by the guest.
+ * ??? Either of these sounds like too much effort just
+ * to work around running a modern host kernel.
+ */
+ ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */
+ err = 0;
+ } else {
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1,
+ ARM64_SYS_REG(3, 0, 0, 4, 1));
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0,
+ ARM64_SYS_REG(3, 0, 0, 4, 5));
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0,
+ ARM64_SYS_REG(3, 0, 0, 5, 0));
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1,
+ ARM64_SYS_REG(3, 0, 0, 5, 1));
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0,
+ ARM64_SYS_REG(3, 0, 0, 6, 0));
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1,
+ ARM64_SYS_REG(3, 0, 0, 6, 1));
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar2,
+ ARM64_SYS_REG(3, 0, 0, 6, 2));
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0,
+ ARM64_SYS_REG(3, 0, 0, 7, 0));
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1,
+ ARM64_SYS_REG(3, 0, 0, 7, 1));
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2,
+ ARM64_SYS_REG(3, 0, 0, 7, 2));
+
+ /*
+ * Note that if AArch32 support is not present in the host,
+ * the AArch32 sysregs are present to be read, but will
+ * return UNKNOWN values. This is neither better nor worse
+ * than skipping the reads and leaving 0, as we must avoid
+ * considering the values in every case.
+ */
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr0,
+ ARM64_SYS_REG(3, 0, 0, 1, 0));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr1,
+ ARM64_SYS_REG(3, 0, 0, 1, 1));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0,
+ ARM64_SYS_REG(3, 0, 0, 1, 2));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0,
+ ARM64_SYS_REG(3, 0, 0, 1, 4));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1,
+ ARM64_SYS_REG(3, 0, 0, 1, 5));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2,
+ ARM64_SYS_REG(3, 0, 0, 1, 6));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3,
+ ARM64_SYS_REG(3, 0, 0, 1, 7));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0,
+ ARM64_SYS_REG(3, 0, 0, 2, 0));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1,
+ ARM64_SYS_REG(3, 0, 0, 2, 1));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2,
+ ARM64_SYS_REG(3, 0, 0, 2, 2));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3,
+ ARM64_SYS_REG(3, 0, 0, 2, 3));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4,
+ ARM64_SYS_REG(3, 0, 0, 2, 4));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5,
+ ARM64_SYS_REG(3, 0, 0, 2, 5));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4,
+ ARM64_SYS_REG(3, 0, 0, 2, 6));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6,
+ ARM64_SYS_REG(3, 0, 0, 2, 7));
+
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0,
+ ARM64_SYS_REG(3, 0, 0, 3, 0));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1,
+ ARM64_SYS_REG(3, 0, 0, 3, 1));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2,
+ ARM64_SYS_REG(3, 0, 0, 3, 2));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr2,
+ ARM64_SYS_REG(3, 0, 0, 3, 4));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr1,
+ ARM64_SYS_REG(3, 0, 0, 3, 5));
+ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr5,
+ ARM64_SYS_REG(3, 0, 0, 3, 6));
+
+ /*
+ * DBGDIDR is a bit complicated because the kernel doesn't
+ * provide an accessor for it in 64-bit mode, which is what this
+ * scratch VM is in, and there's no architected "64-bit sysreg
+ * which reads the same as the 32-bit register" the way there is
+ * for other ID registers. Instead we synthesize a value from the
+ * AArch64 ID_AA64DFR0, the same way the kernel code in
+ * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does.
+ * We only do this if the CPU supports AArch32 at EL1.
+ */
+ if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) {
+ int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS);
+ int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS);
+ int ctx_cmps =
+ FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS);
+ int version = 6; /* ARMv8 debug architecture */
+ bool has_el3 =
+ !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3);
+ uint32_t dbgdidr = 0;
+
+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps);
+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, BRPS, brps);
+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, CTX_CMPS, ctx_cmps);
+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, VERSION, version);
+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3);
+ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3);
+ dbgdidr |= (1 << 15); /* RES1 bit */
+ ahcf->isar.dbgdidr = dbgdidr;
+ }
+
+ if (pmu_supported) {
+ /* PMCR_EL0 is only accessible if the vCPU has feature PMU_V3 */
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.reset_pmcr_el0,
+ ARM64_SYS_REG(3, 3, 9, 12, 0));
+ }
+
+ if (sve_supported) {
+ /*
+ * There is a range of kernels between kernel commit 73433762fcae
+ * and f81cb2c3ad41 which have a bug where the kernel doesn't
+ * expose SYS_ID_AA64ZFR0_EL1 via the ONE_REG API unless the VM has
+ * enabled SVE support, which resulted in an error rather than RAZ.
+ * So only read the register if we set KVM_ARM_VCPU_SVE above.
+ */
+ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64zfr0,
+ ARM64_SYS_REG(3, 0, 0, 4, 4));
+ }
+ }
+
+ kvm_arm_destroy_scratch_host_vcpu(fdarray);
+
+ if (err < 0) {
+ return false;
+ }
+
+ /*
+ * We can assume any KVM supporting CPU is at least a v8
+ * with VFPv4+Neon; this in turn implies most of the other
+ * feature bits.
+ */
+ features |= 1ULL << ARM_FEATURE_V8;
+ features |= 1ULL << ARM_FEATURE_NEON;
+ features |= 1ULL << ARM_FEATURE_AARCH64;
+ features |= 1ULL << ARM_FEATURE_PMU;
+ features |= 1ULL << ARM_FEATURE_GENERIC_TIMER;
+
+ ahcf->features = features;
+
+ return true;
+}
+
void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu)
{
CPUARMState *env = &cpu->env;
@@ -151,11 +472,79 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu)
cpu->kvm_target = arm_host_cpu_features.target;
cpu->dtb_compatible = arm_host_cpu_features.dtb_compatible;
+ cpu->isar = arm_host_cpu_features.isar;
env->features = arm_host_cpu_features.features;
}
+static bool kvm_no_adjvtime_get(Object *obj, Error **errp)
+{
+ return !ARM_CPU(obj)->kvm_adjvtime;
+}
+
+static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp)
+{
+ ARM_CPU(obj)->kvm_adjvtime = !value;
+}
+
+static bool kvm_steal_time_get(Object *obj, Error **errp)
+{
+ return ARM_CPU(obj)->kvm_steal_time != ON_OFF_AUTO_OFF;
+}
+
+static void kvm_steal_time_set(Object *obj, bool value, Error **errp)
+{
+ ARM_CPU(obj)->kvm_steal_time = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
+}
+
+/* KVM VCPU properties should be prefixed with "kvm-". */
+void kvm_arm_add_vcpu_properties(ARMCPU *cpu)
+{
+ CPUARMState *env = &cpu->env;
+ Object *obj = OBJECT(cpu);
+
+ if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) {
+ cpu->kvm_adjvtime = true;
+ object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get,
+ kvm_no_adjvtime_set);
+ object_property_set_description(obj, "kvm-no-adjvtime",
+ "Set on to disable the adjustment of "
+ "the virtual counter. VM stopped time "
+ "will be counted.");
+ }
+
+ cpu->kvm_steal_time = ON_OFF_AUTO_AUTO;
+ object_property_add_bool(obj, "kvm-steal-time", kvm_steal_time_get,
+ kvm_steal_time_set);
+ object_property_set_description(obj, "kvm-steal-time",
+ "Set off to disable KVM steal time.");
+}
+
+bool kvm_arm_pmu_supported(void)
+{
+ return kvm_check_extension(kvm_state, KVM_CAP_ARM_PMU_V3);
+}
+
+int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa)
+{
+ KVMState *s = KVM_STATE(ms->accelerator);
+ int ret;
+
+ ret = kvm_check_extension(s, KVM_CAP_ARM_VM_IPA_SIZE);
+ *fixed_ipa = ret <= 0;
+
+ return ret > 0 ? ret : 40;
+}
+
+int kvm_arch_get_default_type(MachineState *ms)
+{
+ bool fixed_ipa;
+ int size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa);
+ return fixed_ipa ? 0 : size;
+}
+
int kvm_arch_init(MachineState *ms, KVMState *s)
{
+ int ret = 0;
     /* For ARM, interrupt delivery is always asynchronous,
* whether we are using an in-kernel VGIC or not.
*/
@@ -169,7 +558,56 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
- return 0;
+ /* Check whether user space can specify guest syndrome value */
+ cap_has_inject_serror_esr =
+ kvm_check_extension(s, KVM_CAP_ARM_INJECT_SERROR_ESR);
+
+ if (ms->smp.cpus > 256 &&
+ !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) {
+ error_report("Using more than 256 vcpus requires a host kernel "
+ "with KVM_CAP_ARM_IRQ_LINE_LAYOUT_2");
+ ret = -EINVAL;
+ }
+
+ if (kvm_check_extension(s, KVM_CAP_ARM_NISV_TO_USER)) {
+ if (kvm_vm_enable_cap(s, KVM_CAP_ARM_NISV_TO_USER, 0)) {
+ error_report("Failed to enable KVM_CAP_ARM_NISV_TO_USER cap");
+ } else {
+ /* Set status for supporting the external dabt injection */
+ cap_has_inject_ext_dabt = kvm_check_extension(s,
+ KVM_CAP_ARM_INJECT_EXT_DABT);
+ }
+ }
+
+ if (s->kvm_eager_split_size) {
+ uint32_t sizes;
+
+ sizes = kvm_vm_check_extension(s, KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES);
+ if (!sizes) {
+ s->kvm_eager_split_size = 0;
+ warn_report("Eager Page Split support not available");
+ } else if (!(s->kvm_eager_split_size & sizes)) {
+ error_report("Eager Page Split requested chunk size not valid");
+ ret = -EINVAL;
+ } else {
+ ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE, 0,
+ s->kvm_eager_split_size);
+ if (ret < 0) {
+ error_report("Enabling of Eager Page Split failed: %s",
+ strerror(-ret));
+ }
+ }
+ }
+
+ max_hw_wps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS);
+ hw_watchpoints = g_array_sized_new(true, true,
+ sizeof(HWWatchpoint), max_hw_wps);
+
+ max_hw_bps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS);
+ hw_breakpoints = g_array_sized_new(true, true,
+ sizeof(HWBreakpoint), max_hw_bps);
+
+ return ret;
}
unsigned long kvm_arch_vcpu_id(CPUState *cpu)
@@ -198,7 +636,7 @@ typedef struct KVMDevice {
int dev_fd;
} KVMDevice;
-static QSLIST_HEAD(kvm_devices_head, KVMDevice) kvm_devices_head;
+static QSLIST_HEAD(, KVMDevice) kvm_devices_head;
static void kvm_arm_devlistener_add(MemoryListener *listener,
MemoryRegionSection *section)
@@ -225,8 +663,10 @@ static void kvm_arm_devlistener_del(MemoryListener *listener,
}
static MemoryListener devlistener = {
+ .name = "kvm-arm",
.region_add = kvm_arm_devlistener_add,
.region_del = kvm_arm_devlistener_del,
+ .priority = MEMORY_LISTENER_PRIORITY_MIN,
};
static void kvm_arm_set_device_addr(KVMDevice *kd)
@@ -310,11 +750,52 @@ static int compare_u64(const void *a, const void *b)
return 0;
}
-/* Initialize the ARMCPU cpreg list according to the kernel's
+/*
+ * cpreg_values are sorted in ascending order by KVM register ID
+ * (see kvm_arm_init_cpreg_list). This allows us to cheaply find
+ * the storage for a KVM register by ID with a binary search.
+ */
+static uint64_t *kvm_arm_get_cpreg_ptr(ARMCPU *cpu, uint64_t regidx)
+{
+ uint64_t *res;
+
+ res = bsearch(&regidx, cpu->cpreg_indexes, cpu->cpreg_array_len,
+ sizeof(uint64_t), compare_u64);
+ assert(res);
+
+ return &cpu->cpreg_values[res - cpu->cpreg_indexes];
+}
+
+/**
+ * kvm_arm_reg_syncs_via_cpreg_list:
+ * @regidx: KVM register index
+ *
+ * Return true if this KVM register should be synchronized via the
+ * cpreg list of arbitrary system registers, false if it is synchronized
+ * by hand using code in kvm_arch_get/put_registers().
+ */
+static bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
+{
+ switch (regidx & KVM_REG_ARM_COPROC_MASK) {
+ case KVM_REG_ARM_CORE:
+ case KVM_REG_ARM64_SVE:
+ return false;
+ default:
+ return true;
+ }
+}
+
+/**
+ * kvm_arm_init_cpreg_list:
+ * @cpu: ARMCPU
+ *
+ * Initialize the ARMCPU cpreg list according to the kernel's
* definition of what CPU registers it knows about (and throw away
* the previous TCG-created cpreg list).
+ *
+ * Returns: 0 if success, else < 0 error code
*/
-int kvm_arm_init_cpreg_list(ARMCPU *cpu)
+static int kvm_arm_init_cpreg_list(ARMCPU *cpu)
{
struct kvm_reg_list rl;
struct kvm_reg_list *rlp;
@@ -387,6 +868,28 @@ out:
return ret;
}
+/**
+ * kvm_arm_cpreg_level:
+ * @regidx: KVM register index
+ *
+ * Return the level of this coprocessor/system register. Return value is
+ * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE.
+ */
+static int kvm_arm_cpreg_level(uint64_t regidx)
+{
+ /*
+ * All system registers are assumed to be level KVM_PUT_RUNTIME_STATE.
+ * If a register should be written less often, you must add it here
+ * with a state of either KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE.
+ */
+ switch (regidx) {
+ case KVM_REG_ARM_TIMER_CNT:
+ case KVM_REG_ARM_PTIMER_CNT:
+ return KVM_PUT_FULL_STATE;
+ }
+ return KVM_PUT_RUNTIME_STATE;
+}
+
bool write_kvmstate_to_list(ARMCPU *cpu)
{
CPUState *cs = CPU(cpu);
@@ -394,27 +897,22 @@ bool write_kvmstate_to_list(ARMCPU *cpu)
bool ok = true;
for (i = 0; i < cpu->cpreg_array_len; i++) {
- struct kvm_one_reg r;
uint64_t regidx = cpu->cpreg_indexes[i];
uint32_t v32;
int ret;
- r.id = regidx;
-
switch (regidx & KVM_REG_SIZE_MASK) {
case KVM_REG_SIZE_U32:
- r.addr = (uintptr_t)&v32;
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
+ ret = kvm_get_one_reg(cs, regidx, &v32);
if (!ret) {
cpu->cpreg_values[i] = v32;
}
break;
case KVM_REG_SIZE_U64:
- r.addr = (uintptr_t)(cpu->cpreg_values + i);
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
+ ret = kvm_get_one_reg(cs, regidx, cpu->cpreg_values + i);
break;
default:
- abort();
+ g_assert_not_reached();
}
if (ret) {
ok = false;
@@ -430,7 +928,6 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level)
bool ok = true;
for (i = 0; i < cpu->cpreg_array_len; i++) {
- struct kvm_one_reg r;
uint64_t regidx = cpu->cpreg_indexes[i];
uint32_t v32;
int ret;
@@ -439,19 +936,17 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level)
continue;
}
- r.id = regidx;
switch (regidx & KVM_REG_SIZE_MASK) {
case KVM_REG_SIZE_U32:
v32 = cpu->cpreg_values[i];
- r.addr = (uintptr_t)&v32;
+ ret = kvm_set_one_reg(cs, regidx, &v32);
break;
case KVM_REG_SIZE_U64:
- r.addr = (uintptr_t)(cpu->cpreg_values + i);
+ ret = kvm_set_one_reg(cs, regidx, cpu->cpreg_values + i);
break;
default:
- abort();
+ g_assert_not_reached();
}
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
if (ret) {
/* We might fail for "unknown register" and also for
* "you tried to set a register which is constant with
@@ -463,6 +958,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level)
return ok;
}
+void kvm_arm_cpu_pre_save(ARMCPU *cpu)
+{
+ /* KVM virtual time adjustment */
+ if (cpu->kvm_vtime_dirty) {
+ *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT) = cpu->kvm_vtime;
+ }
+}
+
+void kvm_arm_cpu_post_load(ARMCPU *cpu)
+{
+ /* KVM virtual time adjustment */
+ if (cpu->kvm_adjvtime) {
+ cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT);
+ cpu->kvm_vtime_dirty = true;
+ }
+}
+
void kvm_arm_reset_vcpu(ARMCPU *cpu)
{
int ret;
@@ -470,7 +982,7 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu)
/* Re-init VCPU so that all registers are set to
* their respective reset values.
*/
- ret = kvm_arm_vcpu_init(CPU(cpu));
+ ret = kvm_arm_vcpu_init(cpu);
if (ret < 0) {
fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret));
abort();
@@ -479,51 +991,245 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu)
fprintf(stderr, "write_kvmstate_to_list failed\n");
abort();
}
+ /*
+ * Sync the reset values also into the CPUState. This is necessary
+ * because the next thing we do will be a kvm_arch_put_registers()
+ * which will update the list values from the CPUState before copying
+ * the list values back to KVM. It's OK to ignore failure returns here
+ * for the same reason we do so in kvm_arch_get_registers().
+ */
+ write_list_to_cpustate(cpu);
}
/*
* Update KVM's MP_STATE based on what QEMU thinks it is
*/
-int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu)
+static int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu)
{
if (cap_has_mp_state) {
struct kvm_mp_state mp_state = {
.mp_state = (cpu->power_state == PSCI_OFF) ?
KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE
};
- int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
- if (ret) {
- fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n",
- __func__, ret, strerror(-ret));
- return -1;
- }
+ return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
}
-
return 0;
}
/*
* Sync the KVM MP_STATE into QEMU
*/
-int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu)
+static int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu)
{
if (cap_has_mp_state) {
struct kvm_mp_state mp_state;
int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state);
if (ret) {
- fprintf(stderr, "%s: failed to get MP_STATE %d/%s\n",
- __func__, ret, strerror(-ret));
- abort();
+ return ret;
}
cpu->power_state = (mp_state.mp_state == KVM_MP_STATE_STOPPED) ?
PSCI_OFF : PSCI_ON;
}
+ return 0;
+}
+
+/**
+ * kvm_arm_get_virtual_time:
+ * @cpu: ARMCPU
+ *
+ * Gets the VCPU's virtual counter and stores it in the KVM CPU state.
+ */
+static void kvm_arm_get_virtual_time(ARMCPU *cpu)
+{
+ int ret;
+
+ if (cpu->kvm_vtime_dirty) {
+ return;
+ }
+
+ ret = kvm_get_one_reg(CPU(cpu), KVM_REG_ARM_TIMER_CNT, &cpu->kvm_vtime);
+ if (ret) {
+ error_report("Failed to get KVM_REG_ARM_TIMER_CNT");
+ abort();
+ }
+
+ cpu->kvm_vtime_dirty = true;
+}
+
+/**
+ * kvm_arm_put_virtual_time:
+ * @cpu: ARMCPU
+ *
+ * Sets the VCPU's virtual counter to the value stored in the KVM CPU state.
+ */
+static void kvm_arm_put_virtual_time(ARMCPU *cpu)
+{
+ int ret;
+
+ if (!cpu->kvm_vtime_dirty) {
+ return;
+ }
+
+ ret = kvm_set_one_reg(CPU(cpu), KVM_REG_ARM_TIMER_CNT, &cpu->kvm_vtime);
+ if (ret) {
+ error_report("Failed to set KVM_REG_ARM_TIMER_CNT");
+ abort();
+ }
+
+ cpu->kvm_vtime_dirty = false;
+}
+
+/**
+ * kvm_put_vcpu_events:
+ * @cpu: ARMCPU
+ *
+ * Put VCPU related state to kvm.
+ *
+ * Returns: 0 if success else < 0 error code
+ */
+static int kvm_put_vcpu_events(ARMCPU *cpu)
+{
+ CPUARMState *env = &cpu->env;
+ struct kvm_vcpu_events events;
+ int ret;
+
+ if (!kvm_has_vcpu_events()) {
+ return 0;
+ }
+
+ memset(&events, 0, sizeof(events));
+ events.exception.serror_pending = env->serror.pending;
+
+    /* Inject an SError into the guest with the specified syndrome if the
+     * host kernel supports it; otherwise inject an SError without a syndrome.
+ */
+ if (cap_has_inject_serror_esr) {
+ events.exception.serror_has_esr = env->serror.has_esr;
+ events.exception.serror_esr = env->serror.esr;
+ }
+
+ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
+ if (ret) {
+ error_report("failed to put vcpu events");
+ }
+
+ return ret;
+}
+
+/**
+ * kvm_get_vcpu_events:
+ * @cpu: ARMCPU
+ *
+ * Get VCPU related state from kvm.
+ *
+ * Returns: 0 if success else < 0 error code
+ */
+static int kvm_get_vcpu_events(ARMCPU *cpu)
+{
+ CPUARMState *env = &cpu->env;
+ struct kvm_vcpu_events events;
+ int ret;
+
+ if (!kvm_has_vcpu_events()) {
+ return 0;
+ }
+
+ memset(&events, 0, sizeof(events));
+ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events);
+ if (ret) {
+ error_report("failed to get vcpu events");
+ return ret;
+ }
+
+ env->serror.pending = events.exception.serror_pending;
+ env->serror.has_esr = events.exception.serror_has_esr;
+ env->serror.esr = events.exception.serror_esr;
return 0;
}
+#define ARM64_REG_ESR_EL1 ARM64_SYS_REG(3, 0, 5, 2, 0)
+#define ARM64_REG_TCR_EL1 ARM64_SYS_REG(3, 0, 2, 0, 2)
+
+/*
+ * ESR_EL1
+ * ISS encoding
+ * AARCH64: DFSC, bits [5:0]
+ * AARCH32:
+ * TTBCR.EAE == 0
+ * FS[4] - DFSR[10]
+ * FS[3:0] - DFSR[3:0]
+ * TTBCR.EAE == 1
+ * FS, bits [5:0]
+ */
+#define ESR_DFSC(aarch64, lpae, v) \
+ ((aarch64 || (lpae)) ? ((v) & 0x3F) \
+ : (((v) >> 6) | ((v) & 0x1F)))
+
+#define ESR_DFSC_EXTABT(aarch64, lpae) \
+ ((aarch64) ? 0x10 : (lpae) ? 0x10 : 0x8)
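
A brief worked example of these encodings (editorial note, not part of the patch):

/*
 * Illustration: for a synchronous external abort the expected DFSC is
 * 0x10 on AArch64 and AArch32-LPAE guests, and 0x8 (short-descriptor
 * FS encoding 0b01000) otherwise, so the check performed below is e.g.
 *     ESR_DFSC(1, 0, dfsr_val) == ESR_DFSC_EXTABT(1, 0)   -> dfsc == 0x10
 *     ESR_DFSC(0, 0, dfsr_val) == ESR_DFSC_EXTABT(0, 0)   -> fs   == 0x8
 */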
+
+/**
+ * kvm_arm_verify_ext_dabt_pending:
+ * @cpu: ARMCPU
+ *
+ * Verify the fault status code wrt the Ext DABT injection
+ *
+ * Returns: true if the fault status code is as expected, false otherwise
+ */
+static bool kvm_arm_verify_ext_dabt_pending(ARMCPU *cpu)
+{
+ CPUState *cs = CPU(cpu);
+ uint64_t dfsr_val;
+
+ if (!kvm_get_one_reg(cs, ARM64_REG_ESR_EL1, &dfsr_val)) {
+ CPUARMState *env = &cpu->env;
+ int aarch64_mode = arm_feature(env, ARM_FEATURE_AARCH64);
+ int lpae = 0;
+
+ if (!aarch64_mode) {
+ uint64_t ttbcr;
+
+ if (!kvm_get_one_reg(cs, ARM64_REG_TCR_EL1, &ttbcr)) {
+ lpae = arm_feature(env, ARM_FEATURE_LPAE)
+ && (ttbcr & TTBCR_EAE);
+ }
+ }
+ /*
+ * The verification here is based on the DFSC bits
+ * of the ESR_EL1 reg only
+ */
+ return (ESR_DFSC(aarch64_mode, lpae, dfsr_val) ==
+ ESR_DFSC_EXTABT(aarch64_mode, lpae));
+ }
+ return false;
+}
+
void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ if (unlikely(env->ext_dabt_raised)) {
+ /*
+ * Verify that the external DABT has been properly injected; otherwise we
+ * risk indefinitely re-running the faulting instruction. This covers a
+ * very narrow case for kernels 5.5..5.5.4 in which the injected abort was
+ * misconfigured to be an IMPLEMENTATION DEFINED exception (for 32-bit EL1).
+ */
+ if (!arm_feature(env, ARM_FEATURE_AARCH64) &&
+ unlikely(!kvm_arm_verify_ext_dabt_pending(cpu))) {
+
+ error_report("Data abort exception with no valid ISS generated by "
+ "guest memory access. KVM unable to emulate faulting "
+ "instruction. Failed to inject an external data abort "
+ "into the guest.");
+ abort();
+ }
+ /* Clear the status */
+ env->ext_dabt_raised = 0;
+ }
}
MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
@@ -545,7 +1251,7 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
if (run->s.regs.device_irq_level != cpu->device_irq_level) {
switched_level = cpu->device_irq_level ^ run->s.regs.device_irq_level;
- qemu_mutex_lock_iothread();
+ bql_lock();
if (switched_level & KVM_ARM_DEV_EL1_VTIMER) {
qemu_set_irq(cpu->gt_timer_outputs[GTIMER_VIRT],
@@ -574,23 +1280,160 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
/* We also mark unknown levels as processed to not waste cycles */
cpu->device_irq_level = run->s.regs.device_irq_level;
- qemu_mutex_unlock_iothread();
+ bql_unlock();
}
return MEMTXATTRS_UNSPECIFIED;
}
+static void kvm_arm_vm_state_change(void *opaque, bool running, RunState state)
+{
+ ARMCPU *cpu = opaque;
+
+ if (running) {
+ if (cpu->kvm_adjvtime) {
+ kvm_arm_put_virtual_time(cpu);
+ }
+ } else {
+ if (cpu->kvm_adjvtime) {
+ kvm_arm_get_virtual_time(cpu);
+ }
+ }
+}
+
+/**
+ * kvm_arm_handle_dabt_nisv:
+ * @cpu: ARMCPU
+ * @esr_iss: ISS encoding (limited) for the exception from Data Abort
+ * ISV bit set to '0b0' -> no valid instruction syndrome
+ * @fault_ipa: faulting address for the synchronous data abort
+ *
+ * Returns: 0 if the exception has been handled, < 0 otherwise
+ */
+static int kvm_arm_handle_dabt_nisv(ARMCPU *cpu, uint64_t esr_iss,
+ uint64_t fault_ipa)
+{
+ CPUARMState *env = &cpu->env;
+ /*
+ * Request KVM to inject the external data abort into the guest
+ */
+ if (cap_has_inject_ext_dabt) {
+ struct kvm_vcpu_events events = { };
+ /*
+ * The external data abort event will be handled immediately by KVM
+ * using the address fault that triggered the exit on the given VCPU.
+ * Requesting injection of the external data abort does not rely on any
+ * other VCPU state, so in this particular case the usual VCPU state
+ * synchronization can be skipped.
+ */
+ events.exception.ext_dabt_pending = 1;
+ /* KVM_CAP_ARM_INJECT_EXT_DABT implies KVM_CAP_VCPU_EVENTS */
+ if (!kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events)) {
+ env->ext_dabt_raised = 1;
+ return 0;
+ }
+ } else {
+ error_report("Data abort exception triggered by guest memory access "
+ "at physical address: 0x" TARGET_FMT_lx,
+ (target_ulong)fault_ipa);
+ error_printf("KVM unable to emulate faulting instruction.\n");
+ }
+ return -1;
+}
+
+/**
+ * kvm_arm_handle_debug:
+ * @cpu: ARMCPU
+ * @debug_exit: debug part of the KVM exit structure
+ *
+ * Returns: TRUE if the debug exception was handled.
+ *
+ * See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register
+ *
+ * To minimise translation between the kernel and user space, the kernel
+ * ABI just provides user space with the full exception syndrome
+ * register value, which is decoded in QEMU.
+ */
+static bool kvm_arm_handle_debug(ARMCPU *cpu,
+ struct kvm_debug_exit_arch *debug_exit)
+{
+ int hsr_ec = syn_get_ec(debug_exit->hsr);
+ CPUState *cs = CPU(cpu);
+ CPUARMState *env = &cpu->env;
+
+ /* Ensure PC is synchronised */
+ kvm_cpu_synchronize_state(cs);
+
+ switch (hsr_ec) {
+ case EC_SOFTWARESTEP:
+ if (cs->singlestep_enabled) {
+ return true;
+ } else {
+ /*
+ * The kernel should have suppressed the guest's ability to
+ * single-step at this point, so something has gone wrong.
+ */
+ error_report("%s: guest single-step while debugging unsupported"
+ " (%"PRIx64", %"PRIx32")",
+ __func__, env->pc, debug_exit->hsr);
+ return false;
+ }
+ break;
+ case EC_AA64_BKPT:
+ if (kvm_find_sw_breakpoint(cs, env->pc)) {
+ return true;
+ }
+ break;
+ case EC_BREAKPOINT:
+ if (find_hw_breakpoint(cs, env->pc)) {
+ return true;
+ }
+ break;
+ case EC_WATCHPOINT:
+ {
+ CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far);
+ if (wp) {
+ cs->watchpoint_hit = wp;
+ return true;
+ }
+ break;
+ }
+ default:
+ error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")",
+ __func__, debug_exit->hsr, env->pc);
+ }
+
+ /*
+ * If we are not handling the debug exception, it must belong to the
+ * guest. Let's reuse the existing TCG interrupt code to set everything
+ * up properly.
+ */
+ cs->exception_index = EXCP_BKPT;
+ env->exception.syndrome = debug_exit->hsr;
+ env->exception.vaddress = debug_exit->far;
+ env->exception.target_el = 1;
+ bql_lock();
+ arm_cpu_do_interrupt(cs);
+ bql_unlock();
+
+ return false;
+}
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
+ ARMCPU *cpu = ARM_CPU(cs);
int ret = 0;
switch (run->exit_reason) {
case KVM_EXIT_DEBUG:
- if (kvm_arm_handle_debug(cs, &run->debug.arch)) {
+ if (kvm_arm_handle_debug(cpu, &run->debug.arch)) {
ret = EXCP_DEBUG;
} /* otherwise return to guest */
break;
+ case KVM_EXIT_ARM_NISV:
+ /* External DABT with no valid ISS to decode */
+ ret = kvm_arm_handle_dabt_nisv(cpu, run->arm_nisv.esr_iss,
+ run->arm_nisv.fault_ipa);
+ break;
default:
qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
__func__, run->exit_reason);
@@ -609,33 +1452,61 @@ int kvm_arch_process_async_events(CPUState *cs)
return 0;
}
-/* The #ifdef protections are until 32bit headers are imported and can
- * be removed once both 32 and 64 bit reach feature parity.
+/**
+ * kvm_arm_hw_debug_active:
+ * @cpu: ARMCPU
+ *
+ * Returns: TRUE if any hardware breakpoints are in use.
*/
+static bool kvm_arm_hw_debug_active(ARMCPU *cpu)
+{
+ return ((cur_hw_wps > 0) || (cur_hw_bps > 0));
+}
+
+/**
+ * kvm_arm_copy_hw_debug_data:
+ * @ptr: kvm_guest_debug_arch structure
+ *
+ * Copy the architecture specific debug registers into the
+ * kvm_guest_debug ioctl structure.
+ */
+static void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr)
+{
+ int i;
+ memset(ptr, 0, sizeof(struct kvm_guest_debug_arch));
+
+ for (i = 0; i < max_hw_wps; i++) {
+ HWWatchpoint *wp = get_hw_wp(i);
+ ptr->dbg_wcr[i] = wp->wcr;
+ ptr->dbg_wvr[i] = wp->wvr;
+ }
+ for (i = 0; i < max_hw_bps; i++) {
+ HWBreakpoint *bp = get_hw_bp(i);
+ ptr->dbg_bcr[i] = bp->bcr;
+ ptr->dbg_bvr[i] = bp->bvr;
+ }
+}
+
void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
-#ifdef KVM_GUESTDBG_USE_SW_BP
if (kvm_sw_breakpoints_active(cs)) {
dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
}
-#endif
-#ifdef KVM_GUESTDBG_USE_HW
- if (kvm_arm_hw_debug_active(cs)) {
+ if (kvm_arm_hw_debug_active(ARM_CPU(cs))) {
dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW;
kvm_arm_copy_hw_debug_data(&dbg->arch);
}
-#endif
}
void kvm_arch_init_irq_routing(KVMState *s)
{
}
-int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
+int kvm_arch_irqchip_create(KVMState *s)
{
- if (machine_kernel_irqchip_split(ms)) {
- perror("-machine kernel_irqchip=split is not supported on ARM.");
- exit(1);
+ if (kvm_kernel_irqchip_split()) {
+ error_report("-machine kernel_irqchip=split is not supported on ARM.");
+ exit(1);
}
/* If we can create the VGIC using the newer device control API, we
@@ -646,15 +1517,29 @@ int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
int kvm_arm_vgic_probe(void)
{
+ int val = 0;
+
if (kvm_create_device(kvm_state,
KVM_DEV_TYPE_ARM_VGIC_V3, true) == 0) {
- return 3;
- } else if (kvm_create_device(kvm_state,
- KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) {
- return 2;
- } else {
- return 0;
+ val |= KVM_ARM_VGIC_V3;
}
+ if (kvm_create_device(kvm_state,
+ KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) {
+ val |= KVM_ARM_VGIC_V2;
+ }
+ return val;
+}
+
+int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level)
+{
+ int kvm_irq = (irqtype << KVM_ARM_IRQ_TYPE_SHIFT) | irq;
+ int cpu_idx1 = cpu % 256;
+ int cpu_idx2 = cpu / 256;
+
+ kvm_irq |= (cpu_idx1 << KVM_ARM_IRQ_VCPU_SHIFT) |
+ (cpu_idx2 << KVM_ARM_IRQ_VCPU2_SHIFT);
+
+ return kvm_set_irq(kvm_state, kvm_irq, !!level);
}
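+
+/*
+ * Illustrative usage sketch, not a caller in this patch (the
+ * KVM_ARM_IRQ_TYPE_* constants come from the kernel uapi headers):
+ *
+ *     kvm_arm_set_irq(vcpu_index, KVM_ARM_IRQ_TYPE_PPI, ppi, 1);
+ *
+ * asserts the given per-CPU interrupt on that VCPU; VCPU indices above 255
+ * are carried in the VCPU2 field computed above.
+ */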
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
@@ -664,7 +1549,6 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
hwaddr xlat, len, doorbell_gpa;
MemoryRegionSection mrs;
MemoryRegion *mr;
- int ret = 1;
if (as == &address_space_memory) {
return 0;
@@ -672,15 +1556,19 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
/* MSI doorbell address is translated by an IOMMU */
- rcu_read_lock();
+ RCU_READ_LOCK_GUARD();
+
mr = address_space_translate(as, address, &xlat, &len, true,
MEMTXATTRS_UNSPECIFIED);
+
if (!mr) {
- goto unlock;
+ return 1;
}
+
mrs = memory_region_find(mr, xlat, 1);
+
if (!mrs.mr) {
- goto unlock;
+ return 1;
}
doorbell_gpa = mrs.offset_within_address_space;
@@ -691,11 +1579,7 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
trace_kvm_arm_fixup_msi_route(address, doorbell_gpa);
- ret = 0;
-
-unlock:
- rcu_read_unlock();
- return ret;
+ return 0;
}
int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
@@ -713,3 +1597,831 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
{
return (data - 32) & 0xffff;
}
+
+bool kvm_arch_cpu_check_are_resettable(void)
+{
+ return true;
+}
+
+static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ KVMState *s = KVM_STATE(obj);
+ uint64_t value = s->kvm_eager_split_size;
+
+ visit_type_size(v, name, &value, errp);
+}
+
+static void kvm_arch_set_eager_split_size(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ KVMState *s = KVM_STATE(obj);
+ uint64_t value;
+
+ if (s->fd != -1) {
+ error_setg(errp, "Unable to set early-split-size after KVM has been initialized");
+ return;
+ }
+
+ if (!visit_type_size(v, name, &value, errp)) {
+ return;
+ }
+
+ if (value && !is_power_of_2(value)) {
+ error_setg(errp, "early-split-size must be a power of two");
+ return;
+ }
+
+ s->kvm_eager_split_size = value;
+}
+
+void kvm_arch_accel_class_init(ObjectClass *oc)
+{
+ object_class_property_add(oc, "eager-split-size", "size",
+ kvm_arch_get_eager_split_size,
+ kvm_arch_set_eager_split_size, NULL, NULL);
+
+ object_class_property_set_description(oc, "eager-split-size",
+ "Eager Page Split chunk size for hugepages. (default: 0, disabled)");
+}
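+
+/*
+ * For example (an assumed typical invocation, not shown in this patch), the
+ * property registered above can be set on the command line with something
+ * like:
+ *
+ *     -accel kvm,eager-split-size=64K
+ *
+ * while the default of 0 leaves eager page splitting disabled.
+ */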
+
+int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type)
+{
+ switch (type) {
+ case GDB_BREAKPOINT_HW:
+ return insert_hw_breakpoint(addr);
+ break;
+ case GDB_WATCHPOINT_READ:
+ case GDB_WATCHPOINT_WRITE:
+ case GDB_WATCHPOINT_ACCESS:
+ return insert_hw_watchpoint(addr, len, type);
+ default:
+ return -ENOSYS;
+ }
+}
+
+int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type)
+{
+ switch (type) {
+ case GDB_BREAKPOINT_HW:
+ return delete_hw_breakpoint(addr);
+ case GDB_WATCHPOINT_READ:
+ case GDB_WATCHPOINT_WRITE:
+ case GDB_WATCHPOINT_ACCESS:
+ return delete_hw_watchpoint(addr, len, type);
+ default:
+ return -ENOSYS;
+ }
+}
+
+void kvm_arch_remove_all_hw_breakpoints(void)
+{
+ if (cur_hw_wps > 0) {
+ g_array_remove_range(hw_watchpoints, 0, cur_hw_wps);
+ }
+ if (cur_hw_bps > 0) {
+ g_array_remove_range(hw_breakpoints, 0, cur_hw_bps);
+ }
+}
+
+static bool kvm_arm_set_device_attr(ARMCPU *cpu, struct kvm_device_attr *attr,
+ const char *name)
+{
+ int err;
+
+ err = kvm_vcpu_ioctl(CPU(cpu), KVM_HAS_DEVICE_ATTR, attr);
+ if (err != 0) {
+ error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err));
+ return false;
+ }
+
+ err = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEVICE_ATTR, attr);
+ if (err != 0) {
+ error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err));
+ return false;
+ }
+
+ return true;
+}
+
+void kvm_arm_pmu_init(ARMCPU *cpu)
+{
+ struct kvm_device_attr attr = {
+ .group = KVM_ARM_VCPU_PMU_V3_CTRL,
+ .attr = KVM_ARM_VCPU_PMU_V3_INIT,
+ };
+
+ if (!cpu->has_pmu) {
+ return;
+ }
+ if (!kvm_arm_set_device_attr(cpu, &attr, "PMU")) {
+ error_report("failed to init PMU");
+ abort();
+ }
+}
+
+void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq)
+{
+ struct kvm_device_attr attr = {
+ .group = KVM_ARM_VCPU_PMU_V3_CTRL,
+ .addr = (intptr_t)&irq,
+ .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
+ };
+
+ if (!cpu->has_pmu) {
+ return;
+ }
+ if (!kvm_arm_set_device_attr(cpu, &attr, "PMU")) {
+ error_report("failed to set irq for PMU");
+ abort();
+ }
+}
+
+void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa)
+{
+ struct kvm_device_attr attr = {
+ .group = KVM_ARM_VCPU_PVTIME_CTRL,
+ .attr = KVM_ARM_VCPU_PVTIME_IPA,
+ .addr = (uint64_t)&ipa,
+ };
+
+ if (cpu->kvm_steal_time == ON_OFF_AUTO_OFF) {
+ return;
+ }
+ if (!kvm_arm_set_device_attr(cpu, &attr, "PVTIME IPA")) {
+ error_report("failed to init PVTIME IPA");
+ abort();
+ }
+}
+
+void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp)
+{
+ bool has_steal_time = kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME);
+
+ if (cpu->kvm_steal_time == ON_OFF_AUTO_AUTO) {
+ if (!has_steal_time || !arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ cpu->kvm_steal_time = ON_OFF_AUTO_OFF;
+ } else {
+ cpu->kvm_steal_time = ON_OFF_AUTO_ON;
+ }
+ } else if (cpu->kvm_steal_time == ON_OFF_AUTO_ON) {
+ if (!has_steal_time) {
+ error_setg(errp, "'kvm-steal-time' cannot be enabled "
+ "on this host");
+ return;
+ } else if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ /*
+ * DEN0057A chapter 2 says "This specification only covers
+ * systems in which the Execution state of the hypervisor
+ * as well as EL1 of virtual machines is AArch64.". And,
+ * to ensure that, the smc/hvc calls are only specified as
+ * smc64/hvc64.
+ */
+ error_setg(errp, "'kvm-steal-time' cannot be enabled "
+ "for AArch32 guests");
+ return;
+ }
+ }
+}
+
+bool kvm_arm_aarch32_supported(void)
+{
+ return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL1_32BIT);
+}
+
+bool kvm_arm_sve_supported(void)
+{
+ return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
+}
+
+QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
+
+uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
+{
+ /* Only call this function if kvm_arm_sve_supported() returns true. */
+ static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS];
+ static bool probed;
+ uint32_t vq = 0;
+ int i;
+
+ /*
+ * KVM ensures all host CPUs support the same set of vector lengths.
+ * So we only need to create the scratch VCPUs once and then cache
+ * the results.
+ */
+ if (!probed) {
+ struct kvm_vcpu_init init = {
+ .target = -1,
+ .features[0] = (1 << KVM_ARM_VCPU_SVE),
+ };
+ struct kvm_one_reg reg = {
+ .id = KVM_REG_ARM64_SVE_VLS,
+ .addr = (uint64_t)&vls[0],
+ };
+ int fdarray[3], ret;
+
+ probed = true;
+
+ if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) {
+ error_report("failed to create scratch VCPU with SVE enabled");
+ abort();
+ }
+ ret = ioctl(fdarray[2], KVM_GET_ONE_REG, &reg);
+ kvm_arm_destroy_scratch_host_vcpu(fdarray);
+ if (ret) {
+ error_report("failed to get KVM_REG_ARM64_SVE_VLS: %s",
+ strerror(errno));
+ abort();
+ }
+
+ for (i = KVM_ARM64_SVE_VLS_WORDS - 1; i >= 0; --i) {
+ if (vls[i]) {
+ vq = 64 - clz64(vls[i]) + i * 64;
+ break;
+ }
+ }
+ if (vq > ARM_MAX_VQ) {
+ warn_report("KVM supports vector lengths larger than "
+ "QEMU can enable");
+ vls[0] &= MAKE_64BIT_MASK(0, ARM_MAX_VQ);
+ }
+ }
+
+ return vls[0];
+}
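+
+/*
+ * Decode sketch for the bitmap returned above: bit n of vls[i] set means
+ * that a vector length of (i * 64 + n + 1) quadwords is supported, so e.g.
+ * vls[0] == 0x3 advertises VQ 1 and VQ 2 (128-bit and 256-bit vectors) and
+ * the loop above computes vq == 2 as the maximum.
+ */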
+
+static int kvm_arm_sve_set_vls(ARMCPU *cpu)
+{
+ uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = { cpu->sve_vq.map };
+
+ assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX);
+
+ return kvm_set_one_reg(CPU(cpu), KVM_REG_ARM64_SVE_VLS, &vls[0]);
+}
+
+#define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5
+
+int kvm_arch_init_vcpu(CPUState *cs)
+{
+ int ret;
+ uint64_t mpidr;
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ uint64_t psciver;
+
+ if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
+ !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
+ error_report("KVM is not supported for this guest CPU type");
+ return -EINVAL;
+ }
+
+ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cpu);
+
+ /* Determine init features for this CPU */
+ memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
+ if (cs->start_powered_off) {
+ cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
+ }
+ if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
+ cpu->psci_version = QEMU_PSCI_VERSION_0_2;
+ cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
+ }
+ if (!arm_feature(env, ARM_FEATURE_AARCH64)) {
+ cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
+ }
+ if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
+ cpu->has_pmu = false;
+ }
+ if (cpu->has_pmu) {
+ cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
+ } else {
+ env->features &= ~(1ULL << ARM_FEATURE_PMU);
+ }
+ if (cpu_isar_feature(aa64_sve, cpu)) {
+ assert(kvm_arm_sve_supported());
+ cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE;
+ }
+ if (cpu_isar_feature(aa64_pauth, cpu)) {
+ cpu->kvm_init_features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS |
+ 1 << KVM_ARM_VCPU_PTRAUTH_GENERIC);
+ }
+
+ /* Do KVM_ARM_VCPU_INIT ioctl */
+ ret = kvm_arm_vcpu_init(cpu);
+ if (ret) {
+ return ret;
+ }
+
+ if (cpu_isar_feature(aa64_sve, cpu)) {
+ ret = kvm_arm_sve_set_vls(cpu);
+ if (ret) {
+ return ret;
+ }
+ ret = kvm_arm_vcpu_finalize(cpu, KVM_ARM_VCPU_SVE);
+ if (ret) {
+ return ret;
+ }
+ }
+
+ /*
+ * KVM reports the exact PSCI version it is implementing via a
+ * special sysreg. If it is present, use its contents to determine
+ * what to report to the guest in the dtb (it is the PSCI version,
+ * in the same 15-bits major 16-bits minor format that PSCI_VERSION
+ * returns).
+ */
+ if (!kvm_get_one_reg(cs, KVM_REG_ARM_PSCI_VERSION, &psciver)) {
+ cpu->psci_version = psciver;
+ }
+
+ /*
+ * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
+ * Currently KVM has its own idea about MPIDR assignment, so we
+ * override our defaults with what we get from KVM.
+ */
+ ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr);
+ if (ret) {
+ return ret;
+ }
+ cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;
+
+ return kvm_arm_init_cpreg_list(cpu);
+}
+
+int kvm_arch_destroy_vcpu(CPUState *cs)
+{
+ return 0;
+}
+
+/* Callers must hold the iothread mutex lock */
+static void kvm_inject_arm_sea(CPUState *c)
+{
+ ARMCPU *cpu = ARM_CPU(c);
+ CPUARMState *env = &cpu->env;
+ uint32_t esr;
+ bool same_el;
+
+ c->exception_index = EXCP_DATA_ABORT;
+ env->exception.target_el = 1;
+
+ /*
+ * Set the DFSC to synchronous external abort and set FnV to not valid;
+ * this tells the guest that FAR_ELx is UNKNOWN for this abort.
+ */
+ same_el = arm_current_el(env) == env->exception.target_el;
+ esr = syn_data_abort_no_iss(same_el, 1, 0, 0, 0, 0, 0x10);
+
+ env->exception.syndrome = esr;
+
+ arm_cpu_do_interrupt(c);
+}
+
+#define AARCH64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+ KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
+#define AARCH64_SIMD_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \
+ KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
+#define AARCH64_SIMD_CTRL_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
+ KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
+static int kvm_arch_put_fpsimd(CPUState *cs)
+{
+ CPUARMState *env = &ARM_CPU(cs)->env;
+ int i, ret;
+
+ for (i = 0; i < 32; i++) {
+ uint64_t *q = aa64_vfp_qreg(env, i);
+#if HOST_BIG_ENDIAN
+ uint64_t fp_val[2] = { q[1], q[0] };
+ ret = kvm_set_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]),
+ fp_val);
+#else
+ ret = kvm_set_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), q);
+#endif
+ if (ret) {
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
+ * and PREGs and the FFR have a slice size of 256 bits. However, we simply
+ * hard-code the slice index to zero for now, as it's unlikely we'll need
+ * more than one slice for quite some time.
+ */
+static int kvm_arch_put_sve(CPUState *cs)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ uint64_t tmp[ARM_MAX_VQ * 2];
+ uint64_t *r;
+ int n, ret;
+
+ for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
+ r = sve_bswap64(tmp, &env->vfp.zregs[n].d[0], cpu->sve_max_vq * 2);
+ ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_ZREG(n, 0), r);
+ if (ret) {
+ return ret;
+ }
+ }
+
+ for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
+ r = sve_bswap64(tmp, &env->vfp.pregs[n].p[0],
+ DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
+ ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_PREG(n, 0), r);
+ if (ret) {
+ return ret;
+ }
+ }
+
+ r = sve_bswap64(tmp, &env->vfp.pregs[FFR_PRED_NUM].p[0],
+ DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
+ ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_FFR(0), r);
+ if (ret) {
+ return ret;
+ }
+
+ return 0;
+}
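+
+/*
+ * Slice-size arithmetic for the comment above: a full ZREG slice is
+ * 2048 bits, i.e. 256 bytes or 32 host uint64_t values, which is why tmp[]
+ * is dimensioned as ARM_MAX_VQ * 2 (ARM_MAX_VQ being 16); a full PREG/FFR
+ * slice is 256 bits, i.e. 32 bytes.
+ */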
+
+int kvm_arch_put_registers(CPUState *cs, int level)
+{
+ uint64_t val;
+ uint32_t fpr;
+ int i, ret;
+ unsigned int el;
+
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ /* If we are in AArch32 mode then we need to copy the AArch32 regs to the
+ * AArch64 registers before pushing them out to 64-bit KVM.
+ */
+ if (!is_a64(env)) {
+ aarch64_sync_32_to_64(env);
+ }
+
+ for (i = 0; i < 31; i++) {
+ ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]),
+ &env->xregs[i]);
+ if (ret) {
+ return ret;
+ }
+ }
+
+ /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
+ * QEMU side we keep the current SP in xregs[31] as well.
+ */
+ aarch64_save_sp(env, 1);
+
+ ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.sp), &env->sp_el[0]);
+ if (ret) {
+ return ret;
+ }
+
+ ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(sp_el1), &env->sp_el[1]);
+ if (ret) {
+ return ret;
+ }
+
+ /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */
+ if (is_a64(env)) {
+ val = pstate_read(env);
+ } else {
+ val = cpsr_read(env);
+ }
+ ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pstate), &val);
+ if (ret) {
+ return ret;
+ }
+
+ ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc);
+ if (ret) {
+ return ret;
+ }
+
+ ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(elr_el1), &env->elr_el[1]);
+ if (ret) {
+ return ret;
+ }
+
+ /* Saved Program State Registers
+ *
+ * Before we restore from the banked_spsr[] array we need to
+ * ensure that any modifications to env->spsr are correctly
+ * reflected in the banks.
+ */
+ el = arm_current_el(env);
+ if (el > 0 && !is_a64(env)) {
+ i = bank_number(env->uncached_cpsr & CPSR_M);
+ env->banked_spsr[i] = env->spsr;
+ }
+
+ /* KVM 0-4 map to QEMU banks 1-5 */
+ for (i = 0; i < KVM_NR_SPSR; i++) {
+ ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(spsr[i]),
+ &env->banked_spsr[i + 1]);
+ if (ret) {
+ return ret;
+ }
+ }
+
+ if (cpu_isar_feature(aa64_sve, cpu)) {
+ ret = kvm_arch_put_sve(cs);
+ } else {
+ ret = kvm_arch_put_fpsimd(cs);
+ }
+ if (ret) {
+ return ret;
+ }
+
+ fpr = vfp_get_fpsr(env);
+ ret = kvm_set_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpsr), &fpr);
+ if (ret) {
+ return ret;
+ }
+
+ fpr = vfp_get_fpcr(env);
+ ret = kvm_set_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpcr), &fpr);
+ if (ret) {
+ return ret;
+ }
+
+ write_cpustate_to_list(cpu, true);
+
+ if (!write_list_to_kvmstate(cpu, level)) {
+ return -EINVAL;
+ }
+
+ /*
+ * Setting VCPU events should happen after syncing the registers to
+ * avoid overwriting potential changes made by KVM when the
+ * KVM_SET_VCPU_EVENTS ioctl is called.
+ */
+ ret = kvm_put_vcpu_events(cpu);
+ if (ret) {
+ return ret;
+ }
+
+ return kvm_arm_sync_mpstate_to_kvm(cpu);
+}
+
+static int kvm_arch_get_fpsimd(CPUState *cs)
+{
+ CPUARMState *env = &ARM_CPU(cs)->env;
+ int i, ret;
+
+ for (i = 0; i < 32; i++) {
+ uint64_t *q = aa64_vfp_qreg(env, i);
+ ret = kvm_get_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), q);
+ if (ret) {
+ return ret;
+ } else {
+#if HOST_BIG_ENDIAN
+ uint64_t t;
+ t = q[0], q[0] = q[1], q[1] = t;
+#endif
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
+ * and PREGs and the FFR have a slice size of 256 bits. However, we simply
+ * hard-code the slice index to zero for now, as it's unlikely we'll need
+ * more than one slice for quite some time.
+ */
+static int kvm_arch_get_sve(CPUState *cs)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ uint64_t *r;
+ int n, ret;
+
+ for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
+ r = &env->vfp.zregs[n].d[0];
+ ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_ZREG(n, 0), r);
+ if (ret) {
+ return ret;
+ }
+ sve_bswap64(r, r, cpu->sve_max_vq * 2);
+ }
+
+ for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
+ r = &env->vfp.pregs[n].p[0];
+ ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_PREG(n, 0), r);
+ if (ret) {
+ return ret;
+ }
+ sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
+ }
+
+ r = &env->vfp.pregs[FFR_PRED_NUM].p[0];
+ ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_FFR(0), r);
+ if (ret) {
+ return ret;
+ }
+ sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
+
+ return 0;
+}
+
+int kvm_arch_get_registers(CPUState *cs)
+{
+ uint64_t val;
+ unsigned int el;
+ uint32_t fpr;
+ int i, ret;
+
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+
+ for (i = 0; i < 31; i++) {
+ ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]),
+ &env->xregs[i]);
+ if (ret) {
+ return ret;
+ }
+ }
+
+ ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.sp), &env->sp_el[0]);
+ if (ret) {
+ return ret;
+ }
+
+ ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(sp_el1), &env->sp_el[1]);
+ if (ret) {
+ return ret;
+ }
+
+ ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.pstate), &val);
+ if (ret) {
+ return ret;
+ }
+
+ env->aarch64 = ((val & PSTATE_nRW) == 0);
+ if (is_a64(env)) {
+ pstate_write(env, val);
+ } else {
+ cpsr_write(env, val, 0xffffffff, CPSRWriteRaw);
+ }
+
+ /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
+ * QEMU side we keep the current SP in xregs[31] as well.
+ */
+ aarch64_restore_sp(env, 1);
+
+ ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc);
+ if (ret) {
+ return ret;
+ }
+
+ /* If we are in AArch32 mode then we need to sync the AArch32 regs with the
+ * incoming AArch64 regs received from 64-bit KVM.
+ * We must perform this after all of the registers have been acquired from
+ * the kernel.
+ */
+ if (!is_a64(env)) {
+ aarch64_sync_64_to_32(env);
+ }
+
+ ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(elr_el1), &env->elr_el[1]);
+ if (ret) {
+ return ret;
+ }
+
+ /* Fetch the SPSR registers
+ *
+ * KVM SPSRs 0-4 map to QEMU banks 1-5
+ */
+ for (i = 0; i < KVM_NR_SPSR; i++) {
+ ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(spsr[i]),
+ &env->banked_spsr[i + 1]);
+ if (ret) {
+ return ret;
+ }
+ }
+
+ el = arm_current_el(env);
+ if (el > 0 && !is_a64(env)) {
+ i = bank_number(env->uncached_cpsr & CPSR_M);
+ env->spsr = env->banked_spsr[i];
+ }
+
+ if (cpu_isar_feature(aa64_sve, cpu)) {
+ ret = kvm_arch_get_sve(cs);
+ } else {
+ ret = kvm_arch_get_fpsimd(cs);
+ }
+ if (ret) {
+ return ret;
+ }
+
+ ret = kvm_get_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpsr), &fpr);
+ if (ret) {
+ return ret;
+ }
+ vfp_set_fpsr(env, fpr);
+
+ ret = kvm_get_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpcr), &fpr);
+ if (ret) {
+ return ret;
+ }
+ vfp_set_fpcr(env, fpr);
+
+ ret = kvm_get_vcpu_events(cpu);
+ if (ret) {
+ return ret;
+ }
+
+ if (!write_kvmstate_to_list(cpu)) {
+ return -EINVAL;
+ }
+ /* Note that it's OK to have registers which aren't in CPUState,
+ * so we can ignore a failure return here.
+ */
+ write_list_to_cpustate(cpu);
+
+ ret = kvm_arm_sync_mpstate_to_qemu(cpu);
+
+ /* TODO: other registers */
+ return ret;
+}
+
+void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
+{
+ ram_addr_t ram_addr;
+ hwaddr paddr;
+
+ assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
+
+ if (acpi_ghes_present() && addr) {
+ ram_addr = qemu_ram_addr_from_host(addr);
+ if (ram_addr != RAM_ADDR_INVALID &&
+ kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
+ kvm_hwpoison_page_add(ram_addr);
+ /*
+ * If this is a BUS_MCEERR_AR, we know we have been called
+ * synchronously from the vCPU thread, so we can easily
+ * synchronize the state and inject an error.
+ *
+ * TODO: we currently don't tell the guest at all about
+ * BUS_MCEERR_AO. In that case we might either be being
+ * called synchronously from the vCPU thread, or a bit
+ * later from the main thread, so doing the injection of
+ * the error would be more complicated.
+ */
+ if (code == BUS_MCEERR_AR) {
+ kvm_cpu_synchronize_state(c);
+ if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
+ kvm_inject_arm_sea(c);
+ } else {
+ error_report("failed to record the error");
+ abort();
+ }
+ }
+ return;
+ }
+ if (code == BUS_MCEERR_AO) {
+ error_report("Hardware memory error at addr %p for memory used by "
+ "QEMU itself instead of guest system!", addr);
+ }
+ }
+
+ if (code == BUS_MCEERR_AR) {
+ error_report("Hardware memory error!");
+ exit(1);
+ }
+}
+
+/* C6.6.29 BRK instruction */
+static const uint32_t brk_insn = 0xd4200000;
+
+int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
+{
+ if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) ||
+ cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) {
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
+{
+ static uint32_t brk;
+
+ if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) ||
+ brk != brk_insn ||
+ cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) {
+ return -EINVAL;
+ }
+ return 0;
+}
diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c
deleted file mode 100644
index 4e91c11796..0000000000
--- a/target/arm/kvm32.c
+++ /dev/null
@@ -1,516 +0,0 @@
-/*
- * ARM implementation of KVM hooks, 32 bit specific code.
- *
- * Copyright Christoffer Dall 2009-2010
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include <sys/ioctl.h>
-
-#include <linux/kvm.h>
-
-#include "qemu-common.h"
-#include "cpu.h"
-#include "qemu/timer.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/kvm.h"
-#include "kvm_arm.h"
-#include "internals.h"
-#include "hw/arm/arm.h"
-#include "qemu/log.h"
-
-static inline void set_feature(uint64_t *features, int feature)
-{
- *features |= 1ULL << feature;
-}
-
-bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
-{
- /* Identify the feature bits corresponding to the host CPU, and
- * fill out the ARMHostCPUClass fields accordingly. To do this
- * we have to create a scratch VM, create a single CPU inside it,
- * and then query that CPU for the relevant ID registers.
- */
- int i, ret, fdarray[3];
- uint32_t midr, id_pfr0, mvfr1;
- uint64_t features = 0;
- /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
- * we know these will only support creating one kind of guest CPU,
- * which is its preferred CPU type.
- */
- static const uint32_t cpus_to_try[] = {
- QEMU_KVM_ARM_TARGET_CORTEX_A15,
- QEMU_KVM_ARM_TARGET_NONE
- };
- struct kvm_vcpu_init init;
- struct kvm_one_reg idregs[] = {
- {
- .id = KVM_REG_ARM | KVM_REG_SIZE_U32
- | ENCODE_CP_REG(15, 0, 0, 0, 0, 0, 0),
- .addr = (uintptr_t)&midr,
- },
- {
- .id = KVM_REG_ARM | KVM_REG_SIZE_U32
- | ENCODE_CP_REG(15, 0, 0, 0, 1, 0, 0),
- .addr = (uintptr_t)&id_pfr0,
- },
- {
- .id = KVM_REG_ARM | KVM_REG_SIZE_U32
- | KVM_REG_ARM_VFP | KVM_REG_ARM_VFP_MVFR1,
- .addr = (uintptr_t)&mvfr1,
- },
- };
-
- if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
- return false;
- }
-
- ahcf->target = init.target;
-
- /* This is not strictly blessed by the device tree binding docs yet,
- * but in practice the kernel does not care about this string so
- * there is no point maintaining an KVM_ARM_TARGET_* -> string table.
- */
- ahcf->dtb_compatible = "arm,arm-v7";
-
- for (i = 0; i < ARRAY_SIZE(idregs); i++) {
- ret = ioctl(fdarray[2], KVM_GET_ONE_REG, &idregs[i]);
- if (ret) {
- break;
- }
- }
-
- kvm_arm_destroy_scratch_host_vcpu(fdarray);
-
- if (ret) {
- return false;
- }
-
- /* Now we've retrieved all the register information we can
- * set the feature bits based on the ID register fields.
- * We can assume any KVM supporting CPU is at least a v7
- * with VFPv3, virtualization extensions, and the generic
- * timers; this in turn implies most of the other feature
- * bits, but a few must be tested.
- */
- set_feature(&features, ARM_FEATURE_V7VE);
- set_feature(&features, ARM_FEATURE_VFP3);
- set_feature(&features, ARM_FEATURE_GENERIC_TIMER);
-
- if (extract32(id_pfr0, 12, 4) == 1) {
- set_feature(&features, ARM_FEATURE_THUMB2EE);
- }
- if (extract32(mvfr1, 20, 4) == 1) {
- set_feature(&features, ARM_FEATURE_VFP_FP16);
- }
- if (extract32(mvfr1, 12, 4) == 1) {
- set_feature(&features, ARM_FEATURE_NEON);
- }
- if (extract32(mvfr1, 28, 4) == 1) {
- /* FMAC support implies VFPv4 */
- set_feature(&features, ARM_FEATURE_VFP4);
- }
-
- ahcf->features = features;
-
- return true;
-}
-
-bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
-{
- /* Return true if the regidx is a register we should synchronize
- * via the cpreg_tuples array (ie is not a core reg we sync by
- * hand in kvm_arch_get/put_registers())
- */
- switch (regidx & KVM_REG_ARM_COPROC_MASK) {
- case KVM_REG_ARM_CORE:
- case KVM_REG_ARM_VFP:
- return false;
- default:
- return true;
- }
-}
-
-typedef struct CPRegStateLevel {
- uint64_t regidx;
- int level;
-} CPRegStateLevel;
-
-/* All coprocessor registers not listed in the following table are assumed to
- * be of the level KVM_PUT_RUNTIME_STATE. If a register should be written less
- * often, you must add it to this table with a state of either
- * KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE.
- */
-static const CPRegStateLevel non_runtime_cpregs[] = {
- { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
-};
-
-int kvm_arm_cpreg_level(uint64_t regidx)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) {
- const CPRegStateLevel *l = &non_runtime_cpregs[i];
- if (l->regidx == regidx) {
- return l->level;
- }
- }
-
- return KVM_PUT_RUNTIME_STATE;
-}
-
-#define ARM_CPU_ID_MPIDR 0, 0, 0, 5
-
-int kvm_arch_init_vcpu(CPUState *cs)
-{
- int ret;
- uint64_t v;
- uint32_t mpidr;
- struct kvm_one_reg r;
- ARMCPU *cpu = ARM_CPU(cs);
-
- if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE) {
- fprintf(stderr, "KVM is not supported for this guest CPU type\n");
- return -EINVAL;
- }
-
- /* Determine init features for this CPU */
- memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
- if (cpu->start_powered_off) {
- cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
- }
- if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
- cpu->psci_version = 2;
- cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
- }
-
- /* Do KVM_ARM_VCPU_INIT ioctl */
- ret = kvm_arm_vcpu_init(cs);
- if (ret) {
- return ret;
- }
-
- /* Query the kernel to make sure it supports 32 VFP
- * registers: QEMU's "cortex-a15" CPU is always a
- * VFP-D32 core. The simplest way to do this is just
- * to attempt to read register d31.
- */
- r.id = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP | 31;
- r.addr = (uintptr_t)(&v);
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
- if (ret == -ENOENT) {
- return -EINVAL;
- }
-
- /*
- * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
- * Currently KVM has its own idea about MPIDR assignment, so we
- * override our defaults with what we get from KVM.
- */
- ret = kvm_get_one_reg(cs, ARM_CP15_REG32(ARM_CPU_ID_MPIDR), &mpidr);
- if (ret) {
- return ret;
- }
- cpu->mp_affinity = mpidr & ARM32_AFFINITY_MASK;
-
- return kvm_arm_init_cpreg_list(cpu);
-}
-
-typedef struct Reg {
- uint64_t id;
- int offset;
-} Reg;
-
-#define COREREG(KERNELNAME, QEMUFIELD) \
- { \
- KVM_REG_ARM | KVM_REG_SIZE_U32 | \
- KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(KERNELNAME), \
- offsetof(CPUARMState, QEMUFIELD) \
- }
-
-#define VFPSYSREG(R) \
- { \
- KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP | \
- KVM_REG_ARM_VFP_##R, \
- offsetof(CPUARMState, vfp.xregs[ARM_VFP_##R]) \
- }
-
-/* Like COREREG, but handle fields which are in a uint64_t in CPUARMState. */
-#define COREREG64(KERNELNAME, QEMUFIELD) \
- { \
- KVM_REG_ARM | KVM_REG_SIZE_U32 | \
- KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(KERNELNAME), \
- offsetoflow32(CPUARMState, QEMUFIELD) \
- }
-
-static const Reg regs[] = {
- /* R0_usr .. R14_usr */
- COREREG(usr_regs.uregs[0], regs[0]),
- COREREG(usr_regs.uregs[1], regs[1]),
- COREREG(usr_regs.uregs[2], regs[2]),
- COREREG(usr_regs.uregs[3], regs[3]),
- COREREG(usr_regs.uregs[4], regs[4]),
- COREREG(usr_regs.uregs[5], regs[5]),
- COREREG(usr_regs.uregs[6], regs[6]),
- COREREG(usr_regs.uregs[7], regs[7]),
- COREREG(usr_regs.uregs[8], usr_regs[0]),
- COREREG(usr_regs.uregs[9], usr_regs[1]),
- COREREG(usr_regs.uregs[10], usr_regs[2]),
- COREREG(usr_regs.uregs[11], usr_regs[3]),
- COREREG(usr_regs.uregs[12], usr_regs[4]),
- COREREG(usr_regs.uregs[13], banked_r13[BANK_USRSYS]),
- COREREG(usr_regs.uregs[14], banked_r14[BANK_USRSYS]),
- /* R13, R14, SPSR for SVC, ABT, UND, IRQ banks */
- COREREG(svc_regs[0], banked_r13[BANK_SVC]),
- COREREG(svc_regs[1], banked_r14[BANK_SVC]),
- COREREG64(svc_regs[2], banked_spsr[BANK_SVC]),
- COREREG(abt_regs[0], banked_r13[BANK_ABT]),
- COREREG(abt_regs[1], banked_r14[BANK_ABT]),
- COREREG64(abt_regs[2], banked_spsr[BANK_ABT]),
- COREREG(und_regs[0], banked_r13[BANK_UND]),
- COREREG(und_regs[1], banked_r14[BANK_UND]),
- COREREG64(und_regs[2], banked_spsr[BANK_UND]),
- COREREG(irq_regs[0], banked_r13[BANK_IRQ]),
- COREREG(irq_regs[1], banked_r14[BANK_IRQ]),
- COREREG64(irq_regs[2], banked_spsr[BANK_IRQ]),
- /* R8_fiq .. R14_fiq and SPSR_fiq */
- COREREG(fiq_regs[0], fiq_regs[0]),
- COREREG(fiq_regs[1], fiq_regs[1]),
- COREREG(fiq_regs[2], fiq_regs[2]),
- COREREG(fiq_regs[3], fiq_regs[3]),
- COREREG(fiq_regs[4], fiq_regs[4]),
- COREREG(fiq_regs[5], banked_r13[BANK_FIQ]),
- COREREG(fiq_regs[6], banked_r14[BANK_FIQ]),
- COREREG64(fiq_regs[7], banked_spsr[BANK_FIQ]),
- /* R15 */
- COREREG(usr_regs.uregs[15], regs[15]),
- /* VFP system registers */
- VFPSYSREG(FPSID),
- VFPSYSREG(MVFR1),
- VFPSYSREG(MVFR0),
- VFPSYSREG(FPEXC),
- VFPSYSREG(FPINST),
- VFPSYSREG(FPINST2),
-};
-
-int kvm_arch_put_registers(CPUState *cs, int level)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
- struct kvm_one_reg r;
- int mode, bn;
- int ret, i;
- uint32_t cpsr, fpscr;
-
- /* Make sure the banked regs are properly set */
- mode = env->uncached_cpsr & CPSR_M;
- bn = bank_number(mode);
- if (mode == ARM_CPU_MODE_FIQ) {
- memcpy(env->fiq_regs, env->regs + 8, 5 * sizeof(uint32_t));
- } else {
- memcpy(env->usr_regs, env->regs + 8, 5 * sizeof(uint32_t));
- }
- env->banked_r13[bn] = env->regs[13];
- env->banked_r14[bn] = env->regs[14];
- env->banked_spsr[bn] = env->spsr;
-
- /* Now we can safely copy stuff down to the kernel */
- for (i = 0; i < ARRAY_SIZE(regs); i++) {
- r.id = regs[i].id;
- r.addr = (uintptr_t)(env) + regs[i].offset;
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
- if (ret) {
- return ret;
- }
- }
-
- /* Special cases which aren't a single CPUARMState field */
- cpsr = cpsr_read(env);
- r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 |
- KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(usr_regs.ARM_cpsr);
- r.addr = (uintptr_t)(&cpsr);
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
- if (ret) {
- return ret;
- }
-
- /* VFP registers */
- r.id = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP;
- for (i = 0; i < 32; i++) {
- r.addr = (uintptr_t)aa32_vfp_dreg(env, i);
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
- if (ret) {
- return ret;
- }
- r.id++;
- }
-
- r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP |
- KVM_REG_ARM_VFP_FPSCR;
- fpscr = vfp_get_fpscr(env);
- r.addr = (uintptr_t)&fpscr;
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
- if (ret) {
- return ret;
- }
-
- /* Note that we do not call write_cpustate_to_list()
- * here, so we are only writing the tuple list back to
- * KVM. This is safe because nothing can change the
- * CPUARMState cp15 fields (in particular gdb accesses cannot)
- * and so there are no changes to sync. In fact syncing would
- * be wrong at this point: for a constant register where TCG and
- * KVM disagree about its value, the preceding write_list_to_cpustate()
- * would not have had any effect on the CPUARMState value (since the
- * register is read-only), and a write_cpustate_to_list() here would
- * then try to write the TCG value back into KVM -- this would either
- * fail or incorrectly change the value the guest sees.
- *
- * If we ever want to allow the user to modify cp15 registers via
- * the gdb stub, we would need to be more clever here (for instance
- * tracking the set of registers kvm_arch_get_registers() successfully
- * managed to update the CPUARMState with, and only allowing those
- * to be written back up into the kernel).
- */
- if (!write_list_to_kvmstate(cpu, level)) {
- return EINVAL;
- }
-
- kvm_arm_sync_mpstate_to_kvm(cpu);
-
- return ret;
-}
-
-int kvm_arch_get_registers(CPUState *cs)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
- struct kvm_one_reg r;
- int mode, bn;
- int ret, i;
- uint32_t cpsr, fpscr;
-
- for (i = 0; i < ARRAY_SIZE(regs); i++) {
- r.id = regs[i].id;
- r.addr = (uintptr_t)(env) + regs[i].offset;
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
- if (ret) {
- return ret;
- }
- }
-
- /* Special cases which aren't a single CPUARMState field */
- r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 |
- KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(usr_regs.ARM_cpsr);
- r.addr = (uintptr_t)(&cpsr);
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
- if (ret) {
- return ret;
- }
- cpsr_write(env, cpsr, 0xffffffff, CPSRWriteRaw);
-
- /* Make sure the current mode regs are properly set */
- mode = env->uncached_cpsr & CPSR_M;
- bn = bank_number(mode);
- if (mode == ARM_CPU_MODE_FIQ) {
- memcpy(env->regs + 8, env->fiq_regs, 5 * sizeof(uint32_t));
- } else {
- memcpy(env->regs + 8, env->usr_regs, 5 * sizeof(uint32_t));
- }
- env->regs[13] = env->banked_r13[bn];
- env->regs[14] = env->banked_r14[bn];
- env->spsr = env->banked_spsr[bn];
-
- /* VFP registers */
- r.id = KVM_REG_ARM | KVM_REG_SIZE_U64 | KVM_REG_ARM_VFP;
- for (i = 0; i < 32; i++) {
- r.addr = (uintptr_t)aa32_vfp_dreg(env, i);
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
- if (ret) {
- return ret;
- }
- r.id++;
- }
-
- r.id = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_VFP |
- KVM_REG_ARM_VFP_FPSCR;
- r.addr = (uintptr_t)&fpscr;
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
- if (ret) {
- return ret;
- }
- vfp_set_fpscr(env, fpscr);
-
- if (!write_kvmstate_to_list(cpu)) {
- return EINVAL;
- }
- /* Note that it's OK to have registers which aren't in CPUState,
- * so we can ignore a failure return here.
- */
- write_list_to_cpustate(cpu);
-
- kvm_arm_sync_mpstate_to_qemu(cpu);
-
- return 0;
-}
-
-int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
-{
- qemu_log_mask(LOG_UNIMP, "%s: guest debug not yet implemented\n", __func__);
- return -EINVAL;
-}
-
-int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
-{
- qemu_log_mask(LOG_UNIMP, "%s: guest debug not yet implemented\n", __func__);
- return -EINVAL;
-}
-
-bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
-{
- qemu_log_mask(LOG_UNIMP, "%s: guest debug not yet implemented\n", __func__);
- return false;
-}
-
-int kvm_arch_insert_hw_breakpoint(target_ulong addr,
- target_ulong len, int type)
-{
- qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
- return -EINVAL;
-}
-
-int kvm_arch_remove_hw_breakpoint(target_ulong addr,
- target_ulong len, int type)
-{
- qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
- return -EINVAL;
-}
-
-void kvm_arch_remove_all_hw_breakpoints(void)
-{
- qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
-}
-
-void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr)
-{
- qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
-}
-
-bool kvm_arm_hw_debug_active(CPUState *cs)
-{
- return false;
-}
-
-void kvm_arm_pmu_set_irq(CPUState *cs, int irq)
-{
- qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
-}
-
-void kvm_arm_pmu_init(CPUState *cs)
-{
- qemu_log_mask(LOG_UNIMP, "%s: not implemented\n", __func__);
-}
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
deleted file mode 100644
index e0b8246283..0000000000
--- a/target/arm/kvm64.c
+++ /dev/null
@@ -1,980 +0,0 @@
-/*
- * ARM implementation of KVM hooks, 64 bit specific code
- *
- * Copyright Mian-M. Hamayun 2013, Virtual Open Systems
- * Copyright Alex Bennée 2014, Linaro
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include <sys/ioctl.h>
-#include <sys/ptrace.h>
-
-#include <linux/elf.h>
-#include <linux/kvm.h>
-
-#include "qemu-common.h"
-#include "cpu.h"
-#include "qemu/timer.h"
-#include "qemu/error-report.h"
-#include "qemu/host-utils.h"
-#include "exec/gdbstub.h"
-#include "sysemu/sysemu.h"
-#include "sysemu/kvm.h"
-#include "kvm_arm.h"
-#include "internals.h"
-#include "hw/arm/arm.h"
-
-static bool have_guest_debug;
-
-/*
- * Although the ARM implementation of hardware assisted debugging
- * allows for different breakpoints per-core, the current GDB
- * interface treats them as a global pool of registers (which seems to
- * be the case for x86, ppc and s390). As a result we store one copy
- * of registers which is used for all active cores.
- *
- * Write access is serialised by virtue of the GDB protocol which
- * updates things. Read access (i.e. when the values are copied to the
- * vCPU) is also gated by GDB's run control.
- *
- * This is not unreasonable as most of the time debugging kernels you
- * never know which core will eventually execute your function.
- */
-
-typedef struct {
- uint64_t bcr;
- uint64_t bvr;
-} HWBreakpoint;
-
-/* The watchpoint registers can cover more area than the requested
- * watchpoint so we need to store the additional information
- * somewhere. We also need to supply a CPUWatchpoint to the GDB stub
- * when the watchpoint is hit.
- */
-typedef struct {
- uint64_t wcr;
- uint64_t wvr;
- CPUWatchpoint details;
-} HWWatchpoint;
-
-/* Maximum and current break/watch point counts */
-int max_hw_bps, max_hw_wps;
-GArray *hw_breakpoints, *hw_watchpoints;
-
-#define cur_hw_wps (hw_watchpoints->len)
-#define cur_hw_bps (hw_breakpoints->len)
-#define get_hw_bp(i) (&g_array_index(hw_breakpoints, HWBreakpoint, i))
-#define get_hw_wp(i) (&g_array_index(hw_watchpoints, HWWatchpoint, i))
-
-/**
- * kvm_arm_init_debug() - check for guest debug capabilities
- * @cs: CPUState
- *
- * kvm_check_extension returns the number of debug registers we have
- * or 0 if we have none.
- *
- */
-static void kvm_arm_init_debug(CPUState *cs)
-{
- have_guest_debug = kvm_check_extension(cs->kvm_state,
- KVM_CAP_SET_GUEST_DEBUG);
-
- max_hw_wps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_WPS);
- hw_watchpoints = g_array_sized_new(true, true,
- sizeof(HWWatchpoint), max_hw_wps);
-
- max_hw_bps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_BPS);
- hw_breakpoints = g_array_sized_new(true, true,
- sizeof(HWBreakpoint), max_hw_bps);
- return;
-}
-
-/**
- * insert_hw_breakpoint()
- * @addr: address of breakpoint
- *
- * See ARM ARM D2.9.1 for details but here we are only going to create
- * simple un-linked breakpoints (i.e. we don't chain breakpoints
- * together to match address and context or vmid). The hardware is
- * capable of fancier matching but that will require exposing that
- * fanciness to GDB's interface
- *
- * D7.3.2 DBGBCR<n>_EL1, Debug Breakpoint Control Registers
- *
- * 31 24 23 20 19 16 15 14 13 12 9 8 5 4 3 2 1 0
- * +------+------+-------+-----+----+------+-----+------+-----+---+
- * | RES0 | BT | LBN | SSC | HMC| RES0 | BAS | RES0 | PMC | E |
- * +------+------+-------+-----+----+------+-----+------+-----+---+
- *
- * BT: Breakpoint type (0 = unlinked address match)
- * LBN: Linked BP number (0 = unused)
- * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12)
- * BAS: Byte Address Select (RES1 for AArch64)
- * E: Enable bit
- */
-static int insert_hw_breakpoint(target_ulong addr)
-{
- HWBreakpoint brk = {
- .bcr = 0x1, /* BCR E=1, enable */
- .bvr = addr
- };
-
- if (cur_hw_bps >= max_hw_bps) {
- return -ENOBUFS;
- }
-
- brk.bcr = deposit32(brk.bcr, 1, 2, 0x3); /* PMC = 11 */
- brk.bcr = deposit32(brk.bcr, 5, 4, 0xf); /* BAS = RES1 */
-
- g_array_append_val(hw_breakpoints, brk);
-
- return 0;
-}
-
-/**
- * delete_hw_breakpoint()
- * @pc: address of breakpoint
- *
- * Delete a breakpoint and shuffle any above down
- */
-
-static int delete_hw_breakpoint(target_ulong pc)
-{
- int i;
- for (i = 0; i < hw_breakpoints->len; i++) {
- HWBreakpoint *brk = get_hw_bp(i);
- if (brk->bvr == pc) {
- g_array_remove_index(hw_breakpoints, i);
- return 0;
- }
- }
- return -ENOENT;
-}
-
-/**
- * insert_hw_watchpoint()
- * @addr: address of watch point
- * @len: size of area
- * @type: type of watch point
- *
- * See ARM ARM D2.10. As with the breakpoints we can do some advanced
- * stuff if we want to. The watch points can be linked with the break
- * points above to make them context aware. However for simplicity
- * currently we only deal with simple read/write watch points.
- *
- * D7.3.11 DBGWCR<n>_EL1, Debug Watchpoint Control Registers
- *
- * 31 29 28 24 23 21 20 19 16 15 14 13 12 5 4 3 2 1 0
- * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
- * | RES0 | MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E |
- * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
- *
- * MASK: num bits addr mask (0=none,01/10=res,11=3 bits (8 bytes))
- * WT: 0 - unlinked, 1 - linked (not currently used)
- * LBN: Linked BP number (not currently used)
- * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11)
- * BAS: Byte Address Select
- * LSC: Load/Store control (01: load, 10: store, 11: both)
- * E: Enable
- *
- * The bottom 2 bits of the value register are masked. Therefore to
- * break on any sizes smaller than an unaligned word you need to set
- * MASK=0, BAS=bit per byte in question. For larger regions (^2) you
- * need to ensure you mask the address as required and set BAS=0xff
- */
-
-static int insert_hw_watchpoint(target_ulong addr,
- target_ulong len, int type)
-{
- HWWatchpoint wp = {
- .wcr = 1, /* E=1, enable */
- .wvr = addr & (~0x7ULL),
- .details = { .vaddr = addr, .len = len }
- };
-
- if (cur_hw_wps >= max_hw_wps) {
- return -ENOBUFS;
- }
-
- /*
- * HMC=0 SSC=0 PAC=3 will hit EL0 or EL1, any security state,
- * valid whether EL3 is implemented or not
- */
- wp.wcr = deposit32(wp.wcr, 1, 2, 3);
-
- switch (type) {
- case GDB_WATCHPOINT_READ:
- wp.wcr = deposit32(wp.wcr, 3, 2, 1);
- wp.details.flags = BP_MEM_READ;
- break;
- case GDB_WATCHPOINT_WRITE:
- wp.wcr = deposit32(wp.wcr, 3, 2, 2);
- wp.details.flags = BP_MEM_WRITE;
- break;
- case GDB_WATCHPOINT_ACCESS:
- wp.wcr = deposit32(wp.wcr, 3, 2, 3);
- wp.details.flags = BP_MEM_ACCESS;
- break;
- default:
- g_assert_not_reached();
- break;
- }
- if (len <= 8) {
- /* we align the address and set the bits in BAS */
- int off = addr & 0x7;
- int bas = (1 << len) - 1;
-
- wp.wcr = deposit32(wp.wcr, 5 + off, 8 - off, bas);
- } else {
- /* For ranges above 8 bytes we need to be a power of 2 */
- if (is_power_of_2(len)) {
- int bits = ctz64(len);
-
- wp.wvr &= ~((1 << bits) - 1);
- wp.wcr = deposit32(wp.wcr, 24, 4, bits);
- wp.wcr = deposit32(wp.wcr, 5, 8, 0xff);
- } else {
- return -ENOBUFS;
- }
- }
-
- g_array_append_val(hw_watchpoints, wp);
- return 0;
-}
-
-
-static bool check_watchpoint_in_range(int i, target_ulong addr)
-{
- HWWatchpoint *wp = get_hw_wp(i);
- uint64_t addr_top, addr_bottom = wp->wvr;
- int bas = extract32(wp->wcr, 5, 8);
- int mask = extract32(wp->wcr, 24, 4);
-
- if (mask) {
- addr_top = addr_bottom + (1 << mask);
- } else {
- /* BAS must be contiguous but can offset against the base
- * address in DBGWVR */
- addr_bottom = addr_bottom + ctz32(bas);
- addr_top = addr_bottom + clo32(bas);
- }
-
- if (addr >= addr_bottom && addr <= addr_top) {
- return true;
- }
-
- return false;
-}
-
-/**
- * delete_hw_watchpoint()
- * @addr: address of breakpoint
- *
- * Delete a breakpoint and shuffle any above down
- */
-
-static int delete_hw_watchpoint(target_ulong addr,
- target_ulong len, int type)
-{
- int i;
- for (i = 0; i < cur_hw_wps; i++) {
- if (check_watchpoint_in_range(i, addr)) {
- g_array_remove_index(hw_watchpoints, i);
- return 0;
- }
- }
- return -ENOENT;
-}
-
-
-int kvm_arch_insert_hw_breakpoint(target_ulong addr,
- target_ulong len, int type)
-{
- switch (type) {
- case GDB_BREAKPOINT_HW:
- return insert_hw_breakpoint(addr);
- break;
- case GDB_WATCHPOINT_READ:
- case GDB_WATCHPOINT_WRITE:
- case GDB_WATCHPOINT_ACCESS:
- return insert_hw_watchpoint(addr, len, type);
- default:
- return -ENOSYS;
- }
-}
-
-int kvm_arch_remove_hw_breakpoint(target_ulong addr,
- target_ulong len, int type)
-{
- switch (type) {
- case GDB_BREAKPOINT_HW:
- return delete_hw_breakpoint(addr);
- break;
- case GDB_WATCHPOINT_READ:
- case GDB_WATCHPOINT_WRITE:
- case GDB_WATCHPOINT_ACCESS:
- return delete_hw_watchpoint(addr, len, type);
- default:
- return -ENOSYS;
- }
-}
-
-
-void kvm_arch_remove_all_hw_breakpoints(void)
-{
- if (cur_hw_wps > 0) {
- g_array_remove_range(hw_watchpoints, 0, cur_hw_wps);
- }
- if (cur_hw_bps > 0) {
- g_array_remove_range(hw_breakpoints, 0, cur_hw_bps);
- }
-}
-
-void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr)
-{
- int i;
- memset(ptr, 0, sizeof(struct kvm_guest_debug_arch));
-
- for (i = 0; i < max_hw_wps; i++) {
- HWWatchpoint *wp = get_hw_wp(i);
- ptr->dbg_wcr[i] = wp->wcr;
- ptr->dbg_wvr[i] = wp->wvr;
- }
- for (i = 0; i < max_hw_bps; i++) {
- HWBreakpoint *bp = get_hw_bp(i);
- ptr->dbg_bcr[i] = bp->bcr;
- ptr->dbg_bvr[i] = bp->bvr;
- }
-}
-
-bool kvm_arm_hw_debug_active(CPUState *cs)
-{
- return ((cur_hw_wps > 0) || (cur_hw_bps > 0));
-}
-
-static bool find_hw_breakpoint(CPUState *cpu, target_ulong pc)
-{
- int i;
-
- for (i = 0; i < cur_hw_bps; i++) {
- HWBreakpoint *bp = get_hw_bp(i);
- if (bp->bvr == pc) {
- return true;
- }
- }
- return false;
-}
-
-static CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr)
-{
- int i;
-
- for (i = 0; i < cur_hw_wps; i++) {
- if (check_watchpoint_in_range(i, addr)) {
- return &get_hw_wp(i)->details;
- }
- }
- return NULL;
-}
-
-static bool kvm_arm_pmu_set_attr(CPUState *cs, struct kvm_device_attr *attr)
-{
- int err;
-
- err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr);
- if (err != 0) {
- error_report("PMU: KVM_HAS_DEVICE_ATTR: %s", strerror(-err));
- return false;
- }
-
- err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, attr);
- if (err != 0) {
- error_report("PMU: KVM_SET_DEVICE_ATTR: %s", strerror(-err));
- return false;
- }
-
- return true;
-}
-
-void kvm_arm_pmu_init(CPUState *cs)
-{
- struct kvm_device_attr attr = {
- .group = KVM_ARM_VCPU_PMU_V3_CTRL,
- .attr = KVM_ARM_VCPU_PMU_V3_INIT,
- };
-
- if (!ARM_CPU(cs)->has_pmu) {
- return;
- }
- if (!kvm_arm_pmu_set_attr(cs, &attr)) {
- error_report("failed to init PMU");
- abort();
- }
-}
-
-void kvm_arm_pmu_set_irq(CPUState *cs, int irq)
-{
- struct kvm_device_attr attr = {
- .group = KVM_ARM_VCPU_PMU_V3_CTRL,
- .addr = (intptr_t)&irq,
- .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
- };
-
- if (!ARM_CPU(cs)->has_pmu) {
- return;
- }
- if (!kvm_arm_pmu_set_attr(cs, &attr)) {
- error_report("failed to set irq for PMU");
- abort();
- }
-}
-
-static inline void set_feature(uint64_t *features, int feature)
-{
- *features |= 1ULL << feature;
-}
-
-static inline void unset_feature(uint64_t *features, int feature)
-{
- *features &= ~(1ULL << feature);
-}
-
-bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
-{
- /* Identify the feature bits corresponding to the host CPU, and
- * fill out the ARMHostCPUClass fields accordingly. To do this
- * we have to create a scratch VM, create a single CPU inside it,
- * and then query that CPU for the relevant ID registers.
- * For AArch64 we currently don't care about ID registers at
- * all; we just want to know the CPU type.
- */
- int fdarray[3];
- uint64_t features = 0;
- /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
- * we know these will only support creating one kind of guest CPU,
- * which is their preferred CPU type. Fortunately these old kernels
- * support only a very limited number of CPUs.
- */
- static const uint32_t cpus_to_try[] = {
- KVM_ARM_TARGET_AEM_V8,
- KVM_ARM_TARGET_FOUNDATION_V8,
- KVM_ARM_TARGET_CORTEX_A57,
- QEMU_KVM_ARM_TARGET_NONE
- };
- struct kvm_vcpu_init init;
-
- if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
- return false;
- }
-
- ahcf->target = init.target;
- ahcf->dtb_compatible = "arm,arm-v8";
-
- kvm_arm_destroy_scratch_host_vcpu(fdarray);
-
- /* We can assume any KVM supporting CPU is at least a v8
- * with VFPv4+Neon; this in turn implies most of the other
- * feature bits.
- */
- set_feature(&features, ARM_FEATURE_V8);
- set_feature(&features, ARM_FEATURE_VFP4);
- set_feature(&features, ARM_FEATURE_NEON);
- set_feature(&features, ARM_FEATURE_AARCH64);
- set_feature(&features, ARM_FEATURE_PMU);
-
- ahcf->features = features;
-
- return true;
-}
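The probe above only records a target id, a dtb-compatible string and a coarse feature mask; kvm_arm_set_cpu_features_from_host() is the consumer. A minimal sketch of that consumption step — the ARMCPU field names are taken on trust from the surrounding code and should be read as assumptions, not as a verbatim excerpt:

    /* Sketch only: how a caller might apply the probed ARMHostCPUFeatures.
     * Field names mirror the probe above; this is not a copy of QEMU's
     * kvm_arm_set_cpu_features_from_host(). */
    static void apply_host_cpu_features(ARMCPU *cpu, const ARMHostCPUFeatures *ahcf)
    {
        cpu->kvm_target = ahcf->target;              /* e.g. KVM_ARM_TARGET_CORTEX_A57 */
        cpu->dtb_compatible = ahcf->dtb_compatible;  /* "arm,arm-v8" */
        cpu->env.features = ahcf->features;          /* V8 | VFP4 | NEON | AARCH64 | PMU */
    }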
-
-#define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5
-
-int kvm_arch_init_vcpu(CPUState *cs)
-{
- int ret;
- uint64_t mpidr;
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
-
- if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
- !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
- fprintf(stderr, "KVM is not supported for this guest CPU type\n");
- return -EINVAL;
- }
-
- /* Determine init features for this CPU */
- memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
- if (cpu->start_powered_off) {
- cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
- }
- if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
- cpu->psci_version = 2;
- cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
- }
- if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
- cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
- }
- if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
- cpu->has_pmu = false;
- }
- if (cpu->has_pmu) {
- cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
- } else {
- unset_feature(&env->features, ARM_FEATURE_PMU);
- }
-
- /* Do KVM_ARM_VCPU_INIT ioctl */
- ret = kvm_arm_vcpu_init(cs);
- if (ret) {
- return ret;
- }
-
- /*
- * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
- * Currently KVM has its own idea about MPIDR assignment, so we
- * override our defaults with what we get from KVM.
- */
- ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr);
- if (ret) {
- return ret;
- }
- cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;
-
- kvm_arm_init_debug(cs);
-
- return kvm_arm_init_cpreg_list(cpu);
-}
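ARM_CPU_ID_MPIDR above packs the five system-register encoding fields (op0, op1, CRn, CRm, op2) of MPIDR_EL1, and ARM64_SYS_REG() turns them into a KVM register id for kvm_get_one_reg(). A small sketch reading another ID register the same way; the MIDR_EL1 encoding (3, 0, 0, 0, 0) is architectural, but the helper itself is illustrative and not part of this file:

    /* Illustrative only: read MIDR_EL1 with the same op0/op1/CRn/CRm/op2
     * encoding scheme used by ARM_CPU_ID_MPIDR above. */
    #define ARM_CPU_ID_MIDR 3, 0, 0, 0, 0

    static int read_host_midr(CPUState *cs, uint64_t *midr)
    {
        return kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MIDR), midr);
    }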
-
-bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
-{
- /* Return true if the regidx is a register we should synchronize
- * via the cpreg_tuples array (i.e. it is not a core reg we sync by
- * hand in kvm_arch_get/put_registers()).
- */
- switch (regidx & KVM_REG_ARM_COPROC_MASK) {
- case KVM_REG_ARM_CORE:
- return false;
- default:
- return true;
- }
-}
-
-typedef struct CPRegStateLevel {
- uint64_t regidx;
- int level;
-} CPRegStateLevel;
-
-/* All system registers not listed in the following table are assumed to be
- * of the level KVM_PUT_RUNTIME_STATE. If a register should be written less
- * often, you must add it to this table with a state of either
- * KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE.
- */
-static const CPRegStateLevel non_runtime_cpregs[] = {
- { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
-};
-
-int kvm_arm_cpreg_level(uint64_t regidx)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) {
- const CPRegStateLevel *l = &non_runtime_cpregs[i];
- if (l->regidx == regidx) {
- return l->level;
- }
- }
-
- return KVM_PUT_RUNTIME_STATE;
-}
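As the comment above says, anything missing from non_runtime_cpregs[] defaults to KVM_PUT_RUNTIME_STATE. Adding a register that only needs to be pushed on reset or full state sync would look like the following; the second entry is purely hypothetical:

    /* Hypothetical extension of the table above: a register that should only
     * be written back to KVM on reset/full sync. */
    static const CPRegStateLevel non_runtime_cpregs_example[] = {
        { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
        { ARM64_SYS_REG(3, 0, 1, 0, 0), KVM_PUT_RESET_STATE }, /* hypothetical entry */
    };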
-
-#define AARCH64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
- KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-
-#define AARCH64_SIMD_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \
- KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-
-#define AARCH64_SIMD_CTRL_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
- KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-
-int kvm_arch_put_registers(CPUState *cs, int level)
-{
- struct kvm_one_reg reg;
- uint32_t fpr;
- uint64_t val;
- int i;
- int ret;
- unsigned int el;
-
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
-
- /* If we are in AArch32 mode then we need to copy the AArch32 regs to the
- * AArch64 registers before pushing them out to 64-bit KVM.
- */
- if (!is_a64(env)) {
- aarch64_sync_32_to_64(env);
- }
-
- for (i = 0; i < 31; i++) {
- reg.id = AARCH64_CORE_REG(regs.regs[i]);
- reg.addr = (uintptr_t) &env->xregs[i];
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
- }
-
- /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
- * QEMU side we keep the current SP in xregs[31] as well.
- */
- aarch64_save_sp(env, 1);
-
- reg.id = AARCH64_CORE_REG(regs.sp);
- reg.addr = (uintptr_t) &env->sp_el[0];
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
-
- reg.id = AARCH64_CORE_REG(sp_el1);
- reg.addr = (uintptr_t) &env->sp_el[1];
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
-
- /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */
- if (is_a64(env)) {
- val = pstate_read(env);
- } else {
- val = cpsr_read(env);
- }
- reg.id = AARCH64_CORE_REG(regs.pstate);
- reg.addr = (uintptr_t) &val;
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
-
- reg.id = AARCH64_CORE_REG(regs.pc);
- reg.addr = (uintptr_t) &env->pc;
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
-
- reg.id = AARCH64_CORE_REG(elr_el1);
- reg.addr = (uintptr_t) &env->elr_el[1];
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
-
- /* Saved Program State Registers
- *
- * Before we restore from the banked_spsr[] array we need to
- * ensure that any modifications to env->spsr are correctly
- * reflected in the banks.
- */
- el = arm_current_el(env);
- if (el > 0 && !is_a64(env)) {
- i = bank_number(env->uncached_cpsr & CPSR_M);
- env->banked_spsr[i] = env->spsr;
- }
-
- /* KVM 0-4 map to QEMU banks 1-5 */
- for (i = 0; i < KVM_NR_SPSR; i++) {
- reg.id = AARCH64_CORE_REG(spsr[i]);
- reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
- }
-
- /* Advanced SIMD and FP registers. */
- for (i = 0; i < 32; i++) {
- uint64_t *q = aa64_vfp_qreg(env, i);
-#ifdef HOST_WORDS_BIGENDIAN
- uint64_t fp_val[2] = { q[1], q[0] };
- reg.addr = (uintptr_t)fp_val;
-#else
- reg.addr = (uintptr_t)q;
-#endif
- reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
- }
-
- reg.addr = (uintptr_t)(&fpr);
- fpr = vfp_get_fpsr(env);
- reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
-
- fpr = vfp_get_fpcr(env);
- reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
- ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
-
- if (!write_list_to_kvmstate(cpu, level)) {
- return -EINVAL;
- }
-
- kvm_arm_sync_mpstate_to_kvm(cpu);
-
- return ret;
-}
-
-int kvm_arch_get_registers(CPUState *cs)
-{
- struct kvm_one_reg reg;
- uint64_t val;
- uint32_t fpr;
- unsigned int el;
- int i;
- int ret;
-
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
-
- for (i = 0; i < 31; i++) {
- reg.id = AARCH64_CORE_REG(regs.regs[i]);
- reg.addr = (uintptr_t) &env->xregs[i];
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
- }
-
- reg.id = AARCH64_CORE_REG(regs.sp);
- reg.addr = (uintptr_t) &env->sp_el[0];
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
-
- reg.id = AARCH64_CORE_REG(sp_el1);
- reg.addr = (uintptr_t) &env->sp_el[1];
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
-
- reg.id = AARCH64_CORE_REG(regs.pstate);
- reg.addr = (uintptr_t) &val;
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
-
- env->aarch64 = ((val & PSTATE_nRW) == 0);
- if (is_a64(env)) {
- pstate_write(env, val);
- } else {
- cpsr_write(env, val, 0xffffffff, CPSRWriteRaw);
- }
-
- /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
- * QEMU side we keep the current SP in xregs[31] as well.
- */
- aarch64_restore_sp(env, 1);
-
- reg.id = AARCH64_CORE_REG(regs.pc);
- reg.addr = (uintptr_t) &env->pc;
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
-
- /* If we are in AArch32 mode then we need to sync the AArch32 regs with the
- * incoming AArch64 regs received from 64-bit KVM.
- * We must perform this after all of the registers have been acquired from
- * the kernel.
- */
- if (!is_a64(env)) {
- aarch64_sync_64_to_32(env);
- }
-
- reg.id = AARCH64_CORE_REG(elr_el1);
- reg.addr = (uintptr_t) &env->elr_el[1];
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
-
- /* Fetch the SPSR registers
- *
- * KVM SPSRs 0-4 map to QEMU banks 1-5
- */
- for (i = 0; i < KVM_NR_SPSR; i++) {
- reg.id = AARCH64_CORE_REG(spsr[i]);
- reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
- }
-
- el = arm_current_el(env);
- if (el > 0 && !is_a64(env)) {
- i = bank_number(env->uncached_cpsr & CPSR_M);
- env->spsr = env->banked_spsr[i];
- }
-
- /* Advanced SIMD and FP registers */
- for (i = 0; i < 32; i++) {
- uint64_t *q = aa64_vfp_qreg(env, i);
- reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
- reg.addr = (uintptr_t)q;
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
- if (ret) {
- return ret;
- } else {
-#ifdef HOST_WORDS_BIGENDIAN
- uint64_t t;
- t = q[0], q[0] = q[1], q[1] = t;
-#endif
- }
- }
-
- reg.addr = (uintptr_t)(&fpr);
- reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
- vfp_set_fpsr(env, fpr);
-
- reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
- ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
- if (ret) {
- return ret;
- }
- vfp_set_fpcr(env, fpr);
-
- if (!write_kvmstate_to_list(cpu)) {
- return -EINVAL;
- }
- /* Note that it's OK to have registers which aren't in CPUState,
- * so we can ignore a failure return here.
- */
- write_list_to_cpustate(cpu);
-
- kvm_arm_sync_mpstate_to_qemu(cpu);
-
- /* TODO: other registers */
- return ret;
-}
-
-/* C6.6.29 BRK instruction */
-static const uint32_t brk_insn = 0xd4200000;
-
-int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
-{
- if (have_guest_debug) {
- if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) ||
- cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) {
- return -EINVAL;
- }
- return 0;
- } else {
- error_report("guest debug not supported on this kernel");
- return -EINVAL;
- }
-}
-
-int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
-{
- static uint32_t brk;
-
- if (have_guest_debug) {
- if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) ||
- brk != brk_insn ||
- cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) {
- return -EINVAL;
- }
- return 0;
- } else {
- error_report("guest debug not supported on this kernel");
- return -EINVAL;
- }
-}
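The 0xd4200000 constant above is the A64 BRK instruction with a zero immediate; the 16-bit immediate field sits in bits [20:5] of the encoding. A sketch of how a non-zero immediate would be built (QEMU only ever plants BRK #0 here):

    /* A64 BRK #imm16: base opcode 0xd4200000, immediate in bits [20:5]. */
    static inline uint32_t a64_brk_insn(uint16_t imm16)
    {
        return 0xd4200000u | ((uint32_t)imm16 << 5);
    }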
-
-/* See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register
- *
- * To minimise translation between the kernel and user space, the kernel
- * ABI simply provides user space with the full exception syndrome
- * register value, to be decoded in QEMU.
- */
-
-bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
-{
- int hsr_ec = debug_exit->hsr >> ARM_EL_EC_SHIFT;
- ARMCPU *cpu = ARM_CPU(cs);
- CPUClass *cc = CPU_GET_CLASS(cs);
- CPUARMState *env = &cpu->env;
-
- /* Ensure PC is synchronised */
- kvm_cpu_synchronize_state(cs);
-
- switch (hsr_ec) {
- case EC_SOFTWARESTEP:
- if (cs->singlestep_enabled) {
- return true;
- } else {
- /*
- * The kernel should have suppressed the guest's ability to
- * single step at this point so something has gone wrong.
- */
- error_report("%s: guest single-step while debugging unsupported"
- " (%"PRIx64", %"PRIx32")",
- __func__, env->pc, debug_exit->hsr);
- return false;
- }
- break;
- case EC_AA64_BKPT:
- if (kvm_find_sw_breakpoint(cs, env->pc)) {
- return true;
- }
- break;
- case EC_BREAKPOINT:
- if (find_hw_breakpoint(cs, env->pc)) {
- return true;
- }
- break;
- case EC_WATCHPOINT:
- {
- CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far);
- if (wp) {
- cs->watchpoint_hit = wp;
- return true;
- }
- break;
- }
- default:
- error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")",
- __func__, debug_exit->hsr, env->pc);
- }
-
- /* If we are not handling the debug exception it must belong to
- * the guest. Let's re-use the existing TCG interrupt code to set
- * everything up properly.
- */
- cs->exception_index = EXCP_BKPT;
- env->exception.syndrome = debug_exit->hsr;
- env->exception.vaddress = debug_exit->far;
- cc->do_interrupt(cs);
-
- return false;
-}
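The handler above only ever inspects the exception class of the syndrome; for reference, ESR_ELx/HSR carries EC in bits [31:26], IL in bit [25] and the instruction-specific syndrome (ISS) in bits [24:0]. A small decoding sketch (the helper names are illustrative):

    /* Illustrative accessors for the syndrome fields used above;
     * ARM_EL_EC_SHIFT in QEMU's syndrome definitions is 26. */
    static inline uint32_t esr_ec(uint32_t esr)  { return esr >> 26; }         /* [31:26] */
    static inline uint32_t esr_il(uint32_t esr)  { return (esr >> 25) & 1; }   /* [25]    */
    static inline uint32_t esr_iss(uint32_t esr) { return esr & 0x01ffffff; }  /* [24:0]  */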
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index 5948e8b560..cfaa0d9bc7 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -12,20 +12,9 @@
#define QEMU_KVM_ARM_H
#include "sysemu/kvm.h"
-#include "exec/memory.h"
-#include "qemu/error-report.h"
-/**
- * kvm_arm_vcpu_init:
- * @cs: CPUState
- *
- * Initialize (or reinitialize) the VCPU by invoking the
- * KVM_ARM_VCPU_INIT ioctl with the CPU type and feature
- * bitmask specified in the CPUState.
- *
- * Returns: 0 if success else < 0 error code
- */
-int kvm_arm_vcpu_init(CPUState *cs);
+#define KVM_ARM_VGIC_V2 (1 << 0)
+#define KVM_ARM_VGIC_V3 (1 << 1)
/**
* kvm_arm_register_device:
@@ -49,37 +38,6 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
uint64_t attr, int dev_fd, uint64_t addr_ormask);
/**
- * kvm_arm_init_cpreg_list:
- * @cpu: ARMCPU
- *
- * Initialize the ARMCPU cpreg list according to the kernel's
- * definition of what CPU registers it knows about (and throw away
- * the previous TCG-created cpreg list).
- *
- * Returns: 0 if success, else < 0 error code
- */
-int kvm_arm_init_cpreg_list(ARMCPU *cpu);
-
-/**
- * kvm_arm_reg_syncs_via_cpreg_list
- * regidx: KVM register index
- *
- * Return true if this KVM register should be synchronized via the
- * cpreg list of arbitrary system registers, false if it is synchronized
- * by hand using code in kvm_arch_get/put_registers().
- */
-bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx);
-
-/**
- * kvm_arm_cpreg_level
- * regidx: KVM register index
- *
- * Return the level of this coprocessor/system register. Return value is
- * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE.
- */
-int kvm_arm_cpreg_level(uint64_t regidx);
-
-/**
* write_list_to_kvmstate:
* @cpu: ARMCPU
* @level: the state level to sync
@@ -114,6 +72,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level);
bool write_kvmstate_to_list(ARMCPU *cpu);
/**
+ * kvm_arm_cpu_pre_save:
+ * @cpu: ARMCPU
+ *
+ * Called after write_kvmstate_to_list() from cpu_pre_save() to update
+ * the cpreg list with KVM CPU state.
+ */
+void kvm_arm_cpu_pre_save(ARMCPU *cpu);
+
+/**
+ * kvm_arm_cpu_post_load:
+ * @cpu: ARMCPU
+ *
+ * Called from cpu_post_load() to update KVM CPU state from the cpreg list.
+ */
+void kvm_arm_cpu_post_load(ARMCPU *cpu);
+
+/**
* kvm_arm_reset_vcpu:
* @cpu: ARMCPU
*
@@ -152,26 +127,15 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
*/
void kvm_arm_destroy_scratch_host_vcpu(int *fdarray);
-#define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU
-
/**
- * ARMHostCPUFeatures: information about the host CPU (identified
- * by asking the host kernel)
- */
-typedef struct ARMHostCPUFeatures {
- uint64_t features;
- uint32_t target;
- const char *dtb_compatible;
-} ARMHostCPUFeatures;
-
-/**
- * kvm_arm_get_host_cpu_features:
- * @ahcc: ARMHostCPUClass to fill in
+ * kvm_arm_sve_get_vls:
+ * @cpu: ARMCPU
*
- * Probe the capabilities of the host kernel's preferred CPU and fill
- * in the ARMHostCPUClass struct accordingly.
+ * Get all the SVE vector lengths supported by the KVM host. For each
+ * supported length, set the bit corresponding to its length in quadwords
+ * minus one (vq - 1), up to ARM_MAX_VQ. Return the resulting map.
*/
-bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf);
+uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu);
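A sketch of how a caller might walk the returned map: bit (vq - 1) set means a supported vector length of vq quadwords, i.e. vq * 16 bytes. The helper below is illustrative and not part of this header:

    /* Illustrative consumer of the vq map returned by kvm_arm_sve_get_vls(). */
    #include <stdio.h>

    static void print_supported_sve_lengths(uint32_t vq_map)
    {
        for (int vq = 1; vq <= 32; vq++) {
            if (vq_map & (1u << (vq - 1))) {
                printf("SVE vector length: %d bytes\n", vq * 16);
            }
        }
    }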
/**
* kvm_arm_set_cpu_features_from_host:
@@ -183,126 +147,142 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf);
void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu);
/**
- * kvm_arm_sync_mpstate_to_kvm
- * @cpu: ARMCPU
+ * kvm_arm_add_vcpu_properties:
+ * @cpu: The CPU object to add the properties to
*
- * If supported set the KVM MP_STATE based on QEMU's model.
+ * Add all KVM specific CPU properties to the CPU object. These
+ * are the CPU properties with "kvm-" prefixed names.
*/
-int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu);
+void kvm_arm_add_vcpu_properties(ARMCPU *cpu);
/**
- * kvm_arm_sync_mpstate_to_qemu
- * @cpu: ARMCPU
+ * kvm_arm_steal_time_finalize:
+ * @cpu: ARMCPU for which to finalize kvm-steal-time
+ * @errp: Pointer to Error* for error propagation
+ *
+ * Validate the kvm-steal-time property selection and set its default
+ * based on KVM support and guest configuration.
+ */
+void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp);
+
+/**
+ * kvm_arm_aarch32_supported:
+ *
+ * Returns: true if KVM can enable AArch32 mode
+ * and false otherwise.
+ */
+bool kvm_arm_aarch32_supported(void);
+
+/**
+ * kvm_arm_pmu_supported:
+ *
+ * Returns: true if KVM can enable the PMU
+ * and false otherwise.
+ */
+bool kvm_arm_pmu_supported(void);
+
+/**
+ * kvm_arm_sve_supported:
+ *
+ * Returns true if KVM can enable SVE and false otherwise.
+ */
+bool kvm_arm_sve_supported(void);
+
+/**
+ * kvm_arm_get_max_vm_ipa_size:
+ * @ms: Machine state handle
+ * @fixed_ipa: True when the IPA limit is fixed at 40. This is the case
+ * for legacy KVM.
*
- * If supported get the MP_STATE from KVM and store in QEMU's model.
+ * Returns the number of bits in the IPA address space supported by KVM
*/
-int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu);
+int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa);
int kvm_arm_vgic_probe(void);
-void kvm_arm_pmu_set_irq(CPUState *cs, int irq);
-void kvm_arm_pmu_init(CPUState *cs);
+void kvm_arm_pmu_init(ARMCPU *cpu);
+void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq);
+
+/**
+ * kvm_arm_pvtime_init:
+ * @cpu: ARMCPU
+ * @ipa: Per-vcpu guest physical base address of the pvtime structures
+ *
+ * Initializes PVTIME for the VCPU, setting the PVTIME IPA to @ipa.
+ */
+void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa);
+
+int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level);
#else
-static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu)
+/*
+ * It's safe to call these functions without KVM support.
+ * They should either do nothing or return "not supported".
+ */
+static inline bool kvm_arm_aarch32_supported(void)
{
- /* This should never actually be called in the "not KVM" case,
- * but set up the fields to indicate an error anyway.
- */
- cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE;
- cpu->host_cpu_probe_failed = true;
+ return false;
}
-static inline int kvm_arm_vgic_probe(void)
+static inline bool kvm_arm_pmu_supported(void)
{
- return 0;
+ return false;
}
-static inline void kvm_arm_pmu_set_irq(CPUState *cs, int irq) {}
-static inline void kvm_arm_pmu_init(CPUState *cs) {}
+static inline bool kvm_arm_sve_supported(void)
+{
+ return false;
+}
-#endif
+/*
+ * These functions should never actually be called without KVM support.
+ */
+static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu)
+{
+ g_assert_not_reached();
+}
-static inline const char *gic_class_name(void)
+static inline void kvm_arm_add_vcpu_properties(ARMCPU *cpu)
{
- return kvm_irqchip_in_kernel() ? "kvm-arm-gic" : "arm_gic";
+ g_assert_not_reached();
}
-/**
- * gicv3_class_name
- *
- * Return name of GICv3 class to use depending on whether KVM acceleration is
- * in use. May throw an error if the chosen implementation is not available.
- *
- * Returns: class name to use
- */
-static inline const char *gicv3_class_name(void)
+static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa)
{
- if (kvm_irqchip_in_kernel()) {
-#ifdef TARGET_AARCH64
- return "kvm-arm-gicv3";
-#else
- error_report("KVM GICv3 acceleration is not supported on this "
- "platform");
- exit(1);
-#endif
- } else {
- if (kvm_enabled()) {
- error_report("Userspace GICv3 is not supported with KVM");
- exit(1);
- }
- return "arm-gicv3";
- }
+ g_assert_not_reached();
}
-/**
- * kvm_arm_handle_debug:
- * @cs: CPUState
- * @debug_exit: debug part of the KVM exit structure
- *
- * Returns: TRUE if the debug exception was handled.
- */
-bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit);
+static inline int kvm_arm_vgic_probe(void)
+{
+ g_assert_not_reached();
+}
-/**
- * kvm_arm_hw_debug_active:
- * @cs: CPU State
- *
- * Return: TRUE if any hardware breakpoints in use.
- */
+static inline void kvm_arm_pmu_set_irq(ARMCPU *cpu, int irq)
+{
+ g_assert_not_reached();
+}
-bool kvm_arm_hw_debug_active(CPUState *cs);
+static inline void kvm_arm_pmu_init(ARMCPU *cpu)
+{
+ g_assert_not_reached();
+}
-/**
- * kvm_arm_copy_hw_debug_data:
- *
- * @ptr: kvm_guest_debug_arch structure
- *
- * Copy the architecture specific debug registers into the
- * kvm_guest_debug ioctl structure.
- */
-struct kvm_guest_debug_arch;
+static inline void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa)
+{
+ g_assert_not_reached();
+}
-void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr);
+static inline void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp)
+{
+ g_assert_not_reached();
+}
-/**
- * its_class_name
- *
- * Return the ITS class name to use depending on whether KVM acceleration
- * and KVM CAP_SIGNAL_MSI are supported
- *
- * Returns: class name to use or NULL
- */
-static inline const char *its_class_name(void)
+static inline uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
{
- if (kvm_irqchip_in_kernel()) {
- /* KVM implementation requires this capability */
- return kvm_direct_msi_enabled() ? "arm-its-kvm" : NULL;
- } else {
- /* Software emulation is not implemented yet */
- return NULL;
- }
+ g_assert_not_reached();
}
#endif
+
+#endif
diff --git a/target/arm/machine.c b/target/arm/machine.c
index ff4ec22bf7..b2b39b2475 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -1,24 +1,25 @@
#include "qemu/osdep.h"
-#include "qemu-common.h"
#include "cpu.h"
-#include "hw/hw.h"
-#include "hw/boards.h"
#include "qemu/error-report.h"
#include "sysemu/kvm.h"
+#include "sysemu/tcg.h"
#include "kvm_arm.h"
#include "internals.h"
+#include "cpu-features.h"
#include "migration/cpu.h"
+#include "target/arm/gtimer.h"
static bool vfp_needed(void *opaque)
{
ARMCPU *cpu = opaque;
- CPUARMState *env = &cpu->env;
- return arm_feature(env, ARM_FEATURE_VFP);
+ return (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)
+ ? cpu_isar_feature(aa64_fp_simd, cpu)
+ : cpu_isar_feature(aa32_vfp_simd, cpu));
}
static int get_fpscr(QEMUFile *f, void *opaque, size_t size,
- VMStateField *field)
+ const VMStateField *field)
{
ARMCPU *cpu = opaque;
CPUARMState *env = &cpu->env;
@@ -29,7 +30,7 @@ static int get_fpscr(QEMUFile *f, void *opaque, size_t size,
}
static int put_fpscr(QEMUFile *f, void *opaque, size_t size,
- VMStateField *field, QJSON *vmdesc)
+ const VMStateField *field, JSONWriter *vmdesc)
{
ARMCPU *cpu = opaque;
CPUARMState *env = &cpu->env;
@@ -49,7 +50,7 @@ static const VMStateDescription vmstate_vfp = {
.version_id = 3,
.minimum_version_id = 3,
.needed = vfp_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
/* For compatibility, store Qn out of Zn here. */
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[0].d, ARMCPU, 0, 2),
VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[1].d, ARMCPU, 0, 2),
@@ -115,7 +116,7 @@ static const VMStateDescription vmstate_iwmmxt = {
.version_id = 1,
.minimum_version_id = 1,
.needed = iwmmxt_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT64_ARRAY(env.iwmmxt.regs, ARMCPU, 16),
VMSTATE_UINT32_ARRAY(env.iwmmxt.cregs, ARMCPU, 16),
VMSTATE_END_OF_LIST()
@@ -131,9 +132,8 @@ static const VMStateDescription vmstate_iwmmxt = {
static bool sve_needed(void *opaque)
{
ARMCPU *cpu = opaque;
- CPUARMState *env = &cpu->env;
- return arm_feature(env, ARM_FEATURE_SVE);
+ return cpu_isar_feature(aa64_sve, cpu);
}
/* The first two words of each Zreg is stored in VFP state. */
@@ -141,7 +141,7 @@ static const VMStateDescription vmstate_zreg_hi_reg = {
.name = "cpu/sve/zreg_hi",
.version_id = 1,
.minimum_version_id = 1,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT64_SUB_ARRAY(d, ARMVectorReg, 2, ARM_MAX_VQ - 2),
VMSTATE_END_OF_LIST()
}
@@ -151,7 +151,7 @@ static const VMStateDescription vmstate_preg_reg = {
.name = "cpu/sve/preg",
.version_id = 1,
.minimum_version_id = 1,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT64_ARRAY(p, ARMPredicateReg, 2 * ARM_MAX_VQ / 8),
VMSTATE_END_OF_LIST()
}
@@ -162,7 +162,7 @@ static const VMStateDescription vmstate_sve = {
.version_id = 1,
.minimum_version_id = 1,
.needed = sve_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_STRUCT_ARRAY(env.vfp.zregs, ARMCPU, 32, 0,
vmstate_zreg_hi_reg, ARMVectorReg),
VMSTATE_STRUCT_ARRAY(env.vfp.pregs, ARMCPU, 17, 0,
@@ -170,8 +170,78 @@ static const VMStateDescription vmstate_sve = {
VMSTATE_END_OF_LIST()
}
};
+
+static const VMStateDescription vmstate_vreg = {
+ .name = "vreg",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT64_ARRAY(d, ARMVectorReg, ARM_MAX_VQ * 2),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static bool za_needed(void *opaque)
+{
+ ARMCPU *cpu = opaque;
+
+ /*
+ * When ZA storage is disabled, its contents are discarded.
+ * It will be zeroed when ZA storage is re-enabled.
+ */
+ return FIELD_EX64(cpu->env.svcr, SVCR, ZA);
+}
+
+static const VMStateDescription vmstate_za = {
+ .name = "cpu/sme",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = za_needed,
+ .fields = (const VMStateField[]) {
+ VMSTATE_STRUCT_ARRAY(env.zarray, ARMCPU, ARM_MAX_VQ * 16, 0,
+ vmstate_vreg, ARMVectorReg),
+ VMSTATE_END_OF_LIST()
+ }
+};
#endif /* AARCH64 */
+static bool serror_needed(void *opaque)
+{
+ ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
+
+ return env->serror.pending != 0;
+}
+
+static const VMStateDescription vmstate_serror = {
+ .name = "cpu/serror",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = serror_needed,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT8(env.serror.pending, ARMCPU),
+ VMSTATE_UINT8(env.serror.has_esr, ARMCPU),
+ VMSTATE_UINT64(env.serror.esr, ARMCPU),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static bool irq_line_state_needed(void *opaque)
+{
+ return true;
+}
+
+static const VMStateDescription vmstate_irq_line_state = {
+ .name = "cpu/irq-line-state",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = irq_line_state_needed,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT32(env.irq_line_state, ARMCPU),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static bool m_needed(void *opaque)
{
ARMCPU *cpu = opaque;
@@ -185,7 +255,7 @@ static const VMStateDescription vmstate_m_faultmask_primask = {
.version_id = 1,
.minimum_version_id = 1,
.needed = m_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32(env.v7m.faultmask[M_REG_NS], ARMCPU),
VMSTATE_UINT32(env.v7m.primask[M_REG_NS], ARMCPU),
VMSTATE_END_OF_LIST()
@@ -220,7 +290,7 @@ static const VMStateDescription vmstate_m_csselr = {
.version_id = 1,
.minimum_version_id = 1,
.needed = m_csselr_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32_ARRAY(env.v7m.csselr, ARMCPU, M_REG_NUM_BANKS),
VMSTATE_VALIDATE("CSSELR is valid", csselr_vmstate_validate),
VMSTATE_END_OF_LIST()
@@ -232,7 +302,7 @@ static const VMStateDescription vmstate_m_scr = {
.version_id = 1,
.minimum_version_id = 1,
.needed = m_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32(env.v7m.scr[M_REG_NS], ARMCPU),
VMSTATE_END_OF_LIST()
}
@@ -243,7 +313,7 @@ static const VMStateDescription vmstate_m_other_sp = {
.version_id = 1,
.minimum_version_id = 1,
.needed = m_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32(env.v7m.other_sp, ARMCPU),
VMSTATE_END_OF_LIST()
}
@@ -262,19 +332,53 @@ static const VMStateDescription vmstate_m_v8m = {
.version_id = 1,
.minimum_version_id = 1,
.needed = m_v8m_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32_ARRAY(env.v7m.msplim, ARMCPU, M_REG_NUM_BANKS),
VMSTATE_UINT32_ARRAY(env.v7m.psplim, ARMCPU, M_REG_NUM_BANKS),
VMSTATE_END_OF_LIST()
}
};
+static const VMStateDescription vmstate_m_fp = {
+ .name = "cpu/m/fp",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = vfp_needed,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT32_ARRAY(env.v7m.fpcar, ARMCPU, M_REG_NUM_BANKS),
+ VMSTATE_UINT32_ARRAY(env.v7m.fpccr, ARMCPU, M_REG_NUM_BANKS),
+ VMSTATE_UINT32_ARRAY(env.v7m.fpdscr, ARMCPU, M_REG_NUM_BANKS),
+ VMSTATE_UINT32_ARRAY(env.v7m.cpacr, ARMCPU, M_REG_NUM_BANKS),
+ VMSTATE_UINT32(env.v7m.nsacr, ARMCPU),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static bool mve_needed(void *opaque)
+{
+ ARMCPU *cpu = opaque;
+
+ return cpu_isar_feature(aa32_mve, cpu);
+}
+
+static const VMStateDescription vmstate_m_mve = {
+ .name = "cpu/m/mve",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = mve_needed,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT32(env.v7m.vpr, ARMCPU),
+ VMSTATE_UINT32(env.v7m.ltpsize, ARMCPU),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
static const VMStateDescription vmstate_m = {
.name = "cpu/m",
.version_id = 4,
.minimum_version_id = 4,
.needed = m_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32(env.v7m.vecbase[M_REG_NS], ARMCPU),
VMSTATE_UINT32(env.v7m.basepri[M_REG_NS], ARMCPU),
VMSTATE_UINT32(env.v7m.control[M_REG_NS], ARMCPU),
@@ -288,12 +392,14 @@ static const VMStateDescription vmstate_m = {
VMSTATE_INT32(env.v7m.exception, ARMCPU),
VMSTATE_END_OF_LIST()
},
- .subsections = (const VMStateDescription*[]) {
+ .subsections = (const VMStateDescription * const []) {
&vmstate_m_faultmask_primask,
&vmstate_m_csselr,
&vmstate_m_scr,
&vmstate_m_other_sp,
&vmstate_m_v8m,
+ &vmstate_m_fp,
+ &vmstate_m_mve,
NULL
}
};
@@ -311,7 +417,7 @@ static const VMStateDescription vmstate_thumb2ee = {
.version_id = 1,
.minimum_version_id = 1,
.needed = thumb2ee_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32(env.teecr, ARMCPU),
VMSTATE_UINT32(env.teehbr, ARMCPU),
VMSTATE_END_OF_LIST()
@@ -340,7 +446,7 @@ static const VMStateDescription vmstate_pmsav7 = {
.version_id = 1,
.minimum_version_id = 1,
.needed = pmsav7_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_VARRAY_UINT32(env.pmsav7.drbar, ARMCPU, pmsav7_dregion, 0,
vmstate_info_uint32, uint32_t),
VMSTATE_VARRAY_UINT32(env.pmsav7.drsr, ARMCPU, pmsav7_dregion, 0,
@@ -369,7 +475,7 @@ static const VMStateDescription vmstate_pmsav7_rnr = {
.version_id = 1,
.minimum_version_id = 1,
.needed = pmsav7_rnr_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32(env.pmsav7.rnr[M_REG_NS], ARMCPU),
VMSTATE_END_OF_LIST()
}
@@ -384,12 +490,36 @@ static bool pmsav8_needed(void *opaque)
arm_feature(env, ARM_FEATURE_V8);
}
+static bool pmsav8r_needed(void *opaque)
+{
+ ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
+
+ return arm_feature(env, ARM_FEATURE_PMSA) &&
+ arm_feature(env, ARM_FEATURE_V8) &&
+ !arm_feature(env, ARM_FEATURE_M);
+}
+
+static const VMStateDescription vmstate_pmsav8r = {
+ .name = "cpu/pmsav8/pmsav8r",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = pmsav8r_needed,
+ .fields = (const VMStateField[]) {
+ VMSTATE_VARRAY_UINT32(env.pmsav8.hprbar, ARMCPU,
+ pmsav8r_hdregion, 0, vmstate_info_uint32, uint32_t),
+ VMSTATE_VARRAY_UINT32(env.pmsav8.hprlar, ARMCPU,
+ pmsav8r_hdregion, 0, vmstate_info_uint32, uint32_t),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
static const VMStateDescription vmstate_pmsav8 = {
.name = "cpu/pmsav8",
.version_id = 1,
.minimum_version_id = 1,
.needed = pmsav8_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_VARRAY_UINT32(env.pmsav8.rbar[M_REG_NS], ARMCPU, pmsav7_dregion,
0, vmstate_info_uint32, uint32_t),
VMSTATE_VARRAY_UINT32(env.pmsav8.rlar[M_REG_NS], ARMCPU, pmsav7_dregion,
@@ -397,6 +527,10 @@ static const VMStateDescription vmstate_pmsav8 = {
VMSTATE_UINT32(env.pmsav8.mair0[M_REG_NS], ARMCPU),
VMSTATE_UINT32(env.pmsav8.mair1[M_REG_NS], ARMCPU),
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * const []) {
+ &vmstate_pmsav8r,
+ NULL
}
};
@@ -427,7 +561,7 @@ static const VMStateDescription vmstate_m_security = {
.version_id = 1,
.minimum_version_id = 1,
.needed = m_security_needed,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32(env.v7m.secure, ARMCPU),
VMSTATE_UINT32(env.v7m.other_ss_msp, ARMCPU),
VMSTATE_UINT32(env.v7m.other_ss_psp, ARMCPU),
@@ -467,7 +601,7 @@ static const VMStateDescription vmstate_m_security = {
};
static int get_cpsr(QEMUFile *f, void *opaque, size_t size,
- VMStateField *field)
+ const VMStateField *field)
{
ARMCPU *cpu = opaque;
CPUARMState *env = &cpu->env;
@@ -523,7 +657,7 @@ static int get_cpsr(QEMUFile *f, void *opaque, size_t size,
}
static int put_cpsr(QEMUFile *f, void *opaque, size_t size,
- VMStateField *field, QJSON *vmdesc)
+ const VMStateField *field, JSONWriter *vmdesc)
{
ARMCPU *cpu = opaque;
CPUARMState *env = &cpu->env;
@@ -549,7 +683,7 @@ static const VMStateInfo vmstate_cpsr = {
};
static int get_power(QEMUFile *f, void *opaque, size_t size,
- VMStateField *field)
+ const VMStateField *field)
{
ARMCPU *cpu = opaque;
bool powered_off = qemu_get_byte(f);
@@ -558,7 +692,7 @@ static int get_power(QEMUFile *f, void *opaque, size_t size,
}
static int put_power(QEMUFile *f, void *opaque, size_t size,
- VMStateField *field, QJSON *vmdesc)
+ const VMStateField *field, JSONWriter *vmdesc)
{
ARMCPU *cpu = opaque;
@@ -584,15 +718,25 @@ static int cpu_pre_save(void *opaque)
{
ARMCPU *cpu = opaque;
+ if (!kvm_enabled()) {
+ pmu_op_start(&cpu->env);
+ }
+
if (kvm_enabled()) {
if (!write_kvmstate_to_list(cpu)) {
/* This should never fail */
- abort();
+ g_assert_not_reached();
}
+
+ /*
+ * kvm_arm_cpu_pre_save() must be called after
+ * write_kvmstate_to_list()
+ */
+ kvm_arm_cpu_pre_save(cpu);
} else {
- if (!write_cpustate_to_list(cpu)) {
+ if (!write_cpustate_to_list(cpu, false)) {
/* This should never fail. */
- abort();
+ g_assert_not_reached();
}
}
@@ -605,11 +749,59 @@ static int cpu_pre_save(void *opaque)
return 0;
}
+static int cpu_post_save(void *opaque)
+{
+ ARMCPU *cpu = opaque;
+
+ if (!kvm_enabled()) {
+ pmu_op_finish(&cpu->env);
+ }
+
+ return 0;
+}
+
+static int cpu_pre_load(void *opaque)
+{
+ ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
+
+ /*
+ * Pre-initialize irq_line_state to a value that's never valid as
+ * real data, so cpu_post_load() can tell whether we've seen the
+ * irq-line-state subsection in the incoming migration state.
+ */
+ env->irq_line_state = UINT32_MAX;
+
+ if (!kvm_enabled()) {
+ pmu_op_start(env);
+ }
+
+ return 0;
+}
+
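cpu_pre_load() above and the check at the top of cpu_post_load() below implement a common migration-compatibility idiom: seed a field with a value no real source can send, then detect after loading whether the optional subsection arrived. Reduced to its essentials, with placeholder names throughout:

    /* Sentinel idiom sketch, placeholder names only: pre_load plants an
     * impossible value; post_load repairs the field if the optional
     * subsection never overwrote it. */
    #include <stdint.h>

    #define FIELD_NOT_MIGRATED UINT32_MAX

    typedef struct {
        uint32_t optional_field;   /* stands in for env->irq_line_state */
    } ExampleState;

    static void example_pre_load(ExampleState *s)
    {
        s->optional_field = FIELD_NOT_MIGRATED;
    }

    static void example_post_load(ExampleState *s, uint32_t fallback)
    {
        if (s->optional_field == FIELD_NOT_MIGRATED) {
            /* old source without the subsection: reconstruct a sane value */
            s->optional_field = fallback;
        }
    }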
static int cpu_post_load(void *opaque, int version_id)
{
ARMCPU *cpu = opaque;
+ CPUARMState *env = &cpu->env;
int i, v;
+ /*
+ * Handle migration compatibility from old QEMU which didn't
+ * send the irq-line-state subsection. A QEMU without it did not
+ * implement the HCR_EL2.{VI,VF} bits as generating interrupts,
+ * so for TCG the line state matches the bits set in cs->interrupt_request.
+ * For KVM the line state is not stored in cs->interrupt_request
+ * and so this will leave irq_line_state as 0, but this is OK because
+ * we only need to care about it for TCG.
+ */
+ if (env->irq_line_state == UINT32_MAX) {
+ CPUState *cs = CPU(cpu);
+
+ env->irq_line_state = cs->interrupt_request &
+ (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ |
+ CPU_INTERRUPT_VIRQ | CPU_INTERRUPT_VFIQ);
+ }
+
/* Update the values list from the incoming migration data.
* Anything in the incoming data which we don't know about is
* a migration failure; anything we know about but the incoming
@@ -643,14 +835,48 @@ static int cpu_post_load(void *opaque, int version_id)
* we're using it.
*/
write_list_to_cpustate(cpu);
+ kvm_arm_cpu_post_load(cpu);
} else {
if (!write_list_to_cpustate(cpu)) {
return -1;
}
}
- hw_breakpoint_update_all(cpu);
- hw_watchpoint_update_all(cpu);
+ /*
+ * Misaligned thumb pc is architecturally impossible. Fail the
+ * incoming migration. For TCG it would trigger the assert in
+ * thumb_tr_translate_insn().
+ */
+ if (!is_a64(env) && env->thumb && (env->regs[15] & 1)) {
+ return -1;
+ }
+
+ if (tcg_enabled()) {
+ hw_breakpoint_update_all(cpu);
+ hw_watchpoint_update_all(cpu);
+ }
+
+ /*
+ * TCG gen_update_fp_context() relies on the invariant that
+ * FPDSCR.LTPSIZE is constant 4 for M-profile with the LOB extension;
+ * forbid bogus incoming data with some other value.
+ */
+ if (arm_feature(env, ARM_FEATURE_M) && cpu_isar_feature(aa32_lob, cpu)) {
+ if (extract32(env->v7m.fpdscr[M_REG_NS],
+ FPCR_LTPSIZE_SHIFT, FPCR_LTPSIZE_LENGTH) != 4 ||
+ extract32(env->v7m.fpdscr[M_REG_S],
+ FPCR_LTPSIZE_SHIFT, FPCR_LTPSIZE_LENGTH) != 4) {
+ return -1;
+ }
+ }
+
+ if (!kvm_enabled()) {
+ pmu_op_finish(env);
+ }
+
+ if (tcg_enabled()) {
+ arm_rebuild_hflags(env);
+ }
return 0;
}
@@ -660,8 +886,10 @@ const VMStateDescription vmstate_arm_cpu = {
.version_id = 22,
.minimum_version_id = 22,
.pre_save = cpu_pre_save,
+ .post_save = cpu_post_save,
+ .pre_load = cpu_pre_load,
.post_load = cpu_post_load,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32_ARRAY(env.regs, ARMCPU, 16),
VMSTATE_UINT64_ARRAY(env.xregs, ARMCPU, 32),
VMSTATE_UINT64(env.pc, ARMCPU),
@@ -694,7 +922,7 @@ const VMStateDescription vmstate_arm_cpu = {
VMSTATE_UINT64(env.exclusive_addr, ARMCPU),
VMSTATE_UINT64(env.exclusive_val, ARMCPU),
VMSTATE_UINT64(env.exclusive_high, ARMCPU),
- VMSTATE_UINT64(env.features, ARMCPU),
+ VMSTATE_UNUSED(sizeof(uint64_t)),
VMSTATE_UINT32(env.exception.syndrome, ARMCPU),
VMSTATE_UINT32(env.exception.fsr, ARMCPU),
VMSTATE_UINT64(env.exception.vaddress, ARMCPU),
@@ -710,7 +938,7 @@ const VMStateDescription vmstate_arm_cpu = {
},
VMSTATE_END_OF_LIST()
},
- .subsections = (const VMStateDescription*[]) {
+ .subsections = (const VMStateDescription * const []) {
&vmstate_vfp,
&vmstate_iwmmxt,
&vmstate_m,
@@ -725,7 +953,10 @@ const VMStateDescription vmstate_arm_cpu = {
&vmstate_m_security,
#ifdef TARGET_AARCH64
&vmstate_sve,
+ &vmstate_za,
#endif
+ &vmstate_serror,
+ &vmstate_irq_line_state,
NULL
}
};
diff --git a/target/arm/meson.build b/target/arm/meson.build
new file mode 100644
index 0000000000..2e10464dbb
--- /dev/null
+++ b/target/arm/meson.build
@@ -0,0 +1,41 @@
+arm_ss = ss.source_set()
+arm_ss.add(files(
+ 'cpu.c',
+ 'debug_helper.c',
+ 'gdbstub.c',
+ 'helper.c',
+ 'vfp_helper.c',
+))
+arm_ss.add(zlib)
+
+arm_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c'), if_false: files('kvm-stub.c'))
+arm_ss.add(when: 'CONFIG_HVF', if_true: files('hyp_gdbstub.c'))
+
+arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
+ 'cpu64.c',
+ 'gdbstub64.c',
+))
+
+arm_system_ss = ss.source_set()
+arm_system_ss.add(files(
+ 'arch_dump.c',
+ 'arm-powerctl.c',
+ 'arm-qmp-cmds.c',
+ 'cortex-regs.c',
+ 'machine.c',
+ 'ptw.c',
+))
+
+arm_user_ss = ss.source_set()
+
+subdir('hvf')
+
+if 'CONFIG_TCG' in config_all_accel
+ subdir('tcg')
+else
+ arm_ss.add(files('tcg-stubs.c'))
+endif
+
+target_arch += {'arm': arm_ss}
+target_system_arch += {'arm': arm_system_ss}
+target_user_arch += {'arm': arm_user_ss}
diff --git a/target/arm/monitor.c b/target/arm/monitor.c
deleted file mode 100644
index 4cdd2676dd..0000000000
--- a/target/arm/monitor.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * QEMU monitor.c for ARM.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "hw/boards.h"
-#include "kvm_arm.h"
-#include "qapi/qapi-commands-misc.h"
-
-static GICCapability *gic_cap_new(int version)
-{
- GICCapability *cap = g_new0(GICCapability, 1);
- cap->version = version;
- /* by default, support none */
- cap->emulated = false;
- cap->kernel = false;
- return cap;
-}
-
-static GICCapabilityList *gic_cap_list_add(GICCapabilityList *head,
- GICCapability *cap)
-{
- GICCapabilityList *item = g_new0(GICCapabilityList, 1);
- item->value = cap;
- item->next = head;
- return item;
-}
-
-static inline void gic_cap_kvm_probe(GICCapability *v2, GICCapability *v3)
-{
-#ifdef CONFIG_KVM
- int fdarray[3];
-
- if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, NULL)) {
- return;
- }
-
- /* Test KVM GICv2 */
- if (kvm_device_supported(fdarray[1], KVM_DEV_TYPE_ARM_VGIC_V2)) {
- v2->kernel = true;
- }
-
- /* Test KVM GICv3 */
- if (kvm_device_supported(fdarray[1], KVM_DEV_TYPE_ARM_VGIC_V3)) {
- v3->kernel = true;
- }
-
- kvm_arm_destroy_scratch_host_vcpu(fdarray);
-#endif
-}
-
-GICCapabilityList *qmp_query_gic_capabilities(Error **errp)
-{
- GICCapabilityList *head = NULL;
- GICCapability *v2 = gic_cap_new(2), *v3 = gic_cap_new(3);
-
- v2->emulated = true;
- v3->emulated = true;
-
- gic_cap_kvm_probe(v2, v3);
-
- head = gic_cap_list_add(head, v2);
- head = gic_cap_list_add(head, v3);
-
- return head;
-}
diff --git a/target/arm/multiprocessing.h b/target/arm/multiprocessing.h
new file mode 100644
index 0000000000..81715d345c
--- /dev/null
+++ b/target/arm/multiprocessing.h
@@ -0,0 +1,16 @@
+/*
+ * ARM multiprocessor CPU helpers
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef TARGET_ARM_MULTIPROCESSING_H
+#define TARGET_ARM_MULTIPROCESSING_H
+
+#include "target/arm/cpu-qom.h"
+
+uint64_t arm_cpu_mp_affinity(ARMCPU *cpu);
+
+#endif
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
deleted file mode 100644
index d915579712..0000000000
--- a/target/arm/op_helper.c
+++ /dev/null
@@ -1,1482 +0,0 @@
-/*
- * ARM helper routines
- *
- * Copyright (c) 2005-2007 CodeSourcery, LLC
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-#include "qemu/osdep.h"
-#include "qemu/log.h"
-#include "qemu/main-loop.h"
-#include "cpu.h"
-#include "exec/helper-proto.h"
-#include "internals.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
-
-#define SIGNBIT (uint32_t)0x80000000
-#define SIGNBIT64 ((uint64_t)1 << 63)
-
-void raise_exception(CPUARMState *env, uint32_t excp,
- uint32_t syndrome, uint32_t target_el)
-{
- CPUState *cs = CPU(arm_env_get_cpu(env));
-
- if ((env->cp15.hcr_el2 & HCR_TGE) &&
- target_el == 1 && !arm_is_secure(env)) {
- /*
- * Redirect NS EL1 exceptions to NS EL2. These are reported with
- * their original syndrome register value, with the exception of
- * SIMD/FP access traps, which are reported as uncategorized
- * (see DDI0478C.a D1.10.4)
- */
- target_el = 2;
- if (syndrome >> ARM_EL_EC_SHIFT == EC_ADVSIMDFPACCESSTRAP) {
- syndrome = syn_uncategorized();
- }
- }
-
- assert(!excp_is_internal(excp));
- cs->exception_index = excp;
- env->exception.syndrome = syndrome;
- env->exception.target_el = target_el;
- cpu_loop_exit(cs);
-}
-
-static int exception_target_el(CPUARMState *env)
-{
- int target_el = MAX(1, arm_current_el(env));
-
- /* No such thing as secure EL1 if EL3 is aarch32, so update the target EL
- * to EL3 in this case.
- */
- if (arm_is_secure(env) && !arm_el_is_aa64(env, 3) && target_el == 1) {
- target_el = 3;
- }
-
- return target_el;
-}
-
-uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def, void *vn,
- uint32_t maxindex)
-{
- uint32_t val, shift;
- uint64_t *table = vn;
-
- val = 0;
- for (shift = 0; shift < 32; shift += 8) {
- uint32_t index = (ireg >> shift) & 0xff;
- if (index < maxindex) {
- uint32_t tmp = (table[index >> 3] >> ((index & 7) << 3)) & 0xff;
- val |= tmp << shift;
- } else {
- val |= def & (0xff << shift);
- }
- }
- return val;
-}
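neon_tbl() above does the byte-wise table lookup for VTBL/VTBX: each byte of ireg selects a byte of the table, falling back to the corresponding byte of def when the index is out of range. A worked example, assuming an 8-byte table:

    /* Worked example for the helper above, assuming the table holds the bytes
     * 0x10..0x17 (maxindex == 8):
     *
     *   ireg = 0x09020100, def = 0xdeadbeef
     *     byte 0: index 0x00 -> table byte 0x10
     *     byte 1: index 0x01 -> table byte 0x11
     *     byte 2: index 0x02 -> table byte 0x12
     *     byte 3: index 0x09 -> out of range -> default byte 0xde
     *
     *   result = 0xde121110
     */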
-
-#if !defined(CONFIG_USER_ONLY)
-
-static inline uint32_t merge_syn_data_abort(uint32_t template_syn,
- unsigned int target_el,
- bool same_el, bool ea,
- bool s1ptw, bool is_write,
- int fsc)
-{
- uint32_t syn;
-
- /* ISV is only set for data aborts routed to EL2 and
- * never for stage-1 page table walks faulting on stage 2.
- *
- * Furthermore, ISV is only set for certain kinds of load/stores.
- * If the template syndrome does not have ISV set, we should leave
- * it cleared.
- *
- * See ARMv8 specs, D7-1974:
- * ISS encoding for an exception from a Data Abort, the
- * ISV field.
- */
- if (!(template_syn & ARM_EL_ISV) || target_el != 2 || s1ptw) {
- syn = syn_data_abort_no_iss(same_el,
- ea, 0, s1ptw, is_write, fsc);
- } else {
- /* Fields: IL, ISV, SAS, SSE, SRT, SF and AR come from the template
- * syndrome created at translation time.
- * Now we create the runtime syndrome with the remaining fields.
- */
- syn = syn_data_abort_with_iss(same_el,
- 0, 0, 0, 0, 0,
- ea, 0, s1ptw, is_write, fsc,
- false);
- /* Merge the runtime syndrome with the template syndrome. */
- syn |= template_syn;
- }
- return syn;
-}
-
-static void deliver_fault(ARMCPU *cpu, vaddr addr, MMUAccessType access_type,
- int mmu_idx, ARMMMUFaultInfo *fi)
-{
- CPUARMState *env = &cpu->env;
- int target_el;
- bool same_el;
- uint32_t syn, exc, fsr, fsc;
- ARMMMUIdx arm_mmu_idx = core_to_arm_mmu_idx(env, mmu_idx);
-
- target_el = exception_target_el(env);
- if (fi->stage2) {
- target_el = 2;
- env->cp15.hpfar_el2 = extract64(fi->s2addr, 12, 47) << 4;
- }
- same_el = (arm_current_el(env) == target_el);
-
- if (target_el == 2 || arm_el_is_aa64(env, target_el) ||
- arm_s1_regime_using_lpae_format(env, arm_mmu_idx)) {
- /* LPAE format fault status register : bottom 6 bits are
- * status code in the same form as needed for syndrome
- */
- fsr = arm_fi_to_lfsc(fi);
- fsc = extract32(fsr, 0, 6);
- } else {
- fsr = arm_fi_to_sfsc(fi);
- /* Short format FSR : this fault will never actually be reported
- * to an EL that uses a syndrome register. Use a (currently)
- * reserved FSR code in case the constructed syndrome does leak
- * into the guest somehow.
- */
- fsc = 0x3f;
- }
-
- if (access_type == MMU_INST_FETCH) {
- syn = syn_insn_abort(same_el, fi->ea, fi->s1ptw, fsc);
- exc = EXCP_PREFETCH_ABORT;
- } else {
- syn = merge_syn_data_abort(env->exception.syndrome, target_el,
- same_el, fi->ea, fi->s1ptw,
- access_type == MMU_DATA_STORE,
- fsc);
- if (access_type == MMU_DATA_STORE
- && arm_feature(env, ARM_FEATURE_V6)) {
- fsr |= (1 << 11);
- }
- exc = EXCP_DATA_ABORT;
- }
-
- env->exception.vaddress = addr;
- env->exception.fsr = fsr;
- raise_exception(env, exc, syn, target_el);
-}
-
-/* Try to fill the TLB and raise an exception on error. If retaddr is
- * NULL, the function was called from C code (i.e. not from generated
- * code or from helper.c).
- */
-void tlb_fill(CPUState *cs, target_ulong addr, int size,
- MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
-{
- bool ret;
- ARMMMUFaultInfo fi = {};
-
- ret = arm_tlb_fill(cs, addr, access_type, mmu_idx, &fi);
- if (unlikely(ret)) {
- ARMCPU *cpu = ARM_CPU(cs);
-
- /* now we have a real cpu fault */
- cpu_restore_state(cs, retaddr, true);
-
- deliver_fault(cpu, addr, access_type, mmu_idx, &fi);
- }
-}
-
-/* Raise a data fault alignment exception for the specified virtual address */
-void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
- MMUAccessType access_type,
- int mmu_idx, uintptr_t retaddr)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- ARMMMUFaultInfo fi = {};
-
- /* now we have a real cpu fault */
- cpu_restore_state(cs, retaddr, true);
-
- fi.type = ARMFault_Alignment;
- deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi);
-}
-
-/* arm_cpu_do_transaction_failed: handle a memory system error response
- * (eg "no device/memory present at address") by raising an external abort
- * exception
- */
-void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
- vaddr addr, unsigned size,
- MMUAccessType access_type,
- int mmu_idx, MemTxAttrs attrs,
- MemTxResult response, uintptr_t retaddr)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- ARMMMUFaultInfo fi = {};
-
- /* now we have a real cpu fault */
- cpu_restore_state(cs, retaddr, true);
-
- fi.ea = arm_extabort_type(response);
- fi.type = ARMFault_SyncExternal;
- deliver_fault(cpu, addr, access_type, mmu_idx, &fi);
-}
-
-#endif /* !defined(CONFIG_USER_ONLY) */
-
-void HELPER(v8m_stackcheck)(CPUARMState *env, uint32_t newvalue)
-{
- /*
- * Perform the v8M stack limit check for SP updates from translated code,
- * raising an exception if the limit is breached.
- */
- if (newvalue < v7m_sp_limit(env)) {
- CPUState *cs = CPU(arm_env_get_cpu(env));
-
- /*
- * Stack limit exceptions are a rare case, so rather than syncing
- * PC/condbits before the call, we use cpu_restore_state() to
- * get them right before raising the exception.
- */
- cpu_restore_state(cs, GETPC(), true);
- raise_exception(env, EXCP_STKOF, 0, 1);
- }
-}
-
-uint32_t HELPER(add_setq)(CPUARMState *env, uint32_t a, uint32_t b)
-{
- uint32_t res = a + b;
- if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT))
- env->QF = 1;
- return res;
-}
-
-uint32_t HELPER(add_saturate)(CPUARMState *env, uint32_t a, uint32_t b)
-{
- uint32_t res = a + b;
- if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) {
- env->QF = 1;
- res = ~(((int32_t)a >> 31) ^ SIGNBIT);
- }
- return res;
-}
-
-uint32_t HELPER(sub_saturate)(CPUARMState *env, uint32_t a, uint32_t b)
-{
- uint32_t res = a - b;
- if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) {
- env->QF = 1;
- res = ~(((int32_t)a >> 31) ^ SIGNBIT);
- }
- return res;
-}
-
-uint32_t HELPER(double_saturate)(CPUARMState *env, int32_t val)
-{
- uint32_t res;
- if (val >= 0x40000000) {
- res = ~SIGNBIT;
- env->QF = 1;
- } else if (val <= (int32_t)0xc0000000) {
- res = SIGNBIT;
- env->QF = 1;
- } else {
- res = val << 1;
- }
- return res;
-}
-
-uint32_t HELPER(add_usaturate)(CPUARMState *env, uint32_t a, uint32_t b)
-{
- uint32_t res = a + b;
- if (res < a) {
- env->QF = 1;
- res = ~0;
- }
- return res;
-}
-
-uint32_t HELPER(sub_usaturate)(CPUARMState *env, uint32_t a, uint32_t b)
-{
- uint32_t res = a - b;
- if (res > a) {
- env->QF = 1;
- res = 0;
- }
- return res;
-}
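Concretely, for the signed saturating add above (QADD): adding 1 to INT32_MAX overflows, so the result is clamped and the sticky Q flag is set. Worked through against the code:

    /* Worked example for add_saturate, assuming env->QF starts at 0:
     *   a = 0x7fffffff, b = 1:
     *     (res ^ a) has the sign bit set, (a ^ b) does not -> overflow
     *     result = ~(((int32_t)a >> 31) ^ SIGNBIT) = 0x7fffffff, env->QF = 1
     *   a = 0x80000000, b = 0xffffffff (-1):
     *     overflow the other way -> result = 0x80000000, env->QF = 1
     */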
-
-/* Signed saturation. */
-static inline uint32_t do_ssat(CPUARMState *env, int32_t val, int shift)
-{
- int32_t top;
- uint32_t mask;
-
- top = val >> shift;
- mask = (1u << shift) - 1;
- if (top > 0) {
- env->QF = 1;
- return mask;
- } else if (top < -1) {
- env->QF = 1;
- return ~mask;
- }
- return val;
-}
-
-/* Unsigned saturation. */
-static inline uint32_t do_usat(CPUARMState *env, int32_t val, int shift)
-{
- uint32_t max;
-
- max = (1u << shift) - 1;
- if (val < 0) {
- env->QF = 1;
- return 0;
- } else if (val > max) {
- env->QF = 1;
- return max;
- }
- return val;
-}
-
-/* Signed saturate. */
-uint32_t HELPER(ssat)(CPUARMState *env, uint32_t x, uint32_t shift)
-{
- return do_ssat(env, x, shift);
-}
-
-/* Dual halfword signed saturate. */
-uint32_t HELPER(ssat16)(CPUARMState *env, uint32_t x, uint32_t shift)
-{
- uint32_t res;
-
- res = (uint16_t)do_ssat(env, (int16_t)x, shift);
- res |= do_ssat(env, ((int32_t)x) >> 16, shift) << 16;
- return res;
-}
-
-/* Unsigned saturate. */
-uint32_t HELPER(usat)(CPUARMState *env, uint32_t x, uint32_t shift)
-{
- return do_usat(env, x, shift);
-}
-
-/* Dual halfword unsigned saturate. */
-uint32_t HELPER(usat16)(CPUARMState *env, uint32_t x, uint32_t shift)
-{
- uint32_t res;
-
- res = (uint16_t)do_usat(env, (int16_t)x, shift);
- res |= do_usat(env, ((int32_t)x) >> 16, shift) << 16;
- return res;
-}
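For do_ssat()/do_usat() above, shift selects the saturation range: do_ssat clamps to [-2^shift, 2^shift - 1], do_usat to [0, 2^shift - 1], and Q is set whenever clamping happens. A few worked values:

    /* Worked examples for the saturation helpers above:
     *   do_ssat(env, 300, 7)  -> top = 300 >> 7 = 2 > 0    -> returns 127  (Q set)
     *   do_ssat(env, -300, 7) -> top = -300 >> 7 = -3 < -1 -> returns -128 (Q set)
     *   do_usat(env, 300, 8)  -> 300 > 255                 -> returns 255  (Q set)
     *   do_usat(env, -5, 8)   -> negative                  -> returns 0    (Q set)
     */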
-
-void HELPER(setend)(CPUARMState *env)
-{
- env->uncached_cpsr ^= CPSR_E;
-}
-
-/* Function checks whether WFx (WFI/WFE) instructions are set up to be trapped.
- * The function returns the target EL (1-3) if the instruction is to be trapped;
- * otherwise it returns 0 indicating it is not trapped.
- */
-static inline int check_wfx_trap(CPUARMState *env, bool is_wfe)
-{
- int cur_el = arm_current_el(env);
- uint64_t mask;
-
- if (arm_feature(env, ARM_FEATURE_M)) {
- /* M profile cores can never trap WFI/WFE. */
- return 0;
- }
-
- /* If we are currently in EL0 then we need to check if SCTLR is set up for
- * WFx instructions being trapped to EL1. These trap bits don't exist in v7.
- */
- if (cur_el < 1 && arm_feature(env, ARM_FEATURE_V8)) {
- int target_el;
-
- mask = is_wfe ? SCTLR_nTWE : SCTLR_nTWI;
- if (arm_is_secure_below_el3(env) && !arm_el_is_aa64(env, 3)) {
- /* Secure EL0 and Secure PL1 is at EL3 */
- target_el = 3;
- } else {
- target_el = 1;
- }
-
- if (!(env->cp15.sctlr_el[target_el] & mask)) {
- return target_el;
- }
- }
-
- /* We are not trapping to EL1; trap to EL2 if HCR_EL2 requires it
- * No need for ARM_FEATURE check as if HCR_EL2 doesn't exist the
- * bits will be zero indicating no trap.
- */
- if (cur_el < 2 && !arm_is_secure(env)) {
- mask = (is_wfe) ? HCR_TWE : HCR_TWI;
- if (env->cp15.hcr_el2 & mask) {
- return 2;
- }
- }
-
- /* We are not trapping to EL1 or EL2; trap to EL3 if SCR_EL3 requires it */
- if (cur_el < 3) {
- mask = (is_wfe) ? SCR_TWE : SCR_TWI;
- if (env->cp15.scr_el3 & mask) {
- return 3;
- }
- }
-
- return 0;
-}
-
-void HELPER(wfi)(CPUARMState *env, uint32_t insn_len)
-{
- CPUState *cs = CPU(arm_env_get_cpu(env));
- int target_el = check_wfx_trap(env, false);
-
- if (cpu_has_work(cs)) {
- /* Don't bother to go into our "low power state" if
- * we would just wake up immediately.
- */
- return;
- }
-
- if (target_el) {
- env->pc -= insn_len;
- raise_exception(env, EXCP_UDEF, syn_wfx(1, 0xe, 0, insn_len == 2),
- target_el);
- }
-
- cs->exception_index = EXCP_HLT;
- cs->halted = 1;
- cpu_loop_exit(cs);
-}
-
-void HELPER(wfe)(CPUARMState *env)
-{
- /* This is a hint instruction that is semantically different
- * from YIELD even though we currently implement it identically.
- * Don't actually halt the CPU, just yield back to top
- * level loop. This is not going into a "low power state"
- * (ie halting until some event occurs), so we never take
- * a configurable trap to a different exception level.
- */
- HELPER(yield)(env);
-}
-
-void HELPER(yield)(CPUARMState *env)
-{
- ARMCPU *cpu = arm_env_get_cpu(env);
- CPUState *cs = CPU(cpu);
-
- /* This is a non-trappable hint instruction that generally indicates
- * that the guest is currently busy-looping. Yield control back to the
- * top level loop so that a more deserving VCPU has a chance to run.
- */
- cs->exception_index = EXCP_YIELD;
- cpu_loop_exit(cs);
-}
-
-/* Raise an internal-to-QEMU exception. This is limited to only
- * those EXCP values which are special cases for QEMU to interrupt
- * execution and not to be used for exceptions which are passed to
- * the guest (those must all have syndrome information and thus should
- * use exception_with_syndrome).
- */
-void HELPER(exception_internal)(CPUARMState *env, uint32_t excp)
-{
- CPUState *cs = CPU(arm_env_get_cpu(env));
-
- assert(excp_is_internal(excp));
- cs->exception_index = excp;
- cpu_loop_exit(cs);
-}
-
-/* Raise an exception with the specified syndrome register value */
-void HELPER(exception_with_syndrome)(CPUARMState *env, uint32_t excp,
- uint32_t syndrome, uint32_t target_el)
-{
- raise_exception(env, excp, syndrome, target_el);
-}
-
-/* Raise an EXCP_BKPT with the specified syndrome register value,
- * targeting the correct exception level for debug exceptions.
- */
-void HELPER(exception_bkpt_insn)(CPUARMState *env, uint32_t syndrome)
-{
- /* FSR will only be used if the debug target EL is AArch32. */
- env->exception.fsr = arm_debug_exception_fsr(env);
- /* FAR is UNKNOWN: clear vaddress to avoid potentially exposing
- * values to the guest that it shouldn't be able to see at its
- * exception/security level.
- */
- env->exception.vaddress = 0;
- raise_exception(env, EXCP_BKPT, syndrome, arm_debug_target_el(env));
-}
-
-uint32_t HELPER(cpsr_read)(CPUARMState *env)
-{
- return cpsr_read(env) & ~(CPSR_EXEC | CPSR_RESERVED);
-}
-
-void HELPER(cpsr_write)(CPUARMState *env, uint32_t val, uint32_t mask)
-{
- cpsr_write(env, val, mask, CPSRWriteByInstr);
-}
-
-/* Write the CPSR for a 32-bit exception return */
-void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val)
-{
- qemu_mutex_lock_iothread();
- arm_call_pre_el_change_hook(arm_env_get_cpu(env));
- qemu_mutex_unlock_iothread();
-
- cpsr_write(env, val, CPSR_ERET_MASK, CPSRWriteExceptionReturn);
-
- /* Generated code has already stored the new PC value, but
- * without masking out its low bits, because which bits need
- * masking depends on whether we're returning to Thumb or ARM
- * state. Do the masking now.
- */
- env->regs[15] &= (env->thumb ? ~1 : ~3);
-
- qemu_mutex_lock_iothread();
- arm_call_el_change_hook(arm_env_get_cpu(env));
- qemu_mutex_unlock_iothread();
-}
-
-/* Access to user mode registers from privileged modes. */
-uint32_t HELPER(get_user_reg)(CPUARMState *env, uint32_t regno)
-{
- uint32_t val;
-
- if (regno == 13) {
- val = env->banked_r13[BANK_USRSYS];
- } else if (regno == 14) {
- val = env->banked_r14[BANK_USRSYS];
- } else if (regno >= 8
- && (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_FIQ) {
- val = env->usr_regs[regno - 8];
- } else {
- val = env->regs[regno];
- }
- return val;
-}
-
-void HELPER(set_user_reg)(CPUARMState *env, uint32_t regno, uint32_t val)
-{
- if (regno == 13) {
- env->banked_r13[BANK_USRSYS] = val;
- } else if (regno == 14) {
- env->banked_r14[BANK_USRSYS] = val;
- } else if (regno >= 8
- && (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_FIQ) {
- env->usr_regs[regno - 8] = val;
- } else {
- env->regs[regno] = val;
- }
-}
-
-void HELPER(set_r13_banked)(CPUARMState *env, uint32_t mode, uint32_t val)
-{
- if ((env->uncached_cpsr & CPSR_M) == mode) {
- env->regs[13] = val;
- } else {
- env->banked_r13[bank_number(mode)] = val;
- }
-}
-
-uint32_t HELPER(get_r13_banked)(CPUARMState *env, uint32_t mode)
-{
- if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_SYS) {
- /* SRS instruction is UNPREDICTABLE from System mode; we UNDEF.
- * Other UNPREDICTABLE and UNDEF cases were caught at translate time.
- */
- raise_exception(env, EXCP_UDEF, syn_uncategorized(),
- exception_target_el(env));
- }
-
- if ((env->uncached_cpsr & CPSR_M) == mode) {
- return env->regs[13];
- } else {
- return env->banked_r13[bank_number(mode)];
- }
-}
-
-static void msr_mrs_banked_exc_checks(CPUARMState *env, uint32_t tgtmode,
- uint32_t regno)
-{
- /* Raise an exception if the requested access is one of the UNPREDICTABLE
- * cases; otherwise return. This broadly corresponds to the pseudocode
- * BankedRegisterAccessValid() and SPSRAccessValid(),
- * except that we have already handled some cases at translate time.
- */
- int curmode = env->uncached_cpsr & CPSR_M;
-
- if (regno == 17) {
- /* ELR_Hyp: a special case because access from tgtmode is OK */
- if (curmode != ARM_CPU_MODE_HYP && curmode != ARM_CPU_MODE_MON) {
- goto undef;
- }
- return;
- }
-
- if (curmode == tgtmode) {
- goto undef;
- }
-
- if (tgtmode == ARM_CPU_MODE_USR) {
- switch (regno) {
- case 8 ... 12:
- if (curmode != ARM_CPU_MODE_FIQ) {
- goto undef;
- }
- break;
- case 13:
- if (curmode == ARM_CPU_MODE_SYS) {
- goto undef;
- }
- break;
- case 14:
- if (curmode == ARM_CPU_MODE_HYP || curmode == ARM_CPU_MODE_SYS) {
- goto undef;
- }
- break;
- default:
- break;
- }
- }
-
- if (tgtmode == ARM_CPU_MODE_HYP) {
- /* SPSR_Hyp, r13_hyp: accessible from Monitor mode only */
- if (curmode != ARM_CPU_MODE_MON) {
- goto undef;
- }
- }
-
- return;
-
-undef:
- raise_exception(env, EXCP_UDEF, syn_uncategorized(),
- exception_target_el(env));
-}
-
-void HELPER(msr_banked)(CPUARMState *env, uint32_t value, uint32_t tgtmode,
- uint32_t regno)
-{
- msr_mrs_banked_exc_checks(env, tgtmode, regno);
-
- switch (regno) {
- case 16: /* SPSRs */
- env->banked_spsr[bank_number(tgtmode)] = value;
- break;
- case 17: /* ELR_Hyp */
- env->elr_el[2] = value;
- break;
- case 13:
- env->banked_r13[bank_number(tgtmode)] = value;
- break;
- case 14:
- env->banked_r14[bank_number(tgtmode)] = value;
- break;
- case 8 ... 12:
- switch (tgtmode) {
- case ARM_CPU_MODE_USR:
- env->usr_regs[regno - 8] = value;
- break;
- case ARM_CPU_MODE_FIQ:
- env->fiq_regs[regno - 8] = value;
- break;
- default:
- g_assert_not_reached();
- }
- break;
- default:
- g_assert_not_reached();
- }
-}
-
-uint32_t HELPER(mrs_banked)(CPUARMState *env, uint32_t tgtmode, uint32_t regno)
-{
- msr_mrs_banked_exc_checks(env, tgtmode, regno);
-
- switch (regno) {
- case 16: /* SPSRs */
- return env->banked_spsr[bank_number(tgtmode)];
- case 17: /* ELR_Hyp */
- return env->elr_el[2];
- case 13:
- return env->banked_r13[bank_number(tgtmode)];
- case 14:
- return env->banked_r14[bank_number(tgtmode)];
- case 8 ... 12:
- switch (tgtmode) {
- case ARM_CPU_MODE_USR:
- return env->usr_regs[regno - 8];
- case ARM_CPU_MODE_FIQ:
- return env->fiq_regs[regno - 8];
- default:
- g_assert_not_reached();
- }
- default:
- g_assert_not_reached();
- }
-}
-
-void HELPER(access_check_cp_reg)(CPUARMState *env, void *rip, uint32_t syndrome,
- uint32_t isread)
-{
- const ARMCPRegInfo *ri = rip;
- int target_el;
-
- if (arm_feature(env, ARM_FEATURE_XSCALE) && ri->cp < 14
- && extract32(env->cp15.c15_cpar, ri->cp, 1) == 0) {
- raise_exception(env, EXCP_UDEF, syndrome, exception_target_el(env));
- }
-
- if (!ri->accessfn) {
- return;
- }
-
- switch (ri->accessfn(env, ri, isread)) {
- case CP_ACCESS_OK:
- return;
- case CP_ACCESS_TRAP:
- target_el = exception_target_el(env);
- break;
- case CP_ACCESS_TRAP_EL2:
- /* Requesting a trap to EL2 when we're in EL3 or S-EL0/1 is
- * a bug in the access function.
- */
- assert(!arm_is_secure(env) && arm_current_el(env) != 3);
- target_el = 2;
- break;
- case CP_ACCESS_TRAP_EL3:
- target_el = 3;
- break;
- case CP_ACCESS_TRAP_UNCATEGORIZED:
- target_el = exception_target_el(env);
- syndrome = syn_uncategorized();
- break;
- case CP_ACCESS_TRAP_UNCATEGORIZED_EL2:
- target_el = 2;
- syndrome = syn_uncategorized();
- break;
- case CP_ACCESS_TRAP_UNCATEGORIZED_EL3:
- target_el = 3;
- syndrome = syn_uncategorized();
- break;
- case CP_ACCESS_TRAP_FP_EL2:
- target_el = 2;
- /* Since we are an implementation that takes exceptions on a trapped
- * conditional insn only if the insn has passed its condition code
- * check, we take the IMPDEF choice to always report CV=1 COND=0xe
- * (which is also the required value for AArch64 traps).
- */
- syndrome = syn_fp_access_trap(1, 0xe, false);
- break;
- case CP_ACCESS_TRAP_FP_EL3:
- target_el = 3;
- syndrome = syn_fp_access_trap(1, 0xe, false);
- break;
- default:
- g_assert_not_reached();
- }
-
- raise_exception(env, EXCP_UDEF, syndrome, target_el);
-}
-
-void HELPER(set_cp_reg)(CPUARMState *env, void *rip, uint32_t value)
-{
- const ARMCPRegInfo *ri = rip;
-
- if (ri->type & ARM_CP_IO) {
- qemu_mutex_lock_iothread();
- ri->writefn(env, ri, value);
- qemu_mutex_unlock_iothread();
- } else {
- ri->writefn(env, ri, value);
- }
-}
-
-uint32_t HELPER(get_cp_reg)(CPUARMState *env, void *rip)
-{
- const ARMCPRegInfo *ri = rip;
- uint32_t res;
-
- if (ri->type & ARM_CP_IO) {
- qemu_mutex_lock_iothread();
- res = ri->readfn(env, ri);
- qemu_mutex_unlock_iothread();
- } else {
- res = ri->readfn(env, ri);
- }
-
- return res;
-}
-
-void HELPER(set_cp_reg64)(CPUARMState *env, void *rip, uint64_t value)
-{
- const ARMCPRegInfo *ri = rip;
-
- if (ri->type & ARM_CP_IO) {
- qemu_mutex_lock_iothread();
- ri->writefn(env, ri, value);
- qemu_mutex_unlock_iothread();
- } else {
- ri->writefn(env, ri, value);
- }
-}
-
-uint64_t HELPER(get_cp_reg64)(CPUARMState *env, void *rip)
-{
- const ARMCPRegInfo *ri = rip;
- uint64_t res;
-
- if (ri->type & ARM_CP_IO) {
- qemu_mutex_lock_iothread();
- res = ri->readfn(env, ri);
- qemu_mutex_unlock_iothread();
- } else {
- res = ri->readfn(env, ri);
- }
-
- return res;
-}
-
-void HELPER(msr_i_pstate)(CPUARMState *env, uint32_t op, uint32_t imm)
-{
- /* MSR_i to update PSTATE. This is OK from EL0 only if UMA is set.
- * Note that SPSel is never OK from EL0; we rely on handle_msr_i()
- * to catch that case at translate time.
- */
- if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UMA)) {
- uint32_t syndrome = syn_aa64_sysregtrap(0, extract32(op, 0, 3),
- extract32(op, 3, 3), 4,
- imm, 0x1f, 0);
- raise_exception(env, EXCP_UDEF, syndrome, exception_target_el(env));
- }
-
- switch (op) {
- case 0x05: /* SPSel */
- update_spsel(env, imm);
- break;
- case 0x1e: /* DAIFSet */
- env->daif |= (imm << 6) & PSTATE_DAIF;
- break;
- case 0x1f: /* DAIFClear */
- env->daif &= ~((imm << 6) & PSTATE_DAIF);
- break;
- default:
- g_assert_not_reached();
- }
-}
-
-void HELPER(clear_pstate_ss)(CPUARMState *env)
-{
- env->pstate &= ~PSTATE_SS;
-}
-
-void HELPER(pre_hvc)(CPUARMState *env)
-{
- ARMCPU *cpu = arm_env_get_cpu(env);
- int cur_el = arm_current_el(env);
- /* FIXME: Use actual secure state. */
- bool secure = false;
- bool undef;
-
- if (arm_is_psci_call(cpu, EXCP_HVC)) {
- /* If PSCI is enabled and this looks like a valid PSCI call then
- * that overrides the architecturally mandated HVC behaviour.
- */
- return;
- }
-
- if (!arm_feature(env, ARM_FEATURE_EL2)) {
- /* If EL2 doesn't exist, HVC always UNDEFs */
- undef = true;
- } else if (arm_feature(env, ARM_FEATURE_EL3)) {
- /* EL3.HCE has priority over EL2.HCD. */
- undef = !(env->cp15.scr_el3 & SCR_HCE);
- } else {
- undef = env->cp15.hcr_el2 & HCR_HCD;
- }
-
- /* In ARMv7 and ARMv8/AArch32, HVC is undef in secure state.
- * For ARMv8/AArch64, HVC is allowed in EL3.
- * Note that we've already trapped HVC from EL0 at translation
- * time.
- */
- if (secure && (!is_a64(env) || cur_el == 1)) {
- undef = true;
- }
-
- if (undef) {
- raise_exception(env, EXCP_UDEF, syn_uncategorized(),
- exception_target_el(env));
- }
-}
-
-void HELPER(pre_smc)(CPUARMState *env, uint32_t syndrome)
-{
- ARMCPU *cpu = arm_env_get_cpu(env);
- int cur_el = arm_current_el(env);
- bool secure = arm_is_secure(env);
- bool smd = env->cp15.scr_el3 & SCR_SMD;
- /* On ARMv8 with EL3 AArch64, SMD applies to both S and NS state.
- * On ARMv8 with EL3 AArch32, or ARMv7 with the Virtualization
- * extensions, SMD only applies to NS state.
- * On ARMv7 without the Virtualization extensions, the SMD bit
- * doesn't exist, but we forbid the guest to set it to 1 in scr_write(),
- * so we need not special case this here.
- */
- bool undef = arm_feature(env, ARM_FEATURE_AARCH64) ? smd : smd && !secure;
-
- if (!arm_feature(env, ARM_FEATURE_EL3) &&
- cpu->psci_conduit != QEMU_PSCI_CONDUIT_SMC) {
- /* If we have no EL3 then SMC always UNDEFs and can't be
- * trapped to EL2. PSCI-via-SMC is a sort of ersatz EL3
- * firmware within QEMU, and we want an EL2 guest to be able
- * to forbid its EL1 from making PSCI calls into QEMU's
- * "firmware" via HCR.TSC, so for these purposes treat
- * PSCI-via-SMC as implying an EL3.
- */
- undef = true;
- } else if (!secure && cur_el == 1 && (env->cp15.hcr_el2 & HCR_TSC)) {
- /* In NS EL1, HCR controlled routing to EL2 has priority over SMD.
- * We also want an EL2 guest to be able to forbid its EL1 from
- * making PSCI calls into QEMU's "firmware" via HCR.TSC.
- */
- raise_exception(env, EXCP_HYP_TRAP, syndrome, 2);
- }
-
- /* If PSCI is enabled and this looks like a valid PSCI call then
- * suppress the UNDEF -- we'll catch the SMC exception and
- * implement the PSCI call behaviour there.
- */
- if (undef && !arm_is_psci_call(cpu, EXCP_SMC)) {
- raise_exception(env, EXCP_UDEF, syn_uncategorized(),
- exception_target_el(env));
- }
-}
-
-static int el_from_spsr(uint32_t spsr)
-{
- /* Return the exception level that this SPSR is requesting a return to,
- * or -1 if it is invalid (an illegal return)
- */
- if (spsr & PSTATE_nRW) {
- switch (spsr & CPSR_M) {
- case ARM_CPU_MODE_USR:
- return 0;
- case ARM_CPU_MODE_HYP:
- return 2;
- case ARM_CPU_MODE_FIQ:
- case ARM_CPU_MODE_IRQ:
- case ARM_CPU_MODE_SVC:
- case ARM_CPU_MODE_ABT:
- case ARM_CPU_MODE_UND:
- case ARM_CPU_MODE_SYS:
- return 1;
- case ARM_CPU_MODE_MON:
- /* Returning to Mon from AArch64 is never possible,
- * so this is an illegal return.
- */
- default:
- return -1;
- }
- } else {
- if (extract32(spsr, 1, 1)) {
- /* Return with reserved M[1] bit set */
- return -1;
- }
- if (extract32(spsr, 0, 4) == 1) {
- /* return to EL0 with M[0] bit set */
- return -1;
- }
- return extract32(spsr, 2, 2);
- }
-}
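A standalone restatement of the AArch64 half of el_from_spsr(), with a few worked SPSR.M encodings; aa64_spsr_el() and SPSR_nRW below are local illustrative names rather than QEMU definitions.

#include <assert.h>
#include <stdint.h>

#define SPSR_nRW (1u << 4)     /* M[4]: set means return to AArch32 */

/* AArch64 SPSR.M[3:0] = {EL, 0, SPSel}; returns -1 for illegal encodings. */
static int aa64_spsr_el(uint32_t spsr)
{
    if (spsr & SPSR_nRW) {
        return -2;             /* AArch32 mode: see the switch above */
    }
    if (spsr & 2) {
        return -1;             /* reserved M[1] bit set */
    }
    if ((spsr & 0xf) == 1) {
        return -1;             /* EL0 with SPSel == 1 */
    }
    return (spsr >> 2) & 3;
}

int main(void)
{
    assert(aa64_spsr_el(0x0) == 0);    /* EL0t */
    assert(aa64_spsr_el(0x5) == 1);    /* EL1h */
    assert(aa64_spsr_el(0x9) == 2);    /* EL2h */
    assert(aa64_spsr_el(0x1) == -1);   /* EL0 with SPSel = 1 is illegal */
    return 0;
}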
-
-void HELPER(exception_return)(CPUARMState *env)
-{
- int cur_el = arm_current_el(env);
- unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
- uint32_t spsr = env->banked_spsr[spsr_idx];
- int new_el;
- bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;
-
- aarch64_save_sp(env, cur_el);
-
- arm_clear_exclusive(env);
-
- /* We must squash the PSTATE.SS bit to zero unless both of the
- * following hold:
- * 1. debug exceptions are currently disabled
- * 2. singlestep will be active in the EL we return to
- * We check 1 here and 2 after we've done the pstate/cpsr write() to
- * transition to the EL we're going to.
- */
- if (arm_generate_debug_exceptions(env)) {
- spsr &= ~PSTATE_SS;
- }
-
- new_el = el_from_spsr(spsr);
- if (new_el == -1) {
- goto illegal_return;
- }
- if (new_el > cur_el
- || (new_el == 2 && !arm_feature(env, ARM_FEATURE_EL2))) {
- /* Disallow return to an EL which is unimplemented or higher
- * than the current one.
- */
- goto illegal_return;
- }
-
- if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
- /* Return to an EL which is configured for a different register width */
- goto illegal_return;
- }
-
- if (new_el == 2 && arm_is_secure_below_el3(env)) {
- /* Return to the non-existent secure-EL2 */
- goto illegal_return;
- }
-
- if (new_el == 1 && (env->cp15.hcr_el2 & HCR_TGE)
- && !arm_is_secure_below_el3(env)) {
- goto illegal_return;
- }
-
- qemu_mutex_lock_iothread();
- arm_call_pre_el_change_hook(arm_env_get_cpu(env));
- qemu_mutex_unlock_iothread();
-
- if (!return_to_aa64) {
- env->aarch64 = 0;
- /* We do a raw CPSR write because aarch64_sync_64_to_32()
- * will sort the register banks out for us, and we've already
- * caught all the bad-mode cases in el_from_spsr().
- */
- cpsr_write(env, spsr, ~0, CPSRWriteRaw);
- if (!arm_singlestep_active(env)) {
- env->uncached_cpsr &= ~PSTATE_SS;
- }
- aarch64_sync_64_to_32(env);
-
- if (spsr & CPSR_T) {
- env->regs[15] = env->elr_el[cur_el] & ~0x1;
- } else {
- env->regs[15] = env->elr_el[cur_el] & ~0x3;
- }
- qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
- "AArch32 EL%d PC 0x%" PRIx32 "\n",
- cur_el, new_el, env->regs[15]);
- } else {
- env->aarch64 = 1;
- pstate_write(env, spsr);
- if (!arm_singlestep_active(env)) {
- env->pstate &= ~PSTATE_SS;
- }
- aarch64_restore_sp(env, new_el);
- env->pc = env->elr_el[cur_el];
- qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
- "AArch64 EL%d PC 0x%" PRIx64 "\n",
- cur_el, new_el, env->pc);
- }
- /*
- * Note that cur_el can never be 0. If new_el is 0, then
- * el0_a64 is return_to_aa64, else el0_a64 is ignored.
- */
- aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);
-
- qemu_mutex_lock_iothread();
- arm_call_el_change_hook(arm_env_get_cpu(env));
- qemu_mutex_unlock_iothread();
-
- return;
-
-illegal_return:
- /* Illegal return events of various kinds have architecturally
- * mandated behaviour:
- * restore NZCV and DAIF from SPSR_ELx
- * set PSTATE.IL
- * restore PC from ELR_ELx
- * no change to exception level, execution state or stack pointer
- */
- env->pstate |= PSTATE_IL;
- env->pc = env->elr_el[cur_el];
- spsr &= PSTATE_NZCV | PSTATE_DAIF;
- spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF);
- pstate_write(env, spsr);
- if (!arm_singlestep_active(env)) {
- env->pstate &= ~PSTATE_SS;
- }
- qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
- "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
-}
-
-/* Return true if the linked breakpoint entry lbn passes its checks */
-static bool linked_bp_matches(ARMCPU *cpu, int lbn)
-{
- CPUARMState *env = &cpu->env;
- uint64_t bcr = env->cp15.dbgbcr[lbn];
- int brps = extract32(cpu->dbgdidr, 24, 4);
- int ctx_cmps = extract32(cpu->dbgdidr, 20, 4);
- int bt;
- uint32_t contextidr;
-
- /* Links to unimplemented or non-context aware breakpoints are
- * CONSTRAINED UNPREDICTABLE: either behave as if disabled, or
- * as if linked to an UNKNOWN context-aware breakpoint (in which
- * case DBGWCR<n>_EL1.LBN must indicate that breakpoint).
- * We choose the former.
- */
- if (lbn > brps || lbn < (brps - ctx_cmps)) {
- return false;
- }
-
- bcr = env->cp15.dbgbcr[lbn];
-
- if (extract64(bcr, 0, 1) == 0) {
- /* Linked breakpoint disabled : generate no events */
- return false;
- }
-
- bt = extract64(bcr, 20, 4);
-
- /* We match the whole register even if this is AArch32 using the
- * short descriptor format (in which case it holds both PROCID and ASID),
- * since we don't implement the optional v7 context ID masking.
- */
- contextidr = extract64(env->cp15.contextidr_el[1], 0, 32);
-
- switch (bt) {
- case 3: /* linked context ID match */
- if (arm_current_el(env) > 1) {
- /* Context matches never fire in EL2 or (AArch64) EL3 */
- return false;
- }
- return (contextidr == extract64(env->cp15.dbgbvr[lbn], 0, 32));
- case 5: /* linked address mismatch (reserved in AArch64) */
- case 9: /* linked VMID match (reserved if no EL2) */
- case 11: /* linked context ID and VMID match (reserved if no EL2) */
- default:
- /* Links to Unlinked context breakpoints must generate no
- * events; we choose to do the same for reserved values too.
- */
- return false;
- }
-
- return false;
-}
-
-static bool bp_wp_matches(ARMCPU *cpu, int n, bool is_wp)
-{
- CPUARMState *env = &cpu->env;
- uint64_t cr;
- int pac, hmc, ssc, wt, lbn;
- /* Note that for watchpoints the check is against the CPU security
- * state, not the S/NS attribute on the offending data access.
- */
- bool is_secure = arm_is_secure(env);
- int access_el = arm_current_el(env);
-
- if (is_wp) {
- CPUWatchpoint *wp = env->cpu_watchpoint[n];
-
- if (!wp || !(wp->flags & BP_WATCHPOINT_HIT)) {
- return false;
- }
- cr = env->cp15.dbgwcr[n];
- if (wp->hitattrs.user) {
- /* The LDRT/STRT/LDT/STT "unprivileged access" instructions should
- * match watchpoints as if they were accesses done at EL0, even if
- * the CPU is at EL1 or higher.
- */
- access_el = 0;
- }
- } else {
- uint64_t pc = is_a64(env) ? env->pc : env->regs[15];
-
- if (!env->cpu_breakpoint[n] || env->cpu_breakpoint[n]->pc != pc) {
- return false;
- }
- cr = env->cp15.dbgbcr[n];
- }
- /* The WATCHPOINT_HIT flag guarantees us that the watchpoint is
- * enabled and that the address and access type match; for breakpoints
- * we know the address matched; check the remaining fields, including
- * linked breakpoints. We rely on WCR and BCR having the same layout
- * for the LBN, SSC, HMC, PAC/PMC and is-linked fields.
- * Note that some combinations of {PAC, HMC, SSC} are reserved and
- * must act either like some valid combination or as if the watchpoint
- * were disabled. We choose the former, and use this together with
- * the fact that EL3 must always be Secure and EL2 must always be
- * Non-Secure to simplify the code slightly compared to the full
- * table in the ARM ARM.
- */
- pac = extract64(cr, 1, 2);
- hmc = extract64(cr, 13, 1);
- ssc = extract64(cr, 14, 2);
-
- switch (ssc) {
- case 0:
- break;
- case 1:
- case 3:
- if (is_secure) {
- return false;
- }
- break;
- case 2:
- if (!is_secure) {
- return false;
- }
- break;
- }
-
- switch (access_el) {
- case 3:
- case 2:
- if (!hmc) {
- return false;
- }
- break;
- case 1:
- if (extract32(pac, 0, 1) == 0) {
- return false;
- }
- break;
- case 0:
- if (extract32(pac, 1, 1) == 0) {
- return false;
- }
- break;
- default:
- g_assert_not_reached();
- }
-
- wt = extract64(cr, 20, 1);
- lbn = extract64(cr, 16, 4);
-
- if (wt && !linked_bp_matches(cpu, lbn)) {
- return false;
- }
-
- return true;
-}
-
-static bool check_watchpoints(ARMCPU *cpu)
-{
- CPUARMState *env = &cpu->env;
- int n;
-
- /* If watchpoints are disabled globally or we can't take debug
- * exceptions here then watchpoint firings are ignored.
- */
- if (extract32(env->cp15.mdscr_el1, 15, 1) == 0
- || !arm_generate_debug_exceptions(env)) {
- return false;
- }
-
- for (n = 0; n < ARRAY_SIZE(env->cpu_watchpoint); n++) {
- if (bp_wp_matches(cpu, n, true)) {
- return true;
- }
- }
- return false;
-}
-
-static bool check_breakpoints(ARMCPU *cpu)
-{
- CPUARMState *env = &cpu->env;
- int n;
-
- /* If breakpoints are disabled globally or we can't take debug
- * exceptions here then breakpoint firings are ignored.
- */
- if (extract32(env->cp15.mdscr_el1, 15, 1) == 0
- || !arm_generate_debug_exceptions(env)) {
- return false;
- }
-
- for (n = 0; n < ARRAY_SIZE(env->cpu_breakpoint); n++) {
- if (bp_wp_matches(cpu, n, false)) {
- return true;
- }
- }
- return false;
-}
-
-void HELPER(check_breakpoints)(CPUARMState *env)
-{
- ARMCPU *cpu = arm_env_get_cpu(env);
-
- if (check_breakpoints(cpu)) {
- HELPER(exception_internal)(env, EXCP_DEBUG);
- }
-}
-
-bool arm_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp)
-{
- /* Called by core code when a CPU watchpoint fires; need to check if this
- * is also an architectural watchpoint match.
- */
- ARMCPU *cpu = ARM_CPU(cs);
-
- return check_watchpoints(cpu);
-}
-
-vaddr arm_adjust_watchpoint_address(CPUState *cs, vaddr addr, int len)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
-
- /* In BE32 system mode, target memory is stored byteswapped (on a
- * little-endian host system), and by the time we reach here (via an
- * opcode helper) the addresses of subword accesses have been adjusted
- * to account for that, which means that watchpoints will not match.
- * Undo the adjustment here.
- */
- if (arm_sctlr_b(env)) {
- if (len == 1) {
- addr ^= 3;
- } else if (len == 2) {
- addr ^= 2;
- }
- }
-
- return addr;
-}
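For concreteness, the BE32 address munging being undone above, checked on a sample address (plain illustrative C, not QEMU code): a byte at word offset 0 is stored at host offset 3, a halfword at word offset 0 at host offset 2, and word accesses are unchanged.

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t va = 0x1000;

    /* Byte accesses: flip the low two address bits. */
    assert((va ^ 3) == 0x1003);
    assert(((va + 3) ^ 3) == 0x1000);

    /* Halfword accesses: flip only bit 1. */
    assert((va ^ 2) == 0x1002);
    assert(((va + 2) ^ 2) == 0x1000);
    return 0;
}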
-
-void arm_debug_excp_handler(CPUState *cs)
-{
- /* Called by core code when a watchpoint or breakpoint fires;
- * need to check which one and raise the appropriate exception.
- */
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
- CPUWatchpoint *wp_hit = cs->watchpoint_hit;
-
- if (wp_hit) {
- if (wp_hit->flags & BP_CPU) {
- bool wnr = (wp_hit->flags & BP_WATCHPOINT_HIT_WRITE) != 0;
- bool same_el = arm_debug_target_el(env) == arm_current_el(env);
-
- cs->watchpoint_hit = NULL;
-
- env->exception.fsr = arm_debug_exception_fsr(env);
- env->exception.vaddress = wp_hit->hitaddr;
- raise_exception(env, EXCP_DATA_ABORT,
- syn_watchpoint(same_el, 0, wnr),
- arm_debug_target_el(env));
- }
- } else {
- uint64_t pc = is_a64(env) ? env->pc : env->regs[15];
- bool same_el = (arm_debug_target_el(env) == arm_current_el(env));
-
- /* (1) GDB breakpoints should be handled first.
- * (2) Do not raise a CPU exception if no CPU breakpoint has fired,
- * since singlestep is also done by generating a debug internal
- * exception.
- */
- if (cpu_breakpoint_test(cs, pc, BP_GDB)
- || !cpu_breakpoint_test(cs, pc, BP_CPU)) {
- return;
- }
-
- env->exception.fsr = arm_debug_exception_fsr(env);
- /* FAR is UNKNOWN: clear vaddress to avoid potentially exposing
- * values to the guest that it shouldn't be able to see at its
- * exception/security level.
- */
- env->exception.vaddress = 0;
- raise_exception(env, EXCP_PREFETCH_ABORT,
- syn_breakpoint(same_el),
- arm_debug_target_el(env));
- }
-}
-
-/* ??? Flag setting arithmetic is awkward because we need to do comparisons.
- The only way to do that in TCG is a conditional branch, which clobbers
- all our temporaries. For now implement these as helper functions. */
-
-/* Similarly for variable shift instructions. */
-
-uint32_t HELPER(shl_cc)(CPUARMState *env, uint32_t x, uint32_t i)
-{
- int shift = i & 0xff;
- if (shift >= 32) {
- if (shift == 32)
- env->CF = x & 1;
- else
- env->CF = 0;
- return 0;
- } else if (shift != 0) {
- env->CF = (x >> (32 - shift)) & 1;
- return x << shift;
- }
- return x;
-}
-
-uint32_t HELPER(shr_cc)(CPUARMState *env, uint32_t x, uint32_t i)
-{
- int shift = i & 0xff;
- if (shift >= 32) {
- if (shift == 32)
- env->CF = (x >> 31) & 1;
- else
- env->CF = 0;
- return 0;
- } else if (shift != 0) {
- env->CF = (x >> (shift - 1)) & 1;
- return x >> shift;
- }
- return x;
-}
-
-uint32_t HELPER(sar_cc)(CPUARMState *env, uint32_t x, uint32_t i)
-{
- int shift = i & 0xff;
- if (shift >= 32) {
- env->CF = (x >> 31) & 1;
- return (int32_t)x >> 31;
- } else if (shift != 0) {
- env->CF = (x >> (shift - 1)) & 1;
- return (int32_t)x >> shift;
- }
- return x;
-}
-
-uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)
-{
- int shift1, shift;
- shift1 = i & 0xff;
- shift = shift1 & 0x1f;
- if (shift == 0) {
- if (shift1 != 0)
- env->CF = (x >> 31) & 1;
- return x;
- } else {
- env->CF = (x >> (shift - 1)) & 1;
- return ((uint32_t)x >> shift) | (x << (32 - shift));
- }
-}
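The carry-out rules implemented by shl_cc() above can be spot-checked with a small standalone program; lsl_carry() is an invented helper that mirrors the env->CF updates for non-zero shift amounts, and is not part of this patch.

#include <assert.h>
#include <stdint.h>

/* Carry-out of LSL by shift (1..255), mirroring shl_cc() above. */
static uint32_t lsl_carry(uint32_t x, unsigned shift)
{
    if (shift > 32) {
        return 0;
    }
    return (x >> (32 - shift)) & 1;    /* shift == 32 picks old bit 0 */
}

int main(void)
{
    assert(lsl_carry(0x80000000u, 1) == 1);   /* top bit shifts into C */
    assert(lsl_carry(0x00000001u, 32) == 1);  /* by exactly 32: C = bit 0 */
    assert(lsl_carry(0x00000001u, 33) == 0);  /* by more than 32: C = 0 */
    return 0;
}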
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
new file mode 100644
index 0000000000..31ae43f60e
--- /dev/null
+++ b/target/arm/ptw.c
@@ -0,0 +1,3650 @@
+/*
+ * ARM page table walking.
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/range.h"
+#include "qemu/main-loop.h"
+#include "exec/exec-all.h"
+#include "cpu.h"
+#include "internals.h"
+#include "cpu-features.h"
+#include "idau.h"
+#ifdef CONFIG_TCG
+# include "tcg/oversized-guest.h"
+#endif
+
+typedef struct S1Translate {
+ /*
+ * in_mmu_idx : specifies which TTBR, TCR, etc to use for the walk.
+ * Together with in_space, specifies the architectural translation regime.
+ */
+ ARMMMUIdx in_mmu_idx;
+ /*
+ * in_ptw_idx: specifies which mmuidx to use for the actual
+ * page table descriptor load operations. This will be one of the
+ * ARMMMUIdx_Stage2* or one of the ARMMMUIdx_Phys_* indexes.
+ * If a Secure ptw is "downgraded" to NonSecure by an NSTable bit,
+ * this field is updated accordingly.
+ */
+ ARMMMUIdx in_ptw_idx;
+ /*
+ * in_space: the security space for this walk. This plus
+ * the in_mmu_idx specify the architectural translation regime.
+ * If a Secure ptw is "downgraded" to NonSecure by an NSTable bit,
+ * this field is updated accordingly.
+ *
+ * Note that the security space for the in_ptw_idx may be different
+ * from that for the in_mmu_idx. We do not need to explicitly track
+ * the in_ptw_idx security space because:
+ * - if the in_ptw_idx is an ARMMMUIdx_Phys_* then the mmuidx
+ * itself specifies the security space
+ * - if the in_ptw_idx is an ARMMMUIdx_Stage2* then the security
+ * space used for ptw reads is the same as that of the security
+ * space of the stage 1 translation for all cases except where
+ * stage 1 is Secure; in that case the only possibilities for
+ * the ptw read are Secure and NonSecure, and the in_ptw_idx
+ * value being Stage2 vs Stage2_S distinguishes those.
+ */
+ ARMSecuritySpace in_space;
+ /*
+ * in_debug: is this a QEMU debug access (gdbstub, etc)? Debug
+ * accesses will not update the guest page table access flags
+ * and will not change the state of the softmmu TLBs.
+ */
+ bool in_debug;
+ /*
+ * If this is stage 2 of a stage 1+2 page table walk, then this must
+ * be true if stage 1 is an EL0 access; otherwise this is ignored.
+ * Stage 2 is indicated by in_mmu_idx set to ARMMMUIdx_Stage2{,_S}.
+ */
+ bool in_s1_is_el0;
+ bool out_rw;
+ bool out_be;
+ ARMSecuritySpace out_space;
+ hwaddr out_virt;
+ hwaddr out_phys;
+ void *out_host;
+} S1Translate;
+
+static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw,
+ target_ulong address,
+ MMUAccessType access_type,
+ GetPhysAddrResult *result,
+ ARMMMUFaultInfo *fi);
+
+static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw,
+ target_ulong address,
+ MMUAccessType access_type,
+ GetPhysAddrResult *result,
+ ARMMMUFaultInfo *fi);
+
+/* This mapping is common between ID_AA64MMFR0.PARANGE and TCR_ELx.{I}PS. */
+static const uint8_t pamax_map[] = {
+ [0] = 32,
+ [1] = 36,
+ [2] = 40,
+ [3] = 42,
+ [4] = 44,
+ [5] = 48,
+ [6] = 52,
+};
+
+/*
+ * The cpu-specific constant value of PAMax; also used by hw/arm/virt.
+ * Note that machvirt_init calls this on a CPU that is inited but not realized!
+ */
+unsigned int arm_pamax(ARMCPU *cpu)
+{
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+ unsigned int parange =
+ FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE);
+
+ /*
+ * id_aa64mmfr0 is a read-only register so values outside of the
+ * supported mappings can be considered an implementation error.
+ */
+ assert(parange < ARRAY_SIZE(pamax_map));
+ return pamax_map[parange];
+ }
+
+ if (arm_feature(&cpu->env, ARM_FEATURE_LPAE)) {
+ /* v7 or v8 with LPAE */
+ return 40;
+ }
+ /* Anything else */
+ return 32;
+}
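A worked example of the mapping arm_pamax() relies on (standalone, not QEMU code): ID_AA64MMFR0.PARANGE values 0..6 select 32, 36, 40, 42, 44, 48 or 52 bits of physical address; 52-bit output comes with FEAT_LPA (and FEAT_LPA2 for the smaller granules).

#include <assert.h>
#include <stdint.h>

static const uint8_t parange_to_pamax[] = { 32, 36, 40, 42, 44, 48, 52 };

int main(void)
{
    /* e.g. a CPU reporting PARANGE == 5 implements 48-bit PAs (256 TB). */
    assert(parange_to_pamax[5] == 48);
    /* PARANGE == 0 is the 32-bit (4 GB) minimum, the pre-LPAE default. */
    assert(parange_to_pamax[0] == 32);
    return 0;
}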
+
+/*
+ * Convert a possible stage1+2 MMU index into the appropriate stage 1 MMU index
+ */
+ARMMMUIdx stage_1_mmu_idx(ARMMMUIdx mmu_idx)
+{
+ switch (mmu_idx) {
+ case ARMMMUIdx_E10_0:
+ return ARMMMUIdx_Stage1_E0;
+ case ARMMMUIdx_E10_1:
+ return ARMMMUIdx_Stage1_E1;
+ case ARMMMUIdx_E10_1_PAN:
+ return ARMMMUIdx_Stage1_E1_PAN;
+ default:
+ return mmu_idx;
+ }
+}
+
+ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env)
+{
+ return stage_1_mmu_idx(arm_mmu_idx(env));
+}
+
+/*
+ * Return where we should do ptw loads from for a stage 2 walk.
+ * This depends on whether the address we are looking up is a
+ * Secure IPA or a NonSecure IPA, which we know from whether this is
+ * Stage2 or Stage2_S.
+ * If this is the Secure EL1&0 regime we need to check the NSW and SW bits.
+ */
+static ARMMMUIdx ptw_idx_for_stage_2(CPUARMState *env, ARMMMUIdx stage2idx)
+{
+ bool s2walk_secure;
+
+ /*
+ * We're OK to check the current state of the CPU here because
+ * (1) we always invalidate all TLBs when the SCR_EL3.NS or SCR_EL3.NSE bit
+ * changes.
+ * (2) there's no way to do a lookup that cares about Stage 2 for a
+ * different security state to the current one for AArch64, and AArch32
+ * never has a secure EL2. (AArch32 ATS12NSO[UP][RW] allow EL3 to do
+ * an NS stage 1+2 lookup while the NS bit is 0.)
+ */
+ if (!arm_el_is_aa64(env, 3)) {
+ return ARMMMUIdx_Phys_NS;
+ }
+
+ switch (arm_security_space_below_el3(env)) {
+ case ARMSS_NonSecure:
+ return ARMMMUIdx_Phys_NS;
+ case ARMSS_Realm:
+ return ARMMMUIdx_Phys_Realm;
+ case ARMSS_Secure:
+ if (stage2idx == ARMMMUIdx_Stage2_S) {
+ s2walk_secure = !(env->cp15.vstcr_el2 & VSTCR_SW);
+ } else {
+ s2walk_secure = !(env->cp15.vtcr_el2 & VTCR_NSW);
+ }
+ return s2walk_secure ? ARMMMUIdx_Phys_S : ARMMMUIdx_Phys_NS;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static bool regime_translation_big_endian(CPUARMState *env, ARMMMUIdx mmu_idx)
+{
+ return (regime_sctlr(env, mmu_idx) & SCTLR_EE) != 0;
+}
+
+/* Return the TTBR associated with this translation regime */
+static uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn)
+{
+ if (mmu_idx == ARMMMUIdx_Stage2) {
+ return env->cp15.vttbr_el2;
+ }
+ if (mmu_idx == ARMMMUIdx_Stage2_S) {
+ return env->cp15.vsttbr_el2;
+ }
+ if (ttbrn == 0) {
+ return env->cp15.ttbr0_el[regime_el(env, mmu_idx)];
+ } else {
+ return env->cp15.ttbr1_el[regime_el(env, mmu_idx)];
+ }
+}
+
+/* Return true if the specified stage of address translation is disabled */
+static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx,
+ ARMSecuritySpace space)
+{
+ uint64_t hcr_el2;
+
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ bool is_secure = arm_space_is_secure(space);
+ switch (env->v7m.mpu_ctrl[is_secure] &
+ (R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK)) {
+ case R_V7M_MPU_CTRL_ENABLE_MASK:
+ /* Enabled, but not for HardFault and NMI */
+ return mmu_idx & ARM_MMU_IDX_M_NEGPRI;
+ case R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK:
+ /* Enabled for all cases */
+ return false;
+ case 0:
+ default:
+ /*
+ * HFNMIENA set and ENABLE clear is UNPREDICTABLE, but
+ * we warned about that in armv7m_nvic.c when the guest set it.
+ */
+ return true;
+ }
+ }
+
+
+ switch (mmu_idx) {
+ case ARMMMUIdx_Stage2:
+ case ARMMMUIdx_Stage2_S:
+ /* HCR.DC means HCR.VM behaves as 1 */
+ hcr_el2 = arm_hcr_el2_eff_secstate(env, space);
+ return (hcr_el2 & (HCR_DC | HCR_VM)) == 0;
+
+ case ARMMMUIdx_E10_0:
+ case ARMMMUIdx_E10_1:
+ case ARMMMUIdx_E10_1_PAN:
+ /* TGE means that EL0/1 act as if SCTLR_EL1.M is zero */
+ hcr_el2 = arm_hcr_el2_eff_secstate(env, space);
+ if (hcr_el2 & HCR_TGE) {
+ return true;
+ }
+ break;
+
+ case ARMMMUIdx_Stage1_E0:
+ case ARMMMUIdx_Stage1_E1:
+ case ARMMMUIdx_Stage1_E1_PAN:
+ /* HCR.DC means SCTLR_EL1.M behaves as 0 */
+ hcr_el2 = arm_hcr_el2_eff_secstate(env, space);
+ if (hcr_el2 & HCR_DC) {
+ return true;
+ }
+ break;
+
+ case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_E20_2:
+ case ARMMMUIdx_E20_2_PAN:
+ case ARMMMUIdx_E2:
+ case ARMMMUIdx_E3:
+ break;
+
+ case ARMMMUIdx_Phys_S:
+ case ARMMMUIdx_Phys_NS:
+ case ARMMMUIdx_Phys_Root:
+ case ARMMMUIdx_Phys_Realm:
+ /* No translation for physical address spaces. */
+ return true;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0;
+}
+
+static bool granule_protection_check(CPUARMState *env, uint64_t paddress,
+ ARMSecuritySpace pspace,
+ ARMMMUFaultInfo *fi)
+{
+ MemTxAttrs attrs = {
+ .secure = true,
+ .space = ARMSS_Root,
+ };
+ ARMCPU *cpu = env_archcpu(env);
+ uint64_t gpccr = env->cp15.gpccr_el3;
+ unsigned pps, pgs, l0gptsz, level = 0;
+ uint64_t tableaddr, pps_mask, align, entry, index;
+ AddressSpace *as;
+ MemTxResult result;
+ int gpi;
+
+ if (!FIELD_EX64(gpccr, GPCCR, GPC)) {
+ return true;
+ }
+
+ /*
+ * GPC Priority 1 (R_GMGRR):
+ * R_JWCSM: If the configuration of GPCCR_EL3 is invalid,
+ * the access fails as GPT walk fault at level 0.
+ */
+
+ /*
+ * Configuration of PPS to a value exceeding the implemented
+ * physical address size is invalid.
+ */
+ pps = FIELD_EX64(gpccr, GPCCR, PPS);
+ if (pps > FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE)) {
+ goto fault_walk;
+ }
+ pps = pamax_map[pps];
+ pps_mask = MAKE_64BIT_MASK(0, pps);
+
+ switch (FIELD_EX64(gpccr, GPCCR, SH)) {
+ case 0b10: /* outer shareable */
+ break;
+ case 0b00: /* non-shareable */
+ case 0b11: /* inner shareable */
+ /* Inner and Outer non-cacheable requires Outer shareable. */
+ if (FIELD_EX64(gpccr, GPCCR, ORGN) == 0 &&
+ FIELD_EX64(gpccr, GPCCR, IRGN) == 0) {
+ goto fault_walk;
+ }
+ break;
+ default: /* reserved */
+ goto fault_walk;
+ }
+
+ switch (FIELD_EX64(gpccr, GPCCR, PGS)) {
+ case 0b00: /* 4KB */
+ pgs = 12;
+ break;
+ case 0b01: /* 64KB */
+ pgs = 16;
+ break;
+ case 0b10: /* 16KB */
+ pgs = 14;
+ break;
+ default: /* reserved */
+ goto fault_walk;
+ }
+
+ /* Note this field is read-only and fixed at reset. */
+ l0gptsz = 30 + FIELD_EX64(gpccr, GPCCR, L0GPTSZ);
+
+ /*
+ * GPC Priority 2: Secure, Realm or Root address exceeds PPS.
+ * R_CPDSB: A NonSecure physical address input exceeding PPS
+ * does not experience any fault.
+ */
+ if (paddress & ~pps_mask) {
+ if (pspace == ARMSS_NonSecure) {
+ return true;
+ }
+ goto fault_size;
+ }
+
+ /* GPC Priority 3: the base address of GPTBR_EL3 exceeds PPS. */
+ tableaddr = env->cp15.gptbr_el3 << 12;
+ if (tableaddr & ~pps_mask) {
+ goto fault_size;
+ }
+
+ /*
+ * BADDR is aligned per a function of PPS and L0GPTSZ.
+ * These bits of GPTBR_EL3 are RES0, but are not a configuration error,
+ * unlike the RES0 bits of the GPT entries (R_XNKFZ).
+ */
+ align = MAX(pps - l0gptsz + 3, 12);
+ align = MAKE_64BIT_MASK(0, align);
+ tableaddr &= ~align;
+
+ as = arm_addressspace(env_cpu(env), attrs);
+
+ /* Level 0 lookup. */
+ index = extract64(paddress, l0gptsz, pps - l0gptsz);
+ tableaddr += index * 8;
+ entry = address_space_ldq_le(as, tableaddr, attrs, &result);
+ if (result != MEMTX_OK) {
+ goto fault_eabt;
+ }
+
+ switch (extract32(entry, 0, 4)) {
+ case 1: /* block descriptor */
+ if (entry >> 8) {
+ goto fault_walk; /* RES0 bits not 0 */
+ }
+ gpi = extract32(entry, 4, 4);
+ goto found;
+ case 3: /* table descriptor */
+ tableaddr = entry & ~0xf;
+ align = MAX(l0gptsz - pgs - 1, 12);
+ align = MAKE_64BIT_MASK(0, align);
+ if (tableaddr & (~pps_mask | align)) {
+ goto fault_walk; /* RES0 bits not 0 */
+ }
+ break;
+ default: /* invalid */
+ goto fault_walk;
+ }
+
+ /* Level 1 lookup */
+ level = 1;
+ index = extract64(paddress, pgs + 4, l0gptsz - pgs - 4);
+ tableaddr += index * 8;
+ entry = address_space_ldq_le(as, tableaddr, attrs, &result);
+ if (result != MEMTX_OK) {
+ goto fault_eabt;
+ }
+
+ switch (extract32(entry, 0, 4)) {
+ case 1: /* contiguous descriptor */
+ if (entry >> 10) {
+ goto fault_walk; /* RES0 bits not 0 */
+ }
+ /*
+ * Because the softmmu tlb only works on units of TARGET_PAGE_SIZE,
+ * and because we cannot invalidate by pa, and thus will always
+ * flush entire tlbs, we don't actually care about the range here
+ * and can simply extract the GPI as the result.
+ */
+ if (extract32(entry, 8, 2) == 0) {
+ goto fault_walk; /* reserved contig */
+ }
+ gpi = extract32(entry, 4, 4);
+ break;
+ default:
+ index = extract64(paddress, pgs, 4);
+ gpi = extract64(entry, index * 4, 4);
+ break;
+ }
+
+ found:
+ switch (gpi) {
+ case 0b0000: /* no access */
+ break;
+ case 0b1111: /* all access */
+ return true;
+ case 0b1000:
+ case 0b1001:
+ case 0b1010:
+ case 0b1011:
+ if (pspace == (gpi & 3)) {
+ return true;
+ }
+ break;
+ default:
+ goto fault_walk; /* reserved */
+ }
+
+ fi->gpcf = GPCF_Fail;
+ goto fault_common;
+ fault_eabt:
+ fi->gpcf = GPCF_EABT;
+ goto fault_common;
+ fault_size:
+ fi->gpcf = GPCF_AddressSize;
+ goto fault_common;
+ fault_walk:
+ fi->gpcf = GPCF_Walk;
+ fault_common:
+ fi->level = level;
+ fi->paddr = paddress;
+ fi->paddr_space = pspace;
+ return false;
+}
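A worked example of the GPT indexing used in granule_protection_check() (editorial sketch; the local extract64() mirrors QEMU's bitops helper): with a 4KB GPT granule (pgs = 12), L0GPTSZ = 1GB (l0gptsz = 30) and PPS = 48 bits, a physical address splits as follows.

#include <assert.h>
#include <stdint.h>

static uint64_t extract64(uint64_t v, int start, int len)
{
    return (v >> start) & (~0ull >> (64 - len));
}

int main(void)
{
    const unsigned pgs = 12, l0gptsz = 30, pps = 48;
    uint64_t pa = 0x8040123456ull;    /* an arbitrary 48-bit physical address */

    /* Level 0 index: PA[pps-1 : l0gptsz], one table entry per 1GB region. */
    uint64_t l0 = extract64(pa, l0gptsz, pps - l0gptsz);
    /* Level 1 index: PA[l0gptsz-1 : pgs+4], one 64-bit entry per 16 granules. */
    uint64_t l1 = extract64(pa, pgs + 4, l0gptsz - pgs - 4);
    /* GPI nibble within a granules descriptor: selected by PA[pgs+3 : pgs]. */
    uint64_t gpi_sel = extract64(pa, pgs, 4);

    assert(l0 == (pa >> 30));
    assert(l1 == ((pa >> 16) & 0x3fff));
    assert(gpi_sel == ((pa >> 12) & 0xf));
    return 0;
}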
+
+static bool S1_attrs_are_device(uint8_t attrs)
+{
+ /*
+ * This slightly under-decodes the MAIR_ELx field:
+ * 0b0000dd01 is Device with FEAT_XS, otherwise UNPREDICTABLE;
+ * 0b0000dd1x is UNPREDICTABLE.
+ */
+ return (attrs & 0xf0) == 0;
+}
+
+static bool S2_attrs_are_device(uint64_t hcr, uint8_t attrs)
+{
+ /*
+ * For an S1 page table walk, the stage 1 attributes are always
+ * some form of "this is Normal memory". The combined S1+S2
+ * attributes are therefore only Device if stage 2 specifies Device.
+ * With HCR_EL2.FWB == 0 this is when descriptor bits [5:4] are 0b00,
+ * ie when cacheattrs.attrs bits [3:2] are 0b00.
+ * With HCR_EL2.FWB == 1 this is when descriptor bit [4] is 0, ie
+ * when cacheattrs.attrs bit [2] is 0.
+ */
+ if (hcr & HCR_FWB) {
+ return (attrs & 0x4) == 0;
+ } else {
+ return (attrs & 0xc) == 0;
+ }
+}
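To make the two attribute checks above concrete (standalone sketch; s1_is_device() and s2_is_device() are local names): a stage 1 MAIR attribute byte of the form 0b0000dd00 is Device, and for stage 2 the Device encodings are those with attrs[3:2] == 0b00, or attrs[2] == 0 when HCR_EL2.FWB is set.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool s1_is_device(uint8_t attrs)
{
    return (attrs & 0xf0) == 0;            /* as S1_attrs_are_device() above */
}

static bool s2_is_device(bool fwb, uint8_t attrs)
{
    return fwb ? (attrs & 0x4) == 0 : (attrs & 0xc) == 0;
}

int main(void)
{
    assert(s1_is_device(0x04));            /* MAIR 0x04: Device-nGnRE */
    assert(!s1_is_device(0xff));           /* MAIR 0xff: Normal write-back */

    assert(s2_is_device(false, 0x1));      /* S2 attrs 0b0001: Device */
    assert(!s2_is_device(false, 0xf));     /* S2 attrs 0b1111: Normal write-back */
    assert(s2_is_device(true, 0x2));       /* FWB: bit 2 clear means Device */
    return 0;
}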
+
+static ARMSecuritySpace S2_security_space(ARMSecuritySpace s1_space,
+ ARMMMUIdx s2_mmu_idx)
+{
+ /*
+ * Return the security space to use for stage 2 when doing
+ * the S1 page table descriptor load.
+ */
+ if (regime_is_stage2(s2_mmu_idx)) {
+ /*
+ * The security space for ptw reads is almost always the same
+ * as that of the security space of the stage 1 translation.
+ * The only exception is when stage 1 is Secure; in that case
+ * the ptw read might be to the Secure or the NonSecure space
+ * (but never Realm or Root), and the s2_mmu_idx tells us which.
+ * Root translations are always single-stage.
+ */
+ if (s1_space == ARMSS_Secure) {
+ return arm_secure_to_space(s2_mmu_idx == ARMMMUIdx_Stage2_S);
+ } else {
+ assert(s2_mmu_idx != ARMMMUIdx_Stage2_S);
+ assert(s1_space != ARMSS_Root);
+ return s1_space;
+ }
+ } else {
+ /* ptw loads are from phys: the mmu idx itself says which space */
+ return arm_phys_to_space(s2_mmu_idx);
+ }
+}
+
+static bool fault_s1ns(ARMSecuritySpace space, ARMMMUIdx s2_mmu_idx)
+{
+ /*
+ * For stage 2 faults when Secure EL2 is in use, S1NS indicates
+ * whether the faulting IPA is in the Secure or NonSecure
+ * IPA space. For all other kinds of fault, it is false.
+ */
+ return space == ARMSS_Secure && regime_is_stage2(s2_mmu_idx)
+ && s2_mmu_idx == ARMMMUIdx_Stage2_S;
+}
+
+/* Translate a S1 pagetable walk through S2 if needed. */
+static bool S1_ptw_translate(CPUARMState *env, S1Translate *ptw,
+ hwaddr addr, ARMMMUFaultInfo *fi)
+{
+ ARMMMUIdx mmu_idx = ptw->in_mmu_idx;
+ ARMMMUIdx s2_mmu_idx = ptw->in_ptw_idx;
+ uint8_t pte_attrs;
+
+ ptw->out_virt = addr;
+
+ if (unlikely(ptw->in_debug)) {
+ /*
+ * From gdbstub, do not use softmmu so that we don't modify the
+ * state of the cpu at all, including softmmu tlb contents.
+ */
+ ARMSecuritySpace s2_space = S2_security_space(ptw->in_space, s2_mmu_idx);
+ S1Translate s2ptw = {
+ .in_mmu_idx = s2_mmu_idx,
+ .in_ptw_idx = ptw_idx_for_stage_2(env, s2_mmu_idx),
+ .in_space = s2_space,
+ .in_debug = true,
+ };
+ GetPhysAddrResult s2 = { };
+
+ if (get_phys_addr_gpc(env, &s2ptw, addr, MMU_DATA_LOAD, &s2, fi)) {
+ goto fail;
+ }
+
+ ptw->out_phys = s2.f.phys_addr;
+ pte_attrs = s2.cacheattrs.attrs;
+ ptw->out_host = NULL;
+ ptw->out_rw = false;
+ ptw->out_space = s2.f.attrs.space;
+ } else {
+#ifdef CONFIG_TCG
+ CPUTLBEntryFull *full;
+ int flags;
+
+ env->tlb_fi = fi;
+ flags = probe_access_full_mmu(env, addr, 0, MMU_DATA_LOAD,
+ arm_to_core_mmu_idx(s2_mmu_idx),
+ &ptw->out_host, &full);
+ env->tlb_fi = NULL;
+
+ if (unlikely(flags & TLB_INVALID_MASK)) {
+ goto fail;
+ }
+ ptw->out_phys = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
+ ptw->out_rw = full->prot & PAGE_WRITE;
+ pte_attrs = full->extra.arm.pte_attrs;
+ ptw->out_space = full->attrs.space;
+#else
+ g_assert_not_reached();
+#endif
+ }
+
+ if (regime_is_stage2(s2_mmu_idx)) {
+ uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space);
+
+ if ((hcr & HCR_PTW) && S2_attrs_are_device(hcr, pte_attrs)) {
+ /*
+ * PTW set and S1 walk touched S2 Device memory:
+ * generate Permission fault.
+ */
+ fi->type = ARMFault_Permission;
+ fi->s2addr = addr;
+ fi->stage2 = true;
+ fi->s1ptw = true;
+ fi->s1ns = fault_s1ns(ptw->in_space, s2_mmu_idx);
+ return false;
+ }
+ }
+
+ ptw->out_be = regime_translation_big_endian(env, mmu_idx);
+ return true;
+
+ fail:
+ assert(fi->type != ARMFault_None);
+ if (fi->type == ARMFault_GPCFOnOutput) {
+ fi->type = ARMFault_GPCFOnWalk;
+ }
+ fi->s2addr = addr;
+ fi->stage2 = regime_is_stage2(s2_mmu_idx);
+ fi->s1ptw = fi->stage2;
+ fi->s1ns = fault_s1ns(ptw->in_space, s2_mmu_idx);
+ return false;
+}
+
+/* All loads done in the course of a page table walk go through here. */
+static uint32_t arm_ldl_ptw(CPUARMState *env, S1Translate *ptw,
+ ARMMMUFaultInfo *fi)
+{
+ CPUState *cs = env_cpu(env);
+ void *host = ptw->out_host;
+ uint32_t data;
+
+ if (likely(host)) {
+ /* Page tables are in RAM, and we have the host address. */
+ data = qatomic_read((uint32_t *)host);
+ if (ptw->out_be) {
+ data = be32_to_cpu(data);
+ } else {
+ data = le32_to_cpu(data);
+ }
+ } else {
+ /* Page tables are in MMIO. */
+ MemTxAttrs attrs = {
+ .space = ptw->out_space,
+ .secure = arm_space_is_secure(ptw->out_space),
+ };
+ AddressSpace *as = arm_addressspace(cs, attrs);
+ MemTxResult result = MEMTX_OK;
+
+ if (ptw->out_be) {
+ data = address_space_ldl_be(as, ptw->out_phys, attrs, &result);
+ } else {
+ data = address_space_ldl_le(as, ptw->out_phys, attrs, &result);
+ }
+ if (unlikely(result != MEMTX_OK)) {
+ fi->type = ARMFault_SyncExternalOnWalk;
+ fi->ea = arm_extabort_type(result);
+ return 0;
+ }
+ }
+ return data;
+}
+
+static uint64_t arm_ldq_ptw(CPUARMState *env, S1Translate *ptw,
+ ARMMMUFaultInfo *fi)
+{
+ CPUState *cs = env_cpu(env);
+ void *host = ptw->out_host;
+ uint64_t data;
+
+ if (likely(host)) {
+ /* Page tables are in RAM, and we have the host address. */
+#ifdef CONFIG_ATOMIC64
+ data = qatomic_read__nocheck((uint64_t *)host);
+ if (ptw->out_be) {
+ data = be64_to_cpu(data);
+ } else {
+ data = le64_to_cpu(data);
+ }
+#else
+ if (ptw->out_be) {
+ data = ldq_be_p(host);
+ } else {
+ data = ldq_le_p(host);
+ }
+#endif
+ } else {
+ /* Page tables are in MMIO. */
+ MemTxAttrs attrs = {
+ .space = ptw->out_space,
+ .secure = arm_space_is_secure(ptw->out_space),
+ };
+ AddressSpace *as = arm_addressspace(cs, attrs);
+ MemTxResult result = MEMTX_OK;
+
+ if (ptw->out_be) {
+ data = address_space_ldq_be(as, ptw->out_phys, attrs, &result);
+ } else {
+ data = address_space_ldq_le(as, ptw->out_phys, attrs, &result);
+ }
+ if (unlikely(result != MEMTX_OK)) {
+ fi->type = ARMFault_SyncExternalOnWalk;
+ fi->ea = arm_extabort_type(result);
+ return 0;
+ }
+ }
+ return data;
+}
+
+static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val,
+ uint64_t new_val, S1Translate *ptw,
+ ARMMMUFaultInfo *fi)
+{
+#if defined(TARGET_AARCH64) && defined(CONFIG_TCG)
+ uint64_t cur_val;
+ void *host = ptw->out_host;
+
+ if (unlikely(!host)) {
+ /* Page table in MMIO Memory Region */
+ CPUState *cs = env_cpu(env);
+ MemTxAttrs attrs = {
+ .space = ptw->out_space,
+ .secure = arm_space_is_secure(ptw->out_space),
+ };
+ AddressSpace *as = arm_addressspace(cs, attrs);
+ MemTxResult result = MEMTX_OK;
+ bool need_lock = !bql_locked();
+
+ if (need_lock) {
+ bql_lock();
+ }
+ if (ptw->out_be) {
+ cur_val = address_space_ldq_be(as, ptw->out_phys, attrs, &result);
+ if (unlikely(result != MEMTX_OK)) {
+ fi->type = ARMFault_SyncExternalOnWalk;
+ fi->ea = arm_extabort_type(result);
+ if (need_lock) {
+ bql_unlock();
+ }
+ return old_val;
+ }
+ if (cur_val == old_val) {
+ address_space_stq_be(as, ptw->out_phys, new_val, attrs, &result);
+ if (unlikely(result != MEMTX_OK)) {
+ fi->type = ARMFault_SyncExternalOnWalk;
+ fi->ea = arm_extabort_type(result);
+ if (need_lock) {
+ bql_unlock();
+ }
+ return old_val;
+ }
+ cur_val = new_val;
+ }
+ } else {
+ cur_val = address_space_ldq_le(as, ptw->out_phys, attrs, &result);
+ if (unlikely(result != MEMTX_OK)) {
+ fi->type = ARMFault_SyncExternalOnWalk;
+ fi->ea = arm_extabort_type(result);
+ if (need_lock) {
+ bql_unlock();
+ }
+ return old_val;
+ }
+ if (cur_val == old_val) {
+ address_space_stq_le(as, ptw->out_phys, new_val, attrs, &result);
+ if (unlikely(result != MEMTX_OK)) {
+ fi->type = ARMFault_SyncExternalOnWalk;
+ fi->ea = arm_extabort_type(result);
+ if (need_lock) {
+ bql_unlock();
+ }
+ return old_val;
+ }
+ cur_val = new_val;
+ }
+ }
+ if (need_lock) {
+ bql_unlock();
+ }
+ return cur_val;
+ }
+
+ /*
+ * Raising a stage2 Protection fault for an atomic update to a read-only
+ * page is delayed until it is certain that there is a change to make.
+ */
+ if (unlikely(!ptw->out_rw)) {
+ int flags;
+
+ env->tlb_fi = fi;
+ flags = probe_access_full_mmu(env, ptw->out_virt, 0,
+ MMU_DATA_STORE,
+ arm_to_core_mmu_idx(ptw->in_ptw_idx),
+ NULL, NULL);
+ env->tlb_fi = NULL;
+
+ if (unlikely(flags & TLB_INVALID_MASK)) {
+ /*
+ * We know this must be a stage 2 fault because the granule
+ * protection table does not separately track read and write
+ * permission, so all GPC faults are caught in S1_ptw_translate():
+ * we only get here for "readable but not writeable".
+ */
+ assert(fi->type != ARMFault_None);
+ fi->s2addr = ptw->out_virt;
+ fi->stage2 = true;
+ fi->s1ptw = true;
+ fi->s1ns = fault_s1ns(ptw->in_space, ptw->in_ptw_idx);
+ return 0;
+ }
+
+ /* In case CAS mismatches and we loop, remember writability. */
+ ptw->out_rw = true;
+ }
+
+#ifdef CONFIG_ATOMIC64
+ if (ptw->out_be) {
+ old_val = cpu_to_be64(old_val);
+ new_val = cpu_to_be64(new_val);
+ cur_val = qatomic_cmpxchg__nocheck((uint64_t *)host, old_val, new_val);
+ cur_val = be64_to_cpu(cur_val);
+ } else {
+ old_val = cpu_to_le64(old_val);
+ new_val = cpu_to_le64(new_val);
+ cur_val = qatomic_cmpxchg__nocheck((uint64_t *)host, old_val, new_val);
+ cur_val = le64_to_cpu(cur_val);
+ }
+#else
+ /*
+ * We can't support the full 64-bit atomic cmpxchg on the host.
+ * Because this is only used for FEAT_HAFDBS, which is only for AA64,
+ * we know that TCG_OVERSIZED_GUEST is set, which means that we are
+ * running in round-robin mode and could only race with dma i/o.
+ */
+#if !TCG_OVERSIZED_GUEST
+# error "Unexpected configuration"
+#endif
+ bool locked = bql_locked();
+ if (!locked) {
+ bql_lock();
+ }
+ if (ptw->out_be) {
+ cur_val = ldq_be_p(host);
+ if (cur_val == old_val) {
+ stq_be_p(host, new_val);
+ }
+ } else {
+ cur_val = ldq_le_p(host);
+ if (cur_val == old_val) {
+ stq_le_p(host, new_val);
+ }
+ }
+ if (!locked) {
+ bql_unlock();
+ }
+#endif
+
+ return cur_val;
+#else
+ /* AArch32 does not have FEAT_HAFDBS; non-TCG guests only use debug mode. */
+ g_assert_not_reached();
+#endif
+}
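The function above is, in essence, a 64-bit compare-and-swap on a page-table descriptor, used for hardware access-flag/dirty-bit updates (FEAT_HAFDBS). Below is a standalone sketch of that idea using C11 atomics rather than QEMU's qatomic wrappers; DESC_AF and set_access_flag() are illustrative names, not QEMU APIs.

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define DESC_AF (1ull << 10)               /* VMSAv8-64 descriptor Access Flag */

/* Set the Access Flag with a CAS loop, retrying if the descriptor changes. */
static bool set_access_flag(_Atomic uint64_t *desc)
{
    uint64_t old = atomic_load(desc);
    while (!(old & DESC_AF)) {
        if (atomic_compare_exchange_weak(desc, &old, old | DESC_AF)) {
            return true;                   /* we performed the update */
        }
    }
    return false;                          /* flag was already set */
}

int main(void)
{
    _Atomic uint64_t desc = 0x0000000040000303ull;   /* valid page, AF clear */
    assert(set_access_flag(&desc));
    assert(atomic_load(&desc) & DESC_AF);
    assert(!set_access_flag(&desc));                 /* second call is a no-op */
    return 0;
}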
+
+static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
+ uint32_t *table, uint32_t address)
+{
+ /* Note that we can only get here for an AArch32 PL0/PL1 lookup */
+ uint64_t tcr = regime_tcr(env, mmu_idx);
+ int maskshift = extract32(tcr, 0, 3);
+ uint32_t mask = ~(((uint32_t)0xffffffffu) >> maskshift);
+ uint32_t base_mask;
+
+ if (address & mask) {
+ if (tcr & TTBCR_PD1) {
+ /* Translation table walk disabled for TTBR1 */
+ return false;
+ }
+ *table = regime_ttbr(env, mmu_idx, 1) & 0xffffc000;
+ } else {
+ if (tcr & TTBCR_PD0) {
+ /* Translation table walk disabled for TTBR0 */
+ return false;
+ }
+ base_mask = ~((uint32_t)0x3fffu >> maskshift);
+ *table = regime_ttbr(env, mmu_idx, 0) & base_mask;
+ }
+ *table |= (address >> 18) & 0x3ffc;
+ return true;
+}
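A worked example of the TTBR0/TTBR1 split computed above (editorial sketch; ttbr_for_address() is an invented helper): TTBCR.N is the 3-bit maskshift, and with N = 2 the bottom 1GB of the VA space walks from TTBR0 while everything else walks from TTBR1. The TTBR0 base alignment also tightens from 16KB to 2^(14-N) bytes.

#include <assert.h>
#include <stdint.h>

/* Which TTBR an AArch32 short-descriptor walk uses, given TTBCR.N. */
static int ttbr_for_address(uint32_t address, unsigned n)
{
    uint32_t mask = ~(0xffffffffu >> n);   /* n == 0: mask == 0, always TTBR0 */
    return (address & mask) ? 1 : 0;
}

int main(void)
{
    /* N == 0: the whole address space uses TTBR0 (pre-v6 behaviour). */
    assert(ttbr_for_address(0xffffffffu, 0) == 0);

    /* N == 2: bottom 1GB from TTBR0, the rest from TTBR1. */
    assert(ttbr_for_address(0x3fffffffu, 2) == 0);
    assert(ttbr_for_address(0x40000000u, 2) == 1);

    /* Matching base masks: 16KB-aligned TTBR0 for N == 0, 4KB for N == 2. */
    assert(~(0x3fffu >> 0) == 0xffffc000u);
    assert(~(0x3fffu >> 2) == 0xfffff000u);
    return 0;
}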
+
+/*
+ * Translate section/page access permissions to page R/W protection flags
+ * @env: CPUARMState
+ * @mmu_idx: MMU index indicating required translation regime
+ * @ap: The 3-bit access permissions (AP[2:0])
+ * @domain_prot: The 2-bit domain access permissions
+ * @is_user: TRUE if accessing from PL0
+ */
+static int ap_to_rw_prot_is_user(CPUARMState *env, ARMMMUIdx mmu_idx,
+ int ap, int domain_prot, bool is_user)
+{
+ if (domain_prot == 3) {
+ return PAGE_READ | PAGE_WRITE;
+ }
+
+ switch (ap) {
+ case 0:
+ if (arm_feature(env, ARM_FEATURE_V7)) {
+ return 0;
+ }
+ switch (regime_sctlr(env, mmu_idx) & (SCTLR_S | SCTLR_R)) {
+ case SCTLR_S:
+ return is_user ? 0 : PAGE_READ;
+ case SCTLR_R:
+ return PAGE_READ;
+ default:
+ return 0;
+ }
+ case 1:
+ return is_user ? 0 : PAGE_READ | PAGE_WRITE;
+ case 2:
+ if (is_user) {
+ return PAGE_READ;
+ } else {
+ return PAGE_READ | PAGE_WRITE;
+ }
+ case 3:
+ return PAGE_READ | PAGE_WRITE;
+ case 4: /* Reserved. */
+ return 0;
+ case 5:
+ return is_user ? 0 : PAGE_READ;
+ case 6:
+ return PAGE_READ;
+ case 7:
+ if (!arm_feature(env, ARM_FEATURE_V6K)) {
+ return 0;
+ }
+ return PAGE_READ;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/*
+ * Translate section/page access permissions to page R/W protection flags
+ * @env: CPUARMState
+ * @mmu_idx: MMU index indicating required translation regime
+ * @ap: The 3-bit access permissions (AP[2:0])
+ * @domain_prot: The 2-bit domain access permissions
+ */
+static int ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx,
+ int ap, int domain_prot)
+{
+ return ap_to_rw_prot_is_user(env, mmu_idx, ap, domain_prot,
+ regime_is_user(env, mmu_idx));
+}
+
+/*
+ * Translate section/page access permissions to page R/W protection flags.
+ * @ap: The 2-bit simple AP (AP[2:1])
+ * @is_user: TRUE if accessing from PL0
+ */
+static int simple_ap_to_rw_prot_is_user(int ap, bool is_user)
+{
+ switch (ap) {
+ case 0:
+ return is_user ? 0 : PAGE_READ | PAGE_WRITE;
+ case 1:
+ return PAGE_READ | PAGE_WRITE;
+ case 2:
+ return is_user ? 0 : PAGE_READ;
+ case 3:
+ return PAGE_READ;
+ default:
+ g_assert_not_reached();
+ }
+}
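Restating the simplified-access-model decode above as a quick check (standalone sketch; the PAGE_* values are local stand-ins for QEMU's): AP[2] selects read-only and AP[1] grants unprivileged access.

#include <assert.h>

#define PAGE_READ  1
#define PAGE_WRITE 2

/* Closed-form equivalent of simple_ap_to_rw_prot_is_user() above. */
static int simple_ap(int ap, int is_user)
{
    int prot = PAGE_READ | ((ap & 2) ? 0 : PAGE_WRITE);   /* AP[2]: read-only */
    return (is_user && !(ap & 1)) ? 0 : prot;             /* AP[1]: EL0 access */
}

int main(void)
{
    assert(simple_ap(0, 1) == 0);                          /* privileged-only RW */
    assert(simple_ap(0, 0) == (PAGE_READ | PAGE_WRITE));
    assert(simple_ap(1, 1) == (PAGE_READ | PAGE_WRITE));   /* RW at any level */
    assert(simple_ap(2, 1) == 0);                          /* privileged-only RO */
    assert(simple_ap(2, 0) == PAGE_READ);
    assert(simple_ap(3, 1) == PAGE_READ);                  /* RO at any level */
    return 0;
}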
+
+static int simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
+{
+ return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
+}
+
+static bool get_phys_addr_v5(CPUARMState *env, S1Translate *ptw,
+ uint32_t address, MMUAccessType access_type,
+ GetPhysAddrResult *result, ARMMMUFaultInfo *fi)
+{
+ int level = 1;
+ uint32_t table;
+ uint32_t desc;
+ int type;
+ int ap;
+ int domain = 0;
+ int domain_prot;
+ hwaddr phys_addr;
+ uint32_t dacr;
+
+ /* Pagetable walk. */
+ /* Lookup l1 descriptor. */
+ if (!get_level1_table_address(env, ptw->in_mmu_idx, &table, address)) {
+ /* Section translation fault if page walk is disabled by PD0 or PD1 */
+ fi->type = ARMFault_Translation;
+ goto do_fault;
+ }
+ if (!S1_ptw_translate(env, ptw, table, fi)) {
+ goto do_fault;
+ }
+ desc = arm_ldl_ptw(env, ptw, fi);
+ if (fi->type != ARMFault_None) {
+ goto do_fault;
+ }
+ type = (desc & 3);
+ domain = (desc >> 5) & 0x0f;
+ if (regime_el(env, ptw->in_mmu_idx) == 1) {
+ dacr = env->cp15.dacr_ns;
+ } else {
+ dacr = env->cp15.dacr_s;
+ }
+ domain_prot = (dacr >> (domain * 2)) & 3;
+ if (type == 0) {
+ /* Section translation fault. */
+ fi->type = ARMFault_Translation;
+ goto do_fault;
+ }
+ if (type != 2) {
+ level = 2;
+ }
+ if (domain_prot == 0 || domain_prot == 2) {
+ fi->type = ARMFault_Domain;
+ goto do_fault;
+ }
+ if (type == 2) {
+ /* 1Mb section. */
+ phys_addr = (desc & 0xfff00000) | (address & 0x000fffff);
+ ap = (desc >> 10) & 3;
+ result->f.lg_page_size = 20; /* 1MB */
+ } else {
+ /* Lookup l2 entry. */
+ if (type == 1) {
+ /* Coarse pagetable. */
+ table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
+ } else {
+ /* Fine pagetable. */
+ table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
+ }
+ if (!S1_ptw_translate(env, ptw, table, fi)) {
+ goto do_fault;
+ }
+ desc = arm_ldl_ptw(env, ptw, fi);
+ if (fi->type != ARMFault_None) {
+ goto do_fault;
+ }
+ switch (desc & 3) {
+ case 0: /* Page translation fault. */
+ fi->type = ARMFault_Translation;
+ goto do_fault;
+ case 1: /* 64k page. */
+ phys_addr = (desc & 0xffff0000) | (address & 0xffff);
+ ap = (desc >> (4 + ((address >> 13) & 6))) & 3;
+ result->f.lg_page_size = 16;
+ break;
+ case 2: /* 4k page. */
+ phys_addr = (desc & 0xfffff000) | (address & 0xfff);
+ ap = (desc >> (4 + ((address >> 9) & 6))) & 3;
+ result->f.lg_page_size = 12;
+ break;
+ case 3: /* 1k page, or ARMv6/XScale "extended small (4k) page" */
+ if (type == 1) {
+ /* ARMv6/XScale extended small page format */
+ if (arm_feature(env, ARM_FEATURE_XSCALE)
+ || arm_feature(env, ARM_FEATURE_V6)) {
+ phys_addr = (desc & 0xfffff000) | (address & 0xfff);
+ result->f.lg_page_size = 12;
+ } else {
+ /*
+ * UNPREDICTABLE in ARMv5; we choose to take a
+ * page translation fault.
+ */
+ fi->type = ARMFault_Translation;
+ goto do_fault;
+ }
+ } else {
+ phys_addr = (desc & 0xfffffc00) | (address & 0x3ff);
+ result->f.lg_page_size = 10;
+ }
+ ap = (desc >> 4) & 3;
+ break;
+ default:
+ /* Never happens, but compiler isn't smart enough to tell. */
+ g_assert_not_reached();
+ }
+ }
+ result->f.prot = ap_to_rw_prot(env, ptw->in_mmu_idx, ap, domain_prot);
+ result->f.prot |= result->f.prot ? PAGE_EXEC : 0;
+ if (!(result->f.prot & (1 << access_type))) {
+ /* Access permission fault. */
+ fi->type = ARMFault_Permission;
+ goto do_fault;
+ }
+ result->f.phys_addr = phys_addr;
+ return false;
+do_fault:
+ fi->domain = domain;
+ fi->level = level;
+ return true;
+}
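To see where the shift-and-mask constants in get_phys_addr_v5() come from (editorial sketch, not QEMU code): under the short-descriptor format a 32-bit VA splits into a level-1 index (VA[31:20]), a coarse level-2 index (VA[19:12]) and a 4KB page offset (VA[11:0]), and the code folds the indexes directly into byte offsets within the tables.

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t va = 0x12345678u;

    uint32_t l1_index = va >> 20;            /* 0x123: 1 of 4096 L1 entries */
    uint32_t l2_index = (va >> 12) & 0xff;   /* 0x45: 1 of 256 coarse L2 entries */
    uint32_t offset   = va & 0xfff;          /* 0x678: offset within a 4KB page */

    /* The byte offsets used when forming table addresses above: */
    assert(((va >> 18) & 0x3ffc) == (l1_index << 2));   /* L1 descriptor offset */
    assert(((va >> 10) & 0x3fc) == (l2_index << 2));    /* coarse L2 descriptor */

    /* For a 4KB small page: phys = (desc & 0xfffff000) | (va & 0xfff). */
    assert(offset == 0x678);
    return 0;
}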
+
+static bool get_phys_addr_v6(CPUARMState *env, S1Translate *ptw,
+ uint32_t address, MMUAccessType access_type,
+ GetPhysAddrResult *result, ARMMMUFaultInfo *fi)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ ARMMMUIdx mmu_idx = ptw->in_mmu_idx;
+ int level = 1;
+ uint32_t table;
+ uint32_t desc;
+ uint32_t xn;
+ uint32_t pxn = 0;
+ int type;
+ int ap;
+ int domain = 0;
+ int domain_prot;
+ hwaddr phys_addr;
+ uint32_t dacr;
+ bool ns;
+ int user_prot;
+
+ /* Pagetable walk. */
+ /* Lookup l1 descriptor. */
+ if (!get_level1_table_address(env, mmu_idx, &table, address)) {
+ /* Section translation fault if page walk is disabled by PD0 or PD1 */
+ fi->type = ARMFault_Translation;
+ goto do_fault;
+ }
+ if (!S1_ptw_translate(env, ptw, table, fi)) {
+ goto do_fault;
+ }
+ desc = arm_ldl_ptw(env, ptw, fi);
+ if (fi->type != ARMFault_None) {
+ goto do_fault;
+ }
+ type = (desc & 3);
+ if (type == 0 || (type == 3 && !cpu_isar_feature(aa32_pxn, cpu))) {
+ /*
+ * Section translation fault, or attempt to use the encoding
+ * which is Reserved on implementations without PXN.
+ */
+ fi->type = ARMFault_Translation;
+ goto do_fault;
+ }
+ if ((type == 1) || !(desc & (1 << 18))) {
+ /* Page or Section. */
+ domain = (desc >> 5) & 0x0f;
+ }
+ if (regime_el(env, mmu_idx) == 1) {
+ dacr = env->cp15.dacr_ns;
+ } else {
+ dacr = env->cp15.dacr_s;
+ }
+ if (type == 1) {
+ level = 2;
+ }
+ domain_prot = (dacr >> (domain * 2)) & 3;
+ if (domain_prot == 0 || domain_prot == 2) {
+ /* Section or Page domain fault */
+ fi->type = ARMFault_Domain;
+ goto do_fault;
+ }
+ if (type != 1) {
+ if (desc & (1 << 18)) {
+ /* Supersection. */
+ phys_addr = (desc & 0xff000000) | (address & 0x00ffffff);
+ phys_addr |= (uint64_t)extract32(desc, 20, 4) << 32;
+ phys_addr |= (uint64_t)extract32(desc, 5, 4) << 36;
+ result->f.lg_page_size = 24; /* 16MB */
+ } else {
+ /* Section. */
+ phys_addr = (desc & 0xfff00000) | (address & 0x000fffff);
+ result->f.lg_page_size = 20; /* 1MB */
+ }
+ ap = ((desc >> 10) & 3) | ((desc >> 13) & 4);
+ xn = desc & (1 << 4);
+ pxn = desc & 1;
+ ns = extract32(desc, 19, 1);
+ } else {
+ if (cpu_isar_feature(aa32_pxn, cpu)) {
+ pxn = (desc >> 2) & 1;
+ }
+ ns = extract32(desc, 3, 1);
+ /* Lookup l2 entry. */
+ table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
+ if (!S1_ptw_translate(env, ptw, table, fi)) {
+ goto do_fault;
+ }
+ desc = arm_ldl_ptw(env, ptw, fi);
+ if (fi->type != ARMFault_None) {
+ goto do_fault;
+ }
+ ap = ((desc >> 4) & 3) | ((desc >> 7) & 4);
+ switch (desc & 3) {
+ case 0: /* Page translation fault. */
+ fi->type = ARMFault_Translation;
+ goto do_fault;
+ case 1: /* 64k page. */
+ phys_addr = (desc & 0xffff0000) | (address & 0xffff);
+ xn = desc & (1 << 15);
+ result->f.lg_page_size = 16;
+ break;
+ case 2: case 3: /* 4k page. */
+ phys_addr = (desc & 0xfffff000) | (address & 0xfff);
+ xn = desc & 1;
+ result->f.lg_page_size = 12;
+ break;
+ default:
+ /* Never happens, but compiler isn't smart enough to tell. */
+ g_assert_not_reached();
+ }
+ }
+ if (domain_prot == 3) {
+ result->f.prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ } else {
+ if (pxn && !regime_is_user(env, mmu_idx)) {
+ xn = 1;
+ }
+ if (xn && access_type == MMU_INST_FETCH) {
+ fi->type = ARMFault_Permission;
+ goto do_fault;
+ }
+
+ if (arm_feature(env, ARM_FEATURE_V6K) &&
+ (regime_sctlr(env, mmu_idx) & SCTLR_AFE)) {
+ /* The simplified model uses AP[0] as an access control bit. */
+ if ((ap & 1) == 0) {
+ /* Access flag fault. */
+ fi->type = ARMFault_AccessFlag;
+ goto do_fault;
+ }
+ result->f.prot = simple_ap_to_rw_prot(env, mmu_idx, ap >> 1);
+ user_prot = simple_ap_to_rw_prot_is_user(ap >> 1, 1);
+ } else {
+ result->f.prot = ap_to_rw_prot(env, mmu_idx, ap, domain_prot);
+ user_prot = ap_to_rw_prot_is_user(env, mmu_idx, ap, domain_prot, 1);
+ }
+ if (result->f.prot && !xn) {
+ result->f.prot |= PAGE_EXEC;
+ }
+ if (!(result->f.prot & (1 << access_type))) {
+ /* Access permission fault. */
+ fi->type = ARMFault_Permission;
+ goto do_fault;
+ }
+ if (regime_is_pan(env, mmu_idx) &&
+ !regime_is_user(env, mmu_idx) &&
+ user_prot &&
+ access_type != MMU_INST_FETCH) {
+ /* Privileged Access Never fault */
+ fi->type = ARMFault_Permission;
+ goto do_fault;
+ }
+ }
+ if (ns) {
+ /*
+ * The NS bit will (as required by the architecture) have no effect if
+ * the CPU doesn't support TZ or this is a non-secure translation
+ * regime, because the attribute will already be non-secure.
+ */
+ result->f.attrs.secure = false;
+ result->f.attrs.space = ARMSS_NonSecure;
+ }
+ result->f.phys_addr = phys_addr;
+ return false;
+do_fault:
+ fi->domain = domain;
+ fi->level = level;
+ return true;
+}
+
+/*
+ * Translate S2 section/page access permissions to protection flags
+ * @env: CPUARMState
+ * @s2ap: The 2-bit stage2 access permissions (S2AP)
+ * @xn: XN (execute-never) bits
+ * @s1_is_el0: true if this is S2 of an S1+2 walk for EL0
+ */
+static int get_S2prot_noexecute(int s2ap)
+{
+ int prot = 0;
+
+ if (s2ap & 1) {
+ prot |= PAGE_READ;
+ }
+ if (s2ap & 2) {
+ prot |= PAGE_WRITE;
+ }
+ return prot;
+}
+
+static int get_S2prot(CPUARMState *env, int s2ap, int xn, bool s1_is_el0)
+{
+ int prot = get_S2prot_noexecute(s2ap);
+
+ if (cpu_isar_feature(any_tts2uxn, env_archcpu(env))) {
+ switch (xn) {
+ case 0:
+ prot |= PAGE_EXEC;
+ break;
+ case 1:
+ if (s1_is_el0) {
+ prot |= PAGE_EXEC;
+ }
+ break;
+ case 2:
+ break;
+ case 3:
+ if (!s1_is_el0) {
+ prot |= PAGE_EXEC;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ } else {
+ if (!extract32(xn, 1, 1)) {
+ if (arm_el_is_aa64(env, 2) || prot & PAGE_READ) {
+ prot |= PAGE_EXEC;
+ }
+ }
+ }
+ return prot;
+}
+
+/*
+ * Translate section/page access permissions to protection flags
+ * @env: CPUARMState
+ * @mmu_idx: MMU index indicating required translation regime
+ * @is_aa64: TRUE if AArch64
+ * @ap: The 2-bit simple AP (AP[2:1])
+ * @xn: XN (execute-never) bit
+ * @pxn: PXN (privileged execute-never) bit
+ * @in_pa: The original input pa space
+ * @out_pa: The output pa space, modified by NSTable, NS, and NSE
+ */
+static int get_S1prot(CPUARMState *env, ARMMMUIdx mmu_idx, bool is_aa64,
+ int ap, int xn, int pxn,
+ ARMSecuritySpace in_pa, ARMSecuritySpace out_pa)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ bool is_user = regime_is_user(env, mmu_idx);
+ int prot_rw, user_rw;
+ bool have_wxn;
+ int wxn = 0;
+
+ assert(!regime_is_stage2(mmu_idx));
+
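+ /*
+ * Simple AP[2:1] encoding: 0 = R/W, privileged only; 1 = R/W at any
+ * privilege; 2 = read-only, privileged only; 3 = read-only at any
+ * privilege.
+ */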
+ user_rw = simple_ap_to_rw_prot_is_user(ap, true);
+ if (is_user) {
+ prot_rw = user_rw;
+ } else {
+ /*
+ * PAN controls can forbid data accesses but don't affect insn fetch.
+ * Plain PAN forbids data accesses if EL0 has data permissions;
+ * PAN3 forbids data accesses if EL0 has either data or exec perms.
+ * Note that for AArch64 the 'user can exec' case is exactly !xn.
+ * We make the IMPDEF choices that SCR_EL3.SIF and Realm EL2&0
+ * do not affect EPAN.
+ */
+ if (user_rw && regime_is_pan(env, mmu_idx)) {
+ prot_rw = 0;
+ } else if (cpu_isar_feature(aa64_pan3, cpu) && is_aa64 &&
+ regime_is_pan(env, mmu_idx) &&
+ (regime_sctlr(env, mmu_idx) & SCTLR_EPAN) && !xn) {
+ prot_rw = 0;
+ } else {
+ prot_rw = simple_ap_to_rw_prot_is_user(ap, false);
+ }
+ }
+
+ if (in_pa != out_pa) {
+ switch (in_pa) {
+ case ARMSS_Root:
+ /*
+ * R_ZWRVD: permission fault for insn fetched from non-Root,
+ * I_WWBFB: SIF has no effect in EL3.
+ */
+ return prot_rw;
+ case ARMSS_Realm:
+ /*
+ * R_PKTDS: permission fault for insn fetched from non-Realm,
+ * for Realm EL2 or EL2&0. The corresponding fault for EL1&0
+ * happens during any stage2 translation.
+ */
+ switch (mmu_idx) {
+ case ARMMMUIdx_E2:
+ case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_E20_2:
+ case ARMMMUIdx_E20_2_PAN:
+ return prot_rw;
+ default:
+ break;
+ }
+ break;
+ case ARMSS_Secure:
+ if (env->cp15.scr_el3 & SCR_SIF) {
+ return prot_rw;
+ }
+ break;
+ default:
+ /* Input NonSecure must have output NonSecure. */
+ g_assert_not_reached();
+ }
+ }
+
+ /*
+ * TODO: have_wxn should be replaced with
+ * ARM_FEATURE_V8 || (ARM_FEATURE_V7 && ARM_FEATURE_EL2)
+ * when ARM_FEATURE_EL2 starts getting set. For now we assume all LPAE
+ * compatible processors have EL2, which is required for [U]WXN.
+ */
+ have_wxn = arm_feature(env, ARM_FEATURE_LPAE);
+
+ if (have_wxn) {
+ wxn = regime_sctlr(env, mmu_idx) & SCTLR_WXN;
+ }
+
+ if (is_aa64) {
+ if (regime_has_2_ranges(mmu_idx) && !is_user) {
+ xn = pxn || (user_rw & PAGE_WRITE);
+ }
+ } else if (arm_feature(env, ARM_FEATURE_V7)) {
+ switch (regime_el(env, mmu_idx)) {
+ case 1:
+ case 3:
+ if (is_user) {
+ xn = xn || !(user_rw & PAGE_READ);
+ } else {
+ int uwxn = 0;
+ if (have_wxn) {
+ uwxn = regime_sctlr(env, mmu_idx) & SCTLR_UWXN;
+ }
+ xn = xn || !(prot_rw & PAGE_READ) || pxn ||
+ (uwxn && (user_rw & PAGE_WRITE));
+ }
+ break;
+ case 2:
+ break;
+ }
+ } else {
+ xn = wxn = 0;
+ }
+
+ if (xn || (wxn && (prot_rw & PAGE_WRITE))) {
+ return prot_rw;
+ }
+ return prot_rw | PAGE_EXEC;
+}
+
+static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
+ ARMMMUIdx mmu_idx)
+{
+ uint64_t tcr = regime_tcr(env, mmu_idx);
+ uint32_t el = regime_el(env, mmu_idx);
+ int select, tsz;
+ bool epd, hpd;
+
+ assert(mmu_idx != ARMMMUIdx_Stage2_S);
+
+ if (mmu_idx == ARMMMUIdx_Stage2) {
+ /* VTCR */
+ bool sext = extract32(tcr, 4, 1);
+ bool sign = extract32(tcr, 3, 1);
+
+ /*
+ * If the sign-extend bit is not the same as t0sz[3], the result
+ * is unpredictable. Flag this as a guest error.
+ */
+ if (sign != sext) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "AArch32: VTCR.S / VTCR.T0SZ[3] mismatch\n");
+ }
+ tsz = sextract32(tcr, 0, 4) + 8;
+ select = 0;
+ hpd = false;
+ epd = false;
+ } else if (el == 2) {
+ /* HTCR */
+ tsz = extract32(tcr, 0, 3);
+ select = 0;
+ hpd = extract64(tcr, 24, 1);
+ epd = false;
+ } else {
+ int t0sz = extract32(tcr, 0, 3);
+ int t1sz = extract32(tcr, 16, 3);
+
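+ /*
+ * T0SZ/T1SZ split the 32-bit VA space between the two base registers:
+ * select == 0 picks TTBR0 (low addresses) and select == 1 picks
+ * TTBR1 (high addresses).
+ */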
+ if (t1sz == 0) {
+ select = va > (0xffffffffu >> t0sz);
+ } else {
+ /* Note that we will detect errors later. */
+ select = va >= ~(0xffffffffu >> t1sz);
+ }
+ if (!select) {
+ tsz = t0sz;
+ epd = extract32(tcr, 7, 1);
+ hpd = extract64(tcr, 41, 1);
+ } else {
+ tsz = t1sz;
+ epd = extract32(tcr, 23, 1);
+ hpd = extract64(tcr, 42, 1);
+ }
+ /* For aarch32, hpd0 is not enabled without t2e as well. */
+ hpd &= extract32(tcr, 6, 1);
+ }
+
+ return (ARMVAParameters) {
+ .tsz = tsz,
+ .select = select,
+ .epd = epd,
+ .hpd = hpd,
+ };
+}
+
+/*
+ * check_s2_mmu_setup
+ * @cpu: ARMCPU
+ * @is_aa64: True if the translation regime is in AArch64 state
+ * @tcr: VTCR_EL2 or VSTCR_EL2
+ * @ds: Effective value of TCR.DS.
+ * @iasize: Bitsize of IPAs
+ * @stride: Page-table stride (See the ARM ARM)
+ *
+ * Decode the starting level of the S2 lookup, returning INT_MIN if
+ * the configuration is invalid.
+ */
+static int check_s2_mmu_setup(ARMCPU *cpu, bool is_aa64, uint64_t tcr,
+ bool ds, int iasize, int stride)
+{
+ int sl0, sl2, startlevel, granulebits, levels;
+ int s1_min_iasize, s1_max_iasize;
+
+ sl0 = extract32(tcr, 6, 2);
+ if (is_aa64) {
+ /*
+ * AArch64.S2InvalidSL: Interpretation of SL depends on the page size,
+ * so interleave AArch64.S2StartLevel.
+ */
+ switch (stride) {
+ case 9: /* 4KB */
+ /* SL2 is RES0 unless DS=1 & 4KB granule. */
+ sl2 = extract64(tcr, 33, 1);
+ if (ds && sl2) {
+ if (sl0 != 0) {
+ goto fail;
+ }
+ startlevel = -1;
+ } else {
+ startlevel = 2 - sl0;
+ switch (sl0) {
+ case 2:
+ if (arm_pamax(cpu) < 44) {
+ goto fail;
+ }
+ break;
+ case 3:
+ if (!cpu_isar_feature(aa64_st, cpu)) {
+ goto fail;
+ }
+ startlevel = 3;
+ break;
+ }
+ }
+ break;
+ case 11: /* 16KB */
+ switch (sl0) {
+ case 2:
+ if (arm_pamax(cpu) < 42) {
+ goto fail;
+ }
+ break;
+ case 3:
+ if (!ds) {
+ goto fail;
+ }
+ break;
+ }
+ startlevel = 3 - sl0;
+ break;
+ case 13: /* 64KB */
+ switch (sl0) {
+ case 2:
+ if (arm_pamax(cpu) < 44) {
+ goto fail;
+ }
+ break;
+ case 3:
+ goto fail;
+ }
+ startlevel = 3 - sl0;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ } else {
+ /*
+ * Things are simpler for AArch32 EL2, with only 4k pages.
+ * There is no separate S2InvalidSL function, but AArch32.S2Walk
+ * begins with walkparms.sl0 in {'1x'}.
+ */
+ assert(stride == 9);
+ if (sl0 >= 2) {
+ goto fail;
+ }
+ startlevel = 2 - sl0;
+ }
+
+ /* AArch{64,32}.S2InconsistentSL are functionally equivalent. */
+ levels = 3 - startlevel;
+ granulebits = stride + 3;
+
+ s1_min_iasize = levels * stride + granulebits + 1;
+ s1_max_iasize = s1_min_iasize + (stride - 1) + 4;
+
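+ /*
+ * Example: a 4KB granule (stride 9, granulebits 12) starting at
+ * level 1 gives levels = 2, so iasize must be in [31, 43]: at
+ * least one IPA bit must be resolved by the start-level table
+ * (30 + 1), and at most the full 9-bit start-level index plus
+ * 4 bits of table concatenation may be used (39 + 4).
+ */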
+ if (iasize >= s1_min_iasize && iasize <= s1_max_iasize) {
+ return startlevel;
+ }
+
+ fail:
+ return INT_MIN;
+}
+
+static bool lpae_block_desc_valid(ARMCPU *cpu, bool ds,
+ ARMGranuleSize gran, int level)
+{
+ /*
+ * See pseudocode AArch64.BlockDescSupported(): block descriptors
+ * are not valid at all levels, depending on the page size.
+ */
+ switch (gran) {
+ case Gran4K:
+ return (level == 0 && ds) || level == 1 || level == 2;
+ case Gran16K:
+ return (level == 1 && ds) || level == 2;
+ case Gran64K:
+ return (level == 1 && arm_pamax(cpu) == 52) || level == 2;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static bool nv_nv1_enabled(CPUARMState *env, S1Translate *ptw)
+{
+ uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space);
+ return (hcr & (HCR_NV | HCR_NV1)) == (HCR_NV | HCR_NV1);
+}
+
+/**
+ * get_phys_addr_lpae: perform one stage of page table walk, LPAE format
+ *
+ * Returns false if the translation was successful. Otherwise, the fields
+ * of @result may not be filled in, and the populated fsr value provides
+ * information on why the translation aborted, in the format of a
+ * long-format DFSR/IFSR fault register, with the following caveat:
+ * the WnR bit is never set (the caller must do this).
+ *
+ * @env: CPUARMState
+ * @ptw: Current and next stage parameters for the walk.
+ * @address: virtual address to get physical address for
+ * @access_type: MMU_DATA_LOAD, MMU_DATA_STORE or MMU_INST_FETCH
+ * @result: set on translation success
+ * @fi: set to fault info if the translation fails
+ */
+static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
+ uint64_t address,
+ MMUAccessType access_type,
+ GetPhysAddrResult *result, ARMMMUFaultInfo *fi)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ ARMMMUIdx mmu_idx = ptw->in_mmu_idx;
+ int32_t level;
+ ARMVAParameters param;
+ uint64_t ttbr;
+ hwaddr descaddr, indexmask, indexmask_grainsize;
+ uint32_t tableattrs;
+ target_ulong page_size;
+ uint64_t attrs;
+ int32_t stride;
+ int addrsize, inputsize, outputsize;
+ uint64_t tcr = regime_tcr(env, mmu_idx);
+ int ap, xn, pxn;
+ uint32_t el = regime_el(env, mmu_idx);
+ uint64_t descaddrmask;
+ bool aarch64 = arm_el_is_aa64(env, el);
+ uint64_t descriptor, new_descriptor;
+ ARMSecuritySpace out_space;
+ bool device;
+
+ /* TODO: This code does not support shareability levels. */
+ if (aarch64) {
+ int ps;
+
+ param = aa64_va_parameters(env, address, mmu_idx,
+ access_type != MMU_INST_FETCH,
+ !arm_el_is_aa64(env, 1));
+ level = 0;
+
+ /*
+ * If TxSZ is programmed to a value larger than the maximum,
+ * or smaller than the effective minimum, it is IMPLEMENTATION
+ * DEFINED whether we behave as if the field were programmed
+ * within bounds, or if a level 0 Translation fault is generated.
+ *
+ * With FEAT_LVA, fault on less than minimum becomes required,
+ * so our choice is to always raise the fault.
+ */
+ if (param.tsz_oob) {
+ goto do_translation_fault;
+ }
+
+ addrsize = 64 - 8 * param.tbi;
+ inputsize = 64 - param.tsz;
+
+ /*
+ * Bound PS by PARANGE to find the effective output address size.
+ * ID_AA64MMFR0 is a read-only register so values outside of the
+ * supported mappings can be considered an implementation error.
+ */
+ ps = FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE);
+ ps = MIN(ps, param.ps);
+ assert(ps < ARRAY_SIZE(pamax_map));
+ outputsize = pamax_map[ps];
+
+ /*
+ * With LPA2, the effective output address (OA) size is at most 48 bits
+ * unless TCR.DS == 1
+ */
+ if (!param.ds && param.gran != Gran64K) {
+ outputsize = MIN(outputsize, 48);
+ }
+ } else {
+ param = aa32_va_parameters(env, address, mmu_idx);
+ level = 1;
+ addrsize = (mmu_idx == ARMMMUIdx_Stage2 ? 40 : 32);
+ inputsize = addrsize - param.tsz;
+ outputsize = 40;
+ }
+
+ /*
+ * We determined the region when collecting the parameters, but we
+ * have not yet validated that the address is valid for the region.
+ * Extract the top bits and verify that they all match select.
+ *
+ * For aa32, if inputsize == addrsize, then we have selected the
+ * region by exclusion in aa32_va_parameters and there is no more
+ * validation to do here.
+ */
+ if (inputsize < addrsize) {
+ target_ulong top_bits = sextract64(address, inputsize,
+ addrsize - inputsize);
+ if (-top_bits != param.select) {
+ /* The gap between the two regions is a Translation fault */
+ goto do_translation_fault;
+ }
+ }
+
+ stride = arm_granule_bits(param.gran) - 3;
+
+ /*
+ * Note that QEMU ignores shareability and cacheability attributes,
+ * so we don't need to do anything with the SH, ORGN, IRGN fields
+ * in the TTBCR. Similarly, TTBCR:A1 selects whether we get the
+ * ASID from TTBR0 or TTBR1, but QEMU's TLB doesn't currently
+ * implement any ASID-like capability so we can ignore it (instead
+ * we will always flush the TLB any time the ASID is changed).
+ */
+ ttbr = regime_ttbr(env, mmu_idx, param.select);
+
+ /*
+ * Here we should have set up all the parameters for the translation:
+ * inputsize, ttbr, epd, stride, tbi
+ */
+
+ if (param.epd) {
+ /*
+ * Translation table walk disabled => Translation fault on TLB miss
+ * Note: This is always 0 on 64-bit EL2 and EL3.
+ */
+ goto do_translation_fault;
+ }
+
+ if (!regime_is_stage2(mmu_idx)) {
+ /*
+ * The starting level depends on the virtual address size (which can
+ * be up to 48 bits) and the translation granule size. It indicates
+ * the number of strides (stride bits at a time) needed to
+ * consume the bits of the input address. In the pseudocode this is:
+ * level = 4 - RoundUp((inputsize - grainsize) / stride)
+ * where their 'inputsize' is our 'inputsize', 'grainsize' is
+ * our 'stride + 3' and 'stride' is our 'stride'.
+ * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying:
+ * = 4 - (inputsize - stride - 3 + stride - 1) / stride
+ * = 4 - (inputsize - 4) / stride;
+ */
+ level = 4 - (inputsize - 4) / stride;
+ } else {
+ int startlevel = check_s2_mmu_setup(cpu, aarch64, tcr, param.ds,
+ inputsize, stride);
+ if (startlevel == INT_MIN) {
+ level = 0;
+ goto do_translation_fault;
+ }
+ level = startlevel;
+ }
+
+ indexmask_grainsize = MAKE_64BIT_MASK(0, stride + 3);
+ indexmask = MAKE_64BIT_MASK(0, inputsize - (stride * (4 - level)));
+
+ /* Now we can extract the actual base address from the TTBR */
+ descaddr = extract64(ttbr, 0, 48);
+
+ /*
+ * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [5:2] of TTBR.
+ *
+ * Otherwise, if the base address is out of range, raise AddressSizeFault.
+ * In the pseudocode, this is !IsZero(baseregister<47:outputsize>),
+ * but we've just cleared the bits above 47, so simplify the test.
+ */
+ if (outputsize > 48) {
+ descaddr |= extract64(ttbr, 2, 4) << 48;
+ } else if (descaddr >> outputsize) {
+ level = 0;
+ fi->type = ARMFault_AddressSize;
+ goto do_fault;
+ }
+
+ /*
+ * We rely on this masking to clear the RES0 bits at the bottom of the TTBR
+ * and also to mask out CnP (bit 0) which could validly be non-zero.
+ */
+ descaddr &= ~indexmask;
+
+ /*
+ * For AArch32, the address field in the descriptor goes up to bit 39
+ * for both v7 and v8. However, for v8 the SBZ bits [47:40] must be 0
+ * or an AddressSize fault is raised. So for v8 we extract those SBZ
+ * bits as part of the address, which will be checked via outputsize.
+ * For AArch64, the address field goes up to bit 47, or 49 with FEAT_LPA2;
+ * the highest bits of a 52-bit output are placed elsewhere.
+ */
+ if (param.ds) {
+ descaddrmask = MAKE_64BIT_MASK(0, 50);
+ } else if (arm_feature(env, ARM_FEATURE_V8)) {
+ descaddrmask = MAKE_64BIT_MASK(0, 48);
+ } else {
+ descaddrmask = MAKE_64BIT_MASK(0, 40);
+ }
+ descaddrmask &= ~indexmask_grainsize;
+ tableattrs = 0;
+
+ next_level:
+ descaddr |= (address >> (stride * (4 - level))) & indexmask;
+ descaddr &= ~7ULL;
+
+ /*
+ * Process the NSTable bit from the previous level. This changes
+ * the table address space and the output space from Secure to
+ * NonSecure. With RME, the EL3 translation regime does not change
+ * from Root to NonSecure.
+ */
+ if (ptw->in_space == ARMSS_Secure
+ && !regime_is_stage2(mmu_idx)
+ && extract32(tableattrs, 4, 1)) {
+ /*
+ * Stage2_S -> Stage2 or Phys_S -> Phys_NS
+ * Assert the relative order of the secure/non-secure indexes.
+ */
+ QEMU_BUILD_BUG_ON(ARMMMUIdx_Phys_S + 1 != ARMMMUIdx_Phys_NS);
+ QEMU_BUILD_BUG_ON(ARMMMUIdx_Stage2_S + 1 != ARMMMUIdx_Stage2);
+ ptw->in_ptw_idx += 1;
+ ptw->in_space = ARMSS_NonSecure;
+ }
+
+ if (!S1_ptw_translate(env, ptw, descaddr, fi)) {
+ goto do_fault;
+ }
+ descriptor = arm_ldq_ptw(env, ptw, fi);
+ if (fi->type != ARMFault_None) {
+ goto do_fault;
+ }
+ new_descriptor = descriptor;
+
+ restart_atomic_update:
+ if (!(descriptor & 1) ||
+ (!(descriptor & 2) &&
+ !lpae_block_desc_valid(cpu, param.ds, param.gran, level))) {
+ /* Invalid, or a block descriptor at an invalid level */
+ goto do_translation_fault;
+ }
+
+ descaddr = descriptor & descaddrmask;
+
+ /*
+ * For FEAT_LPA and PS=6, bits [51:48] of descaddr are in [15:12]
+ * of descriptor. For FEAT_LPA2 and effective DS, bits [51:50] of
+ * descaddr are in [9:8]. Otherwise, if descaddr is out of range,
+ * raise AddressSizeFault.
+ */
+ if (outputsize > 48) {
+ if (param.ds) {
+ descaddr |= extract64(descriptor, 8, 2) << 50;
+ } else {
+ descaddr |= extract64(descriptor, 12, 4) << 48;
+ }
+ } else if (descaddr >> outputsize) {
+ fi->type = ARMFault_AddressSize;
+ goto do_fault;
+ }
+
+ if ((descriptor & 2) && (level < 3)) {
+ /*
+ * Table entry. The top five bits are attributes which may
+ * propagate down through lower levels of the table (and
+ * which are all arranged so that 0 means "no effect", so
+ * we can gather them up by ORing in the bits at each level).
+ */
+ tableattrs |= extract64(descriptor, 59, 5);
+ level++;
+ indexmask = indexmask_grainsize;
+ goto next_level;
+ }
+
+ /*
+ * Block entry at level 1 or 2, or page entry at level 3.
+ * These are basically the same thing, although the number
+ * of bits we pull in from the vaddr varies. Note that although
+ * descaddrmask masks enough of the low bits of the descriptor
+ * to give a correct page or table address, the address field
+ * in a block descriptor is smaller; so we need to explicitly
+ * clear the lower bits here before ORing in the low vaddr bits.
+ *
+ * Afterward, descaddr is the final physical address.
+ */
+ page_size = (1ULL << ((stride * (4 - level)) + 3));
+ descaddr &= ~(hwaddr)(page_size - 1);
+ descaddr |= (address & (page_size - 1));
+
+ if (likely(!ptw->in_debug)) {
+ /*
+ * Access flag.
+ * If HA is enabled, prepare to update the descriptor below.
+ * Otherwise, pass the access fault on to software.
+ */
+ if (!(descriptor & (1 << 10))) {
+ if (param.ha) {
+ new_descriptor |= 1 << 10; /* AF */
+ } else {
+ fi->type = ARMFault_AccessFlag;
+ goto do_fault;
+ }
+ }
+
+ /*
+ * Dirty Bit.
+ * If HD is enabled, pre-emptively set/clear the appropriate AP/S2AP
+ * bit for writeback. The actual write protection test may still be
+ * overridden by tableattrs, to be merged below.
+ */
+ if (param.hd
+ && extract64(descriptor, 51, 1) /* DBM */
+ && access_type == MMU_DATA_STORE) {
+ if (regime_is_stage2(mmu_idx)) {
+ new_descriptor |= 1ull << 7; /* set S2AP[1] */
+ } else {
+ new_descriptor &= ~(1ull << 7); /* clear AP[2] */
+ }
+ }
+ }
+
+ /*
+ * Extract attributes from the (modified) descriptor, and apply
+ * table descriptors. Stage 2 table descriptors do not include
+ * any attribute fields. HPD disables all the table attributes
+ * except NSTable (which we have already handled).
+ */
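+ /* Bits [11:2] are the lower attributes, bits [63:50] the upper attributes. */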
+ attrs = new_descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(50, 14));
+ if (!regime_is_stage2(mmu_idx)) {
+ if (!param.hpd) {
+ attrs |= extract64(tableattrs, 0, 2) << 53; /* XN, PXN */
+ /*
+ * The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
+ * means "force PL1 access only", which means forcing AP[1] to 0.
+ */
+ attrs &= ~(extract64(tableattrs, 2, 1) << 6); /* !APT[0] => AP[1] */
+ attrs |= extract32(tableattrs, 3, 1) << 7; /* APT[1] => AP[2] */
+ }
+ }
+
+ ap = extract32(attrs, 6, 2);
+ out_space = ptw->in_space;
+ if (regime_is_stage2(mmu_idx)) {
+ /*
+ * R_GYNXY: For stage2 in Realm security state, bit 55 is NS.
+ * The bit remains ignored for other security states.
+ * R_YMCSL: Executing an insn fetched from non-Realm causes
+ * a stage2 permission fault.
+ */
+ if (out_space == ARMSS_Realm && extract64(attrs, 55, 1)) {
+ out_space = ARMSS_NonSecure;
+ result->f.prot = get_S2prot_noexecute(ap);
+ } else {
+ xn = extract64(attrs, 53, 2);
+ result->f.prot = get_S2prot(env, ap, xn, ptw->in_s1_is_el0);
+ }
+ } else {
+ int nse, ns = extract32(attrs, 5, 1);
+ switch (out_space) {
+ case ARMSS_Root:
+ /*
+ * R_GVZML: Bit 11 becomes the NSE field in the EL3 regime.
+ * R_XTYPW: NSE and NS together select the output pa space.
+ */
+ nse = extract32(attrs, 11, 1);
+ out_space = (nse << 1) | ns;
+ if (out_space == ARMSS_Secure &&
+ !cpu_isar_feature(aa64_sel2, cpu)) {
+ out_space = ARMSS_NonSecure;
+ }
+ break;
+ case ARMSS_Secure:
+ if (ns) {
+ out_space = ARMSS_NonSecure;
+ }
+ break;
+ case ARMSS_Realm:
+ switch (mmu_idx) {
+ case ARMMMUIdx_Stage1_E0:
+ case ARMMMUIdx_Stage1_E1:
+ case ARMMMUIdx_Stage1_E1_PAN:
+ /* I_CZPRF: For Realm EL1&0 stage1, NS bit is RES0. */
+ break;
+ case ARMMMUIdx_E2:
+ case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_E20_2:
+ case ARMMMUIdx_E20_2_PAN:
+ /*
+ * R_LYKFZ, R_WGRZN: For Realm EL2 and EL2&1,
+ * NS changes the output to non-secure space.
+ */
+ if (ns) {
+ out_space = ARMSS_NonSecure;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ break;
+ case ARMSS_NonSecure:
+ /* R_QRMFF: For NonSecure state, the NS bit is RES0. */
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ xn = extract64(attrs, 54, 1);
+ pxn = extract64(attrs, 53, 1);
+
+ if (el == 1 && nv_nv1_enabled(env, ptw)) {
+ /*
+ * With FEAT_NV, when HCR_EL2.{NV,NV1} == {1,1}, the block/page
+ * descriptor bit 54 holds PXN, 53 is RES0, and the effective value
+ * of UXN is 0. Similarly for bits 59 and 60 in table descriptors
+ * (which we have already folded into bits 53 and 54 of attrs).
+ * AP[1] (descriptor bit 6, our ap bit 0) is treated as 0.
+ * Similarly, APTable[0] from the table descriptor is treated as 0;
+ * we already folded this into AP[1] and squashing that to 0 does
+ * the right thing.
+ */
+ pxn = xn;
+ xn = 0;
+ ap &= ~1;
+ }
+ /*
+ * Note that we modified ptw->in_space earlier for NSTable, but
+ * result->f.attrs retains a copy of the original security space.
+ */
+ result->f.prot = get_S1prot(env, mmu_idx, aarch64, ap, xn, pxn,
+ result->f.attrs.space, out_space);
+ }
+
+ if (!(result->f.prot & (1 << access_type))) {
+ fi->type = ARMFault_Permission;
+ goto do_fault;
+ }
+
+ /* If FEAT_HAFDBS has made changes, update the PTE. */
+ if (new_descriptor != descriptor) {
+ new_descriptor = arm_casq_ptw(env, descriptor, new_descriptor, ptw, fi);
+ if (fi->type != ARMFault_None) {
+ goto do_fault;
+ }
+ /*
+ * I_YZSVV says that if the in-memory descriptor has changed,
+ * then we must use the information in that new value
+ * (which might include a different output address, different
+ * attributes, or generate a fault).
+ * Restart the handling of the descriptor value from scratch.
+ */
+ if (new_descriptor != descriptor) {
+ descriptor = new_descriptor;
+ goto restart_atomic_update;
+ }
+ }
+
+ result->f.attrs.space = out_space;
+ result->f.attrs.secure = arm_space_is_secure(out_space);
+
+ if (regime_is_stage2(mmu_idx)) {
+ result->cacheattrs.is_s2_format = true;
+ result->cacheattrs.attrs = extract32(attrs, 2, 4);
+ /*
+ * Security state does not really affect HCR_EL2.FWB;
+ * we only need to filter FWB for aa32 or other FEAT.
+ */
+ device = S2_attrs_are_device(arm_hcr_el2_eff(env),
+ result->cacheattrs.attrs);
+ } else {
+ /* Index into MAIR registers for cache attributes */
+ uint8_t attrindx = extract32(attrs, 2, 3);
+ uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)];
+ assert(attrindx <= 7);
+ result->cacheattrs.is_s2_format = false;
+ result->cacheattrs.attrs = extract64(mair, attrindx * 8, 8);
+
+ /* When in aarch64 mode, and BTI is enabled, remember GP in the TLB. */
+ if (aarch64 && cpu_isar_feature(aa64_bti, cpu)) {
+ result->f.extra.arm.guarded = extract64(attrs, 50, 1); /* GP */
+ }
+ device = S1_attrs_are_device(result->cacheattrs.attrs);
+ }
+
+ /*
+ * Enable alignment checks on Device memory.
+ *
+ * Per R_XCHFJ, this check is mis-ordered. The correct ordering
+ * for alignment, permission, and stage 2 faults should be:
+ * - Alignment fault caused by the memory type
+ * - Permission fault
+ * - A stage 2 fault on the memory access
+ * but due to the way the TCG softmmu TLB operates, we will have
+ * implicitly done the permission check and the stage2 lookup in
+ * finding the TLB entry, so the alignment check cannot be done sooner.
+ *
+ * In v7, for a CPU without the Virtualization Extensions this
+ * access is UNPREDICTABLE; we choose to make it take the alignment
+ * fault as is required for a v7VE CPU. (QEMU doesn't emulate any
+ * CPUs with ARM_FEATURE_LPAE but not ARM_FEATURE_V7VE anyway.)
+ */
+ if (device) {
+ result->f.tlb_fill_flags |= TLB_CHECK_ALIGNED;
+ }
+
+ /*
+ * For FEAT_LPA2 and effective DS, the SH field in the attributes
+ * was re-purposed for output address bits. The SH attribute in
+ * that case comes from TCR_ELx, which we extracted earlier.
+ */
+ if (param.ds) {
+ result->cacheattrs.shareability = param.sh;
+ } else {
+ result->cacheattrs.shareability = extract32(attrs, 8, 2);
+ }
+
+ result->f.phys_addr = descaddr;
+ result->f.lg_page_size = ctz64(page_size);
+ return false;
+
+ do_translation_fault:
+ fi->type = ARMFault_Translation;
+ do_fault:
+ if (fi->s1ptw) {
+ /* Retain the existing stage 2 fi->level */
+ assert(fi->stage2);
+ } else {
+ fi->level = level;
+ fi->stage2 = regime_is_stage2(mmu_idx);
+ }
+ fi->s1ns = fault_s1ns(ptw->in_space, mmu_idx);
+ return true;
+}
+
+static bool get_phys_addr_pmsav5(CPUARMState *env,
+ S1Translate *ptw,
+ uint32_t address,
+ MMUAccessType access_type,
+ GetPhysAddrResult *result,
+ ARMMMUFaultInfo *fi)
+{
+ int n;
+ uint32_t mask;
+ uint32_t base;
+ ARMMMUIdx mmu_idx = ptw->in_mmu_idx;
+ bool is_user = regime_is_user(env, mmu_idx);
+
+ if (regime_translation_disabled(env, mmu_idx, ptw->in_space)) {
+ /* MPU disabled. */
+ result->f.phys_addr = address;
+ result->f.prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ return false;
+ }
+
+ result->f.phys_addr = address;
+ for (n = 7; n >= 0; n--) {
+ base = env->cp15.c6_region[n];
+ if ((base & 1) == 0) {
+ continue;
+ }
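+ /* Bits [5:1] of the region register encode a size of 2^(SIZE+1) bytes. */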
+ mask = 1 << ((base >> 1) & 0x1f);
+ /*
+ * Keep this shift separate from the above to avoid an
+ * (undefined) << 32.
+ */
+ mask = (mask << 1) - 1;
+ if (((base ^ address) & ~mask) == 0) {
+ break;
+ }
+ }
+ if (n < 0) {
+ fi->type = ARMFault_Background;
+ return true;
+ }
+
+ if (access_type == MMU_INST_FETCH) {
+ mask = env->cp15.pmsav5_insn_ap;
+ } else {
+ mask = env->cp15.pmsav5_data_ap;
+ }
+ mask = (mask >> (n * 4)) & 0xf;
+ switch (mask) {
+ case 0:
+ fi->type = ARMFault_Permission;
+ fi->level = 1;
+ return true;
+ case 1:
+ if (is_user) {
+ fi->type = ARMFault_Permission;
+ fi->level = 1;
+ return true;
+ }
+ result->f.prot = PAGE_READ | PAGE_WRITE;
+ break;
+ case 2:
+ result->f.prot = PAGE_READ;
+ if (!is_user) {
+ result->f.prot |= PAGE_WRITE;
+ }
+ break;
+ case 3:
+ result->f.prot = PAGE_READ | PAGE_WRITE;
+ break;
+ case 5:
+ if (is_user) {
+ fi->type = ARMFault_Permission;
+ fi->level = 1;
+ return true;
+ }
+ result->f.prot = PAGE_READ;
+ break;
+ case 6:
+ result->f.prot = PAGE_READ;
+ break;
+ default:
+ /* Bad permission. */
+ fi->type = ARMFault_Permission;
+ fi->level = 1;
+ return true;
+ }
+ result->f.prot |= PAGE_EXEC;
+ return false;
+}
+
+static void get_phys_addr_pmsav7_default(CPUARMState *env, ARMMMUIdx mmu_idx,
+ int32_t address, uint8_t *prot)
+{
+ if (!arm_feature(env, ARM_FEATURE_M)) {
+ *prot = PAGE_READ | PAGE_WRITE;
+ switch (address) {
+ case 0xF0000000 ... 0xFFFFFFFF:
+ if (regime_sctlr(env, mmu_idx) & SCTLR_V) {
+ /* hivecs execing is ok */
+ *prot |= PAGE_EXEC;
+ }
+ break;
+ case 0x00000000 ... 0x7FFFFFFF:
+ *prot |= PAGE_EXEC;
+ break;
+ }
+ } else {
+ /*
+ * Default system address map for M profile cores.
+ * The architecture specifies which regions are execute-never;
+ * at the MPU level no other checks are defined.
+ */
+ switch (address) {
+ case 0x00000000 ... 0x1fffffff: /* ROM */
+ case 0x20000000 ... 0x3fffffff: /* SRAM */
+ case 0x60000000 ... 0x7fffffff: /* RAM */
+ case 0x80000000 ... 0x9fffffff: /* RAM */
+ *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ break;
+ case 0x40000000 ... 0x5fffffff: /* Peripheral */
+ case 0xa0000000 ... 0xbfffffff: /* Device */
+ case 0xc0000000 ... 0xdfffffff: /* Device */
+ case 0xe0000000 ... 0xffffffff: /* System */
+ *prot = PAGE_READ | PAGE_WRITE;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+}
+
+static bool m_is_ppb_region(CPUARMState *env, uint32_t address)
+{
+ /* True if address is in the M profile PPB region 0xe0000000 - 0xe00fffff */
+ return arm_feature(env, ARM_FEATURE_M) &&
+ extract32(address, 20, 12) == 0xe00;
+}
+
+static bool m_is_system_region(CPUARMState *env, uint32_t address)
+{
+ /*
+ * True if address is in the M profile system region
+ * 0xe0000000 - 0xffffffff
+ */
+ return arm_feature(env, ARM_FEATURE_M) && extract32(address, 29, 3) == 0x7;
+}
+
+static bool pmsav7_use_background_region(ARMCPU *cpu, ARMMMUIdx mmu_idx,
+ bool is_secure, bool is_user)
+{
+ /*
+ * Return true if we should use the default memory map as a
+ * "background" region if there are no hits against any MPU regions.
+ */
+ CPUARMState *env = &cpu->env;
+
+ if (is_user) {
+ return false;
+ }
+
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ return env->v7m.mpu_ctrl[is_secure] & R_V7M_MPU_CTRL_PRIVDEFENA_MASK;
+ }
+
+ if (mmu_idx == ARMMMUIdx_Stage2) {
+ return false;
+ }
+
+ return regime_sctlr(env, mmu_idx) & SCTLR_BR;
+}
+
+static bool get_phys_addr_pmsav7(CPUARMState *env,
+ S1Translate *ptw,
+ uint32_t address,
+ MMUAccessType access_type,
+ GetPhysAddrResult *result,
+ ARMMMUFaultInfo *fi)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ int n;
+ ARMMMUIdx mmu_idx = ptw->in_mmu_idx;
+ bool is_user = regime_is_user(env, mmu_idx);
+ bool secure = arm_space_is_secure(ptw->in_space);
+
+ result->f.phys_addr = address;
+ result->f.lg_page_size = TARGET_PAGE_BITS;
+ result->f.prot = 0;
+
+ if (regime_translation_disabled(env, mmu_idx, ptw->in_space) ||
+ m_is_ppb_region(env, address)) {
+ /*
+ * MPU disabled or M profile PPB access: use default memory map.
+ * The other case which uses the default memory map in the
+ * v7M ARM ARM pseudocode is exception vector reads from the vector
+ * table. In QEMU those accesses are done in arm_v7m_load_vector(),
+ * which always does a direct read using address_space_ldl(), rather
+ * than going via this function, so we don't need to check that here.
+ */
+ get_phys_addr_pmsav7_default(env, mmu_idx, address, &result->f.prot);
+ } else { /* MPU enabled */
+ for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) {
+ /* region search */
+ uint32_t base = env->pmsav7.drbar[n];
+ uint32_t rsize = extract32(env->pmsav7.drsr[n], 1, 5);
+ uint32_t rmask;
+ bool srdis = false;
+
+ if (!(env->pmsav7.drsr[n] & 0x1)) {
+ continue;
+ }
+
+ if (!rsize) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "DRSR[%d]: Rsize field cannot be 0\n", n);
+ continue;
+ }
+ rsize++;
+ rmask = (1ull << rsize) - 1;
+
+ if (base & rmask) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "DRBAR[%d]: 0x%" PRIx32 " misaligned "
+ "to DRSR region size, mask = 0x%" PRIx32 "\n",
+ n, base, rmask);
+ continue;
+ }
+
+ if (address < base || address > base + rmask) {
+ /*
+ * Address not in this region. We must check whether the
+ * region covers addresses in the same page as our address.
+ * In that case we must not report a size that covers the
+ * whole page for a subsequent hit against a different MPU
+ * region or the background region, because it would result in
+ * incorrect TLB hits for subsequent accesses to addresses that
+ * are in this MPU region.
+ */
+ if (ranges_overlap(base, rmask,
+ address & TARGET_PAGE_MASK,
+ TARGET_PAGE_SIZE)) {
+ result->f.lg_page_size = 0;
+ }
+ continue;
+ }
+
+ /* Region matched */
+
+ if (rsize >= 8) { /* no subregions for regions < 256 bytes */
+ int i, snd;
+ uint32_t srdis_mask;
+
+ rsize -= 3; /* sub region size (power of 2) */
+ snd = ((address - base) >> rsize) & 0x7;
+ srdis = extract32(env->pmsav7.drsr[n], snd + 8, 1);
+
+ srdis_mask = srdis ? 0x3 : 0x0;
+ for (i = 2; i <= 8 && rsize < TARGET_PAGE_BITS; i *= 2) {
+ /*
+ * This will check in groups of 2, 4 and then 8, whether
+ * the subregion bits are consistent. rsize is incremented
+ * back up to give the region size, considering consistent
+ * adjacent subregions as one region. Stop testing if rsize
+ * is already big enough for an entire QEMU page.
+ */
+ int snd_rounded = snd & ~(i - 1);
+ uint32_t srdis_multi = extract32(env->pmsav7.drsr[n],
+ snd_rounded + 8, i);
+ if (srdis_mask ^ srdis_multi) {
+ break;
+ }
+ srdis_mask = (srdis_mask << i) | srdis_mask;
+ rsize++;
+ }
+ }
+ if (srdis) {
+ continue;
+ }
+ if (rsize < TARGET_PAGE_BITS) {
+ result->f.lg_page_size = rsize;
+ }
+ break;
+ }
+
+ if (n == -1) { /* no hits */
+ if (!pmsav7_use_background_region(cpu, mmu_idx, secure, is_user)) {
+ /* background fault */
+ fi->type = ARMFault_Background;
+ return true;
+ }
+ get_phys_addr_pmsav7_default(env, mmu_idx, address,
+ &result->f.prot);
+ } else { /* an MPU hit! */
+ uint32_t ap = extract32(env->pmsav7.dracr[n], 8, 3);
+ uint32_t xn = extract32(env->pmsav7.dracr[n], 12, 1);
+
+ if (m_is_system_region(env, address)) {
+ /* System space is always execute never */
+ xn = 1;
+ }
+
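+ /*
+ * PMSAv7 AP[2:0]: 0 = no access, 1 = priv RW, 2 = priv RW/user RO,
+ * 3 = full RW, 5 = priv RO, 6 = RO (7 is RO on v7M, reserved on
+ * R profile); 4 is reserved and logged as a guest error below.
+ */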
+ if (is_user) { /* User mode AP bit decoding */
+ switch (ap) {
+ case 0:
+ case 1:
+ case 5:
+ break; /* no access */
+ case 3:
+ result->f.prot |= PAGE_WRITE;
+ /* fall through */
+ case 2:
+ case 6:
+ result->f.prot |= PAGE_READ | PAGE_EXEC;
+ break;
+ case 7:
+ /* for v7M, same as 6; for R profile a reserved value */
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ result->f.prot |= PAGE_READ | PAGE_EXEC;
+ break;
+ }
+ /* fall through */
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "DRACR[%d]: Bad value for AP bits: 0x%"
+ PRIx32 "\n", n, ap);
+ }
+ } else { /* Priv. mode AP bits decoding */
+ switch (ap) {
+ case 0:
+ break; /* no access */
+ case 1:
+ case 2:
+ case 3:
+ result->f.prot |= PAGE_WRITE;
+ /* fall through */
+ case 5:
+ case 6:
+ result->f.prot |= PAGE_READ | PAGE_EXEC;
+ break;
+ case 7:
+ /* for v7M, same as 6; for R profile a reserved value */
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ result->f.prot |= PAGE_READ | PAGE_EXEC;
+ break;
+ }
+ /* fall through */
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "DRACR[%d]: Bad value for AP bits: 0x%"
+ PRIx32 "\n", n, ap);
+ }
+ }
+
+ /* execute never */
+ if (xn) {
+ result->f.prot &= ~PAGE_EXEC;
+ }
+ }
+ }
+
+ fi->type = ARMFault_Permission;
+ fi->level = 1;
+ return !(result->f.prot & (1 << access_type));
+}
+
+static uint32_t *regime_rbar(CPUARMState *env, ARMMMUIdx mmu_idx,
+ uint32_t secure)
+{
+ if (regime_el(env, mmu_idx) == 2) {
+ return env->pmsav8.hprbar;
+ } else {
+ return env->pmsav8.rbar[secure];
+ }
+}
+
+static uint32_t *regime_rlar(CPUARMState *env, ARMMMUIdx mmu_idx,
+ uint32_t secure)
+{
+ if (regime_el(env, mmu_idx) == 2) {
+ return env->pmsav8.hprlar;
+ } else {
+ return env->pmsav8.rlar[secure];
+ }
+}
+
+bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
+ bool secure, GetPhysAddrResult *result,
+ ARMMMUFaultInfo *fi, uint32_t *mregion)
+{
+ /*
+ * Perform a PMSAv8 MPU lookup (without also doing the SAU check
+ * that a full phys-to-virt translation does).
+ * mregion is (if not NULL) set to the region number which matched,
+ * or -1 if no region number is returned (MPU off, address did not
+ * hit a region, address hit in multiple regions).
+ * If the region hit doesn't cover the entire TARGET_PAGE the address
+ * is within, then we set the result page_size to 1 to force the
+ * memory system to use a subpage.
+ */
+ ARMCPU *cpu = env_archcpu(env);
+ bool is_user = regime_is_user(env, mmu_idx);
+ int n;
+ int matchregion = -1;
+ bool hit = false;
+ uint32_t addr_page_base = address & TARGET_PAGE_MASK;
+ uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1);
+ int region_counter;
+
+ if (regime_el(env, mmu_idx) == 2) {
+ region_counter = cpu->pmsav8r_hdregion;
+ } else {
+ region_counter = cpu->pmsav7_dregion;
+ }
+
+ result->f.lg_page_size = TARGET_PAGE_BITS;
+ result->f.phys_addr = address;
+ result->f.prot = 0;
+ if (mregion) {
+ *mregion = -1;
+ }
+
+ if (mmu_idx == ARMMMUIdx_Stage2) {
+ fi->stage2 = true;
+ }
+
+ /*
+ * Unlike the ARM ARM pseudocode, we don't need to check whether this
+ * was an exception vector read from the vector table (which is always
+ * done using the default system address map), because those accesses
+ * are done in arm_v7m_load_vector(), which always does a direct
+ * read using address_space_ldl(), rather than going via this function.
+ */
+ if (regime_translation_disabled(env, mmu_idx, arm_secure_to_space(secure))) {
+ /* MPU disabled */
+ hit = true;
+ } else if (m_is_ppb_region(env, address)) {
+ hit = true;
+ } else {
+ if (pmsav7_use_background_region(cpu, mmu_idx, secure, is_user)) {
+ hit = true;
+ }
+
+ uint32_t bitmask;
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ bitmask = 0x1f;
+ } else {
+ bitmask = 0x3f;
+ fi->level = 0;
+ }
+
+ for (n = region_counter - 1; n >= 0; n--) {
+ /* region search */
+ /*
+ * Note that the base address is bits [31:x] from the register
+ * with bits [x-1:0] all zeroes, but the limit address is bits
+ * [31:x] from the register with bits [x-1:0] all ones, where x
+ * is 5 for Cortex-M and 6 for Cortex-R.
+ */
+ uint32_t base = regime_rbar(env, mmu_idx, secure)[n] & ~bitmask;
+ uint32_t limit = regime_rlar(env, mmu_idx, secure)[n] | bitmask;
+
+ if (!(regime_rlar(env, mmu_idx, secure)[n] & 0x1)) {
+ /* Region disabled */
+ continue;
+ }
+
+ if (address < base || address > limit) {
+ /*
+ * Address not in this region. We must check whether the
+ * region covers addresses in the same page as our address.
+ * In that case we must not report a size that covers the
+ * whole page for a subsequent hit against a different MPU
+ * region or the background region, because it would result in
+ * incorrect TLB hits for subsequent accesses to addresses that
+ * are in this MPU region.
+ */
+ if (limit >= base &&
+ ranges_overlap(base, limit - base + 1,
+ addr_page_base,
+ TARGET_PAGE_SIZE)) {
+ result->f.lg_page_size = 0;
+ }
+ continue;
+ }
+
+ if (base > addr_page_base || limit < addr_page_limit) {
+ result->f.lg_page_size = 0;
+ }
+
+ if (matchregion != -1) {
+ /*
+ * Multiple regions match -- always a failure (unlike
+ * PMSAv7 where highest-numbered-region wins)
+ */
+ fi->type = ARMFault_Permission;
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ fi->level = 1;
+ }
+ return true;
+ }
+
+ matchregion = n;
+ hit = true;
+ }
+ }
+
+ if (!hit) {
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ fi->type = ARMFault_Background;
+ } else {
+ fi->type = ARMFault_Permission;
+ }
+ return true;
+ }
+
+ if (matchregion == -1) {
+ /* hit using the background region */
+ get_phys_addr_pmsav7_default(env, mmu_idx, address, &result->f.prot);
+ } else {
+ uint32_t matched_rbar = regime_rbar(env, mmu_idx, secure)[matchregion];
+ uint32_t matched_rlar = regime_rlar(env, mmu_idx, secure)[matchregion];
+ uint32_t ap = extract32(matched_rbar, 1, 2);
+ uint32_t xn = extract32(matched_rbar, 0, 1);
+ bool pxn = false;
+
+ if (arm_feature(env, ARM_FEATURE_V8_1M)) {
+ pxn = extract32(matched_rlar, 4, 1);
+ }
+
+ if (m_is_system_region(env, address)) {
+ /* System space is always execute never */
+ xn = 1;
+ }
+
+ if (regime_el(env, mmu_idx) == 2) {
+ result->f.prot = simple_ap_to_rw_prot_is_user(ap,
+ mmu_idx != ARMMMUIdx_E2);
+ } else {
+ result->f.prot = simple_ap_to_rw_prot(env, mmu_idx, ap);
+ }
+
+ if (!arm_feature(env, ARM_FEATURE_M)) {
+ uint8_t attrindx = extract32(matched_rlar, 1, 3);
+ uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)];
+ uint8_t sh = extract32(matched_rlar, 3, 2);
+
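+ /*
+ * SCTLR.WXN forces any writable region to be execute-never
+ * (except in the stage 2 regime), and SCTLR.UWXN in the EL1
+ * regime forces regions writable at EL0 (AP == 0b01) to be
+ * privileged-execute-never.
+ */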
+ if (regime_sctlr(env, mmu_idx) & SCTLR_WXN &&
+ result->f.prot & PAGE_WRITE && mmu_idx != ARMMMUIdx_Stage2) {
+ xn = 0x1;
+ }
+
+ if ((regime_el(env, mmu_idx) == 1) &&
+ regime_sctlr(env, mmu_idx) & SCTLR_UWXN && ap == 0x1) {
+ pxn = 0x1;
+ }
+
+ result->cacheattrs.is_s2_format = false;
+ result->cacheattrs.attrs = extract64(mair, attrindx * 8, 8);
+ result->cacheattrs.shareability = sh;
+ }
+
+ if (result->f.prot && !xn && !(pxn && !is_user)) {
+ result->f.prot |= PAGE_EXEC;
+ }
+
+ if (mregion) {
+ *mregion = matchregion;
+ }
+ }
+
+ fi->type = ARMFault_Permission;
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ fi->level = 1;
+ }
+ return !(result->f.prot & (1 << access_type));
+}
+
+static bool v8m_is_sau_exempt(CPUARMState *env,
+ uint32_t address, MMUAccessType access_type)
+{
+ /*
+ * The architecture specifies that certain address ranges are
+ * exempt from v8M SAU/IDAU checks.
+ */
+ return
+ (access_type == MMU_INST_FETCH && m_is_system_region(env, address)) ||
+ (address >= 0xe0000000 && address <= 0xe0002fff) ||
+ (address >= 0xe000e000 && address <= 0xe000efff) ||
+ (address >= 0xe002e000 && address <= 0xe002efff) ||
+ (address >= 0xe0040000 && address <= 0xe0041fff) ||
+ (address >= 0xe00ff000 && address <= 0xe00fffff);
+}
+
+void v8m_security_lookup(CPUARMState *env, uint32_t address,
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
+ bool is_secure, V8M_SAttributes *sattrs)
+{
+ /*
+ * Look up the security attributes for this address. Compare the
+ * pseudocode SecurityCheck() function.
+ * We assume the caller has zero-initialized *sattrs.
+ */
+ ARMCPU *cpu = env_archcpu(env);
+ int r;
+ bool idau_exempt = false, idau_ns = true, idau_nsc = true;
+ int idau_region = IREGION_NOTVALID;
+ uint32_t addr_page_base = address & TARGET_PAGE_MASK;
+ uint32_t addr_page_limit = addr_page_base + (TARGET_PAGE_SIZE - 1);
+
+ if (cpu->idau) {
+ IDAUInterfaceClass *iic = IDAU_INTERFACE_GET_CLASS(cpu->idau);
+ IDAUInterface *ii = IDAU_INTERFACE(cpu->idau);
+
+ iic->check(ii, address, &idau_region, &idau_exempt, &idau_ns,
+ &idau_nsc);
+ }
+
+ if (access_type == MMU_INST_FETCH && extract32(address, 28, 4) == 0xf) {
+ /* 0xf0000000..0xffffffff is always S for insn fetches */
+ return;
+ }
+
+ if (idau_exempt || v8m_is_sau_exempt(env, address, access_type)) {
+ sattrs->ns = !is_secure;
+ return;
+ }
+
+ if (idau_region != IREGION_NOTVALID) {
+ sattrs->irvalid = true;
+ sattrs->iregion = idau_region;
+ }
+
+ switch (env->sau.ctrl & 3) {
+ case 0: /* SAU.ENABLE == 0, SAU.ALLNS == 0 */
+ break;
+ case 2: /* SAU.ENABLE == 0, SAU.ALLNS == 1 */
+ sattrs->ns = true;
+ break;
+ default: /* SAU.ENABLE == 1 */
+ for (r = 0; r < cpu->sau_sregion; r++) {
+ if (env->sau.rlar[r] & 1) {
+ uint32_t base = env->sau.rbar[r] & ~0x1f;
+ uint32_t limit = env->sau.rlar[r] | 0x1f;
+
+ if (base <= address && limit >= address) {
+ if (base > addr_page_base || limit < addr_page_limit) {
+ sattrs->subpage = true;
+ }
+ if (sattrs->srvalid) {
+ /*
+ * If we hit in more than one region then we must report
+ * as Secure, not NS-Callable, with no valid region
+ * number info.
+ */
+ sattrs->ns = false;
+ sattrs->nsc = false;
+ sattrs->sregion = 0;
+ sattrs->srvalid = false;
+ break;
+ } else {
+ if (env->sau.rlar[r] & 2) {
+ sattrs->nsc = true;
+ } else {
+ sattrs->ns = true;
+ }
+ sattrs->srvalid = true;
+ sattrs->sregion = r;
+ }
+ } else {
+ /*
+ * Address not in this region. We must check whether the
+ * region covers addresses in the same page as our address.
+ * In that case we must not report a size that covers the
+ * whole page for a subsequent hit against a different MPU
+ * region or the background region, because it would result
+ * in incorrect TLB hits for subsequent accesses to
+ * addresses that are in this MPU region.
+ */
+ if (limit >= base &&
+ ranges_overlap(base, limit - base + 1,
+ addr_page_base,
+ TARGET_PAGE_SIZE)) {
+ sattrs->subpage = true;
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ /*
+ * The IDAU will override the SAU lookup results if it specifies
+ * higher security than the SAU does.
+ */
+ if (!idau_ns) {
+ if (sattrs->ns || (!idau_nsc && sattrs->nsc)) {
+ sattrs->ns = false;
+ sattrs->nsc = idau_nsc;
+ }
+ }
+}
+
+static bool get_phys_addr_pmsav8(CPUARMState *env,
+ S1Translate *ptw,
+ uint32_t address,
+ MMUAccessType access_type,
+ GetPhysAddrResult *result,
+ ARMMMUFaultInfo *fi)
+{
+ V8M_SAttributes sattrs = {};
+ ARMMMUIdx mmu_idx = ptw->in_mmu_idx;
+ bool secure = arm_space_is_secure(ptw->in_space);
+ bool ret;
+
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ v8m_security_lookup(env, address, access_type, mmu_idx,
+ secure, &sattrs);
+ if (access_type == MMU_INST_FETCH) {
+ /*
+ * Instruction fetches always use the MMU bank and the
+ * transaction attribute determined by the fetch address,
+ * regardless of CPU state. This is painful for QEMU
+ * to handle, because it would mean we need to encode
+ * into the mmu_idx not just the (user, negpri) information
+ * for the current security state but also that for the
+ * other security state, which would balloon the number
+ * of mmu_idx values needed alarmingly.
+ * Fortunately we can avoid this because it's not actually
+ * possible to arbitrarily execute code from memory with
+ * the wrong security attribute: it will always generate
+ * an exception of some kind or another, apart from the
+ * special case of an NS CPU executing an SG instruction
+ * in S&NSC memory. So we always just fail the translation
+ * here and sort things out in the exception handler
+ * (including possibly emulating an SG instruction).
+ */
+ if (sattrs.ns != !secure) {
+ if (sattrs.nsc) {
+ fi->type = ARMFault_QEMU_NSCExec;
+ } else {
+ fi->type = ARMFault_QEMU_SFault;
+ }
+ result->f.lg_page_size = sattrs.subpage ? 0 : TARGET_PAGE_BITS;
+ result->f.phys_addr = address;
+ result->f.prot = 0;
+ return true;
+ }
+ } else {
+ /*
+ * For data accesses we always use the MMU bank indicated
+ * by the current CPU state, but the security attributes
+ * might downgrade a secure access to nonsecure.
+ */
+ if (sattrs.ns) {
+ result->f.attrs.secure = false;
+ result->f.attrs.space = ARMSS_NonSecure;
+ } else if (!secure) {
+ /*
+ * NS access to S memory must fault.
+ * Architecturally we should first check whether the
+ * MPU information for this address indicates that we
+ * are doing an unaligned access to Device memory, which
+ * should generate a UsageFault instead. QEMU does not
+ * currently check for that kind of unaligned access though.
+ * If we added it we would need to do so as a special case
+ * for M_FAKE_FSR_SFAULT in arm_v7m_cpu_do_interrupt().
+ */
+ fi->type = ARMFault_QEMU_SFault;
+ result->f.lg_page_size = sattrs.subpage ? 0 : TARGET_PAGE_BITS;
+ result->f.phys_addr = address;
+ result->f.prot = 0;
+ return true;
+ }
+ }
+ }
+
+ ret = pmsav8_mpu_lookup(env, address, access_type, mmu_idx, secure,
+ result, fi, NULL);
+ if (sattrs.subpage) {
+ result->f.lg_page_size = 0;
+ }
+ return ret;
+}
+
+/*
+ * Translate from the 4-bit stage 2 representation of
+ * memory attributes (without cache-allocation hints) to
+ * the 8-bit representation of the stage 1 MAIR registers
+ * (which includes allocation hints).
+ *
+ * ref: shared/translation/attrs/S2AttrDecode()
+ * .../S2ConvertAttrsHints()
+ */
+static uint8_t convert_stage2_attrs(uint64_t hcr, uint8_t s2attrs)
+{
+ uint8_t hiattr = extract32(s2attrs, 2, 2);
+ uint8_t loattr = extract32(s2attrs, 0, 2);
+ uint8_t hihint = 0, lohint = 0;
+
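+ /*
+ * hiattr: 0 = Device (loattr then selects the Device sub-type),
+ * 1 = Normal Non-cacheable, 2 = Write-Through, 3 = Write-Back;
+ * e.g. s2attrs 0xf (WB/WB) converts to MAIR 0xff (WB, RW-allocate).
+ */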
+ if (hiattr != 0) { /* normal memory */
+ if (hcr & HCR_CD) { /* cache disabled */
+ hiattr = loattr = 1; /* non-cacheable */
+ } else {
+ if (hiattr != 1) { /* Write-through or write-back */
+ hihint = 3; /* RW allocate */
+ }
+ if (loattr != 1) { /* Write-through or write-back */
+ lohint = 3; /* RW allocate */
+ }
+ }
+ }
+
+ return (hiattr << 6) | (hihint << 4) | (loattr << 2) | lohint;
+}
+
+/*
+ * Combine either inner or outer cacheability attributes for normal
+ * memory, according to table D4-42 and pseudocode procedure
+ * CombineS1S2AttrHints() of ARM DDI 0487B.b (the ARMv8 ARM).
+ *
+ * NB: only stage 1 includes allocation hints (RW bits), leading to
+ * some asymmetry.
+ */
+static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2)
+{
+ if (s1 == 4 || s2 == 4) {
+ /* non-cacheable has precedence */
+ return 4;
+ } else if (extract32(s1, 2, 2) == 0 || extract32(s1, 2, 2) == 2) {
+ /* stage 1 write-through takes precedence */
+ return s1;
+ } else if (extract32(s2, 2, 2) == 2) {
+ /*
+ * Stage 2 write-through takes precedence, but the allocation hint
+ * is still taken from stage 1.
+ */
+ return (2 << 2) | extract32(s1, 0, 2);
+ } else { /* write-back */
+ return s1;
+ }
+}
+
+/*
+ * Combine the memory type and cacheability attributes of
+ * s1 and s2 for the HCR_EL2.FWB == 0 case, returning the
+ * combined attributes in MAIR_EL1 format.
+ */
+static uint8_t combined_attrs_nofwb(uint64_t hcr,
+ ARMCacheAttrs s1, ARMCacheAttrs s2)
+{
+ uint8_t s1lo, s2lo, s1hi, s2hi, s2_mair_attrs, ret_attrs;
+
+ if (s2.is_s2_format) {
+ s2_mair_attrs = convert_stage2_attrs(hcr, s2.attrs);
+ } else {
+ s2_mair_attrs = s2.attrs;
+ }
+
+ s1lo = extract32(s1.attrs, 0, 4);
+ s2lo = extract32(s2_mair_attrs, 0, 4);
+ s1hi = extract32(s1.attrs, 4, 4);
+ s2hi = extract32(s2_mair_attrs, 4, 4);
+
+ /* Combine memory type and cacheability attributes */
+ if (s1hi == 0 || s2hi == 0) {
+ /* Device has precedence over normal */
+ if (s1lo == 0 || s2lo == 0) {
+ /* nGnRnE has precedence over anything */
+ ret_attrs = 0;
+ } else if (s1lo == 4 || s2lo == 4) {
+ /* non-Reordering has precedence over Reordering */
+ ret_attrs = 4; /* nGnRE */
+ } else if (s1lo == 8 || s2lo == 8) {
+ /* non-Gathering has precedence over Gathering */
+ ret_attrs = 8; /* nGRE */
+ } else {
+ ret_attrs = 0xc; /* GRE */
+ }
+ } else { /* Normal memory */
+ /* Outer/inner cacheability combine independently */
+ ret_attrs = combine_cacheattr_nibble(s1hi, s2hi) << 4
+ | combine_cacheattr_nibble(s1lo, s2lo);
+ }
+ return ret_attrs;
+}
+
+static uint8_t force_cacheattr_nibble_wb(uint8_t attr)
+{
+ /*
+ * Given the 4 bits specifying the outer or inner cacheability
+ * in MAIR format, return a value specifying Normal Write-Back,
+ * with the allocation and transient hints taken from the input
+ * if the input specified some kind of cacheable attribute.
+ */
+ if (attr == 0 || attr == 4) {
+ /*
+ * 0 == an UNPREDICTABLE encoding
+ * 4 == Non-cacheable
+ * Either way, force Write-Back RW allocate non-transient
+ */
+ return 0xf;
+ }
+ /* Change WriteThrough to WriteBack, keep allocation and transient hints */
+ return attr | 4;
+}
+
+/*
+ * Combine the memory type and cacheability attributes of
+ * s1 and s2 for the HCR_EL2.FWB == 1 case, returning the
+ * combined attributes in MAIR_EL1 format.
+ */
+static uint8_t combined_attrs_fwb(ARMCacheAttrs s1, ARMCacheAttrs s2)
+{
+ assert(s2.is_s2_format && !s1.is_s2_format);
+
+ switch (s2.attrs) {
+ case 7:
+ /* Use stage 1 attributes */
+ return s1.attrs;
+ case 6:
+ /*
+ * Force Normal Write-Back. Note that if S1 is Normal cacheable
+ * then we take the allocation hints from it; otherwise it is
+ * RW allocate, non-transient.
+ */
+ if ((s1.attrs & 0xf0) == 0) {
+ /* S1 is Device */
+ return 0xff;
+ }
+ /* Need to check the Inner and Outer nibbles separately */
+ return force_cacheattr_nibble_wb(s1.attrs & 0xf) |
+ force_cacheattr_nibble_wb(s1.attrs >> 4) << 4;
+ case 5:
+ /* If S1 attrs are Device, use them; otherwise Normal Non-cacheable */
+ if ((s1.attrs & 0xf0) == 0) {
+ return s1.attrs;
+ }
+ return 0x44;
+ case 0 ... 3:
+ /* Force Device, of subtype specified by S2 */
+ return s2.attrs << 2;
+ default:
+ /*
+ * RESERVED values (including RES0 descriptor bit [5] being nonzero);
+ * arbitrarily force Device.
+ */
+ return 0;
+ }
+}
+
+/*
+ * Combine S1 and S2 cacheability/shareability attributes, per D4.5.4
+ * and CombineS1S2Desc()
+ *
+ * @env: CPUARMState
+ * @s1: Attributes from stage 1 walk
+ * @s2: Attributes from stage 2 walk
+ */
+static ARMCacheAttrs combine_cacheattrs(uint64_t hcr,
+ ARMCacheAttrs s1, ARMCacheAttrs s2)
+{
+ ARMCacheAttrs ret;
+ bool tagged = false;
+
+ assert(!s1.is_s2_format);
+ ret.is_s2_format = false;
+
+ if (s1.attrs == 0xf0) {
+ tagged = true;
+ s1.attrs = 0xff;
+ }
+
+ /* Combine shareability attributes (table D4-43) */
+ if (s1.shareability == 2 || s2.shareability == 2) {
+ /* if either are outer-shareable, the result is outer-shareable */
+ ret.shareability = 2;
+ } else if (s1.shareability == 3 || s2.shareability == 3) {
+ /* if either are inner-shareable, the result is inner-shareable */
+ ret.shareability = 3;
+ } else {
+ /* both non-shareable */
+ ret.shareability = 0;
+ }
+
+ /* Combine memory type and cacheability attributes */
+ if (hcr & HCR_FWB) {
+ ret.attrs = combined_attrs_fwb(s1, s2);
+ } else {
+ ret.attrs = combined_attrs_nofwb(hcr, s1, s2);
+ }
+
+ /*
+ * Any location for which the resultant memory type is any
+ * type of Device memory is always treated as Outer Shareable.
+ * Any location for which the resultant memory type is Normal
+ * Inner Non-cacheable, Outer Non-cacheable is always treated
+ * as Outer Shareable.
+ * TODO: FEAT_XS adds another value (0x40) also meaning iNCoNC
+ */
+ if ((ret.attrs & 0xf0) == 0 || ret.attrs == 0x44) {
+ ret.shareability = 2;
+ }
+
+ /* TODO: CombineS1S2Desc does not consider transient, only WB, RWA. */
+ if (tagged && ret.attrs == 0xff) {
+ ret.attrs = 0xf0;
+ }
+
+ return ret;
+}
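The shareability merge above reduces to a simple precedence: Outer Shareable (encoding 2) beats Inner Shareable (3), which beats Non-shareable (0). A stand-alone sketch, illustrative only and not part of the patch:

#include <assert.h>
#include <stdint.h>

static uint8_t combine_shareability(uint8_t s1, uint8_t s2)
{
    if (s1 == 2 || s2 == 2) {
        return 2;                   /* Outer Shareable wins */
    }
    if (s1 == 3 || s2 == 3) {
        return 3;                   /* otherwise Inner Shareable wins */
    }
    return 0;                       /* both Non-shareable */
}

int main(void)
{
    assert(combine_shareability(0, 2) == 2);
    assert(combine_shareability(3, 2) == 2);
    assert(combine_shareability(3, 0) == 3);
    assert(combine_shareability(0, 0) == 0);
    return 0;
}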
+
+/*
+ * MMU disabled. S1 addresses within aa64 translation regimes are
+ * still checked for bounds -- see AArch64.S1DisabledOutput().
+ */
+static bool get_phys_addr_disabled(CPUARMState *env,
+ S1Translate *ptw,
+ target_ulong address,
+ MMUAccessType access_type,
+ GetPhysAddrResult *result,
+ ARMMMUFaultInfo *fi)
+{
+ ARMMMUIdx mmu_idx = ptw->in_mmu_idx;
+ uint8_t memattr = 0x00; /* Device nGnRnE */
+ uint8_t shareability = 0; /* non-shareable */
+ int r_el;
+
+ switch (mmu_idx) {
+ case ARMMMUIdx_Stage2:
+ case ARMMMUIdx_Stage2_S:
+ case ARMMMUIdx_Phys_S:
+ case ARMMMUIdx_Phys_NS:
+ case ARMMMUIdx_Phys_Root:
+ case ARMMMUIdx_Phys_Realm:
+ break;
+
+ default:
+ r_el = regime_el(env, mmu_idx);
+ if (arm_el_is_aa64(env, r_el)) {
+ int pamax = arm_pamax(env_archcpu(env));
+ uint64_t tcr = env->cp15.tcr_el[r_el];
+ int addrtop, tbi;
+
+ tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
+ if (access_type == MMU_INST_FETCH) {
+ tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx);
+ }
+ tbi = (tbi >> extract64(address, 55, 1)) & 1;
+ addrtop = (tbi ? 55 : 63);
+
+ if (extract64(address, pamax, addrtop - pamax + 1) != 0) {
+ fi->type = ARMFault_AddressSize;
+ fi->level = 0;
+ fi->stage2 = false;
+ return true;
+ }
+
+ /*
+ * When TBI is disabled, we've just validated that all of the
+ * bits above PAMax are zero, so logically we only need to
+ * clear the top byte for TBI. But it's clearer to follow
+ * the pseudocode's setting of addrdesc.paddress.
+ */
+ address = extract64(address, 0, 52);
+ }
+
+ /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */
+ if (r_el == 1) {
+ uint64_t hcr = arm_hcr_el2_eff_secstate(env, ptw->in_space);
+ if (hcr & HCR_DC) {
+ if (hcr & HCR_DCT) {
+ memattr = 0xf0; /* Tagged, Normal, WB, RWA */
+ } else {
+ memattr = 0xff; /* Normal, WB, RWA */
+ }
+ }
+ }
+ if (memattr == 0) {
+ if (access_type == MMU_INST_FETCH) {
+ if (regime_sctlr(env, mmu_idx) & SCTLR_I) {
+ memattr = 0xee; /* Normal, WT, RA, NT */
+ } else {
+ memattr = 0x44; /* Normal, Non-cacheable */
+ }
+ }
+ shareability = 2; /* outer shareable */
+ }
+ result->cacheattrs.is_s2_format = false;
+ break;
+ }
+
+ result->f.phys_addr = address;
+ result->f.prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ result->f.lg_page_size = TARGET_PAGE_BITS;
+ result->cacheattrs.shareability = shareability;
+ result->cacheattrs.attrs = memattr;
+ return false;
+}
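The MMU-off bounds check above amounts to testing the bits between PAMax and the effective address top (bit 55 with TBI, bit 63 without). The sketch below is illustrative only; out_of_range is a hypothetical helper and the PAMax values are example figures.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool out_of_range(uint64_t va, int pamax, bool tbi)
{
    int addrtop = tbi ? 55 : 63;
    /* Mask covering bits [addrtop:pamax], i.e. the bits that must be zero. */
    uint64_t mask = (~UINT64_C(0) >> (63 - addrtop)) &
                    ~((UINT64_C(1) << pamax) - 1);
    return (va & mask) != 0;
}

int main(void)
{
    /* Bit 47 set with a 40-bit PA range: address-size fault. */
    assert(out_of_range(UINT64_C(1) << 47, 40, false));
    /* Top byte set, but TBI enabled and a 48-bit PA range: accepted. */
    assert(!out_of_range((UINT64_C(0xff) << 56) | 0xff, 48, true));
    return 0;
}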
+
+static bool get_phys_addr_twostage(CPUARMState *env, S1Translate *ptw,
+ target_ulong address,
+ MMUAccessType access_type,
+ GetPhysAddrResult *result,
+ ARMMMUFaultInfo *fi)
+{
+ hwaddr ipa;
+ int s1_prot, s1_lgpgsz;
+ ARMSecuritySpace in_space = ptw->in_space;
+ bool ret, ipa_secure, s1_guarded;
+ ARMCacheAttrs cacheattrs1;
+ ARMSecuritySpace ipa_space;
+ uint64_t hcr;
+
+ ret = get_phys_addr_nogpc(env, ptw, address, access_type, result, fi);
+
+ /* If S1 fails, return early. */
+ if (ret) {
+ return ret;
+ }
+
+ ipa = result->f.phys_addr;
+ ipa_secure = result->f.attrs.secure;
+ ipa_space = result->f.attrs.space;
+
+ ptw->in_s1_is_el0 = ptw->in_mmu_idx == ARMMMUIdx_Stage1_E0;
+ ptw->in_mmu_idx = ipa_secure ? ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2;
+ ptw->in_space = ipa_space;
+ ptw->in_ptw_idx = ptw_idx_for_stage_2(env, ptw->in_mmu_idx);
+
+ /*
+ * S1 is done, now do S2 translation.
+ * Save the stage1 results so that we may merge prot and cacheattrs later.
+ */
+ s1_prot = result->f.prot;
+ s1_lgpgsz = result->f.lg_page_size;
+ s1_guarded = result->f.extra.arm.guarded;
+ cacheattrs1 = result->cacheattrs;
+ memset(result, 0, sizeof(*result));
+
+ ret = get_phys_addr_nogpc(env, ptw, ipa, access_type, result, fi);
+ fi->s2addr = ipa;
+
+ /* Combine the S1 and S2 perms. */
+ result->f.prot &= s1_prot;
+
+ /* If S2 fails, return early. */
+ if (ret) {
+ return ret;
+ }
+
+ /*
+ * If either S1 or S2 returned a result smaller than TARGET_PAGE_SIZE,
+ * this means "don't put this in the TLB"; in this case, return a
+ * result with lg_page_size == 0 to achieve that. Otherwise,
+ * use the maximum of the S1 & S2 page size, so that invalidation
+ * of pages > TARGET_PAGE_SIZE works correctly. (This works even though
+ * we know the combined result permissions etc only cover the minimum
+ * of the S1 and S2 page size, because we know that the common TLB code
+ * never actually creates TLB entries bigger than TARGET_PAGE_SIZE,
+ * and passing a larger page size value only affects invalidations.)
+ */
+ if (result->f.lg_page_size < TARGET_PAGE_BITS ||
+ s1_lgpgsz < TARGET_PAGE_BITS) {
+ result->f.lg_page_size = 0;
+ } else if (result->f.lg_page_size < s1_lgpgsz) {
+ result->f.lg_page_size = s1_lgpgsz;
+ }
+
+ /* Combine the S1 and S2 cache attributes. */
+ hcr = arm_hcr_el2_eff_secstate(env, in_space);
+ if (hcr & HCR_DC) {
+ /*
+ * HCR.DC forces the first stage attributes to
+ * Normal Non-Shareable,
+ * Inner Write-Back Read-Allocate Write-Allocate,
+ * Outer Write-Back Read-Allocate Write-Allocate.
+ * Do not overwrite Tagged within attrs.
+ */
+ if (cacheattrs1.attrs != 0xf0) {
+ cacheattrs1.attrs = 0xff;
+ }
+ cacheattrs1.shareability = 0;
+ }
+ result->cacheattrs = combine_cacheattrs(hcr, cacheattrs1,
+ result->cacheattrs);
+
+ /* No BTI GP information in stage 2, we just use the S1 value */
+ result->f.extra.arm.guarded = s1_guarded;
+
+ /*
+ * Check if IPA translates to secure or non-secure PA space.
+ * Note that VSTCR overrides VTCR and {N}SW overrides {N}SA.
+ */
+ if (in_space == ARMSS_Secure) {
+ result->f.attrs.secure =
+ !(env->cp15.vstcr_el2 & (VSTCR_SA | VSTCR_SW))
+ && (ipa_secure
+ || !(env->cp15.vtcr_el2 & (VTCR_NSA | VTCR_NSW)));
+ result->f.attrs.space = arm_secure_to_space(result->f.attrs.secure);
+ }
+
+ return false;
+}
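The page-size merge above is worth seeing on its own: any sub-page result disables TLB caching, otherwise the larger of the two sizes is kept so that invalidation still covers the whole mapping. Illustrative sketch only, assuming a 4K TARGET_PAGE_SIZE:

#include <assert.h>

#define EXAMPLE_PAGE_BITS 12

static int combine_lg_page_size(int s1, int s2)
{
    if (s1 < EXAMPLE_PAGE_BITS || s2 < EXAMPLE_PAGE_BITS) {
        return 0;                   /* "do not put this in the TLB" */
    }
    return s1 > s2 ? s1 : s2;       /* larger size, so invalidation still works */
}

int main(void)
{
    assert(combine_lg_page_size(21, 12) == 21); /* 2M stage 1, 4K stage 2 */
    assert(combine_lg_page_size(12, 0) == 0);   /* sub-page result: uncacheable */
    return 0;
}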
+
+static bool get_phys_addr_nogpc(CPUARMState *env, S1Translate *ptw,
+ target_ulong address,
+ MMUAccessType access_type,
+ GetPhysAddrResult *result,
+ ARMMMUFaultInfo *fi)
+{
+ ARMMMUIdx mmu_idx = ptw->in_mmu_idx;
+ ARMMMUIdx s1_mmu_idx;
+
+ /*
+ * The page table entries may downgrade Secure to NonSecure, but
+ * cannot upgrade a NonSecure translation regime's attributes
+ * to Secure or Realm.
+ */
+ result->f.attrs.space = ptw->in_space;
+ result->f.attrs.secure = arm_space_is_secure(ptw->in_space);
+
+ switch (mmu_idx) {
+ case ARMMMUIdx_Phys_S:
+ case ARMMMUIdx_Phys_NS:
+ case ARMMMUIdx_Phys_Root:
+ case ARMMMUIdx_Phys_Realm:
+ /* Checking Phys early avoids special casing later vs regime_el. */
+ return get_phys_addr_disabled(env, ptw, address, access_type,
+ result, fi);
+
+ case ARMMMUIdx_Stage1_E0:
+ case ARMMMUIdx_Stage1_E1:
+ case ARMMMUIdx_Stage1_E1_PAN:
+ /*
+ * First stage lookup uses second stage for ptw; only
+ * Secure has both S and NS IPA and starts with Stage2_S.
+ */
+ ptw->in_ptw_idx = (ptw->in_space == ARMSS_Secure) ?
+ ARMMMUIdx_Stage2_S : ARMMMUIdx_Stage2;
+ break;
+
+ case ARMMMUIdx_Stage2:
+ case ARMMMUIdx_Stage2_S:
+ /*
+ * Second stage lookup uses physical for ptw; whether this is S or
+ * NS may depend on the SW/NSW bits if this is a stage 2 lookup for
+ * the Secure EL2&0 regime.
+ */
+ ptw->in_ptw_idx = ptw_idx_for_stage_2(env, mmu_idx);
+ break;
+
+ case ARMMMUIdx_E10_0:
+ s1_mmu_idx = ARMMMUIdx_Stage1_E0;
+ goto do_twostage;
+ case ARMMMUIdx_E10_1:
+ s1_mmu_idx = ARMMMUIdx_Stage1_E1;
+ goto do_twostage;
+ case ARMMMUIdx_E10_1_PAN:
+ s1_mmu_idx = ARMMMUIdx_Stage1_E1_PAN;
+ do_twostage:
+ /*
+ * Call ourselves recursively to do the stage 1 and then stage 2
+ * translations if mmu_idx is a two-stage regime and EL2 is present.
+ * Otherwise, a stage1+stage2 translation is just stage 1.
+ */
+ ptw->in_mmu_idx = mmu_idx = s1_mmu_idx;
+ if (arm_feature(env, ARM_FEATURE_EL2) &&
+ !regime_translation_disabled(env, ARMMMUIdx_Stage2, ptw->in_space)) {
+ return get_phys_addr_twostage(env, ptw, address, access_type,
+ result, fi);
+ }
+ /* fall through */
+
+ default:
+ /* Single stage uses physical for ptw. */
+ ptw->in_ptw_idx = arm_space_to_phys(ptw->in_space);
+ break;
+ }
+
+ result->f.attrs.user = regime_is_user(env, mmu_idx);
+
+ /*
+ * Fast Context Switch Extension. This doesn't exist at all in v8.
+ * In v7 and earlier it affects all stage 1 translations.
+ */
+ if (address < 0x02000000 && mmu_idx != ARMMMUIdx_Stage2
+ && !arm_feature(env, ARM_FEATURE_V8)) {
+ if (regime_el(env, mmu_idx) == 3) {
+ address += env->cp15.fcseidr_s;
+ } else {
+ address += env->cp15.fcseidr_ns;
+ }
+ }
+
+ if (arm_feature(env, ARM_FEATURE_PMSA)) {
+ bool ret;
+ result->f.lg_page_size = TARGET_PAGE_BITS;
+
+ if (arm_feature(env, ARM_FEATURE_V8)) {
+ /* PMSAv8 */
+ ret = get_phys_addr_pmsav8(env, ptw, address, access_type,
+ result, fi);
+ } else if (arm_feature(env, ARM_FEATURE_V7)) {
+ /* PMSAv7 */
+ ret = get_phys_addr_pmsav7(env, ptw, address, access_type,
+ result, fi);
+ } else {
+ /* Pre-v7 MPU */
+ ret = get_phys_addr_pmsav5(env, ptw, address, access_type,
+ result, fi);
+ }
+ qemu_log_mask(CPU_LOG_MMU, "PMSA MPU lookup for %s at 0x%08" PRIx32
+ " mmu_idx %u -> %s (prot %c%c%c)\n",
+ access_type == MMU_DATA_LOAD ? "reading" :
+ (access_type == MMU_DATA_STORE ? "writing" : "execute"),
+ (uint32_t)address, mmu_idx,
+ ret ? "Miss" : "Hit",
+ result->f.prot & PAGE_READ ? 'r' : '-',
+ result->f.prot & PAGE_WRITE ? 'w' : '-',
+ result->f.prot & PAGE_EXEC ? 'x' : '-');
+
+ return ret;
+ }
+
+ /* Definitely a real MMU, not an MPU */
+
+ if (regime_translation_disabled(env, mmu_idx, ptw->in_space)) {
+ return get_phys_addr_disabled(env, ptw, address, access_type,
+ result, fi);
+ }
+
+ if (regime_using_lpae_format(env, mmu_idx)) {
+ return get_phys_addr_lpae(env, ptw, address, access_type, result, fi);
+ } else if (arm_feature(env, ARM_FEATURE_V7) ||
+ regime_sctlr(env, mmu_idx) & SCTLR_XP) {
+ return get_phys_addr_v6(env, ptw, address, access_type, result, fi);
+ } else {
+ return get_phys_addr_v5(env, ptw, address, access_type, result, fi);
+ }
+}
+
+static bool get_phys_addr_gpc(CPUARMState *env, S1Translate *ptw,
+ target_ulong address,
+ MMUAccessType access_type,
+ GetPhysAddrResult *result,
+ ARMMMUFaultInfo *fi)
+{
+ if (get_phys_addr_nogpc(env, ptw, address, access_type, result, fi)) {
+ return true;
+ }
+ if (!granule_protection_check(env, result->f.phys_addr,
+ result->f.attrs.space, fi)) {
+ fi->type = ARMFault_GPCFOnOutput;
+ return true;
+ }
+ return false;
+}
+
+bool get_phys_addr_with_space_nogpc(CPUARMState *env, target_ulong address,
+ MMUAccessType access_type,
+ ARMMMUIdx mmu_idx, ARMSecuritySpace space,
+ GetPhysAddrResult *result,
+ ARMMMUFaultInfo *fi)
+{
+ S1Translate ptw = {
+ .in_mmu_idx = mmu_idx,
+ .in_space = space,
+ };
+ return get_phys_addr_nogpc(env, &ptw, address, access_type, result, fi);
+}
+
+bool get_phys_addr(CPUARMState *env, target_ulong address,
+ MMUAccessType access_type, ARMMMUIdx mmu_idx,
+ GetPhysAddrResult *result, ARMMMUFaultInfo *fi)
+{
+ S1Translate ptw = {
+ .in_mmu_idx = mmu_idx,
+ };
+ ARMSecuritySpace ss;
+
+ switch (mmu_idx) {
+ case ARMMMUIdx_E10_0:
+ case ARMMMUIdx_E10_1:
+ case ARMMMUIdx_E10_1_PAN:
+ case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_E20_2:
+ case ARMMMUIdx_E20_2_PAN:
+ case ARMMMUIdx_Stage1_E0:
+ case ARMMMUIdx_Stage1_E1:
+ case ARMMMUIdx_Stage1_E1_PAN:
+ case ARMMMUIdx_E2:
+ ss = arm_security_space_below_el3(env);
+ break;
+ case ARMMMUIdx_Stage2:
+ /*
+ * For Secure EL2, we need this index to be NonSecure;
+ * otherwise this will already be NonSecure or Realm.
+ */
+ ss = arm_security_space_below_el3(env);
+ if (ss == ARMSS_Secure) {
+ ss = ARMSS_NonSecure;
+ }
+ break;
+ case ARMMMUIdx_Phys_NS:
+ case ARMMMUIdx_MPrivNegPri:
+ case ARMMMUIdx_MUserNegPri:
+ case ARMMMUIdx_MPriv:
+ case ARMMMUIdx_MUser:
+ ss = ARMSS_NonSecure;
+ break;
+ case ARMMMUIdx_Stage2_S:
+ case ARMMMUIdx_Phys_S:
+ case ARMMMUIdx_MSPrivNegPri:
+ case ARMMMUIdx_MSUserNegPri:
+ case ARMMMUIdx_MSPriv:
+ case ARMMMUIdx_MSUser:
+ ss = ARMSS_Secure;
+ break;
+ case ARMMMUIdx_E3:
+ if (arm_feature(env, ARM_FEATURE_AARCH64) &&
+ cpu_isar_feature(aa64_rme, env_archcpu(env))) {
+ ss = ARMSS_Root;
+ } else {
+ ss = ARMSS_Secure;
+ }
+ break;
+ case ARMMMUIdx_Phys_Root:
+ ss = ARMSS_Root;
+ break;
+ case ARMMMUIdx_Phys_Realm:
+ ss = ARMSS_Realm;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ ptw.in_space = ss;
+ return get_phys_addr_gpc(env, &ptw, address, access_type, result, fi);
+}
+
+hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
+ MemTxAttrs *attrs)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ ARMMMUIdx mmu_idx = arm_mmu_idx(env);
+ ARMSecuritySpace ss = arm_security_space(env);
+ S1Translate ptw = {
+ .in_mmu_idx = mmu_idx,
+ .in_space = ss,
+ .in_debug = true,
+ };
+ GetPhysAddrResult res = {};
+ ARMMMUFaultInfo fi = {};
+ bool ret;
+
+ ret = get_phys_addr_gpc(env, &ptw, addr, MMU_DATA_LOAD, &res, &fi);
+ *attrs = res.f.attrs;
+
+ if (ret) {
+ return -1;
+ }
+ return res.f.phys_addr;
+}
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
deleted file mode 100644
index 8cbc6516ab..0000000000
--- a/target/arm/sve_helper.c
+++ /dev/null
@@ -1,5383 +0,0 @@
-/*
- * ARM SVE Operations
- *
- * Copyright (c) 2018 Linaro, Ltd.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "internals.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
-#include "exec/helper-proto.h"
-#include "tcg/tcg-gvec-desc.h"
-#include "fpu/softfloat.h"
-
-
-/* Note that vector data is stored in host-endian 64-bit chunks,
- so addressing units smaller than that needs a host-endian fixup. */
-#ifdef HOST_WORDS_BIGENDIAN
-#define H1(x) ((x) ^ 7)
-#define H1_2(x) ((x) ^ 6)
-#define H1_4(x) ((x) ^ 4)
-#define H2(x) ((x) ^ 3)
-#define H4(x) ((x) ^ 1)
-#else
-#define H1(x) (x)
-#define H1_2(x) (x)
-#define H1_4(x) (x)
-#define H2(x) (x)
-#define H4(x) (x)
-#endif
-
-/* Return a value for NZCV as per the ARM PredTest pseudofunction.
- *
- * The return value has bit 31 set if N is set, bit 1 set if Z is clear,
- * and bit 0 set if C is set. Compare the definitions of these variables
- * within CPUARMState.
- */
-
-/* For no G bits set, NZCV = C. */
-#define PREDTEST_INIT 1
-
-/* This is an iterative function, called for each Pd and Pg word
- * moving forward.
- */
-static uint32_t iter_predtest_fwd(uint64_t d, uint64_t g, uint32_t flags)
-{
- if (likely(g)) {
- /* Compute N from first D & G.
- Use bit 2 to signal first G bit seen. */
- if (!(flags & 4)) {
- flags |= ((d & (g & -g)) != 0) << 31;
- flags |= 4;
- }
-
- /* Accumulate Z from each D & G. */
- flags |= ((d & g) != 0) << 1;
-
- /* Compute C from last !(D & G). Replace previous. */
- flags = deposit32(flags, 0, 1, (d & pow2floor(g)) == 0);
- }
- return flags;
-}
-
-/* This is an iterative function, called for each Pd and Pg word
- * moving backward.
- */
-static uint32_t iter_predtest_bwd(uint64_t d, uint64_t g, uint32_t flags)
-{
- if (likely(g)) {
- /* Compute C from first (i.e last) !(D & G).
- Use bit 2 to signal first G bit seen. */
- if (!(flags & 4)) {
- flags += 4 - 1; /* add bit 2, subtract C from PREDTEST_INIT */
- flags |= (d & pow2floor(g)) == 0;
- }
-
- /* Accumulate Z from each D & G. */
- flags |= ((d & g) != 0) << 1;
-
- /* Compute N from last (i.e first) D & G. Replace previous. */
- flags = deposit32(flags, 31, 1, (d & (g & -g)) != 0);
- }
- return flags;
-}
-
-/* The same for a single word predicate. */
-uint32_t HELPER(sve_predtest1)(uint64_t d, uint64_t g)
-{
- return iter_predtest_fwd(d, g, PREDTEST_INIT);
-}
-
-/* The same for a multi-word predicate. */
-uint32_t HELPER(sve_predtest)(void *vd, void *vg, uint32_t words)
-{
- uint32_t flags = PREDTEST_INIT;
- uint64_t *d = vd, *g = vg;
- uintptr_t i = 0;
-
- do {
- flags = iter_predtest_fwd(d[i], g[i], flags);
- } while (++i < words);
-
- return flags;
-}
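The NZCV packing described above can be recomputed non-iteratively for a single predicate word: N is the first governed bit of D, Z is clear if any governed bit of D is set, and C is set if the last governed bit of D is clear (the in-tree iterative version additionally threads a "first G bit seen" marker through bit 2). Illustrative sketch only; predtest1 here is a hypothetical stand-alone helper, not the code being removed.

#include <assert.h>
#include <stdint.h>

static uint32_t predtest1(uint64_t d, uint64_t g)
{
    uint32_t flags = 1;                     /* no active elements: N=0, Z=1, C=1 */
    if (g) {
        uint64_t first = g & -g;            /* lowest governing bit */
        uint64_t last = g;                  /* isolate highest governing bit */
        while (last & (last - 1)) {
            last &= last - 1;
        }
        flags  = ((d & first) != 0) ? 1u << 31 : 0;  /* N: first active element set */
        flags |= ((d & g) != 0) << 1;                /* Z clear: some active element set */
        flags |= (d & last) == 0;                    /* C: last active element clear */
    }
    return flags;
}

int main(void)
{
    /* One active element, and it is set: N=1, Z=0, C=0. */
    assert(predtest1(0x1, 0x1) == ((1u << 31) | (1u << 1)));
    /* Only the last active element set: N=0, Z=0, C=0. */
    assert(predtest1(0x80, 0xff) == (1u << 1));
    /* Nothing set under the governing predicate: N=0, Z=1, C=1. */
    assert(predtest1(0x0, 0xff) == 1);
    return 0;
}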
-
-/* Expand active predicate bits to bytes, for byte elements.
- * for (i = 0; i < 256; ++i) {
- * unsigned long m = 0;
- * for (j = 0; j < 8; j++) {
- * if ((i >> j) & 1) {
- * m |= 0xfful << (j << 3);
- * }
- * }
- * printf("0x%016lx,\n", m);
- * }
- */
-static inline uint64_t expand_pred_b(uint8_t byte)
-{
- static const uint64_t word[256] = {
- 0x0000000000000000, 0x00000000000000ff, 0x000000000000ff00,
- 0x000000000000ffff, 0x0000000000ff0000, 0x0000000000ff00ff,
- 0x0000000000ffff00, 0x0000000000ffffff, 0x00000000ff000000,
- 0x00000000ff0000ff, 0x00000000ff00ff00, 0x00000000ff00ffff,
- 0x00000000ffff0000, 0x00000000ffff00ff, 0x00000000ffffff00,
- 0x00000000ffffffff, 0x000000ff00000000, 0x000000ff000000ff,
- 0x000000ff0000ff00, 0x000000ff0000ffff, 0x000000ff00ff0000,
- 0x000000ff00ff00ff, 0x000000ff00ffff00, 0x000000ff00ffffff,
- 0x000000ffff000000, 0x000000ffff0000ff, 0x000000ffff00ff00,
- 0x000000ffff00ffff, 0x000000ffffff0000, 0x000000ffffff00ff,
- 0x000000ffffffff00, 0x000000ffffffffff, 0x0000ff0000000000,
- 0x0000ff00000000ff, 0x0000ff000000ff00, 0x0000ff000000ffff,
- 0x0000ff0000ff0000, 0x0000ff0000ff00ff, 0x0000ff0000ffff00,
- 0x0000ff0000ffffff, 0x0000ff00ff000000, 0x0000ff00ff0000ff,
- 0x0000ff00ff00ff00, 0x0000ff00ff00ffff, 0x0000ff00ffff0000,
- 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0x0000ff00ffffffff,
- 0x0000ffff00000000, 0x0000ffff000000ff, 0x0000ffff0000ff00,
- 0x0000ffff0000ffff, 0x0000ffff00ff0000, 0x0000ffff00ff00ff,
- 0x0000ffff00ffff00, 0x0000ffff00ffffff, 0x0000ffffff000000,
- 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0x0000ffffff00ffff,
- 0x0000ffffffff0000, 0x0000ffffffff00ff, 0x0000ffffffffff00,
- 0x0000ffffffffffff, 0x00ff000000000000, 0x00ff0000000000ff,
- 0x00ff00000000ff00, 0x00ff00000000ffff, 0x00ff000000ff0000,
- 0x00ff000000ff00ff, 0x00ff000000ffff00, 0x00ff000000ffffff,
- 0x00ff0000ff000000, 0x00ff0000ff0000ff, 0x00ff0000ff00ff00,
- 0x00ff0000ff00ffff, 0x00ff0000ffff0000, 0x00ff0000ffff00ff,
- 0x00ff0000ffffff00, 0x00ff0000ffffffff, 0x00ff00ff00000000,
- 0x00ff00ff000000ff, 0x00ff00ff0000ff00, 0x00ff00ff0000ffff,
- 0x00ff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00,
- 0x00ff00ff00ffffff, 0x00ff00ffff000000, 0x00ff00ffff0000ff,
- 0x00ff00ffff00ff00, 0x00ff00ffff00ffff, 0x00ff00ffffff0000,
- 0x00ff00ffffff00ff, 0x00ff00ffffffff00, 0x00ff00ffffffffff,
- 0x00ffff0000000000, 0x00ffff00000000ff, 0x00ffff000000ff00,
- 0x00ffff000000ffff, 0x00ffff0000ff0000, 0x00ffff0000ff00ff,
- 0x00ffff0000ffff00, 0x00ffff0000ffffff, 0x00ffff00ff000000,
- 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0x00ffff00ff00ffff,
- 0x00ffff00ffff0000, 0x00ffff00ffff00ff, 0x00ffff00ffffff00,
- 0x00ffff00ffffffff, 0x00ffffff00000000, 0x00ffffff000000ff,
- 0x00ffffff0000ff00, 0x00ffffff0000ffff, 0x00ffffff00ff0000,
- 0x00ffffff00ff00ff, 0x00ffffff00ffff00, 0x00ffffff00ffffff,
- 0x00ffffffff000000, 0x00ffffffff0000ff, 0x00ffffffff00ff00,
- 0x00ffffffff00ffff, 0x00ffffffffff0000, 0x00ffffffffff00ff,
- 0x00ffffffffffff00, 0x00ffffffffffffff, 0xff00000000000000,
- 0xff000000000000ff, 0xff0000000000ff00, 0xff0000000000ffff,
- 0xff00000000ff0000, 0xff00000000ff00ff, 0xff00000000ffff00,
- 0xff00000000ffffff, 0xff000000ff000000, 0xff000000ff0000ff,
- 0xff000000ff00ff00, 0xff000000ff00ffff, 0xff000000ffff0000,
- 0xff000000ffff00ff, 0xff000000ffffff00, 0xff000000ffffffff,
- 0xff0000ff00000000, 0xff0000ff000000ff, 0xff0000ff0000ff00,
- 0xff0000ff0000ffff, 0xff0000ff00ff0000, 0xff0000ff00ff00ff,
- 0xff0000ff00ffff00, 0xff0000ff00ffffff, 0xff0000ffff000000,
- 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0xff0000ffff00ffff,
- 0xff0000ffffff0000, 0xff0000ffffff00ff, 0xff0000ffffffff00,
- 0xff0000ffffffffff, 0xff00ff0000000000, 0xff00ff00000000ff,
- 0xff00ff000000ff00, 0xff00ff000000ffff, 0xff00ff0000ff0000,
- 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0xff00ff0000ffffff,
- 0xff00ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00,
- 0xff00ff00ff00ffff, 0xff00ff00ffff0000, 0xff00ff00ffff00ff,
- 0xff00ff00ffffff00, 0xff00ff00ffffffff, 0xff00ffff00000000,
- 0xff00ffff000000ff, 0xff00ffff0000ff00, 0xff00ffff0000ffff,
- 0xff00ffff00ff0000, 0xff00ffff00ff00ff, 0xff00ffff00ffff00,
- 0xff00ffff00ffffff, 0xff00ffffff000000, 0xff00ffffff0000ff,
- 0xff00ffffff00ff00, 0xff00ffffff00ffff, 0xff00ffffffff0000,
- 0xff00ffffffff00ff, 0xff00ffffffffff00, 0xff00ffffffffffff,
- 0xffff000000000000, 0xffff0000000000ff, 0xffff00000000ff00,
- 0xffff00000000ffff, 0xffff000000ff0000, 0xffff000000ff00ff,
- 0xffff000000ffff00, 0xffff000000ffffff, 0xffff0000ff000000,
- 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0xffff0000ff00ffff,
- 0xffff0000ffff0000, 0xffff0000ffff00ff, 0xffff0000ffffff00,
- 0xffff0000ffffffff, 0xffff00ff00000000, 0xffff00ff000000ff,
- 0xffff00ff0000ff00, 0xffff00ff0000ffff, 0xffff00ff00ff0000,
- 0xffff00ff00ff00ff, 0xffff00ff00ffff00, 0xffff00ff00ffffff,
- 0xffff00ffff000000, 0xffff00ffff0000ff, 0xffff00ffff00ff00,
- 0xffff00ffff00ffff, 0xffff00ffffff0000, 0xffff00ffffff00ff,
- 0xffff00ffffffff00, 0xffff00ffffffffff, 0xffffff0000000000,
- 0xffffff00000000ff, 0xffffff000000ff00, 0xffffff000000ffff,
- 0xffffff0000ff0000, 0xffffff0000ff00ff, 0xffffff0000ffff00,
- 0xffffff0000ffffff, 0xffffff00ff000000, 0xffffff00ff0000ff,
- 0xffffff00ff00ff00, 0xffffff00ff00ffff, 0xffffff00ffff0000,
- 0xffffff00ffff00ff, 0xffffff00ffffff00, 0xffffff00ffffffff,
- 0xffffffff00000000, 0xffffffff000000ff, 0xffffffff0000ff00,
- 0xffffffff0000ffff, 0xffffffff00ff0000, 0xffffffff00ff00ff,
- 0xffffffff00ffff00, 0xffffffff00ffffff, 0xffffffffff000000,
- 0xffffffffff0000ff, 0xffffffffff00ff00, 0xffffffffff00ffff,
- 0xffffffffffff0000, 0xffffffffffff00ff, 0xffffffffffffff00,
- 0xffffffffffffffff,
- };
- return word[byte];
-}
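The table above is exactly what the loop in the preceding comment produces; here is that generator made compilable, with a couple of spot checks (illustrative only, not part of the removed file):

#include <assert.h>
#include <stdint.h>

/* Expand each predicate bit to a full byte of the result. */
static uint64_t expand_byte_pred(uint8_t byte)
{
    uint64_t m = 0;
    for (int j = 0; j < 8; j++) {
        if ((byte >> j) & 1) {
            m |= UINT64_C(0xff) << (j * 8);
        }
    }
    return m;
}

int main(void)
{
    assert(expand_byte_pred(0x05) == UINT64_C(0x0000000000ff00ff));
    assert(expand_byte_pred(0x80) == UINT64_C(0xff00000000000000));
    return 0;
}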
-
-/* Similarly for half-word elements.
- * for (i = 0; i < 256; ++i) {
- * unsigned long m = 0;
- * if (i & 0xaa) {
- * continue;
- * }
- * for (j = 0; j < 8; j += 2) {
- * if ((i >> j) & 1) {
- * m |= 0xfffful << (j << 3);
- * }
- * }
- * printf("[0x%x] = 0x%016lx,\n", i, m);
- * }
- */
-static inline uint64_t expand_pred_h(uint8_t byte)
-{
- static const uint64_t word[] = {
- [0x01] = 0x000000000000ffff, [0x04] = 0x00000000ffff0000,
- [0x05] = 0x00000000ffffffff, [0x10] = 0x0000ffff00000000,
- [0x11] = 0x0000ffff0000ffff, [0x14] = 0x0000ffffffff0000,
- [0x15] = 0x0000ffffffffffff, [0x40] = 0xffff000000000000,
- [0x41] = 0xffff00000000ffff, [0x44] = 0xffff0000ffff0000,
- [0x45] = 0xffff0000ffffffff, [0x50] = 0xffffffff00000000,
- [0x51] = 0xffffffff0000ffff, [0x54] = 0xffffffffffff0000,
- [0x55] = 0xffffffffffffffff,
- };
- return word[byte & 0x55];
-}
-
-/* Similarly for single word elements. */
-static inline uint64_t expand_pred_s(uint8_t byte)
-{
- static const uint64_t word[] = {
- [0x01] = 0x00000000ffffffffull,
- [0x10] = 0xffffffff00000000ull,
- [0x11] = 0xffffffffffffffffull,
- };
- return word[byte & 0x11];
-}
-
-/* Swap 16-bit words within a 32-bit word. */
-static inline uint32_t hswap32(uint32_t h)
-{
- return rol32(h, 16);
-}
-
-/* Swap 16-bit words within a 64-bit word. */
-static inline uint64_t hswap64(uint64_t h)
-{
- uint64_t m = 0x0000ffff0000ffffull;
- h = rol64(h, 32);
- return ((h & m) << 16) | ((h >> 16) & m);
-}
-
-/* Swap 32-bit words within a 64-bit word. */
-static inline uint64_t wswap64(uint64_t h)
-{
- return rol64(h, 32);
-}
-
-#define LOGICAL_PPPP(NAME, FUNC) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
-{ \
- uintptr_t opr_sz = simd_oprsz(desc); \
- uint64_t *d = vd, *n = vn, *m = vm, *g = vg; \
- uintptr_t i; \
- for (i = 0; i < opr_sz / 8; ++i) { \
- d[i] = FUNC(n[i], m[i], g[i]); \
- } \
-}
-
-#define DO_AND(N, M, G) (((N) & (M)) & (G))
-#define DO_BIC(N, M, G) (((N) & ~(M)) & (G))
-#define DO_EOR(N, M, G) (((N) ^ (M)) & (G))
-#define DO_ORR(N, M, G) (((N) | (M)) & (G))
-#define DO_ORN(N, M, G) (((N) | ~(M)) & (G))
-#define DO_NOR(N, M, G) (~((N) | (M)) & (G))
-#define DO_NAND(N, M, G) (~((N) & (M)) & (G))
-#define DO_SEL(N, M, G) (((N) & (G)) | ((M) & ~(G)))
-
-LOGICAL_PPPP(sve_and_pppp, DO_AND)
-LOGICAL_PPPP(sve_bic_pppp, DO_BIC)
-LOGICAL_PPPP(sve_eor_pppp, DO_EOR)
-LOGICAL_PPPP(sve_sel_pppp, DO_SEL)
-LOGICAL_PPPP(sve_orr_pppp, DO_ORR)
-LOGICAL_PPPP(sve_orn_pppp, DO_ORN)
-LOGICAL_PPPP(sve_nor_pppp, DO_NOR)
-LOGICAL_PPPP(sve_nand_pppp, DO_NAND)
-
-#undef DO_AND
-#undef DO_BIC
-#undef DO_EOR
-#undef DO_ORR
-#undef DO_ORN
-#undef DO_NOR
-#undef DO_NAND
-#undef DO_SEL
-#undef LOGICAL_PPPP
-
-/* Fully general three-operand expander, controlled by a predicate.
- * This is complicated by the host-endian storage of the register file.
- */
-/* ??? I don't expect the compiler could ever vectorize this itself.
- * With some tables we can convert bit masks to byte masks, and with
- * extra care wrt byte/word ordering we could use gcc generic vectors
- * and do 16 bytes at a time.
- */
-#define DO_ZPZZ(NAME, TYPE, H, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc); \
- for (i = 0; i < opr_sz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- if (pg & 1) { \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- TYPE mm = *(TYPE *)(vm + H(i)); \
- *(TYPE *)(vd + H(i)) = OP(nn, mm); \
- } \
- i += sizeof(TYPE), pg >>= sizeof(TYPE); \
- } while (i & 15); \
- } \
-}
-
-/* Similarly, specialized for 64-bit operands. */
-#define DO_ZPZZ_D(NAME, TYPE, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
- TYPE *d = vd, *n = vn, *m = vm; \
- uint8_t *pg = vg; \
- for (i = 0; i < opr_sz; i += 1) { \
- if (pg[H1(i)] & 1) { \
- TYPE nn = n[i], mm = m[i]; \
- d[i] = OP(nn, mm); \
- } \
- } \
-}
-
-#define DO_AND(N, M) (N & M)
-#define DO_EOR(N, M) (N ^ M)
-#define DO_ORR(N, M) (N | M)
-#define DO_BIC(N, M) (N & ~M)
-#define DO_ADD(N, M) (N + M)
-#define DO_SUB(N, M) (N - M)
-#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
-#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
-#define DO_ABD(N, M) ((N) >= (M) ? (N) - (M) : (M) - (N))
-#define DO_MUL(N, M) (N * M)
-
-
-/*
- * We must avoid the C undefined behaviour cases: division by
- * zero and signed division of INT_MIN by -1. Both of these
- * have architecturally defined required results for Arm.
- * We special case all signed divisions by -1 to avoid having
- * to deduce the minimum integer for the type involved.
- */
-#define DO_SDIV(N, M) (unlikely(M == 0) ? 0 : unlikely(M == -1) ? -N : N / M)
-#define DO_UDIV(N, M) (unlikely(M == 0) ? 0 : N / M)
-
-DO_ZPZZ(sve_and_zpzz_b, uint8_t, H1, DO_AND)
-DO_ZPZZ(sve_and_zpzz_h, uint16_t, H1_2, DO_AND)
-DO_ZPZZ(sve_and_zpzz_s, uint32_t, H1_4, DO_AND)
-DO_ZPZZ_D(sve_and_zpzz_d, uint64_t, DO_AND)
-
-DO_ZPZZ(sve_orr_zpzz_b, uint8_t, H1, DO_ORR)
-DO_ZPZZ(sve_orr_zpzz_h, uint16_t, H1_2, DO_ORR)
-DO_ZPZZ(sve_orr_zpzz_s, uint32_t, H1_4, DO_ORR)
-DO_ZPZZ_D(sve_orr_zpzz_d, uint64_t, DO_ORR)
-
-DO_ZPZZ(sve_eor_zpzz_b, uint8_t, H1, DO_EOR)
-DO_ZPZZ(sve_eor_zpzz_h, uint16_t, H1_2, DO_EOR)
-DO_ZPZZ(sve_eor_zpzz_s, uint32_t, H1_4, DO_EOR)
-DO_ZPZZ_D(sve_eor_zpzz_d, uint64_t, DO_EOR)
-
-DO_ZPZZ(sve_bic_zpzz_b, uint8_t, H1, DO_BIC)
-DO_ZPZZ(sve_bic_zpzz_h, uint16_t, H1_2, DO_BIC)
-DO_ZPZZ(sve_bic_zpzz_s, uint32_t, H1_4, DO_BIC)
-DO_ZPZZ_D(sve_bic_zpzz_d, uint64_t, DO_BIC)
-
-DO_ZPZZ(sve_add_zpzz_b, uint8_t, H1, DO_ADD)
-DO_ZPZZ(sve_add_zpzz_h, uint16_t, H1_2, DO_ADD)
-DO_ZPZZ(sve_add_zpzz_s, uint32_t, H1_4, DO_ADD)
-DO_ZPZZ_D(sve_add_zpzz_d, uint64_t, DO_ADD)
-
-DO_ZPZZ(sve_sub_zpzz_b, uint8_t, H1, DO_SUB)
-DO_ZPZZ(sve_sub_zpzz_h, uint16_t, H1_2, DO_SUB)
-DO_ZPZZ(sve_sub_zpzz_s, uint32_t, H1_4, DO_SUB)
-DO_ZPZZ_D(sve_sub_zpzz_d, uint64_t, DO_SUB)
-
-DO_ZPZZ(sve_smax_zpzz_b, int8_t, H1, DO_MAX)
-DO_ZPZZ(sve_smax_zpzz_h, int16_t, H1_2, DO_MAX)
-DO_ZPZZ(sve_smax_zpzz_s, int32_t, H1_4, DO_MAX)
-DO_ZPZZ_D(sve_smax_zpzz_d, int64_t, DO_MAX)
-
-DO_ZPZZ(sve_umax_zpzz_b, uint8_t, H1, DO_MAX)
-DO_ZPZZ(sve_umax_zpzz_h, uint16_t, H1_2, DO_MAX)
-DO_ZPZZ(sve_umax_zpzz_s, uint32_t, H1_4, DO_MAX)
-DO_ZPZZ_D(sve_umax_zpzz_d, uint64_t, DO_MAX)
-
-DO_ZPZZ(sve_smin_zpzz_b, int8_t, H1, DO_MIN)
-DO_ZPZZ(sve_smin_zpzz_h, int16_t, H1_2, DO_MIN)
-DO_ZPZZ(sve_smin_zpzz_s, int32_t, H1_4, DO_MIN)
-DO_ZPZZ_D(sve_smin_zpzz_d, int64_t, DO_MIN)
-
-DO_ZPZZ(sve_umin_zpzz_b, uint8_t, H1, DO_MIN)
-DO_ZPZZ(sve_umin_zpzz_h, uint16_t, H1_2, DO_MIN)
-DO_ZPZZ(sve_umin_zpzz_s, uint32_t, H1_4, DO_MIN)
-DO_ZPZZ_D(sve_umin_zpzz_d, uint64_t, DO_MIN)
-
-DO_ZPZZ(sve_sabd_zpzz_b, int8_t, H1, DO_ABD)
-DO_ZPZZ(sve_sabd_zpzz_h, int16_t, H1_2, DO_ABD)
-DO_ZPZZ(sve_sabd_zpzz_s, int32_t, H1_4, DO_ABD)
-DO_ZPZZ_D(sve_sabd_zpzz_d, int64_t, DO_ABD)
-
-DO_ZPZZ(sve_uabd_zpzz_b, uint8_t, H1, DO_ABD)
-DO_ZPZZ(sve_uabd_zpzz_h, uint16_t, H1_2, DO_ABD)
-DO_ZPZZ(sve_uabd_zpzz_s, uint32_t, H1_4, DO_ABD)
-DO_ZPZZ_D(sve_uabd_zpzz_d, uint64_t, DO_ABD)
-
-/* Because the computation type is at least twice as large as required,
- these work for both signed and unsigned source types. */
-static inline uint8_t do_mulh_b(int32_t n, int32_t m)
-{
- return (n * m) >> 8;
-}
-
-static inline uint16_t do_mulh_h(int32_t n, int32_t m)
-{
- return (n * m) >> 16;
-}
-
-static inline uint32_t do_mulh_s(int64_t n, int64_t m)
-{
- return (n * m) >> 32;
-}
-
-static inline uint64_t do_smulh_d(uint64_t n, uint64_t m)
-{
- uint64_t lo, hi;
- muls64(&lo, &hi, n, m);
- return hi;
-}
-
-static inline uint64_t do_umulh_d(uint64_t n, uint64_t m)
-{
- uint64_t lo, hi;
- mulu64(&lo, &hi, n, m);
- return hi;
-}
-
-DO_ZPZZ(sve_mul_zpzz_b, uint8_t, H1, DO_MUL)
-DO_ZPZZ(sve_mul_zpzz_h, uint16_t, H1_2, DO_MUL)
-DO_ZPZZ(sve_mul_zpzz_s, uint32_t, H1_4, DO_MUL)
-DO_ZPZZ_D(sve_mul_zpzz_d, uint64_t, DO_MUL)
-
-DO_ZPZZ(sve_smulh_zpzz_b, int8_t, H1, do_mulh_b)
-DO_ZPZZ(sve_smulh_zpzz_h, int16_t, H1_2, do_mulh_h)
-DO_ZPZZ(sve_smulh_zpzz_s, int32_t, H1_4, do_mulh_s)
-DO_ZPZZ_D(sve_smulh_zpzz_d, uint64_t, do_smulh_d)
-
-DO_ZPZZ(sve_umulh_zpzz_b, uint8_t, H1, do_mulh_b)
-DO_ZPZZ(sve_umulh_zpzz_h, uint16_t, H1_2, do_mulh_h)
-DO_ZPZZ(sve_umulh_zpzz_s, uint32_t, H1_4, do_mulh_s)
-DO_ZPZZ_D(sve_umulh_zpzz_d, uint64_t, do_umulh_d)
-
-DO_ZPZZ(sve_sdiv_zpzz_s, int32_t, H1_4, DO_SDIV)
-DO_ZPZZ_D(sve_sdiv_zpzz_d, int64_t, DO_SDIV)
-
-DO_ZPZZ(sve_udiv_zpzz_s, uint32_t, H1_4, DO_UDIV)
-DO_ZPZZ_D(sve_udiv_zpzz_d, uint64_t, DO_UDIV)
-
-/* Note that all bits of the shift are significant
- and not modulo the element size. */
-#define DO_ASR(N, M) (N >> MIN(M, sizeof(N) * 8 - 1))
-#define DO_LSR(N, M) (M < sizeof(N) * 8 ? N >> M : 0)
-#define DO_LSL(N, M) (M < sizeof(N) * 8 ? N << M : 0)
-
-DO_ZPZZ(sve_asr_zpzz_b, int8_t, H1, DO_ASR)
-DO_ZPZZ(sve_lsr_zpzz_b, uint8_t, H1_2, DO_LSR)
-DO_ZPZZ(sve_lsl_zpzz_b, uint8_t, H1_4, DO_LSL)
-
-DO_ZPZZ(sve_asr_zpzz_h, int16_t, H1, DO_ASR)
-DO_ZPZZ(sve_lsr_zpzz_h, uint16_t, H1_2, DO_LSR)
-DO_ZPZZ(sve_lsl_zpzz_h, uint16_t, H1_4, DO_LSL)
-
-DO_ZPZZ(sve_asr_zpzz_s, int32_t, H1, DO_ASR)
-DO_ZPZZ(sve_lsr_zpzz_s, uint32_t, H1_2, DO_LSR)
-DO_ZPZZ(sve_lsl_zpzz_s, uint32_t, H1_4, DO_LSL)
-
-DO_ZPZZ_D(sve_asr_zpzz_d, int64_t, DO_ASR)
-DO_ZPZZ_D(sve_lsr_zpzz_d, uint64_t, DO_LSR)
-DO_ZPZZ_D(sve_lsl_zpzz_d, uint64_t, DO_LSL)
-
-#undef DO_ZPZZ
-#undef DO_ZPZZ_D
-
-/* Three-operand expander, controlled by a predicate, in which the
- * third operand is "wide". That is, for D = N op M, the same 64-bit
- * value of M is used with all of the narrower values of N.
- */
-#define DO_ZPZW(NAME, TYPE, TYPEW, H, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc); \
- for (i = 0; i < opr_sz; ) { \
- uint8_t pg = *(uint8_t *)(vg + H1(i >> 3)); \
- TYPEW mm = *(TYPEW *)(vm + i); \
- do { \
- if (pg & 1) { \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- *(TYPE *)(vd + H(i)) = OP(nn, mm); \
- } \
- i += sizeof(TYPE), pg >>= sizeof(TYPE); \
- } while (i & 7); \
- } \
-}
-
-DO_ZPZW(sve_asr_zpzw_b, int8_t, uint64_t, H1, DO_ASR)
-DO_ZPZW(sve_lsr_zpzw_b, uint8_t, uint64_t, H1, DO_LSR)
-DO_ZPZW(sve_lsl_zpzw_b, uint8_t, uint64_t, H1, DO_LSL)
-
-DO_ZPZW(sve_asr_zpzw_h, int16_t, uint64_t, H1_2, DO_ASR)
-DO_ZPZW(sve_lsr_zpzw_h, uint16_t, uint64_t, H1_2, DO_LSR)
-DO_ZPZW(sve_lsl_zpzw_h, uint16_t, uint64_t, H1_2, DO_LSL)
-
-DO_ZPZW(sve_asr_zpzw_s, int32_t, uint64_t, H1_4, DO_ASR)
-DO_ZPZW(sve_lsr_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSR)
-DO_ZPZW(sve_lsl_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSL)
-
-#undef DO_ZPZW
-
-/* Fully general two-operand expander, controlled by a predicate.
- */
-#define DO_ZPZ(NAME, TYPE, H, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc); \
- for (i = 0; i < opr_sz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- if (pg & 1) { \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- *(TYPE *)(vd + H(i)) = OP(nn); \
- } \
- i += sizeof(TYPE), pg >>= sizeof(TYPE); \
- } while (i & 15); \
- } \
-}
-
-/* Similarly, specialized for 64-bit operands. */
-#define DO_ZPZ_D(NAME, TYPE, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
- TYPE *d = vd, *n = vn; \
- uint8_t *pg = vg; \
- for (i = 0; i < opr_sz; i += 1) { \
- if (pg[H1(i)] & 1) { \
- TYPE nn = n[i]; \
- d[i] = OP(nn); \
- } \
- } \
-}
-
-#define DO_CLS_B(N) (clrsb32(N) - 24)
-#define DO_CLS_H(N) (clrsb32(N) - 16)
-
-DO_ZPZ(sve_cls_b, int8_t, H1, DO_CLS_B)
-DO_ZPZ(sve_cls_h, int16_t, H1_2, DO_CLS_H)
-DO_ZPZ(sve_cls_s, int32_t, H1_4, clrsb32)
-DO_ZPZ_D(sve_cls_d, int64_t, clrsb64)
-
-#define DO_CLZ_B(N) (clz32(N) - 24)
-#define DO_CLZ_H(N) (clz32(N) - 16)
-
-DO_ZPZ(sve_clz_b, uint8_t, H1, DO_CLZ_B)
-DO_ZPZ(sve_clz_h, uint16_t, H1_2, DO_CLZ_H)
-DO_ZPZ(sve_clz_s, uint32_t, H1_4, clz32)
-DO_ZPZ_D(sve_clz_d, uint64_t, clz64)
-
-DO_ZPZ(sve_cnt_zpz_b, uint8_t, H1, ctpop8)
-DO_ZPZ(sve_cnt_zpz_h, uint16_t, H1_2, ctpop16)
-DO_ZPZ(sve_cnt_zpz_s, uint32_t, H1_4, ctpop32)
-DO_ZPZ_D(sve_cnt_zpz_d, uint64_t, ctpop64)
-
-#define DO_CNOT(N) (N == 0)
-
-DO_ZPZ(sve_cnot_b, uint8_t, H1, DO_CNOT)
-DO_ZPZ(sve_cnot_h, uint16_t, H1_2, DO_CNOT)
-DO_ZPZ(sve_cnot_s, uint32_t, H1_4, DO_CNOT)
-DO_ZPZ_D(sve_cnot_d, uint64_t, DO_CNOT)
-
-#define DO_FABS(N) (N & ((__typeof(N))-1 >> 1))
-
-DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS)
-DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS)
-DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS)
-
-#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1))
-
-DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG)
-DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG)
-DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG)
-
-#define DO_NOT(N) (~N)
-
-DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT)
-DO_ZPZ(sve_not_zpz_h, uint16_t, H1_2, DO_NOT)
-DO_ZPZ(sve_not_zpz_s, uint32_t, H1_4, DO_NOT)
-DO_ZPZ_D(sve_not_zpz_d, uint64_t, DO_NOT)
-
-#define DO_SXTB(N) ((int8_t)N)
-#define DO_SXTH(N) ((int16_t)N)
-#define DO_SXTS(N) ((int32_t)N)
-#define DO_UXTB(N) ((uint8_t)N)
-#define DO_UXTH(N) ((uint16_t)N)
-#define DO_UXTS(N) ((uint32_t)N)
-
-DO_ZPZ(sve_sxtb_h, uint16_t, H1_2, DO_SXTB)
-DO_ZPZ(sve_sxtb_s, uint32_t, H1_4, DO_SXTB)
-DO_ZPZ(sve_sxth_s, uint32_t, H1_4, DO_SXTH)
-DO_ZPZ_D(sve_sxtb_d, uint64_t, DO_SXTB)
-DO_ZPZ_D(sve_sxth_d, uint64_t, DO_SXTH)
-DO_ZPZ_D(sve_sxtw_d, uint64_t, DO_SXTS)
-
-DO_ZPZ(sve_uxtb_h, uint16_t, H1_2, DO_UXTB)
-DO_ZPZ(sve_uxtb_s, uint32_t, H1_4, DO_UXTB)
-DO_ZPZ(sve_uxth_s, uint32_t, H1_4, DO_UXTH)
-DO_ZPZ_D(sve_uxtb_d, uint64_t, DO_UXTB)
-DO_ZPZ_D(sve_uxth_d, uint64_t, DO_UXTH)
-DO_ZPZ_D(sve_uxtw_d, uint64_t, DO_UXTS)
-
-#define DO_ABS(N) (N < 0 ? -N : N)
-
-DO_ZPZ(sve_abs_b, int8_t, H1, DO_ABS)
-DO_ZPZ(sve_abs_h, int16_t, H1_2, DO_ABS)
-DO_ZPZ(sve_abs_s, int32_t, H1_4, DO_ABS)
-DO_ZPZ_D(sve_abs_d, int64_t, DO_ABS)
-
-#define DO_NEG(N) (-N)
-
-DO_ZPZ(sve_neg_b, uint8_t, H1, DO_NEG)
-DO_ZPZ(sve_neg_h, uint16_t, H1_2, DO_NEG)
-DO_ZPZ(sve_neg_s, uint32_t, H1_4, DO_NEG)
-DO_ZPZ_D(sve_neg_d, uint64_t, DO_NEG)
-
-DO_ZPZ(sve_revb_h, uint16_t, H1_2, bswap16)
-DO_ZPZ(sve_revb_s, uint32_t, H1_4, bswap32)
-DO_ZPZ_D(sve_revb_d, uint64_t, bswap64)
-
-DO_ZPZ(sve_revh_s, uint32_t, H1_4, hswap32)
-DO_ZPZ_D(sve_revh_d, uint64_t, hswap64)
-
-DO_ZPZ_D(sve_revw_d, uint64_t, wswap64)
-
-DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8)
-DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16)
-DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32)
-DO_ZPZ_D(sve_rbit_d, uint64_t, revbit64)
-
-/* Three-operand expander, unpredicated, in which the third operand is "wide".
- */
-#define DO_ZZW(NAME, TYPE, TYPEW, H, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc); \
- for (i = 0; i < opr_sz; ) { \
- TYPEW mm = *(TYPEW *)(vm + i); \
- do { \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- *(TYPE *)(vd + H(i)) = OP(nn, mm); \
- i += sizeof(TYPE); \
- } while (i & 7); \
- } \
-}
-
-DO_ZZW(sve_asr_zzw_b, int8_t, uint64_t, H1, DO_ASR)
-DO_ZZW(sve_lsr_zzw_b, uint8_t, uint64_t, H1, DO_LSR)
-DO_ZZW(sve_lsl_zzw_b, uint8_t, uint64_t, H1, DO_LSL)
-
-DO_ZZW(sve_asr_zzw_h, int16_t, uint64_t, H1_2, DO_ASR)
-DO_ZZW(sve_lsr_zzw_h, uint16_t, uint64_t, H1_2, DO_LSR)
-DO_ZZW(sve_lsl_zzw_h, uint16_t, uint64_t, H1_2, DO_LSL)
-
-DO_ZZW(sve_asr_zzw_s, int32_t, uint64_t, H1_4, DO_ASR)
-DO_ZZW(sve_lsr_zzw_s, uint32_t, uint64_t, H1_4, DO_LSR)
-DO_ZZW(sve_lsl_zzw_s, uint32_t, uint64_t, H1_4, DO_LSL)
-
-#undef DO_ZZW
-
-#undef DO_CLS_B
-#undef DO_CLS_H
-#undef DO_CLZ_B
-#undef DO_CLZ_H
-#undef DO_CNOT
-#undef DO_FABS
-#undef DO_FNEG
-#undef DO_ABS
-#undef DO_NEG
-#undef DO_ZPZ
-#undef DO_ZPZ_D
-
-/* Two-operand reduction expander, controlled by a predicate.
- * The difference between TYPERED and TYPERET has to do with
- * sign-extension. E.g. for SMAX, TYPERED must be signed,
- * but TYPERET must be unsigned so that e.g. a 32-bit value
- * is not sign-extended to the ABI uint64_t return type.
- */
-/* ??? If we were to vectorize this by hand the reduction ordering
- * would change. For integer operands, this is perfectly fine.
- */
-#define DO_VPZ(NAME, TYPEELT, TYPERED, TYPERET, H, INIT, OP) \
-uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc); \
- TYPERED ret = INIT; \
- for (i = 0; i < opr_sz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- if (pg & 1) { \
- TYPEELT nn = *(TYPEELT *)(vn + H(i)); \
- ret = OP(ret, nn); \
- } \
- i += sizeof(TYPEELT), pg >>= sizeof(TYPEELT); \
- } while (i & 15); \
- } \
- return (TYPERET)ret; \
-}
-
-#define DO_VPZ_D(NAME, TYPEE, TYPER, INIT, OP) \
-uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
- TYPEE *n = vn; \
- uint8_t *pg = vg; \
- TYPER ret = INIT; \
- for (i = 0; i < opr_sz; i += 1) { \
- if (pg[H1(i)] & 1) { \
- TYPEE nn = n[i]; \
- ret = OP(ret, nn); \
- } \
- } \
- return ret; \
-}
-
-DO_VPZ(sve_orv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_ORR)
-DO_VPZ(sve_orv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_ORR)
-DO_VPZ(sve_orv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_ORR)
-DO_VPZ_D(sve_orv_d, uint64_t, uint64_t, 0, DO_ORR)
-
-DO_VPZ(sve_eorv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_EOR)
-DO_VPZ(sve_eorv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_EOR)
-DO_VPZ(sve_eorv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_EOR)
-DO_VPZ_D(sve_eorv_d, uint64_t, uint64_t, 0, DO_EOR)
-
-DO_VPZ(sve_andv_b, uint8_t, uint8_t, uint8_t, H1, -1, DO_AND)
-DO_VPZ(sve_andv_h, uint16_t, uint16_t, uint16_t, H1_2, -1, DO_AND)
-DO_VPZ(sve_andv_s, uint32_t, uint32_t, uint32_t, H1_4, -1, DO_AND)
-DO_VPZ_D(sve_andv_d, uint64_t, uint64_t, -1, DO_AND)
-
-DO_VPZ(sve_saddv_b, int8_t, uint64_t, uint64_t, H1, 0, DO_ADD)
-DO_VPZ(sve_saddv_h, int16_t, uint64_t, uint64_t, H1_2, 0, DO_ADD)
-DO_VPZ(sve_saddv_s, int32_t, uint64_t, uint64_t, H1_4, 0, DO_ADD)
-
-DO_VPZ(sve_uaddv_b, uint8_t, uint64_t, uint64_t, H1, 0, DO_ADD)
-DO_VPZ(sve_uaddv_h, uint16_t, uint64_t, uint64_t, H1_2, 0, DO_ADD)
-DO_VPZ(sve_uaddv_s, uint32_t, uint64_t, uint64_t, H1_4, 0, DO_ADD)
-DO_VPZ_D(sve_uaddv_d, uint64_t, uint64_t, 0, DO_ADD)
-
-DO_VPZ(sve_smaxv_b, int8_t, int8_t, uint8_t, H1, INT8_MIN, DO_MAX)
-DO_VPZ(sve_smaxv_h, int16_t, int16_t, uint16_t, H1_2, INT16_MIN, DO_MAX)
-DO_VPZ(sve_smaxv_s, int32_t, int32_t, uint32_t, H1_4, INT32_MIN, DO_MAX)
-DO_VPZ_D(sve_smaxv_d, int64_t, int64_t, INT64_MIN, DO_MAX)
-
-DO_VPZ(sve_umaxv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_MAX)
-DO_VPZ(sve_umaxv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_MAX)
-DO_VPZ(sve_umaxv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_MAX)
-DO_VPZ_D(sve_umaxv_d, uint64_t, uint64_t, 0, DO_MAX)
-
-DO_VPZ(sve_sminv_b, int8_t, int8_t, uint8_t, H1, INT8_MAX, DO_MIN)
-DO_VPZ(sve_sminv_h, int16_t, int16_t, uint16_t, H1_2, INT16_MAX, DO_MIN)
-DO_VPZ(sve_sminv_s, int32_t, int32_t, uint32_t, H1_4, INT32_MAX, DO_MIN)
-DO_VPZ_D(sve_sminv_d, int64_t, int64_t, INT64_MAX, DO_MIN)
-
-DO_VPZ(sve_uminv_b, uint8_t, uint8_t, uint8_t, H1, -1, DO_MIN)
-DO_VPZ(sve_uminv_h, uint16_t, uint16_t, uint16_t, H1_2, -1, DO_MIN)
-DO_VPZ(sve_uminv_s, uint32_t, uint32_t, uint32_t, H1_4, -1, DO_MIN)
-DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN)
-
-#undef DO_VPZ
-#undef DO_VPZ_D
-
-/* Two vector operand, one scalar operand, unpredicated. */
-#define DO_ZZI(NAME, TYPE, OP) \
-void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(TYPE); \
- TYPE s = s64, *d = vd, *n = vn; \
- for (i = 0; i < opr_sz; ++i) { \
- d[i] = OP(n[i], s); \
- } \
-}
-
-#define DO_SUBR(X, Y) (Y - X)
-
-DO_ZZI(sve_subri_b, uint8_t, DO_SUBR)
-DO_ZZI(sve_subri_h, uint16_t, DO_SUBR)
-DO_ZZI(sve_subri_s, uint32_t, DO_SUBR)
-DO_ZZI(sve_subri_d, uint64_t, DO_SUBR)
-
-DO_ZZI(sve_smaxi_b, int8_t, DO_MAX)
-DO_ZZI(sve_smaxi_h, int16_t, DO_MAX)
-DO_ZZI(sve_smaxi_s, int32_t, DO_MAX)
-DO_ZZI(sve_smaxi_d, int64_t, DO_MAX)
-
-DO_ZZI(sve_smini_b, int8_t, DO_MIN)
-DO_ZZI(sve_smini_h, int16_t, DO_MIN)
-DO_ZZI(sve_smini_s, int32_t, DO_MIN)
-DO_ZZI(sve_smini_d, int64_t, DO_MIN)
-
-DO_ZZI(sve_umaxi_b, uint8_t, DO_MAX)
-DO_ZZI(sve_umaxi_h, uint16_t, DO_MAX)
-DO_ZZI(sve_umaxi_s, uint32_t, DO_MAX)
-DO_ZZI(sve_umaxi_d, uint64_t, DO_MAX)
-
-DO_ZZI(sve_umini_b, uint8_t, DO_MIN)
-DO_ZZI(sve_umini_h, uint16_t, DO_MIN)
-DO_ZZI(sve_umini_s, uint32_t, DO_MIN)
-DO_ZZI(sve_umini_d, uint64_t, DO_MIN)
-
-#undef DO_ZZI
-
-#undef DO_AND
-#undef DO_ORR
-#undef DO_EOR
-#undef DO_BIC
-#undef DO_ADD
-#undef DO_SUB
-#undef DO_MAX
-#undef DO_MIN
-#undef DO_ABD
-#undef DO_MUL
-#undef DO_DIV
-#undef DO_ASR
-#undef DO_LSR
-#undef DO_LSL
-#undef DO_SUBR
-
-/* Similar to the ARM LastActiveElement pseudocode function, except the
- result is multiplied by the element size. This includes the not found
- indication; e.g. not found for esz=3 is -8. */
-static intptr_t last_active_element(uint64_t *g, intptr_t words, intptr_t esz)
-{
- uint64_t mask = pred_esz_masks[esz];
- intptr_t i = words;
-
- do {
- uint64_t this_g = g[--i] & mask;
- if (this_g) {
- return i * 64 + (63 - clz64(this_g));
- }
- } while (i > 0);
- return (intptr_t)-1 << esz;
-}
-
-uint32_t HELPER(sve_pfirst)(void *vd, void *vg, uint32_t words)
-{
- uint32_t flags = PREDTEST_INIT;
- uint64_t *d = vd, *g = vg;
- intptr_t i = 0;
-
- do {
- uint64_t this_d = d[i];
- uint64_t this_g = g[i];
-
- if (this_g) {
- if (!(flags & 4)) {
- /* Set in D the first bit of G. */
- this_d |= this_g & -this_g;
- d[i] = this_d;
- }
- flags = iter_predtest_fwd(this_d, this_g, flags);
- }
- } while (++i < words);
-
- return flags;
-}
-
-uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc)
-{
- intptr_t words = extract32(pred_desc, 0, SIMD_OPRSZ_BITS);
- intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
- uint32_t flags = PREDTEST_INIT;
- uint64_t *d = vd, *g = vg, esz_mask;
- intptr_t i, next;
-
- next = last_active_element(vd, words, esz) + (1 << esz);
- esz_mask = pred_esz_masks[esz];
-
- /* Similar to the pseudocode for pnext, but scaled by ESZ
- so that we find the correct bit. */
- if (next < words * 64) {
- uint64_t mask = -1;
-
- if (next & 63) {
- mask = ~((1ull << (next & 63)) - 1);
- next &= -64;
- }
- do {
- uint64_t this_g = g[next / 64] & esz_mask & mask;
- if (this_g != 0) {
- next = (next & -64) + ctz64(this_g);
- break;
- }
- next += 64;
- mask = -1;
- } while (next < words * 64);
- }
-
- i = 0;
- do {
- uint64_t this_d = 0;
- if (i == next / 64) {
- this_d = 1ull << (next & 63);
- }
- d[i] = this_d;
- flags = iter_predtest_fwd(this_d, g[i] & esz_mask, flags);
- } while (++i < words);
-
- return flags;
-}
-
-/* Store zero into every active element of Zd. We will use this for two
- * and three-operand predicated instructions for which logic dictates a
- * zero result. In particular, logical shift by element size, which is
- * otherwise undefined on the host.
- *
- * For element sizes smaller than uint64_t, we use tables to expand
- * the N bits of the controlling predicate to a byte mask, and clear
- * those bytes.
- */
-void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_b(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_h(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] &= ~expand_pred_s(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- if (pg[H1(i)] & 1) {
- d[i] = 0;
- }
- }
-}
-
-/* Copy Zn into Zd, and store zero into inactive elements. */
-void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_b(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_h(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & expand_pred_s(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn;
- uint8_t *pg = vg;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] & -(uint64_t)(pg[H1(i)] & 1);
- }
-}
-
-/* Three-operand expander, immediate operand, controlled by a predicate.
- */
-#define DO_ZPZI(NAME, TYPE, H, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc); \
- TYPE imm = simd_data(desc); \
- for (i = 0; i < opr_sz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- if (pg & 1) { \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- *(TYPE *)(vd + H(i)) = OP(nn, imm); \
- } \
- i += sizeof(TYPE), pg >>= sizeof(TYPE); \
- } while (i & 15); \
- } \
-}
-
-/* Similarly, specialized for 64-bit operands. */
-#define DO_ZPZI_D(NAME, TYPE, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
- TYPE *d = vd, *n = vn; \
- TYPE imm = simd_data(desc); \
- uint8_t *pg = vg; \
- for (i = 0; i < opr_sz; i += 1) { \
- if (pg[H1(i)] & 1) { \
- TYPE nn = n[i]; \
- d[i] = OP(nn, imm); \
- } \
- } \
-}
-
-#define DO_SHR(N, M) (N >> M)
-#define DO_SHL(N, M) (N << M)
-
-/* Arithmetic shift right for division. This rounds negative numbers
- toward zero as per signed division. Therefore before shifting,
- when N is negative, add 2**M-1. */
-#define DO_ASRD(N, M) ((N + (N < 0 ? ((__typeof(N))1 << M) - 1 : 0)) >> M)
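A quick worked example of DO_ASRD, illustrative only: for N = -7 and M = 2, a plain arithmetic shift gives -2 (rounding toward minus infinity), while adding 2**M - 1 first gives -1, matching signed division -7 / 4. Like the helper, the sketch assumes the compiler shifts negative values arithmetically.

#include <assert.h>

static int asrd(int n, int m)
{
    return (n + (n < 0 ? (1 << m) - 1 : 0)) >> m;  /* round toward zero */
}

int main(void)
{
    assert((-7 >> 2) == -2);    /* plain shift: rounds toward minus infinity */
    assert(asrd(-7, 2) == -1);  /* ASRD: rounds toward zero, like -7 / 4 */
    assert(asrd(7, 2) == 1);
    return 0;
}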
-
-DO_ZPZI(sve_asr_zpzi_b, int8_t, H1, DO_SHR)
-DO_ZPZI(sve_asr_zpzi_h, int16_t, H1_2, DO_SHR)
-DO_ZPZI(sve_asr_zpzi_s, int32_t, H1_4, DO_SHR)
-DO_ZPZI_D(sve_asr_zpzi_d, int64_t, DO_SHR)
-
-DO_ZPZI(sve_lsr_zpzi_b, uint8_t, H1, DO_SHR)
-DO_ZPZI(sve_lsr_zpzi_h, uint16_t, H1_2, DO_SHR)
-DO_ZPZI(sve_lsr_zpzi_s, uint32_t, H1_4, DO_SHR)
-DO_ZPZI_D(sve_lsr_zpzi_d, uint64_t, DO_SHR)
-
-DO_ZPZI(sve_lsl_zpzi_b, uint8_t, H1, DO_SHL)
-DO_ZPZI(sve_lsl_zpzi_h, uint16_t, H1_2, DO_SHL)
-DO_ZPZI(sve_lsl_zpzi_s, uint32_t, H1_4, DO_SHL)
-DO_ZPZI_D(sve_lsl_zpzi_d, uint64_t, DO_SHL)
-
-DO_ZPZI(sve_asrd_b, int8_t, H1, DO_ASRD)
-DO_ZPZI(sve_asrd_h, int16_t, H1_2, DO_ASRD)
-DO_ZPZI(sve_asrd_s, int32_t, H1_4, DO_ASRD)
-DO_ZPZI_D(sve_asrd_d, int64_t, DO_ASRD)
-
-#undef DO_SHR
-#undef DO_SHL
-#undef DO_ASRD
-#undef DO_ZPZI
-#undef DO_ZPZI_D
-
-/* Fully general four-operand expander, controlled by a predicate.
- */
-#define DO_ZPZZZ(NAME, TYPE, H, OP) \
-void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \
- void *vg, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc); \
- for (i = 0; i < opr_sz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- if (pg & 1) { \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- TYPE mm = *(TYPE *)(vm + H(i)); \
- TYPE aa = *(TYPE *)(va + H(i)); \
- *(TYPE *)(vd + H(i)) = OP(aa, nn, mm); \
- } \
- i += sizeof(TYPE), pg >>= sizeof(TYPE); \
- } while (i & 15); \
- } \
-}
-
-/* Similarly, specialized for 64-bit operands. */
-#define DO_ZPZZZ_D(NAME, TYPE, OP) \
-void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \
- void *vg, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
- TYPE *d = vd, *a = va, *n = vn, *m = vm; \
- uint8_t *pg = vg; \
- for (i = 0; i < opr_sz; i += 1) { \
- if (pg[H1(i)] & 1) { \
- TYPE aa = a[i], nn = n[i], mm = m[i]; \
- d[i] = OP(aa, nn, mm); \
- } \
- } \
-}
-
-#define DO_MLA(A, N, M) (A + N * M)
-#define DO_MLS(A, N, M) (A - N * M)
-
-DO_ZPZZZ(sve_mla_b, uint8_t, H1, DO_MLA)
-DO_ZPZZZ(sve_mls_b, uint8_t, H1, DO_MLS)
-
-DO_ZPZZZ(sve_mla_h, uint16_t, H1_2, DO_MLA)
-DO_ZPZZZ(sve_mls_h, uint16_t, H1_2, DO_MLS)
-
-DO_ZPZZZ(sve_mla_s, uint32_t, H1_4, DO_MLA)
-DO_ZPZZZ(sve_mls_s, uint32_t, H1_4, DO_MLS)
-
-DO_ZPZZZ_D(sve_mla_d, uint64_t, DO_MLA)
-DO_ZPZZZ_D(sve_mls_d, uint64_t, DO_MLS)
-
-#undef DO_MLA
-#undef DO_MLS
-#undef DO_ZPZZZ
-#undef DO_ZPZZZ_D
-
-void HELPER(sve_index_b)(void *vd, uint32_t start,
- uint32_t incr, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc);
- uint8_t *d = vd;
- for (i = 0; i < opr_sz; i += 1) {
- d[H1(i)] = start + i * incr;
- }
-}
-
-void HELPER(sve_index_h)(void *vd, uint32_t start,
- uint32_t incr, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 2;
- uint16_t *d = vd;
- for (i = 0; i < opr_sz; i += 1) {
- d[H2(i)] = start + i * incr;
- }
-}
-
-void HELPER(sve_index_s)(void *vd, uint32_t start,
- uint32_t incr, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 4;
- uint32_t *d = vd;
- for (i = 0; i < opr_sz; i += 1) {
- d[H4(i)] = start + i * incr;
- }
-}
-
-void HELPER(sve_index_d)(void *vd, uint64_t start,
- uint64_t incr, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] = start + i * incr;
- }
-}
-
-void HELPER(sve_adr_p32)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 4;
- uint32_t sh = simd_data(desc);
- uint32_t *d = vd, *n = vn, *m = vm;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] + (m[i] << sh);
- }
-}
-
-void HELPER(sve_adr_p64)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t sh = simd_data(desc);
- uint64_t *d = vd, *n = vn, *m = vm;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] + (m[i] << sh);
- }
-}
-
-void HELPER(sve_adr_s32)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t sh = simd_data(desc);
- uint64_t *d = vd, *n = vn, *m = vm;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] + ((uint64_t)(int32_t)m[i] << sh);
- }
-}
-
-void HELPER(sve_adr_u32)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t sh = simd_data(desc);
- uint64_t *d = vd, *n = vn, *m = vm;
- for (i = 0; i < opr_sz; i += 1) {
- d[i] = n[i] + ((uint64_t)(uint32_t)m[i] << sh);
- }
-}
-
-void HELPER(sve_fexpa_h)(void *vd, void *vn, uint32_t desc)
-{
- /* These constants are cut-and-paste directly from the ARM pseudocode. */
- static const uint16_t coeff[] = {
- 0x0000, 0x0016, 0x002d, 0x0045, 0x005d, 0x0075, 0x008e, 0x00a8,
- 0x00c2, 0x00dc, 0x00f8, 0x0114, 0x0130, 0x014d, 0x016b, 0x0189,
- 0x01a8, 0x01c8, 0x01e8, 0x0209, 0x022b, 0x024e, 0x0271, 0x0295,
- 0x02ba, 0x02e0, 0x0306, 0x032e, 0x0356, 0x037f, 0x03a9, 0x03d4,
- };
- intptr_t i, opr_sz = simd_oprsz(desc) / 2;
- uint16_t *d = vd, *n = vn;
-
- for (i = 0; i < opr_sz; i++) {
- uint16_t nn = n[i];
- intptr_t idx = extract32(nn, 0, 5);
- uint16_t exp = extract32(nn, 5, 5);
- d[i] = coeff[idx] | (exp << 10);
- }
-}
-
-void HELPER(sve_fexpa_s)(void *vd, void *vn, uint32_t desc)
-{
- /* These constants are cut-and-paste directly from the ARM pseudocode. */
- static const uint32_t coeff[] = {
- 0x000000, 0x0164d2, 0x02cd87, 0x043a29,
- 0x05aac3, 0x071f62, 0x08980f, 0x0a14d5,
- 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc,
- 0x11c3d3, 0x135a2b, 0x14f4f0, 0x16942d,
- 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda,
- 0x1ef532, 0x20b051, 0x227043, 0x243516,
- 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
- 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4,
- 0x3504f3, 0x36fd92, 0x38fbaf, 0x3aff5b,
- 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd,
- 0x45672a, 0x478d75, 0x49b9be, 0x4bec15,
- 0x4e248c, 0x506334, 0x52a81e, 0x54f35b,
- 0x5744fd, 0x599d16, 0x5bfbb8, 0x5e60f5,
- 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
- 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177,
- 0x75257d, 0x77d0df, 0x7a83b3, 0x7d3e0c,
- };
- intptr_t i, opr_sz = simd_oprsz(desc) / 4;
- uint32_t *d = vd, *n = vn;
-
- for (i = 0; i < opr_sz; i++) {
- uint32_t nn = n[i];
- intptr_t idx = extract32(nn, 0, 6);
- uint32_t exp = extract32(nn, 6, 8);
- d[i] = coeff[idx] | (exp << 23);
- }
-}
-
-void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc)
-{
- /* These constants are cut-and-paste directly from the ARM pseudocode. */
- static const uint64_t coeff[] = {
- 0x0000000000000ull, 0x02C9A3E778061ull, 0x059B0D3158574ull,
- 0x0874518759BC8ull, 0x0B5586CF9890Full, 0x0E3EC32D3D1A2ull,
- 0x11301D0125B51ull, 0x1429AAEA92DE0ull, 0x172B83C7D517Bull,
- 0x1A35BEB6FCB75ull, 0x1D4873168B9AAull, 0x2063B88628CD6ull,
- 0x2387A6E756238ull, 0x26B4565E27CDDull, 0x29E9DF51FDEE1ull,
- 0x2D285A6E4030Bull, 0x306FE0A31B715ull, 0x33C08B26416FFull,
- 0x371A7373AA9CBull, 0x3A7DB34E59FF7ull, 0x3DEA64C123422ull,
- 0x4160A21F72E2Aull, 0x44E086061892Dull, 0x486A2B5C13CD0ull,
- 0x4BFDAD5362A27ull, 0x4F9B2769D2CA7ull, 0x5342B569D4F82ull,
- 0x56F4736B527DAull, 0x5AB07DD485429ull, 0x5E76F15AD2148ull,
- 0x6247EB03A5585ull, 0x6623882552225ull, 0x6A09E667F3BCDull,
- 0x6DFB23C651A2Full, 0x71F75E8EC5F74ull, 0x75FEB564267C9ull,
- 0x7A11473EB0187ull, 0x7E2F336CF4E62ull, 0x82589994CCE13ull,
- 0x868D99B4492EDull, 0x8ACE5422AA0DBull, 0x8F1AE99157736ull,
- 0x93737B0CDC5E5ull, 0x97D829FDE4E50ull, 0x9C49182A3F090ull,
- 0xA0C667B5DE565ull, 0xA5503B23E255Dull, 0xA9E6B5579FDBFull,
- 0xAE89F995AD3ADull, 0xB33A2B84F15FBull, 0xB7F76F2FB5E47ull,
- 0xBCC1E904BC1D2ull, 0xC199BDD85529Cull, 0xC67F12E57D14Bull,
- 0xCB720DCEF9069ull, 0xD072D4A07897Cull, 0xD5818DCFBA487ull,
- 0xDA9E603DB3285ull, 0xDFC97337B9B5Full, 0xE502EE78B3FF6ull,
- 0xEA4AFA2A490DAull, 0xEFA1BEE615A27ull, 0xF50765B6E4540ull,
- 0xFA7C1819E90D8ull,
- };
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn;
-
- for (i = 0; i < opr_sz; i++) {
- uint64_t nn = n[i];
- intptr_t idx = extract32(nn, 0, 6);
- uint64_t exp = extract32(nn, 6, 11);
- d[i] = coeff[idx] | (exp << 52);
- }
-}
-
-void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 2;
- uint16_t *d = vd, *n = vn, *m = vm;
- for (i = 0; i < opr_sz; i += 1) {
- uint16_t nn = n[i];
- uint16_t mm = m[i];
- if (mm & 1) {
- nn = float16_one;
- }
- d[i] = nn ^ (mm & 2) << 14;
- }
-}
-
-void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 4;
- uint32_t *d = vd, *n = vn, *m = vm;
- for (i = 0; i < opr_sz; i += 1) {
- uint32_t nn = n[i];
- uint32_t mm = m[i];
- if (mm & 1) {
- nn = float32_one;
- }
- d[i] = nn ^ (mm & 2) << 30;
- }
-}
-
-void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn, *m = vm;
- for (i = 0; i < opr_sz; i += 1) {
- uint64_t nn = n[i];
- uint64_t mm = m[i];
- if (mm & 1) {
- nn = float64_one;
- }
- d[i] = nn ^ (mm & 2) << 62;
- }
-}
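-
-/* In the FTSSEL helpers above, bit 0 of the control element selects the
- * constant 1.0 in place of the source element, and bit 1 is shifted up
- * into the sign bit of the result ((mm & 2) << 14/30/62), optionally
- * negating it.
- */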
-
-/*
- * Signed saturating addition with scalar operand.
- */
-
-void HELPER(sve_sqaddi_b)(void *d, void *a, int32_t b, uint32_t desc)
-{
- intptr_t i, oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz; i += sizeof(int8_t)) {
- int r = *(int8_t *)(a + i) + b;
- if (r > INT8_MAX) {
- r = INT8_MAX;
- } else if (r < INT8_MIN) {
- r = INT8_MIN;
- }
- *(int8_t *)(d + i) = r;
- }
-}
-
-void HELPER(sve_sqaddi_h)(void *d, void *a, int32_t b, uint32_t desc)
-{
- intptr_t i, oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz; i += sizeof(int16_t)) {
- int r = *(int16_t *)(a + i) + b;
- if (r > INT16_MAX) {
- r = INT16_MAX;
- } else if (r < INT16_MIN) {
- r = INT16_MIN;
- }
- *(int16_t *)(d + i) = r;
- }
-}
-
-void HELPER(sve_sqaddi_s)(void *d, void *a, int64_t b, uint32_t desc)
-{
- intptr_t i, oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz; i += sizeof(int32_t)) {
- int64_t r = *(int32_t *)(a + i) + b;
- if (r > INT32_MAX) {
- r = INT32_MAX;
- } else if (r < INT32_MIN) {
- r = INT32_MIN;
- }
- *(int32_t *)(d + i) = r;
- }
-}
-
-void HELPER(sve_sqaddi_d)(void *d, void *a, int64_t b, uint32_t desc)
-{
- intptr_t i, oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz; i += sizeof(int64_t)) {
- int64_t ai = *(int64_t *)(a + i);
- int64_t r = ai + b;
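- /* Signed addition overflows only when AI and B have the same sign
- * and R's sign differs: (r ^ ai) has its sign bit set when the
- * result changed sign, ~(ai ^ b) when the operand signs agree. */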
- if (((r ^ ai) & ~(ai ^ b)) < 0) {
- /* Signed overflow. */
- r = (r < 0 ? INT64_MAX : INT64_MIN);
- }
- *(int64_t *)(d + i) = r;
- }
-}
-
-/*
- * Unsigned saturating addition with scalar operand.
- */
-
-void HELPER(sve_uqaddi_b)(void *d, void *a, int32_t b, uint32_t desc)
-{
- intptr_t i, oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
- int r = *(uint8_t *)(a + i) + b;
- if (r > UINT8_MAX) {
- r = UINT8_MAX;
- } else if (r < 0) {
- r = 0;
- }
- *(uint8_t *)(d + i) = r;
- }
-}
-
-void HELPER(sve_uqaddi_h)(void *d, void *a, int32_t b, uint32_t desc)
-{
- intptr_t i, oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
- int r = *(uint16_t *)(a + i) + b;
- if (r > UINT16_MAX) {
- r = UINT16_MAX;
- } else if (r < 0) {
- r = 0;
- }
- *(uint16_t *)(d + i) = r;
- }
-}
-
-void HELPER(sve_uqaddi_s)(void *d, void *a, int64_t b, uint32_t desc)
-{
- intptr_t i, oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
- int64_t r = *(uint32_t *)(a + i) + b;
- if (r > UINT32_MAX) {
- r = UINT32_MAX;
- } else if (r < 0) {
- r = 0;
- }
- *(uint32_t *)(d + i) = r;
- }
-}
-
-void HELPER(sve_uqaddi_d)(void *d, void *a, uint64_t b, uint32_t desc)
-{
- intptr_t i, oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
- uint64_t r = *(uint64_t *)(a + i) + b;
- if (r < b) {
- r = UINT64_MAX;
- }
- *(uint64_t *)(d + i) = r;
- }
-}
-
-void HELPER(sve_uqsubi_d)(void *d, void *a, uint64_t b, uint32_t desc)
-{
- intptr_t i, oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
- uint64_t ai = *(uint64_t *)(a + i);
- *(uint64_t *)(d + i) = (ai < b ? 0 : ai - b);
- }
-}
-
-/* Two operand predicated copy immediate with merge. All valid immediates
- * can fit within 17 signed bits in the simd_data field.
- */
-void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg,
- uint64_t mm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn;
- uint8_t *pg = vg;
-
- mm = dup_const(MO_8, mm);
- for (i = 0; i < opr_sz; i += 1) {
- uint64_t nn = n[i];
- uint64_t pp = expand_pred_b(pg[H1(i)]);
- d[i] = (mm & pp) | (nn & ~pp);
- }
-}
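-
-/* Illustration of the expansion used above: expand_pred_b() (defined
- * earlier in this file) widens each predicate bit to a full byte of the
- * lane mask, so a predicate byte of 0x05 (elements 0 and 2 active)
- * becomes 0x0000000000ff00ff; those byte lanes take MM and the rest
- * keep NN.
- */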
-
-void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg,
- uint64_t mm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn;
- uint8_t *pg = vg;
-
- mm = dup_const(MO_16, mm);
- for (i = 0; i < opr_sz; i += 1) {
- uint64_t nn = n[i];
- uint64_t pp = expand_pred_h(pg[H1(i)]);
- d[i] = (mm & pp) | (nn & ~pp);
- }
-}
-
-void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg,
- uint64_t mm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn;
- uint8_t *pg = vg;
-
- mm = dup_const(MO_32, mm);
- for (i = 0; i < opr_sz; i += 1) {
- uint64_t nn = n[i];
- uint64_t pp = expand_pred_s(pg[H1(i)]);
- d[i] = (mm & pp) | (nn & ~pp);
- }
-}
-
-void HELPER(sve_cpy_m_d)(void *vd, void *vn, void *vg,
- uint64_t mm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn;
- uint8_t *pg = vg;
-
- for (i = 0; i < opr_sz; i += 1) {
- uint64_t nn = n[i];
- d[i] = (pg[H1(i)] & 1 ? mm : nn);
- }
-}
-
-void HELPER(sve_cpy_z_b)(void *vd, void *vg, uint64_t val, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
-
- val = dup_const(MO_8, val);
- for (i = 0; i < opr_sz; i += 1) {
- d[i] = val & expand_pred_b(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_cpy_z_h)(void *vd, void *vg, uint64_t val, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
-
- val = dup_const(MO_16, val);
- for (i = 0; i < opr_sz; i += 1) {
- d[i] = val & expand_pred_h(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_cpy_z_s)(void *vd, void *vg, uint64_t val, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
-
- val = dup_const(MO_32, val);
- for (i = 0; i < opr_sz; i += 1) {
- d[i] = val & expand_pred_s(pg[H1(i)]);
- }
-}
-
-void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd;
- uint8_t *pg = vg;
-
- for (i = 0; i < opr_sz; i += 1) {
- d[i] = (pg[H1(i)] & 1 ? val : 0);
- }
-}
-
-/* Big-endian hosts need to frob the byte indices. If the copy
- * happens to be 8-byte aligned, then no frobbing is necessary.
- */
-static void swap_memmove(void *vd, void *vs, size_t n)
-{
- uintptr_t d = (uintptr_t)vd;
- uintptr_t s = (uintptr_t)vs;
- uintptr_t o = (d | s | n) & 7;
- size_t i;
-
-#ifndef HOST_WORDS_BIGENDIAN
- o = 0;
-#endif
- switch (o) {
- case 0:
- memmove(vd, vs, n);
- break;
-
- case 4:
- if (d < s || d >= s + n) {
- for (i = 0; i < n; i += 4) {
- *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i);
- }
- } else {
- for (i = n; i > 0; ) {
- i -= 4;
- *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i);
- }
- }
- break;
-
- case 2:
- case 6:
- if (d < s || d >= s + n) {
- for (i = 0; i < n; i += 2) {
- *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i);
- }
- } else {
- for (i = n; i > 0; ) {
- i -= 2;
- *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i);
- }
- }
- break;
-
- default:
- if (d < s || d >= s + n) {
- for (i = 0; i < n; i++) {
- *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i);
- }
- } else {
- for (i = n; i > 0; ) {
- i -= 1;
- *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i);
- }
- }
- break;
- }
-}
-
-/* Similarly for memset of 0. */
-static void swap_memzero(void *vd, size_t n)
-{
- uintptr_t d = (uintptr_t)vd;
- uintptr_t o = (d | n) & 7;
- size_t i;
-
- /* Usually, the first bit of a predicate is set, so N is 0. */
- if (likely(n == 0)) {
- return;
- }
-
-#ifndef HOST_WORDS_BIGENDIAN
- o = 0;
-#endif
- switch (o) {
- case 0:
- memset(vd, 0, n);
- break;
-
- case 4:
- for (i = 0; i < n; i += 4) {
- *(uint32_t *)H1_4(d + i) = 0;
- }
- break;
-
- case 2:
- case 6:
- for (i = 0; i < n; i += 2) {
- *(uint16_t *)H1_2(d + i) = 0;
- }
- break;
-
- default:
- for (i = 0; i < n; i++) {
- *(uint8_t *)H1(d + i) = 0;
- }
- break;
- }
-}
-
-void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t opr_sz = simd_oprsz(desc);
- size_t n_ofs = simd_data(desc);
- size_t n_siz = opr_sz - n_ofs;
-
- if (vd != vm) {
- swap_memmove(vd, vn + n_ofs, n_siz);
- swap_memmove(vd + n_siz, vm, n_ofs);
- } else if (vd != vn) {
- swap_memmove(vd + n_siz, vd, n_ofs);
- swap_memmove(vd, vn + n_ofs, n_siz);
- } else {
- /* vd == vn == vm. Need temp space. */
- ARMVectorReg tmp;
- swap_memmove(&tmp, vm, n_ofs);
- swap_memmove(vd, vd + n_ofs, n_siz);
- memcpy(vd + n_siz, &tmp, n_ofs);
- }
-}
-
-#define DO_INSR(NAME, TYPE, H) \
-void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \
-{ \
- intptr_t opr_sz = simd_oprsz(desc); \
- swap_memmove(vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE)); \
- *(TYPE *)(vd + H(0)) = val; \
-}
-
-DO_INSR(sve_insr_b, uint8_t, H1)
-DO_INSR(sve_insr_h, uint16_t, H1_2)
-DO_INSR(sve_insr_s, uint32_t, H1_4)
-DO_INSR(sve_insr_d, uint64_t, )
-
-#undef DO_INSR
-
-void HELPER(sve_rev_b)(void *vd, void *vn, uint32_t desc)
-{
- intptr_t i, j, opr_sz = simd_oprsz(desc);
- for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
- uint64_t f = *(uint64_t *)(vn + i);
- uint64_t b = *(uint64_t *)(vn + j);
- *(uint64_t *)(vd + i) = bswap64(b);
- *(uint64_t *)(vd + j) = bswap64(f);
- }
-}
-
-void HELPER(sve_rev_h)(void *vd, void *vn, uint32_t desc)
-{
- intptr_t i, j, opr_sz = simd_oprsz(desc);
- for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
- uint64_t f = *(uint64_t *)(vn + i);
- uint64_t b = *(uint64_t *)(vn + j);
- *(uint64_t *)(vd + i) = hswap64(b);
- *(uint64_t *)(vd + j) = hswap64(f);
- }
-}
-
-void HELPER(sve_rev_s)(void *vd, void *vn, uint32_t desc)
-{
- intptr_t i, j, opr_sz = simd_oprsz(desc);
- for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
- uint64_t f = *(uint64_t *)(vn + i);
- uint64_t b = *(uint64_t *)(vn + j);
- *(uint64_t *)(vd + i) = rol64(b, 32);
- *(uint64_t *)(vd + j) = rol64(f, 32);
- }
-}
-
-void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t desc)
-{
- intptr_t i, j, opr_sz = simd_oprsz(desc);
- for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
- uint64_t f = *(uint64_t *)(vn + i);
- uint64_t b = *(uint64_t *)(vn + j);
- *(uint64_t *)(vd + i) = b;
- *(uint64_t *)(vd + j) = f;
- }
-}
-
-#define DO_TBL(NAME, TYPE, H) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc); \
- uintptr_t elem = opr_sz / sizeof(TYPE); \
- TYPE *d = vd, *n = vn, *m = vm; \
- ARMVectorReg tmp; \
- if (unlikely(vd == vn)) { \
- n = memcpy(&tmp, vn, opr_sz); \
- } \
- for (i = 0; i < elem; i++) { \
- TYPE j = m[H(i)]; \
- d[H(i)] = j < elem ? n[H(j)] : 0; \
- } \
-}
-
-DO_TBL(sve_tbl_b, uint8_t, H1)
-DO_TBL(sve_tbl_h, uint16_t, H2)
-DO_TBL(sve_tbl_s, uint32_t, H4)
-DO_TBL(sve_tbl_d, uint64_t, )
-
-#undef DO_TBL
-
-#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \
-void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
-{ \
- intptr_t i, opr_sz = simd_oprsz(desc); \
- TYPED *d = vd; \
- TYPES *n = vn; \
- ARMVectorReg tmp; \
- if (unlikely(vn - vd < opr_sz)) { \
- n = memcpy(&tmp, n, opr_sz / 2); \
- } \
- for (i = 0; i < opr_sz / sizeof(TYPED); i++) { \
- d[HD(i)] = n[HS(i)]; \
- } \
-}
-
-DO_UNPK(sve_sunpk_h, int16_t, int8_t, H2, H1)
-DO_UNPK(sve_sunpk_s, int32_t, int16_t, H4, H2)
-DO_UNPK(sve_sunpk_d, int64_t, int32_t, , H4)
-
-DO_UNPK(sve_uunpk_h, uint16_t, uint8_t, H2, H1)
-DO_UNPK(sve_uunpk_s, uint32_t, uint16_t, H4, H2)
-DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, , H4)
-
-#undef DO_UNPK
-
-/* Mask of bits included in the even numbered predicates of width esz.
- * We also use this for expand_bits/compress_bits, and so extend the
- * same pattern out to 16-bit units.
- */
-static const uint64_t even_bit_esz_masks[5] = {
- 0x5555555555555555ull,
- 0x3333333333333333ull,
- 0x0f0f0f0f0f0f0f0full,
- 0x00ff00ff00ff00ffull,
- 0x0000ffff0000ffffull,
-};
-
-/* Zero-extend units of 2**N bits to units of 2**(N+1) bits.
- * For N==0, this corresponds to the operation that in qemu/bitops.h
- * we call half_shuffle64; this algorithm is from Hacker's Delight,
- * section 7-2 Shuffling Bits.
- */
-static uint64_t expand_bits(uint64_t x, int n)
-{
- int i;
-
- x &= 0xffffffffu;
- for (i = 4; i >= n; i--) {
- int sh = 1 << i;
- x = ((x << sh) | x) & even_bit_esz_masks[i];
- }
- return x;
-}
-
-/* Compress units of 2**(N+1) bits to units of 2**N bits.
- * For N==0, this corresponds to the operation that in qemu/bitops.h
- * we call half_unshuffle64; this algorithm is from Hacker's Delight,
- * section 7-2 Shuffling Bits, where it is called an inverse half shuffle.
- */
-static uint64_t compress_bits(uint64_t x, int n)
-{
- int i;
-
- for (i = n; i <= 4; i++) {
- int sh = 1 << i;
- x &= even_bit_esz_masks[i];
- x = (x >> sh) | x;
- }
- return x & 0xffffffffu;
-}
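-
-/* Worked example for N == 0: expand_bits(0x0b, 0) == 0x45, since bits
- * 0, 1 and 3 of the input land at bits 0, 2 and 6 of the output, and
- * compress_bits(0x45, 0) == 0x0b undoes it.  For larger N the same
- * spreading happens to 2**N-bit groups instead of single bits.
- */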
-
-void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
- intptr_t high = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1);
- uint64_t *d = vd;
- intptr_t i;
-
- if (oprsz <= 8) {
- uint64_t nn = *(uint64_t *)vn;
- uint64_t mm = *(uint64_t *)vm;
- int half = 4 * oprsz;
-
- nn = extract64(nn, high * half, half);
- mm = extract64(mm, high * half, half);
- nn = expand_bits(nn, esz);
- mm = expand_bits(mm, esz);
- d[0] = nn + (mm << (1 << esz));
- } else {
- ARMPredicateReg tmp_n, tmp_m;
-
- /* We produce output faster than we consume input.
- * Therefore we must be mindful of possible overlap. */
- if ((vn - vd) < (uintptr_t)oprsz) {
- vn = memcpy(&tmp_n, vn, oprsz);
- }
- if ((vm - vd) < (uintptr_t)oprsz) {
- vm = memcpy(&tmp_m, vm, oprsz);
- }
- if (high) {
- high = oprsz >> 1;
- }
-
- if ((high & 3) == 0) {
- uint32_t *n = vn, *m = vm;
- high >>= 2;
-
- for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
- uint64_t nn = n[H4(high + i)];
- uint64_t mm = m[H4(high + i)];
-
- nn = expand_bits(nn, esz);
- mm = expand_bits(mm, esz);
- d[i] = nn + (mm << (1 << esz));
- }
- } else {
- uint8_t *n = vn, *m = vm;
- uint16_t *d16 = vd;
-
- for (i = 0; i < oprsz / 2; i++) {
- uint16_t nn = n[H1(high + i)];
- uint16_t mm = m[H1(high + i)];
-
- nn = expand_bits(nn, esz);
- mm = expand_bits(mm, esz);
- d16[H2(i)] = nn + (mm << (1 << esz));
- }
- }
- }
-}
-
-void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
- int odd = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1) << esz;
- uint64_t *d = vd, *n = vn, *m = vm;
- uint64_t l, h;
- intptr_t i;
-
- if (oprsz <= 8) {
- l = compress_bits(n[0] >> odd, esz);
- h = compress_bits(m[0] >> odd, esz);
- d[0] = extract64(l + (h << (4 * oprsz)), 0, 8 * oprsz);
- } else {
- ARMPredicateReg tmp_m;
- intptr_t oprsz_16 = oprsz / 16;
-
- if ((vm - vd) < (uintptr_t)oprsz) {
- m = memcpy(&tmp_m, vm, oprsz);
- }
-
- for (i = 0; i < oprsz_16; i++) {
- l = n[2 * i + 0];
- h = n[2 * i + 1];
- l = compress_bits(l >> odd, esz);
- h = compress_bits(h >> odd, esz);
- d[i] = l + (h << 32);
- }
-
- /* For VL which is not a power of 2, the results from M do not
- * align nicely with the uint64_t for D. Put the aligned results
- * from M into TMP_M and then copy it into place afterward. */
- if (oprsz & 15) {
- d[i] = compress_bits(n[2 * i] >> odd, esz);
-
- for (i = 0; i < oprsz_16; i++) {
- l = m[2 * i + 0];
- h = m[2 * i + 1];
- l = compress_bits(l >> odd, esz);
- h = compress_bits(h >> odd, esz);
- tmp_m.p[i] = l + (h << 32);
- }
- tmp_m.p[i] = compress_bits(m[2 * i] >> odd, esz);
-
- swap_memmove(vd + oprsz / 2, &tmp_m, oprsz / 2);
- } else {
- for (i = 0; i < oprsz_16; i++) {
- l = m[2 * i + 0];
- h = m[2 * i + 1];
- l = compress_bits(l >> odd, esz);
- h = compress_bits(h >> odd, esz);
- d[oprsz_16 + i] = l + (h << 32);
- }
- }
- }
-}
-
-void HELPER(sve_trn_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- uintptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
- bool odd = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1);
- uint64_t *d = vd, *n = vn, *m = vm;
- uint64_t mask;
- int shr, shl;
- intptr_t i;
-
- shl = 1 << esz;
- shr = 0;
- mask = even_bit_esz_masks[esz];
- if (odd) {
- mask <<= shl;
- shr = shl;
- shl = 0;
- }
-
- for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
- uint64_t nn = (n[i] & mask) >> shr;
- uint64_t mm = (m[i] & mask) << shl;
- d[i] = nn + mm;
- }
-}
-
-/* Reverse units of 2**N bits. */
-static uint64_t reverse_bits_64(uint64_t x, int n)
-{
- int i, sh;
-
- x = bswap64(x);
- for (i = 2, sh = 4; i >= n; i--, sh >>= 1) {
- uint64_t mask = even_bit_esz_masks[i];
- x = ((x & mask) << sh) | ((x >> sh) & mask);
- }
- return x;
-}
-
-static uint8_t reverse_bits_8(uint8_t x, int n)
-{
- static const uint8_t mask[3] = { 0x55, 0x33, 0x0f };
- int i, sh;
-
- for (i = 2, sh = 4; i >= n; i--, sh >>= 1) {
- x = ((x & mask[i]) << sh) | ((x >> sh) & mask[i]);
- }
- return x;
-}
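-
-/* For example, reverse_bits_8(0xc1, 0) == 0x83 (a full bit reversal),
- * while reverse_bits_8(x, 2) only swaps the two nibbles; likewise
- * reverse_bits_64(x, 3) reduces to a plain bswap64().
- */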
-
-void HELPER(sve_rev_p)(void *vd, void *vn, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- int esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
- intptr_t i, oprsz_2 = oprsz / 2;
-
- if (oprsz <= 8) {
- uint64_t l = *(uint64_t *)vn;
- l = reverse_bits_64(l << (64 - 8 * oprsz), esz);
- *(uint64_t *)vd = l;
- } else if ((oprsz & 15) == 0) {
- for (i = 0; i < oprsz_2; i += 8) {
- intptr_t ih = oprsz - 8 - i;
- uint64_t l = reverse_bits_64(*(uint64_t *)(vn + i), esz);
- uint64_t h = reverse_bits_64(*(uint64_t *)(vn + ih), esz);
- *(uint64_t *)(vd + i) = h;
- *(uint64_t *)(vd + ih) = l;
- }
- } else {
- for (i = 0; i < oprsz_2; i += 1) {
- intptr_t il = H1(i);
- intptr_t ih = H1(oprsz - 1 - i);
- uint8_t l = reverse_bits_8(*(uint8_t *)(vn + il), esz);
- uint8_t h = reverse_bits_8(*(uint8_t *)(vn + ih), esz);
- *(uint8_t *)(vd + il) = h;
- *(uint8_t *)(vd + ih) = l;
- }
- }
-}
-
-void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- intptr_t high = extract32(pred_desc, SIMD_DATA_SHIFT + 2, 1);
- uint64_t *d = vd;
- intptr_t i;
-
- if (oprsz <= 8) {
- uint64_t nn = *(uint64_t *)vn;
- int half = 4 * oprsz;
-
- nn = extract64(nn, high * half, half);
- nn = expand_bits(nn, 0);
- d[0] = nn;
- } else {
- ARMPredicateReg tmp_n;
-
- /* We produce output faster than we consume input.
- * Therefore we must be mindful of possible overlap. */
- if ((vn - vd) < (uintptr_t)oprsz) {
- vn = memcpy(&tmp_n, vn, oprsz);
- }
- if (high) {
- high = oprsz >> 1;
- }
-
- if ((high & 3) == 0) {
- uint32_t *n = vn;
- high >>= 2;
-
- for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
- uint64_t nn = n[H4(high + i)];
- d[i] = expand_bits(nn, 0);
- }
- } else {
- uint16_t *d16 = vd;
- uint8_t *n = vn;
-
- for (i = 0; i < oprsz / 2; i++) {
- uint16_t nn = n[H1(high + i)];
- d16[H2(i)] = expand_bits(nn, 0);
- }
- }
- }
-}
-
-#define DO_ZIP(NAME, TYPE, H) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
-{ \
- intptr_t oprsz = simd_oprsz(desc); \
- intptr_t i, oprsz_2 = oprsz / 2; \
- ARMVectorReg tmp_n, tmp_m; \
- /* We produce output faster than we consume input. \
- * Therefore we must be mindful of possible overlap. */ \
- if (unlikely((vn - vd) < (uintptr_t)oprsz)) { \
- vn = memcpy(&tmp_n, vn, oprsz_2); \
- } \
- if (unlikely((vm - vd) < (uintptr_t)oprsz)) { \
- vm = memcpy(&tmp_m, vm, oprsz_2); \
- } \
- for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \
- *(TYPE *)(vd + H(2 * i + 0)) = *(TYPE *)(vn + H(i)); \
- *(TYPE *)(vd + H(2 * i + sizeof(TYPE))) = *(TYPE *)(vm + H(i)); \
- } \
-}
-
-DO_ZIP(sve_zip_b, uint8_t, H1)
-DO_ZIP(sve_zip_h, uint16_t, H1_2)
-DO_ZIP(sve_zip_s, uint32_t, H1_4)
-DO_ZIP(sve_zip_d, uint64_t, )
-
-#define DO_UZP(NAME, TYPE, H) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
-{ \
- intptr_t oprsz = simd_oprsz(desc); \
- intptr_t oprsz_2 = oprsz / 2; \
- intptr_t odd_ofs = simd_data(desc); \
- intptr_t i; \
- ARMVectorReg tmp_m; \
- if (unlikely((vm - vd) < (uintptr_t)oprsz)) { \
- vm = memcpy(&tmp_m, vm, oprsz); \
- } \
- for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \
- *(TYPE *)(vd + H(i)) = *(TYPE *)(vn + H(2 * i + odd_ofs)); \
- } \
- for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \
- *(TYPE *)(vd + H(oprsz_2 + i)) = *(TYPE *)(vm + H(2 * i + odd_ofs)); \
- } \
-}
-
-DO_UZP(sve_uzp_b, uint8_t, H1)
-DO_UZP(sve_uzp_h, uint16_t, H1_2)
-DO_UZP(sve_uzp_s, uint32_t, H1_4)
-DO_UZP(sve_uzp_d, uint64_t, )
-
-#define DO_TRN(NAME, TYPE, H) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
-{ \
- intptr_t oprsz = simd_oprsz(desc); \
- intptr_t odd_ofs = simd_data(desc); \
- intptr_t i; \
- for (i = 0; i < oprsz; i += 2 * sizeof(TYPE)) { \
- TYPE ae = *(TYPE *)(vn + H(i + odd_ofs)); \
- TYPE be = *(TYPE *)(vm + H(i + odd_ofs)); \
- *(TYPE *)(vd + H(i + 0)) = ae; \
- *(TYPE *)(vd + H(i + sizeof(TYPE))) = be; \
- } \
-}
-
-DO_TRN(sve_trn_b, uint8_t, H1)
-DO_TRN(sve_trn_h, uint16_t, H1_2)
-DO_TRN(sve_trn_s, uint32_t, H1_4)
-DO_TRN(sve_trn_d, uint64_t, )
-
-#undef DO_ZIP
-#undef DO_UZP
-#undef DO_TRN
-
-void HELPER(sve_compact_s)(void *vd, void *vn, void *vg, uint32_t desc)
-{
- intptr_t i, j, opr_sz = simd_oprsz(desc) / 4;
- uint32_t *d = vd, *n = vn;
- uint8_t *pg = vg;
-
- for (i = j = 0; i < opr_sz; i++) {
- if (pg[H1(i / 2)] & (i & 1 ? 0x10 : 0x01)) {
- d[H4(j)] = n[H4(i)];
- j++;
- }
- }
- for (; j < opr_sz; j++) {
- d[H4(j)] = 0;
- }
-}
-
-void HELPER(sve_compact_d)(void *vd, void *vn, void *vg, uint32_t desc)
-{
- intptr_t i, j, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn;
- uint8_t *pg = vg;
-
- for (i = j = 0; i < opr_sz; i++) {
- if (pg[H1(i)] & 1) {
- d[j] = n[i];
- j++;
- }
- }
- for (; j < opr_sz; j++) {
- d[j] = 0;
- }
-}
-
-/* Similar to the ARM LastActiveElement pseudocode function, except the
- * result is multiplied by the element size. This includes the not found
- * indication; e.g. not found for esz=3 is -8.
- */
-int32_t HELPER(sve_last_active_element)(void *vg, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
-
- return last_active_element(vg, DIV_ROUND_UP(oprsz, 8), esz);
-}
-
-void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc)
-{
- intptr_t opr_sz = simd_oprsz(desc) / 8;
- int esz = simd_data(desc);
- uint64_t pg, first_g, last_g, len, mask = pred_esz_masks[esz];
- intptr_t i, first_i, last_i;
- ARMVectorReg tmp;
-
- first_i = last_i = 0;
- first_g = last_g = 0;
-
- /* Find the extent of the active elements within VG. */
- for (i = QEMU_ALIGN_UP(opr_sz, 8) - 8; i >= 0; i -= 8) {
- pg = *(uint64_t *)(vg + i) & mask;
- if (pg) {
- if (last_g == 0) {
- last_g = pg;
- last_i = i;
- }
- first_g = pg;
- first_i = i;
- }
- }
-
- len = 0;
- if (first_g != 0) {
- first_i = first_i * 8 + ctz64(first_g);
- last_i = last_i * 8 + 63 - clz64(last_g);
- len = last_i - first_i + (1 << esz);
- if (vd == vm) {
- vm = memcpy(&tmp, vm, opr_sz * 8);
- }
- swap_memmove(vd, vn + first_i, len);
- }
- swap_memmove(vd + len, vm, opr_sz * 8 - len);
-}
-
-void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm,
- void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn, *m = vm;
- uint8_t *pg = vg;
-
- for (i = 0; i < opr_sz; i += 1) {
- uint64_t nn = n[i], mm = m[i];
- uint64_t pp = expand_pred_b(pg[H1(i)]);
- d[i] = (nn & pp) | (mm & ~pp);
- }
-}
-
-void HELPER(sve_sel_zpzz_h)(void *vd, void *vn, void *vm,
- void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn, *m = vm;
- uint8_t *pg = vg;
-
- for (i = 0; i < opr_sz; i += 1) {
- uint64_t nn = n[i], mm = m[i];
- uint64_t pp = expand_pred_h(pg[H1(i)]);
- d[i] = (nn & pp) | (mm & ~pp);
- }
-}
-
-void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm,
- void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn, *m = vm;
- uint8_t *pg = vg;
-
- for (i = 0; i < opr_sz; i += 1) {
- uint64_t nn = n[i], mm = m[i];
- uint64_t pp = expand_pred_s(pg[H1(i)]);
- d[i] = (nn & pp) | (mm & ~pp);
- }
-}
-
-void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm,
- void *vg, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *d = vd, *n = vn, *m = vm;
- uint8_t *pg = vg;
-
- for (i = 0; i < opr_sz; i += 1) {
- uint64_t nn = n[i], mm = m[i];
- d[i] = (pg[H1(i)] & 1 ? nn : mm);
- }
-}
-
-/* Two operand comparison controlled by a predicate.
- * ??? It is very tempting to want to be able to expand this inline
- * with x86 instructions, e.g.
- *
- * vcmpeqw zm, zn, %ymm0
- * vpmovmskb %ymm0, %eax
- * and $0x5555, %eax
- * and pg, %eax
- *
- * or even aarch64, e.g.
- *
- * // mask = 4000 1000 0400 0100 0040 0010 0004 0001
- * cmeq v0.8h, zn, zm
- * and v0.8h, v0.8h, mask
- * addv h0, v0.8h
- * and v0.8b, pg
- *
- * However, coming up with an abstraction that allows vector inputs and
- * a scalar output, and also handles the byte-ordering of sub-uint64_t
- * scalar outputs, is tricky.
- */
-#define DO_CMP_PPZZ(NAME, TYPE, OP, H, MASK) \
-uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
-{ \
- intptr_t opr_sz = simd_oprsz(desc); \
- uint32_t flags = PREDTEST_INIT; \
- intptr_t i = opr_sz; \
- do { \
- uint64_t out = 0, pg; \
- do { \
- i -= sizeof(TYPE), out <<= sizeof(TYPE); \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- TYPE mm = *(TYPE *)(vm + H(i)); \
- out |= nn OP mm; \
- } while (i & 63); \
- pg = *(uint64_t *)(vg + (i >> 3)) & MASK; \
- out &= pg; \
- *(uint64_t *)(vd + (i >> 3)) = out; \
- flags = iter_predtest_bwd(out, pg, flags); \
- } while (i > 0); \
- return flags; \
-}
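-
-/* Note on the packing above: each element's 1-bit result is shifted into
- * OUT so that the bit for element J of an N-byte type ends up at bit
- * J * N of the 64-bit predicate word, matching SVE's bit-per-byte
- * predicate layout; the guard is masked to those same positions before
- * being ANDed in.
- */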
-
-#define DO_CMP_PPZZ_B(NAME, TYPE, OP) \
- DO_CMP_PPZZ(NAME, TYPE, OP, H1, 0xffffffffffffffffull)
-#define DO_CMP_PPZZ_H(NAME, TYPE, OP) \
- DO_CMP_PPZZ(NAME, TYPE, OP, H1_2, 0x5555555555555555ull)
-#define DO_CMP_PPZZ_S(NAME, TYPE, OP) \
- DO_CMP_PPZZ(NAME, TYPE, OP, H1_4, 0x1111111111111111ull)
-#define DO_CMP_PPZZ_D(NAME, TYPE, OP) \
- DO_CMP_PPZZ(NAME, TYPE, OP, , 0x0101010101010101ull)
-
-DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t, ==)
-DO_CMP_PPZZ_H(sve_cmpeq_ppzz_h, uint16_t, ==)
-DO_CMP_PPZZ_S(sve_cmpeq_ppzz_s, uint32_t, ==)
-DO_CMP_PPZZ_D(sve_cmpeq_ppzz_d, uint64_t, ==)
-
-DO_CMP_PPZZ_B(sve_cmpne_ppzz_b, uint8_t, !=)
-DO_CMP_PPZZ_H(sve_cmpne_ppzz_h, uint16_t, !=)
-DO_CMP_PPZZ_S(sve_cmpne_ppzz_s, uint32_t, !=)
-DO_CMP_PPZZ_D(sve_cmpne_ppzz_d, uint64_t, !=)
-
-DO_CMP_PPZZ_B(sve_cmpgt_ppzz_b, int8_t, >)
-DO_CMP_PPZZ_H(sve_cmpgt_ppzz_h, int16_t, >)
-DO_CMP_PPZZ_S(sve_cmpgt_ppzz_s, int32_t, >)
-DO_CMP_PPZZ_D(sve_cmpgt_ppzz_d, int64_t, >)
-
-DO_CMP_PPZZ_B(sve_cmpge_ppzz_b, int8_t, >=)
-DO_CMP_PPZZ_H(sve_cmpge_ppzz_h, int16_t, >=)
-DO_CMP_PPZZ_S(sve_cmpge_ppzz_s, int32_t, >=)
-DO_CMP_PPZZ_D(sve_cmpge_ppzz_d, int64_t, >=)
-
-DO_CMP_PPZZ_B(sve_cmphi_ppzz_b, uint8_t, >)
-DO_CMP_PPZZ_H(sve_cmphi_ppzz_h, uint16_t, >)
-DO_CMP_PPZZ_S(sve_cmphi_ppzz_s, uint32_t, >)
-DO_CMP_PPZZ_D(sve_cmphi_ppzz_d, uint64_t, >)
-
-DO_CMP_PPZZ_B(sve_cmphs_ppzz_b, uint8_t, >=)
-DO_CMP_PPZZ_H(sve_cmphs_ppzz_h, uint16_t, >=)
-DO_CMP_PPZZ_S(sve_cmphs_ppzz_s, uint32_t, >=)
-DO_CMP_PPZZ_D(sve_cmphs_ppzz_d, uint64_t, >=)
-
-#undef DO_CMP_PPZZ_B
-#undef DO_CMP_PPZZ_H
-#undef DO_CMP_PPZZ_S
-#undef DO_CMP_PPZZ_D
-#undef DO_CMP_PPZZ
-
-/* Similar, but the second source is "wide". */
-#define DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H, MASK) \
-uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
-{ \
- intptr_t opr_sz = simd_oprsz(desc); \
- uint32_t flags = PREDTEST_INIT; \
- intptr_t i = opr_sz; \
- do { \
- uint64_t out = 0, pg; \
- do { \
- TYPEW mm = *(TYPEW *)(vm + i - 8); \
- do { \
- i -= sizeof(TYPE), out <<= sizeof(TYPE); \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- out |= nn OP mm; \
- } while (i & 7); \
- } while (i & 63); \
- pg = *(uint64_t *)(vg + (i >> 3)) & MASK; \
- out &= pg; \
- *(uint64_t *)(vd + (i >> 3)) = out; \
- flags = iter_predtest_bwd(out, pg, flags); \
- } while (i > 0); \
- return flags; \
-}
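-
-/* In the "wide" variants above, MM is a single 64-bit element of ZM,
- * reloaded once per 8-byte chunk and compared against every narrow
- * element of ZN within that chunk.
- */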
-
-#define DO_CMP_PPZW_B(NAME, TYPE, TYPEW, OP) \
- DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1, 0xffffffffffffffffull)
-#define DO_CMP_PPZW_H(NAME, TYPE, TYPEW, OP) \
- DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_2, 0x5555555555555555ull)
-#define DO_CMP_PPZW_S(NAME, TYPE, TYPEW, OP) \
- DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_4, 0x1111111111111111ull)
-
-DO_CMP_PPZW_B(sve_cmpeq_ppzw_b, int8_t, uint64_t, ==)
-DO_CMP_PPZW_H(sve_cmpeq_ppzw_h, int16_t, uint64_t, ==)
-DO_CMP_PPZW_S(sve_cmpeq_ppzw_s, int32_t, uint64_t, ==)
-
-DO_CMP_PPZW_B(sve_cmpne_ppzw_b, int8_t, uint64_t, !=)
-DO_CMP_PPZW_H(sve_cmpne_ppzw_h, int16_t, uint64_t, !=)
-DO_CMP_PPZW_S(sve_cmpne_ppzw_s, int32_t, uint64_t, !=)
-
-DO_CMP_PPZW_B(sve_cmpgt_ppzw_b, int8_t, int64_t, >)
-DO_CMP_PPZW_H(sve_cmpgt_ppzw_h, int16_t, int64_t, >)
-DO_CMP_PPZW_S(sve_cmpgt_ppzw_s, int32_t, int64_t, >)
-
-DO_CMP_PPZW_B(sve_cmpge_ppzw_b, int8_t, int64_t, >=)
-DO_CMP_PPZW_H(sve_cmpge_ppzw_h, int16_t, int64_t, >=)
-DO_CMP_PPZW_S(sve_cmpge_ppzw_s, int32_t, int64_t, >=)
-
-DO_CMP_PPZW_B(sve_cmphi_ppzw_b, uint8_t, uint64_t, >)
-DO_CMP_PPZW_H(sve_cmphi_ppzw_h, uint16_t, uint64_t, >)
-DO_CMP_PPZW_S(sve_cmphi_ppzw_s, uint32_t, uint64_t, >)
-
-DO_CMP_PPZW_B(sve_cmphs_ppzw_b, uint8_t, uint64_t, >=)
-DO_CMP_PPZW_H(sve_cmphs_ppzw_h, uint16_t, uint64_t, >=)
-DO_CMP_PPZW_S(sve_cmphs_ppzw_s, uint32_t, uint64_t, >=)
-
-DO_CMP_PPZW_B(sve_cmplt_ppzw_b, int8_t, int64_t, <)
-DO_CMP_PPZW_H(sve_cmplt_ppzw_h, int16_t, int64_t, <)
-DO_CMP_PPZW_S(sve_cmplt_ppzw_s, int32_t, int64_t, <)
-
-DO_CMP_PPZW_B(sve_cmple_ppzw_b, int8_t, int64_t, <=)
-DO_CMP_PPZW_H(sve_cmple_ppzw_h, int16_t, int64_t, <=)
-DO_CMP_PPZW_S(sve_cmple_ppzw_s, int32_t, int64_t, <=)
-
-DO_CMP_PPZW_B(sve_cmplo_ppzw_b, uint8_t, uint64_t, <)
-DO_CMP_PPZW_H(sve_cmplo_ppzw_h, uint16_t, uint64_t, <)
-DO_CMP_PPZW_S(sve_cmplo_ppzw_s, uint32_t, uint64_t, <)
-
-DO_CMP_PPZW_B(sve_cmpls_ppzw_b, uint8_t, uint64_t, <=)
-DO_CMP_PPZW_H(sve_cmpls_ppzw_h, uint16_t, uint64_t, <=)
-DO_CMP_PPZW_S(sve_cmpls_ppzw_s, uint32_t, uint64_t, <=)
-
-#undef DO_CMP_PPZW_B
-#undef DO_CMP_PPZW_H
-#undef DO_CMP_PPZW_S
-#undef DO_CMP_PPZW
-
-/* Similar, but the second source is immediate. */
-#define DO_CMP_PPZI(NAME, TYPE, OP, H, MASK) \
-uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
-{ \
- intptr_t opr_sz = simd_oprsz(desc); \
- uint32_t flags = PREDTEST_INIT; \
- TYPE mm = simd_data(desc); \
- intptr_t i = opr_sz; \
- do { \
- uint64_t out = 0, pg; \
- do { \
- i -= sizeof(TYPE), out <<= sizeof(TYPE); \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- out |= nn OP mm; \
- } while (i & 63); \
- pg = *(uint64_t *)(vg + (i >> 3)) & MASK; \
- out &= pg; \
- *(uint64_t *)(vd + (i >> 3)) = out; \
- flags = iter_predtest_bwd(out, pg, flags); \
- } while (i > 0); \
- return flags; \
-}
-
-#define DO_CMP_PPZI_B(NAME, TYPE, OP) \
- DO_CMP_PPZI(NAME, TYPE, OP, H1, 0xffffffffffffffffull)
-#define DO_CMP_PPZI_H(NAME, TYPE, OP) \
- DO_CMP_PPZI(NAME, TYPE, OP, H1_2, 0x5555555555555555ull)
-#define DO_CMP_PPZI_S(NAME, TYPE, OP) \
- DO_CMP_PPZI(NAME, TYPE, OP, H1_4, 0x1111111111111111ull)
-#define DO_CMP_PPZI_D(NAME, TYPE, OP) \
- DO_CMP_PPZI(NAME, TYPE, OP, , 0x0101010101010101ull)
-
-DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t, ==)
-DO_CMP_PPZI_H(sve_cmpeq_ppzi_h, uint16_t, ==)
-DO_CMP_PPZI_S(sve_cmpeq_ppzi_s, uint32_t, ==)
-DO_CMP_PPZI_D(sve_cmpeq_ppzi_d, uint64_t, ==)
-
-DO_CMP_PPZI_B(sve_cmpne_ppzi_b, uint8_t, !=)
-DO_CMP_PPZI_H(sve_cmpne_ppzi_h, uint16_t, !=)
-DO_CMP_PPZI_S(sve_cmpne_ppzi_s, uint32_t, !=)
-DO_CMP_PPZI_D(sve_cmpne_ppzi_d, uint64_t, !=)
-
-DO_CMP_PPZI_B(sve_cmpgt_ppzi_b, int8_t, >)
-DO_CMP_PPZI_H(sve_cmpgt_ppzi_h, int16_t, >)
-DO_CMP_PPZI_S(sve_cmpgt_ppzi_s, int32_t, >)
-DO_CMP_PPZI_D(sve_cmpgt_ppzi_d, int64_t, >)
-
-DO_CMP_PPZI_B(sve_cmpge_ppzi_b, int8_t, >=)
-DO_CMP_PPZI_H(sve_cmpge_ppzi_h, int16_t, >=)
-DO_CMP_PPZI_S(sve_cmpge_ppzi_s, int32_t, >=)
-DO_CMP_PPZI_D(sve_cmpge_ppzi_d, int64_t, >=)
-
-DO_CMP_PPZI_B(sve_cmphi_ppzi_b, uint8_t, >)
-DO_CMP_PPZI_H(sve_cmphi_ppzi_h, uint16_t, >)
-DO_CMP_PPZI_S(sve_cmphi_ppzi_s, uint32_t, >)
-DO_CMP_PPZI_D(sve_cmphi_ppzi_d, uint64_t, >)
-
-DO_CMP_PPZI_B(sve_cmphs_ppzi_b, uint8_t, >=)
-DO_CMP_PPZI_H(sve_cmphs_ppzi_h, uint16_t, >=)
-DO_CMP_PPZI_S(sve_cmphs_ppzi_s, uint32_t, >=)
-DO_CMP_PPZI_D(sve_cmphs_ppzi_d, uint64_t, >=)
-
-DO_CMP_PPZI_B(sve_cmplt_ppzi_b, int8_t, <)
-DO_CMP_PPZI_H(sve_cmplt_ppzi_h, int16_t, <)
-DO_CMP_PPZI_S(sve_cmplt_ppzi_s, int32_t, <)
-DO_CMP_PPZI_D(sve_cmplt_ppzi_d, int64_t, <)
-
-DO_CMP_PPZI_B(sve_cmple_ppzi_b, int8_t, <=)
-DO_CMP_PPZI_H(sve_cmple_ppzi_h, int16_t, <=)
-DO_CMP_PPZI_S(sve_cmple_ppzi_s, int32_t, <=)
-DO_CMP_PPZI_D(sve_cmple_ppzi_d, int64_t, <=)
-
-DO_CMP_PPZI_B(sve_cmplo_ppzi_b, uint8_t, <)
-DO_CMP_PPZI_H(sve_cmplo_ppzi_h, uint16_t, <)
-DO_CMP_PPZI_S(sve_cmplo_ppzi_s, uint32_t, <)
-DO_CMP_PPZI_D(sve_cmplo_ppzi_d, uint64_t, <)
-
-DO_CMP_PPZI_B(sve_cmpls_ppzi_b, uint8_t, <=)
-DO_CMP_PPZI_H(sve_cmpls_ppzi_h, uint16_t, <=)
-DO_CMP_PPZI_S(sve_cmpls_ppzi_s, uint32_t, <=)
-DO_CMP_PPZI_D(sve_cmpls_ppzi_d, uint64_t, <=)
-
-#undef DO_CMP_PPZI_B
-#undef DO_CMP_PPZI_H
-#undef DO_CMP_PPZI_S
-#undef DO_CMP_PPZI_D
-#undef DO_CMP_PPZI
-
-/* Similar to the ARM LastActive pseudocode function. */
-static bool last_active_pred(void *vd, void *vg, intptr_t oprsz)
-{
- intptr_t i;
-
- for (i = QEMU_ALIGN_UP(oprsz, 8) - 8; i >= 0; i -= 8) {
- uint64_t pg = *(uint64_t *)(vg + i);
- if (pg) {
- return (pow2floor(pg) & *(uint64_t *)(vd + i)) != 0;
- }
- }
- return 0;
-}
-
-/* Compute a mask into RETB that is true for all G, up to and including
- * (if after) or excluding (if !after) the first G & N.
- * Return true if BRK found.
- */
-static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g,
- bool brk, bool after)
-{
- uint64_t b;
-
- if (brk) {
- b = 0;
- } else if ((g & n) == 0) {
- /* For all G, no N are set; break not found. */
- b = g;
- } else {
- /* Break somewhere in N. Locate it. */
- b = g & n; /* guard true, pred true */
- b = b & -b; /* first such */
- if (after) {
- b = b | (b - 1); /* break after same */
- } else {
- b = b - 1; /* break before same */
- }
- brk = true;
- }
-
- *retb = b;
- return brk;
-}
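-
-/* Worked example: with g == 0b1111 and n == 0b0100, the first active
- * true element is bit 2, so B is 0b0111 for break-after and 0b0011 for
- * break-before; once BRK is returned true, every later word gets B == 0.
- */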
-
-/* Compute a zeroing BRK. */
-static void compute_brk_z(uint64_t *d, uint64_t *n, uint64_t *g,
- intptr_t oprsz, bool after)
-{
- bool brk = false;
- intptr_t i;
-
- for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
- uint64_t this_b, this_g = g[i];
-
- brk = compute_brk(&this_b, n[i], this_g, brk, after);
- d[i] = this_b & this_g;
- }
-}
-
-/* Likewise, but also compute flags. */
-static uint32_t compute_brks_z(uint64_t *d, uint64_t *n, uint64_t *g,
- intptr_t oprsz, bool after)
-{
- uint32_t flags = PREDTEST_INIT;
- bool brk = false;
- intptr_t i;
-
- for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
- uint64_t this_b, this_d, this_g = g[i];
-
- brk = compute_brk(&this_b, n[i], this_g, brk, after);
- d[i] = this_d = this_b & this_g;
- flags = iter_predtest_fwd(this_d, this_g, flags);
- }
- return flags;
-}
-
-/* Compute a merging BRK. */
-static void compute_brk_m(uint64_t *d, uint64_t *n, uint64_t *g,
- intptr_t oprsz, bool after)
-{
- bool brk = false;
- intptr_t i;
-
- for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
- uint64_t this_b, this_g = g[i];
-
- brk = compute_brk(&this_b, n[i], this_g, brk, after);
- d[i] = (this_b & this_g) | (d[i] & ~this_g);
- }
-}
-
-/* Likewise, but also compute flags. */
-static uint32_t compute_brks_m(uint64_t *d, uint64_t *n, uint64_t *g,
- intptr_t oprsz, bool after)
-{
- uint32_t flags = PREDTEST_INIT;
- bool brk = false;
- intptr_t i;
-
- for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
- uint64_t this_b, this_d = d[i], this_g = g[i];
-
- brk = compute_brk(&this_b, n[i], this_g, brk, after);
- d[i] = this_d = (this_b & this_g) | (this_d & ~this_g);
- flags = iter_predtest_fwd(this_d, this_g, flags);
- }
- return flags;
-}
-
-static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz)
-{
- /* It is quicker to zero the whole predicate than loop on OPRSZ.
- * The compiler should turn this into 4 64-bit integer stores.
- */
- memset(d, 0, sizeof(ARMPredicateReg));
- return PREDTEST_INIT;
-}
-
-void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg,
- uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- if (last_active_pred(vn, vg, oprsz)) {
- compute_brk_z(vd, vm, vg, oprsz, true);
- } else {
- do_zero(vd, oprsz);
- }
-}
-
-uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg,
- uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- if (last_active_pred(vn, vg, oprsz)) {
- return compute_brks_z(vd, vm, vg, oprsz, true);
- } else {
- return do_zero(vd, oprsz);
- }
-}
-
-void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg,
- uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- if (last_active_pred(vn, vg, oprsz)) {
- compute_brk_z(vd, vm, vg, oprsz, false);
- } else {
- do_zero(vd, oprsz);
- }
-}
-
-uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg,
- uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- if (last_active_pred(vn, vg, oprsz)) {
- return compute_brks_z(vd, vm, vg, oprsz, false);
- } else {
- return do_zero(vd, oprsz);
- }
-}
-
-void HELPER(sve_brka_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- compute_brk_z(vd, vn, vg, oprsz, true);
-}
-
-uint32_t HELPER(sve_brkas_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- return compute_brks_z(vd, vn, vg, oprsz, true);
-}
-
-void HELPER(sve_brkb_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- compute_brk_z(vd, vn, vg, oprsz, false);
-}
-
-uint32_t HELPER(sve_brkbs_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- return compute_brks_z(vd, vn, vg, oprsz, false);
-}
-
-void HELPER(sve_brka_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- compute_brk_m(vd, vn, vg, oprsz, true);
-}
-
-uint32_t HELPER(sve_brkas_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- return compute_brks_m(vd, vn, vg, oprsz, true);
-}
-
-void HELPER(sve_brkb_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- compute_brk_m(vd, vn, vg, oprsz, false);
-}
-
-uint32_t HELPER(sve_brkbs_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- return compute_brks_m(vd, vn, vg, oprsz, false);
-}
-
-void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
-
- if (!last_active_pred(vn, vg, oprsz)) {
- do_zero(vd, oprsz);
- }
-}
-
-/* As if PredTest(Ones(PL), D, esz). */
-static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz,
- uint64_t esz_mask)
-{
- uint32_t flags = PREDTEST_INIT;
- intptr_t i;
-
- for (i = 0; i < oprsz / 8; i++) {
- flags = iter_predtest_fwd(d->p[i], esz_mask, flags);
- }
- if (oprsz & 7) {
- uint64_t mask = ~(-1ULL << (8 * (oprsz & 7)));
- flags = iter_predtest_fwd(d->p[i], esz_mask & mask, flags);
- }
- return flags;
-}
-
-uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
-
- if (last_active_pred(vn, vg, oprsz)) {
- return predtest_ones(vd, oprsz, -1);
- } else {
- return do_zero(vd, oprsz);
- }
-}
-
-uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
-{
- intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
- uint64_t *n = vn, *g = vg, sum = 0, mask = pred_esz_masks[esz];
- intptr_t i;
-
- for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
- uint64_t t = n[i] & g[i] & mask;
- sum += ctpop64(t);
- }
- return sum;
-}
-
-uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc)
-{
- uintptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
- intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
- uint64_t esz_mask = pred_esz_masks[esz];
- ARMPredicateReg *d = vd;
- uint32_t flags;
- intptr_t i;
-
- /* Begin with a zero predicate register. */
- flags = do_zero(d, oprsz);
- if (count == 0) {
- return flags;
- }
-
- /* Set all of the requested bits. */
- for (i = 0; i < count / 64; ++i) {
- d->p[i] = esz_mask;
- }
- if (count & 63) {
- d->p[i] = MAKE_64BIT_MASK(0, count & 63) & esz_mask;
- }
-
- return predtest_ones(d, oprsz, esz_mask);
-}
-
-/* Recursive reduction over a binary function;
- * cf. the ARM ARM function ReducePredicated.
- *
- * While it would be possible to write this without the DATA temporary,
- * it is much simpler to process the predicate register this way.
- * The recursion is bounded to depth 7 (128 fp16 elements), so there's
- * little to gain with a more complex non-recursive form.
- */
-#define DO_REDUCE(NAME, TYPE, H, FUNC, IDENT) \
-static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \
-{ \
- if (n == 1) { \
- return *data; \
- } else { \
- uintptr_t half = n / 2; \
- TYPE lo = NAME##_reduce(data, status, half); \
- TYPE hi = NAME##_reduce(data + half, status, half); \
- return TYPE##_##FUNC(lo, hi, status); \
- } \
-} \
-uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \
-{ \
- uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_maxsz(desc); \
- TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \
- for (i = 0; i < oprsz; ) { \
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
- do { \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- *(TYPE *)((void *)data + i) = (pg & 1 ? nn : IDENT); \
- i += sizeof(TYPE), pg >>= sizeof(TYPE); \
- } while (i & 15); \
- } \
- for (; i < maxsz; i += sizeof(TYPE)) { \
- *(TYPE *)((void *)data + i) = IDENT; \
- } \
- return NAME##_reduce(data, vs, maxsz / sizeof(TYPE)); \
-}
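-
-/* The reduction above forms a balanced binary tree: inactive and trailing
- * lanes are preset to IDENT, and for e.g. four elements the result is
- * FUNC(FUNC(d0, d1), FUNC(d2, d3)) rather than a linear left-to-right
- * fold.  The recursion relies on MAXSZ / sizeof(TYPE) being a power of
- * two, so the halving is always exact.
- */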
-
-DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero)
-DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero)
-DO_REDUCE(sve_faddv_d, float64, , add, float64_zero)
-
-/* Identity is floatN_default_nan, without the function call. */
-DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00)
-DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000)
-DO_REDUCE(sve_fminnmv_d, float64, , minnum, 0x7FF8000000000000ULL)
-
-DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00)
-DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000)
-DO_REDUCE(sve_fmaxnmv_d, float64, , maxnum, 0x7FF8000000000000ULL)
-
-DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity)
-DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity)
-DO_REDUCE(sve_fminv_d, float64, , min, float64_infinity)
-
-DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity))
-DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity))
-DO_REDUCE(sve_fmaxv_d, float64, , max, float64_chs(float64_infinity))
-
-#undef DO_REDUCE
-
-uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg,
- void *status, uint32_t desc)
-{
- intptr_t i = 0, opr_sz = simd_oprsz(desc);
- float16 result = nn;
-
- do {
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
- do {
- if (pg & 1) {
- float16 mm = *(float16 *)(vm + H1_2(i));
- result = float16_add(result, mm, status);
- }
- i += sizeof(float16), pg >>= sizeof(float16);
- } while (i & 15);
- } while (i < opr_sz);
-
- return result;
-}
-
-uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg,
- void *status, uint32_t desc)
-{
- intptr_t i = 0, opr_sz = simd_oprsz(desc);
- float32 result = nn;
-
- do {
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
- do {
- if (pg & 1) {
- float32 mm = *(float32 *)(vm + H1_2(i));
- result = float32_add(result, mm, status);
- }
- i += sizeof(float32), pg >>= sizeof(float32);
- } while (i & 15);
- } while (i < opr_sz);
-
- return result;
-}
-
-uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg,
- void *status, uint32_t desc)
-{
- intptr_t i = 0, opr_sz = simd_oprsz(desc) / 8;
- uint64_t *m = vm;
- uint8_t *pg = vg;
-
- for (i = 0; i < opr_sz; i++) {
- if (pg[H1(i)] & 1) {
- nn = float64_add(nn, m[i], status);
- }
- }
-
- return nn;
-}
-
-/* Fully general three-operand expander, controlled by a predicate,
- * with the extra float_status parameter.
- */
-#define DO_ZPZZ_FP(NAME, TYPE, H, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
- void *status, uint32_t desc) \
-{ \
- intptr_t i = simd_oprsz(desc); \
- uint64_t *g = vg; \
- do { \
- uint64_t pg = g[(i - 1) >> 6]; \
- do { \
- i -= sizeof(TYPE); \
- if (likely((pg >> (i & 63)) & 1)) { \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- TYPE mm = *(TYPE *)(vm + H(i)); \
- *(TYPE *)(vd + H(i)) = OP(nn, mm, status); \
- } \
- } while (i & 63); \
- } while (i != 0); \
-}
-
-DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add)
-DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add)
-DO_ZPZZ_FP(sve_fadd_d, uint64_t, , float64_add)
-
-DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub)
-DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub)
-DO_ZPZZ_FP(sve_fsub_d, uint64_t, , float64_sub)
-
-DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul)
-DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul)
-DO_ZPZZ_FP(sve_fmul_d, uint64_t, , float64_mul)
-
-DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div)
-DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div)
-DO_ZPZZ_FP(sve_fdiv_d, uint64_t, , float64_div)
-
-DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min)
-DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min)
-DO_ZPZZ_FP(sve_fmin_d, uint64_t, , float64_min)
-
-DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max)
-DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max)
-DO_ZPZZ_FP(sve_fmax_d, uint64_t, , float64_max)
-
-DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum)
-DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum)
-DO_ZPZZ_FP(sve_fminnum_d, uint64_t, , float64_minnum)
-
-DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum)
-DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum)
-DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, , float64_maxnum)
-
-static inline float16 abd_h(float16 a, float16 b, float_status *s)
-{
- return float16_abs(float16_sub(a, b, s));
-}
-
-static inline float32 abd_s(float32 a, float32 b, float_status *s)
-{
- return float32_abs(float32_sub(a, b, s));
-}
-
-static inline float64 abd_d(float64 a, float64 b, float_status *s)
-{
- return float64_abs(float64_sub(a, b, s));
-}
-
-DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h)
-DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s)
-DO_ZPZZ_FP(sve_fabd_d, uint64_t, , abd_d)
-
-static inline float64 scalbn_d(float64 a, int64_t b, float_status *s)
-{
- int b_int = MIN(MAX(b, INT_MIN), INT_MAX);
- return float64_scalbn(a, b_int, s);
-}
-
-DO_ZPZZ_FP(sve_fscalbn_h, int16_t, H1_2, float16_scalbn)
-DO_ZPZZ_FP(sve_fscalbn_s, int32_t, H1_4, float32_scalbn)
-DO_ZPZZ_FP(sve_fscalbn_d, int64_t, , scalbn_d)
-
-DO_ZPZZ_FP(sve_fmulx_h, uint16_t, H1_2, helper_advsimd_mulxh)
-DO_ZPZZ_FP(sve_fmulx_s, uint32_t, H1_4, helper_vfp_mulxs)
-DO_ZPZZ_FP(sve_fmulx_d, uint64_t, , helper_vfp_mulxd)
-
-#undef DO_ZPZZ_FP
-
-/* Three-operand expander, with one scalar operand, controlled by
- * a predicate, with the extra float_status parameter.
- */
-#define DO_ZPZS_FP(NAME, TYPE, H, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \
- void *status, uint32_t desc) \
-{ \
- intptr_t i = simd_oprsz(desc); \
- uint64_t *g = vg; \
- TYPE mm = scalar; \
- do { \
- uint64_t pg = g[(i - 1) >> 6]; \
- do { \
- i -= sizeof(TYPE); \
- if (likely((pg >> (i & 63)) & 1)) { \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- *(TYPE *)(vd + H(i)) = OP(nn, mm, status); \
- } \
- } while (i & 63); \
- } while (i != 0); \
-}
-
-DO_ZPZS_FP(sve_fadds_h, float16, H1_2, float16_add)
-DO_ZPZS_FP(sve_fadds_s, float32, H1_4, float32_add)
-DO_ZPZS_FP(sve_fadds_d, float64, , float64_add)
-
-DO_ZPZS_FP(sve_fsubs_h, float16, H1_2, float16_sub)
-DO_ZPZS_FP(sve_fsubs_s, float32, H1_4, float32_sub)
-DO_ZPZS_FP(sve_fsubs_d, float64, , float64_sub)
-
-DO_ZPZS_FP(sve_fmuls_h, float16, H1_2, float16_mul)
-DO_ZPZS_FP(sve_fmuls_s, float32, H1_4, float32_mul)
-DO_ZPZS_FP(sve_fmuls_d, float64, , float64_mul)
-
-static inline float16 subr_h(float16 a, float16 b, float_status *s)
-{
- return float16_sub(b, a, s);
-}
-
-static inline float32 subr_s(float32 a, float32 b, float_status *s)
-{
- return float32_sub(b, a, s);
-}
-
-static inline float64 subr_d(float64 a, float64 b, float_status *s)
-{
- return float64_sub(b, a, s);
-}
-
-DO_ZPZS_FP(sve_fsubrs_h, float16, H1_2, subr_h)
-DO_ZPZS_FP(sve_fsubrs_s, float32, H1_4, subr_s)
-DO_ZPZS_FP(sve_fsubrs_d, float64, , subr_d)
-
-DO_ZPZS_FP(sve_fmaxnms_h, float16, H1_2, float16_maxnum)
-DO_ZPZS_FP(sve_fmaxnms_s, float32, H1_4, float32_maxnum)
-DO_ZPZS_FP(sve_fmaxnms_d, float64, , float64_maxnum)
-
-DO_ZPZS_FP(sve_fminnms_h, float16, H1_2, float16_minnum)
-DO_ZPZS_FP(sve_fminnms_s, float32, H1_4, float32_minnum)
-DO_ZPZS_FP(sve_fminnms_d, float64, , float64_minnum)
-
-DO_ZPZS_FP(sve_fmaxs_h, float16, H1_2, float16_max)
-DO_ZPZS_FP(sve_fmaxs_s, float32, H1_4, float32_max)
-DO_ZPZS_FP(sve_fmaxs_d, float64, , float64_max)
-
-DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min)
-DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min)
-DO_ZPZS_FP(sve_fmins_d, float64, , float64_min)
-
-/* Fully general two-operand expander, controlled by a predicate,
- * with the extra float_status parameter.
- */
-#define DO_ZPZ_FP(NAME, TYPE, H, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \
-{ \
- intptr_t i = simd_oprsz(desc); \
- uint64_t *g = vg; \
- do { \
- uint64_t pg = g[(i - 1) >> 6]; \
- do { \
- i -= sizeof(TYPE); \
- if (likely((pg >> (i & 63)) & 1)) { \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- *(TYPE *)(vd + H(i)) = OP(nn, status); \
- } \
- } while (i & 63); \
- } while (i != 0); \
-}
-
-/* SVE fp16 conversions always use IEEE mode. Like AdvSIMD, they ignore
- * FZ16. When converting from fp16, this affects flushing input denormals;
- * when converting to fp16, this affects flushing output denormals.
- */
-static inline float32 sve_f16_to_f32(float16 f, float_status *fpst)
-{
- flag save = get_flush_inputs_to_zero(fpst);
- float32 ret;
-
- set_flush_inputs_to_zero(false, fpst);
- ret = float16_to_float32(f, true, fpst);
- set_flush_inputs_to_zero(save, fpst);
- return ret;
-}
-
-static inline float64 sve_f16_to_f64(float16 f, float_status *fpst)
-{
- flag save = get_flush_inputs_to_zero(fpst);
- float64 ret;
-
- set_flush_inputs_to_zero(false, fpst);
- ret = float16_to_float64(f, true, fpst);
- set_flush_inputs_to_zero(save, fpst);
- return ret;
-}
-
-static inline float16 sve_f32_to_f16(float32 f, float_status *fpst)
-{
- flag save = get_flush_to_zero(fpst);
- float16 ret;
-
- set_flush_to_zero(false, fpst);
- ret = float32_to_float16(f, true, fpst);
- set_flush_to_zero(save, fpst);
- return ret;
-}
-
-static inline float16 sve_f64_to_f16(float64 f, float_status *fpst)
-{
- flag save = get_flush_to_zero(fpst);
- float16 ret;
-
- set_flush_to_zero(false, fpst);
- ret = float64_to_float16(f, true, fpst);
- set_flush_to_zero(save, fpst);
- return ret;
-}
-
-static inline int16_t vfp_float16_to_int16_rtz(float16 f, float_status *s)
-{
- if (float16_is_any_nan(f)) {
- float_raise(float_flag_invalid, s);
- return 0;
- }
- return float16_to_int16_round_to_zero(f, s);
-}
-
-static inline int64_t vfp_float16_to_int64_rtz(float16 f, float_status *s)
-{
- if (float16_is_any_nan(f)) {
- float_raise(float_flag_invalid, s);
- return 0;
- }
- return float16_to_int64_round_to_zero(f, s);
-}
-
-static inline int64_t vfp_float32_to_int64_rtz(float32 f, float_status *s)
-{
- if (float32_is_any_nan(f)) {
- float_raise(float_flag_invalid, s);
- return 0;
- }
- return float32_to_int64_round_to_zero(f, s);
-}
-
-static inline int64_t vfp_float64_to_int64_rtz(float64 f, float_status *s)
-{
- if (float64_is_any_nan(f)) {
- float_raise(float_flag_invalid, s);
- return 0;
- }
- return float64_to_int64_round_to_zero(f, s);
-}
-
-static inline uint16_t vfp_float16_to_uint16_rtz(float16 f, float_status *s)
-{
- if (float16_is_any_nan(f)) {
- float_raise(float_flag_invalid, s);
- return 0;
- }
- return float16_to_uint16_round_to_zero(f, s);
-}
-
-static inline uint64_t vfp_float16_to_uint64_rtz(float16 f, float_status *s)
-{
- if (float16_is_any_nan(f)) {
- float_raise(float_flag_invalid, s);
- return 0;
- }
- return float16_to_uint64_round_to_zero(f, s);
-}
-
-static inline uint64_t vfp_float32_to_uint64_rtz(float32 f, float_status *s)
-{
- if (float32_is_any_nan(f)) {
- float_raise(float_flag_invalid, s);
- return 0;
- }
- return float32_to_uint64_round_to_zero(f, s);
-}
-
-static inline uint64_t vfp_float64_to_uint64_rtz(float64 f, float_status *s)
-{
- if (float64_is_any_nan(f)) {
- float_raise(float_flag_invalid, s);
- return 0;
- }
- return float64_to_uint64_round_to_zero(f, s);
-}
-
-DO_ZPZ_FP(sve_fcvt_sh, uint32_t, H1_4, sve_f32_to_f16)
-DO_ZPZ_FP(sve_fcvt_hs, uint32_t, H1_4, sve_f16_to_f32)
-DO_ZPZ_FP(sve_fcvt_dh, uint64_t, , sve_f64_to_f16)
-DO_ZPZ_FP(sve_fcvt_hd, uint64_t, , sve_f16_to_f64)
-DO_ZPZ_FP(sve_fcvt_ds, uint64_t, , float64_to_float32)
-DO_ZPZ_FP(sve_fcvt_sd, uint64_t, , float32_to_float64)
-
-DO_ZPZ_FP(sve_fcvtzs_hh, uint16_t, H1_2, vfp_float16_to_int16_rtz)
-DO_ZPZ_FP(sve_fcvtzs_hs, uint32_t, H1_4, helper_vfp_tosizh)
-DO_ZPZ_FP(sve_fcvtzs_ss, uint32_t, H1_4, helper_vfp_tosizs)
-DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, , vfp_float16_to_int64_rtz)
-DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, , vfp_float32_to_int64_rtz)
-DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, , helper_vfp_tosizd)
-DO_ZPZ_FP(sve_fcvtzs_dd, uint64_t, , vfp_float64_to_int64_rtz)
-
-DO_ZPZ_FP(sve_fcvtzu_hh, uint16_t, H1_2, vfp_float16_to_uint16_rtz)
-DO_ZPZ_FP(sve_fcvtzu_hs, uint32_t, H1_4, helper_vfp_touizh)
-DO_ZPZ_FP(sve_fcvtzu_ss, uint32_t, H1_4, helper_vfp_touizs)
-DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, , vfp_float16_to_uint64_rtz)
-DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, , vfp_float32_to_uint64_rtz)
-DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, , helper_vfp_touizd)
-DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, , vfp_float64_to_uint64_rtz)
-
-DO_ZPZ_FP(sve_frint_h, uint16_t, H1_2, helper_advsimd_rinth)
-DO_ZPZ_FP(sve_frint_s, uint32_t, H1_4, helper_rints)
-DO_ZPZ_FP(sve_frint_d, uint64_t, , helper_rintd)
-
-DO_ZPZ_FP(sve_frintx_h, uint16_t, H1_2, float16_round_to_int)
-DO_ZPZ_FP(sve_frintx_s, uint32_t, H1_4, float32_round_to_int)
-DO_ZPZ_FP(sve_frintx_d, uint64_t, , float64_round_to_int)
-
-DO_ZPZ_FP(sve_frecpx_h, uint16_t, H1_2, helper_frecpx_f16)
-DO_ZPZ_FP(sve_frecpx_s, uint32_t, H1_4, helper_frecpx_f32)
-DO_ZPZ_FP(sve_frecpx_d, uint64_t, , helper_frecpx_f64)
-
-DO_ZPZ_FP(sve_fsqrt_h, uint16_t, H1_2, float16_sqrt)
-DO_ZPZ_FP(sve_fsqrt_s, uint32_t, H1_4, float32_sqrt)
-DO_ZPZ_FP(sve_fsqrt_d, uint64_t, , float64_sqrt)
-
-DO_ZPZ_FP(sve_scvt_hh, uint16_t, H1_2, int16_to_float16)
-DO_ZPZ_FP(sve_scvt_sh, uint32_t, H1_4, int32_to_float16)
-DO_ZPZ_FP(sve_scvt_ss, uint32_t, H1_4, int32_to_float32)
-DO_ZPZ_FP(sve_scvt_sd, uint64_t, , int32_to_float64)
-DO_ZPZ_FP(sve_scvt_dh, uint64_t, , int64_to_float16)
-DO_ZPZ_FP(sve_scvt_ds, uint64_t, , int64_to_float32)
-DO_ZPZ_FP(sve_scvt_dd, uint64_t, , int64_to_float64)
-
-DO_ZPZ_FP(sve_ucvt_hh, uint16_t, H1_2, uint16_to_float16)
-DO_ZPZ_FP(sve_ucvt_sh, uint32_t, H1_4, uint32_to_float16)
-DO_ZPZ_FP(sve_ucvt_ss, uint32_t, H1_4, uint32_to_float32)
-DO_ZPZ_FP(sve_ucvt_sd, uint64_t, , uint32_to_float64)
-DO_ZPZ_FP(sve_ucvt_dh, uint64_t, , uint64_to_float16)
-DO_ZPZ_FP(sve_ucvt_ds, uint64_t, , uint64_to_float32)
-DO_ZPZ_FP(sve_ucvt_dd, uint64_t, , uint64_to_float64)
-
-#undef DO_ZPZ_FP
-
-/* 4-operand predicated multiply-add. This requires 7 operands to pass
- * "properly", so we need to encode some of the registers into DESC.
- */
-QEMU_BUILD_BUG_ON(SIMD_DATA_SHIFT + 20 > 32);
-
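-/* In the DESC layout used below, rd occupies simd_data bits [4:0], rn
- * bits [9:5], rm bits [14:10] and ra bits [19:15], which accounts for
- * the 20 bits checked above.  The NEG1/NEG3 arguments simply XOR the
- * sign bit into the first and third operands, so the same routine
- * serves FMLA, FMLS, FNMLA and FNMLS (see the wrappers that follow
- * each expander).
- */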
-static void do_fmla_zpzzz_h(CPUARMState *env, void *vg, uint32_t desc,
- uint16_t neg1, uint16_t neg3)
-{
- intptr_t i = simd_oprsz(desc);
- unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5);
- unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5);
- unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5);
- unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5);
- void *vd = &env->vfp.zregs[rd];
- void *vn = &env->vfp.zregs[rn];
- void *vm = &env->vfp.zregs[rm];
- void *va = &env->vfp.zregs[ra];
- uint64_t *g = vg;
-
- do {
- uint64_t pg = g[(i - 1) >> 6];
- do {
- i -= 2;
- if (likely((pg >> (i & 63)) & 1)) {
- float16 e1, e2, e3, r;
-
- e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
- e2 = *(uint16_t *)(vm + H1_2(i));
- e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
- r = float16_muladd(e1, e2, e3, 0, &env->vfp.fp_status_f16);
- *(uint16_t *)(vd + H1_2(i)) = r;
- }
- } while (i & 63);
- } while (i != 0);
-}
-
-void HELPER(sve_fmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc)
-{
- do_fmla_zpzzz_h(env, vg, desc, 0, 0);
-}
-
-void HELPER(sve_fmls_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc)
-{
- do_fmla_zpzzz_h(env, vg, desc, 0x8000, 0);
-}
-
-void HELPER(sve_fnmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc)
-{
- do_fmla_zpzzz_h(env, vg, desc, 0x8000, 0x8000);
-}
-
-void HELPER(sve_fnmls_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc)
-{
- do_fmla_zpzzz_h(env, vg, desc, 0, 0x8000);
-}
-
-static void do_fmla_zpzzz_s(CPUARMState *env, void *vg, uint32_t desc,
- uint32_t neg1, uint32_t neg3)
-{
- intptr_t i = simd_oprsz(desc);
- unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5);
- unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5);
- unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5);
- unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5);
- void *vd = &env->vfp.zregs[rd];
- void *vn = &env->vfp.zregs[rn];
- void *vm = &env->vfp.zregs[rm];
- void *va = &env->vfp.zregs[ra];
- uint64_t *g = vg;
-
- do {
- uint64_t pg = g[(i - 1) >> 6];
- do {
- i -= 4;
- if (likely((pg >> (i & 63)) & 1)) {
- float32 e1, e2, e3, r;
-
- e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1;
- e2 = *(uint32_t *)(vm + H1_4(i));
- e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3;
- r = float32_muladd(e1, e2, e3, 0, &env->vfp.fp_status);
- *(uint32_t *)(vd + H1_4(i)) = r;
- }
- } while (i & 63);
- } while (i != 0);
-}
-
-void HELPER(sve_fmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc)
-{
- do_fmla_zpzzz_s(env, vg, desc, 0, 0);
-}
-
-void HELPER(sve_fmls_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc)
-{
- do_fmla_zpzzz_s(env, vg, desc, 0x80000000, 0);
-}
-
-void HELPER(sve_fnmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc)
-{
- do_fmla_zpzzz_s(env, vg, desc, 0x80000000, 0x80000000);
-}
-
-void HELPER(sve_fnmls_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc)
-{
- do_fmla_zpzzz_s(env, vg, desc, 0, 0x80000000);
-}
-
-static void do_fmla_zpzzz_d(CPUARMState *env, void *vg, uint32_t desc,
- uint64_t neg1, uint64_t neg3)
-{
- intptr_t i = simd_oprsz(desc);
- unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5);
- unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5);
- unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5);
- unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5);
- void *vd = &env->vfp.zregs[rd];
- void *vn = &env->vfp.zregs[rn];
- void *vm = &env->vfp.zregs[rm];
- void *va = &env->vfp.zregs[ra];
- uint64_t *g = vg;
-
- do {
- uint64_t pg = g[(i - 1) >> 6];
- do {
- i -= 8;
- if (likely((pg >> (i & 63)) & 1)) {
- float64 e1, e2, e3, r;
-
- e1 = *(uint64_t *)(vn + i) ^ neg1;
- e2 = *(uint64_t *)(vm + i);
- e3 = *(uint64_t *)(va + i) ^ neg3;
- r = float64_muladd(e1, e2, e3, 0, &env->vfp.fp_status);
- *(uint64_t *)(vd + i) = r;
- }
- } while (i & 63);
- } while (i != 0);
-}
-
-void HELPER(sve_fmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc)
-{
- do_fmla_zpzzz_d(env, vg, desc, 0, 0);
-}
-
-void HELPER(sve_fmls_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc)
-{
- do_fmla_zpzzz_d(env, vg, desc, INT64_MIN, 0);
-}
-
-void HELPER(sve_fnmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc)
-{
- do_fmla_zpzzz_d(env, vg, desc, INT64_MIN, INT64_MIN);
-}
-
-void HELPER(sve_fnmls_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc)
-{
- do_fmla_zpzzz_d(env, vg, desc, 0, INT64_MIN);
-}
-
-/* Two operand floating-point comparison controlled by a predicate.
- * Unlike the integer version, we are not allowed to optimistically
- * compare operands, since the comparison may have side effects with
- * respect to the FPSR.
- */
-#define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
- void *status, uint32_t desc) \
-{ \
- intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \
- uint64_t *d = vd, *g = vg; \
- do { \
- uint64_t out = 0, pg = g[j]; \
- do { \
- i -= sizeof(TYPE), out <<= sizeof(TYPE); \
- if (likely((pg >> (i & 63)) & 1)) { \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- TYPE mm = *(TYPE *)(vm + H(i)); \
- out |= OP(TYPE, nn, mm, status); \
- } \
- } while (i & 63); \
- d[j--] = out; \
- } while (i > 0); \
-}
-
-#define DO_FPCMP_PPZZ_H(NAME, OP) \
- DO_FPCMP_PPZZ(NAME##_h, float16, H1_2, OP)
-#define DO_FPCMP_PPZZ_S(NAME, OP) \
- DO_FPCMP_PPZZ(NAME##_s, float32, H1_4, OP)
-#define DO_FPCMP_PPZZ_D(NAME, OP) \
- DO_FPCMP_PPZZ(NAME##_d, float64, , OP)
-
-#define DO_FPCMP_PPZZ_ALL(NAME, OP) \
- DO_FPCMP_PPZZ_H(NAME, OP) \
- DO_FPCMP_PPZZ_S(NAME, OP) \
- DO_FPCMP_PPZZ_D(NAME, OP)
-
-#define DO_FCMGE(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) <= 0
-#define DO_FCMGT(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) < 0
-#define DO_FCMLE(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) <= 0
-#define DO_FCMLT(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) < 0
-#define DO_FCMEQ(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) == 0
-#define DO_FCMNE(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) != 0
-#define DO_FCMUO(TYPE, X, Y, ST) \
- TYPE##_compare_quiet(X, Y, ST) == float_relation_unordered
-#define DO_FACGE(TYPE, X, Y, ST) \
- TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) <= 0
-#define DO_FACGT(TYPE, X, Y, ST) \
- TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) < 0
-
-DO_FPCMP_PPZZ_ALL(sve_fcmge, DO_FCMGE)
-DO_FPCMP_PPZZ_ALL(sve_fcmgt, DO_FCMGT)
-DO_FPCMP_PPZZ_ALL(sve_fcmeq, DO_FCMEQ)
-DO_FPCMP_PPZZ_ALL(sve_fcmne, DO_FCMNE)
-DO_FPCMP_PPZZ_ALL(sve_fcmuo, DO_FCMUO)
-DO_FPCMP_PPZZ_ALL(sve_facge, DO_FACGE)
-DO_FPCMP_PPZZ_ALL(sve_facgt, DO_FACGT)
-
-#undef DO_FPCMP_PPZZ_ALL
-#undef DO_FPCMP_PPZZ_D
-#undef DO_FPCMP_PPZZ_S
-#undef DO_FPCMP_PPZZ_H
-#undef DO_FPCMP_PPZZ
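The comparison helpers above walk the vector from the top down, shifting the 64-bit result word left by the element size on every step so that each active lane deposits its boolean result at the predicate-bit position belonging to that element. A standalone sketch of the same bit-accumulation pattern, assuming 4-byte elements, a single 64-byte chunk and an ordinary greater-than compare (not QEMU code), is:

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative sketch: accumulate per-element compare results into a
 * 64-bit predicate word, one bit per byte of the vector, with the
 * result bit in the lowest bit of each element's field.
 */
static uint64_t cmp_gt_predword(const float *n, const float *m,
                                uint64_t pg /* governing predicate */)
{
    uint64_t out = 0;
    intptr_t i = 64;                   /* byte offset, counting down */

    do {
        i -= sizeof(float);
        out <<= sizeof(float);         /* make room for this element */
        if ((pg >> (i & 63)) & 1) {    /* only active lanes contribute */
            out |= n[i / 4] > m[i / 4];
        }
    } while (i & 63);
    return out;
}

int main(void)
{
    float n[16] = { [0] = 2.0f, [3] = 5.0f }, m[16] = { 0 };
    uint64_t pred = cmp_gt_predword(n, m, ~0ull);
    printf("0x%016llx\n", (unsigned long long)pred); /* bits 0 and 12 set */
    return 0;
}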
-
-/* One operand floating-point comparison against zero, controlled
- * by a predicate.
- */
-#define DO_FPCMP_PPZ0(NAME, TYPE, H, OP) \
-void HELPER(NAME)(void *vd, void *vn, void *vg, \
- void *status, uint32_t desc) \
-{ \
- intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \
- uint64_t *d = vd, *g = vg; \
- do { \
- uint64_t out = 0, pg = g[j]; \
- do { \
- i -= sizeof(TYPE), out <<= sizeof(TYPE); \
- if ((pg >> (i & 63)) & 1) { \
- TYPE nn = *(TYPE *)(vn + H(i)); \
- out |= OP(TYPE, nn, 0, status); \
- } \
- } while (i & 63); \
- d[j--] = out; \
- } while (i > 0); \
-}
-
-#define DO_FPCMP_PPZ0_H(NAME, OP) \
- DO_FPCMP_PPZ0(NAME##_h, float16, H1_2, OP)
-#define DO_FPCMP_PPZ0_S(NAME, OP) \
- DO_FPCMP_PPZ0(NAME##_s, float32, H1_4, OP)
-#define DO_FPCMP_PPZ0_D(NAME, OP) \
- DO_FPCMP_PPZ0(NAME##_d, float64, , OP)
-
-#define DO_FPCMP_PPZ0_ALL(NAME, OP) \
- DO_FPCMP_PPZ0_H(NAME, OP) \
- DO_FPCMP_PPZ0_S(NAME, OP) \
- DO_FPCMP_PPZ0_D(NAME, OP)
-
-DO_FPCMP_PPZ0_ALL(sve_fcmge0, DO_FCMGE)
-DO_FPCMP_PPZ0_ALL(sve_fcmgt0, DO_FCMGT)
-DO_FPCMP_PPZ0_ALL(sve_fcmle0, DO_FCMLE)
-DO_FPCMP_PPZ0_ALL(sve_fcmlt0, DO_FCMLT)
-DO_FPCMP_PPZ0_ALL(sve_fcmeq0, DO_FCMEQ)
-DO_FPCMP_PPZ0_ALL(sve_fcmne0, DO_FCMNE)
-
-/* FP Trig Multiply-Add. */
-
-void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
-{
- static const float16 coeff[16] = {
- 0x3c00, 0xb155, 0x2030, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- };
- intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16);
- intptr_t x = simd_data(desc);
- float16 *d = vd, *n = vn, *m = vm;
- for (i = 0; i < opr_sz; i++) {
- float16 mm = m[i];
- intptr_t xx = x;
- if (float16_is_neg(mm)) {
- mm = float16_abs(mm);
- xx += 8;
- }
- d[i] = float16_muladd(n[i], mm, coeff[xx], 0, vs);
- }
-}
-
-void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
-{
- static const float32 coeff[16] = {
- 0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9,
- 0x36369d6d, 0x00000000, 0x00000000, 0x00000000,
- 0x3f800000, 0xbf000000, 0x3d2aaaa6, 0xbab60705,
- 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000,
- };
- intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32);
- intptr_t x = simd_data(desc);
- float32 *d = vd, *n = vn, *m = vm;
- for (i = 0; i < opr_sz; i++) {
- float32 mm = m[i];
- intptr_t xx = x;
- if (float32_is_neg(mm)) {
- mm = float32_abs(mm);
- xx += 8;
- }
- d[i] = float32_muladd(n[i], mm, coeff[xx], 0, vs);
- }
-}
-
-void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
-{
- static const float64 coeff[16] = {
- 0x3ff0000000000000ull, 0xbfc5555555555543ull,
- 0x3f8111111110f30cull, 0xbf2a01a019b92fc6ull,
- 0x3ec71de351f3d22bull, 0xbe5ae5e2b60f7b91ull,
- 0x3de5d8408868552full, 0x0000000000000000ull,
- 0x3ff0000000000000ull, 0xbfe0000000000000ull,
- 0x3fa5555555555536ull, 0xbf56c16c16c13a0bull,
- 0x3efa01a019b1e8d8ull, 0xbe927e4f7282f468ull,
- 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull,
- };
- intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64);
- intptr_t x = simd_data(desc);
- float64 *d = vd, *n = vn, *m = vm;
- for (i = 0; i < opr_sz; i++) {
- float64 mm = m[i];
- intptr_t xx = x;
- if (float64_is_neg(mm)) {
- mm = float64_abs(mm);
- xx += 8;
- }
- d[i] = float64_muladd(n[i], mm, coeff[xx], 0, vs);
- }
-}
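FTMAD performs one step of a polynomial series: the immediate selects a coefficient, and a negative multiplicand both switches to the second half of the table (index + 8) and contributes only its magnitude. The following standalone sketch shows that selection with plain doubles and placeholder coefficients; the real table values and softfloat handling above are not reproduced:

#include <math.h>
#include <stdio.h>

/*
 * Sketch of one FTMAD-style step: d = n * |m| + coeff[x + (m < 0 ? 8 : 0)].
 * The coefficient values below are placeholders, not the real table.
 */
static double ftmad_step(double n, double m, int x, const double coeff[16])
{
    int xx = x;
    if (signbit(m)) {        /* negative multiplicand selects the  */
        m = fabs(m);         /* second half of the table ...       */
        xx += 8;             /* ... and is used as its magnitude   */
    }
    return n * m + coeff[xx];
}

int main(void)
{
    /* Placeholder coefficients: 1.0 in slot 0, 0.5 in slot 8. */
    double coeff[16] = { [0] = 1.0, [8] = 0.5 };
    printf("%f\n", ftmad_step(2.0, 3.0, 0, coeff));   /* 2*3 + 1.0 = 7.0 */
    printf("%f\n", ftmad_step(2.0, -3.0, 0, coeff));  /* 2*3 + 0.5 = 6.5 */
    return 0;
}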
-
-/*
- * FP Complex Add
- */
-
-void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg,
- void *vs, uint32_t desc)
-{
- intptr_t j, i = simd_oprsz(desc);
- uint64_t *g = vg;
- float16 neg_imag = float16_set_sign(0, simd_data(desc));
- float16 neg_real = float16_chs(neg_imag);
-
- do {
- uint64_t pg = g[(i - 1) >> 6];
- do {
- float16 e0, e1, e2, e3;
-
- /* I holds the real index; J holds the imag index. */
- j = i - sizeof(float16);
- i -= 2 * sizeof(float16);
-
- e0 = *(float16 *)(vn + H1_2(i));
- e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real;
- e2 = *(float16 *)(vn + H1_2(j));
- e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag;
-
- if (likely((pg >> (i & 63)) & 1)) {
- *(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, vs);
- }
- if (likely((pg >> (j & 63)) & 1)) {
- *(float16 *)(vd + H1_2(j)) = float16_add(e2, e3, vs);
- }
- } while (i & 63);
- } while (i != 0);
-}
-
-void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg,
- void *vs, uint32_t desc)
-{
- intptr_t j, i = simd_oprsz(desc);
- uint64_t *g = vg;
- float32 neg_imag = float32_set_sign(0, simd_data(desc));
- float32 neg_real = float32_chs(neg_imag);
-
- do {
- uint64_t pg = g[(i - 1) >> 6];
- do {
- float32 e0, e1, e2, e3;
-
- /* I holds the real index; J holds the imag index. */
- j = i - sizeof(float32);
- i -= 2 * sizeof(float32);
-
- e0 = *(float32 *)(vn + H1_2(i));
- e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real;
- e2 = *(float32 *)(vn + H1_2(j));
- e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag;
-
- if (likely((pg >> (i & 63)) & 1)) {
- *(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, vs);
- }
- if (likely((pg >> (j & 63)) & 1)) {
- *(float32 *)(vd + H1_2(j)) = float32_add(e2, e3, vs);
- }
- } while (i & 63);
- } while (i != 0);
-}
-
-void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
- void *vs, uint32_t desc)
-{
- intptr_t j, i = simd_oprsz(desc);
- uint64_t *g = vg;
- float64 neg_imag = float64_set_sign(0, simd_data(desc));
- float64 neg_real = float64_chs(neg_imag);
-
- do {
- uint64_t pg = g[(i - 1) >> 6];
- do {
- float64 e0, e1, e2, e3;
-
- /* I holds the real index; J holds the imag index. */
- j = i - sizeof(float64);
- i -= 2 * sizeof(float64);
-
- e0 = *(float64 *)(vn + H1_2(i));
- e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real;
- e2 = *(float64 *)(vn + H1_2(j));
- e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag;
-
- if (likely((pg >> (i & 63)) & 1)) {
- *(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, vs);
- }
- if (likely((pg >> (j & 63)) & 1)) {
- *(float64 *)(vd + H1_2(j)) = float64_add(e2, e3, vs);
- }
- } while (i & 63);
- } while (i != 0);
-}
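FCADD operates on (real, imaginary) element pairs and rotates the second operand by plus or minus 90 degrees before adding, which the helpers implement by XORing the appropriate sign bit into one of the two lanes. A plain-C sketch of the pairing, with ordinary floats, no predication, and an assumed mapping of the rotation flag, is:

#include <stdio.h>

/*
 * Sketch: FCADD-style complex add with the second operand rotated by
 * +/-90 degrees.  Arrays hold interleaved (real, imag) pairs; `rot`
 * selects the direction.  The mapping of rot to +90/-90 here is an
 * assumption for illustration.
 */
static void fcadd(float *d, const float *n, const float *m,
                  int pairs, int rot)
{
    for (int i = 0; i < pairs; i++) {
        float nr = n[2 * i], ni = n[2 * i + 1];
        float mr = m[2 * i], mi = m[2 * i + 1];
        if (rot == 0) {            /* n + m * i   (+90 degrees)  */
            d[2 * i]     = nr - mi;
            d[2 * i + 1] = ni + mr;
        } else {                   /* n + m * -i  (-90 degrees)  */
            d[2 * i]     = nr + mi;
            d[2 * i + 1] = ni - mr;
        }
    }
}

int main(void)
{
    float n[2] = { 1.0f, 2.0f }, m[2] = { 3.0f, 4.0f }, d[2];
    fcadd(d, n, m, 1, 0);
    printf("%g %g\n", d[0], d[1]);   /* -3 5 */
    return 0;
}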
-
-/*
- * FP Complex Multiply
- */
-
-QEMU_BUILD_BUG_ON(SIMD_DATA_SHIFT + 22 > 32);
-
-void HELPER(sve_fcmla_zpzzz_h)(CPUARMState *env, void *vg, uint32_t desc)
-{
- intptr_t j, i = simd_oprsz(desc);
- unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5);
- unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5);
- unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5);
- unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5);
- unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2);
- bool flip = rot & 1;
- float16 neg_imag, neg_real;
- void *vd = &env->vfp.zregs[rd];
- void *vn = &env->vfp.zregs[rn];
- void *vm = &env->vfp.zregs[rm];
- void *va = &env->vfp.zregs[ra];
- uint64_t *g = vg;
-
- neg_imag = float16_set_sign(0, (rot & 2) != 0);
- neg_real = float16_set_sign(0, rot == 1 || rot == 2);
-
- do {
- uint64_t pg = g[(i - 1) >> 6];
- do {
- float16 e1, e2, e3, e4, nr, ni, mr, mi, d;
-
- /* I holds the real index; J holds the imag index. */
- j = i - sizeof(float16);
- i -= 2 * sizeof(float16);
-
- nr = *(float16 *)(vn + H1_2(i));
- ni = *(float16 *)(vn + H1_2(j));
- mr = *(float16 *)(vm + H1_2(i));
- mi = *(float16 *)(vm + H1_2(j));
-
- e2 = (flip ? ni : nr);
- e1 = (flip ? mi : mr) ^ neg_real;
- e4 = e2;
- e3 = (flip ? mr : mi) ^ neg_imag;
-
- if (likely((pg >> (i & 63)) & 1)) {
- d = *(float16 *)(va + H1_2(i));
- d = float16_muladd(e2, e1, d, 0, &env->vfp.fp_status_f16);
- *(float16 *)(vd + H1_2(i)) = d;
- }
- if (likely((pg >> (j & 63)) & 1)) {
- d = *(float16 *)(va + H1_2(j));
- d = float16_muladd(e4, e3, d, 0, &env->vfp.fp_status_f16);
- *(float16 *)(vd + H1_2(j)) = d;
- }
- } while (i & 63);
- } while (i != 0);
-}
-
-void HELPER(sve_fcmla_zpzzz_s)(CPUARMState *env, void *vg, uint32_t desc)
-{
- intptr_t j, i = simd_oprsz(desc);
- unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5);
- unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5);
- unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5);
- unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5);
- unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2);
- bool flip = rot & 1;
- float32 neg_imag, neg_real;
- void *vd = &env->vfp.zregs[rd];
- void *vn = &env->vfp.zregs[rn];
- void *vm = &env->vfp.zregs[rm];
- void *va = &env->vfp.zregs[ra];
- uint64_t *g = vg;
-
- neg_imag = float32_set_sign(0, (rot & 2) != 0);
- neg_real = float32_set_sign(0, rot == 1 || rot == 2);
-
- do {
- uint64_t pg = g[(i - 1) >> 6];
- do {
- float32 e1, e2, e3, e4, nr, ni, mr, mi, d;
-
- /* I holds the real index; J holds the imag index. */
- j = i - sizeof(float32);
- i -= 2 * sizeof(float32);
-
- nr = *(float32 *)(vn + H1_2(i));
- ni = *(float32 *)(vn + H1_2(j));
- mr = *(float32 *)(vm + H1_2(i));
- mi = *(float32 *)(vm + H1_2(j));
-
- e2 = (flip ? ni : nr);
- e1 = (flip ? mi : mr) ^ neg_real;
- e4 = e2;
- e3 = (flip ? mr : mi) ^ neg_imag;
-
- if (likely((pg >> (i & 63)) & 1)) {
- d = *(float32 *)(va + H1_2(i));
- d = float32_muladd(e2, e1, d, 0, &env->vfp.fp_status);
- *(float32 *)(vd + H1_2(i)) = d;
- }
- if (likely((pg >> (j & 63)) & 1)) {
- d = *(float32 *)(va + H1_2(j));
- d = float32_muladd(e4, e3, d, 0, &env->vfp.fp_status);
- *(float32 *)(vd + H1_2(j)) = d;
- }
- } while (i & 63);
- } while (i != 0);
-}
-
-void HELPER(sve_fcmla_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc)
-{
- intptr_t j, i = simd_oprsz(desc);
- unsigned rd = extract32(desc, SIMD_DATA_SHIFT, 5);
- unsigned rn = extract32(desc, SIMD_DATA_SHIFT + 5, 5);
- unsigned rm = extract32(desc, SIMD_DATA_SHIFT + 10, 5);
- unsigned ra = extract32(desc, SIMD_DATA_SHIFT + 15, 5);
- unsigned rot = extract32(desc, SIMD_DATA_SHIFT + 20, 2);
- bool flip = rot & 1;
- float64 neg_imag, neg_real;
- void *vd = &env->vfp.zregs[rd];
- void *vn = &env->vfp.zregs[rn];
- void *vm = &env->vfp.zregs[rm];
- void *va = &env->vfp.zregs[ra];
- uint64_t *g = vg;
-
- neg_imag = float64_set_sign(0, (rot & 2) != 0);
- neg_real = float64_set_sign(0, rot == 1 || rot == 2);
-
- do {
- uint64_t pg = g[(i - 1) >> 6];
- do {
- float64 e1, e2, e3, e4, nr, ni, mr, mi, d;
-
- /* I holds the real index; J holds the imag index. */
- j = i - sizeof(float64);
- i -= 2 * sizeof(float64);
-
- nr = *(float64 *)(vn + H1_2(i));
- ni = *(float64 *)(vn + H1_2(j));
- mr = *(float64 *)(vm + H1_2(i));
- mi = *(float64 *)(vm + H1_2(j));
-
- e2 = (flip ? ni : nr);
- e1 = (flip ? mi : mr) ^ neg_real;
- e4 = e2;
- e3 = (flip ? mr : mi) ^ neg_imag;
-
- if (likely((pg >> (i & 63)) & 1)) {
- d = *(float64 *)(va + H1_2(i));
- d = float64_muladd(e2, e1, d, 0, &env->vfp.fp_status);
- *(float64 *)(vd + H1_2(i)) = d;
- }
- if (likely((pg >> (j & 63)) & 1)) {
- d = *(float64 *)(va + H1_2(j));
- d = float64_muladd(e4, e3, d, 0, &env->vfp.fp_status);
- *(float64 *)(vd + H1_2(j)) = d;
- }
- } while (i & 63);
- } while (i != 0);
-}
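Each FCMLA invocation accumulates one of the four partial products of a complex multiply: bit 0 of the rotation (flip) chooses whether the real or the imaginary half of Zn feeds both lanes, and the two sign constants negate the product destined for the real or imaginary lane. A standalone sketch (single element, plain floats, sign flips modelled with ordinary negation rather than the sign-bit XOR used above) shows how rotations 0 and 1 together produce a full complex multiply-accumulate:

#include <stdio.h>

/*
 * Sketch of one FCMLA-style step.  rot selects which partial product
 * is accumulated; rotations 0 and 1 together give d += n * m,
 * rotations 2 and 3 together give d += -(n * m).
 */
static void fcmla_step(float d[2], const float n[2], const float m[2],
                       unsigned rot)
{
    int flip = rot & 1;
    float sign_real = (rot == 1 || rot == 2) ? -1.0f : 1.0f;
    float sign_imag = (rot & 2) ? -1.0f : 1.0f;

    float e2 = flip ? n[1] : n[0];               /* multiplicand lane   */
    float e1 = (flip ? m[1] : m[0]) * sign_real; /* feeds the real lane */
    float e3 = (flip ? m[0] : m[1]) * sign_imag; /* feeds the imag lane */

    d[0] += e2 * e1;
    d[1] += e2 * e3;
}

int main(void)
{
    float n[2] = { 1.0f, 2.0f }, m[2] = { 3.0f, 4.0f }, d[2] = { 0, 0 };
    fcmla_step(d, n, m, 0);
    fcmla_step(d, n, m, 1);
    /* (1+2i)*(3+4i) = -5 + 10i */
    printf("%g %g\n", d[0], d[1]);
    return 0;
}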
-
-/*
- * Load contiguous data, protected by a governing predicate.
- */
-
-/*
- * Load elements into @vd, controlled by @vg, from @host + @mem_ofs.
- * Memory is valid through @host + @mem_max. The register element
- * indicies are inferred from @mem_ofs, as modified by the types for
- * which the helper is built. Return the @mem_ofs of the first element
- * not loaded (which is @mem_max if they are all loaded).
- *
- * For softmmu, we have fully validated the guest page. For user-only,
- * we cannot fully validate without taking the mmap lock, but since we
- * know the access is within one host page, if any access is valid they
- * all must be valid. However, when @vg is all false, it may be that
- * no access is valid.
- */
-typedef intptr_t sve_ld1_host_fn(void *vd, void *vg, void *host,
- intptr_t mem_ofs, intptr_t mem_max);
-
-/*
- * Load one element into @vd + @reg_off from (@env, @vaddr, @ra).
- * The controlling predicate is known to be true.
- */
-typedef void sve_ld1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off,
- target_ulong vaddr, TCGMemOpIdx oi, uintptr_t ra);
-typedef sve_ld1_tlb_fn sve_st1_tlb_fn;
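The host_fn contract above advances a memory offset and a register offset in lockstep, scaling the memory offset by the ratio of element size to memory size and zeroing inactive lanes. As a rough standalone illustration of that contract (a hypothetical 8-bit-to-16-bit widening load over plain arrays, not generated by the macros below), consider:

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of the host_fn contract: copy 8-bit memory elements into
 * 16-bit register elements under a one-bit-per-byte predicate, zeroing
 * inactive lanes, and return the first memory offset not loaded.
 */
static intptr_t ld1bhu_host_sketch(uint16_t *vd, const uint64_t *vg,
                                   const uint8_t *host,
                                   intptr_t mem_off, intptr_t mem_max)
{
    while (mem_off + 1 <= mem_max) {
        intptr_t reg_off = mem_off * 2;        /* 16-bit destination */
        uint16_t val = 0;
        if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) {
            val = host[mem_off];
        }
        vd[mem_off] = val;
        mem_off += 1;
    }
    return mem_off;
}

int main(void)
{
    uint8_t mem[4] = { 10, 20, 30, 40 };
    uint16_t zreg[4];
    uint64_t pred[1] = { 0x1 | (0x1 << 4) };   /* lanes 0 and 2 active */
    ld1bhu_host_sketch(zreg, pred, mem, 0, 4);
    printf("%d %d %d %d\n", zreg[0], zreg[1], zreg[2], zreg[3]);
    return 0;
}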
-
-/*
- * Generate the above primitives.
- */
-
-#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \
-static intptr_t sve_##NAME##_host(void *vd, void *vg, void *host, \
- intptr_t mem_off, const intptr_t mem_max) \
-{ \
- intptr_t reg_off = mem_off * (sizeof(TYPEE) / sizeof(TYPEM)); \
- uint64_t *pg = vg; \
- while (mem_off + sizeof(TYPEM) <= mem_max) { \
- TYPEM val = 0; \
- if (likely((pg[reg_off >> 6] >> (reg_off & 63)) & 1)) { \
- val = HOST(host + mem_off); \
- } \
- *(TYPEE *)(vd + H(reg_off)) = val; \
- mem_off += sizeof(TYPEM), reg_off += sizeof(TYPEE); \
- } \
- return mem_off; \
-}
-
-#ifdef CONFIG_SOFTMMU
-#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \
-static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
- target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
-{ \
- TYPEM val = TLB(env, addr, oi, ra); \
- *(TYPEE *)(vd + H(reg_off)) = val; \
-}
-#else
-#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \
-static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
- target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
-{ \
- TYPEM val = HOST(g2h(addr)); \
- *(TYPEE *)(vd + H(reg_off)) = val; \
-}
-#endif
-
-#define DO_LD_PRIM_1(NAME, H, TE, TM) \
- DO_LD_HOST(NAME, H, TE, TM, ldub_p) \
- DO_LD_TLB(NAME, H, TE, TM, ldub_p, 0, helper_ret_ldub_mmu)
-
-DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t)
-DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t)
-DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t)
-DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t)
-DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t)
-DO_LD_PRIM_1(ld1bdu, , uint64_t, uint8_t)
-DO_LD_PRIM_1(ld1bds, , uint64_t, int8_t)
-
-#define DO_LD_PRIM_2(NAME, end, MOEND, H, TE, TM, PH, PT) \
- DO_LD_HOST(NAME##_##end, H, TE, TM, PH##_##end##_p) \
- DO_LD_TLB(NAME##_##end, H, TE, TM, PH##_##end##_p, \
- MOEND, helper_##end##_##PT##_mmu)
-
-DO_LD_PRIM_2(ld1hh, le, MO_LE, H1_2, uint16_t, uint16_t, lduw, lduw)
-DO_LD_PRIM_2(ld1hsu, le, MO_LE, H1_4, uint32_t, uint16_t, lduw, lduw)
-DO_LD_PRIM_2(ld1hss, le, MO_LE, H1_4, uint32_t, int16_t, lduw, lduw)
-DO_LD_PRIM_2(ld1hdu, le, MO_LE, , uint64_t, uint16_t, lduw, lduw)
-DO_LD_PRIM_2(ld1hds, le, MO_LE, , uint64_t, int16_t, lduw, lduw)
-
-DO_LD_PRIM_2(ld1ss, le, MO_LE, H1_4, uint32_t, uint32_t, ldl, ldul)
-DO_LD_PRIM_2(ld1sdu, le, MO_LE, , uint64_t, uint32_t, ldl, ldul)
-DO_LD_PRIM_2(ld1sds, le, MO_LE, , uint64_t, int32_t, ldl, ldul)
-
-DO_LD_PRIM_2(ld1dd, le, MO_LE, , uint64_t, uint64_t, ldq, ldq)
-
-DO_LD_PRIM_2(ld1hh, be, MO_BE, H1_2, uint16_t, uint16_t, lduw, lduw)
-DO_LD_PRIM_2(ld1hsu, be, MO_BE, H1_4, uint32_t, uint16_t, lduw, lduw)
-DO_LD_PRIM_2(ld1hss, be, MO_BE, H1_4, uint32_t, int16_t, lduw, lduw)
-DO_LD_PRIM_2(ld1hdu, be, MO_BE, , uint64_t, uint16_t, lduw, lduw)
-DO_LD_PRIM_2(ld1hds, be, MO_BE, , uint64_t, int16_t, lduw, lduw)
-
-DO_LD_PRIM_2(ld1ss, be, MO_BE, H1_4, uint32_t, uint32_t, ldl, ldul)
-DO_LD_PRIM_2(ld1sdu, be, MO_BE, , uint64_t, uint32_t, ldl, ldul)
-DO_LD_PRIM_2(ld1sds, be, MO_BE, , uint64_t, int32_t, ldl, ldul)
-
-DO_LD_PRIM_2(ld1dd, be, MO_BE, , uint64_t, uint64_t, ldq, ldq)
-
-#undef DO_LD_TLB
-#undef DO_LD_HOST
-#undef DO_LD_PRIM_1
-#undef DO_LD_PRIM_2
-
-/*
- * Skip through a sequence of inactive elements in the guarding predicate @vg,
- * beginning at @reg_off bounded by @reg_max. Return the offset of the active
- * element >= @reg_off, or @reg_max if there were no active elements at all.
- */
-static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off,
- intptr_t reg_max, int esz)
-{
- uint64_t pg_mask = pred_esz_masks[esz];
- uint64_t pg = (vg[reg_off >> 6] & pg_mask) >> (reg_off & 63);
-
- /* In normal usage, the first element is active. */
- if (likely(pg & 1)) {
- return reg_off;
- }
-
- if (pg == 0) {
- reg_off &= -64;
- do {
- reg_off += 64;
- if (unlikely(reg_off >= reg_max)) {
- /* The entire predicate was false. */
- return reg_max;
- }
- pg = vg[reg_off >> 6] & pg_mask;
- } while (pg == 0);
- }
- reg_off += ctz64(pg);
-
- /* We should never see an out of range predicate bit set. */
- tcg_debug_assert(reg_off < reg_max);
- return reg_off;
-}
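find_next_active masks the predicate word down to the bits that are meaningful for the element size and, once a non-zero word is found, uses ctz64 to convert the lowest set bit into a byte offset. A standalone version of the same scan, hard-coding the mask for 4-byte elements and using the GCC/Clang builtin in place of ctz64, could look like:

#include <stdint.h>
#include <stdio.h>

/*
 * Standalone sketch of the scan in find_next_active(): 4-byte elements,
 * so only every fourth predicate bit is meaningful.
 */
static intptr_t next_active(const uint64_t *vg, intptr_t reg_off,
                            intptr_t reg_max)
{
    const uint64_t mask = 0x1111111111111111ull;   /* every 4th bit */
    uint64_t pg = (vg[reg_off >> 6] & mask) >> (reg_off & 63);

    if (pg & 1) {
        return reg_off;                 /* first element already active */
    }
    if (pg == 0) {
        reg_off &= -64;
        do {
            reg_off += 64;
            if (reg_off >= reg_max) {
                return reg_max;         /* no active element at all */
            }
            pg = vg[reg_off >> 6] & mask;
        } while (pg == 0);
    }
    return reg_off + __builtin_ctzll(pg);
}

int main(void)
{
    uint64_t vg[2] = { 0, 1ull << 16 };    /* element at byte offset 80 */
    printf("%ld\n", (long)next_active(vg, 0, 128));
    return 0;
}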
-
-/*
- * Return the maximum offset <= @mem_max which is still within the page
- * referenced by @base + @mem_off.
- */
-static intptr_t max_for_page(target_ulong base, intptr_t mem_off,
- intptr_t mem_max)
-{
- target_ulong addr = base + mem_off;
- intptr_t split = -(intptr_t)(addr | TARGET_PAGE_MASK);
- return MIN(split, mem_max - mem_off) + mem_off;
-}
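max_for_page relies on the identity that, for a power-of-two page size, -(addr | TARGET_PAGE_MASK) is the number of bytes from addr up to the next page boundary; clamping that against the bytes remaining yields the last offset still on the current page. A quick standalone check of the arithmetic, assuming a 4 KiB page, is:

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of max_for_page(): largest offset <= mem_max that stays on the
 * same page as base + mem_off, assuming 4 KiB pages.
 */
static intptr_t max_for_page_sketch(uint64_t base, intptr_t mem_off,
                                    intptr_t mem_max)
{
    const uint64_t page_mask = ~(uint64_t)(4096 - 1);
    uint64_t addr = base + mem_off;
    /* -(addr | page_mask) == bytes from addr to the next page boundary */
    intptr_t split = -(intptr_t)(addr | page_mask);
    intptr_t avail = mem_max - mem_off;
    return (split < avail ? split : avail) + mem_off;
}

int main(void)
{
    /* 16 bytes before a page boundary, 64 wanted: only 16 available. */
    printf("%ld\n", (long)max_for_page_sketch(0x1000 - 16, 0, 64));
    /* Well inside a page: all 64 bytes available. */
    printf("%ld\n", (long)max_for_page_sketch(0x2000, 0, 64));
    return 0;
}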
-
-static inline void set_helper_retaddr(uintptr_t ra)
-{
-#ifdef CONFIG_USER_ONLY
- helper_retaddr = ra;
-#endif
-}
-
-/*
- * The result of tlb_vaddr_to_host for user-only is just g2h(x),
- * which is always non-null. Elide the useless test.
- */
-static inline bool test_host_page(void *host)
-{
-#ifdef CONFIG_USER_ONLY
- return true;
-#else
- return likely(host != NULL);
-#endif
-}
-
-/*
- * Common helper for all contiguous one-register predicated loads.
- */
-static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
- uint32_t desc, const uintptr_t retaddr,
- const int esz, const int msz,
- sve_ld1_host_fn *host_fn,
- sve_ld1_tlb_fn *tlb_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const int mmu_idx = get_mmuidx(oi);
- const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
- void *vd = &env->vfp.zregs[rd];
- const int diffsz = esz - msz;
- const intptr_t reg_max = simd_oprsz(desc);
- const intptr_t mem_max = reg_max >> diffsz;
- ARMVectorReg scratch;
- void *host;
- intptr_t split, reg_off, mem_off;
-
- /* Find the first active element. */
- reg_off = find_next_active(vg, 0, reg_max, esz);
- if (unlikely(reg_off == reg_max)) {
- /* The entire predicate was false; no load occurs. */
- memset(vd, 0, reg_max);
- return;
- }
- mem_off = reg_off >> diffsz;
- set_helper_retaddr(retaddr);
-
- /*
- * If the (remaining) load is entirely within a single page, then:
-     * For softmmu, if the TLB hits, no faults will occur;
- * For user-only, either the first load will fault or none will.
- * We can thus perform the load directly to the destination and
- * Vd will be unmodified on any exception path.
- */
- split = max_for_page(addr, mem_off, mem_max);
- if (likely(split == mem_max)) {
- host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
- if (test_host_page(host)) {
- mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max);
- tcg_debug_assert(mem_off == mem_max);
- set_helper_retaddr(0);
- /* After having taken any fault, zero leading inactive elements. */
- swap_memzero(vd, reg_off);
- return;
- }
- }
-
- /*
- * Perform the predicated read into a temporary, thus ensuring
- * if the load of the last element faults, Vd is not modified.
- */
-#ifdef CONFIG_USER_ONLY
- swap_memzero(&scratch, reg_off);
- host_fn(&scratch, vg, g2h(addr), mem_off, mem_max);
-#else
- memset(&scratch, 0, reg_max);
- goto start;
- while (1) {
- reg_off = find_next_active(vg, reg_off, reg_max, esz);
- if (reg_off >= reg_max) {
- break;
- }
- mem_off = reg_off >> diffsz;
- split = max_for_page(addr, mem_off, mem_max);
-
- start:
- if (split - mem_off >= (1 << msz)) {
- /* At least one whole element on this page. */
- host = tlb_vaddr_to_host(env, addr + mem_off,
- MMU_DATA_LOAD, mmu_idx);
- if (host) {
- mem_off = host_fn(&scratch, vg, host - mem_off,
- mem_off, split);
- reg_off = mem_off << diffsz;
- continue;
- }
- }
-
- /*
- * Perform one normal read. This may fault, longjmping out to the
- * main loop in order to raise an exception. It may succeed, and
- * as a side-effect load the TLB entry for the next round. Finally,
- * in the extremely unlikely case we're performing this operation
- * on I/O memory, it may succeed but not bring in the TLB entry.
- * But even then we have still made forward progress.
- */
- tlb_fn(env, &scratch, reg_off, addr + mem_off, oi, retaddr);
- reg_off += 1 << esz;
- }
-#endif
-
- set_helper_retaddr(0);
- memcpy(vd, &scratch, reg_max);
-}
-
-#define DO_LD1_1(NAME, ESZ) \
-void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \
- sve_##NAME##_host, sve_##NAME##_tlb); \
-}
-
-#define DO_LD1_2(NAME, ESZ, MSZ) \
-void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \
- sve_##NAME##_le_host, sve_##NAME##_le_tlb); \
-} \
-void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ld1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \
- sve_##NAME##_be_host, sve_##NAME##_be_tlb); \
-}
-
-DO_LD1_1(ld1bb, 0)
-DO_LD1_1(ld1bhu, 1)
-DO_LD1_1(ld1bhs, 1)
-DO_LD1_1(ld1bsu, 2)
-DO_LD1_1(ld1bss, 2)
-DO_LD1_1(ld1bdu, 3)
-DO_LD1_1(ld1bds, 3)
-
-DO_LD1_2(ld1hh, 1, 1)
-DO_LD1_2(ld1hsu, 2, 1)
-DO_LD1_2(ld1hss, 2, 1)
-DO_LD1_2(ld1hdu, 3, 1)
-DO_LD1_2(ld1hds, 3, 1)
-
-DO_LD1_2(ld1ss, 2, 2)
-DO_LD1_2(ld1sdu, 3, 2)
-DO_LD1_2(ld1sds, 3, 2)
-
-DO_LD1_2(ld1dd, 3, 3)
-
-#undef DO_LD1_1
-#undef DO_LD1_2
-
-/*
- * Common helpers for all contiguous 2,3,4-register predicated loads.
- */
-static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr,
- uint32_t desc, int size, uintptr_t ra,
- sve_ld1_tlb_fn *tlb_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
- intptr_t i, oprsz = simd_oprsz(desc);
- ARMVectorReg scratch[2] = { };
-
- set_helper_retaddr(ra);
- for (i = 0; i < oprsz; ) {
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
- do {
- if (pg & 1) {
- tlb_fn(env, &scratch[0], i, addr, oi, ra);
- tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
- }
- i += size, pg >>= size;
- addr += 2 * size;
- } while (i & 15);
- }
- set_helper_retaddr(0);
-
- /* Wait until all exceptions have been raised to write back. */
- memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
- memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz);
-}
-
-static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr,
- uint32_t desc, int size, uintptr_t ra,
- sve_ld1_tlb_fn *tlb_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
- intptr_t i, oprsz = simd_oprsz(desc);
- ARMVectorReg scratch[3] = { };
-
- set_helper_retaddr(ra);
- for (i = 0; i < oprsz; ) {
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
- do {
- if (pg & 1) {
- tlb_fn(env, &scratch[0], i, addr, oi, ra);
- tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
- tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra);
- }
- i += size, pg >>= size;
- addr += 3 * size;
- } while (i & 15);
- }
- set_helper_retaddr(0);
-
- /* Wait until all exceptions have been raised to write back. */
- memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
- memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz);
- memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz);
-}
-
-static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr,
- uint32_t desc, int size, uintptr_t ra,
- sve_ld1_tlb_fn *tlb_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
- intptr_t i, oprsz = simd_oprsz(desc);
- ARMVectorReg scratch[4] = { };
-
- set_helper_retaddr(ra);
- for (i = 0; i < oprsz; ) {
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
- do {
- if (pg & 1) {
- tlb_fn(env, &scratch[0], i, addr, oi, ra);
- tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
- tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra);
- tlb_fn(env, &scratch[3], i, addr + 3 * size, oi, ra);
- }
- i += size, pg >>= size;
- addr += 4 * size;
- } while (i & 15);
- }
- set_helper_retaddr(0);
-
- /* Wait until all exceptions have been raised to write back. */
- memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
- memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz);
- memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz);
- memcpy(&env->vfp.zregs[(rd + 3) & 31], &scratch[3], oprsz);
-}
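The 2-, 3- and 4-register variants read interleaved structures: for each active element the N consecutive memory elements land at the same element index of N consecutive registers, and the address advances by N elements per lane. A simplified standalone sketch of that de-interleave for two registers of 32-bit elements (no predication, no fault handling) is:

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of an LD2-style structure load: elements alternate in memory
 * between the two destination "registers".
 */
static void ld2_sketch(uint32_t *z0, uint32_t *z1,
                       const uint32_t *mem, int elements)
{
    for (int i = 0; i < elements; i++) {
        z0[i] = mem[2 * i];        /* first member of the pair  */
        z1[i] = mem[2 * i + 1];    /* second member of the pair */
    }
}

int main(void)
{
    uint32_t mem[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    uint32_t z0[4], z1[4];
    ld2_sketch(z0, z1, mem, 4);
    printf("%u %u %u %u / %u %u %u %u\n",
           z0[0], z0[1], z0[2], z0[3], z1[0], z1[1], z1[2], z1[3]);
    return 0;
}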
-
-#define DO_LDN_1(N) \
-void __attribute__((flatten)) HELPER(sve_ld##N##bb_r) \
- (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
-{ \
- sve_ld##N##_r(env, vg, addr, desc, 1, GETPC(), sve_ld1bb_tlb); \
-}
-
-#define DO_LDN_2(N, SUFF, SIZE) \
-void __attribute__((flatten)) HELPER(sve_ld##N##SUFF##_le_r) \
- (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
-{ \
- sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \
- sve_ld1##SUFF##_le_tlb); \
-} \
-void __attribute__((flatten)) HELPER(sve_ld##N##SUFF##_be_r) \
- (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
-{ \
- sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(), \
- sve_ld1##SUFF##_be_tlb); \
-}
-
-DO_LDN_1(2)
-DO_LDN_1(3)
-DO_LDN_1(4)
-
-DO_LDN_2(2, hh, 2)
-DO_LDN_2(3, hh, 2)
-DO_LDN_2(4, hh, 2)
-
-DO_LDN_2(2, ss, 4)
-DO_LDN_2(3, ss, 4)
-DO_LDN_2(4, ss, 4)
-
-DO_LDN_2(2, dd, 8)
-DO_LDN_2(3, dd, 8)
-DO_LDN_2(4, dd, 8)
-
-#undef DO_LDN_1
-#undef DO_LDN_2
-
-/*
- * Load contiguous data, first-fault and no-fault.
- *
- * For user-only, one could argue that we should hold the mmap_lock during
- * the operation so that there is no race between page_check_range and the
- * load operation. However, unmapping pages out from under a running thread
- * is extraordinarily unlikely. This theoretical race condition also affects
- * linux-user/ in its get_user/put_user macros.
- *
- * TODO: Construct some helpers, written in assembly, that interact with
- * handle_cpu_signal to produce memory ops which can properly report errors
- * without racing.
- */
-
-/* Fault on byte I. All bits in FFR from I are cleared. The vector
- * result from I is CONSTRAINED UNPREDICTABLE; we choose the MERGE
- * option, which leaves subsequent data unchanged.
- */
-static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz)
-{
- uint64_t *ffr = env->vfp.pregs[FFR_PRED_NUM].p;
-
- if (i & 63) {
- ffr[i / 64] &= MAKE_64BIT_MASK(0, i & 63);
- i = ROUND_UP(i, 64);
- }
- for (; i < oprsz; i += 64) {
- ffr[i / 64] = 0;
- }
-}
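record_fault clears every FFR bit from the faulting element onward: the word containing bit I keeps only its lower bits, and every following word is zeroed. A standalone sketch over a small two-word FFR, with MAKE_64BIT_MASK and ROUND_UP spelled out explicitly, is:

#include <stdint.h>
#include <stdio.h>

/* Sketch of record_fault(): clear predicate bits i..oprsz-1 in ffr[]. */
static void record_fault_sketch(uint64_t *ffr, uintptr_t i, uintptr_t oprsz)
{
    if (i & 63) {
        ffr[i / 64] &= (1ull << (i & 63)) - 1;   /* keep bits below i */
        i = (i + 63) & ~(uintptr_t)63;           /* round up to a word */
    }
    for (; i < oprsz; i += 64) {
        ffr[i / 64] = 0;
    }
}

int main(void)
{
    uint64_t ffr[2] = { ~0ull, ~0ull };
    record_fault_sketch(ffr, 20, 128);           /* fault at byte 20 */
    printf("%016llx %016llx\n",
           (unsigned long long)ffr[0], (unsigned long long)ffr[1]);
    /* -> 00000000000fffff 0000000000000000 */
    return 0;
}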
-
-/*
- * Common helper for all contiguous first-fault loads.
- */
-static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
- uint32_t desc, const uintptr_t retaddr,
- const int esz, const int msz,
- sve_ld1_host_fn *host_fn,
- sve_ld1_tlb_fn *tlb_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const int mmu_idx = get_mmuidx(oi);
- const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
- void *vd = &env->vfp.zregs[rd];
- const int diffsz = esz - msz;
- const intptr_t reg_max = simd_oprsz(desc);
- const intptr_t mem_max = reg_max >> diffsz;
- intptr_t split, reg_off, mem_off;
- void *host;
-
- /* Skip to the first active element. */
- reg_off = find_next_active(vg, 0, reg_max, esz);
- if (unlikely(reg_off == reg_max)) {
- /* The entire predicate was false; no load occurs. */
- memset(vd, 0, reg_max);
- return;
- }
- mem_off = reg_off >> diffsz;
- set_helper_retaddr(retaddr);
-
- /*
- * If the (remaining) load is entirely within a single page, then:
-     * For softmmu, if the TLB hits, no faults will occur;
- * For user-only, either the first load will fault or none will.
- * We can thus perform the load directly to the destination and
- * Vd will be unmodified on any exception path.
- */
- split = max_for_page(addr, mem_off, mem_max);
- if (likely(split == mem_max)) {
- host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
- if (test_host_page(host)) {
- mem_off = host_fn(vd, vg, host - mem_off, mem_off, mem_max);
- tcg_debug_assert(mem_off == mem_max);
- set_helper_retaddr(0);
- /* After any fault, zero any leading inactive elements. */
- swap_memzero(vd, reg_off);
- return;
- }
- }
-
-#ifdef CONFIG_USER_ONLY
- /*
- * The page(s) containing this first element at ADDR+MEM_OFF must
- * be valid. Considering that this first element may be misaligned
- * and cross a page boundary itself, take the rest of the page from
- * the last byte of the element.
- */
- split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max);
- mem_off = host_fn(vd, vg, g2h(addr), mem_off, split);
-
- /* After any fault, zero any leading inactive elements. */
- swap_memzero(vd, reg_off);
- reg_off = mem_off << diffsz;
-#else
- /*
- * Perform one normal read, which will fault or not.
- * But it is likely to bring the page into the tlb.
- */
- tlb_fn(env, vd, reg_off, addr + mem_off, oi, retaddr);
-
-     /* After any fault, zero any leading predicated false elements. */
- swap_memzero(vd, reg_off);
- mem_off += 1 << msz;
- reg_off += 1 << esz;
-
- /* Try again to read the balance of the page. */
- split = max_for_page(addr, mem_off - 1, mem_max);
- if (split >= (1 << msz)) {
- host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
- if (host) {
- mem_off = host_fn(vd, vg, host - mem_off, mem_off, split);
- reg_off = mem_off << diffsz;
- }
- }
-#endif
-
- set_helper_retaddr(0);
- record_fault(env, reg_off, reg_max);
-}
-
-/*
- * Common helper for all contiguous no-fault loads.
- */
-static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr,
- uint32_t desc, const int esz, const int msz,
- sve_ld1_host_fn *host_fn)
-{
- const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
- void *vd = &env->vfp.zregs[rd];
- const int diffsz = esz - msz;
- const intptr_t reg_max = simd_oprsz(desc);
- const intptr_t mem_max = reg_max >> diffsz;
- const int mmu_idx = cpu_mmu_index(env, false);
- intptr_t split, reg_off, mem_off;
- void *host;
-
-#ifdef CONFIG_USER_ONLY
- host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx);
- if (likely(page_check_range(addr, mem_max, PAGE_READ) == 0)) {
- /* The entire operation is valid and will not fault. */
- host_fn(vd, vg, host, 0, mem_max);
- return;
- }
-#endif
-
- /* There will be no fault, so we may modify in advance. */
- memset(vd, 0, reg_max);
-
- /* Skip to the first active element. */
- reg_off = find_next_active(vg, 0, reg_max, esz);
- if (unlikely(reg_off == reg_max)) {
- /* The entire predicate was false; no load occurs. */
- return;
- }
- mem_off = reg_off >> diffsz;
-
-#ifdef CONFIG_USER_ONLY
- if (page_check_range(addr + mem_off, 1 << msz, PAGE_READ) == 0) {
- /* At least one load is valid; take the rest of the page. */
- split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max);
- mem_off = host_fn(vd, vg, host, mem_off, split);
- reg_off = mem_off << diffsz;
- }
-#else
- /*
- * If the address is not in the TLB, we have no way to bring the
- * entry into the TLB without also risking a fault. Note that
- * the corollary is that we never load from an address not in RAM.
- *
- * This last is out of spec, in a weird corner case.
- * Per the MemNF/MemSingleNF pseudocode, a NF load from Device memory
- * must not actually hit the bus -- it returns UNKNOWN data instead.
- * But if you map non-RAM with Normal memory attributes and do a NF
- * load then it should access the bus. (Nobody ought actually to do this
- * in the real world, obviously.)
- *
- * Then there are the annoying special cases with watchpoints...
- *
- * TODO: Add a form of tlb_fill that does not raise an exception,
- * with a form of tlb_vaddr_to_host and a set of loads to match.
- * The non_fault_vaddr_to_host would handle everything, usually,
- * and the loads would handle the iomem path for watchpoints.
- */
- host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
- split = max_for_page(addr, mem_off, mem_max);
- if (host && split >= (1 << msz)) {
- mem_off = host_fn(vd, vg, host - mem_off, mem_off, split);
- reg_off = mem_off << diffsz;
- }
-#endif
-
- record_fault(env, reg_off, reg_max);
-}
-
-#define DO_LDFF1_LDNF1_1(PART, ESZ) \
-void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \
- sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
-} \
-void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ldnf1_r(env, vg, addr, desc, ESZ, 0, sve_ld1##PART##_host); \
-}
-
-#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \
-void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \
- sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
-} \
-void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_le_host); \
-} \
-void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \
- sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
-} \
-void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_be_host); \
-}
-
-DO_LDFF1_LDNF1_1(bb, 0)
-DO_LDFF1_LDNF1_1(bhu, 1)
-DO_LDFF1_LDNF1_1(bhs, 1)
-DO_LDFF1_LDNF1_1(bsu, 2)
-DO_LDFF1_LDNF1_1(bss, 2)
-DO_LDFF1_LDNF1_1(bdu, 3)
-DO_LDFF1_LDNF1_1(bds, 3)
-
-DO_LDFF1_LDNF1_2(hh, 1, 1)
-DO_LDFF1_LDNF1_2(hsu, 2, 1)
-DO_LDFF1_LDNF1_2(hss, 2, 1)
-DO_LDFF1_LDNF1_2(hdu, 3, 1)
-DO_LDFF1_LDNF1_2(hds, 3, 1)
-
-DO_LDFF1_LDNF1_2(ss, 2, 2)
-DO_LDFF1_LDNF1_2(sdu, 3, 2)
-DO_LDFF1_LDNF1_2(sds, 3, 2)
-
-DO_LDFF1_LDNF1_2(dd, 3, 3)
-
-#undef DO_LDFF1_LDNF1_1
-#undef DO_LDFF1_LDNF1_2
-
-/*
- * Store contiguous data, protected by a governing predicate.
- */
-
-#ifdef CONFIG_SOFTMMU
-#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \
-static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
- target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
-{ \
- TLB(env, addr, *(TYPEM *)(vd + H(reg_off)), oi, ra); \
-}
-#else
-#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \
-static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
- target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
-{ \
- HOST(g2h(addr), *(TYPEM *)(vd + H(reg_off))); \
-}
-#endif
-
-DO_ST_TLB(st1bb, H1, uint8_t, stb_p, 0, helper_ret_stb_mmu)
-DO_ST_TLB(st1bh, H1_2, uint16_t, stb_p, 0, helper_ret_stb_mmu)
-DO_ST_TLB(st1bs, H1_4, uint32_t, stb_p, 0, helper_ret_stb_mmu)
-DO_ST_TLB(st1bd, , uint64_t, stb_p, 0, helper_ret_stb_mmu)
-
-DO_ST_TLB(st1hh_le, H1_2, uint16_t, stw_le_p, MO_LE, helper_le_stw_mmu)
-DO_ST_TLB(st1hs_le, H1_4, uint32_t, stw_le_p, MO_LE, helper_le_stw_mmu)
-DO_ST_TLB(st1hd_le, , uint64_t, stw_le_p, MO_LE, helper_le_stw_mmu)
-
-DO_ST_TLB(st1ss_le, H1_4, uint32_t, stl_le_p, MO_LE, helper_le_stl_mmu)
-DO_ST_TLB(st1sd_le, , uint64_t, stl_le_p, MO_LE, helper_le_stl_mmu)
-
-DO_ST_TLB(st1dd_le, , uint64_t, stq_le_p, MO_LE, helper_le_stq_mmu)
-
-DO_ST_TLB(st1hh_be, H1_2, uint16_t, stw_be_p, MO_BE, helper_be_stw_mmu)
-DO_ST_TLB(st1hs_be, H1_4, uint32_t, stw_be_p, MO_BE, helper_be_stw_mmu)
-DO_ST_TLB(st1hd_be, , uint64_t, stw_be_p, MO_BE, helper_be_stw_mmu)
-
-DO_ST_TLB(st1ss_be, H1_4, uint32_t, stl_be_p, MO_BE, helper_be_stl_mmu)
-DO_ST_TLB(st1sd_be, , uint64_t, stl_be_p, MO_BE, helper_be_stl_mmu)
-
-DO_ST_TLB(st1dd_be, , uint64_t, stq_be_p, MO_BE, helper_be_stq_mmu)
-
-#undef DO_ST_TLB
-
-/*
- * Common helpers for all contiguous 1,2,3,4-register predicated stores.
- */
-static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr,
- uint32_t desc, const uintptr_t ra,
- const int esize, const int msize,
- sve_st1_tlb_fn *tlb_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
- intptr_t i, oprsz = simd_oprsz(desc);
- void *vd = &env->vfp.zregs[rd];
-
- set_helper_retaddr(ra);
- for (i = 0; i < oprsz; ) {
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
- do {
- if (pg & 1) {
- tlb_fn(env, vd, i, addr, oi, ra);
- }
- i += esize, pg >>= esize;
- addr += msize;
- } while (i & 15);
- }
- set_helper_retaddr(0);
-}
-
-static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr,
- uint32_t desc, const uintptr_t ra,
- const int esize, const int msize,
- sve_st1_tlb_fn *tlb_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
- intptr_t i, oprsz = simd_oprsz(desc);
- void *d1 = &env->vfp.zregs[rd];
- void *d2 = &env->vfp.zregs[(rd + 1) & 31];
-
- set_helper_retaddr(ra);
- for (i = 0; i < oprsz; ) {
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
- do {
- if (pg & 1) {
- tlb_fn(env, d1, i, addr, oi, ra);
- tlb_fn(env, d2, i, addr + msize, oi, ra);
- }
- i += esize, pg >>= esize;
- addr += 2 * msize;
- } while (i & 15);
- }
- set_helper_retaddr(0);
-}
-
-static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr,
- uint32_t desc, const uintptr_t ra,
- const int esize, const int msize,
- sve_st1_tlb_fn *tlb_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
- intptr_t i, oprsz = simd_oprsz(desc);
- void *d1 = &env->vfp.zregs[rd];
- void *d2 = &env->vfp.zregs[(rd + 1) & 31];
- void *d3 = &env->vfp.zregs[(rd + 2) & 31];
-
- set_helper_retaddr(ra);
- for (i = 0; i < oprsz; ) {
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
- do {
- if (pg & 1) {
- tlb_fn(env, d1, i, addr, oi, ra);
- tlb_fn(env, d2, i, addr + msize, oi, ra);
- tlb_fn(env, d3, i, addr + 2 * msize, oi, ra);
- }
- i += esize, pg >>= esize;
- addr += 3 * msize;
- } while (i & 15);
- }
- set_helper_retaddr(0);
-}
-
-static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr,
- uint32_t desc, const uintptr_t ra,
- const int esize, const int msize,
- sve_st1_tlb_fn *tlb_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
- intptr_t i, oprsz = simd_oprsz(desc);
- void *d1 = &env->vfp.zregs[rd];
- void *d2 = &env->vfp.zregs[(rd + 1) & 31];
- void *d3 = &env->vfp.zregs[(rd + 2) & 31];
- void *d4 = &env->vfp.zregs[(rd + 3) & 31];
-
- set_helper_retaddr(ra);
- for (i = 0; i < oprsz; ) {
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
- do {
- if (pg & 1) {
- tlb_fn(env, d1, i, addr, oi, ra);
- tlb_fn(env, d2, i, addr + msize, oi, ra);
- tlb_fn(env, d3, i, addr + 2 * msize, oi, ra);
- tlb_fn(env, d4, i, addr + 3 * msize, oi, ra);
- }
- i += esize, pg >>= esize;
- addr += 4 * msize;
- } while (i & 15);
- }
- set_helper_retaddr(0);
-}
-
-#define DO_STN_1(N, NAME, ESIZE) \
-void __attribute__((flatten)) HELPER(sve_st##N##NAME##_r) \
- (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
-{ \
- sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, 1, \
- sve_st1##NAME##_tlb); \
-}
-
-#define DO_STN_2(N, NAME, ESIZE, MSIZE) \
-void __attribute__((flatten)) HELPER(sve_st##N##NAME##_le_r) \
- (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
-{ \
- sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \
- sve_st1##NAME##_le_tlb); \
-} \
-void __attribute__((flatten)) HELPER(sve_st##N##NAME##_be_r) \
- (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc) \
-{ \
- sve_st##N##_r(env, vg, addr, desc, GETPC(), ESIZE, MSIZE, \
- sve_st1##NAME##_be_tlb); \
-}
-
-DO_STN_1(1, bb, 1)
-DO_STN_1(1, bh, 2)
-DO_STN_1(1, bs, 4)
-DO_STN_1(1, bd, 8)
-DO_STN_1(2, bb, 1)
-DO_STN_1(3, bb, 1)
-DO_STN_1(4, bb, 1)
-
-DO_STN_2(1, hh, 2, 2)
-DO_STN_2(1, hs, 4, 2)
-DO_STN_2(1, hd, 8, 2)
-DO_STN_2(2, hh, 2, 2)
-DO_STN_2(3, hh, 2, 2)
-DO_STN_2(4, hh, 2, 2)
-
-DO_STN_2(1, ss, 4, 4)
-DO_STN_2(1, sd, 8, 4)
-DO_STN_2(2, ss, 4, 4)
-DO_STN_2(3, ss, 4, 4)
-DO_STN_2(4, ss, 4, 4)
-
-DO_STN_2(1, dd, 8, 8)
-DO_STN_2(2, dd, 8, 8)
-DO_STN_2(3, dd, 8, 8)
-DO_STN_2(4, dd, 8, 8)
-
-#undef DO_STN_1
-#undef DO_STN_2
-
-/*
- * Loads with a vector index.
- */
-
-/*
- * Load the element at @reg + @reg_ofs, sign or zero-extend as needed.
- */
-typedef target_ulong zreg_off_fn(void *reg, intptr_t reg_ofs);
-
-static target_ulong off_zsu_s(void *reg, intptr_t reg_ofs)
-{
- return *(uint32_t *)(reg + H1_4(reg_ofs));
-}
-
-static target_ulong off_zss_s(void *reg, intptr_t reg_ofs)
-{
- return *(int32_t *)(reg + H1_4(reg_ofs));
-}
-
-static target_ulong off_zsu_d(void *reg, intptr_t reg_ofs)
-{
- return (uint32_t)*(uint64_t *)(reg + reg_ofs);
-}
-
-static target_ulong off_zss_d(void *reg, intptr_t reg_ofs)
-{
- return (int32_t)*(uint64_t *)(reg + reg_ofs);
-}
-
-static target_ulong off_zd_d(void *reg, intptr_t reg_ofs)
-{
- return *(uint64_t *)(reg + reg_ofs);
-}
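For gather loads each element's address is base + (offset << scale), where the offset is read from a vector register and zero- or sign-extended by one of the off_* helpers above. A plain-C sketch of the address computation for zero-extended 32-bit offsets (hypothetical, no guest memory access) is:

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of gather address generation: each active element loads from
 * base + (zero-extended 32-bit offset << scale).
 */
static void gather_addrs(uint64_t *addrs, uint64_t base,
                         const uint32_t *offsets, int n, int scale)
{
    for (int i = 0; i < n; i++) {
        addrs[i] = base + ((uint64_t)offsets[i] << scale);
    }
}

int main(void)
{
    uint32_t off[4] = { 0, 1, 2, 3 };
    uint64_t addrs[4];
    gather_addrs(addrs, 0x1000, off, 4, 2);       /* 4-byte elements */
    for (int i = 0; i < 4; i++) {
        printf("0x%llx\n", (unsigned long long)addrs[i]);
    }
    return 0;
}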
-
-static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
- target_ulong base, uint32_t desc, uintptr_t ra,
- zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
- intptr_t i, oprsz = simd_oprsz(desc);
- ARMVectorReg scratch = { };
-
- set_helper_retaddr(ra);
- for (i = 0; i < oprsz; ) {
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
- do {
- if (likely(pg & 1)) {
- target_ulong off = off_fn(vm, i);
- tlb_fn(env, &scratch, i, base + (off << scale), oi, ra);
- }
- i += 4, pg >>= 4;
- } while (i & 15);
- }
- set_helper_retaddr(0);
-
- /* Wait until all exceptions have been raised to write back. */
- memcpy(vd, &scratch, oprsz);
-}
-
-static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
- target_ulong base, uint32_t desc, uintptr_t ra,
- zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
- intptr_t i, oprsz = simd_oprsz(desc) / 8;
- ARMVectorReg scratch = { };
-
- set_helper_retaddr(ra);
- for (i = 0; i < oprsz; i++) {
- uint8_t pg = *(uint8_t *)(vg + H1(i));
- if (likely(pg & 1)) {
- target_ulong off = off_fn(vm, i * 8);
- tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra);
- }
- }
- set_helper_retaddr(0);
-
- /* Wait until all exceptions have been raised to write back. */
- memcpy(vd, &scratch, oprsz * 8);
-}
-
-#define DO_LD1_ZPZ_S(MEM, OFS) \
-void __attribute__((flatten)) HELPER(sve_ld##MEM##_##OFS) \
- (CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
-{ \
- sve_ld1_zs(env, vd, vg, vm, base, desc, GETPC(), \
- off_##OFS##_s, sve_ld1##MEM##_tlb); \
-}
-
-#define DO_LD1_ZPZ_D(MEM, OFS) \
-void __attribute__((flatten)) HELPER(sve_ld##MEM##_##OFS) \
- (CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
-{ \
- sve_ld1_zd(env, vd, vg, vm, base, desc, GETPC(), \
- off_##OFS##_d, sve_ld1##MEM##_tlb); \
-}
-
-DO_LD1_ZPZ_S(bsu, zsu)
-DO_LD1_ZPZ_S(bsu, zss)
-DO_LD1_ZPZ_D(bdu, zsu)
-DO_LD1_ZPZ_D(bdu, zss)
-DO_LD1_ZPZ_D(bdu, zd)
-
-DO_LD1_ZPZ_S(bss, zsu)
-DO_LD1_ZPZ_S(bss, zss)
-DO_LD1_ZPZ_D(bds, zsu)
-DO_LD1_ZPZ_D(bds, zss)
-DO_LD1_ZPZ_D(bds, zd)
-
-DO_LD1_ZPZ_S(hsu_le, zsu)
-DO_LD1_ZPZ_S(hsu_le, zss)
-DO_LD1_ZPZ_D(hdu_le, zsu)
-DO_LD1_ZPZ_D(hdu_le, zss)
-DO_LD1_ZPZ_D(hdu_le, zd)
-
-DO_LD1_ZPZ_S(hsu_be, zsu)
-DO_LD1_ZPZ_S(hsu_be, zss)
-DO_LD1_ZPZ_D(hdu_be, zsu)
-DO_LD1_ZPZ_D(hdu_be, zss)
-DO_LD1_ZPZ_D(hdu_be, zd)
-
-DO_LD1_ZPZ_S(hss_le, zsu)
-DO_LD1_ZPZ_S(hss_le, zss)
-DO_LD1_ZPZ_D(hds_le, zsu)
-DO_LD1_ZPZ_D(hds_le, zss)
-DO_LD1_ZPZ_D(hds_le, zd)
-
-DO_LD1_ZPZ_S(hss_be, zsu)
-DO_LD1_ZPZ_S(hss_be, zss)
-DO_LD1_ZPZ_D(hds_be, zsu)
-DO_LD1_ZPZ_D(hds_be, zss)
-DO_LD1_ZPZ_D(hds_be, zd)
-
-DO_LD1_ZPZ_S(ss_le, zsu)
-DO_LD1_ZPZ_S(ss_le, zss)
-DO_LD1_ZPZ_D(sdu_le, zsu)
-DO_LD1_ZPZ_D(sdu_le, zss)
-DO_LD1_ZPZ_D(sdu_le, zd)
-
-DO_LD1_ZPZ_S(ss_be, zsu)
-DO_LD1_ZPZ_S(ss_be, zss)
-DO_LD1_ZPZ_D(sdu_be, zsu)
-DO_LD1_ZPZ_D(sdu_be, zss)
-DO_LD1_ZPZ_D(sdu_be, zd)
-
-DO_LD1_ZPZ_D(sds_le, zsu)
-DO_LD1_ZPZ_D(sds_le, zss)
-DO_LD1_ZPZ_D(sds_le, zd)
-
-DO_LD1_ZPZ_D(sds_be, zsu)
-DO_LD1_ZPZ_D(sds_be, zss)
-DO_LD1_ZPZ_D(sds_be, zd)
-
-DO_LD1_ZPZ_D(dd_le, zsu)
-DO_LD1_ZPZ_D(dd_le, zss)
-DO_LD1_ZPZ_D(dd_le, zd)
-
-DO_LD1_ZPZ_D(dd_be, zsu)
-DO_LD1_ZPZ_D(dd_be, zss)
-DO_LD1_ZPZ_D(dd_be, zd)
-
-#undef DO_LD1_ZPZ_S
-#undef DO_LD1_ZPZ_D
-
-/* First fault loads with a vector index. */
-
-/* Load one element into VD+REG_OFF from (ENV,VADDR) without faulting.
- * The controlling predicate is known to be true. Return true if the
- * load was successful.
- */
-typedef bool sve_ld1_nf_fn(CPUARMState *env, void *vd, intptr_t reg_off,
- target_ulong vaddr, int mmu_idx);
-
-#ifdef CONFIG_SOFTMMU
-#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \
-static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \
- target_ulong addr, int mmu_idx) \
-{ \
- target_ulong next_page = -(addr | TARGET_PAGE_MASK); \
- if (likely(next_page - addr >= sizeof(TYPEM))) { \
- void *host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx); \
- if (likely(host)) { \
- TYPEM val = HOST(host); \
- *(TYPEE *)(vd + H(reg_off)) = val; \
- return true; \
- } \
- } \
- return false; \
-}
-#else
-#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \
-static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \
- target_ulong addr, int mmu_idx) \
-{ \
- if (likely(page_check_range(addr, sizeof(TYPEM), PAGE_READ))) { \
- TYPEM val = HOST(g2h(addr)); \
- *(TYPEE *)(vd + H(reg_off)) = val; \
- return true; \
- } \
- return false; \
-}
-#endif
-
-DO_LD_NF(bsu, H1_4, uint32_t, uint8_t, ldub_p)
-DO_LD_NF(bss, H1_4, uint32_t, int8_t, ldsb_p)
-DO_LD_NF(bdu, , uint64_t, uint8_t, ldub_p)
-DO_LD_NF(bds, , uint64_t, int8_t, ldsb_p)
-
-DO_LD_NF(hsu_le, H1_4, uint32_t, uint16_t, lduw_le_p)
-DO_LD_NF(hss_le, H1_4, uint32_t, int16_t, ldsw_le_p)
-DO_LD_NF(hsu_be, H1_4, uint32_t, uint16_t, lduw_be_p)
-DO_LD_NF(hss_be, H1_4, uint32_t, int16_t, ldsw_be_p)
-DO_LD_NF(hdu_le, , uint64_t, uint16_t, lduw_le_p)
-DO_LD_NF(hds_le, , uint64_t, int16_t, ldsw_le_p)
-DO_LD_NF(hdu_be, , uint64_t, uint16_t, lduw_be_p)
-DO_LD_NF(hds_be, , uint64_t, int16_t, ldsw_be_p)
-
-DO_LD_NF(ss_le, H1_4, uint32_t, uint32_t, ldl_le_p)
-DO_LD_NF(ss_be, H1_4, uint32_t, uint32_t, ldl_be_p)
-DO_LD_NF(sdu_le, , uint64_t, uint32_t, ldl_le_p)
-DO_LD_NF(sds_le, , uint64_t, int32_t, ldl_le_p)
-DO_LD_NF(sdu_be, , uint64_t, uint32_t, ldl_be_p)
-DO_LD_NF(sds_be, , uint64_t, int32_t, ldl_be_p)
-
-DO_LD_NF(dd_le, , uint64_t, uint64_t, ldq_le_p)
-DO_LD_NF(dd_be, , uint64_t, uint64_t, ldq_be_p)
-
-/*
- * Common helper for all gather first-faulting loads.
- */
-static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
- target_ulong base, uint32_t desc, uintptr_t ra,
- zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn,
- sve_ld1_nf_fn *nonfault_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const int mmu_idx = get_mmuidx(oi);
- const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
- intptr_t reg_off, reg_max = simd_oprsz(desc);
- target_ulong addr;
-
- /* Skip to the first true predicate. */
- reg_off = find_next_active(vg, 0, reg_max, MO_32);
- if (likely(reg_off < reg_max)) {
- /* Perform one normal read, which will fault or not. */
- set_helper_retaddr(ra);
- addr = off_fn(vm, reg_off);
- addr = base + (addr << scale);
- tlb_fn(env, vd, reg_off, addr, oi, ra);
-
- /* The rest of the reads will be non-faulting. */
- set_helper_retaddr(0);
- }
-
- /* After any fault, zero the leading predicated false elements. */
- swap_memzero(vd, reg_off);
-
- while (likely((reg_off += 4) < reg_max)) {
- uint64_t pg = *(uint64_t *)(vg + (reg_off >> 6) * 8);
- if (likely((pg >> (reg_off & 63)) & 1)) {
- addr = off_fn(vm, reg_off);
- addr = base + (addr << scale);
- if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) {
- record_fault(env, reg_off, reg_max);
- break;
- }
- } else {
- *(uint32_t *)(vd + H1_4(reg_off)) = 0;
- }
- }
-}
-
-static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
- target_ulong base, uint32_t desc, uintptr_t ra,
- zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn,
- sve_ld1_nf_fn *nonfault_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const int mmu_idx = get_mmuidx(oi);
- const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
- intptr_t reg_off, reg_max = simd_oprsz(desc);
- target_ulong addr;
-
- /* Skip to the first true predicate. */
- reg_off = find_next_active(vg, 0, reg_max, MO_64);
- if (likely(reg_off < reg_max)) {
- /* Perform one normal read, which will fault or not. */
- set_helper_retaddr(ra);
- addr = off_fn(vm, reg_off);
- addr = base + (addr << scale);
- tlb_fn(env, vd, reg_off, addr, oi, ra);
-
- /* The rest of the reads will be non-faulting. */
- set_helper_retaddr(0);
- }
-
- /* After any fault, zero the leading predicated false elements. */
- swap_memzero(vd, reg_off);
-
- while (likely((reg_off += 8) < reg_max)) {
- uint8_t pg = *(uint8_t *)(vg + H1(reg_off >> 3));
- if (likely(pg & 1)) {
- addr = off_fn(vm, reg_off);
- addr = base + (addr << scale);
- if (!nonfault_fn(env, vd, reg_off, addr, mmu_idx)) {
- record_fault(env, reg_off, reg_max);
- break;
- }
- } else {
- *(uint64_t *)(vd + reg_off) = 0;
- }
- }
-}
-
-#define DO_LDFF1_ZPZ_S(MEM, OFS) \
-void HELPER(sve_ldff##MEM##_##OFS) \
- (CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
-{ \
- sve_ldff1_zs(env, vd, vg, vm, base, desc, GETPC(), \
- off_##OFS##_s, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \
-}
-
-#define DO_LDFF1_ZPZ_D(MEM, OFS) \
-void HELPER(sve_ldff##MEM##_##OFS) \
- (CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
-{ \
- sve_ldff1_zd(env, vd, vg, vm, base, desc, GETPC(), \
- off_##OFS##_d, sve_ld1##MEM##_tlb, sve_ld##MEM##_nf); \
-}
-
-DO_LDFF1_ZPZ_S(bsu, zsu)
-DO_LDFF1_ZPZ_S(bsu, zss)
-DO_LDFF1_ZPZ_D(bdu, zsu)
-DO_LDFF1_ZPZ_D(bdu, zss)
-DO_LDFF1_ZPZ_D(bdu, zd)
-
-DO_LDFF1_ZPZ_S(bss, zsu)
-DO_LDFF1_ZPZ_S(bss, zss)
-DO_LDFF1_ZPZ_D(bds, zsu)
-DO_LDFF1_ZPZ_D(bds, zss)
-DO_LDFF1_ZPZ_D(bds, zd)
-
-DO_LDFF1_ZPZ_S(hsu_le, zsu)
-DO_LDFF1_ZPZ_S(hsu_le, zss)
-DO_LDFF1_ZPZ_D(hdu_le, zsu)
-DO_LDFF1_ZPZ_D(hdu_le, zss)
-DO_LDFF1_ZPZ_D(hdu_le, zd)
-
-DO_LDFF1_ZPZ_S(hsu_be, zsu)
-DO_LDFF1_ZPZ_S(hsu_be, zss)
-DO_LDFF1_ZPZ_D(hdu_be, zsu)
-DO_LDFF1_ZPZ_D(hdu_be, zss)
-DO_LDFF1_ZPZ_D(hdu_be, zd)
-
-DO_LDFF1_ZPZ_S(hss_le, zsu)
-DO_LDFF1_ZPZ_S(hss_le, zss)
-DO_LDFF1_ZPZ_D(hds_le, zsu)
-DO_LDFF1_ZPZ_D(hds_le, zss)
-DO_LDFF1_ZPZ_D(hds_le, zd)
-
-DO_LDFF1_ZPZ_S(hss_be, zsu)
-DO_LDFF1_ZPZ_S(hss_be, zss)
-DO_LDFF1_ZPZ_D(hds_be, zsu)
-DO_LDFF1_ZPZ_D(hds_be, zss)
-DO_LDFF1_ZPZ_D(hds_be, zd)
-
-DO_LDFF1_ZPZ_S(ss_le, zsu)
-DO_LDFF1_ZPZ_S(ss_le, zss)
-DO_LDFF1_ZPZ_D(sdu_le, zsu)
-DO_LDFF1_ZPZ_D(sdu_le, zss)
-DO_LDFF1_ZPZ_D(sdu_le, zd)
-
-DO_LDFF1_ZPZ_S(ss_be, zsu)
-DO_LDFF1_ZPZ_S(ss_be, zss)
-DO_LDFF1_ZPZ_D(sdu_be, zsu)
-DO_LDFF1_ZPZ_D(sdu_be, zss)
-DO_LDFF1_ZPZ_D(sdu_be, zd)
-
-DO_LDFF1_ZPZ_D(sds_le, zsu)
-DO_LDFF1_ZPZ_D(sds_le, zss)
-DO_LDFF1_ZPZ_D(sds_le, zd)
-
-DO_LDFF1_ZPZ_D(sds_be, zsu)
-DO_LDFF1_ZPZ_D(sds_be, zss)
-DO_LDFF1_ZPZ_D(sds_be, zd)
-
-DO_LDFF1_ZPZ_D(dd_le, zsu)
-DO_LDFF1_ZPZ_D(dd_le, zss)
-DO_LDFF1_ZPZ_D(dd_le, zd)
-
-DO_LDFF1_ZPZ_D(dd_be, zsu)
-DO_LDFF1_ZPZ_D(dd_be, zss)
-DO_LDFF1_ZPZ_D(dd_be, zd)
-
-/* Stores with a vector index. */
-
-static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
- target_ulong base, uint32_t desc, uintptr_t ra,
- zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
- intptr_t i, oprsz = simd_oprsz(desc);
-
- set_helper_retaddr(ra);
- for (i = 0; i < oprsz; ) {
- uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
- do {
- if (likely(pg & 1)) {
- target_ulong off = off_fn(vm, i);
- tlb_fn(env, vd, i, base + (off << scale), oi, ra);
- }
- i += 4, pg >>= 4;
- } while (i & 15);
- }
- set_helper_retaddr(0);
-}
-
-static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
- target_ulong base, uint32_t desc, uintptr_t ra,
- zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
-{
- const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
- const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
- intptr_t i, oprsz = simd_oprsz(desc) / 8;
-
- set_helper_retaddr(ra);
- for (i = 0; i < oprsz; i++) {
- uint8_t pg = *(uint8_t *)(vg + H1(i));
- if (likely(pg & 1)) {
- target_ulong off = off_fn(vm, i * 8);
- tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra);
- }
- }
- set_helper_retaddr(0);
-}
-
-#define DO_ST1_ZPZ_S(MEM, OFS) \
-void __attribute__((flatten)) HELPER(sve_st##MEM##_##OFS) \
- (CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
-{ \
- sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \
- off_##OFS##_s, sve_st1##MEM##_tlb); \
-}
-
-#define DO_ST1_ZPZ_D(MEM, OFS) \
-void __attribute__((flatten)) HELPER(sve_st##MEM##_##OFS) \
- (CPUARMState *env, void *vd, void *vg, void *vm, \
- target_ulong base, uint32_t desc) \
-{ \
- sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \
- off_##OFS##_d, sve_st1##MEM##_tlb); \
-}
-
-DO_ST1_ZPZ_S(bs, zsu)
-DO_ST1_ZPZ_S(hs_le, zsu)
-DO_ST1_ZPZ_S(hs_be, zsu)
-DO_ST1_ZPZ_S(ss_le, zsu)
-DO_ST1_ZPZ_S(ss_be, zsu)
-
-DO_ST1_ZPZ_S(bs, zss)
-DO_ST1_ZPZ_S(hs_le, zss)
-DO_ST1_ZPZ_S(hs_be, zss)
-DO_ST1_ZPZ_S(ss_le, zss)
-DO_ST1_ZPZ_S(ss_be, zss)
-
-DO_ST1_ZPZ_D(bd, zsu)
-DO_ST1_ZPZ_D(hd_le, zsu)
-DO_ST1_ZPZ_D(hd_be, zsu)
-DO_ST1_ZPZ_D(sd_le, zsu)
-DO_ST1_ZPZ_D(sd_be, zsu)
-DO_ST1_ZPZ_D(dd_le, zsu)
-DO_ST1_ZPZ_D(dd_be, zsu)
-
-DO_ST1_ZPZ_D(bd, zss)
-DO_ST1_ZPZ_D(hd_le, zss)
-DO_ST1_ZPZ_D(hd_be, zss)
-DO_ST1_ZPZ_D(sd_le, zss)
-DO_ST1_ZPZ_D(sd_be, zss)
-DO_ST1_ZPZ_D(dd_le, zss)
-DO_ST1_ZPZ_D(dd_be, zss)
-
-DO_ST1_ZPZ_D(bd, zd)
-DO_ST1_ZPZ_D(hd_le, zd)
-DO_ST1_ZPZ_D(hd_be, zd)
-DO_ST1_ZPZ_D(sd_le, zd)
-DO_ST1_ZPZ_D(sd_be, zd)
-DO_ST1_ZPZ_D(dd_le, zd)
-DO_ST1_ZPZ_D(dd_be, zd)
-
-#undef DO_ST1_ZPZ_S
-#undef DO_ST1_ZPZ_D
diff --git a/target/arm/syndrome.h b/target/arm/syndrome.h
new file mode 100644
index 0000000000..3244e0740d
--- /dev/null
+++ b/target/arm/syndrome.h
@@ -0,0 +1,369 @@
+/*
+ * QEMU ARM CPU -- syndrome functions and types
+ *
+ * Copyright (c) 2014 Linaro Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see
+ * <http://www.gnu.org/licenses/gpl-2.0.html>
+ *
+ * This header defines functions, types, etc which need to be shared
+ * between different source files within target/arm/ but which are
+ * private to it and not required by the rest of QEMU.
+ */
+
+#ifndef TARGET_ARM_SYNDROME_H
+#define TARGET_ARM_SYNDROME_H
+
+#include "qemu/bitops.h"
+
+/* Valid Syndrome Register EC field values */
+enum arm_exception_class {
+ EC_UNCATEGORIZED = 0x00,
+ EC_WFX_TRAP = 0x01,
+ EC_CP15RTTRAP = 0x03,
+ EC_CP15RRTTRAP = 0x04,
+ EC_CP14RTTRAP = 0x05,
+ EC_CP14DTTRAP = 0x06,
+ EC_ADVSIMDFPACCESSTRAP = 0x07,
+ EC_FPIDTRAP = 0x08,
+ EC_PACTRAP = 0x09,
+ EC_BXJTRAP = 0x0a,
+ EC_CP14RRTTRAP = 0x0c,
+ EC_BTITRAP = 0x0d,
+ EC_ILLEGALSTATE = 0x0e,
+ EC_AA32_SVC = 0x11,
+ EC_AA32_HVC = 0x12,
+ EC_AA32_SMC = 0x13,
+ EC_AA64_SVC = 0x15,
+ EC_AA64_HVC = 0x16,
+ EC_AA64_SMC = 0x17,
+ EC_SYSTEMREGISTERTRAP = 0x18,
+ EC_SVEACCESSTRAP = 0x19,
+ EC_ERETTRAP = 0x1a,
+ EC_PACFAIL = 0x1c,
+ EC_SMETRAP = 0x1d,
+ EC_GPC = 0x1e,
+ EC_INSNABORT = 0x20,
+ EC_INSNABORT_SAME_EL = 0x21,
+ EC_PCALIGNMENT = 0x22,
+ EC_DATAABORT = 0x24,
+ EC_DATAABORT_SAME_EL = 0x25,
+ EC_SPALIGNMENT = 0x26,
+ EC_MOP = 0x27,
+ EC_AA32_FPTRAP = 0x28,
+ EC_AA64_FPTRAP = 0x2c,
+ EC_SERROR = 0x2f,
+ EC_BREAKPOINT = 0x30,
+ EC_BREAKPOINT_SAME_EL = 0x31,
+ EC_SOFTWARESTEP = 0x32,
+ EC_SOFTWARESTEP_SAME_EL = 0x33,
+ EC_WATCHPOINT = 0x34,
+ EC_WATCHPOINT_SAME_EL = 0x35,
+ EC_AA32_BKPT = 0x38,
+ EC_VECTORCATCH = 0x3a,
+ EC_AA64_BKPT = 0x3c,
+};
+
+typedef enum {
+ SME_ET_AccessTrap,
+ SME_ET_Streaming,
+ SME_ET_NotStreaming,
+ SME_ET_InactiveZA,
+} SMEExceptionType;
+
+#define ARM_EL_EC_LENGTH 6
+#define ARM_EL_EC_SHIFT 26
+#define ARM_EL_IL_SHIFT 25
+#define ARM_EL_ISV_SHIFT 24
+#define ARM_EL_IL (1 << ARM_EL_IL_SHIFT)
+#define ARM_EL_ISV (1 << ARM_EL_ISV_SHIFT)
+
+/* In the Data Abort syndrome */
+#define ARM_EL_VNCR (1 << 13)
+
+static inline uint32_t syn_get_ec(uint32_t syn)
+{
+ return syn >> ARM_EL_EC_SHIFT;
+}
+
+static inline uint32_t syn_set_ec(uint32_t syn, uint32_t ec)
+{
+ return deposit32(syn, ARM_EL_EC_SHIFT, ARM_EL_EC_LENGTH, ec);
+}
+
+/*
+ * Utility functions for constructing various kinds of syndrome value.
+ * Note that in general we follow the AArch64 syndrome values; in a
+ * few cases the value in HSR for exceptions taken to AArch32 Hyp
+ * mode differs slightly, and we fix this up when populating HSR in
+ * arm_cpu_do_interrupt_aarch32_hyp().
+ * The exception is FP/SIMD access traps -- these report extra information
+ * when taking an exception to AArch32. For those we include the extra coproc
+ * and TA fields, and mask them out when taking the exception to AArch64.
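+ *
+ * As a worked example of the layout these helpers produce,
+ * syn_aa64_svc(0x123) evaluates to
+ * (EC_AA64_SVC << 26) | ARM_EL_IL | 0x123 == 0x56000123,
+ * i.e. EC = 0x15, IL = 1, ISS = 0x123.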
+ */
+static inline uint32_t syn_uncategorized(void)
+{
+ return (EC_UNCATEGORIZED << ARM_EL_EC_SHIFT) | ARM_EL_IL;
+}
+
+static inline uint32_t syn_aa64_svc(uint32_t imm16)
+{
+ return (EC_AA64_SVC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
+}
+
+static inline uint32_t syn_aa64_hvc(uint32_t imm16)
+{
+ return (EC_AA64_HVC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
+}
+
+static inline uint32_t syn_aa64_smc(uint32_t imm16)
+{
+ return (EC_AA64_SMC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
+}
+
+static inline uint32_t syn_aa32_svc(uint32_t imm16, bool is_16bit)
+{
+ return (EC_AA32_SVC << ARM_EL_EC_SHIFT) | (imm16 & 0xffff)
+ | (is_16bit ? 0 : ARM_EL_IL);
+}
+
+static inline uint32_t syn_aa32_hvc(uint32_t imm16)
+{
+ return (EC_AA32_HVC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
+}
+
+static inline uint32_t syn_aa32_smc(void)
+{
+ return (EC_AA32_SMC << ARM_EL_EC_SHIFT) | ARM_EL_IL;
+}
+
+static inline uint32_t syn_aa64_bkpt(uint32_t imm16)
+{
+ return (EC_AA64_BKPT << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
+}
+
+static inline uint32_t syn_aa32_bkpt(uint32_t imm16, bool is_16bit)
+{
+ return (EC_AA32_BKPT << ARM_EL_EC_SHIFT) | (imm16 & 0xffff)
+ | (is_16bit ? 0 : ARM_EL_IL);
+}
+
+static inline uint32_t syn_aa64_sysregtrap(int op0, int op1, int op2,
+ int crn, int crm, int rt,
+ int isread)
+{
+ return (EC_SYSTEMREGISTERTRAP << ARM_EL_EC_SHIFT) | ARM_EL_IL
+ | (op0 << 20) | (op2 << 17) | (op1 << 14) | (crn << 10) | (rt << 5)
+ | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_cp14_rt_trap(int cv, int cond, int opc1, int opc2,
+ int crn, int crm, int rt, int isread,
+ bool is_16bit)
+{
+ return (EC_CP14RTTRAP << ARM_EL_EC_SHIFT)
+ | (is_16bit ? 0 : ARM_EL_IL)
+ | (cv << 24) | (cond << 20) | (opc2 << 17) | (opc1 << 14)
+ | (crn << 10) | (rt << 5) | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_cp15_rt_trap(int cv, int cond, int opc1, int opc2,
+ int crn, int crm, int rt, int isread,
+ bool is_16bit)
+{
+ return (EC_CP15RTTRAP << ARM_EL_EC_SHIFT)
+ | (is_16bit ? 0 : ARM_EL_IL)
+ | (cv << 24) | (cond << 20) | (opc2 << 17) | (opc1 << 14)
+ | (crn << 10) | (rt << 5) | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_cp14_rrt_trap(int cv, int cond, int opc1, int crm,
+ int rt, int rt2, int isread,
+ bool is_16bit)
+{
+ return (EC_CP14RRTTRAP << ARM_EL_EC_SHIFT)
+ | (is_16bit ? 0 : ARM_EL_IL)
+ | (cv << 24) | (cond << 20) | (opc1 << 16)
+ | (rt2 << 10) | (rt << 5) | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_cp15_rrt_trap(int cv, int cond, int opc1, int crm,
+ int rt, int rt2, int isread,
+ bool is_16bit)
+{
+ return (EC_CP15RRTTRAP << ARM_EL_EC_SHIFT)
+ | (is_16bit ? 0 : ARM_EL_IL)
+ | (cv << 24) | (cond << 20) | (opc1 << 16)
+ | (rt2 << 10) | (rt << 5) | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_fp_access_trap(int cv, int cond, bool is_16bit,
+ int coproc)
+{
+ /* AArch32 FP trap or any AArch64 FP/SIMD trap: TA == 0 */
+ return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT)
+ | (is_16bit ? 0 : ARM_EL_IL)
+ | (cv << 24) | (cond << 20) | coproc;
+}
+
+static inline uint32_t syn_simd_access_trap(int cv, int cond, bool is_16bit)
+{
+ /* AArch32 SIMD trap: TA == 1 coproc == 0 */
+ return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT)
+ | (is_16bit ? 0 : ARM_EL_IL)
+ | (cv << 24) | (cond << 20) | (1 << 5);
+}
+
+static inline uint32_t syn_sve_access_trap(void)
+{
+ return (EC_SVEACCESSTRAP << ARM_EL_EC_SHIFT) | ARM_EL_IL;
+}
+
+/*
+ * eret_op is bits [1:0] of the ERET instruction, so:
+ * 0 for ERET, 2 for ERETAA, 3 for ERETAB.
+ */
+static inline uint32_t syn_erettrap(int eret_op)
+{
+ return (EC_ERETTRAP << ARM_EL_EC_SHIFT) | ARM_EL_IL | eret_op;
+}
+
+static inline uint32_t syn_smetrap(SMEExceptionType etype, bool is_16bit)
+{
+ return (EC_SMETRAP << ARM_EL_EC_SHIFT)
+ | (is_16bit ? 0 : ARM_EL_IL) | etype;
+}
+
+static inline uint32_t syn_pacfail(bool data, int keynumber)
+{
+ int error_code = (data << 1) | keynumber;
+ return (EC_PACFAIL << ARM_EL_EC_SHIFT) | ARM_EL_IL | error_code;
+}
+
+static inline uint32_t syn_pactrap(void)
+{
+ return (EC_PACTRAP << ARM_EL_EC_SHIFT) | ARM_EL_IL;
+}
+
+static inline uint32_t syn_btitrap(int btype)
+{
+ return (EC_BTITRAP << ARM_EL_EC_SHIFT) | ARM_EL_IL | btype;
+}
+
+static inline uint32_t syn_bxjtrap(int cv, int cond, int rm)
+{
+ return (EC_BXJTRAP << ARM_EL_EC_SHIFT) | ARM_EL_IL |
+ (cv << 24) | (cond << 20) | rm;
+}
+
+static inline uint32_t syn_gpc(int s2ptw, int ind, int gpcsc, int vncr,
+ int cm, int s1ptw, int wnr, int fsc)
+{
+ return (EC_GPC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (s2ptw << 21)
+ | (ind << 20) | (gpcsc << 14) | (vncr << 13) | (cm << 8)
+ | (s1ptw << 7) | (wnr << 6) | fsc;
+}
+
+static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc)
+{
+ return (EC_INSNABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
+ | ARM_EL_IL | (ea << 9) | (s1ptw << 7) | fsc;
+}
+
+static inline uint32_t syn_data_abort_no_iss(int same_el, int fnv,
+ int ea, int cm, int s1ptw,
+ int wnr, int fsc)
+{
+ return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
+ | ARM_EL_IL
+ | (fnv << 10) | (ea << 9) | (cm << 8) | (s1ptw << 7)
+ | (wnr << 6) | fsc;
+}
+
+static inline uint32_t syn_data_abort_with_iss(int same_el,
+ int sas, int sse, int srt,
+ int sf, int ar,
+ int ea, int cm, int s1ptw,
+ int wnr, int fsc,
+ bool is_16bit)
+{
+ return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
+ | (is_16bit ? 0 : ARM_EL_IL)
+ | ARM_EL_ISV | (sas << 22) | (sse << 21) | (srt << 16)
+ | (sf << 15) | (ar << 14)
+ | (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc;
+}
+
+/*
+ * Faults due to FEAT_NV2 VNCR_EL2-based accesses report as same-EL
+ * Data Aborts with the VNCR bit set.
+ */
+static inline uint32_t syn_data_abort_vncr(int ea, int wnr, int fsc)
+{
+ return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (1 << ARM_EL_EC_SHIFT)
+ | ARM_EL_IL | ARM_EL_VNCR | (wnr << 6) | fsc;
+}
+
+static inline uint32_t syn_swstep(int same_el, int isv, int ex)
+{
+ return (EC_SOFTWARESTEP << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
+ | ARM_EL_IL | (isv << 24) | (ex << 6) | 0x22;
+}
+
+static inline uint32_t syn_watchpoint(int same_el, int cm, int wnr)
+{
+ return (EC_WATCHPOINT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
+ | ARM_EL_IL | (cm << 8) | (wnr << 6) | 0x22;
+}
+
+static inline uint32_t syn_breakpoint(int same_el)
+{
+ return (EC_BREAKPOINT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
+ | ARM_EL_IL | 0x22;
+}
+
+static inline uint32_t syn_wfx(int cv, int cond, int ti, bool is_16bit)
+{
+ return (EC_WFX_TRAP << ARM_EL_EC_SHIFT) |
+ (is_16bit ? 0 : (1 << ARM_EL_IL_SHIFT)) |
+ (cv << 24) | (cond << 20) | ti;
+}
+
+static inline uint32_t syn_illegalstate(void)
+{
+ return (EC_ILLEGALSTATE << ARM_EL_EC_SHIFT) | ARM_EL_IL;
+}
+
+static inline uint32_t syn_pcalignment(void)
+{
+ return (EC_PCALIGNMENT << ARM_EL_EC_SHIFT) | ARM_EL_IL;
+}
+
+static inline uint32_t syn_serror(uint32_t extra)
+{
+ return (EC_SERROR << ARM_EL_EC_SHIFT) | ARM_EL_IL | extra;
+}
+
+static inline uint32_t syn_mop(bool is_set, bool is_setg, int options,
+ bool epilogue, bool wrong_option, bool option_a,
+ int destreg, int srcreg, int sizereg)
+{
+ return (EC_MOP << ARM_EL_EC_SHIFT) | ARM_EL_IL |
+ (is_set << 24) | (is_setg << 23) | (options << 19) |
+ (epilogue << 18) | (wrong_option << 17) | (option_a << 16) |
+ (destreg << 10) | (srcreg << 5) | sizereg;
+}
+
+#endif /* TARGET_ARM_SYNDROME_H */
diff --git a/target/arm/tcg-stubs.c b/target/arm/tcg-stubs.c
new file mode 100644
index 0000000000..152b172e24
--- /dev/null
+++ b/target/arm/tcg-stubs.c
@@ -0,0 +1,27 @@
+/*
+ * QEMU ARM stubs for some TCG helper functions
+ *
+ * Copyright 2021 SUSE LLC
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+
+void write_v7m_exception(CPUARMState *env, uint32_t new_exc)
+{
+ g_assert_not_reached();
+}
+
+void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome,
+ uint32_t target_el, uintptr_t ra)
+{
+ g_assert_not_reached();
+}
+/* Temporarily while cpu_get_tb_cpu_state() is still in common code */
+void assert_hflags_rebuild_correctly(CPUARMState *env)
+{
+}
diff --git a/target/arm/tcg/a32-uncond.decode b/target/arm/tcg/a32-uncond.decode
new file mode 100644
index 0000000000..2339de2e94
--- /dev/null
+++ b/target/arm/tcg/a32-uncond.decode
@@ -0,0 +1,74 @@
+# A32 unconditional instructions
+#
+# Copyright (c) 2019 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+# All insns that have 0xf in insn[31:28] are decoded here.
+# All of those that have a COND field in insn[31:28] are in a32.decode
+#
+
+&empty !extern
+&i !extern imm
+&setend E
+
+# Branch with Link and Exchange
+
+%imm24h 0:s24 24:1 !function=times_2
+
+BLX_i 1111 101 . ........................ &i imm=%imm24h
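+
+# Informative example of the field above: decodetree concatenates the
+# subfields most-significant-first, so imm24=0x400 with H (insn[24])
+# set gives (0x400 << 1 | 1) * 2 == 0x1002, a halfword-aligned byte
+# offset as BLX to Thumb code requires.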
+
+# System Instructions
+
+&rfe rn w pu
+&srs mode w pu
+&cps mode imod M A I F
+
+RFE 1111 100 pu:2 0 w:1 1 rn:4 0000 1010 0000 0000 &rfe
+SRS 1111 100 pu:2 1 w:1 0 1101 0000 0101 000 mode:5 &srs
+CPS 1111 0001 0000 imod:2 M:1 0 0000 000 A:1 I:1 F:1 0 mode:5 \
+ &cps
+
+# Clear-Exclusive, Barriers
+
+# QEMU does not require the option field for the barriers.
+CLREX 1111 0101 0111 1111 1111 0000 0001 1111
+DSB 1111 0101 0111 1111 1111 0000 0100 ----
+DMB 1111 0101 0111 1111 1111 0000 0101 ----
+ISB 1111 0101 0111 1111 1111 0000 0110 ----
+SB 1111 0101 0111 1111 1111 0000 0111 0000
+
+# Set Endianness
+SETEND 1111 0001 0000 0001 0000 00 E:1 0 0000 0000 &setend
+
+# Preload instructions
+
+PLD 1111 0101 -101 ---- 1111 ---- ---- ---- # (imm, lit) 5te
+PLDW 1111 0101 -001 ---- 1111 ---- ---- ---- # (imm, lit) 7mp
+PLI 1111 0100 -101 ---- 1111 ---- ---- ---- # (imm, lit) 7
+
+PLD 1111 0111 -101 ---- 1111 ----- -- 0 ---- # (register) 5te
+PLDW 1111 0111 -001 ---- 1111 ----- -- 0 ---- # (register) 7mp
+PLI 1111 0110 -101 ---- 1111 ----- -- 0 ---- # (register) 7
+
+# Unallocated memory hints
+#
+# Since these are v7MP nops, and PLDW is v7MP and implemented as nop,
+# (ab)use the PLDW helper.
+
+PLDW 1111 0100 -001 ---- ---- ---- ---- ----
+PLDW 1111 0110 -001 ---- ---- ---- ---0 ----
diff --git a/target/arm/tcg/a32.decode b/target/arm/tcg/a32.decode
new file mode 100644
index 0000000000..f2ca480949
--- /dev/null
+++ b/target/arm/tcg/a32.decode
@@ -0,0 +1,557 @@
+# A32 conditional instructions
+#
+# Copyright (c) 2019 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+# All of the insns that have a COND field in insn[31:28] are here.
+# All insns that have 0xf in insn[31:28] are in a32-uncond.decode.
+#
+
+&empty
+&s_rrr_shi s rd rn rm shim shty
+&s_rrr_shr s rn rd rm rs shty
+&s_rri_rot s rn rd imm rot
+&s_rrrr s rd rn rm ra
+&rrrr rd rn rm ra
+&rrr_rot rd rn rm rot
+&rrr rd rn rm
+&rr rd rm
+&ri rd imm
+&r rm
+&i imm
+&msr_reg rn r mask
+&mrs_reg rd r
+&msr_bank rn r sysm
+&mrs_bank rd r sysm
+&ldst_rr p w u rn rt rm shimm shtype
+&ldst_ri p w u rn rt imm
+&ldst_block rn i b u w list
+&strex rn rd rt rt2 imm
+&ldrex rn rt rt2 imm
+&bfx rd rn lsb widthm1
+&bfi rd rn lsb msb
+&sat rd rn satimm imm sh
+&pkh rd rn rm imm tb
+&mcr cp opc1 crn crm opc2 rt
+&mcrr cp opc1 crm rt rt2
+
+# Data-processing (register)
+
+@s_rrr_shi ---- ... .... s:1 rn:4 rd:4 shim:5 shty:2 . rm:4 \
+ &s_rrr_shi
+@s_rxr_shi ---- ... .... s:1 .... rd:4 shim:5 shty:2 . rm:4 \
+ &s_rrr_shi rn=0
+@S_xrr_shi ---- ... .... . rn:4 .... shim:5 shty:2 . rm:4 \
+ &s_rrr_shi s=1 rd=0
+
+AND_rrri .... 000 0000 . .... .... ..... .. 0 .... @s_rrr_shi
+EOR_rrri .... 000 0001 . .... .... ..... .. 0 .... @s_rrr_shi
+SUB_rrri .... 000 0010 . .... .... ..... .. 0 .... @s_rrr_shi
+RSB_rrri .... 000 0011 . .... .... ..... .. 0 .... @s_rrr_shi
+ADD_rrri .... 000 0100 . .... .... ..... .. 0 .... @s_rrr_shi
+ADC_rrri .... 000 0101 . .... .... ..... .. 0 .... @s_rrr_shi
+SBC_rrri .... 000 0110 . .... .... ..... .. 0 .... @s_rrr_shi
+RSC_rrri .... 000 0111 . .... .... ..... .. 0 .... @s_rrr_shi
+TST_xrri .... 000 1000 1 .... 0000 ..... .. 0 .... @S_xrr_shi
+TEQ_xrri .... 000 1001 1 .... 0000 ..... .. 0 .... @S_xrr_shi
+CMP_xrri .... 000 1010 1 .... 0000 ..... .. 0 .... @S_xrr_shi
+CMN_xrri .... 000 1011 1 .... 0000 ..... .. 0 .... @S_xrr_shi
+ORR_rrri .... 000 1100 . .... .... ..... .. 0 .... @s_rrr_shi
+MOV_rxri .... 000 1101 . 0000 .... ..... .. 0 .... @s_rxr_shi
+BIC_rrri .... 000 1110 . .... .... ..... .. 0 .... @s_rrr_shi
+MVN_rxri .... 000 1111 . 0000 .... ..... .. 0 .... @s_rxr_shi
+
+%imm16 16:4 0:12
+@mov16 ---- .... .... .... rd:4 ............ &ri imm=%imm16
+
+MOVW .... 0011 0000 .... .... ............ @mov16
+MOVT .... 0011 0100 .... .... ............ @mov16
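+
+# Informative example: a MOVW with insn[19:16]=0x1 and insn[11:0]=0x234
+# decodes to imm=0x1234, since %imm16 places the 4-bit subfield above
+# the 12-bit one.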
+
+# Data-processing (register-shifted register)
+
+@s_rrr_shr ---- ... .... s:1 rn:4 rd:4 rs:4 . shty:2 . rm:4 \
+ &s_rrr_shr
+@s_rxr_shr ---- ... .... s:1 .... rd:4 rs:4 . shty:2 . rm:4 \
+ &s_rrr_shr rn=0
+@S_xrr_shr ---- ... .... . rn:4 .... rs:4 . shty:2 . rm:4 \
+ &s_rrr_shr rd=0 s=1
+
+AND_rrrr .... 000 0000 . .... .... .... 0 .. 1 .... @s_rrr_shr
+EOR_rrrr .... 000 0001 . .... .... .... 0 .. 1 .... @s_rrr_shr
+SUB_rrrr .... 000 0010 . .... .... .... 0 .. 1 .... @s_rrr_shr
+RSB_rrrr .... 000 0011 . .... .... .... 0 .. 1 .... @s_rrr_shr
+ADD_rrrr .... 000 0100 . .... .... .... 0 .. 1 .... @s_rrr_shr
+ADC_rrrr .... 000 0101 . .... .... .... 0 .. 1 .... @s_rrr_shr
+SBC_rrrr .... 000 0110 . .... .... .... 0 .. 1 .... @s_rrr_shr
+RSC_rrrr .... 000 0111 . .... .... .... 0 .. 1 .... @s_rrr_shr
+TST_xrrr .... 000 1000 1 .... 0000 .... 0 .. 1 .... @S_xrr_shr
+TEQ_xrrr .... 000 1001 1 .... 0000 .... 0 .. 1 .... @S_xrr_shr
+CMP_xrrr .... 000 1010 1 .... 0000 .... 0 .. 1 .... @S_xrr_shr
+CMN_xrrr .... 000 1011 1 .... 0000 .... 0 .. 1 .... @S_xrr_shr
+ORR_rrrr .... 000 1100 . .... .... .... 0 .. 1 .... @s_rrr_shr
+MOV_rxrr .... 000 1101 . 0000 .... .... 0 .. 1 .... @s_rxr_shr
+BIC_rrrr .... 000 1110 . .... .... .... 0 .. 1 .... @s_rrr_shr
+MVN_rxrr .... 000 1111 . 0000 .... .... 0 .. 1 .... @s_rxr_shr
+
+# Data-processing (immediate)
+
+%a32extrot 8:4 !function=times_2
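+# Informative example: this is the usual A32 modified-immediate
+# expansion of imm8 rotated right by twice the 4-bit rotate field, so
+# rot=4 with imm=0xff represents 0xff ROR 8 == 0xff000000.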
+
+@s_rri_rot ---- ... .... s:1 rn:4 rd:4 .... imm:8 \
+ &s_rri_rot rot=%a32extrot
+@s_rxi_rot ---- ... .... s:1 .... rd:4 .... imm:8 \
+ &s_rri_rot rot=%a32extrot rn=0
+@S_xri_rot ---- ... .... . rn:4 .... .... imm:8 \
+ &s_rri_rot rot=%a32extrot rd=0 s=1
+
+AND_rri .... 001 0000 . .... .... ............ @s_rri_rot
+EOR_rri .... 001 0001 . .... .... ............ @s_rri_rot
+SUB_rri .... 001 0010 . .... .... ............ @s_rri_rot
+RSB_rri .... 001 0011 . .... .... ............ @s_rri_rot
+ADD_rri .... 001 0100 . .... .... ............ @s_rri_rot
+ADC_rri .... 001 0101 . .... .... ............ @s_rri_rot
+SBC_rri .... 001 0110 . .... .... ............ @s_rri_rot
+RSC_rri .... 001 0111 . .... .... ............ @s_rri_rot
+TST_xri .... 001 1000 1 .... 0000 ............ @S_xri_rot
+TEQ_xri .... 001 1001 1 .... 0000 ............ @S_xri_rot
+CMP_xri .... 001 1010 1 .... 0000 ............ @S_xri_rot
+CMN_xri .... 001 1011 1 .... 0000 ............ @S_xri_rot
+ORR_rri .... 001 1100 . .... .... ............ @s_rri_rot
+MOV_rxi .... 001 1101 . 0000 .... ............ @s_rxi_rot
+BIC_rri .... 001 1110 . .... .... ............ @s_rri_rot
+MVN_rxi .... 001 1111 . 0000 .... ............ @s_rxi_rot
+
+# Multiply and multiply accumulate
+
+@s_rdamn ---- .... ... s:1 rd:4 ra:4 rm:4 .... rn:4 &s_rrrr
+@s_rd0mn ---- .... ... s:1 rd:4 .... rm:4 .... rn:4 &s_rrrr ra=0
+@rdamn ---- .... ... . rd:4 ra:4 rm:4 .... rn:4 &rrrr
+@rd0mn ---- .... ... . rd:4 .... rm:4 .... rn:4 &rrrr ra=0
+
+MUL .... 0000 000 . .... 0000 .... 1001 .... @s_rd0mn
+MLA .... 0000 001 . .... .... .... 1001 .... @s_rdamn
+UMAAL .... 0000 010 0 .... .... .... 1001 .... @rdamn
+MLS .... 0000 011 0 .... .... .... 1001 .... @rdamn
+UMULL .... 0000 100 . .... .... .... 1001 .... @s_rdamn
+UMLAL .... 0000 101 . .... .... .... 1001 .... @s_rdamn
+SMULL .... 0000 110 . .... .... .... 1001 .... @s_rdamn
+SMLAL .... 0000 111 . .... .... .... 1001 .... @s_rdamn
+
+# Saturating addition and subtraction
+
+@rndm ---- .... .... rn:4 rd:4 .... .... rm:4 &rrr
+
+QADD .... 0001 0000 .... .... 0000 0101 .... @rndm
+QSUB .... 0001 0010 .... .... 0000 0101 .... @rndm
+QDADD .... 0001 0100 .... .... 0000 0101 .... @rndm
+QDSUB .... 0001 0110 .... .... 0000 0101 .... @rndm
+
+# Halfword multiply and multiply accumulate
+
+SMLABB .... 0001 0000 .... .... .... 1000 .... @rdamn
+SMLABT .... 0001 0000 .... .... .... 1100 .... @rdamn
+SMLATB .... 0001 0000 .... .... .... 1010 .... @rdamn
+SMLATT .... 0001 0000 .... .... .... 1110 .... @rdamn
+SMLAWB .... 0001 0010 .... .... .... 1000 .... @rdamn
+SMULWB .... 0001 0010 .... 0000 .... 1010 .... @rd0mn
+SMLAWT .... 0001 0010 .... .... .... 1100 .... @rdamn
+SMULWT .... 0001 0010 .... 0000 .... 1110 .... @rd0mn
+SMLALBB .... 0001 0100 .... .... .... 1000 .... @rdamn
+SMLALBT .... 0001 0100 .... .... .... 1100 .... @rdamn
+SMLALTB .... 0001 0100 .... .... .... 1010 .... @rdamn
+SMLALTT .... 0001 0100 .... .... .... 1110 .... @rdamn
+SMULBB .... 0001 0110 .... 0000 .... 1000 .... @rd0mn
+SMULBT .... 0001 0110 .... 0000 .... 1100 .... @rd0mn
+SMULTB .... 0001 0110 .... 0000 .... 1010 .... @rd0mn
+SMULTT .... 0001 0110 .... 0000 .... 1110 .... @rd0mn
+
+# MSR (immediate) and hints
+
+&msr_i r mask rot imm
+@msr_i ---- .... .... mask:4 .... rot:4 imm:8 &msr_i
+
+{
+ {
+ [
+ YIELD ---- 0011 0010 0000 1111 ---- 0000 0001
+ WFE ---- 0011 0010 0000 1111 ---- 0000 0010
+ WFI ---- 0011 0010 0000 1111 ---- 0000 0011
+
+ # TODO: Implement SEV, SEVL; may help SMP performance.
+ # SEV ---- 0011 0010 0000 1111 ---- 0000 0100
+ # SEVL ---- 0011 0010 0000 1111 ---- 0000 0101
+
+ ESB ---- 0011 0010 0000 1111 ---- 0001 0000
+ ]
+
+ # The canonical nop ends in 00000000, but the whole of the
+ # rest of the space executes as nop if otherwise unsupported.
+ NOP ---- 0011 0010 0000 1111 ---- ---- ----
+ }
+ # Note mask = 0 is covered by NOP
+ MSR_imm .... 0011 0010 .... 1111 .... .... .... @msr_i r=0
+}
+MSR_imm .... 0011 0110 .... 1111 .... .... .... @msr_i r=1
+
+# Cyclic Redundancy Check
+
+CRC32B .... 0001 0000 .... .... 0000 0100 .... @rndm
+CRC32H .... 0001 0010 .... .... 0000 0100 .... @rndm
+CRC32W .... 0001 0100 .... .... 0000 0100 .... @rndm
+CRC32CB .... 0001 0000 .... .... 0010 0100 .... @rndm
+CRC32CH .... 0001 0010 .... .... 0010 0100 .... @rndm
+CRC32CW .... 0001 0100 .... .... 0010 0100 .... @rndm
+
+# Miscellaneous instructions
+
+%sysm 8:1 16:4
+%imm16_8_0 8:12 0:4
+
+@rm ---- .... .... .... .... .... .... rm:4 &r
+@rdm ---- .... .... .... rd:4 .... .... rm:4 &rr
+@i16 ---- .... .... .... .... .... .... .... &i imm=%imm16_8_0
+
+MRS_bank ---- 0001 0 r:1 00 .... rd:4 001. 0000 0000 &mrs_bank %sysm
+MSR_bank ---- 0001 0 r:1 10 .... 1111 001. 0000 rn:4 &msr_bank %sysm
+
+MRS_reg ---- 0001 0 r:1 00 1111 rd:4 0000 0000 0000 &mrs_reg
+MSR_reg ---- 0001 0 r:1 10 mask:4 1111 0000 0000 rn:4 &msr_reg
+
+BX .... 0001 0010 1111 1111 1111 0001 .... @rm
+BXJ .... 0001 0010 1111 1111 1111 0010 .... @rm
+BLX_r .... 0001 0010 1111 1111 1111 0011 .... @rm
+
+CLZ .... 0001 0110 1111 .... 1111 0001 .... @rdm
+
+ERET ---- 0001 0110 0000 0000 0000 0110 1110
+
+HLT .... 0001 0000 .... .... .... 0111 .... @i16
+BKPT .... 0001 0010 .... .... .... 0111 .... @i16
+HVC .... 0001 0100 .... .... .... 0111 .... @i16
+SMC ---- 0001 0110 0000 0000 0000 0111 imm:4 &i
+
+# Load/Store Dual, Half, Signed Byte (register)
+
+@ldst_rr_p1w ---- ...1 u:1 . w:1 . rn:4 rt:4 .... .... rm:4 \
+ &ldst_rr p=1 shimm=0 shtype=0
+@ldst_rr_pw0 ---- ...0 u:1 . 0 . rn:4 rt:4 .... .... rm:4 \
+ &ldst_rr p=0 w=0 shimm=0 shtype=0
+
+STRH_rr .... 000. .0.0 .... .... 0000 1011 .... @ldst_rr_pw0
+STRH_rr .... 000. .0.0 .... .... 0000 1011 .... @ldst_rr_p1w
+
+LDRD_rr .... 000. .0.0 .... .... 0000 1101 .... @ldst_rr_pw0
+LDRD_rr .... 000. .0.0 .... .... 0000 1101 .... @ldst_rr_p1w
+
+STRD_rr .... 000. .0.0 .... .... 0000 1111 .... @ldst_rr_pw0
+STRD_rr .... 000. .0.0 .... .... 0000 1111 .... @ldst_rr_p1w
+
+LDRH_rr .... 000. .0.1 .... .... 0000 1011 .... @ldst_rr_pw0
+LDRH_rr .... 000. .0.1 .... .... 0000 1011 .... @ldst_rr_p1w
+
+LDRSB_rr .... 000. .0.1 .... .... 0000 1101 .... @ldst_rr_pw0
+LDRSB_rr .... 000. .0.1 .... .... 0000 1101 .... @ldst_rr_p1w
+
+LDRSH_rr .... 000. .0.1 .... .... 0000 1111 .... @ldst_rr_pw0
+LDRSH_rr .... 000. .0.1 .... .... 0000 1111 .... @ldst_rr_p1w
+
+# Note the unpriv load/stores use the previously invalid P=0, W=1 encoding,
+# and act as normal post-indexed (P=0, W=0).
+@ldst_rr_p0w1 ---- ...0 u:1 . 1 . rn:4 rt:4 .... .... rm:4 \
+ &ldst_rr p=0 w=0 shimm=0 shtype=0
+
+STRHT_rr .... 000. .0.0 .... .... 0000 1011 .... @ldst_rr_p0w1
+LDRHT_rr .... 000. .0.1 .... .... 0000 1011 .... @ldst_rr_p0w1
+LDRSBT_rr .... 000. .0.1 .... .... 0000 1101 .... @ldst_rr_p0w1
+LDRSHT_rr .... 000. .0.1 .... .... 0000 1111 .... @ldst_rr_p0w1
+
+# Load/Store word and unsigned byte (register)
+
+@ldst_rs_p1w ---- ...1 u:1 . w:1 . rn:4 rt:4 shimm:5 shtype:2 . rm:4 \
+ &ldst_rr p=1
+@ldst_rs_pw0 ---- ...0 u:1 . 0 . rn:4 rt:4 shimm:5 shtype:2 . rm:4 \
+ &ldst_rr p=0 w=0
+
+STR_rr .... 011. .0.0 .... .... .... ...0 .... @ldst_rs_pw0
+STR_rr .... 011. .0.0 .... .... .... ...0 .... @ldst_rs_p1w
+STRB_rr .... 011. .1.0 .... .... .... ...0 .... @ldst_rs_pw0
+STRB_rr .... 011. .1.0 .... .... .... ...0 .... @ldst_rs_p1w
+
+LDR_rr .... 011. .0.1 .... .... .... ...0 .... @ldst_rs_pw0
+LDR_rr .... 011. .0.1 .... .... .... ...0 .... @ldst_rs_p1w
+LDRB_rr .... 011. .1.1 .... .... .... ...0 .... @ldst_rs_pw0
+LDRB_rr .... 011. .1.1 .... .... .... ...0 .... @ldst_rs_p1w
+
+@ldst_rs_p0w1 ---- ...0 u:1 . 1 . rn:4 rt:4 shimm:5 shtype:2 . rm:4 \
+ &ldst_rr p=0 w=0
+
+STRT_rr .... 011. .0.0 .... .... .... ...0 .... @ldst_rs_p0w1
+STRBT_rr .... 011. .1.0 .... .... .... ...0 .... @ldst_rs_p0w1
+LDRT_rr .... 011. .0.1 .... .... .... ...0 .... @ldst_rs_p0w1
+LDRBT_rr .... 011. .1.1 .... .... .... ...0 .... @ldst_rs_p0w1
+
+# Load/Store Dual, Half, Signed Byte (immediate)
+
+%imm8s_8_0 8:4 0:4
+@ldst_ri8_p1w ---- ...1 u:1 . w:1 . rn:4 rt:4 .... .... .... \
+ &ldst_ri imm=%imm8s_8_0 p=1
+@ldst_ri8_pw0 ---- ...0 u:1 . 0 . rn:4 rt:4 .... .... .... \
+ &ldst_ri imm=%imm8s_8_0 p=0 w=0
+
+STRH_ri .... 000. .1.0 .... .... .... 1011 .... @ldst_ri8_pw0
+STRH_ri .... 000. .1.0 .... .... .... 1011 .... @ldst_ri8_p1w
+
+LDRD_ri_a32 .... 000. .1.0 .... .... .... 1101 .... @ldst_ri8_pw0
+LDRD_ri_a32 .... 000. .1.0 .... .... .... 1101 .... @ldst_ri8_p1w
+
+STRD_ri_a32 .... 000. .1.0 .... .... .... 1111 .... @ldst_ri8_pw0
+STRD_ri_a32 .... 000. .1.0 .... .... .... 1111 .... @ldst_ri8_p1w
+
+LDRH_ri .... 000. .1.1 .... .... .... 1011 .... @ldst_ri8_pw0
+LDRH_ri .... 000. .1.1 .... .... .... 1011 .... @ldst_ri8_p1w
+
+LDRSB_ri .... 000. .1.1 .... .... .... 1101 .... @ldst_ri8_pw0
+LDRSB_ri .... 000. .1.1 .... .... .... 1101 .... @ldst_ri8_p1w
+
+LDRSH_ri .... 000. .1.1 .... .... .... 1111 .... @ldst_ri8_pw0
+LDRSH_ri .... 000. .1.1 .... .... .... 1111 .... @ldst_ri8_p1w
+
+# Note the unpriv load/stores use the previously invalid P=0, W=1 encoding,
+# and act as normal post-indexed (P=0, W=0).
+@ldst_ri8_p0w1 ---- ...0 u:1 . 1 . rn:4 rt:4 .... .... .... \
+ &ldst_ri imm=%imm8s_8_0 p=0 w=0
+
+STRHT_ri .... 000. .1.0 .... .... .... 1011 .... @ldst_ri8_p0w1
+LDRHT_ri .... 000. .1.1 .... .... .... 1011 .... @ldst_ri8_p0w1
+LDRSBT_ri .... 000. .1.1 .... .... .... 1101 .... @ldst_ri8_p0w1
+LDRSHT_ri .... 000. .1.1 .... .... .... 1111 .... @ldst_ri8_p0w1
+
+# Load/Store word and unsigned byte (immediate)
+
+@ldst_ri12_p1w ---- ...1 u:1 . w:1 . rn:4 rt:4 imm:12 &ldst_ri p=1
+@ldst_ri12_pw0 ---- ...0 u:1 . 0 . rn:4 rt:4 imm:12 &ldst_ri p=0 w=0
+
+STR_ri .... 010. .0.0 .... .... ............ @ldst_ri12_p1w
+STR_ri .... 010. .0.0 .... .... ............ @ldst_ri12_pw0
+STRB_ri .... 010. .1.0 .... .... ............ @ldst_ri12_p1w
+STRB_ri .... 010. .1.0 .... .... ............ @ldst_ri12_pw0
+
+LDR_ri .... 010. .0.1 .... .... ............ @ldst_ri12_p1w
+LDR_ri .... 010. .0.1 .... .... ............ @ldst_ri12_pw0
+LDRB_ri .... 010. .1.1 .... .... ............ @ldst_ri12_p1w
+LDRB_ri .... 010. .1.1 .... .... ............ @ldst_ri12_pw0
+
+@ldst_ri12_p0w1 ---- ...0 u:1 . 1 . rn:4 rt:4 imm:12 &ldst_ri p=0 w=0
+
+STRT_ri .... 010. .0.0 .... .... ............ @ldst_ri12_p0w1
+STRBT_ri .... 010. .1.0 .... .... ............ @ldst_ri12_p0w1
+LDRT_ri .... 010. .0.1 .... .... ............ @ldst_ri12_p0w1
+LDRBT_ri .... 010. .1.1 .... .... ............ @ldst_ri12_p0w1
+
+# Synchronization primitives
+
+@swp ---- .... .... rn:4 rt:4 .... .... rt2:4
+
+SWP .... 0001 0000 .... .... 0000 1001 .... @swp
+SWPB .... 0001 0100 .... .... 0000 1001 .... @swp
+
+# Load/Store Exclusive and Load-Acquire/Store-Release
+#
+# Note rt2 for STREXD/LDREXD is set by the helper after checking rt is even.
+
+@strex ---- .... .... rn:4 rd:4 .... .... rt:4 \
+ &strex imm=0 rt2=15
+@ldrex ---- .... .... rn:4 rt:4 .... .... .... \
+ &ldrex imm=0 rt2=15
+@stl ---- .... .... rn:4 .... .... .... rt:4 \
+ &ldrex imm=0 rt2=15
+
+STREX .... 0001 1000 .... .... 1111 1001 .... @strex
+STREXD_a32 .... 0001 1010 .... .... 1111 1001 .... @strex
+STREXB .... 0001 1100 .... .... 1111 1001 .... @strex
+STREXH .... 0001 1110 .... .... 1111 1001 .... @strex
+
+STLEX .... 0001 1000 .... .... 1110 1001 .... @strex
+STLEXD_a32 .... 0001 1010 .... .... 1110 1001 .... @strex
+STLEXB .... 0001 1100 .... .... 1110 1001 .... @strex
+STLEXH .... 0001 1110 .... .... 1110 1001 .... @strex
+
+STL .... 0001 1000 .... 1111 1100 1001 .... @stl
+STLB .... 0001 1100 .... 1111 1100 1001 .... @stl
+STLH .... 0001 1110 .... 1111 1100 1001 .... @stl
+
+LDREX .... 0001 1001 .... .... 1111 1001 1111 @ldrex
+LDREXD_a32 .... 0001 1011 .... .... 1111 1001 1111 @ldrex
+LDREXB .... 0001 1101 .... .... 1111 1001 1111 @ldrex
+LDREXH .... 0001 1111 .... .... 1111 1001 1111 @ldrex
+
+LDAEX .... 0001 1001 .... .... 1110 1001 1111 @ldrex
+LDAEXD_a32 .... 0001 1011 .... .... 1110 1001 1111 @ldrex
+LDAEXB .... 0001 1101 .... .... 1110 1001 1111 @ldrex
+LDAEXH .... 0001 1111 .... .... 1110 1001 1111 @ldrex
+
+LDA .... 0001 1001 .... .... 1100 1001 1111 @ldrex
+LDAB .... 0001 1101 .... .... 1100 1001 1111 @ldrex
+LDAH .... 0001 1111 .... .... 1100 1001 1111 @ldrex
+
+# Media instructions
+
+# usad8 is usada8 w/ ra=15
+USADA8 ---- 0111 1000 rd:4 ra:4 rm:4 0001 rn:4
+
+# ubfx and sbfx
+@bfx ---- .... ... widthm1:5 rd:4 lsb:5 ... rn:4 &bfx
+
+SBFX .... 0111 101 ..... .... ..... 101 .... @bfx
+UBFX .... 0111 111 ..... .... ..... 101 .... @bfx
+
+# bfc is bfi w/ rn=15
+BFCI ---- 0111 110 msb:5 rd:4 lsb:5 001 rn:4 &bfi
+
+# While we could get UNDEF by not including this, add the pattern for
+# documentation and to conflict with any other typos in this file.
+UDF 1110 0111 1111 ---- ---- ---- 1111 ----
+
+# Parallel addition and subtraction
+
+SADD16 .... 0110 0001 .... .... 1111 0001 .... @rndm
+SASX .... 0110 0001 .... .... 1111 0011 .... @rndm
+SSAX .... 0110 0001 .... .... 1111 0101 .... @rndm
+SSUB16 .... 0110 0001 .... .... 1111 0111 .... @rndm
+SADD8 .... 0110 0001 .... .... 1111 1001 .... @rndm
+SSUB8 .... 0110 0001 .... .... 1111 1111 .... @rndm
+
+QADD16 .... 0110 0010 .... .... 1111 0001 .... @rndm
+QASX .... 0110 0010 .... .... 1111 0011 .... @rndm
+QSAX .... 0110 0010 .... .... 1111 0101 .... @rndm
+QSUB16 .... 0110 0010 .... .... 1111 0111 .... @rndm
+QADD8 .... 0110 0010 .... .... 1111 1001 .... @rndm
+QSUB8 .... 0110 0010 .... .... 1111 1111 .... @rndm
+
+SHADD16 .... 0110 0011 .... .... 1111 0001 .... @rndm
+SHASX .... 0110 0011 .... .... 1111 0011 .... @rndm
+SHSAX .... 0110 0011 .... .... 1111 0101 .... @rndm
+SHSUB16 .... 0110 0011 .... .... 1111 0111 .... @rndm
+SHADD8 .... 0110 0011 .... .... 1111 1001 .... @rndm
+SHSUB8 .... 0110 0011 .... .... 1111 1111 .... @rndm
+
+UADD16 .... 0110 0101 .... .... 1111 0001 .... @rndm
+UASX .... 0110 0101 .... .... 1111 0011 .... @rndm
+USAX .... 0110 0101 .... .... 1111 0101 .... @rndm
+USUB16 .... 0110 0101 .... .... 1111 0111 .... @rndm
+UADD8 .... 0110 0101 .... .... 1111 1001 .... @rndm
+USUB8 .... 0110 0101 .... .... 1111 1111 .... @rndm
+
+UQADD16 .... 0110 0110 .... .... 1111 0001 .... @rndm
+UQASX .... 0110 0110 .... .... 1111 0011 .... @rndm
+UQSAX .... 0110 0110 .... .... 1111 0101 .... @rndm
+UQSUB16 .... 0110 0110 .... .... 1111 0111 .... @rndm
+UQADD8 .... 0110 0110 .... .... 1111 1001 .... @rndm
+UQSUB8 .... 0110 0110 .... .... 1111 1111 .... @rndm
+
+UHADD16 .... 0110 0111 .... .... 1111 0001 .... @rndm
+UHASX .... 0110 0111 .... .... 1111 0011 .... @rndm
+UHSAX .... 0110 0111 .... .... 1111 0101 .... @rndm
+UHSUB16 .... 0110 0111 .... .... 1111 0111 .... @rndm
+UHADD8 .... 0110 0111 .... .... 1111 1001 .... @rndm
+UHSUB8 .... 0110 0111 .... .... 1111 1111 .... @rndm
+
+# Packing, unpacking, saturation, and reversal
+
+PKH ---- 0110 1000 rn:4 rd:4 imm:5 tb:1 01 rm:4 &pkh
+
+@sat ---- .... ... satimm:5 rd:4 imm:5 sh:1 .. rn:4 &sat
+@sat16 ---- .... .... satimm:4 rd:4 .... .... rn:4 \
+ &sat imm=0 sh=0
+
+SSAT .... 0110 101. .... .... .... ..01 .... @sat
+USAT .... 0110 111. .... .... .... ..01 .... @sat
+
+SSAT16 .... 0110 1010 .... .... 1111 0011 .... @sat16
+USAT16 .... 0110 1110 .... .... 1111 0011 .... @sat16
+
+@rrr_rot ---- .... .... rn:4 rd:4 rot:2 ...... rm:4 &rrr_rot
+
+SXTAB16 .... 0110 1000 .... .... ..00 0111 .... @rrr_rot
+SXTAB .... 0110 1010 .... .... ..00 0111 .... @rrr_rot
+SXTAH .... 0110 1011 .... .... ..00 0111 .... @rrr_rot
+UXTAB16 .... 0110 1100 .... .... ..00 0111 .... @rrr_rot
+UXTAB .... 0110 1110 .... .... ..00 0111 .... @rrr_rot
+UXTAH .... 0110 1111 .... .... ..00 0111 .... @rrr_rot
+
+SEL .... 0110 1000 .... .... 1111 1011 .... @rndm
+REV .... 0110 1011 1111 .... 1111 0011 .... @rdm
+REV16 .... 0110 1011 1111 .... 1111 1011 .... @rdm
+REVSH .... 0110 1111 1111 .... 1111 1011 .... @rdm
+RBIT .... 0110 1111 1111 .... 1111 0011 .... @rdm
+
+# Signed multiply, signed and unsigned divide
+
+@rdmn ---- .... .... rd:4 .... rm:4 .... rn:4 &rrr
+
+SMLAD .... 0111 0000 .... .... .... 0001 .... @rdamn
+SMLADX .... 0111 0000 .... .... .... 0011 .... @rdamn
+SMLSD .... 0111 0000 .... .... .... 0101 .... @rdamn
+SMLSDX .... 0111 0000 .... .... .... 0111 .... @rdamn
+
+SDIV .... 0111 0001 .... 1111 .... 0001 .... @rdmn
+UDIV .... 0111 0011 .... 1111 .... 0001 .... @rdmn
+
+SMLALD .... 0111 0100 .... .... .... 0001 .... @rdamn
+SMLALDX .... 0111 0100 .... .... .... 0011 .... @rdamn
+SMLSLD .... 0111 0100 .... .... .... 0101 .... @rdamn
+SMLSLDX .... 0111 0100 .... .... .... 0111 .... @rdamn
+
+SMMLA .... 0111 0101 .... .... .... 0001 .... @rdamn
+SMMLAR .... 0111 0101 .... .... .... 0011 .... @rdamn
+SMMLS .... 0111 0101 .... .... .... 1101 .... @rdamn
+SMMLSR .... 0111 0101 .... .... .... 1111 .... @rdamn
+
+# Block data transfer
+
+STM ---- 100 b:1 i:1 u:1 w:1 0 rn:4 list:16 &ldst_block
+LDM_a32 ---- 100 b:1 i:1 u:1 w:1 1 rn:4 list:16 &ldst_block
+
+# Branch, branch with link
+
+%imm26 0:s24 !function=times_4
+@branch ---- .... ........................ &i imm=%imm26
+
+B .... 1010 ........................ @branch
+BL .... 1011 ........................ @branch
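+
+# Worked example: %imm26 sign-extends imm24 and multiplies by 4, so
+# imm24=0xfffffe decodes to a byte offset of -8 (applied to the usual
+# PC+8 base for A32 branches).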
+
+# Coprocessor instructions
+
+# We decode MCR, MRC, MRRC and MCRR only, because for QEMU the
+# other coprocessor instructions always UNDEF.
+# The trans_ functions for these will ignore cp values 8..13 for v7 or
+# earlier, and 0..13 for v8 and later, because those areas of the
+# encoding space may be used for other things, such as VFP or Neon.
+
+@mcr ---- .... opc1:3 . crn:4 rt:4 cp:4 opc2:3 . crm:4 &mcr
+@mcrr ---- .... .... rt2:4 rt:4 cp:4 opc1:4 crm:4 &mcrr
+
+MCRR .... 1100 0100 .... .... .... .... .... @mcrr
+MRRC .... 1100 0101 .... .... .... .... .... @mcrr
+
+MCR .... 1110 ... 0 .... .... .... ... 1 .... @mcr
+MRC .... 1110 ... 1 .... .... .... ... 1 .... @mcr
+
+# Supervisor call
+
+SVC ---- 1111 imm:24 &i
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
new file mode 100644
index 0000000000..8a20dce3c8
--- /dev/null
+++ b/target/arm/tcg/a64.decode
@@ -0,0 +1,591 @@
+# AArch64 A64 allowed instruction decoding
+#
+# Copyright (c) 2023 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+&r rn
+&ri rd imm
+&rri_sf rd rn imm sf
+&i imm
+
+
+### Data Processing - Immediate
+
+# PC-rel addressing
+
+%imm_pcrel 5:s19 29:2
+@pcrel . .. ..... ................... rd:5 &ri imm=%imm_pcrel
+
+ADR 0 .. 10000 ................... ..... @pcrel
+ADRP 1 .. 10000 ................... ..... @pcrel
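+
+# Informative example: %imm_pcrel places the signed immhi field
+# (insn[23:5]) above immlo (insn[30:29]), so immhi=1 with immlo=2
+# decodes to imm=6; ADR treats this as a byte offset, while ADRP
+# treats it as a count of 4KiB pages from the page-aligned PC.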
+
+# Add/subtract (immediate)
+
+%imm12_sh12 10:12 !function=shl_12
+@addsub_imm sf:1 .. ...... . imm:12 rn:5 rd:5
+@addsub_imm12 sf:1 .. ...... . ............ rn:5 rd:5 imm=%imm12_sh12
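+
+# Informative example: the "shift by 12" forms route through
+# @addsub_imm12, so "ADD Xd, Xn, #1, LSL #12" carries imm12=1 in the
+# encoding and shl_12 expands it to the effective immediate 0x1000.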
+
+ADD_i . 00 100010 0 ............ ..... ..... @addsub_imm
+ADD_i . 00 100010 1 ............ ..... ..... @addsub_imm12
+ADDS_i . 01 100010 0 ............ ..... ..... @addsub_imm
+ADDS_i . 01 100010 1 ............ ..... ..... @addsub_imm12
+
+SUB_i . 10 100010 0 ............ ..... ..... @addsub_imm
+SUB_i . 10 100010 1 ............ ..... ..... @addsub_imm12
+SUBS_i . 11 100010 0 ............ ..... ..... @addsub_imm
+SUBS_i . 11 100010 1 ............ ..... ..... @addsub_imm12
+
+# Add/subtract (immediate with tags)
+
+&rri_tag rd rn uimm6 uimm4
+@addsub_imm_tag . .. ...... . uimm6:6 .. uimm4:4 rn:5 rd:5 &rri_tag
+
+ADDG_i 1 00 100011 0 ...... 00 .... ..... ..... @addsub_imm_tag
+SUBG_i 1 10 100011 0 ...... 00 .... ..... ..... @addsub_imm_tag
+
+# Logical (immediate)
+
+&rri_log rd rn sf dbm
+@logic_imm_64 1 .. ...... dbm:13 rn:5 rd:5 &rri_log sf=1
+@logic_imm_32 0 .. ...... 0 dbm:12 rn:5 rd:5 &rri_log sf=0
+
+AND_i . 00 100100 . ...... ...... ..... ..... @logic_imm_64
+AND_i . 00 100100 . ...... ...... ..... ..... @logic_imm_32
+ORR_i . 01 100100 . ...... ...... ..... ..... @logic_imm_64
+ORR_i . 01 100100 . ...... ...... ..... ..... @logic_imm_32
+EOR_i . 10 100100 . ...... ...... ..... ..... @logic_imm_64
+EOR_i . 10 100100 . ...... ...... ..... ..... @logic_imm_32
+ANDS_i . 11 100100 . ...... ...... ..... ..... @logic_imm_64
+ANDS_i . 11 100100 . ...... ...... ..... ..... @logic_imm_32
+
+# Move wide (immediate)
+
+&movw rd sf imm hw
+@movw_64 1 .. ...... hw:2 imm:16 rd:5 &movw sf=1
+@movw_32 0 .. ...... 0 hw:1 imm:16 rd:5 &movw sf=0
+
+MOVN . 00 100101 .. ................ ..... @movw_64
+MOVN . 00 100101 .. ................ ..... @movw_32
+MOVZ . 10 100101 .. ................ ..... @movw_64
+MOVZ . 10 100101 .. ................ ..... @movw_32
+MOVK . 11 100101 .. ................ ..... @movw_64
+MOVK . 11 100101 .. ................ ..... @movw_32
+
+# Bitfield
+
+&bitfield rd rn sf immr imms
+@bitfield_64 1 .. ...... 1 immr:6 imms:6 rn:5 rd:5 &bitfield sf=1
+@bitfield_32 0 .. ...... 0 0 immr:5 0 imms:5 rn:5 rd:5 &bitfield sf=0
+
+SBFM . 00 100110 . ...... ...... ..... ..... @bitfield_64
+SBFM . 00 100110 . ...... ...... ..... ..... @bitfield_32
+BFM . 01 100110 . ...... ...... ..... ..... @bitfield_64
+BFM . 01 100110 . ...... ...... ..... ..... @bitfield_32
+UBFM . 10 100110 . ...... ...... ..... ..... @bitfield_64
+UBFM . 10 100110 . ...... ...... ..... ..... @bitfield_32
+
+# Extract
+
+&extract rd rn rm imm sf
+
+EXTR 1 00 100111 1 0 rm:5 imm:6 rn:5 rd:5 &extract sf=1
+EXTR 0 00 100111 0 0 rm:5 0 imm:5 rn:5 rd:5 &extract sf=0
+
+# Branches
+
+%imm26 0:s26 !function=times_4
+@branch . ..... .......................... &i imm=%imm26
+
+B 0 00101 .......................... @branch
+BL 1 00101 .......................... @branch
+
+%imm19 5:s19 !function=times_4
+&cbz rt imm sf nz
+
+CBZ sf:1 011010 nz:1 ................... rt:5 &cbz imm=%imm19
+
+%imm14 5:s14 !function=times_4
+%imm31_19 31:1 19:5
+&tbz rt imm nz bitpos
+
+TBZ . 011011 nz:1 ..... .............. rt:5 &tbz imm=%imm14 bitpos=%imm31_19
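+
+# Worked example: the tested bit number is b5:b40, so testing bit 33
+# of Xt encodes insn[31]=1 and insn[23:19]=0b00001, which %imm31_19
+# reassembles as bitpos=33.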
+
+# B.cond and BC.cond
+B_cond 0101010 0 ................... c:1 cond:4 imm=%imm19
+
+BR 1101011 0000 11111 000000 rn:5 00000 &r
+BLR 1101011 0001 11111 000000 rn:5 00000 &r
+RET 1101011 0010 11111 000000 rn:5 00000 &r
+
+&braz rn m
+BRAZ 1101011 0000 11111 00001 m:1 rn:5 11111 &braz # BRAAZ, BRABZ
+BLRAZ 1101011 0001 11111 00001 m:1 rn:5 11111 &braz # BLRAAZ, BLRABZ
+
+&reta m
+RETA 1101011 0010 11111 00001 m:1 11111 11111 &reta # RETAA, RETAB
+
+&bra rn rm m
+BRA 1101011 1000 11111 00001 m:1 rn:5 rm:5 &bra # BRAA, BRAB
+BLRA 1101011 1001 11111 00001 m:1 rn:5 rm:5 &bra # BLRAA, BLRAB
+
+ERET 1101011 0100 11111 000000 11111 00000
+ERETA 1101011 0100 11111 00001 m:1 11111 11111 &reta # ERETAA, ERETAB
+
+# We don't need to decode DRPS because it always UNDEFs except when
+# the processor is in halting debug state (which we don't implement).
+# The pattern is listed here as documentation.
+# DRPS 1101011 0101 11111 000000 11111 00000
+
+# Hint instruction group
+{
+ [
+ YIELD 1101 0101 0000 0011 0010 0000 001 11111
+ WFE 1101 0101 0000 0011 0010 0000 010 11111
+ WFI 1101 0101 0000 0011 0010 0000 011 11111
+ # We implement WFE to never block, so our SEV/SEVL are NOPs
+ # SEV 1101 0101 0000 0011 0010 0000 100 11111
+ # SEVL 1101 0101 0000 0011 0010 0000 101 11111
+ # Our DGH is a NOP because we don't merge memory accesses anyway.
+ # DGH 1101 0101 0000 0011 0010 0000 110 11111
+ XPACLRI 1101 0101 0000 0011 0010 0000 111 11111
+ PACIA1716 1101 0101 0000 0011 0010 0001 000 11111
+ PACIB1716 1101 0101 0000 0011 0010 0001 010 11111
+ AUTIA1716 1101 0101 0000 0011 0010 0001 100 11111
+ AUTIB1716 1101 0101 0000 0011 0010 0001 110 11111
+ ESB 1101 0101 0000 0011 0010 0010 000 11111
+ PACIAZ 1101 0101 0000 0011 0010 0011 000 11111
+ PACIASP 1101 0101 0000 0011 0010 0011 001 11111
+ PACIBZ 1101 0101 0000 0011 0010 0011 010 11111
+ PACIBSP 1101 0101 0000 0011 0010 0011 011 11111
+ AUTIAZ 1101 0101 0000 0011 0010 0011 100 11111
+ AUTIASP 1101 0101 0000 0011 0010 0011 101 11111
+ AUTIBZ 1101 0101 0000 0011 0010 0011 110 11111
+ AUTIBSP 1101 0101 0000 0011 0010 0011 111 11111
+ ]
+ # The canonical NOP has CRm == op2 == 0, but all of the space
+ # that isn't specifically allocated to an instruction must NOP
+ NOP 1101 0101 0000 0011 0010 ---- --- 11111
+}
+
+# Barriers
+
+CLREX 1101 0101 0000 0011 0011 ---- 010 11111
+DSB_DMB 1101 0101 0000 0011 0011 domain:2 types:2 10- 11111
+ISB 1101 0101 0000 0011 0011 ---- 110 11111
+SB 1101 0101 0000 0011 0011 0000 111 11111
+
+# PSTATE
+
+CFINV 1101 0101 0000 0 000 0100 0000 000 11111
+XAFLAG 1101 0101 0000 0 000 0100 0000 001 11111
+AXFLAG 1101 0101 0000 0 000 0100 0000 010 11111
+
+# These are architecturally all "MSR (immediate)"; we decode the destination
+# register too because there is no commonality in our implementation.
+@msr_i .... .... .... . ... .... imm:4 ... .....
+MSR_i_UAO 1101 0101 0000 0 000 0100 .... 011 11111 @msr_i
+MSR_i_PAN 1101 0101 0000 0 000 0100 .... 100 11111 @msr_i
+MSR_i_SPSEL 1101 0101 0000 0 000 0100 .... 101 11111 @msr_i
+MSR_i_SSBS 1101 0101 0000 0 011 0100 .... 001 11111 @msr_i
+MSR_i_DIT 1101 0101 0000 0 011 0100 .... 010 11111 @msr_i
+MSR_i_TCO 1101 0101 0000 0 011 0100 .... 100 11111 @msr_i
+MSR_i_DAIFSET 1101 0101 0000 0 011 0100 .... 110 11111 @msr_i
+MSR_i_DAIFCLEAR 1101 0101 0000 0 011 0100 .... 111 11111 @msr_i
+MSR_i_SVCR 1101 0101 0000 0 011 0100 0 mask:2 imm:1 011 11111
+
+# MRS, MSR (register), SYS, SYSL. These are all essentially the
+# same instruction as far as QEMU is concerned.
+# NB: op0 is bits [20:19], but op0=0b00 is other insns, so we have
+# to hand-decode it.
+SYS 1101 0101 00 l:1 01 op1:3 crn:4 crm:4 op2:3 rt:5 op0=1
+SYS 1101 0101 00 l:1 10 op1:3 crn:4 crm:4 op2:3 rt:5 op0=2
+SYS 1101 0101 00 l:1 11 op1:3 crn:4 crm:4 op2:3 rt:5 op0=3
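+
+# Informative example: "MRS x0, MIDR_EL1" is op0=3, op1=0, CRn=0,
+# CRm=0, op2=0 with L (insn[21]) set, so it is matched by the op0=3
+# pattern above with l=1 and rt=0.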
+
+# Exception generation
+
+@i16 .... .... ... imm:16 ... .. &i
+SVC 1101 0100 000 ................ 000 01 @i16
+HVC 1101 0100 000 ................ 000 10 @i16
+SMC 1101 0100 000 ................ 000 11 @i16
+BRK 1101 0100 001 ................ 000 00 @i16
+HLT 1101 0100 010 ................ 000 00 @i16
+# These insns always UNDEF unless in halting debug state, which
+# we don't implement. So we don't need to decode them. The patterns
+# are listed here as documentation.
+# DCPS1 1101 0100 101 ................ 000 01 @i16
+# DCPS2 1101 0100 101 ................ 000 10 @i16
+# DCPS3 1101 0100 101 ................ 000 11 @i16
+
+# Loads and stores
+
+&stxr rn rt rt2 rs sz lasr
+&stlr rn rt sz lasr
+@stxr sz:2 ...... ... rs:5 lasr:1 rt2:5 rn:5 rt:5 &stxr
+@stlr sz:2 ...... ... ..... lasr:1 ..... rn:5 rt:5 &stlr
+%imm1_30_p2 30:1 !function=plus_2
+@stxp .. ...... ... rs:5 lasr:1 rt2:5 rn:5 rt:5 &stxr sz=%imm1_30_p2
+STXR .. 001000 000 ..... . ..... ..... ..... @stxr # inc STLXR
+LDXR .. 001000 010 ..... . ..... ..... ..... @stxr # inc LDAXR
+STLR .. 001000 100 11111 . 11111 ..... ..... @stlr # inc STLLR
+LDAR .. 001000 110 11111 . 11111 ..... ..... @stlr # inc LDLAR
+
+STXP 1 . 001000 001 ..... . ..... ..... ..... @stxp # inc STLXP
+LDXP 1 . 001000 011 ..... . ..... ..... ..... @stxp # inc LDAXP
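+
+# Informative example: for STXP/LDXP the access size comes from
+# insn[30] via plus_2, so 0 yields sz=2 (a pair of W registers) and
+# 1 yields sz=3 (a pair of X registers).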
+
+# CASP, CASPA, CASPAL, CASPL (we don't decode the bits that determine
+# acquire/release semantics because QEMU's cmpxchg always has those)
+CASP 0 . 001000 0 - 1 rs:5 - 11111 rn:5 rt:5 sz=%imm1_30_p2
+# CAS, CASA, CASAL, CASL
+CAS sz:2 001000 1 - 1 rs:5 - 11111 rn:5 rt:5
+
+&ldlit rt imm sz sign
+@ldlit .. ... . .. ................... rt:5 &ldlit imm=%imm19
+
+LD_lit 00 011 0 00 ................... ..... @ldlit sz=2 sign=0
+LD_lit 01 011 0 00 ................... ..... @ldlit sz=3 sign=0
+LD_lit 10 011 0 00 ................... ..... @ldlit sz=2 sign=1
+LD_lit_v 00 011 1 00 ................... ..... @ldlit sz=2 sign=0
+LD_lit_v 01 011 1 00 ................... ..... @ldlit sz=3 sign=0
+LD_lit_v 10 011 1 00 ................... ..... @ldlit sz=4 sign=0
+
+# PRFM
+NOP 11 011 0 00 ------------------- -----
+
+&ldstpair rt2 rt rn imm sz sign w p
+@ldstpair .. ... . ... . imm:s7 rt2:5 rn:5 rt:5 &ldstpair
+
+# STNP, LDNP: Signed offset, non-temporal hint. We don't emulate caches
+# so we ignore hints about data access patterns, and handle these like
+# plain signed offset.
+STP 00 101 0 000 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0
+LDP 00 101 0 000 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0
+STP 10 101 0 000 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0
+LDP 10 101 0 000 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0
+STP_v 00 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0
+LDP_v 00 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0
+STP_v 01 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0
+LDP_v 01 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0
+STP_v 10 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0
+LDP_v 10 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0
+
+# STP and LDP: post-indexed
+STP 00 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1
+LDP 00 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1
+LDP 01 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=1 w=1
+STP 10 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1
+LDP 10 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1
+STP_v 00 101 1 001 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1
+LDP_v 00 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1
+STP_v 01 101 1 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1
+LDP_v 01 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1
+STP_v 10 101 1 001 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=1 w=1
+LDP_v 10 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=1 w=1
+
+# STP and LDP: offset
+STP 00 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0
+LDP 00 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0
+LDP 01 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=0 w=0
+STP 10 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0
+LDP 10 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0
+STP_v 00 101 1 010 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0
+LDP_v 00 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0
+STP_v 01 101 1 010 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0
+LDP_v 01 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0
+STP_v 10 101 1 010 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0
+LDP_v 10 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0
+
+# STP and LDP: pre-indexed
+STP 00 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1
+LDP 00 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1
+LDP 01 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=0 w=1
+STP 10 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1
+LDP 10 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1
+STP_v 00 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1
+LDP_v 00 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1
+STP_v 01 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1
+LDP_v 01 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1
+STP_v 10 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=1
+LDP_v 10 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=1
+
+# STGP: store tag and pair
+STGP 01 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1
+STGP 01 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0
+STGP 01 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1
+
+# Load/store register (unscaled immediate)
+&ldst_imm rt rn imm sz sign w p unpriv ext
+@ldst_imm .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=0 p=0 w=0
+@ldst_imm_pre .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=0 p=0 w=1
+@ldst_imm_post .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=0 p=1 w=1
+@ldst_imm_user .. ... . .. .. . imm:s9 .. rn:5 rt:5 &ldst_imm unpriv=1 p=0 w=0
+
+STR_i sz:2 111 0 00 00 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0
+LDR_i 00 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=1 sz=0
+LDR_i 01 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=1 sz=1
+LDR_i 10 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=1 sz=2
+LDR_i 11 111 0 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 sz=3
+LDR_i 00 111 0 00 10 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=0 sz=0
+LDR_i 01 111 0 00 10 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=0 sz=1
+LDR_i 10 111 0 00 10 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=0 sz=2
+LDR_i 00 111 0 00 11 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=1 sz=0
+LDR_i 01 111 0 00 11 0 ......... 00 ..... ..... @ldst_imm sign=1 ext=1 sz=1
+
+STR_i sz:2 111 0 00 00 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0
+LDR_i 00 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=1 sz=0
+LDR_i 01 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=1 sz=1
+LDR_i 10 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=1 sz=2
+LDR_i 11 111 0 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 sz=3
+LDR_i 00 111 0 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=0 sz=0
+LDR_i 01 111 0 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=0 sz=1
+LDR_i 10 111 0 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=0 sz=2
+LDR_i 00 111 0 00 11 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=1 sz=0
+LDR_i 01 111 0 00 11 0 ......... 01 ..... ..... @ldst_imm_post sign=1 ext=1 sz=1
+
+STR_i sz:2 111 0 00 00 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=0
+LDR_i 00 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=1 sz=0
+LDR_i 01 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=1 sz=1
+LDR_i 10 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=1 sz=2
+LDR_i 11 111 0 00 01 0 ......... 10 ..... ..... @ldst_imm_user sign=0 ext=0 sz=3
+LDR_i 00 111 0 00 10 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=0 sz=0
+LDR_i 01 111 0 00 10 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=0 sz=1
+LDR_i 10 111 0 00 10 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=0 sz=2
+LDR_i 00 111 0 00 11 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=1 sz=0
+LDR_i 01 111 0 00 11 0 ......... 10 ..... ..... @ldst_imm_user sign=1 ext=1 sz=1
+
+STR_i sz:2 111 0 00 00 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0
+LDR_i 00 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=1 sz=0
+LDR_i 01 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=1 sz=1
+LDR_i 10 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=1 sz=2
+LDR_i 11 111 0 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 sz=3
+LDR_i 00 111 0 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=0 sz=0
+LDR_i 01 111 0 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=0 sz=1
+LDR_i 10 111 0 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=0 sz=2
+LDR_i 00 111 0 00 11 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=1 sz=0
+LDR_i 01 111 0 00 11 0 ......... 11 ..... ..... @ldst_imm_pre sign=1 ext=1 sz=1
+
+# PRFM: prefetch memory, which is a no-op for QEMU
+NOP 11 111 0 00 10 0 --------- 00 ----- -----
+
+STR_v_i sz:2 111 1 00 00 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0
+STR_v_i 00 111 1 00 10 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 sz=4
+LDR_v_i sz:2 111 1 00 01 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0
+LDR_v_i 00 111 1 00 11 0 ......... 00 ..... ..... @ldst_imm sign=0 ext=0 sz=4
+
+STR_v_i sz:2 111 1 00 00 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0
+STR_v_i 00 111 1 00 10 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 sz=4
+LDR_v_i sz:2 111 1 00 01 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0
+LDR_v_i 00 111 1 00 11 0 ......... 01 ..... ..... @ldst_imm_post sign=0 ext=0 sz=4
+
+STR_v_i sz:2 111 1 00 00 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0
+STR_v_i 00 111 1 00 10 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 sz=4
+LDR_v_i sz:2 111 1 00 01 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0
+LDR_v_i 00 111 1 00 11 0 ......... 11 ..... ..... @ldst_imm_pre sign=0 ext=0 sz=4
+
+# Load/store with an unsigned 12 bit immediate, which is scaled by the
+# element size. The decode function receives the packed sz:imm value and
+# returns the immediate scaled to a byte offset (sketched below).
+%uimm_scaled 10:12 sz:3 !function=uimm_scaled
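For reference, a minimal sketch of what such a scaling extractor could look like (illustrative only; the patch's real helper lives in the translator and takes the usual DisasContext argument). decodetree passes the concatenated %uimm_scaled fields as one integer, with imm in the high 12 bits and sz in the low 3 bits:

    /* Illustrative sketch, not the patch's actual helper. */
    static int uimm_scaled_sketch(int packed)
    {
        unsigned imm = packed >> 3;   /* unsigned 12-bit immediate */
        unsigned sz  = packed & 7;    /* log2 of the element size */
        return imm << sz;             /* scaled byte offset */
    }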
+
+@ldst_uimm .. ... . .. .. ............ rn:5 rt:5 &ldst_imm unpriv=0 p=0 w=0 imm=%uimm_scaled
+
+STR_i sz:2 111 0 01 00 ............ ..... ..... @ldst_uimm sign=0 ext=0
+LDR_i 00 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=1 sz=0
+LDR_i 01 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=1 sz=1
+LDR_i 10 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=1 sz=2
+LDR_i 11 111 0 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=3
+LDR_i 00 111 0 01 10 ............ ..... ..... @ldst_uimm sign=1 ext=0 sz=0
+LDR_i 01 111 0 01 10 ............ ..... ..... @ldst_uimm sign=1 ext=0 sz=1
+LDR_i 10 111 0 01 10 ............ ..... ..... @ldst_uimm sign=1 ext=0 sz=2
+LDR_i 00 111 0 01 11 ............ ..... ..... @ldst_uimm sign=1 ext=1 sz=0
+LDR_i 01 111 0 01 11 ............ ..... ..... @ldst_uimm sign=1 ext=1 sz=1
+
+# PRFM
+NOP 11 111 0 01 10 ------------ ----- -----
+
+STR_v_i sz:2 111 1 01 00 ............ ..... ..... @ldst_uimm sign=0 ext=0
+STR_v_i 00 111 1 01 10 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=4
+LDR_v_i sz:2 111 1 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=0
+LDR_v_i 00 111 1 01 11 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=4
+
+# Load/store with register offset
+&ldst rm rn rt sign ext sz opt s
+@ldst .. ... . .. .. . rm:5 opt:3 s:1 .. rn:5 rt:5 &ldst
+STR sz:2 111 0 00 00 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0
+LDR 00 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=1 sz=0
+LDR 01 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=1 sz=1
+LDR 10 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=1 sz=2
+LDR 11 111 0 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=3
+LDR 00 111 0 00 10 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=0 sz=0
+LDR 01 111 0 00 10 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=0 sz=1
+LDR 10 111 0 00 10 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=0 sz=2
+LDR 00 111 0 00 11 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=1 sz=0
+LDR 01 111 0 00 11 1 ..... ... . 10 ..... ..... @ldst sign=1 ext=1 sz=1
+
+# PRFM
+NOP 11 111 0 00 10 1 ----- -1- - 10 ----- -----
+
+STR_v sz:2 111 1 00 00 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0
+STR_v 00 111 1 00 10 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=4
+LDR_v sz:2 111 1 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0
+LDR_v 00 111 1 00 11 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=4
+
+# Atomic memory operations
+&atomic rs rn rt a r sz
+@atomic sz:2 ... . .. a:1 r:1 . rs:5 . ... .. rn:5 rt:5 &atomic
+LDADD .. 111 0 00 . . 1 ..... 0000 00 ..... ..... @atomic
+LDCLR .. 111 0 00 . . 1 ..... 0001 00 ..... ..... @atomic
+LDEOR .. 111 0 00 . . 1 ..... 0010 00 ..... ..... @atomic
+LDSET .. 111 0 00 . . 1 ..... 0011 00 ..... ..... @atomic
+LDSMAX .. 111 0 00 . . 1 ..... 0100 00 ..... ..... @atomic
+LDSMIN .. 111 0 00 . . 1 ..... 0101 00 ..... ..... @atomic
+LDUMAX .. 111 0 00 . . 1 ..... 0110 00 ..... ..... @atomic
+LDUMIN .. 111 0 00 . . 1 ..... 0111 00 ..... ..... @atomic
+SWP .. 111 0 00 . . 1 ..... 1000 00 ..... ..... @atomic
+
+LDAPR sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5
+
+# Load/store register (pointer authentication)
+
+# The LDRA immediate is a 10-bit signed value scaled by 8, but its bits are
+# not contiguous (the sign bit is insn[22], the rest insn[20:12])
+%ldra_imm 22:s1 12:9 !function=times_8
+
+LDRA 11 111 0 00 m:1 . 1 ......... w:1 1 rn:5 rt:5 imm=%ldra_imm
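As a hedged illustration of how that non-contiguous immediate comes together: decodetree concatenates insn[22] (as the sign bit) with insn[20:12] into a signed 10-bit value, and the !function multiplies it by 8, giving byte offsets from -4096 to +4088 in steps of 8. A self-contained sketch with hypothetical names:

    #include <stdint.h>

    /* Sketch only: rebuild the scaled LDRA offset from an instruction word. */
    static int64_t ldra_imm_sketch(uint32_t insn)
    {
        int64_t imm = (insn >> 12) & 0x1ff;   /* insn[20:12] -> low 9 bits */
        if (insn & (1u << 22)) {              /* insn[22] is the sign bit */
            imm -= 1 << 9;                    /* sign-extend to 10 bits */
        }
        return imm * 8;                       /* !function=times_8 */
    }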
+
+&ldapr_stlr_i rn rt imm sz sign ext
+@ldapr_stlr_i .. ...... .. . imm:9 .. rn:5 rt:5 &ldapr_stlr_i
+STLR_i sz:2 011001 00 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0
+LDAPR_i sz:2 011001 01 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0
+LDAPR_i 00 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=0
+LDAPR_i 01 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=1
+LDAPR_i 10 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=2
+LDAPR_i 00 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=0
+LDAPR_i 01 011001 11 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=1 sz=1
+
+# Load/store multiple structures
+# The 4-bit opcode in bits [15:12] encodes the repeat count and the
+# number of structure elements
+&ldst_mult rm rn rt sz q p rpt selem
+@ldst_mult . q:1 ...... p:1 . . rm:5 .... sz:2 rn:5 rt:5 &ldst_mult
+ST_mult 0 . 001100 . 0 0 ..... 0000 .. ..... ..... @ldst_mult rpt=1 selem=4
+ST_mult 0 . 001100 . 0 0 ..... 0010 .. ..... ..... @ldst_mult rpt=4 selem=1
+ST_mult 0 . 001100 . 0 0 ..... 0100 .. ..... ..... @ldst_mult rpt=1 selem=3
+ST_mult 0 . 001100 . 0 0 ..... 0110 .. ..... ..... @ldst_mult rpt=3 selem=1
+ST_mult 0 . 001100 . 0 0 ..... 0111 .. ..... ..... @ldst_mult rpt=1 selem=1
+ST_mult 0 . 001100 . 0 0 ..... 1000 .. ..... ..... @ldst_mult rpt=1 selem=2
+ST_mult 0 . 001100 . 0 0 ..... 1010 .. ..... ..... @ldst_mult rpt=2 selem=1
+
+LD_mult 0 . 001100 . 1 0 ..... 0000 .. ..... ..... @ldst_mult rpt=1 selem=4
+LD_mult 0 . 001100 . 1 0 ..... 0010 .. ..... ..... @ldst_mult rpt=4 selem=1
+LD_mult 0 . 001100 . 1 0 ..... 0100 .. ..... ..... @ldst_mult rpt=1 selem=3
+LD_mult 0 . 001100 . 1 0 ..... 0110 .. ..... ..... @ldst_mult rpt=3 selem=1
+LD_mult 0 . 001100 . 1 0 ..... 0111 .. ..... ..... @ldst_mult rpt=1 selem=1
+LD_mult 0 . 001100 . 1 0 ..... 1000 .. ..... ..... @ldst_mult rpt=1 selem=2
+LD_mult 0 . 001100 . 1 0 ..... 1010 .. ..... ..... @ldst_mult rpt=2 selem=1
+
+# Load/store single structure
+&ldst_single rm rn rt p selem index scale
+
+%ldst_single_selem 13:1 21:1 !function=plus_1
+
+%ldst_single_index_b 30:1 10:3
+%ldst_single_index_h 30:1 11:2
+%ldst_single_index_s 30:1 12:1
+
+@ldst_single_b .. ...... p:1 .. rm:5 ...... rn:5 rt:5 \
+ &ldst_single scale=0 selem=%ldst_single_selem \
+ index=%ldst_single_index_b
+@ldst_single_h .. ...... p:1 .. rm:5 ...... rn:5 rt:5 \
+ &ldst_single scale=1 selem=%ldst_single_selem \
+ index=%ldst_single_index_h
+@ldst_single_s .. ...... p:1 .. rm:5 ...... rn:5 rt:5 \
+ &ldst_single scale=2 selem=%ldst_single_selem \
+ index=%ldst_single_index_s
+@ldst_single_d . index:1 ...... p:1 .. rm:5 ...... rn:5 rt:5 \
+ &ldst_single scale=3 selem=%ldst_single_selem
+
+ST_single 0 . 001101 . 0 . ..... 00 . ... ..... ..... @ldst_single_b
+ST_single 0 . 001101 . 0 . ..... 01 . ..0 ..... ..... @ldst_single_h
+ST_single 0 . 001101 . 0 . ..... 10 . .00 ..... ..... @ldst_single_s
+ST_single 0 . 001101 . 0 . ..... 10 . 001 ..... ..... @ldst_single_d
+
+LD_single 0 . 001101 . 1 . ..... 00 . ... ..... ..... @ldst_single_b
+LD_single 0 . 001101 . 1 . ..... 01 . ..0 ..... ..... @ldst_single_h
+LD_single 0 . 001101 . 1 . ..... 10 . .00 ..... ..... @ldst_single_s
+LD_single 0 . 001101 . 1 . ..... 10 . 001 ..... ..... @ldst_single_d
+
+# Replicating load case
+LD_single_repl 0 q:1 001101 p:1 1 . rm:5 11 . 0 scale:2 rn:5 rt:5 selem=%ldst_single_selem
+
+%tag_offset 12:s9 !function=scale_by_log2_tag_granule
+&ldst_tag rn rt imm p w
+@ldst_tag ........ .. . ......... .. rn:5 rt:5 &ldst_tag imm=%tag_offset
+@ldst_tag_mult ........ .. . 000000000 .. rn:5 rt:5 &ldst_tag imm=0
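MTE tags cover 16-byte granules, so the signed 9-bit immediate is scaled by 16 to give a granule-aligned byte offset. A hedged sketch of the scaling helper (QEMU's MTE code uses a LOG2_TAG_GRANULE of 4; the name below is hypothetical):

    /* Sketch only: scale imm9 by the 16-byte MTE tag granule. */
    static int scale_by_log2_tag_granule_sketch(int x)
    {
        return x * 16;   /* e.g. imm9 = -1 selects the granule at offset -16 */
    }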
+
+STZGM 11011001 00 1 ......... 00 ..... ..... @ldst_tag_mult p=0 w=0
+STG 11011001 00 1 ......... 01 ..... ..... @ldst_tag p=1 w=1
+STG 11011001 00 1 ......... 10 ..... ..... @ldst_tag p=0 w=0
+STG 11011001 00 1 ......... 11 ..... ..... @ldst_tag p=0 w=1
+
+LDG 11011001 01 1 ......... 00 ..... ..... @ldst_tag p=0 w=0
+STZG 11011001 01 1 ......... 01 ..... ..... @ldst_tag p=1 w=1
+STZG 11011001 01 1 ......... 10 ..... ..... @ldst_tag p=0 w=0
+STZG 11011001 01 1 ......... 11 ..... ..... @ldst_tag p=0 w=1
+
+STGM 11011001 10 1 ......... 00 ..... ..... @ldst_tag_mult p=0 w=0
+ST2G 11011001 10 1 ......... 01 ..... ..... @ldst_tag p=1 w=1
+ST2G 11011001 10 1 ......... 10 ..... ..... @ldst_tag p=0 w=0
+ST2G 11011001 10 1 ......... 11 ..... ..... @ldst_tag p=0 w=1
+
+LDGM 11011001 11 1 ......... 00 ..... ..... @ldst_tag_mult p=0 w=0
+STZ2G 11011001 11 1 ......... 01 ..... ..... @ldst_tag p=1 w=1
+STZ2G 11011001 11 1 ......... 10 ..... ..... @ldst_tag p=0 w=0
+STZ2G 11011001 11 1 ......... 11 ..... ..... @ldst_tag p=0 w=1
+
+# Memory operations (memset, memcpy, memmove)
+# Each of these comes in a set of three, e.g. SETP (prologue), SETM (main),
+# SETE (epilogue), and each of those has different flavours to
+# indicate whether memory accesses should be unpriv or non-temporal.
+# We don't distinguish temporal and non-temporal accesses, but we
+# do need to report the non-temporal hint in syndrome register values.
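As a rough, hedged illustration of the prologue/main/epilogue split (the architectural idea only, not QEMU's implementation), a guest-level memset decomposes into three phases that each advance the destination pointer and shrink the remaining count:

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch only: a memset split into SETP/SETM/SETE-style phases. */
    static void set_prologue(unsigned char **d, size_t *n, int c)
    {
        /* Handle leading bytes, e.g. up to a 16-byte boundary. */
        while (*n && ((uintptr_t)*d & 15)) {
            *(*d)++ = (unsigned char)c;
            (*n)--;
        }
    }

    static void set_main(unsigned char **d, size_t *n, int c)
    {
        /* Bulk phase: whole 16-byte blocks. */
        while (*n >= 16) {
            for (int i = 0; i < 16; i++) {
                (*d)[i] = (unsigned char)c;
            }
            *d += 16;
            *n -= 16;
        }
    }

    static void set_epilogue(unsigned char **d, size_t *n, int c)
    {
        /* Remaining tail bytes. */
        while (*n) {
            *(*d)++ = (unsigned char)c;
            (*n)--;
        }
    }

Calling the three in order is equivalent to a plain memset; the unpriv and non-temporal flavours only change how the accesses are performed and reported, not this structure.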
+
+# Memset
+&set rs rn rd unpriv nontemp
+# op2 bit 1 is nontemporal bit
+@set .. ......... rs:5 .. nontemp:1 unpriv:1 .. rn:5 rd:5 &set
+
+SETP 00 011001110 ..... 00 . . 01 ..... ..... @set
+SETM 00 011001110 ..... 01 . . 01 ..... ..... @set
+SETE 00 011001110 ..... 10 . . 01 ..... ..... @set
+
+# Like SET, but also setting MTE tags
+SETGP 00 011101110 ..... 00 . . 01 ..... ..... @set
+SETGM 00 011101110 ..... 01 . . 01 ..... ..... @set
+SETGE 00 011101110 ..... 10 . . 01 ..... ..... @set
+
+# Memmove/Memcpy: the CPY insns allow overlapping src/dest and
+# copy in the correct direction; the CPYF insns always copy forwards.
+#
+# options has the nontemporal and unpriv bits for src and dest
+&cpy rs rn rd options
+@cpy .. ... . ..... rs:5 options:4 .. rn:5 rd:5 &cpy
+
+CPYFP 00 011 0 01000 ..... .... 01 ..... ..... @cpy
+CPYFM 00 011 0 01010 ..... .... 01 ..... ..... @cpy
+CPYFE 00 011 0 01100 ..... .... 01 ..... ..... @cpy
+CPYP 00 011 1 01000 ..... .... 01 ..... ..... @cpy
+CPYM 00 011 1 01010 ..... .... 01 ..... ..... @cpy
+CPYE 00 011 1 01100 ..... .... 01 ..... ..... @cpy
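A hedged sketch of the direction choice the CPY family implies (essentially memmove semantics); the CPYF forms skip this check and always copy forwards:

    #include <stddef.h>

    /* Sketch only: copy so that overlapping ranges still come out right. */
    static void cpy_sketch(unsigned char *dst, const unsigned char *src, size_t n)
    {
        if (dst <= src || dst >= src + n) {
            for (size_t i = 0; i < n; i++) {    /* forwards is safe */
                dst[i] = src[i];
            }
        } else {
            for (size_t i = n; i-- > 0; ) {     /* overlap: copy backwards */
                dst[i] = src[i];
            }
        }
    }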
diff --git a/target/arm/arm_ldst.h b/target/arm/tcg/arm_ldst.h
index 5e0ac8bef0..cee0548a1c 100644
--- a/target/arm/arm_ldst.h
+++ b/target/arm/tcg/arm_ldst.h
@@ -6,7 +6,7 @@
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -20,25 +20,20 @@
#ifndef ARM_LDST_H
#define ARM_LDST_H
-#include "exec/cpu_ldst.h"
+#include "exec/translator.h"
#include "qemu/bswap.h"
/* Load an instruction and return it in the standard little-endian order */
-static inline uint32_t arm_ldl_code(CPUARMState *env, target_ulong addr,
- bool sctlr_b)
+static inline uint32_t arm_ldl_code(CPUARMState *env, DisasContextBase *s,
+ target_ulong addr, bool sctlr_b)
{
- uint32_t insn = cpu_ldl_code(env, addr);
- if (bswap_code(sctlr_b)) {
- return bswap32(insn);
- }
- return insn;
+ return translator_ldl_swap(env, s, addr, bswap_code(sctlr_b));
}
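The translator_ldl_swap()/translator_lduw_swap() calls fold the conditional byte swap into the translator's own code loader, which among other things lets the translator keep track of the instruction bytes it fetched. A hedged sketch of the swap behaviour being preserved, with a hypothetical name (bswap32() is from "qemu/bswap.h", already included above):

    /* Sketch only: the swap still applied when bswap_code(sctlr_b) is true. */
    static inline uint32_t ldl_code_swap_sketch(uint32_t raw, bool do_swap)
    {
        return do_swap ? bswap32(raw) : raw;
    }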
/* Ditto, for a halfword (Thumb) instruction */
-static inline uint16_t arm_lduw_code(CPUARMState *env, target_ulong addr,
- bool sctlr_b)
+static inline uint16_t arm_lduw_code(CPUARMState *env, DisasContextBase *s,
+ target_ulong addr, bool sctlr_b)
{
- uint16_t insn;
#ifndef CONFIG_USER_ONLY
/* In big-endian (BE32) mode, adjacent Thumb instructions have been swapped
within each word. Undo that now. */
@@ -46,11 +41,7 @@ static inline uint16_t arm_lduw_code(CPUARMState *env, target_ulong addr,
addr ^= 2;
}
#endif
- insn = cpu_lduw_code(env, addr);
- if (bswap_code(sctlr_b)) {
- return bswap16(insn);
- }
- return insn;
+ return translator_lduw_swap(env, s, addr, bswap_code(sctlr_b));
}
#endif
diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c
new file mode 100644
index 0000000000..c059c681e9
--- /dev/null
+++ b/target/arm/tcg/cpu-v7m.c
@@ -0,0 +1,290 @@
+/*
+ * QEMU ARMv7-M TCG-only CPUs.
+ *
+ * Copyright (c) 2012 SUSE LINUX Products GmbH
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "hw/core/tcg-cpu-ops.h"
+#include "internals.h"
+
+#if !defined(CONFIG_USER_ONLY)
+
+#include "hw/intc/armv7m_nvic.h"
+
+static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
+{
+ CPUClass *cc = CPU_GET_CLASS(cs);
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ bool ret = false;
+
+ /*
+     * ARMv7-M interrupt masking works differently from A- and R-profile.
+ * There is no FIQ/IRQ distinction. Instead of I and F bits
+ * masking FIQ and IRQ interrupts, an exception is taken only
+ * if it is higher priority than the current execution priority
+ * (which depends on state like BASEPRI, FAULTMASK and the
+ * currently active exception).
+ */
+ if (interrupt_request & CPU_INTERRUPT_HARD
+ && (armv7m_nvic_can_take_pending_exception(env->nvic))) {
+ cs->exception_index = EXCP_IRQ;
+ cc->tcg_ops->do_interrupt(cs);
+ ret = true;
+ }
+ return ret;
+}
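The decision the hook delegates to the NVIC model (hw/intc/armv7m_nvic.c) boils down to a priority comparison; a hedged sketch of the idea, with hypothetical names:

    #include <stdbool.h>

    /* Sketch only: an M-profile exception is taken only when the pending
     * exception is more urgent (numerically lower priority) than the
     * current execution priority.
     */
    static bool can_take_pending_exception_sketch(int pending_prio, int exec_prio)
    {
        return pending_prio < exec_prio;
    }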
+
+#endif /* !CONFIG_USER_ONLY */
+
+static void cortex_m0_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ set_feature(&cpu->env, ARM_FEATURE_V6);
+ set_feature(&cpu->env, ARM_FEATURE_M);
+
+ cpu->midr = 0x410cc200;
+
+ /*
+ * These ID register values are not guest visible, because
+ * we do not implement the Main Extension. They must be set
+ * to values corresponding to the Cortex-M0's implemented
+ * features, because QEMU generally controls its emulation
+ * by looking at ID register fields. We use the same values as
+ * for the M3.
+ */
+ cpu->isar.id_pfr0 = 0x00000030;
+ cpu->isar.id_pfr1 = 0x00000200;
+ cpu->isar.id_dfr0 = 0x00100000;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x00000030;
+ cpu->isar.id_mmfr1 = 0x00000000;
+ cpu->isar.id_mmfr2 = 0x00000000;
+ cpu->isar.id_mmfr3 = 0x00000000;
+ cpu->isar.id_isar0 = 0x01141110;
+ cpu->isar.id_isar1 = 0x02111000;
+ cpu->isar.id_isar2 = 0x21112231;
+ cpu->isar.id_isar3 = 0x01111110;
+ cpu->isar.id_isar4 = 0x01310102;
+ cpu->isar.id_isar5 = 0x00000000;
+ cpu->isar.id_isar6 = 0x00000000;
+}
+
+static void cortex_m3_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ set_feature(&cpu->env, ARM_FEATURE_V7);
+ set_feature(&cpu->env, ARM_FEATURE_M);
+ set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
+ cpu->midr = 0x410fc231;
+ cpu->pmsav7_dregion = 8;
+ cpu->isar.id_pfr0 = 0x00000030;
+ cpu->isar.id_pfr1 = 0x00000200;
+ cpu->isar.id_dfr0 = 0x00100000;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x00000030;
+ cpu->isar.id_mmfr1 = 0x00000000;
+ cpu->isar.id_mmfr2 = 0x00000000;
+ cpu->isar.id_mmfr3 = 0x00000000;
+ cpu->isar.id_isar0 = 0x01141110;
+ cpu->isar.id_isar1 = 0x02111000;
+ cpu->isar.id_isar2 = 0x21112231;
+ cpu->isar.id_isar3 = 0x01111110;
+ cpu->isar.id_isar4 = 0x01310102;
+ cpu->isar.id_isar5 = 0x00000000;
+ cpu->isar.id_isar6 = 0x00000000;
+}
+
+static void cortex_m4_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ set_feature(&cpu->env, ARM_FEATURE_V7);
+ set_feature(&cpu->env, ARM_FEATURE_M);
+ set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
+ set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP);
+ cpu->midr = 0x410fc240; /* r0p0 */
+ cpu->pmsav7_dregion = 8;
+ cpu->isar.mvfr0 = 0x10110021;
+ cpu->isar.mvfr1 = 0x11000011;
+ cpu->isar.mvfr2 = 0x00000000;
+ cpu->isar.id_pfr0 = 0x00000030;
+ cpu->isar.id_pfr1 = 0x00000200;
+ cpu->isar.id_dfr0 = 0x00100000;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x00000030;
+ cpu->isar.id_mmfr1 = 0x00000000;
+ cpu->isar.id_mmfr2 = 0x00000000;
+ cpu->isar.id_mmfr3 = 0x00000000;
+ cpu->isar.id_isar0 = 0x01141110;
+ cpu->isar.id_isar1 = 0x02111000;
+ cpu->isar.id_isar2 = 0x21112231;
+ cpu->isar.id_isar3 = 0x01111110;
+ cpu->isar.id_isar4 = 0x01310102;
+ cpu->isar.id_isar5 = 0x00000000;
+ cpu->isar.id_isar6 = 0x00000000;
+}
+
+static void cortex_m7_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ set_feature(&cpu->env, ARM_FEATURE_V7);
+ set_feature(&cpu->env, ARM_FEATURE_M);
+ set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
+ set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP);
+ cpu->midr = 0x411fc272; /* r1p2 */
+ cpu->pmsav7_dregion = 8;
+ cpu->isar.mvfr0 = 0x10110221;
+ cpu->isar.mvfr1 = 0x12000011;
+ cpu->isar.mvfr2 = 0x00000040;
+ cpu->isar.id_pfr0 = 0x00000030;
+ cpu->isar.id_pfr1 = 0x00000200;
+ cpu->isar.id_dfr0 = 0x00100000;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x00100030;
+ cpu->isar.id_mmfr1 = 0x00000000;
+ cpu->isar.id_mmfr2 = 0x01000000;
+ cpu->isar.id_mmfr3 = 0x00000000;
+ cpu->isar.id_isar0 = 0x01101110;
+ cpu->isar.id_isar1 = 0x02112000;
+ cpu->isar.id_isar2 = 0x20232231;
+ cpu->isar.id_isar3 = 0x01111131;
+ cpu->isar.id_isar4 = 0x01310132;
+ cpu->isar.id_isar5 = 0x00000000;
+ cpu->isar.id_isar6 = 0x00000000;
+}
+
+static void cortex_m33_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ set_feature(&cpu->env, ARM_FEATURE_V8);
+ set_feature(&cpu->env, ARM_FEATURE_M);
+ set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
+ set_feature(&cpu->env, ARM_FEATURE_M_SECURITY);
+ set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP);
+ cpu->midr = 0x410fd213; /* r0p3 */
+ cpu->pmsav7_dregion = 16;
+ cpu->sau_sregion = 8;
+ cpu->isar.mvfr0 = 0x10110021;
+ cpu->isar.mvfr1 = 0x11000011;
+ cpu->isar.mvfr2 = 0x00000040;
+ cpu->isar.id_pfr0 = 0x00000030;
+ cpu->isar.id_pfr1 = 0x00000210;
+ cpu->isar.id_dfr0 = 0x00200000;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x00101F40;
+ cpu->isar.id_mmfr1 = 0x00000000;
+ cpu->isar.id_mmfr2 = 0x01000000;
+ cpu->isar.id_mmfr3 = 0x00000000;
+ cpu->isar.id_isar0 = 0x01101110;
+ cpu->isar.id_isar1 = 0x02212000;
+ cpu->isar.id_isar2 = 0x20232232;
+ cpu->isar.id_isar3 = 0x01111131;
+ cpu->isar.id_isar4 = 0x01310132;
+ cpu->isar.id_isar5 = 0x00000000;
+ cpu->isar.id_isar6 = 0x00000000;
+ cpu->clidr = 0x00000000;
+ cpu->ctr = 0x8000c000;
+}
+
+static void cortex_m55_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ set_feature(&cpu->env, ARM_FEATURE_V8);
+ set_feature(&cpu->env, ARM_FEATURE_V8_1M);
+ set_feature(&cpu->env, ARM_FEATURE_M);
+ set_feature(&cpu->env, ARM_FEATURE_M_MAIN);
+ set_feature(&cpu->env, ARM_FEATURE_M_SECURITY);
+ set_feature(&cpu->env, ARM_FEATURE_THUMB_DSP);
+ cpu->midr = 0x410fd221; /* r0p1 */
+ cpu->revidr = 0;
+ cpu->pmsav7_dregion = 16;
+ cpu->sau_sregion = 8;
+ /* These are the MVFR* values for the FPU + full MVE configuration */
+ cpu->isar.mvfr0 = 0x10110221;
+ cpu->isar.mvfr1 = 0x12100211;
+ cpu->isar.mvfr2 = 0x00000040;
+ cpu->isar.id_pfr0 = 0x20000030;
+ cpu->isar.id_pfr1 = 0x00000230;
+ cpu->isar.id_dfr0 = 0x10200000;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x00111040;
+ cpu->isar.id_mmfr1 = 0x00000000;
+ cpu->isar.id_mmfr2 = 0x01000000;
+ cpu->isar.id_mmfr3 = 0x00000011;
+ cpu->isar.id_isar0 = 0x01103110;
+ cpu->isar.id_isar1 = 0x02212000;
+ cpu->isar.id_isar2 = 0x20232232;
+ cpu->isar.id_isar3 = 0x01111131;
+ cpu->isar.id_isar4 = 0x01310132;
+ cpu->isar.id_isar5 = 0x00000000;
+ cpu->isar.id_isar6 = 0x00000000;
+ cpu->clidr = 0x00000000; /* caches not implemented */
+ cpu->ctr = 0x8303c003;
+}
+
+static const TCGCPUOps arm_v7m_tcg_ops = {
+ .initialize = arm_translate_init,
+ .synchronize_from_tb = arm_cpu_synchronize_from_tb,
+ .debug_excp_handler = arm_debug_excp_handler,
+ .restore_state_to_opc = arm_restore_state_to_opc,
+
+#ifdef CONFIG_USER_ONLY
+ .record_sigsegv = arm_cpu_record_sigsegv,
+ .record_sigbus = arm_cpu_record_sigbus,
+#else
+ .tlb_fill = arm_cpu_tlb_fill,
+ .cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt,
+ .do_interrupt = arm_v7m_cpu_do_interrupt,
+ .do_transaction_failed = arm_cpu_do_transaction_failed,
+ .do_unaligned_access = arm_cpu_do_unaligned_access,
+ .adjust_watchpoint_address = arm_adjust_watchpoint_address,
+ .debug_check_watchpoint = arm_debug_check_watchpoint,
+ .debug_check_breakpoint = arm_debug_check_breakpoint,
+#endif /* !CONFIG_USER_ONLY */
+};
+
+static void arm_v7m_class_init(ObjectClass *oc, void *data)
+{
+ ARMCPUClass *acc = ARM_CPU_CLASS(oc);
+ CPUClass *cc = CPU_CLASS(oc);
+
+ acc->info = data;
+ cc->tcg_ops = &arm_v7m_tcg_ops;
+ cc->gdb_core_xml_file = "arm-m-profile.xml";
+}
+
+static const ARMCPUInfo arm_v7m_cpus[] = {
+ { .name = "cortex-m0", .initfn = cortex_m0_initfn,
+ .class_init = arm_v7m_class_init },
+ { .name = "cortex-m3", .initfn = cortex_m3_initfn,
+ .class_init = arm_v7m_class_init },
+ { .name = "cortex-m4", .initfn = cortex_m4_initfn,
+ .class_init = arm_v7m_class_init },
+ { .name = "cortex-m7", .initfn = cortex_m7_initfn,
+ .class_init = arm_v7m_class_init },
+ { .name = "cortex-m33", .initfn = cortex_m33_initfn,
+ .class_init = arm_v7m_class_init },
+ { .name = "cortex-m55", .initfn = cortex_m55_initfn,
+ .class_init = arm_v7m_class_init },
+};
+
+static void arm_v7m_cpu_register_types(void)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(arm_v7m_cpus); ++i) {
+ arm_cpu_register(&arm_v7m_cpus[i]);
+ }
+}
+
+type_init(arm_v7m_cpu_register_types)
diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c
new file mode 100644
index 0000000000..de8f2be941
--- /dev/null
+++ b/target/arm/tcg/cpu32.c
@@ -0,0 +1,1039 @@
+/*
+ * QEMU ARM TCG-only CPUs.
+ *
+ * Copyright (c) 2012 SUSE LINUX Products GmbH
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "hw/core/tcg-cpu-ops.h"
+#include "internals.h"
+#include "target/arm/idau.h"
+#if !defined(CONFIG_USER_ONLY)
+#include "hw/boards.h"
+#endif
+#include "cpregs.h"
+
+
+/* Share AArch32 -cpu max features with AArch64. */
+void aa32_max_features(ARMCPU *cpu)
+{
+ uint32_t t;
+
+ /* Add additional features supported by QEMU */
+ t = cpu->isar.id_isar5;
+ t = FIELD_DP32(t, ID_ISAR5, AES, 2); /* FEAT_PMULL */
+ t = FIELD_DP32(t, ID_ISAR5, SHA1, 1); /* FEAT_SHA1 */
+ t = FIELD_DP32(t, ID_ISAR5, SHA2, 1); /* FEAT_SHA256 */
+ t = FIELD_DP32(t, ID_ISAR5, CRC32, 1);
+ t = FIELD_DP32(t, ID_ISAR5, RDM, 1); /* FEAT_RDM */
+ t = FIELD_DP32(t, ID_ISAR5, VCMA, 1); /* FEAT_FCMA */
+ cpu->isar.id_isar5 = t;
+
+ t = cpu->isar.id_isar6;
+ t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1); /* FEAT_JSCVT */
+    t = FIELD_DP32(t, ID_ISAR6, DP, 1);       /* FEAT_DotProd */
+ t = FIELD_DP32(t, ID_ISAR6, FHM, 1); /* FEAT_FHM */
+ t = FIELD_DP32(t, ID_ISAR6, SB, 1); /* FEAT_SB */
+ t = FIELD_DP32(t, ID_ISAR6, SPECRES, 1); /* FEAT_SPECRES */
+ t = FIELD_DP32(t, ID_ISAR6, BF16, 1); /* FEAT_AA32BF16 */
+ t = FIELD_DP32(t, ID_ISAR6, I8MM, 1); /* FEAT_AA32I8MM */
+ cpu->isar.id_isar6 = t;
+
+ t = cpu->isar.mvfr1;
+ t = FIELD_DP32(t, MVFR1, FPHP, 3); /* FEAT_FP16 */
+ t = FIELD_DP32(t, MVFR1, SIMDHP, 2); /* FEAT_FP16 */
+ cpu->isar.mvfr1 = t;
+
+ t = cpu->isar.mvfr2;
+ t = FIELD_DP32(t, MVFR2, SIMDMISC, 3); /* SIMD MaxNum */
+ t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */
+ cpu->isar.mvfr2 = t;
+
+ t = cpu->isar.id_mmfr3;
+ t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* FEAT_PAN2 */
+ cpu->isar.id_mmfr3 = t;
+
+ t = cpu->isar.id_mmfr4;
+ t = FIELD_DP32(t, ID_MMFR4, HPDS, 2); /* FEAT_HPDS2 */
+ t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */
+ t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* FEAT_TTCNP */
+ t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* FEAT_XNX */
+ t = FIELD_DP32(t, ID_MMFR4, EVT, 2); /* FEAT_EVT */
+ cpu->isar.id_mmfr4 = t;
+
+ t = cpu->isar.id_mmfr5;
+ t = FIELD_DP32(t, ID_MMFR5, ETS, 1); /* FEAT_ETS */
+ cpu->isar.id_mmfr5 = t;
+
+ t = cpu->isar.id_pfr0;
+    t = FIELD_DP32(t, ID_PFR0, CSV2, 2);      /* FEAT_CSV2 */
+ t = FIELD_DP32(t, ID_PFR0, DIT, 1); /* FEAT_DIT */
+ t = FIELD_DP32(t, ID_PFR0, RAS, 1); /* FEAT_RAS */
+ cpu->isar.id_pfr0 = t;
+
+ t = cpu->isar.id_pfr2;
+ t = FIELD_DP32(t, ID_PFR2, CSV3, 1); /* FEAT_CSV3 */
+ t = FIELD_DP32(t, ID_PFR2, SSBS, 1); /* FEAT_SSBS */
+ cpu->isar.id_pfr2 = t;
+
+ t = cpu->isar.id_dfr0;
+ t = FIELD_DP32(t, ID_DFR0, COPDBG, 9); /* FEAT_Debugv8p4 */
+ t = FIELD_DP32(t, ID_DFR0, COPSDBG, 9); /* FEAT_Debugv8p4 */
+ t = FIELD_DP32(t, ID_DFR0, PERFMON, 6); /* FEAT_PMUv3p5 */
+ cpu->isar.id_dfr0 = t;
+
+ t = cpu->isar.id_dfr1;
+ t = FIELD_DP32(t, ID_DFR1, HPMN0, 1); /* FEAT_HPMN0 */
+ cpu->isar.id_dfr1 = t;
+}
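For readers new to the registerfields macros (hw/registerfields.h): FIELD_DP32(t, REG, FIELD, val) deposits val into the named bit field of t and returns the result. A hedged sketch of what the first call above amounts to, assuming ID_ISAR5.AES occupies bits [7:4]:

    #include <stdint.h>

    /* Sketch only: hand-expanded FIELD_DP32(t, ID_ISAR5, AES, 2). */
    static uint32_t deposit_id_isar5_aes_sketch(uint32_t t)
    {
        return (t & ~0x000000f0u) | (2u << 4);   /* AES = 2 -> FEAT_PMULL */
    }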
+
+/* CPU models. These are not needed for the AArch64 linux-user build. */
+#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
+
+static void arm926_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,arm926";
+ set_feature(&cpu->env, ARM_FEATURE_V5);
+ set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+ set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN);
+ cpu->midr = 0x41069265;
+ cpu->reset_fpsid = 0x41011090;
+ cpu->ctr = 0x1dd20d2;
+ cpu->reset_sctlr = 0x00090078;
+
+ /*
+ * ARMv5 does not have the ID_ISAR registers, but we can still
+ * set the field to indicate Jazelle support within QEMU.
+ */
+ cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1);
+ /*
+ * Similarly, we need to set MVFR0 fields to enable vfp and short vector
+ * support even though ARMv5 doesn't have this register.
+ */
+ cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1);
+ cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSP, 1);
+ cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1);
+}
+
+static void arm946_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,arm946";
+ set_feature(&cpu->env, ARM_FEATURE_V5);
+ set_feature(&cpu->env, ARM_FEATURE_PMSA);
+ set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+ cpu->midr = 0x41059461;
+ cpu->ctr = 0x0f004006;
+ cpu->reset_sctlr = 0x00000078;
+}
+
+static void arm1026_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,arm1026";
+ set_feature(&cpu->env, ARM_FEATURE_V5);
+ set_feature(&cpu->env, ARM_FEATURE_AUXCR);
+ set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+ set_feature(&cpu->env, ARM_FEATURE_CACHE_TEST_CLEAN);
+ cpu->midr = 0x4106a262;
+ cpu->reset_fpsid = 0x410110a0;
+ cpu->ctr = 0x1dd20d2;
+ cpu->reset_sctlr = 0x00090078;
+ cpu->reset_auxcr = 1;
+
+ /*
+ * ARMv5 does not have the ID_ISAR registers, but we can still
+ * set the field to indicate Jazelle support within QEMU.
+ */
+ cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1);
+ /*
+ * Similarly, we need to set MVFR0 fields to enable vfp and short vector
+ * support even though ARMv5 doesn't have this register.
+ */
+ cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1);
+ cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSP, 1);
+ cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1);
+
+ {
+ /* The 1026 had an IFAR at c6,c0,0,1 rather than the ARMv6 c6,c0,0,2 */
+ ARMCPRegInfo ifar = {
+ .name = "IFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1,
+ .access = PL1_RW,
+ .fieldoffset = offsetof(CPUARMState, cp15.ifar_ns),
+ .resetvalue = 0
+ };
+ define_one_arm_cp_reg(cpu, &ifar);
+ }
+}
+
+static void arm1136_r2_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ /*
+     * What QEMU calls "arm1136-r2" is actually the 1136 r0p2, i.e. an
+     * older core than plain "arm1136". In particular this does not
+     * have the v6K features.
+     * These ID register values are correct for the 1136 but may be wrong
+     * for the 1136 r0p2 (in particular, r0p2 does not actually implement
+     * most of the ID registers).
+ */
+
+ cpu->dtb_compatible = "arm,arm1136";
+ set_feature(&cpu->env, ARM_FEATURE_V6);
+ set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+ set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG);
+ set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS);
+ cpu->midr = 0x4107b362;
+ cpu->reset_fpsid = 0x410120b4;
+ cpu->isar.mvfr0 = 0x11111111;
+ cpu->isar.mvfr1 = 0x00000000;
+ cpu->ctr = 0x1dd20d2;
+ cpu->reset_sctlr = 0x00050078;
+ cpu->isar.id_pfr0 = 0x111;
+ cpu->isar.id_pfr1 = 0x1;
+ cpu->isar.id_dfr0 = 0x2;
+ cpu->id_afr0 = 0x3;
+ cpu->isar.id_mmfr0 = 0x01130003;
+ cpu->isar.id_mmfr1 = 0x10030302;
+ cpu->isar.id_mmfr2 = 0x01222110;
+ cpu->isar.id_isar0 = 0x00140011;
+ cpu->isar.id_isar1 = 0x12002111;
+ cpu->isar.id_isar2 = 0x11231111;
+ cpu->isar.id_isar3 = 0x01102131;
+ cpu->isar.id_isar4 = 0x141;
+ cpu->reset_auxcr = 7;
+}
+
+static void arm1136_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,arm1136";
+ set_feature(&cpu->env, ARM_FEATURE_V6K);
+ set_feature(&cpu->env, ARM_FEATURE_V6);
+ set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+ set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG);
+ set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS);
+ cpu->midr = 0x4117b363;
+ cpu->reset_fpsid = 0x410120b4;
+ cpu->isar.mvfr0 = 0x11111111;
+ cpu->isar.mvfr1 = 0x00000000;
+ cpu->ctr = 0x1dd20d2;
+ cpu->reset_sctlr = 0x00050078;
+ cpu->isar.id_pfr0 = 0x111;
+ cpu->isar.id_pfr1 = 0x1;
+ cpu->isar.id_dfr0 = 0x2;
+ cpu->id_afr0 = 0x3;
+ cpu->isar.id_mmfr0 = 0x01130003;
+ cpu->isar.id_mmfr1 = 0x10030302;
+ cpu->isar.id_mmfr2 = 0x01222110;
+ cpu->isar.id_isar0 = 0x00140011;
+ cpu->isar.id_isar1 = 0x12002111;
+ cpu->isar.id_isar2 = 0x11231111;
+ cpu->isar.id_isar3 = 0x01102131;
+ cpu->isar.id_isar4 = 0x141;
+ cpu->reset_auxcr = 7;
+}
+
+static void arm1176_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,arm1176";
+ set_feature(&cpu->env, ARM_FEATURE_V6K);
+ set_feature(&cpu->env, ARM_FEATURE_VAPA);
+ set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+ set_feature(&cpu->env, ARM_FEATURE_CACHE_DIRTY_REG);
+ set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ cpu->midr = 0x410fb767;
+ cpu->reset_fpsid = 0x410120b5;
+ cpu->isar.mvfr0 = 0x11111111;
+ cpu->isar.mvfr1 = 0x00000000;
+ cpu->ctr = 0x1dd20d2;
+ cpu->reset_sctlr = 0x00050078;
+ cpu->isar.id_pfr0 = 0x111;
+ cpu->isar.id_pfr1 = 0x11;
+ cpu->isar.id_dfr0 = 0x33;
+ cpu->id_afr0 = 0;
+ cpu->isar.id_mmfr0 = 0x01130003;
+ cpu->isar.id_mmfr1 = 0x10030302;
+ cpu->isar.id_mmfr2 = 0x01222100;
+ cpu->isar.id_isar0 = 0x0140011;
+ cpu->isar.id_isar1 = 0x12002111;
+ cpu->isar.id_isar2 = 0x11231121;
+ cpu->isar.id_isar3 = 0x01102131;
+ cpu->isar.id_isar4 = 0x01141;
+ cpu->reset_auxcr = 7;
+}
+
+static void arm11mpcore_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,arm11mpcore";
+ set_feature(&cpu->env, ARM_FEATURE_V6K);
+ set_feature(&cpu->env, ARM_FEATURE_VAPA);
+ set_feature(&cpu->env, ARM_FEATURE_MPIDR);
+ set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+ cpu->midr = 0x410fb022;
+ cpu->reset_fpsid = 0x410120b4;
+ cpu->isar.mvfr0 = 0x11111111;
+ cpu->isar.mvfr1 = 0x00000000;
+ cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */
+ cpu->isar.id_pfr0 = 0x111;
+ cpu->isar.id_pfr1 = 0x1;
+ cpu->isar.id_dfr0 = 0;
+ cpu->id_afr0 = 0x2;
+ cpu->isar.id_mmfr0 = 0x01100103;
+ cpu->isar.id_mmfr1 = 0x10020302;
+ cpu->isar.id_mmfr2 = 0x01222000;
+ cpu->isar.id_isar0 = 0x00100011;
+ cpu->isar.id_isar1 = 0x12002111;
+ cpu->isar.id_isar2 = 0x11221011;
+ cpu->isar.id_isar3 = 0x01102131;
+ cpu->isar.id_isar4 = 0x141;
+ cpu->reset_auxcr = 1;
+}
+
+static const ARMCPRegInfo cortexa8_cp_reginfo[] = {
+ { .name = "L2LOCKDOWN", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "L2AUXCR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 2,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+};
+
+static void cortex_a8_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,cortex-a8";
+ set_feature(&cpu->env, ARM_FEATURE_V7);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
+ set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+ cpu->midr = 0x410fc080;
+ cpu->reset_fpsid = 0x410330c0;
+ cpu->isar.mvfr0 = 0x11110222;
+ cpu->isar.mvfr1 = 0x00011111;
+ cpu->ctr = 0x82048004;
+ cpu->reset_sctlr = 0x00c50078;
+ cpu->isar.id_pfr0 = 0x1031;
+ cpu->isar.id_pfr1 = 0x11;
+ cpu->isar.id_dfr0 = 0x400;
+ cpu->id_afr0 = 0;
+ cpu->isar.id_mmfr0 = 0x31100003;
+ cpu->isar.id_mmfr1 = 0x20000000;
+ cpu->isar.id_mmfr2 = 0x01202000;
+ cpu->isar.id_mmfr3 = 0x11;
+ cpu->isar.id_isar0 = 0x00101111;
+ cpu->isar.id_isar1 = 0x12112111;
+ cpu->isar.id_isar2 = 0x21232031;
+ cpu->isar.id_isar3 = 0x11112131;
+ cpu->isar.id_isar4 = 0x00111142;
+ cpu->isar.dbgdidr = 0x15141000;
+ cpu->clidr = (1 << 27) | (2 << 24) | 3;
+ cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */
+ cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */
+ cpu->ccsidr[2] = 0xf0000000; /* No L2 icache. */
+ cpu->reset_auxcr = 2;
+ cpu->isar.reset_pmcr_el0 = 0x41002000;
+ define_arm_cp_regs(cpu, cortexa8_cp_reginfo);
+}
+
+static const ARMCPRegInfo cortexa9_cp_reginfo[] = {
+ /*
+     * power_control should be set to maximum latency. As elsewhere,
+     * default to 0 and let a private hook set the real value
+ */
+ { .name = "A9_PWRCTL", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 0,
+ .access = PL1_RW, .resetvalue = 0,
+ .fieldoffset = offsetof(CPUARMState, cp15.c15_power_control) },
+ { .name = "A9_DIAG", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 1,
+ .access = PL1_RW, .resetvalue = 0,
+ .fieldoffset = offsetof(CPUARMState, cp15.c15_diagnostic) },
+ { .name = "A9_PWRDIAG", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 2,
+ .access = PL1_RW, .resetvalue = 0,
+ .fieldoffset = offsetof(CPUARMState, cp15.c15_power_diagnostic) },
+ { .name = "NEONBUSY", .cp = 15, .crn = 15, .crm = 1, .opc1 = 0, .opc2 = 0,
+ .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
+ /* TLB lockdown control */
+ { .name = "TLB_LOCKR", .cp = 15, .crn = 15, .crm = 4, .opc1 = 5, .opc2 = 2,
+ .access = PL1_W, .resetvalue = 0, .type = ARM_CP_NOP },
+ { .name = "TLB_LOCKW", .cp = 15, .crn = 15, .crm = 4, .opc1 = 5, .opc2 = 4,
+ .access = PL1_W, .resetvalue = 0, .type = ARM_CP_NOP },
+ { .name = "TLB_VA", .cp = 15, .crn = 15, .crm = 5, .opc1 = 5, .opc2 = 2,
+ .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
+ { .name = "TLB_PA", .cp = 15, .crn = 15, .crm = 6, .opc1 = 5, .opc2 = 2,
+ .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
+ { .name = "TLB_ATTR", .cp = 15, .crn = 15, .crm = 7, .opc1 = 5, .opc2 = 2,
+ .access = PL1_RW, .resetvalue = 0, .type = ARM_CP_CONST },
+};
+
+static void cortex_a9_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,cortex-a9";
+ set_feature(&cpu->env, ARM_FEATURE_V7);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+ /*
+ * Note that A9 supports the MP extensions even for
+ * A9UP and single-core A9MP (which are both different
+ * and valid configurations; we don't model A9UP).
+ */
+ set_feature(&cpu->env, ARM_FEATURE_V7MP);
+ set_feature(&cpu->env, ARM_FEATURE_CBAR);
+ cpu->midr = 0x410fc090;
+ cpu->reset_fpsid = 0x41033090;
+ cpu->isar.mvfr0 = 0x11110222;
+ cpu->isar.mvfr1 = 0x01111111;
+ cpu->ctr = 0x80038003;
+ cpu->reset_sctlr = 0x00c50078;
+ cpu->isar.id_pfr0 = 0x1031;
+ cpu->isar.id_pfr1 = 0x11;
+ cpu->isar.id_dfr0 = 0x000;
+ cpu->id_afr0 = 0;
+ cpu->isar.id_mmfr0 = 0x00100103;
+ cpu->isar.id_mmfr1 = 0x20000000;
+ cpu->isar.id_mmfr2 = 0x01230000;
+ cpu->isar.id_mmfr3 = 0x00002111;
+ cpu->isar.id_isar0 = 0x00101111;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232041;
+ cpu->isar.id_isar3 = 0x11112131;
+ cpu->isar.id_isar4 = 0x00111142;
+ cpu->isar.dbgdidr = 0x35141000;
+ cpu->clidr = (1 << 27) | (1 << 24) | 3;
+ cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */
+ cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */
+ cpu->isar.reset_pmcr_el0 = 0x41093000;
+ define_arm_cp_regs(cpu, cortexa9_cp_reginfo);
+}
+
+#ifndef CONFIG_USER_ONLY
+static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+
+ /*
+ * Linux wants the number of processors from here.
+ * Might as well set the interrupt-controller bit too.
+ */
+ return ((ms->smp.cpus - 1) << 24) | (1 << 23);
+}
+#endif
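A worked example of the value returned above (a sketch, not part of the patch): with ms->smp.cpus == 4 the guest reads back ((4 - 1) << 24) | (1 << 23), i.e. 0x03800000.

    #include <assert.h>

    /* Sketch only: the L2CTLR value a 4-CPU guest observes. */
    static void l2ctlr_example(void)
    {
        assert((((4 - 1) << 24) | (1 << 23)) == 0x03800000);
    }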
+
+static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
+#ifndef CONFIG_USER_ONLY
+ { .name = "L2CTLR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 2,
+ .access = PL1_RW, .resetvalue = 0, .readfn = a15_l2ctlr_read,
+ .writefn = arm_cp_write_ignore, },
+#endif
+ { .name = "L2ECTLR", .cp = 15, .crn = 9, .crm = 0, .opc1 = 1, .opc2 = 3,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+};
+
+static void cortex_a7_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,cortex-a7";
+ set_feature(&cpu->env, ARM_FEATURE_V7VE);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
+ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+ set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+ set_feature(&cpu->env, ARM_FEATURE_EL2);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+ cpu->midr = 0x410fc075;
+ cpu->reset_fpsid = 0x41023075;
+ cpu->isar.mvfr0 = 0x10110222;
+ cpu->isar.mvfr1 = 0x11111111;
+ cpu->ctr = 0x84448003;
+ cpu->reset_sctlr = 0x00c50078;
+ cpu->isar.id_pfr0 = 0x00001131;
+ cpu->isar.id_pfr1 = 0x00011011;
+ cpu->isar.id_dfr0 = 0x02010555;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x10101105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01240000;
+ cpu->isar.id_mmfr3 = 0x02102211;
+ /*
+     * The Cortex-A7 MPCore r0p5 TRM, page 4-4, gives 0x01101110; but
+     * table 4-41 gives 0x02101110, which includes the ARM DIV insns.
+ */
+ cpu->isar.id_isar0 = 0x02101110;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232041;
+ cpu->isar.id_isar3 = 0x11112131;
+ cpu->isar.id_isar4 = 0x10011142;
+ cpu->isar.dbgdidr = 0x3515f005;
+ cpu->isar.dbgdevid = 0x01110f13;
+ cpu->isar.dbgdevid1 = 0x1;
+ cpu->clidr = 0x0a200023;
+ cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
+ cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */
+ cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */
+ cpu->isar.reset_pmcr_el0 = 0x41072000;
+ define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */
+}
+
+static void cortex_a15_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,cortex-a15";
+ set_feature(&cpu->env, ARM_FEATURE_V7VE);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
+ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+ set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+ set_feature(&cpu->env, ARM_FEATURE_EL2);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+    /* r4p0 CPU, not requiring the expensive TLB flush errata workarounds */
+ cpu->midr = 0x414fc0f0;
+ cpu->revidr = 0x0;
+ cpu->reset_fpsid = 0x410430f0;
+ cpu->isar.mvfr0 = 0x10110222;
+ cpu->isar.mvfr1 = 0x11111111;
+ cpu->ctr = 0x8444c004;
+ cpu->reset_sctlr = 0x00c50078;
+ cpu->isar.id_pfr0 = 0x00001131;
+ cpu->isar.id_pfr1 = 0x00011011;
+ cpu->isar.id_dfr0 = 0x02010555;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x10201105;
+ cpu->isar.id_mmfr1 = 0x20000000;
+ cpu->isar.id_mmfr2 = 0x01240000;
+ cpu->isar.id_mmfr3 = 0x02102211;
+ cpu->isar.id_isar0 = 0x02101110;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232041;
+ cpu->isar.id_isar3 = 0x11112131;
+ cpu->isar.id_isar4 = 0x10011142;
+ cpu->isar.dbgdidr = 0x3515f021;
+ cpu->isar.dbgdevid = 0x01110f13;
+ cpu->isar.dbgdevid1 = 0x0;
+ cpu->clidr = 0x0a200023;
+ cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
+ cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */
+ cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */
+ cpu->isar.reset_pmcr_el0 = 0x410F3000;
+ define_arm_cp_regs(cpu, cortexa15_cp_reginfo);
+}
+
+static const ARMCPRegInfo cortexr5_cp_reginfo[] = {
+ /* Dummy the TCM region regs for the moment */
+ { .name = "ATCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST },
+ { .name = "BTCM", .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_CONST },
+ { .name = "DCACHE_INVAL", .cp = 15, .opc1 = 0, .crn = 15, .crm = 5,
+ .opc2 = 0, .access = PL1_W, .type = ARM_CP_NOP },
+};
+
+static void cortex_r5_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ set_feature(&cpu->env, ARM_FEATURE_V7);
+ set_feature(&cpu->env, ARM_FEATURE_V7MP);
+ set_feature(&cpu->env, ARM_FEATURE_PMSA);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+ cpu->midr = 0x411fc153; /* r1p3 */
+ cpu->isar.id_pfr0 = 0x0131;
+ cpu->isar.id_pfr1 = 0x001;
+ cpu->isar.id_dfr0 = 0x010400;
+ cpu->id_afr0 = 0x0;
+ cpu->isar.id_mmfr0 = 0x0210030;
+ cpu->isar.id_mmfr1 = 0x00000000;
+ cpu->isar.id_mmfr2 = 0x01200000;
+ cpu->isar.id_mmfr3 = 0x0211;
+ cpu->isar.id_isar0 = 0x02101111;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232141;
+ cpu->isar.id_isar3 = 0x01112131;
+ cpu->isar.id_isar4 = 0x0010142;
+ cpu->isar.id_isar5 = 0x0;
+ cpu->isar.id_isar6 = 0x0;
+ cpu->mp_is_up = true;
+ cpu->pmsav7_dregion = 16;
+ cpu->isar.reset_pmcr_el0 = 0x41151800;
+ define_arm_cp_regs(cpu, cortexr5_cp_reginfo);
+}
+
+static const ARMCPRegInfo cortex_r52_cp_reginfo[] = {
+ { .name = "CPUACTLR", .cp = 15, .opc1 = 0, .crm = 15,
+ .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+ { .name = "IMP_ATCMREGIONR",
+ .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_BTCMREGIONR",
+ .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_CTCMREGIONR",
+ .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 2,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_CSCTLR",
+ .cp = 15, .opc1 = 1, .crn = 9, .crm = 1, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_BPCTLR",
+ .cp = 15, .opc1 = 1, .crn = 9, .crm = 1, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_MEMPROTCLR",
+ .cp = 15, .opc1 = 1, .crn = 9, .crm = 1, .opc2 = 2,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_SLAVEPCTLR",
+ .cp = 15, .opc1 = 0, .crn = 11, .crm = 0, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_PERIPHREGIONR",
+ .cp = 15, .opc1 = 0, .crn = 15, .crm = 0, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_FLASHIFREGIONR",
+ .cp = 15, .opc1 = 0, .crn = 15, .crm = 0, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_BUILDOPTR",
+ .cp = 15, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 0,
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_PINOPTR",
+ .cp = 15, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 7,
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_QOSR",
+ .cp = 15, .opc1 = 1, .crn = 15, .crm = 3, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_BUSTIMEOUTR",
+ .cp = 15, .opc1 = 1, .crn = 15, .crm = 3, .opc2 = 2,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_INTMONR",
+ .cp = 15, .opc1 = 1, .crn = 15, .crm = 3, .opc2 = 4,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_ICERR0",
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 0, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_ICERR1",
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 0, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_DCERR0",
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 1, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_DCERR1",
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 1, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_TCMERR0",
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 2, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_TCMERR1",
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 2, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_TCMSYNDR0",
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 2, .opc2 = 2,
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_TCMSYNDR1",
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 2, .opc2 = 3,
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_FLASHERR0",
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 3, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_FLASHERR1",
+ .cp = 15, .opc1 = 2, .crn = 15, .crm = 3, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_CDBGDR0",
+ .cp = 15, .opc1 = 3, .crn = 15, .crm = 0, .opc2 = 0,
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "IMP_CDBGDR1",
+ .cp = 15, .opc1 = 3, .crn = 15, .crm = 0, .opc2 = 1,
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_TESTR0",
+ .cp = 15, .opc1 = 4, .crn = 15, .crm = 0, .opc2 = 0,
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IMP_TESTR1",
+ .cp = 15, .opc1 = 4, .crn = 15, .crm = 0, .opc2 = 1,
+ .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 },
+ { .name = "IMP_CDBGDCI",
+ .cp = 15, .opc1 = 0, .crn = 15, .crm = 15, .opc2 = 0,
+ .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 },
+ { .name = "IMP_CDBGDCT",
+ .cp = 15, .opc1 = 3, .crn = 15, .crm = 2, .opc2 = 0,
+ .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 },
+ { .name = "IMP_CDBGICT",
+ .cp = 15, .opc1 = 3, .crn = 15, .crm = 2, .opc2 = 1,
+ .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 },
+ { .name = "IMP_CDBGDCD",
+ .cp = 15, .opc1 = 3, .crn = 15, .crm = 4, .opc2 = 0,
+ .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 },
+ { .name = "IMP_CDBGICD",
+ .cp = 15, .opc1 = 3, .crn = 15, .crm = 4, .opc2 = 1,
+ .access = PL1_W, .type = ARM_CP_NOP, .resetvalue = 0 },
+};
+
+
+static void cortex_r52_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ set_feature(&cpu->env, ARM_FEATURE_V8);
+ set_feature(&cpu->env, ARM_FEATURE_EL2);
+ set_feature(&cpu->env, ARM_FEATURE_PMSA);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+ set_feature(&cpu->env, ARM_FEATURE_AUXCR);
+ cpu->midr = 0x411fd133; /* r1p3 */
+ cpu->revidr = 0x00000000;
+ cpu->reset_fpsid = 0x41034023;
+ cpu->isar.mvfr0 = 0x10110222;
+ cpu->isar.mvfr1 = 0x12111111;
+ cpu->isar.mvfr2 = 0x00000043;
+ cpu->ctr = 0x8144c004;
+ cpu->reset_sctlr = 0x30c50838;
+ cpu->isar.id_pfr0 = 0x00000131;
+ cpu->isar.id_pfr1 = 0x10111001;
+ cpu->isar.id_dfr0 = 0x03010006;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x00211040;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01200000;
+ cpu->isar.id_mmfr3 = 0xf0102211;
+ cpu->isar.id_mmfr4 = 0x00000010;
+ cpu->isar.id_isar0 = 0x02101110;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232142;
+ cpu->isar.id_isar3 = 0x01112131;
+ cpu->isar.id_isar4 = 0x00010142;
+ cpu->isar.id_isar5 = 0x00010001;
+ cpu->isar.dbgdidr = 0x77168000;
+ cpu->clidr = (1 << 27) | (1 << 24) | 0x3;
+ cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */
+ cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */
+
+ cpu->pmsav7_dregion = 16;
+ cpu->pmsav8r_hdregion = 16;
+
+ define_arm_cp_regs(cpu, cortex_r52_cp_reginfo);
+}
+
+static void cortex_r5f_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cortex_r5_initfn(obj);
+ cpu->isar.mvfr0 = 0x10110221;
+ cpu->isar.mvfr1 = 0x00000011;
+}
+
+static void ti925t_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ set_feature(&cpu->env, ARM_FEATURE_V4T);
+ set_feature(&cpu->env, ARM_FEATURE_OMAPCP);
+ cpu->midr = ARM_CPUID_TI925T;
+ cpu->ctr = 0x5109149;
+ cpu->reset_sctlr = 0x00000070;
+}
+
+static void sa1100_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "intel,sa1100";
+ set_feature(&cpu->env, ARM_FEATURE_STRONGARM);
+ set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+ cpu->midr = 0x4401A11B;
+ cpu->reset_sctlr = 0x00000070;
+}
+
+static void sa1110_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ set_feature(&cpu->env, ARM_FEATURE_STRONGARM);
+ set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
+ cpu->midr = 0x6901B119;
+ cpu->reset_sctlr = 0x00000070;
+}
+
+static void pxa250_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "marvell,xscale";
+ set_feature(&cpu->env, ARM_FEATURE_V5);
+ set_feature(&cpu->env, ARM_FEATURE_XSCALE);
+ cpu->midr = 0x69052100;
+ cpu->ctr = 0xd172172;
+ cpu->reset_sctlr = 0x00000078;
+}
+
+static void pxa255_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "marvell,xscale";
+ set_feature(&cpu->env, ARM_FEATURE_V5);
+ set_feature(&cpu->env, ARM_FEATURE_XSCALE);
+ cpu->midr = 0x69052d00;
+ cpu->ctr = 0xd172172;
+ cpu->reset_sctlr = 0x00000078;
+}
+
+static void pxa260_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "marvell,xscale";
+ set_feature(&cpu->env, ARM_FEATURE_V5);
+ set_feature(&cpu->env, ARM_FEATURE_XSCALE);
+ cpu->midr = 0x69052903;
+ cpu->ctr = 0xd172172;
+ cpu->reset_sctlr = 0x00000078;
+}
+
+static void pxa261_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "marvell,xscale";
+ set_feature(&cpu->env, ARM_FEATURE_V5);
+ set_feature(&cpu->env, ARM_FEATURE_XSCALE);
+ cpu->midr = 0x69052d05;
+ cpu->ctr = 0xd172172;
+ cpu->reset_sctlr = 0x00000078;
+}
+
+static void pxa262_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "marvell,xscale";
+ set_feature(&cpu->env, ARM_FEATURE_V5);
+ set_feature(&cpu->env, ARM_FEATURE_XSCALE);
+ cpu->midr = 0x69052d06;
+ cpu->ctr = 0xd172172;
+ cpu->reset_sctlr = 0x00000078;
+}
+
+static void pxa270a0_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "marvell,xscale";
+ set_feature(&cpu->env, ARM_FEATURE_V5);
+ set_feature(&cpu->env, ARM_FEATURE_XSCALE);
+ set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
+ cpu->midr = 0x69054110;
+ cpu->ctr = 0xd172172;
+ cpu->reset_sctlr = 0x00000078;
+}
+
+static void pxa270a1_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "marvell,xscale";
+ set_feature(&cpu->env, ARM_FEATURE_V5);
+ set_feature(&cpu->env, ARM_FEATURE_XSCALE);
+ set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
+ cpu->midr = 0x69054111;
+ cpu->ctr = 0xd172172;
+ cpu->reset_sctlr = 0x00000078;
+}
+
+static void pxa270b0_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "marvell,xscale";
+ set_feature(&cpu->env, ARM_FEATURE_V5);
+ set_feature(&cpu->env, ARM_FEATURE_XSCALE);
+ set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
+ cpu->midr = 0x69054112;
+ cpu->ctr = 0xd172172;
+ cpu->reset_sctlr = 0x00000078;
+}
+
+static void pxa270b1_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "marvell,xscale";
+ set_feature(&cpu->env, ARM_FEATURE_V5);
+ set_feature(&cpu->env, ARM_FEATURE_XSCALE);
+ set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
+ cpu->midr = 0x69054113;
+ cpu->ctr = 0xd172172;
+ cpu->reset_sctlr = 0x00000078;
+}
+
+static void pxa270c0_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "marvell,xscale";
+ set_feature(&cpu->env, ARM_FEATURE_V5);
+ set_feature(&cpu->env, ARM_FEATURE_XSCALE);
+ set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
+ cpu->midr = 0x69054114;
+ cpu->ctr = 0xd172172;
+ cpu->reset_sctlr = 0x00000078;
+}
+
+static void pxa270c5_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "marvell,xscale";
+ set_feature(&cpu->env, ARM_FEATURE_V5);
+ set_feature(&cpu->env, ARM_FEATURE_XSCALE);
+ set_feature(&cpu->env, ARM_FEATURE_IWMMXT);
+ cpu->midr = 0x69054117;
+ cpu->ctr = 0xd172172;
+ cpu->reset_sctlr = 0x00000078;
+}
+
+#ifndef TARGET_AARCH64
+/*
+ * -cpu max: a CPU with as many features enabled as our emulation supports.
+ * The version of '-cpu max' for qemu-system-aarch64 is defined in cpu64.c;
+ * this only needs to handle 32 bits, and need not care about KVM.
+ */
+static void arm_max_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+    /* As aarch64_a57_initfn, but advertising none of the AArch64 features */
+ cpu->dtb_compatible = "arm,cortex-a57";
+ set_feature(&cpu->env, ARM_FEATURE_V8);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+ set_feature(&cpu->env, ARM_FEATURE_EL2);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+ cpu->midr = 0x411fd070;
+ cpu->revidr = 0x00000000;
+ cpu->reset_fpsid = 0x41034070;
+ cpu->isar.mvfr0 = 0x10110222;
+ cpu->isar.mvfr1 = 0x12111111;
+ cpu->isar.mvfr2 = 0x00000043;
+ cpu->ctr = 0x8444c004;
+ cpu->reset_sctlr = 0x00c50838;
+ cpu->isar.id_pfr0 = 0x00000131;
+ cpu->isar.id_pfr1 = 0x00011011;
+ cpu->isar.id_dfr0 = 0x03010066;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x10101105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01260000;
+ cpu->isar.id_mmfr3 = 0x02102211;
+ cpu->isar.id_isar0 = 0x02101110;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232042;
+ cpu->isar.id_isar3 = 0x01112131;
+ cpu->isar.id_isar4 = 0x00011142;
+ cpu->isar.id_isar5 = 0x00011121;
+ cpu->isar.id_isar6 = 0;
+ cpu->isar.dbgdidr = 0x3516d000;
+ cpu->isar.dbgdevid = 0x00110f13;
+ cpu->isar.dbgdevid1 = 0x2;
+ cpu->isar.reset_pmcr_el0 = 0x41013000;
+ cpu->clidr = 0x0a200023;
+ cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
+ cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
+ cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */
+ define_cortex_a72_a57_a53_cp_reginfo(cpu);
+
+ aa32_max_features(cpu);
+
+#ifdef CONFIG_USER_ONLY
+ /*
+ * Break with true ARMv8 and add back old-style VFP short-vector support.
+ * Only do this for user-mode, where -cpu max is the default, so that
+ * older v6 and v7 programs are more likely to work without adjustment.
+ */
+ cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1);
+#endif
+}
+#endif /* !TARGET_AARCH64 */
+
+static const ARMCPUInfo arm_tcg_cpus[] = {
+ { .name = "arm926", .initfn = arm926_initfn },
+ { .name = "arm946", .initfn = arm946_initfn },
+ { .name = "arm1026", .initfn = arm1026_initfn },
+ /*
+ * What QEMU calls "arm1136-r2" is actually the 1136 r0p2, i.e. an
+ * older core than plain "arm1136". In particular this does not
+ * have the v6K features.
+ */
+ { .name = "arm1136-r2", .initfn = arm1136_r2_initfn },
+ { .name = "arm1136", .initfn = arm1136_initfn },
+ { .name = "arm1176", .initfn = arm1176_initfn },
+ { .name = "arm11mpcore", .initfn = arm11mpcore_initfn },
+ { .name = "cortex-a7", .initfn = cortex_a7_initfn },
+ { .name = "cortex-a8", .initfn = cortex_a8_initfn },
+ { .name = "cortex-a9", .initfn = cortex_a9_initfn },
+ { .name = "cortex-a15", .initfn = cortex_a15_initfn },
+ { .name = "cortex-r5", .initfn = cortex_r5_initfn },
+ { .name = "cortex-r5f", .initfn = cortex_r5f_initfn },
+ { .name = "cortex-r52", .initfn = cortex_r52_initfn },
+ { .name = "ti925t", .initfn = ti925t_initfn },
+ { .name = "sa1100", .initfn = sa1100_initfn },
+ { .name = "sa1110", .initfn = sa1110_initfn },
+ { .name = "pxa250", .initfn = pxa250_initfn },
+ { .name = "pxa255", .initfn = pxa255_initfn },
+ { .name = "pxa260", .initfn = pxa260_initfn },
+ { .name = "pxa261", .initfn = pxa261_initfn },
+ { .name = "pxa262", .initfn = pxa262_initfn },
+ /* "pxa270" is an alias for "pxa270-a0" */
+ { .name = "pxa270", .initfn = pxa270a0_initfn },
+ { .name = "pxa270-a0", .initfn = pxa270a0_initfn },
+ { .name = "pxa270-a1", .initfn = pxa270a1_initfn },
+ { .name = "pxa270-b0", .initfn = pxa270b0_initfn },
+ { .name = "pxa270-b1", .initfn = pxa270b1_initfn },
+ { .name = "pxa270-c0", .initfn = pxa270c0_initfn },
+ { .name = "pxa270-c5", .initfn = pxa270c5_initfn },
+#ifndef TARGET_AARCH64
+ { .name = "max", .initfn = arm_max_initfn },
+#endif
+#ifdef CONFIG_USER_ONLY
+ { .name = "any", .initfn = arm_max_initfn },
+#endif
+};
+
+static const TypeInfo idau_interface_type_info = {
+ .name = TYPE_IDAU_INTERFACE,
+ .parent = TYPE_INTERFACE,
+ .class_size = sizeof(IDAUInterfaceClass),
+};
+
+static void arm_tcg_cpu_register_types(void)
+{
+ size_t i;
+
+ type_register_static(&idau_interface_type_info);
+ for (i = 0; i < ARRAY_SIZE(arm_tcg_cpus); ++i) {
+ arm_cpu_register(&arm_tcg_cpus[i]);
+ }
+}
+
+type_init(arm_tcg_cpu_register_types)
+
+#endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
new file mode 100644
index 0000000000..9f7a9f3d2c
--- /dev/null
+++ b/target/arm/tcg/cpu64.c
@@ -0,0 +1,1295 @@
+/*
+ * QEMU AArch64 TCG CPUs
+ *
+ * Copyright (c) 2013 Linaro Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see
+ * <http://www.gnu.org/licenses/gpl-2.0.html>
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "cpu.h"
+#include "qemu/module.h"
+#include "qapi/visitor.h"
+#include "hw/qdev-properties.h"
+#include "qemu/units.h"
+#include "internals.h"
+#include "cpu-features.h"
+#include "cpregs.h"
+
+static uint64_t make_ccsidr64(unsigned assoc, unsigned linesize,
+ unsigned cachesize)
+{
+ unsigned lg_linesize = ctz32(linesize);
+ unsigned sets;
+
+ /*
+ * The 64-bit CCSIDR_EL1 format is:
+ * [55:32] number of sets - 1
+ * [23:3] associativity - 1
+ * [2:0] log2(linesize) - 4
+ * so 0 == 16 bytes, 1 == 32 bytes, 2 == 64 bytes, etc
+ */
+ assert(assoc != 0);
+ assert(is_power_of_2(linesize));
+ assert(lg_linesize >= 4 && lg_linesize <= 7 + 4);
+
+ /* sets * associativity * linesize == cachesize. */
+ sets = cachesize / (assoc * linesize);
+ assert(cachesize % (assoc * linesize) == 0);
+
+ return ((uint64_t)(sets - 1) << 32)
+ | ((assoc - 1) << 3)
+ | (lg_linesize - 4);
+}
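+
+/*
+ * For example, a 4-way, 64 KiB cache with 64-byte lines has 256 sets, so
+ * make_ccsidr64(4, 64, 64 * KiB) returns
+ * ((uint64_t)255 << 32) | (3 << 3) | (6 - 4) == 0x000000ff0000001aull.
+ */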
+
+static void aarch64_a35_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,cortex-a35";
+ set_feature(&cpu->env, ARM_FEATURE_V8);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+ set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+ set_feature(&cpu->env, ARM_FEATURE_EL2);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+
+ /* From B2.2 AArch64 identification registers. */
+ cpu->midr = 0x411fd040;
+ cpu->revidr = 0;
+ cpu->ctr = 0x84448004;
+ cpu->isar.id_pfr0 = 0x00000131;
+ cpu->isar.id_pfr1 = 0x00011011;
+ cpu->isar.id_dfr0 = 0x03010066;
+ cpu->id_afr0 = 0;
+ cpu->isar.id_mmfr0 = 0x10201105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01260000;
+ cpu->isar.id_mmfr3 = 0x02102211;
+ cpu->isar.id_isar0 = 0x02101110;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232042;
+ cpu->isar.id_isar3 = 0x01112131;
+ cpu->isar.id_isar4 = 0x00011142;
+ cpu->isar.id_isar5 = 0x00011121;
+ cpu->isar.id_aa64pfr0 = 0x00002222;
+ cpu->isar.id_aa64pfr1 = 0;
+ cpu->isar.id_aa64dfr0 = 0x10305106;
+ cpu->isar.id_aa64dfr1 = 0;
+ cpu->isar.id_aa64isar0 = 0x00011120;
+ cpu->isar.id_aa64isar1 = 0;
+ cpu->isar.id_aa64mmfr0 = 0x00101122;
+ cpu->isar.id_aa64mmfr1 = 0;
+ cpu->clidr = 0x0a200023;
+ cpu->dcz_blocksize = 4;
+
+ /* From B2.4 AArch64 Virtual Memory control registers */
+ cpu->reset_sctlr = 0x00c50838;
+
+ /* From B2.10 AArch64 performance monitor registers */
+ cpu->isar.reset_pmcr_el0 = 0x410a3000;
+
+ /* From B2.29 Cache ID registers */
+ cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */
+ cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */
+ cpu->ccsidr[2] = 0x703fe03a; /* 512KB L2 cache */
+
+ /* From B3.5 VGIC Type register */
+ cpu->gic_num_lrs = 4;
+ cpu->gic_vpribits = 5;
+ cpu->gic_vprebits = 5;
+ cpu->gic_pribits = 5;
+
+ /* From C6.4 Debug ID Register */
+ cpu->isar.dbgdidr = 0x3516d000;
+ /* From C6.5 Debug Device ID Register */
+ cpu->isar.dbgdevid = 0x00110f13;
+ /* From C6.6 Debug Device ID Register 1 */
+ cpu->isar.dbgdevid1 = 0x2;
+
+ /* From Cortex-A35 SIMD and Floating-point Support r1p0 */
+ /* From 3.2 AArch32 register summary */
+ cpu->reset_fpsid = 0x41034043;
+
+ /* From 2.2 AArch64 register summary */
+ cpu->isar.mvfr0 = 0x10110222;
+ cpu->isar.mvfr1 = 0x12111111;
+ cpu->isar.mvfr2 = 0x00000043;
+
+ /* These values are the same as on the A53/A57/A72. */
+ define_cortex_a72_a57_a53_cp_reginfo(cpu);
+}
+
+static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ uint32_t value;
+
+ /* All vector lengths are disabled when SVE is off. */
+ if (!cpu_isar_feature(aa64_sve, cpu)) {
+ value = 0;
+ } else {
+ value = cpu->sve_max_vq;
+ }
+ visit_type_uint32(v, name, &value, errp);
+}
+
+static void cpu_max_set_sve_max_vq(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ uint32_t max_vq;
+
+ if (!visit_type_uint32(v, name, &max_vq, errp)) {
+ return;
+ }
+
+ if (max_vq == 0 || max_vq > ARM_MAX_VQ) {
+ error_setg(errp, "unsupported SVE vector length");
+ error_append_hint(errp, "Valid sve-max-vq in range [1-%d]\n",
+ ARM_MAX_VQ);
+ return;
+ }
+
+ cpu->sve_max_vq = max_vq;
+}
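+
+/*
+ * These back the "sve-max-vq" property that '-cpu max' registers below, so
+ * e.g. "-cpu max,sve-max-vq=4" caps the SVE vector length at 4 * 128 = 512
+ * bits.
+ */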
+
+static bool cpu_arm_get_rme(Object *obj, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ return cpu_isar_feature(aa64_rme, cpu);
+}
+
+static void cpu_arm_set_rme(Object *obj, bool value, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ uint64_t t;
+
+ t = cpu->isar.id_aa64pfr0;
+ t = FIELD_DP64(t, ID_AA64PFR0, RME, value);
+ cpu->isar.id_aa64pfr0 = t;
+}
+
+static void cpu_max_set_l0gptsz(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ uint32_t value;
+
+ if (!visit_type_uint32(v, name, &value, errp)) {
+ return;
+ }
+
+ /* Encode the value for the GPCCR_EL3 field. */
+ switch (value) {
+ case 30:
+ case 34:
+ case 36:
+ case 39:
+ cpu->reset_l0gptsz = value - 30;
+ break;
+ default:
+ error_setg(errp, "invalid value for l0gptsz");
+ error_append_hint(errp, "valid values are 30, 34, 36, 39\n");
+ break;
+ }
+}
+
+static void cpu_max_get_l0gptsz(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ uint32_t value = cpu->reset_l0gptsz + 30;
+
+ visit_type_uint32(v, name, &value, errp);
+}
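+
+/*
+ * These back the "x-l0gptsz" property on '-cpu max'. The property value is
+ * log2 of the span covered by one level 0 GPT entry, so e.g.
+ * "-cpu max,x-rme=on,x-l0gptsz=34" stores 34 - 30 = 4 as the GPCCR_EL3
+ * L0GPTSZ reset value, i.e. 16GB per level 0 entry.
+ */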
+
+static Property arm_cpu_lpa2_property =
+ DEFINE_PROP_BOOL("lpa2", ARMCPU, prop_lpa2, true);
+
+static void aarch64_a55_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,cortex-a55";
+ set_feature(&cpu->env, ARM_FEATURE_V8);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+ set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+ set_feature(&cpu->env, ARM_FEATURE_EL2);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+
+ /* Ordered by B2.4 AArch64 registers by functional group */
+ cpu->clidr = 0x82000023;
+ cpu->ctr = 0x84448004; /* L1Ip = VIPT */
+ cpu->dcz_blocksize = 4; /* 64 bytes */
+ cpu->isar.id_aa64dfr0 = 0x0000000010305408ull;
+ cpu->isar.id_aa64isar0 = 0x0000100010211120ull;
+ cpu->isar.id_aa64isar1 = 0x0000000000100001ull;
+ cpu->isar.id_aa64mmfr0 = 0x0000000000101122ull;
+ cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
+ cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull;
+ cpu->isar.id_aa64pfr0 = 0x0000000010112222ull;
+ cpu->isar.id_aa64pfr1 = 0x0000000000000010ull;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_dfr0 = 0x04010088;
+ cpu->isar.id_isar0 = 0x02101110;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232042;
+ cpu->isar.id_isar3 = 0x01112131;
+ cpu->isar.id_isar4 = 0x00011142;
+ cpu->isar.id_isar5 = 0x01011121;
+ cpu->isar.id_isar6 = 0x00000010;
+ cpu->isar.id_mmfr0 = 0x10201105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01260000;
+ cpu->isar.id_mmfr3 = 0x02122211;
+ cpu->isar.id_mmfr4 = 0x00021110;
+ cpu->isar.id_pfr0 = 0x10010131;
+ cpu->isar.id_pfr1 = 0x00011011;
+ cpu->isar.id_pfr2 = 0x00000011;
+ cpu->midr = 0x412FD050; /* r2p0 */
+ cpu->revidr = 0;
+
+ /* From B2.23 CCSIDR_EL1 */
+ cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */
+ cpu->ccsidr[1] = 0x200fe01a; /* 32KB L1 icache */
+ cpu->ccsidr[2] = 0x703fe07a; /* 512KB L2 cache */
+
+ /* From B2.96 SCTLR_EL3 */
+ cpu->reset_sctlr = 0x30c50838;
+
+ /* From B4.45 ICH_VTR_EL2 */
+ cpu->gic_num_lrs = 4;
+ cpu->gic_vpribits = 5;
+ cpu->gic_vprebits = 5;
+ cpu->gic_pribits = 5;
+
+ cpu->isar.mvfr0 = 0x10110222;
+ cpu->isar.mvfr1 = 0x13211111;
+ cpu->isar.mvfr2 = 0x00000043;
+
+ /* From D5.4 AArch64 PMU register summary */
+ cpu->isar.reset_pmcr_el0 = 0x410b3000;
+}
+
+static void aarch64_a72_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,cortex-a72";
+ set_feature(&cpu->env, ARM_FEATURE_V8);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+ set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+ set_feature(&cpu->env, ARM_FEATURE_EL2);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+ cpu->midr = 0x410fd083;
+ cpu->revidr = 0x00000000;
+ cpu->reset_fpsid = 0x41034080;
+ cpu->isar.mvfr0 = 0x10110222;
+ cpu->isar.mvfr1 = 0x12111111;
+ cpu->isar.mvfr2 = 0x00000043;
+ cpu->ctr = 0x8444c004;
+ cpu->reset_sctlr = 0x00c50838;
+ cpu->isar.id_pfr0 = 0x00000131;
+ cpu->isar.id_pfr1 = 0x00011011;
+ cpu->isar.id_dfr0 = 0x03010066;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_mmfr0 = 0x10201105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01260000;
+ cpu->isar.id_mmfr3 = 0x02102211;
+ cpu->isar.id_isar0 = 0x02101110;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232042;
+ cpu->isar.id_isar3 = 0x01112131;
+ cpu->isar.id_isar4 = 0x00011142;
+ cpu->isar.id_isar5 = 0x00011121;
+ cpu->isar.id_aa64pfr0 = 0x00002222;
+ cpu->isar.id_aa64dfr0 = 0x10305106;
+ cpu->isar.id_aa64isar0 = 0x00011120;
+ cpu->isar.id_aa64mmfr0 = 0x00001124;
+ cpu->isar.dbgdidr = 0x3516d000;
+ cpu->isar.dbgdevid = 0x01110f13;
+ cpu->isar.dbgdevid1 = 0x2;
+ cpu->isar.reset_pmcr_el0 = 0x41023000;
+ cpu->clidr = 0x0a200023;
+ cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
+ cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
+ cpu->ccsidr[2] = 0x707fe07a; /* 1MB L2 cache */
+ cpu->dcz_blocksize = 4; /* 64 bytes */
+ cpu->gic_num_lrs = 4;
+ cpu->gic_vpribits = 5;
+ cpu->gic_vprebits = 5;
+ cpu->gic_pribits = 5;
+ define_cortex_a72_a57_a53_cp_reginfo(cpu);
+}
+
+static void aarch64_a76_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,cortex-a76";
+ set_feature(&cpu->env, ARM_FEATURE_V8);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+ set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+ set_feature(&cpu->env, ARM_FEATURE_EL2);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+
+ /* Ordered by B2.4 AArch64 registers by functional group */
+ cpu->clidr = 0x82000023;
+ cpu->ctr = 0x8444C004;
+ cpu->dcz_blocksize = 4;
+ cpu->isar.id_aa64dfr0 = 0x0000000010305408ull;
+ cpu->isar.id_aa64isar0 = 0x0000100010211120ull;
+ cpu->isar.id_aa64isar1 = 0x0000000000100001ull;
+ cpu->isar.id_aa64mmfr0 = 0x0000000000101122ull;
+ cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
+ cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull;
+ cpu->isar.id_aa64pfr0 = 0x1100000010111112ull; /* GIC filled in later */
+ cpu->isar.id_aa64pfr1 = 0x0000000000000010ull;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_dfr0 = 0x04010088;
+ cpu->isar.id_isar0 = 0x02101110;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232042;
+ cpu->isar.id_isar3 = 0x01112131;
+ cpu->isar.id_isar4 = 0x00010142;
+ cpu->isar.id_isar5 = 0x01011121;
+ cpu->isar.id_isar6 = 0x00000010;
+ cpu->isar.id_mmfr0 = 0x10201105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01260000;
+ cpu->isar.id_mmfr3 = 0x02122211;
+ cpu->isar.id_mmfr4 = 0x00021110;
+ cpu->isar.id_pfr0 = 0x10010131;
+ cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */
+ cpu->isar.id_pfr2 = 0x00000011;
+ cpu->midr = 0x414fd0b1; /* r4p1 */
+ cpu->revidr = 0;
+
+ /* From B2.18 CCSIDR_EL1 */
+ cpu->ccsidr[0] = 0x701fe01a; /* 64KB L1 dcache */
+ cpu->ccsidr[1] = 0x201fe01a; /* 64KB L1 icache */
+ cpu->ccsidr[2] = 0x707fe03a; /* 512KB L2 cache */
+
+ /* From B2.93 SCTLR_EL3 */
+ cpu->reset_sctlr = 0x30c50838;
+
+ /* From B4.23 ICH_VTR_EL2 */
+ cpu->gic_num_lrs = 4;
+ cpu->gic_vpribits = 5;
+ cpu->gic_vprebits = 5;
+ cpu->gic_pribits = 5;
+
+ /* From B5.1 AdvSIMD AArch64 register summary */
+ cpu->isar.mvfr0 = 0x10110222;
+ cpu->isar.mvfr1 = 0x13211111;
+ cpu->isar.mvfr2 = 0x00000043;
+
+ /* From D5.1 AArch64 PMU register summary */
+ cpu->isar.reset_pmcr_el0 = 0x410b3000;
+}
+
+static void aarch64_a64fx_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,a64fx";
+ set_feature(&cpu->env, ARM_FEATURE_V8);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+ set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+ set_feature(&cpu->env, ARM_FEATURE_EL2);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+ cpu->midr = 0x461f0010;
+ cpu->revidr = 0x00000000;
+ cpu->ctr = 0x86668006;
+ cpu->reset_sctlr = 0x30000180;
+ cpu->isar.id_aa64pfr0 = 0x0000000101111111; /* No RAS Extensions */
+ cpu->isar.id_aa64pfr1 = 0x0000000000000000;
+ cpu->isar.id_aa64dfr0 = 0x0000000010305408;
+ cpu->isar.id_aa64dfr1 = 0x0000000000000000;
+ cpu->id_aa64afr0 = 0x0000000000000000;
+ cpu->id_aa64afr1 = 0x0000000000000000;
+ cpu->isar.id_aa64mmfr0 = 0x0000000000001122;
+ cpu->isar.id_aa64mmfr1 = 0x0000000011212100;
+ cpu->isar.id_aa64mmfr2 = 0x0000000000001011;
+ cpu->isar.id_aa64isar0 = 0x0000000010211120;
+ cpu->isar.id_aa64isar1 = 0x0000000000010001;
+ cpu->isar.id_aa64zfr0 = 0x0000000000000000;
+ cpu->clidr = 0x0000000080000023;
+ cpu->ccsidr[0] = 0x7007e01c; /* 64KB L1 dcache */
+ cpu->ccsidr[1] = 0x2007e01c; /* 64KB L1 icache */
+ cpu->ccsidr[2] = 0x70ffe07c; /* 8MB L2 cache */
+ cpu->dcz_blocksize = 6; /* 256 bytes */
+ cpu->gic_num_lrs = 4;
+ cpu->gic_vpribits = 5;
+ cpu->gic_vprebits = 5;
+ cpu->gic_pribits = 5;
+
+ /* The A64FX supports only 128, 256 and 512 bit vector lengths */
+ aarch64_add_sve_properties(obj);
+ cpu->sve_vq.supported = (1 << 0) /* 128bit */
+ | (1 << 1) /* 256bit */
+ | (1 << 3); /* 512bit */
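+ /* Bit N of sve_vq.supported enables the (N + 1) * 128 bit vector length. */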
+
+ cpu->isar.reset_pmcr_el0 = 0x46014040;
+
+ /* TODO: Add A64FX specific HPC extension registers */
+}
+
+static CPAccessResult access_actlr_w(CPUARMState *env, const ARMCPRegInfo *r,
+ bool read)
+{
+ if (!read) {
+ int el = arm_current_el(env);
+
+ /* Because ACTLR_EL2 is constant 0, writes below EL2 trap to EL2. */
+ if (el < 2 && arm_is_el2_enabled(env)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ /* Because ACTLR_EL3 is constant 0, writes below EL3 trap to EL3. */
+ if (el < 3 && arm_feature(env, ARM_FEATURE_EL3)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ }
+ return CP_ACCESS_OK;
+}
+
+static const ARMCPRegInfo neoverse_n1_cp_reginfo[] = {
+ { .name = "ATCR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 7, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ /* Traps and enables are the same as for TCR_EL1. */
+ .accessfn = access_tvm_trvm, .fgt = FGT_TCR_EL1, },
+ { .name = "ATCR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 15, .crm = 7, .opc2 = 0,
+ .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "ATCR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 7, .opc2 = 0,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "ATCR_EL12", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 5, .crn = 15, .crm = 7, .opc2 = 0,
+ .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "AVTCR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 15, .crm = 7, .opc2 = 1,
+ .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUACTLR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "CPUACTLR2_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "CPUACTLR3_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 2,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ /*
+ * Report CPUCFR_EL1.SCU as 1, as we do not implement the DSU
+ * (and in particular its system registers).
+ */
+ { .name = "CPUCFR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 0, .opc2 = 0,
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 4 },
+ { .name = "CPUECTLR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 4,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0x961563010,
+ .accessfn = access_actlr_w },
+ { .name = "CPUPCR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 1,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPMR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 3,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPOR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 2,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPSELR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 0,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPWRCTLR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 7,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "ERXPFGCDN_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 2,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "ERXPFGCTL_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "ERXPFGF_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+};
+
+static void define_neoverse_n1_cp_reginfo(ARMCPU *cpu)
+{
+ define_arm_cp_regs(cpu, neoverse_n1_cp_reginfo);
+}
+
+static const ARMCPRegInfo neoverse_v1_cp_reginfo[] = {
+ { .name = "CPUECTLR2_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 5,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "CPUPPMCR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 0,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPPMCR2_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 1,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPPMCR3_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 6,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+};
+
+static void define_neoverse_v1_cp_reginfo(ARMCPU *cpu)
+{
+ /*
+ * The Neoverse V1 has all of the Neoverse N1's IMPDEF
+ * registers and a few more of its own.
+ */
+ define_arm_cp_regs(cpu, neoverse_n1_cp_reginfo);
+ define_arm_cp_regs(cpu, neoverse_v1_cp_reginfo);
+}
+
+static void aarch64_neoverse_n1_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,neoverse-n1";
+ set_feature(&cpu->env, ARM_FEATURE_V8);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+ set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+ set_feature(&cpu->env, ARM_FEATURE_EL2);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+
+ /* Ordered by B2.4 AArch64 registers by functional group */
+ cpu->clidr = 0x82000023;
+ cpu->ctr = 0x8444c004;
+ cpu->dcz_blocksize = 4;
+ cpu->isar.id_aa64dfr0 = 0x0000000110305408ull;
+ cpu->isar.id_aa64isar0 = 0x0000100010211120ull;
+ cpu->isar.id_aa64isar1 = 0x0000000000100001ull;
+ cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull;
+ cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
+ cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull;
+ cpu->isar.id_aa64pfr0 = 0x1100000010111112ull; /* GIC filled in later */
+ cpu->isar.id_aa64pfr1 = 0x0000000000000020ull;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_dfr0 = 0x04010088;
+ cpu->isar.id_isar0 = 0x02101110;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232042;
+ cpu->isar.id_isar3 = 0x01112131;
+ cpu->isar.id_isar4 = 0x00010142;
+ cpu->isar.id_isar5 = 0x01011121;
+ cpu->isar.id_isar6 = 0x00000010;
+ cpu->isar.id_mmfr0 = 0x10201105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01260000;
+ cpu->isar.id_mmfr3 = 0x02122211;
+ cpu->isar.id_mmfr4 = 0x00021110;
+ cpu->isar.id_pfr0 = 0x10010131;
+ cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */
+ cpu->isar.id_pfr2 = 0x00000011;
+ cpu->midr = 0x414fd0c1; /* r4p1 */
+ cpu->revidr = 0;
+
+ /* From B2.23 CCSIDR_EL1 */
+ cpu->ccsidr[0] = 0x701fe01a; /* 64KB L1 dcache */
+ cpu->ccsidr[1] = 0x201fe01a; /* 64KB L1 icache */
+ cpu->ccsidr[2] = 0x70ffe03a; /* 1MB L2 cache */
+
+ /* From B2.98 SCTLR_EL3 */
+ cpu->reset_sctlr = 0x30c50838;
+
+ /* From B4.23 ICH_VTR_EL2 */
+ cpu->gic_num_lrs = 4;
+ cpu->gic_vpribits = 5;
+ cpu->gic_vprebits = 5;
+ cpu->gic_pribits = 5;
+
+ /* From B5.1 AdvSIMD AArch64 register summary */
+ cpu->isar.mvfr0 = 0x10110222;
+ cpu->isar.mvfr1 = 0x13211111;
+ cpu->isar.mvfr2 = 0x00000043;
+
+ /* From D5.1 AArch64 PMU register summary */
+ cpu->isar.reset_pmcr_el0 = 0x410c3000;
+
+ define_neoverse_n1_cp_reginfo(cpu);
+}
+
+static void aarch64_neoverse_v1_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,neoverse-v1";
+ set_feature(&cpu->env, ARM_FEATURE_V8);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+ set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+ set_feature(&cpu->env, ARM_FEATURE_EL2);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+
+ /* Ordered by 3.2.4 AArch64 registers by functional group */
+ cpu->clidr = 0x82000023;
+ cpu->ctr = 0xb444c004; /* With DIC and IDC set */
+ cpu->dcz_blocksize = 4;
+ cpu->id_aa64afr0 = 0x00000000;
+ cpu->id_aa64afr1 = 0x00000000;
+ cpu->isar.id_aa64dfr0 = 0x000001f210305519ull;
+ cpu->isar.id_aa64dfr1 = 0x00000000;
+ cpu->isar.id_aa64isar0 = 0x1011111110212120ull; /* with FEAT_RNG */
+ cpu->isar.id_aa64isar1 = 0x0111000001211032ull;
+ cpu->isar.id_aa64mmfr0 = 0x0000000000101125ull;
+ cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
+ cpu->isar.id_aa64mmfr2 = 0x0220011102101011ull;
+ cpu->isar.id_aa64pfr0 = 0x1101110120111112ull; /* GIC filled in later */
+ cpu->isar.id_aa64pfr1 = 0x0000000000000020ull;
+ cpu->id_afr0 = 0x00000000;
+ cpu->isar.id_dfr0 = 0x15011099;
+ cpu->isar.id_isar0 = 0x02101110;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232042;
+ cpu->isar.id_isar3 = 0x01112131;
+ cpu->isar.id_isar4 = 0x00010142;
+ cpu->isar.id_isar5 = 0x11011121;
+ cpu->isar.id_isar6 = 0x01100111;
+ cpu->isar.id_mmfr0 = 0x10201105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01260000;
+ cpu->isar.id_mmfr3 = 0x02122211;
+ cpu->isar.id_mmfr4 = 0x01021110;
+ cpu->isar.id_pfr0 = 0x21110131;
+ cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */
+ cpu->isar.id_pfr2 = 0x00000011;
+ cpu->midr = 0x411FD402; /* r1p2 */
+ cpu->revidr = 0;
+
+ /*
+ * The Neoverse-V1 r1p2 TRM lists 32-bit format CCSIDR_EL1 values,
+ * but also says it implements CCIDX, which means they should be in the
+ * 64-bit format. So here we use values based on the textual
+ * information in chapter 2 of the TRM:
+ *
+ * L1: 4-way set associative 64-byte line size, total size 64K.
+ * L2: 8-way set associative, 64 byte line size, either 512K or 1MB.
+ * L3: No L3 (this matches the CLIDR_EL1 value).
+ */
+ cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* L1 dcache */
+ cpu->ccsidr[1] = cpu->ccsidr[0]; /* L1 icache */
+ cpu->ccsidr[2] = make_ccsidr64(8, 64, 1 * MiB); /* L2 cache */
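+ /*
+ * With the helper above, these work out to 0x000000ff0000001a for the
+ * 64K L1 caches and 0x000007ff0000003a for the 1MB L2.
+ */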
+
+ /* From 3.2.115 SCTLR_EL3 */
+ cpu->reset_sctlr = 0x30c50838;
+
+ /* From 3.4.8 ICC_CTLR_EL3 and 3.4.23 ICH_VTR_EL2 */
+ cpu->gic_num_lrs = 4;
+ cpu->gic_vpribits = 5;
+ cpu->gic_vprebits = 5;
+ cpu->gic_pribits = 5;
+
+ /* From 3.5.1 AdvSIMD AArch64 register summary */
+ cpu->isar.mvfr0 = 0x10110222;
+ cpu->isar.mvfr1 = 0x13211111;
+ cpu->isar.mvfr2 = 0x00000043;
+
+ /* From 3.7.5 ID_AA64ZFR0_EL1 */
+ cpu->isar.id_aa64zfr0 = 0x0000100000100000;
+ cpu->sve_vq.supported = (1 << 0) /* 128bit */
+ | (1 << 1); /* 256bit */
+
+ /* From 5.5.1 AArch64 PMU register summary */
+ cpu->isar.reset_pmcr_el0 = 0x41213000;
+
+ define_neoverse_v1_cp_reginfo(cpu);
+
+ aarch64_add_pauth_properties(obj);
+ aarch64_add_sve_properties(obj);
+}
+
+static const ARMCPRegInfo cortex_a710_cp_reginfo[] = {
+ { .name = "CPUACTLR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "CPUACTLR2_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "CPUACTLR3_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 2,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "CPUACTLR4_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 3,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "CPUECTLR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 4,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "CPUECTLR2_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 5,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "CPUPPMCR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 4,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPWRCTLR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 7,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "ATCR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 7, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUACTLR5_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 8, .opc2 = 0,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "CPUACTLR6_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 8, .opc2 = 1,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "CPUACTLR7_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 8, .opc2 = 2,
+ .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0,
+ .accessfn = access_actlr_w },
+ { .name = "ATCR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 15, .crm = 7, .opc2 = 0,
+ .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "AVTCR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 15, .crm = 7, .opc2 = 1,
+ .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPPMCR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 0,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPPMCR2_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 1,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPPMCR4_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 4,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPPMCR5_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 5,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPPMCR6_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 6,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUACTLR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 4, .opc2 = 0,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "ATCR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 7, .opc2 = 0,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPSELR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 0,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPCR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 1,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPOR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 2,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPMR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 3,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPOR2_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 4,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPMR2_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 5,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPUPFR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 6,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ /*
+ * Report CPUCFR_EL1.SCU as 1, as we do not implement the DSU
+ * (and in particular its system registers).
+ */
+ { .name = "CPUCFR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 0, .opc2 = 0,
+ .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 4 },
+
+ /*
+ * Stub RAMINDEX, as we don't actually implement caches, BTB,
+ * or anything else with CPU-internal memory.
+ * "Read" zeros into the IDATA* and DDATA* output registers.
+ */
+ { .name = "RAMINDEX_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 0,
+ .access = PL3_W, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IDATA0_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 0,
+ .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IDATA1_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 1,
+ .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "IDATA2_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 2,
+ .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "DDATA0_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 1, .opc2 = 0,
+ .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "DDATA1_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 1, .opc2 = 1,
+ .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "DDATA2_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 1, .opc2 = 2,
+ .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+};
+
+static void aarch64_a710_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,cortex-a710";
+ set_feature(&cpu->env, ARM_FEATURE_V8);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+ set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+ set_feature(&cpu->env, ARM_FEATURE_EL2);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+
+ /* Ordered by Section B.4: AArch64 registers */
+ cpu->midr = 0x412FD471; /* r2p1 */
+ cpu->revidr = 0;
+ cpu->isar.id_pfr0 = 0x21110131;
+ cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */
+ cpu->isar.id_dfr0 = 0x16011099;
+ cpu->id_afr0 = 0;
+ cpu->isar.id_mmfr0 = 0x10201105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01260000;
+ cpu->isar.id_mmfr3 = 0x02122211;
+ cpu->isar.id_isar0 = 0x02101110;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232042;
+ cpu->isar.id_isar3 = 0x01112131;
+ cpu->isar.id_isar4 = 0x00010142;
+ cpu->isar.id_isar5 = 0x11011121; /* with Crypto */
+ cpu->isar.id_mmfr4 = 0x21021110;
+ cpu->isar.id_isar6 = 0x01111111;
+ cpu->isar.mvfr0 = 0x10110222;
+ cpu->isar.mvfr1 = 0x13211111;
+ cpu->isar.mvfr2 = 0x00000043;
+ cpu->isar.id_pfr2 = 0x00000011;
+ cpu->isar.id_aa64pfr0 = 0x1201111120111112ull; /* GIC filled in later */
+ cpu->isar.id_aa64pfr1 = 0x0000000000000221ull;
+ cpu->isar.id_aa64zfr0 = 0x0000110100110021ull; /* with Crypto */
+ cpu->isar.id_aa64dfr0 = 0x000011f010305619ull;
+ cpu->isar.id_aa64dfr1 = 0;
+ cpu->id_aa64afr0 = 0;
+ cpu->id_aa64afr1 = 0;
+ cpu->isar.id_aa64isar0 = 0x0221111110212120ull; /* with Crypto */
+ cpu->isar.id_aa64isar1 = 0x0010111101211052ull;
+ cpu->isar.id_aa64mmfr0 = 0x0000022200101122ull;
+ cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
+ cpu->isar.id_aa64mmfr2 = 0x1221011110101011ull;
+ cpu->clidr = 0x0000001482000023ull;
+ cpu->gm_blocksize = 4;
+ cpu->ctr = 0x000000049444c004ull;
+ cpu->dcz_blocksize = 4;
+ /* TODO FEAT_MPAM: mpamidr_el1 = 0x0000_0001_0006_003f */
+
+ /* Section B.5.2: PMCR_EL0 */
+ cpu->isar.reset_pmcr_el0 = 0xa000; /* with 20 counters */
+
+ /* Section B.6.7: ICH_VTR_EL2 */
+ cpu->gic_num_lrs = 4;
+ cpu->gic_vpribits = 5;
+ cpu->gic_vprebits = 5;
+ cpu->gic_pribits = 5;
+
+ /* Section 14: Scalable Vector Extensions support */
+ cpu->sve_vq.supported = 1 << 0; /* 128bit */
+
+ /*
+ * The Cortex-A710 TRM does not list CCSIDR values. The layout of
+ * the caches is described in Table 7-1, Table 8-1, and Table 9-1.
+ *
+ * L1: 4-way set associative 64-byte line size, total either 32K or 64K.
+ * L2: 8-way set associative 64 byte line size, total either 256K or 512K.
+ */
+ cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* L1 dcache */
+ cpu->ccsidr[1] = cpu->ccsidr[0]; /* L1 icache */
+ cpu->ccsidr[2] = make_ccsidr64(8, 64, 512 * KiB); /* L2 cache */
+
+ /* FIXME: Not documented -- copied from neoverse-v1 */
+ cpu->reset_sctlr = 0x30c50838;
+
+ define_arm_cp_regs(cpu, cortex_a710_cp_reginfo);
+
+ aarch64_add_pauth_properties(obj);
+ aarch64_add_sve_properties(obj);
+}
+
+/* Extra IMPDEF regs in the N2 beyond those in the A710 */
+static const ARMCPRegInfo neoverse_n2_cp_reginfo[] = {
+ { .name = "CPURNDBR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 3, .opc2 = 0,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+ { .name = "CPURNDPEID_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 3, .opc2 = 1,
+ .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+};
+
+static void aarch64_neoverse_n2_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+
+ cpu->dtb_compatible = "arm,neoverse-n2";
+ set_feature(&cpu->env, ARM_FEATURE_V8);
+ set_feature(&cpu->env, ARM_FEATURE_NEON);
+ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+ set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+ set_feature(&cpu->env, ARM_FEATURE_EL2);
+ set_feature(&cpu->env, ARM_FEATURE_EL3);
+ set_feature(&cpu->env, ARM_FEATURE_PMU);
+
+ /* Ordered by Section B.5: AArch64 ID registers */
+ cpu->midr = 0x410FD493; /* r0p3 */
+ cpu->revidr = 0;
+ cpu->isar.id_pfr0 = 0x21110131;
+ cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */
+ cpu->isar.id_dfr0 = 0x16011099;
+ cpu->id_afr0 = 0;
+ cpu->isar.id_mmfr0 = 0x10201105;
+ cpu->isar.id_mmfr1 = 0x40000000;
+ cpu->isar.id_mmfr2 = 0x01260000;
+ cpu->isar.id_mmfr3 = 0x02122211;
+ cpu->isar.id_isar0 = 0x02101110;
+ cpu->isar.id_isar1 = 0x13112111;
+ cpu->isar.id_isar2 = 0x21232042;
+ cpu->isar.id_isar3 = 0x01112131;
+ cpu->isar.id_isar4 = 0x00010142;
+ cpu->isar.id_isar5 = 0x11011121; /* with Crypto */
+ cpu->isar.id_mmfr4 = 0x01021110;
+ cpu->isar.id_isar6 = 0x01111111;
+ cpu->isar.mvfr0 = 0x10110222;
+ cpu->isar.mvfr1 = 0x13211111;
+ cpu->isar.mvfr2 = 0x00000043;
+ cpu->isar.id_pfr2 = 0x00000011;
+ cpu->isar.id_aa64pfr0 = 0x1201111120111112ull; /* GIC filled in later */
+ cpu->isar.id_aa64pfr1 = 0x0000000000000221ull;
+ cpu->isar.id_aa64zfr0 = 0x0000110100110021ull; /* with Crypto */
+ cpu->isar.id_aa64dfr0 = 0x000011f210305619ull;
+ cpu->isar.id_aa64dfr1 = 0;
+ cpu->id_aa64afr0 = 0;
+ cpu->id_aa64afr1 = 0;
+ cpu->isar.id_aa64isar0 = 0x1221111110212120ull; /* with Crypto and FEAT_RNG */
+ cpu->isar.id_aa64isar1 = 0x0011111101211052ull;
+ cpu->isar.id_aa64mmfr0 = 0x0000022200101125ull;
+ cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull;
+ cpu->isar.id_aa64mmfr2 = 0x1221011112101011ull;
+ cpu->clidr = 0x0000001482000023ull;
+ cpu->gm_blocksize = 4;
+ cpu->ctr = 0x00000004b444c004ull;
+ cpu->dcz_blocksize = 4;
+ /* TODO FEAT_MPAM: mpamidr_el1 = 0x0000_0001_001e_01ff */
+
+ /* Section B.7.2: PMCR_EL0 */
+ cpu->isar.reset_pmcr_el0 = 0x3000; /* with 6 counters */
+
+ /* Section B.8.9: ICH_VTR_EL2 */
+ cpu->gic_num_lrs = 4;
+ cpu->gic_vpribits = 5;
+ cpu->gic_vprebits = 5;
+ cpu->gic_pribits = 5;
+
+ /* Section 14: Scalable Vector Extensions support */
+ cpu->sve_vq.supported = 1 << 0; /* 128bit */
+
+ /*
+ * The Neoverse N2 TRM does not list CCSIDR values. The layout of
+ * the caches is described in Table 7-1, Table 8-1, and Table 9-1.
+ *
+ * L1: 4-way set associative 64-byte line size, total 64K.
+ * L2: 8-way set associative 64 byte line size, total either 512K or 1024K.
+ */
+ cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* L1 dcache */
+ cpu->ccsidr[1] = cpu->ccsidr[0]; /* L1 icache */
+ cpu->ccsidr[2] = make_ccsidr64(8, 64, 512 * KiB); /* L2 cache */
+
+ /* FIXME: Not documented -- copied from neoverse-v1 */
+ cpu->reset_sctlr = 0x30c50838;
+
+ /*
+ * The Neoverse N2 has all of the Cortex-A710 IMPDEF registers,
+ * and a few more RNG related ones.
+ */
+ define_arm_cp_regs(cpu, cortex_a710_cp_reginfo);
+ define_arm_cp_regs(cpu, neoverse_n2_cp_reginfo);
+
+ aarch64_add_pauth_properties(obj);
+ aarch64_add_sve_properties(obj);
+}
+
+/*
+ * -cpu max: a CPU with as many features enabled as our emulation supports.
+ * The version of '-cpu max' for qemu-system-arm is defined in cpu32.c;
+ * this only needs to handle 64 bits.
+ */
+void aarch64_max_tcg_initfn(Object *obj)
+{
+ ARMCPU *cpu = ARM_CPU(obj);
+ uint64_t t;
+ uint32_t u;
+
+ /*
+ * Reset MIDR so the guest doesn't mistake our 'max' CPU type for a real
+ * one and try to apply errata workarounds or use impdef features we
+ * don't provide.
+ * An IMPLEMENTER field of 0 means "reserved for software use";
+ * ARCHITECTURE must be 0xf indicating "v7 or later, check ID registers
+ * to see which features are present";
+ * the VARIANT, PARTNUM and REVISION fields are all implementation
+ * defined and we choose to define PARTNUM just in case guest
+ * code needs to distinguish this QEMU CPU from other software
+ * implementations, though this shouldn't be needed.
+ */
+ t = FIELD_DP64(0, MIDR_EL1, IMPLEMENTER, 0);
+ t = FIELD_DP64(t, MIDR_EL1, ARCHITECTURE, 0xf);
+ t = FIELD_DP64(t, MIDR_EL1, PARTNUM, 'Q');
+ t = FIELD_DP64(t, MIDR_EL1, VARIANT, 0);
+ t = FIELD_DP64(t, MIDR_EL1, REVISION, 0);
+ cpu->midr = t;
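+ /* i.e. MIDR_EL1 should read back as 0x000f0510 ('Q' is 0x51 in PARTNUM). */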
+
+ /*
+ * We're going to set FEAT_S2FWB, which mandates that CLIDR_EL1.{LoUU,LoUIS}
+ * are zero.
+ */
+ u = cpu->clidr;
+ u = FIELD_DP32(u, CLIDR_EL1, LOUIS, 0);
+ u = FIELD_DP32(u, CLIDR_EL1, LOUU, 0);
+ cpu->clidr = u;
+
+ /*
+ * Set CTR_EL0.DIC and IDC to tell the guest it doesn't need to
+ * do any cache maintenance for data-to-instruction or
+ * instruction-to-data coherence. (Our cache ops are nops.)
+ */
+ t = cpu->ctr;
+ t = FIELD_DP64(t, CTR_EL0, IDC, 1);
+ t = FIELD_DP64(t, CTR_EL0, DIC, 1);
+ cpu->ctr = t;
+
+ t = cpu->isar.id_aa64isar0;
+ t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2); /* FEAT_PMULL */
+ t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1); /* FEAT_SHA1 */
+ t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 2); /* FEAT_SHA512 */
+ t = FIELD_DP64(t, ID_AA64ISAR0, CRC32, 1); /* FEAT_CRC32 */
+ t = FIELD_DP64(t, ID_AA64ISAR0, ATOMIC, 2); /* FEAT_LSE */
+ t = FIELD_DP64(t, ID_AA64ISAR0, RDM, 1); /* FEAT_RDM */
+ t = FIELD_DP64(t, ID_AA64ISAR0, SHA3, 1); /* FEAT_SHA3 */
+ t = FIELD_DP64(t, ID_AA64ISAR0, SM3, 1); /* FEAT_SM3 */
+ t = FIELD_DP64(t, ID_AA64ISAR0, SM4, 1); /* FEAT_SM4 */
+ t = FIELD_DP64(t, ID_AA64ISAR0, DP, 1); /* FEAT_DotProd */
+ t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 1); /* FEAT_FHM */
+ t = FIELD_DP64(t, ID_AA64ISAR0, TS, 2); /* FEAT_FlagM2 */
+ t = FIELD_DP64(t, ID_AA64ISAR0, TLB, 2); /* FEAT_TLBIRANGE */
+ t = FIELD_DP64(t, ID_AA64ISAR0, RNDR, 1); /* FEAT_RNG */
+ cpu->isar.id_aa64isar0 = t;
+
+ t = cpu->isar.id_aa64isar1;
+ t = FIELD_DP64(t, ID_AA64ISAR1, DPB, 2); /* FEAT_DPB2 */
+ t = FIELD_DP64(t, ID_AA64ISAR1, APA, PauthFeat_FPACCOMBINED);
+ t = FIELD_DP64(t, ID_AA64ISAR1, API, 1);
+ t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 1); /* FEAT_JSCVT */
+ t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1); /* FEAT_FCMA */
+ t = FIELD_DP64(t, ID_AA64ISAR1, LRCPC, 2); /* FEAT_LRCPC2 */
+ t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 1); /* FEAT_FRINTTS */
+ t = FIELD_DP64(t, ID_AA64ISAR1, SB, 1); /* FEAT_SB */
+ t = FIELD_DP64(t, ID_AA64ISAR1, SPECRES, 1); /* FEAT_SPECRES */
+ t = FIELD_DP64(t, ID_AA64ISAR1, BF16, 1); /* FEAT_BF16 */
+ t = FIELD_DP64(t, ID_AA64ISAR1, DGH, 1); /* FEAT_DGH */
+ t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 1); /* FEAT_I8MM */
+ cpu->isar.id_aa64isar1 = t;
+
+ t = cpu->isar.id_aa64isar2;
+ t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */
+ t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */
+ cpu->isar.id_aa64isar2 = t;
+
+ t = cpu->isar.id_aa64pfr0;
+ t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); /* FEAT_FP16 */
+ t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); /* FEAT_FP16 */
+ t = FIELD_DP64(t, ID_AA64PFR0, RAS, 2); /* FEAT_RASv1p1 + FEAT_DoubleFault */
+ t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1);
+ t = FIELD_DP64(t, ID_AA64PFR0, SEL2, 1); /* FEAT_SEL2 */
+ t = FIELD_DP64(t, ID_AA64PFR0, DIT, 1); /* FEAT_DIT */
+ t = FIELD_DP64(t, ID_AA64PFR0, CSV2, 2); /* FEAT_CSV2_2 */
+ t = FIELD_DP64(t, ID_AA64PFR0, CSV3, 1); /* FEAT_CSV3 */
+ cpu->isar.id_aa64pfr0 = t;
+
+ t = cpu->isar.id_aa64pfr1;
+ t = FIELD_DP64(t, ID_AA64PFR1, BT, 1); /* FEAT_BTI */
+ t = FIELD_DP64(t, ID_AA64PFR1, SSBS, 2); /* FEAT_SSBS2 */
+ /*
+ * Begin with full support for MTE. This will be downgraded to MTE=0
+ * during realize if the board provides no tag memory, much like
+ * we do for EL2 with the virtualization=on property.
+ */
+ t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */
+ t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */
+ t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */
+ t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */
+ cpu->isar.id_aa64pfr1 = t;
+
+ t = cpu->isar.id_aa64mmfr0;
+ t = FIELD_DP64(t, ID_AA64MMFR0, PARANGE, 6); /* FEAT_LPA: 52 bits */
+ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16, 1); /* 16k pages supported */
+ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN16_2, 2); /* 16k stage2 supported */
+ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN64_2, 2); /* 64k stage2 supported */
+ t = FIELD_DP64(t, ID_AA64MMFR0, TGRAN4_2, 2); /* 4k stage2 supported */
+ t = FIELD_DP64(t, ID_AA64MMFR0, FGT, 1); /* FEAT_FGT */
+ t = FIELD_DP64(t, ID_AA64MMFR0, ECV, 2); /* FEAT_ECV */
+ cpu->isar.id_aa64mmfr0 = t;
+
+ t = cpu->isar.id_aa64mmfr1;
+ t = FIELD_DP64(t, ID_AA64MMFR1, HAFDBS, 2); /* FEAT_HAFDBS */
+ t = FIELD_DP64(t, ID_AA64MMFR1, VMIDBITS, 2); /* FEAT_VMID16 */
+ t = FIELD_DP64(t, ID_AA64MMFR1, VH, 1); /* FEAT_VHE */
+ t = FIELD_DP64(t, ID_AA64MMFR1, HPDS, 2); /* FEAT_HPDS2 */
+ t = FIELD_DP64(t, ID_AA64MMFR1, LO, 1); /* FEAT_LOR */
+ t = FIELD_DP64(t, ID_AA64MMFR1, PAN, 3); /* FEAT_PAN3 */
+ t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */
+ t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 1); /* FEAT_ETS */
+ t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */
+ t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */
+ cpu->isar.id_aa64mmfr1 = t;
+
+ t = cpu->isar.id_aa64mmfr2;
+ t = FIELD_DP64(t, ID_AA64MMFR2, CNP, 1); /* FEAT_TTCNP */
+ t = FIELD_DP64(t, ID_AA64MMFR2, UAO, 1); /* FEAT_UAO */
+ t = FIELD_DP64(t, ID_AA64MMFR2, IESB, 1); /* FEAT_IESB */
+ t = FIELD_DP64(t, ID_AA64MMFR2, VARANGE, 1); /* FEAT_LVA */
+ t = FIELD_DP64(t, ID_AA64MMFR2, NV, 2); /* FEAT_NV2 */
+ t = FIELD_DP64(t, ID_AA64MMFR2, ST, 1); /* FEAT_TTST */
+ t = FIELD_DP64(t, ID_AA64MMFR2, AT, 1); /* FEAT_LSE2 */
+ t = FIELD_DP64(t, ID_AA64MMFR2, IDS, 1); /* FEAT_IDST */
+ t = FIELD_DP64(t, ID_AA64MMFR2, FWB, 1); /* FEAT_S2FWB */
+ t = FIELD_DP64(t, ID_AA64MMFR2, TTL, 1); /* FEAT_TTL */
+ t = FIELD_DP64(t, ID_AA64MMFR2, BBM, 2); /* FEAT_BBM at level 2 */
+ t = FIELD_DP64(t, ID_AA64MMFR2, EVT, 2); /* FEAT_EVT */
+ t = FIELD_DP64(t, ID_AA64MMFR2, E0PD, 1); /* FEAT_E0PD */
+ cpu->isar.id_aa64mmfr2 = t;
+
+ t = cpu->isar.id_aa64zfr0;
+ t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 1);
+ t = FIELD_DP64(t, ID_AA64ZFR0, AES, 2); /* FEAT_SVE_PMULL128 */
+ t = FIELD_DP64(t, ID_AA64ZFR0, BITPERM, 1); /* FEAT_SVE_BitPerm */
+ t = FIELD_DP64(t, ID_AA64ZFR0, BFLOAT16, 1); /* FEAT_BF16 */
+ t = FIELD_DP64(t, ID_AA64ZFR0, SHA3, 1); /* FEAT_SVE_SHA3 */
+ t = FIELD_DP64(t, ID_AA64ZFR0, SM4, 1); /* FEAT_SVE_SM4 */
+ t = FIELD_DP64(t, ID_AA64ZFR0, I8MM, 1); /* FEAT_I8MM */
+ t = FIELD_DP64(t, ID_AA64ZFR0, F32MM, 1); /* FEAT_F32MM */
+ t = FIELD_DP64(t, ID_AA64ZFR0, F64MM, 1); /* FEAT_F64MM */
+ cpu->isar.id_aa64zfr0 = t;
+
+ t = cpu->isar.id_aa64dfr0;
+ t = FIELD_DP64(t, ID_AA64DFR0, DEBUGVER, 9); /* FEAT_Debugv8p4 */
+ t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 6); /* FEAT_PMUv3p5 */
+ t = FIELD_DP64(t, ID_AA64DFR0, HPMN0, 1); /* FEAT_HPMN0 */
+ cpu->isar.id_aa64dfr0 = t;
+
+ t = cpu->isar.id_aa64smfr0;
+ t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1); /* FEAT_SME */
+ t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1); /* FEAT_SME */
+ t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1); /* FEAT_SME */
+ t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf); /* FEAT_SME */
+ t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1); /* FEAT_SME_F64F64 */
+ t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */
+ t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */
+ cpu->isar.id_aa64smfr0 = t;
+
+ /* Replicate the same data to the 32-bit id registers. */
+ aa32_max_features(cpu);
+
+#ifdef CONFIG_USER_ONLY
+ /*
+ * For usermode -cpu max we can use a larger and more efficient DCZ
+ * blocksize since we don't have to follow what the hardware does.
+ */
+ cpu->ctr = 0x80038003; /* 32 byte I and D cacheline size, VIPT icache */
+ cpu->dcz_blocksize = 7; /* 512 bytes */
+#endif
+ cpu->gm_blocksize = 6; /* 256 bytes */
+
+ cpu->sve_vq.supported = MAKE_64BIT_MASK(0, ARM_MAX_VQ);
+ cpu->sme_vq.supported = SVE_VQ_POW2_MAP;
+
+ aarch64_add_pauth_properties(obj);
+ aarch64_add_sve_properties(obj);
+ aarch64_add_sme_properties(obj);
+ object_property_add(obj, "sve-max-vq", "uint32", cpu_max_get_sve_max_vq,
+ cpu_max_set_sve_max_vq, NULL, NULL);
+ object_property_add_bool(obj, "x-rme", cpu_arm_get_rme, cpu_arm_set_rme);
+ object_property_add(obj, "x-l0gptsz", "uint32", cpu_max_get_l0gptsz,
+ cpu_max_set_l0gptsz, NULL, NULL);
+ qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property);
+}
+
+static const ARMCPUInfo aarch64_cpus[] = {
+ { .name = "cortex-a35", .initfn = aarch64_a35_initfn },
+ { .name = "cortex-a55", .initfn = aarch64_a55_initfn },
+ { .name = "cortex-a72", .initfn = aarch64_a72_initfn },
+ { .name = "cortex-a76", .initfn = aarch64_a76_initfn },
+ { .name = "cortex-a710", .initfn = aarch64_a710_initfn },
+ { .name = "a64fx", .initfn = aarch64_a64fx_initfn },
+ { .name = "neoverse-n1", .initfn = aarch64_neoverse_n1_initfn },
+ { .name = "neoverse-v1", .initfn = aarch64_neoverse_v1_initfn },
+ { .name = "neoverse-n2", .initfn = aarch64_neoverse_n2_initfn },
+};
+
+static void aarch64_cpu_register_types(void)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) {
+ aarch64_cpu_register(&aarch64_cpus[i]);
+ }
+}
+
+type_init(aarch64_cpu_register_types)
diff --git a/target/arm/tcg/crypto_helper.c b/target/arm/tcg/crypto_helper.c
new file mode 100644
index 0000000000..7cadd61e12
--- /dev/null
+++ b/target/arm/tcg/crypto_helper.c
@@ -0,0 +1,679 @@
+/*
+ * crypto_helper.c - emulate v8 Crypto Extensions instructions
+ *
+ * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+
+#include "cpu.h"
+#include "exec/helper-proto.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "crypto/aes-round.h"
+#include "crypto/sm4.h"
+#include "vec_internal.h"
+
+union CRYPTO_STATE {
+ uint8_t bytes[16];
+ uint32_t words[4];
+ uint64_t l[2];
+};
+
+#if HOST_BIG_ENDIAN
+#define CR_ST_BYTE(state, i) ((state).bytes[(15 - (i)) ^ 8])
+#define CR_ST_WORD(state, i) ((state).words[(3 - (i)) ^ 2])
+#else
+#define CR_ST_BYTE(state, i) ((state).bytes[i])
+#define CR_ST_WORD(state, i) ((state).words[i])
+#endif
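+
+/*
+ * For example, on a big-endian host the architectural byte 0 (the least
+ * significant byte of l[0]) lives at bytes[(15 - 0) ^ 8] == bytes[7], and
+ * architectural word 0 at words[(3 - 0) ^ 2] == words[1].
+ */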
+
+/*
+ * The caller has not been converted to full gvec, and so only
+ * modifies the low 16 bytes of the vector register.
+ */
+static void clear_tail_16(void *vd, uint32_t desc)
+{
+ int opr_sz = simd_oprsz(desc);
+ int max_sz = simd_maxsz(desc);
+
+ assert(opr_sz == 16);
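+ /* clear_tail() zeroes everything from byte opr_sz up to max_sz. */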
+ clear_tail(vd, opr_sz, max_sz);
+}
+
+static const AESState aes_zero = { };
+
+void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+
+ for (i = 0; i < opr_sz; i += 16) {
+ AESState *ad = (AESState *)(vd + i);
+ AESState *st = (AESState *)(vn + i);
+ AESState *rk = (AESState *)(vm + i);
+ AESState t;
+
+ /*
+ * Our uint64_t are in the wrong order for big-endian.
+ * The Arm AddRoundKey comes first, while the API AddRoundKey
+ * comes last: perform the xor here, and provide zero to the API.
+ */
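+ /*
+ * Equivalently, AESE(st, rk) applies SubBytes and ShiftRows to st ^ rk,
+ * so aesenc_SB_SR_AK(st ^ rk, zero_key) computes it exactly: the final
+ * AddRoundKey with an all-zero key is a no-op.
+ */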
+ if (HOST_BIG_ENDIAN) {
+ t.d[0] = st->d[1] ^ rk->d[1];
+ t.d[1] = st->d[0] ^ rk->d[0];
+ aesenc_SB_SR_AK(&t, &t, &aes_zero, false);
+ ad->d[0] = t.d[1];
+ ad->d[1] = t.d[0];
+ } else {
+ t.v = st->v ^ rk->v;
+ aesenc_SB_SR_AK(ad, &t, &aes_zero, false);
+ }
+ }
+ clear_tail(vd, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(crypto_aesd)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+
+ for (i = 0; i < opr_sz; i += 16) {
+ AESState *ad = (AESState *)(vd + i);
+ AESState *st = (AESState *)(vn + i);
+ AESState *rk = (AESState *)(vm + i);
+ AESState t;
+
+ /* Our uint64_t are in the wrong order for big-endian. */
+ if (HOST_BIG_ENDIAN) {
+ t.d[0] = st->d[1] ^ rk->d[1];
+ t.d[1] = st->d[0] ^ rk->d[0];
+ aesdec_ISB_ISR_AK(&t, &t, &aes_zero, false);
+ ad->d[0] = t.d[1];
+ ad->d[1] = t.d[0];
+ } else {
+ t.v = st->v ^ rk->v;
+ aesdec_ISB_ISR_AK(ad, &t, &aes_zero, false);
+ }
+ }
+ clear_tail(vd, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+
+ for (i = 0; i < opr_sz; i += 16) {
+ AESState *ad = (AESState *)(vd + i);
+ AESState *st = (AESState *)(vm + i);
+ AESState t;
+
+ /* Our uint64_t are in the wrong order for big-endian. */
+ if (HOST_BIG_ENDIAN) {
+ t.d[0] = st->d[1];
+ t.d[1] = st->d[0];
+ aesenc_MC(&t, &t, false);
+ ad->d[0] = t.d[1];
+ ad->d[1] = t.d[0];
+ } else {
+ aesenc_MC(ad, st, false);
+ }
+ }
+ clear_tail(vd, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(crypto_aesimc)(void *vd, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+
+ for (i = 0; i < opr_sz; i += 16) {
+ AESState *ad = (AESState *)(vd + i);
+ AESState *st = (AESState *)(vm + i);
+ AESState t;
+
+ /* Our uint64_t are in the wrong order for big-endian. */
+ if (HOST_BIG_ENDIAN) {
+ t.d[0] = st->d[1];
+ t.d[1] = st->d[0];
+ aesdec_IMC(&t, &t, false);
+ ad->d[0] = t.d[1];
+ ad->d[1] = t.d[0];
+ } else {
+ aesdec_IMC(ad, st, false);
+ }
+ }
+ clear_tail(vd, opr_sz, simd_maxsz(desc));
+}
+
+/*
+ * SHA-1 logical functions
+ */
+
+static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
+{
+ return (x & (y ^ z)) ^ z;
+}
+
+static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
+{
+ return x ^ y ^ z;
+}
+
+static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
+{
+ return (x & y) | ((x | y) & z);
+}
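+
+/*
+ * cho(), par() and maj() are the Ch, Parity and Maj functions of FIPS 180-4,
+ * with maj() written in the equivalent (x & y) | ((x | y) & z) form.
+ */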
+
+void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint64_t d0, d1;
+
+ d0 = d[1] ^ d[0] ^ m[0];
+ d1 = n[0] ^ d[1] ^ m[1];
+ d[0] = d0;
+ d[1] = d1;
+
+ clear_tail_16(vd, desc);
+}
+
+static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
+ uint64_t *rm, uint32_t desc,
+ uint32_t (*fn)(union CRYPTO_STATE *d))
+{
+ union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
+ union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ uint32_t t = fn(&d);
+
+ t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
+ + CR_ST_WORD(m, i);
+
+ CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
+ CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
+ CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
+ CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
+ CR_ST_WORD(d, 0) = t;
+ }
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+
+ clear_tail_16(rd, desc);
+}
+
+static uint32_t do_sha1c(union CRYPTO_STATE *d)
+{
+ return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
+}
+
+void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
+}
+
+static uint32_t do_sha1p(union CRYPTO_STATE *d)
+{
+ return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
+}
+
+void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
+}
+
+static uint32_t do_sha1m(union CRYPTO_STATE *d)
+{
+ return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
+}
+
+void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
+}
+
+void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
+{
+ uint64_t *rd = vd;
+ uint64_t *rm = vm;
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+
+ CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
+ CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
+
+ rd[0] = m.l[0];
+ rd[1] = m.l[1];
+
+ clear_tail_16(vd, desc);
+}
+
+void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
+{
+ uint64_t *rd = vd;
+ uint64_t *rm = vm;
+ union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+
+ CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
+ CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
+ CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
+ CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+
+ clear_tail_16(vd, desc);
+}
+
+/*
+ * The SHA-256 logical functions, according to
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ */
+
+static uint32_t S0(uint32_t x)
+{
+ return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
+}
+
+static uint32_t S1(uint32_t x)
+{
+ return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
+}
+
+static uint32_t s0(uint32_t x)
+{
+ return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
+}
+
+static uint32_t s1(uint32_t x)
+{
+ return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
+}
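+
+/*
+ * In FIPS 180-4 terms, S0/S1 are the "big" Sigma0/Sigma1 functions used
+ * by the SHA256H/SHA256H2 compression helpers, and s0/s1 are the "small"
+ * sigma0/sigma1 functions used by the SHA256SU0/SHA256SU1 message
+ * schedule helpers.
+ */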
+
+void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+ union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
+ union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
+ + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
+ + CR_ST_WORD(m, i);
+
+ CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
+ CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
+ CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
+ CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
+
+ t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
+ + S0(CR_ST_WORD(d, 0));
+
+ CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
+ CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
+ CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
+ CR_ST_WORD(d, 0) = t;
+ }
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+
+ clear_tail_16(vd, desc);
+}
+
+void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+ union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
+ union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
+ + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
+ + CR_ST_WORD(m, i);
+
+ CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
+ CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
+ CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
+ CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
+ }
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+
+ clear_tail_16(vd, desc);
+}
+
+void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
+{
+ uint64_t *rd = vd;
+ uint64_t *rm = vm;
+ union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+
+ CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
+ CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
+ CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
+ CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+
+ clear_tail_16(vd, desc);
+}
+
+void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+ union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
+ union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+
+ CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
+ CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
+ CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
+ CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+
+ clear_tail_16(vd, desc);
+}
+
+/*
+ * The SHA-512 logical functions: 64-bit analogues of the SHA-256 functions
+ * above, but with different rotate and shift amounts
+ */
+
+static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
+{
+ return (x & (y ^ z)) ^ z;
+}
+
+static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
+{
+ return (x & y) | ((x | y) & z);
+}
+
+static uint64_t S0_512(uint64_t x)
+{
+ return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
+}
+
+static uint64_t S1_512(uint64_t x)
+{
+ return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
+}
+
+static uint64_t s0_512(uint64_t x)
+{
+ return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
+}
+
+static uint64_t s1_512(uint64_t x)
+{
+ return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
+}
+
+void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+ uint64_t d0 = rd[0];
+ uint64_t d1 = rd[1];
+
+ d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
+ d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
+
+ rd[0] = d0;
+ rd[1] = d1;
+
+ clear_tail_16(vd, desc);
+}
+
+void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+ uint64_t d0 = rd[0];
+ uint64_t d1 = rd[1];
+
+ d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
+ d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
+
+ rd[0] = d0;
+ rd[1] = d1;
+
+ clear_tail_16(vd, desc);
+}
+
+void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t d0 = rd[0];
+ uint64_t d1 = rd[1];
+
+ d0 += s0_512(rd[1]);
+ d1 += s0_512(rn[0]);
+
+ rd[0] = d0;
+ rd[1] = d1;
+
+ clear_tail_16(vd, desc);
+}
+
+void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+
+ rd[0] += s1_512(rn[0]) + rm[0];
+ rd[1] += s1_512(rn[1]) + rm[1];
+
+ clear_tail_16(vd, desc);
+}
+
+void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+ union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
+ union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+ uint32_t t;
+
+ t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
+ CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
+
+ t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
+ CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
+
+ t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
+ CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
+
+ t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
+ CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+
+ clear_tail_16(vd, desc);
+}
+
+void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ uint64_t *rd = vd;
+ uint64_t *rn = vn;
+ uint64_t *rm = vm;
+ union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
+ union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+ uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
+
+ CR_ST_WORD(d, 0) ^= t;
+ CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
+ CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
+ CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
+ ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+
+ clear_tail_16(vd, desc);
+}
+
+static inline void QEMU_ALWAYS_INLINE
+crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
+ uint32_t desc, uint32_t opcode)
+{
+ union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
+ union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+ uint32_t imm2 = simd_data(desc);
+ uint32_t t;
+
+ assert(imm2 < 4);
+
+ if (opcode == 0 || opcode == 2) {
+ /* SM3TT1A, SM3TT2A */
+ t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
+ } else if (opcode == 1) {
+ /* SM3TT1B */
+ t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
+ } else if (opcode == 3) {
+ /* SM3TT2B */
+ t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
+ } else {
+ qemu_build_not_reached();
+ }
+
+ t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
+
+ CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
+
+ if (opcode < 2) {
+ /* SM3TT1A, SM3TT1B */
+ t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
+
+ CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
+ } else {
+ /* SM3TT2A, SM3TT2B */
+ t += CR_ST_WORD(n, 3);
+ t ^= rol32(t, 9) ^ rol32(t, 17);
+
+ CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
+ }
+
+ CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
+ CR_ST_WORD(d, 3) = t;
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+
+ clear_tail_16(rd, desc);
+}
+
+#define DO_SM3TT(NAME, OPCODE) \
+ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+ { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }
+
+DO_SM3TT(crypto_sm3tt1a, 0)
+DO_SM3TT(crypto_sm3tt1b, 1)
+DO_SM3TT(crypto_sm3tt2a, 2)
+DO_SM3TT(crypto_sm3tt2b, 3)
+
+#undef DO_SM3TT
+
+static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
+{
+ union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
+ uint32_t t, i;
+
+ for (i = 0; i < 4; i++) {
+ t = CR_ST_WORD(d, (i + 1) % 4) ^
+ CR_ST_WORD(d, (i + 2) % 4) ^
+ CR_ST_WORD(d, (i + 3) % 4) ^
+ CR_ST_WORD(n, i);
+
+ t = sm4_subword(t);
+
+ CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
+ rol32(t, 24);
+ }
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+}
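+
+/*
+ * The rotate-and-XOR chain above is the SM4 round-function linear
+ * transform L(B) = B ^ rol(B, 2) ^ rol(B, 10) ^ rol(B, 18) ^ rol(B, 24);
+ * do_crypto_sm4ekey() below applies the key-schedule variant
+ * L'(B) = B ^ rol(B, 13) ^ rol(B, 23).
+ */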
+
+void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+
+ for (i = 0; i < opr_sz; i += 16) {
+ do_crypto_sm4e(vd + i, vn + i, vm + i);
+ }
+ clear_tail(vd, opr_sz, simd_maxsz(desc));
+}
+
+static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
+{
+ union CRYPTO_STATE d;
+ union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
+ union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
+ uint32_t t, i;
+
+ d = n;
+ for (i = 0; i < 4; i++) {
+ t = CR_ST_WORD(d, (i + 1) % 4) ^
+ CR_ST_WORD(d, (i + 2) % 4) ^
+ CR_ST_WORD(d, (i + 3) % 4) ^
+ CR_ST_WORD(m, i);
+
+ t = sm4_subword(t);
+
+ CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
+ }
+
+ rd[0] = d.l[0];
+ rd[1] = d.l[1];
+}
+
+void HELPER(crypto_sm4ekey)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+
+ for (i = 0; i < opr_sz; i += 16) {
+ do_crypto_sm4ekey(vd + i, vn + i, vm + i);
+ }
+ clear_tail(vd, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ d[i] = n[i] ^ rol64(m[i], 1);
+ }
+ clear_tail(vd, opr_sz, simd_maxsz(desc));
+}
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
new file mode 100644
index 0000000000..ebaa7f00df
--- /dev/null
+++ b/target/arm/tcg/helper-a64.c
@@ -0,0 +1,1857 @@
+/*
+ * AArch64 specific helpers
+ *
+ * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "cpu.h"
+#include "gdbstub/helpers.h"
+#include "exec/helper-proto.h"
+#include "qemu/host-utils.h"
+#include "qemu/log.h"
+#include "qemu/main-loop.h"
+#include "qemu/bitops.h"
+#include "internals.h"
+#include "qemu/crc32c.h"
+#include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
+#include "qemu/int128.h"
+#include "qemu/atomic128.h"
+#include "fpu/softfloat.h"
+#include <zlib.h> /* For crc32 */
+
+/* C2.4.7 Multiply and divide */
+/* special cases for 0 and LLONG_MIN are mandated by the ARM architecture (not the C standard) */
+uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
+{
+ if (den == 0) {
+ return 0;
+ }
+ return num / den;
+}
+
+int64_t HELPER(sdiv64)(int64_t num, int64_t den)
+{
+ if (den == 0) {
+ return 0;
+ }
+ if (num == LLONG_MIN && den == -1) {
+ return LLONG_MIN;
+ }
+ return num / den;
+}
+
+uint64_t HELPER(rbit64)(uint64_t x)
+{
+ return revbit64(x);
+}
+
+void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm)
+{
+ update_spsel(env, imm);
+}
+
+static void daif_check(CPUARMState *env, uint32_t op,
+ uint32_t imm, uintptr_t ra)
+{
+ /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */
+ if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
+ raise_exception_ra(env, EXCP_UDEF,
+ syn_aa64_sysregtrap(0, extract32(op, 0, 3),
+ extract32(op, 3, 3), 4,
+ imm, 0x1f, 0),
+ exception_target_el(env), ra);
+ }
+}
+
+void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm)
+{
+ daif_check(env, 0x1e, imm, GETPC());
+ env->daif |= (imm << 6) & PSTATE_DAIF;
+ arm_rebuild_hflags(env);
+}
+
+void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
+{
+ daif_check(env, 0x1f, imm, GETPC());
+ env->daif &= ~((imm << 6) & PSTATE_DAIF);
+ arm_rebuild_hflags(env);
+}
+
+/* Convert a softfloat float_relation_ (as returned by
+ * the float*_compare functions) to the correct ARM
+ * NZCV flag state.
+ */
+static inline uint32_t float_rel_to_flags(int res)
+{
+ uint64_t flags;
+ switch (res) {
+ case float_relation_equal:
+ flags = PSTATE_Z | PSTATE_C;
+ break;
+ case float_relation_less:
+ flags = PSTATE_N;
+ break;
+ case float_relation_greater:
+ flags = PSTATE_C;
+ break;
+ case float_relation_unordered:
+ default:
+ flags = PSTATE_C | PSTATE_V;
+ break;
+ }
+ return flags;
+}
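+
+/*
+ * The resulting NZCV encodings match the AArch64 FCMP definition:
+ * equal -> 0110, less -> 1000, greater -> 0010, unordered -> 0011.
+ */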
+
+uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status)
+{
+ return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
+}
+
+uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status)
+{
+ return float_rel_to_flags(float16_compare(x, y, fp_status));
+}
+
+uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
+{
+ return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
+}
+
+uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
+{
+ return float_rel_to_flags(float32_compare(x, y, fp_status));
+}
+
+uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
+{
+ return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
+}
+
+uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
+{
+ return float_rel_to_flags(float64_compare(x, y, fp_status));
+}
+
+float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float32_squash_input_denormal(a, fpst);
+ b = float32_squash_input_denormal(b, fpst);
+
+ if ((float32_is_zero(a) && float32_is_infinity(b)) ||
+ (float32_is_infinity(a) && float32_is_zero(b))) {
+ /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
+ return make_float32((1U << 30) |
+ ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
+ }
+ return float32_mul(a, b, fpst);
+}
+
+float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float64_squash_input_denormal(a, fpst);
+ b = float64_squash_input_denormal(b, fpst);
+
+ if ((float64_is_zero(a) && float64_is_infinity(b)) ||
+ (float64_is_infinity(a) && float64_is_zero(b))) {
+ /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
+ return make_float64((1ULL << 62) |
+ ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
+ }
+ return float64_mul(a, b, fpst);
+}
+
+/* 64-bit/double versions of the Neon float compare functions */
+uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return -float64_eq_quiet(a, b, fpst);
+}
+
+uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return -float64_le(b, a, fpst);
+}
+
+uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return -float64_lt(b, a, fpst);
+}
+
+/* Reciprocal step and sqrt step. Note that unlike the A32/T32
+ * versions, these do a fully fused multiply-add or
+ * multiply-add-and-halve.
+ */
+
+uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float16_squash_input_denormal(a, fpst);
+ b = float16_squash_input_denormal(b, fpst);
+
+ a = float16_chs(a);
+ if ((float16_is_infinity(a) && float16_is_zero(b)) ||
+ (float16_is_infinity(b) && float16_is_zero(a))) {
+ return float16_two;
+ }
+ return float16_muladd(a, b, float16_two, 0, fpst);
+}
+
+float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float32_squash_input_denormal(a, fpst);
+ b = float32_squash_input_denormal(b, fpst);
+
+ a = float32_chs(a);
+ if ((float32_is_infinity(a) && float32_is_zero(b)) ||
+ (float32_is_infinity(b) && float32_is_zero(a))) {
+ return float32_two;
+ }
+ return float32_muladd(a, b, float32_two, 0, fpst);
+}
+
+float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float64_squash_input_denormal(a, fpst);
+ b = float64_squash_input_denormal(b, fpst);
+
+ a = float64_chs(a);
+ if ((float64_is_infinity(a) && float64_is_zero(b)) ||
+ (float64_is_infinity(b) && float64_is_zero(a))) {
+ return float64_two;
+ }
+ return float64_muladd(a, b, float64_two, 0, fpst);
+}
+
+uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float16_squash_input_denormal(a, fpst);
+ b = float16_squash_input_denormal(b, fpst);
+
+ a = float16_chs(a);
+ if ((float16_is_infinity(a) && float16_is_zero(b)) ||
+ (float16_is_infinity(b) && float16_is_zero(a))) {
+ return float16_one_point_five;
+ }
+ return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
+}
+
+float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float32_squash_input_denormal(a, fpst);
+ b = float32_squash_input_denormal(b, fpst);
+
+ a = float32_chs(a);
+ if ((float32_is_infinity(a) && float32_is_zero(b)) ||
+ (float32_is_infinity(b) && float32_is_zero(a))) {
+ return float32_one_point_five;
+ }
+ return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
+}
+
+float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float64_squash_input_denormal(a, fpst);
+ b = float64_squash_input_denormal(b, fpst);
+
+ a = float64_chs(a);
+ if ((float64_is_infinity(a) && float64_is_zero(b)) ||
+ (float64_is_infinity(b) && float64_is_zero(a))) {
+ return float64_one_point_five;
+ }
+ return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
+}
+
+/* Pairwise long add: add pairs of adjacent elements into
+ * double-width elements in the result (eg _s8 is an 8x8->16 op)
+ */
+uint64_t HELPER(neon_addlp_s8)(uint64_t a)
+{
+ uint64_t nsignmask = 0x0080008000800080ULL;
+ uint64_t wsignmask = 0x8000800080008000ULL;
+ uint64_t elementmask = 0x00ff00ff00ff00ffULL;
+ uint64_t tmp1, tmp2;
+ uint64_t res, signres;
+
+ /* Extract odd elements, sign extend each to a 16 bit field */
+ tmp1 = a & elementmask;
+ tmp1 ^= nsignmask;
+ tmp1 |= wsignmask;
+ tmp1 = (tmp1 - nsignmask) ^ wsignmask;
+ /* Ditto for the even elements */
+ tmp2 = (a >> 8) & elementmask;
+ tmp2 ^= nsignmask;
+ tmp2 |= wsignmask;
+ tmp2 = (tmp2 - nsignmask) ^ wsignmask;
+
+ /* calculate the result by summing bits 0..14, 16..30, etc,
+ * and then adjusting the sign bits 15, 31, etc manually.
+ * This ensures the addition can't overflow the 16 bit field.
+ */
+ signres = (tmp1 ^ tmp2) & wsignmask;
+ res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
+ res ^= signres;
+
+ return res;
+}
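+
+/*
+ * The bit-twiddling above is the usual branch-free sign extension
+ * (v ^ 0x80) - 0x80, applied to all four 16-bit lanes at once: OR-ing in
+ * the wide sign bit first guarantees that the per-lane subtraction cannot
+ * borrow into the neighbouring lane, and the final XOR then produces the
+ * correct 16-bit sign bit.
+ */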
+
+uint64_t HELPER(neon_addlp_u8)(uint64_t a)
+{
+ uint64_t tmp;
+
+ tmp = a & 0x00ff00ff00ff00ffULL;
+ tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
+ return tmp;
+}
+
+uint64_t HELPER(neon_addlp_s16)(uint64_t a)
+{
+ int32_t reslo, reshi;
+
+ reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
+ reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
+
+ return (uint32_t)reslo | (((uint64_t)reshi) << 32);
+}
+
+uint64_t HELPER(neon_addlp_u16)(uint64_t a)
+{
+ uint64_t tmp;
+
+ tmp = a & 0x0000ffff0000ffffULL;
+ tmp += (a >> 16) & 0x0000ffff0000ffffULL;
+ return tmp;
+}
+
+/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
+uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ uint16_t val16, sbit;
+ int16_t exp;
+
+ if (float16_is_any_nan(a)) {
+ float16 nan = a;
+ if (float16_is_signaling_nan(a, fpst)) {
+ float_raise(float_flag_invalid, fpst);
+ if (!fpst->default_nan_mode) {
+ nan = float16_silence_nan(a, fpst);
+ }
+ }
+ if (fpst->default_nan_mode) {
+ nan = float16_default_nan(fpst);
+ }
+ return nan;
+ }
+
+ a = float16_squash_input_denormal(a, fpst);
+
+ val16 = float16_val(a);
+ sbit = 0x8000 & val16;
+ exp = extract32(val16, 10, 5);
+
+ if (exp == 0) {
+ return make_float16(deposit32(sbit, 10, 5, 0x1e));
+ } else {
+ return make_float16(deposit32(sbit, 10, 5, ~exp));
+ }
+}
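+
+/*
+ * For example, FRECPX of float16 1.0 (sign 0, exponent 0x0f, fraction 0)
+ * produces exponent ~0x0f = 0x10 with a zero fraction, i.e. 2.0; the
+ * input fraction is always discarded.
+ */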
+
+float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ uint32_t val32, sbit;
+ int32_t exp;
+
+ if (float32_is_any_nan(a)) {
+ float32 nan = a;
+ if (float32_is_signaling_nan(a, fpst)) {
+ float_raise(float_flag_invalid, fpst);
+ if (!fpst->default_nan_mode) {
+ nan = float32_silence_nan(a, fpst);
+ }
+ }
+ if (fpst->default_nan_mode) {
+ nan = float32_default_nan(fpst);
+ }
+ return nan;
+ }
+
+ a = float32_squash_input_denormal(a, fpst);
+
+ val32 = float32_val(a);
+ sbit = 0x80000000ULL & val32;
+ exp = extract32(val32, 23, 8);
+
+ if (exp == 0) {
+ return make_float32(sbit | (0xfe << 23));
+ } else {
+ return make_float32(sbit | (~exp & 0xff) << 23);
+ }
+}
+
+float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ uint64_t val64, sbit;
+ int64_t exp;
+
+ if (float64_is_any_nan(a)) {
+ float64 nan = a;
+ if (float64_is_signaling_nan(a, fpst)) {
+ float_raise(float_flag_invalid, fpst);
+ if (!fpst->default_nan_mode) {
+ nan = float64_silence_nan(a, fpst);
+ }
+ }
+ if (fpst->default_nan_mode) {
+ nan = float64_default_nan(fpst);
+ }
+ return nan;
+ }
+
+ a = float64_squash_input_denormal(a, fpst);
+
+ val64 = float64_val(a);
+ sbit = 0x8000000000000000ULL & val64;
+ exp = extract64(float64_val(a), 52, 11);
+
+ if (exp == 0) {
+ return make_float64(sbit | (0x7feULL << 52));
+ } else {
+ return make_float64(sbit | (~exp & 0x7ffULL) << 52);
+ }
+}
+
+float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
+{
+ /* Von Neumann rounding is implemented by using round-to-zero
+ * and then setting the LSB of the result if Inexact was raised.
+ */
+ float32 r;
+ float_status *fpst = &env->vfp.fp_status;
+ float_status tstat = *fpst;
+ int exflags;
+
+ set_float_rounding_mode(float_round_to_zero, &tstat);
+ set_float_exception_flags(0, &tstat);
+ r = float64_to_float32(a, &tstat);
+ exflags = get_float_exception_flags(&tstat);
+ if (exflags & float_flag_inexact) {
+ r = make_float32(float32_val(r) | 1);
+ }
+ exflags |= get_float_exception_flags(fpst);
+ set_float_exception_flags(exflags, fpst);
+ return r;
+}
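+
+/*
+ * Rounding to odd here ensures that a later narrowing of the result
+ * (for example to half precision) cannot introduce a double-rounding
+ * error, which is the reason the FCVTXN instruction exists.
+ */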
+
+/* 64-bit versions of the CRC helpers. Note that although the operation
+ * and the prototypes of crc32c() and crc32() mean that only the bottom
+ * 32 bits of the accumulator and result are used, we pass and return
+ * uint64_t for convenience of the generated code. Unlike the 32-bit
+ * instruction set versions, val may genuinely have 64 bits of data in it.
+ * The upper bytes of val (above the number specified by 'bytes') must have
+ * been zeroed out by the caller.
+ */
+uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
+{
+ uint8_t buf[8];
+
+ stq_le_p(buf, val);
+
+ /* zlib crc32 converts the accumulator and output to one's complement. */
+ return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
+}
+
+uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
+{
+ uint8_t buf[8];
+
+ stq_le_p(buf, val);
+
+ /* Linux crc32c converts the output to one's complement. */
+ return crc32c(acc, buf, bytes) ^ 0xffffffff;
+}
+
+/*
+ * AdvSIMD half-precision
+ */
+
+#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))
+
+#define ADVSIMD_HALFOP(name) \
+uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \
+{ \
+ float_status *fpst = fpstp; \
+ return float16_ ## name(a, b, fpst); \
+}
+
+ADVSIMD_HALFOP(add)
+ADVSIMD_HALFOP(sub)
+ADVSIMD_HALFOP(mul)
+ADVSIMD_HALFOP(div)
+ADVSIMD_HALFOP(min)
+ADVSIMD_HALFOP(max)
+ADVSIMD_HALFOP(minnum)
+ADVSIMD_HALFOP(maxnum)
+
+#define ADVSIMD_TWOHALFOP(name) \
+uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
+{ \
+ float16 a1, a2, b1, b2; \
+ uint32_t r1, r2; \
+ float_status *fpst = fpstp; \
+ a1 = extract32(two_a, 0, 16); \
+ a2 = extract32(two_a, 16, 16); \
+ b1 = extract32(two_b, 0, 16); \
+ b2 = extract32(two_b, 16, 16); \
+ r1 = float16_ ## name(a1, b1, fpst); \
+ r2 = float16_ ## name(a2, b2, fpst); \
+ return deposit32(r1, 16, 16, r2); \
+}
+
+ADVSIMD_TWOHALFOP(add)
+ADVSIMD_TWOHALFOP(sub)
+ADVSIMD_TWOHALFOP(mul)
+ADVSIMD_TWOHALFOP(div)
+ADVSIMD_TWOHALFOP(min)
+ADVSIMD_TWOHALFOP(max)
+ADVSIMD_TWOHALFOP(minnum)
+ADVSIMD_TWOHALFOP(maxnum)
+
+/* Data processing - scalar floating-point and advanced SIMD */
+static float16 float16_mulx(float16 a, float16 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ a = float16_squash_input_denormal(a, fpst);
+ b = float16_squash_input_denormal(b, fpst);
+
+ if ((float16_is_zero(a) && float16_is_infinity(b)) ||
+ (float16_is_infinity(a) && float16_is_zero(b))) {
+ /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
+ return make_float16((1U << 14) |
+ ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
+ }
+ return float16_mul(a, b, fpst);
+}
+
+ADVSIMD_HALFOP(mulx)
+ADVSIMD_TWOHALFOP(mulx)
+
+/* fused multiply-accumulate */
+uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
+ void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return float16_muladd(a, b, c, 0, fpst);
+}
+
+uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
+ uint32_t two_c, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ float16 a1, a2, b1, b2, c1, c2;
+ uint32_t r1, r2;
+ a1 = extract32(two_a, 0, 16);
+ a2 = extract32(two_a, 16, 16);
+ b1 = extract32(two_b, 0, 16);
+ b2 = extract32(two_b, 16, 16);
+ c1 = extract32(two_c, 0, 16);
+ c2 = extract32(two_c, 16, 16);
+ r1 = float16_muladd(a1, b1, c1, 0, fpst);
+ r2 = float16_muladd(a2, b2, c2, 0, fpst);
+ return deposit32(r1, 16, 16, r2);
+}
+
+/*
+ * Floating point comparisons produce an integer result. Softfloat
+ * routines return float_relation types which we convert to the 0/-1
+ * values that Neon requires.
+ */
+
+#define ADVSIMD_CMPRES(test) ((test) ? 0xffff : 0)
+
+uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ int compare = float16_compare_quiet(a, b, fpst);
+ return ADVSIMD_CMPRES(compare == float_relation_equal);
+}
+
+uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ int compare = float16_compare(a, b, fpst);
+ return ADVSIMD_CMPRES(compare == float_relation_greater ||
+ compare == float_relation_equal);
+}
+
+uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ int compare = float16_compare(a, b, fpst);
+ return ADVSIMD_CMPRES(compare == float_relation_greater);
+}
+
+uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ float16 f0 = float16_abs(a);
+ float16 f1 = float16_abs(b);
+ int compare = float16_compare(f0, f1, fpst);
+ return ADVSIMD_CMPRES(compare == float_relation_greater ||
+ compare == float_relation_equal);
+}
+
+uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ float16 f0 = float16_abs(a);
+ float16 f1 = float16_abs(b);
+ int compare = float16_compare(f0, f1, fpst);
+ return ADVSIMD_CMPRES(compare == float_relation_greater);
+}
+
+/* round to integral */
+uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status)
+{
+ return float16_round_to_int(x, fp_status);
+}
+
+uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
+{
+ int old_flags = get_float_exception_flags(fp_status), new_flags;
+ float16 ret;
+
+ ret = float16_round_to_int(x, fp_status);
+
+ /* Suppress any inexact exceptions the conversion produced */
+ if (!(old_flags & float_flag_inexact)) {
+ new_flags = get_float_exception_flags(fp_status);
+ set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+ }
+
+ return ret;
+}
+
+/*
+ * Half-precision floating point conversion functions
+ *
+ * There are a multitude of conversion functions with various
+ * different rounding modes. This is dealt with by the calling code
+ * setting the mode appropriately before calling the helper.
+ */
+
+uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ /* Invalid if we are passed a NaN */
+ if (float16_is_any_nan(a)) {
+ float_raise(float_flag_invalid, fpst);
+ return 0;
+ }
+ return float16_to_int16(a, fpst);
+}
+
+uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
+{
+ float_status *fpst = fpstp;
+
+ /* Invalid if we are passed a NaN */
+ if (float16_is_any_nan(a)) {
+ float_raise(float_flag_invalid, fpst);
+ return 0;
+ }
+ return float16_to_uint16(a, fpst);
+}
+
+static int el_from_spsr(uint32_t spsr)
+{
+ /* Return the exception level that this SPSR is requesting a return to,
+ * or -1 if it is invalid (an illegal return)
+ */
+ if (spsr & PSTATE_nRW) {
+ switch (spsr & CPSR_M) {
+ case ARM_CPU_MODE_USR:
+ return 0;
+ case ARM_CPU_MODE_HYP:
+ return 2;
+ case ARM_CPU_MODE_FIQ:
+ case ARM_CPU_MODE_IRQ:
+ case ARM_CPU_MODE_SVC:
+ case ARM_CPU_MODE_ABT:
+ case ARM_CPU_MODE_UND:
+ case ARM_CPU_MODE_SYS:
+ return 1;
+ case ARM_CPU_MODE_MON:
+ /* Returning to Mon from AArch64 is never possible,
+ * so this is an illegal return.
+ */
+ default:
+ return -1;
+ }
+ } else {
+ if (extract32(spsr, 1, 1)) {
+ /* Return with reserved M[1] bit set */
+ return -1;
+ }
+ if (extract32(spsr, 0, 4) == 1) {
+ /* return to EL0 with M[0] bit set */
+ return -1;
+ }
+ return extract32(spsr, 2, 2);
+ }
+}
+
+static void cpsr_write_from_spsr_elx(CPUARMState *env,
+ uint32_t val)
+{
+ uint32_t mask;
+
+ /* Save SPSR_ELx.SS into PSTATE. */
+ env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS);
+ val &= ~PSTATE_SS;
+
+ /* Move DIT to the correct location for CPSR */
+ if (val & PSTATE_DIT) {
+ val &= ~PSTATE_DIT;
+ val |= CPSR_DIT;
+ }
+
+ mask = aarch32_cpsr_valid_mask(env->features,
+ &env_archcpu(env)->isar);
+ cpsr_write(env, val, mask, CPSRWriteRaw);
+}
+
+void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
+{
+ int cur_el = arm_current_el(env);
+ unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
+ uint32_t spsr = env->banked_spsr[spsr_idx];
+ int new_el;
+ bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;
+
+ aarch64_save_sp(env, cur_el);
+
+ arm_clear_exclusive(env);
+
+ /* We must squash the PSTATE.SS bit to zero unless both of the
+ * following hold:
+ * 1. debug exceptions are currently disabled
+ * 2. singlestep will be active in the EL we return to
+ * We check 1 here and 2 after we've done the pstate/cpsr write() to
+ * transition to the EL we're going to.
+ */
+ if (arm_generate_debug_exceptions(env)) {
+ spsr &= ~PSTATE_SS;
+ }
+
+ /*
+ * FEAT_RME forbids return from EL3 with an invalid security state.
+ * We don't need an explicit check for FEAT_RME here because we enforce
+ * in scr_write() that you can't set the NSE bit without it.
+ */
+ if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) {
+ goto illegal_return;
+ }
+
+ new_el = el_from_spsr(spsr);
+ if (new_el == -1) {
+ goto illegal_return;
+ }
+ if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) {
+ /* Disallow return to an EL which is unimplemented or higher
+ * than the current one.
+ */
+ goto illegal_return;
+ }
+
+ if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
+ /* Return to an EL which is configured for a different register width */
+ goto illegal_return;
+ }
+
+ if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
+ goto illegal_return;
+ }
+
+ bql_lock();
+ arm_call_pre_el_change_hook(env_archcpu(env));
+ bql_unlock();
+
+ if (!return_to_aa64) {
+ env->aarch64 = false;
+ /* We do a raw CPSR write because aarch64_sync_64_to_32()
+ * will sort the register banks out for us, and we've already
+ * caught all the bad-mode cases in el_from_spsr().
+ */
+ cpsr_write_from_spsr_elx(env, spsr);
+ if (!arm_singlestep_active(env)) {
+ env->pstate &= ~PSTATE_SS;
+ }
+ aarch64_sync_64_to_32(env);
+
+ if (spsr & CPSR_T) {
+ env->regs[15] = new_pc & ~0x1;
+ } else {
+ env->regs[15] = new_pc & ~0x3;
+ }
+ helper_rebuild_hflags_a32(env, new_el);
+ qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
+ "AArch32 EL%d PC 0x%" PRIx32 "\n",
+ cur_el, new_el, env->regs[15]);
+ } else {
+ int tbii;
+
+ env->aarch64 = true;
+ spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar);
+ pstate_write(env, spsr);
+ if (!arm_singlestep_active(env)) {
+ env->pstate &= ~PSTATE_SS;
+ }
+ aarch64_restore_sp(env, new_el);
+ helper_rebuild_hflags_a64(env, new_el);
+
+ /*
+ * Apply TBI to the exception return address. We had to delay this
+ * until after we selected the new EL, so that we could select the
+ * correct TBI+TBID bits. This is made easier by waiting until after
+ * the hflags rebuild, since we can pull the composite TBII field
+ * from there.
+ */
+ tbii = EX_TBFLAG_A64(env->hflags, TBII);
+ if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
+ /* TBI is enabled. */
+ int core_mmu_idx = arm_env_mmu_index(env);
+ if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) {
+ new_pc = sextract64(new_pc, 0, 56);
+ } else {
+ new_pc = extract64(new_pc, 0, 56);
+ }
+ }
+ env->pc = new_pc;
+
+ qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
+ "AArch64 EL%d PC 0x%" PRIx64 "\n",
+ cur_el, new_el, env->pc);
+ }
+
+ /*
+ * Note that cur_el can never be 0. If new_el is 0, then
+ * el0_a64 is return_to_aa64, else el0_a64 is ignored.
+ */
+ aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);
+
+ bql_lock();
+ arm_call_el_change_hook(env_archcpu(env));
+ bql_unlock();
+
+ return;
+
+illegal_return:
+ /* Illegal return events of various kinds have architecturally
+ * mandated behaviour:
+ * restore NZCV and DAIF from SPSR_ELx
+ * set PSTATE.IL
+ * restore PC from ELR_ELx
+ * no change to exception level, execution state or stack pointer
+ */
+ env->pstate |= PSTATE_IL;
+ env->pc = new_pc;
+ spsr &= PSTATE_NZCV | PSTATE_DAIF;
+ spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF);
+ pstate_write(env, spsr);
+ if (!arm_singlestep_active(env)) {
+ env->pstate &= ~PSTATE_SS;
+ }
+ helper_rebuild_hflags_a64(env, cur_el);
+ qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
+ "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
+}
+
+/*
+ * Square Root and Reciprocal square root
+ */
+
+uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
+{
+ float_status *s = fpstp;
+
+ return float16_sqrt(a, s);
+}
+
+void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
+{
+ /*
+ * Implement DC ZVA, which zeroes a fixed-length block of memory.
+ * Note that we do not implement the (architecturally mandated)
+ * alignment fault for attempts to use this on Device memory
+ * (which matches the usual QEMU behaviour of not implementing either
+ * alignment faults or any memory attribute handling).
+ */
+ int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
+ uint64_t vaddr = vaddr_in & ~(blocklen - 1);
+ int mmu_idx = arm_env_mmu_index(env);
+ void *mem;
+
+ /*
+ * Trapless lookup. In addition to actual invalid page, may
+ * return NULL for I/O, watchpoints, clean pages, etc.
+ */
+ mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
+
+#ifndef CONFIG_USER_ONLY
+ if (unlikely(!mem)) {
+ uintptr_t ra = GETPC();
+
+ /*
+ * Trap if accessing an invalid page. DC_ZVA requires that we supply
+ * the original pointer for an invalid page. But watchpoints require
+ * that we probe the actual space. So do both.
+ */
+ (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
+ mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);
+
+ if (unlikely(!mem)) {
+ /*
+ * The only remaining reason for mem == NULL is I/O.
+ * Just do a series of byte writes as the architecture demands.
+ */
+ for (int i = 0; i < blocklen; i++) {
+ cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
+ }
+ return;
+ }
+ }
+#endif
+
+ memset(mem, 0, blocklen);
+}
+
+void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
+ uint32_t access_type, uint32_t mmu_idx)
+{
+ arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type,
+ mmu_idx, GETPC());
+}
+
+/* Memory operations (memset, memmove, memcpy) */
+
+/*
+ * Return true if the CPY* and SET* insns can execute; compare
+ * pseudocode CheckMOPSEnabled(), though we refactor it a little.
+ */
+static bool mops_enabled(CPUARMState *env)
+{
+ int el = arm_current_el(env);
+
+ if (el < 2 &&
+ (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) &&
+ !(arm_hcrx_el2_eff(env) & HCRX_MSCEN)) {
+ return false;
+ }
+
+ if (el == 0) {
+ if (!el_is_in_host(env, 0)) {
+ return env->cp15.sctlr_el[1] & SCTLR_MSCEN;
+ } else {
+ return env->cp15.sctlr_el[2] & SCTLR_MSCEN;
+ }
+ }
+ return true;
+}
+
+static void check_mops_enabled(CPUARMState *env, uintptr_t ra)
+{
+ if (!mops_enabled(env)) {
+ raise_exception_ra(env, EXCP_UDEF, syn_uncategorized(),
+ exception_target_el(env), ra);
+ }
+}
+
+/*
+ * Return the target exception level for an exception due
+ * to mismatched arguments in a FEAT_MOPS copy or set.
+ * Compare pseudocode MismatchedCpySetTargetEL()
+ */
+static int mops_mismatch_exception_target_el(CPUARMState *env)
+{
+ int el = arm_current_el(env);
+
+ if (el > 1) {
+ return el;
+ }
+ if (el == 0 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
+ return 2;
+ }
+ if (el == 1 && (arm_hcrx_el2_eff(env) & HCRX_MCE2)) {
+ return 2;
+ }
+ return 1;
+}
+
+/*
+ * Check whether an M or E instruction was executed with a CF value
+ * indicating the wrong option for this implementation.
+ * Assumes we are always Option A.
+ */
+static void check_mops_wrong_option(CPUARMState *env, uint32_t syndrome,
+ uintptr_t ra)
+{
+ if (env->CF != 0) {
+ syndrome |= 1 << 17; /* Set the wrong-option bit */
+ raise_exception_ra(env, EXCP_UDEF, syndrome,
+ mops_mismatch_exception_target_el(env), ra);
+ }
+}
+
+/*
+ * Return the maximum number of bytes we can transfer starting at addr
+ * without crossing a page boundary.
+ */
+static uint64_t page_limit(uint64_t addr)
+{
+ return TARGET_PAGE_ALIGN(addr + 1) - addr;
+}
+
+/*
+ * Return the number of bytes we can copy starting from addr and working
+ * backwards without crossing a page boundary.
+ */
+static uint64_t page_limit_rev(uint64_t addr)
+{
+ return (addr & ~TARGET_PAGE_MASK) + 1;
+}
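+
+/*
+ * For example, assuming 4 KiB target pages, page_limit(0x1ff0) is 0x10
+ * (bytes up to the next page boundary) and page_limit_rev(0x100f) is
+ * 0x10 (bytes back down to the start of the page, inclusive).
+ */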
+
+/*
+ * Perform part of a memory set on an area of guest memory starting at
+ * toaddr (a dirty address) and extending for setsize bytes.
+ *
+ * Returns the number of bytes actually set, which might be less than
+ * setsize; the caller should loop until the whole set has been done.
+ * The caller should ensure that the guest registers are correct
+ * for the possibility that the first byte of the set encounters
+ * an exception or watchpoint. We guarantee not to take any faults
+ * for bytes other than the first.
+ */
+static uint64_t set_step(CPUARMState *env, uint64_t toaddr,
+ uint64_t setsize, uint32_t data, int memidx,
+ uint32_t *mtedesc, uintptr_t ra)
+{
+ void *mem;
+
+ setsize = MIN(setsize, page_limit(toaddr));
+ if (*mtedesc) {
+ uint64_t mtesize = mte_mops_probe(env, toaddr, setsize, *mtedesc);
+ if (mtesize == 0) {
+ /* Trap, or not. All CPU state is up to date */
+ mte_check_fail(env, *mtedesc, toaddr, ra);
+ /* Continue, with no further MTE checks required */
+ *mtedesc = 0;
+ } else {
+ /* Advance to the end, or to the tag mismatch */
+ setsize = MIN(setsize, mtesize);
+ }
+ }
+
+ toaddr = useronly_clean_ptr(toaddr);
+ /*
+ * Trapless lookup: returns NULL for invalid page, I/O,
+ * watchpoints, clean pages, etc.
+ */
+ mem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, memidx);
+
+#ifndef CONFIG_USER_ONLY
+ if (unlikely(!mem)) {
+ /*
+ * Slow-path: just do one byte write. This will handle the
+ * watchpoint, invalid page, etc handling correctly.
+ * For clean code pages, the next iteration will see
+ * the page dirty and will use the fast path.
+ */
+ cpu_stb_mmuidx_ra(env, toaddr, data, memidx, ra);
+ return 1;
+ }
+#endif
+ /* Easy case: just memset the host memory */
+ memset(mem, data, setsize);
+ return setsize;
+}
+
+/*
+ * Similar, but setting tags. The architecture requires us to do this
+ * in 16-byte chunks. SETG* accesses are not tag checked; they set
+ * the tags.
+ */
+static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr,
+ uint64_t setsize, uint32_t data, int memidx,
+ uint32_t *mtedesc, uintptr_t ra)
+{
+ void *mem;
+ uint64_t cleanaddr;
+
+ setsize = MIN(setsize, page_limit(toaddr));
+
+ cleanaddr = useronly_clean_ptr(toaddr);
+ /*
+ * Trapless lookup: returns NULL for invalid page, I/O,
+ * watchpoints, clean pages, etc.
+ */
+ mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx);
+
+#ifndef CONFIG_USER_ONLY
+ if (unlikely(!mem)) {
+ /*
+ * Slow-path: just do one write. This will handle the
+ * watchpoint, invalid page, etc handling correctly.
+ * The architecture requires that we do 16 bytes at a time,
+ * and we know both ptr and size are 16 byte aligned.
+ * For clean code pages, the next iteration will see
+ * the page dirty and will use the fast path.
+ */
+ uint64_t repldata = data * 0x0101010101010101ULL;
+ MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, memidx);
+ cpu_st16_mmu(env, toaddr, int128_make128(repldata, repldata), oi16, ra);
+ mte_mops_set_tags(env, toaddr, 16, *mtedesc);
+ return 16;
+ }
+#endif
+ /* Easy case: just memset the host memory */
+ memset(mem, data, setsize);
+ mte_mops_set_tags(env, toaddr, setsize, *mtedesc);
+ return setsize;
+}
+
+typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr,
+ uint64_t setsize, uint32_t data,
+ int memidx, uint32_t *mtedesc, uintptr_t ra);
+
+/* Extract register numbers from a MOPS exception syndrome value */
+static int mops_destreg(uint32_t syndrome)
+{
+ return extract32(syndrome, 10, 5);
+}
+
+static int mops_srcreg(uint32_t syndrome)
+{
+ return extract32(syndrome, 5, 5);
+}
+
+static int mops_sizereg(uint32_t syndrome)
+{
+ return extract32(syndrome, 0, 5);
+}
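+
+/*
+ * As the extract32() calls above show, the MOPS syndrome word packs the
+ * destination register number in bits [14:10], the source register in
+ * bits [9:5] and the size register in bits [4:0]; bit 17 is the
+ * wrong-option flag tested in check_mops_wrong_option().
+ */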
+
+/*
+ * Return true if TCMA and TBI bits mean we need to do MTE checks.
+ * We only need to do this once per MOPS insn, not for every page.
+ */
+static bool mte_checks_needed(uint64_t ptr, uint32_t desc)
+{
+ int bit55 = extract64(ptr, 55, 1);
+
+ /*
+ * Note that tbi_check() returns true for "access checked" but
+ * tcma_check() returns true for "access unchecked".
+ */
+ if (!tbi_check(desc, bit55)) {
+ return false;
+ }
+ return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr));
+}
+
+/* Take an exception if the SETG addr/size are not granule aligned */
+static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size,
+ uint32_t memidx, uintptr_t ra)
+{
+ if ((size != 0 && !QEMU_IS_ALIGNED(ptr, TAG_GRANULE)) ||
+ !QEMU_IS_ALIGNED(size, TAG_GRANULE)) {
+ arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
+ memidx, ra);
+ }
+}
+
+static uint64_t arm_reg_or_xzr(CPUARMState *env, int reg)
+{
+ /*
+ * Runtime equivalent of cpu_reg() -- return the CPU register value,
+ * for contexts when index 31 means XZR (not SP).
+ */
+ return reg == 31 ? 0 : env->xregs[reg];
+}
+
+/*
+ * For the Memory Set operation, our implementation chooses
+ * always to use "option A", where we update Xd to the final
+ * address in the SETP insn, and set Xn to be -(bytes remaining).
+ * On SETM and SETE insns we only need to update Xn.
+ *
+ * @env: CPU
+ * @syndrome: syndrome value for mismatch exceptions
+ * (also contains the register numbers we need to use)
+ * @mtedesc: MTE descriptor word
+ * @stepfn: function which does a single part of the set operation
+ * @is_setg: true if this is the tag-setting SETG variant
+ */
+static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
+ StepFn *stepfn, bool is_setg, uintptr_t ra)
+{
+ /* Prologue: we choose to do up to the next page boundary */
+ int rd = mops_destreg(syndrome);
+ int rs = mops_srcreg(syndrome);
+ int rn = mops_sizereg(syndrome);
+ uint8_t data = arm_reg_or_xzr(env, rs);
+ uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
+ uint64_t toaddr = env->xregs[rd];
+ uint64_t setsize = env->xregs[rn];
+ uint64_t stagesetsize, step;
+
+ check_mops_enabled(env, ra);
+
+ if (setsize > INT64_MAX) {
+ setsize = INT64_MAX;
+ if (is_setg) {
+ setsize &= ~0xf;
+ }
+ }
+
+ if (unlikely(is_setg)) {
+ check_setg_alignment(env, toaddr, setsize, memidx, ra);
+ } else if (!mte_checks_needed(toaddr, mtedesc)) {
+ mtedesc = 0;
+ }
+
+ stagesetsize = MIN(setsize, page_limit(toaddr));
+ while (stagesetsize) {
+ env->xregs[rd] = toaddr;
+ env->xregs[rn] = setsize;
+ step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra);
+ toaddr += step;
+ setsize -= step;
+ stagesetsize -= step;
+ }
+ /* Insn completed, so update registers to the Option A format */
+ env->xregs[rd] = toaddr + setsize;
+ env->xregs[rn] = -setsize;
+
+ /* Set NZCV = 0000 to indicate we are an Option A implementation */
+ env->NF = 0;
+ env->ZF = 1; /* our env->ZF encoding is inverted */
+ env->CF = 0;
+ env->VF = 0;
+ return;
+}
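+
+/*
+ * Worked example of the Option A convention used above: a SETP with
+ * Xd = 0x8000 and Xn = 0x30 completes with Xd = 0x8030 (the final
+ * address) and Xn = -(bytes still to be set); SETM and SETE then write
+ * at Xd + Xn and step Xn back up towards zero.
+ */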
+
+void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
+{
+ do_setp(env, syndrome, mtedesc, set_step, false, GETPC());
+}
+
+void HELPER(setgp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
+{
+ do_setp(env, syndrome, mtedesc, set_step_tags, true, GETPC());
+}
+
+static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
+ StepFn *stepfn, bool is_setg, uintptr_t ra)
+{
+ /* Main: we choose to do all the full-page chunks */
+ CPUState *cs = env_cpu(env);
+ int rd = mops_destreg(syndrome);
+ int rs = mops_srcreg(syndrome);
+ int rn = mops_sizereg(syndrome);
+ uint8_t data = arm_reg_or_xzr(env, rs);
+ uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
+ uint64_t setsize = -env->xregs[rn];
+ uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
+ uint64_t step, stagesetsize;
+
+ check_mops_enabled(env, ra);
+
+ /*
+ * We're allowed to NOP out "no data to copy" before the consistency
+ * checks; we choose to do so.
+ */
+ if (env->xregs[rn] == 0) {
+ return;
+ }
+
+ check_mops_wrong_option(env, syndrome, ra);
+
+ /*
+ * Our implementation will work fine even if we have an unaligned
+ * destination address, and because we update Xn every time around
+ * the loop below and the return value from stepfn() may be less
+ * than requested, we might find toaddr is unaligned. So we don't
+ * have an IMPDEF check for alignment here.
+ */
+
+ if (unlikely(is_setg)) {
+ check_setg_alignment(env, toaddr, setsize, memidx, ra);
+ } else if (!mte_checks_needed(toaddr, mtedesc)) {
+ mtedesc = 0;
+ }
+
+ /* Do the actual memset: we leave the last partial page to SETE */
+ stagesetsize = setsize & TARGET_PAGE_MASK;
+ while (stagesetsize > 0) {
+ step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra);
+ toaddr += step;
+ setsize -= step;
+ stagesetsize -= step;
+ env->xregs[rn] = -setsize;
+ if (stagesetsize > 0 && unlikely(cpu_loop_exit_requested(cs))) {
+ cpu_loop_exit_restore(cs, ra);
+ }
+ }
+}
+
+void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
+{
+ do_setm(env, syndrome, mtedesc, set_step, false, GETPC());
+}
+
+void HELPER(setgm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
+{
+ do_setm(env, syndrome, mtedesc, set_step_tags, true, GETPC());
+}
+
+static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc,
+ StepFn *stepfn, bool is_setg, uintptr_t ra)
+{
+ /* Epilogue: do the last partial page */
+ int rd = mops_destreg(syndrome);
+ int rs = mops_srcreg(syndrome);
+ int rn = mops_sizereg(syndrome);
+ uint8_t data = arm_reg_or_xzr(env, rs);
+ uint64_t toaddr = env->xregs[rd] + env->xregs[rn];
+ uint64_t setsize = -env->xregs[rn];
+ uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX);
+ uint64_t step;
+
+ check_mops_enabled(env, ra);
+
+ /*
+ * We're allowed to NOP out "no data to copy" before the consistency
+ * checks; we choose to do so.
+ */
+ if (setsize == 0) {
+ return;
+ }
+
+ check_mops_wrong_option(env, syndrome, ra);
+
+ /*
+ * Our implementation has no address alignment requirements, but
+ * we do want to enforce the "less than a page" size requirement,
+ * so we don't need to have the "check for interrupts" here.
+ */
+ if (setsize >= TARGET_PAGE_SIZE) {
+ raise_exception_ra(env, EXCP_UDEF, syndrome,
+ mops_mismatch_exception_target_el(env), ra);
+ }
+
+ if (unlikely(is_setg)) {
+ check_setg_alignment(env, toaddr, setsize, memidx, ra);
+ } else if (!mte_checks_needed(toaddr, mtedesc)) {
+ mtedesc = 0;
+ }
+
+ /* Do the actual memset */
+ while (setsize > 0) {
+ step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra);
+ toaddr += step;
+ setsize -= step;
+ env->xregs[rn] = -setsize;
+ }
+}
+
+void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
+{
+ do_sete(env, syndrome, mtedesc, set_step, false, GETPC());
+}
+
+void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc)
+{
+ do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC());
+}
+
+/*
+ * Perform part of a memory copy from the guest memory at fromaddr
+ * and extending for copysize bytes, to the guest memory at
+ * toaddr. Both addresses are dirty.
+ *
+ * Returns the number of bytes actually copied, which might be less than
+ * copysize; the caller should loop until the whole copy has been done.
+ * The caller should ensure that the guest registers are correct
+ * for the possibility that the first byte of the copy encounters
+ * an exception or watchpoint. We guarantee not to take any faults
+ * for bytes other than the first.
+ */
+static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr,
+ uint64_t copysize, int wmemidx, int rmemidx,
+ uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
+{
+ void *rmem;
+ void *wmem;
+
+ /* Don't cross a page boundary on either source or destination */
+ copysize = MIN(copysize, page_limit(toaddr));
+ copysize = MIN(copysize, page_limit(fromaddr));
+ /*
+ * Handle MTE tag checks: either handle the tag mismatch for byte 0,
+ * or else copy up to but not including the byte with the mismatch.
+ */
+ if (*rdesc) {
+ uint64_t mtesize = mte_mops_probe(env, fromaddr, copysize, *rdesc);
+ if (mtesize == 0) {
+ mte_check_fail(env, *rdesc, fromaddr, ra);
+ *rdesc = 0;
+ } else {
+ copysize = MIN(copysize, mtesize);
+ }
+ }
+ if (*wdesc) {
+ uint64_t mtesize = mte_mops_probe(env, toaddr, copysize, *wdesc);
+ if (mtesize == 0) {
+ mte_check_fail(env, *wdesc, toaddr, ra);
+ *wdesc = 0;
+ } else {
+ copysize = MIN(copysize, mtesize);
+ }
+ }
+
+ toaddr = useronly_clean_ptr(toaddr);
+ fromaddr = useronly_clean_ptr(fromaddr);
+ /* Trapless lookup of whether we can get a host memory pointer */
+ wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
+ rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);
+
+#ifndef CONFIG_USER_ONLY
+ /*
+ * If we don't have host memory for both source and dest then just
+ * do a single byte copy. This will handle watchpoints, invalid pages,
+ * etc correctly. For clean code pages, the next iteration will see
+ * the page dirty and will use the fast path.
+ */
+ if (unlikely(!rmem || !wmem)) {
+ uint8_t byte;
+ if (rmem) {
+ byte = *(uint8_t *)rmem;
+ } else {
+ byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
+ }
+ if (wmem) {
+ *(uint8_t *)wmem = byte;
+ } else {
+ cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
+ }
+ return 1;
+ }
+#endif
+ /* Easy case: just memmove the host memory */
+ memmove(wmem, rmem, copysize);
+ return copysize;
+}
+
+/*
+ * Do part of a backwards memory copy. Here toaddr and fromaddr point
+ * to the *last* byte to be copied.
+ */
+static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr,
+ uint64_t fromaddr,
+ uint64_t copysize, int wmemidx, int rmemidx,
+ uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra)
+{
+ void *rmem;
+ void *wmem;
+
+ /* Don't cross a page boundary on either source or destination */
+ copysize = MIN(copysize, page_limit_rev(toaddr));
+ copysize = MIN(copysize, page_limit_rev(fromaddr));
+
+ /*
+ * Handle MTE tag checks: either handle the tag mismatch for byte 0,
+ * or else copy up to but not including the byte with the mismatch.
+ */
+ if (*rdesc) {
+ uint64_t mtesize = mte_mops_probe_rev(env, fromaddr, copysize, *rdesc);
+ if (mtesize == 0) {
+ mte_check_fail(env, *rdesc, fromaddr, ra);
+ *rdesc = 0;
+ } else {
+ copysize = MIN(copysize, mtesize);
+ }
+ }
+ if (*wdesc) {
+ uint64_t mtesize = mte_mops_probe_rev(env, toaddr, copysize, *wdesc);
+ if (mtesize == 0) {
+ mte_check_fail(env, *wdesc, toaddr, ra);
+ *wdesc = 0;
+ } else {
+ copysize = MIN(copysize, mtesize);
+ }
+ }
+
+ toaddr = useronly_clean_ptr(toaddr);
+ fromaddr = useronly_clean_ptr(fromaddr);
+ /* Trapless lookup of whether we can get a host memory pointer */
+ wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx);
+ rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx);
+
+#ifndef CONFIG_USER_ONLY
+ /*
+ * If we don't have host memory for both source and dest then just
+ * do a single byte copy. This will handle watchpoints, invalid pages,
+ * etc correctly. For clean code pages, the next iteration will see
+ * the page dirty and will use the fast path.
+ */
+ if (unlikely(!rmem || !wmem)) {
+ uint8_t byte;
+ if (rmem) {
+ byte = *(uint8_t *)rmem;
+ } else {
+ byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra);
+ }
+ if (wmem) {
+ *(uint8_t *)wmem = byte;
+ } else {
+ cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra);
+ }
+ return 1;
+ }
+#endif
+ /*
+ * Easy case: just memmove the host memory. Note that wmem and
+ * rmem here point to the *last* byte to copy.
+ */
+ memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize);
+ return copysize;
+}
+
+/*
+ * For the Memory Copy operation, our implementation chooses always
+ * to use "option A", where we update Xd and Xs to the final addresses
+ * in the CPYP insn, and then in CPYM and CPYE only need to update Xn.
+ *
+ * @env: CPU
+ * @syndrome: syndrome value for mismatch exceptions
+ * (also contains the register numbers we need to use)
+ * @wdesc: MTE descriptor for the writes (destination)
+ * @rdesc: MTE descriptor for the reads (source)
+ * @move: true if this is CPY (memmove), false for CPYF (memcpy forwards)
+ */
+static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
+ uint32_t rdesc, uint32_t move, uintptr_t ra)
+{
+ int rd = mops_destreg(syndrome);
+ int rs = mops_srcreg(syndrome);
+ int rn = mops_sizereg(syndrome);
+ uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
+ uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
+ bool forwards = true;
+ uint64_t toaddr = env->xregs[rd];
+ uint64_t fromaddr = env->xregs[rs];
+ uint64_t copysize = env->xregs[rn];
+ uint64_t stagecopysize, step;
+
+ check_mops_enabled(env, ra);
+
+ if (move) {
+ /*
+ * Copy backwards if necessary. The direction for a non-overlapping
+ * copy is IMPDEF; we choose forwards.
+ */
+ if (copysize > 0x007FFFFFFFFFFFFFULL) {
+ copysize = 0x007FFFFFFFFFFFFFULL;
+ }
+ uint64_t fs = extract64(fromaddr, 0, 56);
+ uint64_t ts = extract64(toaddr, 0, 56);
+ uint64_t fe = extract64(fromaddr + copysize, 0, 56);
+
+ if (fs < ts && fe > ts) {
+ forwards = false;
+ }
+ } else {
+ if (copysize > INT64_MAX) {
+ copysize = INT64_MAX;
+ }
+ }
+
+ if (!mte_checks_needed(fromaddr, rdesc)) {
+ rdesc = 0;
+ }
+ if (!mte_checks_needed(toaddr, wdesc)) {
+ wdesc = 0;
+ }
+
+ if (forwards) {
+ stagecopysize = MIN(copysize, page_limit(toaddr));
+ stagecopysize = MIN(stagecopysize, page_limit(fromaddr));
+ while (stagecopysize) {
+ env->xregs[rd] = toaddr;
+ env->xregs[rs] = fromaddr;
+ env->xregs[rn] = copysize;
+ step = copy_step(env, toaddr, fromaddr, stagecopysize,
+ wmemidx, rmemidx, &wdesc, &rdesc, ra);
+ toaddr += step;
+ fromaddr += step;
+ copysize -= step;
+ stagecopysize -= step;
+ }
+ /* Insn completed, so update registers to the Option A format */
+ env->xregs[rd] = toaddr + copysize;
+ env->xregs[rs] = fromaddr + copysize;
+ env->xregs[rn] = -copysize;
+ } else {
+ /*
+ * In a reverse copy the to and from addrs in Xs and Xd are the start
+ * of the range, but it's more convenient for us to work with pointers
+ * to the last byte being copied.
+ */
+ toaddr += copysize - 1;
+ fromaddr += copysize - 1;
+ stagecopysize = MIN(copysize, page_limit_rev(toaddr));
+ stagecopysize = MIN(stagecopysize, page_limit_rev(fromaddr));
+ while (stagecopysize) {
+ env->xregs[rn] = copysize;
+ step = copy_step_rev(env, toaddr, fromaddr, stagecopysize,
+ wmemidx, rmemidx, &wdesc, &rdesc, ra);
+ copysize -= step;
+ stagecopysize -= step;
+ toaddr -= step;
+ fromaddr -= step;
+ }
+ /*
+ * Insn completed, so update registers to the Option A format.
+ * For a reverse copy this is no different to the CPYP input format.
+ */
+ env->xregs[rn] = copysize;
+ }
+
+ /* Set NZCV = 0000 to indicate we are an Option A implementation */
+ env->NF = 0;
+ env->ZF = 1; /* our env->ZF encoding is inverted */
+ env->CF = 0;
+ env->VF = 0;
+ return;
+}
+
+void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
+ uint32_t rdesc)
+{
+ do_cpyp(env, syndrome, wdesc, rdesc, true, GETPC());
+}
+
+void HELPER(cpyfp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
+ uint32_t rdesc)
+{
+ do_cpyp(env, syndrome, wdesc, rdesc, false, GETPC());
+}
+
+static void do_cpym(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
+ uint32_t rdesc, uint32_t move, uintptr_t ra)
+{
+ /* Main: we choose to copy until less than a page remains */
+ CPUState *cs = env_cpu(env);
+ int rd = mops_destreg(syndrome);
+ int rs = mops_srcreg(syndrome);
+ int rn = mops_sizereg(syndrome);
+ uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
+ uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
+ bool forwards = true;
+ uint64_t toaddr, fromaddr, copysize, step;
+
+ check_mops_enabled(env, ra);
+
+ /* We choose to NOP out "no data to copy" before consistency checks */
+ if (env->xregs[rn] == 0) {
+ return;
+ }
+
+ check_mops_wrong_option(env, syndrome, ra);
+
+ if (move) {
+ forwards = (int64_t)env->xregs[rn] < 0;
+ }
+
+ if (forwards) {
+ toaddr = env->xregs[rd] + env->xregs[rn];
+ fromaddr = env->xregs[rs] + env->xregs[rn];
+ copysize = -env->xregs[rn];
+ } else {
+ copysize = env->xregs[rn];
+ /* This toaddr and fromaddr point to the *last* byte to copy */
+ toaddr = env->xregs[rd] + copysize - 1;
+ fromaddr = env->xregs[rs] + copysize - 1;
+ }
+
+ if (!mte_checks_needed(fromaddr, rdesc)) {
+ rdesc = 0;
+ }
+ if (!mte_checks_needed(toaddr, wdesc)) {
+ wdesc = 0;
+ }
+
+ /* Our implementation has no particular parameter requirements for CPYM */
+
+ /* Do the actual memmove */
+ if (forwards) {
+ while (copysize >= TARGET_PAGE_SIZE) {
+ step = copy_step(env, toaddr, fromaddr, copysize,
+ wmemidx, rmemidx, &wdesc, &rdesc, ra);
+ toaddr += step;
+ fromaddr += step;
+ copysize -= step;
+ env->xregs[rn] = -copysize;
+ if (copysize >= TARGET_PAGE_SIZE &&
+ unlikely(cpu_loop_exit_requested(cs))) {
+ cpu_loop_exit_restore(cs, ra);
+ }
+ }
+ } else {
+ while (copysize >= TARGET_PAGE_SIZE) {
+ step = copy_step_rev(env, toaddr, fromaddr, copysize,
+ wmemidx, rmemidx, &wdesc, &rdesc, ra);
+ toaddr -= step;
+ fromaddr -= step;
+ copysize -= step;
+ env->xregs[rn] = copysize;
+ if (copysize >= TARGET_PAGE_SIZE &&
+ unlikely(cpu_loop_exit_requested(cs))) {
+ cpu_loop_exit_restore(cs, ra);
+ }
+ }
+ }
+}
+
+void HELPER(cpym)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
+ uint32_t rdesc)
+{
+ do_cpym(env, syndrome, wdesc, rdesc, true, GETPC());
+}
+
+void HELPER(cpyfm)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
+ uint32_t rdesc)
+{
+ do_cpym(env, syndrome, wdesc, rdesc, false, GETPC());
+}
+
+static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
+ uint32_t rdesc, uint32_t move, uintptr_t ra)
+{
+ /* Epilogue: do the last partial page */
+ int rd = mops_destreg(syndrome);
+ int rs = mops_srcreg(syndrome);
+ int rn = mops_sizereg(syndrome);
+ uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX);
+ uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX);
+ bool forwards = true;
+ uint64_t toaddr, fromaddr, copysize, step;
+
+ check_mops_enabled(env, ra);
+
+ /* We choose to NOP out "no data to copy" before consistency checks */
+ if (env->xregs[rn] == 0) {
+ return;
+ }
+
+ check_mops_wrong_option(env, syndrome, ra);
+
+ if (move) {
+ forwards = (int64_t)env->xregs[rn] < 0;
+ }
+
+ if (forwards) {
+ toaddr = env->xregs[rd] + env->xregs[rn];
+ fromaddr = env->xregs[rs] + env->xregs[rn];
+ copysize = -env->xregs[rn];
+ } else {
+ copysize = env->xregs[rn];
+ /* This toaddr and fromaddr point to the *last* byte to copy */
+ toaddr = env->xregs[rd] + copysize - 1;
+ fromaddr = env->xregs[rs] + copysize - 1;
+ }
+
+ if (!mte_checks_needed(fromaddr, rdesc)) {
+ rdesc = 0;
+ }
+ if (!mte_checks_needed(toaddr, wdesc)) {
+ wdesc = 0;
+ }
+
+ /* Check the size; we don't want to have to do a check-for-interrupts */
+ if (copysize >= TARGET_PAGE_SIZE) {
+ raise_exception_ra(env, EXCP_UDEF, syndrome,
+ mops_mismatch_exception_target_el(env), ra);
+ }
+
+ /* Do the actual memmove */
+ if (forwards) {
+ while (copysize > 0) {
+ step = copy_step(env, toaddr, fromaddr, copysize,
+ wmemidx, rmemidx, &wdesc, &rdesc, ra);
+ toaddr += step;
+ fromaddr += step;
+ copysize -= step;
+ env->xregs[rn] = -copysize;
+ }
+ } else {
+ while (copysize > 0) {
+ step = copy_step_rev(env, toaddr, fromaddr, copysize,
+ wmemidx, rmemidx, &wdesc, &rdesc, ra);
+ toaddr -= step;
+ fromaddr -= step;
+ copysize -= step;
+ env->xregs[rn] = copysize;
+ }
+ }
+}
+
+void HELPER(cpye)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
+ uint32_t rdesc)
+{
+ do_cpye(env, syndrome, wdesc, rdesc, true, GETPC());
+}
+
+void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc,
+ uint32_t rdesc)
+{
+ do_cpye(env, syndrome, wdesc, rdesc, false, GETPC());
+}
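
As a reading aid (not part of the patch): the "option A" convention described in the do_cpyp() comment above means the prologue rewrites Xd and Xs to the final (end) addresses and Xn to the negated remaining size, so CPYM and CPYE only have to advance Xn towards zero. The standalone C sketch below illustrates that bookkeeping for a forwards copy; the xd/xs/xn variables and the 16-byte chunk size are illustrative assumptions, not QEMU code.

    /*
     * Standalone sketch of the "option A" register bookkeeping used by
     * do_cpyp/do_cpym/do_cpye above, for a forwards copy.
     */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        uint8_t src[64], dst[64];
        memset(src, 0xab, sizeof(src));
        memset(dst, 0, sizeof(dst));

        /* Architectural inputs to CPYP: start addresses, positive size */
        uint64_t xd = (uint64_t)(uintptr_t)dst;
        uint64_t xs = (uint64_t)(uintptr_t)src;
        uint64_t xn = sizeof(src);

        /* Prologue (CPYP): convert to the option A register format */
        uint64_t copysize = xn;
        xd += copysize;      /* final destination address */
        xs += copysize;      /* final source address */
        xn = -copysize;      /* negated remaining size */

        /* Main/epilogue (CPYM/CPYE): copy in chunks, updating only xn */
        while (xn != 0) {
            uint64_t remaining = -xn;
            uint64_t step = remaining > 16 ? 16 : remaining;
            memcpy((void *)(uintptr_t)(xd - remaining),
                   (void *)(uintptr_t)(xs - remaining), step);
            xn += step;
        }

        printf("copy %s\n", memcmp(src, dst, sizeof(src)) ? "bad" : "ok");
        return 0;
    }
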
diff --git a/target/arm/helper-a64.h b/target/arm/tcg/helper-a64.h
index 9d3a907049..575a5dab7d 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/tcg/helper-a64.h
@@ -6,7 +6,7 @@
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -19,13 +19,16 @@
DEF_HELPER_FLAGS_2(udiv64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_2(sdiv64, TCG_CALL_NO_RWG_SE, s64, s64, s64)
DEF_HELPER_FLAGS_1(rbit64, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_2(msr_i_spsel, void, env, i32)
+DEF_HELPER_2(msr_i_daifset, void, env, i32)
+DEF_HELPER_2(msr_i_daifclear, void, env, i32)
DEF_HELPER_3(vfp_cmph_a64, i64, f16, f16, ptr)
DEF_HELPER_3(vfp_cmpeh_a64, i64, f16, f16, ptr)
DEF_HELPER_3(vfp_cmps_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
-DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
+DEF_HELPER_FLAGS_4(simd_tblx, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
@@ -47,14 +50,6 @@ DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_le, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_le_parallel, TCG_CALL_NO_WG,
- i64, env, i64, i64, i64)
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
-DEF_HELPER_FLAGS_4(paired_cmpxchg64_be_parallel, TCG_CALL_NO_WG,
- i64, env, i64, i64, i64)
-DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64)
-DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64)
DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
@@ -85,3 +80,54 @@ DEF_HELPER_2(advsimd_rinth, f16, f16, ptr)
DEF_HELPER_2(advsimd_f16tosinth, i32, f16, ptr)
DEF_HELPER_2(advsimd_f16touinth, i32, f16, ptr)
DEF_HELPER_2(sqrt_f16, f16, f16, ptr)
+
+DEF_HELPER_2(exception_return, void, env, i64)
+DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64)
+
+DEF_HELPER_FLAGS_3(pacia, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(pacib, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(pacda, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(pacdb, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(pacga, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(autia, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(autia_combined, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(autib, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(autib_combined, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(autda, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(autda_combined, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(autdb_combined, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64)
+DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64)
+
+DEF_HELPER_FLAGS_3(mte_check, TCG_CALL_NO_WG, i64, env, i32, i64)
+DEF_HELPER_FLAGS_3(mte_check_zva, TCG_CALL_NO_WG, i64, env, i32, i64)
+DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32)
+DEF_HELPER_FLAGS_3(ldg, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(stg, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(stg_parallel, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_2(stg_stub, TCG_CALL_NO_WG, void, env, i64)
+DEF_HELPER_FLAGS_3(st2g, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64)
+DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64)
+DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64)
+
+DEF_HELPER_FLAGS_4(unaligned_access, TCG_CALL_NO_WG,
+ noreturn, env, i64, i32, i32)
+
+DEF_HELPER_3(setp, void, env, i32, i32)
+DEF_HELPER_3(setm, void, env, i32, i32)
+DEF_HELPER_3(sete, void, env, i32, i32)
+DEF_HELPER_3(setgp, void, env, i32, i32)
+DEF_HELPER_3(setgm, void, env, i32, i32)
+DEF_HELPER_3(setge, void, env, i32, i32)
+
+DEF_HELPER_4(cpyp, void, env, i32, i32, i32)
+DEF_HELPER_4(cpym, void, env, i32, i32, i32)
+DEF_HELPER_4(cpye, void, env, i32, i32, i32)
+DEF_HELPER_4(cpyfp, void, env, i32, i32, i32)
+DEF_HELPER_4(cpyfm, void, env, i32, i32, i32)
+DEF_HELPER_4(cpyfe, void, env, i32, i32, i32)
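
As a reading aid (not part of the patch): each DEF_HELPER_* line above declares a C helper; the definitions for these MOPS helpers are the HELPER(cpyp)/HELPER(cpym)/HELPER(cpye) functions added to helper-a64.c earlier in this diff. Assuming QEMU's usual conventions (HELPER(name) expands to helper_name, "env" maps to the CPU state pointer, i32/i64 map to uint32_t/uint64_t), a declaration such as DEF_HELPER_4(cpyp, void, env, i32, i32, i32) corresponds to a prototype like the sketch below. The TCG_CALL_NO_WG / TCG_CALL_NO_RWG / *_SE flags in the FLAGS variants are, roughly, hints about whether the helper may write or read TCG globals and whether it is side-effect free; they affect code generation, not the C signature.

    /* Illustrative prototype only; the real one is generated by QEMU's
     * helper macros from the DEF_HELPER_4(cpyp, ...) line above. */
    #include <stdint.h>

    typedef struct CPUARMState CPUARMState;   /* opaque in this sketch */

    void helper_cpyp(CPUARMState *env, uint32_t syndrome,
                     uint32_t wdesc, uint32_t rdesc);
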
diff --git a/target/arm/tcg/helper-mve.h b/target/arm/tcg/helper-mve.h
new file mode 100644
index 0000000000..76bd25006d
--- /dev/null
+++ b/target/arm/tcg/helper-mve.h
@@ -0,0 +1,890 @@
+/*
+ * M-profile MVE specific helper definitions
+ *
+ * Copyright (c) 2021 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+DEF_HELPER_FLAGS_3(mve_vldrb, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vldrh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vldrw, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vstrb, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vstrh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vstrw, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vldrb_sh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vldrb_sw, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vldrb_uh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vldrb_uw, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vldrh_sw, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vldrh_uw, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vstrb_h, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vstrb_w, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vstrh_w, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrb_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrw_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrd_sg_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vstrb_sg_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrb_sg_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrb_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrh_sg_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrh_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrw_sg_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrd_sg_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_os_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_os_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrh_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrw_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrd_sg_os_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vstrh_sg_os_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrh_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrw_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrd_sg_os_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vldrw_sg_wb_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrd_sg_wb_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrw_sg_wb_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrd_sg_wb_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vld20b, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vld20h, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vld20w, TCG_CALL_NO_WG, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_vld21b, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vld21h, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vld21w, TCG_CALL_NO_WG, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_vld40b, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vld40h, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vld40w, TCG_CALL_NO_WG, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_vld41b, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vld41h, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vld41w, TCG_CALL_NO_WG, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_vld42b, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vld42h, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vld42w, TCG_CALL_NO_WG, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_vld43b, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vld43h, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vld43w, TCG_CALL_NO_WG, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_vst20b, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vst20h, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vst20w, TCG_CALL_NO_WG, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_vst21b, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vst21h, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vst21w, TCG_CALL_NO_WG, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_vst40b, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vst40h, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vst40w, TCG_CALL_NO_WG, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_vst41b, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vst41h, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vst41w, TCG_CALL_NO_WG, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_vst42b, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vst42h, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vst42w, TCG_CALL_NO_WG, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_vst43b, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vst43h, TCG_CALL_NO_WG, void, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_vst43w, TCG_CALL_NO_WG, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_vdup, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vidupb, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)
+DEF_HELPER_FLAGS_4(mve_viduph, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)
+DEF_HELPER_FLAGS_4(mve_vidupw, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)
+
+DEF_HELPER_FLAGS_5(mve_viwdupb, TCG_CALL_NO_WG, i32, env, ptr, i32, i32, i32)
+DEF_HELPER_FLAGS_5(mve_viwduph, TCG_CALL_NO_WG, i32, env, ptr, i32, i32, i32)
+DEF_HELPER_FLAGS_5(mve_viwdupw, TCG_CALL_NO_WG, i32, env, ptr, i32, i32, i32)
+
+DEF_HELPER_FLAGS_5(mve_vdwdupb, TCG_CALL_NO_WG, i32, env, ptr, i32, i32, i32)
+DEF_HELPER_FLAGS_5(mve_vdwduph, TCG_CALL_NO_WG, i32, env, ptr, i32, i32, i32)
+DEF_HELPER_FLAGS_5(mve_vdwdupw, TCG_CALL_NO_WG, i32, env, ptr, i32, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_vclsb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vclsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vclsw, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vclzb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vclzh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vclzw, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vrev16b, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vrev32b, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vrev32h, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vrev64b, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vrev64h, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vrev64w, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vmvn, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vabsb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vabsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vabsw, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vfabsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vfabss, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vnegb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vnegh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vnegw, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vfnegh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vfnegs, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vqabsb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqabsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqabsw, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vqnegb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqnegh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqnegw, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vmaxab, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vmaxah, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vmaxaw, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vminab, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vminah, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vminaw, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vcvt_rm_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vcvt_rm_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vcvt_rm_ss, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vcvt_rm_us, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vcvtb_sh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcvtt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcvtb_hs, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcvtt_hs, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vmovnbb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vmovnbh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vmovntb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vmovnth, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vqmovunbb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovunbh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovuntb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovunth, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vqmovnbsb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovnbsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovntsb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovntsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vqmovnbub, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovnbuh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovntub, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovntuh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vand, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vbic, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vorr, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vorn, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_veor, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vpsel, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_1(mve_vpnot, TCG_CALL_NO_WG, void, env)
+
+DEF_HELPER_FLAGS_2(mve_vctp, TCG_CALL_NO_WG, void, env, i32)
+
+DEF_HELPER_FLAGS_4(mve_vaddb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vaddh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vaddw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vsubb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vsubh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vsubw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vmulb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vmulhsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulhsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulhsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulhub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulhuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulhuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vrmulhsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrmulhsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrmulhsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrmulhub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrmulhuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrmulhuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vmaxsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmaxsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmaxsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmaxub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmaxuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmaxuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vminsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vminsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vminsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vminub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vminuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vminuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vabdsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vabdsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vabdsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vabdub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vabduh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vabduw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vhaddsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vhaddsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vhaddsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vhaddub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vhadduh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vhadduw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vhsubsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vhsubsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vhsubsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vhsubub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vhsubuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vhsubuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vmullbsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmullbsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmullbsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmullbub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmullbuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmullbuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vmulltsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulltsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulltsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulltub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulltuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmulltuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vmullpbh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmullpth, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmullpbw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmullptw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqdmulhb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqdmulhh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqdmulhw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqrdmulhb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqrdmulhh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqrdmulhw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqaddsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqaddsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqaddsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqaddub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqadduh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqadduw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqsubsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqsubsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqsubsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqsubub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqsubuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqsubuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vshlsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vshlsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vshlsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vshlub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vshluh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vshluw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vrshlsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrshlsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrshlsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vrshlub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrshluh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrshluw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqshlsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqshlsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqshlsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqshlub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqshluh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqshluw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqrshlsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqrshlsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqrshlsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqrshlub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqrshluh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqrshluw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqdmladhb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqdmladhh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqdmladhw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqdmladhxb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqdmladhxh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqdmladhxw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqrdmladhb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqrdmladhh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqrdmladhw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqrdmladhxb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqrdmladhxh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqrdmladhxw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqdmlsdhb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqdmlsdhh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqdmlsdhw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqdmlsdhxb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqdmlsdhxh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqdmlsdhxw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqrdmlsdhb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqrdmlsdhh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqrdmlsdhw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqrdmlsdhxb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqrdmlsdhxh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqrdmlsdhxw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vqdmullbh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqdmullbw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqdmullth, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vqdmulltw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vrhaddsb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrhaddsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrhaddsw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vrhaddub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrhadduh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vrhadduw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vadc, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vadci, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vsbc, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vsbci, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vcadd90b, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcadd90h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcadd90w, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vcadd270b, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcadd270h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcadd270w, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vhcadd90b, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vhcadd90h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vhcadd90w, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vhcadd270b, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vhcadd270h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vhcadd270w, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vfaddh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vfadds, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vfsubh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vfsubs, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vfmulh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vfmuls, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vfabdh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vfabds, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vmaxnmh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmaxnms, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vminnmh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vminnms, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vmaxnmah, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vmaxnmas, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vminnmah, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vminnmas, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vfcadd90h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vfcadd90s, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vfcadd270h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vfcadd270s, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vfmah, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vfmas, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vfmsh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vfmss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vcmul0h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcmul0s, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcmul90h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcmul90s, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcmul180h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcmul180s, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcmul270h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcmul270s, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vcmla0h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcmla0s, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcmla90h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcmla90s, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcmla180h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcmla180s, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcmla270h, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vcmla270s, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+
+DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vsub_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vsub_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vsub_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vmul_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmul_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmul_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vhadds_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vhadds_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vhadds_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vhaddu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vhaddu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vhaddu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vhsubs_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vhsubs_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vhsubs_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vhsubu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vhsubu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vhsubu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqadds_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqadds_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqadds_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqaddu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqaddu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqaddu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqsubs_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqsubs_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqsubs_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqsubu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqsubu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqsubu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqdmulh_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqdmulh_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqdmulh_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrdmulh_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrdmulh_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrdmulh_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vbrsrb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vbrsrh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vbrsrw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqdmullb_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqdmullb_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqdmullt_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqdmullt_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vmlab, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmlah, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmlaw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vmlasb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmlash, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmlasw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqdmlahb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqdmlahh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqdmlahw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrdmlahb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrdmlahh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrdmlahw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqdmlashb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqdmlashh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqdmlashw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrdmlashb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrdmlashh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrdmlashw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vmlaldavsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+DEF_HELPER_FLAGS_4(mve_vmlaldavsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+DEF_HELPER_FLAGS_4(mve_vmlaldavxsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+DEF_HELPER_FLAGS_4(mve_vmlaldavxsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+
+DEF_HELPER_FLAGS_4(mve_vmlaldavuh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+DEF_HELPER_FLAGS_4(mve_vmlaldavuw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+
+DEF_HELPER_FLAGS_4(mve_vmlsldavsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+DEF_HELPER_FLAGS_4(mve_vmlsldavsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+DEF_HELPER_FLAGS_4(mve_vmlsldavxsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+DEF_HELPER_FLAGS_4(mve_vmlsldavxsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+
+DEF_HELPER_FLAGS_4(mve_vrmlaldavhsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+DEF_HELPER_FLAGS_4(mve_vrmlaldavhxsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+
+DEF_HELPER_FLAGS_4(mve_vrmlaldavhuw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+
+DEF_HELPER_FLAGS_4(mve_vrmlsldavhsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+DEF_HELPER_FLAGS_4(mve_vrmlsldavhxsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
+
+DEF_HELPER_FLAGS_4(mve_vmladavsb, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmladavsh, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmladavsw, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmladavub, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmladavuh, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmladavuw, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmlsdavb, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmlsdavh, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmlsdavw, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vmladavsxb, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmladavsxh, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmladavsxw, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmlsdavxb, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmlsdavxh, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vmlsdavxw, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vaddvsb, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vaddvub, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vaddvsh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vaddvuh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vaddvsw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vaddvuw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vmaxvsb, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxvsh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxvsw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxvub, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxvuh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxvuw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxavb, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxavh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxavw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vminvsb, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminvsh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminvsw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminvub, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminvuh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminvuw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminavb, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminavh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminavw, TCG_CALL_NO_WG, i32, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vmaxnmvh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxnmvs, TCG_CALL_NO_WG, i32, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vminnmvh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminnmvs, TCG_CALL_NO_WG, i32, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vmaxnmavh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vmaxnmavs, TCG_CALL_NO_WG, i32, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vminnmavh, TCG_CALL_NO_WG, i32, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vminnmavs, TCG_CALL_NO_WG, i32, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vaddlv_s, TCG_CALL_NO_WG, i64, env, ptr, i64)
+DEF_HELPER_FLAGS_3(mve_vaddlv_u, TCG_CALL_NO_WG, i64, env, ptr, i64)
+
+DEF_HELPER_FLAGS_4(mve_vabavsb, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vabavsh, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vabavsw, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vabavub, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vabavuh, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vabavuw, TCG_CALL_NO_WG, i32, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vmovi, TCG_CALL_NO_WG, void, env, ptr, i64)
+DEF_HELPER_FLAGS_3(mve_vandi, TCG_CALL_NO_WG, void, env, ptr, i64)
+DEF_HELPER_FLAGS_3(mve_vorri, TCG_CALL_NO_WG, void, env, ptr, i64)
+
+DEF_HELPER_FLAGS_4(mve_vshli_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshli_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshli_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshli_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshlui_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshlui_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshlui_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vrshli_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrshli_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vrshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrshli_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshli_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshli_sw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrshli_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshli_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshli_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vshllbsb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshllbsh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshllbub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshllbuh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshlltsb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshlltsh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshlltub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshlltuh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vsrib, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vsrih, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vsriw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vslib, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vslih, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vsliw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vshrnbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshrnbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshrntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vshrnth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vrshrnbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrshrnbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrshrntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrshrnth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshrnb_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnb_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnt_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshrnb_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnb_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnt_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrnt_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqshrunbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrunbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshruntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_sb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnb_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_ub, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrnt_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqrshrunbb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrunbh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshruntb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqrshrunth, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vshlc, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_sshrl, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_ushll, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_sqshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_uqshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_sqrshrl, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_uqrshll, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_sqrshrl48, TCG_CALL_NO_RWG, i64, env, i64, i32)
+DEF_HELPER_FLAGS_3(mve_uqrshll48, TCG_CALL_NO_RWG, i64, env, i64, i32)
+
+DEF_HELPER_FLAGS_3(mve_uqshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_sqshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_uqrshl, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(mve_sqrshr, TCG_CALL_NO_RWG, i32, env, i32, i32)
+
+DEF_HELPER_FLAGS_3(mve_vcmpeqb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmpeqh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmpeqw, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vcmpneb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmpneh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmpnew, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vcmpcsb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmpcsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmpcsw, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vcmphib, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmphih, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmphiw, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vcmpgeb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmpgeh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmpgew, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vcmpltb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmplth, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmpltw, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vcmpgtb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmpgth, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmpgtw, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vcmpleb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmpleh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcmplew, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vcmpeq_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmpeq_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmpeq_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vcmpne_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmpne_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmpne_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vcmpcs_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmpcs_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmpcs_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vcmphi_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmphi_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmphi_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vcmpge_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmpge_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmpge_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vcmplt_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmplt_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmplt_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vcmpgt_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmpgt_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmpgt_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vcmple_scalarb, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmple_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vcmple_scalarw, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vfcmpeqh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vfcmpeqs, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vfcmpneh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vfcmpnes, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vfcmpgeh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vfcmpges, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vfcmplth, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vfcmplts, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vfcmpgth, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vfcmpgts, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vfcmpleh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vfcmples, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vfcmpeq_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vfcmpeq_scalars, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vfcmpne_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vfcmpne_scalars, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vfcmpge_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vfcmpge_scalars, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vfcmplt_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vfcmplt_scalars, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vfcmpgt_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vfcmpgt_scalars, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vfcmple_scalarh, TCG_CALL_NO_WG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_3(mve_vfcmple_scalars, TCG_CALL_NO_WG, void, env, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vfadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vfadd_scalars, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vfsub_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vfsub_scalars, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vfmul_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vfmul_scalars, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vfma_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vfma_scalars, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vfmas_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vfmas_scalars, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vcvt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vcvt_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vcvt_hs, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vcvt_hu, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vcvt_sf, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vcvt_uf, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vcvt_fs, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vcvt_fu, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vrint_rm_h, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vrint_rm_s, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(mve_vrintx_h, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vrintx_s, TCG_CALL_NO_WG, void, env, ptr, ptr)
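(Illustrative aside, not part of the patch.) Each DEF_HELPER_FLAGS_N line above only declares a helper: the same helper-mve.h is reprocessed to emit the C prototype, via the HELPER() name macro, and a gen_helper_* emitter for the translator, while TCG_CALL_NO_WG / TCG_CALL_NO_RWG tell TCG that the helper does not write, or neither reads nor writes, TCG globals. A minimal sketch of that pattern for mve_vcmpeqb, with a placeholder body and hypothetical variable names:

    /* Declaration, as in helper-mve.h above: returns void, takes
     * (env, ptr, ptr); env expands to CPUARMState *, ptr to void *. */
    DEF_HELPER_FLAGS_3(mve_vcmpeqb, TCG_CALL_NO_WG, void, env, ptr, ptr)

    /* Matching definition (the real one lives in mve_helper.c; this
     * body is only a placeholder). */
    void HELPER(mve_vcmpeqb)(CPUARMState *env, void *vn, void *vm)
    {
        /* roughly: byte-wise compare of *vn and *vm, with the result
         * folded into the VPR predicate flags */
    }

    /* Emission from the translator: gen_helper_mve_vcmpeqb() is
     * generated from the declaration; qn/qm are hypothetical TCGv_ptr
     * values addressing the two vector registers. */
    static void emit_vcmpeqb(TCGv_ptr qn, TCGv_ptr qm)
    {
        gen_helper_mve_vcmpeqb(cpu_env, qn, qm);
    }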
diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h
new file mode 100644
index 0000000000..27eef49a11
--- /dev/null
+++ b/target/arm/tcg/helper-sme.h
@@ -0,0 +1,146 @@
+/*
+ * AArch64 SME specific helper definitions
+ *
+ * Copyright (c) 2022 Linaro, Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+DEF_HELPER_FLAGS_3(set_svcr, TCG_CALL_NO_RWG, void, env, i32, i32)
+
+DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
+
+/* Move to/from vertical array slices, i.e. columns, so 'c'. */
+DEF_HELPER_FLAGS_4(sme_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_5(sme_st1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_5(sme_st1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_5(sme_st1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_sumopa_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_usmopa_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_smopa_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_umopa_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
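(Illustrative aside, not part of the patch.) The sme_ld1*/sme_st1* helpers above multiply out along several axes: element size (b/h/s/d/q), horizontal versus vertical ZA slice (_h/_v), big versus little endian (_be/_le, for elements wider than a byte), and MTE-checked versus unchecked (_mte). A translator would normally pick the variant through a small function-pointer table rather than open-coding the choice; the sketch below shows the idea for LD1B only, and the typedef, table shape and variable names are assumptions for illustration, not QEMU's actual translate-sme.c code.

    /* Signature matching the declarations above: (env, ptr, ptr, tl, i32). */
    typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);

    /* Indexed [vertical slice?][MTE checked?]; byte accesses have no
     * endianness axis. */
    static GenLdSt1 * const ld1b_fns[2][2] = {
        { gen_helper_sme_ld1b_h, gen_helper_sme_ld1b_h_mte },
        { gen_helper_sme_ld1b_v, gen_helper_sme_ld1b_v_mte },
    };

    static void do_ld1b(TCGv_ptr za, TCGv_ptr pg, TCGv addr, TCGv_i32 desc,
                        bool vertical, bool mte)
    {
        ld1b_fns[vertical][mte](cpu_env, za, pg, addr, desc);
    }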
diff --git a/target/arm/helper-sve.h b/target/arm/tcg/helper-sve.h
index 9e79182ab4..cc4e1d8948 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/tcg/helper-sve.h
@@ -6,7 +6,7 @@
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -158,6 +158,128 @@ DEF_HELPER_FLAGS_5(sve_umulh_zpzz_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(sve_umulh_zpzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sadalp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sadalp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sadalp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_uadalp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uadalp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uadalp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_srshl_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_srshl_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_srshl_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_srshl_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_urshl_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_urshl_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_urshl_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_urshl_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sqshl_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqshl_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqshl_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqshl_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_uqshl_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uqshl_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uqshl_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uqshl_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sqrshl_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrshl_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrshl_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrshl_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_uqrshl_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uqrshl_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uqrshl_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uqrshl_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_shadd_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_shadd_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_shadd_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_shadd_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_uhadd_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uhadd_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uhadd_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uhadd_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_srhadd_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_srhadd_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_srhadd_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_srhadd_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_urhadd_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_urhadd_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_urhadd_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_urhadd_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_shsub_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_shsub_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_shsub_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_shsub_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_uhsub_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uhsub_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uhsub_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uhsub_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_5(sve_sdiv_zpzz_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_sdiv_zpzz_d, TCG_CALL_NO_RWG,
@@ -203,6 +325,107 @@ DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_q, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_addp_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_addp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_addp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_addp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_smaxp_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smaxp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smaxp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smaxp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_umaxp_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umaxp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umaxp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umaxp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sminp_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sminp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sminp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sminp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_uminp_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uminp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uminp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uminp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sqadd_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqadd_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqadd_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqadd_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_uqadd_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uqadd_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uqadd_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uqadd_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sqsub_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqsub_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqsub_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqsub_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_uqsub_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uqsub_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uqsub_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uqsub_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_suqadd_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_suqadd_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_suqadd_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_suqadd_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_usqadd_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_usqadd_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_usqadd_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_usqadd_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_asr_zpzw_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
@@ -269,11 +492,6 @@ DEF_HELPER_FLAGS_3(sve_uminv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_uminv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_uminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
DEF_HELPER_FLAGS_4(sve_movz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_movz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_movz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -445,6 +663,16 @@ DEF_HELPER_FLAGS_4(sve_tbl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_tbl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_tbl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_tbl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_tbl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_tbl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_tbl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_tbx_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_tbx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_tbx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_tbx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_3(sve_sunpk_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_sunpk_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_sunpk_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
@@ -463,16 +691,19 @@ DEF_HELPER_FLAGS_4(sve_zip_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_zip_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_zip_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_zip_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_zip_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_uzp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_uzp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_uzp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_uzp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uzp_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_trn_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_trn_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_trn_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_trn_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_trn_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_compact_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_compact_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -488,11 +719,26 @@ DEF_HELPER_FLAGS_4(sve_revh_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_revw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_revd_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(sve_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_rbit_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqabs_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqneg_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_urecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_ursqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_5(sve_splice, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_cmpeq_ppzz_b, TCG_CALL_NO_RWG,
@@ -684,7 +930,8 @@ DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_while, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
+DEF_HELPER_FLAGS_3(sve_whilel, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
+DEF_HELPER_FLAGS_3(sve_whileg, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
DEF_HELPER_FLAGS_4(sve_subri_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(sve_subri_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
@@ -954,6 +1201,8 @@ DEF_HELPER_FLAGS_5(sve_fcvt_hd, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_fcvt_sd, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_bfcvt, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_fcvtzs_hh, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
@@ -1099,29 +1348,84 @@ DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_ftmad_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_ftmad_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_saddl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_saddl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_saddl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_ssubl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_ssubl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_ssubl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_fcmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sabdl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sabdl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sabdl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(sve_ftmad_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
-DEF_HELPER_FLAGS_5(sve_ftmad_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uaddl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uaddl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uaddl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_usubl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_usubl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_usubl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_uabdl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uabdl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uabdl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_saddw_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_saddw_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_saddw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_ssubw_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_ssubw_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_ssubw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_uaddw_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uaddw_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uaddw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_usubw_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_usubw_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_usubw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_ld1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1181,6 +1485,64 @@ DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1sds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1sds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1212,6 +1574,55 @@ DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1243,6 +1654,55 @@ DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1290,6 +1750,53 @@ DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1bh_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1bs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1bd_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1hs_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hs_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1sd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1sd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG,
@@ -1399,6 +1906,115 @@ DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG,
DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbsu_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbss_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldbsu_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbss_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldbdu_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbds_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldbdu_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbds_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldbdu_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbds_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu, TCG_CALL_NO_WG,
@@ -1508,6 +2124,115 @@ DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd, TCG_CALL_NO_WG,
DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbss_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldffbsu_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbss_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbds_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldffbdu_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbds_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldffbdu_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbds_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
DEF_HELPER_FLAGS_6(sve_stbs_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_sths_le_zsu, TCG_CALL_NO_WG,
@@ -1574,3 +2299,506 @@ DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_stbs_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sths_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sths_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_stbs_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sths_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sths_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_stbd_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_stbd_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_stbd_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqdmull_zzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmull_zzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmull_zzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_smull_zzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_smull_zzz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_smull_zzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_umull_zzz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_umull_zzz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_umull_zzz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_pmull_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sshll_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sshll_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sshll_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_ushll_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_ushll_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_ushll_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_eoril_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_eoril_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_eoril_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_eoril_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_bext_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_bext_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_bext_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_bext_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_bdep_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_bdep_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_bdep_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_bdep_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_bgrp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_bgrp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_bgrp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_bgrp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_cadd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_cadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_cadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_cadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqcadd_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqcadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqcadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqcadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sabal_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sabal_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sabal_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_uabal_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uabal_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uabal_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_adcl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_adcl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqxtnb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqxtnb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqxtnb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_uqxtnb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_uqxtnb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_uqxtnb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqxtunb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqxtunb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqxtunb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqxtnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqxtnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqxtnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_uqxtnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_uqxtnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_uqxtnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqxtunt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqxtunt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqxtunt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_shrnb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_shrnb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_shrnb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_shrnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_shrnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_shrnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_rshrnb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_rshrnb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_rshrnb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_rshrnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_rshrnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_rshrnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqshrunb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrunb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrunb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqshrunt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrunt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrunt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqrshrunb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrunb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrunb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqrshrunt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrunt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrunt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqshrnb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrnb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrnb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqshrnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqrshrnb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrnb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrnb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqrshrnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_uqshrnb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_uqshrnb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_uqshrnb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_uqshrnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_uqshrnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_uqshrnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_uqrshrnb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_uqrshrnb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_uqrshrnb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_uqrshrnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_uqrshrnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_uqrshrnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_addhnb_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_addhnb_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_addhnb_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_addhnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_addhnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_addhnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_raddhnb_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_raddhnb_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_raddhnb_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_raddhnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_raddhnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_raddhnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_subhnb_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_subhnb_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_subhnb_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_subhnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_subhnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_subhnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_rsubhnb_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_rsubhnb_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_rsubhnb_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_rsubhnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_rsubhnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_rsubhnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_match_ppzz_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_match_ppzz_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_nmatch_ppzz_b, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_nmatch_ppzz_h, TCG_CALL_NO_RWG,
+ i32, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_histcnt_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_histcnt_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_histseg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_xar_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_xar_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_xar_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(sve2_fmaxnmp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve2_fmaxnmp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve2_fmaxnmp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(sve2_fminnmp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve2_fminnmp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve2_fminnmp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(sve2_fmaxp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve2_fmaxp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve2_fmaxp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(sve2_fminp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve2_fminp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sve2_fminp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_eor3, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_bcax, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_bsl1n, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_bsl2n, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_nbsl, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sqdmlal_zzzw_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqdmlal_zzzw_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqdmlal_zzzw_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sqdmlsl_zzzw_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqdmlsl_zzzw_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqdmlsl_zzzw_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_smlal_zzzw_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smlal_zzzw_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smlal_zzzw_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_umlal_zzzw_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umlal_zzzw_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umlal_zzzw_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_smlsl_zzzw_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smlsl_zzzw_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smlsl_zzzw_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_umlsl_zzzw_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umlsl_zzzw_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umlsl_zzzw_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_cmla_zzzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_cmla_zzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_cmla_zzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_cmla_zzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_zzzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_zzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_zzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_zzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(fmmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(fmmla_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sqrdmlah_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlah_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlah_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdmlsh_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sqdmlal_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqdmlal_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqdmlsl_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqdmlsl_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqdmull_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqdmull_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_smlal_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smlal_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smlsl_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smlsl_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umlal_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umlal_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umlsl_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umlsl_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_smull_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_smull_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_umull_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_umull_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_cmla_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_cmla_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_cdot_zzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_cdot_zzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_cdot_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_cdot_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_fcvtnt_sh, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_fcvtnt_ds, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_bfcvtnt, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_fcvtlt_hs, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_fcvtlt_sd, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(flogb_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(flogb_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(flogb_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqshl_zpzi_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqshl_zpzi_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqshl_zpzi_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqshl_zpzi_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_uqshl_zpzi_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uqshl_zpzi_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uqshl_zpzi_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_uqshl_zpzi_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_srshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_srshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_srshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_srshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_urshr_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_urshr_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_urshr_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_urshr_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_sqshlu_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqshlu_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqshlu_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_sqshlu_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
new file mode 100644
index 0000000000..5da1b0fc1d
--- /dev/null
+++ b/target/arm/tcg/hflags.c
@@ -0,0 +1,485 @@
+/*
+ * ARM hflags
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "cpu-features.h"
+#include "exec/helper-proto.h"
+#include "cpregs.h"
+
+static inline bool fgt_svc(CPUARMState *env, int el)
+{
+ /*
+ * Assuming fine-grained-traps are active, return true if we
+ * should be trapping on SVC instructions. Only AArch64 can
+ * trap on an SVC at EL1, but we don't need to special-case this
+ * because if this is AArch32 EL1 then arm_fgt_active() is false.
+ * We also know el is 0 or 1.
+ */
+ return el == 0 ?
+ FIELD_EX64(env->cp15.fgt_exec[FGTREG_HFGITR], HFGITR_EL2, SVC_EL0) :
+ FIELD_EX64(env->cp15.fgt_exec[FGTREG_HFGITR], HFGITR_EL2, SVC_EL1);
+}
+
+/* Return true if memory alignment should be enforced. */
+static bool aprofile_require_alignment(CPUARMState *env, int el, uint64_t sctlr)
+{
+#ifdef CONFIG_USER_ONLY
+ return false;
+#else
+ /* Check the alignment enable bit. */
+ if (sctlr & SCTLR_A) {
+ return true;
+ }
+
+ /*
+ * If translation is disabled, then the default memory type is
+ * Device(-nGnRnE) instead of Normal, which requires that alignment
+ * be enforced. Since this affects all RAM, it is most efficient
+ * to handle this during translation.
+ */
+ if (sctlr & SCTLR_M) {
+ /* Translation enabled: memory type in PTE via MAIR_ELx. */
+ return false;
+ }
+ if (el < 2 && (arm_hcr_el2_eff(env) & (HCR_DC | HCR_VM))) {
+ /* Stage 2 translation enabled: memory type in PTE. */
+ return false;
+ }
+ return true;
+#endif
+}
+
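+/*
+ * Fill in the TB flag bits that are common to every mode: the FP
+ * exception EL, the core MMU index, and whether single-step is active.
+ */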
+static CPUARMTBFlags rebuild_hflags_common(CPUARMState *env, int fp_el,
+ ARMMMUIdx mmu_idx,
+ CPUARMTBFlags flags)
+{
+ DP_TBFLAG_ANY(flags, FPEXC_EL, fp_el);
+ DP_TBFLAG_ANY(flags, MMUIDX, arm_to_core_mmu_idx(mmu_idx));
+
+ if (arm_singlestep_active(env)) {
+ DP_TBFLAG_ANY(flags, SS_ACTIVE, 1);
+ }
+
+ return flags;
+}
+
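+/*
+ * Add the TB flag bits shared by all AArch32 modes (SCTLR.B, data
+ * endianness, NS) and then fold in the mode-independent flags.
+ */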
+static CPUARMTBFlags rebuild_hflags_common_32(CPUARMState *env, int fp_el,
+ ARMMMUIdx mmu_idx,
+ CPUARMTBFlags flags)
+{
+ bool sctlr_b = arm_sctlr_b(env);
+
+ if (sctlr_b) {
+ DP_TBFLAG_A32(flags, SCTLR__B, 1);
+ }
+ if (arm_cpu_data_is_big_endian_a32(env, sctlr_b)) {
+ DP_TBFLAG_ANY(flags, BE_DATA, 1);
+ }
+ DP_TBFLAG_A32(flags, NS, !access_secure_reg(env));
+
+ return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
+}
+
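+/*
+ * Rebuild the TB flags for M-profile: alignment traps, Handler mode,
+ * stack limit checking and the current security state.
+ */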
+static CPUARMTBFlags rebuild_hflags_m32(CPUARMState *env, int fp_el,
+ ARMMMUIdx mmu_idx)
+{
+ CPUARMTBFlags flags = {};
+ uint32_t ccr = env->v7m.ccr[env->v7m.secure];
+
+ /* Without HaveMainExt, CCR.UNALIGN_TRP is RES1. */
+ if (ccr & R_V7M_CCR_UNALIGN_TRP_MASK) {
+ DP_TBFLAG_ANY(flags, ALIGN_MEM, 1);
+ }
+
+ if (arm_v7m_is_handler_mode(env)) {
+ DP_TBFLAG_M32(flags, HANDLER, 1);
+ }
+
+ /*
+ * v8M always applies stack limit checks unless CCR.STKOFHFNMIGN
+ * is suppressing them because the requested execution priority
+ * is less than 0.
+ */
+ if (arm_feature(env, ARM_FEATURE_V8) &&
+ !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) &&
+ (ccr & R_V7M_CCR_STKOFHFNMIGN_MASK))) {
+ DP_TBFLAG_M32(flags, STACKCHECK, 1);
+ }
+
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY) && env->v7m.secure) {
+ DP_TBFLAG_M32(flags, SECURE, 1);
+ }
+
+ return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
+}
+
+/* This corresponds to the ARM pseudocode function IsFullA64Enabled(). */
+static bool sme_fa64(CPUARMState *env, int el)
+{
+ if (!cpu_isar_feature(aa64_sme_fa64, env_archcpu(env))) {
+ return false;
+ }
+
+ if (el <= 1 && !el_is_in_host(env, el)) {
+ if (!FIELD_EX64(env->vfp.smcr_el[1], SMCR, FA64)) {
+ return false;
+ }
+ }
+ if (el <= 2 && arm_is_el2_enabled(env)) {
+ if (!FIELD_EX64(env->vfp.smcr_el[2], SMCR, FA64)) {
+ return false;
+ }
+ }
+ if (arm_feature(env, ARM_FEATURE_EL3)) {
+ if (!FIELD_EX64(env->vfp.smcr_el[3], SMCR, FA64)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
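+/* Rebuild the TB flags for A-profile AArch32 state. */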
+static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
+ ARMMMUIdx mmu_idx)
+{
+ CPUARMTBFlags flags = {};
+ int el = arm_current_el(env);
+ uint64_t sctlr = arm_sctlr(env, el);
+
+ if (aprofile_require_alignment(env, el, sctlr)) {
+ DP_TBFLAG_ANY(flags, ALIGN_MEM, 1);
+ }
+
+ if (arm_el_is_aa64(env, 1)) {
+ DP_TBFLAG_A32(flags, VFPEN, 1);
+ }
+
+ if (el < 2 && env->cp15.hstr_el2 && arm_is_el2_enabled(env) &&
+ (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
+ DP_TBFLAG_A32(flags, HSTR_ACTIVE, 1);
+ }
+
+ if (arm_fgt_active(env, el)) {
+ DP_TBFLAG_ANY(flags, FGT_ACTIVE, 1);
+ if (fgt_svc(env, el)) {
+ DP_TBFLAG_ANY(flags, FGT_SVC, 1);
+ }
+ }
+
+ if (env->uncached_cpsr & CPSR_IL) {
+ DP_TBFLAG_ANY(flags, PSTATE__IL, 1);
+ }
+
+ /*
+ * The SME exception we are testing for is raised via
+ * AArch64.CheckFPAdvSIMDEnabled(), as called from
+ * AArch32.CheckAdvSIMDOrFPEnabled().
+ */
+ if (el == 0
+ && FIELD_EX64(env->svcr, SVCR, SM)
+ && (!arm_is_el2_enabled(env)
+ || (arm_el_is_aa64(env, 2) && !(env->cp15.hcr_el2 & HCR_TGE)))
+ && arm_el_is_aa64(env, 1)
+ && !sme_fa64(env, el)) {
+ DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1);
+ }
+
+ return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags);
+}
+
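+/* Rebuild the TB flags for AArch64 state. */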
+static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
+ ARMMMUIdx mmu_idx)
+{
+ CPUARMTBFlags flags = {};
+ ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
+ uint64_t tcr = regime_tcr(env, mmu_idx);
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ uint64_t sctlr;
+ int tbii, tbid;
+
+ DP_TBFLAG_ANY(flags, AARCH64_STATE, 1);
+
+ /* Get control bits for tagged addresses. */
+ tbid = aa64_va_parameter_tbi(tcr, mmu_idx);
+ tbii = tbid & ~aa64_va_parameter_tbid(tcr, mmu_idx);
+
+ DP_TBFLAG_A64(flags, TBII, tbii);
+ DP_TBFLAG_A64(flags, TBID, tbid);
+
+ if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
+ int sve_el = sve_exception_el(env, el);
+
+ /*
+ * If either FP or SVE is disabled, the translator does not need len.
+ * If SVE EL > FP EL, FP exception has precedence, and translator
+ * does not need SVE EL. Save potential re-translations by forcing
+ * the unneeded data to zero.
+ */
+ if (fp_el != 0) {
+ if (sve_el > fp_el) {
+ sve_el = 0;
+ }
+ } else if (sve_el == 0) {
+ DP_TBFLAG_A64(flags, VL, sve_vqm1_for_el(env, el));
+ }
+ DP_TBFLAG_A64(flags, SVEEXC_EL, sve_el);
+ }
+ if (cpu_isar_feature(aa64_sme, env_archcpu(env))) {
+ int sme_el = sme_exception_el(env, el);
+ bool sm = FIELD_EX64(env->svcr, SVCR, SM);
+
+ DP_TBFLAG_A64(flags, SMEEXC_EL, sme_el);
+ if (sme_el == 0) {
+ /* Similarly, do not compute SVL if SME is disabled. */
+ int svl = sve_vqm1_for_el_sm(env, el, true);
+ DP_TBFLAG_A64(flags, SVL, svl);
+ if (sm) {
+ /* If SVE is disabled, we will not have set VL above. */
+ DP_TBFLAG_A64(flags, VL, svl);
+ }
+ }
+ if (sm) {
+ DP_TBFLAG_A64(flags, PSTATE_SM, 1);
+ DP_TBFLAG_A64(flags, SME_TRAP_NONSTREAMING, !sme_fa64(env, el));
+ }
+ DP_TBFLAG_A64(flags, PSTATE_ZA, FIELD_EX64(env->svcr, SVCR, ZA));
+ }
+
+ sctlr = regime_sctlr(env, stage1);
+
+ if (aprofile_require_alignment(env, el, sctlr)) {
+ DP_TBFLAG_ANY(flags, ALIGN_MEM, 1);
+ }
+
+ if (arm_cpu_data_is_big_endian_a64(el, sctlr)) {
+ DP_TBFLAG_ANY(flags, BE_DATA, 1);
+ }
+
+ if (cpu_isar_feature(aa64_pauth, env_archcpu(env))) {
+ /*
+ * In order to save space in flags, we record only whether
+ * pauth is "inactive", meaning all insns are implemented as
+ * a nop, or "active" when some action must be performed.
+ * The decision of which action to take is left to a helper.
+ */
+ if (sctlr & (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB)) {
+ DP_TBFLAG_A64(flags, PAUTH_ACTIVE, 1);
+ }
+ }
+
+ if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
+ /* Note that SCTLR_EL[23].BT == SCTLR_BT1. */
+ if (sctlr & (el == 0 ? SCTLR_BT0 : SCTLR_BT1)) {
+ DP_TBFLAG_A64(flags, BT, 1);
+ }
+ }
+
+ if (cpu_isar_feature(aa64_lse2, env_archcpu(env))) {
+ if (sctlr & SCTLR_nAA) {
+ DP_TBFLAG_A64(flags, NAA, 1);
+ }
+ }
+
+ /* Compute the condition for using AccType_UNPRIV for LDTR et al. */
+ if (!(env->pstate & PSTATE_UAO)) {
+ switch (mmu_idx) {
+ case ARMMMUIdx_E10_1:
+ case ARMMMUIdx_E10_1_PAN:
+ /* FEAT_NV: NV,NV1 == 1,1 means we don't do UNPRIV accesses */
+ if ((hcr & (HCR_NV | HCR_NV1)) != (HCR_NV | HCR_NV1)) {
+ DP_TBFLAG_A64(flags, UNPRIV, 1);
+ }
+ break;
+ case ARMMMUIdx_E20_2:
+ case ARMMMUIdx_E20_2_PAN:
+ /*
+ * Note that EL20_2 is gated by HCR_EL2.E2H == 1, but EL20_0 is
+ * gated by HCR_EL2.<E2H,TGE> == '11', and so is LDTR.
+ */
+ if (env->cp15.hcr_el2 & HCR_TGE) {
+ DP_TBFLAG_A64(flags, UNPRIV, 1);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (env->pstate & PSTATE_IL) {
+ DP_TBFLAG_ANY(flags, PSTATE__IL, 1);
+ }
+
+ if (arm_fgt_active(env, el)) {
+ DP_TBFLAG_ANY(flags, FGT_ACTIVE, 1);
+ if (FIELD_EX64(env->cp15.fgt_exec[FGTREG_HFGITR], HFGITR_EL2, ERET)) {
+ DP_TBFLAG_A64(flags, TRAP_ERET, 1);
+ }
+ if (fgt_svc(env, el)) {
+ DP_TBFLAG_ANY(flags, FGT_SVC, 1);
+ }
+ }
+
+ /*
+ * ERET can also be trapped for FEAT_NV. arm_hcr_el2_eff() takes care
+ * of "is EL2 enabled" and the NV bit can only be set if FEAT_NV is present.
+ */
+ if (el == 1 && (hcr & HCR_NV)) {
+ DP_TBFLAG_A64(flags, TRAP_ERET, 1);
+ DP_TBFLAG_A64(flags, NV, 1);
+ if (hcr & HCR_NV1) {
+ DP_TBFLAG_A64(flags, NV1, 1);
+ }
+ if (hcr & HCR_NV2) {
+ DP_TBFLAG_A64(flags, NV2, 1);
+ if (hcr & HCR_E2H) {
+ DP_TBFLAG_A64(flags, NV2_MEM_E20, 1);
+ }
+ if (env->cp15.sctlr_el[2] & SCTLR_EE) {
+ DP_TBFLAG_A64(flags, NV2_MEM_BE, 1);
+ }
+ }
+ }
+
+ if (cpu_isar_feature(aa64_mte, env_archcpu(env))) {
+ /*
+ * Set MTE_ACTIVE if any access may be Checked, and leave clear
+ * if all accesses must be Unchecked:
+ * 1) If no TBI, then there are no tags in the address to check,
+ * 2) If Tag Check Override, then all accesses are Unchecked,
+ * 3) If Tag Check Fail == 0, then Checked accesses have no effect,
+ * 4) If no Allocation Tag Access, then all accesses are Unchecked.
+ */
+ if (allocation_tag_access_enabled(env, el, sctlr)) {
+ DP_TBFLAG_A64(flags, ATA, 1);
+ if (tbid
+ && !(env->pstate & PSTATE_TCO)
+ && (sctlr & (el == 0 ? SCTLR_TCF0 : SCTLR_TCF))) {
+ DP_TBFLAG_A64(flags, MTE_ACTIVE, 1);
+ if (!EX_TBFLAG_A64(flags, UNPRIV)) {
+ /*
+ * In non-unpriv contexts (eg EL0), unpriv load/stores
+ * act like normal ones; duplicate the MTE info to
+ * avoid translate-a64.c having to check UNPRIV to see
+ * whether it is OK to index into MTE_ACTIVE[].
+ */
+ DP_TBFLAG_A64(flags, MTE0_ACTIVE, 1);
+ }
+ }
+ }
+ /* And again for unprivileged accesses, if required. */
+ if (EX_TBFLAG_A64(flags, UNPRIV)
+ && tbid
+ && !(env->pstate & PSTATE_TCO)
+ && (sctlr & SCTLR_TCF0)
+ && allocation_tag_access_enabled(env, 0, sctlr)) {
+ DP_TBFLAG_A64(flags, MTE0_ACTIVE, 1);
+ }
+ /*
+ * For unpriv tag-setting accesses we also need ATA0. Again, in
+ * contexts where unpriv and normal insns are the same we
+ * duplicate the ATA bit to save effort for translate-a64.c.
+ */
+ if (EX_TBFLAG_A64(flags, UNPRIV)) {
+ if (allocation_tag_access_enabled(env, 0, sctlr)) {
+ DP_TBFLAG_A64(flags, ATA0, 1);
+ }
+ } else {
+ DP_TBFLAG_A64(flags, ATA0, EX_TBFLAG_A64(flags, ATA));
+ }
+ /* Cache TCMA as well as TBI. */
+ DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx));
+ }
+
+ return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
+}
+
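+/* Recompute the full set of cached TB flags for the current CPU state. */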
+static CPUARMTBFlags rebuild_hflags_internal(CPUARMState *env)
+{
+ int el = arm_current_el(env);
+ int fp_el = fp_exception_el(env, el);
+ ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
+
+ if (is_a64(env)) {
+ return rebuild_hflags_a64(env, el, fp_el, mmu_idx);
+ } else if (arm_feature(env, ARM_FEATURE_M)) {
+ return rebuild_hflags_m32(env, fp_el, mmu_idx);
+ } else {
+ return rebuild_hflags_a32(env, fp_el, mmu_idx);
+ }
+}
+
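+/* Recompute env->hflags from the current processor state. */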
+void arm_rebuild_hflags(CPUARMState *env)
+{
+ env->hflags = rebuild_hflags_internal(env);
+}
+
+/*
+ * If we have triggered an EL state change we can't rely on the
+ * translator having passed it to us, so we need to recompute.
+ */
+void HELPER(rebuild_hflags_m32_newel)(CPUARMState *env)
+{
+ int el = arm_current_el(env);
+ int fp_el = fp_exception_el(env, el);
+ ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
+
+ env->hflags = rebuild_hflags_m32(env, fp_el, mmu_idx);
+}
+
+void HELPER(rebuild_hflags_m32)(CPUARMState *env, int el)
+{
+ int fp_el = fp_exception_el(env, el);
+ ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
+
+ env->hflags = rebuild_hflags_m32(env, fp_el, mmu_idx);
+}
+
+/*
+ * If we have triggered an EL state change we can't rely on the
+ * translator having passed it to us, so we need to recompute.
+ */
+void HELPER(rebuild_hflags_a32_newel)(CPUARMState *env)
+{
+ int el = arm_current_el(env);
+ int fp_el = fp_exception_el(env, el);
+ ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
+ env->hflags = rebuild_hflags_a32(env, fp_el, mmu_idx);
+}
+
+void HELPER(rebuild_hflags_a32)(CPUARMState *env, int el)
+{
+ int fp_el = fp_exception_el(env, el);
+ ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
+
+ env->hflags = rebuild_hflags_a32(env, fp_el, mmu_idx);
+}
+
+void HELPER(rebuild_hflags_a64)(CPUARMState *env, int el)
+{
+ int fp_el = fp_exception_el(env, el);
+ ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, el);
+
+ env->hflags = rebuild_hflags_a64(env, el, fp_el, mmu_idx);
+}
+
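+/* Debug-only check that env->hflags matches a from-scratch rebuild. */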
+void assert_hflags_rebuild_correctly(CPUARMState *env)
+{
+#ifdef CONFIG_DEBUG_TCG
+ CPUARMTBFlags c = env->hflags;
+ CPUARMTBFlags r = rebuild_hflags_internal(env);
+
+ if (unlikely(c.flags != r.flags || c.flags2 != r.flags2)) {
+ fprintf(stderr, "TCG hflags mismatch "
+ "(current:(0x%08x,0x" TARGET_FMT_lx ")"
+ " rebuilt:(0x%08x,0x" TARGET_FMT_lx ")\n",
+ c.flags, c.flags2, r.flags, r.flags2);
+ abort();
+ }
+#endif
+}
diff --git a/target/arm/iwmmxt_helper.c b/target/arm/tcg/iwmmxt_helper.c
index 24244d012c..610b1b2103 100644
--- a/target/arm/iwmmxt_helper.c
+++ b/target/arm/tcg/iwmmxt_helper.c
@@ -8,7 +8,7 @@
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/target/arm/tcg/m-nocp.decode b/target/arm/tcg/m-nocp.decode
new file mode 100644
index 0000000000..b65c801c97
--- /dev/null
+++ b/target/arm/tcg/m-nocp.decode
@@ -0,0 +1,72 @@
+# M-profile UsageFault.NOCP exception handling
+#
+# Copyright (c) 2020 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+# For M-profile, the architecture specifies that NOCP UsageFaults
+# should take precedence over UNDEF faults across the whole wide
+# range of coprocessor-space encodings, with the exception of
+# VLLDM and VLSTM. (Compare v8.1M IsCPInstruction() pseudocode and
+# v8M Arm ARM rule R_QLGM.) This isn't mandatory for v8.0M but we choose
+# to behave the same as v8.1M.
+# This decode is handled before any others (and in particular before
+# decoding FP instructions which are in the coprocessor space).
+# If the coprocessor is not present or disabled then we will generate
+# the NOCP exception; otherwise we let the insn through to the main decode.
+
+%vd_dp 22:1 12:4
+%vd_sp 12:4 22:1
+
+&nocp cp
+
+# M-profile VLDR/VSTR to sysreg
+%vldr_sysreg 22:1 13:3
+%imm7_0x4 0:7 !function=times_4
+
+&vldr_sysreg rn reg imm a w p
+@vldr_sysreg .... ... . a:1 . . . rn:4 ... . ... .. ....... \
+ reg=%vldr_sysreg imm=%imm7_0x4 &vldr_sysreg
+
+{
+ # Special cases which do not take an early NOCP: VLLDM and VLSTM
+ VLLDM_VLSTM 1110 1100 001 l:1 rn:4 0000 1010 op:1 000 0000
+ # VSCCLRM (new in v8.1M) is similar:
+ VSCCLRM 1110 1100 1.01 1111 .... 1011 imm:7 0 vd=%vd_dp size=3
+ VSCCLRM 1110 1100 1.01 1111 .... 1010 imm:8 vd=%vd_sp size=2
+
+ # FP system register accesses: these are a special case because accesses
+ # to FPCXT_NS succeed even if the FPU is disabled. We therefore need
+ # to handle them before the big NOCP blocks. Note that within these
+ # insns NOCP still has higher priority than UNDEFs; this is implemented
+ # by their returning 'false' for UNDEF so as to fall through into the
+ # NOCP check (in contrast to VLLDM etc, which call unallocated_encoding()
+ # for the UNDEFs there that must take precedence over NOCP.)
+
+ VMSR_VMRS ---- 1110 111 l:1 reg:4 rt:4 1010 0001 0000
+
+ # P=0 W=0 is SEE "Related encodings", so split into two patterns
+ VLDR_sysreg ---- 110 1 . . w:1 1 .... ... 0 111 11 ....... @vldr_sysreg p=1
+ VLDR_sysreg ---- 110 0 . . 1 1 .... ... 0 111 11 ....... @vldr_sysreg p=0 w=1
+ VSTR_sysreg ---- 110 1 . . w:1 0 .... ... 0 111 11 ....... @vldr_sysreg p=1
+ VSTR_sysreg ---- 110 0 . . 1 0 .... ... 0 111 11 ....... @vldr_sysreg p=0 w=1
+
+ NOCP 111- 1110 ---- ---- ---- cp:4 ---- ---- &nocp
+ NOCP 111- 110- ---- ---- ---- cp:4 ---- ---- &nocp
+ # From v8.1M onwards this range will also NOCP:
+ NOCP_8_1 111- 1111 ---- ---- ---- ---- ---- ---- &nocp cp=10
+}
diff --git a/target/arm/tcg/m_helper.c b/target/arm/tcg/m_helper.c
new file mode 100644
index 0000000000..d1f1e02acc
--- /dev/null
+++ b/target/arm/tcg/m_helper.c
@@ -0,0 +1,2902 @@
+/*
+ * ARM generic helpers.
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "cpu-features.h"
+#include "gdbstub/helpers.h"
+#include "exec/helper-proto.h"
+#include "qemu/main-loop.h"
+#include "qemu/bitops.h"
+#include "qemu/log.h"
+#include "exec/exec-all.h"
+#ifdef CONFIG_TCG
+#include "exec/cpu_ldst.h"
+#include "semihosting/common-semi.h"
+#endif
+#if !defined(CONFIG_USER_ONLY)
+#include "hw/intc/armv7m_nvic.h"
+#endif
+
+static void v7m_msr_xpsr(CPUARMState *env, uint32_t mask,
+ uint32_t reg, uint32_t val)
+{
+ /* Only APSR is actually writable */
+ if (!(reg & 4)) {
+ uint32_t apsrmask = 0;
+
+ if (mask & 8) {
+ apsrmask |= XPSR_NZCV | XPSR_Q;
+ }
+ if ((mask & 4) && arm_feature(env, ARM_FEATURE_THUMB_DSP)) {
+ apsrmask |= XPSR_GE;
+ }
+ xpsr_write(env, val, apsrmask);
+ }
+}
+
+static uint32_t v7m_mrs_xpsr(CPUARMState *env, uint32_t reg, unsigned el)
+{
+ uint32_t mask = 0;
+
+ if ((reg & 1) && el) {
+ mask |= XPSR_EXCP; /* IPSR (unpriv. reads as zero) */
+ }
+ if (!(reg & 4)) {
+ mask |= XPSR_NZCV | XPSR_Q; /* APSR */
+ if (arm_feature(env, ARM_FEATURE_THUMB_DSP)) {
+ mask |= XPSR_GE;
+ }
+ }
+ /* EPSR reads as zero */
+ return xpsr_read(env) & mask;
+}
+
+uint32_t arm_v7m_mrs_control(CPUARMState *env, uint32_t secure)
+{
+ uint32_t value = env->v7m.control[secure];
+
+ if (!secure) {
+ /* SFPA is RAZ/WI from NS; FPCA is stored in the M_REG_S bank */
+ value |= env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK;
+ }
+ return value;
+}
+
+#ifdef CONFIG_USER_ONLY
+
+void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
+{
+ uint32_t mask = extract32(maskreg, 8, 4);
+ uint32_t reg = extract32(maskreg, 0, 8);
+
+ switch (reg) {
+ case 0 ... 7: /* xPSR sub-fields */
+ v7m_msr_xpsr(env, mask, reg, val);
+ break;
+ case 20: /* CONTROL */
+ /* There are no sub-fields that are actually writable from EL0. */
+ break;
+ default:
+ /* Unprivileged writes to other registers are ignored */
+ break;
+ }
+}
+
+uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg)
+{
+ switch (reg) {
+ case 0 ... 7: /* xPSR sub-fields */
+ return v7m_mrs_xpsr(env, reg, 0);
+ case 20: /* CONTROL */
+ return arm_v7m_mrs_control(env, 0);
+ default:
+ /* Unprivileged reads of other registers return zero. */
+ return 0;
+ }
+}
+
+void HELPER(v7m_bxns)(CPUARMState *env, uint32_t dest)
+{
+ /* translate.c should never generate calls here in user-only mode */
+ g_assert_not_reached();
+}
+
+void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest)
+{
+ /* translate.c should never generate calls here in user-only mode */
+ g_assert_not_reached();
+}
+
+void HELPER(v7m_preserve_fp_state)(CPUARMState *env)
+{
+ /* translate.c should never generate calls here in user-only mode */
+ g_assert_not_reached();
+}
+
+void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr)
+{
+ /* translate.c should never generate calls here in user-only mode */
+ g_assert_not_reached();
+}
+
+void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr)
+{
+ /* translate.c should never generate calls here in user-only mode */
+ g_assert_not_reached();
+}
+
+uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op)
+{
+ /*
+ * The TT instructions can be used by unprivileged code, but in
+ * user-only emulation we don't have the MPU.
+ * Luckily since we know we are NonSecure unprivileged (and that in
+ * turn means that the A flag wasn't specified), all the bits in the
+ * register must be zero:
+ * IREGION: 0 because IRVALID is 0
+ * IRVALID: 0 because NS
+ * S: 0 because NS
+ * NSRW: 0 because NS
+ * NSR: 0 because NS
+ * RW: 0 because unpriv and A flag not set
+ * R: 0 because unpriv and A flag not set
+ * SRVALID: 0 because NS
+ * MRVALID: 0 because unpriv and A flag not set
+ * SREGION: 0 because SRVALID is 0
+ * MREGION: 0 because MRVALID is 0
+ */
+ return 0;
+}
+
+ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
+{
+ return ARMMMUIdx_MUser;
+}
+
+#else /* !CONFIG_USER_ONLY */
+
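+/*
+ * Compose an M-profile MMU index from the security, privilege and
+ * NegPri state.
+ */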
+static ARMMMUIdx arm_v7m_mmu_idx_all(CPUARMState *env,
+ bool secstate, bool priv, bool negpri)
+{
+ ARMMMUIdx mmu_idx = ARM_MMU_IDX_M;
+
+ if (priv) {
+ mmu_idx |= ARM_MMU_IDX_M_PRIV;
+ }
+
+ if (negpri) {
+ mmu_idx |= ARM_MMU_IDX_M_NEGPRI;
+ }
+
+ if (secstate) {
+ mmu_idx |= ARM_MMU_IDX_M_S;
+ }
+
+ return mmu_idx;
+}
+
+static ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
+ bool secstate, bool priv)
+{
+ bool negpri = armv7m_nvic_neg_prio_requested(env->nvic, secstate);
+
+ return arm_v7m_mmu_idx_all(env, secstate, priv, negpri);
+}
+
+/* Return the MMU index for a v7M CPU in the specified security state */
+ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
+{
+ bool priv = arm_v7m_is_handler_mode(env) ||
+ !(env->v7m.control[secstate] & 1);
+
+ return arm_v7m_mmu_idx_for_secstate_and_priv(env, secstate, priv);
+}
+
+/*
+ * What kind of stack write are we doing? This affects how exceptions
+ * generated during the stacking are treated.
+ */
+typedef enum StackingMode {
+ STACK_NORMAL,
+ STACK_IGNFAULTS,
+ STACK_LAZYFP,
+} StackingMode;
+
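+/*
+ * Write VALUE to ADDR as part of exception entry or lazy-FP stacking.
+ * Returns true on success; on failure the appropriate fault is pended
+ * (unless MODE is STACK_IGNFAULTS) and false is returned.
+ */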
+static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value,
+ ARMMMUIdx mmu_idx, StackingMode mode)
+{
+ CPUState *cs = CPU(cpu);
+ CPUARMState *env = &cpu->env;
+ MemTxResult txres;
+ GetPhysAddrResult res = {};
+ ARMMMUFaultInfo fi = {};
+ bool secure = mmu_idx & ARM_MMU_IDX_M_S;
+ int exc;
+ bool exc_secure;
+
+ if (get_phys_addr(env, addr, MMU_DATA_STORE, mmu_idx, &res, &fi)) {
+ /* MPU/SAU lookup failed */
+ if (fi.type == ARMFault_QEMU_SFault) {
+ if (mode == STACK_LAZYFP) {
+ qemu_log_mask(CPU_LOG_INT,
+ "...SecureFault with SFSR.LSPERR "
+ "during lazy stacking\n");
+ env->v7m.sfsr |= R_V7M_SFSR_LSPERR_MASK;
+ } else {
+ qemu_log_mask(CPU_LOG_INT,
+ "...SecureFault with SFSR.AUVIOL "
+ "during stacking\n");
+ env->v7m.sfsr |= R_V7M_SFSR_AUVIOL_MASK;
+ }
+ env->v7m.sfsr |= R_V7M_SFSR_SFARVALID_MASK;
+ env->v7m.sfar = addr;
+ exc = ARMV7M_EXCP_SECURE;
+ exc_secure = false;
+ } else {
+ if (mode == STACK_LAZYFP) {
+ qemu_log_mask(CPU_LOG_INT,
+ "...MemManageFault with CFSR.MLSPERR\n");
+ env->v7m.cfsr[secure] |= R_V7M_CFSR_MLSPERR_MASK;
+ } else {
+ qemu_log_mask(CPU_LOG_INT,
+ "...MemManageFault with CFSR.MSTKERR\n");
+ env->v7m.cfsr[secure] |= R_V7M_CFSR_MSTKERR_MASK;
+ }
+ exc = ARMV7M_EXCP_MEM;
+ exc_secure = secure;
+ }
+ goto pend_fault;
+ }
+ address_space_stl_le(arm_addressspace(cs, res.f.attrs), res.f.phys_addr,
+ value, res.f.attrs, &txres);
+ if (txres != MEMTX_OK) {
+ /* BusFault trying to write the data */
+ if (mode == STACK_LAZYFP) {
+ qemu_log_mask(CPU_LOG_INT, "...BusFault with BFSR.LSPERR\n");
+ env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_LSPERR_MASK;
+ } else {
+ qemu_log_mask(CPU_LOG_INT, "...BusFault with BFSR.STKERR\n");
+ env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_STKERR_MASK;
+ }
+ exc = ARMV7M_EXCP_BUS;
+ exc_secure = false;
+ goto pend_fault;
+ }
+ return true;
+
+pend_fault:
+ /*
+ * By pending the exception at this point we are making
+ * the IMPDEF choice "overridden exceptions pended" (see the
+ * MergeExcInfo() pseudocode). The other choice would be to not
+ * pend them now and then make a choice about which to throw away
+ * later if we have two derived exceptions.
+ * The only case when we must not pend the exception but instead
+ * throw it away is if we are doing the push of the callee registers
+ * and we've already generated a derived exception (this is indicated
+ * by the caller passing STACK_IGNFAULTS). Even in this case we will
+ * still update the fault status registers.
+ */
+ switch (mode) {
+ case STACK_NORMAL:
+ armv7m_nvic_set_pending_derived(env->nvic, exc, exc_secure);
+ break;
+ case STACK_LAZYFP:
+ armv7m_nvic_set_pending_lazyfp(env->nvic, exc, exc_secure);
+ break;
+ case STACK_IGNFAULTS:
+ break;
+ }
+ return false;
+}
+
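+/*
+ * Read a word from ADDR as part of exception-return unstacking.
+ * Returns true on success; on failure the appropriate fault is pended
+ * and false is returned.
+ */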
+static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr,
+ ARMMMUIdx mmu_idx)
+{
+ CPUState *cs = CPU(cpu);
+ CPUARMState *env = &cpu->env;
+ MemTxResult txres;
+ GetPhysAddrResult res = {};
+ ARMMMUFaultInfo fi = {};
+ bool secure = mmu_idx & ARM_MMU_IDX_M_S;
+ int exc;
+ bool exc_secure;
+ uint32_t value;
+
+ if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &res, &fi)) {
+ /* MPU/SAU lookup failed */
+ if (fi.type == ARMFault_QEMU_SFault) {
+ qemu_log_mask(CPU_LOG_INT,
+ "...SecureFault with SFSR.AUVIOL during unstack\n");
+ env->v7m.sfsr |= R_V7M_SFSR_AUVIOL_MASK | R_V7M_SFSR_SFARVALID_MASK;
+ env->v7m.sfar = addr;
+ exc = ARMV7M_EXCP_SECURE;
+ exc_secure = false;
+ } else {
+ qemu_log_mask(CPU_LOG_INT,
+ "...MemManageFault with CFSR.MUNSTKERR\n");
+ env->v7m.cfsr[secure] |= R_V7M_CFSR_MUNSTKERR_MASK;
+ exc = ARMV7M_EXCP_MEM;
+ exc_secure = secure;
+ }
+ goto pend_fault;
+ }
+
+ value = address_space_ldl(arm_addressspace(cs, res.f.attrs),
+ res.f.phys_addr, res.f.attrs, &txres);
+ if (txres != MEMTX_OK) {
+ /* BusFault trying to read the data */
+ qemu_log_mask(CPU_LOG_INT, "...BusFault with BFSR.UNSTKERR\n");
+ env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_UNSTKERR_MASK;
+ exc = ARMV7M_EXCP_BUS;
+ exc_secure = false;
+ goto pend_fault;
+ }
+
+ *dest = value;
+ return true;
+
+pend_fault:
+ /*
+ * By pending the exception at this point we are making
+ * the IMPDEF choice "overridden exceptions pended" (see the
+ * MergeExcInfo() pseudocode). The other choice would be to not
+ * pend them now and then make a choice about which to throw away
+ * later if we have two derived exceptions.
+ */
+ armv7m_nvic_set_pending(env->nvic, exc, exc_secure);
+ return false;
+}
+
+void HELPER(v7m_preserve_fp_state)(CPUARMState *env)
+{
+ /*
+ * Preserve FP state (because LSPACT was set and we are about
+ * to execute an FP instruction). This corresponds to the
+ * PreserveFPState() pseudocode.
+ * We may throw an exception if the stacking fails.
+ */
+ ARMCPU *cpu = env_archcpu(env);
+ bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
+ bool negpri = !(env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_HFRDY_MASK);
+ bool is_priv = !(env->v7m.fpccr[is_secure] & R_V7M_FPCCR_USER_MASK);
+ bool splimviol = env->v7m.fpccr[is_secure] & R_V7M_FPCCR_SPLIMVIOL_MASK;
+ uint32_t fpcar = env->v7m.fpcar[is_secure];
+ bool stacked_ok = true;
+ bool ts = is_secure && (env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_TS_MASK);
+ bool take_exception;
+
+ /* Take the BQL as we are going to touch the NVIC */
+ bql_lock();
+
+ /* Check the background context had access to the FPU */
+ if (!v7m_cpacr_pass(env, is_secure, is_priv)) {
+ armv7m_nvic_set_pending_lazyfp(env->nvic, ARMV7M_EXCP_USAGE, is_secure);
+ env->v7m.cfsr[is_secure] |= R_V7M_CFSR_NOCP_MASK;
+ stacked_ok = false;
+ } else if (!is_secure && !extract32(env->v7m.nsacr, 10, 1)) {
+ armv7m_nvic_set_pending_lazyfp(env->nvic, ARMV7M_EXCP_USAGE, M_REG_S);
+ env->v7m.cfsr[M_REG_S] |= R_V7M_CFSR_NOCP_MASK;
+ stacked_ok = false;
+ }
+
+ if (!splimviol && stacked_ok) {
+ /* We only stack if the stack limit wasn't violated */
+ int i;
+ ARMMMUIdx mmu_idx;
+
+ mmu_idx = arm_v7m_mmu_idx_all(env, is_secure, is_priv, negpri);
+ for (i = 0; i < (ts ? 32 : 16); i += 2) {
+ uint64_t dn = *aa32_vfp_dreg(env, i / 2);
+ uint32_t faddr = fpcar + 4 * i;
+ uint32_t slo = extract64(dn, 0, 32);
+ uint32_t shi = extract64(dn, 32, 32);
+
+ if (i >= 16) {
+ faddr += 8; /* skip the slot for the FPSCR/VPR */
+ }
+ stacked_ok = stacked_ok &&
+ v7m_stack_write(cpu, faddr, slo, mmu_idx, STACK_LAZYFP) &&
+ v7m_stack_write(cpu, faddr + 4, shi, mmu_idx, STACK_LAZYFP);
+ }
+
+ stacked_ok = stacked_ok &&
+ v7m_stack_write(cpu, fpcar + 0x40,
+ vfp_get_fpscr(env), mmu_idx, STACK_LAZYFP);
+ if (cpu_isar_feature(aa32_mve, cpu)) {
+ stacked_ok = stacked_ok &&
+ v7m_stack_write(cpu, fpcar + 0x44,
+ env->v7m.vpr, mmu_idx, STACK_LAZYFP);
+ }
+ }
+
+ /*
+ * We definitely pended an exception, but it's possible that it
+ * might not be able to be taken now. If its priority permits us
+ * to take it now, then we must not update the LSPACT or FP regs,
+ * but instead jump out to take the exception immediately.
+ * If it's just pending and won't be taken until the current
+ * handler exits, then we do update LSPACT and the FP regs.
+ */
+ take_exception = !stacked_ok &&
+ armv7m_nvic_can_take_pending_exception(env->nvic);
+
+ bql_unlock();
+
+ if (take_exception) {
+ raise_exception_ra(env, EXCP_LAZYFP, 0, 1, GETPC());
+ }
+
+ env->v7m.fpccr[is_secure] &= ~R_V7M_FPCCR_LSPACT_MASK;
+
+ if (ts) {
+ /* Clear s0 to s31 and the FPSCR and VPR */
+ int i;
+
+ for (i = 0; i < 32; i += 2) {
+ *aa32_vfp_dreg(env, i / 2) = 0;
+ }
+ vfp_set_fpscr(env, 0);
+ if (cpu_isar_feature(aa32_mve, cpu)) {
+ env->v7m.vpr = 0;
+ }
+ }
+ /*
+ * Otherwise s0 to s15, FPSCR and VPR are UNKNOWN; we choose to leave them
+ * unchanged.
+ */
+}
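+
+/*
+ * For reference, the offsets used above imply the following lazy-stacking
+ * frame layout (relative to the 8-aligned FPCAR value): s0..s15 at
+ * +0x00..0x3c, FPSCR at +0x40, VPR at +0x44 (only written when MVE is
+ * implemented), and s16..s31 at +0x48..0x84 when FPCCR_S.TS is set.
+ */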
+
+/*
+ * Write to v7M CONTROL.SPSEL bit for the specified security bank.
+ * This may change the current stack pointer between Main and Process
+ * stack pointers if it is done for the CONTROL register for the current
+ * security state.
+ */
+static void write_v7m_control_spsel_for_secstate(CPUARMState *env,
+ bool new_spsel,
+ bool secstate)
+{
+ bool old_is_psp = v7m_using_psp(env);
+
+ env->v7m.control[secstate] =
+ deposit32(env->v7m.control[secstate],
+ R_V7M_CONTROL_SPSEL_SHIFT,
+ R_V7M_CONTROL_SPSEL_LENGTH, new_spsel);
+
+ if (secstate == env->v7m.secure) {
+ bool new_is_psp = v7m_using_psp(env);
+ uint32_t tmp;
+
+ if (old_is_psp != new_is_psp) {
+ tmp = env->v7m.other_sp;
+ env->v7m.other_sp = env->regs[13];
+ env->regs[13] = tmp;
+ }
+ }
+}
+
+/*
+ * Write to v7M CONTROL.SPSEL bit. This may change the current
+ * stack pointer between Main and Process stack pointers.
+ */
+static void write_v7m_control_spsel(CPUARMState *env, bool new_spsel)
+{
+ write_v7m_control_spsel_for_secstate(env, new_spsel, env->v7m.secure);
+}
+
+void write_v7m_exception(CPUARMState *env, uint32_t new_exc)
+{
+ /*
+ * Write a new value to v7m.exception, thus transitioning into or out
+ * of Handler mode; this may result in a change of active stack pointer.
+ */
+ bool new_is_psp, old_is_psp = v7m_using_psp(env);
+ uint32_t tmp;
+
+ env->v7m.exception = new_exc;
+
+ new_is_psp = v7m_using_psp(env);
+
+ if (old_is_psp != new_is_psp) {
+ tmp = env->v7m.other_sp;
+ env->v7m.other_sp = env->regs[13];
+ env->regs[13] = tmp;
+ }
+}
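+
+/*
+ * (v7m_using_psp() reports the Process stack only in Thread mode with
+ * CONTROL.SPSEL set for the current security state, so the swap above
+ * happens when the write moves us between Thread and Handler mode while
+ * SPSEL is set.)
+ */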
+
+/* Switch M profile security state between NS and S */
+static void switch_v7m_security_state(CPUARMState *env, bool new_secstate)
+{
+ uint32_t new_ss_msp, new_ss_psp;
+
+ if (env->v7m.secure == new_secstate) {
+ return;
+ }
+
+ /*
+ * All the banked state is accessed by looking at env->v7m.secure
+ * except for the stack pointer; rearrange the SP appropriately.
+ */
+ new_ss_msp = env->v7m.other_ss_msp;
+ new_ss_psp = env->v7m.other_ss_psp;
+
+ if (v7m_using_psp(env)) {
+ env->v7m.other_ss_psp = env->regs[13];
+ env->v7m.other_ss_msp = env->v7m.other_sp;
+ } else {
+ env->v7m.other_ss_msp = env->regs[13];
+ env->v7m.other_ss_psp = env->v7m.other_sp;
+ }
+
+ env->v7m.secure = new_secstate;
+
+ if (v7m_using_psp(env)) {
+ env->regs[13] = new_ss_psp;
+ env->v7m.other_sp = new_ss_msp;
+ } else {
+ env->regs[13] = new_ss_msp;
+ env->v7m.other_sp = new_ss_psp;
+ }
+}
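+
+/*
+ * A sketch of the SP bookkeeping assumed above: env->regs[13] is the SP
+ * currently in use, env->v7m.other_sp is the other of MSP/PSP for the
+ * current security state, and env->v7m.other_ss_msp/other_ss_psp hold
+ * the two stack pointers of the inactive security state; switching
+ * security state therefore shuffles all four locations.
+ */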
+
+void HELPER(v7m_bxns)(CPUARMState *env, uint32_t dest)
+{
+ /*
+ * Handle v7M BXNS:
+ * - if the return value is a magic value, do exception return (like BX)
+ * - otherwise bit 0 of the return value is the target security state
+ */
+ uint32_t min_magic;
+
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ /* Covers FNC_RETURN and EXC_RETURN magic */
+ min_magic = FNC_RETURN_MIN_MAGIC;
+ } else {
+ /* EXC_RETURN magic only */
+ min_magic = EXC_RETURN_MIN_MAGIC;
+ }
+
+ if (dest >= min_magic) {
+ /*
+ * This is an exception return magic value; put it where
+ * do_v7m_exception_exit() expects and raise EXCEPTION_EXIT.
+ * Note that if we ever add gen_ss_advance() singlestep support to
+ * M profile this should count as an "instruction execution complete"
+ * event (compare gen_bx_excret_final_code()).
+ */
+ env->regs[15] = dest & ~1;
+ env->thumb = dest & 1;
+ HELPER(exception_internal)(env, EXCP_EXCEPTION_EXIT);
+ /* notreached */
+ }
+
+ /* translate.c should have made BXNS UNDEF unless we're secure */
+ assert(env->v7m.secure);
+
+ if (!(dest & 1)) {
+ env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_SFPA_MASK;
+ }
+ switch_v7m_security_state(env, dest & 1);
+ env->thumb = true;
+ env->regs[15] = dest & ~1;
+ arm_rebuild_hflags(env);
+}
+
+void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest)
+{
+ /*
+ * Handle v7M BLXNS:
+ * - bit 0 of the destination address is the target security state
+ */
+
+ /* At this point regs[15] is the address just after the BLXNS */
+ uint32_t nextinst = env->regs[15] | 1;
+ uint32_t sp = env->regs[13] - 8;
+ uint32_t saved_psr;
+
+ /* translate.c will have made BLXNS UNDEF unless we're secure */
+ assert(env->v7m.secure);
+
+ if (dest & 1) {
+ /*
+ * Target is Secure, so this is just a normal BLX,
+ * except that the low bit doesn't indicate Thumb/not.
+ */
+ env->regs[14] = nextinst;
+ env->thumb = true;
+ env->regs[15] = dest & ~1;
+ return;
+ }
+
+ /* Target is non-secure: first push a stack frame */
+ if (!QEMU_IS_ALIGNED(sp, 8)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "BLXNS with misaligned SP is UNPREDICTABLE\n");
+ }
+
+ if (sp < v7m_sp_limit(env)) {
+ raise_exception(env, EXCP_STKOF, 0, 1);
+ }
+
+ saved_psr = env->v7m.exception;
+ if (env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK) {
+ saved_psr |= XPSR_SFPA;
+ }
+
+ /* Note that these stores can throw exceptions on MPU faults */
+ cpu_stl_data_ra(env, sp, nextinst, GETPC());
+ cpu_stl_data_ra(env, sp + 4, saved_psr, GETPC());
+
+ env->regs[13] = sp;
+ env->regs[14] = 0xfeffffff;
+ if (arm_v7m_is_handler_mode(env)) {
+ /*
+ * Write a dummy value to IPSR, to avoid leaking the current secure
+ * exception number to non-secure code. This is guaranteed not
+ * to cause write_v7m_exception() to actually change stacks.
+ */
+ write_v7m_exception(env, 1);
+ }
+ env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_SFPA_MASK;
+ switch_v7m_security_state(env, 0);
+ env->thumb = true;
+ env->regs[15] = dest;
+ arm_rebuild_hflags(env);
+}
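+
+/*
+ * The two stores above form the partial stack frame that BLXNS to
+ * Non-secure pushes on the Secure stack: the return address (with bit 0
+ * set) at sp, and a partial xPSR (the exception number plus the SFPA
+ * flag) at sp + 4; LR is loaded with 0xfeffffff, which lies in the
+ * FNC_RETURN magic range (compare the min_magic check in
+ * HELPER(v7m_bxns) above).
+ */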
+
+static bool arm_v7m_load_vector(ARMCPU *cpu, int exc, bool targets_secure,
+ uint32_t *pvec)
+{
+ CPUState *cs = CPU(cpu);
+ CPUARMState *env = &cpu->env;
+ MemTxResult result;
+ uint32_t addr = env->v7m.vecbase[targets_secure] + exc * 4;
+ uint32_t vector_entry;
+ MemTxAttrs attrs = {};
+ ARMMMUIdx mmu_idx;
+ bool exc_secure;
+
+ qemu_log_mask(CPU_LOG_INT,
+ "...loading from element %d of %s vector table at 0x%x\n",
+ exc, targets_secure ? "secure" : "non-secure", addr);
+
+ mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, targets_secure, true);
+
+ /*
+ * We don't do a get_phys_addr() here because the rules for vector
+ * loads are special: they always use the default memory map, and
+ * the default memory map permits reads from all addresses.
+ * Since there's no easy way to pass through to pmsav8_mpu_lookup()
+ * that we want this special case which would always say "yes",
+ * we just do the SAU lookup here followed by a direct physical load.
+ */
+ attrs.secure = targets_secure;
+ attrs.user = false;
+
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ V8M_SAttributes sattrs = {};
+
+ v8m_security_lookup(env, addr, MMU_DATA_LOAD, mmu_idx,
+ targets_secure, &sattrs);
+ if (sattrs.ns) {
+ attrs.secure = false;
+ } else if (!targets_secure) {
+ /*
+ * NS access to S memory: the underlying exception which we escalate
+ * to HardFault is SecureFault, which always targets Secure.
+ */
+ exc_secure = true;
+ goto load_fail;
+ }
+ }
+
+ vector_entry = address_space_ldl(arm_addressspace(cs, attrs), addr,
+ attrs, &result);
+ if (result != MEMTX_OK) {
+ /*
+ * Underlying exception is BusFault: its target security state
+ * depends on BFHFNMINS.
+ */
+ exc_secure = !(cpu->env.v7m.aircr & R_V7M_AIRCR_BFHFNMINS_MASK);
+ goto load_fail;
+ }
+ *pvec = vector_entry;
+ qemu_log_mask(CPU_LOG_INT, "...loaded new PC 0x%x\n", *pvec);
+ return true;
+
+load_fail:
+ /*
+ * All vector table fetch fails are reported as HardFault, with
+ * HFSR.VECTTBL and .FORCED set. (FORCED is set because
+ * technically the underlying exception is a SecureFault or BusFault
+ * that is escalated to HardFault.) This is a terminal exception,
+ * so we will either take the HardFault immediately or else enter
+ * lockup (the latter case is handled in armv7m_nvic_set_pending_derived()).
+ * The HardFault is Secure if BFHFNMINS is 0 (meaning that all HFs are
+ * secure); otherwise it targets the same security state as the
+ * underlying exception.
+ * In v8.1M HardFaults from vector table fetch fails don't set FORCED.
+ */
+ if (!(cpu->env.v7m.aircr & R_V7M_AIRCR_BFHFNMINS_MASK)) {
+ exc_secure = true;
+ }
+ env->v7m.hfsr |= R_V7M_HFSR_VECTTBL_MASK;
+ if (!arm_feature(env, ARM_FEATURE_V8_1M)) {
+ env->v7m.hfsr |= R_V7M_HFSR_FORCED_MASK;
+ }
+ armv7m_nvic_set_pending_derived(env->nvic, ARMV7M_EXCP_HARD, exc_secure);
+ return false;
+}
+
+static uint32_t v7m_integrity_sig(CPUARMState *env, uint32_t lr)
+{
+ /*
+ * Return the integrity signature value for the callee-saves
+ * stack frame section. @lr is the exception return payload/LR value
+ * whose FType bit forms bit 0 of the signature if FP is present.
+ */
+ uint32_t sig = 0xfefa125a;
+
+ if (!cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))
+ || (lr & R_V7M_EXCRET_FTYPE_MASK)) {
+ sig |= 1;
+ }
+ return sig;
+}
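+
+/*
+ * In other words the integrity signature stored to (and later checked
+ * against) the stack is 0xfefa125a when the frame includes FP state and
+ * 0xfefa125b when FType is set or no FPU is present.
+ */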
+
+static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain,
+ bool ignore_faults)
+{
+ /*
+ * For v8M, push the callee-saves register part of the stack frame.
+ * Compare the v8M pseudocode PushCalleeStack().
+ * In the tailchaining case this may not be the current stack.
+ */
+ CPUARMState *env = &cpu->env;
+ uint32_t *frame_sp_p;
+ uint32_t frameptr;
+ ARMMMUIdx mmu_idx;
+ bool stacked_ok;
+ uint32_t limit;
+ bool want_psp;
+ uint32_t sig;
+ StackingMode smode = ignore_faults ? STACK_IGNFAULTS : STACK_NORMAL;
+
+ if (dotailchain) {
+ bool mode = lr & R_V7M_EXCRET_MODE_MASK;
+ bool priv = !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_NPRIV_MASK) ||
+ !mode;
+
+ mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, M_REG_S, priv);
+ frame_sp_p = arm_v7m_get_sp_ptr(env, M_REG_S, mode,
+ lr & R_V7M_EXCRET_SPSEL_MASK);
+ want_psp = mode && (lr & R_V7M_EXCRET_SPSEL_MASK);
+ if (want_psp) {
+ limit = env->v7m.psplim[M_REG_S];
+ } else {
+ limit = env->v7m.msplim[M_REG_S];
+ }
+ } else {
+ mmu_idx = arm_mmu_idx(env);
+ frame_sp_p = &env->regs[13];
+ limit = v7m_sp_limit(env);
+ }
+
+ frameptr = *frame_sp_p - 0x28;
+ if (frameptr < limit) {
+ /*
+ * Stack limit failure: set SP to the limit value, and generate
+ * STKOF UsageFault. Stack pushes below the limit must not be
+ * performed. It is IMPDEF whether pushes above the limit are
+ * performed; we choose not to.
+ */
+ qemu_log_mask(CPU_LOG_INT,
+ "...STKOF during callee-saves register stacking\n");
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+ env->v7m.secure);
+ *frame_sp_p = limit;
+ return true;
+ }
+
+ /*
+ * Write as much of the stack frame as we can. A write failure may
+ * cause us to pend a derived exception.
+ */
+ sig = v7m_integrity_sig(env, lr);
+ stacked_ok =
+ v7m_stack_write(cpu, frameptr, sig, mmu_idx, smode) &&
+ v7m_stack_write(cpu, frameptr + 0x8, env->regs[4], mmu_idx, smode) &&
+ v7m_stack_write(cpu, frameptr + 0xc, env->regs[5], mmu_idx, smode) &&
+ v7m_stack_write(cpu, frameptr + 0x10, env->regs[6], mmu_idx, smode) &&
+ v7m_stack_write(cpu, frameptr + 0x14, env->regs[7], mmu_idx, smode) &&
+ v7m_stack_write(cpu, frameptr + 0x18, env->regs[8], mmu_idx, smode) &&
+ v7m_stack_write(cpu, frameptr + 0x1c, env->regs[9], mmu_idx, smode) &&
+ v7m_stack_write(cpu, frameptr + 0x20, env->regs[10], mmu_idx, smode) &&
+ v7m_stack_write(cpu, frameptr + 0x24, env->regs[11], mmu_idx, smode);
+
+ /* Update SP regardless of whether any of the stack accesses failed. */
+ *frame_sp_p = frameptr;
+
+ return !stacked_ok;
+}
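+
+/*
+ * The 0x28-byte callee-saves frame written above is laid out as: the
+ * integrity signature at frameptr, a word at frameptr + 0x4 that this
+ * function does not write, and r4..r11 at frameptr + 0x8 .. + 0x24.
+ */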
+
+static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain,
+ bool ignore_stackfaults)
+{
+ /*
+ * Do the "take the exception" parts of exception entry,
+ * but not the pushing of state to the stack. This is
+ * similar to the pseudocode ExceptionTaken() function.
+ */
+ CPUARMState *env = &cpu->env;
+ uint32_t addr;
+ bool targets_secure;
+ int exc;
+ bool push_failed = false;
+
+ armv7m_nvic_get_pending_irq_info(env->nvic, &exc, &targets_secure);
+ qemu_log_mask(CPU_LOG_INT, "...taking pending %s exception %d\n",
+ targets_secure ? "secure" : "nonsecure", exc);
+
+ if (dotailchain) {
+ /* Sanitize LR FType and PREFIX bits */
+ if (!cpu_isar_feature(aa32_vfp_simd, cpu)) {
+ lr |= R_V7M_EXCRET_FTYPE_MASK;
+ }
+ lr = deposit32(lr, 24, 8, 0xff);
+ }
+
+ if (arm_feature(env, ARM_FEATURE_V8)) {
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
+ (lr & R_V7M_EXCRET_S_MASK)) {
+ /*
+ * The background code (the owner of the registers in the
+ * exception frame) is Secure. This means it may either already
+ * have or now needs to push callee-saves registers.
+ */
+ if (targets_secure) {
+ if (dotailchain && !(lr & R_V7M_EXCRET_ES_MASK)) {
+ /*
+ * We took an exception from Secure to NonSecure
+ * (which means the callee-saved registers got stacked)
+ * and are now tailchaining to a Secure exception.
+ * Clear DCRS so eventual return from this Secure
+ * exception unstacks the callee-saved registers.
+ */
+ lr &= ~R_V7M_EXCRET_DCRS_MASK;
+ }
+ } else {
+ /*
+ * We're going to a non-secure exception; push the
+ * callee-saves registers to the stack now, if they're
+ * not already saved.
+ */
+ if (lr & R_V7M_EXCRET_DCRS_MASK &&
+ !(dotailchain && !(lr & R_V7M_EXCRET_ES_MASK))) {
+ push_failed = v7m_push_callee_stack(cpu, lr, dotailchain,
+ ignore_stackfaults);
+ }
+ lr |= R_V7M_EXCRET_DCRS_MASK;
+ }
+ }
+
+ lr &= ~R_V7M_EXCRET_ES_MASK;
+ if (targets_secure) {
+ lr |= R_V7M_EXCRET_ES_MASK;
+ }
+ lr &= ~R_V7M_EXCRET_SPSEL_MASK;
+ if (env->v7m.control[targets_secure] & R_V7M_CONTROL_SPSEL_MASK) {
+ lr |= R_V7M_EXCRET_SPSEL_MASK;
+ }
+
+ /*
+ * Clear registers if necessary to prevent non-secure exception
+ * code being able to see register values from secure code.
+ * Where register values become architecturally UNKNOWN we leave
+ * them with their previous values. v8.1M is tighter than v8.0M
+ * here and always zeroes the caller-saved registers regardless
+ * of the security state the exception is targeting.
+ */
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ if (!targets_secure || arm_feature(env, ARM_FEATURE_V8_1M)) {
+ /*
+ * Always clear the caller-saved registers (they have been
+ * pushed to the stack earlier in v7m_push_stack()).
+ * Clear callee-saved registers if the background code is
+ * Secure (in which case these regs were saved in
+ * v7m_push_callee_stack()).
+ */
+ int i;
+ /*
+ * r4..r11 are callee-saves, zero only if background
+ * state was Secure (EXCRET.S == 1) and exception
+ * targets Non-secure state
+ */
+ bool zero_callee_saves = !targets_secure &&
+ (lr & R_V7M_EXCRET_S_MASK);
+
+ for (i = 0; i < 13; i++) {
+ if (i < 4 || i > 11 || zero_callee_saves) {
+ env->regs[i] = 0;
+ }
+ }
+ /* Clear EAPSR */
+ xpsr_write(env, 0, XPSR_NZCV | XPSR_Q | XPSR_GE | XPSR_IT);
+ }
+ }
+ }
+
+ if (push_failed && !ignore_stackfaults) {
+ /*
+ * Derived exception on callee-saves register stacking:
+ * we might now want to take a different exception which
+ * targets a different security state, so try again from the top.
+ */
+ qemu_log_mask(CPU_LOG_INT,
+ "...derived exception on callee-saves register stacking");
+ v7m_exception_taken(cpu, lr, true, true);
+ return;
+ }
+
+ if (!arm_v7m_load_vector(cpu, exc, targets_secure, &addr)) {
+ /* Vector load failed: derived exception */
+ qemu_log_mask(CPU_LOG_INT, "...derived exception on vector table load");
+ v7m_exception_taken(cpu, lr, true, true);
+ return;
+ }
+
+ /*
+ * Now we've done everything that might cause a derived exception
+ * we can go ahead and activate whichever exception we're going to
+ * take (which might now be the derived exception).
+ */
+ armv7m_nvic_acknowledge_irq(env->nvic);
+
+ /* Switch to target security state -- must do this before writing SPSEL */
+ switch_v7m_security_state(env, targets_secure);
+ write_v7m_control_spsel(env, 0);
+ arm_clear_exclusive(env);
+ /* Clear SFPA and FPCA (has no effect if no FPU) */
+ env->v7m.control[M_REG_S] &=
+ ~(R_V7M_CONTROL_FPCA_MASK | R_V7M_CONTROL_SFPA_MASK);
+ /* Clear IT bits */
+ env->condexec_bits = 0;
+ env->regs[14] = lr;
+ env->regs[15] = addr & 0xfffffffe;
+ env->thumb = addr & 1;
+ arm_rebuild_hflags(env);
+}
+
+static void v7m_update_fpccr(CPUARMState *env, uint32_t frameptr,
+ bool apply_splim)
+{
+ /*
+ * Like the pseudocode UpdateFPCCR: save state in FPCAR and FPCCR
+ * that we will need later in order to do lazy FP reg stacking.
+ */
+ bool is_secure = env->v7m.secure;
+ NVICState *nvic = env->nvic;
+ /*
+ * Some bits are unbanked and live always in fpccr[M_REG_S]; some bits
+ * are banked and we want to update the bit in the bank for the
+ * current security state; and in one case we want to specifically
+ * update the NS banked version of a bit even if we are secure.
+ */
+ uint32_t *fpccr_s = &env->v7m.fpccr[M_REG_S];
+ uint32_t *fpccr_ns = &env->v7m.fpccr[M_REG_NS];
+ uint32_t *fpccr = &env->v7m.fpccr[is_secure];
+ bool hfrdy, bfrdy, mmrdy, ns_ufrdy, s_ufrdy, sfrdy, monrdy;
+
+ env->v7m.fpcar[is_secure] = frameptr & ~0x7;
+
+ if (apply_splim && arm_feature(env, ARM_FEATURE_V8)) {
+ bool splimviol;
+ uint32_t splim = v7m_sp_limit(env);
+ bool ign = armv7m_nvic_neg_prio_requested(nvic, is_secure) &&
+ (env->v7m.ccr[is_secure] & R_V7M_CCR_STKOFHFNMIGN_MASK);
+
+ splimviol = !ign && frameptr < splim;
+ *fpccr = FIELD_DP32(*fpccr, V7M_FPCCR, SPLIMVIOL, splimviol);
+ }
+
+ *fpccr = FIELD_DP32(*fpccr, V7M_FPCCR, LSPACT, 1);
+
+ *fpccr_s = FIELD_DP32(*fpccr_s, V7M_FPCCR, S, is_secure);
+
+ *fpccr = FIELD_DP32(*fpccr, V7M_FPCCR, USER, arm_current_el(env) == 0);
+
+ *fpccr = FIELD_DP32(*fpccr, V7M_FPCCR, THREAD,
+ !arm_v7m_is_handler_mode(env));
+
+ hfrdy = armv7m_nvic_get_ready_status(nvic, ARMV7M_EXCP_HARD, false);
+ *fpccr_s = FIELD_DP32(*fpccr_s, V7M_FPCCR, HFRDY, hfrdy);
+
+ bfrdy = armv7m_nvic_get_ready_status(nvic, ARMV7M_EXCP_BUS, false);
+ *fpccr_s = FIELD_DP32(*fpccr_s, V7M_FPCCR, BFRDY, bfrdy);
+
+ mmrdy = armv7m_nvic_get_ready_status(nvic, ARMV7M_EXCP_MEM, is_secure);
+ *fpccr = FIELD_DP32(*fpccr, V7M_FPCCR, MMRDY, mmrdy);
+
+ ns_ufrdy = armv7m_nvic_get_ready_status(nvic, ARMV7M_EXCP_USAGE, false);
+ *fpccr_ns = FIELD_DP32(*fpccr_ns, V7M_FPCCR, UFRDY, ns_ufrdy);
+
+ monrdy = armv7m_nvic_get_ready_status(nvic, ARMV7M_EXCP_DEBUG, false);
+ *fpccr_s = FIELD_DP32(*fpccr_s, V7M_FPCCR, MONRDY, monrdy);
+
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ s_ufrdy = armv7m_nvic_get_ready_status(nvic, ARMV7M_EXCP_USAGE, true);
+ *fpccr_s = FIELD_DP32(*fpccr_s, V7M_FPCCR, UFRDY, s_ufrdy);
+
+ sfrdy = armv7m_nvic_get_ready_status(nvic, ARMV7M_EXCP_SECURE, false);
+ *fpccr_s = FIELD_DP32(*fpccr_s, V7M_FPCCR, SFRDY, sfrdy);
+ }
+}
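+
+/*
+ * To summarise the banking applied above: LSPACT, SPLIMVIOL, USER, THREAD
+ * and MMRDY go to the current security state's FPCCR; S, HFRDY, BFRDY,
+ * MONRDY, SFRDY and the Secure UFRDY always go to FPCCR_S; and the
+ * Non-secure UFRDY is always written to FPCCR_NS.
+ */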
+
+void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr)
+{
+ /* fptr is the value of Rn, the frame pointer we store the FP regs to */
+ ARMCPU *cpu = env_archcpu(env);
+ bool s = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
+ bool lspact = env->v7m.fpccr[s] & R_V7M_FPCCR_LSPACT_MASK;
+ uintptr_t ra = GETPC();
+
+ assert(env->v7m.secure);
+
+ if (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)) {
+ return;
+ }
+
+ /* Check access to the coprocessor is permitted */
+ if (!v7m_cpacr_pass(env, true, arm_current_el(env) != 0)) {
+ raise_exception_ra(env, EXCP_NOCP, 0, 1, GETPC());
+ }
+
+ if (lspact) {
+ /* LSPACT should not be active when there is active FP state */
+ raise_exception_ra(env, EXCP_LSERR, 0, 1, GETPC());
+ }
+
+ if (fptr & 7) {
+ raise_exception_ra(env, EXCP_UNALIGNED, 0, 1, GETPC());
+ }
+
+ /*
+ * Note that we do not use v7m_stack_write() here, because the
+ * accesses should not set the FSR bits for stacking errors if they
+ * fail. (In pseudocode terms, they are AccType_NORMAL, not AccType_STACK
+ * or AccType_LAZYFP). Faults in cpu_stl_data_ra() will throw exceptions
+ * and longjmp out.
+ */
+ if (!(env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_LSPEN_MASK)) {
+ bool ts = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_TS_MASK;
+ int i;
+
+ for (i = 0; i < (ts ? 32 : 16); i += 2) {
+ uint64_t dn = *aa32_vfp_dreg(env, i / 2);
+ uint32_t faddr = fptr + 4 * i;
+ uint32_t slo = extract64(dn, 0, 32);
+ uint32_t shi = extract64(dn, 32, 32);
+
+ if (i >= 16) {
+ faddr += 8; /* skip the slot for the FPSCR and VPR */
+ }
+ cpu_stl_data_ra(env, faddr, slo, ra);
+ cpu_stl_data_ra(env, faddr + 4, shi, ra);
+ }
+ cpu_stl_data_ra(env, fptr + 0x40, vfp_get_fpscr(env), ra);
+ if (cpu_isar_feature(aa32_mve, cpu)) {
+ cpu_stl_data_ra(env, fptr + 0x44, env->v7m.vpr, ra);
+ }
+
+ /*
+ * If TS is 0 then s0 to s15, FPSCR and VPR are UNKNOWN; we choose to
+ * leave them unchanged, matching our choice in v7m_preserve_fp_state.
+ */
+ if (ts) {
+ for (i = 0; i < 32; i += 2) {
+ *aa32_vfp_dreg(env, i / 2) = 0;
+ }
+ vfp_set_fpscr(env, 0);
+ if (cpu_isar_feature(aa32_mve, cpu)) {
+ env->v7m.vpr = 0;
+ }
+ }
+ } else {
+ v7m_update_fpccr(env, fptr, false);
+ }
+
+ env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_FPCA_MASK;
+}
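+
+/*
+ * The direct-store path above writes the same layout as the lazy-stacking
+ * code in v7m_preserve_fp_state(): s0..s15 at fptr+0x00..0x3c, FPSCR at
+ * +0x40, VPR at +0x44 (MVE only), and s16..s31 from +0x48 when TS is set.
+ */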
+
+void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ uintptr_t ra = GETPC();
+
+ /* fptr is the value of Rn, the frame pointer we load the FP regs from */
+ assert(env->v7m.secure);
+
+ if (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)) {
+ return;
+ }
+
+ /* Check access to the coprocessor is permitted */
+ if (!v7m_cpacr_pass(env, true, arm_current_el(env) != 0)) {
+ raise_exception_ra(env, EXCP_NOCP, 0, 1, GETPC());
+ }
+
+ if (env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_LSPACT_MASK) {
+ /* State in FP is still valid */
+ env->v7m.fpccr[M_REG_S] &= ~R_V7M_FPCCR_LSPACT_MASK;
+ } else {
+ bool ts = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_TS_MASK;
+ int i;
+ uint32_t fpscr;
+
+ if (fptr & 7) {
+ raise_exception_ra(env, EXCP_UNALIGNED, 0, 1, GETPC());
+ }
+
+ for (i = 0; i < (ts ? 32 : 16); i += 2) {
+ uint32_t slo, shi;
+ uint64_t dn;
+ uint32_t faddr = fptr + 4 * i;
+
+ if (i >= 16) {
+ faddr += 8; /* skip the slot for the FPSCR and VPR */
+ }
+
+ slo = cpu_ldl_data_ra(env, faddr, ra);
+ shi = cpu_ldl_data_ra(env, faddr + 4, ra);
+
+ dn = (uint64_t) shi << 32 | slo;
+ *aa32_vfp_dreg(env, i / 2) = dn;
+ }
+ fpscr = cpu_ldl_data_ra(env, fptr + 0x40, ra);
+ vfp_set_fpscr(env, fpscr);
+ if (cpu_isar_feature(aa32_mve, cpu)) {
+ env->v7m.vpr = cpu_ldl_data_ra(env, fptr + 0x44, ra);
+ }
+ }
+
+ env->v7m.control[M_REG_S] |= R_V7M_CONTROL_FPCA_MASK;
+}
+
+static bool v7m_push_stack(ARMCPU *cpu)
+{
+ /*
+ * Do the "set up stack frame" part of exception entry,
+ * similar to pseudocode PushStack().
+ * Return true if we generate a derived exception (and so
+ * should ignore further stack faults trying to process
+ * that derived exception.)
+ */
+ bool stacked_ok = true, limitviol = false;
+ CPUARMState *env = &cpu->env;
+ uint32_t xpsr = xpsr_read(env);
+ uint32_t frameptr = env->regs[13];
+ ARMMMUIdx mmu_idx = arm_mmu_idx(env);
+ uint32_t framesize;
+ bool nsacr_cp10 = extract32(env->v7m.nsacr, 10, 1);
+
+ if ((env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) &&
+ (env->v7m.secure || nsacr_cp10)) {
+ if (env->v7m.secure &&
+ env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_TS_MASK) {
+ framesize = 0xa8;
+ } else {
+ framesize = 0x68;
+ }
+ } else {
+ framesize = 0x20;
+ }
+
+ /* Align stack pointer if the guest wants that */
+ if ((frameptr & 4) &&
+ (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKALIGN_MASK)) {
+ frameptr -= 4;
+ xpsr |= XPSR_SPREALIGN;
+ }
+
+ xpsr &= ~XPSR_SFPA;
+ if (env->v7m.secure &&
+ (env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)) {
+ xpsr |= XPSR_SFPA;
+ }
+
+ frameptr -= framesize;
+
+ if (arm_feature(env, ARM_FEATURE_V8)) {
+ uint32_t limit = v7m_sp_limit(env);
+
+ if (frameptr < limit) {
+ /*
+ * Stack limit failure: set SP to the limit value, and generate
+ * STKOF UsageFault. Stack pushes below the limit must not be
+ * performed. It is IMPDEF whether pushes above the limit are
+ * performed; we choose not to.
+ */
+ qemu_log_mask(CPU_LOG_INT,
+ "...STKOF during stacking\n");
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+ env->v7m.secure);
+ env->regs[13] = limit;
+ /*
+ * We won't try to perform any further memory accesses but
+ * we must continue through the following code to check for
+ * permission faults during FPU state preservation, and we
+ * must update FPCCR if lazy stacking is enabled.
+ */
+ limitviol = true;
+ stacked_ok = false;
+ }
+ }
+
+ /*
+ * Write as much of the stack frame as we can. If we fail a stack
+ * write this will result in a derived exception being pended
+ * (which may be taken in preference to the one we started with
+ * if it has higher priority).
+ */
+ stacked_ok = stacked_ok &&
+ v7m_stack_write(cpu, frameptr, env->regs[0], mmu_idx, STACK_NORMAL) &&
+ v7m_stack_write(cpu, frameptr + 4, env->regs[1],
+ mmu_idx, STACK_NORMAL) &&
+ v7m_stack_write(cpu, frameptr + 8, env->regs[2],
+ mmu_idx, STACK_NORMAL) &&
+ v7m_stack_write(cpu, frameptr + 12, env->regs[3],
+ mmu_idx, STACK_NORMAL) &&
+ v7m_stack_write(cpu, frameptr + 16, env->regs[12],
+ mmu_idx, STACK_NORMAL) &&
+ v7m_stack_write(cpu, frameptr + 20, env->regs[14],
+ mmu_idx, STACK_NORMAL) &&
+ v7m_stack_write(cpu, frameptr + 24, env->regs[15],
+ mmu_idx, STACK_NORMAL) &&
+ v7m_stack_write(cpu, frameptr + 28, xpsr, mmu_idx, STACK_NORMAL);
+
+ if (env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) {
+ /* FPU is active, try to save its registers */
+ bool fpccr_s = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
+ bool lspact = env->v7m.fpccr[fpccr_s] & R_V7M_FPCCR_LSPACT_MASK;
+
+ if (lspact && arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ qemu_log_mask(CPU_LOG_INT,
+ "...SecureFault because LSPACT and FPCA both set\n");
+ env->v7m.sfsr |= R_V7M_SFSR_LSERR_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
+ } else if (!env->v7m.secure && !nsacr_cp10) {
+ qemu_log_mask(CPU_LOG_INT,
+ "...Secure UsageFault with CFSR.NOCP because "
+ "NSACR.CP10 prevents stacking FP regs\n");
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, M_REG_S);
+ env->v7m.cfsr[M_REG_S] |= R_V7M_CFSR_NOCP_MASK;
+ } else {
+ if (!(env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_LSPEN_MASK)) {
+ /* Lazy stacking disabled, save registers now */
+ int i;
+ bool cpacr_pass = v7m_cpacr_pass(env, env->v7m.secure,
+ arm_current_el(env) != 0);
+
+ if (stacked_ok && !cpacr_pass) {
+ /*
+ * Take UsageFault if CPACR forbids access. The pseudocode
+ * here does a full CheckCPEnabled() but we know the NSACR
+ * check can never fail as we have already handled that.
+ */
+ qemu_log_mask(CPU_LOG_INT,
+ "...UsageFault with CFSR.NOCP because "
+ "CPACR.CP10 prevents stacking FP regs\n");
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+ env->v7m.secure);
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_NOCP_MASK;
+ stacked_ok = false;
+ }
+
+ for (i = 0; i < ((framesize == 0xa8) ? 32 : 16); i += 2) {
+ uint64_t dn = *aa32_vfp_dreg(env, i / 2);
+ uint32_t faddr = frameptr + 0x20 + 4 * i;
+ uint32_t slo = extract64(dn, 0, 32);
+ uint32_t shi = extract64(dn, 32, 32);
+
+ if (i >= 16) {
+ faddr += 8; /* skip the slot for the FPSCR and VPR */
+ }
+ stacked_ok = stacked_ok &&
+ v7m_stack_write(cpu, faddr, slo,
+ mmu_idx, STACK_NORMAL) &&
+ v7m_stack_write(cpu, faddr + 4, shi,
+ mmu_idx, STACK_NORMAL);
+ }
+ stacked_ok = stacked_ok &&
+ v7m_stack_write(cpu, frameptr + 0x60,
+ vfp_get_fpscr(env), mmu_idx, STACK_NORMAL);
+ if (cpu_isar_feature(aa32_mve, cpu)) {
+ stacked_ok = stacked_ok &&
+ v7m_stack_write(cpu, frameptr + 0x64,
+ env->v7m.vpr, mmu_idx, STACK_NORMAL);
+ }
+ if (cpacr_pass) {
+ for (i = 0; i < ((framesize == 0xa8) ? 32 : 16); i += 2) {
+ *aa32_vfp_dreg(env, i / 2) = 0;
+ }
+ vfp_set_fpscr(env, 0);
+ if (cpu_isar_feature(aa32_mve, cpu)) {
+ env->v7m.vpr = 0;
+ }
+ }
+ } else {
+ /* Lazy stacking enabled, save necessary info to stack later */
+ v7m_update_fpccr(env, frameptr + 0x20, true);
+ }
+ }
+ }
+
+ /*
+ * If we broke a stack limit then SP was already updated earlier;
+ * otherwise we update SP regardless of whether any of the stack
+ * accesses failed or we took some other kind of fault.
+ */
+ if (!limitviol) {
+ env->regs[13] = frameptr;
+ }
+
+ return !stacked_ok;
+}
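+
+/*
+ * Summary of the frame sizes chosen above: 0x20 is the basic frame
+ * (r0-r3, r12, lr, pc and xPSR at +0x00..0x1c); 0x68 adds s0-s15 at
+ * +0x20..0x5c, FPSCR at +0x60 and VPR at +0x64 (MVE only); 0xa8 also
+ * stacks s16-s31 at +0x68..0xa4 (Secure with FPCCR_S.TS set).
+ */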
+
+static void do_v7m_exception_exit(ARMCPU *cpu)
+{
+ CPUARMState *env = &cpu->env;
+ uint32_t excret;
+ uint32_t xpsr, xpsr_mask;
+ bool ufault = false;
+ bool sfault = false;
+ bool return_to_sp_process;
+ bool return_to_handler;
+ bool rettobase = false;
+ bool exc_secure = false;
+ bool return_to_secure;
+ bool ftype;
+ bool restore_s16_s31 = false;
+
+ /*
+ * If we're not in Handler mode then jumps to magic exception-exit
+ * addresses don't have magic behaviour. However for the v8M
+ * security extensions the magic secure-function-return has to
+ * work in thread mode too, so to avoid doing an extra check in
+ * the generated code we allow exception-exit magic to also cause the
+ * internal exception and bring us here in thread mode. Correct code
+ * will never try to do this (the following insn fetch will always
+ * fault) so the overhead of having taken an unnecessary exception
+ * doesn't matter.
+ */
+ if (!arm_v7m_is_handler_mode(env)) {
+ return;
+ }
+
+ /*
+ * In the spec pseudocode ExceptionReturn() is called directly
+ * from BXWritePC() and gets the full target PC value including
+ * bit zero. In QEMU's implementation we treat it as a normal
+ * jump-to-register (which is then caught later on), and so split
+ * the target value up between env->regs[15] and env->thumb in
+ * gen_bx(). Reconstitute it.
+ */
+ excret = env->regs[15];
+ if (env->thumb) {
+ excret |= 1;
+ }
+
+ qemu_log_mask(CPU_LOG_INT, "Exception return: magic PC %" PRIx32
+ " previous exception %d\n",
+ excret, env->v7m.exception);
+
+ if ((excret & R_V7M_EXCRET_RES1_MASK) != R_V7M_EXCRET_RES1_MASK) {
+ qemu_log_mask(LOG_GUEST_ERROR, "M profile: zero high bits in exception "
+ "exit PC value 0x%" PRIx32 " are UNPREDICTABLE\n",
+ excret);
+ }
+
+ ftype = excret & R_V7M_EXCRET_FTYPE_MASK;
+
+ if (!ftype && !cpu_isar_feature(aa32_vfp_simd, cpu)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "M profile: zero FTYPE in exception "
+ "exit PC value 0x%" PRIx32 " is UNPREDICTABLE "
+ "if FPU not present\n",
+ excret);
+ ftype = true;
+ }
+
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ /*
+ * EXC_RETURN.ES validation check (R_SMFL). We must do this before
+ * we pick which FAULTMASK to clear.
+ */
+ if (!env->v7m.secure &&
+ ((excret & R_V7M_EXCRET_ES_MASK) ||
+ !(excret & R_V7M_EXCRET_DCRS_MASK))) {
+ sfault = true;
+ /* For all other purposes, treat ES as 0 (R_HXSR) */
+ excret &= ~R_V7M_EXCRET_ES_MASK;
+ }
+ exc_secure = excret & R_V7M_EXCRET_ES_MASK;
+ }
+
+ if (env->v7m.exception != ARMV7M_EXCP_NMI) {
+ /*
+ * Auto-clear FAULTMASK on return from other than NMI.
+ * If the security extension is implemented then this only
+ * happens if the raw execution priority is >= 0; the
+ * value of the ES bit in the exception return value indicates
+ * which security state's faultmask to clear. (v8M ARM ARM R_KBNF.)
+ */
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ if (armv7m_nvic_raw_execution_priority(env->nvic) >= 0) {
+ env->v7m.faultmask[exc_secure] = 0;
+ }
+ } else {
+ env->v7m.faultmask[M_REG_NS] = 0;
+ }
+ }
+
+ switch (armv7m_nvic_complete_irq(env->nvic, env->v7m.exception,
+ exc_secure)) {
+ case -1:
+ /* attempt to exit an exception that isn't active */
+ ufault = true;
+ break;
+ case 0:
+ /* still an irq active now */
+ break;
+ case 1:
+ /*
+ * We returned to base exception level, no nesting.
+ * (In the pseudocode this is written using "NestedActivation != 1"
+ * where we have 'rettobase == false'.)
+ */
+ rettobase = true;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ return_to_handler = !(excret & R_V7M_EXCRET_MODE_MASK);
+ return_to_sp_process = excret & R_V7M_EXCRET_SPSEL_MASK;
+ return_to_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
+ (excret & R_V7M_EXCRET_S_MASK);
+
+ if (arm_feature(env, ARM_FEATURE_V8)) {
+ if (!arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ /*
+ * UNPREDICTABLE if S == 1 or DCRS == 0 or ES == 1 (R_XLCP);
+ * we choose to take the UsageFault.
+ */
+ if ((excret & R_V7M_EXCRET_S_MASK) ||
+ (excret & R_V7M_EXCRET_ES_MASK) ||
+ !(excret & R_V7M_EXCRET_DCRS_MASK)) {
+ ufault = true;
+ }
+ }
+ if (excret & R_V7M_EXCRET_RES0_MASK) {
+ ufault = true;
+ }
+ } else {
+ /* For v7M we only recognize certain combinations of the low bits */
+ switch (excret & 0xf) {
+ case 1: /* Return to Handler */
+ break;
+ case 13: /* Return to Thread using Process stack */
+ case 9: /* Return to Thread using Main stack */
+ /*
+ * We only need to check NONBASETHRDENA for v7M, because in
+ * v8M this bit does not exist (it is RES1).
+ */
+ if (!rettobase &&
+ !(env->v7m.ccr[env->v7m.secure] &
+ R_V7M_CCR_NONBASETHRDENA_MASK)) {
+ ufault = true;
+ }
+ break;
+ default:
+ ufault = true;
+ }
+ }
+
+ /*
+ * Set CONTROL.SPSEL from excret.SPSEL. Since we're still in
+ * Handler mode (and will be until we write the new XPSR.Interrupt
+ * field) this does not switch around the current stack pointer.
+ * We must do this before we do any kind of tailchaining, including
+ * for the derived exceptions on integrity check failures, or we will
+ * give the guest an incorrect EXCRET.SPSEL value on exception entry.
+ */
+ write_v7m_control_spsel_for_secstate(env, return_to_sp_process, exc_secure);
+
+ /*
+ * Clear scratch FP values left in caller saved registers; this
+ * must happen before any kind of tail chaining.
+ */
+ if ((env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_CLRONRET_MASK) &&
+ (env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK)) {
+ if (env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_LSPACT_MASK) {
+ env->v7m.sfsr |= R_V7M_SFSR_LSERR_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
+ qemu_log_mask(CPU_LOG_INT, "...taking SecureFault on existing "
+ "stackframe: error during lazy state deactivation\n");
+ v7m_exception_taken(cpu, excret, true, false);
+ return;
+ } else {
+ if (arm_feature(env, ARM_FEATURE_V8_1M)) {
+ /* v8.1M adds this NOCP check */
+ bool nsacr_pass = exc_secure ||
+ extract32(env->v7m.nsacr, 10, 1);
+ bool cpacr_pass = v7m_cpacr_pass(env, exc_secure, true);
+ if (!nsacr_pass) {
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, true);
+ env->v7m.cfsr[M_REG_S] |= R_V7M_CFSR_NOCP_MASK;
+ qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing "
+ "stackframe: NSACR prevents clearing FPU registers\n");
+ v7m_exception_taken(cpu, excret, true, false);
+ return;
+ } else if (!cpacr_pass) {
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+ exc_secure);
+ env->v7m.cfsr[exc_secure] |= R_V7M_CFSR_NOCP_MASK;
+ qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing "
+ "stackframe: CPACR prevents clearing FPU registers\n");
+ v7m_exception_taken(cpu, excret, true, false);
+ return;
+ }
+ }
+ /* Clear s0..s15, FPSCR and VPR */
+ int i;
+
+ for (i = 0; i < 16; i += 2) {
+ *aa32_vfp_dreg(env, i / 2) = 0;
+ }
+ vfp_set_fpscr(env, 0);
+ if (cpu_isar_feature(aa32_mve, cpu)) {
+ env->v7m.vpr = 0;
+ }
+ }
+ }
+
+ if (sfault) {
+ env->v7m.sfsr |= R_V7M_SFSR_INVER_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
+ qemu_log_mask(CPU_LOG_INT, "...taking SecureFault on existing "
+ "stackframe: failed EXC_RETURN.ES validity check\n");
+ v7m_exception_taken(cpu, excret, true, false);
+ return;
+ }
+
+ if (ufault) {
+ /*
+ * Bad exception return: instead of popping the exception
+ * stack, directly take a usage fault on the current stack.
+ */
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
+ qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing "
+ "stackframe: failed exception return integrity check\n");
+ v7m_exception_taken(cpu, excret, true, false);
+ return;
+ }
+
+ /*
+ * Tailchaining: if there is currently a pending exception that
+ * is high enough priority to preempt execution at the level we're
+ * about to return to, then just directly take that exception now,
+ * avoiding an unstack-and-then-stack. Note that now we have
+ * deactivated the previous exception by calling armv7m_nvic_complete_irq()
+ * our current execution priority is already the execution priority we are
+ * returning to -- none of the state we would unstack or set based on
+ * the EXCRET value affects it.
+ */
+ if (armv7m_nvic_can_take_pending_exception(env->nvic)) {
+ qemu_log_mask(CPU_LOG_INT, "...tailchaining to pending exception\n");
+ v7m_exception_taken(cpu, excret, true, false);
+ return;
+ }
+
+ switch_v7m_security_state(env, return_to_secure);
+
+ {
+ /*
+ * The stack pointer we should be reading the exception frame from
+ * depends on bits in the magic exception return type value (and
+ * for v8M isn't necessarily the stack pointer we will eventually
+ * end up resuming execution with). Get a pointer to the location
+ * in the CPU state struct where the SP we need is currently being
+ * stored; we will use and modify it in place.
+ * We use this limited C variable scope so we don't accidentally
+ * use 'frame_sp_p' after we do something that makes it invalid.
+ */
+ bool spsel = env->v7m.control[return_to_secure] & R_V7M_CONTROL_SPSEL_MASK;
+ uint32_t *frame_sp_p = arm_v7m_get_sp_ptr(env, return_to_secure,
+ !return_to_handler, spsel);
+ uint32_t frameptr = *frame_sp_p;
+ bool pop_ok = true;
+ ARMMMUIdx mmu_idx;
+ bool return_to_priv = return_to_handler ||
+ !(env->v7m.control[return_to_secure] & R_V7M_CONTROL_NPRIV_MASK);
+
+ mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, return_to_secure,
+ return_to_priv);
+
+ if (!QEMU_IS_ALIGNED(frameptr, 8) &&
+ arm_feature(env, ARM_FEATURE_V8)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "M profile exception return with non-8-aligned SP "
+ "for destination state is UNPREDICTABLE\n");
+ }
+
+ /* Do we need to pop callee-saved registers? */
+ if (return_to_secure &&
+ ((excret & R_V7M_EXCRET_ES_MASK) == 0 ||
+ (excret & R_V7M_EXCRET_DCRS_MASK) == 0)) {
+ uint32_t actual_sig;
+
+ pop_ok = v7m_stack_read(cpu, &actual_sig, frameptr, mmu_idx);
+
+ if (pop_ok && v7m_integrity_sig(env, excret) != actual_sig) {
+ /* Take a SecureFault on the current stack */
+ env->v7m.sfsr |= R_V7M_SFSR_INVIS_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
+ qemu_log_mask(CPU_LOG_INT, "...taking SecureFault on existing "
+ "stackframe: failed exception return integrity "
+ "signature check\n");
+ v7m_exception_taken(cpu, excret, true, false);
+ return;
+ }
+
+ pop_ok = pop_ok &&
+ v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[5], frameptr + 0xc, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[6], frameptr + 0x10, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[7], frameptr + 0x14, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[8], frameptr + 0x18, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[9], frameptr + 0x1c, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[10], frameptr + 0x20, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[11], frameptr + 0x24, mmu_idx);
+
+ frameptr += 0x28;
+ }
+
+ /* Pop registers */
+ pop_ok = pop_ok &&
+ v7m_stack_read(cpu, &env->regs[0], frameptr, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[1], frameptr + 0x4, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[2], frameptr + 0x8, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[3], frameptr + 0xc, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[12], frameptr + 0x10, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[14], frameptr + 0x14, mmu_idx) &&
+ v7m_stack_read(cpu, &env->regs[15], frameptr + 0x18, mmu_idx) &&
+ v7m_stack_read(cpu, &xpsr, frameptr + 0x1c, mmu_idx);
+
+ if (!pop_ok) {
+ /*
+ * v7m_stack_read() pended a fault, so take it (as a tail
+ * chained exception on the same stack frame)
+ */
+ qemu_log_mask(CPU_LOG_INT, "...derived exception on unstacking\n");
+ v7m_exception_taken(cpu, excret, true, false);
+ return;
+ }
+
+ /*
+ * Returning from an exception with a PC with bit 0 set is defined
+ * behaviour on v8M (bit 0 is ignored), but for v7M it was specified
+ * to be UNPREDICTABLE. In practice actual v7M hardware seems to ignore
+ * the lsbit, and there are several RTOSes out there which incorrectly
+ * assume the r15 in the stack frame should be a Thumb-style "lsbit
+ * indicates ARM/Thumb" value, so ignore the bit on v7M as well, but
+ * complain about the badly behaved guest.
+ */
+ if (env->regs[15] & 1) {
+ env->regs[15] &= ~1U;
+ if (!arm_feature(env, ARM_FEATURE_V8)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "M profile return from interrupt with misaligned "
+ "PC is UNPREDICTABLE on v7M\n");
+ }
+ }
+
+ if (arm_feature(env, ARM_FEATURE_V8)) {
+ /*
+ * For v8M we have to check whether the xPSR exception field
+ * matches the EXCRET value for return to handler/thread
+ * before we commit to changing the SP and xPSR.
+ */
+ bool will_be_handler = (xpsr & XPSR_EXCP) != 0;
+ if (return_to_handler != will_be_handler) {
+ /*
+ * Take an INVPC UsageFault on the current stack.
+ * By this point we will have switched to the security state
+ * for the background state, so this UsageFault will target
+ * that state.
+ */
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+ env->v7m.secure);
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK;
+ qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing "
+ "stackframe: failed exception return integrity "
+ "check\n");
+ v7m_exception_taken(cpu, excret, true, false);
+ return;
+ }
+ }
+
+ if (!ftype) {
+ /* FP present and we need to handle it */
+ if (!return_to_secure &&
+ (env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_LSPACT_MASK)) {
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
+ env->v7m.sfsr |= R_V7M_SFSR_LSERR_MASK;
+ qemu_log_mask(CPU_LOG_INT,
+ "...taking SecureFault on existing stackframe: "
+ "Secure LSPACT set but exception return is "
+ "not to secure state\n");
+ v7m_exception_taken(cpu, excret, true, false);
+ return;
+ }
+
+ restore_s16_s31 = return_to_secure &&
+ (env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_TS_MASK);
+
+ if (env->v7m.fpccr[return_to_secure] & R_V7M_FPCCR_LSPACT_MASK) {
+ /* State in FPU is still valid, just clear LSPACT */
+ env->v7m.fpccr[return_to_secure] &= ~R_V7M_FPCCR_LSPACT_MASK;
+ } else {
+ int i;
+ uint32_t fpscr;
+ bool cpacr_pass, nsacr_pass;
+
+ cpacr_pass = v7m_cpacr_pass(env, return_to_secure,
+ return_to_priv);
+ nsacr_pass = return_to_secure ||
+ extract32(env->v7m.nsacr, 10, 1);
+
+ if (!cpacr_pass) {
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+ return_to_secure);
+ env->v7m.cfsr[return_to_secure] |= R_V7M_CFSR_NOCP_MASK;
+ qemu_log_mask(CPU_LOG_INT,
+ "...taking UsageFault on existing "
+ "stackframe: CPACR.CP10 prevents unstacking "
+ "FP regs\n");
+ v7m_exception_taken(cpu, excret, true, false);
+ return;
+ } else if (!nsacr_pass) {
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, true);
+ env->v7m.cfsr[M_REG_S] |= R_V7M_CFSR_INVPC_MASK;
+ qemu_log_mask(CPU_LOG_INT,
+ "...taking Secure UsageFault on existing "
+ "stackframe: NSACR.CP10 prevents unstacking "
+ "FP regs\n");
+ v7m_exception_taken(cpu, excret, true, false);
+ return;
+ }
+
+ for (i = 0; i < (restore_s16_s31 ? 32 : 16); i += 2) {
+ uint32_t slo, shi;
+ uint64_t dn;
+ uint32_t faddr = frameptr + 0x20 + 4 * i;
+
+ if (i >= 16) {
+ faddr += 8; /* Skip the slot for the FPSCR and VPR */
+ }
+
+ pop_ok = pop_ok &&
+ v7m_stack_read(cpu, &slo, faddr, mmu_idx) &&
+ v7m_stack_read(cpu, &shi, faddr + 4, mmu_idx);
+
+ if (!pop_ok) {
+ break;
+ }
+
+ dn = (uint64_t)shi << 32 | slo;
+ *aa32_vfp_dreg(env, i / 2) = dn;
+ }
+ pop_ok = pop_ok &&
+ v7m_stack_read(cpu, &fpscr, frameptr + 0x60, mmu_idx);
+ if (pop_ok) {
+ vfp_set_fpscr(env, fpscr);
+ }
+ if (cpu_isar_feature(aa32_mve, cpu)) {
+ pop_ok = pop_ok &&
+ v7m_stack_read(cpu, &env->v7m.vpr,
+ frameptr + 0x64, mmu_idx);
+ }
+ if (!pop_ok) {
+ /*
+ * These regs are 0 if security extension present;
+ * otherwise merely UNKNOWN. We zero always.
+ */
+ for (i = 0; i < (restore_s16_s31 ? 32 : 16); i += 2) {
+ *aa32_vfp_dreg(env, i / 2) = 0;
+ }
+ vfp_set_fpscr(env, 0);
+ if (cpu_isar_feature(aa32_mve, cpu)) {
+ env->v7m.vpr = 0;
+ }
+ }
+ }
+ }
+ env->v7m.control[M_REG_S] = FIELD_DP32(env->v7m.control[M_REG_S],
+ V7M_CONTROL, FPCA, !ftype);
+
+ /* Commit to consuming the stack frame */
+ frameptr += 0x20;
+ if (!ftype) {
+ frameptr += 0x48;
+ if (restore_s16_s31) {
+ frameptr += 0x40;
+ }
+ }
+ /*
+ * Undo stack alignment (the SPREALIGN bit indicates that the original
+ * pre-exception SP was not 8-aligned and we added a padding word to
+ * align it, so we undo this by ORing in the bit that increases it
+ * from the current 8-aligned value to the 8-unaligned value. (Adding 4
+ * would work too but a logical OR is how the pseudocode specifies it.)
+ */
+ if (xpsr & XPSR_SPREALIGN) {
+ frameptr |= 4;
+ }
+ *frame_sp_p = frameptr;
+ }
+
+ xpsr_mask = ~(XPSR_SPREALIGN | XPSR_SFPA);
+ if (!arm_feature(env, ARM_FEATURE_THUMB_DSP)) {
+ xpsr_mask &= ~XPSR_GE;
+ }
+ /* This xpsr_write() will invalidate frame_sp_p as it may switch stack */
+ xpsr_write(env, xpsr, xpsr_mask);
+
+ if (env->v7m.secure) {
+ bool sfpa = xpsr & XPSR_SFPA;
+
+ env->v7m.control[M_REG_S] = FIELD_DP32(env->v7m.control[M_REG_S],
+ V7M_CONTROL, SFPA, sfpa);
+ }
+
+ /*
+ * The restored xPSR exception field will be zero if we're
+ * resuming in Thread mode. If that doesn't match what the
+ * exception return excret specified then this is a UsageFault.
+ * v7M requires we make this check here; v8M did it earlier.
+ */
+ if (return_to_handler != arm_v7m_is_handler_mode(env)) {
+ /*
+ * Take an INVPC UsageFault by pushing the stack again;
+ * we know we're v7M so this is never a Secure UsageFault.
+ */
+ bool ignore_stackfaults;
+
+ assert(!arm_feature(env, ARM_FEATURE_V8));
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, false);
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK;
+ ignore_stackfaults = v7m_push_stack(cpu);
+ qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on new stackframe: "
+ "failed exception return integrity check\n");
+ v7m_exception_taken(cpu, excret, false, ignore_stackfaults);
+ return;
+ }
+
+ /* Otherwise, we have a successful exception exit. */
+ arm_clear_exclusive(env);
+ arm_rebuild_hflags(env);
+ qemu_log_mask(CPU_LOG_INT, "...successful exception return\n");
+}
+
+static bool do_v7m_function_return(ARMCPU *cpu)
+{
+ /*
+ * v8M security extensions magic function return.
+ * We may either:
+ * (1) throw an exception (longjump)
+ * (2) return true if we successfully handled the function return
+ * (3) return false if we failed a consistency check and have
+ * pended a UsageFault that needs to be taken now
+ *
+ * At this point the magic return value is split between env->regs[15]
+ * and env->thumb. We don't bother to reconstitute it because we don't
+ * need it (all values are handled the same way).
+ */
+ CPUARMState *env = &cpu->env;
+ uint32_t newpc, newpsr, newpsr_exc;
+
+ qemu_log_mask(CPU_LOG_INT, "...really v7M secure function return\n");
+
+ {
+ bool threadmode, spsel;
+ MemOpIdx oi;
+ ARMMMUIdx mmu_idx;
+ uint32_t *frame_sp_p;
+ uint32_t frameptr;
+
+ /* Pull the return address and IPSR from the Secure stack */
+ threadmode = !arm_v7m_is_handler_mode(env);
+ spsel = env->v7m.control[M_REG_S] & R_V7M_CONTROL_SPSEL_MASK;
+
+ frame_sp_p = arm_v7m_get_sp_ptr(env, true, threadmode, spsel);
+ frameptr = *frame_sp_p;
+
+ /*
+ * These loads may throw an exception (for MPU faults). We want to
+ * do them as secure, so work out what MMU index that is.
+ */
+ mmu_idx = arm_v7m_mmu_idx_for_secstate(env, true);
+ oi = make_memop_idx(MO_LEUL, arm_to_core_mmu_idx(mmu_idx));
+ newpc = cpu_ldl_mmu(env, frameptr, oi, 0);
+ newpsr = cpu_ldl_mmu(env, frameptr + 4, oi, 0);
+
+ /* Consistency checks on new IPSR */
+ newpsr_exc = newpsr & XPSR_EXCP;
+ if (!((env->v7m.exception == 0 && newpsr_exc == 0) ||
+ (env->v7m.exception == 1 && newpsr_exc != 0))) {
+ /* Pend the fault and tell our caller to take it */
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+ env->v7m.secure);
+ qemu_log_mask(CPU_LOG_INT,
+ "...taking INVPC UsageFault: "
+ "IPSR consistency check failed\n");
+ return false;
+ }
+
+ *frame_sp_p = frameptr + 8;
+ }
+
+ /* This invalidates frame_sp_p */
+ switch_v7m_security_state(env, true);
+ env->v7m.exception = newpsr_exc;
+ env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_SFPA_MASK;
+ if (newpsr & XPSR_SFPA) {
+ env->v7m.control[M_REG_S] |= R_V7M_CONTROL_SFPA_MASK;
+ }
+ xpsr_write(env, 0, XPSR_IT);
+ env->thumb = newpc & 1;
+ env->regs[15] = newpc & ~1;
+ arm_rebuild_hflags(env);
+
+ qemu_log_mask(CPU_LOG_INT, "...function return successful\n");
+ return true;
+}
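+
+/*
+ * The two words popped above are the partial frame pushed by
+ * HELPER(v7m_blxns): the return address at frameptr and the saved
+ * partial xPSR (exception number plus SFPA) at frameptr + 4.
+ */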
+
+static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, bool secure,
+ uint32_t addr, uint16_t *insn)
+{
+ /*
+ * Load a 16-bit portion of a v7M instruction, returning true on success,
+ * or false on failure (in which case we will have pended the appropriate
+ * exception).
+ * We need to do the instruction fetch's MPU and SAU checks
+ * like this because there is no MMU index that would allow
+ * doing the load with a single function call. Instead we must
+ * first check that the security attributes permit the load
+ * and that they don't mismatch on the two halves of the instruction,
+ * and then we do the load as a secure load (ie using the security
+ * attributes of the address, not the CPU, as architecturally required).
+ */
+ CPUState *cs = CPU(cpu);
+ CPUARMState *env = &cpu->env;
+ V8M_SAttributes sattrs = {};
+ GetPhysAddrResult res = {};
+ ARMMMUFaultInfo fi = {};
+ MemTxResult txres;
+
+ v8m_security_lookup(env, addr, MMU_INST_FETCH, mmu_idx, secure, &sattrs);
+ if (!sattrs.nsc || sattrs.ns) {
+ /*
+ * This must be the second half of the insn, and it straddles a
+ * region boundary with the second half not being S&NSC.
+ */
+ env->v7m.sfsr |= R_V7M_SFSR_INVEP_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
+ qemu_log_mask(CPU_LOG_INT,
+ "...really SecureFault with SFSR.INVEP\n");
+ return false;
+ }
+ if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx, &res, &fi)) {
+ /* the MPU lookup failed */
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_IACCVIOL_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM, env->v7m.secure);
+ qemu_log_mask(CPU_LOG_INT, "...really MemManage with CFSR.IACCVIOL\n");
+ return false;
+ }
+ *insn = address_space_lduw_le(arm_addressspace(cs, res.f.attrs),
+ res.f.phys_addr, res.f.attrs, &txres);
+ if (txres != MEMTX_OK) {
+ env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_IBUSERR_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_BUS, false);
+ qemu_log_mask(CPU_LOG_INT, "...really BusFault with CFSR.IBUSERR\n");
+ return false;
+ }
+ return true;
+}
+
+static bool v7m_read_sg_stack_word(ARMCPU *cpu, ARMMMUIdx mmu_idx,
+ uint32_t addr, uint32_t *spdata)
+{
+ /*
+ * Read a word of data from the stack for the SG instruction,
+ * writing the value into *spdata. If the load succeeds, return
+ * true; otherwise pend an appropriate exception and return false.
+ * (We can't use data load helpers here that throw an exception
+ * because of the context we're called in, which is halfway through
+ * arm_v7m_cpu_do_interrupt().)
+ */
+ CPUState *cs = CPU(cpu);
+ CPUARMState *env = &cpu->env;
+ MemTxResult txres;
+ GetPhysAddrResult res = {};
+ ARMMMUFaultInfo fi = {};
+ uint32_t value;
+
+ if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &res, &fi)) {
+ /* MPU/SAU lookup failed */
+ if (fi.type == ARMFault_QEMU_SFault) {
+ qemu_log_mask(CPU_LOG_INT,
+ "...SecureFault during stack word read\n");
+ env->v7m.sfsr |= R_V7M_SFSR_AUVIOL_MASK | R_V7M_SFSR_SFARVALID_MASK;
+ env->v7m.sfar = addr;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
+ } else {
+ qemu_log_mask(CPU_LOG_INT,
+ "...MemManageFault during stack word read\n");
+ env->v7m.cfsr[M_REG_S] |= R_V7M_CFSR_DACCVIOL_MASK |
+ R_V7M_CFSR_MMARVALID_MASK;
+ env->v7m.mmfar[M_REG_S] = addr;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM, false);
+ }
+ return false;
+ }
+ value = address_space_ldl(arm_addressspace(cs, res.f.attrs),
+ res.f.phys_addr, res.f.attrs, &txres);
+ if (txres != MEMTX_OK) {
+ /* BusFault trying to read the data */
+ qemu_log_mask(CPU_LOG_INT,
+ "...BusFault during stack word read\n");
+ env->v7m.cfsr[M_REG_NS] |=
+ (R_V7M_CFSR_PRECISERR_MASK | R_V7M_CFSR_BFARVALID_MASK);
+ env->v7m.bfar = addr;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_BUS, false);
+ return false;
+ }
+
+ *spdata = value;
+ return true;
+}
+
+static bool v7m_handle_execute_nsc(ARMCPU *cpu)
+{
+ /*
+ * Check whether this attempt to execute code in a Secure & NS-Callable
+ * memory region is for an SG instruction; if so, then emulate the
+ * effect of the SG instruction and return true. Otherwise pend
+ * the correct kind of exception and return false.
+ */
+ CPUARMState *env = &cpu->env;
+ ARMMMUIdx mmu_idx;
+ uint16_t insn;
+
+ /*
+ * We should never get here unless get_phys_addr_pmsav8() caused
+ * an exception for NS executing in S&NSC memory.
+ */
+ assert(!env->v7m.secure);
+ assert(arm_feature(env, ARM_FEATURE_M_SECURITY));
+
+ /* We want to do the MPU lookup as secure; work out what mmu_idx that is */
+ mmu_idx = arm_v7m_mmu_idx_for_secstate(env, true);
+
+ if (!v7m_read_half_insn(cpu, mmu_idx, true, env->regs[15], &insn)) {
+ return false;
+ }
+
+ if (!env->thumb) {
+ goto gen_invep;
+ }
+
+ if (insn != 0xe97f) {
+ /*
+ * Not an SG instruction first half (we choose the IMPDEF
+ * early-SG-check option).
+ */
+ goto gen_invep;
+ }
+
+ if (!v7m_read_half_insn(cpu, mmu_idx, true, env->regs[15] + 2, &insn)) {
+ return false;
+ }
+
+ if (insn != 0xe97f) {
+ /*
+ * Not an SG instruction second half (yes, both halves of the SG
+ * insn have the same hex value)
+ */
+ goto gen_invep;
+ }
+
+ /*
+ * OK, we have confirmed that we really have an SG instruction.
+ * We know we're NS in S memory so don't need to repeat those checks.
+ */
+ qemu_log_mask(CPU_LOG_INT, "...really an SG instruction at 0x%08" PRIx32
+ ", executing it\n", env->regs[15]);
+
+ if (cpu_isar_feature(aa32_m_sec_state, cpu) &&
+ !arm_v7m_is_handler_mode(env)) {
+ /*
+ * v8.1M exception stack frame integrity check. Note that we
+ * must perform the memory access even if CCR_S.TRD is zero
+ * and we aren't going to check what the data loaded is.
+ */
+ uint32_t spdata, sp;
+
+ /*
+ * We know we are currently NS, so the S stack pointers must be
+ * in other_ss_{psp,msp}, not in regs[13]/other_sp.
+ */
+ sp = v7m_using_psp(env) ? env->v7m.other_ss_psp : env->v7m.other_ss_msp;
+ if (!v7m_read_sg_stack_word(cpu, mmu_idx, sp, &spdata)) {
+ /* Stack access failed and an exception has been pended */
+ return false;
+ }
+
+ if (env->v7m.ccr[M_REG_S] & R_V7M_CCR_TRD_MASK) {
+ if (((spdata & ~1) == 0xfefa125a) ||
+ !(env->v7m.control[M_REG_S] & 1)) {
+ goto gen_invep;
+ }
+ }
+ }
+
+ env->regs[14] &= ~1;
+ env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_SFPA_MASK;
+ switch_v7m_security_state(env, true);
+ xpsr_write(env, 0, XPSR_IT);
+ env->regs[15] += 4;
+ arm_rebuild_hflags(env);
+ return true;
+
+gen_invep:
+ env->v7m.sfsr |= R_V7M_SFSR_INVEP_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
+ qemu_log_mask(CPU_LOG_INT,
+ "...really SecureFault with SFSR.INVEP\n");
+ return false;
+}
+
+void arm_v7m_cpu_do_interrupt(CPUState *cs)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
+ uint32_t lr;
+ bool ignore_stackfaults;
+
+ arm_log_exception(cs);
+
+ /*
+ * For exceptions we just mark as pending on the NVIC, and let that
+ * handle it.
+ */
+ switch (cs->exception_index) {
+ case EXCP_UDEF:
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_UNDEFINSTR_MASK;
+ break;
+ case EXCP_NOCP:
+ {
+ /*
+ * NOCP might be directed to something other than the current
+ * security state if this fault is because of NSACR; we indicate
+ * the target security state using exception.target_el.
+ */
+ int target_secstate;
+
+ if (env->exception.target_el == 3) {
+ target_secstate = M_REG_S;
+ } else {
+ target_secstate = env->v7m.secure;
+ }
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, target_secstate);
+ env->v7m.cfsr[target_secstate] |= R_V7M_CFSR_NOCP_MASK;
+ break;
+ }
+ case EXCP_INVSTATE:
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVSTATE_MASK;
+ break;
+ case EXCP_STKOF:
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
+ break;
+ case EXCP_LSERR:
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
+ env->v7m.sfsr |= R_V7M_SFSR_LSERR_MASK;
+ break;
+ case EXCP_UNALIGNED:
+ /* Unaligned faults reported by M-profile aware code */
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_UNALIGNED_MASK;
+ break;
+ case EXCP_DIVBYZERO:
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_DIVBYZERO_MASK;
+ break;
+ case EXCP_SWI:
+ /* The PC already points to the next instruction. */
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SVC, env->v7m.secure);
+ break;
+ case EXCP_PREFETCH_ABORT:
+ case EXCP_DATA_ABORT:
+ /*
+ * Note that for M profile we don't have a guest facing FSR, but
+ * the env->exception.fsr will be populated by the code that
+ * raises the fault, in the A profile short-descriptor format.
+ *
+ * Log the exception.vaddress now regardless of subtype, because
+ * logging below only logs it when it goes into a guest visible
+ * register.
+ */
+ qemu_log_mask(CPU_LOG_INT, "...at fault address 0x%x\n",
+ (uint32_t)env->exception.vaddress);
+ switch (env->exception.fsr & 0xf) {
+ case M_FAKE_FSR_NSC_EXEC:
+ /*
+ * Exception generated when we try to execute code at an address
+ * which is marked as Secure & Non-Secure Callable and the CPU
+ * is in the Non-Secure state. The only instruction which can
+ * be executed like this is SG (and that only if both halves of
+ * the SG instruction have the same security attributes.)
+ * Everything else must generate an INVEP SecureFault, so we
+ * emulate the SG instruction here.
+ */
+ if (v7m_handle_execute_nsc(cpu)) {
+ return;
+ }
+ break;
+ case M_FAKE_FSR_SFAULT:
+ /*
+ * Various flavours of SecureFault for attempts to execute or
+ * access data in the wrong security state.
+ */
+ switch (cs->exception_index) {
+ case EXCP_PREFETCH_ABORT:
+ if (env->v7m.secure) {
+ env->v7m.sfsr |= R_V7M_SFSR_INVTRAN_MASK;
+ qemu_log_mask(CPU_LOG_INT,
+ "...really SecureFault with SFSR.INVTRAN\n");
+ } else {
+ env->v7m.sfsr |= R_V7M_SFSR_INVEP_MASK;
+ qemu_log_mask(CPU_LOG_INT,
+ "...really SecureFault with SFSR.INVEP\n");
+ }
+ break;
+ case EXCP_DATA_ABORT:
+ /* This must be an NS access to S memory */
+ env->v7m.sfsr |= R_V7M_SFSR_AUVIOL_MASK;
+ qemu_log_mask(CPU_LOG_INT,
+ "...really SecureFault with SFSR.AUVIOL\n");
+ break;
+ }
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
+ break;
+ case 0x8: /* External Abort */
+ switch (cs->exception_index) {
+ case EXCP_PREFETCH_ABORT:
+ env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_IBUSERR_MASK;
+ qemu_log_mask(CPU_LOG_INT, "...with CFSR.IBUSERR\n");
+ break;
+ case EXCP_DATA_ABORT:
+ env->v7m.cfsr[M_REG_NS] |=
+ (R_V7M_CFSR_PRECISERR_MASK | R_V7M_CFSR_BFARVALID_MASK);
+ env->v7m.bfar = env->exception.vaddress;
+ qemu_log_mask(CPU_LOG_INT,
+ "...with CFSR.PRECISERR and BFAR 0x%x\n",
+ env->v7m.bfar);
+ break;
+ }
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_BUS, false);
+ break;
+ case 0x1: /* Alignment fault reported by generic code */
+ qemu_log_mask(CPU_LOG_INT,
+ "...really UsageFault with UFSR.UNALIGNED\n");
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_UNALIGNED_MASK;
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+ env->v7m.secure);
+ break;
+ default:
+ /*
+ * All other FSR values are either MPU faults or "can't happen
+ * for M profile" cases.
+ */
+ switch (cs->exception_index) {
+ case EXCP_PREFETCH_ABORT:
+ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_IACCVIOL_MASK;
+ qemu_log_mask(CPU_LOG_INT, "...with CFSR.IACCVIOL\n");
+ break;
+ case EXCP_DATA_ABORT:
+ env->v7m.cfsr[env->v7m.secure] |=
+ (R_V7M_CFSR_DACCVIOL_MASK | R_V7M_CFSR_MMARVALID_MASK);
+ env->v7m.mmfar[env->v7m.secure] = env->exception.vaddress;
+ qemu_log_mask(CPU_LOG_INT,
+ "...with CFSR.DACCVIOL and MMFAR 0x%x\n",
+ env->v7m.mmfar[env->v7m.secure]);
+ break;
+ }
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM,
+ env->v7m.secure);
+ break;
+ }
+ break;
+ case EXCP_SEMIHOST:
+ qemu_log_mask(CPU_LOG_INT,
+ "...handling as semihosting call 0x%x\n",
+ env->regs[0]);
+#ifdef CONFIG_TCG
+ do_common_semihosting(cs);
+#else
+ g_assert_not_reached();
+#endif
+ env->regs[15] += env->thumb ? 2 : 4;
+ return;
+ case EXCP_BKPT:
+ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_DEBUG, false);
+ break;
+ case EXCP_IRQ:
+ break;
+ case EXCP_EXCEPTION_EXIT:
+ if (env->regs[15] < EXC_RETURN_MIN_MAGIC) {
+ /* Must be v8M security extension function return */
+ assert(env->regs[15] >= FNC_RETURN_MIN_MAGIC);
+ assert(arm_feature(env, ARM_FEATURE_M_SECURITY));
+ if (do_v7m_function_return(cpu)) {
+ return;
+ }
+ } else {
+ do_v7m_exception_exit(cpu);
+ return;
+ }
+ break;
+ case EXCP_LAZYFP:
+ /*
+ * We already pended the specific exception in the NVIC in the
+ * v7m_preserve_fp_state() helper function.
+ */
+ break;
+ default:
+ cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
+ return; /* Never happens. Keep compiler happy. */
+ }
+
+ if (arm_feature(env, ARM_FEATURE_V8)) {
+ lr = R_V7M_EXCRET_RES1_MASK |
+ R_V7M_EXCRET_DCRS_MASK;
+ /*
+ * The S bit indicates whether we should return to Secure
+ * or NonSecure (ie our current state).
+ * The ES bit indicates whether we're taking this exception
+ * to Secure or NonSecure (ie our target state). We set it
+ * later, in v7m_exception_taken().
+ * The SPSEL bit is also set in v7m_exception_taken() for v8M.
+ * This corresponds to the ARM ARM pseudocode for v8M setting
+ * some LR bits in PushStack() and some in ExceptionTaken();
+ * the distinction matters for the tailchain cases where we
+ * can take an exception without pushing the stack.
+ */
+ if (env->v7m.secure) {
+ lr |= R_V7M_EXCRET_S_MASK;
+ }
+ } else {
+ lr = R_V7M_EXCRET_RES1_MASK |
+ R_V7M_EXCRET_S_MASK |
+ R_V7M_EXCRET_DCRS_MASK |
+ R_V7M_EXCRET_ES_MASK;
+ if (env->v7m.control[M_REG_NS] & R_V7M_CONTROL_SPSEL_MASK) {
+ lr |= R_V7M_EXCRET_SPSEL_MASK;
+ }
+ }
+ if (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK)) {
+ lr |= R_V7M_EXCRET_FTYPE_MASK;
+ }
+ if (!arm_v7m_is_handler_mode(env)) {
+ lr |= R_V7M_EXCRET_MODE_MASK;
+ }
+
+ ignore_stackfaults = v7m_push_stack(cpu);
+ v7m_exception_taken(cpu, lr, false, ignore_stackfaults);
+}
+
+uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg)
+{
+ unsigned el = arm_current_el(env);
+
+ /* First handle registers which unprivileged can read */
+    /* First handle registers which unprivileged code can read */
+ case 0 ... 7: /* xPSR sub-fields */
+ return v7m_mrs_xpsr(env, reg, el);
+ case 20: /* CONTROL */
+ return arm_v7m_mrs_control(env, env->v7m.secure);
+ case 0x94: /* CONTROL_NS */
+ /*
+ * We have to handle this here because unprivileged Secure code
+ * can read the NS CONTROL register.
+ */
+ if (!env->v7m.secure) {
+ return 0;
+ }
+ return env->v7m.control[M_REG_NS] |
+ (env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK);
+ }
+
+ if (el == 0) {
+ return 0; /* unprivileged reads others as zero */
+ }
+
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ switch (reg) {
+ case 0x88: /* MSP_NS */
+ if (!env->v7m.secure) {
+ return 0;
+ }
+ return env->v7m.other_ss_msp;
+ case 0x89: /* PSP_NS */
+ if (!env->v7m.secure) {
+ return 0;
+ }
+ return env->v7m.other_ss_psp;
+ case 0x8a: /* MSPLIM_NS */
+ if (!env->v7m.secure) {
+ return 0;
+ }
+ return env->v7m.msplim[M_REG_NS];
+ case 0x8b: /* PSPLIM_NS */
+ if (!env->v7m.secure) {
+ return 0;
+ }
+ return env->v7m.psplim[M_REG_NS];
+ case 0x90: /* PRIMASK_NS */
+ if (!env->v7m.secure) {
+ return 0;
+ }
+ return env->v7m.primask[M_REG_NS];
+ case 0x91: /* BASEPRI_NS */
+ if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
+ goto bad_reg;
+ }
+ if (!env->v7m.secure) {
+ return 0;
+ }
+ return env->v7m.basepri[M_REG_NS];
+ case 0x93: /* FAULTMASK_NS */
+ if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
+ goto bad_reg;
+ }
+ if (!env->v7m.secure) {
+ return 0;
+ }
+ return env->v7m.faultmask[M_REG_NS];
+ case 0x98: /* SP_NS */
+ {
+ /*
+ * This gives the non-secure SP selected based on whether we're
+ * currently in handler mode or not, using the NS CONTROL.SPSEL.
+ */
+ bool spsel = env->v7m.control[M_REG_NS] & R_V7M_CONTROL_SPSEL_MASK;
+
+ if (!env->v7m.secure) {
+ return 0;
+ }
+ if (!arm_v7m_is_handler_mode(env) && spsel) {
+ return env->v7m.other_ss_psp;
+ } else {
+ return env->v7m.other_ss_msp;
+ }
+ }
+ default:
+ break;
+ }
+ }
+
+ switch (reg) {
+ case 8: /* MSP */
+ return v7m_using_psp(env) ? env->v7m.other_sp : env->regs[13];
+ case 9: /* PSP */
+ return v7m_using_psp(env) ? env->regs[13] : env->v7m.other_sp;
+ case 10: /* MSPLIM */
+ if (!arm_feature(env, ARM_FEATURE_V8)) {
+ goto bad_reg;
+ }
+ return env->v7m.msplim[env->v7m.secure];
+ case 11: /* PSPLIM */
+ if (!arm_feature(env, ARM_FEATURE_V8)) {
+ goto bad_reg;
+ }
+ return env->v7m.psplim[env->v7m.secure];
+ case 16: /* PRIMASK */
+ return env->v7m.primask[env->v7m.secure];
+ case 17: /* BASEPRI */
+ case 18: /* BASEPRI_MAX */
+ if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
+ goto bad_reg;
+ }
+ return env->v7m.basepri[env->v7m.secure];
+ case 19: /* FAULTMASK */
+ if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
+ goto bad_reg;
+ }
+ return env->v7m.faultmask[env->v7m.secure];
+ default:
+ bad_reg:
+ qemu_log_mask(LOG_GUEST_ERROR, "Attempt to read unknown special"
+ " register %d\n", reg);
+ return 0;
+ }
+}
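+
+/*
+ * A rough worked example of the SP_NS (SYSm 0x98) read path above, for
+ * illustration only: a Secure handler doing "MRS r0, SP_NS" while the NS
+ * bank has CONTROL.SPSEL = 1 still gets other_ss_msp, because handler
+ * mode always uses the MSP; the same read from Secure thread mode would
+ * return other_ss_psp, and a Non-secure caller simply reads 0.
+ */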
+
+void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
+{
+ /*
+ * We're passed bits [11..0] of the instruction; extract
+ * SYSm and the mask bits.
+ * Invalid combinations of SYSm and mask are UNPREDICTABLE;
+ * we choose to treat them as if the mask bits were valid.
+ * NB that the pseudocode 'mask' variable is bits [11..10],
+ * whereas ours is [11..8].
+ */
+ uint32_t mask = extract32(maskreg, 8, 4);
+ uint32_t reg = extract32(maskreg, 0, 8);
+ int cur_el = arm_current_el(env);
+
+ if (cur_el == 0 && reg > 7 && reg != 20) {
+ /*
+ * only xPSR sub-fields and CONTROL.SFPA may be written by
+ * unprivileged code
+ */
+ return;
+ }
+
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+ switch (reg) {
+ case 0x88: /* MSP_NS */
+ if (!env->v7m.secure) {
+ return;
+ }
+ env->v7m.other_ss_msp = val & ~3;
+ return;
+ case 0x89: /* PSP_NS */
+ if (!env->v7m.secure) {
+ return;
+ }
+ env->v7m.other_ss_psp = val & ~3;
+ return;
+ case 0x8a: /* MSPLIM_NS */
+ if (!env->v7m.secure) {
+ return;
+ }
+ env->v7m.msplim[M_REG_NS] = val & ~7;
+ return;
+ case 0x8b: /* PSPLIM_NS */
+ if (!env->v7m.secure) {
+ return;
+ }
+ env->v7m.psplim[M_REG_NS] = val & ~7;
+ return;
+ case 0x90: /* PRIMASK_NS */
+ if (!env->v7m.secure) {
+ return;
+ }
+ env->v7m.primask[M_REG_NS] = val & 1;
+ return;
+ case 0x91: /* BASEPRI_NS */
+ if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
+ goto bad_reg;
+ }
+ if (!env->v7m.secure) {
+ return;
+ }
+ env->v7m.basepri[M_REG_NS] = val & 0xff;
+ return;
+ case 0x93: /* FAULTMASK_NS */
+ if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
+ goto bad_reg;
+ }
+ if (!env->v7m.secure) {
+ return;
+ }
+ env->v7m.faultmask[M_REG_NS] = val & 1;
+ return;
+ case 0x94: /* CONTROL_NS */
+ if (!env->v7m.secure) {
+ return;
+ }
+ write_v7m_control_spsel_for_secstate(env,
+ val & R_V7M_CONTROL_SPSEL_MASK,
+ M_REG_NS);
+ if (arm_feature(env, ARM_FEATURE_M_MAIN)) {
+ env->v7m.control[M_REG_NS] &= ~R_V7M_CONTROL_NPRIV_MASK;
+ env->v7m.control[M_REG_NS] |= val & R_V7M_CONTROL_NPRIV_MASK;
+ }
+ /*
+ * SFPA is RAZ/WI from NS. FPCA is RO if NSACR.CP10 == 0,
+ * RES0 if the FPU is not present, and is stored in the S bank
+ */
+ if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env)) &&
+ extract32(env->v7m.nsacr, 10, 1)) {
+ env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_FPCA_MASK;
+ env->v7m.control[M_REG_S] |= val & R_V7M_CONTROL_FPCA_MASK;
+ }
+ return;
+ case 0x98: /* SP_NS */
+ {
+ /*
+ * This gives the non-secure SP selected based on whether we're
+ * currently in handler mode or not, using the NS CONTROL.SPSEL.
+ */
+ bool spsel = env->v7m.control[M_REG_NS] & R_V7M_CONTROL_SPSEL_MASK;
+ bool is_psp = !arm_v7m_is_handler_mode(env) && spsel;
+ uint32_t limit;
+
+ if (!env->v7m.secure) {
+ return;
+ }
+
+        limit = is_psp ? env->v7m.psplim[M_REG_NS] : env->v7m.msplim[M_REG_NS];
+
+ val &= ~0x3;
+
+ if (val < limit) {
+ raise_exception_ra(env, EXCP_STKOF, 0, 1, GETPC());
+ }
+
+ if (is_psp) {
+ env->v7m.other_ss_psp = val;
+ } else {
+ env->v7m.other_ss_msp = val;
+ }
+ return;
+ }
+ default:
+ break;
+ }
+ }
+
+ switch (reg) {
+ case 0 ... 7: /* xPSR sub-fields */
+ v7m_msr_xpsr(env, mask, reg, val);
+ break;
+ case 8: /* MSP */
+ if (v7m_using_psp(env)) {
+ env->v7m.other_sp = val & ~3;
+ } else {
+ env->regs[13] = val & ~3;
+ }
+ break;
+ case 9: /* PSP */
+ if (v7m_using_psp(env)) {
+ env->regs[13] = val & ~3;
+ } else {
+ env->v7m.other_sp = val & ~3;
+ }
+ break;
+ case 10: /* MSPLIM */
+ if (!arm_feature(env, ARM_FEATURE_V8)) {
+ goto bad_reg;
+ }
+ env->v7m.msplim[env->v7m.secure] = val & ~7;
+ break;
+ case 11: /* PSPLIM */
+ if (!arm_feature(env, ARM_FEATURE_V8)) {
+ goto bad_reg;
+ }
+ env->v7m.psplim[env->v7m.secure] = val & ~7;
+ break;
+ case 16: /* PRIMASK */
+ env->v7m.primask[env->v7m.secure] = val & 1;
+ break;
+ case 17: /* BASEPRI */
+ if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
+ goto bad_reg;
+ }
+ env->v7m.basepri[env->v7m.secure] = val & 0xff;
+ break;
+ case 18: /* BASEPRI_MAX */
+ if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
+ goto bad_reg;
+ }
+ val &= 0xff;
+ if (val != 0 && (val < env->v7m.basepri[env->v7m.secure]
+ || env->v7m.basepri[env->v7m.secure] == 0)) {
+ env->v7m.basepri[env->v7m.secure] = val;
+ }
+ break;
+ case 19: /* FAULTMASK */
+ if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
+ goto bad_reg;
+ }
+ env->v7m.faultmask[env->v7m.secure] = val & 1;
+ break;
+ case 20: /* CONTROL */
+ /*
+ * Writing to the SPSEL bit only has an effect if we are in
+ * thread mode; other bits can be updated by any privileged code.
+ * write_v7m_control_spsel() deals with updating the SPSEL bit in
+     * env->v7m.control, so we only need to update the others.
+ * For v7M, we must just ignore explicit writes to SPSEL in handler
+ * mode; for v8M the write is permitted but will have no effect.
+ * All these bits are writes-ignored from non-privileged code,
+ * except for SFPA.
+ */
+ if (cur_el > 0 && (arm_feature(env, ARM_FEATURE_V8) ||
+ !arm_v7m_is_handler_mode(env))) {
+ write_v7m_control_spsel(env, (val & R_V7M_CONTROL_SPSEL_MASK) != 0);
+ }
+ if (cur_el > 0 && arm_feature(env, ARM_FEATURE_M_MAIN)) {
+ env->v7m.control[env->v7m.secure] &= ~R_V7M_CONTROL_NPRIV_MASK;
+ env->v7m.control[env->v7m.secure] |= val & R_V7M_CONTROL_NPRIV_MASK;
+ }
+ if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))) {
+ /*
+ * SFPA is RAZ/WI from NS or if no FPU.
+ * FPCA is RO if NSACR.CP10 == 0, RES0 if the FPU is not present.
+ * Both are stored in the S bank.
+ */
+ if (env->v7m.secure) {
+ env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_SFPA_MASK;
+ env->v7m.control[M_REG_S] |= val & R_V7M_CONTROL_SFPA_MASK;
+ }
+ if (cur_el > 0 &&
+ (env->v7m.secure || !arm_feature(env, ARM_FEATURE_M_SECURITY) ||
+ extract32(env->v7m.nsacr, 10, 1))) {
+ env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_FPCA_MASK;
+ env->v7m.control[M_REG_S] |= val & R_V7M_CONTROL_FPCA_MASK;
+ }
+ }
+ break;
+ default:
+ bad_reg:
+ qemu_log_mask(LOG_GUEST_ERROR, "Attempt to write unknown special"
+ " register %d\n", reg);
+ return;
+ }
+}
+
+uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op)
+{
+ /* Implement the TT instruction. op is bits [7:6] of the insn. */
+ bool forceunpriv = op & 1;
+ bool alt = op & 2;
+ V8M_SAttributes sattrs = {};
+ uint32_t tt_resp;
+ bool r, rw, nsr, nsrw, mrvalid;
+ ARMMMUIdx mmu_idx;
+ uint32_t mregion;
+ bool targetpriv;
+ bool targetsec = env->v7m.secure;
+
+ /*
+     * Work out which security state and privilege level we're
+     * interested in...
+ */
+ if (alt) {
+ targetsec = !targetsec;
+ }
+
+ if (forceunpriv) {
+ targetpriv = false;
+ } else {
+ targetpriv = arm_v7m_is_handler_mode(env) ||
+ !(env->v7m.control[targetsec] & R_V7M_CONTROL_NPRIV_MASK);
+ }
+
+ /* ...and then figure out which MMU index this is */
+ mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, targetsec, targetpriv);
+
+ /*
+ * We know that the MPU and SAU don't care about the access type
+ * for our purposes beyond that we don't want to claim to be
+ * an insn fetch, so we arbitrarily call this a read.
+ */
+
+ /*
+ * MPU region info only available for privileged or if
+ * inspecting the other MPU state.
+ */
+ if (arm_current_el(env) != 0 || alt) {
+ GetPhysAddrResult res = {};
+ ARMMMUFaultInfo fi = {};
+
+ /* We can ignore the return value as prot is always set */
+ pmsav8_mpu_lookup(env, addr, MMU_DATA_LOAD, mmu_idx, targetsec,
+ &res, &fi, &mregion);
+ if (mregion == -1) {
+ mrvalid = false;
+ mregion = 0;
+ } else {
+ mrvalid = true;
+ }
+ r = res.f.prot & PAGE_READ;
+ rw = res.f.prot & PAGE_WRITE;
+ } else {
+ r = false;
+ rw = false;
+ mrvalid = false;
+ mregion = 0;
+ }
+
+ if (env->v7m.secure) {
+ v8m_security_lookup(env, addr, MMU_DATA_LOAD, mmu_idx,
+ targetsec, &sattrs);
+ nsr = sattrs.ns && r;
+ nsrw = sattrs.ns && rw;
+ } else {
+ sattrs.ns = true;
+ nsr = false;
+ nsrw = false;
+ }
+
+ tt_resp = (sattrs.iregion << 24) |
+ (sattrs.irvalid << 23) |
+ ((!sattrs.ns) << 22) |
+ (nsrw << 21) |
+ (nsr << 20) |
+ (rw << 19) |
+ (r << 18) |
+ (sattrs.srvalid << 17) |
+ (mrvalid << 16) |
+ (sattrs.sregion << 8) |
+ mregion;
+
+ return tt_resp;
+}
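+
+/*
+ * Illustrative example of the response word built above (the values are
+ * invented, not taken from any real board): a privileged Non-secure TT
+ * on an address covered by MPU region 2 with read/write permission gives
+ * MREGION = 2, MRVALID = 1, R = 1 and RW = 1 with all the security
+ * related fields zero, i.e. a result of 0x000d0002.
+ */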
+
+#endif /* !CONFIG_USER_ONLY */
+
+uint32_t *arm_v7m_get_sp_ptr(CPUARMState *env, bool secure, bool threadmode,
+ bool spsel)
+{
+ /*
+ * Return a pointer to the location where we currently store the
+ * stack pointer for the requested security state and thread mode.
+ * This pointer will become invalid if the CPU state is updated
+ * such that the stack pointers are switched around (eg changing
+ * the SPSEL control bit).
+ * Compare the v8M ARM ARM pseudocode LookUpSP_with_security_mode().
+ * Unlike that pseudocode, we require the caller to pass us in the
+ * SPSEL control bit value; this is because we also use this
+ * function in handling of pushing of the callee-saves registers
+ * part of the v8M stack frame (pseudocode PushCalleeStack()),
+ * and in the tailchain codepath the SPSEL bit comes from the exception
+ * return magic LR value from the previous exception. The pseudocode
+ * opencodes the stack-selection in PushCalleeStack(), but we prefer
+ * to make this utility function generic enough to do the job.
+ */
+ bool want_psp = threadmode && spsel;
+
+ if (secure == env->v7m.secure) {
+ if (want_psp == v7m_using_psp(env)) {
+ return &env->regs[13];
+ } else {
+ return &env->v7m.other_sp;
+ }
+ } else {
+ if (want_psp) {
+ return &env->v7m.other_ss_psp;
+ } else {
+ return &env->v7m.other_ss_msp;
+ }
+ }
+}
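+
+/*
+ * For illustration (not an exhaustive table): with the CPU currently
+ * Secure and running on the PSP, arm_v7m_get_sp_ptr(env, true, true, true)
+ * returns &env->regs[13] (the live SP); asking for the Secure MSP instead
+ * returns &env->v7m.other_sp, and asking for either Non-secure stack
+ * pointer returns &env->v7m.other_ss_psp or &env->v7m.other_ss_msp.
+ */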
diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build
new file mode 100644
index 0000000000..3b1a9f0fc5
--- /dev/null
+++ b/target/arm/tcg/meson.build
@@ -0,0 +1,60 @@
+gen_a64 = [
+ decodetree.process('a64.decode', extra_args: ['--static-decode=disas_a64']),
+ decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
+ decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
+ decodetree.process('sme-fa64.decode', extra_args: '--static-decode=disas_sme_fa64'),
+]
+
+gen_a32 = [
+ decodetree.process('neon-shared.decode', extra_args: '--decode=disas_neon_shared'),
+ decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
+ decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
+ decodetree.process('vfp.decode', extra_args: '--decode=disas_vfp'),
+ decodetree.process('vfp-uncond.decode', extra_args: '--decode=disas_vfp_uncond'),
+ decodetree.process('m-nocp.decode', extra_args: '--decode=disas_m_nocp'),
+ decodetree.process('mve.decode', extra_args: '--decode=disas_mve'),
+ decodetree.process('a32.decode', extra_args: '--static-decode=disas_a32'),
+ decodetree.process('a32-uncond.decode', extra_args: '--static-decode=disas_a32_uncond'),
+ decodetree.process('t32.decode', extra_args: '--static-decode=disas_t32'),
+ decodetree.process('t16.decode', extra_args: ['-w', '16', '--static-decode=disas_t16']),
+]
+
+arm_ss.add(gen_a32)
+arm_ss.add(when: 'TARGET_AARCH64', if_true: gen_a64)
+
+arm_ss.add(files(
+ 'cpu32.c',
+ 'translate.c',
+ 'translate-m-nocp.c',
+ 'translate-mve.c',
+ 'translate-neon.c',
+ 'translate-vfp.c',
+ 'crypto_helper.c',
+ 'hflags.c',
+ 'iwmmxt_helper.c',
+ 'm_helper.c',
+ 'mve_helper.c',
+ 'neon_helper.c',
+ 'op_helper.c',
+ 'tlb_helper.c',
+ 'vec_helper.c',
+))
+
+arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
+ 'cpu64.c',
+ 'translate-a64.c',
+ 'translate-sve.c',
+ 'translate-sme.c',
+ 'helper-a64.c',
+ 'mte_helper.c',
+ 'pauth_helper.c',
+ 'sme_helper.c',
+ 'sve_helper.c',
+))
+
+arm_system_ss.add(files(
+ 'psci.c',
+))
+
+arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c'))
+arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c'))
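+
+# As a sketch of how this layout is extended (the file names below are
+# hypothetical, not part of the tree): a new AArch64-only decoder would add
+# a decodetree.process('foo.decode', extra_args: '--decode=disas_foo') entry
+# to gen_a64 above and list its helper, e.g. 'foo_helper.c', in the
+# arm_ss.add(when: 'TARGET_AARCH64', ...) block.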
diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c
new file mode 100644
index 0000000000..d971b81370
--- /dev/null
+++ b/target/arm/tcg/mte_helper.c
@@ -0,0 +1,1190 @@
+/*
+ * ARM v8.5-MemTag Operations
+ *
+ * Copyright (c) 2020 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "cpu.h"
+#include "internals.h"
+#include "exec/exec-all.h"
+#include "exec/ram_addr.h"
+#include "exec/cpu_ldst.h"
+#include "exec/helper-proto.h"
+#include "hw/core/tcg-cpu-ops.h"
+#include "qapi/error.h"
+#include "qemu/guest-random.h"
+
+
+static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
+{
+ if (exclude == 0xffff) {
+ return 0;
+ }
+ if (offset == 0) {
+ while (exclude & (1 << tag)) {
+ tag = (tag + 1) & 15;
+ }
+ } else {
+ do {
+ do {
+ tag = (tag + 1) & 15;
+ } while (exclude & (1 << tag));
+ } while (--offset > 0);
+ }
+ return tag;
+}
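+
+/*
+ * Worked example of the tag selection above (exclude mask chosen purely
+ * for illustration): with tag = 1, exclude = 0x0006 (tags 1 and 2
+ * excluded) and offset = 0, the loop skips forward to tag 3. With the
+ * same inputs but offset = 2, each step advances past the excluded tags,
+ * giving tag 3 on the first step and tag 4 on the second, so 4 is returned.
+ */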
+
+/**
+ * allocation_tag_mem_probe:
+ * @env: the cpu environment
+ * @ptr_mmu_idx: the addressing regime to use for the virtual address
+ * @ptr: the virtual address for which to look up tag memory
+ * @ptr_access: the access to use for the virtual address
+ * @ptr_size: the number of bytes in the normal memory access
+ * @tag_access: the access to use for the tag memory
+ * @probe: true to merely probe, never taking an exception
+ * @ra: the return address for exception handling
+ *
+ * Our tag memory is formatted as a sequence of little-endian nibbles.
+ * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two
+ * tags, with the tag at [3:0] for the lower addr and the tag at [7:4]
+ * for the higher addr.
+ *
+ * Here, resolve the physical address from the virtual address, and return
+ * a pointer to the corresponding tag byte.
+ *
+ * If there is no tag storage corresponding to @ptr, return NULL.
+ *
+ * If the page is inaccessible for @ptr_access, or has a watchpoint, there are
+ * three options:
+ * (1) probe = true, ra = 0 : pure probe -- we return NULL if the page is not
+ * accessible, and do not take watchpoint traps. The calling code must
+ * handle those cases in the right priority compared to MTE traps.
+ * (2) probe = false, ra = 0 : probe, no fault expected -- the caller guarantees
+ * that the page is going to be accessible. We will take watchpoint traps.
+ * (3) probe = false, ra != 0 : non-probe -- we will take both memory access
+ * traps and watchpoint traps.
+ * (probe = true, ra != 0 is invalid and will assert.)
+ */
+static uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx,
+ uint64_t ptr, MMUAccessType ptr_access,
+ int ptr_size, MMUAccessType tag_access,
+ bool probe, uintptr_t ra)
+{
+#ifdef CONFIG_USER_ONLY
+ uint64_t clean_ptr = useronly_clean_ptr(ptr);
+ int flags = page_get_flags(clean_ptr);
+ uint8_t *tags;
+ uintptr_t index;
+
+ assert(!(probe && ra));
+
+ if (!(flags & (ptr_access == MMU_DATA_STORE ? PAGE_WRITE_ORG : PAGE_READ))) {
+ cpu_loop_exit_sigsegv(env_cpu(env), ptr, ptr_access,
+ !(flags & PAGE_VALID), ra);
+ }
+
+ /* Require both MAP_ANON and PROT_MTE for the page. */
+ if (!(flags & PAGE_ANON) || !(flags & PAGE_MTE)) {
+ return NULL;
+ }
+
+ tags = page_get_target_data(clean_ptr);
+
+ index = extract32(ptr, LOG2_TAG_GRANULE + 1,
+ TARGET_PAGE_BITS - LOG2_TAG_GRANULE - 1);
+ return tags + index;
+#else
+ CPUTLBEntryFull *full;
+ MemTxAttrs attrs;
+ int in_page, flags;
+ hwaddr ptr_paddr, tag_paddr, xlat;
+ MemoryRegion *mr;
+ ARMASIdx tag_asi;
+ AddressSpace *tag_as;
+ void *host;
+
+ /*
+ * Probe the first byte of the virtual address. This raises an
+ * exception for inaccessible pages, and resolves the virtual address
+ * into the softmmu tlb.
+ *
+ * When RA == 0, this is either a pure probe or a no-fault-expected probe.
+ * Indicate to probe_access_flags no-fault, then either return NULL
+ * for the pure probe, or assert that we received a valid page for the
+ * no-fault-expected probe.
+ */
+ flags = probe_access_full(env, ptr, 0, ptr_access, ptr_mmu_idx,
+ ra == 0, &host, &full, ra);
+ if (probe && (flags & TLB_INVALID_MASK)) {
+ return NULL;
+ }
+ assert(!(flags & TLB_INVALID_MASK));
+
+ /* If the virtual page MemAttr != Tagged, access unchecked. */
+ if (full->extra.arm.pte_attrs != 0xf0) {
+ return NULL;
+ }
+
+ /*
+ * If not backed by host ram, there is no tag storage: access unchecked.
+ * This is probably a guest os bug though, so log it.
+ */
+ if (unlikely(flags & TLB_MMIO)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Page @ 0x%" PRIx64 " indicates Tagged Normal memory "
+ "but is not backed by host ram\n", ptr);
+ return NULL;
+ }
+
+ /*
+ * Remember these values across the second lookup below,
+ * which may invalidate this pointer via tlb resize.
+ */
+ ptr_paddr = full->phys_addr | (ptr & ~TARGET_PAGE_MASK);
+ attrs = full->attrs;
+ full = NULL;
+
+ /*
+ * The Normal memory access can extend to the next page. E.g. a single
+ * 8-byte access to the last byte of a page will check only the last
+ * tag on the first page.
+ * Any page access exception has priority over tag check exception.
+ */
+ in_page = -(ptr | TARGET_PAGE_MASK);
+ if (unlikely(ptr_size > in_page)) {
+ flags |= probe_access_full(env, ptr + in_page, 0, ptr_access,
+ ptr_mmu_idx, ra == 0, &host, &full, ra);
+ assert(!(flags & TLB_INVALID_MASK));
+ }
+
+ /* Any debug exception has priority over a tag check exception. */
+ if (!probe && unlikely(flags & TLB_WATCHPOINT)) {
+ int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE;
+ assert(ra != 0);
+ cpu_check_watchpoint(env_cpu(env), ptr, ptr_size, attrs, wp, ra);
+ }
+
+ /* Convert to the physical address in tag space. */
+ tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1);
+
+ /* Look up the address in tag space. */
+ tag_asi = attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
+ tag_as = cpu_get_address_space(env_cpu(env), tag_asi);
+ mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL,
+ tag_access == MMU_DATA_STORE, attrs);
+
+ /*
+ * Note that @mr will never be NULL. If there is nothing in the address
+ * space at @tag_paddr, the translation will return the unallocated memory
+ * region. For our purposes, the result must be ram.
+ */
+ if (unlikely(!memory_region_is_ram(mr))) {
+ /* ??? Failure is a board configuration error. */
+ qemu_log_mask(LOG_UNIMP,
+ "Tag Memory @ 0x%" HWADDR_PRIx " not found for "
+ "Normal Memory @ 0x%" HWADDR_PRIx "\n",
+ tag_paddr, ptr_paddr);
+ return NULL;
+ }
+
+ /*
+ * Ensure the tag memory is dirty on write, for migration.
+ * Tag memory can never contain code or display memory (vga).
+ */
+ if (tag_access == MMU_DATA_STORE) {
+ ram_addr_t tag_ra = memory_region_get_ram_addr(mr) + xlat;
+ cpu_physical_memory_set_dirty_flag(tag_ra, DIRTY_MEMORY_MIGRATION);
+ }
+
+ return memory_region_get_ram_ptr(mr) + xlat;
+#endif
+}
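+
+/*
+ * Concrete example of the tag layout described above (addresses chosen
+ * for illustration): with the 16-byte tag granule, guest addresses
+ * 0x1000..0x100f and 0x1010..0x101f share the tag byte at index
+ * 0x1000 >> 5 == 0x80 in tag storage; the first granule's tag sits in
+ * bits [3:0] of that byte and the second granule's tag in bits [7:4].
+ */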
+
+static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
+ uint64_t ptr, MMUAccessType ptr_access,
+ int ptr_size, MMUAccessType tag_access,
+ uintptr_t ra)
+{
+ return allocation_tag_mem_probe(env, ptr_mmu_idx, ptr, ptr_access,
+ ptr_size, tag_access, false, ra);
+}
+
+uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm)
+{
+ uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16);
+ int rrnd = extract32(env->cp15.gcr_el1, 16, 1);
+ int start = extract32(env->cp15.rgsr_el1, 0, 4);
+ int seed = extract32(env->cp15.rgsr_el1, 8, 16);
+ int offset, i, rtag;
+
+ /*
+ * Our IMPDEF choice for GCR_EL1.RRND==1 is to continue to use the
+ * deterministic algorithm. Except that with RRND==1 the kernel is
+ * not required to have set RGSR_EL1.SEED != 0, which is required for
+ * the deterministic algorithm to function. So we force a non-zero
+ * SEED for that case.
+ */
+ if (unlikely(seed == 0) && rrnd) {
+ do {
+ Error *err = NULL;
+ uint16_t two;
+
+ if (qemu_guest_getrandom(&two, sizeof(two), &err) < 0) {
+ /*
+ * Failed, for unknown reasons in the crypto subsystem.
+ * Best we can do is log the reason and use a constant seed.
+ */
+ qemu_log_mask(LOG_UNIMP, "IRG: Crypto failure: %s\n",
+ error_get_pretty(err));
+ error_free(err);
+ two = 1;
+ }
+ seed = two;
+ } while (seed == 0);
+ }
+
+ /* RandomTag */
+ for (i = offset = 0; i < 4; ++i) {
+ /* NextRandomTagBit */
+ int top = (extract32(seed, 5, 1) ^ extract32(seed, 3, 1) ^
+ extract32(seed, 2, 1) ^ extract32(seed, 0, 1));
+ seed = (top << 15) | (seed >> 1);
+ offset |= top << i;
+ }
+ rtag = choose_nonexcluded_tag(start, offset, exclude);
+ env->cp15.rgsr_el1 = rtag | (seed << 8);
+
+ return address_with_allocation_tag(rn, rtag);
+}
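+
+/*
+ * Rough trace of the RandomTag loop above, purely for illustration:
+ * starting from seed = 0x0001, the first NextRandomTagBit step sees seed
+ * bits 5/3/2/0 = 0/0/0/1, so top = 1, the seed becomes 0x8000 and offset
+ * bit 0 is set; the next three steps all produce top = 0, so the loop
+ * ends with offset = 1 and seed = 0x1000, and the returned tag is
+ * choose_nonexcluded_tag(start, 1, exclude).
+ */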
+
+uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr,
+ int32_t offset, uint32_t tag_offset)
+{
+ int start_tag = allocation_tag_from_addr(ptr);
+ uint16_t exclude = extract32(env->cp15.gcr_el1, 0, 16);
+ int rtag = choose_nonexcluded_tag(start_tag, tag_offset, exclude);
+
+ return address_with_allocation_tag(ptr + offset, rtag);
+}
+
+static int load_tag1(uint64_t ptr, uint8_t *mem)
+{
+ int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
+ return extract32(*mem, ofs, 4);
+}
+
+uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+ int mmu_idx = arm_env_mmu_index(env);
+ uint8_t *mem;
+ int rtag = 0;
+
+ /* Trap if accessing an invalid page. */
+ mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, 1,
+ MMU_DATA_LOAD, GETPC());
+
+ /* Load if page supports tags. */
+ if (mem) {
+ rtag = load_tag1(ptr, mem);
+ }
+
+ return address_with_allocation_tag(xt, rtag);
+}
+
+static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra)
+{
+ if (unlikely(!QEMU_IS_ALIGNED(ptr, TAG_GRANULE))) {
+ arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
+ arm_env_mmu_index(env), ra);
+ g_assert_not_reached();
+ }
+}
+
+/* For use in a non-parallel context, store to the given nibble. */
+static void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
+{
+ int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
+ *mem = deposit32(*mem, ofs, 4, tag);
+}
+
+/* For use in a parallel context, atomically store to the given nibble. */
+static void store_tag1_parallel(uint64_t ptr, uint8_t *mem, int tag)
+{
+ int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
+ uint8_t old = qatomic_read(mem);
+
+ while (1) {
+ uint8_t new = deposit32(old, ofs, 4, tag);
+ uint8_t cmp = qatomic_cmpxchg(mem, old, new);
+ if (likely(cmp == old)) {
+ return;
+ }
+ old = cmp;
+ }
+}
+
+typedef void stg_store1(uint64_t, uint8_t *, int);
+
+static inline void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt,
+ uintptr_t ra, stg_store1 store1)
+{
+ int mmu_idx = arm_env_mmu_index(env);
+ uint8_t *mem;
+
+ check_tag_aligned(env, ptr, ra);
+
+ /* Trap if accessing an invalid page. */
+ mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, TAG_GRANULE,
+ MMU_DATA_STORE, ra);
+
+ /* Store if page supports tags. */
+ if (mem) {
+ store1(ptr, mem, allocation_tag_from_addr(xt));
+ }
+}
+
+void HELPER(stg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+ do_stg(env, ptr, xt, GETPC(), store_tag1);
+}
+
+void HELPER(stg_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+ do_stg(env, ptr, xt, GETPC(), store_tag1_parallel);
+}
+
+void HELPER(stg_stub)(CPUARMState *env, uint64_t ptr)
+{
+ int mmu_idx = arm_env_mmu_index(env);
+ uintptr_t ra = GETPC();
+
+ check_tag_aligned(env, ptr, ra);
+ probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra);
+}
+
+static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt,
+ uintptr_t ra, stg_store1 store1)
+{
+ int mmu_idx = arm_env_mmu_index(env);
+ int tag = allocation_tag_from_addr(xt);
+ uint8_t *mem1, *mem2;
+
+ check_tag_aligned(env, ptr, ra);
+
+ /*
+ * Trap if accessing an invalid page(s).
+ * This takes priority over !allocation_tag_access_enabled.
+ */
+ if (ptr & TAG_GRANULE) {
+ /* Two stores unaligned mod TAG_GRANULE*2 -- modify two bytes. */
+ mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
+ TAG_GRANULE, MMU_DATA_STORE, ra);
+ mem2 = allocation_tag_mem(env, mmu_idx, ptr + TAG_GRANULE,
+ MMU_DATA_STORE, TAG_GRANULE,
+ MMU_DATA_STORE, ra);
+
+ /* Store if page(s) support tags. */
+ if (mem1) {
+ store1(TAG_GRANULE, mem1, tag);
+ }
+ if (mem2) {
+ store1(0, mem2, tag);
+ }
+ } else {
+ /* Two stores aligned mod TAG_GRANULE*2 -- modify one byte. */
+ mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
+ 2 * TAG_GRANULE, MMU_DATA_STORE, ra);
+ if (mem1) {
+ tag |= tag << 4;
+ qatomic_set(mem1, tag);
+ }
+ }
+}
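+
+/*
+ * Example of the two cases above (addresses chosen for illustration): an
+ * ST2G to 0x...20, which is aligned to 2 * TAG_GRANULE, writes a single
+ * tag byte with the tag duplicated into both nibbles; an ST2G to 0x...30
+ * straddles two tag bytes, so the tag goes into the high nibble of the
+ * first byte and the low nibble of the second.
+ */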
+
+void HELPER(st2g)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+ do_st2g(env, ptr, xt, GETPC(), store_tag1);
+}
+
+void HELPER(st2g_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+ do_st2g(env, ptr, xt, GETPC(), store_tag1_parallel);
+}
+
+void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr)
+{
+ int mmu_idx = arm_env_mmu_index(env);
+ uintptr_t ra = GETPC();
+ int in_page = -(ptr | TARGET_PAGE_MASK);
+
+ check_tag_aligned(env, ptr, ra);
+
+ if (likely(in_page >= 2 * TAG_GRANULE)) {
+ probe_write(env, ptr, 2 * TAG_GRANULE, mmu_idx, ra);
+ } else {
+ probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra);
+ probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra);
+ }
+}
+
+uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr)
+{
+ int mmu_idx = arm_env_mmu_index(env);
+ uintptr_t ra = GETPC();
+ int gm_bs = env_archcpu(env)->gm_blocksize;
+ int gm_bs_bytes = 4 << gm_bs;
+ void *tag_mem;
+ uint64_t ret;
+ int shift;
+
+ ptr = QEMU_ALIGN_DOWN(ptr, gm_bs_bytes);
+
+ /* Trap if accessing an invalid page. */
+ tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD,
+ gm_bs_bytes, MMU_DATA_LOAD, ra);
+
+ /* The tag is squashed to zero if the page does not support tags. */
+ if (!tag_mem) {
+ return 0;
+ }
+
+ /*
+ * The ordering of elements within the word corresponds to
+ * a little-endian operation. Computation of shift comes from
+ *
+ * index = address<LOG2_TAG_GRANULE+3:LOG2_TAG_GRANULE>
+ * data<index*4+3:index*4> = tag
+ *
+ * Because of the alignment of ptr above, BS=6 has shift=0.
+ * All memory operations are aligned. Defer support for BS=2,
+ * requiring insertion or extraction of a nibble, until we
+ * support a cpu that requires it.
+ */
+ switch (gm_bs) {
+ case 3:
+ /* 32 bytes -> 2 tags -> 8 result bits */
+ ret = *(uint8_t *)tag_mem;
+ break;
+ case 4:
+ /* 64 bytes -> 4 tags -> 16 result bits */
+ ret = cpu_to_le16(*(uint16_t *)tag_mem);
+ break;
+ case 5:
+ /* 128 bytes -> 8 tags -> 32 result bits */
+ ret = cpu_to_le32(*(uint32_t *)tag_mem);
+ break;
+ case 6:
+ /* 256 bytes -> 16 tags -> 64 result bits */
+ return cpu_to_le64(*(uint64_t *)tag_mem);
+ default:
+ /*
+ * CPU configured with unsupported/invalid gm blocksize.
+ * This is detected early in arm_cpu_realizefn.
+ */
+ g_assert_not_reached();
+ }
+ shift = extract64(ptr, LOG2_TAG_GRANULE, 4) * 4;
+ return ret << shift;
+}
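+
+/*
+ * Illustrative example of the shift computed above: with a 64-byte GM
+ * block size (gm_bs = 4) and an address whose bits [7:4] are 0xc, the
+ * four tags read from tag memory land in bits [63:48] of the result,
+ * matching the index = address<7:4> placement in the pseudocode quoted
+ * in the comment.
+ */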
+
+void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
+{
+ int mmu_idx = arm_env_mmu_index(env);
+ uintptr_t ra = GETPC();
+ int gm_bs = env_archcpu(env)->gm_blocksize;
+ int gm_bs_bytes = 4 << gm_bs;
+ void *tag_mem;
+ int shift;
+
+ ptr = QEMU_ALIGN_DOWN(ptr, gm_bs_bytes);
+
+ /* Trap if accessing an invalid page. */
+ tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
+ gm_bs_bytes, MMU_DATA_LOAD, ra);
+
+ /*
+     * Tag store only happens if the page supports tags,
+ * and if the OS has enabled access to the tags.
+ */
+ if (!tag_mem) {
+ return;
+ }
+
+ /* See LDGM for comments on BS and on shift. */
+ shift = extract64(ptr, LOG2_TAG_GRANULE, 4) * 4;
+ val >>= shift;
+ switch (gm_bs) {
+ case 3:
+ /* 32 bytes -> 2 tags -> 8 result bits */
+ *(uint8_t *)tag_mem = val;
+ break;
+ case 4:
+ /* 64 bytes -> 4 tags -> 16 result bits */
+ *(uint16_t *)tag_mem = cpu_to_le16(val);
+ break;
+ case 5:
+ /* 128 bytes -> 8 tags -> 32 result bits */
+ *(uint32_t *)tag_mem = cpu_to_le32(val);
+ break;
+ case 6:
+ /* 256 bytes -> 16 tags -> 64 result bits */
+ *(uint64_t *)tag_mem = cpu_to_le64(val);
+ break;
+ default:
+ /* cpu configured with unsupported gm blocksize. */
+ g_assert_not_reached();
+ }
+}
+
+void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
+{
+ uintptr_t ra = GETPC();
+ int mmu_idx = arm_env_mmu_index(env);
+ int log2_dcz_bytes, log2_tag_bytes;
+ intptr_t dcz_bytes, tag_bytes;
+ uint8_t *mem;
+
+ /*
+ * In arm_cpu_realizefn, we assert that dcz > LOG2_TAG_GRANULE+1,
+ * i.e. 32 bytes, which is an unreasonably small dcz anyway,
+ * to make sure that we can access one complete tag byte here.
+ */
+ log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2;
+ log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1);
+ dcz_bytes = (intptr_t)1 << log2_dcz_bytes;
+ tag_bytes = (intptr_t)1 << log2_tag_bytes;
+ ptr &= -dcz_bytes;
+
+ mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes,
+ MMU_DATA_STORE, ra);
+ if (mem) {
+ int tag_pair = (val & 0xf) * 0x11;
+ memset(mem, tag_pair, tag_bytes);
+ }
+}
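+
+/*
+ * Worked example for the common 64-byte DC ZVA block size
+ * (dcz_blocksize = 4, so log2_dcz_bytes = 6): log2_tag_bytes is then 1,
+ * so writing tag 0x5 here memsets two tag bytes to 0x55, covering all
+ * four 16-byte granules of the zeroed block.
+ */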
+
+static void mte_sync_check_fail(CPUARMState *env, uint32_t desc,
+ uint64_t dirty_ptr, uintptr_t ra)
+{
+ int is_write, syn;
+
+ env->exception.vaddress = dirty_ptr;
+
+ is_write = FIELD_EX32(desc, MTEDESC, WRITE);
+ syn = syn_data_abort_no_iss(arm_current_el(env) != 0, 0, 0, 0, 0, is_write,
+ 0x11);
+ raise_exception_ra(env, EXCP_DATA_ABORT, syn, exception_target_el(env), ra);
+ g_assert_not_reached();
+}
+
+static void mte_async_check_fail(CPUARMState *env, uint64_t dirty_ptr,
+ uintptr_t ra, ARMMMUIdx arm_mmu_idx, int el)
+{
+ int select;
+
+ if (regime_has_2_ranges(arm_mmu_idx)) {
+ select = extract64(dirty_ptr, 55, 1);
+ } else {
+ select = 0;
+ }
+ env->cp15.tfsr_el[el] |= 1 << select;
+#ifdef CONFIG_USER_ONLY
+ /*
+ * Stand in for a timer irq, setting _TIF_MTE_ASYNC_FAULT,
+ * which then sends a SIGSEGV when the thread is next scheduled.
+ * This cpu will return to the main loop at the end of the TB,
+ * which is rather sooner than "normal". But the alternative
+ * is waiting until the next syscall.
+ */
+ qemu_cpu_kick(env_cpu(env));
+#endif
+}
+
+/* Record a tag check failure. */
+void mte_check_fail(CPUARMState *env, uint32_t desc,
+ uint64_t dirty_ptr, uintptr_t ra)
+{
+ int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
+ ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx);
+ int el, reg_el, tcf;
+ uint64_t sctlr;
+
+ reg_el = regime_el(env, arm_mmu_idx);
+ sctlr = env->cp15.sctlr_el[reg_el];
+
+ switch (arm_mmu_idx) {
+ case ARMMMUIdx_E10_0:
+ case ARMMMUIdx_E20_0:
+ el = 0;
+ tcf = extract64(sctlr, 38, 2);
+ break;
+ default:
+ el = reg_el;
+ tcf = extract64(sctlr, 40, 2);
+ }
+
+ switch (tcf) {
+ case 1:
+ /* Tag check fail causes a synchronous exception. */
+ mte_sync_check_fail(env, desc, dirty_ptr, ra);
+ break;
+
+ case 0:
+ /*
+ * Tag check fail does not affect the PE.
+ * We eliminate this case by not setting MTE_ACTIVE
+ * in tb_flags, so that we never make this runtime call.
+ */
+ g_assert_not_reached();
+
+ case 2:
+ /* Tag check fail causes asynchronous flag set. */
+ mte_async_check_fail(env, dirty_ptr, ra, arm_mmu_idx, el);
+ break;
+
+ case 3:
+ /*
+ * Tag check fail causes asynchronous flag set for stores, or
+ * a synchronous exception for loads.
+ */
+ if (FIELD_EX32(desc, MTEDESC, WRITE)) {
+ mte_async_check_fail(env, dirty_ptr, ra, arm_mmu_idx, el);
+ } else {
+ mte_sync_check_fail(env, desc, dirty_ptr, ra);
+ }
+ break;
+ }
+}
+
+/**
+ * checkN:
+ * @tag: tag memory to test
+ * @odd: true to begin testing at tags at odd nibble
+ * @cmp: the tag to compare against
+ * @count: number of tags to test
+ *
+ * Return the number of successful tests.
+ * Thus a return value < @count indicates a failure.
+ *
+ * A note about sizes: count is expected to be small.
+ *
+ * The most common use will be LDP/STP of two integer registers,
+ * which means 16 bytes of memory touching at most 2 tags, but
+ * often the access is aligned and thus just 1 tag.
+ *
+ * Using AdvSIMD LD/ST (multiple), one can access 64 bytes of memory,
+ * touching at most 5 tags. SVE LDR/STR (vector) with the default
+ * vector length is also 64 bytes; the maximum architectural length
+ * is 256 bytes touching at most 9 tags.
+ *
+ * The loop below uses 7 logical operations and 1 memory operation
+ * per tag pair. An implementation that loads an aligned word and
+ * uses masking to ignore adjacent tags requires 18 logical operations
+ * and thus does not begin to pay off until 6 tags.
+ * Which, according to the survey above, is unlikely to be common.
+ */
+static int checkN(uint8_t *mem, int odd, int cmp, int count)
+{
+ int n = 0, diff;
+
+ /* Replicate the test tag and compare. */
+ cmp *= 0x11;
+ diff = *mem++ ^ cmp;
+
+ if (odd) {
+ goto start_odd;
+ }
+
+ while (1) {
+ /* Test even tag. */
+ if (unlikely((diff) & 0x0f)) {
+ break;
+ }
+ if (++n == count) {
+ break;
+ }
+
+ start_odd:
+ /* Test odd tag. */
+ if (unlikely((diff) & 0xf0)) {
+ break;
+ }
+ if (++n == count) {
+ break;
+ }
+
+ diff = *mem++ ^ cmp;
+ }
+ return n;
+}
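+
+/*
+ * Worked example (tag bytes invented for illustration): checking tag 3
+ * against the two tag bytes { 0x33, 0x73 } with odd = 0 and count = 4
+ * replicates cmp to 0x33; the first three nibbles match, so n reaches 3,
+ * but the fourth nibble is 7, the odd-tag test fails, and checkN returns
+ * 3, which being less than count reports a failure in the 4th granule.
+ */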
+
+/**
+ * checkNrev:
+ * @mem: tag memory to test
+ * @odd: true to begin testing at the odd tag nibble
+ * @cmp: the tag to compare against
+ * @count: number of tags to test
+ *
+ * Return the number of successful tests.
+ * Thus a return value < @count indicates a failure.
+ *
+ * This is like checkN, but it runs backwards, checking the
+ * tags starting at @mem and then the tags preceding it.
+ * This is needed by the backwards-memory-copying operations.
+ */
+static int checkNrev(uint8_t *mem, int odd, int cmp, int count)
+{
+ int n = 0, diff;
+
+ /* Replicate the test tag and compare. */
+ cmp *= 0x11;
+ diff = *mem-- ^ cmp;
+
+ if (!odd) {
+ goto start_even;
+ }
+
+ while (1) {
+ /* Test odd tag. */
+ if (unlikely((diff) & 0xf0)) {
+ break;
+ }
+ if (++n == count) {
+ break;
+ }
+
+ start_even:
+ /* Test even tag. */
+ if (unlikely((diff) & 0x0f)) {
+ break;
+ }
+ if (++n == count) {
+ break;
+ }
+
+ diff = *mem-- ^ cmp;
+ }
+ return n;
+}
+
+/**
+ * mte_probe_int() - helper for mte_probe and mte_check
+ * @env: CPU environment
+ * @desc: MTEDESC descriptor
+ * @ptr: virtual address of the base of the access
+ * @fault: return virtual address of the first check failure
+ *
+ * Internal routine for both mte_probe and mte_check.
+ * Return zero on failure, filling in *fault.
+ * Return negative on trivial success for tbi disabled.
+ * Return positive on success with tbi enabled.
+ */
+static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr,
+ uintptr_t ra, uint64_t *fault)
+{
+ int mmu_idx, ptr_tag, bit55;
+ uint64_t ptr_last, prev_page, next_page;
+ uint64_t tag_first, tag_last;
+ uint32_t sizem1, tag_count, n, c;
+ uint8_t *mem1, *mem2;
+ MMUAccessType type;
+
+ bit55 = extract64(ptr, 55, 1);
+ *fault = ptr;
+
+ /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
+ if (unlikely(!tbi_check(desc, bit55))) {
+ return -1;
+ }
+
+ ptr_tag = allocation_tag_from_addr(ptr);
+
+ if (tcma_check(desc, bit55, ptr_tag)) {
+ return 1;
+ }
+
+ mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
+ type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD;
+ sizem1 = FIELD_EX32(desc, MTEDESC, SIZEM1);
+
+ /* Find the addr of the end of the access */
+ ptr_last = ptr + sizem1;
+
+ /* Round the bounds to the tag granule, and compute the number of tags. */
+ tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
+ tag_last = QEMU_ALIGN_DOWN(ptr_last, TAG_GRANULE);
+ tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1;
+
+ /* Locate the page boundaries. */
+ prev_page = ptr & TARGET_PAGE_MASK;
+ next_page = prev_page + TARGET_PAGE_SIZE;
+
+ if (likely(tag_last - prev_page < TARGET_PAGE_SIZE)) {
+ /* Memory access stays on one page. */
+ mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, sizem1 + 1,
+ MMU_DATA_LOAD, ra);
+ if (!mem1) {
+ return 1;
+ }
+ /* Perform all of the comparisons. */
+ n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count);
+ } else {
+ /* Memory access crosses to next page. */
+ mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr,
+ MMU_DATA_LOAD, ra);
+
+ mem2 = allocation_tag_mem(env, mmu_idx, next_page, type,
+ ptr_last - next_page + 1,
+ MMU_DATA_LOAD, ra);
+
+ /*
+ * Perform all of the comparisons.
+ * Note the possible but unlikely case of the operation spanning
+ * two pages that do not both have tagging enabled.
+ */
+ n = c = (next_page - tag_first) / TAG_GRANULE;
+ if (mem1) {
+ n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, c);
+ }
+ if (n == c) {
+ if (!mem2) {
+ return 1;
+ }
+ n += checkN(mem2, 0, ptr_tag, tag_count - c);
+ }
+ }
+
+ if (likely(n == tag_count)) {
+ return 1;
+ }
+
+ /*
+ * If we failed, we know which granule. For the first granule, the
+ * failure address is @ptr, the first byte accessed. Otherwise the
+ * failure address is the first byte of the nth granule.
+ */
+ if (n > 0) {
+ *fault = tag_first + n * TAG_GRANULE;
+ }
+ return 0;
+}
+
+uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra)
+{
+ uint64_t fault;
+ int ret = mte_probe_int(env, desc, ptr, ra, &fault);
+
+ if (unlikely(ret == 0)) {
+ mte_check_fail(env, desc, fault, ra);
+ } else if (ret < 0) {
+ return ptr;
+ }
+ return useronly_clean_ptr(ptr);
+}
+
+uint64_t HELPER(mte_check)(CPUARMState *env, uint32_t desc, uint64_t ptr)
+{
+ /*
+ * R_XCHFJ: Alignment check not caused by memory type is priority 1,
+ * higher than any translation fault. When MTE is disabled, tcg
+ * performs the alignment check during the code generated for the
+ * memory access. With MTE enabled, we must check this here before
+ * raising any translation fault in allocation_tag_mem.
+ */
+ unsigned align = FIELD_EX32(desc, MTEDESC, ALIGN);
+ if (unlikely(align)) {
+ align = (1u << align) - 1;
+ if (unlikely(ptr & align)) {
+ int idx = FIELD_EX32(desc, MTEDESC, MIDX);
+ bool w = FIELD_EX32(desc, MTEDESC, WRITE);
+ MMUAccessType type = w ? MMU_DATA_STORE : MMU_DATA_LOAD;
+ arm_cpu_do_unaligned_access(env_cpu(env), ptr, type, idx, GETPC());
+ }
+ }
+
+ return mte_check(env, desc, ptr, GETPC());
+}
+
+/*
+ * No-fault version of mte_check, to be used by SVE for MemSingleNF.
+ * Returns false if the access is Checked and the check failed. This
+ * is only intended to probe the tag -- the validity of the page must
+ * be checked beforehand.
+ */
+bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr)
+{
+ uint64_t fault;
+ int ret = mte_probe_int(env, desc, ptr, 0, &fault);
+
+ return ret != 0;
+}
+
+/*
+ * Perform an MTE checked access for DC_ZVA.
+ */
+uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr)
+{
+ uintptr_t ra = GETPC();
+ int log2_dcz_bytes, log2_tag_bytes;
+ int mmu_idx, bit55;
+ intptr_t dcz_bytes, tag_bytes, i;
+ void *mem;
+ uint64_t ptr_tag, mem_tag, align_ptr;
+
+ bit55 = extract64(ptr, 55, 1);
+
+ /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
+ if (unlikely(!tbi_check(desc, bit55))) {
+ return ptr;
+ }
+
+ ptr_tag = allocation_tag_from_addr(ptr);
+
+ if (tcma_check(desc, bit55, ptr_tag)) {
+ goto done;
+ }
+
+ /*
+ * In arm_cpu_realizefn, we asserted that dcz > LOG2_TAG_GRANULE+1,
+ * i.e. 32 bytes, which is an unreasonably small dcz anyway, to make
+ * sure that we can access one complete tag byte here.
+ */
+ log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2;
+ log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1);
+ dcz_bytes = (intptr_t)1 << log2_dcz_bytes;
+ tag_bytes = (intptr_t)1 << log2_tag_bytes;
+ align_ptr = ptr & -dcz_bytes;
+
+ /*
+ * Trap if accessing an invalid page. DC_ZVA requires that we supply
+ * the original pointer for an invalid page. But watchpoints require
+ * that we probe the actual space. So do both.
+ */
+ mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
+ (void) probe_write(env, ptr, 1, mmu_idx, ra);
+ mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE,
+ dcz_bytes, MMU_DATA_LOAD, ra);
+ if (!mem) {
+ goto done;
+ }
+
+ /*
+ * Unlike the reasoning for checkN, DC_ZVA is always aligned, and thus
+ * it is quite easy to perform all of the comparisons at once without
+ * any extra masking.
+ *
+ * The most common zva block size is 64; some of the thunderx cpus use
+ * a block size of 128. For user-only, aarch64_max_initfn will set the
+ * block size to 512. Fill out the other cases for future-proofing.
+ *
+ * In order to be able to find the first miscompare later, we want the
+ * tag bytes to be in little-endian order.
+ */
+ switch (log2_tag_bytes) {
+ case 0: /* zva_blocksize 32 */
+ mem_tag = *(uint8_t *)mem;
+ ptr_tag *= 0x11u;
+ break;
+ case 1: /* zva_blocksize 64 */
+ mem_tag = cpu_to_le16(*(uint16_t *)mem);
+ ptr_tag *= 0x1111u;
+ break;
+ case 2: /* zva_blocksize 128 */
+ mem_tag = cpu_to_le32(*(uint32_t *)mem);
+ ptr_tag *= 0x11111111u;
+ break;
+ case 3: /* zva_blocksize 256 */
+ mem_tag = cpu_to_le64(*(uint64_t *)mem);
+ ptr_tag *= 0x1111111111111111ull;
+ break;
+
+ default: /* zva_blocksize 512, 1024, 2048 */
+ ptr_tag *= 0x1111111111111111ull;
+ i = 0;
+ do {
+ mem_tag = cpu_to_le64(*(uint64_t *)(mem + i));
+ if (unlikely(mem_tag != ptr_tag)) {
+ goto fail;
+ }
+ i += 8;
+ align_ptr += 16 * TAG_GRANULE;
+ } while (i < tag_bytes);
+ goto done;
+ }
+
+ if (likely(mem_tag == ptr_tag)) {
+ goto done;
+ }
+
+ fail:
+ /* Locate the first nibble that differs. */
+ i = ctz64(mem_tag ^ ptr_tag) >> 4;
+ mte_check_fail(env, desc, align_ptr + i * TAG_GRANULE, ra);
+
+ done:
+ return useronly_clean_ptr(ptr);
+}
+
+uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size,
+ uint32_t desc)
+{
+ int mmu_idx, tag_count;
+ uint64_t ptr_tag, tag_first, tag_last;
+ void *mem;
+ bool w = FIELD_EX32(desc, MTEDESC, WRITE);
+ uint32_t n;
+
+ mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
+ /* True probe; this will never fault */
+ mem = allocation_tag_mem_probe(env, mmu_idx, ptr,
+ w ? MMU_DATA_STORE : MMU_DATA_LOAD,
+ size, MMU_DATA_LOAD, true, 0);
+ if (!mem) {
+ return size;
+ }
+
+ /*
+ * TODO: checkN() is not designed for checks of the size we expect
+ * for FEAT_MOPS operations, so we should implement this differently.
+ * Maybe we should do something like
+ * if (region start and size are aligned nicely) {
+ * do direct loads of 64 tag bits at a time;
+ * } else {
+ * call checkN()
+ * }
+ */
+ /* Round the bounds to the tag granule, and compute the number of tags. */
+ ptr_tag = allocation_tag_from_addr(ptr);
+ tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
+ tag_last = QEMU_ALIGN_DOWN(ptr + size - 1, TAG_GRANULE);
+ tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1;
+ n = checkN(mem, ptr & TAG_GRANULE, ptr_tag, tag_count);
+ if (likely(n == tag_count)) {
+ return size;
+ }
+
+ /*
+ * Failure; for the first granule, it's at @ptr. Otherwise
+ * it's at the first byte of the nth granule. Calculate how
+ * many bytes we can access without hitting that failure.
+ */
+ if (n == 0) {
+ return 0;
+ } else {
+ return n * TAG_GRANULE - (ptr - tag_first);
+ }
+}
+
+uint64_t mte_mops_probe_rev(CPUARMState *env, uint64_t ptr, uint64_t size,
+ uint32_t desc)
+{
+ int mmu_idx, tag_count;
+ uint64_t ptr_tag, tag_first, tag_last;
+ void *mem;
+ bool w = FIELD_EX32(desc, MTEDESC, WRITE);
+ uint32_t n;
+
+ mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
+ /*
+ * True probe; this will never fault. Note that our caller passes
+ * us a pointer to the end of the region, but allocation_tag_mem_probe()
+ * wants a pointer to the start. Because we know we don't span a page
+ * boundary and that allocation_tag_mem_probe() doesn't otherwise care
+ * about the size, pass in a size of 1 byte. This is simpler than
+ * adjusting the ptr to point to the start of the region and then having
+ * to adjust the returned 'mem' to get the end of the tag memory.
+ */
+ mem = allocation_tag_mem_probe(env, mmu_idx, ptr,
+ w ? MMU_DATA_STORE : MMU_DATA_LOAD,
+ 1, MMU_DATA_LOAD, true, 0);
+ if (!mem) {
+ return size;
+ }
+
+ /*
+ * TODO: checkNrev() is not designed for checks of the size we expect
+ * for FEAT_MOPS operations, so we should implement this differently.
+ * Maybe we should do something like
+ * if (region start and size are aligned nicely) {
+ * do direct loads of 64 tag bits at a time;
+ * } else {
+     *      call checkNrev()
+ * }
+ */
+ /* Round the bounds to the tag granule, and compute the number of tags. */
+ ptr_tag = allocation_tag_from_addr(ptr);
+ tag_first = QEMU_ALIGN_DOWN(ptr - (size - 1), TAG_GRANULE);
+ tag_last = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
+ tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1;
+ n = checkNrev(mem, ptr & TAG_GRANULE, ptr_tag, tag_count);
+ if (likely(n == tag_count)) {
+ return size;
+ }
+
+ /*
+ * Failure; for the first granule, it's at @ptr. Otherwise
+ * it's at the last byte of the nth granule. Calculate how
+ * many bytes we can access without hitting that failure.
+ */
+ if (n == 0) {
+ return 0;
+ } else {
+ return (n - 1) * TAG_GRANULE + ((ptr + 1) - tag_last);
+ }
+}
+
+void mte_mops_set_tags(CPUARMState *env, uint64_t ptr, uint64_t size,
+ uint32_t desc)
+{
+ int mmu_idx, tag_count;
+ uint64_t ptr_tag;
+ void *mem;
+
+ if (!desc) {
+ /* Tags not actually enabled */
+ return;
+ }
+
+ mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
+ /* True probe: this will never fault */
+ mem = allocation_tag_mem_probe(env, mmu_idx, ptr, MMU_DATA_STORE, size,
+ MMU_DATA_STORE, true, 0);
+ if (!mem) {
+ return;
+ }
+
+ /*
+ * We know that ptr and size are both TAG_GRANULE aligned; store
+ * the tag from the pointer value into the tag memory.
+ */
+ ptr_tag = allocation_tag_from_addr(ptr);
+ tag_count = size / TAG_GRANULE;
+ if (ptr & TAG_GRANULE) {
+ /* Not 2*TAG_GRANULE-aligned: store tag to first nibble */
+ store_tag1_parallel(TAG_GRANULE, mem, ptr_tag);
+ mem++;
+ tag_count--;
+ }
+ memset(mem, ptr_tag | (ptr_tag << 4), tag_count / 2);
+ if (tag_count & 1) {
+ /* Final trailing unaligned nibble */
+ mem += tag_count / 2;
+ store_tag1_parallel(0, mem, ptr_tag);
+ }
+}
diff --git a/target/arm/tcg/mve.decode b/target/arm/tcg/mve.decode
new file mode 100644
index 0000000000..14a4f39802
--- /dev/null
+++ b/target/arm/tcg/mve.decode
@@ -0,0 +1,832 @@
+# M-profile MVE instruction descriptions
+#
+# Copyright (c) 2021 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+%qd 22:1 13:3
+%qm 5:1 1:3
+%qn 7:1 17:3
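+# For illustration: a field spec such as "22:1 13:3" concatenates the listed
+# bit slices with the first one as the most significant bits, so %qd above
+# decodes to (insn[22] << 3) | insn[15:13]; an encoding with bit 22 clear and
+# bits [15:13] = 0b101 therefore selects Q5.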
+
+# VQDMULL has size in bit 28: 0 for 16 bit, 1 for 32 bit
+%size_28 28:1 !function=plus_1
+
+# 2 operand fp insns have size in bit 20: 1 for 16 bit, 0 for 32 bit,
+# like Neon FP insns.
+%2op_fp_size 20:1 !function=neon_3same_fp_size
+# VCADD is an exception, where bit 20 is 0 for 16 bit and 1 for 32 bit
+%2op_fp_size_rev 20:1 !function=plus_1
+# FP scalars have size in bit 28, 1 for 16 bit, 0 for 32 bit
+%2op_fp_scalar_size 28:1 !function=neon_3same_fp_size
+
+# 1imm format immediate
+%imm_28_16_0 28:1 16:3 0:4
+
+&vldr_vstr rn qd imm p a w size l u
+&1op qd qm size
+&2op qd qm qn size
+&2scalar qd qn rm size
+&1imm qd imm cmode op
+&2shift qd qm shift size
+&vidup qd rn size imm
+&viwdup qd rn rm size imm
+&vcmp qm qn size mask
+&vcmp_scalar qn rm size mask
+&shl_scalar qda rm size
+&vmaxv qm rda size
+&vabav qn qm rda size
+&vldst_sg qd qm rn size msize os
+&vldst_sg_imm qd qm a w imm
+&vldst_il qd rn size pat w
+
+# scatter-gather memory size is encoded in bits 6 and 4
+%sg_msize 6:1 4:1
+
+@vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
+# Note that both Rn and Qd are 3 bits only (no D bit)
+@vldst_wn ... u:1 ... . . . . l:1 . rn:3 qd:3 . ... .. imm:7 &vldr_vstr
+
+@vldst_sg .... .... .... rn:4 .... ... size:2 ... ... os:1 &vldst_sg \
+ qd=%qd qm=%qm msize=%sg_msize
+
+# Qm is in the fields usually labeled Qn
+@vldst_sg_imm .... .... a:1 . w:1 . .... .... .... . imm:7 &vldst_sg_imm \
+ qd=%qd qm=%qn
+
+# Deinterleaving load/interleaving store
+@vldst_il .... .... .. w:1 . rn:4 .... ... size:2 pat:2 ..... &vldst_il \
+ qd=%qd
+
+@1op .... .... .... size:2 .. .... .... .... .... &1op qd=%qd qm=%qm
+@1op_nosz .... .... .... .... .... .... .... .... &1op qd=%qd qm=%qm size=0
+@2op .... .... .. size:2 .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn
+@2op_nosz .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn size=0
+@2op_sz28 .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn \
+ size=%size_28
+@1imm .... .... .... .... .... cmode:4 .. op:1 . .... &1imm qd=%qd imm=%imm_28_16_0
+
+# The _rev suffix indicates that Vn and Vm are reversed. This is
+# the case for shifts. In the Arm ARM these insns are documented
+# with the Vm and Vn fields in their usual places, but in the
+# assembly the operands are listed "backwards", ie in the order
+# Qd, Qm, Qn where other insns use Qd, Qn, Qm. For QEMU we choose
+# to consider Vm and Vn as being in different fields in the insn.
+# This gives us consistency with A64 and Neon.
+@2op_rev .... .... .. size:2 .... .... .... .... .... &2op qd=%qd qm=%qn qn=%qm
+
+@2scalar .... .... .. size:2 .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn
+@2scalar_nosz .... .... .... .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn
+
+@2_shl_b .... .... .. 001 shift:3 .... .... .... .... &2shift qd=%qd qm=%qm size=0
+@2_shl_h .... .... .. 01 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1
+@2_shl_w .... .... .. 1 shift:5 .... .... .... .... &2shift qd=%qd qm=%qm size=2
+
+@2_shll_b .... .... ... 01 shift:3 .... .... .... .... &2shift qd=%qd qm=%qm size=0
+@2_shll_h .... .... ... 1 shift:4 .... .... .... .... &2shift qd=%qd qm=%qm size=1
+# VSHLL encoding T2 where shift == esize
+@2_shll_esize_b .... .... .... 00 .. .... .... .... .... &2shift \
+ qd=%qd qm=%qm size=0 shift=8
+@2_shll_esize_h .... .... .... 01 .. .... .... .... .... &2shift \
+ qd=%qd qm=%qm size=1 shift=16
+
+# Right shifts are encoded as N - shift, where N is the element size in bits.
+%rshift_i5 16:5 !function=rsub_32
+%rshift_i4 16:4 !function=rsub_16
+%rshift_i3 16:3 !function=rsub_8
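+# e.g. for byte elements an encoded value of 7 denotes a shift of 8 - 7 = 1,
+# and an encoded value of 0 denotes a shift by the full element size of 8.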
+
+@2_shr_b .... .... .. 001 ... .... .... .... .... &2shift qd=%qd qm=%qm \
+ size=0 shift=%rshift_i3
+@2_shr_h .... .... .. 01 .... .... .... .... .... &2shift qd=%qd qm=%qm \
+ size=1 shift=%rshift_i4
+@2_shr_w .... .... .. 1 ..... .... .... .... .... &2shift qd=%qd qm=%qm \
+ size=2 shift=%rshift_i5
+
+@shl_scalar .... .... .... size:2 .. .... .... .... rm:4 &shl_scalar qda=%qd
+
+# Vector comparison; 4-bit Qm but 3-bit Qn
+%mask_22_13 22:1 13:3
+@vcmp .... .... .. size:2 qn:3 . .... .... .... .... &vcmp qm=%qm mask=%mask_22_13
+@vcmp_scalar .... .... .. size:2 qn:3 . .... .... .... rm:4 &vcmp_scalar \
+ mask=%mask_22_13
+
+@vcmp_fp .... .... .... qn:3 . .... .... .... .... &vcmp \
+ qm=%qm size=%2op_fp_scalar_size mask=%mask_22_13
+
+# Bit 28 is a 2op_fp_scalar_size bit, but we do not decode it in this
+# format, to avoid complicated overlapping instruction groups
+@vcmp_fp_scalar .... .... .... qn:3 . .... .... .... rm:4 &vcmp_scalar \
+ mask=%mask_22_13
+
+@vmaxv .... .... .... size:2 .. rda:4 .... .... .... &vmaxv qm=%qm
+
+@2op_fp .... .... .... .... .... .... .... .... &2op \
+ qd=%qd qn=%qn qm=%qm size=%2op_fp_size
+
+@2op_fp_size_rev .... .... .... .... .... .... .... .... &2op \
+ qd=%qd qn=%qn qm=%qm size=%2op_fp_size_rev
+
+# 2-operand, but Qd and Qn share a field. Size is in bit 28, but we
+# don't decode it in this format
+@vmaxnma .... .... .... .... .... .... .... .... &2op \
+ qd=%qd qn=%qd qm=%qm
+
+# Here also we don't decode the bit 28 size in the format to avoid
+# awkward nested overlap groups
+@vmaxnmv .... .... .... .... rda:4 .... .... .... &vmaxv qm=%qm
+
+@2op_fp_scalar .... .... .... .... .... .... .... rm:4 &2scalar \
+ qd=%qd qn=%qn size=%2op_fp_scalar_size
+
+# Vector loads and stores
+
+# Widening loads and narrowing stores:
+# for these P=0 W=0 is 'related encoding'; sz=11 is 'related encoding'
+# This means we need to expand out to multiple patterns for P, W, SZ.
+# For stores the U bit must be 0 but we catch that in the trans_ function.
+# The naming scheme here is "VLDSTB_H == in-memory byte load/store to/from
+# signed halfword element in register", etc.
+VLDSTB_H 111 . 110 0 a:1 0 1 . 0 ... ... 0 111 01 ....... @vldst_wn \
+ p=0 w=1 size=1
+VLDSTB_H 111 . 110 1 a:1 0 w:1 . 0 ... ... 0 111 01 ....... @vldst_wn \
+ p=1 size=1
+VLDSTB_W 111 . 110 0 a:1 0 1 . 0 ... ... 0 111 10 ....... @vldst_wn \
+ p=0 w=1 size=2
+VLDSTB_W 111 . 110 1 a:1 0 w:1 . 0 ... ... 0 111 10 ....... @vldst_wn \
+ p=1 size=2
+VLDSTH_W 111 . 110 0 a:1 0 1 . 1 ... ... 0 111 10 ....... @vldst_wn \
+ p=0 w=1 size=2
+VLDSTH_W 111 . 110 1 a:1 0 w:1 . 1 ... ... 0 111 10 ....... @vldst_wn \
+ p=1 size=2
+
+# Non-widening loads/stores (P=0 W=0 is 'related encoding')
+VLDR_VSTR 1110110 0 a:1 . 1 . .... ... 111100 ....... @vldr_vstr \
+ size=0 p=0 w=1
+VLDR_VSTR 1110110 0 a:1 . 1 . .... ... 111101 ....... @vldr_vstr \
+ size=1 p=0 w=1
+VLDR_VSTR 1110110 0 a:1 . 1 . .... ... 111110 ....... @vldr_vstr \
+ size=2 p=0 w=1
+VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111100 ....... @vldr_vstr \
+ size=0 p=1
+VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111101 ....... @vldr_vstr \
+ size=1 p=1
+VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111110 ....... @vldr_vstr \
+ size=2 p=1
+
+# gather loads/scatter stores
+VLDR_S_sg 111 0 1100 1 . 01 .... ... 0 111 . .... .... @vldst_sg
+VLDR_U_sg 111 1 1100 1 . 01 .... ... 0 111 . .... .... @vldst_sg
+VSTR_sg 111 0 1100 1 . 00 .... ... 0 111 . .... .... @vldst_sg
+
+VLDRW_sg_imm 111 1 1101 ... 1 ... 0 ... 1 1110 .... .... @vldst_sg_imm
+VLDRD_sg_imm 111 1 1101 ... 1 ... 0 ... 1 1111 .... .... @vldst_sg_imm
+VSTRW_sg_imm 111 1 1101 ... 0 ... 0 ... 1 1110 .... .... @vldst_sg_imm
+VSTRD_sg_imm 111 1 1101 ... 0 ... 0 ... 1 1111 .... .... @vldst_sg_imm
+
+# deinterleaving loads/interleaving stores
+VLD2 1111 1100 1 .. 1 .... ... 1 111 .. .. 00000 @vldst_il
+VLD4 1111 1100 1 .. 1 .... ... 1 111 .. .. 00001 @vldst_il
+VST2 1111 1100 1 .. 0 .... ... 1 111 .. .. 00000 @vldst_il
+VST4 1111 1100 1 .. 0 .... ... 1 111 .. .. 00001 @vldst_il
+
+# Moves between 2 32-bit vector lanes and 2 general purpose registers
+VMOV_to_2gp 1110 1100 0 . 00 rt2:4 ... 0 1111 000 idx:1 rt:4 qd=%qd
+VMOV_from_2gp 1110 1100 0 . 01 rt2:4 ... 0 1111 000 idx:1 rt:4 qd=%qd
+
+# Vector 2-op
+VAND 1110 1111 0 . 00 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
+VBIC 1110 1111 0 . 01 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
+VORR 1110 1111 0 . 10 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
+VORN 1110 1111 0 . 11 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
+VEOR 1111 1111 0 . 00 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
+
+VADD 1110 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op
+VSUB 1111 1111 0 . .. ... 0 ... 0 1000 . 1 . 0 ... 0 @2op
+VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
+
+# The VSHLL T2 encoding is not a @2op pattern, but is here because it
+# overlaps what would be size=0b11 VMULH/VRMULH
+{
+ VCVTB_SH 111 0 1110 0 . 11 1111 ... 0 1110 0 0 . 0 ... 1 @1op_nosz
+
+ VMAXNMA 111 0 1110 0 . 11 1111 ... 0 1110 1 0 . 0 ... 1 @vmaxnma size=2
+
+ VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b
+ VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h
+
+ VQMOVUNB 111 0 1110 0 . 11 .. 01 ... 0 1110 1 0 . 0 ... 1 @1op
+ VQMOVN_BS 111 0 1110 0 . 11 .. 11 ... 0 1110 0 0 . 0 ... 1 @1op
+
+ VMAXA 111 0 1110 0 . 11 .. 11 ... 0 1110 1 0 . 0 ... 1 @1op
+
+ VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
+}
+
+{
+ VCVTB_HS 111 1 1110 0 . 11 1111 ... 0 1110 0 0 . 0 ... 1 @1op_nosz
+
+ VMAXNMA 111 1 1110 0 . 11 1111 ... 0 1110 1 0 . 0 ... 1 @vmaxnma size=1
+
+ VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b
+ VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h
+
+ VMOVNB 111 1 1110 0 . 11 .. 01 ... 0 1110 1 0 . 0 ... 1 @1op
+ VQMOVN_BU 111 1 1110 0 . 11 .. 11 ... 0 1110 0 0 . 0 ... 1 @1op
+
+ VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
+}
+
+{
+ VCVTT_SH 111 0 1110 0 . 11 1111 ... 1 1110 0 0 . 0 ... 1 @1op_nosz
+
+ VMINNMA 111 0 1110 0 . 11 1111 ... 1 1110 1 0 . 0 ... 1 @vmaxnma size=2
+ VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b
+ VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h
+
+ VQMOVUNT 111 0 1110 0 . 11 .. 01 ... 1 1110 1 0 . 0 ... 1 @1op
+ VQMOVN_TS 111 0 1110 0 . 11 .. 11 ... 1 1110 0 0 . 0 ... 1 @1op
+
+ VMINA 111 0 1110 0 . 11 .. 11 ... 1 1110 1 0 . 0 ... 1 @1op
+
+ VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
+}
+
+{
+ VCVTT_HS 111 1 1110 0 . 11 1111 ... 1 1110 0 0 . 0 ... 1 @1op_nosz
+
+ VMINNMA 111 1 1110 0 . 11 1111 ... 1 1110 1 0 . 0 ... 1 @vmaxnma size=1
+ VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b
+ VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h
+
+ VMOVNT 111 1 1110 0 . 11 .. 01 ... 1 1110 1 0 . 0 ... 1 @1op
+ VQMOVN_TU 111 1 1110 0 . 11 .. 11 ... 1 1110 0 0 . 0 ... 1 @1op
+
+ VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
+}
+
+VMAX_S 111 0 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op
+VMAX_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 0 ... 0 @2op
+VMIN_S 111 0 1111 0 . .. ... 0 ... 0 0110 . 1 . 1 ... 0 @2op
+VMIN_U 111 1 1111 0 . .. ... 0 ... 0 0110 . 1 . 1 ... 0 @2op
+
+VABD_S 111 0 1111 0 . .. ... 0 ... 0 0111 . 1 . 0 ... 0 @2op
+VABD_U 111 1 1111 0 . .. ... 0 ... 0 0111 . 1 . 0 ... 0 @2op
+
+VHADD_S 111 0 1111 0 . .. ... 0 ... 0 0000 . 1 . 0 ... 0 @2op
+VHADD_U 111 1 1111 0 . .. ... 0 ... 0 0000 . 1 . 0 ... 0 @2op
+VHSUB_S 111 0 1111 0 . .. ... 0 ... 0 0010 . 1 . 0 ... 0 @2op
+VHSUB_U 111 1 1111 0 . .. ... 0 ... 0 0010 . 1 . 0 ... 0 @2op
+
+{
+ VMULLP_B 111 . 1110 0 . 11 ... 1 ... 0 1110 . 0 . 0 ... 0 @2op_sz28
+ VMULL_BS 111 0 1110 0 . .. ... 1 ... 0 1110 . 0 . 0 ... 0 @2op
+ VMULL_BU 111 1 1110 0 . .. ... 1 ... 0 1110 . 0 . 0 ... 0 @2op
+}
+{
+ VMULLP_T 111 . 1110 0 . 11 ... 1 ... 1 1110 . 0 . 0 ... 0 @2op_sz28
+ VMULL_TS 111 0 1110 0 . .. ... 1 ... 1 1110 . 0 . 0 ... 0 @2op
+ VMULL_TU 111 1 1110 0 . .. ... 1 ... 1 1110 . 0 . 0 ... 0 @2op
+}
+
+VQDMULH 1110 1111 0 . .. ... 0 ... 0 1011 . 1 . 0 ... 0 @2op
+VQRDMULH 1111 1111 0 . .. ... 0 ... 0 1011 . 1 . 0 ... 0 @2op
+
+VQADD_S 111 0 1111 0 . .. ... 0 ... 0 0000 . 1 . 1 ... 0 @2op
+VQADD_U 111 1 1111 0 . .. ... 0 ... 0 0000 . 1 . 1 ... 0 @2op
+VQSUB_S 111 0 1111 0 . .. ... 0 ... 0 0010 . 1 . 1 ... 0 @2op
+VQSUB_U 111 1 1111 0 . .. ... 0 ... 0 0010 . 1 . 1 ... 0 @2op
+
+VSHL_S 111 0 1111 0 . .. ... 0 ... 0 0100 . 1 . 0 ... 0 @2op_rev
+VSHL_U 111 1 1111 0 . .. ... 0 ... 0 0100 . 1 . 0 ... 0 @2op_rev
+
+VRSHL_S 111 0 1111 0 . .. ... 0 ... 0 0101 . 1 . 0 ... 0 @2op_rev
+VRSHL_U 111 1 1111 0 . .. ... 0 ... 0 0101 . 1 . 0 ... 0 @2op_rev
+
+VQSHL_S 111 0 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev
+VQSHL_U 111 1 1111 0 . .. ... 0 ... 0 0100 . 1 . 1 ... 0 @2op_rev
+
+VQRSHL_S 111 0 1111 0 . .. ... 0 ... 0 0101 . 1 . 1 ... 0 @2op_rev
+VQRSHL_U 111 1 1111 0 . .. ... 0 ... 0 0101 . 1 . 1 ... 0 @2op_rev
+
+{
+ VCMUL0 111 . 1110 0 . 11 ... 0 ... 0 1110 . 0 . 0 ... 0 @2op_sz28
+ VQDMLADH 1110 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 0 @2op
+ VQDMLSDH 1111 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 0 @2op
+}
+
+{
+ VCMUL180 111 . 1110 0 . 11 ... 0 ... 1 1110 . 0 . 0 ... 0 @2op_sz28
+ VQDMLADHX 111 0 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 0 @2op
+ VQDMLSDHX 111 1 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 0 @2op
+}
+
+{
+ VCMUL90 111 . 1110 0 . 11 ... 0 ... 0 1110 . 0 . 0 ... 1 @2op_sz28
+ VQRDMLADH 111 0 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 1 @2op
+ VQRDMLSDH 111 1 1110 0 . .. ... 0 ... 0 1110 . 0 . 0 ... 1 @2op
+}
+
+{
+ VCMUL270 111 . 1110 0 . 11 ... 0 ... 1 1110 . 0 . 0 ... 1 @2op_sz28
+ VQRDMLADHX 111 0 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 1 @2op
+ VQRDMLSDHX 111 1 1110 0 . .. ... 0 ... 1 1110 . 0 . 0 ... 1 @2op
+}
+
+VQDMULLB 111 . 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 1 @2op_sz28
+VQDMULLT 111 . 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 1 @2op_sz28
+
+VRHADD_S 111 0 1111 0 . .. ... 0 ... 0 0001 . 1 . 0 ... 0 @2op
+VRHADD_U 111 1 1111 0 . .. ... 0 ... 0 0001 . 1 . 0 ... 0 @2op
+
+{
+ VADC 1110 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 0 @2op_nosz
+ VADCI 1110 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 0 @2op_nosz
+ VHCADD90 1110 1110 0 . .. ... 0 ... 0 1111 . 0 . 0 ... 0 @2op
+ VHCADD270 1110 1110 0 . .. ... 0 ... 1 1111 . 0 . 0 ... 0 @2op
+}
+
+{
+ VSBC 1111 1110 0 . 11 ... 0 ... 0 1111 . 0 . 0 ... 0 @2op_nosz
+ VSBCI 1111 1110 0 . 11 ... 0 ... 1 1111 . 0 . 0 ... 0 @2op_nosz
+ VCADD90 1111 1110 0 . .. ... 0 ... 0 1111 . 0 . 0 ... 0 @2op
+ VCADD270 1111 1110 0 . .. ... 0 ... 1 1111 . 0 . 0 ... 0 @2op
+}
+
+# Vector miscellaneous
+
+VCLS 1111 1111 1 . 11 .. 00 ... 0 0100 01 . 0 ... 0 @1op
+VCLZ 1111 1111 1 . 11 .. 00 ... 0 0100 11 . 0 ... 0 @1op
+
+VREV16 1111 1111 1 . 11 .. 00 ... 0 0001 01 . 0 ... 0 @1op
+VREV32 1111 1111 1 . 11 .. 00 ... 0 0000 11 . 0 ... 0 @1op
+VREV64 1111 1111 1 . 11 .. 00 ... 0 0000 01 . 0 ... 0 @1op
+
+VMVN 1111 1111 1 . 11 00 00 ... 0 0101 11 . 0 ... 0 @1op_nosz
+
+VABS 1111 1111 1 . 11 .. 01 ... 0 0011 01 . 0 ... 0 @1op
+VABS_fp 1111 1111 1 . 11 .. 01 ... 0 0111 01 . 0 ... 0 @1op
+VNEG 1111 1111 1 . 11 .. 01 ... 0 0011 11 . 0 ... 0 @1op
+VNEG_fp 1111 1111 1 . 11 .. 01 ... 0 0111 11 . 0 ... 0 @1op
+
+VQABS 1111 1111 1 . 11 .. 00 ... 0 0111 01 . 0 ... 0 @1op
+VQNEG 1111 1111 1 . 11 .. 00 ... 0 0111 11 . 0 ... 0 @1op
+
+&vdup qd rt size
+# Qd is in the fields usually named Qn
+@vdup .... .... . . .. ... . rt:4 .... . . . . .... qd=%qn &vdup
+
+# B and E bits encode size, which we decode here to the usual size values
+VDUP 1110 1110 1 1 10 ... 0 .... 1011 . 0 0 1 0000 @vdup size=0
+VDUP 1110 1110 1 0 10 ... 0 .... 1011 . 0 1 1 0000 @vdup size=1
+VDUP 1110 1110 1 0 10 ... 0 .... 1011 . 0 0 1 0000 @vdup size=2
+
+# Incrementing and decrementing dup
+
+# VIDUP, VDDUP format immediate: 1 << (immh:imml)
+%imm_vidup 7:1 0:1 !function=vidup_imm
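+# e.g. immh:imml == 0b10 selects an increment of 1 << 2 == 4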
+
+# VIDUP, VDDUP registers: Rm bits [3:1] from insn, bit 0 is 1;
+# Rn bits [3:1] from insn, bit 0 is 0
+%vidup_rm 1:3 !function=times_2_plus_1
+%vidup_rn 17:3 !function=times_2
+
+@vidup .... .... . . size:2 .... .... .... .... .... \
+ qd=%qd imm=%imm_vidup rn=%vidup_rn &vidup
+@viwdup .... .... . . size:2 .... .... .... .... .... \
+ qd=%qd imm=%imm_vidup rm=%vidup_rm rn=%vidup_rn &viwdup
+{
+ VIDUP 1110 1110 0 . .. ... 1 ... 0 1111 . 110 111 . @vidup
+ VIWDUP 1110 1110 0 . .. ... 1 ... 0 1111 . 110 ... . @viwdup
+}
+{
+ VCMPGT_fp_scalar 1110 1110 0 . 11 ... 1 ... 1 1111 0110 .... @vcmp_fp_scalar size=2
+ VCMPLE_fp_scalar 1110 1110 0 . 11 ... 1 ... 1 1111 1110 .... @vcmp_fp_scalar size=2
+ VDDUP 1110 1110 0 . .. ... 1 ... 1 1111 . 110 111 . @vidup
+ VDWDUP 1110 1110 0 . .. ... 1 ... 1 1111 . 110 ... . @viwdup
+}
+
+# multiply-add long dual accumulate
+# rdahi: bits [3:1] from insn, bit 0 is 1
+# rdalo: bits [3:1] from insn, bit 0 is 0
+%rdahi 20:3 !function=times_2_plus_1
+%rdalo 13:3 !function=times_2
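+# e.g. an encoded field of 0b010 gives RdaLo == R4 and RdaHi == R5, so the
+# 64-bit accumulator always lives in an even/odd register pair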
+# size bit is 0 for 16 bit, 1 for 32 bit
+%size_16 16:1 !function=plus_1
+
+&vmlaldav rdahi rdalo size qn qm x a
+&vmladav rda size qn qm x a
+
+@vmlaldav .... .... . ... ... . ... x:1 .... .. a:1 . qm:3 . \
+ qn=%qn rdahi=%rdahi rdalo=%rdalo size=%size_16 &vmlaldav
+@vmlaldav_nosz .... .... . ... ... . ... x:1 .... .. a:1 . qm:3 . \
+ qn=%qn rdahi=%rdahi rdalo=%rdalo size=0 &vmlaldav
+@vmladav .... .... .... ... . ... x:1 .... . . a:1 . qm:3 . \
+ qn=%qn rda=%rdalo size=%size_16 &vmladav
+@vmladav_nosz .... .... .... ... . ... x:1 .... . . a:1 . qm:3 . \
+ qn=%qn rda=%rdalo size=0 &vmladav
+
+{
+ VMLADAV_S 1110 1110 1111 ... . ... . 1110 . 0 . 0 ... 0 @vmladav
+ VMLALDAV_S 1110 1110 1 ... ... . ... . 1110 . 0 . 0 ... 0 @vmlaldav
+}
+{
+ VMLADAV_U 1111 1110 1111 ... . ... . 1110 . 0 . 0 ... 0 @vmladav
+ VMLALDAV_U 1111 1110 1 ... ... . ... . 1110 . 0 . 0 ... 0 @vmlaldav
+}
+
+{
+ VMLSDAV 1110 1110 1111 ... . ... . 1110 . 0 . 0 ... 1 @vmladav
+ VMLSLDAV 1110 1110 1 ... ... . ... . 1110 . 0 . 0 ... 1 @vmlaldav
+}
+
+{
+ VMLSDAV 1111 1110 1111 ... 0 ... . 1110 . 0 . 0 ... 1 @vmladav_nosz
+ VRMLSLDAVH 1111 1110 1 ... ... 0 ... . 1110 . 0 . 0 ... 1 @vmlaldav_nosz
+}
+
+VMLADAV_S 1110 1110 1111 ... 0 ... . 1111 . 0 . 0 ... 1 @vmladav_nosz
+VMLADAV_U 1111 1110 1111 ... 0 ... . 1111 . 0 . 0 ... 1 @vmladav_nosz
+
+{
+ [
+ VMAXNMAV 1110 1110 1110 11 00 .... 1111 0 0 . 0 ... 0 @vmaxnmv size=2
+ VMINNMAV 1110 1110 1110 11 00 .... 1111 1 0 . 0 ... 0 @vmaxnmv size=2
+ VMAXNMV 1110 1110 1110 11 10 .... 1111 0 0 . 0 ... 0 @vmaxnmv size=2
+ VMINNMV 1110 1110 1110 11 10 .... 1111 1 0 . 0 ... 0 @vmaxnmv size=2
+ ]
+ [
+ VMAXV_S 1110 1110 1110 .. 10 .... 1111 0 0 . 0 ... 0 @vmaxv
+ VMINV_S 1110 1110 1110 .. 10 .... 1111 1 0 . 0 ... 0 @vmaxv
+ VMAXAV 1110 1110 1110 .. 00 .... 1111 0 0 . 0 ... 0 @vmaxv
+ VMINAV 1110 1110 1110 .. 00 .... 1111 1 0 . 0 ... 0 @vmaxv
+ ]
+ VMLADAV_S 1110 1110 1111 ... 0 ... . 1111 . 0 . 0 ... 0 @vmladav_nosz
+ VRMLALDAVH_S 1110 1110 1 ... ... 0 ... . 1111 . 0 . 0 ... 0 @vmlaldav_nosz
+}
+
+{
+ [
+ VMAXNMAV 1111 1110 1110 11 00 .... 1111 0 0 . 0 ... 0 @vmaxnmv size=1
+ VMINNMAV 1111 1110 1110 11 00 .... 1111 1 0 . 0 ... 0 @vmaxnmv size=1
+ VMAXNMV 1111 1110 1110 11 10 .... 1111 0 0 . 0 ... 0 @vmaxnmv size=1
+ VMINNMV 1111 1110 1110 11 10 .... 1111 1 0 . 0 ... 0 @vmaxnmv size=1
+ ]
+ [
+ VMAXV_U 1111 1110 1110 .. 10 .... 1111 0 0 . 0 ... 0 @vmaxv
+ VMINV_U 1111 1110 1110 .. 10 .... 1111 1 0 . 0 ... 0 @vmaxv
+ ]
+ VMLADAV_U 1111 1110 1111 ... 0 ... . 1111 . 0 . 0 ... 0 @vmladav_nosz
+ VRMLALDAVH_U 1111 1110 1 ... ... 0 ... . 1111 . 0 . 0 ... 0 @vmlaldav_nosz
+}
+
+# Scalar operations
+
+{
+ VCMPEQ_fp_scalar 1110 1110 0 . 11 ... 1 ... 0 1111 0100 .... @vcmp_fp_scalar size=2
+ VCMPNE_fp_scalar 1110 1110 0 . 11 ... 1 ... 0 1111 1100 .... @vcmp_fp_scalar size=2
+ VADD_scalar 1110 1110 0 . .. ... 1 ... 0 1111 . 100 .... @2scalar
+}
+
+{
+ VCMPLT_fp_scalar 1110 1110 0 . 11 ... 1 ... 1 1111 1100 .... @vcmp_fp_scalar size=2
+ VCMPGE_fp_scalar 1110 1110 0 . 11 ... 1 ... 1 1111 0100 .... @vcmp_fp_scalar size=2
+ VSUB_scalar 1110 1110 0 . .. ... 1 ... 1 1111 . 100 .... @2scalar
+}
+
+{
+ VSHL_S_scalar 1110 1110 0 . 11 .. 01 ... 1 1110 0110 .... @shl_scalar
+ VRSHL_S_scalar 1110 1110 0 . 11 .. 11 ... 1 1110 0110 .... @shl_scalar
+ VQSHL_S_scalar 1110 1110 0 . 11 .. 01 ... 1 1110 1110 .... @shl_scalar
+ VQRSHL_S_scalar 1110 1110 0 . 11 .. 11 ... 1 1110 1110 .... @shl_scalar
+ VMUL_scalar 1110 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
+}
+
+{
+ VSHL_U_scalar 1111 1110 0 . 11 .. 01 ... 1 1110 0110 .... @shl_scalar
+ VRSHL_U_scalar 1111 1110 0 . 11 .. 11 ... 1 1110 0110 .... @shl_scalar
+ VQSHL_U_scalar 1111 1110 0 . 11 .. 01 ... 1 1110 1110 .... @shl_scalar
+ VQRSHL_U_scalar 1111 1110 0 . 11 .. 11 ... 1 1110 1110 .... @shl_scalar
+ VBRSR 1111 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
+}
+
+{
+ VADD_fp_scalar 111 . 1110 0 . 11 ... 0 ... 0 1111 . 100 .... @2op_fp_scalar
+ VHADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
+ VHADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
+}
+
+{
+ VSUB_fp_scalar 111 . 1110 0 . 11 ... 0 ... 1 1111 . 100 .... @2op_fp_scalar
+ VHSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
+ VHSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
+}
+
+{
+ VQADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar
+ VQADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar
+ VQDMULLB_scalar 111 . 1110 0 . 11 ... 0 ... 0 1111 . 110 .... @2scalar_nosz \
+ size=%size_28
+}
+
+{
+ VQSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
+ VQSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
+ VQDMULLT_scalar 111 . 1110 0 . 11 ... 0 ... 1 1111 . 110 .... @2scalar_nosz \
+ size=%size_28
+}
+
+{
+ VMUL_fp_scalar 111 . 1110 0 . 11 ... 1 ... 0 1110 . 110 .... @2op_fp_scalar
+ VQDMULH_scalar 1110 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar
+ VQRDMULH_scalar 1111 1110 0 . .. ... 1 ... 0 1110 . 110 .... @2scalar
+}
+
+{
+ VFMA_scalar 111 . 1110 0 . 11 ... 1 ... 0 1110 . 100 .... @2op_fp_scalar
+ # The U bit (28) is don't-care because it does not affect the result
+ VMLA 111 - 1110 0 . .. ... 1 ... 0 1110 . 100 .... @2scalar
+}
+
+{
+ VFMAS_scalar 111 . 1110 0 . 11 ... 1 ... 1 1110 . 100 .... @2op_fp_scalar
+ # The U bit (28) is don't-care because it does not affect the result
+ VMLAS 111 - 1110 0 . .. ... 1 ... 1 1110 . 100 .... @2scalar
+}
+
+VQRDMLAH 1110 1110 0 . .. ... 0 ... 0 1110 . 100 .... @2scalar
+VQRDMLASH 1110 1110 0 . .. ... 0 ... 1 1110 . 100 .... @2scalar
+VQDMLAH 1110 1110 0 . .. ... 0 ... 0 1110 . 110 .... @2scalar
+VQDMLASH 1110 1110 0 . .. ... 0 ... 1 1110 . 110 .... @2scalar
+
+# Vector add across vector
+{
+ VADDV 111 u:1 1110 1111 size:2 01 ... 0 1111 0 0 a:1 0 qm:3 0 rda=%rdalo
+ VADDLV 111 u:1 1110 1 ... 1001 ... 0 1111 00 a:1 0 qm:3 0 \
+ rdahi=%rdahi rdalo=%rdalo
+}
+
+@vabav .... .... .. size:2 .... rda:4 .... .... .... &vabav qn=%qn qm=%qm
+
+VABAV_S 111 0 1110 10 .. ... 0 .... 1111 . 0 . 0 ... 1 @vabav
+VABAV_U 111 1 1110 10 .. ... 0 .... 1111 . 0 . 0 ... 1 @vabav
+
+# Logical immediate operations (1 reg and modified-immediate)
+
+# The cmode/op bits here decode VORR/VBIC/VMOV/VMVN, but
+# not in a way we can conveniently represent in decodetree without
+# a lot of repetition:
+# VORR: op=0, (cmode & 1) && cmode < 12
+# VBIC: op=1, (cmode & 1) && cmode < 12
+# VMOV: everything else
+# So we have a single decode line and check the cmode/op in the
+# trans function.
+Vimm_1r 111 . 1111 1 . 00 0 ... ... 0 .... 0 1 . 1 .... @1imm
+
+# Shifts by immediate
+
+VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_b
+VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_h
+VSHLI 111 0 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_w
+
+VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_b
+VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_h
+VQSHLI_S 111 0 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_w
+
+VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_b
+VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_h
+VQSHLI_U 111 1 1111 1 . ... ... ... 0 0111 0 1 . 1 ... 0 @2_shl_w
+
+VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_b
+VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_h
+VQSHLUI 111 1 1111 1 . ... ... ... 0 0110 0 1 . 1 ... 0 @2_shl_w
+
+VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_b
+VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_h
+VSHRI_S 111 0 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_w
+
+VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_b
+VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_h
+VSHRI_U 111 1 1111 1 . ... ... ... 0 0000 0 1 . 1 ... 0 @2_shr_w
+
+VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b
+VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h
+VRSHRI_S 111 0 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w
+
+VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_b
+VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_h
+VRSHRI_U 111 1 1111 1 . ... ... ... 0 0010 0 1 . 1 ... 0 @2_shr_w
+
+# VSHLL T1 encoding; the T2 VSHLL encoding is elsewhere in this file
+# Note that VMOVL is encoded as "VSHLL with a zero shift count"; we
+# implement it that way rather than special-casing it in the decode.
+VSHLL_BS 111 0 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_b
+VSHLL_BS 111 0 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_h
+
+VSHLL_BU 111 1 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_b
+VSHLL_BU 111 1 1110 1 . 1 .. ... ... 0 1111 0 1 . 0 ... 0 @2_shll_h
+
+VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b
+VSHLL_TS 111 0 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h
+
+VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_b
+VSHLL_TU 111 1 1110 1 . 1 .. ... ... 1 1111 0 1 . 0 ... 0 @2_shll_h
+
+# Shift-and-insert
+VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_b
+VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_h
+VSRI 111 1 1111 1 . ... ... ... 0 0100 0 1 . 1 ... 0 @2_shr_w
+
+VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_b
+VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_h
+VSLI 111 1 1111 1 . ... ... ... 0 0101 0 1 . 1 ... 0 @2_shl_w
+
+# Narrowing shifts (which only support b and h sizes)
+VSHRNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_b
+VSHRNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_h
+VSHRNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_b
+VSHRNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_h
+
+VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_b
+VRSHRNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 1 @2_shr_h
+VRSHRNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_b
+VRSHRNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 1 @2_shr_h
+
+VQSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_b
+VQSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_h
+VQSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_b
+VQSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_h
+VQSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_b
+VQSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 0 @2_shr_h
+VQSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_b
+VQSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 0 @2_shr_h
+
+VQSHRUNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b
+VQSHRUNB 111 0 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h
+VQSHRUNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b
+VQSHRUNT 111 0 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h
+
+VQRSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_b
+VQRSHRNB_S 111 0 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_h
+VQRSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_b
+VQRSHRNT_S 111 0 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_h
+VQRSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_b
+VQRSHRNB_U 111 1 1110 1 . ... ... ... 0 1111 0 1 . 0 ... 1 @2_shr_h
+VQRSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_b
+VQRSHRNT_U 111 1 1110 1 . ... ... ... 1 1111 0 1 . 0 ... 1 @2_shr_h
+
+VQRSHRUNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_b
+VQRSHRUNB 111 1 1110 1 . ... ... ... 0 1111 1 1 . 0 ... 0 @2_shr_h
+VQRSHRUNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_b
+VQRSHRUNT 111 1 1110 1 . ... ... ... 1 1111 1 1 . 0 ... 0 @2_shr_h
+
+VSHLC 111 0 1110 1 . 1 imm:5 ... 0 1111 1100 rdm:4 qd=%qd
+
+# Comparisons. We expand out the conditions which are split across
+# encodings T1, T2, T3 and the fc bits. These include VPT, which is
+# effectively "VCMP then VPST". A plain "VCMP" has a mask field of zero.
+{
+ VCMPEQ_fp 111 . 1110 0 . 11 ... 1 ... 0 1111 0 0 . 0 ... 0 @vcmp_fp
+ VCMPEQ 111 1 1110 0 . .. ... 1 ... 0 1111 0 0 . 0 ... 0 @vcmp
+}
+
+{
+ VCMPNE_fp 111 . 1110 0 . 11 ... 1 ... 0 1111 1 0 . 0 ... 0 @vcmp_fp
+ VCMPNE 111 1 1110 0 . .. ... 1 ... 0 1111 1 0 . 0 ... 0 @vcmp
+}
+
+{
+ VCMPGE_fp 111 . 1110 0 . 11 ... 1 ... 1 1111 0 0 . 0 ... 0 @vcmp_fp
+ VCMPGE 111 1 1110 0 . .. ... 1 ... 1 1111 0 0 . 0 ... 0 @vcmp
+}
+
+{
+ VCMPLT_fp 111 . 1110 0 . 11 ... 1 ... 1 1111 1 0 . 0 ... 0 @vcmp_fp
+ VCMPLT 111 1 1110 0 . .. ... 1 ... 1 1111 1 0 . 0 ... 0 @vcmp
+}
+
+{
+ VCMPGT_fp 111 . 1110 0 . 11 ... 1 ... 1 1111 0 0 . 0 ... 1 @vcmp_fp
+ VCMPGT 111 1 1110 0 . .. ... 1 ... 1 1111 0 0 . 0 ... 1 @vcmp
+}
+
+{
+ VCMPLE_fp 111 . 1110 0 . 11 ... 1 ... 1 1111 1 0 . 0 ... 1 @vcmp_fp
+ VCMPLE 1111 1110 0 . .. ... 1 ... 1 1111 1 0 . 0 ... 1 @vcmp
+}
+
+{
+ VPSEL 1111 1110 0 . 11 ... 1 ... 0 1111 . 0 . 0 ... 1 @2op_nosz
+ VCMPCS 1111 1110 0 . .. ... 1 ... 0 1111 0 0 . 0 ... 1 @vcmp
+ VCMPHI 1111 1110 0 . .. ... 1 ... 0 1111 1 0 . 0 ... 1 @vcmp
+}
+
+{
+ VPNOT 1111 1110 0 0 11 000 1 000 0 1111 0100 1101
+ VPST 1111 1110 0 . 11 000 1 ... 0 1111 0100 1101 mask=%mask_22_13
+ VCMPEQ_fp_scalar 1111 1110 0 . 11 ... 1 ... 0 1111 0100 .... @vcmp_fp_scalar size=1
+ VCMPEQ_scalar 1111 1110 0 . .. ... 1 ... 0 1111 0100 .... @vcmp_scalar
+}
+
+{
+ VCMPNE_fp_scalar 1111 1110 0 . 11 ... 1 ... 0 1111 1100 .... @vcmp_fp_scalar size=1
+ VCMPNE_scalar 1111 1110 0 . .. ... 1 ... 0 1111 1100 .... @vcmp_scalar
+}
+
+{
+ VCMPGT_fp_scalar 1111 1110 0 . 11 ... 1 ... 1 1111 0110 .... @vcmp_fp_scalar size=1
+ VCMPGT_scalar 1111 1110 0 . .. ... 1 ... 1 1111 0110 .... @vcmp_scalar
+}
+
+{
+ VCMPLE_fp_scalar 1111 1110 0 . 11 ... 1 ... 1 1111 1110 .... @vcmp_fp_scalar size=1
+ VCMPLE_scalar 1111 1110 0 . .. ... 1 ... 1 1111 1110 .... @vcmp_scalar
+}
+
+{
+ VCMPGE_fp_scalar 1111 1110 0 . 11 ... 1 ... 1 1111 0100 .... @vcmp_fp_scalar size=1
+ VCMPGE_scalar 1111 1110 0 . .. ... 1 ... 1 1111 0100 .... @vcmp_scalar
+}
+{
+ VCMPLT_fp_scalar 1111 1110 0 . 11 ... 1 ... 1 1111 1100 .... @vcmp_fp_scalar size=1
+ VCMPLT_scalar 1111 1110 0 . .. ... 1 ... 1 1111 1100 .... @vcmp_scalar
+}
+
+VCMPCS_scalar 1111 1110 0 . .. ... 1 ... 0 1111 0 1 1 0 .... @vcmp_scalar
+VCMPHI_scalar 1111 1110 0 . .. ... 1 ... 0 1111 1 1 1 0 .... @vcmp_scalar
+
+# 2-operand FP
+VADD_fp 1110 1111 0 . 0 . ... 0 ... 0 1101 . 1 . 0 ... 0 @2op_fp
+VSUB_fp 1110 1111 0 . 1 . ... 0 ... 0 1101 . 1 . 0 ... 0 @2op_fp
+VMUL_fp 1111 1111 0 . 0 . ... 0 ... 0 1101 . 1 . 1 ... 0 @2op_fp
+VABD_fp 1111 1111 0 . 1 . ... 0 ... 0 1101 . 1 . 0 ... 0 @2op_fp
+
+VMAXNM 1111 1111 0 . 0 . ... 0 ... 0 1111 . 1 . 1 ... 0 @2op_fp
+VMINNM 1111 1111 0 . 1 . ... 0 ... 0 1111 . 1 . 1 ... 0 @2op_fp
+
+VCADD90_fp 1111 1100 1 . 0 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev
+VCADD270_fp 1111 1101 1 . 0 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev
+
+VFMA 1110 1111 0 . 0 . ... 0 ... 0 1100 . 1 . 1 ... 0 @2op_fp
+VFMS 1110 1111 0 . 1 . ... 0 ... 0 1100 . 1 . 1 ... 0 @2op_fp
+
+VCMLA0 1111 110 00 . 1 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev
+VCMLA90 1111 110 01 . 1 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev
+VCMLA180 1111 110 10 . 1 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev
+VCMLA270 1111 110 11 . 1 . ... 0 ... 0 1000 . 1 . 0 ... 0 @2op_fp_size_rev
+
+# floating-point <-> fixed-point conversions. Naming convention:
+# VCVT_<from><to>, S = signed int, U = unsigned int, H = halfprec, F = singleprec
+@vcvt .... .... .. 1 ..... .... .. 1 . .... .... &2shift \
+ qd=%qd qm=%qm shift=%rshift_i5 size=2
+@vcvt_f16 .... .... .. 11 .... .... .. 0 . .... .... &2shift \
+ qd=%qd qm=%qm shift=%rshift_i4 size=1
+
+VCVT_SH_fixed 1110 1111 1 . ...... ... 0 11 . 0 01 . 1 ... 0 @vcvt_f16
+VCVT_UH_fixed 1111 1111 1 . ...... ... 0 11 . 0 01 . 1 ... 0 @vcvt_f16
+
+VCVT_HS_fixed 1110 1111 1 . ...... ... 0 11 . 1 01 . 1 ... 0 @vcvt_f16
+VCVT_HU_fixed 1111 1111 1 . ...... ... 0 11 . 1 01 . 1 ... 0 @vcvt_f16
+
+VCVT_SF_fixed 1110 1111 1 . ...... ... 0 11 . 0 01 . 1 ... 0 @vcvt
+VCVT_UF_fixed 1111 1111 1 . ...... ... 0 11 . 0 01 . 1 ... 0 @vcvt
+
+VCVT_FS_fixed 1110 1111 1 . ...... ... 0 11 . 1 01 . 1 ... 0 @vcvt
+VCVT_FU_fixed 1111 1111 1 . ...... ... 0 11 . 1 01 . 1 ... 0 @vcvt
+
+# VCVT between floating point and integer (halfprec and single);
+# VCVT_<from><to>, S = signed int, U = unsigned int, F = float
+VCVT_SF 1111 1111 1 . 11 .. 11 ... 0 011 00 1 . 0 ... 0 @1op
+VCVT_UF 1111 1111 1 . 11 .. 11 ... 0 011 01 1 . 0 ... 0 @1op
+VCVT_FS 1111 1111 1 . 11 .. 11 ... 0 011 10 1 . 0 ... 0 @1op
+VCVT_FU 1111 1111 1 . 11 .. 11 ... 0 011 11 1 . 0 ... 0 @1op
+
+# VCVT from floating point to integer with specified rounding mode
+VCVTAS 1111 1111 1 . 11 .. 11 ... 000 00 0 1 . 0 ... 0 @1op
+VCVTAU 1111 1111 1 . 11 .. 11 ... 000 00 1 1 . 0 ... 0 @1op
+VCVTNS 1111 1111 1 . 11 .. 11 ... 000 01 0 1 . 0 ... 0 @1op
+VCVTNU 1111 1111 1 . 11 .. 11 ... 000 01 1 1 . 0 ... 0 @1op
+VCVTPS 1111 1111 1 . 11 .. 11 ... 000 10 0 1 . 0 ... 0 @1op
+VCVTPU 1111 1111 1 . 11 .. 11 ... 000 10 1 1 . 0 ... 0 @1op
+VCVTMS 1111 1111 1 . 11 .. 11 ... 000 11 0 1 . 0 ... 0 @1op
+VCVTMU 1111 1111 1 . 11 .. 11 ... 000 11 1 1 . 0 ... 0 @1op
+
+VRINTN 1111 1111 1 . 11 .. 10 ... 001 000 1 . 0 ... 0 @1op
+VRINTX 1111 1111 1 . 11 .. 10 ... 001 001 1 . 0 ... 0 @1op
+VRINTA 1111 1111 1 . 11 .. 10 ... 001 010 1 . 0 ... 0 @1op
+VRINTZ 1111 1111 1 . 11 .. 10 ... 001 011 1 . 0 ... 0 @1op
+VRINTM 1111 1111 1 . 11 .. 10 ... 001 101 1 . 0 ... 0 @1op
+VRINTP 1111 1111 1 . 11 .. 10 ... 001 111 1 . 0 ... 0 @1op
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c
new file mode 100644
index 0000000000..8b99736aad
--- /dev/null
+++ b/target/arm/tcg/mve_helper.c
@@ -0,0 +1,3444 @@
+/*
+ * M-profile MVE Operations
+ *
+ * Copyright (c) 2021 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "vec_internal.h"
+#include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
+#include "exec/exec-all.h"
+#include "tcg/tcg.h"
+#include "fpu/softfloat.h"
+#include "crypto/clmul.h"
+
+static uint16_t mve_eci_mask(CPUARMState *env)
+{
+ /*
+ * Return the mask of which elements in the MVE vector correspond
+ * to beats being executed. The mask has 1 bits for lanes which will
+ * be executed and 0 bits where ECI says the beat was already executed.
+ */
+ int eci;
+
+ if ((env->condexec_bits & 0xf) != 0) {
+ return 0xffff;
+ }
+
+ eci = env->condexec_bits >> 4;
+ switch (eci) {
+ case ECI_NONE:
+ return 0xffff;
+ case ECI_A0:
+ return 0xfff0;
+ case ECI_A0A1:
+ return 0xff00;
+ case ECI_A0A1A2:
+ case ECI_A0A1A2B0:
+ return 0xf000;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static uint16_t mve_element_mask(CPUARMState *env)
+{
+ /*
+ * Return the mask of which elements in the MVE vector should be
+ * updated. This is a combination of multiple things:
+ * (1) by default, we update every lane in the vector
+ * (2) VPT predication stores its state in the VPR register;
+ * (3) low-overhead-branch tail predication will mask out part of
+ * the vector on the final iteration of the loop
+ * (4) if EPSR.ECI is set then we must execute only some beats
+ * of the insn
+ * We combine all these into a 16-bit result with the same semantics
+ * as VPR.P0: 0 to mask the lane, 1 if it is active.
+ * 8-bit vector ops will look at all bits of the result;
+ * 16-bit ops will look at bits 0, 2, 4, ...;
+ * 32-bit ops will look at bits 0, 4, 8 and 12.
+ * Compare pseudocode GetCurInstrBeat(), though that only returns
+ * the 4-bit slice of the mask corresponding to a single beat.
+ */
+ uint16_t mask = FIELD_EX32(env->v7m.vpr, V7M_VPR, P0);
+
+ if (!(env->v7m.vpr & R_V7M_VPR_MASK01_MASK)) {
+ mask |= 0xff;
+ }
+ if (!(env->v7m.vpr & R_V7M_VPR_MASK23_MASK)) {
+ mask |= 0xff00;
+ }
+
+ if (env->v7m.ltpsize < 4 &&
+ env->regs[14] <= (1 << (4 - env->v7m.ltpsize))) {
+ /*
+ * Tail predication active, and this is the last loop iteration.
+ * The element size is (1 << ltpsize), and we only want to process
+ * loopcount elements, so we want to retain the least significant
+ * (loopcount * esize) predicate bits and zero out bits above that.
+ */
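+ /*
+ * For example, with ltpsize == 2 (32-bit elements) and LR == 3,
+ * masklen is 3 << 2 == 12, so ltpmask is 0x0fff and only the three
+ * least significant 32-bit lanes remain active.
+ */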
+ int masklen = env->regs[14] << env->v7m.ltpsize;
+ assert(masklen <= 16);
+ uint16_t ltpmask = masklen ? MAKE_64BIT_MASK(0, masklen) : 0;
+ mask &= ltpmask;
+ }
+
+ /*
+ * ECI bits indicate which beats are already executed;
+ * we handle this by effectively predicating them out.
+ */
+ mask &= mve_eci_mask(env);
+ return mask;
+}
+
+static void mve_advance_vpt(CPUARMState *env)
+{
+ /* Advance the VPT and ECI state if necessary */
+ uint32_t vpr = env->v7m.vpr;
+ unsigned mask01, mask23;
+ uint16_t inv_mask;
+ uint16_t eci_mask = mve_eci_mask(env);
+
+ if ((env->condexec_bits & 0xf) == 0) {
+ env->condexec_bits = (env->condexec_bits == (ECI_A0A1A2B0 << 4)) ?
+ (ECI_A0 << 4) : (ECI_NONE << 4);
+ }
+
+ if (!(vpr & (R_V7M_VPR_MASK01_MASK | R_V7M_VPR_MASK23_MASK))) {
+ /* VPT not enabled, nothing to do */
+ return;
+ }
+
+ /* Invert P0 bits if needed, but only for beats we actually executed */
+ mask01 = FIELD_EX32(vpr, V7M_VPR, MASK01);
+ mask23 = FIELD_EX32(vpr, V7M_VPR, MASK23);
+ /* Start by assuming we invert all bits corresponding to executed beats */
+ inv_mask = eci_mask;
+ if (mask01 <= 8) {
+ /* MASK01 says don't invert low half of P0 */
+ inv_mask &= ~0xff;
+ }
+ if (mask23 <= 8) {
+ /* MASK23 says don't invert high half of P0 */
+ inv_mask &= ~0xff00;
+ }
+ vpr ^= inv_mask;
+ /* Only update MASK01 if beat 1 executed */
+ if (eci_mask & 0xf0) {
+ vpr = FIELD_DP32(vpr, V7M_VPR, MASK01, mask01 << 1);
+ }
+ /* Beat 3 always executes, so update MASK23 */
+ vpr = FIELD_DP32(vpr, V7M_VPR, MASK23, mask23 << 1);
+ env->v7m.vpr = vpr;
+}
+
+/* For loads, predicated lanes are zeroed instead of keeping their old values */
+#define DO_VLDR(OP, MSIZE, LDTYPE, ESIZE, TYPE) \
+ void HELPER(mve_##OP)(CPUARMState *env, void *vd, uint32_t addr) \
+ { \
+ TYPE *d = vd; \
+ uint16_t mask = mve_element_mask(env); \
+ uint16_t eci_mask = mve_eci_mask(env); \
+ unsigned b, e; \
+ /* \
+ * R_SXTM allows the dest reg to become UNKNOWN for abandoned \
+ * beats so we don't care if we update part of the dest and \
+ * then take an exception. \
+ */ \
+ for (b = 0, e = 0; b < 16; b += ESIZE, e++) { \
+ if (eci_mask & (1 << b)) { \
+ d[H##ESIZE(e)] = (mask & (1 << b)) ? \
+ cpu_##LDTYPE##_data_ra(env, addr, GETPC()) : 0; \
+ } \
+ addr += MSIZE; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VSTR(OP, MSIZE, STTYPE, ESIZE, TYPE) \
+ void HELPER(mve_##OP)(CPUARMState *env, void *vd, uint32_t addr) \
+ { \
+ TYPE *d = vd; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned b, e; \
+ for (b = 0, e = 0; b < 16; b += ESIZE, e++) { \
+ if (mask & (1 << b)) { \
+ cpu_##STTYPE##_data_ra(env, addr, d[H##ESIZE(e)], GETPC()); \
+ } \
+ addr += MSIZE; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+DO_VLDR(vldrb, 1, ldub, 1, uint8_t)
+DO_VLDR(vldrh, 2, lduw, 2, uint16_t)
+DO_VLDR(vldrw, 4, ldl, 4, uint32_t)
+
+DO_VSTR(vstrb, 1, stb, 1, uint8_t)
+DO_VSTR(vstrh, 2, stw, 2, uint16_t)
+DO_VSTR(vstrw, 4, stl, 4, uint32_t)
+
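+/*
+ * Widening loads and narrowing stores: MSIZE is the memory access
+ * size and ESIZE the in-register element size, e.g. vldrb_sh loads
+ * a byte and sign-extends it into a 16-bit element.
+ */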
+DO_VLDR(vldrb_sh, 1, ldsb, 2, int16_t)
+DO_VLDR(vldrb_sw, 1, ldsb, 4, int32_t)
+DO_VLDR(vldrb_uh, 1, ldub, 2, uint16_t)
+DO_VLDR(vldrb_uw, 1, ldub, 4, uint32_t)
+DO_VLDR(vldrh_sw, 2, ldsw, 4, int32_t)
+DO_VLDR(vldrh_uw, 2, lduw, 4, uint32_t)
+
+DO_VSTR(vstrb_h, 1, stb, 2, int16_t)
+DO_VSTR(vstrb_w, 1, stb, 4, int32_t)
+DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
+
+#undef DO_VLDR
+#undef DO_VSTR
+
+/*
+ * Gather loads/scatter stores. Here each element of Qm specifies
+ * an offset to use from the base register Rm. In the _os_ versions
+ * that offset is scaled by the element size.
+ * For loads, predicated lanes are zeroed instead of retaining
+ * their previous values.
+ */
+#define DO_VLDR_SG(OP, LDTYPE, ESIZE, TYPE, OFFTYPE, ADDRFN, WB) \
+ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \
+ uint32_t base) \
+ { \
+ TYPE *d = vd; \
+ OFFTYPE *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ uint16_t eci_mask = mve_eci_mask(env); \
+ unsigned e; \
+ uint32_t addr; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE, eci_mask >>= ESIZE) { \
+ if (!(eci_mask & 1)) { \
+ continue; \
+ } \
+ addr = ADDRFN(base, m[H##ESIZE(e)]); \
+ d[H##ESIZE(e)] = (mask & 1) ? \
+ cpu_##LDTYPE##_data_ra(env, addr, GETPC()) : 0; \
+ if (WB) { \
+ m[H##ESIZE(e)] = addr; \
+ } \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+/* We know that here TYPE is unsigned, so it is always the same as the offset type */
+#define DO_VSTR_SG(OP, STTYPE, ESIZE, TYPE, ADDRFN, WB) \
+ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \
+ uint32_t base) \
+ { \
+ TYPE *d = vd; \
+ TYPE *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ uint16_t eci_mask = mve_eci_mask(env); \
+ unsigned e; \
+ uint32_t addr; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE, eci_mask >>= ESIZE) { \
+ if (!(eci_mask & 1)) { \
+ continue; \
+ } \
+ addr = ADDRFN(base, m[H##ESIZE(e)]); \
+ if (mask & 1) { \
+ cpu_##STTYPE##_data_ra(env, addr, d[H##ESIZE(e)], GETPC()); \
+ } \
+ if (WB) { \
+ m[H##ESIZE(e)] = addr; \
+ } \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+/*
+ * 64-bit accesses are slightly different: they are done as two 32-bit
+ * accesses, controlled by the predicate mask for the relevant beat,
+ * and with a single 32-bit offset in the first of the two Qm elements.
+ * Note that for QEMU our IMPDEF AIRCR.ENDIANNESS is always 0 (little).
+ * Address writeback happens on the odd beats and updates the address
+ * stored in the even-beat element.
+ */
+#define DO_VLDR64_SG(OP, ADDRFN, WB) \
+ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \
+ uint32_t base) \
+ { \
+ uint32_t *d = vd; \
+ uint32_t *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ uint16_t eci_mask = mve_eci_mask(env); \
+ unsigned e; \
+ uint32_t addr; \
+ for (e = 0; e < 16 / 4; e++, mask >>= 4, eci_mask >>= 4) { \
+ if (!(eci_mask & 1)) { \
+ continue; \
+ } \
+ addr = ADDRFN(base, m[H4(e & ~1)]); \
+ addr += 4 * (e & 1); \
+ d[H4(e)] = (mask & 1) ? cpu_ldl_data_ra(env, addr, GETPC()) : 0; \
+ if (WB && (e & 1)) { \
+ m[H4(e & ~1)] = addr - 4; \
+ } \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VSTR64_SG(OP, ADDRFN, WB) \
+ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \
+ uint32_t base) \
+ { \
+ uint32_t *d = vd; \
+ uint32_t *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ uint16_t eci_mask = mve_eci_mask(env); \
+ unsigned e; \
+ uint32_t addr; \
+ for (e = 0; e < 16 / 4; e++, mask >>= 4, eci_mask >>= 4) { \
+ if (!(eci_mask & 1)) { \
+ continue; \
+ } \
+ addr = ADDRFN(base, m[H4(e & ~1)]); \
+ addr += 4 * (e & 1); \
+ if (mask & 1) { \
+ cpu_stl_data_ra(env, addr, d[H4(e)], GETPC()); \
+ } \
+ if (WB && (e & 1)) { \
+ m[H4(e & ~1)] = addr - 4; \
+ } \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define ADDR_ADD(BASE, OFFSET) ((BASE) + (OFFSET))
+#define ADDR_ADD_OSH(BASE, OFFSET) ((BASE) + ((OFFSET) << 1))
+#define ADDR_ADD_OSW(BASE, OFFSET) ((BASE) + ((OFFSET) << 2))
+#define ADDR_ADD_OSD(BASE, OFFSET) ((BASE) + ((OFFSET) << 3))
+
+DO_VLDR_SG(vldrb_sg_sh, ldsb, 2, int16_t, uint16_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrb_sg_sw, ldsb, 4, int32_t, uint32_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrh_sg_sw, ldsw, 4, int32_t, uint32_t, ADDR_ADD, false)
+
+DO_VLDR_SG(vldrb_sg_ub, ldub, 1, uint8_t, uint8_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrb_sg_uh, ldub, 2, uint16_t, uint16_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrb_sg_uw, ldub, 4, uint32_t, uint32_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrh_sg_uh, lduw, 2, uint16_t, uint16_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrh_sg_uw, lduw, 4, uint32_t, uint32_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrw_sg_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD, false)
+DO_VLDR64_SG(vldrd_sg_ud, ADDR_ADD, false)
+
+DO_VLDR_SG(vldrh_sg_os_sw, ldsw, 4, int32_t, uint32_t, ADDR_ADD_OSH, false)
+DO_VLDR_SG(vldrh_sg_os_uh, lduw, 2, uint16_t, uint16_t, ADDR_ADD_OSH, false)
+DO_VLDR_SG(vldrh_sg_os_uw, lduw, 4, uint32_t, uint32_t, ADDR_ADD_OSH, false)
+DO_VLDR_SG(vldrw_sg_os_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD_OSW, false)
+DO_VLDR64_SG(vldrd_sg_os_ud, ADDR_ADD_OSD, false)
+
+DO_VSTR_SG(vstrb_sg_ub, stb, 1, uint8_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrb_sg_uh, stb, 2, uint16_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrb_sg_uw, stb, 4, uint32_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrh_sg_uh, stw, 2, uint16_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrh_sg_uw, stw, 4, uint32_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrw_sg_uw, stl, 4, uint32_t, ADDR_ADD, false)
+DO_VSTR64_SG(vstrd_sg_ud, ADDR_ADD, false)
+
+DO_VSTR_SG(vstrh_sg_os_uh, stw, 2, uint16_t, ADDR_ADD_OSH, false)
+DO_VSTR_SG(vstrh_sg_os_uw, stw, 4, uint32_t, ADDR_ADD_OSH, false)
+DO_VSTR_SG(vstrw_sg_os_uw, stl, 4, uint32_t, ADDR_ADD_OSW, false)
+DO_VSTR64_SG(vstrd_sg_os_ud, ADDR_ADD_OSD, false)
+
+DO_VLDR_SG(vldrw_sg_wb_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD, true)
+DO_VLDR64_SG(vldrd_sg_wb_ud, ADDR_ADD, true)
+DO_VSTR_SG(vstrw_sg_wb_uw, stl, 4, uint32_t, ADDR_ADD, true)
+DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true)
+
+/*
+ * Deinterleaving loads/interleaving stores.
+ *
+ * For these helpers we are passed the index of the first Qreg
+ * (VLD2/VST2 will also access Qn+1, VLD4/VST4 access Qn .. Qn+3)
+ * and the value of the base address register Rn.
+ * The helpers are specialized for pattern and element size, so
+ * for instance vld42h is VLD4 with pattern 2, element size MO_16.
+ *
+ * These insns are beatwise but not predicated, so we must honour ECI,
+ * but need not look at mve_element_mask().
+ *
+ * The pseudocode implements these insns with multiple memory accesses
+ * of the element size, but rules R_VVVG and R_FXDM permit us to make
+ * one 32-bit memory access per beat.
+ */
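+/*
+ * For example, vld40b uses off[] = { 0, 1, 10, 11 }: beat 0 does one
+ * 32-bit load of bytes [base, base+3], and those four interleaved
+ * bytes become element 0 of Qd, Qd+1, Qd+2 and Qd+3 respectively.
+ */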
+#define DO_VLD4B(OP, O1, O2, O3, O4) \
+ void HELPER(mve_##OP)(CPUARMState *env, uint32_t qnidx, \
+ uint32_t base) \
+ { \
+ int beat, e; \
+ uint16_t mask = mve_eci_mask(env); \
+ static const uint8_t off[4] = { O1, O2, O3, O4 }; \
+ uint32_t addr, data; \
+ for (beat = 0; beat < 4; beat++, mask >>= 4) { \
+ if ((mask & 1) == 0) { \
+ /* ECI says skip this beat */ \
+ continue; \
+ } \
+ addr = base + off[beat] * 4; \
+ data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
+ for (e = 0; e < 4; e++, data >>= 8) { \
+ uint8_t *qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + e); \
+ qd[H1(off[beat])] = data; \
+ } \
+ } \
+ }
+
+#define DO_VLD4H(OP, O1, O2) \
+ void HELPER(mve_##OP)(CPUARMState *env, uint32_t qnidx, \
+ uint32_t base) \
+ { \
+ int beat; \
+ uint16_t mask = mve_eci_mask(env); \
+ static const uint8_t off[4] = { O1, O1, O2, O2 }; \
+ uint32_t addr, data; \
+ int y; /* y counts 0 2 0 2 */ \
+ uint16_t *qd; \
+ for (beat = 0, y = 0; beat < 4; beat++, mask >>= 4, y ^= 2) { \
+ if ((mask & 1) == 0) { \
+ /* ECI says skip this beat */ \
+ continue; \
+ } \
+ addr = base + off[beat] * 8 + (beat & 1) * 4; \
+ data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
+ qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + y); \
+ qd[H2(off[beat])] = data; \
+ data >>= 16; \
+ qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + y + 1); \
+ qd[H2(off[beat])] = data; \
+ } \
+ }
+
+#define DO_VLD4W(OP, O1, O2, O3, O4) \
+ void HELPER(mve_##OP)(CPUARMState *env, uint32_t qnidx, \
+ uint32_t base) \
+ { \
+ int beat; \
+ uint16_t mask = mve_eci_mask(env); \
+ static const uint8_t off[4] = { O1, O2, O3, O4 }; \
+ uint32_t addr, data; \
+ uint32_t *qd; \
+ int y; \
+ for (beat = 0; beat < 4; beat++, mask >>= 4) { \
+ if ((mask & 1) == 0) { \
+ /* ECI says skip this beat */ \
+ continue; \
+ } \
+ addr = base + off[beat] * 4; \
+ data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
+ y = (beat + (O1 & 2)) & 3; \
+ qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + y); \
+ qd[H4(off[beat] >> 2)] = data; \
+ } \
+ }
+
+DO_VLD4B(vld40b, 0, 1, 10, 11)
+DO_VLD4B(vld41b, 2, 3, 12, 13)
+DO_VLD4B(vld42b, 4, 5, 14, 15)
+DO_VLD4B(vld43b, 6, 7, 8, 9)
+
+DO_VLD4H(vld40h, 0, 5)
+DO_VLD4H(vld41h, 1, 6)
+DO_VLD4H(vld42h, 2, 7)
+DO_VLD4H(vld43h, 3, 4)
+
+DO_VLD4W(vld40w, 0, 1, 10, 11)
+DO_VLD4W(vld41w, 2, 3, 12, 13)
+DO_VLD4W(vld42w, 4, 5, 14, 15)
+DO_VLD4W(vld43w, 6, 7, 8, 9)
+
+#define DO_VLD2B(OP, O1, O2, O3, O4) \
+ void HELPER(mve_##OP)(CPUARMState *env, uint32_t qnidx, \
+ uint32_t base) \
+ { \
+ int beat, e; \
+ uint16_t mask = mve_eci_mask(env); \
+ static const uint8_t off[4] = { O1, O2, O3, O4 }; \
+ uint32_t addr, data; \
+ uint8_t *qd; \
+ for (beat = 0; beat < 4; beat++, mask >>= 4) { \
+ if ((mask & 1) == 0) { \
+ /* ECI says skip this beat */ \
+ continue; \
+ } \
+ addr = base + off[beat] * 2; \
+ data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
+ for (e = 0; e < 4; e++, data >>= 8) { \
+ qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + (e & 1)); \
+ qd[H1(off[beat] + (e >> 1))] = data; \
+ } \
+ } \
+ }
+
+#define DO_VLD2H(OP, O1, O2, O3, O4) \
+ void HELPER(mve_##OP)(CPUARMState *env, uint32_t qnidx, \
+ uint32_t base) \
+ { \
+ int beat; \
+ uint16_t mask = mve_eci_mask(env); \
+ static const uint8_t off[4] = { O1, O2, O3, O4 }; \
+ uint32_t addr, data; \
+ int e; \
+ uint16_t *qd; \
+ for (beat = 0; beat < 4; beat++, mask >>= 4) { \
+ if ((mask & 1) == 0) { \
+ /* ECI says skip this beat */ \
+ continue; \
+ } \
+ addr = base + off[beat] * 4; \
+ data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
+ for (e = 0; e < 2; e++, data >>= 16) { \
+ qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + e); \
+ qd[H2(off[beat])] = data; \
+ } \
+ } \
+ }
+
+#define DO_VLD2W(OP, O1, O2, O3, O4) \
+ void HELPER(mve_##OP)(CPUARMState *env, uint32_t qnidx, \
+ uint32_t base) \
+ { \
+ int beat; \
+ uint16_t mask = mve_eci_mask(env); \
+ static const uint8_t off[4] = { O1, O2, O3, O4 }; \
+ uint32_t addr, data; \
+ uint32_t *qd; \
+ for (beat = 0; beat < 4; beat++, mask >>= 4) { \
+ if ((mask & 1) == 0) { \
+ /* ECI says skip this beat */ \
+ continue; \
+ } \
+ addr = base + off[beat]; \
+ data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
+ qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + (beat & 1)); \
+ qd[H4(off[beat] >> 3)] = data; \
+ } \
+ }
+
+DO_VLD2B(vld20b, 0, 2, 12, 14)
+DO_VLD2B(vld21b, 4, 6, 8, 10)
+
+DO_VLD2H(vld20h, 0, 1, 6, 7)
+DO_VLD2H(vld21h, 2, 3, 4, 5)
+
+DO_VLD2W(vld20w, 0, 4, 24, 28)
+DO_VLD2W(vld21w, 8, 12, 16, 20)
+
+#define DO_VST4B(OP, O1, O2, O3, O4) \
+ void HELPER(mve_##OP)(CPUARMState *env, uint32_t qnidx, \
+ uint32_t base) \
+ { \
+ int beat, e; \
+ uint16_t mask = mve_eci_mask(env); \
+ static const uint8_t off[4] = { O1, O2, O3, O4 }; \
+ uint32_t addr, data; \
+ for (beat = 0; beat < 4; beat++, mask >>= 4) { \
+ if ((mask & 1) == 0) { \
+ /* ECI says skip this beat */ \
+ continue; \
+ } \
+ addr = base + off[beat] * 4; \
+ data = 0; \
+ for (e = 3; e >= 0; e--) { \
+ uint8_t *qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + e); \
+ data = (data << 8) | qd[H1(off[beat])]; \
+ } \
+ cpu_stl_le_data_ra(env, addr, data, GETPC()); \
+ } \
+ }
+
+#define DO_VST4H(OP, O1, O2) \
+ void HELPER(mve_##OP)(CPUARMState *env, uint32_t qnidx, \
+ uint32_t base) \
+ { \
+ int beat; \
+ uint16_t mask = mve_eci_mask(env); \
+ static const uint8_t off[4] = { O1, O1, O2, O2 }; \
+ uint32_t addr, data; \
+ int y; /* y counts 0 2 0 2 */ \
+ uint16_t *qd; \
+ for (beat = 0, y = 0; beat < 4; beat++, mask >>= 4, y ^= 2) { \
+ if ((mask & 1) == 0) { \
+ /* ECI says skip this beat */ \
+ continue; \
+ } \
+ addr = base + off[beat] * 8 + (beat & 1) * 4; \
+ qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + y); \
+ data = qd[H2(off[beat])]; \
+ qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + y + 1); \
+ data |= qd[H2(off[beat])] << 16; \
+ cpu_stl_le_data_ra(env, addr, data, GETPC()); \
+ } \
+ }
+
+#define DO_VST4W(OP, O1, O2, O3, O4) \
+ void HELPER(mve_##OP)(CPUARMState *env, uint32_t qnidx, \
+ uint32_t base) \
+ { \
+ int beat; \
+ uint16_t mask = mve_eci_mask(env); \
+ static const uint8_t off[4] = { O1, O2, O3, O4 }; \
+ uint32_t addr, data; \
+ uint32_t *qd; \
+ int y; \
+ for (beat = 0; beat < 4; beat++, mask >>= 4) { \
+ if ((mask & 1) == 0) { \
+ /* ECI says skip this beat */ \
+ continue; \
+ } \
+ addr = base + off[beat] * 4; \
+ y = (beat + (O1 & 2)) & 3; \
+ qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + y); \
+ data = qd[H4(off[beat] >> 2)]; \
+ cpu_stl_le_data_ra(env, addr, data, GETPC()); \
+ } \
+ }
+
+DO_VST4B(vst40b, 0, 1, 10, 11)
+DO_VST4B(vst41b, 2, 3, 12, 13)
+DO_VST4B(vst42b, 4, 5, 14, 15)
+DO_VST4B(vst43b, 6, 7, 8, 9)
+
+DO_VST4H(vst40h, 0, 5)
+DO_VST4H(vst41h, 1, 6)
+DO_VST4H(vst42h, 2, 7)
+DO_VST4H(vst43h, 3, 4)
+
+DO_VST4W(vst40w, 0, 1, 10, 11)
+DO_VST4W(vst41w, 2, 3, 12, 13)
+DO_VST4W(vst42w, 4, 5, 14, 15)
+DO_VST4W(vst43w, 6, 7, 8, 9)
+
+#define DO_VST2B(OP, O1, O2, O3, O4) \
+ void HELPER(mve_##OP)(CPUARMState *env, uint32_t qnidx, \
+ uint32_t base) \
+ { \
+ int beat, e; \
+ uint16_t mask = mve_eci_mask(env); \
+ static const uint8_t off[4] = { O1, O2, O3, O4 }; \
+ uint32_t addr, data; \
+ uint8_t *qd; \
+ for (beat = 0; beat < 4; beat++, mask >>= 4) { \
+ if ((mask & 1) == 0) { \
+ /* ECI says skip this beat */ \
+ continue; \
+ } \
+ addr = base + off[beat] * 2; \
+ data = 0; \
+ for (e = 3; e >= 0; e--) { \
+ qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + (e & 1)); \
+ data = (data << 8) | qd[H1(off[beat] + (e >> 1))]; \
+ } \
+ cpu_stl_le_data_ra(env, addr, data, GETPC()); \
+ } \
+ }
+
+#define DO_VST2H(OP, O1, O2, O3, O4) \
+ void HELPER(mve_##OP)(CPUARMState *env, uint32_t qnidx, \
+ uint32_t base) \
+ { \
+ int beat; \
+ uint16_t mask = mve_eci_mask(env); \
+ static const uint8_t off[4] = { O1, O2, O3, O4 }; \
+ uint32_t addr, data; \
+ int e; \
+ uint16_t *qd; \
+ for (beat = 0; beat < 4; beat++, mask >>= 4) { \
+ if ((mask & 1) == 0) { \
+ /* ECI says skip this beat */ \
+ continue; \
+ } \
+ addr = base + off[beat] * 4; \
+ data = 0; \
+ for (e = 1; e >= 0; e--) { \
+ qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + e); \
+ data = (data << 16) | qd[H2(off[beat])]; \
+ } \
+ cpu_stl_le_data_ra(env, addr, data, GETPC()); \
+ } \
+ }
+
+#define DO_VST2W(OP, O1, O2, O3, O4) \
+ void HELPER(mve_##OP)(CPUARMState *env, uint32_t qnidx, \
+ uint32_t base) \
+ { \
+ int beat; \
+ uint16_t mask = mve_eci_mask(env); \
+ static const uint8_t off[4] = { O1, O2, O3, O4 }; \
+ uint32_t addr, data; \
+ uint32_t *qd; \
+ for (beat = 0; beat < 4; beat++, mask >>= 4) { \
+ if ((mask & 1) == 0) { \
+ /* ECI says skip this beat */ \
+ continue; \
+ } \
+ addr = base + off[beat]; \
+ qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + (beat & 1)); \
+ data = qd[H4(off[beat] >> 3)]; \
+ cpu_stl_le_data_ra(env, addr, data, GETPC()); \
+ } \
+ }
+
+DO_VST2B(vst20b, 0, 2, 12, 14)
+DO_VST2B(vst21b, 4, 6, 8, 10)
+
+DO_VST2H(vst20h, 0, 1, 6, 7)
+DO_VST2H(vst21h, 2, 3, 4, 5)
+
+DO_VST2W(vst20w, 0, 4, 24, 28)
+DO_VST2W(vst21w, 8, 12, 16, 20)
+
+/*
+ * The mergemask(D, R, M) macro performs the operation "*D = R" but
+ * storing only the bytes which correspond to 1 bits in M,
+ * leaving other bytes in *D unchanged. We use _Generic
+ * to select the correct implementation based on the type of D.
+ */
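+/*
+ * For example, for a 16-bit element mergemask_uh() expands the two
+ * relevant predicate bits to a byte mask: a mask with only bit 0 set
+ * gives bmask == 0x00ff, so only the low byte of *d is updated,
+ * matching MVE's byte-granular predication.
+ */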
+
+static void mergemask_ub(uint8_t *d, uint8_t r, uint16_t mask)
+{
+ if (mask & 1) {
+ *d = r;
+ }
+}
+
+static void mergemask_sb(int8_t *d, int8_t r, uint16_t mask)
+{
+ mergemask_ub((uint8_t *)d, r, mask);
+}
+
+static void mergemask_uh(uint16_t *d, uint16_t r, uint16_t mask)
+{
+ uint16_t bmask = expand_pred_b(mask);
+ *d = (*d & ~bmask) | (r & bmask);
+}
+
+static void mergemask_sh(int16_t *d, int16_t r, uint16_t mask)
+{
+ mergemask_uh((uint16_t *)d, r, mask);
+}
+
+static void mergemask_uw(uint32_t *d, uint32_t r, uint16_t mask)
+{
+ uint32_t bmask = expand_pred_b(mask);
+ *d = (*d & ~bmask) | (r & bmask);
+}
+
+static void mergemask_sw(int32_t *d, int32_t r, uint16_t mask)
+{
+ mergemask_uw((uint32_t *)d, r, mask);
+}
+
+static void mergemask_uq(uint64_t *d, uint64_t r, uint16_t mask)
+{
+ uint64_t bmask = expand_pred_b(mask);
+ *d = (*d & ~bmask) | (r & bmask);
+}
+
+static void mergemask_sq(int64_t *d, int64_t r, uint16_t mask)
+{
+ mergemask_uq((uint64_t *)d, r, mask);
+}
+
+#define mergemask(D, R, M) \
+ _Generic(D, \
+ uint8_t *: mergemask_ub, \
+ int8_t *: mergemask_sb, \
+ uint16_t *: mergemask_uh, \
+ int16_t *: mergemask_sh, \
+ uint32_t *: mergemask_uw, \
+ int32_t *: mergemask_sw, \
+ uint64_t *: mergemask_uq, \
+ int64_t *: mergemask_sq)(D, R, M)
+
+void HELPER(mve_vdup)(CPUARMState *env, void *vd, uint32_t val)
+{
+ /*
+ * The generated code already replicated an 8 or 16 bit constant
+ * into the 32-bit value, so we only need to write the 32-bit
+ * value to all elements of the Qreg, allowing for predication.
+ */
+ uint32_t *d = vd;
+ uint16_t mask = mve_element_mask(env);
+ unsigned e;
+ for (e = 0; e < 16 / 4; e++, mask >>= 4) {
+ mergemask(&d[H4(e)], val, mask);
+ }
+ mve_advance_vpt(env);
+}
+
+#define DO_1OP(OP, ESIZE, TYPE, FN) \
+ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm) \
+ { \
+ TYPE *d = vd, *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ mergemask(&d[H##ESIZE(e)], FN(m[H##ESIZE(e)]), mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_CLS_B(N) (clrsb32(N) - 24)
+#define DO_CLS_H(N) (clrsb32(N) - 16)
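+/*
+ * A byte operand is sign-extended before clrsb32() sees it, which adds 24
+ * redundant copies of the sign bit, hence the -24 (and -16 for halfwords):
+ * e.g. VCLS.S8 of 0xff gives clrsb32(0xffffffff) - 24 = 31 - 24 = 7.
+ * The CLZ variants below zero-extend instead and subtract the 24 (or 16)
+ * guaranteed leading zeroes in the same way.
+ */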
+
+DO_1OP(vclsb, 1, int8_t, DO_CLS_B)
+DO_1OP(vclsh, 2, int16_t, DO_CLS_H)
+DO_1OP(vclsw, 4, int32_t, clrsb32)
+
+#define DO_CLZ_B(N) (clz32(N) - 24)
+#define DO_CLZ_H(N) (clz32(N) - 16)
+
+DO_1OP(vclzb, 1, uint8_t, DO_CLZ_B)
+DO_1OP(vclzh, 2, uint16_t, DO_CLZ_H)
+DO_1OP(vclzw, 4, uint32_t, clz32)
+
+DO_1OP(vrev16b, 2, uint16_t, bswap16)
+DO_1OP(vrev32b, 4, uint32_t, bswap32)
+DO_1OP(vrev32h, 4, uint32_t, hswap32)
+DO_1OP(vrev64b, 8, uint64_t, bswap64)
+DO_1OP(vrev64h, 8, uint64_t, hswap64)
+DO_1OP(vrev64w, 8, uint64_t, wswap64)
+
+#define DO_NOT(N) (~(N))
+
+DO_1OP(vmvn, 8, uint64_t, DO_NOT)
+
+#define DO_ABS(N) ((N) < 0 ? -(N) : (N))
+#define DO_FABSH(N) ((N) & dup_const(MO_16, 0x7fff))
+#define DO_FABSS(N) ((N) & dup_const(MO_32, 0x7fffffff))
+
+DO_1OP(vabsb, 1, int8_t, DO_ABS)
+DO_1OP(vabsh, 2, int16_t, DO_ABS)
+DO_1OP(vabsw, 4, int32_t, DO_ABS)
+
+/* We can do these 64 bits at a time */
+DO_1OP(vfabsh, 8, uint64_t, DO_FABSH)
+DO_1OP(vfabss, 8, uint64_t, DO_FABSS)
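+/*
+ * dup_const(MO_16, 0x7fff) is 0x7fff7fff7fff7fff, so DO_FABSH clears the
+ * sign bit of all four fp16 lanes in each 64-bit chunk, and DO_FABSS
+ * clears bit 31 of both fp32 lanes; the FNEG forms below flip the same
+ * bits with XOR instead.
+ */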
+
+#define DO_NEG(N) (-(N))
+#define DO_FNEGH(N) ((N) ^ dup_const(MO_16, 0x8000))
+#define DO_FNEGS(N) ((N) ^ dup_const(MO_32, 0x80000000))
+
+DO_1OP(vnegb, 1, int8_t, DO_NEG)
+DO_1OP(vnegh, 2, int16_t, DO_NEG)
+DO_1OP(vnegw, 4, int32_t, DO_NEG)
+
+/* We can do these 64 bits at a time */
+DO_1OP(vfnegh, 8, uint64_t, DO_FNEGH)
+DO_1OP(vfnegs, 8, uint64_t, DO_FNEGS)
+
+/*
+ * 1 operand immediates: Vda is destination and possibly also one source.
+ * All these insns work at 64-bit widths.
+ */
+#define DO_1OP_IMM(OP, FN) \
+ void HELPER(mve_##OP)(CPUARMState *env, void *vda, uint64_t imm) \
+ { \
+ uint64_t *da = vda; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ for (e = 0; e < 16 / 8; e++, mask >>= 8) { \
+ mergemask(&da[H8(e)], FN(da[H8(e)], imm), mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_MOVI(N, I) (I)
+#define DO_ANDI(N, I) ((N) & (I))
+#define DO_ORRI(N, I) ((N) | (I))
+
+DO_1OP_IMM(vmovi, DO_MOVI)
+DO_1OP_IMM(vandi, DO_ANDI)
+DO_1OP_IMM(vorri, DO_ORRI)
+
+#define DO_2OP(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, \
+ void *vd, void *vn, void *vm) \
+ { \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ mergemask(&d[H##ESIZE(e)], \
+ FN(n[H##ESIZE(e)], m[H##ESIZE(e)]), mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+/* provide unsigned 2-op helpers for all sizes */
+#define DO_2OP_U(OP, FN) \
+ DO_2OP(OP##b, 1, uint8_t, FN) \
+ DO_2OP(OP##h, 2, uint16_t, FN) \
+ DO_2OP(OP##w, 4, uint32_t, FN)
+
+/* provide signed 2-op helpers for all sizes */
+#define DO_2OP_S(OP, FN) \
+ DO_2OP(OP##b, 1, int8_t, FN) \
+ DO_2OP(OP##h, 2, int16_t, FN) \
+ DO_2OP(OP##w, 4, int32_t, FN)
+
+/*
+ * "Long" operations where two half-sized inputs (taken from either the
+ * top or the bottom of the input vector) produce a double-width result.
+ * Here ESIZE, TYPE are for the input, and LESIZE, LTYPE for the output.
+ */
+#define DO_2OP_L(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, void *vm) \
+ { \
+ LTYPE *d = vd; \
+ TYPE *n = vn, *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned le; \
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
+ LTYPE r = FN((LTYPE)n[H##ESIZE(le * 2 + TOP)], \
+ m[H##ESIZE(le * 2 + TOP)]); \
+ mergemask(&d[H##LESIZE(le)], r, mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_2OP_SAT(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, void *vm) \
+ { \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ bool qc = false; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ bool sat = false; \
+ TYPE r_ = FN(n[H##ESIZE(e)], m[H##ESIZE(e)], &sat); \
+ mergemask(&d[H##ESIZE(e)], r_, mask); \
+ qc |= sat & mask & 1; \
+ } \
+ if (qc) { \
+ env->vfp.qc[0] = qc; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+/* provide unsigned 2-op helpers for all sizes */
+#define DO_2OP_SAT_U(OP, FN) \
+ DO_2OP_SAT(OP##b, 1, uint8_t, FN) \
+ DO_2OP_SAT(OP##h, 2, uint16_t, FN) \
+ DO_2OP_SAT(OP##w, 4, uint32_t, FN)
+
+/* provide signed 2-op helpers for all sizes */
+#define DO_2OP_SAT_S(OP, FN) \
+ DO_2OP_SAT(OP##b, 1, int8_t, FN) \
+ DO_2OP_SAT(OP##h, 2, int16_t, FN) \
+ DO_2OP_SAT(OP##w, 4, int32_t, FN)
+
+#define DO_AND(N, M) ((N) & (M))
+#define DO_BIC(N, M) ((N) & ~(M))
+#define DO_ORR(N, M) ((N) | (M))
+#define DO_ORN(N, M) ((N) | ~(M))
+#define DO_EOR(N, M) ((N) ^ (M))
+
+DO_2OP(vand, 8, uint64_t, DO_AND)
+DO_2OP(vbic, 8, uint64_t, DO_BIC)
+DO_2OP(vorr, 8, uint64_t, DO_ORR)
+DO_2OP(vorn, 8, uint64_t, DO_ORN)
+DO_2OP(veor, 8, uint64_t, DO_EOR)
+
+#define DO_ADD(N, M) ((N) + (M))
+#define DO_SUB(N, M) ((N) - (M))
+#define DO_MUL(N, M) ((N) * (M))
+
+DO_2OP_U(vadd, DO_ADD)
+DO_2OP_U(vsub, DO_SUB)
+DO_2OP_U(vmul, DO_MUL)
+
+DO_2OP_L(vmullbsb, 0, 1, int8_t, 2, int16_t, DO_MUL)
+DO_2OP_L(vmullbsh, 0, 2, int16_t, 4, int32_t, DO_MUL)
+DO_2OP_L(vmullbsw, 0, 4, int32_t, 8, int64_t, DO_MUL)
+DO_2OP_L(vmullbub, 0, 1, uint8_t, 2, uint16_t, DO_MUL)
+DO_2OP_L(vmullbuh, 0, 2, uint16_t, 4, uint32_t, DO_MUL)
+DO_2OP_L(vmullbuw, 0, 4, uint32_t, 8, uint64_t, DO_MUL)
+
+DO_2OP_L(vmulltsb, 1, 1, int8_t, 2, int16_t, DO_MUL)
+DO_2OP_L(vmulltsh, 1, 2, int16_t, 4, int32_t, DO_MUL)
+DO_2OP_L(vmulltsw, 1, 4, int32_t, 8, int64_t, DO_MUL)
+DO_2OP_L(vmulltub, 1, 1, uint8_t, 2, uint16_t, DO_MUL)
+DO_2OP_L(vmulltuh, 1, 2, uint16_t, 4, uint32_t, DO_MUL)
+DO_2OP_L(vmulltuw, 1, 4, uint32_t, 8, uint64_t, DO_MUL)
+
+/*
+ * Polynomial multiply. We can always do this generating 64 bits
+ * of the result at a time, so we don't need to use DO_2OP_L.
+ */
+DO_2OP(vmullpbh, 8, uint64_t, clmul_8x4_even)
+DO_2OP(vmullpth, 8, uint64_t, clmul_8x4_odd)
+DO_2OP(vmullpbw, 8, uint64_t, clmul_16x2_even)
+DO_2OP(vmullptw, 8, uint64_t, clmul_16x2_odd)
+
+/*
+ * Because the computation type is at least twice as large as required,
+ * these work for both signed and unsigned source types.
+ */
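+/*
+ * E.g. for bytes: do_mulh_b(-1, 2) computes (-2) >> 8 = -1 (0xff), the
+ * high byte of the signed product 0xfffe, while the same helper called
+ * with unsigned 255 and 2 computes 510 >> 8 = 1, the high byte of 0x01fe;
+ * the int32_t/int64_t intermediates represent either view exactly.
+ */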
+static inline uint8_t do_mulh_b(int32_t n, int32_t m)
+{
+ return (n * m) >> 8;
+}
+
+static inline uint16_t do_mulh_h(int32_t n, int32_t m)
+{
+ return (n * m) >> 16;
+}
+
+static inline uint32_t do_mulh_w(int64_t n, int64_t m)
+{
+ return (n * m) >> 32;
+}
+
+static inline uint8_t do_rmulh_b(int32_t n, int32_t m)
+{
+ return (n * m + (1U << 7)) >> 8;
+}
+
+static inline uint16_t do_rmulh_h(int32_t n, int32_t m)
+{
+ return (n * m + (1U << 15)) >> 16;
+}
+
+static inline uint32_t do_rmulh_w(int64_t n, int64_t m)
+{
+ return (n * m + (1U << 31)) >> 32;
+}
+
+DO_2OP(vmulhsb, 1, int8_t, do_mulh_b)
+DO_2OP(vmulhsh, 2, int16_t, do_mulh_h)
+DO_2OP(vmulhsw, 4, int32_t, do_mulh_w)
+DO_2OP(vmulhub, 1, uint8_t, do_mulh_b)
+DO_2OP(vmulhuh, 2, uint16_t, do_mulh_h)
+DO_2OP(vmulhuw, 4, uint32_t, do_mulh_w)
+
+DO_2OP(vrmulhsb, 1, int8_t, do_rmulh_b)
+DO_2OP(vrmulhsh, 2, int16_t, do_rmulh_h)
+DO_2OP(vrmulhsw, 4, int32_t, do_rmulh_w)
+DO_2OP(vrmulhub, 1, uint8_t, do_rmulh_b)
+DO_2OP(vrmulhuh, 2, uint16_t, do_rmulh_h)
+DO_2OP(vrmulhuw, 4, uint32_t, do_rmulh_w)
+
+#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
+#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
+
+DO_2OP_S(vmaxs, DO_MAX)
+DO_2OP_U(vmaxu, DO_MAX)
+DO_2OP_S(vmins, DO_MIN)
+DO_2OP_U(vminu, DO_MIN)
+
+#define DO_ABD(N, M) ((N) >= (M) ? (N) - (M) : (M) - (N))
+
+DO_2OP_S(vabds, DO_ABD)
+DO_2OP_U(vabdu, DO_ABD)
+
+static inline uint32_t do_vhadd_u(uint32_t n, uint32_t m)
+{
+ return ((uint64_t)n + m) >> 1;
+}
+
+static inline int32_t do_vhadd_s(int32_t n, int32_t m)
+{
+ return ((int64_t)n + m) >> 1;
+}
+
+static inline uint32_t do_vhsub_u(uint32_t n, uint32_t m)
+{
+ return ((uint64_t)n - m) >> 1;
+}
+
+static inline int32_t do_vhsub_s(int32_t n, int32_t m)
+{
+ return ((int64_t)n - m) >> 1;
+}
+
+DO_2OP_S(vhadds, do_vhadd_s)
+DO_2OP_U(vhaddu, do_vhadd_u)
+DO_2OP_S(vhsubs, do_vhsub_s)
+DO_2OP_U(vhsubu, do_vhsub_u)
+
+#define DO_VSHLS(N, M) do_sqrshl_bhs(N, (int8_t)(M), sizeof(N) * 8, false, NULL)
+#define DO_VSHLU(N, M) do_uqrshl_bhs(N, (int8_t)(M), sizeof(N) * 8, false, NULL)
+#define DO_VRSHLS(N, M) do_sqrshl_bhs(N, (int8_t)(M), sizeof(N) * 8, true, NULL)
+#define DO_VRSHLU(N, M) do_uqrshl_bhs(N, (int8_t)(M), sizeof(N) * 8, true, NULL)
+
+DO_2OP_S(vshls, DO_VSHLS)
+DO_2OP_U(vshlu, DO_VSHLU)
+DO_2OP_S(vrshls, DO_VRSHLS)
+DO_2OP_U(vrshlu, DO_VRSHLU)
+
+#define DO_RHADD_S(N, M) (((int64_t)(N) + (M) + 1) >> 1)
+#define DO_RHADD_U(N, M) (((uint64_t)(N) + (M) + 1) >> 1)
+
+DO_2OP_S(vrhadds, DO_RHADD_S)
+DO_2OP_U(vrhaddu, DO_RHADD_U)
+
+static void do_vadc(CPUARMState *env, uint32_t *d, uint32_t *n, uint32_t *m,
+ uint32_t inv, uint32_t carry_in, bool update_flags)
+{
+ uint16_t mask = mve_element_mask(env);
+ unsigned e;
+
+ /* If any additions trigger, we will update flags. */
+ if (mask & 0x1111) {
+ update_flags = true;
+ }
+
+ for (e = 0; e < 16 / 4; e++, mask >>= 4) {
+ uint64_t r = carry_in;
+ r += n[H4(e)];
+ r += m[H4(e)] ^ inv;
+ if (mask & 1) {
+ carry_in = r >> 32;
+ }
+ mergemask(&d[H4(e)], r, mask);
+ }
+
+ if (update_flags) {
+ /* Store C, clear NZV. */
+ env->vfp.xregs[ARM_VFP_FPSCR] &= ~FPCR_NZCV_MASK;
+ env->vfp.xregs[ARM_VFP_FPSCR] |= carry_in * FPCR_C;
+ }
+ mve_advance_vpt(env);
+}
+
+void HELPER(mve_vadc)(CPUARMState *env, void *vd, void *vn, void *vm)
+{
+ bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C;
+ do_vadc(env, vd, vn, vm, 0, carry_in, false);
+}
+
+void HELPER(mve_vsbc)(CPUARMState *env, void *vd, void *vn, void *vm)
+{
+ bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C;
+ do_vadc(env, vd, vn, vm, -1, carry_in, false);
+}
+
+void HELPER(mve_vadci)(CPUARMState *env, void *vd, void *vn, void *vm)
+{
+ do_vadc(env, vd, vn, vm, 0, 0, true);
+}
+
+void HELPER(mve_vsbci)(CPUARMState *env, void *vd, void *vn, void *vm)
+{
+ do_vadc(env, vd, vn, vm, -1, 1, true);
+}
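+
+/*
+ * The subtract forms rely on the identity n - m - (1 - C) == n + ~m + C:
+ * passing inv == -1 XORs every bit of m, and the incoming carry (FPSCR.C
+ * for VSBC, a fixed 1 for VSBCI) supplies the "+ C" term, so the same
+ * carry-chained addition in do_vadc() serves both the add and subtract
+ * insns.
+ */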
+
+#define DO_VCADD(OP, ESIZE, TYPE, FN0, FN1) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, void *vm) \
+ { \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ TYPE r[16 / ESIZE]; \
+ /* Calculate all results first to avoid overwriting inputs */ \
+ for (e = 0; e < 16 / ESIZE; e++) { \
+ if (!(e & 1)) { \
+ r[e] = FN0(n[H##ESIZE(e)], m[H##ESIZE(e + 1)]); \
+ } else { \
+ r[e] = FN1(n[H##ESIZE(e)], m[H##ESIZE(e - 1)]); \
+ } \
+ } \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ mergemask(&d[H##ESIZE(e)], r[e], mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VCADD_ALL(OP, FN0, FN1) \
+ DO_VCADD(OP##b, 1, int8_t, FN0, FN1) \
+ DO_VCADD(OP##h, 2, int16_t, FN0, FN1) \
+ DO_VCADD(OP##w, 4, int32_t, FN0, FN1)
+
+DO_VCADD_ALL(vcadd90, DO_SUB, DO_ADD)
+DO_VCADD_ALL(vcadd270, DO_ADD, DO_SUB)
+DO_VCADD_ALL(vhcadd90, do_vhsub_s, do_vhadd_s)
+DO_VCADD_ALL(vhcadd270, do_vhadd_s, do_vhsub_s)
+
+static inline int32_t do_sat_bhw(int64_t val, int64_t min, int64_t max, bool *s)
+{
+ if (val > max) {
+ *s = true;
+ return max;
+ } else if (val < min) {
+ *s = true;
+ return min;
+ }
+ return val;
+}
+
+#define DO_SQADD_B(n, m, s) do_sat_bhw((int64_t)n + m, INT8_MIN, INT8_MAX, s)
+#define DO_SQADD_H(n, m, s) do_sat_bhw((int64_t)n + m, INT16_MIN, INT16_MAX, s)
+#define DO_SQADD_W(n, m, s) do_sat_bhw((int64_t)n + m, INT32_MIN, INT32_MAX, s)
+
+#define DO_UQADD_B(n, m, s) do_sat_bhw((int64_t)n + m, 0, UINT8_MAX, s)
+#define DO_UQADD_H(n, m, s) do_sat_bhw((int64_t)n + m, 0, UINT16_MAX, s)
+#define DO_UQADD_W(n, m, s) do_sat_bhw((int64_t)n + m, 0, UINT32_MAX, s)
+
+#define DO_SQSUB_B(n, m, s) do_sat_bhw((int64_t)n - m, INT8_MIN, INT8_MAX, s)
+#define DO_SQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, INT16_MIN, INT16_MAX, s)
+#define DO_SQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, INT32_MIN, INT32_MAX, s)
+
+#define DO_UQSUB_B(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT8_MAX, s)
+#define DO_UQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT16_MAX, s)
+#define DO_UQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT32_MAX, s)
+
+/*
+ * For QDMULH and QRDMULH we simplify "double and shift by esize" into
+ * "shift by esize-1", adjusting the QRDMULH rounding constant to match.
+ */
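+/*
+ * E.g. for halfwords, (2 * n * m + (1 << 15)) >> 16 is identical to
+ * (n * m + (1 << 14)) >> 15 for all inputs, since the shift amount and
+ * the rounding constant are halved together.
+ */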
+#define DO_QDMULH_B(n, m, s) do_sat_bhw(((int64_t)n * m) >> 7, \
+ INT8_MIN, INT8_MAX, s)
+#define DO_QDMULH_H(n, m, s) do_sat_bhw(((int64_t)n * m) >> 15, \
+ INT16_MIN, INT16_MAX, s)
+#define DO_QDMULH_W(n, m, s) do_sat_bhw(((int64_t)n * m) >> 31, \
+ INT32_MIN, INT32_MAX, s)
+
+#define DO_QRDMULH_B(n, m, s) do_sat_bhw(((int64_t)n * m + (1 << 6)) >> 7, \
+ INT8_MIN, INT8_MAX, s)
+#define DO_QRDMULH_H(n, m, s) do_sat_bhw(((int64_t)n * m + (1 << 14)) >> 15, \
+ INT16_MIN, INT16_MAX, s)
+#define DO_QRDMULH_W(n, m, s) do_sat_bhw(((int64_t)n * m + (1 << 30)) >> 31, \
+ INT32_MIN, INT32_MAX, s)
+
+DO_2OP_SAT(vqdmulhb, 1, int8_t, DO_QDMULH_B)
+DO_2OP_SAT(vqdmulhh, 2, int16_t, DO_QDMULH_H)
+DO_2OP_SAT(vqdmulhw, 4, int32_t, DO_QDMULH_W)
+
+DO_2OP_SAT(vqrdmulhb, 1, int8_t, DO_QRDMULH_B)
+DO_2OP_SAT(vqrdmulhh, 2, int16_t, DO_QRDMULH_H)
+DO_2OP_SAT(vqrdmulhw, 4, int32_t, DO_QRDMULH_W)
+
+DO_2OP_SAT(vqaddub, 1, uint8_t, DO_UQADD_B)
+DO_2OP_SAT(vqadduh, 2, uint16_t, DO_UQADD_H)
+DO_2OP_SAT(vqadduw, 4, uint32_t, DO_UQADD_W)
+DO_2OP_SAT(vqaddsb, 1, int8_t, DO_SQADD_B)
+DO_2OP_SAT(vqaddsh, 2, int16_t, DO_SQADD_H)
+DO_2OP_SAT(vqaddsw, 4, int32_t, DO_SQADD_W)
+
+DO_2OP_SAT(vqsubub, 1, uint8_t, DO_UQSUB_B)
+DO_2OP_SAT(vqsubuh, 2, uint16_t, DO_UQSUB_H)
+DO_2OP_SAT(vqsubuw, 4, uint32_t, DO_UQSUB_W)
+DO_2OP_SAT(vqsubsb, 1, int8_t, DO_SQSUB_B)
+DO_2OP_SAT(vqsubsh, 2, int16_t, DO_SQSUB_H)
+DO_2OP_SAT(vqsubsw, 4, int32_t, DO_SQSUB_W)
+
+/*
+ * This wrapper fixes up the impedance mismatch between do_sqrshl_bhs()
+ * and friends wanting a uint32_t* sat and our needing a bool*.
+ */
+#define WRAP_QRSHL_HELPER(FN, N, M, ROUND, satp) \
+ ({ \
+ uint32_t su32 = 0; \
+ typeof(N) qrshl_ret = FN(N, (int8_t)(M), sizeof(N) * 8, ROUND, &su32); \
+ if (su32) { \
+ *satp = true; \
+ } \
+ qrshl_ret; \
+ })
+
+#define DO_SQSHL_OP(N, M, satp) \
+ WRAP_QRSHL_HELPER(do_sqrshl_bhs, N, M, false, satp)
+#define DO_UQSHL_OP(N, M, satp) \
+ WRAP_QRSHL_HELPER(do_uqrshl_bhs, N, M, false, satp)
+#define DO_SQRSHL_OP(N, M, satp) \
+ WRAP_QRSHL_HELPER(do_sqrshl_bhs, N, M, true, satp)
+#define DO_UQRSHL_OP(N, M, satp) \
+ WRAP_QRSHL_HELPER(do_uqrshl_bhs, N, M, true, satp)
+#define DO_SUQSHL_OP(N, M, satp) \
+ WRAP_QRSHL_HELPER(do_suqrshl_bhs, N, M, false, satp)
+
+DO_2OP_SAT_S(vqshls, DO_SQSHL_OP)
+DO_2OP_SAT_U(vqshlu, DO_UQSHL_OP)
+DO_2OP_SAT_S(vqrshls, DO_SQRSHL_OP)
+DO_2OP_SAT_U(vqrshlu, DO_UQRSHL_OP)
+
+/*
+ * Multiply add dual returning high half
+ * The 'FN' here takes four inputs A, B, C, D, a 0/1 indicator of
+ * whether to add the rounding constant, and the pointer to the
+ * saturation flag, and should do "(A * B + C * D) * 2 + rounding constant",
+ * saturate to twice the input size and return the high half; or
+ * (A * B - C * D) etc for VQDMLSDH.
+ */
+#define DO_VQDMLADH_OP(OP, ESIZE, TYPE, XCHG, ROUND, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
+ void *vm) \
+ { \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ bool qc = false; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ bool sat = false; \
+ if ((e & 1) == XCHG) { \
+ TYPE vqdmladh_ret = FN(n[H##ESIZE(e)], \
+ m[H##ESIZE(e - XCHG)], \
+ n[H##ESIZE(e + (1 - 2 * XCHG))], \
+ m[H##ESIZE(e + (1 - XCHG))], \
+ ROUND, &sat); \
+ mergemask(&d[H##ESIZE(e)], vqdmladh_ret, mask); \
+ qc |= sat & mask & 1; \
+ } \
+ } \
+ if (qc) { \
+ env->vfp.qc[0] = qc; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+static int8_t do_vqdmladh_b(int8_t a, int8_t b, int8_t c, int8_t d,
+ int round, bool *sat)
+{
+ int64_t r = ((int64_t)a * b + (int64_t)c * d) * 2 + (round << 7);
+ return do_sat_bhw(r, INT16_MIN, INT16_MAX, sat) >> 8;
+}
+
+static int16_t do_vqdmladh_h(int16_t a, int16_t b, int16_t c, int16_t d,
+ int round, bool *sat)
+{
+ int64_t r = ((int64_t)a * b + (int64_t)c * d) * 2 + (round << 15);
+ return do_sat_bhw(r, INT32_MIN, INT32_MAX, sat) >> 16;
+}
+
+static int32_t do_vqdmladh_w(int32_t a, int32_t b, int32_t c, int32_t d,
+ int round, bool *sat)
+{
+ int64_t m1 = (int64_t)a * b;
+ int64_t m2 = (int64_t)c * d;
+ int64_t r;
+ /*
+ * Architecturally we should do the entire add, double, round
+ * and then check for saturation. We do three saturating adds,
+ * but we need to be careful about the order. If the first
+ * m1 + m2 saturates then it's impossible for the *2+rc to
+ * bring it back into the non-saturated range. However, if
+ * m1 + m2 is negative then it's possible that doing the doubling
+ * would take the intermediate result below INT64_MIN and the
+ * addition of the rounding constant then brings it back in range.
+ * So we add half the rounding constant before doubling rather
+ * than adding the rounding constant after the doubling.
+ */
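+ /*
+ * Concretely, 2 * ((m1 + m2) + (round << 30)) ==
+ * 2 * (m1 + m2) + (round << 31), so adding the halved rounding constant
+ * before the doubling yields the architectural value exactly whenever
+ * no intermediate step saturates.
+ */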
+ if (sadd64_overflow(m1, m2, &r) ||
+ sadd64_overflow(r, (round << 30), &r) ||
+ sadd64_overflow(r, r, &r)) {
+ *sat = true;
+ return r < 0 ? INT32_MAX : INT32_MIN;
+ }
+ return r >> 32;
+}
+
+static int8_t do_vqdmlsdh_b(int8_t a, int8_t b, int8_t c, int8_t d,
+ int round, bool *sat)
+{
+ int64_t r = ((int64_t)a * b - (int64_t)c * d) * 2 + (round << 7);
+ return do_sat_bhw(r, INT16_MIN, INT16_MAX, sat) >> 8;
+}
+
+static int16_t do_vqdmlsdh_h(int16_t a, int16_t b, int16_t c, int16_t d,
+ int round, bool *sat)
+{
+ int64_t r = ((int64_t)a * b - (int64_t)c * d) * 2 + (round << 15);
+ return do_sat_bhw(r, INT32_MIN, INT32_MAX, sat) >> 16;
+}
+
+static int32_t do_vqdmlsdh_w(int32_t a, int32_t b, int32_t c, int32_t d,
+ int round, bool *sat)
+{
+ int64_t m1 = (int64_t)a * b;
+ int64_t m2 = (int64_t)c * d;
+ int64_t r;
+ /* The same ordering issue as in do_vqdmladh_w applies here too */
+ if (ssub64_overflow(m1, m2, &r) ||
+ sadd64_overflow(r, (round << 30), &r) ||
+ sadd64_overflow(r, r, &r)) {
+ *sat = true;
+ return r < 0 ? INT32_MAX : INT32_MIN;
+ }
+ return r >> 32;
+}
+
+DO_VQDMLADH_OP(vqdmladhb, 1, int8_t, 0, 0, do_vqdmladh_b)
+DO_VQDMLADH_OP(vqdmladhh, 2, int16_t, 0, 0, do_vqdmladh_h)
+DO_VQDMLADH_OP(vqdmladhw, 4, int32_t, 0, 0, do_vqdmladh_w)
+DO_VQDMLADH_OP(vqdmladhxb, 1, int8_t, 1, 0, do_vqdmladh_b)
+DO_VQDMLADH_OP(vqdmladhxh, 2, int16_t, 1, 0, do_vqdmladh_h)
+DO_VQDMLADH_OP(vqdmladhxw, 4, int32_t, 1, 0, do_vqdmladh_w)
+
+DO_VQDMLADH_OP(vqrdmladhb, 1, int8_t, 0, 1, do_vqdmladh_b)
+DO_VQDMLADH_OP(vqrdmladhh, 2, int16_t, 0, 1, do_vqdmladh_h)
+DO_VQDMLADH_OP(vqrdmladhw, 4, int32_t, 0, 1, do_vqdmladh_w)
+DO_VQDMLADH_OP(vqrdmladhxb, 1, int8_t, 1, 1, do_vqdmladh_b)
+DO_VQDMLADH_OP(vqrdmladhxh, 2, int16_t, 1, 1, do_vqdmladh_h)
+DO_VQDMLADH_OP(vqrdmladhxw, 4, int32_t, 1, 1, do_vqdmladh_w)
+
+DO_VQDMLADH_OP(vqdmlsdhb, 1, int8_t, 0, 0, do_vqdmlsdh_b)
+DO_VQDMLADH_OP(vqdmlsdhh, 2, int16_t, 0, 0, do_vqdmlsdh_h)
+DO_VQDMLADH_OP(vqdmlsdhw, 4, int32_t, 0, 0, do_vqdmlsdh_w)
+DO_VQDMLADH_OP(vqdmlsdhxb, 1, int8_t, 1, 0, do_vqdmlsdh_b)
+DO_VQDMLADH_OP(vqdmlsdhxh, 2, int16_t, 1, 0, do_vqdmlsdh_h)
+DO_VQDMLADH_OP(vqdmlsdhxw, 4, int32_t, 1, 0, do_vqdmlsdh_w)
+
+DO_VQDMLADH_OP(vqrdmlsdhb, 1, int8_t, 0, 1, do_vqdmlsdh_b)
+DO_VQDMLADH_OP(vqrdmlsdhh, 2, int16_t, 0, 1, do_vqdmlsdh_h)
+DO_VQDMLADH_OP(vqrdmlsdhw, 4, int32_t, 0, 1, do_vqdmlsdh_w)
+DO_VQDMLADH_OP(vqrdmlsdhxb, 1, int8_t, 1, 1, do_vqdmlsdh_b)
+DO_VQDMLADH_OP(vqrdmlsdhxh, 2, int16_t, 1, 1, do_vqdmlsdh_h)
+DO_VQDMLADH_OP(vqrdmlsdhxw, 4, int32_t, 1, 1, do_vqdmlsdh_w)
+
+#define DO_2OP_SCALAR(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
+ uint32_t rm) \
+ { \
+ TYPE *d = vd, *n = vn; \
+ TYPE m = rm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ mergemask(&d[H##ESIZE(e)], FN(n[H##ESIZE(e)], m), mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_2OP_SAT_SCALAR(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
+ uint32_t rm) \
+ { \
+ TYPE *d = vd, *n = vn; \
+ TYPE m = rm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ bool qc = false; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ bool sat = false; \
+ mergemask(&d[H##ESIZE(e)], FN(n[H##ESIZE(e)], m, &sat), \
+ mask); \
+ qc |= sat & mask & 1; \
+ } \
+ if (qc) { \
+ env->vfp.qc[0] = qc; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+/* "accumulating" version where FN takes d as well as n and m */
+#define DO_2OP_ACC_SCALAR(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
+ uint32_t rm) \
+ { \
+ TYPE *d = vd, *n = vn; \
+ TYPE m = rm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ mergemask(&d[H##ESIZE(e)], \
+ FN(d[H##ESIZE(e)], n[H##ESIZE(e)], m), mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_2OP_SAT_ACC_SCALAR(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
+ uint32_t rm) \
+ { \
+ TYPE *d = vd, *n = vn; \
+ TYPE m = rm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ bool qc = false; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ bool sat = false; \
+ mergemask(&d[H##ESIZE(e)], \
+ FN(d[H##ESIZE(e)], n[H##ESIZE(e)], m, &sat), \
+ mask); \
+ qc |= sat & mask & 1; \
+ } \
+ if (qc) { \
+ env->vfp.qc[0] = qc; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+/* provide unsigned 2-op scalar helpers for all sizes */
+#define DO_2OP_SCALAR_U(OP, FN) \
+ DO_2OP_SCALAR(OP##b, 1, uint8_t, FN) \
+ DO_2OP_SCALAR(OP##h, 2, uint16_t, FN) \
+ DO_2OP_SCALAR(OP##w, 4, uint32_t, FN)
+#define DO_2OP_SCALAR_S(OP, FN) \
+ DO_2OP_SCALAR(OP##b, 1, int8_t, FN) \
+ DO_2OP_SCALAR(OP##h, 2, int16_t, FN) \
+ DO_2OP_SCALAR(OP##w, 4, int32_t, FN)
+
+#define DO_2OP_ACC_SCALAR_U(OP, FN) \
+ DO_2OP_ACC_SCALAR(OP##b, 1, uint8_t, FN) \
+ DO_2OP_ACC_SCALAR(OP##h, 2, uint16_t, FN) \
+ DO_2OP_ACC_SCALAR(OP##w, 4, uint32_t, FN)
+
+DO_2OP_SCALAR_U(vadd_scalar, DO_ADD)
+DO_2OP_SCALAR_U(vsub_scalar, DO_SUB)
+DO_2OP_SCALAR_U(vmul_scalar, DO_MUL)
+DO_2OP_SCALAR_S(vhadds_scalar, do_vhadd_s)
+DO_2OP_SCALAR_U(vhaddu_scalar, do_vhadd_u)
+DO_2OP_SCALAR_S(vhsubs_scalar, do_vhsub_s)
+DO_2OP_SCALAR_U(vhsubu_scalar, do_vhsub_u)
+
+DO_2OP_SAT_SCALAR(vqaddu_scalarb, 1, uint8_t, DO_UQADD_B)
+DO_2OP_SAT_SCALAR(vqaddu_scalarh, 2, uint16_t, DO_UQADD_H)
+DO_2OP_SAT_SCALAR(vqaddu_scalarw, 4, uint32_t, DO_UQADD_W)
+DO_2OP_SAT_SCALAR(vqadds_scalarb, 1, int8_t, DO_SQADD_B)
+DO_2OP_SAT_SCALAR(vqadds_scalarh, 2, int16_t, DO_SQADD_H)
+DO_2OP_SAT_SCALAR(vqadds_scalarw, 4, int32_t, DO_SQADD_W)
+
+DO_2OP_SAT_SCALAR(vqsubu_scalarb, 1, uint8_t, DO_UQSUB_B)
+DO_2OP_SAT_SCALAR(vqsubu_scalarh, 2, uint16_t, DO_UQSUB_H)
+DO_2OP_SAT_SCALAR(vqsubu_scalarw, 4, uint32_t, DO_UQSUB_W)
+DO_2OP_SAT_SCALAR(vqsubs_scalarb, 1, int8_t, DO_SQSUB_B)
+DO_2OP_SAT_SCALAR(vqsubs_scalarh, 2, int16_t, DO_SQSUB_H)
+DO_2OP_SAT_SCALAR(vqsubs_scalarw, 4, int32_t, DO_SQSUB_W)
+
+DO_2OP_SAT_SCALAR(vqdmulh_scalarb, 1, int8_t, DO_QDMULH_B)
+DO_2OP_SAT_SCALAR(vqdmulh_scalarh, 2, int16_t, DO_QDMULH_H)
+DO_2OP_SAT_SCALAR(vqdmulh_scalarw, 4, int32_t, DO_QDMULH_W)
+DO_2OP_SAT_SCALAR(vqrdmulh_scalarb, 1, int8_t, DO_QRDMULH_B)
+DO_2OP_SAT_SCALAR(vqrdmulh_scalarh, 2, int16_t, DO_QRDMULH_H)
+DO_2OP_SAT_SCALAR(vqrdmulh_scalarw, 4, int32_t, DO_QRDMULH_W)
+
+static int8_t do_vqdmlah_b(int8_t a, int8_t b, int8_t c, int round, bool *sat)
+{
+ int64_t r = (int64_t)a * b * 2 + ((int64_t)c << 8) + (round << 7);
+ return do_sat_bhw(r, INT16_MIN, INT16_MAX, sat) >> 8;
+}
+
+static int16_t do_vqdmlah_h(int16_t a, int16_t b, int16_t c,
+ int round, bool *sat)
+{
+ int64_t r = (int64_t)a * b * 2 + ((int64_t)c << 16) + (round << 15);
+ return do_sat_bhw(r, INT32_MIN, INT32_MAX, sat) >> 16;
+}
+
+static int32_t do_vqdmlah_w(int32_t a, int32_t b, int32_t c,
+ int round, bool *sat)
+{
+ /*
+ * Architecturally we should do the entire add, double, round
+ * and then check for saturation. We do three saturating adds,
+ * but we need to be careful about the order. If the first
+ * m1 + m2 saturates then it's impossible for the *2+rc to
+ * bring it back into the non-saturated range. However, if
+ * m1 + m2 is negative then it's possible that doing the doubling
+ * would take the intermediate result below INT64_MIN and the
+ * addition of the rounding constant then brings it back in range.
+ * So we add half the rounding constant and half the "c << esize"
+ * before doubling rather than adding the rounding constant after
+ * the doubling.
+ */
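+ /*
+ * Here 2 * (a * b + (c << 31) + (round << 30)) ==
+ * 2 * a * b + (c << 32) + (round << 31), the analogue of the byte and
+ * halfword forms above, which add c << esize and round << (esize - 1)
+ * directly.
+ */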
+ int64_t m1 = (int64_t)a * b;
+ int64_t m2 = (int64_t)c << 31;
+ int64_t r;
+ if (sadd64_overflow(m1, m2, &r) ||
+ sadd64_overflow(r, (round << 30), &r) ||
+ sadd64_overflow(r, r, &r)) {
+ *sat = true;
+ return r < 0 ? INT32_MAX : INT32_MIN;
+ }
+ return r >> 32;
+}
+
+/*
+ * The *MLAH insns are vector * scalar + vector;
+ * the *MLASH insns are vector * vector + scalar
+ */
+#define DO_VQDMLAH_B(D, N, M, S) do_vqdmlah_b(N, M, D, 0, S)
+#define DO_VQDMLAH_H(D, N, M, S) do_vqdmlah_h(N, M, D, 0, S)
+#define DO_VQDMLAH_W(D, N, M, S) do_vqdmlah_w(N, M, D, 0, S)
+#define DO_VQRDMLAH_B(D, N, M, S) do_vqdmlah_b(N, M, D, 1, S)
+#define DO_VQRDMLAH_H(D, N, M, S) do_vqdmlah_h(N, M, D, 1, S)
+#define DO_VQRDMLAH_W(D, N, M, S) do_vqdmlah_w(N, M, D, 1, S)
+
+#define DO_VQDMLASH_B(D, N, M, S) do_vqdmlah_b(N, D, M, 0, S)
+#define DO_VQDMLASH_H(D, N, M, S) do_vqdmlah_h(N, D, M, 0, S)
+#define DO_VQDMLASH_W(D, N, M, S) do_vqdmlah_w(N, D, M, 0, S)
+#define DO_VQRDMLASH_B(D, N, M, S) do_vqdmlah_b(N, D, M, 1, S)
+#define DO_VQRDMLASH_H(D, N, M, S) do_vqdmlah_h(N, D, M, 1, S)
+#define DO_VQRDMLASH_W(D, N, M, S) do_vqdmlah_w(N, D, M, 1, S)
+
+DO_2OP_SAT_ACC_SCALAR(vqdmlahb, 1, int8_t, DO_VQDMLAH_B)
+DO_2OP_SAT_ACC_SCALAR(vqdmlahh, 2, int16_t, DO_VQDMLAH_H)
+DO_2OP_SAT_ACC_SCALAR(vqdmlahw, 4, int32_t, DO_VQDMLAH_W)
+DO_2OP_SAT_ACC_SCALAR(vqrdmlahb, 1, int8_t, DO_VQRDMLAH_B)
+DO_2OP_SAT_ACC_SCALAR(vqrdmlahh, 2, int16_t, DO_VQRDMLAH_H)
+DO_2OP_SAT_ACC_SCALAR(vqrdmlahw, 4, int32_t, DO_VQRDMLAH_W)
+
+DO_2OP_SAT_ACC_SCALAR(vqdmlashb, 1, int8_t, DO_VQDMLASH_B)
+DO_2OP_SAT_ACC_SCALAR(vqdmlashh, 2, int16_t, DO_VQDMLASH_H)
+DO_2OP_SAT_ACC_SCALAR(vqdmlashw, 4, int32_t, DO_VQDMLASH_W)
+DO_2OP_SAT_ACC_SCALAR(vqrdmlashb, 1, int8_t, DO_VQRDMLASH_B)
+DO_2OP_SAT_ACC_SCALAR(vqrdmlashh, 2, int16_t, DO_VQRDMLASH_H)
+DO_2OP_SAT_ACC_SCALAR(vqrdmlashw, 4, int32_t, DO_VQRDMLASH_W)
+
+/* Vector by scalar plus vector */
+#define DO_VMLA(D, N, M) ((N) * (M) + (D))
+
+DO_2OP_ACC_SCALAR_U(vmla, DO_VMLA)
+
+/* Vector by vector plus scalar */
+#define DO_VMLAS(D, N, M) ((N) * (D) + (M))
+
+DO_2OP_ACC_SCALAR_U(vmlas, DO_VMLAS)
+
+/*
+ * Long saturating scalar ops. As with DO_2OP_L, TYPE and H are for the
+ * input (smaller) type and LESIZE, LTYPE, LH for the output (long) type.
+ * SATMASK specifies which bits of the predicate mask matter for determining
+ * whether to propagate a saturation indication into FPSCR.QC -- for
+ * the 16x16->32 case we must check only the bit corresponding to the T or B
+ * half that we used, but for the 32x32->64 case we propagate if the mask
+ * bit is set for either half.
+ */
+#define DO_2OP_SAT_SCALAR_L(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN, SATMASK) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
+ uint32_t rm) \
+ { \
+ LTYPE *d = vd; \
+ TYPE *n = vn; \
+ TYPE m = rm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned le; \
+ bool qc = false; \
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
+ bool sat = false; \
+ LTYPE r = FN((LTYPE)n[H##ESIZE(le * 2 + TOP)], m, &sat); \
+ mergemask(&d[H##LESIZE(le)], r, mask); \
+ qc |= sat && (mask & SATMASK); \
+ } \
+ if (qc) { \
+ env->vfp.qc[0] = qc; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+static inline int32_t do_qdmullh(int16_t n, int16_t m, bool *sat)
+{
+ int64_t r = ((int64_t)n * m) * 2;
+ return do_sat_bhw(r, INT32_MIN, INT32_MAX, sat);
+}
+
+static inline int64_t do_qdmullw(int32_t n, int32_t m, bool *sat)
+{
+ /* The multiply can't overflow, but the doubling might */
+ int64_t r = (int64_t)n * m;
+ if (r > INT64_MAX / 2) {
+ *sat = true;
+ return INT64_MAX;
+ } else if (r < INT64_MIN / 2) {
+ *sat = true;
+ return INT64_MIN;
+ } else {
+ return r * 2;
+ }
+}
+
+#define SATMASK16B 1
+#define SATMASK16T (1 << 2)
+#define SATMASK32 ((1 << 4) | 1)
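+/*
+ * For the 16x16->32 ops the B and T input halfwords of an output element
+ * correspond to predicate bits [1:0] and [3:2] of the (already shifted)
+ * mask, and checking the low bit of the relevant pair is enough, hence
+ * SATMASK16B == 1 and SATMASK16T == 1 << 2; for 32x32->64 the two input
+ * words map to bits [3:0] and [7:4], giving SATMASK32 == (1 << 4) | 1.
+ */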
+
+DO_2OP_SAT_SCALAR_L(vqdmullb_scalarh, 0, 2, int16_t, 4, int32_t, \
+ do_qdmullh, SATMASK16B)
+DO_2OP_SAT_SCALAR_L(vqdmullb_scalarw, 0, 4, int32_t, 8, int64_t, \
+ do_qdmullw, SATMASK32)
+DO_2OP_SAT_SCALAR_L(vqdmullt_scalarh, 1, 2, int16_t, 4, int32_t, \
+ do_qdmullh, SATMASK16T)
+DO_2OP_SAT_SCALAR_L(vqdmullt_scalarw, 1, 4, int32_t, 8, int64_t, \
+ do_qdmullw, SATMASK32)
+
+/*
+ * Long saturating ops
+ */
+#define DO_2OP_SAT_L(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN, SATMASK) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
+ void *vm) \
+ { \
+ LTYPE *d = vd; \
+ TYPE *n = vn, *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned le; \
+ bool qc = false; \
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
+ bool sat = false; \
+ LTYPE op1 = n[H##ESIZE(le * 2 + TOP)]; \
+ LTYPE op2 = m[H##ESIZE(le * 2 + TOP)]; \
+ mergemask(&d[H##LESIZE(le)], FN(op1, op2, &sat), mask); \
+ qc |= sat && (mask & SATMASK); \
+ } \
+ if (qc) { \
+ env->vfp.qc[0] = qc; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+DO_2OP_SAT_L(vqdmullbh, 0, 2, int16_t, 4, int32_t, do_qdmullh, SATMASK16B)
+DO_2OP_SAT_L(vqdmullbw, 0, 4, int32_t, 8, int64_t, do_qdmullw, SATMASK32)
+DO_2OP_SAT_L(vqdmullth, 1, 2, int16_t, 4, int32_t, do_qdmullh, SATMASK16T)
+DO_2OP_SAT_L(vqdmulltw, 1, 4, int32_t, 8, int64_t, do_qdmullw, SATMASK32)
+
+static inline uint32_t do_vbrsrb(uint32_t n, uint32_t m)
+{
+ m &= 0xff;
+ if (m == 0) {
+ return 0;
+ }
+ n = revbit8(n);
+ if (m < 8) {
+ n >>= 8 - m;
+ }
+ return n;
+}
+
+static inline uint32_t do_vbrsrh(uint32_t n, uint32_t m)
+{
+ m &= 0xff;
+ if (m == 0) {
+ return 0;
+ }
+ n = revbit16(n);
+ if (m < 16) {
+ n >>= 16 - m;
+ }
+ return n;
+}
+
+static inline uint32_t do_vbrsrw(uint32_t n, uint32_t m)
+{
+ m &= 0xff;
+ if (m == 0) {
+ return 0;
+ }
+ n = revbit32(n);
+ if (m < 32) {
+ n >>= 32 - m;
+ }
+ return n;
+}
+
+DO_2OP_SCALAR(vbrsrb, 1, uint8_t, do_vbrsrb)
+DO_2OP_SCALAR(vbrsrh, 2, uint16_t, do_vbrsrh)
+DO_2OP_SCALAR(vbrsrw, 4, uint32_t, do_vbrsrw)
+
+/*
+ * Multiply add long dual accumulate ops.
+ */
+#define DO_LDAV(OP, ESIZE, TYPE, XCHG, EVENACC, ODDACC) \
+ uint64_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vn, \
+ void *vm, uint64_t a) \
+ { \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ TYPE *n = vn, *m = vm; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ if (mask & 1) { \
+ if (e & 1) { \
+ a ODDACC \
+ (int64_t)n[H##ESIZE(e - 1 * XCHG)] * m[H##ESIZE(e)]; \
+ } else { \
+ a EVENACC \
+ (int64_t)n[H##ESIZE(e + 1 * XCHG)] * m[H##ESIZE(e)]; \
+ } \
+ } \
+ } \
+ mve_advance_vpt(env); \
+ return a; \
+ }
+
+DO_LDAV(vmlaldavsh, 2, int16_t, false, +=, +=)
+DO_LDAV(vmlaldavxsh, 2, int16_t, true, +=, +=)
+DO_LDAV(vmlaldavsw, 4, int32_t, false, +=, +=)
+DO_LDAV(vmlaldavxsw, 4, int32_t, true, +=, +=)
+
+DO_LDAV(vmlaldavuh, 2, uint16_t, false, +=, +=)
+DO_LDAV(vmlaldavuw, 4, uint32_t, false, +=, +=)
+
+DO_LDAV(vmlsldavsh, 2, int16_t, false, +=, -=)
+DO_LDAV(vmlsldavxsh, 2, int16_t, true, +=, -=)
+DO_LDAV(vmlsldavsw, 4, int32_t, false, +=, -=)
+DO_LDAV(vmlsldavxsw, 4, int32_t, true, +=, -=)
+
+/*
+ * Multiply add dual accumulate ops
+ */
+#define DO_DAV(OP, ESIZE, TYPE, XCHG, EVENACC, ODDACC) \
+ uint32_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vn, \
+ void *vm, uint32_t a) \
+ { \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ TYPE *n = vn, *m = vm; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ if (mask & 1) { \
+ if (e & 1) { \
+ a ODDACC \
+ n[H##ESIZE(e - 1 * XCHG)] * m[H##ESIZE(e)]; \
+ } else { \
+ a EVENACC \
+ n[H##ESIZE(e + 1 * XCHG)] * m[H##ESIZE(e)]; \
+ } \
+ } \
+ } \
+ mve_advance_vpt(env); \
+ return a; \
+ }
+
+#define DO_DAV_S(INSN, XCHG, EVENACC, ODDACC) \
+ DO_DAV(INSN##b, 1, int8_t, XCHG, EVENACC, ODDACC) \
+ DO_DAV(INSN##h, 2, int16_t, XCHG, EVENACC, ODDACC) \
+ DO_DAV(INSN##w, 4, int32_t, XCHG, EVENACC, ODDACC)
+
+#define DO_DAV_U(INSN, XCHG, EVENACC, ODDACC) \
+ DO_DAV(INSN##b, 1, uint8_t, XCHG, EVENACC, ODDACC) \
+ DO_DAV(INSN##h, 2, uint16_t, XCHG, EVENACC, ODDACC) \
+ DO_DAV(INSN##w, 4, uint32_t, XCHG, EVENACC, ODDACC)
+
+DO_DAV_S(vmladavs, false, +=, +=)
+DO_DAV_U(vmladavu, false, +=, +=)
+DO_DAV_S(vmlsdav, false, +=, -=)
+DO_DAV_S(vmladavsx, true, +=, +=)
+DO_DAV_S(vmlsdavx, true, +=, -=)
+
+/*
+ * Rounding multiply add long dual accumulate high. In the pseudocode
+ * this is implemented with a 72-bit internal accumulator value of which
+ * the top 64 bits are returned. We optimize this to avoid having to
+ * use 128-bit arithmetic -- we can do this because the 74-bit accumulator
+ * is squashed back into 64-bits after each beat.
+ */
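+/*
+ * Each beat's product is pre-scaled with (mul >> 8) + ((mul >> 7) & 1),
+ * i.e. a rounding shift right by 8, before being added to the 64-bit
+ * accumulator; this is the per-beat squashing referred to above.
+ */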
+#define DO_LDAVH(OP, TYPE, LTYPE, XCHG, SUB) \
+ uint64_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vn, \
+ void *vm, uint64_t a) \
+ { \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ TYPE *n = vn, *m = vm; \
+ for (e = 0; e < 16 / 4; e++, mask >>= 4) { \
+ if (mask & 1) { \
+ LTYPE mul; \
+ if (e & 1) { \
+ mul = (LTYPE)n[H4(e - 1 * XCHG)] * m[H4(e)]; \
+ if (SUB) { \
+ mul = -mul; \
+ } \
+ } else { \
+ mul = (LTYPE)n[H4(e + 1 * XCHG)] * m[H4(e)]; \
+ } \
+ mul = (mul >> 8) + ((mul >> 7) & 1); \
+ a += mul; \
+ } \
+ } \
+ mve_advance_vpt(env); \
+ return a; \
+ }
+
+DO_LDAVH(vrmlaldavhsw, int32_t, int64_t, false, false)
+DO_LDAVH(vrmlaldavhxsw, int32_t, int64_t, true, false)
+
+DO_LDAVH(vrmlaldavhuw, uint32_t, uint64_t, false, false)
+
+DO_LDAVH(vrmlsldavhsw, int32_t, int64_t, false, true)
+DO_LDAVH(vrmlsldavhxsw, int32_t, int64_t, true, true)
+
+/* Vector add across vector */
+#define DO_VADDV(OP, ESIZE, TYPE) \
+ uint32_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vm, \
+ uint32_t ra) \
+ { \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ TYPE *m = vm; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ if (mask & 1) { \
+ ra += m[H##ESIZE(e)]; \
+ } \
+ } \
+ mve_advance_vpt(env); \
+ return ra; \
+ } \
+
+DO_VADDV(vaddvsb, 1, int8_t)
+DO_VADDV(vaddvsh, 2, int16_t)
+DO_VADDV(vaddvsw, 4, int32_t)
+DO_VADDV(vaddvub, 1, uint8_t)
+DO_VADDV(vaddvuh, 2, uint16_t)
+DO_VADDV(vaddvuw, 4, uint32_t)
+
+/*
+ * Vector max/min across vector. Unlike VADDV, we must
+ * read ra as the element size, not its full width.
+ * We work with int64_t internally for simplicity.
+ */
+#define DO_VMAXMINV(OP, ESIZE, TYPE, RATYPE, FN) \
+ uint32_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vm, \
+ uint32_t ra_in) \
+ { \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ TYPE *m = vm; \
+ int64_t ra = (RATYPE)ra_in; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ if (mask & 1) { \
+ ra = FN(ra, m[H##ESIZE(e)]); \
+ } \
+ } \
+ mve_advance_vpt(env); \
+ return ra; \
+ } \
+
+#define DO_VMAXMINV_U(INSN, FN) \
+ DO_VMAXMINV(INSN##b, 1, uint8_t, uint8_t, FN) \
+ DO_VMAXMINV(INSN##h, 2, uint16_t, uint16_t, FN) \
+ DO_VMAXMINV(INSN##w, 4, uint32_t, uint32_t, FN)
+#define DO_VMAXMINV_S(INSN, FN) \
+ DO_VMAXMINV(INSN##b, 1, int8_t, int8_t, FN) \
+ DO_VMAXMINV(INSN##h, 2, int16_t, int16_t, FN) \
+ DO_VMAXMINV(INSN##w, 4, int32_t, int32_t, FN)
+
+/*
+ * Helpers for max and min of absolute values across vector:
+ * note that we only take the absolute value of 'm', not 'n'
+ */
+static int64_t do_maxa(int64_t n, int64_t m)
+{
+ if (m < 0) {
+ m = -m;
+ }
+ return MAX(n, m);
+}
+
+static int64_t do_mina(int64_t n, int64_t m)
+{
+ if (m < 0) {
+ m = -m;
+ }
+ return MIN(n, m);
+}
+
+DO_VMAXMINV_S(vmaxvs, DO_MAX)
+DO_VMAXMINV_U(vmaxvu, DO_MAX)
+DO_VMAXMINV_S(vminvs, DO_MIN)
+DO_VMAXMINV_U(vminvu, DO_MIN)
+/*
+ * VMAXAV, VMINAV treat the general purpose input as unsigned
+ * and the vector elements as signed.
+ */
+DO_VMAXMINV(vmaxavb, 1, int8_t, uint8_t, do_maxa)
+DO_VMAXMINV(vmaxavh, 2, int16_t, uint16_t, do_maxa)
+DO_VMAXMINV(vmaxavw, 4, int32_t, uint32_t, do_maxa)
+DO_VMAXMINV(vminavb, 1, int8_t, uint8_t, do_mina)
+DO_VMAXMINV(vminavh, 2, int16_t, uint16_t, do_mina)
+DO_VMAXMINV(vminavw, 4, int32_t, uint32_t, do_mina)
+
+#define DO_VABAV(OP, ESIZE, TYPE) \
+ uint32_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vn, \
+ void *vm, uint32_t ra) \
+ { \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ TYPE *m = vm, *n = vn; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ if (mask & 1) { \
+ int64_t n0 = n[H##ESIZE(e)]; \
+ int64_t m0 = m[H##ESIZE(e)]; \
+ uint32_t r = n0 >= m0 ? (n0 - m0) : (m0 - n0); \
+ ra += r; \
+ } \
+ } \
+ mve_advance_vpt(env); \
+ return ra; \
+ }
+
+DO_VABAV(vabavsb, 1, int8_t)
+DO_VABAV(vabavsh, 2, int16_t)
+DO_VABAV(vabavsw, 4, int32_t)
+DO_VABAV(vabavub, 1, uint8_t)
+DO_VABAV(vabavuh, 2, uint16_t)
+DO_VABAV(vabavuw, 4, uint32_t)
+
+#define DO_VADDLV(OP, TYPE, LTYPE) \
+ uint64_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vm, \
+ uint64_t ra) \
+ { \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ TYPE *m = vm; \
+ for (e = 0; e < 16 / 4; e++, mask >>= 4) { \
+ if (mask & 1) { \
+ ra += (LTYPE)m[H4(e)]; \
+ } \
+ } \
+ mve_advance_vpt(env); \
+ return ra; \
+ } \
+
+DO_VADDLV(vaddlv_s, int32_t, int64_t)
+DO_VADDLV(vaddlv_u, uint32_t, uint64_t)
+
+/* Shifts by immediate */
+#define DO_2SHIFT(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
+ void *vm, uint32_t shift) \
+ { \
+ TYPE *d = vd, *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ mergemask(&d[H##ESIZE(e)], \
+ FN(m[H##ESIZE(e)], shift), mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_2SHIFT_SAT(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
+ void *vm, uint32_t shift) \
+ { \
+ TYPE *d = vd, *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ bool qc = false; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ bool sat = false; \
+ mergemask(&d[H##ESIZE(e)], \
+ FN(m[H##ESIZE(e)], shift, &sat), mask); \
+ qc |= sat & mask & 1; \
+ } \
+ if (qc) { \
+ env->vfp.qc[0] = qc; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+/* provide unsigned 2-op shift helpers for all sizes */
+#define DO_2SHIFT_U(OP, FN) \
+ DO_2SHIFT(OP##b, 1, uint8_t, FN) \
+ DO_2SHIFT(OP##h, 2, uint16_t, FN) \
+ DO_2SHIFT(OP##w, 4, uint32_t, FN)
+#define DO_2SHIFT_S(OP, FN) \
+ DO_2SHIFT(OP##b, 1, int8_t, FN) \
+ DO_2SHIFT(OP##h, 2, int16_t, FN) \
+ DO_2SHIFT(OP##w, 4, int32_t, FN)
+
+#define DO_2SHIFT_SAT_U(OP, FN) \
+ DO_2SHIFT_SAT(OP##b, 1, uint8_t, FN) \
+ DO_2SHIFT_SAT(OP##h, 2, uint16_t, FN) \
+ DO_2SHIFT_SAT(OP##w, 4, uint32_t, FN)
+#define DO_2SHIFT_SAT_S(OP, FN) \
+ DO_2SHIFT_SAT(OP##b, 1, int8_t, FN) \
+ DO_2SHIFT_SAT(OP##h, 2, int16_t, FN) \
+ DO_2SHIFT_SAT(OP##w, 4, int32_t, FN)
+
+DO_2SHIFT_U(vshli_u, DO_VSHLU)
+DO_2SHIFT_S(vshli_s, DO_VSHLS)
+DO_2SHIFT_SAT_U(vqshli_u, DO_UQSHL_OP)
+DO_2SHIFT_SAT_S(vqshli_s, DO_SQSHL_OP)
+DO_2SHIFT_SAT_S(vqshlui_s, DO_SUQSHL_OP)
+DO_2SHIFT_U(vrshli_u, DO_VRSHLU)
+DO_2SHIFT_S(vrshli_s, DO_VRSHLS)
+DO_2SHIFT_SAT_U(vqrshli_u, DO_UQRSHL_OP)
+DO_2SHIFT_SAT_S(vqrshli_s, DO_SQRSHL_OP)
+
+/* Shift-and-insert; we always work with 64 bits at a time */
+#define DO_2SHIFT_INSERT(OP, ESIZE, SHIFTFN, MASKFN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
+ void *vm, uint32_t shift) \
+ { \
+ uint64_t *d = vd, *m = vm; \
+ uint16_t mask; \
+ uint64_t shiftmask; \
+ unsigned e; \
+ if (shift == ESIZE * 8) { \
+ /* \
+ * Only VSRI can shift by <dt>; it should mean "don't \
+ * update the destination". The generic logic can't handle \
+ * this because it would try to shift by an out-of-range \
+ * amount, so special case it here. \
+ */ \
+ goto done; \
+ } \
+ assert(shift < ESIZE * 8); \
+ mask = mve_element_mask(env); \
+ /* ESIZE / 2 gives the MO_* value if ESIZE is in [1,2,4] */ \
+ shiftmask = dup_const(ESIZE / 2, MASKFN(ESIZE * 8, shift)); \
+ for (e = 0; e < 16 / 8; e++, mask >>= 8) { \
+ uint64_t r = (SHIFTFN(m[H8(e)], shift) & shiftmask) | \
+ (d[H8(e)] & ~shiftmask); \
+ mergemask(&d[H8(e)], r, mask); \
+ } \
+done: \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_SHL(N, SHIFT) ((N) << (SHIFT))
+#define DO_SHR(N, SHIFT) ((N) >> (SHIFT))
+#define SHL_MASK(EBITS, SHIFT) MAKE_64BIT_MASK((SHIFT), (EBITS) - (SHIFT))
+#define SHR_MASK(EBITS, SHIFT) MAKE_64BIT_MASK(0, (EBITS) - (SHIFT))
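+/*
+ * E.g. VSRI.16 with shift 4: SHR_MASK(16, 4) == 0xfff and
+ * dup_const(MO_16, 0xfff) == 0x0fff0fff0fff0fff, so each halfword of the
+ * result takes its low 12 bits from the shifted source and keeps the top
+ * 4 bits of the destination.
+ */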
+
+DO_2SHIFT_INSERT(vsrib, 1, DO_SHR, SHR_MASK)
+DO_2SHIFT_INSERT(vsrih, 2, DO_SHR, SHR_MASK)
+DO_2SHIFT_INSERT(vsriw, 4, DO_SHR, SHR_MASK)
+DO_2SHIFT_INSERT(vslib, 1, DO_SHL, SHL_MASK)
+DO_2SHIFT_INSERT(vslih, 2, DO_SHL, SHL_MASK)
+DO_2SHIFT_INSERT(vsliw, 4, DO_SHL, SHL_MASK)
+
+/*
+ * Long shifts taking half-sized inputs from top or bottom of the input
+ * vector and producing a double-width result. ESIZE, TYPE are for
+ * the input, and LESIZE, LTYPE for the output.
+ * Unlike the normal shift helpers, we do not handle negative shift counts,
+ * because the long shift is strictly left-only.
+ */
+#define DO_VSHLL(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
+ void *vm, uint32_t shift) \
+ { \
+ LTYPE *d = vd; \
+ TYPE *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned le; \
+ assert(shift <= 16); \
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
+ LTYPE r = (LTYPE)m[H##ESIZE(le * 2 + TOP)] << shift; \
+ mergemask(&d[H##LESIZE(le)], r, mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VSHLL_ALL(OP, TOP) \
+ DO_VSHLL(OP##sb, TOP, 1, int8_t, 2, int16_t) \
+ DO_VSHLL(OP##ub, TOP, 1, uint8_t, 2, uint16_t) \
+ DO_VSHLL(OP##sh, TOP, 2, int16_t, 4, int32_t) \
+ DO_VSHLL(OP##uh, TOP, 2, uint16_t, 4, uint32_t) \
+
+DO_VSHLL_ALL(vshllb, false)
+DO_VSHLL_ALL(vshllt, true)
+
+/*
+ * Narrowing right shifts, taking a double sized input, shifting it
+ * and putting the result in either the top or bottom half of the output.
+ * ESIZE, TYPE are the output, and LESIZE, LTYPE the input.
+ */
+#define DO_VSHRN(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
+ void *vm, uint32_t shift) \
+ { \
+ LTYPE *m = vm; \
+ TYPE *d = vd; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned le; \
+ mask >>= ESIZE * TOP; \
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
+ TYPE r = FN(m[H##LESIZE(le)], shift); \
+ mergemask(&d[H##ESIZE(le * 2 + TOP)], r, mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VSHRN_ALL(OP, FN) \
+ DO_VSHRN(OP##bb, false, 1, uint8_t, 2, uint16_t, FN) \
+ DO_VSHRN(OP##bh, false, 2, uint16_t, 4, uint32_t, FN) \
+ DO_VSHRN(OP##tb, true, 1, uint8_t, 2, uint16_t, FN) \
+ DO_VSHRN(OP##th, true, 2, uint16_t, 4, uint32_t, FN)
+
+static inline uint64_t do_urshr(uint64_t x, unsigned sh)
+{
+ if (likely(sh < 64)) {
+ return (x >> sh) + ((x >> (sh - 1)) & 1);
+ } else if (sh == 64) {
+ return x >> 63;
+ } else {
+ return 0;
+ }
+}
+
+static inline int64_t do_srshr(int64_t x, unsigned sh)
+{
+ if (likely(sh < 64)) {
+ return (x >> sh) + ((x >> (sh - 1)) & 1);
+ } else {
+ /* Rounding the sign bit always produces 0. */
+ return 0;
+ }
+}
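+
+/*
+ * E.g. do_urshr(31, 4) == (31 >> 4) + ((31 >> 3) & 1) == 1 + 1 == 2,
+ * i.e. 31/16 rounded to nearest; the sh == 64 branch keeps just the
+ * rounding carry when the entire input is shifted out.
+ */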
+
+DO_VSHRN_ALL(vshrn, DO_SHR)
+DO_VSHRN_ALL(vrshrn, do_urshr)
+
+static inline int32_t do_sat_bhs(int64_t val, int64_t min, int64_t max,
+ bool *satp)
+{
+ if (val > max) {
+ *satp = true;
+ return max;
+ } else if (val < min) {
+ *satp = true;
+ return min;
+ } else {
+ return val;
+ }
+}
+
+/* Saturating narrowing right shifts */
+#define DO_VSHRN_SAT(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
+ void *vm, uint32_t shift) \
+ { \
+ LTYPE *m = vm; \
+ TYPE *d = vd; \
+ uint16_t mask = mve_element_mask(env); \
+ bool qc = false; \
+ unsigned le; \
+ mask >>= ESIZE * TOP; \
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
+ bool sat = false; \
+ TYPE r = FN(m[H##LESIZE(le)], shift, &sat); \
+ mergemask(&d[H##ESIZE(le * 2 + TOP)], r, mask); \
+ qc |= sat & mask & 1; \
+ } \
+ if (qc) { \
+ env->vfp.qc[0] = qc; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VSHRN_SAT_UB(BOP, TOP, FN) \
+ DO_VSHRN_SAT(BOP, false, 1, uint8_t, 2, uint16_t, FN) \
+ DO_VSHRN_SAT(TOP, true, 1, uint8_t, 2, uint16_t, FN)
+
+#define DO_VSHRN_SAT_UH(BOP, TOP, FN) \
+ DO_VSHRN_SAT(BOP, false, 2, uint16_t, 4, uint32_t, FN) \
+ DO_VSHRN_SAT(TOP, true, 2, uint16_t, 4, uint32_t, FN)
+
+#define DO_VSHRN_SAT_SB(BOP, TOP, FN) \
+ DO_VSHRN_SAT(BOP, false, 1, int8_t, 2, int16_t, FN) \
+ DO_VSHRN_SAT(TOP, true, 1, int8_t, 2, int16_t, FN)
+
+#define DO_VSHRN_SAT_SH(BOP, TOP, FN) \
+ DO_VSHRN_SAT(BOP, false, 2, int16_t, 4, int32_t, FN) \
+ DO_VSHRN_SAT(TOP, true, 2, int16_t, 4, int32_t, FN)
+
+#define DO_SHRN_SB(N, M, SATP) \
+ do_sat_bhs((int64_t)(N) >> (M), INT8_MIN, INT8_MAX, SATP)
+#define DO_SHRN_UB(N, M, SATP) \
+ do_sat_bhs((uint64_t)(N) >> (M), 0, UINT8_MAX, SATP)
+#define DO_SHRUN_B(N, M, SATP) \
+ do_sat_bhs((int64_t)(N) >> (M), 0, UINT8_MAX, SATP)
+
+#define DO_SHRN_SH(N, M, SATP) \
+ do_sat_bhs((int64_t)(N) >> (M), INT16_MIN, INT16_MAX, SATP)
+#define DO_SHRN_UH(N, M, SATP) \
+ do_sat_bhs((uint64_t)(N) >> (M), 0, UINT16_MAX, SATP)
+#define DO_SHRUN_H(N, M, SATP) \
+ do_sat_bhs((int64_t)(N) >> (M), 0, UINT16_MAX, SATP)
+
+#define DO_RSHRN_SB(N, M, SATP) \
+ do_sat_bhs(do_srshr(N, M), INT8_MIN, INT8_MAX, SATP)
+#define DO_RSHRN_UB(N, M, SATP) \
+ do_sat_bhs(do_urshr(N, M), 0, UINT8_MAX, SATP)
+#define DO_RSHRUN_B(N, M, SATP) \
+ do_sat_bhs(do_srshr(N, M), 0, UINT8_MAX, SATP)
+
+#define DO_RSHRN_SH(N, M, SATP) \
+ do_sat_bhs(do_srshr(N, M), INT16_MIN, INT16_MAX, SATP)
+#define DO_RSHRN_UH(N, M, SATP) \
+ do_sat_bhs(do_urshr(N, M), 0, UINT16_MAX, SATP)
+#define DO_RSHRUN_H(N, M, SATP) \
+ do_sat_bhs(do_srshr(N, M), 0, UINT16_MAX, SATP)
+
+DO_VSHRN_SAT_SB(vqshrnb_sb, vqshrnt_sb, DO_SHRN_SB)
+DO_VSHRN_SAT_SH(vqshrnb_sh, vqshrnt_sh, DO_SHRN_SH)
+DO_VSHRN_SAT_UB(vqshrnb_ub, vqshrnt_ub, DO_SHRN_UB)
+DO_VSHRN_SAT_UH(vqshrnb_uh, vqshrnt_uh, DO_SHRN_UH)
+DO_VSHRN_SAT_SB(vqshrunbb, vqshruntb, DO_SHRUN_B)
+DO_VSHRN_SAT_SH(vqshrunbh, vqshrunth, DO_SHRUN_H)
+
+DO_VSHRN_SAT_SB(vqrshrnb_sb, vqrshrnt_sb, DO_RSHRN_SB)
+DO_VSHRN_SAT_SH(vqrshrnb_sh, vqrshrnt_sh, DO_RSHRN_SH)
+DO_VSHRN_SAT_UB(vqrshrnb_ub, vqrshrnt_ub, DO_RSHRN_UB)
+DO_VSHRN_SAT_UH(vqrshrnb_uh, vqrshrnt_uh, DO_RSHRN_UH)
+DO_VSHRN_SAT_SB(vqrshrunbb, vqrshruntb, DO_RSHRUN_B)
+DO_VSHRN_SAT_SH(vqrshrunbh, vqrshrunth, DO_RSHRUN_H)
+
+#define DO_VMOVN(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE) \
+ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm) \
+ { \
+ LTYPE *m = vm; \
+ TYPE *d = vd; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned le; \
+ mask >>= ESIZE * TOP; \
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
+ mergemask(&d[H##ESIZE(le * 2 + TOP)], \
+ m[H##LESIZE(le)], mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+DO_VMOVN(vmovnbb, false, 1, uint8_t, 2, uint16_t)
+DO_VMOVN(vmovnbh, false, 2, uint16_t, 4, uint32_t)
+DO_VMOVN(vmovntb, true, 1, uint8_t, 2, uint16_t)
+DO_VMOVN(vmovnth, true, 2, uint16_t, 4, uint32_t)
+
+#define DO_VMOVN_SAT(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN) \
+ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm) \
+ { \
+ LTYPE *m = vm; \
+ TYPE *d = vd; \
+ uint16_t mask = mve_element_mask(env); \
+ bool qc = false; \
+ unsigned le; \
+ mask >>= ESIZE * TOP; \
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
+ bool sat = false; \
+ TYPE r = FN(m[H##LESIZE(le)], &sat); \
+ mergemask(&d[H##ESIZE(le * 2 + TOP)], r, mask); \
+ qc |= sat & mask & 1; \
+ } \
+ if (qc) { \
+ env->vfp.qc[0] = qc; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VMOVN_SAT_UB(BOP, TOP, FN) \
+ DO_VMOVN_SAT(BOP, false, 1, uint8_t, 2, uint16_t, FN) \
+ DO_VMOVN_SAT(TOP, true, 1, uint8_t, 2, uint16_t, FN)
+
+#define DO_VMOVN_SAT_UH(BOP, TOP, FN) \
+ DO_VMOVN_SAT(BOP, false, 2, uint16_t, 4, uint32_t, FN) \
+ DO_VMOVN_SAT(TOP, true, 2, uint16_t, 4, uint32_t, FN)
+
+#define DO_VMOVN_SAT_SB(BOP, TOP, FN) \
+ DO_VMOVN_SAT(BOP, false, 1, int8_t, 2, int16_t, FN) \
+ DO_VMOVN_SAT(TOP, true, 1, int8_t, 2, int16_t, FN)
+
+#define DO_VMOVN_SAT_SH(BOP, TOP, FN) \
+ DO_VMOVN_SAT(BOP, false, 2, int16_t, 4, int32_t, FN) \
+ DO_VMOVN_SAT(TOP, true, 2, int16_t, 4, int32_t, FN)
+
+#define DO_VQMOVN_SB(N, SATP) \
+ do_sat_bhs((int64_t)(N), INT8_MIN, INT8_MAX, SATP)
+#define DO_VQMOVN_UB(N, SATP) \
+ do_sat_bhs((uint64_t)(N), 0, UINT8_MAX, SATP)
+#define DO_VQMOVUN_B(N, SATP) \
+ do_sat_bhs((int64_t)(N), 0, UINT8_MAX, SATP)
+
+#define DO_VQMOVN_SH(N, SATP) \
+ do_sat_bhs((int64_t)(N), INT16_MIN, INT16_MAX, SATP)
+#define DO_VQMOVN_UH(N, SATP) \
+ do_sat_bhs((uint64_t)(N), 0, UINT16_MAX, SATP)
+#define DO_VQMOVUN_H(N, SATP) \
+ do_sat_bhs((int64_t)(N), 0, UINT16_MAX, SATP)
+
+DO_VMOVN_SAT_SB(vqmovnbsb, vqmovntsb, DO_VQMOVN_SB)
+DO_VMOVN_SAT_SH(vqmovnbsh, vqmovntsh, DO_VQMOVN_SH)
+DO_VMOVN_SAT_UB(vqmovnbub, vqmovntub, DO_VQMOVN_UB)
+DO_VMOVN_SAT_UH(vqmovnbuh, vqmovntuh, DO_VQMOVN_UH)
+DO_VMOVN_SAT_SB(vqmovunbb, vqmovuntb, DO_VQMOVUN_B)
+DO_VMOVN_SAT_SH(vqmovunbh, vqmovunth, DO_VQMOVUN_H)
+
+uint32_t HELPER(mve_vshlc)(CPUARMState *env, void *vd, uint32_t rdm,
+ uint32_t shift)
+{
+ uint32_t *d = vd;
+ uint16_t mask = mve_element_mask(env);
+ unsigned e;
+ uint32_t r;
+
+ /*
+ * For each 32-bit element, we shift it left, bringing in the
+ * low 'shift' bits of rdm at the bottom. Bits shifted out at
+ * the top become the new rdm, if the predicate mask permits.
+ * The final rdm value is returned to update the register.
+ * shift == 0 here means "shift by 32 bits".
+ */
+ if (shift == 0) {
+ for (e = 0; e < 16 / 4; e++, mask >>= 4) {
+ r = rdm;
+ if (mask & 1) {
+ rdm = d[H4(e)];
+ }
+ mergemask(&d[H4(e)], r, mask);
+ }
+ } else {
+ uint32_t shiftmask = MAKE_64BIT_MASK(0, shift);
+
+ for (e = 0; e < 16 / 4; e++, mask >>= 4) {
+ r = (d[H4(e)] << shift) | (rdm & shiftmask);
+ if (mask & 1) {
+ rdm = d[H4(e)] >> (32 - shift);
+ }
+ mergemask(&d[H4(e)], r, mask);
+ }
+ }
+ mve_advance_vpt(env);
+ return rdm;
+}
+
+uint64_t HELPER(mve_sshrl)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_sqrshl_d(n, -(int8_t)shift, false, NULL);
+}
+
+uint64_t HELPER(mve_ushll)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_uqrshl_d(n, (int8_t)shift, false, NULL);
+}
+
+uint64_t HELPER(mve_sqshll)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_sqrshl_d(n, (int8_t)shift, false, &env->QF);
+}
+
+uint64_t HELPER(mve_uqshll)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_uqrshl_d(n, (int8_t)shift, false, &env->QF);
+}
+
+uint64_t HELPER(mve_sqrshrl)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_sqrshl_d(n, -(int8_t)shift, true, &env->QF);
+}
+
+uint64_t HELPER(mve_uqrshll)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_uqrshl_d(n, (int8_t)shift, true, &env->QF);
+}
+
+/* Operate on 64-bit values, but saturate at 48 bits */
+static inline int64_t do_sqrshl48_d(int64_t src, int64_t shift,
+ bool round, uint32_t *sat)
+{
+ int64_t val, extval;
+
+ if (shift <= -48) {
+ /* Rounding the sign bit always produces 0. */
+ if (round) {
+ return 0;
+ }
+ return src >> 63;
+ } else if (shift < 0) {
+ if (round) {
+ src >>= -shift - 1;
+ val = (src >> 1) + (src & 1);
+ } else {
+ val = src >> -shift;
+ }
+ extval = sextract64(val, 0, 48);
+ if (!sat || val == extval) {
+ return extval;
+ }
+ } else if (shift < 48) {
+ extval = sextract64(src << shift, 0, 48);
+ if (!sat || src == (extval >> shift)) {
+ return extval;
+ }
+ } else if (!sat || src == 0) {
+ return 0;
+ }
+
+ *sat = 1;
+ return src >= 0 ? MAKE_64BIT_MASK(0, 47) : MAKE_64BIT_MASK(47, 17);
+}
+
+/* Operate on 64-bit values, but saturate at 48 bits */
+static inline uint64_t do_uqrshl48_d(uint64_t src, int64_t shift,
+ bool round, uint32_t *sat)
+{
+ uint64_t val, extval;
+
+ if (shift <= -(48 + round)) {
+ return 0;
+ } else if (shift < 0) {
+ if (round) {
+ val = src >> (-shift - 1);
+ val = (val >> 1) + (val & 1);
+ } else {
+ val = src >> -shift;
+ }
+ extval = extract64(val, 0, 48);
+ if (!sat || val == extval) {
+ return extval;
+ }
+ } else if (shift < 48) {
+ extval = extract64(src << shift, 0, 48);
+ if (!sat || src == (extval >> shift)) {
+ return extval;
+ }
+ } else if (!sat || src == 0) {
+ return 0;
+ }
+
+ *sat = 1;
+ return MAKE_64BIT_MASK(0, 48);
+}
+
+uint64_t HELPER(mve_sqrshrl48)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_sqrshl48_d(n, -(int8_t)shift, true, &env->QF);
+}
+
+uint64_t HELPER(mve_uqrshll48)(CPUARMState *env, uint64_t n, uint32_t shift)
+{
+ return do_uqrshl48_d(n, (int8_t)shift, true, &env->QF);
+}
+
+uint32_t HELPER(mve_uqshl)(CPUARMState *env, uint32_t n, uint32_t shift)
+{
+ return do_uqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF);
+}
+
+uint32_t HELPER(mve_sqshl)(CPUARMState *env, uint32_t n, uint32_t shift)
+{
+ return do_sqrshl_bhs(n, (int8_t)shift, 32, false, &env->QF);
+}
+
+uint32_t HELPER(mve_uqrshl)(CPUARMState *env, uint32_t n, uint32_t shift)
+{
+ return do_uqrshl_bhs(n, (int8_t)shift, 32, true, &env->QF);
+}
+
+uint32_t HELPER(mve_sqrshr)(CPUARMState *env, uint32_t n, uint32_t shift)
+{
+ return do_sqrshl_bhs(n, -(int8_t)shift, 32, true, &env->QF);
+}
+
+#define DO_VIDUP(OP, ESIZE, TYPE, FN) \
+ uint32_t HELPER(mve_##OP)(CPUARMState *env, void *vd, \
+ uint32_t offset, uint32_t imm) \
+ { \
+ TYPE *d = vd; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ mergemask(&d[H##ESIZE(e)], offset, mask); \
+ offset = FN(offset, imm); \
+ } \
+ mve_advance_vpt(env); \
+ return offset; \
+ }
+
+#define DO_VIWDUP(OP, ESIZE, TYPE, FN) \
+ uint32_t HELPER(mve_##OP)(CPUARMState *env, void *vd, \
+ uint32_t offset, uint32_t wrap, \
+ uint32_t imm) \
+ { \
+ TYPE *d = vd; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ mergemask(&d[H##ESIZE(e)], offset, mask); \
+ offset = FN(offset, wrap, imm); \
+ } \
+ mve_advance_vpt(env); \
+ return offset; \
+ }
+
+#define DO_VIDUP_ALL(OP, FN) \
+ DO_VIDUP(OP##b, 1, int8_t, FN) \
+ DO_VIDUP(OP##h, 2, int16_t, FN) \
+ DO_VIDUP(OP##w, 4, int32_t, FN)
+
+#define DO_VIWDUP_ALL(OP, FN) \
+ DO_VIWDUP(OP##b, 1, int8_t, FN) \
+ DO_VIWDUP(OP##h, 2, int16_t, FN) \
+ DO_VIWDUP(OP##w, 4, int32_t, FN)
+
+static uint32_t do_add_wrap(uint32_t offset, uint32_t wrap, uint32_t imm)
+{
+ offset += imm;
+ if (offset == wrap) {
+ offset = 0;
+ }
+ return offset;
+}
+
+static uint32_t do_sub_wrap(uint32_t offset, uint32_t wrap, uint32_t imm)
+{
+ if (offset == 0) {
+ offset = wrap;
+ }
+ offset -= imm;
+ return offset;
+}
+
+DO_VIDUP_ALL(vidup, DO_ADD)
+DO_VIWDUP_ALL(viwdup, do_add_wrap)
+DO_VIWDUP_ALL(vdwdup, do_sub_wrap)
+
+/*
+ * Vector comparison.
+ * P0 bits for non-executed beats (where eci_mask is 0) are unchanged.
+ * P0 bits for predicated lanes in executed beats (where mask is 0) are 0.
+ * P0 bits otherwise are updated with the results of the comparisons.
+ * We must also keep unchanged the MASK fields at the top of v7m.vpr.
+ */
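+/*
+ * In the macros below each element's boolean result is replicated across
+ * ESIZE adjacent bits of beatpred via emask (two bits per halfword
+ * element, for example); beatpred &= mask then clears the bits of
+ * predicated lanes, and splicing through eci_mask leaves the P0 bits of
+ * non-executed beats unchanged, as described above.
+ */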
+#define DO_VCMP(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vn, void *vm) \
+ { \
+ TYPE *n = vn, *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ uint16_t eci_mask = mve_eci_mask(env); \
+ uint16_t beatpred = 0; \
+ uint16_t emask = MAKE_64BIT_MASK(0, ESIZE); \
+ unsigned e; \
+ for (e = 0; e < 16 / ESIZE; e++) { \
+ bool r = FN(n[H##ESIZE(e)], m[H##ESIZE(e)]); \
+ /* Comparison sets 0/1 bits for each byte in the element */ \
+ beatpred |= r * emask; \
+ emask <<= ESIZE; \
+ } \
+ beatpred &= mask; \
+ env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | \
+ (beatpred & eci_mask); \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VCMP_SCALAR(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vn, \
+ uint32_t rm) \
+ { \
+ TYPE *n = vn; \
+ uint16_t mask = mve_element_mask(env); \
+ uint16_t eci_mask = mve_eci_mask(env); \
+ uint16_t beatpred = 0; \
+ uint16_t emask = MAKE_64BIT_MASK(0, ESIZE); \
+ unsigned e; \
+ for (e = 0; e < 16 / ESIZE; e++) { \
+ bool r = FN(n[H##ESIZE(e)], (TYPE)rm); \
+ /* Comparison sets 0/1 bits for each byte in the element */ \
+ beatpred |= r * emask; \
+ emask <<= ESIZE; \
+ } \
+ beatpred &= mask; \
+ env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | \
+ (beatpred & eci_mask); \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VCMP_S(OP, FN) \
+ DO_VCMP(OP##b, 1, int8_t, FN) \
+ DO_VCMP(OP##h, 2, int16_t, FN) \
+ DO_VCMP(OP##w, 4, int32_t, FN) \
+ DO_VCMP_SCALAR(OP##_scalarb, 1, int8_t, FN) \
+ DO_VCMP_SCALAR(OP##_scalarh, 2, int16_t, FN) \
+ DO_VCMP_SCALAR(OP##_scalarw, 4, int32_t, FN)
+
+#define DO_VCMP_U(OP, FN) \
+ DO_VCMP(OP##b, 1, uint8_t, FN) \
+ DO_VCMP(OP##h, 2, uint16_t, FN) \
+ DO_VCMP(OP##w, 4, uint32_t, FN) \
+ DO_VCMP_SCALAR(OP##_scalarb, 1, uint8_t, FN) \
+ DO_VCMP_SCALAR(OP##_scalarh, 2, uint16_t, FN) \
+ DO_VCMP_SCALAR(OP##_scalarw, 4, uint32_t, FN)
+
+#define DO_EQ(N, M) ((N) == (M))
+#define DO_NE(N, M) ((N) != (M))
+#define DO_GE(N, M) ((N) >= (M))
+#define DO_LT(N, M) ((N) < (M))
+#define DO_GT(N, M) ((N) > (M))
+#define DO_LE(N, M) ((N) <= (M))
+
+DO_VCMP_U(vcmpeq, DO_EQ)
+DO_VCMP_U(vcmpne, DO_NE)
+DO_VCMP_U(vcmpcs, DO_GE)
+DO_VCMP_U(vcmphi, DO_GT)
+DO_VCMP_S(vcmpge, DO_GE)
+DO_VCMP_S(vcmplt, DO_LT)
+DO_VCMP_S(vcmpgt, DO_GT)
+DO_VCMP_S(vcmple, DO_LE)
+
+void HELPER(mve_vpsel)(CPUARMState *env, void *vd, void *vn, void *vm)
+{
+ /*
+ * Qd[n] = VPR.P0[n] ? Qn[n] : Qm[n]
+ * but note that whether bytes are written to Qd is still subject
+ * to (all forms of) predication in the usual way.
+ */
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint16_t mask = mve_element_mask(env);
+ uint16_t p0 = FIELD_EX32(env->v7m.vpr, V7M_VPR, P0);
+ unsigned e;
+ for (e = 0; e < 16 / 8; e++, mask >>= 8, p0 >>= 8) {
+ uint64_t r = m[H8(e)];
+ mergemask(&r, n[H8(e)], p0);
+ mergemask(&d[H8(e)], r, mask);
+ }
+ mve_advance_vpt(env);
+}
+
+void HELPER(mve_vpnot)(CPUARMState *env)
+{
+ /*
+ * P0 bits for unexecuted beats (where eci_mask is 0) are unchanged.
+ * P0 bits for predicated lanes in executed beats (where mask is 0) are 0.
+ * P0 bits otherwise are inverted.
+ * (This is the same logic as VCMP.)
+ * This insn is itself subject to predication and to beat-wise execution,
+ * and after it executes VPT state advances in the usual way.
+ */
+ uint16_t mask = mve_element_mask(env);
+ uint16_t eci_mask = mve_eci_mask(env);
+ uint16_t beatpred = ~env->v7m.vpr & mask;
+ env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | (beatpred & eci_mask);
+ mve_advance_vpt(env);
+}
+
+/*
+ * VCTP: P0 unexecuted bits unchanged, predicated bits zeroed,
+ * otherwise set according to value of Rn. The calculation of
+ * newmask here works in the same way as the calculation of the
+ * ltpmask in mve_element_mask(), but we have pre-calculated
+ * the masklen in the generated code.
+ */
+void HELPER(mve_vctp)(CPUARMState *env, uint32_t masklen)
+{
+ uint16_t mask = mve_element_mask(env);
+ uint16_t eci_mask = mve_eci_mask(env);
+ uint16_t newmask;
+
+ assert(masklen <= 16);
+ newmask = masklen ? MAKE_64BIT_MASK(0, masklen) : 0;
+ newmask &= mask;
+ env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | (newmask & eci_mask);
+ mve_advance_vpt(env);
+}
+
+#define DO_1OP_SAT(OP, ESIZE, TYPE, FN) \
+ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm) \
+ { \
+ TYPE *d = vd, *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ bool qc = false; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ bool sat = false; \
+ mergemask(&d[H##ESIZE(e)], FN(m[H##ESIZE(e)], &sat), mask); \
+ qc |= sat & mask & 1; \
+ } \
+ if (qc) { \
+ env->vfp.qc[0] = qc; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VQABS_B(N, SATP) \
+ do_sat_bhs(DO_ABS((int64_t)N), INT8_MIN, INT8_MAX, SATP)
+#define DO_VQABS_H(N, SATP) \
+ do_sat_bhs(DO_ABS((int64_t)N), INT16_MIN, INT16_MAX, SATP)
+#define DO_VQABS_W(N, SATP) \
+ do_sat_bhs(DO_ABS((int64_t)N), INT32_MIN, INT32_MAX, SATP)
+
+#define DO_VQNEG_B(N, SATP) do_sat_bhs(-(int64_t)N, INT8_MIN, INT8_MAX, SATP)
+#define DO_VQNEG_H(N, SATP) do_sat_bhs(-(int64_t)N, INT16_MIN, INT16_MAX, SATP)
+#define DO_VQNEG_W(N, SATP) do_sat_bhs(-(int64_t)N, INT32_MIN, INT32_MAX, SATP)
+
+DO_1OP_SAT(vqabsb, 1, int8_t, DO_VQABS_B)
+DO_1OP_SAT(vqabsh, 2, int16_t, DO_VQABS_H)
+DO_1OP_SAT(vqabsw, 4, int32_t, DO_VQABS_W)
+
+DO_1OP_SAT(vqnegb, 1, int8_t, DO_VQNEG_B)
+DO_1OP_SAT(vqnegh, 2, int16_t, DO_VQNEG_H)
+DO_1OP_SAT(vqnegw, 4, int32_t, DO_VQNEG_W)
+
+/*
+ * VMAXA, VMINA: vd is unsigned; vm is signed, and we take its
+ * absolute value; we then do an unsigned comparison.
+ */
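+/* For example, VMAXA with d[e] = 5 and m[e] = -7 stores max(5, 7) = 7. */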
+#define DO_VMAXMINA(OP, ESIZE, STYPE, UTYPE, FN) \
+ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm) \
+ { \
+ UTYPE *d = vd; \
+ STYPE *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ UTYPE r = DO_ABS(m[H##ESIZE(e)]); \
+ r = FN(d[H##ESIZE(e)], r); \
+ mergemask(&d[H##ESIZE(e)], r, mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+DO_VMAXMINA(vmaxab, 1, int8_t, uint8_t, DO_MAX)
+DO_VMAXMINA(vmaxah, 2, int16_t, uint16_t, DO_MAX)
+DO_VMAXMINA(vmaxaw, 4, int32_t, uint32_t, DO_MAX)
+DO_VMAXMINA(vminab, 1, int8_t, uint8_t, DO_MIN)
+DO_VMAXMINA(vminah, 2, int16_t, uint16_t, DO_MIN)
+DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN)
+
+/*
+ * 2-operand floating point. Note that if an element is partially
+ * predicated we must do the FP operation to update the non-predicated
+ * bytes, but we must be careful to avoid updating the FP exception
+ * state unless byte 0 of the element was unpredicated.
+ */
+#define DO_2OP_FP(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, \
+ void *vd, void *vn, void *vm) \
+ { \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ TYPE r; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ float_status *fpst; \
+ float_status scratch_fpst; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
+ continue; \
+ } \
+ fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ &env->vfp.standard_fp_status; \
+ if (!(mask & 1)) { \
+ /* We need the result but without updating flags */ \
+ scratch_fpst = *fpst; \
+ fpst = &scratch_fpst; \
+ } \
+ r = FN(n[H##ESIZE(e)], m[H##ESIZE(e)], fpst); \
+ mergemask(&d[H##ESIZE(e)], r, mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_2OP_FP_ALL(OP, FN) \
+ DO_2OP_FP(OP##h, 2, float16, float16_##FN) \
+ DO_2OP_FP(OP##s, 4, float32, float32_##FN)
+
+DO_2OP_FP_ALL(vfadd, add)
+DO_2OP_FP_ALL(vfsub, sub)
+DO_2OP_FP_ALL(vfmul, mul)
+
+static inline float16 float16_abd(float16 a, float16 b, float_status *s)
+{
+ return float16_abs(float16_sub(a, b, s));
+}
+
+static inline float32 float32_abd(float32 a, float32 b, float_status *s)
+{
+ return float32_abs(float32_sub(a, b, s));
+}
+
+DO_2OP_FP_ALL(vfabd, abd)
+DO_2OP_FP_ALL(vmaxnm, maxnum)
+DO_2OP_FP_ALL(vminnm, minnum)
+
+static inline float16 float16_maxnuma(float16 a, float16 b, float_status *s)
+{
+ return float16_maxnum(float16_abs(a), float16_abs(b), s);
+}
+
+static inline float32 float32_maxnuma(float32 a, float32 b, float_status *s)
+{
+ return float32_maxnum(float32_abs(a), float32_abs(b), s);
+}
+
+static inline float16 float16_minnuma(float16 a, float16 b, float_status *s)
+{
+ return float16_minnum(float16_abs(a), float16_abs(b), s);
+}
+
+static inline float32 float32_minnuma(float32 a, float32 b, float_status *s)
+{
+ return float32_minnum(float32_abs(a), float32_abs(b), s);
+}
+
+DO_2OP_FP_ALL(vmaxnma, maxnuma)
+DO_2OP_FP_ALL(vminnma, minnuma)
+
+#define DO_VCADD_FP(OP, ESIZE, TYPE, FN0, FN1) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, \
+ void *vd, void *vn, void *vm) \
+ { \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ TYPE r[16 / ESIZE]; \
+ uint16_t tm, mask = mve_element_mask(env); \
+ unsigned e; \
+ float_status *fpst; \
+ float_status scratch_fpst; \
+ /* Calculate all results first to avoid overwriting inputs */ \
+ for (e = 0, tm = mask; e < 16 / ESIZE; e++, tm >>= ESIZE) { \
+ if ((tm & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
+ r[e] = 0; \
+ continue; \
+ } \
+ fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ &env->vfp.standard_fp_status; \
+ if (!(tm & 1)) { \
+ /* We need the result but without updating flags */ \
+ scratch_fpst = *fpst; \
+ fpst = &scratch_fpst; \
+ } \
+ if (!(e & 1)) { \
+ r[e] = FN0(n[H##ESIZE(e)], m[H##ESIZE(e + 1)], fpst); \
+ } else { \
+ r[e] = FN1(n[H##ESIZE(e)], m[H##ESIZE(e - 1)], fpst); \
+ } \
+ } \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ mergemask(&d[H##ESIZE(e)], r[e], mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+DO_VCADD_FP(vfcadd90h, 2, float16, float16_sub, float16_add)
+DO_VCADD_FP(vfcadd90s, 4, float32, float32_sub, float32_add)
+DO_VCADD_FP(vfcadd270h, 2, float16, float16_add, float16_sub)
+DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub)
+
+#define DO_VFMA(OP, ESIZE, TYPE, CHS) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, \
+ void *vd, void *vn, void *vm) \
+ { \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ TYPE r; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ float_status *fpst; \
+ float_status scratch_fpst; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
+ continue; \
+ } \
+ fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ &env->vfp.standard_fp_status; \
+ if (!(mask & 1)) { \
+ /* We need the result but without updating flags */ \
+ scratch_fpst = *fpst; \
+ fpst = &scratch_fpst; \
+ } \
+ r = n[H##ESIZE(e)]; \
+ if (CHS) { \
+ r = TYPE##_chs(r); \
+ } \
+ r = TYPE##_muladd(r, m[H##ESIZE(e)], d[H##ESIZE(e)], \
+ 0, fpst); \
+ mergemask(&d[H##ESIZE(e)], r, mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+DO_VFMA(vfmah, 2, float16, false)
+DO_VFMA(vfmas, 4, float32, false)
+DO_VFMA(vfmsh, 2, float16, true)
+DO_VFMA(vfmss, 4, float32, true)
+
+#define DO_VCMLA(OP, ESIZE, TYPE, ROT, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, \
+ void *vd, void *vn, void *vm) \
+ { \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ TYPE r0, r1, e1, e2, e3, e4; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ float_status *fpst0, *fpst1; \
+ float_status scratch_fpst; \
+ /* We loop through pairs of elements at a time */ \
+ for (e = 0; e < 16 / ESIZE; e += 2, mask >>= ESIZE * 2) { \
+ if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \
+ continue; \
+ } \
+ fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ &env->vfp.standard_fp_status; \
+ fpst1 = fpst0; \
+ if (!(mask & 1)) { \
+ scratch_fpst = *fpst0; \
+ fpst0 = &scratch_fpst; \
+ } \
+ if (!(mask & (1 << ESIZE))) { \
+ scratch_fpst = *fpst1; \
+ fpst1 = &scratch_fpst; \
+ } \
+ switch (ROT) { \
+ case 0: \
+ e1 = m[H##ESIZE(e)]; \
+ e2 = n[H##ESIZE(e)]; \
+ e3 = m[H##ESIZE(e + 1)]; \
+ e4 = n[H##ESIZE(e)]; \
+ break; \
+ case 1: \
+ e1 = TYPE##_chs(m[H##ESIZE(e + 1)]); \
+ e2 = n[H##ESIZE(e + 1)]; \
+ e3 = m[H##ESIZE(e)]; \
+ e4 = n[H##ESIZE(e + 1)]; \
+ break; \
+ case 2: \
+ e1 = TYPE##_chs(m[H##ESIZE(e)]); \
+ e2 = n[H##ESIZE(e)]; \
+ e3 = TYPE##_chs(m[H##ESIZE(e + 1)]); \
+ e4 = n[H##ESIZE(e)]; \
+ break; \
+ case 3: \
+ e1 = m[H##ESIZE(e + 1)]; \
+ e2 = n[H##ESIZE(e + 1)]; \
+ e3 = TYPE##_chs(m[H##ESIZE(e)]); \
+ e4 = n[H##ESIZE(e + 1)]; \
+ break; \
+ default: \
+ g_assert_not_reached(); \
+ } \
+ r0 = FN(e2, e1, d[H##ESIZE(e)], fpst0); \
+ r1 = FN(e4, e3, d[H##ESIZE(e + 1)], fpst1); \
+ mergemask(&d[H##ESIZE(e)], r0, mask); \
+ mergemask(&d[H##ESIZE(e + 1)], r1, mask >> ESIZE); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VCMULH(N, M, D, S) float16_mul(N, M, S)
+#define DO_VCMULS(N, M, D, S) float32_mul(N, M, S)
+
+#define DO_VCMLAH(N, M, D, S) float16_muladd(N, M, D, 0, S)
+#define DO_VCMLAS(N, M, D, S) float32_muladd(N, M, D, 0, S)
+
+DO_VCMLA(vcmul0h, 2, float16, 0, DO_VCMULH)
+DO_VCMLA(vcmul0s, 4, float32, 0, DO_VCMULS)
+DO_VCMLA(vcmul90h, 2, float16, 1, DO_VCMULH)
+DO_VCMLA(vcmul90s, 4, float32, 1, DO_VCMULS)
+DO_VCMLA(vcmul180h, 2, float16, 2, DO_VCMULH)
+DO_VCMLA(vcmul180s, 4, float32, 2, DO_VCMULS)
+DO_VCMLA(vcmul270h, 2, float16, 3, DO_VCMULH)
+DO_VCMLA(vcmul270s, 4, float32, 3, DO_VCMULS)
+
+DO_VCMLA(vcmla0h, 2, float16, 0, DO_VCMLAH)
+DO_VCMLA(vcmla0s, 4, float32, 0, DO_VCMLAS)
+DO_VCMLA(vcmla90h, 2, float16, 1, DO_VCMLAH)
+DO_VCMLA(vcmla90s, 4, float32, 1, DO_VCMLAS)
+DO_VCMLA(vcmla180h, 2, float16, 2, DO_VCMLAH)
+DO_VCMLA(vcmla180s, 4, float32, 2, DO_VCMLAS)
+DO_VCMLA(vcmla270h, 2, float16, 3, DO_VCMLAH)
+DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS)
+
+#define DO_2OP_FP_SCALAR(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, \
+ void *vd, void *vn, uint32_t rm) \
+ { \
+ TYPE *d = vd, *n = vn; \
+ TYPE r, m = rm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ float_status *fpst; \
+ float_status scratch_fpst; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
+ continue; \
+ } \
+ fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ &env->vfp.standard_fp_status; \
+ if (!(mask & 1)) { \
+ /* We need the result but without updating flags */ \
+ scratch_fpst = *fpst; \
+ fpst = &scratch_fpst; \
+ } \
+ r = FN(n[H##ESIZE(e)], m, fpst); \
+ mergemask(&d[H##ESIZE(e)], r, mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_2OP_FP_SCALAR_ALL(OP, FN) \
+ DO_2OP_FP_SCALAR(OP##h, 2, float16, float16_##FN) \
+ DO_2OP_FP_SCALAR(OP##s, 4, float32, float32_##FN)
+
+DO_2OP_FP_SCALAR_ALL(vfadd_scalar, add)
+DO_2OP_FP_SCALAR_ALL(vfsub_scalar, sub)
+DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul)
+
+#define DO_2OP_FP_ACC_SCALAR(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, \
+ void *vd, void *vn, uint32_t rm) \
+ { \
+ TYPE *d = vd, *n = vn; \
+ TYPE r, m = rm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ float_status *fpst; \
+ float_status scratch_fpst; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
+ continue; \
+ } \
+ fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ &env->vfp.standard_fp_status; \
+ if (!(mask & 1)) { \
+ /* We need the result but without updating flags */ \
+ scratch_fpst = *fpst; \
+ fpst = &scratch_fpst; \
+ } \
+ r = FN(n[H##ESIZE(e)], m, d[H##ESIZE(e)], 0, fpst); \
+ mergemask(&d[H##ESIZE(e)], r, mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+/* VFMAS is vector * vector + scalar, so swap op2 and op3 */
+#define DO_VFMAS_SCALARH(N, M, D, F, S) float16_muladd(N, D, M, F, S)
+#define DO_VFMAS_SCALARS(N, M, D, F, S) float32_muladd(N, D, M, F, S)
+
+/* VFMA is vector * scalar + vector */
+DO_2OP_FP_ACC_SCALAR(vfma_scalarh, 2, float16, float16_muladd)
+DO_2OP_FP_ACC_SCALAR(vfma_scalars, 4, float32, float32_muladd)
+DO_2OP_FP_ACC_SCALAR(vfmas_scalarh, 2, float16, DO_VFMAS_SCALARH)
+DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS)
+
+/* Floating point max/min across vector. */
+#define DO_FP_VMAXMINV(OP, ESIZE, TYPE, ABS, FN) \
+ uint32_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vm, \
+ uint32_t ra_in) \
+ { \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ TYPE *m = vm; \
+ TYPE ra = (TYPE)ra_in; \
+ float_status *fpst = (ESIZE == 2) ? \
+ &env->vfp.standard_fp_status_f16 : \
+ &env->vfp.standard_fp_status; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ if (mask & 1) { \
+ TYPE v = m[H##ESIZE(e)]; \
+ if (TYPE##_is_signaling_nan(ra, fpst)) { \
+ ra = TYPE##_silence_nan(ra, fpst); \
+ float_raise(float_flag_invalid, fpst); \
+ } \
+ if (TYPE##_is_signaling_nan(v, fpst)) { \
+ v = TYPE##_silence_nan(v, fpst); \
+ float_raise(float_flag_invalid, fpst); \
+ } \
+ if (ABS) { \
+ v = TYPE##_abs(v); \
+ } \
+ ra = FN(ra, v, fpst); \
+ } \
+ } \
+ mve_advance_vpt(env); \
+ return ra; \
+ }
+
+#define NOP(X) (X)
+
+DO_FP_VMAXMINV(vmaxnmvh, 2, float16, false, float16_maxnum)
+DO_FP_VMAXMINV(vmaxnmvs, 4, float32, false, float32_maxnum)
+DO_FP_VMAXMINV(vminnmvh, 2, float16, false, float16_minnum)
+DO_FP_VMAXMINV(vminnmvs, 4, float32, false, float32_minnum)
+DO_FP_VMAXMINV(vmaxnmavh, 2, float16, true, float16_maxnum)
+DO_FP_VMAXMINV(vmaxnmavs, 4, float32, true, float32_maxnum)
+DO_FP_VMAXMINV(vminnmavh, 2, float16, true, float16_minnum)
+DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
+
+/* FP compares; note that all comparisons signal InvalidOp for QNaNs */
+#define DO_VCMP_FP(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vn, void *vm) \
+ { \
+ TYPE *n = vn, *m = vm; \
+ uint16_t mask = mve_element_mask(env); \
+ uint16_t eci_mask = mve_eci_mask(env); \
+ uint16_t beatpred = 0; \
+ uint16_t emask = MAKE_64BIT_MASK(0, ESIZE); \
+ unsigned e; \
+ float_status *fpst; \
+ float_status scratch_fpst; \
+ bool r; \
+ for (e = 0; e < 16 / ESIZE; e++, emask <<= ESIZE) { \
+ if ((mask & emask) == 0) { \
+ continue; \
+ } \
+ fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ &env->vfp.standard_fp_status; \
+ if (!(mask & (1 << (e * ESIZE)))) { \
+ /* We need the result but without updating flags */ \
+ scratch_fpst = *fpst; \
+ fpst = &scratch_fpst; \
+ } \
+ r = FN(n[H##ESIZE(e)], m[H##ESIZE(e)], fpst); \
+ /* Comparison sets 0/1 bits for each byte in the element */ \
+ beatpred |= r * emask; \
+ } \
+ beatpred &= mask; \
+ env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | \
+ (beatpred & eci_mask); \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VCMP_FP_SCALAR(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vn, \
+ uint32_t rm) \
+ { \
+ TYPE *n = vn; \
+ uint16_t mask = mve_element_mask(env); \
+ uint16_t eci_mask = mve_eci_mask(env); \
+ uint16_t beatpred = 0; \
+ uint16_t emask = MAKE_64BIT_MASK(0, ESIZE); \
+ unsigned e; \
+ float_status *fpst; \
+ float_status scratch_fpst; \
+ bool r; \
+ for (e = 0; e < 16 / ESIZE; e++, emask <<= ESIZE) { \
+ if ((mask & emask) == 0) { \
+ continue; \
+ } \
+ fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ &env->vfp.standard_fp_status; \
+ if (!(mask & (1 << (e * ESIZE)))) { \
+ /* We need the result but without updating flags */ \
+ scratch_fpst = *fpst; \
+ fpst = &scratch_fpst; \
+ } \
+ r = FN(n[H##ESIZE(e)], (TYPE)rm, fpst); \
+ /* Comparison sets 0/1 bits for each byte in the element */ \
+ beatpred |= r * emask; \
+ } \
+ beatpred &= mask; \
+ env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | \
+ (beatpred & eci_mask); \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VCMP_FP_BOTH(VOP, SOP, ESIZE, TYPE, FN) \
+ DO_VCMP_FP(VOP, ESIZE, TYPE, FN) \
+ DO_VCMP_FP_SCALAR(SOP, ESIZE, TYPE, FN)
+
+/*
+ * Some care is needed here to get the correct result for the unordered case.
+ * Architecturally EQ, GE and GT are defined to be false for unordered, but
+ * the NE, LT and LE comparisons are defined as simple logical inverses of
+ * EQ, GE and GT and so they must return true for unordered. The softfloat
+ * comparison functions float*_{eq,le,lt} all return false for unordered.
+ */
+#define DO_GE16(X, Y, S) float16_le(Y, X, S)
+#define DO_GE32(X, Y, S) float32_le(Y, X, S)
+#define DO_GT16(X, Y, S) float16_lt(Y, X, S)
+#define DO_GT32(X, Y, S) float32_lt(Y, X, S)
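+/*
+ * Thus for a NaN operand float16_le()/float32_le() return false, so the
+ * GE and GT results are false as required, while the inverted forms used
+ * below for NE, LT and LE come out true for the unordered case.
+ */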
+
+DO_VCMP_FP_BOTH(vfcmpeqh, vfcmpeq_scalarh, 2, float16, float16_eq)
+DO_VCMP_FP_BOTH(vfcmpeqs, vfcmpeq_scalars, 4, float32, float32_eq)
+
+DO_VCMP_FP_BOTH(vfcmpneh, vfcmpne_scalarh, 2, float16, !float16_eq)
+DO_VCMP_FP_BOTH(vfcmpnes, vfcmpne_scalars, 4, float32, !float32_eq)
+
+DO_VCMP_FP_BOTH(vfcmpgeh, vfcmpge_scalarh, 2, float16, DO_GE16)
+DO_VCMP_FP_BOTH(vfcmpges, vfcmpge_scalars, 4, float32, DO_GE32)
+
+DO_VCMP_FP_BOTH(vfcmplth, vfcmplt_scalarh, 2, float16, !DO_GE16)
+DO_VCMP_FP_BOTH(vfcmplts, vfcmplt_scalars, 4, float32, !DO_GE32)
+
+DO_VCMP_FP_BOTH(vfcmpgth, vfcmpgt_scalarh, 2, float16, DO_GT16)
+DO_VCMP_FP_BOTH(vfcmpgts, vfcmpgt_scalars, 4, float32, DO_GT32)
+
+DO_VCMP_FP_BOTH(vfcmpleh, vfcmple_scalarh, 2, float16, !DO_GT16)
+DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32)
+
+#define DO_VCVT_FIXED(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vm, \
+ uint32_t shift) \
+ { \
+ TYPE *d = vd, *m = vm; \
+ TYPE r; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ float_status *fpst; \
+ float_status scratch_fpst; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
+ continue; \
+ } \
+ fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ &env->vfp.standard_fp_status; \
+ if (!(mask & 1)) { \
+ /* We need the result but without updating flags */ \
+ scratch_fpst = *fpst; \
+ fpst = &scratch_fpst; \
+ } \
+ r = FN(m[H##ESIZE(e)], shift, fpst); \
+ mergemask(&d[H##ESIZE(e)], r, mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+DO_VCVT_FIXED(vcvt_sh, 2, int16_t, helper_vfp_shtoh)
+DO_VCVT_FIXED(vcvt_uh, 2, uint16_t, helper_vfp_uhtoh)
+DO_VCVT_FIXED(vcvt_hs, 2, int16_t, helper_vfp_toshh_round_to_zero)
+DO_VCVT_FIXED(vcvt_hu, 2, uint16_t, helper_vfp_touhh_round_to_zero)
+DO_VCVT_FIXED(vcvt_sf, 4, int32_t, helper_vfp_sltos)
+DO_VCVT_FIXED(vcvt_uf, 4, uint32_t, helper_vfp_ultos)
+DO_VCVT_FIXED(vcvt_fs, 4, int32_t, helper_vfp_tosls_round_to_zero)
+DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero)
+
+/* VCVT with specified rmode */
+#define DO_VCVT_RMODE(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, \
+ void *vd, void *vm, uint32_t rmode) \
+ { \
+ TYPE *d = vd, *m = vm; \
+ TYPE r; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ float_status *fpst; \
+ float_status scratch_fpst; \
+ float_status *base_fpst = (ESIZE == 2) ? \
+ &env->vfp.standard_fp_status_f16 : \
+ &env->vfp.standard_fp_status; \
+ uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \
+ set_float_rounding_mode(rmode, base_fpst); \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
+ continue; \
+ } \
+ fpst = base_fpst; \
+ if (!(mask & 1)) { \
+ /* We need the result but without updating flags */ \
+ scratch_fpst = *fpst; \
+ fpst = &scratch_fpst; \
+ } \
+ r = FN(m[H##ESIZE(e)], 0, fpst); \
+ mergemask(&d[H##ESIZE(e)], r, mask); \
+ } \
+ set_float_rounding_mode(prev_rmode, base_fpst); \
+ mve_advance_vpt(env); \
+ }
+
+DO_VCVT_RMODE(vcvt_rm_sh, 2, uint16_t, helper_vfp_toshh)
+DO_VCVT_RMODE(vcvt_rm_uh, 2, uint16_t, helper_vfp_touhh)
+DO_VCVT_RMODE(vcvt_rm_ss, 4, uint32_t, helper_vfp_tosls)
+DO_VCVT_RMODE(vcvt_rm_us, 4, uint32_t, helper_vfp_touls)
+
+#define DO_VRINT_RM_H(M, F, S) helper_rinth(M, S)
+#define DO_VRINT_RM_S(M, F, S) helper_rints(M, S)
+
+DO_VCVT_RMODE(vrint_rm_h, 2, uint16_t, DO_VRINT_RM_H)
+DO_VCVT_RMODE(vrint_rm_s, 4, uint32_t, DO_VRINT_RM_S)
+
+/*
+ * VCVT between halfprec and singleprec. As usual for halfprec
+ * conversions, FZ16 is ignored and AHP is observed.
+ */
+static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top)
+{
+ uint16_t *d = vd;
+ uint32_t *m = vm;
+ uint16_t r;
+ uint16_t mask = mve_element_mask(env);
+ bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP);
+ unsigned e;
+ float_status *fpst;
+ float_status scratch_fpst;
+ float_status *base_fpst = &env->vfp.standard_fp_status;
+ bool old_fz = get_flush_to_zero(base_fpst);
+ set_flush_to_zero(false, base_fpst);
+ for (e = 0; e < 16 / 4; e++, mask >>= 4) {
+ if ((mask & MAKE_64BIT_MASK(0, 4)) == 0) {
+ continue;
+ }
+ fpst = base_fpst;
+ if (!(mask & 1)) {
+ /* We need the result but without updating flags */
+ scratch_fpst = *fpst;
+ fpst = &scratch_fpst;
+ }
+ r = float32_to_float16(m[H4(e)], ieee, fpst);
+ mergemask(&d[H2(e * 2 + top)], r, mask >> (top * 2));
+ }
+ set_flush_to_zero(old_fz, base_fpst);
+ mve_advance_vpt(env);
+}
+
+static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top)
+{
+ uint32_t *d = vd;
+ uint16_t *m = vm;
+ uint32_t r;
+ uint16_t mask = mve_element_mask(env);
+ bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP);
+ unsigned e;
+ float_status *fpst;
+ float_status scratch_fpst;
+ float_status *base_fpst = &env->vfp.standard_fp_status;
+ bool old_fiz = get_flush_inputs_to_zero(base_fpst);
+ set_flush_inputs_to_zero(false, base_fpst);
+ for (e = 0; e < 16 / 4; e++, mask >>= 4) {
+ if ((mask & MAKE_64BIT_MASK(0, 4)) == 0) {
+ continue;
+ }
+ fpst = base_fpst;
+ if (!(mask & (1 << (top * 2)))) {
+ /* We need the result but without updating flags */
+ scratch_fpst = *fpst;
+ fpst = &scratch_fpst;
+ }
+ r = float16_to_float32(m[H2(e * 2 + top)], ieee, fpst);
+ mergemask(&d[H4(e)], r, mask);
+ }
+ set_flush_inputs_to_zero(old_fiz, base_fpst);
+ mve_advance_vpt(env);
+}
+
+void HELPER(mve_vcvtb_sh)(CPUARMState *env, void *vd, void *vm)
+{
+ do_vcvt_sh(env, vd, vm, 0);
+}
+void HELPER(mve_vcvtt_sh)(CPUARMState *env, void *vd, void *vm)
+{
+ do_vcvt_sh(env, vd, vm, 1);
+}
+void HELPER(mve_vcvtb_hs)(CPUARMState *env, void *vd, void *vm)
+{
+ do_vcvt_hs(env, vd, vm, 0);
+}
+void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm)
+{
+ do_vcvt_hs(env, vd, vm, 1);
+}
+
+#define DO_1OP_FP(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vm) \
+ { \
+ TYPE *d = vd, *m = vm; \
+ TYPE r; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ float_status *fpst; \
+ float_status scratch_fpst; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
+ continue; \
+ } \
+ fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ &env->vfp.standard_fp_status; \
+ if (!(mask & 1)) { \
+ /* We need the result but without updating flags */ \
+ scratch_fpst = *fpst; \
+ fpst = &scratch_fpst; \
+ } \
+ r = FN(m[H##ESIZE(e)], fpst); \
+ mergemask(&d[H##ESIZE(e)], r, mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+DO_1OP_FP(vrintx_h, 2, float16, float16_round_to_int)
+DO_1OP_FP(vrintx_s, 4, float32, float32_round_to_int)
diff --git a/target/arm/tcg/neon-dp.decode b/target/arm/tcg/neon-dp.decode
new file mode 100644
index 0000000000..fd3a01bfa0
--- /dev/null
+++ b/target/arm/tcg/neon-dp.decode
@@ -0,0 +1,646 @@
+# AArch32 Neon data-processing instruction descriptions
+#
+# Copyright (c) 2020 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+# VFP/Neon register fields; same as vfp.decode
+%vm_dp 5:1 0:4
+%vn_dp 7:1 16:4
+%vd_dp 22:1 12:4
+
+# Encodings for Neon data processing instructions where the T32 encoding
+# is a simple transformation of the A32 encoding.
+# More specifically, this file covers instructions where the A32 encoding is
+# 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
+# and the T32 encoding is
+# 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
+# This file works on the A32 encoding only; calling code for T32 has to
+# transform the insn into the A32 version first.
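+# (For example, the A32 top byte is 0xF2/0xF3 for p=0/1, corresponding to
+# the T32 top byte 0xEF/0xFF.)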
+
+######################################################################
+# 3-reg-same grouping:
+# 1111 001 U 0 D sz:2 Vn:4 Vd:4 opc:4 N Q M op Vm:4
+######################################################################
+
+&3same vm vn vd q size
+
+@3same .... ... . . . size:2 .... .... .... . q:1 . . .... \
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+@3same_q0 .... ... . . . size:2 .... .... .... . 0 . . .... \
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
+
+# For FP insns the high bit of 'size' is used as part of opcode decode,
+# and the 'size' bit is 0 for 32-bit float and 1 for 16-bit float.
+# This converts this encoding to the same MO_8/16/32/64 values that the
+# integer neon insns use.
+%3same_fp_size 20:1 !function=neon_3same_fp_size
+
+@3same_fp .... ... . . . . . .... .... .... . q:1 . . .... \
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=%3same_fp_size
+@3same_fp_q0 .... ... . . . . . .... .... .... . 0 . . .... \
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0 size=%3same_fp_size
+
+VHADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 0 .... @3same
+VHADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
+VQADD_S_3s 1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
+VQADD_U_3s 1111 001 1 0 . .. .... .... 0000 . . . 1 .... @3same
+
+VRHADD_S_3s 1111 001 0 0 . .. .... .... 0001 . . . 0 .... @3same
+VRHADD_U_3s 1111 001 1 0 . .. .... .... 0001 . . . 0 .... @3same
+
+@3same_logic .... ... . . . .. .... .... .... . q:1 .. .... \
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0
+
+VAND_3s 1111 001 0 0 . 00 .... .... 0001 ... 1 .... @3same_logic
+VBIC_3s 1111 001 0 0 . 01 .... .... 0001 ... 1 .... @3same_logic
+VORR_3s 1111 001 0 0 . 10 .... .... 0001 ... 1 .... @3same_logic
+VORN_3s 1111 001 0 0 . 11 .... .... 0001 ... 1 .... @3same_logic
+VEOR_3s 1111 001 1 0 . 00 .... .... 0001 ... 1 .... @3same_logic
+VBSL_3s 1111 001 1 0 . 01 .... .... 0001 ... 1 .... @3same_logic
+VBIT_3s 1111 001 1 0 . 10 .... .... 0001 ... 1 .... @3same_logic
+VBIF_3s 1111 001 1 0 . 11 .... .... 0001 ... 1 .... @3same_logic
+
+VHSUB_S_3s 1111 001 0 0 . .. .... .... 0010 . . . 0 .... @3same
+VHSUB_U_3s 1111 001 1 0 . .. .... .... 0010 . . . 0 .... @3same
+
+VQSUB_S_3s 1111 001 0 0 . .. .... .... 0010 . . . 1 .... @3same
+VQSUB_U_3s 1111 001 1 0 . .. .... .... 0010 . . . 1 .... @3same
+
+VCGT_S_3s 1111 001 0 0 . .. .... .... 0011 . . . 0 .... @3same
+VCGT_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 0 .... @3same
+VCGE_S_3s 1111 001 0 0 . .. .... .... 0011 . . . 1 .... @3same
+VCGE_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 1 .... @3same
+
+# The _rev suffix indicates that Vn and Vm are reversed. This is
+# the case for shifts. In the Arm ARM these insns are documented
+# with the Vm and Vn fields in their usual places, but in the
+# assembly the operands are listed "backwards", ie in the order
+# Dd, Dm, Dn where other insns use Dd, Dn, Dm. For QEMU we choose
+# to consider Vm and Vn as being in different fields in the insn,
+# which allows us to avoid special-casing shifts in the trans_
+# function code. We would otherwise need to manually swap the operands
+# over to call Neon helper functions that are shared with AArch64,
+# which does not have this odd reversed-operand situation.
+@3same_rev .... ... . . . size:2 .... .... .... . q:1 . . .... \
+ &3same vn=%vm_dp vm=%vn_dp vd=%vd_dp
+
+VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same_rev
+VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same_rev
+
+# Insns operating on 64-bit elements (size!=0b11 handled elsewhere)
+# The _rev suffix indicates that Vn and Vm are reversed (as explained
+# by the comment for the @3same_rev format).
+@3same_64_rev .... ... . . . 11 .... .... .... . q:1 . . .... \
+ &3same vm=%vn_dp vn=%vm_dp vd=%vd_dp size=3
+
+{
+ VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
+ VQSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_rev
+}
+{
+ VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev
+ VQSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_rev
+}
+{
+ VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
+ VRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_rev
+}
+{
+ VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev
+ VRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_rev
+}
+{
+ VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
+ VQRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_rev
+}
+{
+ VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev
+ VQRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_rev
+}
+
+VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same
+VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... @3same
+VMIN_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 1 .... @3same
+VMIN_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 1 .... @3same
+
+VABD_S_3s 1111 001 0 0 . .. .... .... 0111 . . . 0 .... @3same
+VABD_U_3s 1111 001 1 0 . .. .... .... 0111 . . . 0 .... @3same
+
+VABA_S_3s 1111 001 0 0 . .. .... .... 0111 . . . 1 .... @3same
+VABA_U_3s 1111 001 1 0 . .. .... .... 0111 . . . 1 .... @3same
+
+VADD_3s 1111 001 0 0 . .. .... .... 1000 . . . 0 .... @3same
+VSUB_3s 1111 001 1 0 . .. .... .... 1000 . . . 0 .... @3same
+
+VTST_3s 1111 001 0 0 . .. .... .... 1000 . . . 1 .... @3same
+VCEQ_3s 1111 001 1 0 . .. .... .... 1000 . . . 1 .... @3same
+
+VMLA_3s 1111 001 0 0 . .. .... .... 1001 . . . 0 .... @3same
+VMLS_3s 1111 001 1 0 . .. .... .... 1001 . . . 0 .... @3same
+
+VMUL_3s 1111 001 0 0 . .. .... .... 1001 . . . 1 .... @3same
+VMUL_p_3s 1111 001 1 0 . .. .... .... 1001 . . . 1 .... @3same
+
+VPMAX_S_3s 1111 001 0 0 . .. .... .... 1010 . . . 0 .... @3same_q0
+VPMAX_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 0 .... @3same_q0
+
+VPMIN_S_3s 1111 001 0 0 . .. .... .... 1010 . . . 1 .... @3same_q0
+VPMIN_U_3s 1111 001 1 0 . .. .... .... 1010 . . . 1 .... @3same_q0
+
+VQDMULH_3s 1111 001 0 0 . .. .... .... 1011 . . . 0 .... @3same
+VQRDMULH_3s 1111 001 1 0 . .. .... .... 1011 . . . 0 .... @3same
+
+VPADD_3s 1111 001 0 0 . .. .... .... 1011 . . . 1 .... @3same_q0
+
+VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
+
+@3same_crypto .... .... .... .... .... .... .... .... \
+ &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0 q=1
+
+SHA1C_3s 1111 001 0 0 . 00 .... .... 1100 . 1 . 0 .... @3same_crypto
+SHA1P_3s 1111 001 0 0 . 01 .... .... 1100 . 1 . 0 .... @3same_crypto
+SHA1M_3s 1111 001 0 0 . 10 .... .... 1100 . 1 . 0 .... @3same_crypto
+SHA1SU0_3s 1111 001 0 0 . 11 .... .... 1100 . 1 . 0 .... @3same_crypto
+SHA256H_3s 1111 001 1 0 . 00 .... .... 1100 . 1 . 0 .... @3same_crypto
+SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... @3same_crypto
+SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... @3same_crypto
+
+VFMA_fp_3s 1111 001 0 0 . 0 . .... .... 1100 ... 1 .... @3same_fp
+VFMS_fp_3s 1111 001 0 0 . 1 . .... .... 1100 ... 1 .... @3same_fp
+
+VQRDMLSH_3s 1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
+
+VADD_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
+VSUB_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
+VPADD_fp_3s 1111 001 1 0 . 0 . .... .... 1101 ... 0 .... @3same_fp_q0
+VABD_fp_3s 1111 001 1 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
+VMLA_fp_3s 1111 001 0 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
+VMLS_fp_3s 1111 001 0 0 . 1 . .... .... 1101 ... 1 .... @3same_fp
+VMUL_fp_3s 1111 001 1 0 . 0 . .... .... 1101 ... 1 .... @3same_fp
+VCEQ_fp_3s 1111 001 0 0 . 0 . .... .... 1110 ... 0 .... @3same_fp
+VCGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 0 .... @3same_fp
+VACGE_fp_3s 1111 001 1 0 . 0 . .... .... 1110 ... 1 .... @3same_fp
+VCGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 0 .... @3same_fp
+VACGT_fp_3s 1111 001 1 0 . 1 . .... .... 1110 ... 1 .... @3same_fp
+VMAX_fp_3s 1111 001 0 0 . 0 . .... .... 1111 ... 0 .... @3same_fp
+VMIN_fp_3s 1111 001 0 0 . 1 . .... .... 1111 ... 0 .... @3same_fp
+VPMAX_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 0 .... @3same_fp_q0
+VPMIN_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 0 .... @3same_fp_q0
+VRECPS_fp_3s 1111 001 0 0 . 0 . .... .... 1111 ... 1 .... @3same_fp
+VRSQRTS_fp_3s 1111 001 0 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
+VMAXNM_fp_3s 1111 001 1 0 . 0 . .... .... 1111 ... 1 .... @3same_fp
+VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
+
+######################################################################
+# 2-reg-and-shift grouping:
+# 1111 001 U 1 D immH:3 immL:3 Vd:4 opc:4 L Q M 1 Vm:4
+######################################################################
+&2reg_shift vm vd q shift size
+
+# Right shifts are encoded as N - shift, where N is the element size in bits.
+%neon_rshift_i6 16:6 !function=rsub_64
+%neon_rshift_i5 16:5 !function=rsub_32
+%neon_rshift_i4 16:4 !function=rsub_16
+%neon_rshift_i3 16:3 !function=rsub_8
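+# For example, a right shift by 1 on 32-bit elements is encoded with the
+# 5-bit field equal to 31 (rsub_32: 32 - 31 = 1), i.e. imm6 == 0b111111.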
+
+@2reg_shr_d .... ... . . . ...... .... .... 1 q:1 . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=3 shift=%neon_rshift_i6
+@2reg_shr_s .... ... . . . 1 ..... .... .... 0 q:1 . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=2 shift=%neon_rshift_i5
+@2reg_shr_h .... ... . . . 01 .... .... .... 0 q:1 . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=1 shift=%neon_rshift_i4
+@2reg_shr_b .... ... . . . 001 ... .... .... 0 q:1 . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=0 shift=%neon_rshift_i3
+
+@2reg_shl_d .... ... . . . shift:6 .... .... 1 q:1 . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=3
+@2reg_shl_s .... ... . . . 1 shift:5 .... .... 0 q:1 . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=2
+@2reg_shl_h .... ... . . . 01 shift:4 .... .... 0 q:1 . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=1
+@2reg_shl_b .... ... . . . 001 shift:3 .... .... 0 q:1 . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=0
+
+# Narrowing right shifts: here the Q bit is part of the opcode decode
+@2reg_shrn_d .... ... . . . 1 ..... .... .... 0 . . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=3 q=0 \
+ shift=%neon_rshift_i5
+@2reg_shrn_s .... ... . . . 01 .... .... .... 0 . . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=2 q=0 \
+ shift=%neon_rshift_i4
+@2reg_shrn_h .... ... . . . 001 ... .... .... 0 . . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0 \
+ shift=%neon_rshift_i3
+
+# Long left shifts: again Q is part of opcode decode
+@2reg_shll_s .... ... . . . 1 shift:5 .... .... 0 . . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=2 q=0
+@2reg_shll_h .... ... . . . 01 shift:4 .... .... 0 . . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0
+@2reg_shll_b .... ... . . . 001 shift:3 .... .... 0 . . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=0 q=0
+
+@2reg_vcvt .... ... . . . 1 ..... .... .... . q:1 . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=2 shift=%neon_rshift_i5
+@2reg_vcvt_f16 .... ... . . . 11 .... .... .... . q:1 . . .... \
+ &2reg_shift vm=%vm_dp vd=%vd_dp size=1 shift=%neon_rshift_i4
+
+VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d
+VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s
+VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h
+VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_b
+
+VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d
+VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s
+VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h
+VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_b
+
+VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_d
+VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_s
+VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_h
+VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_b
+
+VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_d
+VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_s
+VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_h
+VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_b
+
+VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_d
+VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_s
+VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_h
+VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_b
+
+VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_d
+VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_s
+VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_h
+VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_b
+
+VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_d
+VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_s
+VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_h
+VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_b
+
+VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_d
+VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_s
+VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_h
+VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_b
+
+VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_d
+VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_s
+VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_h
+VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_b
+
+VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_d
+VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s
+VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h
+VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_b
+
+VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_d
+VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s
+VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h
+VSLI_2sh 1111 001 1 1 . ...... .... 0101 . . . 1 .... @2reg_shl_b
+
+VQSHLU_64_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_d
+VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_s
+VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_h
+VQSHLU_2sh 1111 001 1 1 . ...... .... 0110 . . . 1 .... @2reg_shl_b
+
+VQSHL_S_64_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
+VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s
+VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h
+VQSHL_S_2sh 1111 001 0 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b
+
+VQSHL_U_64_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
+VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s
+VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h
+VQSHL_U_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b
+
+VSHRN_64_2sh 1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_d
+VSHRN_32_2sh 1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_s
+VSHRN_16_2sh 1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_h
+
+VRSHRN_64_2sh 1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_d
+VRSHRN_32_2sh 1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_s
+VRSHRN_16_2sh 1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_h
+
+VQSHRUN_64_2sh 1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_d
+VQSHRUN_32_2sh 1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_s
+VQSHRUN_16_2sh 1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_h
+
+VQRSHRUN_64_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_d
+VQRSHRUN_32_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_s
+VQRSHRUN_16_2sh 1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_h
+
+# VQSHRN with signed input
+VQSHRN_S64_2sh 1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_d
+VQSHRN_S32_2sh 1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_s
+VQSHRN_S16_2sh 1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_h
+
+# VQRSHRN with signed input
+VQRSHRN_S64_2sh 1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_d
+VQRSHRN_S32_2sh 1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_s
+VQRSHRN_S16_2sh 1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_h
+
+# VQSHRN with unsigned input
+VQSHRN_U64_2sh 1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_d
+VQSHRN_U32_2sh 1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_s
+VQSHRN_U16_2sh 1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_h
+
+# VQRSHRN with unsigned input
+VQRSHRN_U64_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_d
+VQRSHRN_U32_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_s
+VQRSHRN_U16_2sh 1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_h
+
+VSHLL_S_2sh 1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_s
+VSHLL_S_2sh 1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_h
+VSHLL_S_2sh 1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b
+
+VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_s
+VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_h
+VSHLL_U_2sh 1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b
+
+# VCVT fixed<->float conversions
+VCVT_SH_2sh 1111 001 0 1 . ...... .... 1100 0 . . 1 .... @2reg_vcvt_f16
+VCVT_UH_2sh 1111 001 1 1 . ...... .... 1100 0 . . 1 .... @2reg_vcvt_f16
+VCVT_HS_2sh 1111 001 0 1 . ...... .... 1101 0 . . 1 .... @2reg_vcvt_f16
+VCVT_HU_2sh 1111 001 1 1 . ...... .... 1101 0 . . 1 .... @2reg_vcvt_f16
+
+VCVT_SF_2sh 1111 001 0 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt
+VCVT_UF_2sh 1111 001 1 1 . ...... .... 1110 0 . . 1 .... @2reg_vcvt
+VCVT_FS_2sh 1111 001 0 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt
+VCVT_FU_2sh 1111 001 1 1 . ...... .... 1111 0 . . 1 .... @2reg_vcvt
+
+######################################################################
+# 1-reg-and-modified-immediate grouping:
+# 1111 001 i 1 D 000 imm:3 Vd:4 cmode:4 0 Q op 1 Vm:4
+######################################################################
+
+&1reg_imm vd q imm cmode op
+
+%asimd_imm_value 24:1 16:3 0:4
+
+@1reg_imm .... ... . . . ... ... .... .... . q:1 . . .... \
+ &1reg_imm imm=%asimd_imm_value vd=%vd_dp
+
+# The cmode/op bits here decode VORR/VBIC/VMOV/VMVN, but
+# not in a way we can conveniently represent in decodetree without
+# a lot of repetition:
+# VORR: op=0, (cmode & 1) && cmode < 12
+# VBIC: op=1, (cmode & 1) && cmode < 12
+# VMOV: everything else
+# So we have a single decode line and check the cmode/op in the
+# trans function.
+Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
+
+######################################################################
+# Within the "two registers, or three registers of different lengths"
+# grouping ([23,4]=0b10), bits [21:20] are either part of the opcode
+# decode: 0b11 for VEXT, two-reg-misc, VTBL, and duplicate-scalar;
+# or they are a size field for the three-reg-different-lengths and
+# two-reg-and-scalar insn groups (where size cannot be 0b11). This
+# is slightly awkward for decodetree: we handle it with this
+# non-exclusive group which contains within it two exclusive groups:
+# one for the size=0b11 patterns, and one for the size-not-0b11
+# patterns. This allows us to check that none of the insns within
+# each subgroup accidentally overlap each other. Note that all the
+# trans functions for the size-not-0b11 patterns must check and
+# return false for size==3.
+######################################################################
+{
+ [
+ ##################################################################
+ # Miscellaneous size=0b11 insns
+ ##################################################################
+ VEXT 1111 001 0 1 . 11 .... .... imm:4 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+ VTBL 1111 001 1 1 . 11 .... .... 10 len:2 . op:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+ VDUP_scalar 1111 001 1 1 . 11 index:3 1 .... 11 000 q:1 . 0 .... \
+ vm=%vm_dp vd=%vd_dp size=0
+ VDUP_scalar 1111 001 1 1 . 11 index:2 10 .... 11 000 q:1 . 0 .... \
+ vm=%vm_dp vd=%vd_dp size=1
+ VDUP_scalar 1111 001 1 1 . 11 index:1 100 .... 11 000 q:1 . 0 .... \
+ vm=%vm_dp vd=%vd_dp size=2
+
+ ##################################################################
+ # 2-reg-misc grouping:
+ # 1111 001 11 D 11 size:2 opc1:2 Vd:4 0 opc2:4 q:1 M 0 Vm:4
+ ##################################################################
+
+ &2misc vd vm q size
+
+ @2misc .... ... .. . .. size:2 .. .... . .... q:1 . . .... \
+ &2misc vm=%vm_dp vd=%vd_dp
+ @2misc_q0 .... ... .. . .. size:2 .. .... . .... . . . .... \
+ &2misc vm=%vm_dp vd=%vd_dp q=0
+ @2misc_q1 .... ... .. . .. size:2 .. .... . .... . . . .... \
+ &2misc vm=%vm_dp vd=%vd_dp q=1
+
+ VREV64 1111 001 11 . 11 .. 00 .... 0 0000 . . 0 .... @2misc
+ VREV32 1111 001 11 . 11 .. 00 .... 0 0001 . . 0 .... @2misc
+ VREV16 1111 001 11 . 11 .. 00 .... 0 0010 . . 0 .... @2misc
+
+ VPADDL_S 1111 001 11 . 11 .. 00 .... 0 0100 . . 0 .... @2misc
+ VPADDL_U 1111 001 11 . 11 .. 00 .... 0 0101 . . 0 .... @2misc
+
+ AESE 1111 001 11 . 11 .. 00 .... 0 0110 0 . 0 .... @2misc_q1
+ AESD 1111 001 11 . 11 .. 00 .... 0 0110 1 . 0 .... @2misc_q1
+ AESMC 1111 001 11 . 11 .. 00 .... 0 0111 0 . 0 .... @2misc_q1
+ AESIMC 1111 001 11 . 11 .. 00 .... 0 0111 1 . 0 .... @2misc_q1
+
+ VCLS 1111 001 11 . 11 .. 00 .... 0 1000 . . 0 .... @2misc
+ VCLZ 1111 001 11 . 11 .. 00 .... 0 1001 . . 0 .... @2misc
+ VCNT 1111 001 11 . 11 .. 00 .... 0 1010 . . 0 .... @2misc
+
+ VMVN 1111 001 11 . 11 .. 00 .... 0 1011 . . 0 .... @2misc
+
+ VPADAL_S 1111 001 11 . 11 .. 00 .... 0 1100 . . 0 .... @2misc
+ VPADAL_U 1111 001 11 . 11 .. 00 .... 0 1101 . . 0 .... @2misc
+
+ VQABS 1111 001 11 . 11 .. 00 .... 0 1110 . . 0 .... @2misc
+ VQNEG 1111 001 11 . 11 .. 00 .... 0 1111 . . 0 .... @2misc
+
+ VCGT0 1111 001 11 . 11 .. 01 .... 0 0000 . . 0 .... @2misc
+ VCGE0 1111 001 11 . 11 .. 01 .... 0 0001 . . 0 .... @2misc
+ VCEQ0 1111 001 11 . 11 .. 01 .... 0 0010 . . 0 .... @2misc
+ VCLE0 1111 001 11 . 11 .. 01 .... 0 0011 . . 0 .... @2misc
+ VCLT0 1111 001 11 . 11 .. 01 .... 0 0100 . . 0 .... @2misc
+
+ SHA1H 1111 001 11 . 11 .. 01 .... 0 0101 1 . 0 .... @2misc_q1
+
+ VABS 1111 001 11 . 11 .. 01 .... 0 0110 . . 0 .... @2misc
+ VNEG 1111 001 11 . 11 .. 01 .... 0 0111 . . 0 .... @2misc
+
+ VCGT0_F 1111 001 11 . 11 .. 01 .... 0 1000 . . 0 .... @2misc
+ VCGE0_F 1111 001 11 . 11 .. 01 .... 0 1001 . . 0 .... @2misc
+ VCEQ0_F 1111 001 11 . 11 .. 01 .... 0 1010 . . 0 .... @2misc
+ VCLE0_F 1111 001 11 . 11 .. 01 .... 0 1011 . . 0 .... @2misc
+ VCLT0_F 1111 001 11 . 11 .. 01 .... 0 1100 . . 0 .... @2misc
+
+ VABS_F 1111 001 11 . 11 .. 01 .... 0 1110 . . 0 .... @2misc
+ VNEG_F 1111 001 11 . 11 .. 01 .... 0 1111 . . 0 .... @2misc
+
+ VSWP 1111 001 11 . 11 .. 10 .... 0 0000 . . 0 .... @2misc
+ VTRN 1111 001 11 . 11 .. 10 .... 0 0001 . . 0 .... @2misc
+ VUZP 1111 001 11 . 11 .. 10 .... 0 0010 . . 0 .... @2misc
+ VZIP 1111 001 11 . 11 .. 10 .... 0 0011 . . 0 .... @2misc
+
+ VMOVN 1111 001 11 . 11 .. 10 .... 0 0100 0 . 0 .... @2misc_q0
+ # VQMOVUN: unsigned result (source is always signed)
+ VQMOVUN 1111 001 11 . 11 .. 10 .... 0 0100 1 . 0 .... @2misc_q0
+ # VQMOVN: signed result, source may be signed (_S) or unsigned (_U)
+ VQMOVN_S 1111 001 11 . 11 .. 10 .... 0 0101 0 . 0 .... @2misc_q0
+ VQMOVN_U 1111 001 11 . 11 .. 10 .... 0 0101 1 . 0 .... @2misc_q0
+
+ VSHLL 1111 001 11 . 11 .. 10 .... 0 0110 0 . 0 .... @2misc_q0
+
+ SHA1SU1 1111 001 11 . 11 .. 10 .... 0 0111 0 . 0 .... @2misc_q1
+ SHA256SU0 1111 001 11 . 11 .. 10 .... 0 0111 1 . 0 .... @2misc_q1
+
+ VRINTN 1111 001 11 . 11 .. 10 .... 0 1000 . . 0 .... @2misc
+ VRINTX 1111 001 11 . 11 .. 10 .... 0 1001 . . 0 .... @2misc
+ VRINTA 1111 001 11 . 11 .. 10 .... 0 1010 . . 0 .... @2misc
+ VRINTZ 1111 001 11 . 11 .. 10 .... 0 1011 . . 0 .... @2misc
+
+ VCVT_F16_F32 1111 001 11 . 11 .. 10 .... 0 1100 0 . 0 .... @2misc_q0
+ VCVT_B16_F32 1111 001 11 . 11 .. 10 .... 0 1100 1 . 0 .... @2misc_q0
+
+ VRINTM 1111 001 11 . 11 .. 10 .... 0 1101 . . 0 .... @2misc
+
+ VCVT_F32_F16 1111 001 11 . 11 .. 10 .... 0 1110 0 . 0 .... @2misc_q0
+
+ VRINTP 1111 001 11 . 11 .. 10 .... 0 1111 . . 0 .... @2misc
+
+ VCVTAS 1111 001 11 . 11 .. 11 .... 0 0000 . . 0 .... @2misc
+ VCVTAU 1111 001 11 . 11 .. 11 .... 0 0001 . . 0 .... @2misc
+ VCVTNS 1111 001 11 . 11 .. 11 .... 0 0010 . . 0 .... @2misc
+ VCVTNU 1111 001 11 . 11 .. 11 .... 0 0011 . . 0 .... @2misc
+ VCVTPS 1111 001 11 . 11 .. 11 .... 0 0100 . . 0 .... @2misc
+ VCVTPU 1111 001 11 . 11 .. 11 .... 0 0101 . . 0 .... @2misc
+ VCVTMS 1111 001 11 . 11 .. 11 .... 0 0110 . . 0 .... @2misc
+ VCVTMU 1111 001 11 . 11 .. 11 .... 0 0111 . . 0 .... @2misc
+
+ VRECPE 1111 001 11 . 11 .. 11 .... 0 1000 . . 0 .... @2misc
+ VRSQRTE 1111 001 11 . 11 .. 11 .... 0 1001 . . 0 .... @2misc
+ VRECPE_F 1111 001 11 . 11 .. 11 .... 0 1010 . . 0 .... @2misc
+ VRSQRTE_F 1111 001 11 . 11 .. 11 .... 0 1011 . . 0 .... @2misc
+ VCVT_FS 1111 001 11 . 11 .. 11 .... 0 1100 . . 0 .... @2misc
+ VCVT_FU 1111 001 11 . 11 .. 11 .... 0 1101 . . 0 .... @2misc
+ VCVT_SF 1111 001 11 . 11 .. 11 .... 0 1110 . . 0 .... @2misc
+ VCVT_UF 1111 001 11 . 11 .. 11 .... 0 1111 . . 0 .... @2misc
+ ]
+
+ # Subgroup for size != 0b11
+ [
+ ##################################################################
+ # 3-reg-different-length grouping:
+ # 1111 001 U 1 D sz!=11 Vn:4 Vd:4 opc:4 N 0 M 0 Vm:4
+ ##################################################################
+
+ &3diff vm vn vd size
+
+ @3diff .... ... . . . size:2 .... .... .... . . . . .... \
+ &3diff vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+ VADDL_S_3d 1111 001 0 1 . .. .... .... 0000 . 0 . 0 .... @3diff
+ VADDL_U_3d 1111 001 1 1 . .. .... .... 0000 . 0 . 0 .... @3diff
+
+ VADDW_S_3d 1111 001 0 1 . .. .... .... 0001 . 0 . 0 .... @3diff
+ VADDW_U_3d 1111 001 1 1 . .. .... .... 0001 . 0 . 0 .... @3diff
+
+ VSUBL_S_3d 1111 001 0 1 . .. .... .... 0010 . 0 . 0 .... @3diff
+ VSUBL_U_3d 1111 001 1 1 . .. .... .... 0010 . 0 . 0 .... @3diff
+
+ VSUBW_S_3d 1111 001 0 1 . .. .... .... 0011 . 0 . 0 .... @3diff
+ VSUBW_U_3d 1111 001 1 1 . .. .... .... 0011 . 0 . 0 .... @3diff
+
+ VADDHN_3d 1111 001 0 1 . .. .... .... 0100 . 0 . 0 .... @3diff
+ VRADDHN_3d 1111 001 1 1 . .. .... .... 0100 . 0 . 0 .... @3diff
+
+ VABAL_S_3d 1111 001 0 1 . .. .... .... 0101 . 0 . 0 .... @3diff
+ VABAL_U_3d 1111 001 1 1 . .. .... .... 0101 . 0 . 0 .... @3diff
+
+ VSUBHN_3d 1111 001 0 1 . .. .... .... 0110 . 0 . 0 .... @3diff
+ VRSUBHN_3d 1111 001 1 1 . .. .... .... 0110 . 0 . 0 .... @3diff
+
+ VABDL_S_3d 1111 001 0 1 . .. .... .... 0111 . 0 . 0 .... @3diff
+ VABDL_U_3d 1111 001 1 1 . .. .... .... 0111 . 0 . 0 .... @3diff
+
+ VMLAL_S_3d 1111 001 0 1 . .. .... .... 1000 . 0 . 0 .... @3diff
+ VMLAL_U_3d 1111 001 1 1 . .. .... .... 1000 . 0 . 0 .... @3diff
+
+ VQDMLAL_3d 1111 001 0 1 . .. .... .... 1001 . 0 . 0 .... @3diff
+
+ VMLSL_S_3d 1111 001 0 1 . .. .... .... 1010 . 0 . 0 .... @3diff
+ VMLSL_U_3d 1111 001 1 1 . .. .... .... 1010 . 0 . 0 .... @3diff
+
+ VQDMLSL_3d 1111 001 0 1 . .. .... .... 1011 . 0 . 0 .... @3diff
+
+ VMULL_S_3d 1111 001 0 1 . .. .... .... 1100 . 0 . 0 .... @3diff
+ VMULL_U_3d 1111 001 1 1 . .. .... .... 1100 . 0 . 0 .... @3diff
+
+ VQDMULL_3d 1111 001 0 1 . .. .... .... 1101 . 0 . 0 .... @3diff
+
+ VMULL_P_3d 1111 001 0 1 . .. .... .... 1110 . 0 . 0 .... @3diff
+
+ ##################################################################
+ # 2-regs-plus-scalar grouping:
+ # 1111 001 Q 1 D sz!=11 Vn:4 Vd:4 opc:4 N 1 M 0 Vm:4
+ ##################################################################
+ &2scalar vm vn vd size q
+
+ @2scalar .... ... q:1 . . size:2 .... .... .... . . . . .... \
+ &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp
+ # For the 'long' ops the Q bit is part of insn decode
+ @2scalar_q0 .... ... . . . size:2 .... .... .... . . . . .... \
+ &2scalar vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
+
+ VMLA_2sc 1111 001 . 1 . .. .... .... 0000 . 1 . 0 .... @2scalar
+ VMLA_F_2sc 1111 001 . 1 . .. .... .... 0001 . 1 . 0 .... @2scalar
+
+ VMLAL_S_2sc 1111 001 0 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0
+ VMLAL_U_2sc 1111 001 1 1 . .. .... .... 0010 . 1 . 0 .... @2scalar_q0
+
+ VQDMLAL_2sc 1111 001 0 1 . .. .... .... 0011 . 1 . 0 .... @2scalar_q0
+
+ VMLS_2sc 1111 001 . 1 . .. .... .... 0100 . 1 . 0 .... @2scalar
+ VMLS_F_2sc 1111 001 . 1 . .. .... .... 0101 . 1 . 0 .... @2scalar
+
+ VMLSL_S_2sc 1111 001 0 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0
+ VMLSL_U_2sc 1111 001 1 1 . .. .... .... 0110 . 1 . 0 .... @2scalar_q0
+
+ VQDMLSL_2sc 1111 001 0 1 . .. .... .... 0111 . 1 . 0 .... @2scalar_q0
+
+ VMUL_2sc 1111 001 . 1 . .. .... .... 1000 . 1 . 0 .... @2scalar
+ VMUL_F_2sc 1111 001 . 1 . .. .... .... 1001 . 1 . 0 .... @2scalar
+
+ VMULL_S_2sc 1111 001 0 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0
+ VMULL_U_2sc 1111 001 1 1 . .. .... .... 1010 . 1 . 0 .... @2scalar_q0
+
+ VQDMULL_2sc 1111 001 0 1 . .. .... .... 1011 . 1 . 0 .... @2scalar_q0
+
+ VQDMULH_2sc 1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar
+ VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar
+
+ VQRDMLAH_2sc 1111 001 . 1 . .. .... .... 1110 . 1 . 0 .... @2scalar
+ VQRDMLSH_2sc 1111 001 . 1 . .. .... .... 1111 . 1 . 0 .... @2scalar
+ ]
+}
diff --git a/target/arm/tcg/neon-ls.decode b/target/arm/tcg/neon-ls.decode
new file mode 100644
index 0000000000..c5f364cbc0
--- /dev/null
+++ b/target/arm/tcg/neon-ls.decode
@@ -0,0 +1,52 @@
+# AArch32 Neon load/store instruction descriptions
+#
+# Copyright (c) 2020 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+# Encodings for Neon load/store instructions where the T32 encoding
+# is a simple transformation of the A32 encoding.
+# More specifically, this file covers instructions where the A32 encoding is
+# 0b1111_0100_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx
+# and the T32 encoding is
+# 0b1111_1001_xxx0_xxxx_xxxx_xxxx_xxxx_xxxx
+# This file works on the A32 encoding only; calling code for T32 has to
+# transform the insn into the A32 version first.
+
+%vd_dp 22:1 12:4
+
+# Neon load/store multiple structures
+
+VLDST_multiple 1111 0100 0 . l:1 0 rn:4 .... itype:4 size:2 align:2 rm:4 \
+ vd=%vd_dp
+
+# Neon load single element to all lanes
+
+VLD_all_lanes 1111 0100 1 . 1 0 rn:4 .... 11 n:2 size:2 t:1 a:1 rm:4 \
+ vd=%vd_dp
+
+# Neon load/store single structure to one lane
+%imm1_5_p1 5:1 !function=plus_1
+%imm1_6_p1 6:1 !function=plus_1
+
+VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 00 n:2 reg_idx:3 align:1 rm:4 \
+ vd=%vd_dp size=0 stride=1
+VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 01 n:2 reg_idx:2 . align:1 rm:4 \
+ vd=%vd_dp size=1 stride=%imm1_5_p1
+VLDST_single 1111 0100 1 . l:1 0 rn:4 .... 10 n:2 reg_idx:1 . align:2 rm:4 \
+ vd=%vd_dp size=2 stride=%imm1_6_p1
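
(Illustration, not part of the patch.) The neon-ls.decode header above says callers must rewrite a T32 Neon load/store insn into its A32 form before running this decoder; the two encodings differ only in bits [27:24] (0b1001 for T32 vs 0b0100 for A32). A minimal sketch of that rewrite, with a made-up helper name purely for illustration:

#include <stdint.h>

/* Canonicalise a T32 Neon load/store pattern to the A32 form decoded above. */
static inline uint32_t neon_ls_t32_to_a32(uint32_t insn)
{
    /* Replace bits [27:24] (0b1001 in the T32 form) with 0b0100. */
    return (insn & 0xf0ffffffu) | 0x04000000u;
}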
diff --git a/target/arm/tcg/neon-shared.decode b/target/arm/tcg/neon-shared.decode
new file mode 100644
index 0000000000..8e6bd0b61f
--- /dev/null
+++ b/target/arm/tcg/neon-shared.decode
@@ -0,0 +1,99 @@
+# AArch32 Neon instruction descriptions
+#
+# Copyright (c) 2020 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+# Encodings for Neon instructions whose encoding is the same for
+# both A32 and T32.
+
+# More specifically, this covers:
+# 2reg scalar ext: 0b1111_1110_xxxx_xxxx_xxxx_1x0x_xxxx_xxxx
+# 3same ext: 0b1111_110x_xxxx_xxxx_xxxx_1x0x_xxxx_xxxx
+
+# VFP/Neon register fields; same as vfp.decode
+%vm_dp 5:1 0:4
+%vm_sp 0:4 5:1
+%vn_dp 7:1 16:4
+%vn_sp 16:4 7:1
+%vd_dp 22:1 12:4
+%vd_sp 12:4 22:1
+
+# For VCMLA/VCADD insns, convert the single-bit size field
+# which is 0 for fp16 and 1 for fp32 into a MO_* constant.
+# (Note that this is the reverse of the sense of the 1-bit size
+# field in the 3same_fp Neon insns.)
+%vcadd_size 20:1 !function=plus_1
+
+VCMLA 1111 110 rot:2 . 1 . .... .... 1000 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp size=%vcadd_size
+
+VCADD 1111 110 rot:1 1 . 0 . .... .... 1000 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp size=%vcadd_size
+
+VSDOT 1111 110 00 . 10 .... .... 1101 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+VUDOT 1111 110 00 . 10 .... .... 1101 . q:1 . 1 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+VUSDOT 1111 110 01 . 10 .... .... 1101 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+VDOT_b16 1111 110 00 . 00 .... .... 1101 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+# VFM[AS]L
+VFML 1111 110 0 s:1 . 10 .... .... 1000 . 0 . 1 .... \
+ vm=%vm_sp vn=%vn_sp vd=%vd_dp q=0
+VFML 1111 110 0 s:1 . 10 .... .... 1000 . 1 . 1 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp q=1
+
+VSMMLA 1111 1100 0.10 .... .... 1100 .1.0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+VUMMLA 1111 1100 0.10 .... .... 1100 .1.1 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+VUSMMLA 1111 1100 1.10 .... .... 1100 .1.0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+VMMLA_b16 1111 1100 0.00 .... .... 1100 .1.0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VFMA_b16 1111 110 0 0.11 .... .... 1000 . q:1 . 1 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VCMLA_scalar 1111 1110 0 . rot:2 .... .... 1000 . q:1 index:1 0 vm:4 \
+ vn=%vn_dp vd=%vd_dp size=1
+VCMLA_scalar 1111 1110 1 . rot:2 .... .... 1000 . q:1 . 0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp size=2 index=0
+
+VSDOT_scalar 1111 1110 0 . 10 .... .... 1101 . q:1 index:1 0 vm:4 \
+ vn=%vn_dp vd=%vd_dp
+VUDOT_scalar 1111 1110 0 . 10 .... .... 1101 . q:1 index:1 1 vm:4 \
+ vn=%vn_dp vd=%vd_dp
+VUSDOT_scalar 1111 1110 1 . 00 .... .... 1101 . q:1 index:1 0 vm:4 \
+ vn=%vn_dp vd=%vd_dp
+VSUDOT_scalar 1111 1110 1 . 00 .... .... 1101 . q:1 index:1 1 vm:4 \
+ vn=%vn_dp vd=%vd_dp
+VDOT_b16_scal 1111 1110 0 . 00 .... .... 1101 . q:1 index:1 0 vm:4 \
+ vn=%vn_dp vd=%vd_dp
+
+%vfml_scalar_q0_rm 0:3 5:1
+%vfml_scalar_q1_index 5:1 3:1
+VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 0 . 1 index:1 ... \
+ rm=%vfml_scalar_q0_rm vn=%vn_sp vd=%vd_dp q=0
+VFML_scalar 1111 1110 0 . 0 s:1 .... .... 1000 . 1 . 1 . rm:3 \
+ index=%vfml_scalar_q1_index vn=%vn_dp vd=%vd_dp q=1
+VFMA_b16_scal 1111 1110 0.11 .... .... 1000 . q:1 . 1 . vm:3 \
+ index=%vfml_scalar_q1_index vn=%vn_dp vd=%vd_dp
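
(Illustration, not part of the patch.) The register fields above, e.g. "%vd_dp 22:1 12:4", concatenate their sub-fields with the first listed sub-field as the most significant bits, so a double-precision register index is D:Vd. Plain C equivalents of two of the fields, assuming that reading of the decodetree syntax:

#include <stdint.h>

/* "%vd_dp 22:1 12:4": 5-bit D-register index, D (bit 22) on top of Vd. */
static inline int vd_dp_field(uint32_t insn)
{
    return (((insn >> 22) & 1) << 4) | ((insn >> 12) & 0xf);
}

/* "%vcadd_size 20:1 !function=plus_1": the 0/1 size bit (fp16/fp32)
 * becomes 1/2 by passing the extracted value through plus_1(). */
static inline int vcadd_size_field(uint32_t insn)
{
    return ((insn >> 20) & 1) + 1;
}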
diff --git a/target/arm/neon_helper.c b/target/arm/tcg/neon_helper.c
index c2c6491a83..bc6c4a54e9 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/tcg/neon_helper.c
@@ -11,18 +11,19 @@
#include "cpu.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
+#include "vec_internal.h"
#define SIGNBIT (uint32_t)0x80000000
#define SIGNBIT64 ((uint64_t)1 << 63)
-#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q
+#define SET_QC() env->vfp.qc[0] = 1
#define NEON_TYPE1(name, type) \
typedef struct \
{ \
type v1; \
} neon_##name;
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
#define NEON_TYPE2(name, type) \
typedef struct \
{ \
@@ -562,31 +563,7 @@ uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2)
return dest;
}
-#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0
-NEON_VOP(cgt_s8, neon_s8, 4)
-NEON_VOP(cgt_u8, neon_u8, 4)
-NEON_VOP(cgt_s16, neon_s16, 2)
-NEON_VOP(cgt_u16, neon_u16, 2)
-NEON_VOP(cgt_s32, neon_s32, 1)
-NEON_VOP(cgt_u32, neon_u32, 1)
-#undef NEON_FN
-
-#define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0
-NEON_VOP(cge_s8, neon_s8, 4)
-NEON_VOP(cge_u8, neon_u8, 4)
-NEON_VOP(cge_s16, neon_s16, 2)
-NEON_VOP(cge_u16, neon_u16, 2)
-NEON_VOP(cge_s32, neon_s32, 1)
-NEON_VOP(cge_u32, neon_u32, 1)
-#undef NEON_FN
-
#define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2
-NEON_VOP(min_s8, neon_s8, 4)
-NEON_VOP(min_u8, neon_u8, 4)
-NEON_VOP(min_s16, neon_s16, 2)
-NEON_VOP(min_u16, neon_u16, 2)
-NEON_VOP(min_s32, neon_s32, 1)
-NEON_VOP(min_u32, neon_u32, 1)
NEON_POP(pmin_s8, neon_s8, 4)
NEON_POP(pmin_u8, neon_u8, 4)
NEON_POP(pmin_s16, neon_s16, 2)
@@ -594,12 +571,6 @@ NEON_POP(pmin_u16, neon_u16, 2)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2
-NEON_VOP(max_s8, neon_s8, 4)
-NEON_VOP(max_u8, neon_u8, 4)
-NEON_VOP(max_s16, neon_s16, 2)
-NEON_VOP(max_u16, neon_u16, 2)
-NEON_VOP(max_s32, neon_s32, 1)
-NEON_VOP(max_u32, neon_u32, 1)
NEON_POP(pmax_s8, neon_s8, 4)
NEON_POP(pmax_u8, neon_u8, 4)
NEON_POP(pmax_s16, neon_s16, 2)
@@ -607,538 +578,153 @@ NEON_POP(pmax_u16, neon_u16, 2)
#undef NEON_FN
#define NEON_FN(dest, src1, src2) \
- dest = (src1 > src2) ? (src1 - src2) : (src2 - src1)
-NEON_VOP(abd_s8, neon_s8, 4)
-NEON_VOP(abd_u8, neon_u8, 4)
-NEON_VOP(abd_s16, neon_s16, 2)
-NEON_VOP(abd_u16, neon_u16, 2)
-NEON_VOP(abd_s32, neon_s32, 1)
-NEON_VOP(abd_u32, neon_u32, 1)
-#undef NEON_FN
-
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8 || \
- tmp <= -(ssize_t)sizeof(src1) * 8) { \
- dest = 0; \
- } else if (tmp < 0) { \
- dest = src1 >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- }} while (0)
-NEON_VOP(shl_u8, neon_u8, 4)
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, false, NULL))
NEON_VOP(shl_u16, neon_u16, 2)
-NEON_VOP(shl_u32, neon_u32, 1)
#undef NEON_FN
-uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop)
-{
- int8_t shift = (int8_t)shiftop;
- if (shift >= 64 || shift <= -64) {
- val = 0;
- } else if (shift < 0) {
- val >>= -shift;
- } else {
- val <<= shift;
- }
- return val;
-}
-
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8) { \
- dest = 0; \
- } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
- dest = src1 >> (sizeof(src1) * 8 - 1); \
- } else if (tmp < 0) { \
- dest = src1 >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- }} while (0)
-NEON_VOP(shl_s8, neon_s8, 4)
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, false, NULL))
NEON_VOP(shl_s16, neon_s16, 2)
-NEON_VOP(shl_s32, neon_s32, 1)
#undef NEON_FN
-uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
-{
- int8_t shift = (int8_t)shiftop;
- int64_t val = valop;
- if (shift >= 64) {
- val = 0;
- } else if (shift <= -64) {
- val >>= 63;
- } else if (shift < 0) {
- val >>= -shift;
- } else {
- val <<= shift;
- }
- return val;
-}
-
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if ((tmp >= (ssize_t)sizeof(src1) * 8) \
- || (tmp <= -(ssize_t)sizeof(src1) * 8)) { \
- dest = 0; \
- } else if (tmp < 0) { \
- dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- }} while (0)
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, true, NULL))
NEON_VOP(rshl_s8, neon_s8, 4)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, true, NULL))
NEON_VOP(rshl_s16, neon_s16, 2)
#undef NEON_FN
-/* The addition of the rounding constant may overflow, so we use an
- * intermediate 64 bit accumulator. */
-uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop)
+uint32_t HELPER(neon_rshl_s32)(uint32_t val, uint32_t shift)
{
- int32_t dest;
- int32_t val = (int32_t)valop;
- int8_t shift = (int8_t)shiftop;
- if ((shift >= 32) || (shift <= -32)) {
- dest = 0;
- } else if (shift < 0) {
- int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
- dest = big_dest >> -shift;
- } else {
- dest = val << shift;
- }
- return dest;
+ return do_sqrshl_bhs(val, (int8_t)shift, 32, true, NULL);
}
-/* Handling addition overflow with 64 bit input values is more
- * tricky than with 32 bit values. */
-uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
-{
- int8_t shift = (int8_t)shiftop;
- int64_t val = valop;
- if ((shift >= 64) || (shift <= -64)) {
- val = 0;
- } else if (shift < 0) {
- val >>= (-shift - 1);
- if (val == INT64_MAX) {
- /* In this case, it means that the rounding constant is 1,
- * and the addition would overflow. Return the actual
- * result directly. */
- val = 0x4000000000000000LL;
- } else {
- val++;
- val >>= 1;
- }
- } else {
- val <<= shift;
- }
- return val;
+uint64_t HELPER(neon_rshl_s64)(uint64_t val, uint64_t shift)
+{
+ return do_sqrshl_d(val, (int8_t)shift, true, NULL);
}
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8 || \
- tmp < -(ssize_t)sizeof(src1) * 8) { \
- dest = 0; \
- } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
- dest = src1 >> (-tmp - 1); \
- } else if (tmp < 0) { \
- dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- }} while (0)
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, NULL))
NEON_VOP(rshl_u8, neon_u8, 4)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, true, NULL))
NEON_VOP(rshl_u16, neon_u16, 2)
#undef NEON_FN
-/* The addition of the rounding constant may overflow, so we use an
- * intermediate 64 bit accumulator. */
-uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop)
+uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shift)
{
- uint32_t dest;
- int8_t shift = (int8_t)shiftop;
- if (shift >= 32 || shift < -32) {
- dest = 0;
- } else if (shift == -32) {
- dest = val >> 31;
- } else if (shift < 0) {
- uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift)));
- dest = big_dest >> -shift;
- } else {
- dest = val << shift;
- }
- return dest;
+ return do_uqrshl_bhs(val, (int8_t)shift, 32, true, NULL);
}
-/* Handling addition overflow with 64 bit input values is more
- * tricky than with 32 bit values. */
-uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
-{
- int8_t shift = (uint8_t)shiftop;
- if (shift >= 64 || shift < -64) {
- val = 0;
- } else if (shift == -64) {
- /* Rounding a 1-bit result just preserves that bit. */
- val >>= 63;
- } else if (shift < 0) {
- val >>= (-shift - 1);
- if (val == UINT64_MAX) {
- /* In this case, it means that the rounding constant is 1,
- * and the addition would overflow. Return the actual
- * result directly. */
- val = 0x8000000000000000ULL;
- } else {
- val++;
- val >>= 1;
- }
- } else {
- val <<= shift;
- }
- return val;
+uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shift)
+{
+ return do_uqrshl_d(val, (int8_t)shift, true, NULL);
}
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8) { \
- if (src1) { \
- SET_QC(); \
- dest = ~0; \
- } else { \
- dest = 0; \
- } \
- } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
- dest = 0; \
- } else if (tmp < 0) { \
- dest = src1 >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- if ((dest >> tmp) != src1) { \
- SET_QC(); \
- dest = ~0; \
- } \
- }} while (0)
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
NEON_VOP_ENV(qshl_u8, neon_u8, 4)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
NEON_VOP_ENV(qshl_u16, neon_u16, 2)
-NEON_VOP_ENV(qshl_u32, neon_u32, 1)
#undef NEON_FN
-uint64_t HELPER(neon_qshl_u64)(CPUARMState *env, uint64_t val, uint64_t shiftop)
-{
- int8_t shift = (int8_t)shiftop;
- if (shift >= 64) {
- if (val) {
- val = ~(uint64_t)0;
- SET_QC();
- }
- } else if (shift <= -64) {
- val = 0;
- } else if (shift < 0) {
- val >>= -shift;
- } else {
- uint64_t tmp = val;
- val <<= shift;
- if ((val >> shift) != tmp) {
- SET_QC();
- val = ~(uint64_t)0;
- }
- }
- return val;
+uint32_t HELPER(neon_qshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift)
+{
+ return do_uqrshl_bhs(val, (int8_t)shift, 32, false, env->vfp.qc);
}
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8) { \
- if (src1) { \
- SET_QC(); \
- dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
- if (src1 > 0) { \
- dest--; \
- } \
- } else { \
- dest = src1; \
- } \
- } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
- dest = src1 >> 31; \
- } else if (tmp < 0) { \
- dest = src1 >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- if ((dest >> tmp) != src1) { \
- SET_QC(); \
- dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
- if (src1 > 0) { \
- dest--; \
- } \
- } \
- }} while (0)
+uint64_t HELPER(neon_qshl_u64)(CPUARMState *env, uint64_t val, uint64_t shift)
+{
+ return do_uqrshl_d(val, (int8_t)shift, false, env->vfp.qc);
+}
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
NEON_VOP_ENV(qshl_s8, neon_s8, 4)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
NEON_VOP_ENV(qshl_s16, neon_s16, 2)
-NEON_VOP_ENV(qshl_s32, neon_s32, 1)
#undef NEON_FN
-uint64_t HELPER(neon_qshl_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop)
-{
- int8_t shift = (uint8_t)shiftop;
- int64_t val = valop;
- if (shift >= 64) {
- if (val) {
- SET_QC();
- val = (val >> 63) ^ ~SIGNBIT64;
- }
- } else if (shift <= -64) {
- val >>= 63;
- } else if (shift < 0) {
- val >>= -shift;
- } else {
- int64_t tmp = val;
- val <<= shift;
- if ((val >> shift) != tmp) {
- SET_QC();
- val = (tmp >> 63) ^ ~SIGNBIT64;
- }
- }
- return val;
+uint32_t HELPER(neon_qshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift)
+{
+ return do_sqrshl_bhs(val, (int8_t)shift, 32, false, env->vfp.qc);
}
-#define NEON_FN(dest, src1, src2) do { \
- if (src1 & (1 << (sizeof(src1) * 8 - 1))) { \
- SET_QC(); \
- dest = 0; \
- } else { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8) { \
- if (src1) { \
- SET_QC(); \
- dest = ~0; \
- } else { \
- dest = 0; \
- } \
- } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
- dest = 0; \
- } else if (tmp < 0) { \
- dest = src1 >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- if ((dest >> tmp) != src1) { \
- SET_QC(); \
- dest = ~0; \
- } \
- } \
- }} while (0)
-NEON_VOP_ENV(qshlu_s8, neon_u8, 4)
-NEON_VOP_ENV(qshlu_s16, neon_u16, 2)
+uint64_t HELPER(neon_qshl_s64)(CPUARMState *env, uint64_t val, uint64_t shift)
+{
+ return do_sqrshl_d(val, (int8_t)shift, false, env->vfp.qc);
+}
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_suqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc))
+NEON_VOP_ENV(qshlu_s8, neon_s8, 4)
#undef NEON_FN
-uint32_t HELPER(neon_qshlu_s32)(CPUARMState *env, uint32_t valop, uint32_t shiftop)
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_suqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc))
+NEON_VOP_ENV(qshlu_s16, neon_s16, 2)
+#undef NEON_FN
+
+uint32_t HELPER(neon_qshlu_s32)(CPUARMState *env, uint32_t val, uint32_t shift)
{
- if ((int32_t)valop < 0) {
- SET_QC();
- return 0;
- }
- return helper_neon_qshl_u32(env, valop, shiftop);
+ return do_suqrshl_bhs(val, (int8_t)shift, 32, false, env->vfp.qc);
}
-uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop)
+uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t val, uint64_t shift)
{
- if ((int64_t)valop < 0) {
- SET_QC();
- return 0;
- }
- return helper_neon_qshl_u64(env, valop, shiftop);
+ return do_suqrshl_d(val, (int8_t)shift, false, env->vfp.qc);
}
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8) { \
- if (src1) { \
- SET_QC(); \
- dest = ~0; \
- } else { \
- dest = 0; \
- } \
- } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \
- dest = 0; \
- } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
- dest = src1 >> (sizeof(src1) * 8 - 1); \
- } else if (tmp < 0) { \
- dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- if ((dest >> tmp) != src1) { \
- SET_QC(); \
- dest = ~0; \
- } \
- }} while (0)
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
#undef NEON_FN
-/* The addition of the rounding constant may overflow, so we use an
- * intermediate 64 bit accumulator. */
-uint32_t HELPER(neon_qrshl_u32)(CPUARMState *env, uint32_t val, uint32_t shiftop)
+uint32_t HELPER(neon_qrshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift)
{
- uint32_t dest;
- int8_t shift = (int8_t)shiftop;
- if (shift >= 32) {
- if (val) {
- SET_QC();
- dest = ~0;
- } else {
- dest = 0;
- }
- } else if (shift < -32) {
- dest = 0;
- } else if (shift == -32) {
- dest = val >> 31;
- } else if (shift < 0) {
- uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift)));
- dest = big_dest >> -shift;
- } else {
- dest = val << shift;
- if ((dest >> shift) != val) {
- SET_QC();
- dest = ~0;
- }
- }
- return dest;
+ return do_uqrshl_bhs(val, (int8_t)shift, 32, true, env->vfp.qc);
}
-/* Handling addition overflow with 64 bit input values is more
- * tricky than with 32 bit values. */
-uint64_t HELPER(neon_qrshl_u64)(CPUARMState *env, uint64_t val, uint64_t shiftop)
-{
- int8_t shift = (int8_t)shiftop;
- if (shift >= 64) {
- if (val) {
- SET_QC();
- val = ~0;
- }
- } else if (shift < -64) {
- val = 0;
- } else if (shift == -64) {
- val >>= 63;
- } else if (shift < 0) {
- val >>= (-shift - 1);
- if (val == UINT64_MAX) {
- /* In this case, it means that the rounding constant is 1,
- * and the addition would overflow. Return the actual
- * result directly. */
- val = 0x8000000000000000ULL;
- } else {
- val++;
- val >>= 1;
- }
- } else { \
- uint64_t tmp = val;
- val <<= shift;
- if ((val >> shift) != tmp) {
- SET_QC();
- val = ~0;
- }
- }
- return val;
+uint64_t HELPER(neon_qrshl_u64)(CPUARMState *env, uint64_t val, uint64_t shift)
+{
+ return do_uqrshl_d(val, (int8_t)shift, true, env->vfp.qc);
}
-#define NEON_FN(dest, src1, src2) do { \
- int8_t tmp; \
- tmp = (int8_t)src2; \
- if (tmp >= (ssize_t)sizeof(src1) * 8) { \
- if (src1) { \
- SET_QC(); \
- dest = (typeof(dest))(1 << (sizeof(src1) * 8 - 1)); \
- if (src1 > 0) { \
- dest--; \
- } \
- } else { \
- dest = 0; \
- } \
- } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \
- dest = 0; \
- } else if (tmp < 0) { \
- dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
- } else { \
- dest = src1 << tmp; \
- if ((dest >> tmp) != src1) { \
- SET_QC(); \
- dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \
- if (src1 > 0) { \
- dest--; \
- } \
- } \
- }} while (0)
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
+#undef NEON_FN
+
+#define NEON_FN(dest, src1, src2) \
+ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, true, env->vfp.qc))
NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
#undef NEON_FN
-/* The addition of the rounding constant may overflow, so we use an
- * intermediate 64 bit accumulator. */
-uint32_t HELPER(neon_qrshl_s32)(CPUARMState *env, uint32_t valop, uint32_t shiftop)
+uint32_t HELPER(neon_qrshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift)
{
- int32_t dest;
- int32_t val = (int32_t)valop;
- int8_t shift = (int8_t)shiftop;
- if (shift >= 32) {
- if (val) {
- SET_QC();
- dest = (val >> 31) ^ ~SIGNBIT;
- } else {
- dest = 0;
- }
- } else if (shift <= -32) {
- dest = 0;
- } else if (shift < 0) {
- int64_t big_dest = ((int64_t)val + (1 << (-1 - shift)));
- dest = big_dest >> -shift;
- } else {
- dest = val << shift;
- if ((dest >> shift) != val) {
- SET_QC();
- dest = (val >> 31) ^ ~SIGNBIT;
- }
- }
- return dest;
+ return do_sqrshl_bhs(val, (int8_t)shift, 32, true, env->vfp.qc);
}
-/* Handling addition overflow with 64 bit input values is more
- * tricky than with 32 bit values. */
-uint64_t HELPER(neon_qrshl_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop)
-{
- int8_t shift = (uint8_t)shiftop;
- int64_t val = valop;
-
- if (shift >= 64) {
- if (val) {
- SET_QC();
- val = (val >> 63) ^ ~SIGNBIT64;
- }
- } else if (shift <= -64) {
- val = 0;
- } else if (shift < 0) {
- val >>= (-shift - 1);
- if (val == INT64_MAX) {
- /* In this case, it means that the rounding constant is 1,
- * and the addition would overflow. Return the actual
- * result directly. */
- val = 0x4000000000000000ULL;
- } else {
- val++;
- val >>= 1;
- }
- } else {
- int64_t tmp = val;
- val <<= shift;
- if ((val >> shift) != tmp) {
- SET_QC();
- val = (tmp >> 63) ^ ~SIGNBIT64;
- }
- }
- return val;
+uint64_t HELPER(neon_qrshl_s64)(CPUARMState *env, uint64_t val, uint64_t shift)
+{
+ return do_sqrshl_d(val, (int8_t)shift, true, env->vfp.qc);
}
uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b)
@@ -1174,77 +760,12 @@ NEON_VOP(mul_u8, neon_u8, 4)
NEON_VOP(mul_u16, neon_u16, 2)
#undef NEON_FN
-/* Polynomial multiplication is like integer multiplication except the
- partial products are XORed, not added. */
-uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2)
-{
- uint32_t mask;
- uint32_t result;
- result = 0;
- while (op1) {
- mask = 0;
- if (op1 & 1)
- mask |= 0xff;
- if (op1 & (1 << 8))
- mask |= (0xff << 8);
- if (op1 & (1 << 16))
- mask |= (0xff << 16);
- if (op1 & (1 << 24))
- mask |= (0xff << 24);
- result ^= op2 & mask;
- op1 = (op1 >> 1) & 0x7f7f7f7f;
- op2 = (op2 << 1) & 0xfefefefe;
- }
- return result;
-}
-
-uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2)
-{
- uint64_t result = 0;
- uint64_t mask;
- uint64_t op2ex = op2;
- op2ex = (op2ex & 0xff) |
- ((op2ex & 0xff00) << 8) |
- ((op2ex & 0xff0000) << 16) |
- ((op2ex & 0xff000000) << 24);
- while (op1) {
- mask = 0;
- if (op1 & 1) {
- mask |= 0xffff;
- }
- if (op1 & (1 << 8)) {
- mask |= (0xffffU << 16);
- }
- if (op1 & (1 << 16)) {
- mask |= (0xffffULL << 32);
- }
- if (op1 & (1 << 24)) {
- mask |= (0xffffULL << 48);
- }
- result ^= op2ex & mask;
- op1 = (op1 >> 1) & 0x7f7f7f7f;
- op2ex <<= 1;
- }
- return result;
-}
-
#define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0
NEON_VOP(tst_u8, neon_u8, 4)
NEON_VOP(tst_u16, neon_u16, 2)
NEON_VOP(tst_u32, neon_u32, 1)
#undef NEON_FN
-#define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0
-NEON_VOP(ceq_u8, neon_u8, 4)
-NEON_VOP(ceq_u16, neon_u16, 2)
-NEON_VOP(ceq_u32, neon_u32, 1)
-#undef NEON_FN
-
-#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src
-NEON_VOP1(abs_s8, neon_s8, 4)
-NEON_VOP1(abs_s16, neon_s16, 2)
-#undef NEON_FN
-
/* Count Leading Sign/Zero Bits. */
static inline int do_clz8(uint8_t x)
{
@@ -1963,13 +1484,6 @@ uint64_t HELPER(neon_qneg_s64)(CPUARMState *env, uint64_t x)
}
/* NEON Float helpers. */
-uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp)
-{
- float_status *fpst = fpstp;
- float32 f0 = make_float32(a);
- float32 f1 = make_float32(b);
- return float32_val(float32_abs(float32_sub(f0, f1, fpst)));
-}
/* Floating point comparisons produce an integer result.
* Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
@@ -2224,33 +1738,3 @@ void HELPER(neon_zip16)(void *vd, void *vm)
rm[0] = m0;
rd[0] = d0;
}
-
-/* Helper function for 64 bit polynomial multiply case:
- * perform PolynomialMult(op1, op2) and return either the top or
- * bottom half of the 128 bit result.
- */
-uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
-{
- int bitnum;
- uint64_t res = 0;
-
- for (bitnum = 0; bitnum < 64; bitnum++) {
- if (op1 & (1ULL << bitnum)) {
- res ^= op2 << bitnum;
- }
- }
- return res;
-}
-uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
-{
- int bitnum;
- uint64_t res = 0;
-
- /* bit 0 of op1 can't influence the high 64 bits at all */
- for (bitnum = 1; bitnum < 64; bitnum++) {
- if (op1 & (1ULL << bitnum)) {
- res ^= op2 >> (64 - bitnum);
- }
- }
- return res;
-}
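
(Illustration, not part of the patch.) The neon_helper.c changes above replace the open-coded shift loops with the shared do_{s,u}qrshl_bhs()/_d() helpers, parameterised by element width, a rounding flag and an optional saturation (QC) pointer. A rough standalone model of the behaviour those calls encode for one unsigned 32-bit element, written from the removed code shown in this hunk rather than from vec_internal.h:

#include <stdint.h>
#include <stdbool.h>

static uint32_t model_uqrshl32(uint32_t val, int8_t shift,
                               bool round, bool sat, bool *qc)
{
    if (shift >= 32) {
        if (sat && val) {
            *qc = true;          /* left shift lost bits: saturate */
            return UINT32_MAX;
        }
        return 0;
    } else if (shift <= -32) {
        /* Rounding a right shift by exactly the width keeps the top bit. */
        return (round && shift == -32) ? val >> 31 : 0;
    } else if (shift < 0) {
        uint64_t v = val;        /* 64-bit so the rounding add cannot overflow */
        if (round) {
            v += 1ull << (-shift - 1);
        }
        return v >> -shift;
    } else {
        uint32_t res = val << shift;
        if (sat && (res >> shift) != val) {
            *qc = true;
            return UINT32_MAX;
        }
        return res;
    }
}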
diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c
new file mode 100644
index 0000000000..c199b69fbf
--- /dev/null
+++ b/target/arm/tcg/op_helper.c
@@ -0,0 +1,1185 @@
+/*
+ * ARM helper routines
+ *
+ * Copyright (c) 2005-2007 CodeSourcery, LLC
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "cpu.h"
+#include "exec/helper-proto.h"
+#include "internals.h"
+#include "cpu-features.h"
+#include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
+#include "cpregs.h"
+
+#define SIGNBIT (uint32_t)0x80000000
+#define SIGNBIT64 ((uint64_t)1 << 63)
+
+int exception_target_el(CPUARMState *env)
+{
+ int target_el = MAX(1, arm_current_el(env));
+
+ /*
+ * No such thing as secure EL1 if EL3 is aarch32,
+ * so update the target EL to EL3 in this case.
+ */
+ if (arm_is_secure(env) && !arm_el_is_aa64(env, 3) && target_el == 1) {
+ target_el = 3;
+ }
+
+ return target_el;
+}
+
+void raise_exception(CPUARMState *env, uint32_t excp,
+ uint32_t syndrome, uint32_t target_el)
+{
+ CPUState *cs = env_cpu(env);
+
+ if (target_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
+ /*
+ * Redirect NS EL1 exceptions to NS EL2. These are reported with
+ * their original syndrome register value, with the exception of
+ * SIMD/FP access traps, which are reported as uncategorized
+ * (see DDI0478C.a D1.10.4)
+ */
+ target_el = 2;
+ if (syn_get_ec(syndrome) == EC_ADVSIMDFPACCESSTRAP) {
+ syndrome = syn_uncategorized();
+ }
+ }
+
+ assert(!excp_is_internal(excp));
+ cs->exception_index = excp;
+ env->exception.syndrome = syndrome;
+ env->exception.target_el = target_el;
+ cpu_loop_exit(cs);
+}
+
+void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome,
+ uint32_t target_el, uintptr_t ra)
+{
+ CPUState *cs = env_cpu(env);
+
+ /*
+ * restore_state_to_opc() will set env->exception.syndrome, so
+ * we must restore CPU state here before setting the syndrome
+ * the caller passed us, and cannot use cpu_loop_exit_restore().
+ */
+ cpu_restore_state(cs, ra);
+ raise_exception(env, excp, syndrome, target_el);
+}
+
+uint64_t HELPER(neon_tbl)(CPUARMState *env, uint32_t desc,
+ uint64_t ireg, uint64_t def)
+{
+ uint64_t tmp, val = 0;
+ uint32_t maxindex = ((desc & 3) + 1) * 8;
+ uint32_t base_reg = desc >> 2;
+ uint32_t shift, index, reg;
+
+ for (shift = 0; shift < 64; shift += 8) {
+ index = (ireg >> shift) & 0xff;
+ if (index < maxindex) {
+ reg = base_reg + (index >> 3);
+ tmp = *aa32_vfp_dreg(env, reg);
+ tmp = ((tmp >> ((index & 7) << 3)) & 0xff) << shift;
+ } else {
+ tmp = def & (0xffull << shift);
+ }
+ val |= tmp;
+ }
+ return val;
+}
+
+void HELPER(v8m_stackcheck)(CPUARMState *env, uint32_t newvalue)
+{
+ /*
+ * Perform the v8M stack limit check for SP updates from translated code,
+ * raising an exception if the limit is breached.
+ */
+ if (newvalue < v7m_sp_limit(env)) {
+ /*
+ * Stack limit exceptions are a rare case, so rather than syncing
+ * PC/condbits before the call, we use raise_exception_ra() so
+ * that cpu_restore_state() will sort them out.
+ */
+ raise_exception_ra(env, EXCP_STKOF, 0, 1, GETPC());
+ }
+}
+
+/* Sign/zero extend */
+uint32_t HELPER(sxtb16)(uint32_t x)
+{
+ uint32_t res;
+ res = (uint16_t)(int8_t)x;
+ res |= (uint32_t)(int8_t)(x >> 16) << 16;
+ return res;
+}
+
+static void handle_possible_div0_trap(CPUARMState *env, uintptr_t ra)
+{
+ /*
+ * Take a division-by-zero exception if necessary; otherwise return
+ * to get the usual non-trapping division behaviour (result of 0)
+ */
+ if (arm_feature(env, ARM_FEATURE_M)
+ && (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_DIV_0_TRP_MASK)) {
+ raise_exception_ra(env, EXCP_DIVBYZERO, 0, 1, ra);
+ }
+}
+
+uint32_t HELPER(uxtb16)(uint32_t x)
+{
+ uint32_t res;
+ res = (uint16_t)(uint8_t)x;
+ res |= (uint32_t)(uint8_t)(x >> 16) << 16;
+ return res;
+}
+
+int32_t HELPER(sdiv)(CPUARMState *env, int32_t num, int32_t den)
+{
+ if (den == 0) {
+ handle_possible_div0_trap(env, GETPC());
+ return 0;
+ }
+ if (num == INT_MIN && den == -1) {
+ return INT_MIN;
+ }
+ return num / den;
+}
+
+uint32_t HELPER(udiv)(CPUARMState *env, uint32_t num, uint32_t den)
+{
+ if (den == 0) {
+ handle_possible_div0_trap(env, GETPC());
+ return 0;
+ }
+ return num / den;
+}
+
+uint32_t HELPER(rbit)(uint32_t x)
+{
+ return revbit32(x);
+}
+
+uint32_t HELPER(add_setq)(CPUARMState *env, uint32_t a, uint32_t b)
+{
+ uint32_t res = a + b;
+ if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT))
+ env->QF = 1;
+ return res;
+}
+
+uint32_t HELPER(add_saturate)(CPUARMState *env, uint32_t a, uint32_t b)
+{
+ uint32_t res = a + b;
+ if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) {
+ env->QF = 1;
+ res = ~(((int32_t)a >> 31) ^ SIGNBIT);
+ }
+ return res;
+}
+
+uint32_t HELPER(sub_saturate)(CPUARMState *env, uint32_t a, uint32_t b)
+{
+ uint32_t res = a - b;
+ if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) {
+ env->QF = 1;
+ res = ~(((int32_t)a >> 31) ^ SIGNBIT);
+ }
+ return res;
+}
+
+uint32_t HELPER(add_usaturate)(CPUARMState *env, uint32_t a, uint32_t b)
+{
+ uint32_t res = a + b;
+ if (res < a) {
+ env->QF = 1;
+ res = ~0;
+ }
+ return res;
+}
+
+uint32_t HELPER(sub_usaturate)(CPUARMState *env, uint32_t a, uint32_t b)
+{
+ uint32_t res = a - b;
+ if (res > a) {
+ env->QF = 1;
+ res = 0;
+ }
+ return res;
+}
+
+/* Signed saturation. */
+static inline uint32_t do_ssat(CPUARMState *env, int32_t val, int shift)
+{
+ int32_t top;
+ uint32_t mask;
+
+ top = val >> shift;
+ mask = (1u << shift) - 1;
+ if (top > 0) {
+ env->QF = 1;
+ return mask;
+ } else if (top < -1) {
+ env->QF = 1;
+ return ~mask;
+ }
+ return val;
+}
+
+/* Unsigned saturation. */
+static inline uint32_t do_usat(CPUARMState *env, int32_t val, int shift)
+{
+ uint32_t max;
+
+ max = (1u << shift) - 1;
+ if (val < 0) {
+ env->QF = 1;
+ return 0;
+ } else if (val > max) {
+ env->QF = 1;
+ return max;
+ }
+ return val;
+}
+
+/* Signed saturate. */
+uint32_t HELPER(ssat)(CPUARMState *env, uint32_t x, uint32_t shift)
+{
+ return do_ssat(env, x, shift);
+}
+
+/* Dual halfword signed saturate. */
+uint32_t HELPER(ssat16)(CPUARMState *env, uint32_t x, uint32_t shift)
+{
+ uint32_t res;
+
+ res = (uint16_t)do_ssat(env, (int16_t)x, shift);
+ res |= do_ssat(env, ((int32_t)x) >> 16, shift) << 16;
+ return res;
+}
+
+/* Unsigned saturate. */
+uint32_t HELPER(usat)(CPUARMState *env, uint32_t x, uint32_t shift)
+{
+ return do_usat(env, x, shift);
+}
+
+/* Dual halfword unsigned saturate. */
+uint32_t HELPER(usat16)(CPUARMState *env, uint32_t x, uint32_t shift)
+{
+ uint32_t res;
+
+ res = (uint16_t)do_usat(env, (int16_t)x, shift);
+ res |= do_usat(env, ((int32_t)x) >> 16, shift) << 16;
+ return res;
+}
+
+void HELPER(setend)(CPUARMState *env)
+{
+ env->uncached_cpsr ^= CPSR_E;
+ arm_rebuild_hflags(env);
+}
+
+void HELPER(check_bxj_trap)(CPUARMState *env, uint32_t rm)
+{
+ /*
+ * Only called if in NS EL0 or EL1 for a BXJ for a v7A CPU;
+ * check if HSTR.TJDBX means we need to trap to EL2.
+ */
+ if (env->cp15.hstr_el2 & HSTR_TJDBX) {
+ /*
+ * We know the condition code check passed, so take the IMPDEF
+ * choice to always report CV=1 COND 0xe
+ */
+ uint32_t syn = syn_bxjtrap(1, 0xe, rm);
+ raise_exception_ra(env, EXCP_HYP_TRAP, syn, 2, GETPC());
+ }
+}
+
+#ifndef CONFIG_USER_ONLY
+/* Function checks whether WFx (WFI/WFE) instructions are set up to be trapped.
+ * The function returns the target EL (1-3) if the instruction is to be trapped;
+ * otherwise it returns 0 indicating it is not trapped.
+ */
+static inline int check_wfx_trap(CPUARMState *env, bool is_wfe)
+{
+ int cur_el = arm_current_el(env);
+ uint64_t mask;
+
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ /* M profile cores can never trap WFI/WFE. */
+ return 0;
+ }
+
+ /* If we are currently in EL0 then we need to check if SCTLR is set up for
+ * WFx instructions being trapped to EL1. These trap bits don't exist in v7.
+ */
+ if (cur_el < 1 && arm_feature(env, ARM_FEATURE_V8)) {
+ int target_el;
+
+ mask = is_wfe ? SCTLR_nTWE : SCTLR_nTWI;
+ if (arm_is_secure_below_el3(env) && !arm_el_is_aa64(env, 3)) {
+ /* Secure EL0 and Secure PL1 is at EL3 */
+ target_el = 3;
+ } else {
+ target_el = 1;
+ }
+
+ if (!(env->cp15.sctlr_el[target_el] & mask)) {
+ return target_el;
+ }
+ }
+
+ /* We are not trapping to EL1; trap to EL2 if HCR_EL2 requires it
+ * No need for ARM_FEATURE check as if HCR_EL2 doesn't exist the
+ * bits will be zero indicating no trap.
+ */
+ if (cur_el < 2) {
+ mask = is_wfe ? HCR_TWE : HCR_TWI;
+ if (arm_hcr_el2_eff(env) & mask) {
+ return 2;
+ }
+ }
+
+ /* We are not trapping to EL1 or EL2; trap to EL3 if SCR_EL3 requires it */
+ if (cur_el < 3) {
+ mask = (is_wfe) ? SCR_TWE : SCR_TWI;
+ if (env->cp15.scr_el3 & mask) {
+ return 3;
+ }
+ }
+
+ return 0;
+}
+#endif
+
+void HELPER(wfi)(CPUARMState *env, uint32_t insn_len)
+{
+#ifdef CONFIG_USER_ONLY
+ /*
+ * WFI in the user-mode emulator is technically permitted but not
+ * something any real-world code would do. AArch64 Linux kernels
+ * trap it via SCTLR_EL1.nTWI and make it an (expensive) NOP;
+ * AArch32 kernels don't trap it so it will delay a bit.
+ * For QEMU, make it NOP here, because trying to raise EXCP_HLT
+ * would trigger an abort.
+ */
+ return;
+#else
+ CPUState *cs = env_cpu(env);
+ int target_el = check_wfx_trap(env, false);
+
+ if (cpu_has_work(cs)) {
+ /* Don't bother to go into our "low power state" if
+ * we would just wake up immediately.
+ */
+ return;
+ }
+
+ if (target_el) {
+ if (env->aarch64) {
+ env->pc -= insn_len;
+ } else {
+ env->regs[15] -= insn_len;
+ }
+
+ raise_exception(env, EXCP_UDEF, syn_wfx(1, 0xe, 0, insn_len == 2),
+ target_el);
+ }
+
+ cs->exception_index = EXCP_HLT;
+ cs->halted = 1;
+ cpu_loop_exit(cs);
+#endif
+}
+
+void HELPER(wfe)(CPUARMState *env)
+{
+ /* This is a hint instruction that is semantically different
+ * from YIELD even though we currently implement it identically.
+ * Don't actually halt the CPU, just yield back to top
+ * level loop. This is not going into a "low power state"
+ * (ie halting until some event occurs), so we never take
+ * a configurable trap to a different exception level.
+ */
+ HELPER(yield)(env);
+}
+
+void HELPER(yield)(CPUARMState *env)
+{
+ CPUState *cs = env_cpu(env);
+
+ /* This is a non-trappable hint instruction that generally indicates
+ * that the guest is currently busy-looping. Yield control back to the
+ * top level loop so that a more deserving VCPU has a chance to run.
+ */
+ cs->exception_index = EXCP_YIELD;
+ cpu_loop_exit(cs);
+}
+
+/* Raise an internal-to-QEMU exception. This is limited to only
+ * those EXCP values which are special cases for QEMU to interrupt
+ * execution and not to be used for exceptions which are passed to
+ * the guest (those must all have syndrome information and thus should
+ * use exception_with_syndrome*).
+ */
+void HELPER(exception_internal)(CPUARMState *env, uint32_t excp)
+{
+ CPUState *cs = env_cpu(env);
+
+ assert(excp_is_internal(excp));
+ cs->exception_index = excp;
+ cpu_loop_exit(cs);
+}
+
+/* Raise an exception with the specified syndrome register value */
+void HELPER(exception_with_syndrome_el)(CPUARMState *env, uint32_t excp,
+ uint32_t syndrome, uint32_t target_el)
+{
+ raise_exception(env, excp, syndrome, target_el);
+}
+
+/*
+ * Raise an exception with the specified syndrome register value
+ * to the default target el.
+ */
+void HELPER(exception_with_syndrome)(CPUARMState *env, uint32_t excp,
+ uint32_t syndrome)
+{
+ raise_exception(env, excp, syndrome, exception_target_el(env));
+}
+
+uint32_t HELPER(cpsr_read)(CPUARMState *env)
+{
+ return cpsr_read(env) & ~CPSR_EXEC;
+}
+
+void HELPER(cpsr_write)(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+ cpsr_write(env, val, mask, CPSRWriteByInstr);
+ /* TODO: Not all cpsr bits are relevant to hflags. */
+ arm_rebuild_hflags(env);
+}
+
+/* Write the CPSR for a 32-bit exception return */
+void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val)
+{
+ uint32_t mask;
+
+ bql_lock();
+ arm_call_pre_el_change_hook(env_archcpu(env));
+ bql_unlock();
+
+ mask = aarch32_cpsr_valid_mask(env->features, &env_archcpu(env)->isar);
+ cpsr_write(env, val, mask, CPSRWriteExceptionReturn);
+
+ /* Generated code has already stored the new PC value, but
+ * without masking out its low bits, because which bits need
+ * masking depends on whether we're returning to Thumb or ARM
+ * state. Do the masking now.
+ */
+ env->regs[15] &= (env->thumb ? ~1 : ~3);
+ arm_rebuild_hflags(env);
+
+ bql_lock();
+ arm_call_el_change_hook(env_archcpu(env));
+ bql_unlock();
+}
+
+/* Access to user mode registers from privileged modes. */
+uint32_t HELPER(get_user_reg)(CPUARMState *env, uint32_t regno)
+{
+ uint32_t val;
+
+ if (regno == 13) {
+ val = env->banked_r13[BANK_USRSYS];
+ } else if (regno == 14) {
+ val = env->banked_r14[BANK_USRSYS];
+ } else if (regno >= 8
+ && (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_FIQ) {
+ val = env->usr_regs[regno - 8];
+ } else {
+ val = env->regs[regno];
+ }
+ return val;
+}
+
+void HELPER(set_user_reg)(CPUARMState *env, uint32_t regno, uint32_t val)
+{
+ if (regno == 13) {
+ env->banked_r13[BANK_USRSYS] = val;
+ } else if (regno == 14) {
+ env->banked_r14[BANK_USRSYS] = val;
+ } else if (regno >= 8
+ && (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_FIQ) {
+ env->usr_regs[regno - 8] = val;
+ } else {
+ env->regs[regno] = val;
+ }
+}
+
+void HELPER(set_r13_banked)(CPUARMState *env, uint32_t mode, uint32_t val)
+{
+ if ((env->uncached_cpsr & CPSR_M) == mode) {
+ env->regs[13] = val;
+ } else {
+ env->banked_r13[bank_number(mode)] = val;
+ }
+}
+
+uint32_t HELPER(get_r13_banked)(CPUARMState *env, uint32_t mode)
+{
+ if ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_SYS) {
+ /* SRS instruction is UNPREDICTABLE from System mode; we UNDEF.
+ * Other UNPREDICTABLE and UNDEF cases were caught at translate time.
+ */
+ raise_exception(env, EXCP_UDEF, syn_uncategorized(),
+ exception_target_el(env));
+ }
+
+ if ((env->uncached_cpsr & CPSR_M) == mode) {
+ return env->regs[13];
+ } else {
+ return env->banked_r13[bank_number(mode)];
+ }
+}
+
+static void msr_mrs_banked_exc_checks(CPUARMState *env, uint32_t tgtmode,
+ uint32_t regno)
+{
+ /* Raise an exception if the requested access is one of the UNPREDICTABLE
+ * cases; otherwise return. This broadly corresponds to the pseudocode
+ * BankedRegisterAccessValid() and SPSRAccessValid(),
+ * except that we have already handled some cases at translate time.
+ */
+ int curmode = env->uncached_cpsr & CPSR_M;
+
+ if (tgtmode == ARM_CPU_MODE_HYP) {
+ /*
+ * Handle Hyp target regs first because some are special cases
+ * which don't want the usual "not accessible from tgtmode" check.
+ */
+ switch (regno) {
+ case 16 ... 17: /* ELR_Hyp, SPSR_Hyp */
+ if (curmode != ARM_CPU_MODE_HYP && curmode != ARM_CPU_MODE_MON) {
+ goto undef;
+ }
+ break;
+ case 13:
+ if (curmode != ARM_CPU_MODE_MON) {
+ goto undef;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return;
+ }
+
+ if (curmode == tgtmode) {
+ goto undef;
+ }
+
+ if (tgtmode == ARM_CPU_MODE_USR) {
+ switch (regno) {
+ case 8 ... 12:
+ if (curmode != ARM_CPU_MODE_FIQ) {
+ goto undef;
+ }
+ break;
+ case 13:
+ if (curmode == ARM_CPU_MODE_SYS) {
+ goto undef;
+ }
+ break;
+ case 14:
+ if (curmode == ARM_CPU_MODE_HYP || curmode == ARM_CPU_MODE_SYS) {
+ goto undef;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ return;
+
+undef:
+ raise_exception(env, EXCP_UDEF, syn_uncategorized(),
+ exception_target_el(env));
+}
+
+void HELPER(msr_banked)(CPUARMState *env, uint32_t value, uint32_t tgtmode,
+ uint32_t regno)
+{
+ msr_mrs_banked_exc_checks(env, tgtmode, regno);
+
+ switch (regno) {
+ case 16: /* SPSRs */
+ if (tgtmode == (env->uncached_cpsr & CPSR_M)) {
+ /* Only happens for SPSR_Hyp access in Hyp mode */
+ env->spsr = value;
+ } else {
+ env->banked_spsr[bank_number(tgtmode)] = value;
+ }
+ break;
+ case 17: /* ELR_Hyp */
+ env->elr_el[2] = value;
+ break;
+ case 13:
+ env->banked_r13[bank_number(tgtmode)] = value;
+ break;
+ case 14:
+ env->banked_r14[r14_bank_number(tgtmode)] = value;
+ break;
+ case 8 ... 12:
+ switch (tgtmode) {
+ case ARM_CPU_MODE_USR:
+ env->usr_regs[regno - 8] = value;
+ break;
+ case ARM_CPU_MODE_FIQ:
+ env->fiq_regs[regno - 8] = value;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+uint32_t HELPER(mrs_banked)(CPUARMState *env, uint32_t tgtmode, uint32_t regno)
+{
+ msr_mrs_banked_exc_checks(env, tgtmode, regno);
+
+ switch (regno) {
+ case 16: /* SPSRs */
+ if (tgtmode == (env->uncached_cpsr & CPSR_M)) {
+ /* Only happens for SPSR_Hyp access in Hyp mode */
+ return env->spsr;
+ } else {
+ return env->banked_spsr[bank_number(tgtmode)];
+ }
+ case 17: /* ELR_Hyp */
+ return env->elr_el[2];
+ case 13:
+ return env->banked_r13[bank_number(tgtmode)];
+ case 14:
+ return env->banked_r14[r14_bank_number(tgtmode)];
+ case 8 ... 12:
+ switch (tgtmode) {
+ case ARM_CPU_MODE_USR:
+ return env->usr_regs[regno - 8];
+ case ARM_CPU_MODE_FIQ:
+ return env->fiq_regs[regno - 8];
+ default:
+ g_assert_not_reached();
+ }
+ default:
+ g_assert_not_reached();
+ }
+}
+
+const void *HELPER(access_check_cp_reg)(CPUARMState *env, uint32_t key,
+ uint32_t syndrome, uint32_t isread)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ const ARMCPRegInfo *ri = get_arm_cp_reginfo(cpu->cp_regs, key);
+ CPAccessResult res = CP_ACCESS_OK;
+ int target_el;
+
+ assert(ri != NULL);
+
+ if (arm_feature(env, ARM_FEATURE_XSCALE) && ri->cp < 14
+ && extract32(env->cp15.c15_cpar, ri->cp, 1) == 0) {
+ res = CP_ACCESS_TRAP;
+ goto fail;
+ }
+
+ if (ri->accessfn) {
+ res = ri->accessfn(env, ri, isread);
+ }
+
+ /*
+ * If the access function indicates a trap from EL0 to EL1 then
+ * that always takes priority over the HSTR_EL2 trap. (If it indicates
+ * a trap to EL3, then the HSTR_EL2 trap takes priority; if it indicates
+ * a trap to EL2, then the syndrome is the same either way so we don't
+ * care whether technically the architecture says that HSTR_EL2 trap or
+ * the other trap takes priority. So we take the "check HSTR_EL2" path
+ * for all of those cases.)
+ */
+ if (res != CP_ACCESS_OK && ((res & CP_ACCESS_EL_MASK) == 0) &&
+ arm_current_el(env) == 0) {
+ goto fail;
+ }
+
+ /*
+ * HSTR_EL2 traps from EL1 are checked earlier, in generated code;
+ * we only need to check here for traps from EL0.
+ */
+ if (!is_a64(env) && arm_current_el(env) == 0 && ri->cp == 15 &&
+ arm_is_el2_enabled(env) &&
+ (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
+ uint32_t mask = 1 << ri->crn;
+
+ if (ri->type & ARM_CP_64BIT) {
+ mask = 1 << ri->crm;
+ }
+
+ /* T4 and T14 are RES0 */
+ mask &= ~((1 << 4) | (1 << 14));
+
+ if (env->cp15.hstr_el2 & mask) {
+ res = CP_ACCESS_TRAP_EL2;
+ goto fail;
+ }
+ }
+
+ /*
+ * Fine-grained traps also are lower priority than undef-to-EL1,
+ * higher priority than trap-to-EL3, and we don't care about priority
+ * order with other EL2 traps because the syndrome value is the same.
+ */
+ if (arm_fgt_active(env, arm_current_el(env))) {
+ uint64_t trapword = 0;
+ unsigned int idx = FIELD_EX32(ri->fgt, FGT, IDX);
+ unsigned int bitpos = FIELD_EX32(ri->fgt, FGT, BITPOS);
+ bool rev = FIELD_EX32(ri->fgt, FGT, REV);
+ bool trapbit;
+
+ if (ri->fgt & FGT_EXEC) {
+ assert(idx < ARRAY_SIZE(env->cp15.fgt_exec));
+ trapword = env->cp15.fgt_exec[idx];
+ } else if (isread && (ri->fgt & FGT_R)) {
+ assert(idx < ARRAY_SIZE(env->cp15.fgt_read));
+ trapword = env->cp15.fgt_read[idx];
+ } else if (!isread && (ri->fgt & FGT_W)) {
+ assert(idx < ARRAY_SIZE(env->cp15.fgt_write));
+ trapword = env->cp15.fgt_write[idx];
+ }
+
+ trapbit = extract64(trapword, bitpos, 1);
+ if (trapbit != rev) {
+ res = CP_ACCESS_TRAP_EL2;
+ goto fail;
+ }
+ }
+
+ if (likely(res == CP_ACCESS_OK)) {
+ return ri;
+ }
+
+ fail:
+ switch (res & ~CP_ACCESS_EL_MASK) {
+ case CP_ACCESS_TRAP:
+ break;
+ case CP_ACCESS_TRAP_UNCATEGORIZED:
+ /* Only CP_ACCESS_TRAP traps are direct to a specified EL */
+ assert((res & CP_ACCESS_EL_MASK) == 0);
+ if (cpu_isar_feature(aa64_ids, cpu) && isread &&
+ arm_cpreg_in_idspace(ri)) {
+ /*
+ * FEAT_IDST says this should be reported as EC_SYSTEMREGISTERTRAP,
+ * not EC_UNCATEGORIZED
+ */
+ break;
+ }
+ syndrome = syn_uncategorized();
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ target_el = res & CP_ACCESS_EL_MASK;
+ switch (target_el) {
+ case 0:
+ target_el = exception_target_el(env);
+ break;
+ case 2:
+ assert(arm_current_el(env) != 3);
+ assert(arm_is_el2_enabled(env));
+ break;
+ case 3:
+ assert(arm_feature(env, ARM_FEATURE_EL3));
+ break;
+ default:
+ /* No "direct" traps to EL1 */
+ g_assert_not_reached();
+ }
+
+ raise_exception(env, EXCP_UDEF, syndrome, target_el);
+}
+
+const void *HELPER(lookup_cp_reg)(CPUARMState *env, uint32_t key)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ const ARMCPRegInfo *ri = get_arm_cp_reginfo(cpu->cp_regs, key);
+
+ assert(ri != NULL);
+ return ri;
+}
+
+/*
+ * Test for HCR_EL2.TIDCP at EL1.
+ * Since implementation defined registers are rare, and within QEMU
+ * most of them are no-op, do not waste HFLAGS space for this and
+ * always use a helper.
+ */
+void HELPER(tidcp_el1)(CPUARMState *env, uint32_t syndrome)
+{
+ if (arm_hcr_el2_eff(env) & HCR_TIDCP) {
+ raise_exception_ra(env, EXCP_UDEF, syndrome, 2, GETPC());
+ }
+}
+
+/*
+ * Similarly, for FEAT_TIDCP1 at EL0.
+ * We have already checked for the presence of the feature.
+ */
+void HELPER(tidcp_el0)(CPUARMState *env, uint32_t syndrome)
+{
+ /* See arm_sctlr(), but we also need the sctlr el. */
+ ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0);
+ int target_el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1;
+
+ /*
+ * The bit is not valid unless the target el is aa64, but since the
+ * bit test is simpler perform that first and check validity after.
+ */
+ if ((env->cp15.sctlr_el[target_el] & SCTLR_TIDCP)
+ && arm_el_is_aa64(env, target_el)) {
+ raise_exception_ra(env, EXCP_UDEF, syndrome, target_el, GETPC());
+ }
+}
+
+void HELPER(set_cp_reg)(CPUARMState *env, const void *rip, uint32_t value)
+{
+ const ARMCPRegInfo *ri = rip;
+
+ if (ri->type & ARM_CP_IO) {
+ bql_lock();
+ ri->writefn(env, ri, value);
+ bql_unlock();
+ } else {
+ ri->writefn(env, ri, value);
+ }
+}
+
+uint32_t HELPER(get_cp_reg)(CPUARMState *env, const void *rip)
+{
+ const ARMCPRegInfo *ri = rip;
+ uint32_t res;
+
+ if (ri->type & ARM_CP_IO) {
+ bql_lock();
+ res = ri->readfn(env, ri);
+ bql_unlock();
+ } else {
+ res = ri->readfn(env, ri);
+ }
+
+ return res;
+}
+
+void HELPER(set_cp_reg64)(CPUARMState *env, const void *rip, uint64_t value)
+{
+ const ARMCPRegInfo *ri = rip;
+
+ if (ri->type & ARM_CP_IO) {
+ bql_lock();
+ ri->writefn(env, ri, value);
+ bql_unlock();
+ } else {
+ ri->writefn(env, ri, value);
+ }
+}
+
+uint64_t HELPER(get_cp_reg64)(CPUARMState *env, const void *rip)
+{
+ const ARMCPRegInfo *ri = rip;
+ uint64_t res;
+
+ if (ri->type & ARM_CP_IO) {
+ bql_lock();
+ res = ri->readfn(env, ri);
+ bql_unlock();
+ } else {
+ res = ri->readfn(env, ri);
+ }
+
+ return res;
+}
+
+void HELPER(pre_hvc)(CPUARMState *env)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ int cur_el = arm_current_el(env);
+ /* FIXME: Use actual secure state. */
+ bool secure = false;
+ bool undef;
+
+ if (arm_is_psci_call(cpu, EXCP_HVC)) {
+ /* If PSCI is enabled and this looks like a valid PSCI call then
+ * that overrides the architecturally mandated HVC behaviour.
+ */
+ return;
+ }
+
+ if (!arm_feature(env, ARM_FEATURE_EL2)) {
+ /* If EL2 doesn't exist, HVC always UNDEFs */
+ undef = true;
+ } else if (arm_feature(env, ARM_FEATURE_EL3)) {
+ /* EL3.HCE has priority over EL2.HCD. */
+ undef = !(env->cp15.scr_el3 & SCR_HCE);
+ } else {
+ undef = env->cp15.hcr_el2 & HCR_HCD;
+ }
+
+ /* In ARMv7 and ARMv8/AArch32, HVC is undef in secure state.
+ * For ARMv8/AArch64, HVC is allowed in EL3.
+ * Note that we've already trapped HVC from EL0 at translation
+ * time.
+ */
+ if (secure && (!is_a64(env) || cur_el == 1)) {
+ undef = true;
+ }
+
+ if (undef) {
+ raise_exception(env, EXCP_UDEF, syn_uncategorized(),
+ exception_target_el(env));
+ }
+}
+
+void HELPER(pre_smc)(CPUARMState *env, uint32_t syndrome)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ int cur_el = arm_current_el(env);
+ bool secure = arm_is_secure(env);
+ bool smd_flag = env->cp15.scr_el3 & SCR_SMD;
+
+ /*
+ * SMC behaviour is summarized in the following table.
+ * This helper handles the "Trap to EL2" and "Undef insn" cases.
+ * The "Trap to EL3" and "PSCI call" cases are handled in the exception
+ * helper.
+ *
+ * -> ARM_FEATURE_EL3 and !SMD
+ * HCR_TSC && NS EL1 !HCR_TSC || !NS EL1
+ *
+ * Conduit SMC, valid call Trap to EL2 PSCI Call
+ * Conduit SMC, inval call Trap to EL2 Trap to EL3
+ * Conduit not SMC Trap to EL2 Trap to EL3
+ *
+ *
+ * -> ARM_FEATURE_EL3 and SMD
+ * HCR_TSC && NS EL1 !HCR_TSC || !NS EL1
+ *
+ * Conduit SMC, valid call Trap to EL2 PSCI Call
+ * Conduit SMC, inval call Trap to EL2 Undef insn
+ * Conduit not SMC Trap to EL2 Undef insn
+ *
+ *
+ * -> !ARM_FEATURE_EL3
+ * HCR_TSC && NS EL1 !HCR_TSC || !NS EL1
+ *
+ * Conduit SMC, valid call Trap to EL2 PSCI Call
+ * Conduit SMC, inval call Trap to EL2 Undef insn
+ * Conduit not SMC Undef or trap[1] Undef insn
+ *
+ * [1] In this case:
+ * - if HCR_EL2.NV == 1 we must trap to EL2
+ * - if HCR_EL2.NV == 0 then newer architecture revisions permit
+ * AArch64 (but not AArch32) to trap to EL2 as an IMPDEF choice
+ * - otherwise we must UNDEF
+ * We take the IMPDEF choice to always UNDEF if HCR_EL2.NV == 0.
+ */
+
+ /* On ARMv8 with EL3 AArch64, SMD applies to both S and NS state.
+ * On ARMv8 with EL3 AArch32, or ARMv7 with the Virtualization
+ * extensions, SMD only applies to NS state.
+ * On ARMv7 without the Virtualization extensions, the SMD bit
+ * doesn't exist, but we forbid the guest to set it to 1 in scr_write(),
+ * so we need not special case this here.
+ */
+ bool smd = arm_feature(env, ARM_FEATURE_AARCH64) ? smd_flag
+ : smd_flag && !secure;
+
+ if (!arm_feature(env, ARM_FEATURE_EL3) &&
+ !(arm_hcr_el2_eff(env) & HCR_NV) &&
+ cpu->psci_conduit != QEMU_PSCI_CONDUIT_SMC) {
+ /*
+ * If we have no EL3 then traditionally SMC always UNDEFs and can't be
+ * trapped to EL2. For nested virtualization, SMC can be trapped to
+ * the outer hypervisor. PSCI-via-SMC is a sort of ersatz EL3
+ * firmware within QEMU, and we want an EL2 guest to be able
+ * to forbid its EL1 from making PSCI calls into QEMU's
+ * "firmware" via HCR.TSC, so for these purposes treat
+ * PSCI-via-SMC as implying an EL3.
+ * This handles the very last line of the previous table.
+ */
+ raise_exception(env, EXCP_UDEF, syn_uncategorized(),
+ exception_target_el(env));
+ }
+
+ if (cur_el == 1 && (arm_hcr_el2_eff(env) & HCR_TSC)) {
+ /* In NS EL1, HCR controlled routing to EL2 has priority over SMD.
+ * We also want an EL2 guest to be able to forbid its EL1 from
+ * making PSCI calls into QEMU's "firmware" via HCR.TSC.
+ * This handles all the "Trap to EL2" cases of the previous table.
+ */
+ raise_exception(env, EXCP_HYP_TRAP, syndrome, 2);
+ }
+
+ /* Catch the two remaining "Undef insn" cases of the previous table:
+ * - PSCI conduit is SMC but we don't have a valid PSCI call,
+ * - We don't have EL3 or SMD is set.
+ */
+ if (!arm_is_psci_call(cpu, EXCP_SMC) &&
+ (smd || !arm_feature(env, ARM_FEATURE_EL3))) {
+ raise_exception(env, EXCP_UDEF, syn_uncategorized(),
+ exception_target_el(env));
+ }
+}
+
+/* ??? Flag setting arithmetic is awkward because we need to do comparisons.
+ The only way to do that in TCG is a conditional branch, which clobbers
+ all our temporaries. For now implement these as helper functions. */
+
+/* Similarly for variable shift instructions. */
+
+uint32_t HELPER(shl_cc)(CPUARMState *env, uint32_t x, uint32_t i)
+{
+ int shift = i & 0xff;
+ if (shift >= 32) {
+ if (shift == 32)
+ env->CF = x & 1;
+ else
+ env->CF = 0;
+ return 0;
+ } else if (shift != 0) {
+ env->CF = (x >> (32 - shift)) & 1;
+ return x << shift;
+ }
+ return x;
+}
+
+uint32_t HELPER(shr_cc)(CPUARMState *env, uint32_t x, uint32_t i)
+{
+ int shift = i & 0xff;
+ if (shift >= 32) {
+ if (shift == 32)
+ env->CF = (x >> 31) & 1;
+ else
+ env->CF = 0;
+ return 0;
+ } else if (shift != 0) {
+ env->CF = (x >> (shift - 1)) & 1;
+ return x >> shift;
+ }
+ return x;
+}
+
+uint32_t HELPER(sar_cc)(CPUARMState *env, uint32_t x, uint32_t i)
+{
+ int shift = i & 0xff;
+ if (shift >= 32) {
+ env->CF = (x >> 31) & 1;
+ return (int32_t)x >> 31;
+ } else if (shift != 0) {
+ env->CF = (x >> (shift - 1)) & 1;
+ return (int32_t)x >> shift;
+ }
+ return x;
+}
+
+uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)
+{
+ int shift1, shift;
+ shift1 = i & 0xff;
+ shift = shift1 & 0x1f;
+ if (shift == 0) {
+ if (shift1 != 0)
+ env->CF = (x >> 31) & 1;
+ return x;
+ } else {
+ env->CF = (x >> (shift - 1)) & 1;
+ return ((uint32_t)x >> shift) | (x << (32 - shift));
+ }
+}
+
+void HELPER(probe_access)(CPUARMState *env, target_ulong ptr,
+ uint32_t access_type, uint32_t mmu_idx,
+ uint32_t size)
+{
+ uint32_t in_page = -((uint32_t)ptr | TARGET_PAGE_SIZE);
+ uintptr_t ra = GETPC();
+
+ if (likely(size <= in_page)) {
+ probe_access(env, ptr, size, access_type, mmu_idx, ra);
+ } else {
+ probe_access(env, ptr, in_page, access_type, mmu_idx, ra);
+ probe_access(env, ptr + in_page, size - in_page,
+ access_type, mmu_idx, ra);
+ }
+}
+
+/*
+ * This function corresponds to AArch64.vESBOperation().
+ * Note that the AArch32 version is not functionally different.
+ */
+void HELPER(vesb)(CPUARMState *env)
+{
+ /*
+ * The EL2Enabled() check is done inside arm_hcr_el2_eff(), which
+ * returns 0 when EL2 is absent or disabled; HCR_EL2.VSE then reads
+ * as zero and nothing happens below.
+ */
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ bool enabled = !(hcr & HCR_TGE) && (hcr & HCR_AMO);
+ bool pending = enabled && (hcr & HCR_VSE);
+ bool masked = (env->daif & PSTATE_A);
+
+ /* If VSE pending and masked, defer the exception. */
+ if (pending && masked) {
+ uint32_t syndrome;
+
+ if (arm_el_is_aa64(env, 1)) {
+ /* Copy across IDS and ISS from VSESR. */
+ syndrome = env->cp15.vsesr_el2 & 0x1ffffff;
+ } else {
+ ARMMMUFaultInfo fi = { .type = ARMFault_AsyncExternal };
+
+ if (extended_addresses_enabled(env)) {
+ syndrome = arm_fi_to_lfsc(&fi);
+ } else {
+ syndrome = arm_fi_to_sfsc(&fi);
+ }
+ /* Copy across AET and ExT from VSESR. */
+ syndrome |= env->cp15.vsesr_el2 & 0xd000;
+ }
+
+ /* Set VDISR_EL2.A along with the syndrome. */
+ env->cp15.vdisr_el2 = syndrome | (1u << 31);
+
+ /* Clear pending virtual SError */
+ env->cp15.hcr_el2 &= ~HCR_VSE;
+ cpu_reset_interrupt(env_cpu(env), CPU_INTERRUPT_VSERR);
+ }
+}
diff --git a/target/arm/tcg/pauth_helper.c b/target/arm/tcg/pauth_helper.c
new file mode 100644
index 0000000000..c4b143024f
--- /dev/null
+++ b/target/arm/tcg/pauth_helper.c
@@ -0,0 +1,632 @@
+/*
+ * ARM v8.3-PAuth Operations
+ *
+ * Copyright (c) 2019 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "cpu-features.h"
+#include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
+#include "exec/helper-proto.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "qemu/xxhash.h"
+
+
+static uint64_t pac_cell_shuffle(uint64_t i)
+{
+ uint64_t o = 0;
+
+ o |= extract64(i, 52, 4);
+ o |= extract64(i, 24, 4) << 4;
+ o |= extract64(i, 44, 4) << 8;
+ o |= extract64(i, 0, 4) << 12;
+
+ o |= extract64(i, 28, 4) << 16;
+ o |= extract64(i, 48, 4) << 20;
+ o |= extract64(i, 4, 4) << 24;
+ o |= extract64(i, 40, 4) << 28;
+
+ o |= extract64(i, 32, 4) << 32;
+ o |= extract64(i, 12, 4) << 36;
+ o |= extract64(i, 56, 4) << 40;
+ o |= extract64(i, 20, 4) << 44;
+
+ o |= extract64(i, 8, 4) << 48;
+ o |= extract64(i, 36, 4) << 52;
+ o |= extract64(i, 16, 4) << 56;
+ o |= extract64(i, 60, 4) << 60;
+
+ return o;
+}
+
+static uint64_t pac_cell_inv_shuffle(uint64_t i)
+{
+ uint64_t o = 0;
+
+ o |= extract64(i, 12, 4);
+ o |= extract64(i, 24, 4) << 4;
+ o |= extract64(i, 48, 4) << 8;
+ o |= extract64(i, 36, 4) << 12;
+
+ o |= extract64(i, 56, 4) << 16;
+ o |= extract64(i, 44, 4) << 20;
+ o |= extract64(i, 4, 4) << 24;
+ o |= extract64(i, 16, 4) << 28;
+
+ o |= i & MAKE_64BIT_MASK(32, 4);
+ o |= extract64(i, 52, 4) << 36;
+ o |= extract64(i, 28, 4) << 40;
+ o |= extract64(i, 8, 4) << 44;
+
+ o |= extract64(i, 20, 4) << 48;
+ o |= extract64(i, 0, 4) << 52;
+ o |= extract64(i, 40, 4) << 56;
+ o |= i & MAKE_64BIT_MASK(60, 4);
+
+ return o;
+}
+
+static uint64_t pac_sub(uint64_t i)
+{
+ static const uint8_t sub[16] = {
+ 0xb, 0x6, 0x8, 0xf, 0xc, 0x0, 0x9, 0xe,
+ 0x3, 0x7, 0x4, 0x5, 0xd, 0x2, 0x1, 0xa,
+ };
+ uint64_t o = 0;
+ int b;
+
+ for (b = 0; b < 64; b += 4) {
+ o |= (uint64_t)sub[(i >> b) & 0xf] << b;
+ }
+ return o;
+}
+
+static uint64_t pac_sub1(uint64_t i)
+{
+ static const uint8_t sub1[16] = {
+ 0xa, 0xd, 0xe, 0x6, 0xf, 0x7, 0x3, 0x5,
+ 0x9, 0x8, 0x0, 0xc, 0xb, 0x1, 0x2, 0x4,
+ };
+ uint64_t o = 0;
+ int b;
+
+ for (b = 0; b < 64; b += 4) {
+ o |= (uint64_t)sub1[(i >> b) & 0xf] << b;
+ }
+ return o;
+}
+
+static uint64_t pac_inv_sub(uint64_t i)
+{
+ static const uint8_t inv_sub[16] = {
+ 0x5, 0xe, 0xd, 0x8, 0xa, 0xb, 0x1, 0x9,
+ 0x2, 0x6, 0xf, 0x0, 0x4, 0xc, 0x7, 0x3,
+ };
+ uint64_t o = 0;
+ int b;
+
+ for (b = 0; b < 64; b += 4) {
+ o |= (uint64_t)inv_sub[(i >> b) & 0xf] << b;
+ }
+ return o;
+}
+
+static int rot_cell(int cell, int n)
+{
+ /* 4-bit rotate left by n. */
+ cell |= cell << 4;
+ return extract32(cell, 4 - n, 4);
+}
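+
+/*
+ * For illustration: rot_cell(0b0011, 1) widens the cell to 0b0011_0011,
+ * and extract32(..., 3, 4) picks bits [6:3] = 0b0110, i.e. the 4-bit
+ * value rotated left by one, as intended.
+ */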
+
+static uint64_t pac_mult(uint64_t i)
+{
+ uint64_t o = 0;
+ int b;
+
+ for (b = 0; b < 4 * 4; b += 4) {
+ int i0, i4, i8, ic, t0, t1, t2, t3;
+
+ i0 = extract64(i, b, 4);
+ i4 = extract64(i, b + 4 * 4, 4);
+ i8 = extract64(i, b + 8 * 4, 4);
+ ic = extract64(i, b + 12 * 4, 4);
+
+ t0 = rot_cell(i8, 1) ^ rot_cell(i4, 2) ^ rot_cell(i0, 1);
+ t1 = rot_cell(ic, 1) ^ rot_cell(i4, 1) ^ rot_cell(i0, 2);
+ t2 = rot_cell(ic, 2) ^ rot_cell(i8, 1) ^ rot_cell(i0, 1);
+ t3 = rot_cell(ic, 1) ^ rot_cell(i8, 2) ^ rot_cell(i4, 1);
+
+ o |= (uint64_t)t3 << b;
+ o |= (uint64_t)t2 << (b + 4 * 4);
+ o |= (uint64_t)t1 << (b + 8 * 4);
+ o |= (uint64_t)t0 << (b + 12 * 4);
+ }
+ return o;
+}
+
+static uint64_t tweak_cell_rot(uint64_t cell)
+{
+ return (cell >> 1) | (((cell ^ (cell >> 1)) & 1) << 3);
+}
+
+static uint64_t tweak_shuffle(uint64_t i)
+{
+ uint64_t o = 0;
+
+ o |= extract64(i, 16, 4) << 0;
+ o |= extract64(i, 20, 4) << 4;
+ o |= tweak_cell_rot(extract64(i, 24, 4)) << 8;
+ o |= extract64(i, 28, 4) << 12;
+
+ o |= tweak_cell_rot(extract64(i, 44, 4)) << 16;
+ o |= extract64(i, 8, 4) << 20;
+ o |= extract64(i, 12, 4) << 24;
+ o |= tweak_cell_rot(extract64(i, 32, 4)) << 28;
+
+ o |= extract64(i, 48, 4) << 32;
+ o |= extract64(i, 52, 4) << 36;
+ o |= extract64(i, 56, 4) << 40;
+ o |= tweak_cell_rot(extract64(i, 60, 4)) << 44;
+
+ o |= tweak_cell_rot(extract64(i, 0, 4)) << 48;
+ o |= extract64(i, 4, 4) << 52;
+ o |= tweak_cell_rot(extract64(i, 40, 4)) << 56;
+ o |= tweak_cell_rot(extract64(i, 36, 4)) << 60;
+
+ return o;
+}
+
+static uint64_t tweak_cell_inv_rot(uint64_t cell)
+{
+ return ((cell << 1) & 0xf) | ((cell & 1) ^ (cell >> 3));
+}
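+
+/*
+ * Sanity check, for illustration: tweak_cell_rot(0b0001) == 0b1000 and
+ * tweak_cell_inv_rot(0b1000) == 0b0001, so the two routines are exact
+ * inverses on 4-bit cells, as the tweak schedule requires.
+ */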
+
+static uint64_t tweak_inv_shuffle(uint64_t i)
+{
+ uint64_t o = 0;
+
+ o |= tweak_cell_inv_rot(extract64(i, 48, 4));
+ o |= extract64(i, 52, 4) << 4;
+ o |= extract64(i, 20, 4) << 8;
+ o |= extract64(i, 24, 4) << 12;
+
+ o |= extract64(i, 0, 4) << 16;
+ o |= extract64(i, 4, 4) << 20;
+ o |= tweak_cell_inv_rot(extract64(i, 8, 4)) << 24;
+ o |= extract64(i, 12, 4) << 28;
+
+ o |= tweak_cell_inv_rot(extract64(i, 28, 4)) << 32;
+ o |= tweak_cell_inv_rot(extract64(i, 60, 4)) << 36;
+ o |= tweak_cell_inv_rot(extract64(i, 56, 4)) << 40;
+ o |= tweak_cell_inv_rot(extract64(i, 16, 4)) << 44;
+
+ o |= extract64(i, 32, 4) << 48;
+ o |= extract64(i, 36, 4) << 52;
+ o |= extract64(i, 40, 4) << 56;
+ o |= tweak_cell_inv_rot(extract64(i, 44, 4)) << 60;
+
+ return o;
+}
+
+static uint64_t pauth_computepac_architected(uint64_t data, uint64_t modifier,
+ ARMPACKey key, bool isqarma3)
+{
+ static const uint64_t RC[5] = {
+ 0x0000000000000000ull,
+ 0x13198A2E03707344ull,
+ 0xA4093822299F31D0ull,
+ 0x082EFA98EC4E6C89ull,
+ 0x452821E638D01377ull,
+ };
+ const uint64_t alpha = 0xC0AC29B7C97C50DDull;
+ int iterations = isqarma3 ? 2 : 4;
+ /*
+ * Note that in the ARM pseudocode, key0 contains bits <127:64>
+ * and key1 contains bits <63:0> of the 128-bit key.
+ */
+ uint64_t key0 = key.hi, key1 = key.lo;
+ uint64_t workingval, runningmod, roundkey, modk0;
+ int i;
+
+ modk0 = (key0 << 63) | ((key0 >> 1) ^ (key0 >> 63));
+ runningmod = modifier;
+ workingval = data ^ key0;
+
+ for (i = 0; i <= iterations; ++i) {
+ roundkey = key1 ^ runningmod;
+ workingval ^= roundkey;
+ workingval ^= RC[i];
+ if (i > 0) {
+ workingval = pac_cell_shuffle(workingval);
+ workingval = pac_mult(workingval);
+ }
+ if (isqarma3) {
+ workingval = pac_sub1(workingval);
+ } else {
+ workingval = pac_sub(workingval);
+ }
+ runningmod = tweak_shuffle(runningmod);
+ }
+ roundkey = modk0 ^ runningmod;
+ workingval ^= roundkey;
+ workingval = pac_cell_shuffle(workingval);
+ workingval = pac_mult(workingval);
+ if (isqarma3) {
+ workingval = pac_sub1(workingval);
+ } else {
+ workingval = pac_sub(workingval);
+ }
+ workingval = pac_cell_shuffle(workingval);
+ workingval = pac_mult(workingval);
+ workingval ^= key1;
+ workingval = pac_cell_inv_shuffle(workingval);
+ if (isqarma3) {
+ workingval = pac_sub1(workingval);
+ } else {
+ workingval = pac_inv_sub(workingval);
+ }
+ workingval = pac_mult(workingval);
+ workingval = pac_cell_inv_shuffle(workingval);
+ workingval ^= key0;
+ workingval ^= runningmod;
+ for (i = 0; i <= iterations; ++i) {
+ if (isqarma3) {
+ workingval = pac_sub1(workingval);
+ } else {
+ workingval = pac_inv_sub(workingval);
+ }
+ if (i < iterations) {
+ workingval = pac_mult(workingval);
+ workingval = pac_cell_inv_shuffle(workingval);
+ }
+ runningmod = tweak_inv_shuffle(runningmod);
+ roundkey = key1 ^ runningmod;
+ workingval ^= RC[iterations - i];
+ workingval ^= roundkey;
+ workingval ^= alpha;
+ }
+ workingval ^= modk0;
+
+ return workingval;
+}
+
+static uint64_t pauth_computepac_impdef(uint64_t data, uint64_t modifier,
+ ARMPACKey key)
+{
+ return qemu_xxhash64_4(data, modifier, key.lo, key.hi);
+}
+
+static uint64_t pauth_computepac(CPUARMState *env, uint64_t data,
+ uint64_t modifier, ARMPACKey key)
+{
+ if (cpu_isar_feature(aa64_pauth_qarma5, env_archcpu(env))) {
+ return pauth_computepac_architected(data, modifier, key, false);
+ } else if (cpu_isar_feature(aa64_pauth_qarma3, env_archcpu(env))) {
+ return pauth_computepac_architected(data, modifier, key, true);
+ } else {
+ return pauth_computepac_impdef(data, modifier, key);
+ }
+}
+
+static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier,
+ ARMPACKey *key, bool data)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env);
+ ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data, false);
+ ARMPauthFeature pauth_feature = cpu_isar_feature(pauth_feature, cpu);
+ uint64_t pac, ext_ptr, ext, test;
+ int bot_bit, top_bit;
+
+ /* If tagged pointers are in use, use ptr<55>, otherwise ptr<63>. */
+ if (param.tbi) {
+ ext = sextract64(ptr, 55, 1);
+ } else {
+ ext = sextract64(ptr, 63, 1);
+ }
+
+ /* Build a pointer with known good extension bits. */
+ top_bit = 64 - 8 * param.tbi;
+ bot_bit = 64 - param.tsz;
+ ext_ptr = deposit64(ptr, bot_bit, top_bit - bot_bit, ext);
+
+ pac = pauth_computepac(env, ext_ptr, modifier, *key);
+
+ /*
+ * Check if the ptr has good extension bits and corrupt the
+ * pointer authentication code if not.
+ */
+ test = sextract64(ptr, bot_bit, top_bit - bot_bit);
+ if (test != 0 && test != -1) {
+ if (pauth_feature >= PauthFeat_2) {
+ /* No action required */
+ } else if (pauth_feature == PauthFeat_EPAC) {
+ pac = 0;
+ } else {
+ /*
+ * Note that our top_bit is one greater than the pseudocode's
+ * version, hence "- 2" here.
+ */
+ pac ^= MAKE_64BIT_MASK(top_bit - 2, 1);
+ }
+ }
+
+ /*
+ * Preserve the determination between upper and lower at bit 55,
+ * and insert pointer authentication code.
+ */
+ if (pauth_feature >= PauthFeat_2) {
+ pac ^= ptr;
+ }
+ if (param.tbi) {
+ ptr &= ~MAKE_64BIT_MASK(bot_bit, 55 - bot_bit + 1);
+ pac &= MAKE_64BIT_MASK(bot_bit, 54 - bot_bit + 1);
+ } else {
+ ptr &= MAKE_64BIT_MASK(0, bot_bit);
+ pac &= ~(MAKE_64BIT_MASK(55, 1) | MAKE_64BIT_MASK(0, bot_bit));
+ }
+ ext &= MAKE_64BIT_MASK(55, 1);
+ return pac | ext | ptr;
+}
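+
+/*
+ * Concrete layout, as a sketch for one common configuration: with TBI
+ * enabled and a 48-bit VA (param.tbi == 1, param.tsz == 16), top_bit is
+ * 56 and bot_bit is 48, so the PAC is inserted in bits [54:48], bit 55
+ * keeps the upper/lower-half selector, and the tag byte in bits [63:56]
+ * is left untouched. With TBI disabled the PAC instead spans bits
+ * [63:56] and [54:48], again skipping bit 55.
+ */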
+
+static uint64_t pauth_original_ptr(uint64_t ptr, ARMVAParameters param)
+{
+ uint64_t mask = pauth_ptr_mask(param);
+
+ /* Note that bit 55 is used whether or not the regime has 2 ranges. */
+ if (extract64(ptr, 55, 1)) {
+ return ptr | mask;
+ } else {
+ return ptr & ~mask;
+ }
+}
+
+static G_NORETURN
+void pauth_fail_exception(CPUARMState *env, bool data,
+ int keynumber, uintptr_t ra)
+{
+ raise_exception_ra(env, EXCP_UDEF, syn_pacfail(data, keynumber),
+ exception_target_el(env), ra);
+}
+
+static uint64_t pauth_auth(CPUARMState *env, uint64_t ptr, uint64_t modifier,
+ ARMPACKey *key, bool data, int keynumber,
+ uintptr_t ra, bool is_combined)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env);
+ ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data, false);
+ ARMPauthFeature pauth_feature = cpu_isar_feature(pauth_feature, cpu);
+ int bot_bit, top_bit;
+ uint64_t pac, orig_ptr, cmp_mask;
+
+ orig_ptr = pauth_original_ptr(ptr, param);
+ pac = pauth_computepac(env, orig_ptr, modifier, *key);
+ bot_bit = 64 - param.tsz;
+ top_bit = 64 - 8 * param.tbi;
+
+ cmp_mask = MAKE_64BIT_MASK(bot_bit, top_bit - bot_bit);
+ cmp_mask &= ~MAKE_64BIT_MASK(55, 1);
+
+ if (pauth_feature >= PauthFeat_2) {
+ ARMPauthFeature fault_feature =
+ is_combined ? PauthFeat_FPACCOMBINED : PauthFeat_FPAC;
+ uint64_t result = ptr ^ (pac & cmp_mask);
+
+ if (pauth_feature >= fault_feature
+ && ((result ^ sextract64(result, 55, 1)) & cmp_mask)) {
+ pauth_fail_exception(env, data, keynumber, ra);
+ }
+ return result;
+ }
+
+ if ((pac ^ ptr) & cmp_mask) {
+ int error_code = (keynumber << 1) | (keynumber ^ 1);
+ if (param.tbi) {
+ return deposit64(orig_ptr, 53, 2, error_code);
+ } else {
+ return deposit64(orig_ptr, 61, 2, error_code);
+ }
+ }
+ return orig_ptr;
+}
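+
+/*
+ * For the pre-FEAT_PAuth2 path above, the poisoned error code is easy
+ * to read off (illustrative): an A-key failure (keynumber 0) writes
+ * 0b01 and a B-key failure (keynumber 1) writes 0b10 into bits [54:53]
+ * (or [62:61] without TBI). Either value makes the pointer
+ * non-canonical, so a later dereference faults, and also records which
+ * key failed the check.
+ */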
+
+static uint64_t pauth_strip(CPUARMState *env, uint64_t ptr, bool data)
+{
+ ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env);
+ ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data, false);
+
+ return pauth_original_ptr(ptr, param);
+}
+
+static G_NORETURN
+void pauth_trap(CPUARMState *env, int target_el, uintptr_t ra)
+{
+ raise_exception_ra(env, EXCP_UDEF, syn_pactrap(), target_el, ra);
+}
+
+static void pauth_check_trap(CPUARMState *env, int el, uintptr_t ra)
+{
+ if (el < 2 && arm_is_el2_enabled(env)) {
+ uint64_t hcr = arm_hcr_el2_eff(env);
+ bool trap = !(hcr & HCR_API);
+ if (el == 0) {
+ /* Trap only applies to EL1&0 regime. */
+ trap &= (hcr & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE);
+ }
+ /* FIXME: ARMv8.3-NV: HCR_NV trap takes precedence for ERETA[AB]. */
+ if (trap) {
+ pauth_trap(env, 2, ra);
+ }
+ }
+ if (el < 3 && arm_feature(env, ARM_FEATURE_EL3)) {
+ if (!(env->cp15.scr_el3 & SCR_API)) {
+ pauth_trap(env, 3, ra);
+ }
+ }
+}
+
+static bool pauth_key_enabled(CPUARMState *env, int el, uint32_t bit)
+{
+ return (arm_sctlr(env, el) & bit) != 0;
+}
+
+uint64_t HELPER(pacia)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+ int el = arm_current_el(env);
+ if (!pauth_key_enabled(env, el, SCTLR_EnIA)) {
+ return x;
+ }
+ pauth_check_trap(env, el, GETPC());
+ return pauth_addpac(env, x, y, &env->keys.apia, false);
+}
+
+uint64_t HELPER(pacib)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+ int el = arm_current_el(env);
+ if (!pauth_key_enabled(env, el, SCTLR_EnIB)) {
+ return x;
+ }
+ pauth_check_trap(env, el, GETPC());
+ return pauth_addpac(env, x, y, &env->keys.apib, false);
+}
+
+uint64_t HELPER(pacda)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+ int el = arm_current_el(env);
+ if (!pauth_key_enabled(env, el, SCTLR_EnDA)) {
+ return x;
+ }
+ pauth_check_trap(env, el, GETPC());
+ return pauth_addpac(env, x, y, &env->keys.apda, true);
+}
+
+uint64_t HELPER(pacdb)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+ int el = arm_current_el(env);
+ if (!pauth_key_enabled(env, el, SCTLR_EnDB)) {
+ return x;
+ }
+ pauth_check_trap(env, el, GETPC());
+ return pauth_addpac(env, x, y, &env->keys.apdb, true);
+}
+
+uint64_t HELPER(pacga)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+ uint64_t pac;
+
+ pauth_check_trap(env, arm_current_el(env), GETPC());
+ pac = pauth_computepac(env, x, y, env->keys.apga);
+
+ return pac & 0xffffffff00000000ull;
+}
+
+static uint64_t pauth_autia(CPUARMState *env, uint64_t x, uint64_t y,
+ uintptr_t ra, bool is_combined)
+{
+ int el = arm_current_el(env);
+ if (!pauth_key_enabled(env, el, SCTLR_EnIA)) {
+ return x;
+ }
+ pauth_check_trap(env, el, ra);
+ return pauth_auth(env, x, y, &env->keys.apia, false, 0, ra, is_combined);
+}
+
+uint64_t HELPER(autia)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+ return pauth_autia(env, x, y, GETPC(), false);
+}
+
+uint64_t HELPER(autia_combined)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+ return pauth_autia(env, x, y, GETPC(), true);
+}
+
+static uint64_t pauth_autib(CPUARMState *env, uint64_t x, uint64_t y,
+ uintptr_t ra, bool is_combined)
+{
+ int el = arm_current_el(env);
+ if (!pauth_key_enabled(env, el, SCTLR_EnIB)) {
+ return x;
+ }
+ pauth_check_trap(env, el, ra);
+ return pauth_auth(env, x, y, &env->keys.apib, false, 1, ra, is_combined);
+}
+
+uint64_t HELPER(autib)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+ return pauth_autib(env, x, y, GETPC(), false);
+}
+
+uint64_t HELPER(autib_combined)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+ return pauth_autib(env, x, y, GETPC(), true);
+}
+
+static uint64_t pauth_autda(CPUARMState *env, uint64_t x, uint64_t y,
+ uintptr_t ra, bool is_combined)
+{
+ int el = arm_current_el(env);
+ if (!pauth_key_enabled(env, el, SCTLR_EnDA)) {
+ return x;
+ }
+ pauth_check_trap(env, el, ra);
+ return pauth_auth(env, x, y, &env->keys.apda, true, 0, ra, is_combined);
+}
+
+uint64_t HELPER(autda)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+ return pauth_autda(env, x, y, GETPC(), false);
+}
+
+uint64_t HELPER(autda_combined)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+ return pauth_autda(env, x, y, GETPC(), true);
+}
+
+static uint64_t pauth_autdb(CPUARMState *env, uint64_t x, uint64_t y,
+ uintptr_t ra, bool is_combined)
+{
+ int el = arm_current_el(env);
+ if (!pauth_key_enabled(env, el, SCTLR_EnDB)) {
+ return x;
+ }
+ pauth_check_trap(env, el, ra);
+ return pauth_auth(env, x, y, &env->keys.apdb, true, 1, ra, is_combined);
+}
+
+uint64_t HELPER(autdb)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+ return pauth_autdb(env, x, y, GETPC(), false);
+}
+
+uint64_t HELPER(autdb_combined)(CPUARMState *env, uint64_t x, uint64_t y)
+{
+ return pauth_autdb(env, x, y, GETPC(), true);
+}
+
+uint64_t HELPER(xpaci)(CPUARMState *env, uint64_t a)
+{
+ return pauth_strip(env, a, false);
+}
+
+uint64_t HELPER(xpacd)(CPUARMState *env, uint64_t a)
+{
+ return pauth_strip(env, a, true);
+}
diff --git a/target/arm/psci.c b/target/arm/tcg/psci.c
index a74d78802a..51d2ca3d30 100644
--- a/target/arm/psci.c
+++ b/target/arm/tcg/psci.c
@@ -15,25 +15,26 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"
#include "kvm-consts.h"
-#include "sysemu/sysemu.h"
+#include "qemu/main-loop.h"
+#include "sysemu/runstate.h"
#include "internals.h"
#include "arm-powerctl.h"
+#include "target/arm/multiprocessing.h"
bool arm_is_psci_call(ARMCPU *cpu, int excp_type)
{
- /* Return true if the r0/x0 value indicates a PSCI call and
- * the exception type matches the configured PSCI conduit. This is
- * called before the SMC/HVC instruction is executed, to decide whether
- * we should treat it as a PSCI call or with the architecturally
+ /*
+ * Return true if the exception type matches the configured PSCI conduit.
+ * This is called before the SMC/HVC instruction is executed, to decide
+ * whether we should treat it as a PSCI call or with the architecturally
* defined behaviour for an SMC or HVC (which might be UNDEF or trap
* to EL2 or to EL3).
*/
- CPUARMState *env = &cpu->env;
- uint64_t param = is_a64(env) ? env->xregs[0] : env->regs[0];
switch (excp_type) {
case EXCP_HVC:
@@ -50,34 +51,14 @@ bool arm_is_psci_call(ARMCPU *cpu, int excp_type)
return false;
}
- switch (param) {
- case QEMU_PSCI_0_2_FN_PSCI_VERSION:
- case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE:
- case QEMU_PSCI_0_2_FN_AFFINITY_INFO:
- case QEMU_PSCI_0_2_FN64_AFFINITY_INFO:
- case QEMU_PSCI_0_2_FN_SYSTEM_RESET:
- case QEMU_PSCI_0_2_FN_SYSTEM_OFF:
- case QEMU_PSCI_0_1_FN_CPU_ON:
- case QEMU_PSCI_0_2_FN_CPU_ON:
- case QEMU_PSCI_0_2_FN64_CPU_ON:
- case QEMU_PSCI_0_1_FN_CPU_OFF:
- case QEMU_PSCI_0_2_FN_CPU_OFF:
- case QEMU_PSCI_0_1_FN_CPU_SUSPEND:
- case QEMU_PSCI_0_2_FN_CPU_SUSPEND:
- case QEMU_PSCI_0_2_FN64_CPU_SUSPEND:
- case QEMU_PSCI_0_1_FN_MIGRATE:
- case QEMU_PSCI_0_2_FN_MIGRATE:
- return true;
- default:
- return false;
- }
+ return true;
}
void arm_handle_psci_call(ARMCPU *cpu)
{
/*
* This function partially implements the logic for dispatching Power State
- * Coordination Interface (PSCI) calls (as described in ARM DEN 0022B.b),
+ * Coordination Interface (PSCI) calls (as described in ARM DEN 0022D.b),
* to the extent required for bringing up and taking down secondary cores,
* and for handling reset and poweroff requests.
* Additional information about the calling convention used is available in
@@ -100,7 +81,7 @@ void arm_handle_psci_call(ARMCPU *cpu)
}
if ((param[0] & QEMU_PSCI_0_2_64BIT) && !is_a64(env)) {
- ret = QEMU_PSCI_RET_INVALID_PARAMS;
+ ret = QEMU_PSCI_RET_NOT_SUPPORTED;
goto err;
}
@@ -109,7 +90,7 @@ void arm_handle_psci_call(ARMCPU *cpu)
ARMCPU *target_cpu;
case QEMU_PSCI_0_2_FN_PSCI_VERSION:
- ret = QEMU_PSCI_0_2_RET_VERSION_0_2;
+ ret = QEMU_PSCI_VERSION_1_1;
break;
case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE:
ret = QEMU_PSCI_0_2_RET_TOS_MIGRATION_NOT_REQUIRED; /* No trusted OS */
@@ -127,7 +108,7 @@ void arm_handle_psci_call(ARMCPU *cpu)
}
target_cpu = ARM_CPU(target_cpu_state);
- g_assert(qemu_mutex_iothread_locked());
+ g_assert(bql_locked());
ret = target_cpu->power_state;
break;
default:
@@ -190,12 +171,40 @@ void arm_handle_psci_call(ARMCPU *cpu)
}
helper_wfi(env, 4);
break;
+ case QEMU_PSCI_1_0_FN_PSCI_FEATURES:
+ switch (param[1]) {
+ case QEMU_PSCI_0_2_FN_PSCI_VERSION:
+ case QEMU_PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+ case QEMU_PSCI_0_2_FN_AFFINITY_INFO:
+ case QEMU_PSCI_0_2_FN64_AFFINITY_INFO:
+ case QEMU_PSCI_0_2_FN_SYSTEM_RESET:
+ case QEMU_PSCI_0_2_FN_SYSTEM_OFF:
+ case QEMU_PSCI_0_1_FN_CPU_ON:
+ case QEMU_PSCI_0_2_FN_CPU_ON:
+ case QEMU_PSCI_0_2_FN64_CPU_ON:
+ case QEMU_PSCI_0_1_FN_CPU_OFF:
+ case QEMU_PSCI_0_2_FN_CPU_OFF:
+ case QEMU_PSCI_0_1_FN_CPU_SUSPEND:
+ case QEMU_PSCI_0_2_FN_CPU_SUSPEND:
+ case QEMU_PSCI_0_2_FN64_CPU_SUSPEND:
+ case QEMU_PSCI_1_0_FN_PSCI_FEATURES:
+ if (!(param[1] & QEMU_PSCI_0_2_64BIT) || is_a64(env)) {
+ ret = 0;
+ break;
+ }
+ /* fallthrough */
+ case QEMU_PSCI_0_1_FN_MIGRATE:
+ case QEMU_PSCI_0_2_FN_MIGRATE:
+ default:
+ ret = QEMU_PSCI_RET_NOT_SUPPORTED;
+ break;
+ }
+ break;
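+ /*
+ * For example (illustrative): a 64-bit guest calling PSCI_FEATURES
+ * (0x8400000a) with x1 = 0xc4000003 (SMC64 CPU_ON) now gets 0 back,
+ * while querying MIGRATE (0x84000005) returns NOT_SUPPORTED.
+ */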
case QEMU_PSCI_0_1_FN_MIGRATE:
case QEMU_PSCI_0_2_FN_MIGRATE:
+ default:
ret = QEMU_PSCI_RET_NOT_SUPPORTED;
break;
- default:
- g_assert_not_reached();
}
err:
@@ -207,7 +216,7 @@ err:
return;
cpu_off:
- ret = arm_set_cpu_off(cpu->mp_affinity);
+ ret = arm_set_cpu_off(arm_cpu_mp_affinity(cpu));
/* notreached */
/* sanity check in case something failed */
assert(ret == QEMU_ARM_POWERCTL_RET_SUCCESS);
diff --git a/target/arm/tcg/sme-fa64.decode b/target/arm/tcg/sme-fa64.decode
new file mode 100644
index 0000000000..47708ccc8d
--- /dev/null
+++ b/target/arm/tcg/sme-fa64.decode
@@ -0,0 +1,60 @@
+# AArch64 SME allowed instruction decoding
+#
+# Copyright (c) 2022 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+# These patterns are taken from Appendix E1.1 of DDI0616 A.a,
+# Arm Architecture Reference Manual Supplement,
+# The Scalable Matrix Extension (SME), for Armv9-A
+
+{
+ [
+ OK 0-00 1110 0000 0001 0010 11-- ---- ---- # SMOV W|Xd,Vn.B[0]
+ OK 0-00 1110 0000 0010 0010 11-- ---- ---- # SMOV W|Xd,Vn.H[0]
+ OK 0100 1110 0000 0100 0010 11-- ---- ---- # SMOV Xd,Vn.S[0]
+ OK 0000 1110 0000 0001 0011 11-- ---- ---- # UMOV Wd,Vn.B[0]
+ OK 0000 1110 0000 0010 0011 11-- ---- ---- # UMOV Wd,Vn.H[0]
+ OK 0000 1110 0000 0100 0011 11-- ---- ---- # UMOV Wd,Vn.S[0]
+ OK 0100 1110 0000 1000 0011 11-- ---- ---- # UMOV Xd,Vn.D[0]
+ ]
+ FAIL 0--0 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD vector operations
+}
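+
+# For example (illustrative): 0x4e083c20, UMOV X0, V1.D[0], matches the
+# "UMOV Xd,Vn.D[0]" OK pattern above and so stays legal in streaming SVE
+# mode, while other encodings in this Advanced SIMD space fall through to
+# the FAIL pattern and are treated as illegal unless FEAT_SME_FA64 is
+# implemented.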
+
+{
+ [
+ OK 0101 1110 --1- ---- 11-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar)
+ OK 0101 1110 -10- ---- 00-1 11-- ---- ---- # FMULX/FRECPS/FRSQRTS (scalar, FP16)
+ OK 01-1 1110 1-10 0001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar)
+ OK 01-1 1110 1111 1001 11-1 10-- ---- ---- # FRECPE/FRSQRTE/FRECPX (scalar, FP16)
+ ]
+ FAIL 01-1 111- ---- ---- ---- ---- ---- ---- # Advanced SIMD single-element operations
+}
+
+FAIL 0-00 110- ---- ---- ---- ---- ---- ---- # Advanced SIMD structure load/store
+FAIL 1100 1110 ---- ---- ---- ---- ---- ---- # Advanced SIMD cryptography extensions
+FAIL 0001 1110 0111 1110 0000 00-- ---- ---- # FJCVTZS
+
+# These patterns are the "avoidance of doubt" final table of Illegal
+# Advanced SIMD instructions. We don't actually need to include them,
+# as the default is OK.
+# -001 111- ---- ---- ---- ---- ---- ---- # Scalar floating-point operations
+# --10 110- ---- ---- ---- ---- ---- ---- # Load/store pair of FP registers
+# --01 1100 ---- ---- ---- ---- ---- ---- # Load FP register (PC-relative literal)
+# --11 1100 --0- ---- ---- ---- ---- ---- # Load/store FP register (unscaled imm)
+# --11 1100 --1- ---- ---- ---- ---- --10 # Load/store FP register (register offset)
+# --11 1101 ---- ---- ---- ---- ---- ---- # Load/store FP register (scaled imm)
diff --git a/target/arm/tcg/sme.decode b/target/arm/tcg/sme.decode
new file mode 100644
index 0000000000..628804e37a
--- /dev/null
+++ b/target/arm/tcg/sme.decode
@@ -0,0 +1,88 @@
+# AArch64 SME instruction descriptions
+#
+# Copyright (c) 2022 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+### SME Misc
+
+ZERO 11000000 00 001 00000000000 imm:8
+
+### SME Move into/from Array
+
+%mova_rs 13:2 !function=plus_12
+&mova esz rs pg zr za_imm v:bool to_vec:bool
+
+MOVA 11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4 \
+ &mova to_vec=0 rs=%mova_rs
+MOVA 11000000 11 00000 1 v:1 .. pg:3 zr:5 0 za_imm:4 \
+ &mova to_vec=0 rs=%mova_rs esz=4
+
+MOVA 11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5 \
+ &mova to_vec=1 rs=%mova_rs
+MOVA 11000000 11 00001 1 v:1 .. pg:3 0 za_imm:4 zr:5 \
+ &mova to_vec=1 rs=%mova_rs esz=4
+
+### SME Memory
+
+&ldst esz rs pg rn rm za_imm v:bool st:bool
+
+LDST1 1110000 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
+ &ldst rs=%mova_rs
+LDST1 1110000 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4 \
+ &ldst esz=4 rs=%mova_rs
+
+&ldstr rv rn imm
+@ldstr ....... ... . ...... .. ... rn:5 . imm:4 \
+ &ldstr rv=%mova_rs
+
+LDR 1110000 100 0 000000 .. 000 ..... 0 .... @ldstr
+STR 1110000 100 1 000000 .. 000 ..... 0 .... @ldstr
+
+### SME Add Vector to Array
+
+&adda zad zn pm pn
+@adda_32 ........ .. ..... . pm:3 pn:3 zn:5 ... zad:2 &adda
+@adda_64 ........ .. ..... . pm:3 pn:3 zn:5 .. zad:3 &adda
+
+ADDHA_s 11000000 10 01000 0 ... ... ..... 000 .. @adda_32
+ADDVA_s 11000000 10 01000 1 ... ... ..... 000 .. @adda_32
+ADDHA_d 11000000 11 01000 0 ... ... ..... 00 ... @adda_64
+ADDVA_d 11000000 11 01000 1 ... ... ..... 00 ... @adda_64
+
+### SME Outer Product
+
+&op zad zn zm pm pn sub:bool
+@op_32 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op
+@op_64 ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 . zad:3 &op
+
+FMOPA_s 10000000 100 ..... ... ... ..... . 00 .. @op_32
+FMOPA_d 10000000 110 ..... ... ... ..... . 0 ... @op_64
+
+BFMOPA 10000001 100 ..... ... ... ..... . 00 .. @op_32
+FMOPA_h 10000001 101 ..... ... ... ..... . 00 .. @op_32
+
+SMOPA_s 1010000 0 10 0 ..... ... ... ..... . 00 .. @op_32
+SUMOPA_s 1010000 0 10 1 ..... ... ... ..... . 00 .. @op_32
+USMOPA_s 1010000 1 10 0 ..... ... ... ..... . 00 .. @op_32
+UMOPA_s 1010000 1 10 1 ..... ... ... ..... . 00 .. @op_32
+
+SMOPA_d 1010000 0 11 0 ..... ... ... ..... . 0 ... @op_64
+SUMOPA_d 1010000 0 11 1 ..... ... ... ..... . 0 ... @op_64
+USMOPA_d 1010000 1 11 0 ..... ... ... ..... . 0 ... @op_64
+UMOPA_d 1010000 1 11 1 ..... ... ... ..... . 0 ... @op_64
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
new file mode 100644
index 0000000000..e2e0575039
--- /dev/null
+++ b/target/arm/tcg/sme_helper.c
@@ -0,0 +1,1179 @@
+/*
+ * ARM SME Operations
+ *
+ * Copyright (c) 2022 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
+#include "exec/exec-all.h"
+#include "qemu/int128.h"
+#include "fpu/softfloat.h"
+#include "vec_internal.h"
+#include "sve_ldst_internal.h"
+
+void helper_set_svcr(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+ aarch64_set_svcr(env, val, mask);
+}
+
+void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
+{
+ uint32_t i;
+
+ /*
+ * Special case clearing the entire ZA space.
+ * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any
+ * parts of the ZA storage outside of SVL.
+ */
+ if (imm == 0xff) {
+ memset(env->zarray, 0, sizeof(env->zarray));
+ return;
+ }
+
+ /*
+ * Recall that ZAnH.D[m] is spread across ZA[n+8*m],
+ * so each row is discontiguous within ZA[].
+ */
+ for (i = 0; i < svl; i++) {
+ if (imm & (1 << (i % 8))) {
+ memset(&env->zarray[i], 0, svl);
+ }
+ }
+}
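+
+/*
+ * The imm bit mapping above matches the architectural tile names, for
+ * example (illustrative): imm 0x01 clears ZA0.D (every eighth row,
+ * starting at row 0), 0x11 clears ZA0.S, 0x55 clears ZA0.H, and 0xff
+ * clears all of ZA, which is the fast path taken above.
+ */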
+
+
+/*
+ * When considering the ZA storage as an array of elements of
+ * type T, the index within that array of the Nth element of
+ * a vertical slice of a tile can be calculated like this,
+ * regardless of the size of type T. This is because the tiles
+ * are interleaved, so if type T is size N bytes then row 1 of
+ * the tile is N rows away from row 0. The division by N to
+ * convert a byte offset into an array index and the multiplication
+ * by N to convert from vslice-index-within-the-tile to
+ * the index within the ZA storage cancel out.
+ */
+#define tile_vslice_index(i) ((i) * sizeof(ARMVectorReg))
+
+/*
+ * When doing byte arithmetic on the ZA storage, the element
+ * byteoff bytes away in a tile vertical slice is always this
+ * many bytes away in the ZA storage, regardless of the
+ * size of the tile element, assuming that byteoff is a multiple
+ * of the element size. Again this is because of the interleaving
+ * of the tiles. For instance if we have 1 byte per element then
+ * each row of the ZA storage has one byte of the vslice data,
+ * and (counting from 0) byte 8 goes in row 8 of the storage
+ * at offset (8 * row-size-in-bytes).
+ * If we have 8 bytes per element then each row of the ZA storage
+ * has 8 bytes of the data, but there are 8 interleaved tiles and
+ * so byte 8 of the data goes into row 1 of the tile,
+ * which is again row 8 of the storage, so the offset is still
+ * (8 * row-size-in-bytes). Similarly for other element sizes.
+ */
+#define tile_vslice_offset(byteoff) ((byteoff) * sizeof(ARMVectorReg))
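+
+/*
+ * A concrete check of the cancellation argument above, assuming the
+ * maximum 256-byte ARMVectorReg row size (illustrative only): viewing
+ * ZA as uint32_t, tile_vslice_index(1) == 256 is a byte offset of 1024,
+ * i.e. four ZA rows down, exactly the spacing between the rows of a .S
+ * tile; viewed as uint64_t the same index is 2048 bytes, eight rows
+ * down, matching a .D tile.
+ */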
+
+
+/*
+ * Move Zreg vector to ZArray column.
+ */
+#define DO_MOVA_C(NAME, TYPE, H) \
+void HELPER(NAME)(void *za, void *vn, void *vg, uint32_t desc) \
+{ \
+ int i, oprsz = simd_oprsz(desc); \
+ for (i = 0; i < oprsz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ *(TYPE *)(za + tile_vslice_offset(i)) = *(TYPE *)(vn + H(i)); \
+ } \
+ i += sizeof(TYPE); \
+ pg >>= sizeof(TYPE); \
+ } while (i & 15); \
+ } \
+}
+
+DO_MOVA_C(sme_mova_cz_b, uint8_t, H1)
+DO_MOVA_C(sme_mova_cz_h, uint16_t, H1_2)
+DO_MOVA_C(sme_mova_cz_s, uint32_t, H1_4)
+
+void HELPER(sme_mova_cz_d)(void *za, void *vn, void *vg, uint32_t desc)
+{
+ int i, oprsz = simd_oprsz(desc) / 8;
+ uint8_t *pg = vg;
+ uint64_t *n = vn;
+ uint64_t *a = za;
+
+ for (i = 0; i < oprsz; i++) {
+ if (pg[H1(i)] & 1) {
+ a[tile_vslice_index(i)] = n[i];
+ }
+ }
+}
+
+void HELPER(sme_mova_cz_q)(void *za, void *vn, void *vg, uint32_t desc)
+{
+ int i, oprsz = simd_oprsz(desc) / 16;
+ uint16_t *pg = vg;
+ Int128 *n = vn;
+ Int128 *a = za;
+
+ /*
+ * Int128 is used here simply to copy 16 bytes, and to simplify
+ * the address arithmetic.
+ */
+ for (i = 0; i < oprsz; i++) {
+ if (pg[H2(i)] & 1) {
+ a[tile_vslice_index(i)] = n[i];
+ }
+ }
+}
+
+#undef DO_MOVA_C
+
+/*
+ * Move ZArray column to Zreg vector.
+ */
+#define DO_MOVA_Z(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *za, void *vg, uint32_t desc) \
+{ \
+ int i, oprsz = simd_oprsz(desc); \
+ for (i = 0; i < oprsz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ *(TYPE *)(vd + H(i)) = *(TYPE *)(za + tile_vslice_offset(i)); \
+ } \
+ i += sizeof(TYPE); \
+ pg >>= sizeof(TYPE); \
+ } while (i & 15); \
+ } \
+}
+
+DO_MOVA_Z(sme_mova_zc_b, uint8_t, H1)
+DO_MOVA_Z(sme_mova_zc_h, uint16_t, H1_2)
+DO_MOVA_Z(sme_mova_zc_s, uint32_t, H1_4)
+
+void HELPER(sme_mova_zc_d)(void *vd, void *za, void *vg, uint32_t desc)
+{
+ int i, oprsz = simd_oprsz(desc) / 8;
+ uint8_t *pg = vg;
+ uint64_t *d = vd;
+ uint64_t *a = za;
+
+ for (i = 0; i < oprsz; i++) {
+ if (pg[H1(i)] & 1) {
+ d[i] = a[tile_vslice_index(i)];
+ }
+ }
+}
+
+void HELPER(sme_mova_zc_q)(void *vd, void *za, void *vg, uint32_t desc)
+{
+ int i, oprsz = simd_oprsz(desc) / 16;
+ uint16_t *pg = vg;
+ Int128 *d = vd;
+ Int128 *a = za;
+
+ /*
+ * Int128 is used here simply to copy 16 bytes, and to simplify
+ * the address arithmetic.
+ */
+ for (i = 0; i < oprsz; i++) {
+ if (pg[H2(i)] & 1) {
+ d[i] = a[tile_vslice_index(i)];
+ }
+ }
+}
+
+#undef DO_MOVA_Z
+
+/*
+ * Clear elements in a tile slice comprising len bytes.
+ */
+
+typedef void ClearFn(void *ptr, size_t off, size_t len);
+
+static void clear_horizontal(void *ptr, size_t off, size_t len)
+{
+ memset(ptr + off, 0, len);
+}
+
+static void clear_vertical_b(void *vptr, size_t off, size_t len)
+{
+ for (size_t i = 0; i < len; ++i) {
+ *(uint8_t *)(vptr + tile_vslice_offset(i + off)) = 0;
+ }
+}
+
+static void clear_vertical_h(void *vptr, size_t off, size_t len)
+{
+ for (size_t i = 0; i < len; i += 2) {
+ *(uint16_t *)(vptr + tile_vslice_offset(i + off)) = 0;
+ }
+}
+
+static void clear_vertical_s(void *vptr, size_t off, size_t len)
+{
+ for (size_t i = 0; i < len; i += 4) {
+ *(uint32_t *)(vptr + tile_vslice_offset(i + off)) = 0;
+ }
+}
+
+static void clear_vertical_d(void *vptr, size_t off, size_t len)
+{
+ for (size_t i = 0; i < len; i += 8) {
+ *(uint64_t *)(vptr + tile_vslice_offset(i + off)) = 0;
+ }
+}
+
+static void clear_vertical_q(void *vptr, size_t off, size_t len)
+{
+ for (size_t i = 0; i < len; i += 16) {
+ memset(vptr + tile_vslice_offset(i + off), 0, 16);
+ }
+}
+
+/*
+ * Copy elements from an array into a tile slice comprising len bytes.
+ */
+
+typedef void CopyFn(void *dst, const void *src, size_t len);
+
+static void copy_horizontal(void *dst, const void *src, size_t len)
+{
+ memcpy(dst, src, len);
+}
+
+static void copy_vertical_b(void *vdst, const void *vsrc, size_t len)
+{
+ const uint8_t *src = vsrc;
+ uint8_t *dst = vdst;
+ size_t i;
+
+ for (i = 0; i < len; ++i) {
+ dst[tile_vslice_index(i)] = src[i];
+ }
+}
+
+static void copy_vertical_h(void *vdst, const void *vsrc, size_t len)
+{
+ const uint16_t *src = vsrc;
+ uint16_t *dst = vdst;
+ size_t i;
+
+ for (i = 0; i < len / 2; ++i) {
+ dst[tile_vslice_index(i)] = src[i];
+ }
+}
+
+static void copy_vertical_s(void *vdst, const void *vsrc, size_t len)
+{
+ const uint32_t *src = vsrc;
+ uint32_t *dst = vdst;
+ size_t i;
+
+ for (i = 0; i < len / 4; ++i) {
+ dst[tile_vslice_index(i)] = src[i];
+ }
+}
+
+static void copy_vertical_d(void *vdst, const void *vsrc, size_t len)
+{
+ const uint64_t *src = vsrc;
+ uint64_t *dst = vdst;
+ size_t i;
+
+ for (i = 0; i < len / 8; ++i) {
+ dst[tile_vslice_index(i)] = src[i];
+ }
+}
+
+static void copy_vertical_q(void *vdst, const void *vsrc, size_t len)
+{
+ for (size_t i = 0; i < len; i += 16) {
+ memcpy(vdst + tile_vslice_offset(i), vsrc + i, 16);
+ }
+}
+
+/*
+ * Host and TLB primitives for vertical tile slice addressing.
+ */
+
+#define DO_LD(NAME, TYPE, HOST, TLB) \
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
+{ \
+ TYPE val = HOST(host); \
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
+} \
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
+ intptr_t off, target_ulong addr, uintptr_t ra) \
+{ \
+ TYPE val = TLB(env, useronly_clean_ptr(addr), ra); \
+ *(TYPE *)(za + tile_vslice_offset(off)) = val; \
+}
+
+#define DO_ST(NAME, TYPE, HOST, TLB) \
+static inline void sme_##NAME##_v_host(void *za, intptr_t off, void *host) \
+{ \
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
+ HOST(host, val); \
+} \
+static inline void sme_##NAME##_v_tlb(CPUARMState *env, void *za, \
+ intptr_t off, target_ulong addr, uintptr_t ra) \
+{ \
+ TYPE val = *(TYPE *)(za + tile_vslice_offset(off)); \
+ TLB(env, useronly_clean_ptr(addr), val, ra); \
+}
+
+/*
+ * The ARMVectorReg elements are stored in host-endian 64-bit units.
+ * For 128-bit quantities, the sequence defined by the Elem[] pseudocode
+ * corresponds to storing the two 64-bit pieces in little-endian order.
+ */
+#define DO_LDQ(HNAME, VNAME, BE, HOST, TLB) \
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
+{ \
+ uint64_t val0 = HOST(host), val1 = HOST(host + 8); \
+ uint64_t *ptr = za + off; \
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
+} \
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
+{ \
+ HNAME##_host(za, tile_vslice_offset(off), host); \
+} \
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
+ target_ulong addr, uintptr_t ra) \
+{ \
+ uint64_t val0 = TLB(env, useronly_clean_ptr(addr), ra); \
+ uint64_t val1 = TLB(env, useronly_clean_ptr(addr + 8), ra); \
+ uint64_t *ptr = za + off; \
+ ptr[0] = BE ? val1 : val0, ptr[1] = BE ? val0 : val1; \
+} \
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
+ target_ulong addr, uintptr_t ra) \
+{ \
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
+}
+
+#define DO_STQ(HNAME, VNAME, BE, HOST, TLB) \
+static inline void HNAME##_host(void *za, intptr_t off, void *host) \
+{ \
+ uint64_t *ptr = za + off; \
+ HOST(host, ptr[BE]); \
+ HOST(host + 8, ptr[!BE]); \
+} \
+static inline void VNAME##_v_host(void *za, intptr_t off, void *host) \
+{ \
+ HNAME##_host(za, tile_vslice_offset(off), host); \
+} \
+static inline void HNAME##_tlb(CPUARMState *env, void *za, intptr_t off, \
+ target_ulong addr, uintptr_t ra) \
+{ \
+ uint64_t *ptr = za + off; \
+ TLB(env, useronly_clean_ptr(addr), ptr[BE], ra); \
+ TLB(env, useronly_clean_ptr(addr + 8), ptr[!BE], ra); \
+} \
+static inline void VNAME##_v_tlb(CPUARMState *env, void *za, intptr_t off, \
+ target_ulong addr, uintptr_t ra) \
+{ \
+ HNAME##_tlb(env, za, tile_vslice_offset(off), addr, ra); \
+}
+
+DO_LD(ld1b, uint8_t, ldub_p, cpu_ldub_data_ra)
+DO_LD(ld1h_be, uint16_t, lduw_be_p, cpu_lduw_be_data_ra)
+DO_LD(ld1h_le, uint16_t, lduw_le_p, cpu_lduw_le_data_ra)
+DO_LD(ld1s_be, uint32_t, ldl_be_p, cpu_ldl_be_data_ra)
+DO_LD(ld1s_le, uint32_t, ldl_le_p, cpu_ldl_le_data_ra)
+DO_LD(ld1d_be, uint64_t, ldq_be_p, cpu_ldq_be_data_ra)
+DO_LD(ld1d_le, uint64_t, ldq_le_p, cpu_ldq_le_data_ra)
+
+DO_LDQ(sve_ld1qq_be, sme_ld1q_be, 1, ldq_be_p, cpu_ldq_be_data_ra)
+DO_LDQ(sve_ld1qq_le, sme_ld1q_le, 0, ldq_le_p, cpu_ldq_le_data_ra)
+
+DO_ST(st1b, uint8_t, stb_p, cpu_stb_data_ra)
+DO_ST(st1h_be, uint16_t, stw_be_p, cpu_stw_be_data_ra)
+DO_ST(st1h_le, uint16_t, stw_le_p, cpu_stw_le_data_ra)
+DO_ST(st1s_be, uint32_t, stl_be_p, cpu_stl_be_data_ra)
+DO_ST(st1s_le, uint32_t, stl_le_p, cpu_stl_le_data_ra)
+DO_ST(st1d_be, uint64_t, stq_be_p, cpu_stq_be_data_ra)
+DO_ST(st1d_le, uint64_t, stq_le_p, cpu_stq_le_data_ra)
+
+DO_STQ(sve_st1qq_be, sme_st1q_be, 1, stq_be_p, cpu_stq_be_data_ra)
+DO_STQ(sve_st1qq_le, sme_st1q_le, 0, stq_le_p, cpu_stq_le_data_ra)
+
+#undef DO_LD
+#undef DO_ST
+#undef DO_LDQ
+#undef DO_STQ
+
+/*
+ * Common helper for all contiguous predicated loads.
+ */
+
+static inline QEMU_ALWAYS_INLINE
+void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
+ const int esz, uint32_t mtedesc, bool vertical,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn,
+ ClearFn *clr_fn,
+ CopyFn *cpy_fn)
+{
+ const intptr_t reg_max = simd_oprsz(desc);
+ const intptr_t esize = 1 << esz;
+ intptr_t reg_off, reg_last;
+ SVEContLdSt info;
+ void *host;
+ int flags;
+
+ /* Find the active elements. */
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
+ /* The entire predicate was false; no load occurs. */
+ clr_fn(za, 0, reg_max);
+ return;
+ }
+
+ /* Probe the page(s). Exit with exception for any invalid page. */
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, ra);
+
+ /* Handle watchpoints for all active elements. */
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
+ BP_MEM_READ, ra);
+
+ /*
+ * Handle mte checks for all active elements.
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
+ */
+ if (mtedesc) {
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
+ mtedesc, ra);
+ }
+
+ flags = info.page[0].flags | info.page[1].flags;
+ if (unlikely(flags != 0)) {
+#ifdef CONFIG_USER_ONLY
+ g_assert_not_reached();
+#else
+ /*
+ * At least one page includes MMIO.
+ * Any bus operation can fail with cpu_transaction_failed,
+ * which for ARM will raise SyncExternal. Perform the load
+ * into scratch memory to preserve register state until the end.
+ */
+ ARMVectorReg scratch = { };
+
+ reg_off = info.reg_off_first[0];
+ reg_last = info.reg_off_last[1];
+ if (reg_last < 0) {
+ reg_last = info.reg_off_split;
+ if (reg_last < 0) {
+ reg_last = info.reg_off_last[0];
+ }
+ }
+
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ tlb_fn(env, &scratch, reg_off, addr + reg_off, ra);
+ }
+ reg_off += esize;
+ } while (reg_off & 63);
+ } while (reg_off <= reg_last);
+
+ cpy_fn(za, &scratch, reg_max);
+ return;
+#endif
+ }
+
+ /* The entire operation is in RAM, on valid pages. */
+
+ reg_off = info.reg_off_first[0];
+ reg_last = info.reg_off_last[0];
+ host = info.page[0].host;
+
+ if (!vertical) {
+ memset(za, 0, reg_max);
+ } else if (reg_off) {
+ clr_fn(za, 0, reg_off);
+ }
+
+ while (reg_off <= reg_last) {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ host_fn(za, reg_off, host + reg_off);
+ } else if (vertical) {
+ clr_fn(za, reg_off, esize);
+ }
+ reg_off += esize;
+ } while (reg_off <= reg_last && (reg_off & 63));
+ }
+
+ /*
+ * Use the slow path to manage the cross-page misalignment.
+ * But we know this is RAM and cannot trap.
+ */
+ reg_off = info.reg_off_split;
+ if (unlikely(reg_off >= 0)) {
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
+ }
+
+ reg_off = info.reg_off_first[1];
+ if (unlikely(reg_off >= 0)) {
+ reg_last = info.reg_off_last[1];
+ host = info.page[1].host;
+
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ host_fn(za, reg_off, host + reg_off);
+ } else if (vertical) {
+ clr_fn(za, reg_off, esize);
+ }
+ reg_off += esize;
+ } while (reg_off & 63);
+ } while (reg_off <= reg_last);
+ }
+}
+
+static inline QEMU_ALWAYS_INLINE
+void sme_ld1_mte(CPUARMState *env, void *za, uint64_t *vg,
+ target_ulong addr, uint32_t desc, uintptr_t ra,
+ const int esz, bool vertical,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn,
+ ClearFn *clr_fn,
+ CopyFn *cpy_fn)
+{
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+ int bit55 = extract64(addr, 55, 1);
+
+ /* Remove mtedesc from the normal sve descriptor. */
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+
+ /* Perform gross MTE suppression early. */
+ if (!tbi_check(mtedesc, bit55) ||
+ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
+ mtedesc = 0;
+ }
+
+ sme_ld1(env, za, vg, addr, desc, ra, esz, mtedesc, vertical,
+ host_fn, tlb_fn, clr_fn, cpy_fn);
+}
+
+#define DO_LD(L, END, ESZ) \
+void HELPER(sme_ld1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
+ clear_horizontal, copy_horizontal); \
+} \
+void HELPER(sme_ld1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sme_ld1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
+ clear_vertical_##L, copy_vertical_##L); \
+} \
+void HELPER(sme_ld1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
+ sve_ld1##L##L##END##_host, sve_ld1##L##L##END##_tlb, \
+ clear_horizontal, copy_horizontal); \
+} \
+void HELPER(sme_ld1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sme_ld1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
+ sme_ld1##L##END##_v_host, sme_ld1##L##END##_v_tlb, \
+ clear_vertical_##L, copy_vertical_##L); \
+}
+
+DO_LD(b, , MO_8)
+DO_LD(h, _be, MO_16)
+DO_LD(h, _le, MO_16)
+DO_LD(s, _be, MO_32)
+DO_LD(s, _le, MO_32)
+DO_LD(d, _be, MO_64)
+DO_LD(d, _le, MO_64)
+DO_LD(q, _be, MO_128)
+DO_LD(q, _le, MO_128)
+
+#undef DO_LD
+
+/*
+ * Common helper for all contiguous predicated stores.
+ */
+
+static inline QEMU_ALWAYS_INLINE
+void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
+ const target_ulong addr, uint32_t desc, const uintptr_t ra,
+ const int esz, uint32_t mtedesc, bool vertical,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ const intptr_t reg_max = simd_oprsz(desc);
+ const intptr_t esize = 1 << esz;
+ intptr_t reg_off, reg_last;
+ SVEContLdSt info;
+ void *host;
+ int flags;
+
+ /* Find the active elements. */
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, esize)) {
+ /* The entire predicate was false; no store occurs. */
+ return;
+ }
+
+ /* Probe the page(s). Exit with exception for any invalid page. */
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, ra);
+
+ /* Handle watchpoints for all active elements. */
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, esize, esize,
+ BP_MEM_WRITE, ra);
+
+ /*
+ * Handle mte checks for all active elements.
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
+ */
+ if (mtedesc) {
+ sve_cont_ldst_mte_check(&info, env, vg, addr, esize, esize,
+ mtedesc, ra);
+ }
+
+ flags = info.page[0].flags | info.page[1].flags;
+ if (unlikely(flags != 0)) {
+#ifdef CONFIG_USER_ONLY
+ g_assert_not_reached();
+#else
+ /*
+ * At least one page includes MMIO.
+ * Any bus operation can fail with cpu_transaction_failed,
+ * which for ARM will raise SyncExternal. We cannot avoid
+ * this fault and will leave with the store incomplete.
+ */
+ reg_off = info.reg_off_first[0];
+ reg_last = info.reg_off_last[1];
+ if (reg_last < 0) {
+ reg_last = info.reg_off_split;
+ if (reg_last < 0) {
+ reg_last = info.reg_off_last[0];
+ }
+ }
+
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
+ }
+ reg_off += esize;
+ } while (reg_off & 63);
+ } while (reg_off <= reg_last);
+ return;
+#endif
+ }
+
+ reg_off = info.reg_off_first[0];
+ reg_last = info.reg_off_last[0];
+ host = info.page[0].host;
+
+ while (reg_off <= reg_last) {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ host_fn(za, reg_off, host + reg_off);
+ }
+ reg_off += 1 << esz;
+ } while (reg_off <= reg_last && (reg_off & 63));
+ }
+
+ /*
+ * Use the slow path to manage the cross-page misalignment.
+ * But we know this is RAM and cannot trap.
+ */
+ reg_off = info.reg_off_split;
+ if (unlikely(reg_off >= 0)) {
+ tlb_fn(env, za, reg_off, addr + reg_off, ra);
+ }
+
+ reg_off = info.reg_off_first[1];
+ if (unlikely(reg_off >= 0)) {
+ reg_last = info.reg_off_last[1];
+ host = info.page[1].host;
+
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ host_fn(za, reg_off, host + reg_off);
+ }
+ reg_off += 1 << esz;
+ } while (reg_off & 63);
+ } while (reg_off <= reg_last);
+ }
+}
+
+static inline QEMU_ALWAYS_INLINE
+void sme_st1_mte(CPUARMState *env, void *za, uint64_t *vg, target_ulong addr,
+ uint32_t desc, uintptr_t ra, int esz, bool vertical,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+ int bit55 = extract64(addr, 55, 1);
+
+ /* Remove mtedesc from the normal sve descriptor. */
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+
+ /* Perform gross MTE suppression early. */
+ if (!tbi_check(mtedesc, bit55) ||
+ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
+ mtedesc = 0;
+ }
+
+ sme_st1(env, za, vg, addr, desc, ra, esz, mtedesc,
+ vertical, host_fn, tlb_fn);
+}
+
+#define DO_ST(L, END, ESZ) \
+void HELPER(sme_st1##L##END##_h)(CPUARMState *env, void *za, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, false, \
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
+} \
+void HELPER(sme_st1##L##END##_v)(CPUARMState *env, void *za, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sme_st1(env, za, vg, addr, desc, GETPC(), ESZ, 0, true, \
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
+} \
+void HELPER(sme_st1##L##END##_h_mte)(CPUARMState *env, void *za, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, false, \
+ sve_st1##L##L##END##_host, sve_st1##L##L##END##_tlb); \
+} \
+void HELPER(sme_st1##L##END##_v_mte)(CPUARMState *env, void *za, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sme_st1_mte(env, za, vg, addr, desc, GETPC(), ESZ, true, \
+ sme_st1##L##END##_v_host, sme_st1##L##END##_v_tlb); \
+}
+
+DO_ST(b, , MO_8)
+DO_ST(h, _be, MO_16)
+DO_ST(h, _le, MO_16)
+DO_ST(s, _be, MO_32)
+DO_ST(s, _le, MO_32)
+DO_ST(d, _be, MO_64)
+DO_ST(d, _le, MO_64)
+DO_ST(q, _be, MO_128)
+DO_ST(q, _le, MO_128)
+
+#undef DO_ST
+
+void HELPER(sme_addha_s)(void *vzda, void *vzn, void *vpn,
+ void *vpm, uint32_t desc)
+{
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
+ uint64_t *pn = vpn, *pm = vpm;
+ uint32_t *zda = vzda, *zn = vzn;
+
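+ /*
+ * Predicate registers hold one bit per byte, so for 32-bit elements
+ * only every fourth bit is significant: each uint64_t of predicate
+ * bits covers 16 rows or columns, and the governing bit for the
+ * current element is tested after shifting right by 4 per step.
+ */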
+ for (row = 0; row < oprsz; ) {
+ uint64_t pa = pn[row >> 4];
+ do {
+ if (pa & 1) {
+ for (col = 0; col < oprsz; ) {
+ uint64_t pb = pm[col >> 4];
+ do {
+ if (pb & 1) {
+ zda[tile_vslice_index(row) + H4(col)] += zn[H4(col)];
+ }
+ pb >>= 4;
+ } while (++col & 15);
+ }
+ }
+ pa >>= 4;
+ } while (++row & 15);
+ }
+}
+
+void HELPER(sme_addha_d)(void *vzda, void *vzn, void *vpn,
+ void *vpm, uint32_t desc)
+{
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
+ uint8_t *pn = vpn, *pm = vpm;
+ uint64_t *zda = vzda, *zn = vzn;
+
+ for (row = 0; row < oprsz; ++row) {
+ if (pn[H1(row)] & 1) {
+ for (col = 0; col < oprsz; ++col) {
+ if (pm[H1(col)] & 1) {
+ zda[tile_vslice_index(row) + col] += zn[col];
+ }
+ }
+ }
+ }
+}
+
+void HELPER(sme_addva_s)(void *vzda, void *vzn, void *vpn,
+ void *vpm, uint32_t desc)
+{
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
+ uint64_t *pn = vpn, *pm = vpm;
+ uint32_t *zda = vzda, *zn = vzn;
+
+ for (row = 0; row < oprsz; ) {
+ uint64_t pa = pn[row >> 4];
+ do {
+ if (pa & 1) {
+ uint32_t zn_row = zn[H4(row)];
+ for (col = 0; col < oprsz; ) {
+ uint64_t pb = pm[col >> 4];
+ do {
+ if (pb & 1) {
+ zda[tile_vslice_index(row) + H4(col)] += zn_row;
+ }
+ pb >>= 4;
+ } while (++col & 15);
+ }
+ }
+ pa >>= 4;
+ } while (++row & 15);
+ }
+}
+
+void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
+ void *vpm, uint32_t desc)
+{
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
+ uint8_t *pn = vpn, *pm = vpm;
+ uint64_t *zda = vzda, *zn = vzn;
+
+ for (row = 0; row < oprsz; ++row) {
+ if (pn[H1(row)] & 1) {
+ uint64_t zn_row = zn[row];
+ for (col = 0; col < oprsz; ++col) {
+ if (pm[H1(col)] & 1) {
+ zda[tile_vslice_index(row) + col] += zn_row;
+ }
+ }
+ }
+ }
+}
+
+void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
+ void *vpm, void *vst, uint32_t desc)
+{
+ intptr_t row, col, oprsz = simd_maxsz(desc);
+ uint32_t neg = simd_data(desc) << 31;
+ uint16_t *pn = vpn, *pm = vpm;
+ float_status fpst;
+
+ /*
+ * Make a copy of float_status because this operation does not
+ * update the cumulative fp exception status. It also produces
+ * default nans.
+ */
+ fpst = *(float_status *)vst;
+ set_default_nan_mode(true, &fpst);
+
+ for (row = 0; row < oprsz; ) {
+ uint16_t pa = pn[H2(row >> 4)];
+ do {
+ if (pa & 1) {
+ void *vza_row = vza + tile_vslice_offset(row);
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg;
+
+ for (col = 0; col < oprsz; ) {
+ uint16_t pb = pm[H2(col >> 4)];
+ do {
+ if (pb & 1) {
+ uint32_t *a = vza_row + H1_4(col);
+ uint32_t *m = vzm + H1_4(col);
+ *a = float32_muladd(n, *m, *a, 0, &fpst);
+ }
+ col += 4;
+ pb >>= 4;
+ } while (col & 15);
+ }
+ }
+ row += 4;
+ pa >>= 4;
+ } while (row & 15);
+ }
+}
+
+void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
+ void *vpm, void *vst, uint32_t desc)
+{
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
+ uint64_t neg = (uint64_t)simd_data(desc) << 63;
+ uint64_t *za = vza, *zn = vzn, *zm = vzm;
+ uint8_t *pn = vpn, *pm = vpm;
+ float_status fpst = *(float_status *)vst;
+
+ set_default_nan_mode(true, &fpst);
+
+ for (row = 0; row < oprsz; ++row) {
+ if (pn[H1(row)] & 1) {
+ uint64_t *za_row = &za[tile_vslice_index(row)];
+ uint64_t n = zn[row] ^ neg;
+
+ for (col = 0; col < oprsz; ++col) {
+ if (pm[H1(col)] & 1) {
+ uint64_t *a = &za_row[col];
+ *a = float64_muladd(n, zm[col], *a, 0, &fpst);
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Alter PAIR as needed for controlling predicates being false,
+ * and for NEG on an enabled row element.
+ */
+static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg)
+{
+ /*
+ * The pseudocode uses a conditional negate after the conditional zero.
+ * It is simpler here to unconditionally negate before conditional zero.
+ */
+ pair ^= neg;
+ if (!(pg & 1)) {
+ pair &= 0xffff0000u;
+ }
+ if (!(pg & 4)) {
+ pair &= 0x0000ffffu;
+ }
+ return pair;
+}
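+
+/*
+ * Quick worked example of the reordering above (illustrative): with
+ * neg == 0x80008000, pg == 0b0100 (only the high fp16 lane active) and
+ * pair == 0x3c003c00 (two copies of 1.0), the xor gives 0xbc00bc00 and
+ * clearing the inactive low lane leaves 0xbc000000, i.e. {+0.0, -1.0}.
+ * Conditionally zeroing first and then negating only the active lane,
+ * as the pseudocode does, yields the same value.
+ */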
+
+static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
+ float_status *s_std, float_status *s_odd)
+{
+ float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
+ float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
+ float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
+ float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
+ float64 t64;
+ float32 t32;
+
+ /*
+ * The ARM pseudocode function FPDot performs both multiplies
+ * and the add with a single rounding operation. Emulate this
+ * by performing the first multiply in round-to-odd, then doing
+ * the second multiply as fused multiply-add, and rounding to
+ * float32 all in one step. Round-to-odd sets the low result bit
+ * whenever the first product is inexact, so enough information is
+ * preserved for the final rounding and no double rounding occurs.
+ */
+ t64 = float64_mul(e1r, e2r, s_odd);
+ t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std);
+
+ /* This conversion is exact, because we've already rounded. */
+ t32 = float64_to_float32(t64, s_std);
+
+ /* The final accumulation step is not fused. */
+ return float32_add(sum, t32, s_std);
+}
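+
+/*
+ * Purely illustrative sketch of the same trick using host arithmetic;
+ * the helper above does not use it, and both the name and the reliance
+ * on C99 fma() from <math.h> are assumptions for the illustration.
+ * With round-to-nearest, each float product is exact in double and the
+ * final double->float step is an innocuous double rounding
+ * (53 >= 2*24 + 2 significand bits), so the sum of two products is
+ * rounded effectively once.  The softfloat code above uses
+ * round-to-odd instead, so it also honours the guest FP status and
+ * non-default rounding modes.
+ */
+static inline float dot2_single_rounding_sketch(float a, float b,
+                                                float c, float d)
+{
+    double t = (double)a * (double)b;            /* exact product */
+    return (float)fma((double)c, (double)d, t);  /* one effective rounding */
+}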
+
+void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
+ void *vpm, void *vst, uint32_t desc)
+{
+ intptr_t row, col, oprsz = simd_maxsz(desc);
+ uint32_t neg = simd_data(desc) * 0x80008000u;
+ uint16_t *pn = vpn, *pm = vpm;
+ float_status fpst_odd, fpst_std;
+
+ /*
+ * Make a copy of float_status because this operation does not
+ * update the cumulative fp exception status. It also produces
+ * default nans. Make a second copy with round-to-odd -- see above.
+ */
+ fpst_std = *(float_status *)vst;
+ set_default_nan_mode(true, &fpst_std);
+ fpst_odd = fpst_std;
+ set_float_rounding_mode(float_round_to_odd, &fpst_odd);
+
+ for (row = 0; row < oprsz; ) {
+ uint16_t prow = pn[H2(row >> 4)];
+ do {
+ void *vza_row = vza + tile_vslice_offset(row);
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
+
+ n = f16mop_adj_pair(n, prow, neg);
+
+ for (col = 0; col < oprsz; ) {
+ uint16_t pcol = pm[H2(col >> 4)];
+ do {
+ if (prow & pcol & 0b0101) {
+ uint32_t *a = vza_row + H1_4(col);
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
+
+ m = f16mop_adj_pair(m, pcol, 0);
+ *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
+ }
+ col += 4;
+ pcol >>= 4;
+ } while (col & 15);
+ }
+ row += 4;
+ prow >>= 4;
+ } while (row & 15);
+ }
+}
+
+void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
+ void *vpm, uint32_t desc)
+{
+ intptr_t row, col, oprsz = simd_maxsz(desc);
+ uint32_t neg = simd_data(desc) * 0x80008000u;
+ uint16_t *pn = vpn, *pm = vpm;
+
+ for (row = 0; row < oprsz; ) {
+ uint16_t prow = pn[H2(row >> 4)];
+ do {
+ void *vza_row = vza + tile_vslice_offset(row);
+ uint32_t n = *(uint32_t *)(vzn + H1_4(row));
+
+ n = f16mop_adj_pair(n, prow, neg);
+
+ for (col = 0; col < oprsz; ) {
+ uint16_t pcol = pm[H2(col >> 4)];
+ do {
+ if (prow & pcol & 0b0101) {
+ uint32_t *a = vza_row + H1_4(col);
+ uint32_t m = *(uint32_t *)(vzm + H1_4(col));
+
+ m = f16mop_adj_pair(m, pcol, 0);
+ *a = bfdotadd(*a, n, m);
+ }
+ col += 4;
+ pcol >>= 4;
+ } while (col & 15);
+ }
+ row += 4;
+ prow >>= 4;
+ } while (row & 15);
+ }
+}
+
+typedef uint32_t IMOPFn32(uint32_t, uint32_t, uint32_t, uint8_t, bool);
+static inline void do_imopa_s(uint32_t *za, uint32_t *zn, uint32_t *zm,
+ uint8_t *pn, uint8_t *pm,
+ uint32_t desc, IMOPFn32 *fn)
+{
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
+ bool neg = simd_data(desc);
+
+ for (row = 0; row < oprsz; ++row) {
+ uint8_t pa = (pn[H1(row >> 1)] >> ((row & 1) * 4)) & 0xf;
+ uint32_t *za_row = &za[tile_vslice_index(row)];
+ uint32_t n = zn[H4(row)];
+
+ for (col = 0; col < oprsz; ++col) {
+ uint8_t pb = pm[H1(col >> 1)] >> ((col & 1) * 4);
+ uint32_t *a = &za_row[H4(col)];
+
+ *a = fn(n, zm[H4(col)], *a, pa & pb, neg);
+ }
+ }
+}
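+
+/*
+ * Note on the predicate layout above: the source elements of the
+ * 32-bit outer products are bytes, so each 32-bit row or column owns
+ * four predicate bits.  pn[H1(row >> 1)] fetches the byte holding the
+ * nibbles for two rows and ((row & 1) * 4) selects the right nibble;
+ * pb is not masked to four bits because the later "pa & pb" does it.
+ */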
+
+typedef uint64_t IMOPFn64(uint64_t, uint64_t, uint64_t, uint8_t, bool);
+static inline void do_imopa_d(uint64_t *za, uint64_t *zn, uint64_t *zm,
+ uint8_t *pn, uint8_t *pm,
+ uint32_t desc, IMOPFn64 *fn)
+{
+ intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
+ bool neg = simd_data(desc);
+
+ for (row = 0; row < oprsz; ++row) {
+ uint8_t pa = pn[H1(row)];
+ uint64_t *za_row = &za[tile_vslice_index(row)];
+ uint64_t n = zn[row];
+
+ for (col = 0; col < oprsz; ++col) {
+ uint8_t pb = pm[H1(col)];
+ uint64_t *a = &za_row[col];
+
+ *a = fn(n, zm[col], *a, pa & pb, neg);
+ }
+ }
+}
+
+#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \
+static uint32_t NAME(uint32_t n, uint32_t m, uint32_t a, uint8_t p, bool neg) \
+{ \
+ uint32_t sum = 0; \
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
+ n &= expand_pred_b(p); \
+ sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
+ sum += (NTYPE)(n >> 8) * (MTYPE)(m >> 8); \
+ sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
+ sum += (NTYPE)(n >> 24) * (MTYPE)(m >> 24); \
+ return neg ? a - sum : a + sum; \
+}
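+
+/*
+ * Masking only N with "pa & pb" is equivalent to masking N with the
+ * row predicate and M with the column predicate, since a zeroed byte
+ * of N already forces that product term to zero.  expand_pred_b()
+ * turns each predicate bit into a 0x00/0xff byte mask: for example,
+ * p = 0b0101 expands to 0x00ff00ff, keeping bytes 0 and 2 of N.
+ */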
+
+#define DEF_IMOP_64(NAME, NTYPE, MTYPE) \
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
+{ \
+ uint64_t sum = 0; \
+ /* Apply P to N as a mask, making the inactive elements 0. */ \
+ n &= expand_pred_h(p); \
+ sum += (NTYPE)(n >> 0) * (MTYPE)(m >> 0); \
+ sum += (NTYPE)(n >> 16) * (MTYPE)(m >> 16); \
+ sum += (NTYPE)(n >> 32) * (MTYPE)(m >> 32); \
+ sum += (NTYPE)(n >> 48) * (MTYPE)(m >> 48); \
+ return neg ? a - sum : a + sum; \
+}
+
+DEF_IMOP_32(smopa_s, int8_t, int8_t)
+DEF_IMOP_32(umopa_s, uint8_t, uint8_t)
+DEF_IMOP_32(sumopa_s, int8_t, uint8_t)
+DEF_IMOP_32(usmopa_s, uint8_t, int8_t)
+
+DEF_IMOP_64(smopa_d, int16_t, int16_t)
+DEF_IMOP_64(umopa_d, uint16_t, uint16_t)
+DEF_IMOP_64(sumopa_d, int16_t, uint16_t)
+DEF_IMOP_64(usmopa_d, uint16_t, int16_t)
+
+#define DEF_IMOPH(NAME, S) \
+ void HELPER(sme_##NAME##_##S)(void *vza, void *vzn, void *vzm, \
+ void *vpn, void *vpm, uint32_t desc) \
+ { do_imopa_##S(vza, vzn, vzm, vpn, vpm, desc, NAME##_##S); }
+
+DEF_IMOPH(smopa, s)
+DEF_IMOPH(umopa, s)
+DEF_IMOPH(sumopa, s)
+DEF_IMOPH(usmopa, s)
+
+DEF_IMOPH(smopa, d)
+DEF_IMOPH(umopa, d)
+DEF_IMOPH(sumopa, d)
+DEF_IMOPH(usmopa, d)
diff --git a/target/arm/sve.decode b/target/arm/tcg/sve.decode
index e10b689454..04b6fcc0cf 100644
--- a/target/arm/sve.decode
+++ b/target/arm/tcg/sve.decode
@@ -5,7 +5,7 @@
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
-# version 2 of the License, or (at your option) any later version.
+# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -22,7 +22,7 @@
###########################################################################
# Named fields. These are primarily for disjoint fields.
-%imm4_16_p1 16:4 !function=plus1
+%imm4_16_p1 16:4 !function=plus_1
%imm6_22_5 22:1 5:5
%imm7_22_16 22:2 16:5
%imm8_16_10 16:5 10:3
@@ -30,6 +30,8 @@
%size_23 23:2
%dtype_23_13 23:2 13:2
%index3_22_19 22:1 19:2
+%index3_19_11 19:2 11:1
+%index2_20_11 20:1 11:1
# A combination of tsz:imm3 -- extract esize.
%tszimm_esz 22:2 5:5 !function=tszimm_esz
@@ -65,11 +67,15 @@
&rr_dbm rd rn dbm
&rrri rd rn rm imm
&rri_esz rd rn imm esz
+&rrri_esz rd rn rm imm esz
&rrr_esz rd rn rm esz
+&rrx_esz rd rn rm index esz
&rpr_esz rd pg rn esz
&rpr_s rd pg rn s
&rprr_s rd pg rn rm s
&rprr_esz rd pg rn rm esz
+&rrrr_esz rd ra rn rm esz
+&rrxr_esz rd rn rm ra index esz
&rprrr_esz rd pg rn rm ra esz
&rpri_esz rd pg rn imm esz
&ptrue rd esz pat s
@@ -99,6 +105,7 @@
# Two operand with governing predicate, flags setting
@pd_pg_pn_s ........ . s:1 ...... .. pg:4 . rn:4 . rd:4 &rpr_s
+@pd_pg_pn_s0 ........ . . ...... .. pg:4 . rn:4 . rd:4 &rpr_s s=0
# Three operand with unused vector element size
@rd_rn_rm_e0 ........ ... rm:5 ... ... rn:5 rd:5 &rrr_esz esz=0
@@ -111,6 +118,8 @@
@pd_pn_pm ........ esz:2 .. rm:4 ....... rn:4 . rd:4 &rrr_esz
@rdn_rm ........ esz:2 ...... ...... rm:5 rd:5 \
&rrr_esz rn=%reg_movprfx
+@rdn_rm_e0 ........ .. ...... ...... rm:5 rd:5 \
+ &rrr_esz rn=%reg_movprfx esz=0
@rdn_sh_i8u ........ esz:2 ...... ...... ..... rd:5 \
&rri_esz rn=%reg_movprfx imm=%sh8_i8u
@rdn_i8u ........ esz:2 ...... ... imm:8 rd:5 \
@@ -118,6 +127,16 @@
@rdn_i8s ........ esz:2 ...... ... imm:s8 rd:5 \
&rri_esz rn=%reg_movprfx
+# Four operand, vector element size
+@rda_rn_rm ........ esz:2 . rm:5 ... ... rn:5 rd:5 \
+ &rrrr_esz ra=%reg_movprfx
+
+# Four operand with unused vector element size
+@rda_rn_rm_e0 ........ ... rm:5 ... ... rn:5 rd:5 \
+ &rrrr_esz esz=0 ra=%reg_movprfx
+@rdn_ra_rm_e0 ........ ... rm:5 ... ... ra:5 rd:5 \
+ &rrrr_esz esz=0 rn=%reg_movprfx
+
# Three operand with "memory" size, aka immediate left shift
@rd_rn_msz_rm ........ ... rm:5 .... imm:2 rn:5 rd:5 &rrri
@@ -136,6 +155,7 @@
&rprrr_esz rn=%reg_movprfx
@rdn_pg_rm_ra ........ esz:2 . ra:5 ... pg:3 rm:5 rd:5 \
&rprrr_esz rn=%reg_movprfx
+@rd_pg_rn_rm ........ esz:2 . rm:5 ... pg:3 rn:5 rd:5 &rprr_esz
# One register operand, with governing predicate, vector element size
@rd_pg_rn ........ esz:2 ... ... ... pg:3 rn:5 rd:5 &rpr_esz
@@ -149,13 +169,17 @@
@rd_rn_i6 ........ ... rn:5 ..... imm:s6 rd:5 &rri
# Two register operand, one immediate operand, with predicate,
-# element size encoded as TSZHL. User must fill in imm.
-@rdn_pg_tszimm ........ .. ... ... ... pg:3 ..... rd:5 \
- &rpri_esz rn=%reg_movprfx esz=%tszimm_esz
+# element size encoded as TSZHL.
+@rdn_pg_tszimm_shl ........ .. ... ... ... pg:3 ..... rd:5 \
+ &rpri_esz rn=%reg_movprfx esz=%tszimm_esz imm=%tszimm_shl
+@rdn_pg_tszimm_shr ........ .. ... ... ... pg:3 ..... rd:5 \
+ &rpri_esz rn=%reg_movprfx esz=%tszimm_esz imm=%tszimm_shr
# Similarly without predicate.
-@rd_rn_tszimm ........ .. ... ... ...... rn:5 rd:5 \
- &rri_esz esz=%tszimm16_esz
+@rd_rn_tszimm_shl ........ .. ... ... ...... rn:5 rd:5 \
+ &rri_esz esz=%tszimm16_esz imm=%tszimm16_shl
+@rd_rn_tszimm_shr ........ .. ... ... ...... rn:5 rd:5 \
+ &rri_esz esz=%tszimm16_esz imm=%tszimm16_shr
# Two register operand, one immediate operand, with 4-bit predicate.
# User must fill in imm.
@@ -229,6 +253,32 @@
@rpri_scatter_store ....... msz:2 .. imm:5 ... pg:3 rn:5 rd:5 \
&rpri_scatter_store
+# Two registers and a scalar by N-bit index
+@rrx_3 ........ .. . .. rm:3 ...... rn:5 rd:5 \
+ &rrx_esz index=%index3_22_19
+@rrx_2 ........ .. . index:2 rm:3 ...... rn:5 rd:5 &rrx_esz
+@rrx_1 ........ .. . index:1 rm:4 ...... rn:5 rd:5 &rrx_esz
+
+# Two registers and a scalar by N-bit index, alternate
+@rrx_3a ........ .. . .. rm:3 ...... rn:5 rd:5 \
+ &rrx_esz index=%index3_19_11
+@rrx_2a ........ .. . . rm:4 ...... rn:5 rd:5 \
+ &rrx_esz index=%index2_20_11
+
+# Three registers and a scalar by N-bit index
+@rrxr_3 ........ .. . .. rm:3 ...... rn:5 rd:5 \
+ &rrxr_esz ra=%reg_movprfx index=%index3_22_19
+@rrxr_2 ........ .. . index:2 rm:3 ...... rn:5 rd:5 \
+ &rrxr_esz ra=%reg_movprfx
+@rrxr_1 ........ .. . index:1 rm:4 ...... rn:5 rd:5 \
+ &rrxr_esz ra=%reg_movprfx
+
+# Three registers and a scalar by N-bit index, alternate
+@rrxr_3a ........ .. ... rm:3 ...... rn:5 rd:5 \
+ &rrxr_esz ra=%reg_movprfx index=%index3_19_11
+@rrxr_2a ........ .. .. rm:4 ...... rn:5 rd:5 \
+ &rrxr_esz ra=%reg_movprfx index=%index2_20_11
+
###########################################################################
# Instruction patterns. Grouped according to the SVE encodingindex.xhtml.
@@ -288,14 +338,15 @@ UMINV 00000100 .. 001 011 001 ... ..... ..... @rd_pg_rn
### SVE Shift by Immediate - Predicated Group
# SVE bitwise shift by immediate (predicated)
-ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shr
-LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shr
-LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shl
-ASRD 00000100 .. 000 100 100 ... .. ... ..... \
- @rdn_pg_tszimm imm=%tszimm_shr
+ASR_zpzi 00000100 .. 000 000 100 ... .. ... ..... @rdn_pg_tszimm_shr
+LSR_zpzi 00000100 .. 000 001 100 ... .. ... ..... @rdn_pg_tszimm_shr
+LSL_zpzi 00000100 .. 000 011 100 ... .. ... ..... @rdn_pg_tszimm_shl
+ASRD 00000100 .. 000 100 100 ... .. ... ..... @rdn_pg_tszimm_shr
+SQSHL_zpzi 00000100 .. 000 110 100 ... .. ... ..... @rdn_pg_tszimm_shl
+UQSHL_zpzi 00000100 .. 000 111 100 ... .. ... ..... @rdn_pg_tszimm_shl
+SRSHR 00000100 .. 001 100 100 ... .. ... ..... @rdn_pg_tszimm_shr
+URSHR 00000100 .. 001 101 100 ... .. ... ..... @rdn_pg_tszimm_shr
+SQSHLU 00000100 .. 001 111 100 ... .. ... ..... @rdn_pg_tszimm_shl
# SVE bitwise shift by vector (predicated)
ASR_zpzz 00000100 .. 010 000 100 ... ..... ..... @rdn_pg_rm
@@ -373,6 +424,17 @@ ORR_zzz 00000100 01 1 ..... 001 100 ..... ..... @rd_rn_rm_e0
EOR_zzz 00000100 10 1 ..... 001 100 ..... ..... @rd_rn_rm_e0
BIC_zzz 00000100 11 1 ..... 001 100 ..... ..... @rd_rn_rm_e0
+XAR 00000100 .. 1 ..... 001 101 rm:5 rd:5 &rrri_esz \
+ rn=%reg_movprfx esz=%tszimm16_esz imm=%tszimm16_shr
+
+# SVE2 bitwise ternary operations
+EOR3 00000100 00 1 ..... 001 110 ..... ..... @rdn_ra_rm_e0
+BSL 00000100 00 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
+BCAX 00000100 01 1 ..... 001 110 ..... ..... @rdn_ra_rm_e0
+BSL1N 00000100 01 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
+BSL2N 00000100 10 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
+NBSL 00000100 11 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
+
### SVE Index Generation Group
# SVE index generation (immediate start, immediate increment)
@@ -387,24 +449,24 @@ INDEX_ri 00000100 esz:2 1 imm:s5 010001 rn:5 rd:5
# SVE index generation (register start, register increment)
INDEX_rr 00000100 .. 1 ..... 010011 ..... ..... @rd_rn_rm
-### SVE Stack Allocation Group
+### SVE / Streaming SVE Stack Allocation Group
# SVE stack frame adjustment
ADDVL 00000100 001 ..... 01010 ...... ..... @rd_rn_i6
+ADDSVL 00000100 001 ..... 01011 ...... ..... @rd_rn_i6
ADDPL 00000100 011 ..... 01010 ...... ..... @rd_rn_i6
+ADDSPL 00000100 011 ..... 01011 ...... ..... @rd_rn_i6
# SVE stack frame size
RDVL 00000100 101 11111 01010 imm:s6 rd:5
+RDSVL 00000100 101 11111 01011 imm:s6 rd:5
### SVE Bitwise Shift - Unpredicated Group
# SVE bitwise shift by immediate (unpredicated)
-ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... \
- @rd_rn_tszimm imm=%tszimm16_shr
-LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... \
- @rd_rn_tszimm imm=%tszimm16_shr
-LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... \
- @rd_rn_tszimm imm=%tszimm16_shl
+ASR_zzi 00000100 .. 1 ..... 1001 00 ..... ..... @rd_rn_tszimm_shr
+LSR_zzi 00000100 .. 1 ..... 1001 01 ..... ..... @rd_rn_tszimm_shr
+LSL_zzi 00000100 .. 1 ..... 1001 11 ..... ..... @rd_rn_tszimm_shl
# SVE bitwise shift by wide elements (unpredicated)
# Note esz != 3
@@ -469,15 +531,25 @@ DUPM 00000101 11 0000 dbm:13 rd:5
FCPY 00000101 .. 01 .... 110 imm:8 ..... @rdn_pg4
# SVE copy integer immediate (predicated)
-CPY_m_i 00000101 .. 01 .... 01 . ........ ..... @rdn_pg4 imm=%sh8_i8s
-CPY_z_i 00000101 .. 01 .... 00 . ........ ..... @rdn_pg4 imm=%sh8_i8s
+{
+ INVALID 00000101 00 01 ---- 01 1 -------- -----
+ CPY_m_i 00000101 .. 01 .... 01 . ........ ..... @rdn_pg4 imm=%sh8_i8s
+}
+{
+ INVALID 00000101 00 01 ---- 00 1 -------- -----
+ CPY_z_i 00000101 .. 01 .... 00 . ........ ..... @rdn_pg4 imm=%sh8_i8s
+}
### SVE Permute - Extract Group
-# SVE extract vector (immediate offset)
+# SVE extract vector (destructive)
EXT 00000101 001 ..... 000 ... rm:5 rd:5 \
&rrri rn=%reg_movprfx imm=%imm8_16_10
+# SVE2 extract vector (constructive)
+EXT_sve2 00000101 011 ..... 000 ... rn:5 rd:5 \
+ &rri imm=%imm8_16_10
+
### SVE Permute - Unpredicated Group
# SVE broadcast general register
@@ -502,6 +574,11 @@ TBL 00000101 .. 1 ..... 001100 ..... ..... @rd_rn_rm
# SVE unpack vector elements
UNPK 00000101 esz:2 1100 u:1 h:1 001110 rn:5 rd:5
+# SVE2 Table Lookup (three sources)
+
+TBL_sve2 00000101 .. 1 ..... 001010 ..... ..... @rd_rn_rm
+TBX 00000101 .. 1 ..... 001011 ..... ..... @rd_rn_rm
+
### SVE Permute - Predicates Group
# SVE permute predicate elements
@@ -529,6 +606,14 @@ UZP2_z 00000101 .. 1 ..... 011 011 ..... ..... @rd_rn_rm
TRN1_z 00000101 .. 1 ..... 011 100 ..... ..... @rd_rn_rm
TRN2_z 00000101 .. 1 ..... 011 101 ..... ..... @rd_rn_rm
+# SVE2 permute vector segments
+ZIP1_q 00000101 10 1 ..... 000 000 ..... ..... @rd_rn_rm_e0
+ZIP2_q 00000101 10 1 ..... 000 001 ..... ..... @rd_rn_rm_e0
+UZP1_q 00000101 10 1 ..... 000 010 ..... ..... @rd_rn_rm_e0
+UZP2_q 00000101 10 1 ..... 000 011 ..... ..... @rd_rn_rm_e0
+TRN1_q 00000101 10 1 ..... 000 110 ..... ..... @rd_rn_rm_e0
+TRN2_q 00000101 10 1 ..... 000 111 ..... ..... @rd_rn_rm_e0
+
### SVE Permute - Predicated Group
# SVE compress active elements
@@ -567,10 +652,14 @@ REVB 00000101 .. 1001 00 100 ... ..... ..... @rd_pg_rn
REVH 00000101 .. 1001 01 100 ... ..... ..... @rd_pg_rn
REVW 00000101 .. 1001 10 100 ... ..... ..... @rd_pg_rn
RBIT 00000101 .. 1001 11 100 ... ..... ..... @rd_pg_rn
+REVD 00000101 00 1011 10 100 ... ..... ..... @rd_pg_rn_e0
-# SVE vector splice (predicated)
+# SVE vector splice (predicated, destructive)
SPLICE 00000101 .. 101 100 100 ... ..... ..... @rdn_pg_rm
+# SVE2 vector splice (predicated, constructive)
+SPLICE_sve2 00000101 .. 101 101 100 ... ..... ..... @rd_pg_rn
+
### SVE Select Vectors Group
# SVE select vector elements (predicated)
@@ -667,8 +756,8 @@ BRKPB 00100101 0. 00 .... 11 .... 0 .... 1 .... @pd_pg_pn_pm_s
# SVE partition break condition
BRKA_z 00100101 0. 01000001 .... 0 .... 0 .... @pd_pg_pn_s
BRKB_z 00100101 1. 01000001 .... 0 .... 0 .... @pd_pg_pn_s
-BRKA_m 00100101 0. 01000001 .... 0 .... 1 .... @pd_pg_pn_s
-BRKB_m 00100101 1. 01000001 .... 0 .... 1 .... @pd_pg_pn_s
+BRKA_m 00100101 00 01000001 .... 0 .... 1 .... @pd_pg_pn_s0
+BRKB_m 00100101 10 01000001 .... 0 .... 1 .... @pd_pg_pn_s0
# SVE propagate break to next partition
BRKN 00100101 0. 01100001 .... 0 .... 0 .... @pd_pg_pn_s
@@ -697,7 +786,10 @@ SINCDECP_z 00100101 .. 1010 d:1 u:1 10000 00 .... ..... @incdec2_pred
CTERM 00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000
# SVE integer compare scalar count and limit
-WHILE 00100101 esz:2 1 rm:5 000 sf:1 u:1 1 rn:5 eq:1 rd:4
+WHILE 00100101 esz:2 1 rm:5 000 sf:1 u:1 lt:1 rn:5 eq:1 rd:4
+
+# SVE2 pointer conflict compare
+WHILE_ptr 00100101 esz:2 1 rm:5 001 100 rn:5 rw:1 rd:4
### SVE Integer Wide Immediate - Unpredicated Group
@@ -705,16 +797,40 @@ WHILE 00100101 esz:2 1 rm:5 000 sf:1 u:1 1 rn:5 eq:1 rd:4
FDUP 00100101 esz:2 111 00 1110 imm:8 rd:5
# SVE broadcast integer immediate (unpredicated)
-DUP_i 00100101 esz:2 111 00 011 . ........ rd:5 imm=%sh8_i8s
+{
+ INVALID 00100101 00 111 00 011 1 -------- -----
+ DUP_i 00100101 esz:2 111 00 011 . ........ rd:5 imm=%sh8_i8s
+}
# SVE integer add/subtract immediate (unpredicated)
-ADD_zzi 00100101 .. 100 000 11 . ........ ..... @rdn_sh_i8u
-SUB_zzi 00100101 .. 100 001 11 . ........ ..... @rdn_sh_i8u
-SUBR_zzi 00100101 .. 100 011 11 . ........ ..... @rdn_sh_i8u
-SQADD_zzi 00100101 .. 100 100 11 . ........ ..... @rdn_sh_i8u
-UQADD_zzi 00100101 .. 100 101 11 . ........ ..... @rdn_sh_i8u
-SQSUB_zzi 00100101 .. 100 110 11 . ........ ..... @rdn_sh_i8u
-UQSUB_zzi 00100101 .. 100 111 11 . ........ ..... @rdn_sh_i8u
+{
+ INVALID 00100101 00 100 000 11 1 -------- -----
+ ADD_zzi 00100101 .. 100 000 11 . ........ ..... @rdn_sh_i8u
+}
+{
+ INVALID 00100101 00 100 001 11 1 -------- -----
+ SUB_zzi 00100101 .. 100 001 11 . ........ ..... @rdn_sh_i8u
+}
+{
+ INVALID 00100101 00 100 011 11 1 -------- -----
+ SUBR_zzi 00100101 .. 100 011 11 . ........ ..... @rdn_sh_i8u
+}
+{
+ INVALID 00100101 00 100 100 11 1 -------- -----
+ SQADD_zzi 00100101 .. 100 100 11 . ........ ..... @rdn_sh_i8u
+}
+{
+ INVALID 00100101 00 100 101 11 1 -------- -----
+ UQADD_zzi 00100101 .. 100 101 11 . ........ ..... @rdn_sh_i8u
+}
+{
+ INVALID 00100101 00 100 110 11 1 -------- -----
+ SQSUB_zzi 00100101 .. 100 110 11 . ........ ..... @rdn_sh_i8u
+}
+{
+ INVALID 00100101 00 100 111 11 1 -------- -----
+ UQSUB_zzi 00100101 .. 100 111 11 . ........ ..... @rdn_sh_i8u
+}
# SVE integer min/max immediate (unpredicated)
SMAX_zzi 00100101 .. 101 000 110 ........ ..... @rdn_i8s
@@ -726,13 +842,114 @@ UMIN_zzi 00100101 .. 101 011 110 ........ ..... @rdn_i8u
MUL_zzi 00100101 .. 110 000 110 ........ ..... @rdn_i8s
# SVE integer dot product (unpredicated)
-DOT_zzz 01000100 1 sz:1 0 rm:5 00000 u:1 rn:5 rd:5 ra=%reg_movprfx
+DOT_zzzz 01000100 1 sz:1 0 rm:5 00000 u:1 rn:5 rd:5 \
+ ra=%reg_movprfx
+
+# SVE2 complex dot product (vectors)
+CDOT_zzzz 01000100 esz:2 0 rm:5 0001 rot:2 rn:5 rd:5 ra=%reg_movprfx
+
+#### SVE Multiply - Indexed
# SVE integer dot product (indexed)
-DOT_zzx 01000100 101 index:2 rm:3 00000 u:1 rn:5 rd:5 \
- sz=0 ra=%reg_movprfx
-DOT_zzx 01000100 111 index:1 rm:4 00000 u:1 rn:5 rd:5 \
- sz=1 ra=%reg_movprfx
+SDOT_zzxw_s 01000100 10 1 ..... 000000 ..... ..... @rrxr_2 esz=2
+SDOT_zzxw_d 01000100 11 1 ..... 000000 ..... ..... @rrxr_1 esz=3
+UDOT_zzxw_s 01000100 10 1 ..... 000001 ..... ..... @rrxr_2 esz=2
+UDOT_zzxw_d 01000100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3
+
+# SVE2 integer multiply-add (indexed)
+MLA_zzxz_h 01000100 0. 1 ..... 000010 ..... ..... @rrxr_3 esz=1
+MLA_zzxz_s 01000100 10 1 ..... 000010 ..... ..... @rrxr_2 esz=2
+MLA_zzxz_d 01000100 11 1 ..... 000010 ..... ..... @rrxr_1 esz=3
+MLS_zzxz_h 01000100 0. 1 ..... 000011 ..... ..... @rrxr_3 esz=1
+MLS_zzxz_s 01000100 10 1 ..... 000011 ..... ..... @rrxr_2 esz=2
+MLS_zzxz_d 01000100 11 1 ..... 000011 ..... ..... @rrxr_1 esz=3
+
+# SVE2 saturating multiply-add high (indexed)
+SQRDMLAH_zzxz_h 01000100 0. 1 ..... 000100 ..... ..... @rrxr_3 esz=1
+SQRDMLAH_zzxz_s 01000100 10 1 ..... 000100 ..... ..... @rrxr_2 esz=2
+SQRDMLAH_zzxz_d 01000100 11 1 ..... 000100 ..... ..... @rrxr_1 esz=3
+SQRDMLSH_zzxz_h 01000100 0. 1 ..... 000101 ..... ..... @rrxr_3 esz=1
+SQRDMLSH_zzxz_s 01000100 10 1 ..... 000101 ..... ..... @rrxr_2 esz=2
+SQRDMLSH_zzxz_d 01000100 11 1 ..... 000101 ..... ..... @rrxr_1 esz=3
+
+# SVE mixed sign dot product (indexed)
+USDOT_zzxw_s 01000100 10 1 ..... 000110 ..... ..... @rrxr_2 esz=2
+SUDOT_zzxw_s 01000100 10 1 ..... 000111 ..... ..... @rrxr_2 esz=2
+
+# SVE2 saturating multiply-add (indexed)
+SQDMLALB_zzxw_s 01000100 10 1 ..... 0010.0 ..... ..... @rrxr_3a esz=2
+SQDMLALB_zzxw_d 01000100 11 1 ..... 0010.0 ..... ..... @rrxr_2a esz=3
+SQDMLALT_zzxw_s 01000100 10 1 ..... 0010.1 ..... ..... @rrxr_3a esz=2
+SQDMLALT_zzxw_d 01000100 11 1 ..... 0010.1 ..... ..... @rrxr_2a esz=3
+SQDMLSLB_zzxw_s 01000100 10 1 ..... 0011.0 ..... ..... @rrxr_3a esz=2
+SQDMLSLB_zzxw_d 01000100 11 1 ..... 0011.0 ..... ..... @rrxr_2a esz=3
+SQDMLSLT_zzxw_s 01000100 10 1 ..... 0011.1 ..... ..... @rrxr_3a esz=2
+SQDMLSLT_zzxw_d 01000100 11 1 ..... 0011.1 ..... ..... @rrxr_2a esz=3
+
+# SVE2 complex integer dot product (indexed)
+CDOT_zzxw_s 01000100 10 1 index:2 rm:3 0100 rot:2 rn:5 rd:5 \
+ ra=%reg_movprfx
+CDOT_zzxw_d 01000100 11 1 index:1 rm:4 0100 rot:2 rn:5 rd:5 \
+ ra=%reg_movprfx
+
+# SVE2 complex integer multiply-add (indexed)
+CMLA_zzxz_h 01000100 10 1 index:2 rm:3 0110 rot:2 rn:5 rd:5 \
+ ra=%reg_movprfx
+CMLA_zzxz_s 01000100 11 1 index:1 rm:4 0110 rot:2 rn:5 rd:5 \
+ ra=%reg_movprfx
+
+# SVE2 complex saturating integer multiply-add (indexed)
+SQRDCMLAH_zzxz_h 01000100 10 1 index:2 rm:3 0111 rot:2 rn:5 rd:5 \
+ ra=%reg_movprfx
+SQRDCMLAH_zzxz_s 01000100 11 1 index:1 rm:4 0111 rot:2 rn:5 rd:5 \
+ ra=%reg_movprfx
+
+# SVE2 multiply-add long (indexed)
+SMLALB_zzxw_s 01000100 10 1 ..... 1000.0 ..... ..... @rrxr_3a esz=2
+SMLALB_zzxw_d 01000100 11 1 ..... 1000.0 ..... ..... @rrxr_2a esz=3
+SMLALT_zzxw_s 01000100 10 1 ..... 1000.1 ..... ..... @rrxr_3a esz=2
+SMLALT_zzxw_d 01000100 11 1 ..... 1000.1 ..... ..... @rrxr_2a esz=3
+UMLALB_zzxw_s 01000100 10 1 ..... 1001.0 ..... ..... @rrxr_3a esz=2
+UMLALB_zzxw_d 01000100 11 1 ..... 1001.0 ..... ..... @rrxr_2a esz=3
+UMLALT_zzxw_s 01000100 10 1 ..... 1001.1 ..... ..... @rrxr_3a esz=2
+UMLALT_zzxw_d 01000100 11 1 ..... 1001.1 ..... ..... @rrxr_2a esz=3
+SMLSLB_zzxw_s 01000100 10 1 ..... 1010.0 ..... ..... @rrxr_3a esz=2
+SMLSLB_zzxw_d 01000100 11 1 ..... 1010.0 ..... ..... @rrxr_2a esz=3
+SMLSLT_zzxw_s 01000100 10 1 ..... 1010.1 ..... ..... @rrxr_3a esz=2
+SMLSLT_zzxw_d 01000100 11 1 ..... 1010.1 ..... ..... @rrxr_2a esz=3
+UMLSLB_zzxw_s 01000100 10 1 ..... 1011.0 ..... ..... @rrxr_3a esz=2
+UMLSLB_zzxw_d 01000100 11 1 ..... 1011.0 ..... ..... @rrxr_2a esz=3
+UMLSLT_zzxw_s 01000100 10 1 ..... 1011.1 ..... ..... @rrxr_3a esz=2
+UMLSLT_zzxw_d 01000100 11 1 ..... 1011.1 ..... ..... @rrxr_2a esz=3
+
+# SVE2 integer multiply long (indexed)
+SMULLB_zzx_s 01000100 10 1 ..... 1100.0 ..... ..... @rrx_3a esz=2
+SMULLB_zzx_d 01000100 11 1 ..... 1100.0 ..... ..... @rrx_2a esz=3
+SMULLT_zzx_s 01000100 10 1 ..... 1100.1 ..... ..... @rrx_3a esz=2
+SMULLT_zzx_d 01000100 11 1 ..... 1100.1 ..... ..... @rrx_2a esz=3
+UMULLB_zzx_s 01000100 10 1 ..... 1101.0 ..... ..... @rrx_3a esz=2
+UMULLB_zzx_d 01000100 11 1 ..... 1101.0 ..... ..... @rrx_2a esz=3
+UMULLT_zzx_s 01000100 10 1 ..... 1101.1 ..... ..... @rrx_3a esz=2
+UMULLT_zzx_d 01000100 11 1 ..... 1101.1 ..... ..... @rrx_2a esz=3
+
+# SVE2 saturating multiply (indexed)
+SQDMULLB_zzx_s 01000100 10 1 ..... 1110.0 ..... ..... @rrx_3a esz=2
+SQDMULLB_zzx_d 01000100 11 1 ..... 1110.0 ..... ..... @rrx_2a esz=3
+SQDMULLT_zzx_s 01000100 10 1 ..... 1110.1 ..... ..... @rrx_3a esz=2
+SQDMULLT_zzx_d 01000100 11 1 ..... 1110.1 ..... ..... @rrx_2a esz=3
+
+# SVE2 saturating multiply high (indexed)
+SQDMULH_zzx_h 01000100 0. 1 ..... 111100 ..... ..... @rrx_3 esz=1
+SQDMULH_zzx_s 01000100 10 1 ..... 111100 ..... ..... @rrx_2 esz=2
+SQDMULH_zzx_d 01000100 11 1 ..... 111100 ..... ..... @rrx_1 esz=3
+SQRDMULH_zzx_h 01000100 0. 1 ..... 111101 ..... ..... @rrx_3 esz=1
+SQRDMULH_zzx_s 01000100 10 1 ..... 111101 ..... ..... @rrx_2 esz=2
+SQRDMULH_zzx_d 01000100 11 1 ..... 111101 ..... ..... @rrx_1 esz=3
+
+# SVE2 integer multiply (indexed)
+MUL_zzx_h 01000100 0. 1 ..... 111110 ..... ..... @rrx_3 esz=1
+MUL_zzx_s 01000100 10 1 ..... 111110 ..... ..... @rrx_2 esz=2
+MUL_zzx_d 01000100 11 1 ..... 111110 ..... ..... @rrx_1 esz=3
# SVE floating-point complex add (predicated)
FCADD 01100100 esz:2 00000 rot:1 100 pg:3 rm:5 rd:5 \
@@ -751,20 +968,19 @@ FCMLA_zzxz 01100100 11 1 index:1 rm:4 0001 rot:2 rn:5 rd:5 \
### SVE FP Multiply-Add Indexed Group
# SVE floating-point multiply-add (indexed)
-FMLA_zzxz 01100100 0.1 .. rm:3 00000 sub:1 rn:5 rd:5 \
- ra=%reg_movprfx index=%index3_22_19 esz=1
-FMLA_zzxz 01100100 101 index:2 rm:3 00000 sub:1 rn:5 rd:5 \
- ra=%reg_movprfx esz=2
-FMLA_zzxz 01100100 111 index:1 rm:4 00000 sub:1 rn:5 rd:5 \
- ra=%reg_movprfx esz=3
+FMLA_zzxz 01100100 0. 1 ..... 000000 ..... ..... @rrxr_3 esz=1
+FMLA_zzxz 01100100 10 1 ..... 000000 ..... ..... @rrxr_2 esz=2
+FMLA_zzxz 01100100 11 1 ..... 000000 ..... ..... @rrxr_1 esz=3
+FMLS_zzxz 01100100 0. 1 ..... 000001 ..... ..... @rrxr_3 esz=1
+FMLS_zzxz 01100100 10 1 ..... 000001 ..... ..... @rrxr_2 esz=2
+FMLS_zzxz 01100100 11 1 ..... 000001 ..... ..... @rrxr_1 esz=3
### SVE FP Multiply Indexed Group
# SVE floating-point multiply (indexed)
-FMUL_zzx 01100100 0.1 .. rm:3 001000 rn:5 rd:5 \
- index=%index3_22_19 esz=1
-FMUL_zzx 01100100 101 index:2 rm:3 001000 rn:5 rd:5 esz=2
-FMUL_zzx 01100100 111 index:1 rm:4 001000 rn:5 rd:5 esz=3
+FMUL_zzx 01100100 0. 1 ..... 001000 ..... ..... @rrx_3 esz=1
+FMUL_zzx 01100100 10 1 ..... 001000 ..... ..... @rrx_2 esz=2
+FMUL_zzx 01100100 11 1 ..... 001000 ..... ..... @rrx_1 esz=3
### SVE FP Fast Reduction Group
@@ -854,6 +1070,7 @@ FNMLS_zpzzz 01100101 .. 1 ..... 111 ... ..... ..... @rdn_pg_rm_ra
# SVE floating-point convert precision
FCVT_sh 01100101 10 0010 00 101 ... ..... ..... @rd_pg_rn_e0
FCVT_hs 01100101 10 0010 01 101 ... ..... ..... @rd_pg_rn_e0
+BFCVT 01100101 10 0010 10 101 ... ..... ..... @rd_pg_rn_e0
FCVT_dh 01100101 11 0010 00 101 ... ..... ..... @rd_pg_rn_e0
FCVT_hd 01100101 11 0010 01 101 ... ..... ..... @rd_pg_rn_e0
FCVT_ds 01100101 11 0010 10 101 ... ..... ..... @rd_pg_rn_e0
@@ -959,17 +1176,21 @@ LD_zpri 1010010 .. nreg:2 0.... 111 ... ..... ..... @rpri_load_msz
# SVE load and broadcast quadword (scalar plus scalar)
LD1RQ_zprr 1010010 .. 00 ..... 000 ... ..... ..... \
@rprr_load_msz nreg=0
+LD1RO_zprr 1010010 .. 01 ..... 000 ... ..... ..... \
+ @rprr_load_msz nreg=0
# SVE load and broadcast quadword (scalar plus immediate)
# LD1RQB, LD1RQH, LD1RQS, LD1RQD
LD1RQ_zpri 1010010 .. 00 0.... 001 ... ..... ..... \
@rpri_load_msz nreg=0
+LD1RO_zpri 1010010 .. 01 0.... 001 ... ..... ..... \
+ @rpri_load_msz nreg=0
# SVE 32-bit gather prefetch (scalar plus 32-bit scaled offsets)
-PRF 1000010 00 -1 ----- 0-- --- ----- 0 ----
+PRF_ns 1000010 00 -1 ----- 0-- --- ----- 0 ----
# SVE 32-bit gather prefetch (vector plus immediate)
-PRF 1000010 -- 00 ----- 111 --- ----- 0 ----
+PRF_ns 1000010 -- 00 ----- 111 --- ----- 0 ----
# SVE contiguous prefetch (scalar plus immediate)
PRF 1000010 11 1- ----- 0-- --- ----- 0 ----
@@ -1006,13 +1227,13 @@ LD1_zpiz 1100010 .. 01 ..... 1.. ... ..... ..... \
@rpri_g_load esz=3
# SVE 64-bit gather prefetch (scalar plus 64-bit scaled offsets)
-PRF 1100010 00 11 ----- 1-- --- ----- 0 ----
+PRF_ns 1100010 00 11 ----- 1-- --- ----- 0 ----
# SVE 64-bit gather prefetch (scalar plus unpacked 32-bit scaled offsets)
-PRF 1100010 00 -1 ----- 0-- --- ----- 0 ----
+PRF_ns 1100010 00 -1 ----- 0-- --- ----- 0 ----
# SVE 64-bit gather prefetch (vector plus immediate)
-PRF 1100010 -- 00 ----- 111 --- ----- 0 ----
+PRF_ns 1100010 -- 00 ----- 111 --- ----- 0 ----
### SVE Memory Store Group
@@ -1092,3 +1313,390 @@ ST1_zprz 1110010 .. 00 ..... 100 ... ..... ..... \
@rprr_scatter_store xs=0 esz=3 scale=0
ST1_zprz 1110010 .. 00 ..... 110 ... ..... ..... \
@rprr_scatter_store xs=1 esz=3 scale=0
+
+#### SVE2 Support
+
+### SVE2 Integer Multiply - Unpredicated
+
+# SVE2 integer multiply vectors (unpredicated)
+MUL_zzz 00000100 .. 1 ..... 0110 00 ..... ..... @rd_rn_rm
+SMULH_zzz 00000100 .. 1 ..... 0110 10 ..... ..... @rd_rn_rm
+UMULH_zzz 00000100 .. 1 ..... 0110 11 ..... ..... @rd_rn_rm
+PMUL_zzz 00000100 00 1 ..... 0110 01 ..... ..... @rd_rn_rm_e0
+
+# SVE2 signed saturating doubling multiply high (unpredicated)
+SQDMULH_zzz 00000100 .. 1 ..... 0111 00 ..... ..... @rd_rn_rm
+SQRDMULH_zzz 00000100 .. 1 ..... 0111 01 ..... ..... @rd_rn_rm
+
+### SVE2 Integer - Predicated
+
+SADALP_zpzz 01000100 .. 000 100 101 ... ..... ..... @rdm_pg_rn
+UADALP_zpzz 01000100 .. 000 101 101 ... ..... ..... @rdm_pg_rn
+
+### SVE2 integer unary operations (predicated)
+
+URECPE 01000100 .. 000 000 101 ... ..... ..... @rd_pg_rn
+URSQRTE 01000100 .. 000 001 101 ... ..... ..... @rd_pg_rn
+SQABS 01000100 .. 001 000 101 ... ..... ..... @rd_pg_rn
+SQNEG 01000100 .. 001 001 101 ... ..... ..... @rd_pg_rn
+
+### SVE2 saturating/rounding bitwise shift left (predicated)
+
+SRSHL 01000100 .. 000 010 100 ... ..... ..... @rdn_pg_rm
+URSHL 01000100 .. 000 011 100 ... ..... ..... @rdn_pg_rm
+SRSHL 01000100 .. 000 110 100 ... ..... ..... @rdm_pg_rn # SRSHLR
+URSHL 01000100 .. 000 111 100 ... ..... ..... @rdm_pg_rn # URSHLR
+
+SQSHL 01000100 .. 001 000 100 ... ..... ..... @rdn_pg_rm
+UQSHL 01000100 .. 001 001 100 ... ..... ..... @rdn_pg_rm
+SQSHL 01000100 .. 001 100 100 ... ..... ..... @rdm_pg_rn # SQSHLR
+UQSHL 01000100 .. 001 101 100 ... ..... ..... @rdm_pg_rn # UQSHLR
+
+SQRSHL 01000100 .. 001 010 100 ... ..... ..... @rdn_pg_rm
+UQRSHL 01000100 .. 001 011 100 ... ..... ..... @rdn_pg_rm
+SQRSHL 01000100 .. 001 110 100 ... ..... ..... @rdm_pg_rn # SQRSHLR
+UQRSHL 01000100 .. 001 111 100 ... ..... ..... @rdm_pg_rn # UQRSHLR
+
+### SVE2 integer halving add/subtract (predicated)
+
+SHADD 01000100 .. 010 000 100 ... ..... ..... @rdn_pg_rm
+UHADD 01000100 .. 010 001 100 ... ..... ..... @rdn_pg_rm
+SHSUB 01000100 .. 010 010 100 ... ..... ..... @rdn_pg_rm
+UHSUB 01000100 .. 010 011 100 ... ..... ..... @rdn_pg_rm
+SRHADD 01000100 .. 010 100 100 ... ..... ..... @rdn_pg_rm
+URHADD 01000100 .. 010 101 100 ... ..... ..... @rdn_pg_rm
+SHSUB 01000100 .. 010 110 100 ... ..... ..... @rdm_pg_rn # SHSUBR
+UHSUB 01000100 .. 010 111 100 ... ..... ..... @rdm_pg_rn # UHSUBR
+
+### SVE2 integer pairwise arithmetic
+
+ADDP 01000100 .. 010 001 101 ... ..... ..... @rdn_pg_rm
+SMAXP 01000100 .. 010 100 101 ... ..... ..... @rdn_pg_rm
+UMAXP 01000100 .. 010 101 101 ... ..... ..... @rdn_pg_rm
+SMINP 01000100 .. 010 110 101 ... ..... ..... @rdn_pg_rm
+UMINP 01000100 .. 010 111 101 ... ..... ..... @rdn_pg_rm
+
+### SVE2 saturating add/subtract (predicated)
+
+SQADD_zpzz 01000100 .. 011 000 100 ... ..... ..... @rdn_pg_rm
+UQADD_zpzz 01000100 .. 011 001 100 ... ..... ..... @rdn_pg_rm
+SQSUB_zpzz 01000100 .. 011 010 100 ... ..... ..... @rdn_pg_rm
+UQSUB_zpzz 01000100 .. 011 011 100 ... ..... ..... @rdn_pg_rm
+SUQADD 01000100 .. 011 100 100 ... ..... ..... @rdn_pg_rm
+USQADD 01000100 .. 011 101 100 ... ..... ..... @rdn_pg_rm
+SQSUB_zpzz 01000100 .. 011 110 100 ... ..... ..... @rdm_pg_rn # SQSUBR
+UQSUB_zpzz 01000100 .. 011 111 100 ... ..... ..... @rdm_pg_rn # UQSUBR
+
+#### SVE2 Widening Integer Arithmetic
+
+## SVE2 integer add/subtract long
+
+SADDLB 01000101 .. 0 ..... 00 0000 ..... ..... @rd_rn_rm
+SADDLT 01000101 .. 0 ..... 00 0001 ..... ..... @rd_rn_rm
+UADDLB 01000101 .. 0 ..... 00 0010 ..... ..... @rd_rn_rm
+UADDLT 01000101 .. 0 ..... 00 0011 ..... ..... @rd_rn_rm
+
+SSUBLB 01000101 .. 0 ..... 00 0100 ..... ..... @rd_rn_rm
+SSUBLT 01000101 .. 0 ..... 00 0101 ..... ..... @rd_rn_rm
+USUBLB 01000101 .. 0 ..... 00 0110 ..... ..... @rd_rn_rm
+USUBLT 01000101 .. 0 ..... 00 0111 ..... ..... @rd_rn_rm
+
+SABDLB 01000101 .. 0 ..... 00 1100 ..... ..... @rd_rn_rm
+SABDLT 01000101 .. 0 ..... 00 1101 ..... ..... @rd_rn_rm
+UABDLB 01000101 .. 0 ..... 00 1110 ..... ..... @rd_rn_rm
+UABDLT 01000101 .. 0 ..... 00 1111 ..... ..... @rd_rn_rm
+
+## SVE2 integer add/subtract interleaved long
+
+SADDLBT 01000101 .. 0 ..... 1000 00 ..... ..... @rd_rn_rm
+SSUBLBT 01000101 .. 0 ..... 1000 10 ..... ..... @rd_rn_rm
+SSUBLTB 01000101 .. 0 ..... 1000 11 ..... ..... @rd_rn_rm
+
+## SVE2 integer add/subtract wide
+
+SADDWB 01000101 .. 0 ..... 010 000 ..... ..... @rd_rn_rm
+SADDWT 01000101 .. 0 ..... 010 001 ..... ..... @rd_rn_rm
+UADDWB 01000101 .. 0 ..... 010 010 ..... ..... @rd_rn_rm
+UADDWT 01000101 .. 0 ..... 010 011 ..... ..... @rd_rn_rm
+
+SSUBWB 01000101 .. 0 ..... 010 100 ..... ..... @rd_rn_rm
+SSUBWT 01000101 .. 0 ..... 010 101 ..... ..... @rd_rn_rm
+USUBWB 01000101 .. 0 ..... 010 110 ..... ..... @rd_rn_rm
+USUBWT 01000101 .. 0 ..... 010 111 ..... ..... @rd_rn_rm
+
+## SVE2 integer multiply long
+
+SQDMULLB_zzz 01000101 .. 0 ..... 011 000 ..... ..... @rd_rn_rm
+SQDMULLT_zzz 01000101 .. 0 ..... 011 001 ..... ..... @rd_rn_rm
+PMULLB 01000101 .. 0 ..... 011 010 ..... ..... @rd_rn_rm
+PMULLT 01000101 .. 0 ..... 011 011 ..... ..... @rd_rn_rm
+SMULLB_zzz 01000101 .. 0 ..... 011 100 ..... ..... @rd_rn_rm
+SMULLT_zzz 01000101 .. 0 ..... 011 101 ..... ..... @rd_rn_rm
+UMULLB_zzz 01000101 .. 0 ..... 011 110 ..... ..... @rd_rn_rm
+UMULLT_zzz 01000101 .. 0 ..... 011 111 ..... ..... @rd_rn_rm
+
+## SVE2 bitwise shift left long
+
+# Note bit23 == 0 is handled by esz > 0 in do_sve2_shll_tb.
+SSHLLB 01000101 .. 0 ..... 1010 00 ..... ..... @rd_rn_tszimm_shl
+SSHLLT 01000101 .. 0 ..... 1010 01 ..... ..... @rd_rn_tszimm_shl
+USHLLB 01000101 .. 0 ..... 1010 10 ..... ..... @rd_rn_tszimm_shl
+USHLLT 01000101 .. 0 ..... 1010 11 ..... ..... @rd_rn_tszimm_shl
+
+## SVE2 bitwise exclusive-or interleaved
+
+EORBT 01000101 .. 0 ..... 10010 0 ..... ..... @rd_rn_rm
+EORTB 01000101 .. 0 ..... 10010 1 ..... ..... @rd_rn_rm
+
+## SVE integer matrix multiply accumulate
+
+SMMLA 01000101 00 0 ..... 10011 0 ..... ..... @rda_rn_rm_e0
+USMMLA 01000101 10 0 ..... 10011 0 ..... ..... @rda_rn_rm_e0
+UMMLA 01000101 11 0 ..... 10011 0 ..... ..... @rda_rn_rm_e0
+
+## SVE2 bitwise permute
+
+BEXT 01000101 .. 0 ..... 1011 00 ..... ..... @rd_rn_rm
+BDEP 01000101 .. 0 ..... 1011 01 ..... ..... @rd_rn_rm
+BGRP 01000101 .. 0 ..... 1011 10 ..... ..... @rd_rn_rm
+
+#### SVE2 Accumulate
+
+## SVE2 complex integer add
+
+CADD_rot90 01000101 .. 00000 0 11011 0 ..... ..... @rdn_rm
+CADD_rot270 01000101 .. 00000 0 11011 1 ..... ..... @rdn_rm
+SQCADD_rot90 01000101 .. 00000 1 11011 0 ..... ..... @rdn_rm
+SQCADD_rot270 01000101 .. 00000 1 11011 1 ..... ..... @rdn_rm
+
+## SVE2 integer absolute difference and accumulate long
+
+SABALB 01000101 .. 0 ..... 1100 00 ..... ..... @rda_rn_rm
+SABALT 01000101 .. 0 ..... 1100 01 ..... ..... @rda_rn_rm
+UABALB 01000101 .. 0 ..... 1100 10 ..... ..... @rda_rn_rm
+UABALT 01000101 .. 0 ..... 1100 11 ..... ..... @rda_rn_rm
+
+## SVE2 integer add/subtract long with carry
+
+# ADC and SBC decoded via size in helper dispatch.
+ADCLB 01000101 .. 0 ..... 11010 0 ..... ..... @rda_rn_rm
+ADCLT 01000101 .. 0 ..... 11010 1 ..... ..... @rda_rn_rm
+
+## SVE2 bitwise shift right and accumulate
+
+# TODO: Use @rda and %reg_movprfx here.
+SSRA 01000101 .. 0 ..... 1110 00 ..... ..... @rd_rn_tszimm_shr
+USRA 01000101 .. 0 ..... 1110 01 ..... ..... @rd_rn_tszimm_shr
+SRSRA 01000101 .. 0 ..... 1110 10 ..... ..... @rd_rn_tszimm_shr
+URSRA 01000101 .. 0 ..... 1110 11 ..... ..... @rd_rn_tszimm_shr
+
+## SVE2 bitwise shift and insert
+
+SRI 01000101 .. 0 ..... 11110 0 ..... ..... @rd_rn_tszimm_shr
+SLI 01000101 .. 0 ..... 11110 1 ..... ..... @rd_rn_tszimm_shl
+
+## SVE2 integer absolute difference and accumulate
+
+# TODO: Use @rda and %reg_movprfx here.
+SABA 01000101 .. 0 ..... 11111 0 ..... ..... @rd_rn_rm
+UABA 01000101 .. 0 ..... 11111 1 ..... ..... @rd_rn_rm
+
+#### SVE2 Narrowing
+
+## SVE2 saturating extract narrow
+
+# Bits 23, 18-16 are zero, limited in the translator via esz < 3 & imm == 0.
+SQXTNB 01000101 .. 1 ..... 010 000 ..... ..... @rd_rn_tszimm_shl
+SQXTNT 01000101 .. 1 ..... 010 001 ..... ..... @rd_rn_tszimm_shl
+UQXTNB 01000101 .. 1 ..... 010 010 ..... ..... @rd_rn_tszimm_shl
+UQXTNT 01000101 .. 1 ..... 010 011 ..... ..... @rd_rn_tszimm_shl
+SQXTUNB 01000101 .. 1 ..... 010 100 ..... ..... @rd_rn_tszimm_shl
+SQXTUNT 01000101 .. 1 ..... 010 101 ..... ..... @rd_rn_tszimm_shl
+
+## SVE2 bitwise shift right narrow
+
+# Bit 23 == 0 is handled by esz > 0 in the translator.
+SQSHRUNB 01000101 .. 1 ..... 00 0000 ..... ..... @rd_rn_tszimm_shr
+SQSHRUNT 01000101 .. 1 ..... 00 0001 ..... ..... @rd_rn_tszimm_shr
+SQRSHRUNB 01000101 .. 1 ..... 00 0010 ..... ..... @rd_rn_tszimm_shr
+SQRSHRUNT 01000101 .. 1 ..... 00 0011 ..... ..... @rd_rn_tszimm_shr
+SHRNB 01000101 .. 1 ..... 00 0100 ..... ..... @rd_rn_tszimm_shr
+SHRNT 01000101 .. 1 ..... 00 0101 ..... ..... @rd_rn_tszimm_shr
+RSHRNB 01000101 .. 1 ..... 00 0110 ..... ..... @rd_rn_tszimm_shr
+RSHRNT 01000101 .. 1 ..... 00 0111 ..... ..... @rd_rn_tszimm_shr
+SQSHRNB 01000101 .. 1 ..... 00 1000 ..... ..... @rd_rn_tszimm_shr
+SQSHRNT 01000101 .. 1 ..... 00 1001 ..... ..... @rd_rn_tszimm_shr
+SQRSHRNB 01000101 .. 1 ..... 00 1010 ..... ..... @rd_rn_tszimm_shr
+SQRSHRNT 01000101 .. 1 ..... 00 1011 ..... ..... @rd_rn_tszimm_shr
+UQSHRNB 01000101 .. 1 ..... 00 1100 ..... ..... @rd_rn_tszimm_shr
+UQSHRNT 01000101 .. 1 ..... 00 1101 ..... ..... @rd_rn_tszimm_shr
+UQRSHRNB 01000101 .. 1 ..... 00 1110 ..... ..... @rd_rn_tszimm_shr
+UQRSHRNT 01000101 .. 1 ..... 00 1111 ..... ..... @rd_rn_tszimm_shr
+
+## SVE2 integer add/subtract narrow high part
+
+ADDHNB 01000101 .. 1 ..... 011 000 ..... ..... @rd_rn_rm
+ADDHNT 01000101 .. 1 ..... 011 001 ..... ..... @rd_rn_rm
+RADDHNB 01000101 .. 1 ..... 011 010 ..... ..... @rd_rn_rm
+RADDHNT 01000101 .. 1 ..... 011 011 ..... ..... @rd_rn_rm
+SUBHNB 01000101 .. 1 ..... 011 100 ..... ..... @rd_rn_rm
+SUBHNT 01000101 .. 1 ..... 011 101 ..... ..... @rd_rn_rm
+RSUBHNB 01000101 .. 1 ..... 011 110 ..... ..... @rd_rn_rm
+RSUBHNT 01000101 .. 1 ..... 011 111 ..... ..... @rd_rn_rm
+
+### SVE2 Character Match
+
+MATCH 01000101 .. 1 ..... 100 ... ..... 0 .... @pd_pg_rn_rm
+NMATCH 01000101 .. 1 ..... 100 ... ..... 1 .... @pd_pg_rn_rm
+
+### SVE2 Histogram Computation
+
+HISTCNT 01000101 .. 1 ..... 110 ... ..... ..... @rd_pg_rn_rm
+HISTSEG 01000101 .. 1 ..... 101 000 ..... ..... @rd_rn_rm
+
+## SVE2 floating-point pairwise operations
+
+FADDP 01100100 .. 010 00 0 100 ... ..... ..... @rdn_pg_rm
+FMAXNMP 01100100 .. 010 10 0 100 ... ..... ..... @rdn_pg_rm
+FMINNMP 01100100 .. 010 10 1 100 ... ..... ..... @rdn_pg_rm
+FMAXP 01100100 .. 010 11 0 100 ... ..... ..... @rdn_pg_rm
+FMINP 01100100 .. 010 11 1 100 ... ..... ..... @rdn_pg_rm
+
+#### SVE Integer Multiply-Add (unpredicated)
+
+## SVE2 saturating multiply-add long
+
+SQDMLALB_zzzw 01000100 .. 0 ..... 0110 00 ..... ..... @rda_rn_rm
+SQDMLALT_zzzw 01000100 .. 0 ..... 0110 01 ..... ..... @rda_rn_rm
+SQDMLSLB_zzzw 01000100 .. 0 ..... 0110 10 ..... ..... @rda_rn_rm
+SQDMLSLT_zzzw 01000100 .. 0 ..... 0110 11 ..... ..... @rda_rn_rm
+
+## SVE2 saturating multiply-add interleaved long
+
+SQDMLALBT 01000100 .. 0 ..... 00001 0 ..... ..... @rda_rn_rm
+SQDMLSLBT 01000100 .. 0 ..... 00001 1 ..... ..... @rda_rn_rm
+
+## SVE2 saturating multiply-add high
+
+SQRDMLAH_zzzz 01000100 .. 0 ..... 01110 0 ..... ..... @rda_rn_rm
+SQRDMLSH_zzzz 01000100 .. 0 ..... 01110 1 ..... ..... @rda_rn_rm
+
+## SVE2 integer multiply-add long
+
+SMLALB_zzzw 01000100 .. 0 ..... 010 000 ..... ..... @rda_rn_rm
+SMLALT_zzzw 01000100 .. 0 ..... 010 001 ..... ..... @rda_rn_rm
+UMLALB_zzzw 01000100 .. 0 ..... 010 010 ..... ..... @rda_rn_rm
+UMLALT_zzzw 01000100 .. 0 ..... 010 011 ..... ..... @rda_rn_rm
+SMLSLB_zzzw 01000100 .. 0 ..... 010 100 ..... ..... @rda_rn_rm
+SMLSLT_zzzw 01000100 .. 0 ..... 010 101 ..... ..... @rda_rn_rm
+UMLSLB_zzzw 01000100 .. 0 ..... 010 110 ..... ..... @rda_rn_rm
+UMLSLT_zzzw 01000100 .. 0 ..... 010 111 ..... ..... @rda_rn_rm
+
+## SVE2 complex integer multiply-add
+
+CMLA_zzzz 01000100 esz:2 0 rm:5 0010 rot:2 rn:5 rd:5 ra=%reg_movprfx
+SQRDCMLAH_zzzz 01000100 esz:2 0 rm:5 0011 rot:2 rn:5 rd:5 ra=%reg_movprfx
+
+## SVE mixed sign dot product
+
+USDOT_zzzz 01000100 .. 0 ..... 011 110 ..... ..... @rda_rn_rm
+
+### SVE2 floating point matrix multiply accumulate
+BFMMLA 01100100 01 1 ..... 111 001 ..... ..... @rda_rn_rm_e0
+FMMLA_s 01100100 10 1 ..... 111 001 ..... ..... @rda_rn_rm_e0
+FMMLA_d 01100100 11 1 ..... 111 001 ..... ..... @rda_rn_rm_e0
+
+### SVE2 Memory Gather Load Group
+
+# SVE2 64-bit gather non-temporal load (scalar plus 64-bit unscaled offsets)
+LDNT1_zprz 1100010 msz:2 00 rm:5 1 u:1 0 pg:3 rn:5 rd:5 \
+ &rprr_gather_load xs=2 esz=3 scale=0 ff=0
+
+# SVE2 32-bit gather non-temporal load (scalar plus 32-bit unscaled offsets)
+LDNT1_zprz 1000010 msz:2 00 rm:5 10 u:1 pg:3 rn:5 rd:5 \
+ &rprr_gather_load xs=0 esz=2 scale=0 ff=0
+
+### SVE2 Memory Store Group
+
+# SVE2 64-bit scatter non-temporal store (vector plus scalar)
+STNT1_zprz 1110010 .. 00 ..... 001 ... ..... ..... \
+ @rprr_scatter_store xs=2 esz=3 scale=0
+
+# SVE2 32-bit scatter non-temporal store (vector plus scalar)
+STNT1_zprz 1110010 .. 10 ..... 001 ... ..... ..... \
+ @rprr_scatter_store xs=0 esz=2 scale=0
+
+### SVE2 Crypto Extensions
+
+# SVE2 crypto unary operations
+AESMC 01000101 00 10000011100 0 00000 rd:5
+AESIMC 01000101 00 10000011100 1 00000 rd:5
+
+# SVE2 crypto destructive binary operations
+AESE 01000101 00 10001 0 11100 0 ..... ..... @rdn_rm_e0
+AESD 01000101 00 10001 0 11100 1 ..... ..... @rdn_rm_e0
+SM4E 01000101 00 10001 1 11100 0 ..... ..... @rdn_rm_e0
+
+# SVE2 crypto constructive binary operations
+SM4EKEY 01000101 00 1 ..... 11110 0 ..... ..... @rd_rn_rm_e0
+RAX1 01000101 00 1 ..... 11110 1 ..... ..... @rd_rn_rm_e0
+
+### SVE2 floating-point convert precision odd elements
+FCVTXNT_ds 01100100 00 0010 10 101 ... ..... ..... @rd_pg_rn_e0
+FCVTX_ds 01100101 00 0010 10 101 ... ..... ..... @rd_pg_rn_e0
+FCVTNT_sh 01100100 10 0010 00 101 ... ..... ..... @rd_pg_rn_e0
+BFCVTNT 01100100 10 0010 10 101 ... ..... ..... @rd_pg_rn_e0
+FCVTLT_hs 01100100 10 0010 01 101 ... ..... ..... @rd_pg_rn_e0
+FCVTNT_ds 01100100 11 0010 10 101 ... ..... ..... @rd_pg_rn_e0
+FCVTLT_sd 01100100 11 0010 11 101 ... ..... ..... @rd_pg_rn_e0
+
+### SVE2 floating-point convert to integer
+FLOGB 01100101 00 011 esz:2 0101 pg:3 rn:5 rd:5 &rpr_esz
+
+### SVE2 floating-point multiply-add long (vectors)
+FMLALB_zzzw 01100100 10 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0
+FMLALT_zzzw 01100100 10 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_e0
+FMLSLB_zzzw 01100100 10 1 ..... 10 1 00 0 ..... ..... @rda_rn_rm_e0
+FMLSLT_zzzw 01100100 10 1 ..... 10 1 00 1 ..... ..... @rda_rn_rm_e0
+
+BFMLALB_zzzw 01100100 11 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0
+BFMLALT_zzzw 01100100 11 1 ..... 10 0 00 1 ..... ..... @rda_rn_rm_e0
+
+### SVE2 floating-point bfloat16 dot-product
+BFDOT_zzzz 01100100 01 1 ..... 10 0 00 0 ..... ..... @rda_rn_rm_e0
+
+### SVE2 floating-point multiply-add long (indexed)
+FMLALB_zzxw 01100100 10 1 ..... 0100.0 ..... ..... @rrxr_3a esz=2
+FMLALT_zzxw 01100100 10 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
+FMLSLB_zzxw 01100100 10 1 ..... 0110.0 ..... ..... @rrxr_3a esz=2
+FMLSLT_zzxw 01100100 10 1 ..... 0110.1 ..... ..... @rrxr_3a esz=2
+BFMLALB_zzxw 01100100 11 1 ..... 0100.0 ..... ..... @rrxr_3a esz=2
+BFMLALT_zzxw 01100100 11 1 ..... 0100.1 ..... ..... @rrxr_3a esz=2
+
+### SVE2 floating-point bfloat16 dot-product (indexed)
+BFDOT_zzxz 01100100 01 1 ..... 010000 ..... ..... @rrxr_2 esz=2
+
+### SVE broadcast predicate element
+
+&psel esz pd pn pm rv imm
+%psel_rv 16:2 !function=plus_12
+%psel_imm_b 22:2 19:2
+%psel_imm_h 22:2 20:1
+%psel_imm_s 22:2
+%psel_imm_d 23:1
+@psel ........ .. . ... .. .. pn:4 . pm:4 . pd:4 \
+ &psel rv=%psel_rv
+
+PSEL 00100101 .. 1 ..1 .. 01 .... 0 .... 0 .... \
+ @psel esz=0 imm=%psel_imm_b
+PSEL 00100101 .. 1 .10 .. 01 .... 0 .... 0 .... \
+ @psel esz=1 imm=%psel_imm_h
+PSEL 00100101 .. 1 100 .. 01 .... 0 .... 0 .... \
+ @psel esz=2 imm=%psel_imm_s
+PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
+ @psel esz=3 imm=%psel_imm_d
+
+### SVE clamp
+
+SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm
+UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
new file mode 100644
index 0000000000..6853f58c19
--- /dev/null
+++ b/target/arm/tcg/sve_helper.c
@@ -0,0 +1,7486 @@
+/*
+ * ARM SVE Operations
+ *
+ * Copyright (c) 2018 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "fpu/softfloat.h"
+#include "tcg/tcg.h"
+#include "vec_internal.h"
+#include "sve_ldst_internal.h"
+#include "hw/core/tcg-cpu-ops.h"
+
+
+/* Return a value for NZCV as per the ARM PredTest pseudofunction.
+ *
+ * The return value has bit 31 set if N is set, bit 1 set if Z is clear,
+ * and bit 0 set if C is set. Compare the definitions of these variables
+ * within CPUARMState.
+ */
+
+/* For no G bits set, NZCV = C. */
+#define PREDTEST_INIT 1
+
+/* This is an iterative function, called for each Pd and Pg word
+ * moving forward.
+ */
+static uint32_t iter_predtest_fwd(uint64_t d, uint64_t g, uint32_t flags)
+{
+ if (likely(g)) {
+ /* Compute N from first D & G.
+ Use bit 2 to signal first G bit seen. */
+ if (!(flags & 4)) {
+ flags |= ((d & (g & -g)) != 0) << 31;
+ flags |= 4;
+ }
+
+ /* Accumulate Z from each D & G. */
+ flags |= ((d & g) != 0) << 1;
+
+ /* Compute C from last !(D & G). Replace previous. */
+ flags = deposit32(flags, 0, 1, (d & pow2floor(g)) == 0);
+ }
+ return flags;
+}
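+
+/*
+ * Worked example: d = 0x04, g = 0x05 (first and third byte elements
+ * active, only the third set in the result).  The first active element
+ * is clear, so N = 0; d & g is non-zero, so Z is clear (bit 1 set);
+ * the last active element is set, so C = 0.  Starting from
+ * PREDTEST_INIT this returns 0x6, where bit 2 is only the internal
+ * "first active bit seen" marker.
+ */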
+
+/* This is an iterative function, called for each Pd and Pg word
+ * moving backward.
+ */
+static uint32_t iter_predtest_bwd(uint64_t d, uint64_t g, uint32_t flags)
+{
+ if (likely(g)) {
+ /* Compute C from first (i.e last) !(D & G).
+ Use bit 2 to signal first G bit seen. */
+ if (!(flags & 4)) {
+ flags += 4 - 1; /* add bit 2, subtract C from PREDTEST_INIT */
+ flags |= (d & pow2floor(g)) == 0;
+ }
+
+ /* Accumulate Z from each D & G. */
+ flags |= ((d & g) != 0) << 1;
+
+ /* Compute N from last (i.e first) D & G. Replace previous. */
+ flags = deposit32(flags, 31, 1, (d & (g & -g)) != 0);
+ }
+ return flags;
+}
+
+/* The same for a single word predicate. */
+uint32_t HELPER(sve_predtest1)(uint64_t d, uint64_t g)
+{
+ return iter_predtest_fwd(d, g, PREDTEST_INIT);
+}
+
+/* The same for a multi-word predicate. */
+uint32_t HELPER(sve_predtest)(void *vd, void *vg, uint32_t words)
+{
+ uint32_t flags = PREDTEST_INIT;
+ uint64_t *d = vd, *g = vg;
+ uintptr_t i = 0;
+
+ do {
+ flags = iter_predtest_fwd(d[i], g[i], flags);
+ } while (++i < words);
+
+ return flags;
+}
+
+/* Similarly for single word elements. */
+static inline uint64_t expand_pred_s(uint8_t byte)
+{
+ static const uint64_t word[] = {
+ [0x01] = 0x00000000ffffffffull,
+ [0x10] = 0xffffffff00000000ull,
+ [0x11] = 0xffffffffffffffffull,
+ };
+ return word[byte & 0x11];
+}
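+
+/*
+ * Only predicate bits 0 and 4 matter for word elements, so indexing
+ * with (byte & 0x11) selects one of the four initialised entries;
+ * the untouched slots of the static table read as zero, i.e. both
+ * words inactive.
+ */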
+
+#define LOGICAL_PPPP(NAME, FUNC) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ uintptr_t opr_sz = simd_oprsz(desc); \
+ uint64_t *d = vd, *n = vn, *m = vm, *g = vg; \
+ uintptr_t i; \
+ for (i = 0; i < opr_sz / 8; ++i) { \
+ d[i] = FUNC(n[i], m[i], g[i]); \
+ } \
+}
+
+#define DO_AND(N, M, G) (((N) & (M)) & (G))
+#define DO_BIC(N, M, G) (((N) & ~(M)) & (G))
+#define DO_EOR(N, M, G) (((N) ^ (M)) & (G))
+#define DO_ORR(N, M, G) (((N) | (M)) & (G))
+#define DO_ORN(N, M, G) (((N) | ~(M)) & (G))
+#define DO_NOR(N, M, G) (~((N) | (M)) & (G))
+#define DO_NAND(N, M, G) (~((N) & (M)) & (G))
+#define DO_SEL(N, M, G) (((N) & (G)) | ((M) & ~(G)))
+
+LOGICAL_PPPP(sve_and_pppp, DO_AND)
+LOGICAL_PPPP(sve_bic_pppp, DO_BIC)
+LOGICAL_PPPP(sve_eor_pppp, DO_EOR)
+LOGICAL_PPPP(sve_sel_pppp, DO_SEL)
+LOGICAL_PPPP(sve_orr_pppp, DO_ORR)
+LOGICAL_PPPP(sve_orn_pppp, DO_ORN)
+LOGICAL_PPPP(sve_nor_pppp, DO_NOR)
+LOGICAL_PPPP(sve_nand_pppp, DO_NAND)
+
+#undef DO_AND
+#undef DO_BIC
+#undef DO_EOR
+#undef DO_ORR
+#undef DO_ORN
+#undef DO_NOR
+#undef DO_NAND
+#undef DO_SEL
+#undef LOGICAL_PPPP
+
+/* Fully general three-operand expander, controlled by a predicate.
+ * This is complicated by the host-endian storage of the register file.
+ */
+/* ??? I don't expect the compiler could ever vectorize this itself.
+ * With some tables we can convert bit masks to byte masks, and with
+ * extra care wrt byte/word ordering we could use gcc generic vectors
+ * and do 16 bytes at a time.
+ */
+#define DO_ZPZZ(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ TYPE mm = *(TYPE *)(vm + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(nn, mm); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ } while (i & 15); \
+ } \
+}
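+
+/*
+ * In the expander above the predicate is consumed 16 bits at a time,
+ * one bit per byte of the 16-byte chunk; "pg >>= sizeof(TYPE)" keeps
+ * bit 0 aligned with the least significant byte of the current
+ * element, and H(i) converts the logical (little-endian) byte offset
+ * into the host storage offset of that element.
+ */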
+
+/* Similarly, specialized for 64-bit operands. */
+#define DO_ZPZZ_D(NAME, TYPE, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ uint8_t *pg = vg; \
+ for (i = 0; i < opr_sz; i += 1) { \
+ if (pg[H1(i)] & 1) { \
+ TYPE nn = n[i], mm = m[i]; \
+ d[i] = OP(nn, mm); \
+ } \
+ } \
+}
+
+#define DO_AND(N, M) (N & M)
+#define DO_EOR(N, M) (N ^ M)
+#define DO_ORR(N, M) (N | M)
+#define DO_BIC(N, M) (N & ~M)
+#define DO_ADD(N, M) (N + M)
+#define DO_SUB(N, M) (N - M)
+#define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
+#define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
+#define DO_ABD(N, M) ((N) >= (M) ? (N) - (M) : (M) - (N))
+#define DO_MUL(N, M) (N * M)
+
+
+/*
+ * We must avoid the C undefined behaviour cases: division by
+ * zero and signed division of INT_MIN by -1. Both of these
+ * have architecturally defined required results for Arm.
+ * We special case all signed divisions by -1 to avoid having
+ * to deduce the minimum integer for the type involved.
+ */
+#define DO_SDIV(N, M) (unlikely(M == 0) ? 0 : unlikely(M == -1) ? -N : N / M)
+#define DO_UDIV(N, M) (unlikely(M == 0) ? 0 : N / M)
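+
+/*
+ * Corner cases required by the architecture (illustrative; the
+ * INT*_MIN case relies on QEMU being built with -fwrapv so that the
+ * negation wraps):
+ *   DO_SDIV(x, 0)          == 0
+ *   DO_UDIV(x, 0)          == 0
+ *   DO_SDIV(INT64_MIN, -1) == INT64_MIN
+ */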
+
+DO_ZPZZ(sve_and_zpzz_b, uint8_t, H1, DO_AND)
+DO_ZPZZ(sve_and_zpzz_h, uint16_t, H1_2, DO_AND)
+DO_ZPZZ(sve_and_zpzz_s, uint32_t, H1_4, DO_AND)
+DO_ZPZZ_D(sve_and_zpzz_d, uint64_t, DO_AND)
+
+DO_ZPZZ(sve_orr_zpzz_b, uint8_t, H1, DO_ORR)
+DO_ZPZZ(sve_orr_zpzz_h, uint16_t, H1_2, DO_ORR)
+DO_ZPZZ(sve_orr_zpzz_s, uint32_t, H1_4, DO_ORR)
+DO_ZPZZ_D(sve_orr_zpzz_d, uint64_t, DO_ORR)
+
+DO_ZPZZ(sve_eor_zpzz_b, uint8_t, H1, DO_EOR)
+DO_ZPZZ(sve_eor_zpzz_h, uint16_t, H1_2, DO_EOR)
+DO_ZPZZ(sve_eor_zpzz_s, uint32_t, H1_4, DO_EOR)
+DO_ZPZZ_D(sve_eor_zpzz_d, uint64_t, DO_EOR)
+
+DO_ZPZZ(sve_bic_zpzz_b, uint8_t, H1, DO_BIC)
+DO_ZPZZ(sve_bic_zpzz_h, uint16_t, H1_2, DO_BIC)
+DO_ZPZZ(sve_bic_zpzz_s, uint32_t, H1_4, DO_BIC)
+DO_ZPZZ_D(sve_bic_zpzz_d, uint64_t, DO_BIC)
+
+DO_ZPZZ(sve_add_zpzz_b, uint8_t, H1, DO_ADD)
+DO_ZPZZ(sve_add_zpzz_h, uint16_t, H1_2, DO_ADD)
+DO_ZPZZ(sve_add_zpzz_s, uint32_t, H1_4, DO_ADD)
+DO_ZPZZ_D(sve_add_zpzz_d, uint64_t, DO_ADD)
+
+DO_ZPZZ(sve_sub_zpzz_b, uint8_t, H1, DO_SUB)
+DO_ZPZZ(sve_sub_zpzz_h, uint16_t, H1_2, DO_SUB)
+DO_ZPZZ(sve_sub_zpzz_s, uint32_t, H1_4, DO_SUB)
+DO_ZPZZ_D(sve_sub_zpzz_d, uint64_t, DO_SUB)
+
+DO_ZPZZ(sve_smax_zpzz_b, int8_t, H1, DO_MAX)
+DO_ZPZZ(sve_smax_zpzz_h, int16_t, H1_2, DO_MAX)
+DO_ZPZZ(sve_smax_zpzz_s, int32_t, H1_4, DO_MAX)
+DO_ZPZZ_D(sve_smax_zpzz_d, int64_t, DO_MAX)
+
+DO_ZPZZ(sve_umax_zpzz_b, uint8_t, H1, DO_MAX)
+DO_ZPZZ(sve_umax_zpzz_h, uint16_t, H1_2, DO_MAX)
+DO_ZPZZ(sve_umax_zpzz_s, uint32_t, H1_4, DO_MAX)
+DO_ZPZZ_D(sve_umax_zpzz_d, uint64_t, DO_MAX)
+
+DO_ZPZZ(sve_smin_zpzz_b, int8_t, H1, DO_MIN)
+DO_ZPZZ(sve_smin_zpzz_h, int16_t, H1_2, DO_MIN)
+DO_ZPZZ(sve_smin_zpzz_s, int32_t, H1_4, DO_MIN)
+DO_ZPZZ_D(sve_smin_zpzz_d, int64_t, DO_MIN)
+
+DO_ZPZZ(sve_umin_zpzz_b, uint8_t, H1, DO_MIN)
+DO_ZPZZ(sve_umin_zpzz_h, uint16_t, H1_2, DO_MIN)
+DO_ZPZZ(sve_umin_zpzz_s, uint32_t, H1_4, DO_MIN)
+DO_ZPZZ_D(sve_umin_zpzz_d, uint64_t, DO_MIN)
+
+DO_ZPZZ(sve_sabd_zpzz_b, int8_t, H1, DO_ABD)
+DO_ZPZZ(sve_sabd_zpzz_h, int16_t, H1_2, DO_ABD)
+DO_ZPZZ(sve_sabd_zpzz_s, int32_t, H1_4, DO_ABD)
+DO_ZPZZ_D(sve_sabd_zpzz_d, int64_t, DO_ABD)
+
+DO_ZPZZ(sve_uabd_zpzz_b, uint8_t, H1, DO_ABD)
+DO_ZPZZ(sve_uabd_zpzz_h, uint16_t, H1_2, DO_ABD)
+DO_ZPZZ(sve_uabd_zpzz_s, uint32_t, H1_4, DO_ABD)
+DO_ZPZZ_D(sve_uabd_zpzz_d, uint64_t, DO_ABD)
+
+/* Because the computation type is at least twice as large as required,
+ these work for both signed and unsigned source types. */
+static inline uint8_t do_mulh_b(int32_t n, int32_t m)
+{
+ return (n * m) >> 8;
+}
+
+static inline uint16_t do_mulh_h(int32_t n, int32_t m)
+{
+ return (n * m) >> 16;
+}
+
+static inline uint32_t do_mulh_s(int64_t n, int64_t m)
+{
+ return (n * m) >> 32;
+}
+
+static inline uint64_t do_smulh_d(uint64_t n, uint64_t m)
+{
+ uint64_t lo, hi;
+ muls64(&lo, &hi, n, m);
+ return hi;
+}
+
+static inline uint64_t do_umulh_d(uint64_t n, uint64_t m)
+{
+ uint64_t lo, hi;
+ mulu64(&lo, &hi, n, m);
+ return hi;
+}
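+
+/*
+ * Example for the byte form: the signed callers load -1 * -1, giving
+ * (1 >> 8) = 0x00, while the unsigned callers load 255 * 255 = 0xfe01,
+ * giving a high byte of 0xfe -- the widened int32_t computation is
+ * correct for both signednesses of the narrow source type.
+ */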
+
+DO_ZPZZ(sve_mul_zpzz_b, uint8_t, H1, DO_MUL)
+DO_ZPZZ(sve_mul_zpzz_h, uint16_t, H1_2, DO_MUL)
+DO_ZPZZ(sve_mul_zpzz_s, uint32_t, H1_4, DO_MUL)
+DO_ZPZZ_D(sve_mul_zpzz_d, uint64_t, DO_MUL)
+
+DO_ZPZZ(sve_smulh_zpzz_b, int8_t, H1, do_mulh_b)
+DO_ZPZZ(sve_smulh_zpzz_h, int16_t, H1_2, do_mulh_h)
+DO_ZPZZ(sve_smulh_zpzz_s, int32_t, H1_4, do_mulh_s)
+DO_ZPZZ_D(sve_smulh_zpzz_d, uint64_t, do_smulh_d)
+
+DO_ZPZZ(sve_umulh_zpzz_b, uint8_t, H1, do_mulh_b)
+DO_ZPZZ(sve_umulh_zpzz_h, uint16_t, H1_2, do_mulh_h)
+DO_ZPZZ(sve_umulh_zpzz_s, uint32_t, H1_4, do_mulh_s)
+DO_ZPZZ_D(sve_umulh_zpzz_d, uint64_t, do_umulh_d)
+
+DO_ZPZZ(sve_sdiv_zpzz_s, int32_t, H1_4, DO_SDIV)
+DO_ZPZZ_D(sve_sdiv_zpzz_d, int64_t, DO_SDIV)
+
+DO_ZPZZ(sve_udiv_zpzz_s, uint32_t, H1_4, DO_UDIV)
+DO_ZPZZ_D(sve_udiv_zpzz_d, uint64_t, DO_UDIV)
+
+/* Note that all bits of the shift are significant
+ and not modulo the element size. */
+#define DO_ASR(N, M) (N >> MIN(M, sizeof(N) * 8 - 1))
+#define DO_LSR(N, M) (M < sizeof(N) * 8 ? N >> M : 0)
+#define DO_LSL(N, M) (M < sizeof(N) * 8 ? N << M : 0)
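+
+/*
+ * Example: for a byte element, DO_ASR with a shift of 9 clamps the
+ * count to 7 and yields all sign bits (-1 for a negative input),
+ * while DO_LSR and DO_LSL with a shift of 8 or more yield 0.
+ */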
+
+DO_ZPZZ(sve_asr_zpzz_b, int8_t, H1, DO_ASR)
+DO_ZPZZ(sve_lsr_zpzz_b, uint8_t, H1_2, DO_LSR)
+DO_ZPZZ(sve_lsl_zpzz_b, uint8_t, H1_4, DO_LSL)
+
+DO_ZPZZ(sve_asr_zpzz_h, int16_t, H1, DO_ASR)
+DO_ZPZZ(sve_lsr_zpzz_h, uint16_t, H1_2, DO_LSR)
+DO_ZPZZ(sve_lsl_zpzz_h, uint16_t, H1_4, DO_LSL)
+
+DO_ZPZZ(sve_asr_zpzz_s, int32_t, H1, DO_ASR)
+DO_ZPZZ(sve_lsr_zpzz_s, uint32_t, H1_2, DO_LSR)
+DO_ZPZZ(sve_lsl_zpzz_s, uint32_t, H1_4, DO_LSL)
+
+DO_ZPZZ_D(sve_asr_zpzz_d, int64_t, DO_ASR)
+DO_ZPZZ_D(sve_lsr_zpzz_d, uint64_t, DO_LSR)
+DO_ZPZZ_D(sve_lsl_zpzz_d, uint64_t, DO_LSL)
+
+static inline uint16_t do_sadalp_h(int16_t n, int16_t m)
+{
+ int8_t n1 = n, n2 = n >> 8;
+ return m + n1 + n2;
+}
+
+static inline uint32_t do_sadalp_s(int32_t n, int32_t m)
+{
+ int16_t n1 = n, n2 = n >> 16;
+ return m + n1 + n2;
+}
+
+static inline uint64_t do_sadalp_d(int64_t n, int64_t m)
+{
+ int32_t n1 = n, n2 = n >> 32;
+ return m + n1 + n2;
+}
+
+DO_ZPZZ(sve2_sadalp_zpzz_h, int16_t, H1_2, do_sadalp_h)
+DO_ZPZZ(sve2_sadalp_zpzz_s, int32_t, H1_4, do_sadalp_s)
+DO_ZPZZ_D(sve2_sadalp_zpzz_d, int64_t, do_sadalp_d)
+
+static inline uint16_t do_uadalp_h(uint16_t n, uint16_t m)
+{
+ uint8_t n1 = n, n2 = n >> 8;
+ return m + n1 + n2;
+}
+
+static inline uint32_t do_uadalp_s(uint32_t n, uint32_t m)
+{
+ uint16_t n1 = n, n2 = n >> 16;
+ return m + n1 + n2;
+}
+
+static inline uint64_t do_uadalp_d(uint64_t n, uint64_t m)
+{
+ uint32_t n1 = n, n2 = n >> 32;
+ return m + n1 + n2;
+}
+
+DO_ZPZZ(sve2_uadalp_zpzz_h, uint16_t, H1_2, do_uadalp_h)
+DO_ZPZZ(sve2_uadalp_zpzz_s, uint32_t, H1_4, do_uadalp_s)
+DO_ZPZZ_D(sve2_uadalp_zpzz_d, uint64_t, do_uadalp_d)
+
+#define do_srshl_b(n, m) do_sqrshl_bhs(n, m, 8, true, NULL)
+#define do_srshl_h(n, m) do_sqrshl_bhs(n, m, 16, true, NULL)
+#define do_srshl_s(n, m) do_sqrshl_bhs(n, m, 32, true, NULL)
+#define do_srshl_d(n, m) do_sqrshl_d(n, m, true, NULL)
+
+DO_ZPZZ(sve2_srshl_zpzz_b, int8_t, H1, do_srshl_b)
+DO_ZPZZ(sve2_srshl_zpzz_h, int16_t, H1_2, do_srshl_h)
+DO_ZPZZ(sve2_srshl_zpzz_s, int32_t, H1_4, do_srshl_s)
+DO_ZPZZ_D(sve2_srshl_zpzz_d, int64_t, do_srshl_d)
+
+#define do_urshl_b(n, m) do_uqrshl_bhs(n, (int8_t)m, 8, true, NULL)
+#define do_urshl_h(n, m) do_uqrshl_bhs(n, (int16_t)m, 16, true, NULL)
+#define do_urshl_s(n, m) do_uqrshl_bhs(n, m, 32, true, NULL)
+#define do_urshl_d(n, m) do_uqrshl_d(n, m, true, NULL)
+
+DO_ZPZZ(sve2_urshl_zpzz_b, uint8_t, H1, do_urshl_b)
+DO_ZPZZ(sve2_urshl_zpzz_h, uint16_t, H1_2, do_urshl_h)
+DO_ZPZZ(sve2_urshl_zpzz_s, uint32_t, H1_4, do_urshl_s)
+DO_ZPZZ_D(sve2_urshl_zpzz_d, uint64_t, do_urshl_d)
+
+/*
+ * Unlike the NEON and AdvSIMD versions, there is no QC bit to set.
+ * We pass in a pointer to a dummy saturation field to trigger
+ * the saturating arithmetic but discard the information about
+ * whether it has occurred.
+ */
+#define do_sqshl_b(n, m) \
+ ({ uint32_t discard; do_sqrshl_bhs(n, m, 8, false, &discard); })
+#define do_sqshl_h(n, m) \
+ ({ uint32_t discard; do_sqrshl_bhs(n, m, 16, false, &discard); })
+#define do_sqshl_s(n, m) \
+ ({ uint32_t discard; do_sqrshl_bhs(n, m, 32, false, &discard); })
+#define do_sqshl_d(n, m) \
+ ({ uint32_t discard; do_sqrshl_d(n, m, false, &discard); })
+
+DO_ZPZZ(sve2_sqshl_zpzz_b, int8_t, H1, do_sqshl_b)
+DO_ZPZZ(sve2_sqshl_zpzz_h, int16_t, H1_2, do_sqshl_h)
+DO_ZPZZ(sve2_sqshl_zpzz_s, int32_t, H1_4, do_sqshl_s)
+DO_ZPZZ_D(sve2_sqshl_zpzz_d, int64_t, do_sqshl_d)
+
+#define do_uqshl_b(n, m) \
+ ({ uint32_t discard; do_uqrshl_bhs(n, (int8_t)m, 8, false, &discard); })
+#define do_uqshl_h(n, m) \
+ ({ uint32_t discard; do_uqrshl_bhs(n, (int16_t)m, 16, false, &discard); })
+#define do_uqshl_s(n, m) \
+ ({ uint32_t discard; do_uqrshl_bhs(n, m, 32, false, &discard); })
+#define do_uqshl_d(n, m) \
+ ({ uint32_t discard; do_uqrshl_d(n, m, false, &discard); })
+
+DO_ZPZZ(sve2_uqshl_zpzz_b, uint8_t, H1, do_uqshl_b)
+DO_ZPZZ(sve2_uqshl_zpzz_h, uint16_t, H1_2, do_uqshl_h)
+DO_ZPZZ(sve2_uqshl_zpzz_s, uint32_t, H1_4, do_uqshl_s)
+DO_ZPZZ_D(sve2_uqshl_zpzz_d, uint64_t, do_uqshl_d)
+
+#define do_sqrshl_b(n, m) \
+ ({ uint32_t discard; do_sqrshl_bhs(n, m, 8, true, &discard); })
+#define do_sqrshl_h(n, m) \
+ ({ uint32_t discard; do_sqrshl_bhs(n, m, 16, true, &discard); })
+#define do_sqrshl_s(n, m) \
+ ({ uint32_t discard; do_sqrshl_bhs(n, m, 32, true, &discard); })
+#define do_sqrshl_d(n, m) \
+ ({ uint32_t discard; do_sqrshl_d(n, m, true, &discard); })
+
+DO_ZPZZ(sve2_sqrshl_zpzz_b, int8_t, H1, do_sqrshl_b)
+DO_ZPZZ(sve2_sqrshl_zpzz_h, int16_t, H1_2, do_sqrshl_h)
+DO_ZPZZ(sve2_sqrshl_zpzz_s, int32_t, H1_4, do_sqrshl_s)
+DO_ZPZZ_D(sve2_sqrshl_zpzz_d, int64_t, do_sqrshl_d)
+
+#undef do_sqrshl_d
+
+#define do_uqrshl_b(n, m) \
+ ({ uint32_t discard; do_uqrshl_bhs(n, (int8_t)m, 8, true, &discard); })
+#define do_uqrshl_h(n, m) \
+ ({ uint32_t discard; do_uqrshl_bhs(n, (int16_t)m, 16, true, &discard); })
+#define do_uqrshl_s(n, m) \
+ ({ uint32_t discard; do_uqrshl_bhs(n, m, 32, true, &discard); })
+#define do_uqrshl_d(n, m) \
+ ({ uint32_t discard; do_uqrshl_d(n, m, true, &discard); })
+
+DO_ZPZZ(sve2_uqrshl_zpzz_b, uint8_t, H1, do_uqrshl_b)
+DO_ZPZZ(sve2_uqrshl_zpzz_h, uint16_t, H1_2, do_uqrshl_h)
+DO_ZPZZ(sve2_uqrshl_zpzz_s, uint32_t, H1_4, do_uqrshl_s)
+DO_ZPZZ_D(sve2_uqrshl_zpzz_d, uint64_t, do_uqrshl_d)
+
+#undef do_uqrshl_d
+
+#define DO_HADD_BHS(n, m) (((int64_t)n + m) >> 1)
+#define DO_HADD_D(n, m) ((n >> 1) + (m >> 1) + (n & m & 1))
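+/* The 64-bit form cannot widen, so it halves each operand and adds
+ back the carry from the two discarded low bits; n & m & 1 is 1 only
+ when both low bits are set and their sum carries into bit 1. */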
+
+DO_ZPZZ(sve2_shadd_zpzz_b, int8_t, H1, DO_HADD_BHS)
+DO_ZPZZ(sve2_shadd_zpzz_h, int16_t, H1_2, DO_HADD_BHS)
+DO_ZPZZ(sve2_shadd_zpzz_s, int32_t, H1_4, DO_HADD_BHS)
+DO_ZPZZ_D(sve2_shadd_zpzz_d, int64_t, DO_HADD_D)
+
+DO_ZPZZ(sve2_uhadd_zpzz_b, uint8_t, H1, DO_HADD_BHS)
+DO_ZPZZ(sve2_uhadd_zpzz_h, uint16_t, H1_2, DO_HADD_BHS)
+DO_ZPZZ(sve2_uhadd_zpzz_s, uint32_t, H1_4, DO_HADD_BHS)
+DO_ZPZZ_D(sve2_uhadd_zpzz_d, uint64_t, DO_HADD_D)
+
+#define DO_RHADD_BHS(n, m) (((int64_t)n + m + 1) >> 1)
+#define DO_RHADD_D(n, m) ((n >> 1) + (m >> 1) + ((n | m) & 1))
+
+DO_ZPZZ(sve2_srhadd_zpzz_b, int8_t, H1, DO_RHADD_BHS)
+DO_ZPZZ(sve2_srhadd_zpzz_h, int16_t, H1_2, DO_RHADD_BHS)
+DO_ZPZZ(sve2_srhadd_zpzz_s, int32_t, H1_4, DO_RHADD_BHS)
+DO_ZPZZ_D(sve2_srhadd_zpzz_d, int64_t, DO_RHADD_D)
+
+DO_ZPZZ(sve2_urhadd_zpzz_b, uint8_t, H1, DO_RHADD_BHS)
+DO_ZPZZ(sve2_urhadd_zpzz_h, uint16_t, H1_2, DO_RHADD_BHS)
+DO_ZPZZ(sve2_urhadd_zpzz_s, uint32_t, H1_4, DO_RHADD_BHS)
+DO_ZPZZ_D(sve2_urhadd_zpzz_d, uint64_t, DO_RHADD_D)
+
+#define DO_HSUB_BHS(n, m) (((int64_t)n - m) >> 1)
+#define DO_HSUB_D(n, m) ((n >> 1) - (m >> 1) - (~n & m & 1))
+
+DO_ZPZZ(sve2_shsub_zpzz_b, int8_t, H1, DO_HSUB_BHS)
+DO_ZPZZ(sve2_shsub_zpzz_h, int16_t, H1_2, DO_HSUB_BHS)
+DO_ZPZZ(sve2_shsub_zpzz_s, int32_t, H1_4, DO_HSUB_BHS)
+DO_ZPZZ_D(sve2_shsub_zpzz_d, int64_t, DO_HSUB_D)
+
+DO_ZPZZ(sve2_uhsub_zpzz_b, uint8_t, H1, DO_HSUB_BHS)
+DO_ZPZZ(sve2_uhsub_zpzz_h, uint16_t, H1_2, DO_HSUB_BHS)
+DO_ZPZZ(sve2_uhsub_zpzz_s, uint32_t, H1_4, DO_HSUB_BHS)
+DO_ZPZZ_D(sve2_uhsub_zpzz_d, uint64_t, DO_HSUB_D)
+
+static inline int32_t do_sat_bhs(int64_t val, int64_t min, int64_t max)
+{
+ return val >= max ? max : val <= min ? min : val;
+}
+
+#define DO_SQADD_B(n, m) do_sat_bhs((int64_t)n + m, INT8_MIN, INT8_MAX)
+#define DO_SQADD_H(n, m) do_sat_bhs((int64_t)n + m, INT16_MIN, INT16_MAX)
+#define DO_SQADD_S(n, m) do_sat_bhs((int64_t)n + m, INT32_MIN, INT32_MAX)
+
+static inline int64_t do_sqadd_d(int64_t n, int64_t m)
+{
+ int64_t r = n + m;
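+ /* Overflow requires both operands to have the same sign and the
+ result to differ in sign: ~(n ^ m) selects same-sign inputs and
+ r ^ n detects the sign change. */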
+ if (((r ^ n) & ~(n ^ m)) < 0) {
+ /* Signed overflow. */
+ return r < 0 ? INT64_MAX : INT64_MIN;
+ }
+ return r;
+}
+
+DO_ZPZZ(sve2_sqadd_zpzz_b, int8_t, H1, DO_SQADD_B)
+DO_ZPZZ(sve2_sqadd_zpzz_h, int16_t, H1_2, DO_SQADD_H)
+DO_ZPZZ(sve2_sqadd_zpzz_s, int32_t, H1_4, DO_SQADD_S)
+DO_ZPZZ_D(sve2_sqadd_zpzz_d, int64_t, do_sqadd_d)
+
+#define DO_UQADD_B(n, m) do_sat_bhs((int64_t)n + m, 0, UINT8_MAX)
+#define DO_UQADD_H(n, m) do_sat_bhs((int64_t)n + m, 0, UINT16_MAX)
+#define DO_UQADD_S(n, m) do_sat_bhs((int64_t)n + m, 0, UINT32_MAX)
+
+static inline uint64_t do_uqadd_d(uint64_t n, uint64_t m)
+{
+ uint64_t r = n + m;
+ return r < n ? UINT64_MAX : r;
+}
+
+DO_ZPZZ(sve2_uqadd_zpzz_b, uint8_t, H1, DO_UQADD_B)
+DO_ZPZZ(sve2_uqadd_zpzz_h, uint16_t, H1_2, DO_UQADD_H)
+DO_ZPZZ(sve2_uqadd_zpzz_s, uint32_t, H1_4, DO_UQADD_S)
+DO_ZPZZ_D(sve2_uqadd_zpzz_d, uint64_t, do_uqadd_d)
+
+#define DO_SQSUB_B(n, m) do_sat_bhs((int64_t)n - m, INT8_MIN, INT8_MAX)
+#define DO_SQSUB_H(n, m) do_sat_bhs((int64_t)n - m, INT16_MIN, INT16_MAX)
+#define DO_SQSUB_S(n, m) do_sat_bhs((int64_t)n - m, INT32_MIN, INT32_MAX)
+
+static inline int64_t do_sqsub_d(int64_t n, int64_t m)
+{
+ int64_t r = n - m;
+ if (((r ^ n) & (n ^ m)) < 0) {
+ /* Signed overflow. */
+ return r < 0 ? INT64_MAX : INT64_MIN;
+ }
+ return r;
+}
+
+DO_ZPZZ(sve2_sqsub_zpzz_b, int8_t, H1, DO_SQSUB_B)
+DO_ZPZZ(sve2_sqsub_zpzz_h, int16_t, H1_2, DO_SQSUB_H)
+DO_ZPZZ(sve2_sqsub_zpzz_s, int32_t, H1_4, DO_SQSUB_S)
+DO_ZPZZ_D(sve2_sqsub_zpzz_d, int64_t, do_sqsub_d)
+
+#define DO_UQSUB_B(n, m) do_sat_bhs((int64_t)n - m, 0, UINT8_MAX)
+#define DO_UQSUB_H(n, m) do_sat_bhs((int64_t)n - m, 0, UINT16_MAX)
+#define DO_UQSUB_S(n, m) do_sat_bhs((int64_t)n - m, 0, UINT32_MAX)
+
+static inline uint64_t do_uqsub_d(uint64_t n, uint64_t m)
+{
+ return n > m ? n - m : 0;
+}
+
+DO_ZPZZ(sve2_uqsub_zpzz_b, uint8_t, H1, DO_UQSUB_B)
+DO_ZPZZ(sve2_uqsub_zpzz_h, uint16_t, H1_2, DO_UQSUB_H)
+DO_ZPZZ(sve2_uqsub_zpzz_s, uint32_t, H1_4, DO_UQSUB_S)
+DO_ZPZZ_D(sve2_uqsub_zpzz_d, uint64_t, do_uqsub_d)
+
+#define DO_SUQADD_B(n, m) \
+ do_sat_bhs((int64_t)(int8_t)n + m, INT8_MIN, INT8_MAX)
+#define DO_SUQADD_H(n, m) \
+ do_sat_bhs((int64_t)(int16_t)n + m, INT16_MIN, INT16_MAX)
+#define DO_SUQADD_S(n, m) \
+ do_sat_bhs((int64_t)(int32_t)n + m, INT32_MIN, INT32_MAX)
+
+static inline int64_t do_suqadd_d(int64_t n, uint64_t m)
+{
+ uint64_t r = n + m;
+
+ if (n < 0) {
+ /* Note that m - abs(n) cannot underflow INT64_MIN,
+ so only saturation to INT64_MAX is possible here. */
+ if (r > INT64_MAX) {
+ /* Result is either very large positive or negative. */
+ if (m > -n) {
+ /* m > abs(n), so r is a very large positive. */
+ return INT64_MAX;
+ }
+ /* Result is negative. */
+ }
+ } else {
+ /* Both inputs are positive: check for overflow. */
+ if (r < m || r > INT64_MAX) {
+ return INT64_MAX;
+ }
+ }
+ return r;
+}
+
+DO_ZPZZ(sve2_suqadd_zpzz_b, uint8_t, H1, DO_SUQADD_B)
+DO_ZPZZ(sve2_suqadd_zpzz_h, uint16_t, H1_2, DO_SUQADD_H)
+DO_ZPZZ(sve2_suqadd_zpzz_s, uint32_t, H1_4, DO_SUQADD_S)
+DO_ZPZZ_D(sve2_suqadd_zpzz_d, uint64_t, do_suqadd_d)
+
+#define DO_USQADD_B(n, m) \
+ do_sat_bhs((int64_t)n + (int8_t)m, 0, UINT8_MAX)
+#define DO_USQADD_H(n, m) \
+ do_sat_bhs((int64_t)n + (int16_t)m, 0, UINT16_MAX)
+#define DO_USQADD_S(n, m) \
+ do_sat_bhs((int64_t)n + (int32_t)m, 0, UINT32_MAX)
+
+static inline uint64_t do_usqadd_d(uint64_t n, int64_t m)
+{
+ uint64_t r = n + m;
+
+ if (m < 0) {
+ return n < -m ? 0 : r;
+ }
+ return r < n ? UINT64_MAX : r;
+}
+
+DO_ZPZZ(sve2_usqadd_zpzz_b, uint8_t, H1, DO_USQADD_B)
+DO_ZPZZ(sve2_usqadd_zpzz_h, uint16_t, H1_2, DO_USQADD_H)
+DO_ZPZZ(sve2_usqadd_zpzz_s, uint32_t, H1_4, DO_USQADD_S)
+DO_ZPZZ_D(sve2_usqadd_zpzz_d, uint64_t, do_usqadd_d)
+
+#undef DO_ZPZZ
+#undef DO_ZPZZ_D
+
+/*
+ * Three operand expander, operating on element pairs.
+ * If the slot I is even, the elements come from VN {I, I+1}.
+ * If the slot I is odd, the elements come from VM {I-1, I}.
+ * Load all of the input elements in each pair before overwriting output.
+ */
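+/* E.g. for bytes: d[0] = OP(n[0], n[1]), d[1] = OP(m[0], m[1]),
+ d[2] = OP(n[2], n[3]), d[3] = OP(m[2], m[3]), and so on, with each
+ write gated by its predicate bit. */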
+#define DO_ZPZZ_PAIR(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ TYPE n0 = *(TYPE *)(vn + H(i)); \
+ TYPE m0 = *(TYPE *)(vm + H(i)); \
+ TYPE n1 = *(TYPE *)(vn + H(i + sizeof(TYPE))); \
+ TYPE m1 = *(TYPE *)(vm + H(i + sizeof(TYPE))); \
+ if (pg & 1) { \
+ *(TYPE *)(vd + H(i)) = OP(n0, n1); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ if (pg & 1) { \
+ *(TYPE *)(vd + H(i)) = OP(m0, m1); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ } while (i & 15); \
+ } \
+}
+
+/* Similarly, specialized for 64-bit operands. */
+#define DO_ZPZZ_PAIR_D(NAME, TYPE, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ uint8_t *pg = vg; \
+ for (i = 0; i < opr_sz; i += 2) { \
+ TYPE n0 = n[i], n1 = n[i + 1]; \
+ TYPE m0 = m[i], m1 = m[i + 1]; \
+ if (pg[H1(i)] & 1) { \
+ d[i] = OP(n0, n1); \
+ } \
+ if (pg[H1(i + 1)] & 1) { \
+ d[i + 1] = OP(m0, m1); \
+ } \
+ } \
+}
+
+DO_ZPZZ_PAIR(sve2_addp_zpzz_b, uint8_t, H1, DO_ADD)
+DO_ZPZZ_PAIR(sve2_addp_zpzz_h, uint16_t, H1_2, DO_ADD)
+DO_ZPZZ_PAIR(sve2_addp_zpzz_s, uint32_t, H1_4, DO_ADD)
+DO_ZPZZ_PAIR_D(sve2_addp_zpzz_d, uint64_t, DO_ADD)
+
+DO_ZPZZ_PAIR(sve2_umaxp_zpzz_b, uint8_t, H1, DO_MAX)
+DO_ZPZZ_PAIR(sve2_umaxp_zpzz_h, uint16_t, H1_2, DO_MAX)
+DO_ZPZZ_PAIR(sve2_umaxp_zpzz_s, uint32_t, H1_4, DO_MAX)
+DO_ZPZZ_PAIR_D(sve2_umaxp_zpzz_d, uint64_t, DO_MAX)
+
+DO_ZPZZ_PAIR(sve2_uminp_zpzz_b, uint8_t, H1, DO_MIN)
+DO_ZPZZ_PAIR(sve2_uminp_zpzz_h, uint16_t, H1_2, DO_MIN)
+DO_ZPZZ_PAIR(sve2_uminp_zpzz_s, uint32_t, H1_4, DO_MIN)
+DO_ZPZZ_PAIR_D(sve2_uminp_zpzz_d, uint64_t, DO_MIN)
+
+DO_ZPZZ_PAIR(sve2_smaxp_zpzz_b, int8_t, H1, DO_MAX)
+DO_ZPZZ_PAIR(sve2_smaxp_zpzz_h, int16_t, H1_2, DO_MAX)
+DO_ZPZZ_PAIR(sve2_smaxp_zpzz_s, int32_t, H1_4, DO_MAX)
+DO_ZPZZ_PAIR_D(sve2_smaxp_zpzz_d, int64_t, DO_MAX)
+
+DO_ZPZZ_PAIR(sve2_sminp_zpzz_b, int8_t, H1, DO_MIN)
+DO_ZPZZ_PAIR(sve2_sminp_zpzz_h, int16_t, H1_2, DO_MIN)
+DO_ZPZZ_PAIR(sve2_sminp_zpzz_s, int32_t, H1_4, DO_MIN)
+DO_ZPZZ_PAIR_D(sve2_sminp_zpzz_d, int64_t, DO_MIN)
+
+#undef DO_ZPZZ_PAIR
+#undef DO_ZPZZ_PAIR_D
+
+#define DO_ZPZZ_PAIR_FP(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
+ void *status, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ TYPE n0 = *(TYPE *)(vn + H(i)); \
+ TYPE m0 = *(TYPE *)(vm + H(i)); \
+ TYPE n1 = *(TYPE *)(vn + H(i + sizeof(TYPE))); \
+ TYPE m1 = *(TYPE *)(vm + H(i + sizeof(TYPE))); \
+ if (pg & 1) { \
+ *(TYPE *)(vd + H(i)) = OP(n0, n1, status); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ if (pg & 1) { \
+ *(TYPE *)(vd + H(i)) = OP(m0, m1, status); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ } while (i & 15); \
+ } \
+}
+
+DO_ZPZZ_PAIR_FP(sve2_faddp_zpzz_h, float16, H1_2, float16_add)
+DO_ZPZZ_PAIR_FP(sve2_faddp_zpzz_s, float32, H1_4, float32_add)
+DO_ZPZZ_PAIR_FP(sve2_faddp_zpzz_d, float64, H1_8, float64_add)
+
+DO_ZPZZ_PAIR_FP(sve2_fmaxnmp_zpzz_h, float16, H1_2, float16_maxnum)
+DO_ZPZZ_PAIR_FP(sve2_fmaxnmp_zpzz_s, float32, H1_4, float32_maxnum)
+DO_ZPZZ_PAIR_FP(sve2_fmaxnmp_zpzz_d, float64, H1_8, float64_maxnum)
+
+DO_ZPZZ_PAIR_FP(sve2_fminnmp_zpzz_h, float16, H1_2, float16_minnum)
+DO_ZPZZ_PAIR_FP(sve2_fminnmp_zpzz_s, float32, H1_4, float32_minnum)
+DO_ZPZZ_PAIR_FP(sve2_fminnmp_zpzz_d, float64, H1_8, float64_minnum)
+
+DO_ZPZZ_PAIR_FP(sve2_fmaxp_zpzz_h, float16, H1_2, float16_max)
+DO_ZPZZ_PAIR_FP(sve2_fmaxp_zpzz_s, float32, H1_4, float32_max)
+DO_ZPZZ_PAIR_FP(sve2_fmaxp_zpzz_d, float64, H1_8, float64_max)
+
+DO_ZPZZ_PAIR_FP(sve2_fminp_zpzz_h, float16, H1_2, float16_min)
+DO_ZPZZ_PAIR_FP(sve2_fminp_zpzz_s, float32, H1_4, float32_min)
+DO_ZPZZ_PAIR_FP(sve2_fminp_zpzz_d, float64, H1_8, float64_min)
+
+#undef DO_ZPZZ_PAIR_FP
+
+/* Three-operand expander, controlled by a predicate, in which the
+ * third operand is "wide". That is, for D = N op M, the same 64-bit
+ * value of M is used with all of the narrower values of N.
+ */
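+/* E.g. for the byte forms, all eight byte elements of N that share a
+ 64-bit lane are shifted by that lane's single 64-bit value of M. */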
+#define DO_ZPZW(NAME, TYPE, TYPEW, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ uint8_t pg = *(uint8_t *)(vg + H1(i >> 3)); \
+ TYPEW mm = *(TYPEW *)(vm + i); \
+ do { \
+ if (pg & 1) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(nn, mm); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ } while (i & 7); \
+ } \
+}
+
+DO_ZPZW(sve_asr_zpzw_b, int8_t, uint64_t, H1, DO_ASR)
+DO_ZPZW(sve_lsr_zpzw_b, uint8_t, uint64_t, H1, DO_LSR)
+DO_ZPZW(sve_lsl_zpzw_b, uint8_t, uint64_t, H1, DO_LSL)
+
+DO_ZPZW(sve_asr_zpzw_h, int16_t, uint64_t, H1_2, DO_ASR)
+DO_ZPZW(sve_lsr_zpzw_h, uint16_t, uint64_t, H1_2, DO_LSR)
+DO_ZPZW(sve_lsl_zpzw_h, uint16_t, uint64_t, H1_2, DO_LSL)
+
+DO_ZPZW(sve_asr_zpzw_s, int32_t, uint64_t, H1_4, DO_ASR)
+DO_ZPZW(sve_lsr_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSR)
+DO_ZPZW(sve_lsl_zpzw_s, uint32_t, uint64_t, H1_4, DO_LSL)
+
+#undef DO_ZPZW
+
+/* Fully general two-operand expander, controlled by a predicate.
+ */
+#define DO_ZPZ(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(nn); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ } while (i & 15); \
+ } \
+}
+
+/* Similarly, specialized for 64-bit operands. */
+#define DO_ZPZ_D(NAME, TYPE, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
+ TYPE *d = vd, *n = vn; \
+ uint8_t *pg = vg; \
+ for (i = 0; i < opr_sz; i += 1) { \
+ if (pg[H1(i)] & 1) { \
+ TYPE nn = n[i]; \
+ d[i] = OP(nn); \
+ } \
+ } \
+}
+
+#define DO_CLS_B(N) (clrsb32(N) - 24)
+#define DO_CLS_H(N) (clrsb32(N) - 16)
+
+DO_ZPZ(sve_cls_b, int8_t, H1, DO_CLS_B)
+DO_ZPZ(sve_cls_h, int16_t, H1_2, DO_CLS_H)
+DO_ZPZ(sve_cls_s, int32_t, H1_4, clrsb32)
+DO_ZPZ_D(sve_cls_d, int64_t, clrsb64)
+
+#define DO_CLZ_B(N) (clz32(N) - 24)
+#define DO_CLZ_H(N) (clz32(N) - 16)
+
+DO_ZPZ(sve_clz_b, uint8_t, H1, DO_CLZ_B)
+DO_ZPZ(sve_clz_h, uint16_t, H1_2, DO_CLZ_H)
+DO_ZPZ(sve_clz_s, uint32_t, H1_4, clz32)
+DO_ZPZ_D(sve_clz_d, uint64_t, clz64)
+
+DO_ZPZ(sve_cnt_zpz_b, uint8_t, H1, ctpop8)
+DO_ZPZ(sve_cnt_zpz_h, uint16_t, H1_2, ctpop16)
+DO_ZPZ(sve_cnt_zpz_s, uint32_t, H1_4, ctpop32)
+DO_ZPZ_D(sve_cnt_zpz_d, uint64_t, ctpop64)
+
+#define DO_CNOT(N) (N == 0)
+
+DO_ZPZ(sve_cnot_b, uint8_t, H1, DO_CNOT)
+DO_ZPZ(sve_cnot_h, uint16_t, H1_2, DO_CNOT)
+DO_ZPZ(sve_cnot_s, uint32_t, H1_4, DO_CNOT)
+DO_ZPZ_D(sve_cnot_d, uint64_t, DO_CNOT)
+
+#define DO_FABS(N) (N & ((__typeof(N))-1 >> 1))
+
+DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS)
+DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS)
+DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS)
+
+#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1))
+
+DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG)
+DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG)
+DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG)
+
+#define DO_NOT(N) (~N)
+
+DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT)
+DO_ZPZ(sve_not_zpz_h, uint16_t, H1_2, DO_NOT)
+DO_ZPZ(sve_not_zpz_s, uint32_t, H1_4, DO_NOT)
+DO_ZPZ_D(sve_not_zpz_d, uint64_t, DO_NOT)
+
+#define DO_SXTB(N) ((int8_t)N)
+#define DO_SXTH(N) ((int16_t)N)
+#define DO_SXTS(N) ((int32_t)N)
+#define DO_UXTB(N) ((uint8_t)N)
+#define DO_UXTH(N) ((uint16_t)N)
+#define DO_UXTS(N) ((uint32_t)N)
+
+DO_ZPZ(sve_sxtb_h, uint16_t, H1_2, DO_SXTB)
+DO_ZPZ(sve_sxtb_s, uint32_t, H1_4, DO_SXTB)
+DO_ZPZ(sve_sxth_s, uint32_t, H1_4, DO_SXTH)
+DO_ZPZ_D(sve_sxtb_d, uint64_t, DO_SXTB)
+DO_ZPZ_D(sve_sxth_d, uint64_t, DO_SXTH)
+DO_ZPZ_D(sve_sxtw_d, uint64_t, DO_SXTS)
+
+DO_ZPZ(sve_uxtb_h, uint16_t, H1_2, DO_UXTB)
+DO_ZPZ(sve_uxtb_s, uint32_t, H1_4, DO_UXTB)
+DO_ZPZ(sve_uxth_s, uint32_t, H1_4, DO_UXTH)
+DO_ZPZ_D(sve_uxtb_d, uint64_t, DO_UXTB)
+DO_ZPZ_D(sve_uxth_d, uint64_t, DO_UXTH)
+DO_ZPZ_D(sve_uxtw_d, uint64_t, DO_UXTS)
+
+#define DO_ABS(N) (N < 0 ? -N : N)
+
+DO_ZPZ(sve_abs_b, int8_t, H1, DO_ABS)
+DO_ZPZ(sve_abs_h, int16_t, H1_2, DO_ABS)
+DO_ZPZ(sve_abs_s, int32_t, H1_4, DO_ABS)
+DO_ZPZ_D(sve_abs_d, int64_t, DO_ABS)
+
+#define DO_NEG(N) (-N)
+
+DO_ZPZ(sve_neg_b, uint8_t, H1, DO_NEG)
+DO_ZPZ(sve_neg_h, uint16_t, H1_2, DO_NEG)
+DO_ZPZ(sve_neg_s, uint32_t, H1_4, DO_NEG)
+DO_ZPZ_D(sve_neg_d, uint64_t, DO_NEG)
+
+DO_ZPZ(sve_revb_h, uint16_t, H1_2, bswap16)
+DO_ZPZ(sve_revb_s, uint32_t, H1_4, bswap32)
+DO_ZPZ_D(sve_revb_d, uint64_t, bswap64)
+
+DO_ZPZ(sve_revh_s, uint32_t, H1_4, hswap32)
+DO_ZPZ_D(sve_revh_d, uint64_t, hswap64)
+
+DO_ZPZ_D(sve_revw_d, uint64_t, wswap64)
+
+void HELPER(sme_revd_q)(void *vd, void *vn, void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 2) {
+ if (pg[H1(i)] & 1) {
+ uint64_t n0 = n[i + 0];
+ uint64_t n1 = n[i + 1];
+ d[i + 0] = n1;
+ d[i + 1] = n0;
+ }
+ }
+}
+
+DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8)
+DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16)
+DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32)
+DO_ZPZ_D(sve_rbit_d, uint64_t, revbit64)
+
+#define DO_SQABS(X) \
+ ({ __typeof(X) x_ = (X), min_ = 1ull << (sizeof(X) * 8 - 1); \
+ x_ >= 0 ? x_ : x_ == min_ ? -min_ - 1 : -x_; })
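+/* The one overflow case, X equal to the minimum for its type,
+ saturates to the maximum. */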
+
+DO_ZPZ(sve2_sqabs_b, int8_t, H1, DO_SQABS)
+DO_ZPZ(sve2_sqabs_h, int16_t, H1_2, DO_SQABS)
+DO_ZPZ(sve2_sqabs_s, int32_t, H1_4, DO_SQABS)
+DO_ZPZ_D(sve2_sqabs_d, int64_t, DO_SQABS)
+
+#define DO_SQNEG(X) \
+ ({ __typeof(X) x_ = (X), min_ = 1ull << (sizeof(X) * 8 - 1); \
+ x_ == min_ ? -min_ - 1 : -x_; })
+
+DO_ZPZ(sve2_sqneg_b, uint8_t, H1, DO_SQNEG)
+DO_ZPZ(sve2_sqneg_h, uint16_t, H1_2, DO_SQNEG)
+DO_ZPZ(sve2_sqneg_s, uint32_t, H1_4, DO_SQNEG)
+DO_ZPZ_D(sve2_sqneg_d, uint64_t, DO_SQNEG)
+
+DO_ZPZ(sve2_urecpe_s, uint32_t, H1_4, helper_recpe_u32)
+DO_ZPZ(sve2_ursqrte_s, uint32_t, H1_4, helper_rsqrte_u32)
+
+/* Three-operand expander, unpredicated, in which the third operand is "wide".
+ */
+#define DO_ZZW(NAME, TYPE, TYPEW, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ TYPEW mm = *(TYPEW *)(vm + i); \
+ do { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(nn, mm); \
+ i += sizeof(TYPE); \
+ } while (i & 7); \
+ } \
+}
+
+DO_ZZW(sve_asr_zzw_b, int8_t, uint64_t, H1, DO_ASR)
+DO_ZZW(sve_lsr_zzw_b, uint8_t, uint64_t, H1, DO_LSR)
+DO_ZZW(sve_lsl_zzw_b, uint8_t, uint64_t, H1, DO_LSL)
+
+DO_ZZW(sve_asr_zzw_h, int16_t, uint64_t, H1_2, DO_ASR)
+DO_ZZW(sve_lsr_zzw_h, uint16_t, uint64_t, H1_2, DO_LSR)
+DO_ZZW(sve_lsl_zzw_h, uint16_t, uint64_t, H1_2, DO_LSL)
+
+DO_ZZW(sve_asr_zzw_s, int32_t, uint64_t, H1_4, DO_ASR)
+DO_ZZW(sve_lsr_zzw_s, uint32_t, uint64_t, H1_4, DO_LSR)
+DO_ZZW(sve_lsl_zzw_s, uint32_t, uint64_t, H1_4, DO_LSL)
+
+#undef DO_ZZW
+
+#undef DO_CLS_B
+#undef DO_CLS_H
+#undef DO_CLZ_B
+#undef DO_CLZ_H
+#undef DO_CNOT
+#undef DO_FABS
+#undef DO_FNEG
+#undef DO_ABS
+#undef DO_NEG
+#undef DO_ZPZ
+#undef DO_ZPZ_D
+
+/*
+ * Three-operand expander, unpredicated, in which the two inputs are
+ * selected from the top or bottom half of the wide column.
+ */
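+/* sel1 and sel2 are byte offsets within each wide column: 0 selects
+ the bottom (even-numbered) narrow element, sizeof(TYPEN) the top
+ (odd-numbered) one. */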
+#define DO_ZZZ_TB(NAME, TYPEW, TYPEN, HW, HN, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ int sel1 = extract32(desc, SIMD_DATA_SHIFT, 1) * sizeof(TYPEN); \
+ int sel2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(TYPEN); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPEW)) { \
+ TYPEW nn = *(TYPEN *)(vn + HN(i + sel1)); \
+ TYPEW mm = *(TYPEN *)(vm + HN(i + sel2)); \
+ *(TYPEW *)(vd + HW(i)) = OP(nn, mm); \
+ } \
+}
+
+DO_ZZZ_TB(sve2_saddl_h, int16_t, int8_t, H1_2, H1, DO_ADD)
+DO_ZZZ_TB(sve2_saddl_s, int32_t, int16_t, H1_4, H1_2, DO_ADD)
+DO_ZZZ_TB(sve2_saddl_d, int64_t, int32_t, H1_8, H1_4, DO_ADD)
+
+DO_ZZZ_TB(sve2_ssubl_h, int16_t, int8_t, H1_2, H1, DO_SUB)
+DO_ZZZ_TB(sve2_ssubl_s, int32_t, int16_t, H1_4, H1_2, DO_SUB)
+DO_ZZZ_TB(sve2_ssubl_d, int64_t, int32_t, H1_8, H1_4, DO_SUB)
+
+DO_ZZZ_TB(sve2_sabdl_h, int16_t, int8_t, H1_2, H1, DO_ABD)
+DO_ZZZ_TB(sve2_sabdl_s, int32_t, int16_t, H1_4, H1_2, DO_ABD)
+DO_ZZZ_TB(sve2_sabdl_d, int64_t, int32_t, H1_8, H1_4, DO_ABD)
+
+DO_ZZZ_TB(sve2_uaddl_h, uint16_t, uint8_t, H1_2, H1, DO_ADD)
+DO_ZZZ_TB(sve2_uaddl_s, uint32_t, uint16_t, H1_4, H1_2, DO_ADD)
+DO_ZZZ_TB(sve2_uaddl_d, uint64_t, uint32_t, H1_8, H1_4, DO_ADD)
+
+DO_ZZZ_TB(sve2_usubl_h, uint16_t, uint8_t, H1_2, H1, DO_SUB)
+DO_ZZZ_TB(sve2_usubl_s, uint32_t, uint16_t, H1_4, H1_2, DO_SUB)
+DO_ZZZ_TB(sve2_usubl_d, uint64_t, uint32_t, H1_8, H1_4, DO_SUB)
+
+DO_ZZZ_TB(sve2_uabdl_h, uint16_t, uint8_t, H1_2, H1, DO_ABD)
+DO_ZZZ_TB(sve2_uabdl_s, uint32_t, uint16_t, H1_4, H1_2, DO_ABD)
+DO_ZZZ_TB(sve2_uabdl_d, uint64_t, uint32_t, H1_8, H1_4, DO_ABD)
+
+DO_ZZZ_TB(sve2_smull_zzz_h, int16_t, int8_t, H1_2, H1, DO_MUL)
+DO_ZZZ_TB(sve2_smull_zzz_s, int32_t, int16_t, H1_4, H1_2, DO_MUL)
+DO_ZZZ_TB(sve2_smull_zzz_d, int64_t, int32_t, H1_8, H1_4, DO_MUL)
+
+DO_ZZZ_TB(sve2_umull_zzz_h, uint16_t, uint8_t, H1_2, H1, DO_MUL)
+DO_ZZZ_TB(sve2_umull_zzz_s, uint32_t, uint16_t, H1_4, H1_2, DO_MUL)
+DO_ZZZ_TB(sve2_umull_zzz_d, uint64_t, uint32_t, H1_8, H1_4, DO_MUL)
+
+/* Note that the multiply cannot overflow, but the doubling can. */
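+/* E.g. do_sqdmull_h(-128, -128): the product 16384 fits in int16_t,
+ but doubling it to 32768 overflows and saturates to 32767. */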
+static inline int16_t do_sqdmull_h(int16_t n, int16_t m)
+{
+ int16_t val = n * m;
+ return DO_SQADD_H(val, val);
+}
+
+static inline int32_t do_sqdmull_s(int32_t n, int32_t m)
+{
+ int32_t val = n * m;
+ return DO_SQADD_S(val, val);
+}
+
+static inline int64_t do_sqdmull_d(int64_t n, int64_t m)
+{
+ int64_t val = n * m;
+ return do_sqadd_d(val, val);
+}
+
+DO_ZZZ_TB(sve2_sqdmull_zzz_h, int16_t, int8_t, H1_2, H1, do_sqdmull_h)
+DO_ZZZ_TB(sve2_sqdmull_zzz_s, int32_t, int16_t, H1_4, H1_2, do_sqdmull_s)
+DO_ZZZ_TB(sve2_sqdmull_zzz_d, int64_t, int32_t, H1_8, H1_4, do_sqdmull_d)
+
+#undef DO_ZZZ_TB
+
+#define DO_ZZZ_WTB(NAME, TYPEW, TYPEN, HW, HN, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ int sel2 = extract32(desc, SIMD_DATA_SHIFT, 1) * sizeof(TYPEN); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPEW)) { \
+ TYPEW nn = *(TYPEW *)(vn + HW(i)); \
+ TYPEW mm = *(TYPEN *)(vm + HN(i + sel2)); \
+ *(TYPEW *)(vd + HW(i)) = OP(nn, mm); \
+ } \
+}
+
+DO_ZZZ_WTB(sve2_saddw_h, int16_t, int8_t, H1_2, H1, DO_ADD)
+DO_ZZZ_WTB(sve2_saddw_s, int32_t, int16_t, H1_4, H1_2, DO_ADD)
+DO_ZZZ_WTB(sve2_saddw_d, int64_t, int32_t, H1_8, H1_4, DO_ADD)
+
+DO_ZZZ_WTB(sve2_ssubw_h, int16_t, int8_t, H1_2, H1, DO_SUB)
+DO_ZZZ_WTB(sve2_ssubw_s, int32_t, int16_t, H1_4, H1_2, DO_SUB)
+DO_ZZZ_WTB(sve2_ssubw_d, int64_t, int32_t, H1_8, H1_4, DO_SUB)
+
+DO_ZZZ_WTB(sve2_uaddw_h, uint16_t, uint8_t, H1_2, H1, DO_ADD)
+DO_ZZZ_WTB(sve2_uaddw_s, uint32_t, uint16_t, H1_4, H1_2, DO_ADD)
+DO_ZZZ_WTB(sve2_uaddw_d, uint64_t, uint32_t, H1_8, H1_4, DO_ADD)
+
+DO_ZZZ_WTB(sve2_usubw_h, uint16_t, uint8_t, H1_2, H1, DO_SUB)
+DO_ZZZ_WTB(sve2_usubw_s, uint32_t, uint16_t, H1_4, H1_2, DO_SUB)
+DO_ZZZ_WTB(sve2_usubw_d, uint64_t, uint32_t, H1_8, H1_4, DO_SUB)
+
+#undef DO_ZZZ_WTB
+
+#define DO_ZZZ_NTB(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ intptr_t sel1 = extract32(desc, SIMD_DATA_SHIFT, 1) * sizeof(TYPE); \
+ intptr_t sel2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(TYPE); \
+ for (i = 0; i < opr_sz; i += 2 * sizeof(TYPE)) { \
+ TYPE nn = *(TYPE *)(vn + H(i + sel1)); \
+ TYPE mm = *(TYPE *)(vm + H(i + sel2)); \
+ *(TYPE *)(vd + H(i + sel1)) = OP(nn, mm); \
+ } \
+}
+
+DO_ZZZ_NTB(sve2_eoril_b, uint8_t, H1, DO_EOR)
+DO_ZZZ_NTB(sve2_eoril_h, uint16_t, H1_2, DO_EOR)
+DO_ZZZ_NTB(sve2_eoril_s, uint32_t, H1_4, DO_EOR)
+DO_ZZZ_NTB(sve2_eoril_d, uint64_t, H1_8, DO_EOR)
+
+#undef DO_ZZZ_NTB
+
+#define DO_ZZZW_ACC(NAME, TYPEW, TYPEN, HW, HN, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ intptr_t sel1 = simd_data(desc) * sizeof(TYPEN); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPEW)) { \
+ TYPEW nn = *(TYPEN *)(vn + HN(i + sel1)); \
+ TYPEW mm = *(TYPEN *)(vm + HN(i + sel1)); \
+ TYPEW aa = *(TYPEW *)(va + HW(i)); \
+ *(TYPEW *)(vd + HW(i)) = OP(nn, mm) + aa; \
+ } \
+}
+
+DO_ZZZW_ACC(sve2_sabal_h, int16_t, int8_t, H1_2, H1, DO_ABD)
+DO_ZZZW_ACC(sve2_sabal_s, int32_t, int16_t, H1_4, H1_2, DO_ABD)
+DO_ZZZW_ACC(sve2_sabal_d, int64_t, int32_t, H1_8, H1_4, DO_ABD)
+
+DO_ZZZW_ACC(sve2_uabal_h, uint16_t, uint8_t, H1_2, H1, DO_ABD)
+DO_ZZZW_ACC(sve2_uabal_s, uint32_t, uint16_t, H1_4, H1_2, DO_ABD)
+DO_ZZZW_ACC(sve2_uabal_d, uint64_t, uint32_t, H1_8, H1_4, DO_ABD)
+
+DO_ZZZW_ACC(sve2_smlal_zzzw_h, int16_t, int8_t, H1_2, H1, DO_MUL)
+DO_ZZZW_ACC(sve2_smlal_zzzw_s, int32_t, int16_t, H1_4, H1_2, DO_MUL)
+DO_ZZZW_ACC(sve2_smlal_zzzw_d, int64_t, int32_t, H1_8, H1_4, DO_MUL)
+
+DO_ZZZW_ACC(sve2_umlal_zzzw_h, uint16_t, uint8_t, H1_2, H1, DO_MUL)
+DO_ZZZW_ACC(sve2_umlal_zzzw_s, uint32_t, uint16_t, H1_4, H1_2, DO_MUL)
+DO_ZZZW_ACC(sve2_umlal_zzzw_d, uint64_t, uint32_t, H1_8, H1_4, DO_MUL)
+
+#define DO_NMUL(N, M) -(N * M)
+
+DO_ZZZW_ACC(sve2_smlsl_zzzw_h, int16_t, int8_t, H1_2, H1, DO_NMUL)
+DO_ZZZW_ACC(sve2_smlsl_zzzw_s, int32_t, int16_t, H1_4, H1_2, DO_NMUL)
+DO_ZZZW_ACC(sve2_smlsl_zzzw_d, int64_t, int32_t, H1_8, H1_4, DO_NMUL)
+
+DO_ZZZW_ACC(sve2_umlsl_zzzw_h, uint16_t, uint8_t, H1_2, H1, DO_NMUL)
+DO_ZZZW_ACC(sve2_umlsl_zzzw_s, uint32_t, uint16_t, H1_4, H1_2, DO_NMUL)
+DO_ZZZW_ACC(sve2_umlsl_zzzw_d, uint64_t, uint32_t, H1_8, H1_4, DO_NMUL)
+
+#undef DO_ZZZW_ACC
+
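+/*
+ * Narrowing expanders: DO_XTNB writes the narrowed value into the low
+ * half of each wide element and zeroes the high half; DO_XTNT writes
+ * only the odd-numbered narrow elements, leaving the even-numbered
+ * elements of the destination untouched.
+ */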
+#define DO_XTNB(NAME, TYPE, OP) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
+ TYPE nn = *(TYPE *)(vn + i); \
+ nn = OP(nn) & MAKE_64BIT_MASK(0, sizeof(TYPE) * 4); \
+ *(TYPE *)(vd + i) = nn; \
+ } \
+}
+
+#define DO_XTNT(NAME, TYPE, TYPEN, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc), odd = H(sizeof(TYPEN)); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
+ TYPE nn = *(TYPE *)(vn + i); \
+ *(TYPEN *)(vd + i + odd) = OP(nn); \
+ } \
+}
+
+#define DO_SQXTN_H(n) do_sat_bhs(n, INT8_MIN, INT8_MAX)
+#define DO_SQXTN_S(n) do_sat_bhs(n, INT16_MIN, INT16_MAX)
+#define DO_SQXTN_D(n) do_sat_bhs(n, INT32_MIN, INT32_MAX)
+
+DO_XTNB(sve2_sqxtnb_h, int16_t, DO_SQXTN_H)
+DO_XTNB(sve2_sqxtnb_s, int32_t, DO_SQXTN_S)
+DO_XTNB(sve2_sqxtnb_d, int64_t, DO_SQXTN_D)
+
+DO_XTNT(sve2_sqxtnt_h, int16_t, int8_t, H1, DO_SQXTN_H)
+DO_XTNT(sve2_sqxtnt_s, int32_t, int16_t, H1_2, DO_SQXTN_S)
+DO_XTNT(sve2_sqxtnt_d, int64_t, int32_t, H1_4, DO_SQXTN_D)
+
+#define DO_UQXTN_H(n) do_sat_bhs(n, 0, UINT8_MAX)
+#define DO_UQXTN_S(n) do_sat_bhs(n, 0, UINT16_MAX)
+#define DO_UQXTN_D(n) do_sat_bhs(n, 0, UINT32_MAX)
+
+DO_XTNB(sve2_uqxtnb_h, uint16_t, DO_UQXTN_H)
+DO_XTNB(sve2_uqxtnb_s, uint32_t, DO_UQXTN_S)
+DO_XTNB(sve2_uqxtnb_d, uint64_t, DO_UQXTN_D)
+
+DO_XTNT(sve2_uqxtnt_h, uint16_t, uint8_t, H1, DO_UQXTN_H)
+DO_XTNT(sve2_uqxtnt_s, uint32_t, uint16_t, H1_2, DO_UQXTN_S)
+DO_XTNT(sve2_uqxtnt_d, uint64_t, uint32_t, H1_4, DO_UQXTN_D)
+
+DO_XTNB(sve2_sqxtunb_h, int16_t, DO_UQXTN_H)
+DO_XTNB(sve2_sqxtunb_s, int32_t, DO_UQXTN_S)
+DO_XTNB(sve2_sqxtunb_d, int64_t, DO_UQXTN_D)
+
+DO_XTNT(sve2_sqxtunt_h, int16_t, int8_t, H1, DO_UQXTN_H)
+DO_XTNT(sve2_sqxtunt_s, int32_t, int16_t, H1_2, DO_UQXTN_S)
+DO_XTNT(sve2_sqxtunt_d, int64_t, int32_t, H1_4, DO_UQXTN_D)
+
+#undef DO_XTNB
+#undef DO_XTNT
+
+void HELPER(sve2_adcl_s)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int sel = H4(extract32(desc, SIMD_DATA_SHIFT, 1));
+ uint32_t inv = -extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t *a = va, *n = vn;
+ uint64_t *d = vd, *m = vm;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ uint32_t e1 = a[2 * i + H4(0)];
+ uint32_t e2 = n[2 * i + sel] ^ inv;
+ uint64_t c = extract64(m[i], 32, 1);
+ /* Compute and store the entire 33-bit result at once. */
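+ /* The low 32 bits of d[i] hold the sum; bit 32 holds the carry out. */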
+ d[i] = c + e1 + e2;
+ }
+}
+
+void HELPER(sve2_adcl_d)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int sel = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint64_t inv = -(uint64_t)extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint64_t *d = vd, *a = va, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 8; i += 2) {
+ Int128 e1 = int128_make64(a[i]);
+ Int128 e2 = int128_make64(n[i + sel] ^ inv);
+ Int128 c = int128_make64(m[i + 1] & 1);
+ Int128 r = int128_add(int128_add(e1, e2), c);
+ d[i + 0] = int128_getlo(r);
+ d[i + 1] = int128_gethi(r);
+ }
+}
+
+#define DO_SQDMLAL(NAME, TYPEW, TYPEN, HW, HN, DMUL_OP, SUM_OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ int sel1 = extract32(desc, SIMD_DATA_SHIFT, 1) * sizeof(TYPEN); \
+ int sel2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(TYPEN); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPEW)) { \
+ TYPEW nn = *(TYPEN *)(vn + HN(i + sel1)); \
+ TYPEW mm = *(TYPEN *)(vm + HN(i + sel2)); \
+ TYPEW aa = *(TYPEW *)(va + HW(i)); \
+ *(TYPEW *)(vd + HW(i)) = SUM_OP(aa, DMUL_OP(nn, mm)); \
+ } \
+}
+
+DO_SQDMLAL(sve2_sqdmlal_zzzw_h, int16_t, int8_t, H1_2, H1,
+ do_sqdmull_h, DO_SQADD_H)
+DO_SQDMLAL(sve2_sqdmlal_zzzw_s, int32_t, int16_t, H1_4, H1_2,
+ do_sqdmull_s, DO_SQADD_S)
+DO_SQDMLAL(sve2_sqdmlal_zzzw_d, int64_t, int32_t, H1_8, H1_4,
+ do_sqdmull_d, do_sqadd_d)
+
+DO_SQDMLAL(sve2_sqdmlsl_zzzw_h, int16_t, int8_t, H1_2, H1,
+ do_sqdmull_h, DO_SQSUB_H)
+DO_SQDMLAL(sve2_sqdmlsl_zzzw_s, int32_t, int16_t, H1_4, H1_2,
+ do_sqdmull_s, DO_SQSUB_S)
+DO_SQDMLAL(sve2_sqdmlsl_zzzw_d, int64_t, int32_t, H1_8, H1_4,
+ do_sqdmull_d, do_sqsub_d)
+
+#undef DO_SQDMLAL
+
+#define DO_CMLA_FUNC(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(TYPE); \
+ int rot = simd_data(desc); \
+ int sel_a = rot & 1, sel_b = sel_a ^ 1; \
+ bool sub_r = rot == 1 || rot == 2; \
+ bool sub_i = rot >= 2; \
+ TYPE *d = vd, *n = vn, *m = vm, *a = va; \
+ for (i = 0; i < opr_sz; i += 2) { \
+ TYPE elt1_a = n[H(i + sel_a)]; \
+ TYPE elt2_a = m[H(i + sel_a)]; \
+ TYPE elt2_b = m[H(i + sel_b)]; \
+ d[H(i)] = OP(elt1_a, elt2_a, a[H(i)], sub_r); \
+ d[H(i + 1)] = OP(elt1_a, elt2_b, a[H(i + 1)], sub_i); \
+ } \
+}
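+
+/*
+ * The two-bit rotation in the descriptor selects which element of each
+ * complex pair feeds the multiply (sel_a/sel_b) and whether the real
+ * and imaginary accumulations subtract instead of add (sub_r/sub_i).
+ */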
+
+#define DO_CMLA(N, M, A, S) (A + (N * M) * (S ? -1 : 1))
+
+DO_CMLA_FUNC(sve2_cmla_zzzz_b, uint8_t, H1, DO_CMLA)
+DO_CMLA_FUNC(sve2_cmla_zzzz_h, uint16_t, H2, DO_CMLA)
+DO_CMLA_FUNC(sve2_cmla_zzzz_s, uint32_t, H4, DO_CMLA)
+DO_CMLA_FUNC(sve2_cmla_zzzz_d, uint64_t, H8, DO_CMLA)
+
+#define DO_SQRDMLAH_B(N, M, A, S) \
+ do_sqrdmlah_b(N, M, A, S, true)
+#define DO_SQRDMLAH_H(N, M, A, S) \
+ ({ uint32_t discard; do_sqrdmlah_h(N, M, A, S, true, &discard); })
+#define DO_SQRDMLAH_S(N, M, A, S) \
+ ({ uint32_t discard; do_sqrdmlah_s(N, M, A, S, true, &discard); })
+#define DO_SQRDMLAH_D(N, M, A, S) \
+ do_sqrdmlah_d(N, M, A, S, true)
+
+DO_CMLA_FUNC(sve2_sqrdcmlah_zzzz_b, int8_t, H1, DO_SQRDMLAH_B)
+DO_CMLA_FUNC(sve2_sqrdcmlah_zzzz_h, int16_t, H2, DO_SQRDMLAH_H)
+DO_CMLA_FUNC(sve2_sqrdcmlah_zzzz_s, int32_t, H4, DO_SQRDMLAH_S)
+DO_CMLA_FUNC(sve2_sqrdcmlah_zzzz_d, int64_t, H8, DO_SQRDMLAH_D)
+
+#define DO_CMLA_IDX_FUNC(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc); \
+ int rot = extract32(desc, SIMD_DATA_SHIFT, 2); \
+ int idx = extract32(desc, SIMD_DATA_SHIFT + 2, 2) * 2; \
+ int sel_a = rot & 1, sel_b = sel_a ^ 1; \
+ bool sub_r = rot == 1 || rot == 2; \
+ bool sub_i = rot >= 2; \
+ TYPE *d = vd, *n = vn, *m = vm, *a = va; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += 16 / sizeof(TYPE)) { \
+ TYPE elt2_a = m[H(i + idx + sel_a)]; \
+ TYPE elt2_b = m[H(i + idx + sel_b)]; \
+ for (j = 0; j < 16 / sizeof(TYPE); j += 2) { \
+ TYPE elt1_a = n[H(i + j + sel_a)]; \
+ d[H(i + j)] = OP(elt1_a, elt2_a, a[H(i + j)], sub_r); \
+ d[H(i + j + 1)] = OP(elt1_a, elt2_b, a[H(i + j + 1)], sub_i); \
+ } \
+ } \
+}
+
+DO_CMLA_IDX_FUNC(sve2_cmla_idx_h, int16_t, H2, DO_CMLA)
+DO_CMLA_IDX_FUNC(sve2_cmla_idx_s, int32_t, H4, DO_CMLA)
+
+DO_CMLA_IDX_FUNC(sve2_sqrdcmlah_idx_h, int16_t, H2, DO_SQRDMLAH_H)
+DO_CMLA_IDX_FUNC(sve2_sqrdcmlah_idx_s, int32_t, H4, DO_SQRDMLAH_S)
+
+#undef DO_CMLA
+#undef DO_CMLA_FUNC
+#undef DO_CMLA_IDX_FUNC
+#undef DO_SQRDMLAH_B
+#undef DO_SQRDMLAH_H
+#undef DO_SQRDMLAH_S
+#undef DO_SQRDMLAH_D
+
+/* Note N and M are 4 elements bundled into one unit. */
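+/* For the _s form each unit is 32 bits holding two int8_t (real, imag)
+ pairs; for the _d form each unit is 64 bits holding int16_t pairs. */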
+static int32_t do_cdot_s(uint32_t n, uint32_t m, int32_t a,
+ int sel_a, int sel_b, int sub_i)
+{
+ for (int i = 0; i <= 1; i++) {
+ int32_t elt1_r = (int8_t)(n >> (16 * i));
+ int32_t elt1_i = (int8_t)(n >> (16 * i + 8));
+ int32_t elt2_a = (int8_t)(m >> (16 * i + 8 * sel_a));
+ int32_t elt2_b = (int8_t)(m >> (16 * i + 8 * sel_b));
+
+ a += elt1_r * elt2_a + elt1_i * elt2_b * sub_i;
+ }
+ return a;
+}
+
+static int64_t do_cdot_d(uint64_t n, uint64_t m, int64_t a,
+ int sel_a, int sel_b, int sub_i)
+{
+ for (int i = 0; i <= 1; i++) {
+ int64_t elt1_r = (int16_t)(n >> (32 * i + 0));
+ int64_t elt1_i = (int16_t)(n >> (32 * i + 16));
+ int64_t elt2_a = (int16_t)(m >> (32 * i + 16 * sel_a));
+ int64_t elt2_b = (int16_t)(m >> (32 * i + 16 * sel_b));
+
+ a += elt1_r * elt2_a + elt1_i * elt2_b * sub_i;
+ }
+ return a;
+}
+
+void HELPER(sve2_cdot_zzzz_s)(void *vd, void *vn, void *vm,
+ void *va, uint32_t desc)
+{
+ int opr_sz = simd_oprsz(desc);
+ int rot = simd_data(desc);
+ int sel_a = rot & 1;
+ int sel_b = sel_a ^ 1;
+ int sub_i = (rot == 0 || rot == 3 ? -1 : 1);
+ uint32_t *d = vd, *n = vn, *m = vm, *a = va;
+
+ for (int e = 0; e < opr_sz / 4; e++) {
+ d[e] = do_cdot_s(n[e], m[e], a[e], sel_a, sel_b, sub_i);
+ }
+}
+
+void HELPER(sve2_cdot_zzzz_d)(void *vd, void *vn, void *vm,
+ void *va, uint32_t desc)
+{
+ int opr_sz = simd_oprsz(desc);
+ int rot = simd_data(desc);
+ int sel_a = rot & 1;
+ int sel_b = sel_a ^ 1;
+ int sub_i = (rot == 0 || rot == 3 ? -1 : 1);
+ uint64_t *d = vd, *n = vn, *m = vm, *a = va;
+
+ for (int e = 0; e < opr_sz / 8; e++) {
+ d[e] = do_cdot_d(n[e], m[e], a[e], sel_a, sel_b, sub_i);
+ }
+}
+
+void HELPER(sve2_cdot_idx_s)(void *vd, void *vn, void *vm,
+ void *va, uint32_t desc)
+{
+ int opr_sz = simd_oprsz(desc);
+ int rot = extract32(desc, SIMD_DATA_SHIFT, 2);
+ int idx = H4(extract32(desc, SIMD_DATA_SHIFT + 2, 2));
+ int sel_a = rot & 1;
+ int sel_b = sel_a ^ 1;
+ int sub_i = (rot == 0 || rot == 3 ? -1 : 1);
+ uint32_t *d = vd, *n = vn, *m = vm, *a = va;
+
+ for (int seg = 0; seg < opr_sz / 4; seg += 4) {
+ uint32_t seg_m = m[seg + idx];
+ for (int e = 0; e < 4; e++) {
+ d[seg + e] = do_cdot_s(n[seg + e], seg_m, a[seg + e],
+ sel_a, sel_b, sub_i);
+ }
+ }
+}
+
+void HELPER(sve2_cdot_idx_d)(void *vd, void *vn, void *vm,
+ void *va, uint32_t desc)
+{
+ int seg, opr_sz = simd_oprsz(desc);
+ int rot = extract32(desc, SIMD_DATA_SHIFT, 2);
+ int idx = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
+ int sel_a = rot & 1;
+ int sel_b = sel_a ^ 1;
+ int sub_i = (rot == 0 || rot == 3 ? -1 : 1);
+ uint64_t *d = vd, *n = vn, *m = vm, *a = va;
+
+ for (seg = 0; seg < opr_sz / 8; seg += 2) {
+ uint64_t seg_m = m[seg + idx];
+ for (int e = 0; e < 2; e++) {
+ d[seg + e] = do_cdot_d(n[seg + e], seg_m, a[seg + e],
+ sel_a, sel_b, sub_i);
+ }
+ }
+}
+
+#define DO_ZZXZ(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
+ intptr_t i, j, idx = simd_data(desc); \
+ TYPE *d = vd, *a = va, *n = vn, *m = (TYPE *)vm + H(idx); \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[i]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = OP(n[i + j], mm, a[i + j]); \
+ } \
+ } \
+}
+
+#define DO_SQRDMLAH_H(N, M, A) \
+ ({ uint32_t discard; do_sqrdmlah_h(N, M, A, false, true, &discard); })
+#define DO_SQRDMLAH_S(N, M, A) \
+ ({ uint32_t discard; do_sqrdmlah_s(N, M, A, false, true, &discard); })
+#define DO_SQRDMLAH_D(N, M, A) do_sqrdmlah_d(N, M, A, false, true)
+
+DO_ZZXZ(sve2_sqrdmlah_idx_h, int16_t, H2, DO_SQRDMLAH_H)
+DO_ZZXZ(sve2_sqrdmlah_idx_s, int32_t, H4, DO_SQRDMLAH_S)
+DO_ZZXZ(sve2_sqrdmlah_idx_d, int64_t, H8, DO_SQRDMLAH_D)
+
+#define DO_SQRDMLSH_H(N, M, A) \
+ ({ uint32_t discard; do_sqrdmlah_h(N, M, A, true, true, &discard); })
+#define DO_SQRDMLSH_S(N, M, A) \
+ ({ uint32_t discard; do_sqrdmlah_s(N, M, A, true, true, &discard); })
+#define DO_SQRDMLSH_D(N, M, A) do_sqrdmlah_d(N, M, A, true, true)
+
+DO_ZZXZ(sve2_sqrdmlsh_idx_h, int16_t, H2, DO_SQRDMLSH_H)
+DO_ZZXZ(sve2_sqrdmlsh_idx_s, int32_t, H4, DO_SQRDMLSH_S)
+DO_ZZXZ(sve2_sqrdmlsh_idx_d, int64_t, H8, DO_SQRDMLSH_D)
+
+#undef DO_ZZXZ
+
+#define DO_ZZXW(NAME, TYPEW, TYPEN, HW, HN, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc); \
+ intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1) * sizeof(TYPEN); \
+ intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 1, 3) * sizeof(TYPEN); \
+ for (i = 0; i < oprsz; i += 16) { \
+ TYPEW mm = *(TYPEN *)(vm + HN(i + idx)); \
+ for (j = 0; j < 16; j += sizeof(TYPEW)) { \
+ TYPEW nn = *(TYPEN *)(vn + HN(i + j + sel)); \
+ TYPEW aa = *(TYPEW *)(va + HW(i + j)); \
+ *(TYPEW *)(vd + HW(i + j)) = OP(nn, mm, aa); \
+ } \
+ } \
+}
+
+#define DO_MLA(N, M, A) (A + N * M)
+
+DO_ZZXW(sve2_smlal_idx_s, int32_t, int16_t, H1_4, H1_2, DO_MLA)
+DO_ZZXW(sve2_smlal_idx_d, int64_t, int32_t, H1_8, H1_4, DO_MLA)
+DO_ZZXW(sve2_umlal_idx_s, uint32_t, uint16_t, H1_4, H1_2, DO_MLA)
+DO_ZZXW(sve2_umlal_idx_d, uint64_t, uint32_t, H1_8, H1_4, DO_MLA)
+
+#define DO_MLS(N, M, A) (A - N * M)
+
+DO_ZZXW(sve2_smlsl_idx_s, int32_t, int16_t, H1_4, H1_2, DO_MLS)
+DO_ZZXW(sve2_smlsl_idx_d, int64_t, int32_t, H1_8, H1_4, DO_MLS)
+DO_ZZXW(sve2_umlsl_idx_s, uint32_t, uint16_t, H1_4, H1_2, DO_MLS)
+DO_ZZXW(sve2_umlsl_idx_d, uint64_t, uint32_t, H1_8, H1_4, DO_MLS)
+
+#define DO_SQDMLAL_S(N, M, A) DO_SQADD_S(A, do_sqdmull_s(N, M))
+#define DO_SQDMLAL_D(N, M, A) do_sqadd_d(A, do_sqdmull_d(N, M))
+
+DO_ZZXW(sve2_sqdmlal_idx_s, int32_t, int16_t, H1_4, H1_2, DO_SQDMLAL_S)
+DO_ZZXW(sve2_sqdmlal_idx_d, int64_t, int32_t, H1_8, H1_4, DO_SQDMLAL_D)
+
+#define DO_SQDMLSL_S(N, M, A) DO_SQSUB_S(A, do_sqdmull_s(N, M))
+#define DO_SQDMLSL_D(N, M, A) do_sqsub_d(A, do_sqdmull_d(N, M))
+
+DO_ZZXW(sve2_sqdmlsl_idx_s, int32_t, int16_t, H1_4, H1_2, DO_SQDMLSL_S)
+DO_ZZXW(sve2_sqdmlsl_idx_d, int64_t, int32_t, H1_8, H1_4, DO_SQDMLSL_D)
+
+#undef DO_MLA
+#undef DO_MLS
+#undef DO_ZZXW
+
+#define DO_ZZX(NAME, TYPEW, TYPEN, HW, HN, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc); \
+ intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1) * sizeof(TYPEN); \
+ intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 1, 3) * sizeof(TYPEN); \
+ for (i = 0; i < oprsz; i += 16) { \
+ TYPEW mm = *(TYPEN *)(vm + HN(i + idx)); \
+ for (j = 0; j < 16; j += sizeof(TYPEW)) { \
+ TYPEW nn = *(TYPEN *)(vn + HN(i + j + sel)); \
+ *(TYPEW *)(vd + HW(i + j)) = OP(nn, mm); \
+ } \
+ } \
+}
+
+DO_ZZX(sve2_sqdmull_idx_s, int32_t, int16_t, H1_4, H1_2, do_sqdmull_s)
+DO_ZZX(sve2_sqdmull_idx_d, int64_t, int32_t, H1_8, H1_4, do_sqdmull_d)
+
+DO_ZZX(sve2_smull_idx_s, int32_t, int16_t, H1_4, H1_2, DO_MUL)
+DO_ZZX(sve2_smull_idx_d, int64_t, int32_t, H1_8, H1_4, DO_MUL)
+
+DO_ZZX(sve2_umull_idx_s, uint32_t, uint16_t, H1_4, H1_2, DO_MUL)
+DO_ZZX(sve2_umull_idx_d, uint64_t, uint32_t, H1_8, H1_4, DO_MUL)
+
+#undef DO_ZZX
+
+#define DO_BITPERM(NAME, TYPE, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
+ TYPE nn = *(TYPE *)(vn + i); \
+ TYPE mm = *(TYPE *)(vm + i); \
+ *(TYPE *)(vd + i) = OP(nn, mm, sizeof(TYPE) * 8); \
+ } \
+}
+
+static uint64_t bitextract(uint64_t data, uint64_t mask, int n)
+{
+ uint64_t res = 0;
+ int db, rb = 0;
+
+ for (db = 0; db < n; ++db) {
+ if ((mask >> db) & 1) {
+ res |= ((data >> db) & 1) << rb;
+ ++rb;
+ }
+ }
+ return res;
+}
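+
+/* E.g. bitextract(0b10110010, 0b01010101, 8) gathers the even-numbered
+ data bits 0,0,1,0 and packs them at the bottom, giving 0b0100. */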
+
+DO_BITPERM(sve2_bext_b, uint8_t, bitextract)
+DO_BITPERM(sve2_bext_h, uint16_t, bitextract)
+DO_BITPERM(sve2_bext_s, uint32_t, bitextract)
+DO_BITPERM(sve2_bext_d, uint64_t, bitextract)
+
+static uint64_t bitdeposit(uint64_t data, uint64_t mask, int n)
+{
+ uint64_t res = 0;
+ int rb, db = 0;
+
+ for (rb = 0; rb < n; ++rb) {
+ if ((mask >> rb) & 1) {
+ res |= ((data >> db) & 1) << rb;
+ ++db;
+ }
+ }
+ return res;
+}
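+
+/* E.g. bitdeposit(0b0100, 0b01010101, 8) scatters the low data bits to
+ the even bit positions, giving 0b00010000, which puts the bits
+ extracted in the example above back where they came from. */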
+
+DO_BITPERM(sve2_bdep_b, uint8_t, bitdeposit)
+DO_BITPERM(sve2_bdep_h, uint16_t, bitdeposit)
+DO_BITPERM(sve2_bdep_s, uint32_t, bitdeposit)
+DO_BITPERM(sve2_bdep_d, uint64_t, bitdeposit)
+
+static uint64_t bitgroup(uint64_t data, uint64_t mask, int n)
+{
+ uint64_t resm = 0, resu = 0;
+ int db, rbm = 0, rbu = 0;
+
+ for (db = 0; db < n; ++db) {
+ uint64_t val = (data >> db) & 1;
+ if ((mask >> db) & 1) {
+ resm |= val << rbm++;
+ } else {
+ resu |= val << rbu++;
+ }
+ }
+
+ return resm | (resu << rbm);
+}
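+
+/* E.g. bitgroup(0b10110010, 0b01010101, 8) packs the mask-selected
+ (even-numbered) bits at the bottom and the remaining bits above them,
+ giving 0b11010100. */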
+
+DO_BITPERM(sve2_bgrp_b, uint8_t, bitgroup)
+DO_BITPERM(sve2_bgrp_h, uint16_t, bitgroup)
+DO_BITPERM(sve2_bgrp_s, uint32_t, bitgroup)
+DO_BITPERM(sve2_bgrp_d, uint64_t, bitgroup)
+
+#undef DO_BITPERM
+
+#define DO_CADD(NAME, TYPE, H, ADD_OP, SUB_OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ int sub_r = simd_data(desc); \
+ if (sub_r) { \
+ for (i = 0; i < opr_sz; i += 2 * sizeof(TYPE)) { \
+ TYPE acc_r = *(TYPE *)(vn + H(i)); \
+ TYPE acc_i = *(TYPE *)(vn + H(i + sizeof(TYPE))); \
+ TYPE el2_r = *(TYPE *)(vm + H(i)); \
+ TYPE el2_i = *(TYPE *)(vm + H(i + sizeof(TYPE))); \
+ acc_r = ADD_OP(acc_r, el2_i); \
+ acc_i = SUB_OP(acc_i, el2_r); \
+ *(TYPE *)(vd + H(i)) = acc_r; \
+ *(TYPE *)(vd + H(i + sizeof(TYPE))) = acc_i; \
+ } \
+ } else { \
+ for (i = 0; i < opr_sz; i += 2 * sizeof(TYPE)) { \
+ TYPE acc_r = *(TYPE *)(vn + H(i)); \
+ TYPE acc_i = *(TYPE *)(vn + H(i + sizeof(TYPE))); \
+ TYPE el2_r = *(TYPE *)(vm + H(i)); \
+ TYPE el2_i = *(TYPE *)(vm + H(i + sizeof(TYPE))); \
+ acc_r = SUB_OP(acc_r, el2_i); \
+ acc_i = ADD_OP(acc_i, el2_r); \
+ *(TYPE *)(vd + H(i)) = acc_r; \
+ *(TYPE *)(vd + H(i + sizeof(TYPE))) = acc_i; \
+ } \
+ } \
+}
+
+DO_CADD(sve2_cadd_b, int8_t, H1, DO_ADD, DO_SUB)
+DO_CADD(sve2_cadd_h, int16_t, H1_2, DO_ADD, DO_SUB)
+DO_CADD(sve2_cadd_s, int32_t, H1_4, DO_ADD, DO_SUB)
+DO_CADD(sve2_cadd_d, int64_t, H1_8, DO_ADD, DO_SUB)
+
+DO_CADD(sve2_sqcadd_b, int8_t, H1, DO_SQADD_B, DO_SQSUB_B)
+DO_CADD(sve2_sqcadd_h, int16_t, H1_2, DO_SQADD_H, DO_SQSUB_H)
+DO_CADD(sve2_sqcadd_s, int32_t, H1_4, DO_SQADD_S, DO_SQSUB_S)
+DO_CADD(sve2_sqcadd_d, int64_t, H1_8, do_sqadd_d, do_sqsub_d)
+
+#undef DO_CADD
+
+#define DO_ZZI_SHLL(NAME, TYPEW, TYPEN, HW, HN) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ intptr_t sel = (simd_data(desc) & 1) * sizeof(TYPEN); \
+ int shift = simd_data(desc) >> 1; \
+ for (i = 0; i < opr_sz; i += sizeof(TYPEW)) { \
+ TYPEW nn = *(TYPEN *)(vn + HN(i + sel)); \
+ *(TYPEW *)(vd + HW(i)) = nn << shift; \
+ } \
+}
+
+DO_ZZI_SHLL(sve2_sshll_h, int16_t, int8_t, H1_2, H1)
+DO_ZZI_SHLL(sve2_sshll_s, int32_t, int16_t, H1_4, H1_2)
+DO_ZZI_SHLL(sve2_sshll_d, int64_t, int32_t, H1_8, H1_4)
+
+DO_ZZI_SHLL(sve2_ushll_h, uint16_t, uint8_t, H1_2, H1)
+DO_ZZI_SHLL(sve2_ushll_s, uint32_t, uint16_t, H1_4, H1_2)
+DO_ZZI_SHLL(sve2_ushll_d, uint64_t, uint32_t, H1_8, H1_4)
+
+#undef DO_ZZI_SHLL
+
+/* Two-operand reduction expander, controlled by a predicate.
+ * The difference between TYPERED and TYPERET has to do with
+ * sign-extension. E.g. for SMAX, TYPERED must be signed,
+ * but TYPERET must be unsigned so that a 32-bit value
+ * is not sign-extended to the ABI uint64_t return type.
+ */
+/* ??? If we were to vectorize this by hand the reduction ordering
+ * would change. For integer operands, this is perfectly fine.
+ */
+#define DO_VPZ(NAME, TYPEELT, TYPERED, TYPERET, H, INIT, OP) \
+uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ TYPERED ret = INIT; \
+ for (i = 0; i < opr_sz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ TYPEELT nn = *(TYPEELT *)(vn + H(i)); \
+ ret = OP(ret, nn); \
+ } \
+ i += sizeof(TYPEELT), pg >>= sizeof(TYPEELT); \
+ } while (i & 15); \
+ } \
+ return (TYPERET)ret; \
+}
+
+#define DO_VPZ_D(NAME, TYPEE, TYPER, INIT, OP) \
+uint64_t HELPER(NAME)(void *vn, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
+ TYPEE *n = vn; \
+ uint8_t *pg = vg; \
+ TYPER ret = INIT; \
+ for (i = 0; i < opr_sz; i += 1) { \
+ if (pg[H1(i)] & 1) { \
+ TYPEE nn = n[i]; \
+ ret = OP(ret, nn); \
+ } \
+ } \
+ return ret; \
+}
+
+DO_VPZ(sve_orv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_ORR)
+DO_VPZ(sve_orv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_ORR)
+DO_VPZ(sve_orv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_ORR)
+DO_VPZ_D(sve_orv_d, uint64_t, uint64_t, 0, DO_ORR)
+
+DO_VPZ(sve_eorv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_EOR)
+DO_VPZ(sve_eorv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_EOR)
+DO_VPZ(sve_eorv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_EOR)
+DO_VPZ_D(sve_eorv_d, uint64_t, uint64_t, 0, DO_EOR)
+
+DO_VPZ(sve_andv_b, uint8_t, uint8_t, uint8_t, H1, -1, DO_AND)
+DO_VPZ(sve_andv_h, uint16_t, uint16_t, uint16_t, H1_2, -1, DO_AND)
+DO_VPZ(sve_andv_s, uint32_t, uint32_t, uint32_t, H1_4, -1, DO_AND)
+DO_VPZ_D(sve_andv_d, uint64_t, uint64_t, -1, DO_AND)
+
+DO_VPZ(sve_saddv_b, int8_t, uint64_t, uint64_t, H1, 0, DO_ADD)
+DO_VPZ(sve_saddv_h, int16_t, uint64_t, uint64_t, H1_2, 0, DO_ADD)
+DO_VPZ(sve_saddv_s, int32_t, uint64_t, uint64_t, H1_4, 0, DO_ADD)
+
+DO_VPZ(sve_uaddv_b, uint8_t, uint64_t, uint64_t, H1, 0, DO_ADD)
+DO_VPZ(sve_uaddv_h, uint16_t, uint64_t, uint64_t, H1_2, 0, DO_ADD)
+DO_VPZ(sve_uaddv_s, uint32_t, uint64_t, uint64_t, H1_4, 0, DO_ADD)
+DO_VPZ_D(sve_uaddv_d, uint64_t, uint64_t, 0, DO_ADD)
+
+DO_VPZ(sve_smaxv_b, int8_t, int8_t, uint8_t, H1, INT8_MIN, DO_MAX)
+DO_VPZ(sve_smaxv_h, int16_t, int16_t, uint16_t, H1_2, INT16_MIN, DO_MAX)
+DO_VPZ(sve_smaxv_s, int32_t, int32_t, uint32_t, H1_4, INT32_MIN, DO_MAX)
+DO_VPZ_D(sve_smaxv_d, int64_t, int64_t, INT64_MIN, DO_MAX)
+
+DO_VPZ(sve_umaxv_b, uint8_t, uint8_t, uint8_t, H1, 0, DO_MAX)
+DO_VPZ(sve_umaxv_h, uint16_t, uint16_t, uint16_t, H1_2, 0, DO_MAX)
+DO_VPZ(sve_umaxv_s, uint32_t, uint32_t, uint32_t, H1_4, 0, DO_MAX)
+DO_VPZ_D(sve_umaxv_d, uint64_t, uint64_t, 0, DO_MAX)
+
+DO_VPZ(sve_sminv_b, int8_t, int8_t, uint8_t, H1, INT8_MAX, DO_MIN)
+DO_VPZ(sve_sminv_h, int16_t, int16_t, uint16_t, H1_2, INT16_MAX, DO_MIN)
+DO_VPZ(sve_sminv_s, int32_t, int32_t, uint32_t, H1_4, INT32_MAX, DO_MIN)
+DO_VPZ_D(sve_sminv_d, int64_t, int64_t, INT64_MAX, DO_MIN)
+
+DO_VPZ(sve_uminv_b, uint8_t, uint8_t, uint8_t, H1, -1, DO_MIN)
+DO_VPZ(sve_uminv_h, uint16_t, uint16_t, uint16_t, H1_2, -1, DO_MIN)
+DO_VPZ(sve_uminv_s, uint32_t, uint32_t, uint32_t, H1_4, -1, DO_MIN)
+DO_VPZ_D(sve_uminv_d, uint64_t, uint64_t, -1, DO_MIN)
+
+#undef DO_VPZ
+#undef DO_VPZ_D
+
+/* Two vector operand, one scalar operand, unpredicated. */
+#define DO_ZZI(NAME, TYPE, OP) \
+void HELPER(NAME)(void *vd, void *vn, uint64_t s64, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(TYPE); \
+ TYPE s = s64, *d = vd, *n = vn; \
+ for (i = 0; i < opr_sz; ++i) { \
+ d[i] = OP(n[i], s); \
+ } \
+}
+
+#define DO_SUBR(X, Y) (Y - X)
+
+DO_ZZI(sve_subri_b, uint8_t, DO_SUBR)
+DO_ZZI(sve_subri_h, uint16_t, DO_SUBR)
+DO_ZZI(sve_subri_s, uint32_t, DO_SUBR)
+DO_ZZI(sve_subri_d, uint64_t, DO_SUBR)
+
+DO_ZZI(sve_smaxi_b, int8_t, DO_MAX)
+DO_ZZI(sve_smaxi_h, int16_t, DO_MAX)
+DO_ZZI(sve_smaxi_s, int32_t, DO_MAX)
+DO_ZZI(sve_smaxi_d, int64_t, DO_MAX)
+
+DO_ZZI(sve_smini_b, int8_t, DO_MIN)
+DO_ZZI(sve_smini_h, int16_t, DO_MIN)
+DO_ZZI(sve_smini_s, int32_t, DO_MIN)
+DO_ZZI(sve_smini_d, int64_t, DO_MIN)
+
+DO_ZZI(sve_umaxi_b, uint8_t, DO_MAX)
+DO_ZZI(sve_umaxi_h, uint16_t, DO_MAX)
+DO_ZZI(sve_umaxi_s, uint32_t, DO_MAX)
+DO_ZZI(sve_umaxi_d, uint64_t, DO_MAX)
+
+DO_ZZI(sve_umini_b, uint8_t, DO_MIN)
+DO_ZZI(sve_umini_h, uint16_t, DO_MIN)
+DO_ZZI(sve_umini_s, uint32_t, DO_MIN)
+DO_ZZI(sve_umini_d, uint64_t, DO_MIN)
+
+#undef DO_ZZI
+
+#undef DO_AND
+#undef DO_ORR
+#undef DO_EOR
+#undef DO_BIC
+#undef DO_ADD
+#undef DO_SUB
+#undef DO_MAX
+#undef DO_MIN
+#undef DO_ABD
+#undef DO_MUL
+#undef DO_DIV
+#undef DO_ASR
+#undef DO_LSR
+#undef DO_LSL
+#undef DO_SUBR
+
+/* Similar to the ARM LastActiveElement pseudocode function, except the
+ result is multiplied by the element size. This includes the not found
+ indication; e.g. not found for esz=3 is -8. */
+static intptr_t last_active_element(uint64_t *g, intptr_t words, intptr_t esz)
+{
+ uint64_t mask = pred_esz_masks[esz];
+ intptr_t i = words;
+
+ do {
+ uint64_t this_g = g[--i] & mask;
+ if (this_g) {
+ return i * 64 + (63 - clz64(this_g));
+ }
+ } while (i > 0);
+ return (intptr_t)-1 << esz;
+}
+
+uint32_t HELPER(sve_pfirst)(void *vd, void *vg, uint32_t pred_desc)
+{
+ intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
+ uint32_t flags = PREDTEST_INIT;
+ uint64_t *d = vd, *g = vg;
+ intptr_t i = 0;
+
+ do {
+ uint64_t this_d = d[i];
+ uint64_t this_g = g[i];
+
+ if (this_g) {
+ if (!(flags & 4)) {
+ /* Set in D the first bit of G. */
+ this_d |= this_g & -this_g;
+ d[i] = this_d;
+ }
+ flags = iter_predtest_fwd(this_d, this_g, flags);
+ }
+ } while (++i < words);
+
+ return flags;
+}
+
+uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc)
+{
+ intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
+ intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
+ uint32_t flags = PREDTEST_INIT;
+ uint64_t *d = vd, *g = vg, esz_mask;
+ intptr_t i, next;
+
+ next = last_active_element(vd, words, esz) + (1 << esz);
+ esz_mask = pred_esz_masks[esz];
+
+ /* Similar to the pseudocode for pnext, but scaled by ESZ
+ so that we find the correct bit. */
+ if (next < words * 64) {
+ uint64_t mask = -1;
+
+ if (next & 63) {
+ mask = ~((1ull << (next & 63)) - 1);
+ next &= -64;
+ }
+ do {
+ uint64_t this_g = g[next / 64] & esz_mask & mask;
+ if (this_g != 0) {
+ next = (next & -64) + ctz64(this_g);
+ break;
+ }
+ next += 64;
+ mask = -1;
+ } while (next < words * 64);
+ }
+
+ i = 0;
+ do {
+ uint64_t this_d = 0;
+ if (i == next / 64) {
+ this_d = 1ull << (next & 63);
+ }
+ d[i] = this_d;
+ flags = iter_predtest_fwd(this_d, g[i] & esz_mask, flags);
+ } while (++i < words);
+
+ return flags;
+}
+
+/*
+ * Copy Zn into Zd, storing zeros into the inactive elements.
+ * If inv, store zeros into the active elements instead.
+ */
+void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
+ uint64_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = n[i] & (expand_pred_b(pg[H1(i)]) ^ inv);
+ }
+}
+
+void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
+ uint64_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = n[i] & (expand_pred_h(pg[H1(i)]) ^ inv);
+ }
+}
+
+void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
+ uint64_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = n[i] & (expand_pred_s(pg[H1(i)]) ^ inv);
+ }
+}
+
+void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+ uint8_t inv = simd_data(desc);
+
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = n[i] & -(uint64_t)((pg[H1(i)] ^ inv) & 1);
+ }
+}
+
+/* Three-operand expander, immediate operand, controlled by a predicate.
+ */
+#define DO_ZPZI(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ TYPE imm = simd_data(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(nn, imm); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ } while (i & 15); \
+ } \
+}
+
+/* Similarly, specialized for 64-bit operands. */
+#define DO_ZPZI_D(NAME, TYPE, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
+ TYPE *d = vd, *n = vn; \
+ TYPE imm = simd_data(desc); \
+ uint8_t *pg = vg; \
+ for (i = 0; i < opr_sz; i += 1) { \
+ if (pg[H1(i)] & 1) { \
+ TYPE nn = n[i]; \
+ d[i] = OP(nn, imm); \
+ } \
+ } \
+}
+
+#define DO_SHR(N, M) (N >> M)
+#define DO_SHL(N, M) (N << M)
+
+/* Arithmetic shift right for division. This rounds negative numbers
+ toward zero as per signed division. Therefore before shifting,
+ when N is negative, add 2**M-1. */
+#define DO_ASRD(N, M) ((N + (N < 0 ? ((__typeof(N))1 << M) - 1 : 0)) >> M)
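+/* E.g. DO_ASRD(-5, 1): a plain -5 >> 1 gives -3, but adding 2**1 - 1
+ first gives -4 >> 1 = -2, matching -5 / 2. */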
+
+static inline uint64_t do_urshr(uint64_t x, unsigned sh)
+{
+ if (likely(sh < 64)) {
+ return (x >> sh) + ((x >> (sh - 1)) & 1);
+ } else if (sh == 64) {
+ return x >> 63;
+ } else {
+ return 0;
+ }
+}
+
+static inline int64_t do_srshr(int64_t x, unsigned sh)
+{
+ if (likely(sh < 64)) {
+ return (x >> sh) + ((x >> (sh - 1)) & 1);
+ } else {
+ /* Rounding the sign bit always produces 0. */
+ return 0;
+ }
+}
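+
+/* Both forms round by adding back the last bit shifted out,
+ i.e. bit sh - 1 of the input. */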
+
+DO_ZPZI(sve_asr_zpzi_b, int8_t, H1, DO_SHR)
+DO_ZPZI(sve_asr_zpzi_h, int16_t, H1_2, DO_SHR)
+DO_ZPZI(sve_asr_zpzi_s, int32_t, H1_4, DO_SHR)
+DO_ZPZI_D(sve_asr_zpzi_d, int64_t, DO_SHR)
+
+DO_ZPZI(sve_lsr_zpzi_b, uint8_t, H1, DO_SHR)
+DO_ZPZI(sve_lsr_zpzi_h, uint16_t, H1_2, DO_SHR)
+DO_ZPZI(sve_lsr_zpzi_s, uint32_t, H1_4, DO_SHR)
+DO_ZPZI_D(sve_lsr_zpzi_d, uint64_t, DO_SHR)
+
+DO_ZPZI(sve_lsl_zpzi_b, uint8_t, H1, DO_SHL)
+DO_ZPZI(sve_lsl_zpzi_h, uint16_t, H1_2, DO_SHL)
+DO_ZPZI(sve_lsl_zpzi_s, uint32_t, H1_4, DO_SHL)
+DO_ZPZI_D(sve_lsl_zpzi_d, uint64_t, DO_SHL)
+
+DO_ZPZI(sve_asrd_b, int8_t, H1, DO_ASRD)
+DO_ZPZI(sve_asrd_h, int16_t, H1_2, DO_ASRD)
+DO_ZPZI(sve_asrd_s, int32_t, H1_4, DO_ASRD)
+DO_ZPZI_D(sve_asrd_d, int64_t, DO_ASRD)
+
+/* SVE2 bitwise shift by immediate */
+DO_ZPZI(sve2_sqshl_zpzi_b, int8_t, H1, do_sqshl_b)
+DO_ZPZI(sve2_sqshl_zpzi_h, int16_t, H1_2, do_sqshl_h)
+DO_ZPZI(sve2_sqshl_zpzi_s, int32_t, H1_4, do_sqshl_s)
+DO_ZPZI_D(sve2_sqshl_zpzi_d, int64_t, do_sqshl_d)
+
+DO_ZPZI(sve2_uqshl_zpzi_b, uint8_t, H1, do_uqshl_b)
+DO_ZPZI(sve2_uqshl_zpzi_h, uint16_t, H1_2, do_uqshl_h)
+DO_ZPZI(sve2_uqshl_zpzi_s, uint32_t, H1_4, do_uqshl_s)
+DO_ZPZI_D(sve2_uqshl_zpzi_d, uint64_t, do_uqshl_d)
+
+DO_ZPZI(sve2_srshr_b, int8_t, H1, do_srshr)
+DO_ZPZI(sve2_srshr_h, int16_t, H1_2, do_srshr)
+DO_ZPZI(sve2_srshr_s, int32_t, H1_4, do_srshr)
+DO_ZPZI_D(sve2_srshr_d, int64_t, do_srshr)
+
+DO_ZPZI(sve2_urshr_b, uint8_t, H1, do_urshr)
+DO_ZPZI(sve2_urshr_h, uint16_t, H1_2, do_urshr)
+DO_ZPZI(sve2_urshr_s, uint32_t, H1_4, do_urshr)
+DO_ZPZI_D(sve2_urshr_d, uint64_t, do_urshr)
+
+#define do_suqrshl_b(n, m) \
+ ({ uint32_t discard; do_suqrshl_bhs(n, (int8_t)m, 8, false, &discard); })
+#define do_suqrshl_h(n, m) \
+ ({ uint32_t discard; do_suqrshl_bhs(n, (int16_t)m, 16, false, &discard); })
+#define do_suqrshl_s(n, m) \
+ ({ uint32_t discard; do_suqrshl_bhs(n, m, 32, false, &discard); })
+#define do_suqrshl_d(n, m) \
+ ({ uint32_t discard; do_suqrshl_d(n, m, false, &discard); })
+
+DO_ZPZI(sve2_sqshlu_b, int8_t, H1, do_suqrshl_b)
+DO_ZPZI(sve2_sqshlu_h, int16_t, H1_2, do_suqrshl_h)
+DO_ZPZI(sve2_sqshlu_s, int32_t, H1_4, do_suqrshl_s)
+DO_ZPZI_D(sve2_sqshlu_d, int64_t, do_suqrshl_d)
+
+#undef DO_ASRD
+#undef DO_ZPZI
+#undef DO_ZPZI_D
+
+#define DO_SHRNB(NAME, TYPEW, TYPEN, OP) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPEW)) { \
+ TYPEW nn = *(TYPEW *)(vn + i); \
+ *(TYPEW *)(vd + i) = (TYPEN)OP(nn, shift); \
+ } \
+}
+
+#define DO_SHRNT(NAME, TYPEW, TYPEN, HW, HN, OP) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPEW)) { \
+ TYPEW nn = *(TYPEW *)(vn + HW(i)); \
+ *(TYPEN *)(vd + HN(i + sizeof(TYPEN))) = OP(nn, shift); \
+ } \
+}
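+
+/*
+ * The "bottom" (SHRNB) form stores the narrowed value through a TYPEW
+ * pointer, so the even-numbered narrow elements of Zd receive the result
+ * and the odd-numbered ones are zeroed by the conversion.  The "top"
+ * (SHRNT) form stores a TYPEN at an offset of sizeof(TYPEN) into each
+ * wide element, filling the odd-numbered narrow elements and leaving the
+ * even-numbered ones untouched.
+ */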
+
+DO_SHRNB(sve2_shrnb_h, uint16_t, uint8_t, DO_SHR)
+DO_SHRNB(sve2_shrnb_s, uint32_t, uint16_t, DO_SHR)
+DO_SHRNB(sve2_shrnb_d, uint64_t, uint32_t, DO_SHR)
+
+DO_SHRNT(sve2_shrnt_h, uint16_t, uint8_t, H1_2, H1, DO_SHR)
+DO_SHRNT(sve2_shrnt_s, uint32_t, uint16_t, H1_4, H1_2, DO_SHR)
+DO_SHRNT(sve2_shrnt_d, uint64_t, uint32_t, H1_8, H1_4, DO_SHR)
+
+DO_SHRNB(sve2_rshrnb_h, uint16_t, uint8_t, do_urshr)
+DO_SHRNB(sve2_rshrnb_s, uint32_t, uint16_t, do_urshr)
+DO_SHRNB(sve2_rshrnb_d, uint64_t, uint32_t, do_urshr)
+
+DO_SHRNT(sve2_rshrnt_h, uint16_t, uint8_t, H1_2, H1, do_urshr)
+DO_SHRNT(sve2_rshrnt_s, uint32_t, uint16_t, H1_4, H1_2, do_urshr)
+DO_SHRNT(sve2_rshrnt_d, uint64_t, uint32_t, H1_8, H1_4, do_urshr)
+
+#define DO_SQSHRUN_H(x, sh) do_sat_bhs((int64_t)(x) >> sh, 0, UINT8_MAX)
+#define DO_SQSHRUN_S(x, sh) do_sat_bhs((int64_t)(x) >> sh, 0, UINT16_MAX)
+#define DO_SQSHRUN_D(x, sh) \
+ do_sat_bhs((int64_t)(x) >> (sh < 64 ? sh : 63), 0, UINT32_MAX)
+
+DO_SHRNB(sve2_sqshrunb_h, int16_t, uint8_t, DO_SQSHRUN_H)
+DO_SHRNB(sve2_sqshrunb_s, int32_t, uint16_t, DO_SQSHRUN_S)
+DO_SHRNB(sve2_sqshrunb_d, int64_t, uint32_t, DO_SQSHRUN_D)
+
+DO_SHRNT(sve2_sqshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQSHRUN_H)
+DO_SHRNT(sve2_sqshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQSHRUN_S)
+DO_SHRNT(sve2_sqshrunt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQSHRUN_D)
+
+#define DO_SQRSHRUN_H(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT8_MAX)
+#define DO_SQRSHRUN_S(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT16_MAX)
+#define DO_SQRSHRUN_D(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT32_MAX)
+
+DO_SHRNB(sve2_sqrshrunb_h, int16_t, uint8_t, DO_SQRSHRUN_H)
+DO_SHRNB(sve2_sqrshrunb_s, int32_t, uint16_t, DO_SQRSHRUN_S)
+DO_SHRNB(sve2_sqrshrunb_d, int64_t, uint32_t, DO_SQRSHRUN_D)
+
+DO_SHRNT(sve2_sqrshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQRSHRUN_H)
+DO_SHRNT(sve2_sqrshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQRSHRUN_S)
+DO_SHRNT(sve2_sqrshrunt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQRSHRUN_D)
+
+#define DO_SQSHRN_H(x, sh) do_sat_bhs(x >> sh, INT8_MIN, INT8_MAX)
+#define DO_SQSHRN_S(x, sh) do_sat_bhs(x >> sh, INT16_MIN, INT16_MAX)
+#define DO_SQSHRN_D(x, sh) do_sat_bhs(x >> sh, INT32_MIN, INT32_MAX)
+
+DO_SHRNB(sve2_sqshrnb_h, int16_t, uint8_t, DO_SQSHRN_H)
+DO_SHRNB(sve2_sqshrnb_s, int32_t, uint16_t, DO_SQSHRN_S)
+DO_SHRNB(sve2_sqshrnb_d, int64_t, uint32_t, DO_SQSHRN_D)
+
+DO_SHRNT(sve2_sqshrnt_h, int16_t, uint8_t, H1_2, H1, DO_SQSHRN_H)
+DO_SHRNT(sve2_sqshrnt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQSHRN_S)
+DO_SHRNT(sve2_sqshrnt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQSHRN_D)
+
+#define DO_SQRSHRN_H(x, sh) do_sat_bhs(do_srshr(x, sh), INT8_MIN, INT8_MAX)
+#define DO_SQRSHRN_S(x, sh) do_sat_bhs(do_srshr(x, sh), INT16_MIN, INT16_MAX)
+#define DO_SQRSHRN_D(x, sh) do_sat_bhs(do_srshr(x, sh), INT32_MIN, INT32_MAX)
+
+DO_SHRNB(sve2_sqrshrnb_h, int16_t, uint8_t, DO_SQRSHRN_H)
+DO_SHRNB(sve2_sqrshrnb_s, int32_t, uint16_t, DO_SQRSHRN_S)
+DO_SHRNB(sve2_sqrshrnb_d, int64_t, uint32_t, DO_SQRSHRN_D)
+
+DO_SHRNT(sve2_sqrshrnt_h, int16_t, uint8_t, H1_2, H1, DO_SQRSHRN_H)
+DO_SHRNT(sve2_sqrshrnt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQRSHRN_S)
+DO_SHRNT(sve2_sqrshrnt_d, int64_t, uint32_t, H1_8, H1_4, DO_SQRSHRN_D)
+
+#define DO_UQSHRN_H(x, sh) MIN(x >> sh, UINT8_MAX)
+#define DO_UQSHRN_S(x, sh) MIN(x >> sh, UINT16_MAX)
+#define DO_UQSHRN_D(x, sh) MIN(x >> sh, UINT32_MAX)
+
+DO_SHRNB(sve2_uqshrnb_h, uint16_t, uint8_t, DO_UQSHRN_H)
+DO_SHRNB(sve2_uqshrnb_s, uint32_t, uint16_t, DO_UQSHRN_S)
+DO_SHRNB(sve2_uqshrnb_d, uint64_t, uint32_t, DO_UQSHRN_D)
+
+DO_SHRNT(sve2_uqshrnt_h, uint16_t, uint8_t, H1_2, H1, DO_UQSHRN_H)
+DO_SHRNT(sve2_uqshrnt_s, uint32_t, uint16_t, H1_4, H1_2, DO_UQSHRN_S)
+DO_SHRNT(sve2_uqshrnt_d, uint64_t, uint32_t, H1_8, H1_4, DO_UQSHRN_D)
+
+#define DO_UQRSHRN_H(x, sh) MIN(do_urshr(x, sh), UINT8_MAX)
+#define DO_UQRSHRN_S(x, sh) MIN(do_urshr(x, sh), UINT16_MAX)
+#define DO_UQRSHRN_D(x, sh) MIN(do_urshr(x, sh), UINT32_MAX)
+
+DO_SHRNB(sve2_uqrshrnb_h, uint16_t, uint8_t, DO_UQRSHRN_H)
+DO_SHRNB(sve2_uqrshrnb_s, uint32_t, uint16_t, DO_UQRSHRN_S)
+DO_SHRNB(sve2_uqrshrnb_d, uint64_t, uint32_t, DO_UQRSHRN_D)
+
+DO_SHRNT(sve2_uqrshrnt_h, uint16_t, uint8_t, H1_2, H1, DO_UQRSHRN_H)
+DO_SHRNT(sve2_uqrshrnt_s, uint32_t, uint16_t, H1_4, H1_2, DO_UQRSHRN_S)
+DO_SHRNT(sve2_uqrshrnt_d, uint64_t, uint32_t, H1_8, H1_4, DO_UQRSHRN_D)
+
+#undef DO_SHRNB
+#undef DO_SHRNT
+
+#define DO_BINOPNB(NAME, TYPEW, TYPEN, SHIFT, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPEW)) { \
+ TYPEW nn = *(TYPEW *)(vn + i); \
+ TYPEW mm = *(TYPEW *)(vm + i); \
+ *(TYPEW *)(vd + i) = (TYPEN)OP(nn, mm, SHIFT); \
+ } \
+}
+
+#define DO_BINOPNT(NAME, TYPEW, TYPEN, SHIFT, HW, HN, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPEW)) { \
+ TYPEW nn = *(TYPEW *)(vn + HW(i)); \
+ TYPEW mm = *(TYPEW *)(vm + HW(i)); \
+ *(TYPEN *)(vd + HN(i + sizeof(TYPEN))) = OP(nn, mm, SHIFT); \
+ } \
+}
+
+#define DO_ADDHN(N, M, SH) ((N + M) >> SH)
+#define DO_RADDHN(N, M, SH) ((N + M + ((__typeof(N))1 << (SH - 1))) >> SH)
+#define DO_SUBHN(N, M, SH) ((N - M) >> SH)
+#define DO_RSUBHN(N, M, SH) ((N - M + ((__typeof(N))1 << (SH - 1))) >> SH)
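+
+/*
+ * For example, with 16-bit inputs N = 0x1280, M = 0 and SH = 8,
+ * DO_ADDHN returns 0x12, while DO_RADDHN adds the rounding constant
+ * 0x80 first and returns 0x13.
+ */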
+
+DO_BINOPNB(sve2_addhnb_h, uint16_t, uint8_t, 8, DO_ADDHN)
+DO_BINOPNB(sve2_addhnb_s, uint32_t, uint16_t, 16, DO_ADDHN)
+DO_BINOPNB(sve2_addhnb_d, uint64_t, uint32_t, 32, DO_ADDHN)
+
+DO_BINOPNT(sve2_addhnt_h, uint16_t, uint8_t, 8, H1_2, H1, DO_ADDHN)
+DO_BINOPNT(sve2_addhnt_s, uint32_t, uint16_t, 16, H1_4, H1_2, DO_ADDHN)
+DO_BINOPNT(sve2_addhnt_d, uint64_t, uint32_t, 32, H1_8, H1_4, DO_ADDHN)
+
+DO_BINOPNB(sve2_raddhnb_h, uint16_t, uint8_t, 8, DO_RADDHN)
+DO_BINOPNB(sve2_raddhnb_s, uint32_t, uint16_t, 16, DO_RADDHN)
+DO_BINOPNB(sve2_raddhnb_d, uint64_t, uint32_t, 32, DO_RADDHN)
+
+DO_BINOPNT(sve2_raddhnt_h, uint16_t, uint8_t, 8, H1_2, H1, DO_RADDHN)
+DO_BINOPNT(sve2_raddhnt_s, uint32_t, uint16_t, 16, H1_4, H1_2, DO_RADDHN)
+DO_BINOPNT(sve2_raddhnt_d, uint64_t, uint32_t, 32, H1_8, H1_4, DO_RADDHN)
+
+DO_BINOPNB(sve2_subhnb_h, uint16_t, uint8_t, 8, DO_SUBHN)
+DO_BINOPNB(sve2_subhnb_s, uint32_t, uint16_t, 16, DO_SUBHN)
+DO_BINOPNB(sve2_subhnb_d, uint64_t, uint32_t, 32, DO_SUBHN)
+
+DO_BINOPNT(sve2_subhnt_h, uint16_t, uint8_t, 8, H1_2, H1, DO_SUBHN)
+DO_BINOPNT(sve2_subhnt_s, uint32_t, uint16_t, 16, H1_4, H1_2, DO_SUBHN)
+DO_BINOPNT(sve2_subhnt_d, uint64_t, uint32_t, 32, H1_8, H1_4, DO_SUBHN)
+
+DO_BINOPNB(sve2_rsubhnb_h, uint16_t, uint8_t, 8, DO_RSUBHN)
+DO_BINOPNB(sve2_rsubhnb_s, uint32_t, uint16_t, 16, DO_RSUBHN)
+DO_BINOPNB(sve2_rsubhnb_d, uint64_t, uint32_t, 32, DO_RSUBHN)
+
+DO_BINOPNT(sve2_rsubhnt_h, uint16_t, uint8_t, 8, H1_2, H1, DO_RSUBHN)
+DO_BINOPNT(sve2_rsubhnt_s, uint32_t, uint16_t, 16, H1_4, H1_2, DO_RSUBHN)
+DO_BINOPNT(sve2_rsubhnt_d, uint64_t, uint32_t, 32, H1_8, H1_4, DO_RSUBHN)
+
+#undef DO_RSUBHN
+#undef DO_SUBHN
+#undef DO_RADDHN
+#undef DO_ADDHN
+
+#undef DO_BINOPNB
+
+/* Fully general four-operand expander, controlled by a predicate. */
+#define DO_ZPZZZ(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \
+ void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ TYPE mm = *(TYPE *)(vm + H(i)); \
+ TYPE aa = *(TYPE *)(va + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(aa, nn, mm); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ } while (i & 15); \
+ } \
+}
+
+/* Similarly, specialized for 64-bit operands. */
+#define DO_ZPZZZ_D(NAME, TYPE, OP) \
+void HELPER(NAME)(void *vd, void *va, void *vn, void *vm, \
+ void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
+ TYPE *d = vd, *a = va, *n = vn, *m = vm; \
+ uint8_t *pg = vg; \
+ for (i = 0; i < opr_sz; i += 1) { \
+ if (pg[H1(i)] & 1) { \
+ TYPE aa = a[i], nn = n[i], mm = m[i]; \
+ d[i] = OP(aa, nn, mm); \
+ } \
+ } \
+}
+
+#define DO_MLA(A, N, M) (A + N * M)
+#define DO_MLS(A, N, M) (A - N * M)
+
+DO_ZPZZZ(sve_mla_b, uint8_t, H1, DO_MLA)
+DO_ZPZZZ(sve_mls_b, uint8_t, H1, DO_MLS)
+
+DO_ZPZZZ(sve_mla_h, uint16_t, H1_2, DO_MLA)
+DO_ZPZZZ(sve_mls_h, uint16_t, H1_2, DO_MLS)
+
+DO_ZPZZZ(sve_mla_s, uint32_t, H1_4, DO_MLA)
+DO_ZPZZZ(sve_mls_s, uint32_t, H1_4, DO_MLS)
+
+DO_ZPZZZ_D(sve_mla_d, uint64_t, DO_MLA)
+DO_ZPZZZ_D(sve_mls_d, uint64_t, DO_MLS)
+
+#undef DO_MLA
+#undef DO_MLS
+#undef DO_ZPZZZ
+#undef DO_ZPZZZ_D
+
+void HELPER(sve_index_b)(void *vd, uint32_t start,
+ uint32_t incr, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint8_t *d = vd;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[H1(i)] = start + i * incr;
+ }
+}
+
+void HELPER(sve_index_h)(void *vd, uint32_t start,
+ uint32_t incr, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 2;
+ uint16_t *d = vd;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[H2(i)] = start + i * incr;
+ }
+}
+
+void HELPER(sve_index_s)(void *vd, uint32_t start,
+ uint32_t incr, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 4;
+ uint32_t *d = vd;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[H4(i)] = start + i * incr;
+ }
+}
+
+void HELPER(sve_index_d)(void *vd, uint64_t start,
+ uint64_t incr, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = start + i * incr;
+ }
+}
+
+void HELPER(sve_adr_p32)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 4;
+ uint32_t sh = simd_data(desc);
+ uint32_t *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = n[i] + (m[i] << sh);
+ }
+}
+
+void HELPER(sve_adr_p64)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t sh = simd_data(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = n[i] + (m[i] << sh);
+ }
+}
+
+void HELPER(sve_adr_s32)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t sh = simd_data(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = n[i] + ((uint64_t)(int32_t)m[i] << sh);
+ }
+}
+
+void HELPER(sve_adr_u32)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t sh = simd_data(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = n[i] + ((uint64_t)(uint32_t)m[i] << sh);
+ }
+}
+
+void HELPER(sve_fexpa_h)(void *vd, void *vn, uint32_t desc)
+{
+ /* These constants are cut-and-paste directly from the ARM pseudocode. */
+ static const uint16_t coeff[] = {
+ 0x0000, 0x0016, 0x002d, 0x0045, 0x005d, 0x0075, 0x008e, 0x00a8,
+ 0x00c2, 0x00dc, 0x00f8, 0x0114, 0x0130, 0x014d, 0x016b, 0x0189,
+ 0x01a8, 0x01c8, 0x01e8, 0x0209, 0x022b, 0x024e, 0x0271, 0x0295,
+ 0x02ba, 0x02e0, 0x0306, 0x032e, 0x0356, 0x037f, 0x03a9, 0x03d4,
+ };
+ intptr_t i, opr_sz = simd_oprsz(desc) / 2;
+ uint16_t *d = vd, *n = vn;
+
+ for (i = 0; i < opr_sz; i++) {
+ uint16_t nn = n[i];
+ intptr_t idx = extract32(nn, 0, 5);
+ uint16_t exp = extract32(nn, 5, 5);
+ d[i] = coeff[idx] | (exp << 10);
+ }
+}
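+
+/*
+ * For the half-precision form above, bits [4:0] of each input element
+ * index the table, which provides the 10-bit fraction of 2^(idx/32),
+ * and bits [9:5] are placed directly into the exponent field at bit 10.
+ * The _s and _d forms below do the same with 64-entry tables and 23- and
+ * 52-bit fractions.
+ */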
+
+void HELPER(sve_fexpa_s)(void *vd, void *vn, uint32_t desc)
+{
+ /* These constants are cut-and-paste directly from the ARM pseudocode. */
+ static const uint32_t coeff[] = {
+ 0x000000, 0x0164d2, 0x02cd87, 0x043a29,
+ 0x05aac3, 0x071f62, 0x08980f, 0x0a14d5,
+ 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc,
+ 0x11c3d3, 0x135a2b, 0x14f4f0, 0x16942d,
+ 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda,
+ 0x1ef532, 0x20b051, 0x227043, 0x243516,
+ 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
+ 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4,
+ 0x3504f3, 0x36fd92, 0x38fbaf, 0x3aff5b,
+ 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd,
+ 0x45672a, 0x478d75, 0x49b9be, 0x4bec15,
+ 0x4e248c, 0x506334, 0x52a81e, 0x54f35b,
+ 0x5744fd, 0x599d16, 0x5bfbb8, 0x5e60f5,
+ 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
+ 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177,
+ 0x75257d, 0x77d0df, 0x7a83b3, 0x7d3e0c,
+ };
+ intptr_t i, opr_sz = simd_oprsz(desc) / 4;
+ uint32_t *d = vd, *n = vn;
+
+ for (i = 0; i < opr_sz; i++) {
+ uint32_t nn = n[i];
+ intptr_t idx = extract32(nn, 0, 6);
+ uint32_t exp = extract32(nn, 6, 8);
+ d[i] = coeff[idx] | (exp << 23);
+ }
+}
+
+void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc)
+{
+ /* These constants are cut-and-paste directly from the ARM pseudocode. */
+ static const uint64_t coeff[] = {
+ 0x0000000000000ull, 0x02C9A3E778061ull, 0x059B0D3158574ull,
+ 0x0874518759BC8ull, 0x0B5586CF9890Full, 0x0E3EC32D3D1A2ull,
+ 0x11301D0125B51ull, 0x1429AAEA92DE0ull, 0x172B83C7D517Bull,
+ 0x1A35BEB6FCB75ull, 0x1D4873168B9AAull, 0x2063B88628CD6ull,
+ 0x2387A6E756238ull, 0x26B4565E27CDDull, 0x29E9DF51FDEE1ull,
+ 0x2D285A6E4030Bull, 0x306FE0A31B715ull, 0x33C08B26416FFull,
+ 0x371A7373AA9CBull, 0x3A7DB34E59FF7ull, 0x3DEA64C123422ull,
+ 0x4160A21F72E2Aull, 0x44E086061892Dull, 0x486A2B5C13CD0ull,
+ 0x4BFDAD5362A27ull, 0x4F9B2769D2CA7ull, 0x5342B569D4F82ull,
+ 0x56F4736B527DAull, 0x5AB07DD485429ull, 0x5E76F15AD2148ull,
+ 0x6247EB03A5585ull, 0x6623882552225ull, 0x6A09E667F3BCDull,
+ 0x6DFB23C651A2Full, 0x71F75E8EC5F74ull, 0x75FEB564267C9ull,
+ 0x7A11473EB0187ull, 0x7E2F336CF4E62ull, 0x82589994CCE13ull,
+ 0x868D99B4492EDull, 0x8ACE5422AA0DBull, 0x8F1AE99157736ull,
+ 0x93737B0CDC5E5ull, 0x97D829FDE4E50ull, 0x9C49182A3F090ull,
+ 0xA0C667B5DE565ull, 0xA5503B23E255Dull, 0xA9E6B5579FDBFull,
+ 0xAE89F995AD3ADull, 0xB33A2B84F15FBull, 0xB7F76F2FB5E47ull,
+ 0xBCC1E904BC1D2ull, 0xC199BDD85529Cull, 0xC67F12E57D14Bull,
+ 0xCB720DCEF9069ull, 0xD072D4A07897Cull, 0xD5818DCFBA487ull,
+ 0xDA9E603DB3285ull, 0xDFC97337B9B5Full, 0xE502EE78B3FF6ull,
+ 0xEA4AFA2A490DAull, 0xEFA1BEE615A27ull, 0xF50765B6E4540ull,
+ 0xFA7C1819E90D8ull,
+ };
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn;
+
+ for (i = 0; i < opr_sz; i++) {
+ uint64_t nn = n[i];
+ intptr_t idx = extract32(nn, 0, 6);
+ uint64_t exp = extract32(nn, 6, 11);
+ d[i] = coeff[idx] | (exp << 52);
+ }
+}
+
+void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 2;
+ uint16_t *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i += 1) {
+ uint16_t nn = n[i];
+ uint16_t mm = m[i];
+ if (mm & 1) {
+ nn = float16_one;
+ }
+ d[i] = nn ^ (mm & 2) << 14;
+ }
+}
+
+void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 4;
+ uint32_t *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i += 1) {
+ uint32_t nn = n[i];
+ uint32_t mm = m[i];
+ if (mm & 1) {
+ nn = float32_one;
+ }
+ d[i] = nn ^ (mm & 2) << 30;
+ }
+}
+
+void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i];
+ uint64_t mm = m[i];
+ if (mm & 1) {
+ nn = float64_one;
+ }
+ d[i] = nn ^ (mm & 2) << 62;
+ }
+}
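+
+/*
+ * In FTSSEL, bit 0 of each element of Zm selects between Zn and the
+ * constant 1.0, and bit 1 is XORed into the sign bit of the result
+ * (the shifts by 14, 30 and 62 move it to the top bit of the element).
+ */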
+
+/*
+ * Signed saturating addition with scalar operand.
+ */
+
+void HELPER(sve_sqaddi_b)(void *d, void *a, int32_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(int8_t)) {
+ *(int8_t *)(d + i) = DO_SQADD_B(b, *(int8_t *)(a + i));
+ }
+}
+
+void HELPER(sve_sqaddi_h)(void *d, void *a, int32_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(int16_t)) {
+ *(int16_t *)(d + i) = DO_SQADD_H(b, *(int16_t *)(a + i));
+ }
+}
+
+void HELPER(sve_sqaddi_s)(void *d, void *a, int64_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(int32_t)) {
+ *(int32_t *)(d + i) = DO_SQADD_S(b, *(int32_t *)(a + i));
+ }
+}
+
+void HELPER(sve_sqaddi_d)(void *d, void *a, int64_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(int64_t)) {
+ *(int64_t *)(d + i) = do_sqadd_d(b, *(int64_t *)(a + i));
+ }
+}
+
+/*
+ * Unsigned saturating addition with scalar operand.
+ */
+
+void HELPER(sve_uqaddi_b)(void *d, void *a, int32_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
+ *(uint8_t *)(d + i) = DO_UQADD_B(b, *(uint8_t *)(a + i));
+ }
+}
+
+void HELPER(sve_uqaddi_h)(void *d, void *a, int32_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
+ *(uint16_t *)(d + i) = DO_UQADD_H(b, *(uint16_t *)(a + i));
+ }
+}
+
+void HELPER(sve_uqaddi_s)(void *d, void *a, int64_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
+ *(uint32_t *)(d + i) = DO_UQADD_S(b, *(uint32_t *)(a + i));
+ }
+}
+
+void HELPER(sve_uqaddi_d)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ *(uint64_t *)(d + i) = do_uqadd_d(b, *(uint64_t *)(a + i));
+ }
+}
+
+void HELPER(sve_uqsubi_d)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
+ *(uint64_t *)(d + i) = do_uqsub_d(*(uint64_t *)(a + i), b);
+ }
+}
+
+/* Two operand predicated copy immediate with merge. All valid immediates
+ * can fit within 17 signed bits in the simd_data field.
+ */
+void HELPER(sve_cpy_m_b)(void *vd, void *vn, void *vg,
+ uint64_t mm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ mm = dup_const(MO_8, mm);
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i];
+ uint64_t pp = expand_pred_b(pg[H1(i)]);
+ d[i] = (mm & pp) | (nn & ~pp);
+ }
+}
+
+void HELPER(sve_cpy_m_h)(void *vd, void *vn, void *vg,
+ uint64_t mm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ mm = dup_const(MO_16, mm);
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i];
+ uint64_t pp = expand_pred_h(pg[H1(i)]);
+ d[i] = (mm & pp) | (nn & ~pp);
+ }
+}
+
+void HELPER(sve_cpy_m_s)(void *vd, void *vn, void *vg,
+ uint64_t mm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ mm = dup_const(MO_32, mm);
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i];
+ uint64_t pp = expand_pred_s(pg[H1(i)]);
+ d[i] = (mm & pp) | (nn & ~pp);
+ }
+}
+
+void HELPER(sve_cpy_m_d)(void *vd, void *vn, void *vg,
+ uint64_t mm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i];
+ d[i] = (pg[H1(i)] & 1 ? mm : nn);
+ }
+}
+
+void HELPER(sve_cpy_z_b)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd;
+ uint8_t *pg = vg;
+
+ val = dup_const(MO_8, val);
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = val & expand_pred_b(pg[H1(i)]);
+ }
+}
+
+void HELPER(sve_cpy_z_h)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd;
+ uint8_t *pg = vg;
+
+ val = dup_const(MO_16, val);
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = val & expand_pred_h(pg[H1(i)]);
+ }
+}
+
+void HELPER(sve_cpy_z_s)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd;
+ uint8_t *pg = vg;
+
+ val = dup_const(MO_32, val);
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = val & expand_pred_s(pg[H1(i)]);
+ }
+}
+
+void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = (pg[H1(i)] & 1 ? val : 0);
+ }
+}
+
+/*
+ * Big-endian hosts need to frob the byte indices.  If the copy
+ * happens to be 8-byte aligned, then no frobbing is necessary.
+ */
+static void swap_memmove(void *vd, void *vs, size_t n)
+{
+ uintptr_t d = (uintptr_t)vd;
+ uintptr_t s = (uintptr_t)vs;
+ uintptr_t o = (d | s | n) & 7;
+ size_t i;
+
+#if !HOST_BIG_ENDIAN
+ o = 0;
+#endif
+ switch (o) {
+ case 0:
+ memmove(vd, vs, n);
+ break;
+
+ case 4:
+ if (d < s || d >= s + n) {
+ for (i = 0; i < n; i += 4) {
+ *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i);
+ }
+ } else {
+ for (i = n; i > 0; ) {
+ i -= 4;
+ *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i);
+ }
+ }
+ break;
+
+ case 2:
+ case 6:
+ if (d < s || d >= s + n) {
+ for (i = 0; i < n; i += 2) {
+ *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i);
+ }
+ } else {
+ for (i = n; i > 0; ) {
+ i -= 2;
+ *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i);
+ }
+ }
+ break;
+
+ default:
+ if (d < s || d >= s + n) {
+ for (i = 0; i < n; i++) {
+ *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i);
+ }
+ } else {
+ for (i = n; i > 0; ) {
+ i -= 1;
+ *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i);
+ }
+ }
+ break;
+ }
+}
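+
+/*
+ * The H1, H1_2 and H1_4 macros adjust the low address bits on big-endian
+ * hosts so that sub-8-byte accesses hit the correct logical element
+ * within each aligned 64-bit unit.  A plain memmove preserves logical
+ * element order only when the addresses and length are all multiples of
+ * 8, hence the strided 4-, 2- and 1-byte copies above.
+ */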
+
+/* Similarly for memset of 0. */
+static void swap_memzero(void *vd, size_t n)
+{
+ uintptr_t d = (uintptr_t)vd;
+ uintptr_t o = (d | n) & 7;
+ size_t i;
+
+ /* Usually, the first bit of a predicate is set, so N is 0. */
+ if (likely(n == 0)) {
+ return;
+ }
+
+#if !HOST_BIG_ENDIAN
+ o = 0;
+#endif
+ switch (o) {
+ case 0:
+ memset(vd, 0, n);
+ break;
+
+ case 4:
+ for (i = 0; i < n; i += 4) {
+ *(uint32_t *)H1_4(d + i) = 0;
+ }
+ break;
+
+ case 2:
+ case 6:
+ for (i = 0; i < n; i += 2) {
+ *(uint16_t *)H1_2(d + i) = 0;
+ }
+ break;
+
+ default:
+ for (i = 0; i < n; i++) {
+ *(uint8_t *)H1(d + i) = 0;
+ }
+ break;
+ }
+}
+
+void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t opr_sz = simd_oprsz(desc);
+ size_t n_ofs = simd_data(desc);
+ size_t n_siz = opr_sz - n_ofs;
+
+ if (vd != vm) {
+ swap_memmove(vd, vn + n_ofs, n_siz);
+ swap_memmove(vd + n_siz, vm, n_ofs);
+ } else if (vd != vn) {
+ swap_memmove(vd + n_siz, vd, n_ofs);
+ swap_memmove(vd, vn + n_ofs, n_siz);
+ } else {
+ /* vd == vn == vm. Need temp space. */
+ ARMVectorReg tmp;
+ swap_memmove(&tmp, vm, n_ofs);
+ swap_memmove(vd, vd + n_ofs, n_siz);
+ memcpy(vd + n_siz, &tmp, n_ofs);
+ }
+}
+
+#define DO_INSR(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \
+{ \
+ intptr_t opr_sz = simd_oprsz(desc); \
+ swap_memmove(vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE)); \
+ *(TYPE *)(vd + H(0)) = val; \
+}
+
+DO_INSR(sve_insr_b, uint8_t, H1)
+DO_INSR(sve_insr_h, uint16_t, H1_2)
+DO_INSR(sve_insr_s, uint32_t, H1_4)
+DO_INSR(sve_insr_d, uint64_t, H1_8)
+
+#undef DO_INSR
+
+void HELPER(sve_rev_b)(void *vd, void *vn, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
+ uint64_t f = *(uint64_t *)(vn + i);
+ uint64_t b = *(uint64_t *)(vn + j);
+ *(uint64_t *)(vd + i) = bswap64(b);
+ *(uint64_t *)(vd + j) = bswap64(f);
+ }
+}
+
+void HELPER(sve_rev_h)(void *vd, void *vn, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
+ uint64_t f = *(uint64_t *)(vn + i);
+ uint64_t b = *(uint64_t *)(vn + j);
+ *(uint64_t *)(vd + i) = hswap64(b);
+ *(uint64_t *)(vd + j) = hswap64(f);
+ }
+}
+
+void HELPER(sve_rev_s)(void *vd, void *vn, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
+ uint64_t f = *(uint64_t *)(vn + i);
+ uint64_t b = *(uint64_t *)(vn + j);
+ *(uint64_t *)(vd + i) = rol64(b, 32);
+ *(uint64_t *)(vd + j) = rol64(f, 32);
+ }
+}
+
+void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
+ uint64_t f = *(uint64_t *)(vn + i);
+ uint64_t b = *(uint64_t *)(vn + j);
+ *(uint64_t *)(vd + i) = b;
+ *(uint64_t *)(vd + j) = f;
+ }
+}
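+
+/*
+ * Reversing the element order of a full vector is done 64 bits at a
+ * time: the 8-byte chunks swap ends, and the elements within each chunk
+ * are reversed in place (bswap64 for bytes, hswap64 for halfwords, a
+ * 32-bit rotate for words, and nothing extra for doublewords).
+ */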
+
+typedef void tb_impl_fn(void *, void *, void *, void *, uintptr_t, bool);
+
+static inline void do_tbl1(void *vd, void *vn, void *vm, uint32_t desc,
+ bool is_tbx, tb_impl_fn *fn)
+{
+ ARMVectorReg scratch;
+ uintptr_t oprsz = simd_oprsz(desc);
+
+ if (unlikely(vd == vn)) {
+ vn = memcpy(&scratch, vn, oprsz);
+ }
+
+ fn(vd, vn, NULL, vm, oprsz, is_tbx);
+}
+
+static inline void do_tbl2(void *vd, void *vn0, void *vn1, void *vm,
+ uint32_t desc, bool is_tbx, tb_impl_fn *fn)
+{
+ ARMVectorReg scratch;
+ uintptr_t oprsz = simd_oprsz(desc);
+
+ if (unlikely(vd == vn0)) {
+ vn0 = memcpy(&scratch, vn0, oprsz);
+ if (vd == vn1) {
+ vn1 = vn0;
+ }
+ } else if (unlikely(vd == vn1)) {
+ vn1 = memcpy(&scratch, vn1, oprsz);
+ }
+
+ fn(vd, vn0, vn1, vm, oprsz, is_tbx);
+}
+
+#define DO_TB(SUFF, TYPE, H) \
+static inline void do_tb_##SUFF(void *vd, void *vt0, void *vt1, \
+ void *vm, uintptr_t oprsz, bool is_tbx) \
+{ \
+ TYPE *d = vd, *tbl0 = vt0, *tbl1 = vt1, *indexes = vm; \
+ uintptr_t i, nelem = oprsz / sizeof(TYPE); \
+ for (i = 0; i < nelem; ++i) { \
+ TYPE index = indexes[H1(i)], val = 0; \
+ if (index < nelem) { \
+ val = tbl0[H(index)]; \
+ } else { \
+ index -= nelem; \
+ if (tbl1 && index < nelem) { \
+ val = tbl1[H(index)]; \
+ } else if (is_tbx) { \
+ continue; \
+ } \
+ } \
+ d[H(i)] = val; \
+ } \
+} \
+void HELPER(sve_tbl_##SUFF)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ do_tbl1(vd, vn, vm, desc, false, do_tb_##SUFF); \
+} \
+void HELPER(sve2_tbl_##SUFF)(void *vd, void *vn0, void *vn1, \
+ void *vm, uint32_t desc) \
+{ \
+ do_tbl2(vd, vn0, vn1, vm, desc, false, do_tb_##SUFF); \
+} \
+void HELPER(sve2_tbx_##SUFF)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ do_tbl1(vd, vn, vm, desc, true, do_tb_##SUFF); \
+}
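+
+/*
+ * The is_tbx flag distinguishes TBL from TBX: for an out-of-range index
+ * TBL writes zero to the destination element, while TBX skips the store
+ * and leaves the existing destination element unchanged.
+ */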
+
+DO_TB(b, uint8_t, H1)
+DO_TB(h, uint16_t, H2)
+DO_TB(s, uint32_t, H4)
+DO_TB(d, uint64_t, H8)
+
+#undef DO_TB
+
+#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ TYPED *d = vd; \
+ TYPES *n = vn; \
+ ARMVectorReg tmp; \
+ if (unlikely(vn - vd < opr_sz)) { \
+ n = memcpy(&tmp, n, opr_sz / 2); \
+ } \
+ for (i = 0; i < opr_sz / sizeof(TYPED); i++) { \
+ d[HD(i)] = n[HS(i)]; \
+ } \
+}
+
+DO_UNPK(sve_sunpk_h, int16_t, int8_t, H2, H1)
+DO_UNPK(sve_sunpk_s, int32_t, int16_t, H4, H2)
+DO_UNPK(sve_sunpk_d, int64_t, int32_t, H8, H4)
+
+DO_UNPK(sve_uunpk_h, uint16_t, uint8_t, H2, H1)
+DO_UNPK(sve_uunpk_s, uint32_t, uint16_t, H4, H2)
+DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, H8, H4)
+
+#undef DO_UNPK
+
+/*
+ * Mask of the predicate bits that belong to the even-numbered elements
+ * of width esz.  We also use this for expand_bits/compress_bits, and so
+ * extend the same pattern out to 16-bit units.
+ */
+static const uint64_t even_bit_esz_masks[5] = {
+ 0x5555555555555555ull,
+ 0x3333333333333333ull,
+ 0x0f0f0f0f0f0f0f0full,
+ 0x00ff00ff00ff00ffull,
+ 0x0000ffff0000ffffull,
+};
+
+/* Zero-extend units of 2**N bits to units of 2**(N+1) bits.
+ * For N==0, this corresponds to the operation that in qemu/bitops.h
+ * we call half_shuffle64; this algorithm is from Hacker's Delight,
+ * section 7-2 Shuffling Bits.
+ */
+static uint64_t expand_bits(uint64_t x, int n)
+{
+ int i;
+
+ x &= 0xffffffffu;
+ for (i = 4; i >= n; i--) {
+ int sh = 1 << i;
+ x = ((x << sh) | x) & even_bit_esz_masks[i];
+ }
+ return x;
+}
+
+/* Compress units of 2**(N+1) bits to units of 2**N bits.
+ * For N==0, this corresponds to the operation that in qemu/bitops.h
+ * we call half_unshuffle64; this algorithm is from Hacker's Delight,
+ * section 7-2 Shuffling Bits, where it is called an inverse half shuffle.
+ */
+static uint64_t compress_bits(uint64_t x, int n)
+{
+ int i;
+
+ for (i = n; i <= 4; i++) {
+ int sh = 1 << i;
+ x &= even_bit_esz_masks[i];
+ x = (x >> sh) | x;
+ }
+ return x & 0xffffffffu;
+}
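+
+/*
+ * For example, expand_bits(0xb, 0) spreads the bits of 0b1011 out to
+ * every other position, giving 0b1000101 = 0x45, and compress_bits(0x45, 0)
+ * packs them back into 0xb.
+ */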
+
+void HELPER(sve_zip_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ int esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
+ intptr_t high = FIELD_EX32(pred_desc, PREDDESC, DATA);
+ int esize = 1 << esz;
+ uint64_t *d = vd;
+ intptr_t i;
+
+ if (oprsz <= 8) {
+ uint64_t nn = *(uint64_t *)vn;
+ uint64_t mm = *(uint64_t *)vm;
+ int half = 4 * oprsz;
+
+ nn = extract64(nn, high * half, half);
+ mm = extract64(mm, high * half, half);
+ nn = expand_bits(nn, esz);
+ mm = expand_bits(mm, esz);
+ d[0] = nn | (mm << esize);
+ } else {
+ ARMPredicateReg tmp;
+
+ /*
+ * We produce output faster than we consume input.
+ * Therefore we must be mindful of possible overlap.
+ */
+ if (vd == vn) {
+ vn = memcpy(&tmp, vn, oprsz);
+ if (vd == vm) {
+ vm = vn;
+ }
+ } else if (vd == vm) {
+ vm = memcpy(&tmp, vm, oprsz);
+ }
+ if (high) {
+ high = oprsz >> 1;
+ }
+
+ if ((oprsz & 7) == 0) {
+ uint32_t *n = vn, *m = vm;
+ high >>= 2;
+
+ for (i = 0; i < oprsz / 8; i++) {
+ uint64_t nn = n[H4(high + i)];
+ uint64_t mm = m[H4(high + i)];
+
+ nn = expand_bits(nn, esz);
+ mm = expand_bits(mm, esz);
+ d[i] = nn | (mm << esize);
+ }
+ } else {
+ uint8_t *n = vn, *m = vm;
+ uint16_t *d16 = vd;
+
+ for (i = 0; i < oprsz / 2; i++) {
+ uint16_t nn = n[H1(high + i)];
+ uint16_t mm = m[H1(high + i)];
+
+ nn = expand_bits(nn, esz);
+ mm = expand_bits(mm, esz);
+ d16[H2(i)] = nn | (mm << esize);
+ }
+ }
+ }
+}
+
+void HELPER(sve_uzp_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ int esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
+ int odd = FIELD_EX32(pred_desc, PREDDESC, DATA) << esz;
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint64_t l, h;
+ intptr_t i;
+
+ if (oprsz <= 8) {
+ l = compress_bits(n[0] >> odd, esz);
+ h = compress_bits(m[0] >> odd, esz);
+ d[0] = l | (h << (4 * oprsz));
+ } else {
+ ARMPredicateReg tmp_m;
+ intptr_t oprsz_16 = oprsz / 16;
+
+ if ((vm - vd) < (uintptr_t)oprsz) {
+ m = memcpy(&tmp_m, vm, oprsz);
+ }
+
+ for (i = 0; i < oprsz_16; i++) {
+ l = n[2 * i + 0];
+ h = n[2 * i + 1];
+ l = compress_bits(l >> odd, esz);
+ h = compress_bits(h >> odd, esz);
+ d[i] = l | (h << 32);
+ }
+
+ /*
+ * For VL which is not a multiple of 512, the results from M do not
+ * align nicely with the uint64_t for D. Put the aligned results
+ * from M into TMP_M and then copy it into place afterward.
+ */
+ if (oprsz & 15) {
+ int final_shift = (oprsz & 15) * 2;
+
+ l = n[2 * i + 0];
+ h = n[2 * i + 1];
+ l = compress_bits(l >> odd, esz);
+ h = compress_bits(h >> odd, esz);
+ d[i] = l | (h << final_shift);
+
+ for (i = 0; i < oprsz_16; i++) {
+ l = m[2 * i + 0];
+ h = m[2 * i + 1];
+ l = compress_bits(l >> odd, esz);
+ h = compress_bits(h >> odd, esz);
+ tmp_m.p[i] = l | (h << 32);
+ }
+ l = m[2 * i + 0];
+ h = m[2 * i + 1];
+ l = compress_bits(l >> odd, esz);
+ h = compress_bits(h >> odd, esz);
+ tmp_m.p[i] = l | (h << final_shift);
+
+ swap_memmove(vd + oprsz / 2, &tmp_m, oprsz / 2);
+ } else {
+ for (i = 0; i < oprsz_16; i++) {
+ l = m[2 * i + 0];
+ h = m[2 * i + 1];
+ l = compress_bits(l >> odd, esz);
+ h = compress_bits(h >> odd, esz);
+ d[oprsz_16 + i] = l | (h << 32);
+ }
+ }
+ }
+}
+
+void HELPER(sve_trn_p)(void *vd, void *vn, void *vm, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ int esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
+ int odd = FIELD_EX32(pred_desc, PREDDESC, DATA);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint64_t mask;
+ int shr, shl;
+ intptr_t i;
+
+ shl = 1 << esz;
+ shr = 0;
+ mask = even_bit_esz_masks[esz];
+ if (odd) {
+ mask <<= shl;
+ shr = shl;
+ shl = 0;
+ }
+
+ for (i = 0; i < DIV_ROUND_UP(oprsz, 8); i++) {
+ uint64_t nn = (n[i] & mask) >> shr;
+ uint64_t mm = (m[i] & mask) << shl;
+ d[i] = nn + mm;
+ }
+}
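+
+/*
+ * In sve_trn_p the bits taken from N land in the even-numbered element
+ * positions and those from M in the odd ones, so they never overlap and
+ * the final addition is equivalent to a bitwise OR.
+ */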
+
+/* Reverse units of 2**N bits. */
+static uint64_t reverse_bits_64(uint64_t x, int n)
+{
+ int i, sh;
+
+ x = bswap64(x);
+ for (i = 2, sh = 4; i >= n; i--, sh >>= 1) {
+ uint64_t mask = even_bit_esz_masks[i];
+ x = ((x & mask) << sh) | ((x >> sh) & mask);
+ }
+ return x;
+}
+
+static uint8_t reverse_bits_8(uint8_t x, int n)
+{
+ static const uint8_t mask[3] = { 0x55, 0x33, 0x0f };
+ int i, sh;
+
+ for (i = 2, sh = 4; i >= n; i--, sh >>= 1) {
+ x = ((x & mask[i]) << sh) | ((x >> sh) & mask[i]);
+ }
+ return x;
+}
+
+void HELPER(sve_rev_p)(void *vd, void *vn, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ int esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
+ intptr_t i, oprsz_2 = oprsz / 2;
+
+ if (oprsz <= 8) {
+ uint64_t l = *(uint64_t *)vn;
+ l = reverse_bits_64(l << (64 - 8 * oprsz), esz);
+ *(uint64_t *)vd = l;
+ } else if ((oprsz & 15) == 0) {
+ for (i = 0; i < oprsz_2; i += 8) {
+ intptr_t ih = oprsz - 8 - i;
+ uint64_t l = reverse_bits_64(*(uint64_t *)(vn + i), esz);
+ uint64_t h = reverse_bits_64(*(uint64_t *)(vn + ih), esz);
+ *(uint64_t *)(vd + i) = h;
+ *(uint64_t *)(vd + ih) = l;
+ }
+ } else {
+ for (i = 0; i < oprsz_2; i += 1) {
+ intptr_t il = H1(i);
+ intptr_t ih = H1(oprsz - 1 - i);
+ uint8_t l = reverse_bits_8(*(uint8_t *)(vn + il), esz);
+ uint8_t h = reverse_bits_8(*(uint8_t *)(vn + ih), esz);
+ *(uint8_t *)(vd + il) = h;
+ *(uint8_t *)(vd + ih) = l;
+ }
+ }
+}
+
+void HELPER(sve_punpk_p)(void *vd, void *vn, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ intptr_t high = FIELD_EX32(pred_desc, PREDDESC, DATA);
+ uint64_t *d = vd;
+ intptr_t i;
+
+ if (oprsz <= 8) {
+ uint64_t nn = *(uint64_t *)vn;
+ int half = 4 * oprsz;
+
+ nn = extract64(nn, high * half, half);
+ nn = expand_bits(nn, 0);
+ d[0] = nn;
+ } else {
+ ARMPredicateReg tmp_n;
+
+ /*
+ * We produce output faster than we consume input.
+ * Therefore we must be mindful of possible overlap.
+ */
+ if ((vn - vd) < (uintptr_t)oprsz) {
+ vn = memcpy(&tmp_n, vn, oprsz);
+ }
+ if (high) {
+ high = oprsz >> 1;
+ }
+
+ if ((oprsz & 7) == 0) {
+ uint32_t *n = vn;
+ high >>= 2;
+
+ for (i = 0; i < oprsz / 8; i++) {
+ uint64_t nn = n[H4(high + i)];
+ d[i] = expand_bits(nn, 0);
+ }
+ } else {
+ uint16_t *d16 = vd;
+ uint8_t *n = vn;
+
+ for (i = 0; i < oprsz / 2; i++) {
+ uint16_t nn = n[H1(high + i)];
+ d16[H2(i)] = expand_bits(nn, 0);
+ }
+ }
+ }
+}
+
+#define DO_ZIP(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t oprsz = simd_oprsz(desc); \
+ intptr_t odd_ofs = simd_data(desc); \
+ intptr_t i, oprsz_2 = oprsz / 2; \
+ ARMVectorReg tmp_n, tmp_m; \
+ /* We produce output faster than we consume input. \
+ Therefore we must be mindful of possible overlap. */ \
+ if (unlikely((vn - vd) < (uintptr_t)oprsz)) { \
+ vn = memcpy(&tmp_n, vn, oprsz); \
+ } \
+ if (unlikely((vm - vd) < (uintptr_t)oprsz)) { \
+ vm = memcpy(&tmp_m, vm, oprsz); \
+ } \
+ for (i = 0; i < oprsz_2; i += sizeof(TYPE)) { \
+ *(TYPE *)(vd + H(2 * i + 0)) = *(TYPE *)(vn + odd_ofs + H(i)); \
+ *(TYPE *)(vd + H(2 * i + sizeof(TYPE))) = \
+ *(TYPE *)(vm + odd_ofs + H(i)); \
+ } \
+ if (sizeof(TYPE) == 16 && unlikely(oprsz & 16)) { \
+ memset(vd + oprsz - 16, 0, 16); \
+ } \
+}
+
+DO_ZIP(sve_zip_b, uint8_t, H1)
+DO_ZIP(sve_zip_h, uint16_t, H1_2)
+DO_ZIP(sve_zip_s, uint32_t, H1_4)
+DO_ZIP(sve_zip_d, uint64_t, H1_8)
+DO_ZIP(sve2_zip_q, Int128, )
+
+#define DO_UZP(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t oprsz = simd_oprsz(desc); \
+ intptr_t odd_ofs = simd_data(desc); \
+ intptr_t i, p; \
+ ARMVectorReg tmp_m; \
+ if (unlikely((vm - vd) < (uintptr_t)oprsz)) { \
+ vm = memcpy(&tmp_m, vm, oprsz); \
+ } \
+ i = 0, p = odd_ofs; \
+ do { \
+ *(TYPE *)(vd + H(i)) = *(TYPE *)(vn + H(p)); \
+ i += sizeof(TYPE), p += 2 * sizeof(TYPE); \
+ } while (p < oprsz); \
+ p -= oprsz; \
+ do { \
+ *(TYPE *)(vd + H(i)) = *(TYPE *)(vm + H(p)); \
+ i += sizeof(TYPE), p += 2 * sizeof(TYPE); \
+ } while (p < oprsz); \
+ tcg_debug_assert(i == oprsz); \
+}
+
+DO_UZP(sve_uzp_b, uint8_t, H1)
+DO_UZP(sve_uzp_h, uint16_t, H1_2)
+DO_UZP(sve_uzp_s, uint32_t, H1_4)
+DO_UZP(sve_uzp_d, uint64_t, H1_8)
+DO_UZP(sve2_uzp_q, Int128, )
+
+#define DO_TRN(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t oprsz = simd_oprsz(desc); \
+ intptr_t odd_ofs = simd_data(desc); \
+ intptr_t i; \
+ for (i = 0; i < oprsz; i += 2 * sizeof(TYPE)) { \
+ TYPE ae = *(TYPE *)(vn + H(i + odd_ofs)); \
+ TYPE be = *(TYPE *)(vm + H(i + odd_ofs)); \
+ *(TYPE *)(vd + H(i + 0)) = ae; \
+ *(TYPE *)(vd + H(i + sizeof(TYPE))) = be; \
+ } \
+ if (sizeof(TYPE) == 16 && unlikely(oprsz & 16)) { \
+ memset(vd + oprsz - 16, 0, 16); \
+ } \
+}
+
+DO_TRN(sve_trn_b, uint8_t, H1)
+DO_TRN(sve_trn_h, uint16_t, H1_2)
+DO_TRN(sve_trn_s, uint32_t, H1_4)
+DO_TRN(sve_trn_d, uint64_t, H1_8)
+DO_TRN(sve2_trn_q, Int128, )
+
+#undef DO_ZIP
+#undef DO_UZP
+#undef DO_TRN
+
+void HELPER(sve_compact_s)(void *vd, void *vn, void *vg, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc) / 4;
+ uint32_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ for (i = j = 0; i < opr_sz; i++) {
+ if (pg[H1(i / 2)] & (i & 1 ? 0x10 : 0x01)) {
+ d[H4(j)] = n[H4(i)];
+ j++;
+ }
+ }
+ for (; j < opr_sz; j++) {
+ d[H4(j)] = 0;
+ }
+}
+
+void HELPER(sve_compact_d)(void *vd, void *vn, void *vg, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn;
+ uint8_t *pg = vg;
+
+ for (i = j = 0; i < opr_sz; i++) {
+ if (pg[H1(i)] & 1) {
+ d[j] = n[i];
+ j++;
+ }
+ }
+ for (; j < opr_sz; j++) {
+ d[j] = 0;
+ }
+}
+
+/* Similar to the ARM LastActiveElement pseudocode function, except the
+ * result is multiplied by the element size. This includes the not found
+ * indication; e.g. not found for esz=3 is -8.
+ */
+int32_t HELPER(sve_last_active_element)(void *vg, uint32_t pred_desc)
+{
+ intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
+ intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
+
+ return last_active_element(vg, words, esz);
+}
+
+void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc)
+{
+ intptr_t opr_sz = simd_oprsz(desc) / 8;
+ int esz = simd_data(desc);
+ uint64_t pg, first_g, last_g, len, mask = pred_esz_masks[esz];
+ intptr_t i, first_i, last_i;
+ ARMVectorReg tmp;
+
+ first_i = last_i = 0;
+ first_g = last_g = 0;
+
+ /* Find the extent of the active elements within VG. */
+ for (i = QEMU_ALIGN_UP(opr_sz, 8) - 8; i >= 0; i -= 8) {
+ pg = *(uint64_t *)(vg + i) & mask;
+ if (pg) {
+ if (last_g == 0) {
+ last_g = pg;
+ last_i = i;
+ }
+ first_g = pg;
+ first_i = i;
+ }
+ }
+
+ len = 0;
+ if (first_g != 0) {
+ first_i = first_i * 8 + ctz64(first_g);
+ last_i = last_i * 8 + 63 - clz64(last_g);
+ len = last_i - first_i + (1 << esz);
+ if (vd == vm) {
+ vm = memcpy(&tmp, vm, opr_sz * 8);
+ }
+ swap_memmove(vd, vn + first_i, len);
+ }
+ swap_memmove(vd + len, vm, opr_sz * 8 - len);
+}
+
+void HELPER(sve_sel_zpzz_b)(void *vd, void *vn, void *vm,
+ void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i], mm = m[i];
+ uint64_t pp = expand_pred_b(pg[H1(i)]);
+ d[i] = (nn & pp) | (mm & ~pp);
+ }
+}
+
+void HELPER(sve_sel_zpzz_h)(void *vd, void *vn, void *vm,
+ void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i], mm = m[i];
+ uint64_t pp = expand_pred_h(pg[H1(i)]);
+ d[i] = (nn & pp) | (mm & ~pp);
+ }
+}
+
+void HELPER(sve_sel_zpzz_s)(void *vd, void *vn, void *vm,
+ void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i], mm = m[i];
+ uint64_t pp = expand_pred_s(pg[H1(i)]);
+ d[i] = (nn & pp) | (mm & ~pp);
+ }
+}
+
+void HELPER(sve_sel_zpzz_d)(void *vd, void *vn, void *vm,
+ void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ uint64_t nn = n[i], mm = m[i];
+ d[i] = (pg[H1(i)] & 1 ? nn : mm);
+ }
+}
+
+void HELPER(sve_sel_zpzz_q)(void *vd, void *vn, void *vm,
+ void *vg, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 16;
+ Int128 *d = vd, *n = vn, *m = vm;
+ uint16_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i += 1) {
+ d[i] = (pg[H2(i)] & 1 ? n : m)[i];
+ }
+}
+
+/* Two operand comparison controlled by a predicate.
+ * ??? It is very tempting to expand this inline
+ * with x86 instructions, e.g.
+ *
+ * vcmpeqw zm, zn, %ymm0
+ * vpmovmskb %ymm0, %eax
+ * and $0x5555, %eax
+ * and pg, %eax
+ *
+ * or even aarch64, e.g.
+ *
+ * // mask = 4000 1000 0400 0100 0040 0010 0004 0001
+ * cmeq v0.8h, zn, zm
+ * and v0.8h, v0.8h, mask
+ * addv h0, v0.8h
+ * and v0.8b, pg
+ *
+ * However, coming up with an abstraction that allows vector inputs and
+ * a scalar output, and also handles the byte-ordering of sub-uint64_t
+ * scalar outputs, is tricky.
+ */
+#define DO_CMP_PPZZ(NAME, TYPE, OP, H, MASK) \
+uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t opr_sz = simd_oprsz(desc); \
+ uint32_t flags = PREDTEST_INIT; \
+ intptr_t i = opr_sz; \
+ do { \
+ uint64_t out = 0, pg; \
+ do { \
+ i -= sizeof(TYPE), out <<= sizeof(TYPE); \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ TYPE mm = *(TYPE *)(vm + H(i)); \
+ out |= nn OP mm; \
+ } while (i & 63); \
+ pg = *(uint64_t *)(vg + (i >> 3)) & MASK; \
+ out &= pg; \
+ *(uint64_t *)(vd + (i >> 3)) = out; \
+ flags = iter_predtest_bwd(out, pg, flags); \
+ } while (i > 0); \
+ return flags; \
+}
+
+#define DO_CMP_PPZZ_B(NAME, TYPE, OP) \
+ DO_CMP_PPZZ(NAME, TYPE, OP, H1, 0xffffffffffffffffull)
+#define DO_CMP_PPZZ_H(NAME, TYPE, OP) \
+ DO_CMP_PPZZ(NAME, TYPE, OP, H1_2, 0x5555555555555555ull)
+#define DO_CMP_PPZZ_S(NAME, TYPE, OP) \
+ DO_CMP_PPZZ(NAME, TYPE, OP, H1_4, 0x1111111111111111ull)
+#define DO_CMP_PPZZ_D(NAME, TYPE, OP) \
+ DO_CMP_PPZZ(NAME, TYPE, OP, H1_8, 0x0101010101010101ull)
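+
+/*
+ * The MASK argument encodes the predicate layout: there is one predicate
+ * bit per byte of vector data, so for halfword, word and doubleword
+ * elements only every 2nd, 4th or 8th bit is significant, giving the
+ * 0x5555..., 0x1111... and 0x0101... constants above.
+ */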
+
+DO_CMP_PPZZ_B(sve_cmpeq_ppzz_b, uint8_t, ==)
+DO_CMP_PPZZ_H(sve_cmpeq_ppzz_h, uint16_t, ==)
+DO_CMP_PPZZ_S(sve_cmpeq_ppzz_s, uint32_t, ==)
+DO_CMP_PPZZ_D(sve_cmpeq_ppzz_d, uint64_t, ==)
+
+DO_CMP_PPZZ_B(sve_cmpne_ppzz_b, uint8_t, !=)
+DO_CMP_PPZZ_H(sve_cmpne_ppzz_h, uint16_t, !=)
+DO_CMP_PPZZ_S(sve_cmpne_ppzz_s, uint32_t, !=)
+DO_CMP_PPZZ_D(sve_cmpne_ppzz_d, uint64_t, !=)
+
+DO_CMP_PPZZ_B(sve_cmpgt_ppzz_b, int8_t, >)
+DO_CMP_PPZZ_H(sve_cmpgt_ppzz_h, int16_t, >)
+DO_CMP_PPZZ_S(sve_cmpgt_ppzz_s, int32_t, >)
+DO_CMP_PPZZ_D(sve_cmpgt_ppzz_d, int64_t, >)
+
+DO_CMP_PPZZ_B(sve_cmpge_ppzz_b, int8_t, >=)
+DO_CMP_PPZZ_H(sve_cmpge_ppzz_h, int16_t, >=)
+DO_CMP_PPZZ_S(sve_cmpge_ppzz_s, int32_t, >=)
+DO_CMP_PPZZ_D(sve_cmpge_ppzz_d, int64_t, >=)
+
+DO_CMP_PPZZ_B(sve_cmphi_ppzz_b, uint8_t, >)
+DO_CMP_PPZZ_H(sve_cmphi_ppzz_h, uint16_t, >)
+DO_CMP_PPZZ_S(sve_cmphi_ppzz_s, uint32_t, >)
+DO_CMP_PPZZ_D(sve_cmphi_ppzz_d, uint64_t, >)
+
+DO_CMP_PPZZ_B(sve_cmphs_ppzz_b, uint8_t, >=)
+DO_CMP_PPZZ_H(sve_cmphs_ppzz_h, uint16_t, >=)
+DO_CMP_PPZZ_S(sve_cmphs_ppzz_s, uint32_t, >=)
+DO_CMP_PPZZ_D(sve_cmphs_ppzz_d, uint64_t, >=)
+
+#undef DO_CMP_PPZZ_B
+#undef DO_CMP_PPZZ_H
+#undef DO_CMP_PPZZ_S
+#undef DO_CMP_PPZZ_D
+#undef DO_CMP_PPZZ
+
+/* Similar, but the second source is "wide". */
+#define DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H, MASK) \
+uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t opr_sz = simd_oprsz(desc); \
+ uint32_t flags = PREDTEST_INIT; \
+ intptr_t i = opr_sz; \
+ do { \
+ uint64_t out = 0, pg; \
+ do { \
+ TYPEW mm = *(TYPEW *)(vm + i - 8); \
+ do { \
+ i -= sizeof(TYPE), out <<= sizeof(TYPE); \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ out |= nn OP mm; \
+ } while (i & 7); \
+ } while (i & 63); \
+ pg = *(uint64_t *)(vg + (i >> 3)) & MASK; \
+ out &= pg; \
+ *(uint64_t *)(vd + (i >> 3)) = out; \
+ flags = iter_predtest_bwd(out, pg, flags); \
+ } while (i > 0); \
+ return flags; \
+}
+
+#define DO_CMP_PPZW_B(NAME, TYPE, TYPEW, OP) \
+ DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1, 0xffffffffffffffffull)
+#define DO_CMP_PPZW_H(NAME, TYPE, TYPEW, OP) \
+ DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_2, 0x5555555555555555ull)
+#define DO_CMP_PPZW_S(NAME, TYPE, TYPEW, OP) \
+ DO_CMP_PPZW(NAME, TYPE, TYPEW, OP, H1_4, 0x1111111111111111ull)
+
+DO_CMP_PPZW_B(sve_cmpeq_ppzw_b, int8_t, uint64_t, ==)
+DO_CMP_PPZW_H(sve_cmpeq_ppzw_h, int16_t, uint64_t, ==)
+DO_CMP_PPZW_S(sve_cmpeq_ppzw_s, int32_t, uint64_t, ==)
+
+DO_CMP_PPZW_B(sve_cmpne_ppzw_b, int8_t, uint64_t, !=)
+DO_CMP_PPZW_H(sve_cmpne_ppzw_h, int16_t, uint64_t, !=)
+DO_CMP_PPZW_S(sve_cmpne_ppzw_s, int32_t, uint64_t, !=)
+
+DO_CMP_PPZW_B(sve_cmpgt_ppzw_b, int8_t, int64_t, >)
+DO_CMP_PPZW_H(sve_cmpgt_ppzw_h, int16_t, int64_t, >)
+DO_CMP_PPZW_S(sve_cmpgt_ppzw_s, int32_t, int64_t, >)
+
+DO_CMP_PPZW_B(sve_cmpge_ppzw_b, int8_t, int64_t, >=)
+DO_CMP_PPZW_H(sve_cmpge_ppzw_h, int16_t, int64_t, >=)
+DO_CMP_PPZW_S(sve_cmpge_ppzw_s, int32_t, int64_t, >=)
+
+DO_CMP_PPZW_B(sve_cmphi_ppzw_b, uint8_t, uint64_t, >)
+DO_CMP_PPZW_H(sve_cmphi_ppzw_h, uint16_t, uint64_t, >)
+DO_CMP_PPZW_S(sve_cmphi_ppzw_s, uint32_t, uint64_t, >)
+
+DO_CMP_PPZW_B(sve_cmphs_ppzw_b, uint8_t, uint64_t, >=)
+DO_CMP_PPZW_H(sve_cmphs_ppzw_h, uint16_t, uint64_t, >=)
+DO_CMP_PPZW_S(sve_cmphs_ppzw_s, uint32_t, uint64_t, >=)
+
+DO_CMP_PPZW_B(sve_cmplt_ppzw_b, int8_t, int64_t, <)
+DO_CMP_PPZW_H(sve_cmplt_ppzw_h, int16_t, int64_t, <)
+DO_CMP_PPZW_S(sve_cmplt_ppzw_s, int32_t, int64_t, <)
+
+DO_CMP_PPZW_B(sve_cmple_ppzw_b, int8_t, int64_t, <=)
+DO_CMP_PPZW_H(sve_cmple_ppzw_h, int16_t, int64_t, <=)
+DO_CMP_PPZW_S(sve_cmple_ppzw_s, int32_t, int64_t, <=)
+
+DO_CMP_PPZW_B(sve_cmplo_ppzw_b, uint8_t, uint64_t, <)
+DO_CMP_PPZW_H(sve_cmplo_ppzw_h, uint16_t, uint64_t, <)
+DO_CMP_PPZW_S(sve_cmplo_ppzw_s, uint32_t, uint64_t, <)
+
+DO_CMP_PPZW_B(sve_cmpls_ppzw_b, uint8_t, uint64_t, <=)
+DO_CMP_PPZW_H(sve_cmpls_ppzw_h, uint16_t, uint64_t, <=)
+DO_CMP_PPZW_S(sve_cmpls_ppzw_s, uint32_t, uint64_t, <=)
+
+#undef DO_CMP_PPZW_B
+#undef DO_CMP_PPZW_H
+#undef DO_CMP_PPZW_S
+#undef DO_CMP_PPZW
+
+/* Similar, but the second source is immediate. */
+#define DO_CMP_PPZI(NAME, TYPE, OP, H, MASK) \
+uint32_t HELPER(NAME)(void *vd, void *vn, void *vg, uint32_t desc) \
+{ \
+ intptr_t opr_sz = simd_oprsz(desc); \
+ uint32_t flags = PREDTEST_INIT; \
+ TYPE mm = simd_data(desc); \
+ intptr_t i = opr_sz; \
+ do { \
+ uint64_t out = 0, pg; \
+ do { \
+ i -= sizeof(TYPE), out <<= sizeof(TYPE); \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ out |= nn OP mm; \
+ } while (i & 63); \
+ pg = *(uint64_t *)(vg + (i >> 3)) & MASK; \
+ out &= pg; \
+ *(uint64_t *)(vd + (i >> 3)) = out; \
+ flags = iter_predtest_bwd(out, pg, flags); \
+ } while (i > 0); \
+ return flags; \
+}
+
+#define DO_CMP_PPZI_B(NAME, TYPE, OP) \
+ DO_CMP_PPZI(NAME, TYPE, OP, H1, 0xffffffffffffffffull)
+#define DO_CMP_PPZI_H(NAME, TYPE, OP) \
+ DO_CMP_PPZI(NAME, TYPE, OP, H1_2, 0x5555555555555555ull)
+#define DO_CMP_PPZI_S(NAME, TYPE, OP) \
+ DO_CMP_PPZI(NAME, TYPE, OP, H1_4, 0x1111111111111111ull)
+#define DO_CMP_PPZI_D(NAME, TYPE, OP) \
+ DO_CMP_PPZI(NAME, TYPE, OP, H1_8, 0x0101010101010101ull)
+
+DO_CMP_PPZI_B(sve_cmpeq_ppzi_b, uint8_t, ==)
+DO_CMP_PPZI_H(sve_cmpeq_ppzi_h, uint16_t, ==)
+DO_CMP_PPZI_S(sve_cmpeq_ppzi_s, uint32_t, ==)
+DO_CMP_PPZI_D(sve_cmpeq_ppzi_d, uint64_t, ==)
+
+DO_CMP_PPZI_B(sve_cmpne_ppzi_b, uint8_t, !=)
+DO_CMP_PPZI_H(sve_cmpne_ppzi_h, uint16_t, !=)
+DO_CMP_PPZI_S(sve_cmpne_ppzi_s, uint32_t, !=)
+DO_CMP_PPZI_D(sve_cmpne_ppzi_d, uint64_t, !=)
+
+DO_CMP_PPZI_B(sve_cmpgt_ppzi_b, int8_t, >)
+DO_CMP_PPZI_H(sve_cmpgt_ppzi_h, int16_t, >)
+DO_CMP_PPZI_S(sve_cmpgt_ppzi_s, int32_t, >)
+DO_CMP_PPZI_D(sve_cmpgt_ppzi_d, int64_t, >)
+
+DO_CMP_PPZI_B(sve_cmpge_ppzi_b, int8_t, >=)
+DO_CMP_PPZI_H(sve_cmpge_ppzi_h, int16_t, >=)
+DO_CMP_PPZI_S(sve_cmpge_ppzi_s, int32_t, >=)
+DO_CMP_PPZI_D(sve_cmpge_ppzi_d, int64_t, >=)
+
+DO_CMP_PPZI_B(sve_cmphi_ppzi_b, uint8_t, >)
+DO_CMP_PPZI_H(sve_cmphi_ppzi_h, uint16_t, >)
+DO_CMP_PPZI_S(sve_cmphi_ppzi_s, uint32_t, >)
+DO_CMP_PPZI_D(sve_cmphi_ppzi_d, uint64_t, >)
+
+DO_CMP_PPZI_B(sve_cmphs_ppzi_b, uint8_t, >=)
+DO_CMP_PPZI_H(sve_cmphs_ppzi_h, uint16_t, >=)
+DO_CMP_PPZI_S(sve_cmphs_ppzi_s, uint32_t, >=)
+DO_CMP_PPZI_D(sve_cmphs_ppzi_d, uint64_t, >=)
+
+DO_CMP_PPZI_B(sve_cmplt_ppzi_b, int8_t, <)
+DO_CMP_PPZI_H(sve_cmplt_ppzi_h, int16_t, <)
+DO_CMP_PPZI_S(sve_cmplt_ppzi_s, int32_t, <)
+DO_CMP_PPZI_D(sve_cmplt_ppzi_d, int64_t, <)
+
+DO_CMP_PPZI_B(sve_cmple_ppzi_b, int8_t, <=)
+DO_CMP_PPZI_H(sve_cmple_ppzi_h, int16_t, <=)
+DO_CMP_PPZI_S(sve_cmple_ppzi_s, int32_t, <=)
+DO_CMP_PPZI_D(sve_cmple_ppzi_d, int64_t, <=)
+
+DO_CMP_PPZI_B(sve_cmplo_ppzi_b, uint8_t, <)
+DO_CMP_PPZI_H(sve_cmplo_ppzi_h, uint16_t, <)
+DO_CMP_PPZI_S(sve_cmplo_ppzi_s, uint32_t, <)
+DO_CMP_PPZI_D(sve_cmplo_ppzi_d, uint64_t, <)
+
+DO_CMP_PPZI_B(sve_cmpls_ppzi_b, uint8_t, <=)
+DO_CMP_PPZI_H(sve_cmpls_ppzi_h, uint16_t, <=)
+DO_CMP_PPZI_S(sve_cmpls_ppzi_s, uint32_t, <=)
+DO_CMP_PPZI_D(sve_cmpls_ppzi_d, uint64_t, <=)
+
+#undef DO_CMP_PPZI_B
+#undef DO_CMP_PPZI_H
+#undef DO_CMP_PPZI_S
+#undef DO_CMP_PPZI_D
+#undef DO_CMP_PPZI
+
+/* Similar to the ARM LastActive pseudocode function. */
+static bool last_active_pred(void *vd, void *vg, intptr_t oprsz)
+{
+ intptr_t i;
+
+ for (i = QEMU_ALIGN_UP(oprsz, 8) - 8; i >= 0; i -= 8) {
+ uint64_t pg = *(uint64_t *)(vg + i);
+ if (pg) {
+ return (pow2floor(pg) & *(uint64_t *)(vd + i)) != 0;
+ }
+ }
+ return 0;
+}
+
+/* Compute a mask into RETB that is true for all G, up to and including
+ * (if after) or excluding (if !after) the first G & N.
+ * Return true if BRK found.
+ */
+static bool compute_brk(uint64_t *retb, uint64_t n, uint64_t g,
+ bool brk, bool after)
+{
+ uint64_t b;
+
+ if (brk) {
+ b = 0;
+ } else if ((g & n) == 0) {
+ /* For all G, no N are set; break not found. */
+ b = g;
+ } else {
+ /* Break somewhere in N. Locate it. */
+ b = g & n; /* guard true, pred true */
+ b = b & -b; /* first such */
+ if (after) {
+ b = b | (b - 1); /* break after same */
+ } else {
+ b = b - 1; /* break before same */
+ }
+ brk = true;
+ }
+
+ *retb = b;
+ return brk;
+}
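+
+/*
+ * For example, with G = 0xff and N = 0x10 the first active true element
+ * is bit 4: the "after" form (BRKA) produces B = 0x1f, including that
+ * element, while the "before" form (BRKB) produces B = 0x0f, stopping
+ * just short of it.
+ */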
+
+/* Compute a zeroing BRK. */
+static void compute_brk_z(uint64_t *d, uint64_t *n, uint64_t *g,
+ intptr_t oprsz, bool after)
+{
+ bool brk = false;
+ intptr_t i;
+
+ for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
+ uint64_t this_b, this_g = g[i];
+
+ brk = compute_brk(&this_b, n[i], this_g, brk, after);
+ d[i] = this_b & this_g;
+ }
+}
+
+/* Likewise, but also compute flags. */
+static uint32_t compute_brks_z(uint64_t *d, uint64_t *n, uint64_t *g,
+ intptr_t oprsz, bool after)
+{
+ uint32_t flags = PREDTEST_INIT;
+ bool brk = false;
+ intptr_t i;
+
+ for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
+ uint64_t this_b, this_d, this_g = g[i];
+
+ brk = compute_brk(&this_b, n[i], this_g, brk, after);
+ d[i] = this_d = this_b & this_g;
+ flags = iter_predtest_fwd(this_d, this_g, flags);
+ }
+ return flags;
+}
+
+/* Compute a merging BRK. */
+static void compute_brk_m(uint64_t *d, uint64_t *n, uint64_t *g,
+ intptr_t oprsz, bool after)
+{
+ bool brk = false;
+ intptr_t i;
+
+ for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
+ uint64_t this_b, this_g = g[i];
+
+ brk = compute_brk(&this_b, n[i], this_g, brk, after);
+ d[i] = (this_b & this_g) | (d[i] & ~this_g);
+ }
+}
+
+/* Likewise, but also compute flags. */
+static uint32_t compute_brks_m(uint64_t *d, uint64_t *n, uint64_t *g,
+ intptr_t oprsz, bool after)
+{
+ uint32_t flags = PREDTEST_INIT;
+ bool brk = false;
+ intptr_t i;
+
+ for (i = 0; i < oprsz / 8; ++i) {
+ uint64_t this_b, this_d = d[i], this_g = g[i];
+
+ brk = compute_brk(&this_b, n[i], this_g, brk, after);
+ d[i] = this_d = (this_b & this_g) | (this_d & ~this_g);
+ flags = iter_predtest_fwd(this_d, this_g, flags);
+ }
+ return flags;
+}
+
+static uint32_t do_zero(ARMPredicateReg *d, intptr_t oprsz)
+{
+ /*
+ * It is quicker to zero the whole predicate than to loop over OPRSZ.
+ * The compiler should turn this into 4 64-bit integer stores.
+ */
+ memset(d, 0, sizeof(ARMPredicateReg));
+ return PREDTEST_INIT;
+}
+
+void HELPER(sve_brkpa)(void *vd, void *vn, void *vm, void *vg,
+ uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ if (last_active_pred(vn, vg, oprsz)) {
+ compute_brk_z(vd, vm, vg, oprsz, true);
+ } else {
+ do_zero(vd, oprsz);
+ }
+}
+
+uint32_t HELPER(sve_brkpas)(void *vd, void *vn, void *vm, void *vg,
+ uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ if (last_active_pred(vn, vg, oprsz)) {
+ return compute_brks_z(vd, vm, vg, oprsz, true);
+ } else {
+ return do_zero(vd, oprsz);
+ }
+}
+
+void HELPER(sve_brkpb)(void *vd, void *vn, void *vm, void *vg,
+ uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ if (last_active_pred(vn, vg, oprsz)) {
+ compute_brk_z(vd, vm, vg, oprsz, false);
+ } else {
+ do_zero(vd, oprsz);
+ }
+}
+
+uint32_t HELPER(sve_brkpbs)(void *vd, void *vn, void *vm, void *vg,
+ uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ if (last_active_pred(vn, vg, oprsz)) {
+ return compute_brks_z(vd, vm, vg, oprsz, false);
+ } else {
+ return do_zero(vd, oprsz);
+ }
+}
+
+void HELPER(sve_brka_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ compute_brk_z(vd, vn, vg, oprsz, true);
+}
+
+uint32_t HELPER(sve_brkas_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ return compute_brks_z(vd, vn, vg, oprsz, true);
+}
+
+void HELPER(sve_brkb_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ compute_brk_z(vd, vn, vg, oprsz, false);
+}
+
+uint32_t HELPER(sve_brkbs_z)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ return compute_brks_z(vd, vn, vg, oprsz, false);
+}
+
+void HELPER(sve_brka_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ compute_brk_m(vd, vn, vg, oprsz, true);
+}
+
+uint32_t HELPER(sve_brkas_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ return compute_brks_m(vd, vn, vg, oprsz, true);
+}
+
+void HELPER(sve_brkb_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ compute_brk_m(vd, vn, vg, oprsz, false);
+}
+
+uint32_t HELPER(sve_brkbs_m)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ return compute_brks_m(vd, vn, vg, oprsz, false);
+}
+
+void HELPER(sve_brkn)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ if (!last_active_pred(vn, vg, oprsz)) {
+ do_zero(vd, oprsz);
+ }
+}
+
+/* As if PredTest(Ones(PL), D, esz). */
+static uint32_t predtest_ones(ARMPredicateReg *d, intptr_t oprsz,
+ uint64_t esz_mask)
+{
+ uint32_t flags = PREDTEST_INIT;
+ intptr_t i;
+
+ for (i = 0; i < oprsz / 8; i++) {
+ flags = iter_predtest_fwd(d->p[i], esz_mask, flags);
+ }
+ if (oprsz & 7) {
+ uint64_t mask = ~(-1ULL << (8 * (oprsz & 7)));
+ flags = iter_predtest_fwd(d->p[i], esz_mask & mask, flags);
+ }
+ return flags;
+}
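+
+/*
+ * The flags accumulated via iter_predtest_fwd follow the PredTest
+ * pseudocode: roughly, N is set if the first active element is true,
+ * Z is set if no active element is true, and C is clear if the last
+ * active element is true; V is always zero.
+ */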
+
+uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ if (last_active_pred(vn, vg, oprsz)) {
+ return predtest_ones(vd, oprsz, -1);
+ } else {
+ return do_zero(vd, oprsz);
+ }
+}
+
+uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
+{
+ intptr_t words = DIV_ROUND_UP(FIELD_EX32(pred_desc, PREDDESC, OPRSZ), 8);
+ intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
+ uint64_t *n = vn, *g = vg, sum = 0, mask = pred_esz_masks[esz];
+ intptr_t i;
+
+ for (i = 0; i < words; ++i) {
+ uint64_t t = n[i] & g[i] & mask;
+ sum += ctpop64(t);
+ }
+ return sum;
+}
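+
+/*
+ * Reminder: PREDDESC packs the predicate size in bytes (OPRSZ) and the
+ * log2 element size (ESZ), and pred_esz_masks[esz] has the low bit of
+ * each element's predicate slot set: all ones for bytes, 0x5555... for
+ * halfwords, 0x1111... for words and 0x0101... for doublewords.
+ */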
+
+uint32_t HELPER(sve_whilel)(void *vd, uint32_t count, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
+ uint64_t esz_mask = pred_esz_masks[esz];
+ ARMPredicateReg *d = vd;
+ uint32_t flags;
+ intptr_t i;
+
+ /* Begin with a zero predicate register. */
+ flags = do_zero(d, oprsz);
+ if (count == 0) {
+ return flags;
+ }
+
+ /* Set all of the requested bits. */
+ for (i = 0; i < count / 64; ++i) {
+ d->p[i] = esz_mask;
+ }
+ if (count & 63) {
+ d->p[i] = MAKE_64BIT_MASK(0, count & 63) & esz_mask;
+ }
+
+ return predtest_ones(d, oprsz, esz_mask);
+}
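+
+/*
+ * Worked example for sve_whilel: with .S elements (esz_mask 0x1111...)
+ * and count = 12 predicate bits, i.e. three active elements, the loop
+ * above is skipped and d->p[0] = 0xfff & esz_mask = 0x111, setting
+ * predicate bits 0, 4 and 8.
+ */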
+
+uint32_t HELPER(sve_whileg)(void *vd, uint32_t count, uint32_t pred_desc)
+{
+ intptr_t oprsz = FIELD_EX32(pred_desc, PREDDESC, OPRSZ);
+ intptr_t esz = FIELD_EX32(pred_desc, PREDDESC, ESZ);
+ uint64_t esz_mask = pred_esz_masks[esz];
+ ARMPredicateReg *d = vd;
+ intptr_t i, invcount, oprbits;
+ uint64_t bits;
+
+ if (count == 0) {
+ return do_zero(d, oprsz);
+ }
+
+ oprbits = oprsz * 8;
+ tcg_debug_assert(count <= oprbits);
+
+ bits = esz_mask;
+ if (oprbits & 63) {
+ bits &= MAKE_64BIT_MASK(0, oprbits & 63);
+ }
+
+ invcount = oprbits - count;
+ for (i = (oprsz - 1) / 8; i > invcount / 64; --i) {
+ d->p[i] = bits;
+ bits = esz_mask;
+ }
+
+ d->p[i] = bits & MAKE_64BIT_MASK(invcount & 63, 64);
+
+ while (--i >= 0) {
+ d->p[i] = 0;
+ }
+
+ return predtest_ones(d, oprsz, esz_mask);
+}
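+
+/*
+ * This variant builds the predicate from the top down: the low
+ * (oprbits - count) predicate bits are left clear and the high count
+ * bits are set, subject to esz_mask and the vector length.
+ */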
+
+/* Recursive reduction on a function;
+ * Cf. the ARM ARM function ReducePredicated.
+ *
+ * While it would be possible to write this without the DATA temporary,
+ * it is much simpler to process the predicate register this way.
+ * The recursion is bounded to depth 7 (128 fp16 elements), so there's
+ * little to gain with a more complex non-recursive form.
+ */
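+/*
+ * For example, with 8 elements the reduction is computed pairwise as
+ * ((d0 op d1) op (d2 op d3)) op ((d4 op d5) op (d6 op d7)).  Inactive
+ * and trailing elements are filled with the identity for the operation
+ * (zero for add, +Inf for min, -Inf for max, the default NaN for
+ * minnum/maxnum), so they do not affect the result.
+ */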
+#define DO_REDUCE(NAME, TYPE, H, FUNC, IDENT) \
+static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \
+{ \
+ if (n == 1) { \
+ return *data; \
+ } else { \
+ uintptr_t half = n / 2; \
+ TYPE lo = NAME##_reduce(data, status, half); \
+ TYPE hi = NAME##_reduce(data + half, status, half); \
+ return TYPE##_##FUNC(lo, hi, status); \
+ } \
+} \
+uint64_t HELPER(NAME)(void *vn, void *vg, void *vs, uint32_t desc) \
+{ \
+ uintptr_t i, oprsz = simd_oprsz(desc), maxsz = simd_data(desc); \
+ TYPE data[sizeof(ARMVectorReg) / sizeof(TYPE)]; \
+ for (i = 0; i < oprsz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ *(TYPE *)((void *)data + i) = (pg & 1 ? nn : IDENT); \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ } while (i & 15); \
+ } \
+ for (; i < maxsz; i += sizeof(TYPE)) { \
+ *(TYPE *)((void *)data + i) = IDENT; \
+ } \
+ return NAME##_reduce(data, vs, maxsz / sizeof(TYPE)); \
+}
+
+DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero)
+DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero)
+DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero)
+
+/* Identity is floatN_default_nan, without the function call. */
+DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00)
+DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000)
+DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL)
+
+DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00)
+DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000)
+DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL)
+
+DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity)
+DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity)
+DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity)
+
+DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity))
+DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity))
+DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity))
+
+#undef DO_REDUCE
+
+uint64_t HELPER(sve_fadda_h)(uint64_t nn, void *vm, void *vg,
+ void *status, uint32_t desc)
+{
+ intptr_t i = 0, opr_sz = simd_oprsz(desc);
+ float16 result = nn;
+
+ do {
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+ do {
+ if (pg & 1) {
+ float16 mm = *(float16 *)(vm + H1_2(i));
+ result = float16_add(result, mm, status);
+ }
+ i += sizeof(float16), pg >>= sizeof(float16);
+ } while (i & 15);
+ } while (i < opr_sz);
+
+ return result;
+}
+
+uint64_t HELPER(sve_fadda_s)(uint64_t nn, void *vm, void *vg,
+ void *status, uint32_t desc)
+{
+ intptr_t i = 0, opr_sz = simd_oprsz(desc);
+ float32 result = nn;
+
+ do {
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+ do {
+ if (pg & 1) {
+ float32 mm = *(float32 *)(vm + H1_2(i));
+ result = float32_add(result, mm, status);
+ }
+ i += sizeof(float32), pg >>= sizeof(float32);
+ } while (i & 15);
+ } while (i < opr_sz);
+
+ return result;
+}
+
+uint64_t HELPER(sve_fadda_d)(uint64_t nn, void *vm, void *vg,
+ void *status, uint32_t desc)
+{
+ intptr_t i = 0, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *m = vm;
+ uint8_t *pg = vg;
+
+ for (i = 0; i < opr_sz; i++) {
+ if (pg[H1(i)] & 1) {
+ nn = float64_add(nn, m[i], status);
+ }
+ }
+
+ return nn;
+}
+
+/* Fully general three-operand expander, controlled by a predicate,
+ * with the extra float_status parameter.
+ */
+#define DO_ZPZZ_FP(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
+ void *status, uint32_t desc) \
+{ \
+ intptr_t i = simd_oprsz(desc); \
+ uint64_t *g = vg; \
+ do { \
+ uint64_t pg = g[(i - 1) >> 6]; \
+ do { \
+ i -= sizeof(TYPE); \
+ if (likely((pg >> (i & 63)) & 1)) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ TYPE mm = *(TYPE *)(vm + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(nn, mm, status); \
+ } \
+ } while (i & 63); \
+ } while (i != 0); \
+}
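+
+/*
+ * Note on the iteration used by this and the similar expanders below:
+ * i is a byte offset into the vector, walked downwards in chunks of 64
+ * predicate bits.  The predicate has one bit per vector byte, so bit
+ * (i & 63) of the current predicate word is the low bit of the element
+ * at offset i, and only that low bit is consulted per element.
+ */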
+
+DO_ZPZZ_FP(sve_fadd_h, uint16_t, H1_2, float16_add)
+DO_ZPZZ_FP(sve_fadd_s, uint32_t, H1_4, float32_add)
+DO_ZPZZ_FP(sve_fadd_d, uint64_t, H1_8, float64_add)
+
+DO_ZPZZ_FP(sve_fsub_h, uint16_t, H1_2, float16_sub)
+DO_ZPZZ_FP(sve_fsub_s, uint32_t, H1_4, float32_sub)
+DO_ZPZZ_FP(sve_fsub_d, uint64_t, H1_8, float64_sub)
+
+DO_ZPZZ_FP(sve_fmul_h, uint16_t, H1_2, float16_mul)
+DO_ZPZZ_FP(sve_fmul_s, uint32_t, H1_4, float32_mul)
+DO_ZPZZ_FP(sve_fmul_d, uint64_t, H1_8, float64_mul)
+
+DO_ZPZZ_FP(sve_fdiv_h, uint16_t, H1_2, float16_div)
+DO_ZPZZ_FP(sve_fdiv_s, uint32_t, H1_4, float32_div)
+DO_ZPZZ_FP(sve_fdiv_d, uint64_t, H1_8, float64_div)
+
+DO_ZPZZ_FP(sve_fmin_h, uint16_t, H1_2, float16_min)
+DO_ZPZZ_FP(sve_fmin_s, uint32_t, H1_4, float32_min)
+DO_ZPZZ_FP(sve_fmin_d, uint64_t, H1_8, float64_min)
+
+DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max)
+DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max)
+DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max)
+
+DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum)
+DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum)
+DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum)
+
+DO_ZPZZ_FP(sve_fmaxnum_h, uint16_t, H1_2, float16_maxnum)
+DO_ZPZZ_FP(sve_fmaxnum_s, uint32_t, H1_4, float32_maxnum)
+DO_ZPZZ_FP(sve_fmaxnum_d, uint64_t, H1_8, float64_maxnum)
+
+static inline float16 abd_h(float16 a, float16 b, float_status *s)
+{
+ return float16_abs(float16_sub(a, b, s));
+}
+
+static inline float32 abd_s(float32 a, float32 b, float_status *s)
+{
+ return float32_abs(float32_sub(a, b, s));
+}
+
+static inline float64 abd_d(float64 a, float64 b, float_status *s)
+{
+ return float64_abs(float64_sub(a, b, s));
+}
+
+DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h)
+DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s)
+DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d)
+
+static inline float64 scalbn_d(float64 a, int64_t b, float_status *s)
+{
+ int b_int = MIN(MAX(b, INT_MIN), INT_MAX);
+ return float64_scalbn(a, b_int, s);
+}
+
+DO_ZPZZ_FP(sve_fscalbn_h, int16_t, H1_2, float16_scalbn)
+DO_ZPZZ_FP(sve_fscalbn_s, int32_t, H1_4, float32_scalbn)
+DO_ZPZZ_FP(sve_fscalbn_d, int64_t, H1_8, scalbn_d)
+
+DO_ZPZZ_FP(sve_fmulx_h, uint16_t, H1_2, helper_advsimd_mulxh)
+DO_ZPZZ_FP(sve_fmulx_s, uint32_t, H1_4, helper_vfp_mulxs)
+DO_ZPZZ_FP(sve_fmulx_d, uint64_t, H1_8, helper_vfp_mulxd)
+
+#undef DO_ZPZZ_FP
+
+/* Three-operand expander, with one scalar operand, controlled by
+ * a predicate, with the extra float_status parameter.
+ */
+#define DO_ZPZS_FP(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, uint64_t scalar, \
+ void *status, uint32_t desc) \
+{ \
+ intptr_t i = simd_oprsz(desc); \
+ uint64_t *g = vg; \
+ TYPE mm = scalar; \
+ do { \
+ uint64_t pg = g[(i - 1) >> 6]; \
+ do { \
+ i -= sizeof(TYPE); \
+ if (likely((pg >> (i & 63)) & 1)) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(nn, mm, status); \
+ } \
+ } while (i & 63); \
+ } while (i != 0); \
+}
+
+DO_ZPZS_FP(sve_fadds_h, float16, H1_2, float16_add)
+DO_ZPZS_FP(sve_fadds_s, float32, H1_4, float32_add)
+DO_ZPZS_FP(sve_fadds_d, float64, H1_8, float64_add)
+
+DO_ZPZS_FP(sve_fsubs_h, float16, H1_2, float16_sub)
+DO_ZPZS_FP(sve_fsubs_s, float32, H1_4, float32_sub)
+DO_ZPZS_FP(sve_fsubs_d, float64, H1_8, float64_sub)
+
+DO_ZPZS_FP(sve_fmuls_h, float16, H1_2, float16_mul)
+DO_ZPZS_FP(sve_fmuls_s, float32, H1_4, float32_mul)
+DO_ZPZS_FP(sve_fmuls_d, float64, H1_8, float64_mul)
+
+static inline float16 subr_h(float16 a, float16 b, float_status *s)
+{
+ return float16_sub(b, a, s);
+}
+
+static inline float32 subr_s(float32 a, float32 b, float_status *s)
+{
+ return float32_sub(b, a, s);
+}
+
+static inline float64 subr_d(float64 a, float64 b, float_status *s)
+{
+ return float64_sub(b, a, s);
+}
+
+DO_ZPZS_FP(sve_fsubrs_h, float16, H1_2, subr_h)
+DO_ZPZS_FP(sve_fsubrs_s, float32, H1_4, subr_s)
+DO_ZPZS_FP(sve_fsubrs_d, float64, H1_8, subr_d)
+
+DO_ZPZS_FP(sve_fmaxnms_h, float16, H1_2, float16_maxnum)
+DO_ZPZS_FP(sve_fmaxnms_s, float32, H1_4, float32_maxnum)
+DO_ZPZS_FP(sve_fmaxnms_d, float64, H1_8, float64_maxnum)
+
+DO_ZPZS_FP(sve_fminnms_h, float16, H1_2, float16_minnum)
+DO_ZPZS_FP(sve_fminnms_s, float32, H1_4, float32_minnum)
+DO_ZPZS_FP(sve_fminnms_d, float64, H1_8, float64_minnum)
+
+DO_ZPZS_FP(sve_fmaxs_h, float16, H1_2, float16_max)
+DO_ZPZS_FP(sve_fmaxs_s, float32, H1_4, float32_max)
+DO_ZPZS_FP(sve_fmaxs_d, float64, H1_8, float64_max)
+
+DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min)
+DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min)
+DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min)
+
+/* Fully general two-operand expander, controlled by a predicate,
+ * with the extra float_status parameter.
+ */
+#define DO_ZPZ_FP(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \
+{ \
+ intptr_t i = simd_oprsz(desc); \
+ uint64_t *g = vg; \
+ do { \
+ uint64_t pg = g[(i - 1) >> 6]; \
+ do { \
+ i -= sizeof(TYPE); \
+ if (likely((pg >> (i & 63)) & 1)) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ *(TYPE *)(vd + H(i)) = OP(nn, status); \
+ } \
+ } while (i & 63); \
+ } while (i != 0); \
+}
+
+/* SVE fp16 conversions always use IEEE mode. Like AdvSIMD, they ignore
+ * FZ16. When converting from fp16, this affects flushing input denormals;
+ * when converting to fp16, this affects flushing output denormals.
+ */
+static inline float32 sve_f16_to_f32(float16 f, float_status *fpst)
+{
+ bool save = get_flush_inputs_to_zero(fpst);
+ float32 ret;
+
+ set_flush_inputs_to_zero(false, fpst);
+ ret = float16_to_float32(f, true, fpst);
+ set_flush_inputs_to_zero(save, fpst);
+ return ret;
+}
+
+static inline float64 sve_f16_to_f64(float16 f, float_status *fpst)
+{
+ bool save = get_flush_inputs_to_zero(fpst);
+ float64 ret;
+
+ set_flush_inputs_to_zero(false, fpst);
+ ret = float16_to_float64(f, true, fpst);
+ set_flush_inputs_to_zero(save, fpst);
+ return ret;
+}
+
+static inline float16 sve_f32_to_f16(float32 f, float_status *fpst)
+{
+ bool save = get_flush_to_zero(fpst);
+ float16 ret;
+
+ set_flush_to_zero(false, fpst);
+ ret = float32_to_float16(f, true, fpst);
+ set_flush_to_zero(save, fpst);
+ return ret;
+}
+
+static inline float16 sve_f64_to_f16(float64 f, float_status *fpst)
+{
+ bool save = get_flush_to_zero(fpst);
+ float16 ret;
+
+ set_flush_to_zero(false, fpst);
+ ret = float64_to_float16(f, true, fpst);
+ set_flush_to_zero(save, fpst);
+ return ret;
+}
+
+static inline int16_t vfp_float16_to_int16_rtz(float16 f, float_status *s)
+{
+ if (float16_is_any_nan(f)) {
+ float_raise(float_flag_invalid, s);
+ return 0;
+ }
+ return float16_to_int16_round_to_zero(f, s);
+}
+
+static inline int64_t vfp_float16_to_int64_rtz(float16 f, float_status *s)
+{
+ if (float16_is_any_nan(f)) {
+ float_raise(float_flag_invalid, s);
+ return 0;
+ }
+ return float16_to_int64_round_to_zero(f, s);
+}
+
+static inline int64_t vfp_float32_to_int64_rtz(float32 f, float_status *s)
+{
+ if (float32_is_any_nan(f)) {
+ float_raise(float_flag_invalid, s);
+ return 0;
+ }
+ return float32_to_int64_round_to_zero(f, s);
+}
+
+static inline int64_t vfp_float64_to_int64_rtz(float64 f, float_status *s)
+{
+ if (float64_is_any_nan(f)) {
+ float_raise(float_flag_invalid, s);
+ return 0;
+ }
+ return float64_to_int64_round_to_zero(f, s);
+}
+
+static inline uint16_t vfp_float16_to_uint16_rtz(float16 f, float_status *s)
+{
+ if (float16_is_any_nan(f)) {
+ float_raise(float_flag_invalid, s);
+ return 0;
+ }
+ return float16_to_uint16_round_to_zero(f, s);
+}
+
+static inline uint64_t vfp_float16_to_uint64_rtz(float16 f, float_status *s)
+{
+ if (float16_is_any_nan(f)) {
+ float_raise(float_flag_invalid, s);
+ return 0;
+ }
+ return float16_to_uint64_round_to_zero(f, s);
+}
+
+static inline uint64_t vfp_float32_to_uint64_rtz(float32 f, float_status *s)
+{
+ if (float32_is_any_nan(f)) {
+ float_raise(float_flag_invalid, s);
+ return 0;
+ }
+ return float32_to_uint64_round_to_zero(f, s);
+}
+
+static inline uint64_t vfp_float64_to_uint64_rtz(float64 f, float_status *s)
+{
+ if (float64_is_any_nan(f)) {
+ float_raise(float_flag_invalid, s);
+ return 0;
+ }
+ return float64_to_uint64_round_to_zero(f, s);
+}
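+
+/*
+ * The explicit NaN checks above exist because FCVTZS/FCVTZU convert a
+ * NaN input to zero (while raising Invalid Operation), whereas the
+ * generic softfloat round-to-zero conversions do not return zero for
+ * NaN.
+ */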
+
+DO_ZPZ_FP(sve_fcvt_sh, uint32_t, H1_4, sve_f32_to_f16)
+DO_ZPZ_FP(sve_fcvt_hs, uint32_t, H1_4, sve_f16_to_f32)
+DO_ZPZ_FP(sve_bfcvt, uint32_t, H1_4, float32_to_bfloat16)
+DO_ZPZ_FP(sve_fcvt_dh, uint64_t, H1_8, sve_f64_to_f16)
+DO_ZPZ_FP(sve_fcvt_hd, uint64_t, H1_8, sve_f16_to_f64)
+DO_ZPZ_FP(sve_fcvt_ds, uint64_t, H1_8, float64_to_float32)
+DO_ZPZ_FP(sve_fcvt_sd, uint64_t, H1_8, float32_to_float64)
+
+DO_ZPZ_FP(sve_fcvtzs_hh, uint16_t, H1_2, vfp_float16_to_int16_rtz)
+DO_ZPZ_FP(sve_fcvtzs_hs, uint32_t, H1_4, helper_vfp_tosizh)
+DO_ZPZ_FP(sve_fcvtzs_ss, uint32_t, H1_4, helper_vfp_tosizs)
+DO_ZPZ_FP(sve_fcvtzs_hd, uint64_t, H1_8, vfp_float16_to_int64_rtz)
+DO_ZPZ_FP(sve_fcvtzs_sd, uint64_t, H1_8, vfp_float32_to_int64_rtz)
+DO_ZPZ_FP(sve_fcvtzs_ds, uint64_t, H1_8, helper_vfp_tosizd)
+DO_ZPZ_FP(sve_fcvtzs_dd, uint64_t, H1_8, vfp_float64_to_int64_rtz)
+
+DO_ZPZ_FP(sve_fcvtzu_hh, uint16_t, H1_2, vfp_float16_to_uint16_rtz)
+DO_ZPZ_FP(sve_fcvtzu_hs, uint32_t, H1_4, helper_vfp_touizh)
+DO_ZPZ_FP(sve_fcvtzu_ss, uint32_t, H1_4, helper_vfp_touizs)
+DO_ZPZ_FP(sve_fcvtzu_hd, uint64_t, H1_8, vfp_float16_to_uint64_rtz)
+DO_ZPZ_FP(sve_fcvtzu_sd, uint64_t, H1_8, vfp_float32_to_uint64_rtz)
+DO_ZPZ_FP(sve_fcvtzu_ds, uint64_t, H1_8, helper_vfp_touizd)
+DO_ZPZ_FP(sve_fcvtzu_dd, uint64_t, H1_8, vfp_float64_to_uint64_rtz)
+
+DO_ZPZ_FP(sve_frint_h, uint16_t, H1_2, helper_advsimd_rinth)
+DO_ZPZ_FP(sve_frint_s, uint32_t, H1_4, helper_rints)
+DO_ZPZ_FP(sve_frint_d, uint64_t, H1_8, helper_rintd)
+
+DO_ZPZ_FP(sve_frintx_h, uint16_t, H1_2, float16_round_to_int)
+DO_ZPZ_FP(sve_frintx_s, uint32_t, H1_4, float32_round_to_int)
+DO_ZPZ_FP(sve_frintx_d, uint64_t, H1_8, float64_round_to_int)
+
+DO_ZPZ_FP(sve_frecpx_h, uint16_t, H1_2, helper_frecpx_f16)
+DO_ZPZ_FP(sve_frecpx_s, uint32_t, H1_4, helper_frecpx_f32)
+DO_ZPZ_FP(sve_frecpx_d, uint64_t, H1_8, helper_frecpx_f64)
+
+DO_ZPZ_FP(sve_fsqrt_h, uint16_t, H1_2, float16_sqrt)
+DO_ZPZ_FP(sve_fsqrt_s, uint32_t, H1_4, float32_sqrt)
+DO_ZPZ_FP(sve_fsqrt_d, uint64_t, H1_8, float64_sqrt)
+
+DO_ZPZ_FP(sve_scvt_hh, uint16_t, H1_2, int16_to_float16)
+DO_ZPZ_FP(sve_scvt_sh, uint32_t, H1_4, int32_to_float16)
+DO_ZPZ_FP(sve_scvt_ss, uint32_t, H1_4, int32_to_float32)
+DO_ZPZ_FP(sve_scvt_sd, uint64_t, H1_8, int32_to_float64)
+DO_ZPZ_FP(sve_scvt_dh, uint64_t, H1_8, int64_to_float16)
+DO_ZPZ_FP(sve_scvt_ds, uint64_t, H1_8, int64_to_float32)
+DO_ZPZ_FP(sve_scvt_dd, uint64_t, H1_8, int64_to_float64)
+
+DO_ZPZ_FP(sve_ucvt_hh, uint16_t, H1_2, uint16_to_float16)
+DO_ZPZ_FP(sve_ucvt_sh, uint32_t, H1_4, uint32_to_float16)
+DO_ZPZ_FP(sve_ucvt_ss, uint32_t, H1_4, uint32_to_float32)
+DO_ZPZ_FP(sve_ucvt_sd, uint64_t, H1_8, uint32_to_float64)
+DO_ZPZ_FP(sve_ucvt_dh, uint64_t, H1_8, uint64_to_float16)
+DO_ZPZ_FP(sve_ucvt_ds, uint64_t, H1_8, uint64_to_float32)
+DO_ZPZ_FP(sve_ucvt_dd, uint64_t, H1_8, uint64_to_float64)
+
+static int16_t do_float16_logb_as_int(float16 a, float_status *s)
+{
+ /* Extract frac to the top of the uint32_t. */
+ uint32_t frac = (uint32_t)a << (16 + 6);
+ int16_t exp = extract32(a, 10, 5);
+
+ if (unlikely(exp == 0)) {
+ if (frac != 0) {
+ if (!get_flush_inputs_to_zero(s)) {
+ /* denormal: bias - fractional_zeros */
+ return -15 - clz32(frac);
+ }
+ /* flush to zero */
+ float_raise(float_flag_input_denormal, s);
+ }
+ } else if (unlikely(exp == 0x1f)) {
+ if (frac == 0) {
+ return INT16_MAX; /* infinity */
+ }
+ } else {
+ /* normal: exp - bias */
+ return exp - 15;
+ }
+ /* nan or zero */
+ float_raise(float_flag_invalid, s);
+ return INT16_MIN;
+}
+
+static int32_t do_float32_logb_as_int(float32 a, float_status *s)
+{
+ /* Extract frac to the top of the uint32_t. */
+ uint32_t frac = a << 9;
+ int32_t exp = extract32(a, 23, 8);
+
+ if (unlikely(exp == 0)) {
+ if (frac != 0) {
+ if (!get_flush_inputs_to_zero(s)) {
+ /* denormal: bias - fractional_zeros */
+ return -127 - clz32(frac);
+ }
+ /* flush to zero */
+ float_raise(float_flag_input_denormal, s);
+ }
+ } else if (unlikely(exp == 0xff)) {
+ if (frac == 0) {
+ return INT32_MAX; /* infinity */
+ }
+ } else {
+ /* normal: exp - bias */
+ return exp - 127;
+ }
+ /* nan or zero */
+ float_raise(float_flag_invalid, s);
+ return INT32_MIN;
+}
+
+static int64_t do_float64_logb_as_int(float64 a, float_status *s)
+{
+ /* Extract frac to the top of the uint64_t. */
+ uint64_t frac = a << 12;
+ int64_t exp = extract64(a, 52, 11);
+
+ if (unlikely(exp == 0)) {
+ if (frac != 0) {
+ if (!get_flush_inputs_to_zero(s)) {
+ /* denormal: bias - fractional_zeros */
+ return -1023 - clz64(frac);
+ }
+ /* flush to zero */
+ float_raise(float_flag_input_denormal, s);
+ }
+ } else if (unlikely(exp == 0x7ff)) {
+ if (frac == 0) {
+ return INT64_MAX; /* infinity */
+ }
+ } else {
+ /* normal: exp - bias */
+ return exp - 1023;
+ }
+ /* nan or zero */
+ float_raise(float_flag_invalid, s);
+ return INT64_MIN;
+}
+
+DO_ZPZ_FP(flogb_h, float16, H1_2, do_float16_logb_as_int)
+DO_ZPZ_FP(flogb_s, float32, H1_4, do_float32_logb_as_int)
+DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
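+
+/*
+ * FLOGB examples, for single precision: flogb(1.0) = 0, flogb(0.5) = -1,
+ * flogb(Inf) = INT32_MAX, and flogb(0.0) or flogb(NaN) = INT32_MIN with
+ * Invalid Operation raised, matching the cases handled above.
+ */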
+
+#undef DO_ZPZ_FP
+
+static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
+ float_status *status, uint32_t desc,
+ uint16_t neg1, uint16_t neg3)
+{
+ intptr_t i = simd_oprsz(desc);
+ uint64_t *g = vg;
+
+ do {
+ uint64_t pg = g[(i - 1) >> 6];
+ do {
+ i -= 2;
+ if (likely((pg >> (i & 63)) & 1)) {
+ float16 e1, e2, e3, r;
+
+ e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
+ e2 = *(uint16_t *)(vm + H1_2(i));
+ e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
+ r = float16_muladd(e1, e2, e3, 0, status);
+ *(uint16_t *)(vd + H1_2(i)) = r;
+ }
+ } while (i & 63);
+ } while (i != 0);
+}
+
+void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0);
+}
+
+void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0);
+}
+
+void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000);
+}
+
+void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000);
+}
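+
+/*
+ * The neg1/neg3 arguments XOR the fp16 sign bit (0x8000) into the first
+ * multiplicand and/or the addend before the fused multiply-add, giving
+ * FMLA = a + n*m, FMLS = a - n*m, FNMLS = n*m - a, FNMLA = -a - n*m.
+ * The single and double precision variants below do the same with
+ * 0x80000000 and INT64_MIN respectively.
+ */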
+
+static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
+ float_status *status, uint32_t desc,
+ uint32_t neg1, uint32_t neg3)
+{
+ intptr_t i = simd_oprsz(desc);
+ uint64_t *g = vg;
+
+ do {
+ uint64_t pg = g[(i - 1) >> 6];
+ do {
+ i -= 4;
+ if (likely((pg >> (i & 63)) & 1)) {
+ float32 e1, e2, e3, r;
+
+ e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1;
+ e2 = *(uint32_t *)(vm + H1_4(i));
+ e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3;
+ r = float32_muladd(e1, e2, e3, 0, status);
+ *(uint32_t *)(vd + H1_4(i)) = r;
+ }
+ } while (i & 63);
+ } while (i != 0);
+}
+
+void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0);
+}
+
+void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0);
+}
+
+void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000);
+}
+
+void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000);
+}
+
+static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
+ float_status *status, uint32_t desc,
+ uint64_t neg1, uint64_t neg3)
+{
+ intptr_t i = simd_oprsz(desc);
+ uint64_t *g = vg;
+
+ do {
+ uint64_t pg = g[(i - 1) >> 6];
+ do {
+ i -= 8;
+ if (likely((pg >> (i & 63)) & 1)) {
+ float64 e1, e2, e3, r;
+
+ e1 = *(uint64_t *)(vn + i) ^ neg1;
+ e2 = *(uint64_t *)(vm + i);
+ e3 = *(uint64_t *)(va + i) ^ neg3;
+ r = float64_muladd(e1, e2, e3, 0, status);
+ *(uint64_t *)(vd + i) = r;
+ }
+ } while (i & 63);
+ } while (i != 0);
+}
+
+void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0);
+}
+
+void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0);
+}
+
+void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN);
+}
+
+void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN);
+}
+
+/* Two operand floating-point comparison controlled by a predicate.
+ * Unlike the integer version, we are not allowed to optimistically
+ * compare operands, since the comparison may have side effects wrt
+ * the FPSR.
+ */
+#define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \
+ void *status, uint32_t desc) \
+{ \
+ intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \
+ uint64_t *d = vd, *g = vg; \
+ do { \
+ uint64_t out = 0, pg = g[j]; \
+ do { \
+ i -= sizeof(TYPE), out <<= sizeof(TYPE); \
+ if (likely((pg >> (i & 63)) & 1)) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ TYPE mm = *(TYPE *)(vm + H(i)); \
+ out |= OP(TYPE, nn, mm, status); \
+ } \
+ } while (i & 63); \
+ d[j--] = out; \
+ } while (i > 0); \
+}
+
+#define DO_FPCMP_PPZZ_H(NAME, OP) \
+ DO_FPCMP_PPZZ(NAME##_h, float16, H1_2, OP)
+#define DO_FPCMP_PPZZ_S(NAME, OP) \
+ DO_FPCMP_PPZZ(NAME##_s, float32, H1_4, OP)
+#define DO_FPCMP_PPZZ_D(NAME, OP) \
+ DO_FPCMP_PPZZ(NAME##_d, float64, H1_8, OP)
+
+#define DO_FPCMP_PPZZ_ALL(NAME, OP) \
+ DO_FPCMP_PPZZ_H(NAME, OP) \
+ DO_FPCMP_PPZZ_S(NAME, OP) \
+ DO_FPCMP_PPZZ_D(NAME, OP)
+
+#define DO_FCMGE(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) <= 0
+#define DO_FCMGT(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) < 0
+#define DO_FCMLE(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) <= 0
+#define DO_FCMLT(TYPE, X, Y, ST) TYPE##_compare(X, Y, ST) < 0
+#define DO_FCMEQ(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) == 0
+#define DO_FCMNE(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) != 0
+#define DO_FCMUO(TYPE, X, Y, ST) \
+ TYPE##_compare_quiet(X, Y, ST) == float_relation_unordered
+#define DO_FACGE(TYPE, X, Y, ST) \
+ TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) <= 0
+#define DO_FACGT(TYPE, X, Y, ST) \
+ TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) < 0
+
+DO_FPCMP_PPZZ_ALL(sve_fcmge, DO_FCMGE)
+DO_FPCMP_PPZZ_ALL(sve_fcmgt, DO_FCMGT)
+DO_FPCMP_PPZZ_ALL(sve_fcmeq, DO_FCMEQ)
+DO_FPCMP_PPZZ_ALL(sve_fcmne, DO_FCMNE)
+DO_FPCMP_PPZZ_ALL(sve_fcmuo, DO_FCMUO)
+DO_FPCMP_PPZZ_ALL(sve_facge, DO_FACGE)
+DO_FPCMP_PPZZ_ALL(sve_facgt, DO_FACGT)
+
+#undef DO_FPCMP_PPZZ_ALL
+#undef DO_FPCMP_PPZZ_D
+#undef DO_FPCMP_PPZZ_S
+#undef DO_FPCMP_PPZZ_H
+#undef DO_FPCMP_PPZZ
+
+/* One operand floating-point comparison against zero, controlled
+ * by a predicate.
+ */
+#define DO_FPCMP_PPZ0(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, \
+ void *status, uint32_t desc) \
+{ \
+ intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \
+ uint64_t *d = vd, *g = vg; \
+ do { \
+ uint64_t out = 0, pg = g[j]; \
+ do { \
+ i -= sizeof(TYPE), out <<= sizeof(TYPE); \
+ if ((pg >> (i & 63)) & 1) { \
+ TYPE nn = *(TYPE *)(vn + H(i)); \
+ out |= OP(TYPE, nn, 0, status); \
+ } \
+ } while (i & 63); \
+ d[j--] = out; \
+ } while (i > 0); \
+}
+
+#define DO_FPCMP_PPZ0_H(NAME, OP) \
+ DO_FPCMP_PPZ0(NAME##_h, float16, H1_2, OP)
+#define DO_FPCMP_PPZ0_S(NAME, OP) \
+ DO_FPCMP_PPZ0(NAME##_s, float32, H1_4, OP)
+#define DO_FPCMP_PPZ0_D(NAME, OP) \
+ DO_FPCMP_PPZ0(NAME##_d, float64, H1_8, OP)
+
+#define DO_FPCMP_PPZ0_ALL(NAME, OP) \
+ DO_FPCMP_PPZ0_H(NAME, OP) \
+ DO_FPCMP_PPZ0_S(NAME, OP) \
+ DO_FPCMP_PPZ0_D(NAME, OP)
+
+DO_FPCMP_PPZ0_ALL(sve_fcmge0, DO_FCMGE)
+DO_FPCMP_PPZ0_ALL(sve_fcmgt0, DO_FCMGT)
+DO_FPCMP_PPZ0_ALL(sve_fcmle0, DO_FCMLE)
+DO_FPCMP_PPZ0_ALL(sve_fcmlt0, DO_FCMLT)
+DO_FPCMP_PPZ0_ALL(sve_fcmeq0, DO_FCMEQ)
+DO_FPCMP_PPZ0_ALL(sve_fcmne0, DO_FCMNE)
+
+/* FP Trig Multiply-Add. */
+
+void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
+{
+ static const float16 coeff[16] = {
+ 0x3c00, 0xb155, 0x2030, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ };
+ intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16);
+ intptr_t x = simd_data(desc);
+ float16 *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i++) {
+ float16 mm = m[i];
+ intptr_t xx = x;
+ if (float16_is_neg(mm)) {
+ mm = float16_abs(mm);
+ xx += 8;
+ }
+ d[i] = float16_muladd(n[i], mm, coeff[xx], 0, vs);
+ }
+}
+
+void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
+{
+ static const float32 coeff[16] = {
+ 0x3f800000, 0xbe2aaaab, 0x3c088886, 0xb95008b9,
+ 0x36369d6d, 0x00000000, 0x00000000, 0x00000000,
+ 0x3f800000, 0xbf000000, 0x3d2aaaa6, 0xbab60705,
+ 0x37cd37cc, 0x00000000, 0x00000000, 0x00000000,
+ };
+ intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32);
+ intptr_t x = simd_data(desc);
+ float32 *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i++) {
+ float32 mm = m[i];
+ intptr_t xx = x;
+ if (float32_is_neg(mm)) {
+ mm = float32_abs(mm);
+ xx += 8;
+ }
+ d[i] = float32_muladd(n[i], mm, coeff[xx], 0, vs);
+ }
+}
+
+void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm, void *vs, uint32_t desc)
+{
+ static const float64 coeff[16] = {
+ 0x3ff0000000000000ull, 0xbfc5555555555543ull,
+ 0x3f8111111110f30cull, 0xbf2a01a019b92fc6ull,
+ 0x3ec71de351f3d22bull, 0xbe5ae5e2b60f7b91ull,
+ 0x3de5d8408868552full, 0x0000000000000000ull,
+ 0x3ff0000000000000ull, 0xbfe0000000000000ull,
+ 0x3fa5555555555536ull, 0xbf56c16c16c13a0bull,
+ 0x3efa01a019b1e8d8ull, 0xbe927e4f7282f468ull,
+ 0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull,
+ };
+ intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64);
+ intptr_t x = simd_data(desc);
+ float64 *d = vd, *n = vn, *m = vm;
+ for (i = 0; i < opr_sz; i++) {
+ float64 mm = m[i];
+ intptr_t xx = x;
+ if (float64_is_neg(mm)) {
+ mm = float64_abs(mm);
+ xx += 8;
+ }
+ d[i] = float64_muladd(n[i], mm, coeff[xx], 0, vs);
+ }
+}
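+
+/*
+ * The coefficient tables above hold, in entries 0-7, roughly the
+ * Taylor-series coefficients used to build sin(x) (1, -1/6, 1/120, ...)
+ * and, in entries 8-15, those for cos(x) (1, -1/2, 1/24, ...); a
+ * negative Zm element takes its absolute value and selects the second
+ * half of the table.
+ */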
+
+/*
+ * FP Complex Add
+ */
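+/*
+ * simd_data(desc) is a single rotation bit.  With rot = 0 the result is
+ * (n.real - m.imag, n.imag + m.real), i.e. m rotated by 90 degrees;
+ * with rot = 1 it is (n.real + m.imag, n.imag - m.real), i.e. 270
+ * degrees.  Real and imaginary parts occupy adjacent elements, with the
+ * real part in the lower-numbered element.
+ */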
+
+void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg,
+ void *vs, uint32_t desc)
+{
+ intptr_t j, i = simd_oprsz(desc);
+ uint64_t *g = vg;
+ float16 neg_imag = float16_set_sign(0, simd_data(desc));
+ float16 neg_real = float16_chs(neg_imag);
+
+ do {
+ uint64_t pg = g[(i - 1) >> 6];
+ do {
+ float16 e0, e1, e2, e3;
+
+ /* I holds the real index; J holds the imag index. */
+ j = i - sizeof(float16);
+ i -= 2 * sizeof(float16);
+
+ e0 = *(float16 *)(vn + H1_2(i));
+ e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real;
+ e2 = *(float16 *)(vn + H1_2(j));
+ e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag;
+
+ if (likely((pg >> (i & 63)) & 1)) {
+ *(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, vs);
+ }
+ if (likely((pg >> (j & 63)) & 1)) {
+ *(float16 *)(vd + H1_2(j)) = float16_add(e2, e3, vs);
+ }
+ } while (i & 63);
+ } while (i != 0);
+}
+
+void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg,
+ void *vs, uint32_t desc)
+{
+ intptr_t j, i = simd_oprsz(desc);
+ uint64_t *g = vg;
+ float32 neg_imag = float32_set_sign(0, simd_data(desc));
+ float32 neg_real = float32_chs(neg_imag);
+
+ do {
+ uint64_t pg = g[(i - 1) >> 6];
+ do {
+ float32 e0, e1, e2, e3;
+
+ /* I holds the real index; J holds the imag index. */
+ j = i - sizeof(float32);
+ i -= 2 * sizeof(float32);
+
+ e0 = *(float32 *)(vn + H1_2(i));
+ e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real;
+ e2 = *(float32 *)(vn + H1_2(j));
+ e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag;
+
+ if (likely((pg >> (i & 63)) & 1)) {
+ *(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, vs);
+ }
+ if (likely((pg >> (j & 63)) & 1)) {
+ *(float32 *)(vd + H1_2(j)) = float32_add(e2, e3, vs);
+ }
+ } while (i & 63);
+ } while (i != 0);
+}
+
+void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
+ void *vs, uint32_t desc)
+{
+ intptr_t j, i = simd_oprsz(desc);
+ uint64_t *g = vg;
+ float64 neg_imag = float64_set_sign(0, simd_data(desc));
+ float64 neg_real = float64_chs(neg_imag);
+
+ do {
+ uint64_t pg = g[(i - 1) >> 6];
+ do {
+ float64 e0, e1, e2, e3;
+
+ /* I holds the real index; J holds the imag index. */
+ j = i - sizeof(float64);
+ i -= 2 * sizeof(float64);
+
+ e0 = *(float64 *)(vn + H1_2(i));
+ e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real;
+ e2 = *(float64 *)(vn + H1_2(j));
+ e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag;
+
+ if (likely((pg >> (i & 63)) & 1)) {
+ *(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, vs);
+ }
+ if (likely((pg >> (j & 63)) & 1)) {
+ *(float64 *)(vd + H1_2(j)) = float64_add(e2, e3, vs);
+ }
+ } while (i & 63);
+ } while (i != 0);
+}
+
+/*
+ * FP Complex Multiply
+ */
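+/*
+ * simd_data(desc) is the two-bit FCMLA rotation (0, 90, 180 or 270
+ * degrees).  Bit 0 ("flip") selects whether the real or the imaginary
+ * part of Zn feeds both products, and the neg_real/neg_imag masks flip
+ * the sign of the corresponding Zm operand as the rotation requires.
+ */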
+
+void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ intptr_t j, i = simd_oprsz(desc);
+ unsigned rot = simd_data(desc);
+ bool flip = rot & 1;
+ float16 neg_imag, neg_real;
+ uint64_t *g = vg;
+
+ neg_imag = float16_set_sign(0, (rot & 2) != 0);
+ neg_real = float16_set_sign(0, rot == 1 || rot == 2);
+
+ do {
+ uint64_t pg = g[(i - 1) >> 6];
+ do {
+ float16 e1, e2, e3, e4, nr, ni, mr, mi, d;
+
+ /* I holds the real index; J holds the imag index. */
+ j = i - sizeof(float16);
+ i -= 2 * sizeof(float16);
+
+ nr = *(float16 *)(vn + H1_2(i));
+ ni = *(float16 *)(vn + H1_2(j));
+ mr = *(float16 *)(vm + H1_2(i));
+ mi = *(float16 *)(vm + H1_2(j));
+
+ e2 = (flip ? ni : nr);
+ e1 = (flip ? mi : mr) ^ neg_real;
+ e4 = e2;
+ e3 = (flip ? mr : mi) ^ neg_imag;
+
+ if (likely((pg >> (i & 63)) & 1)) {
+ d = *(float16 *)(va + H1_2(i));
+ d = float16_muladd(e2, e1, d, 0, status);
+ *(float16 *)(vd + H1_2(i)) = d;
+ }
+ if (likely((pg >> (j & 63)) & 1)) {
+ d = *(float16 *)(va + H1_2(j));
+ d = float16_muladd(e4, e3, d, 0, status);
+ *(float16 *)(vd + H1_2(j)) = d;
+ }
+ } while (i & 63);
+ } while (i != 0);
+}
+
+void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ intptr_t j, i = simd_oprsz(desc);
+ unsigned rot = simd_data(desc);
+ bool flip = rot & 1;
+ float32 neg_imag, neg_real;
+ uint64_t *g = vg;
+
+ neg_imag = float32_set_sign(0, (rot & 2) != 0);
+ neg_real = float32_set_sign(0, rot == 1 || rot == 2);
+
+ do {
+ uint64_t pg = g[(i - 1) >> 6];
+ do {
+ float32 e1, e2, e3, e4, nr, ni, mr, mi, d;
+
+ /* I holds the real index; J holds the imag index. */
+ j = i - sizeof(float32);
+ i -= 2 * sizeof(float32);
+
+ nr = *(float32 *)(vn + H1_2(i));
+ ni = *(float32 *)(vn + H1_2(j));
+ mr = *(float32 *)(vm + H1_2(i));
+ mi = *(float32 *)(vm + H1_2(j));
+
+ e2 = (flip ? ni : nr);
+ e1 = (flip ? mi : mr) ^ neg_real;
+ e4 = e2;
+ e3 = (flip ? mr : mi) ^ neg_imag;
+
+ if (likely((pg >> (i & 63)) & 1)) {
+ d = *(float32 *)(va + H1_2(i));
+ d = float32_muladd(e2, e1, d, 0, status);
+ *(float32 *)(vd + H1_2(i)) = d;
+ }
+ if (likely((pg >> (j & 63)) & 1)) {
+ d = *(float32 *)(va + H1_2(j));
+ d = float32_muladd(e4, e3, d, 0, status);
+ *(float32 *)(vd + H1_2(j)) = d;
+ }
+ } while (i & 63);
+ } while (i != 0);
+}
+
+void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, void *status, uint32_t desc)
+{
+ intptr_t j, i = simd_oprsz(desc);
+ unsigned rot = simd_data(desc);
+ bool flip = rot & 1;
+ float64 neg_imag, neg_real;
+ uint64_t *g = vg;
+
+ neg_imag = float64_set_sign(0, (rot & 2) != 0);
+ neg_real = float64_set_sign(0, rot == 1 || rot == 2);
+
+ do {
+ uint64_t pg = g[(i - 1) >> 6];
+ do {
+ float64 e1, e2, e3, e4, nr, ni, mr, mi, d;
+
+ /* I holds the real index; J holds the imag index. */
+ j = i - sizeof(float64);
+ i -= 2 * sizeof(float64);
+
+ nr = *(float64 *)(vn + H1_2(i));
+ ni = *(float64 *)(vn + H1_2(j));
+ mr = *(float64 *)(vm + H1_2(i));
+ mi = *(float64 *)(vm + H1_2(j));
+
+ e2 = (flip ? ni : nr);
+ e1 = (flip ? mi : mr) ^ neg_real;
+ e4 = e2;
+ e3 = (flip ? mr : mi) ^ neg_imag;
+
+ if (likely((pg >> (i & 63)) & 1)) {
+ d = *(float64 *)(va + H1_2(i));
+ d = float64_muladd(e2, e1, d, 0, status);
+ *(float64 *)(vd + H1_2(i)) = d;
+ }
+ if (likely((pg >> (j & 63)) & 1)) {
+ d = *(float64 *)(va + H1_2(j));
+ d = float64_muladd(e4, e3, d, 0, status);
+ *(float64 *)(vd + H1_2(j)) = d;
+ }
+ } while (i & 63);
+ } while (i != 0);
+}
+
+/*
+ * Load contiguous data, protected by a governing predicate.
+ */
+
+/*
+ * Skip through a sequence of inactive elements in the guarding predicate @vg,
+ * beginning at @reg_off and bounded by @reg_max. Return the offset of the
+ * first active element >= @reg_off, or @reg_max if there are none.
+ */
+static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off,
+ intptr_t reg_max, int esz)
+{
+ uint64_t pg_mask = pred_esz_masks[esz];
+ uint64_t pg = (vg[reg_off >> 6] & pg_mask) >> (reg_off & 63);
+
+ /* In normal usage, the first element is active. */
+ if (likely(pg & 1)) {
+ return reg_off;
+ }
+
+ if (pg == 0) {
+ reg_off &= -64;
+ do {
+ reg_off += 64;
+ if (unlikely(reg_off >= reg_max)) {
+ /* The entire predicate was false. */
+ return reg_max;
+ }
+ pg = vg[reg_off >> 6] & pg_mask;
+ } while (pg == 0);
+ }
+ reg_off += ctz64(pg);
+
+ /* We should never see an out of range predicate bit set. */
+ tcg_debug_assert(reg_off < reg_max);
+ return reg_off;
+}
+
+/*
+ * Resolve the guest virtual address to info->host and info->flags.
+ * If @nofault, return false if the page is invalid, otherwise
+ * exit via page fault exception.
+ */
+
+bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env,
+ target_ulong addr, int mem_off, MMUAccessType access_type,
+ int mmu_idx, uintptr_t retaddr)
+{
+ int flags;
+
+ addr += mem_off;
+
+ /*
+ * User-only currently always issues with TBI. See the comment
+ * above useronly_clean_ptr. Usually we clean this top byte away
+ * during translation, but we can't do that for e.g. vector + imm
+ * addressing modes.
+ *
+ * We currently always enable TBI for user-only, and do not provide
+ * a way to turn it off. So clean the pointer unconditionally here,
+ * rather than look it up here, or pass it down from above.
+ */
+ addr = useronly_clean_ptr(addr);
+
+#ifdef CONFIG_USER_ONLY
+ flags = probe_access_flags(env, addr, 0, access_type, mmu_idx, nofault,
+ &info->host, retaddr);
+#else
+ CPUTLBEntryFull *full;
+ flags = probe_access_full(env, addr, 0, access_type, mmu_idx, nofault,
+ &info->host, &full, retaddr);
+#endif
+ info->flags = flags;
+
+ if (flags & TLB_INVALID_MASK) {
+ g_assert(nofault);
+ return false;
+ }
+
+#ifdef CONFIG_USER_ONLY
+ memset(&info->attrs, 0, sizeof(info->attrs));
+ /* Require both ANON and MTE; see allocation_tag_mem(). */
+ info->tagged = (flags & PAGE_ANON) && (flags & PAGE_MTE);
+#else
+ info->attrs = full->attrs;
+ info->tagged = full->extra.arm.pte_attrs == 0xf0;
+#endif
+
+ /* Ensure that info->host[] is relative to addr, not addr + mem_off. */
+ info->host -= mem_off;
+ return true;
+}
+
+/*
+ * Find first active element on each page, and a loose bound for the
+ * final element on each page. Identify any single element that spans
+ * the page boundary. Return true if there are any active elements.
+ */
+bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, uint64_t *vg,
+ intptr_t reg_max, int esz, int msize)
+{
+ const int esize = 1 << esz;
+ const uint64_t pg_mask = pred_esz_masks[esz];
+ intptr_t reg_off_first = -1, reg_off_last = -1, reg_off_split;
+ intptr_t mem_off_last, mem_off_split;
+ intptr_t page_split, elt_split;
+ intptr_t i;
+
+ /* Set all of the element indices to -1, and the TLB data to 0. */
+ memset(info, -1, offsetof(SVEContLdSt, page));
+ memset(info->page, 0, sizeof(info->page));
+
+ /* Gross scan over the entire predicate to find bounds. */
+ i = 0;
+ do {
+ uint64_t pg = vg[i] & pg_mask;
+ if (pg) {
+ reg_off_last = i * 64 + 63 - clz64(pg);
+ if (reg_off_first < 0) {
+ reg_off_first = i * 64 + ctz64(pg);
+ }
+ }
+ } while (++i * 64 < reg_max);
+
+ if (unlikely(reg_off_first < 0)) {
+ /* No active elements, no pages touched. */
+ return false;
+ }
+ tcg_debug_assert(reg_off_last >= 0 && reg_off_last < reg_max);
+
+ info->reg_off_first[0] = reg_off_first;
+ info->mem_off_first[0] = (reg_off_first >> esz) * msize;
+ mem_off_last = (reg_off_last >> esz) * msize;
+
+ page_split = -(addr | TARGET_PAGE_MASK);
+ if (likely(mem_off_last + msize <= page_split)) {
+ /* The entire operation fits within a single page. */
+ info->reg_off_last[0] = reg_off_last;
+ return true;
+ }
+
+ info->page_split = page_split;
+ elt_split = page_split / msize;
+ reg_off_split = elt_split << esz;
+ mem_off_split = elt_split * msize;
+
+ /*
+ * This is the last full element on the first page, but it is not
+ * necessarily active. If there is no full element, i.e. the first
+ * active element is the one that's split, this value remains -1.
+ * It is useful as iteration bounds.
+ */
+ if (elt_split != 0) {
+ info->reg_off_last[0] = reg_off_split - esize;
+ }
+
+ /* Determine if an unaligned element spans the pages. */
+ if (page_split % msize != 0) {
+ /* It is helpful to know if the split element is active. */
+ if ((vg[reg_off_split >> 6] >> (reg_off_split & 63)) & 1) {
+ info->reg_off_split = reg_off_split;
+ info->mem_off_split = mem_off_split;
+
+ if (reg_off_split == reg_off_last) {
+ /* The page crossing element is last. */
+ return true;
+ }
+ }
+ reg_off_split += esize;
+ mem_off_split += msize;
+ }
+
+ /*
+ * We do want the first active element on the second page, because
+ * this may affect the address reported in an exception.
+ */
+ reg_off_split = find_next_active(vg, reg_off_split, reg_max, esz);
+ tcg_debug_assert(reg_off_split <= reg_off_last);
+ info->reg_off_first[1] = reg_off_split;
+ info->mem_off_first[1] = (reg_off_split >> esz) * msize;
+ info->reg_off_last[1] = reg_off_last;
+ return true;
+}
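+
+/*
+ * Worked example: for 4-byte elements starting 14 bytes before a page
+ * boundary, page_split = 14, elt_split = 3 and reg_off_split =
+ * mem_off_split = 12; the element at offset 12 straddles the boundary,
+ * reg_off_last[0] = 8 is the last whole element on the first page, and
+ * reg_off_first[1] is the first active element at or after offset 16.
+ */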
+
+/*
+ * Resolve the guest virtual addresses to info->page[].
+ * Control the generation of page faults with @fault. Return false if
+ * there is no work to do, which can only happen with @fault == FAULT_NO.
+ */
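+/*
+ * @fault selects between the three fault models used by the callers:
+ * FAULT_ALL for normal loads and stores (any invalid page faults),
+ * FAULT_FIRST for first-fault loads (only the first active element may
+ * take a fault) and FAULT_NO for no-fault loads (no element faults;
+ * failures are reported via FFR instead).
+ */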
+bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault,
+ CPUARMState *env, target_ulong addr,
+ MMUAccessType access_type, uintptr_t retaddr)
+{
+ int mmu_idx = arm_env_mmu_index(env);
+ int mem_off = info->mem_off_first[0];
+ bool nofault = fault == FAULT_NO;
+ bool have_work = true;
+
+ if (!sve_probe_page(&info->page[0], nofault, env, addr, mem_off,
+ access_type, mmu_idx, retaddr)) {
+ /* No work to be done. */
+ return false;
+ }
+
+ if (likely(info->page_split < 0)) {
+ /* The entire operation was on the one page. */
+ return true;
+ }
+
+ /*
+ * If the second page is invalid, then we want the fault address to be
+ * the first byte on that page which is accessed.
+ */
+ if (info->mem_off_split >= 0) {
+ /*
+ * There is an element split across the pages. The fault address
+ * should be the first byte of the second page.
+ */
+ mem_off = info->page_split;
+ /*
+ * If the split element is also the first active element
+ * of the vector, then: For first-fault we should continue
+ * to generate faults for the second page. For no-fault,
+ * we have work only if the second page is valid.
+ */
+ if (info->mem_off_first[0] < info->mem_off_split) {
+ nofault = FAULT_FIRST;
+ have_work = false;
+ }
+ } else {
+ /*
+ * There is no element split across the pages. The fault address
+ * should be the first active element on the second page.
+ */
+ mem_off = info->mem_off_first[1];
+ /*
+ * There must have been one active element on the first page,
+ * so we're out of first-fault territory.
+ */
+ nofault = fault != FAULT_ALL;
+ }
+
+ have_work |= sve_probe_page(&info->page[1], nofault, env, addr, mem_off,
+ access_type, mmu_idx, retaddr);
+ return have_work;
+}
+
+#ifndef CONFIG_USER_ONLY
+void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env,
+ uint64_t *vg, target_ulong addr,
+ int esize, int msize, int wp_access,
+ uintptr_t retaddr)
+{
+ intptr_t mem_off, reg_off, reg_last;
+ int flags0 = info->page[0].flags;
+ int flags1 = info->page[1].flags;
+
+ if (likely(!((flags0 | flags1) & TLB_WATCHPOINT))) {
+ return;
+ }
+
+ /* Indicate that watchpoints are handled. */
+ info->page[0].flags = flags0 & ~TLB_WATCHPOINT;
+ info->page[1].flags = flags1 & ~TLB_WATCHPOINT;
+
+ if (flags0 & TLB_WATCHPOINT) {
+ mem_off = info->mem_off_first[0];
+ reg_off = info->reg_off_first[0];
+ reg_last = info->reg_off_last[0];
+
+ while (reg_off <= reg_last) {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ cpu_check_watchpoint(env_cpu(env), addr + mem_off,
+ msize, info->page[0].attrs,
+ wp_access, retaddr);
+ }
+ reg_off += esize;
+ mem_off += msize;
+ } while (reg_off <= reg_last && (reg_off & 63));
+ }
+ }
+
+ mem_off = info->mem_off_split;
+ if (mem_off >= 0) {
+ cpu_check_watchpoint(env_cpu(env), addr + mem_off, msize,
+ info->page[0].attrs, wp_access, retaddr);
+ }
+
+ mem_off = info->mem_off_first[1];
+ if ((flags1 & TLB_WATCHPOINT) && mem_off >= 0) {
+ reg_off = info->reg_off_first[1];
+ reg_last = info->reg_off_last[1];
+
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ cpu_check_watchpoint(env_cpu(env), addr + mem_off,
+ msize, info->page[1].attrs,
+ wp_access, retaddr);
+ }
+ reg_off += esize;
+ mem_off += msize;
+ } while (reg_off & 63);
+ } while (reg_off <= reg_last);
+ }
+}
+#endif
+
+void sve_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env,
+ uint64_t *vg, target_ulong addr, int esize,
+ int msize, uint32_t mtedesc, uintptr_t ra)
+{
+ intptr_t mem_off, reg_off, reg_last;
+
+ /* Process the page only if MemAttr == Tagged. */
+ if (info->page[0].tagged) {
+ mem_off = info->mem_off_first[0];
+ reg_off = info->reg_off_first[0];
+ reg_last = info->reg_off_split;
+ if (reg_last < 0) {
+ reg_last = info->reg_off_last[0];
+ }
+
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ mte_check(env, mtedesc, addr, ra);
+ }
+ reg_off += esize;
+ mem_off += msize;
+ } while (reg_off <= reg_last && (reg_off & 63));
+ } while (reg_off <= reg_last);
+ }
+
+ mem_off = info->mem_off_first[1];
+ if (mem_off >= 0 && info->page[1].tagged) {
+ reg_off = info->reg_off_first[1];
+ reg_last = info->reg_off_last[1];
+
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ mte_check(env, mtedesc, addr, ra);
+ }
+ reg_off += esize;
+ mem_off += msize;
+ } while (reg_off & 63);
+ } while (reg_off <= reg_last);
+ }
+}
+
+/*
+ * Common helper for all contiguous 1,2,3,4-register predicated loads.
+ */
+static inline QEMU_ALWAYS_INLINE
+void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
+ uint32_t desc, const uintptr_t retaddr,
+ const int esz, const int msz, const int N, uint32_t mtedesc,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ const unsigned rd = simd_data(desc);
+ const intptr_t reg_max = simd_oprsz(desc);
+ intptr_t reg_off, reg_last, mem_off;
+ SVEContLdSt info;
+ void *host;
+ int flags, i;
+
+ /* Find the active elements. */
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, N << msz)) {
+ /* The entire predicate was false; no load occurs. */
+ for (i = 0; i < N; ++i) {
+ memset(&env->vfp.zregs[(rd + i) & 31], 0, reg_max);
+ }
+ return;
+ }
+
+ /* Probe the page(s). Exit with exception for any invalid page. */
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, retaddr);
+
+ /* Handle watchpoints for all active elements. */
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz,
+ BP_MEM_READ, retaddr);
+
+ /*
+ * Handle mte checks for all active elements.
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
+ */
+ if (mtedesc) {
+ sve_cont_ldst_mte_check(&info, env, vg, addr, 1 << esz, N << msz,
+ mtedesc, retaddr);
+ }
+
+ flags = info.page[0].flags | info.page[1].flags;
+ if (unlikely(flags != 0)) {
+ /*
+ * At least one page includes MMIO.
+ * Any bus operation can fail with cpu_transaction_failed,
+ * which for ARM will raise SyncExternal. Perform the load
+ * into scratch memory to preserve register state until the end.
+ */
+ ARMVectorReg scratch[4] = { };
+
+ mem_off = info.mem_off_first[0];
+ reg_off = info.reg_off_first[0];
+ reg_last = info.reg_off_last[1];
+ if (reg_last < 0) {
+ reg_last = info.reg_off_split;
+ if (reg_last < 0) {
+ reg_last = info.reg_off_last[0];
+ }
+ }
+
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ for (i = 0; i < N; ++i) {
+ tlb_fn(env, &scratch[i], reg_off,
+ addr + mem_off + (i << msz), retaddr);
+ }
+ }
+ reg_off += 1 << esz;
+ mem_off += N << msz;
+ } while (reg_off & 63);
+ } while (reg_off <= reg_last);
+
+ for (i = 0; i < N; ++i) {
+ memcpy(&env->vfp.zregs[(rd + i) & 31], &scratch[i], reg_max);
+ }
+ return;
+ }
+
+ /* The entire operation is in RAM, on valid pages. */
+
+ for (i = 0; i < N; ++i) {
+ memset(&env->vfp.zregs[(rd + i) & 31], 0, reg_max);
+ }
+
+ mem_off = info.mem_off_first[0];
+ reg_off = info.reg_off_first[0];
+ reg_last = info.reg_off_last[0];
+ host = info.page[0].host;
+
+ while (reg_off <= reg_last) {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ for (i = 0; i < N; ++i) {
+ host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off,
+ host + mem_off + (i << msz));
+ }
+ }
+ reg_off += 1 << esz;
+ mem_off += N << msz;
+ } while (reg_off <= reg_last && (reg_off & 63));
+ }
+
+ /*
+ * Use the slow path to manage the cross-page misalignment.
+ * But we know this is RAM and cannot trap.
+ */
+ mem_off = info.mem_off_split;
+ if (unlikely(mem_off >= 0)) {
+ reg_off = info.reg_off_split;
+ for (i = 0; i < N; ++i) {
+ tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off,
+ addr + mem_off + (i << msz), retaddr);
+ }
+ }
+
+ mem_off = info.mem_off_first[1];
+ if (unlikely(mem_off >= 0)) {
+ reg_off = info.reg_off_first[1];
+ reg_last = info.reg_off_last[1];
+ host = info.page[1].host;
+
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ for (i = 0; i < N; ++i) {
+ host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off,
+ host + mem_off + (i << msz));
+ }
+ }
+ reg_off += 1 << esz;
+ mem_off += N << msz;
+ } while (reg_off & 63);
+ } while (reg_off <= reg_last);
+ }
+}
+
+static inline QEMU_ALWAYS_INLINE
+void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr,
+ uint32_t desc, const uintptr_t ra,
+ const int esz, const int msz, const int N,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+ int bit55 = extract64(addr, 55, 1);
+
+ /* Remove mtedesc from the normal sve descriptor. */
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+
+ /* Perform gross MTE suppression early. */
+ if (!tbi_check(mtedesc, bit55) ||
+ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
+ mtedesc = 0;
+ }
+
+ sve_ldN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn);
+}
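+
+/*
+ * The gross suppression above zeroes mtedesc when TBI is disabled for
+ * the addressed half of the address space, or when TCMA applies to the
+ * address's tag; in either case no MTE check can fire, and the slow
+ * path can skip mte_check entirely.
+ */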
+
+#define DO_LD1_1(NAME, ESZ) \
+void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, 0, \
+ sve_##NAME##_host, sve_##NAME##_tlb); \
+} \
+void HELPER(sve_##NAME##_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, \
+ sve_##NAME##_host, sve_##NAME##_tlb); \
+}
+
+#define DO_LD1_2(NAME, ESZ, MSZ) \
+void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \
+ sve_##NAME##_le_host, sve_##NAME##_le_tlb); \
+} \
+void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \
+ sve_##NAME##_be_host, sve_##NAME##_be_tlb); \
+} \
+void HELPER(sve_##NAME##_le_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \
+ sve_##NAME##_le_host, sve_##NAME##_le_tlb); \
+} \
+void HELPER(sve_##NAME##_be_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \
+ sve_##NAME##_be_host, sve_##NAME##_be_tlb); \
+}
+
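+/*
+ * Naming: in sve_ld1<m><e>[u|s], the first letter is the memory access
+ * size (b/h/s/d), the second the element size, and a trailing u or s
+ * marks zero- or sign-extension when the two differ; e.g. ld1bhu loads
+ * bytes into halfword elements with zero extension.
+ */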
+DO_LD1_1(ld1bb, MO_8)
+DO_LD1_1(ld1bhu, MO_16)
+DO_LD1_1(ld1bhs, MO_16)
+DO_LD1_1(ld1bsu, MO_32)
+DO_LD1_1(ld1bss, MO_32)
+DO_LD1_1(ld1bdu, MO_64)
+DO_LD1_1(ld1bds, MO_64)
+
+DO_LD1_2(ld1hh, MO_16, MO_16)
+DO_LD1_2(ld1hsu, MO_32, MO_16)
+DO_LD1_2(ld1hss, MO_32, MO_16)
+DO_LD1_2(ld1hdu, MO_64, MO_16)
+DO_LD1_2(ld1hds, MO_64, MO_16)
+
+DO_LD1_2(ld1ss, MO_32, MO_32)
+DO_LD1_2(ld1sdu, MO_64, MO_32)
+DO_LD1_2(ld1sds, MO_64, MO_32)
+
+DO_LD1_2(ld1dd, MO_64, MO_64)
+
+#undef DO_LD1_1
+#undef DO_LD1_2
+
+#define DO_LDN_1(N) \
+void HELPER(sve_ld##N##bb_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, 0, \
+ sve_ld1bb_host, sve_ld1bb_tlb); \
+} \
+void HELPER(sve_ld##N##bb_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, \
+ sve_ld1bb_host, sve_ld1bb_tlb); \
+}
+
+#define DO_LDN_2(N, SUFF, ESZ) \
+void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \
+ sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \
+} \
+void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \
+ sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \
+} \
+void HELPER(sve_ld##N##SUFF##_le_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \
+ sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \
+} \
+void HELPER(sve_ld##N##SUFF##_be_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \
+ sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \
+}
+
+DO_LDN_1(2)
+DO_LDN_1(3)
+DO_LDN_1(4)
+
+DO_LDN_2(2, hh, MO_16)
+DO_LDN_2(3, hh, MO_16)
+DO_LDN_2(4, hh, MO_16)
+
+DO_LDN_2(2, ss, MO_32)
+DO_LDN_2(3, ss, MO_32)
+DO_LDN_2(4, ss, MO_32)
+
+DO_LDN_2(2, dd, MO_64)
+DO_LDN_2(3, dd, MO_64)
+DO_LDN_2(4, dd, MO_64)
+
+#undef DO_LDN_1
+#undef DO_LDN_2
+
+/*
+ * Load contiguous data, first-fault and no-fault.
+ *
+ * For user-only, one could argue that we should hold the mmap_lock during
+ * the operation so that there is no race between page_check_range and the
+ * load operation. However, unmapping pages out from under a running thread
+ * is extraordinarily unlikely. This theoretical race condition also affects
+ * linux-user/ in its get_user/put_user macros.
+ *
+ * TODO: Construct some helpers, written in assembly, that interact with
+ * host_signal_handler to produce memory ops which can properly report errors
+ * without racing.
+ */
+
+/* Fault on byte I. All bits in FFR from I are cleared. The vector
+ * result from I is CONSTRAINED UNPREDICTABLE; we choose the MERGE
+ * option, which leaves subsequent data unchanged.
+ */
+static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz)
+{
+ uint64_t *ffr = env->vfp.pregs[FFR_PRED_NUM].p;
+
+ if (i & 63) {
+ ffr[i / 64] &= MAKE_64BIT_MASK(0, i & 63);
+ i = ROUND_UP(i, 64);
+ }
+ for (; i < oprsz; i += 64) {
+ ffr[i / 64] = 0;
+ }
+}
+
+/*
+ * Common helper for all contiguous no-fault and first-fault loads.
+ */
+static inline QEMU_ALWAYS_INLINE
+void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
+ uint32_t desc, const uintptr_t retaddr, uint32_t mtedesc,
+ const int esz, const int msz, const SVEContFault fault,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ const unsigned rd = simd_data(desc);
+ void *vd = &env->vfp.zregs[rd];
+ const intptr_t reg_max = simd_oprsz(desc);
+ intptr_t reg_off, mem_off, reg_last;
+ SVEContLdSt info;
+ int flags;
+ void *host;
+
+ /* Find the active elements. */
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, 1 << msz)) {
+ /* The entire predicate was false; no load occurs. */
+ memset(vd, 0, reg_max);
+ return;
+ }
+ reg_off = info.reg_off_first[0];
+
+ /* Probe the page(s). */
+ if (!sve_cont_ldst_pages(&info, fault, env, addr, MMU_DATA_LOAD, retaddr)) {
+ /* Fault on first element. */
+ tcg_debug_assert(fault == FAULT_NO);
+ memset(vd, 0, reg_max);
+ goto do_fault;
+ }
+
+ mem_off = info.mem_off_first[0];
+ flags = info.page[0].flags;
+
+ /*
+ * Disable MTE checking if the Tagged bit is not set. Since TBI must
+ * be set within MTEDESC for MTE, !mtedesc => !mte_active.
+ */
+ if (!info.page[0].tagged) {
+ mtedesc = 0;
+ }
+
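+ /*
+ * For first-fault loads the first active element may trap normally;
+ * for no-fault loads even the first element must be probed without
+ * trapping. Everything past the first element is then MemSingleNF.
+ */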
+ if (fault == FAULT_FIRST) {
+ /* Trapping mte check for the first-fault element. */
+ if (mtedesc) {
+ mte_check(env, mtedesc, addr + mem_off, retaddr);
+ }
+
+ /*
+ * Special handling of the first active element,
+ * if it crosses a page boundary or is MMIO.
+ */
+ bool is_split = mem_off == info.mem_off_split;
+ if (unlikely(flags != 0) || unlikely(is_split)) {
+ /*
+ * Use the slow path for cross-page handling.
+ * Might trap for MMIO or watchpoints.
+ */
+ tlb_fn(env, vd, reg_off, addr + mem_off, retaddr);
+
+ /* After any fault, zero the other elements. */
+ swap_memzero(vd, reg_off);
+ reg_off += 1 << esz;
+ mem_off += 1 << msz;
+ swap_memzero(vd + reg_off, reg_max - reg_off);
+
+ if (is_split) {
+ goto second_page;
+ }
+ } else {
+ memset(vd, 0, reg_max);
+ }
+ } else {
+ memset(vd, 0, reg_max);
+ if (unlikely(mem_off == info.mem_off_split)) {
+ /* The first active element crosses a page boundary. */
+ flags |= info.page[1].flags;
+ if (unlikely(flags & TLB_MMIO)) {
+ /* Some page is MMIO, see below. */
+ goto do_fault;
+ }
+ if (unlikely(flags & TLB_WATCHPOINT) &&
+ (cpu_watchpoint_address_matches
+ (env_cpu(env), addr + mem_off, 1 << msz)
+ & BP_MEM_READ)) {
+ /* Watchpoint hit, see below. */
+ goto do_fault;
+ }
+ if (mtedesc && !mte_probe(env, mtedesc, addr + mem_off)) {
+ goto do_fault;
+ }
+ /*
+ * Use the slow path for cross-page handling.
+ * This is RAM, without a watchpoint, and will not trap.
+ */
+ tlb_fn(env, vd, reg_off, addr + mem_off, retaddr);
+ goto second_page;
+ }
+ }
+
+ /*
+ * From this point on, all memory operations are MemSingleNF.
+ *
+ * Per the MemSingleNF pseudocode, a no-fault load from Device memory
+ * must not actually hit the bus -- it returns (UNKNOWN, FAULT) instead.
+ *
+ * Unfortunately we do not have access to the memory attributes from the
+ * PTE to tell Device memory from Normal memory. So we make a mostly
+ * correct check, and indicate (UNKNOWN, FAULT) for any MMIO.
+ * This gives the right answer for the common cases of "Normal memory,
+ * backed by host RAM" and "Device memory, backed by MMIO".
+ * The architecture allows us to suppress an NF load and return
+ * (UNKNOWN, FAULT) for any reason, so our behaviour for the corner
+ * case of "Normal memory, backed by MMIO" is permitted. The case we
+ * get wrong is "Device memory, backed by host RAM", for which we
+ * should return (UNKNOWN, FAULT) but do not.
+ *
+ * Similarly, CPU_BP breakpoints would raise exceptions, and so
+ * return (UNKNOWN, FAULT). For simplicity, we consider gdb and
+ * architectural breakpoints the same.
+ */
+ if (unlikely(flags & TLB_MMIO)) {
+ goto do_fault;
+ }
+
+ reg_last = info.reg_off_last[0];
+ host = info.page[0].host;
+
+ do {
+ uint64_t pg = *(uint64_t *)(vg + (reg_off >> 3));
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ if (unlikely(flags & TLB_WATCHPOINT) &&
+ (cpu_watchpoint_address_matches
+ (env_cpu(env), addr + mem_off, 1 << msz)
+ & BP_MEM_READ)) {
+ goto do_fault;
+ }
+ if (mtedesc && !mte_probe(env, mtedesc, addr + mem_off)) {
+ goto do_fault;
+ }
+ host_fn(vd, reg_off, host + mem_off);
+ }
+ reg_off += 1 << esz;
+ mem_off += 1 << msz;
+ } while (reg_off <= reg_last && (reg_off & 63));
+ } while (reg_off <= reg_last);
+
+ /*
+ * MemSingleNF is allowed to fail for any reason. We have special
+ * code above to handle the first element crossing a page boundary.
+ * As an implementation choice, decline to handle a cross-page element
+ * in any other position.
+ */
+ reg_off = info.reg_off_split;
+ if (reg_off >= 0) {
+ goto do_fault;
+ }
+
+ second_page:
+ reg_off = info.reg_off_first[1];
+ if (likely(reg_off < 0)) {
+ /* No active elements on the second page. All done. */
+ return;
+ }
+
+ /*
+ * MemSingleNF is allowed to fail for any reason. As an implementation
+ * choice, decline to handle elements on the second page. This should
+ * be low frequency as the guest walks through memory -- the next
+ * iteration of the guest's loop should be aligned on the page boundary,
+ * and then all following iterations will stay aligned.
+ */
+
+ do_fault:
+ record_fault(env, reg_off, reg_max);
+}
+
+static inline QEMU_ALWAYS_INLINE
+void sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr,
+ uint32_t desc, const uintptr_t retaddr,
+ const int esz, const int msz, const SVEContFault fault,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+ int bit55 = extract64(addr, 55, 1);
+
+ /* Remove mtedesc from the normal sve descriptor. */
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+
+ /* Perform gross MTE suppression early. */
+ if (!tbi_check(mtedesc, bit55) ||
+ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
+ mtedesc = 0;
+ }
+
+ sve_ldnfff1_r(env, vg, addr, desc, retaddr, mtedesc,
+ esz, msz, fault, host_fn, tlb_fn);
+}
+
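+/*
+ * Each invocation below expands to four helpers: first-fault (ldff1) and
+ * no-fault (ldnf1), each with and without MTE. For example,
+ * DO_LDFF1_LDNF1_1(bb, MO_8) emits sve_ldff1bb_r, sve_ldnf1bb_r,
+ * sve_ldff1bb_r_mte and sve_ldnf1bb_r_mte.
+ */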
+#define DO_LDFF1_LDNF1_1(PART, ESZ) \
+void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_FIRST, \
+ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
+} \
+void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_NO, \
+ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
+} \
+void HELPER(sve_ldff1##PART##_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_FIRST, \
+ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
+} \
+void HELPER(sve_ldnf1##PART##_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \
+ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
+}
+
+#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \
+void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \
+ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
+} \
+void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \
+ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
+} \
+void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \
+ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
+} \
+void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \
+ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
+} \
+void HELPER(sve_ldff1##PART##_le_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \
+ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
+} \
+void HELPER(sve_ldnf1##PART##_le_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \
+ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
+} \
+void HELPER(sve_ldff1##PART##_be_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \
+ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
+} \
+void HELPER(sve_ldnf1##PART##_be_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \
+ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
+}
+
+DO_LDFF1_LDNF1_1(bb, MO_8)
+DO_LDFF1_LDNF1_1(bhu, MO_16)
+DO_LDFF1_LDNF1_1(bhs, MO_16)
+DO_LDFF1_LDNF1_1(bsu, MO_32)
+DO_LDFF1_LDNF1_1(bss, MO_32)
+DO_LDFF1_LDNF1_1(bdu, MO_64)
+DO_LDFF1_LDNF1_1(bds, MO_64)
+
+DO_LDFF1_LDNF1_2(hh, MO_16, MO_16)
+DO_LDFF1_LDNF1_2(hsu, MO_32, MO_16)
+DO_LDFF1_LDNF1_2(hss, MO_32, MO_16)
+DO_LDFF1_LDNF1_2(hdu, MO_64, MO_16)
+DO_LDFF1_LDNF1_2(hds, MO_64, MO_16)
+
+DO_LDFF1_LDNF1_2(ss, MO_32, MO_32)
+DO_LDFF1_LDNF1_2(sdu, MO_64, MO_32)
+DO_LDFF1_LDNF1_2(sds, MO_64, MO_32)
+
+DO_LDFF1_LDNF1_2(dd, MO_64, MO_64)
+
+#undef DO_LDFF1_LDNF1_1
+#undef DO_LDFF1_LDNF1_2
+
+/*
+ * Common helper for all contiguous 1,2,3,4-register predicated stores.
+ */
+
+static inline QEMU_ALWAYS_INLINE
+void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
+ uint32_t desc, const uintptr_t retaddr,
+ const int esz, const int msz, const int N, uint32_t mtedesc,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ const unsigned rd = simd_data(desc);
+ const intptr_t reg_max = simd_oprsz(desc);
+ intptr_t reg_off, reg_last, mem_off;
+ SVEContLdSt info;
+ void *host;
+ int i, flags;
+
+ /* Find the active elements. */
+ if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, N << msz)) {
+ /* The entire predicate was false; no store occurs. */
+ return;
+ }
+
+ /* Probe the page(s). Exit with exception for any invalid page. */
+ sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_STORE, retaddr);
+
+ /* Handle watchpoints for all active elements. */
+ sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz,
+ BP_MEM_WRITE, retaddr);
+
+ /*
+ * Handle mte checks for all active elements.
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
+ */
+ if (mtedesc) {
+ sve_cont_ldst_mte_check(&info, env, vg, addr, 1 << esz, N << msz,
+ mtedesc, retaddr);
+ }
+
+ flags = info.page[0].flags | info.page[1].flags;
+ if (unlikely(flags != 0)) {
+#ifdef CONFIG_USER_ONLY
+ g_assert_not_reached();
+#else
+ /*
+ * At least one page includes MMIO.
+ * Any bus operation can fail with cpu_transaction_failed,
+ * which for ARM will raise SyncExternal. We cannot avoid
+ * this fault and will leave with the store incomplete.
+ */
+ mem_off = info.mem_off_first[0];
+ reg_off = info.reg_off_first[0];
+ reg_last = info.reg_off_last[1];
+ if (reg_last < 0) {
+ reg_last = info.reg_off_split;
+ if (reg_last < 0) {
+ reg_last = info.reg_off_last[0];
+ }
+ }
+
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ for (i = 0; i < N; ++i) {
+ tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off,
+ addr + mem_off + (i << msz), retaddr);
+ }
+ }
+ reg_off += 1 << esz;
+ mem_off += N << msz;
+ } while (reg_off & 63);
+ } while (reg_off <= reg_last);
+ return;
+#endif
+ }
+
+ mem_off = info.mem_off_first[0];
+ reg_off = info.reg_off_first[0];
+ reg_last = info.reg_off_last[0];
+ host = info.page[0].host;
+
+ while (reg_off <= reg_last) {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ for (i = 0; i < N; ++i) {
+ host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off,
+ host + mem_off + (i << msz));
+ }
+ }
+ reg_off += 1 << esz;
+ mem_off += N << msz;
+ } while (reg_off <= reg_last && (reg_off & 63));
+ }
+
+ /*
+ * Use the slow path to manage the cross-page misalignment.
+ * But we know this is RAM and cannot trap.
+ */
+ mem_off = info.mem_off_split;
+ if (unlikely(mem_off >= 0)) {
+ reg_off = info.reg_off_split;
+ for (i = 0; i < N; ++i) {
+ tlb_fn(env, &env->vfp.zregs[(rd + i) & 31], reg_off,
+ addr + mem_off + (i << msz), retaddr);
+ }
+ }
+
+ mem_off = info.mem_off_first[1];
+ if (unlikely(mem_off >= 0)) {
+ reg_off = info.reg_off_first[1];
+ reg_last = info.reg_off_last[1];
+ host = info.page[1].host;
+
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+ for (i = 0; i < N; ++i) {
+ host_fn(&env->vfp.zregs[(rd + i) & 31], reg_off,
+ host + mem_off + (i << msz));
+ }
+ }
+ reg_off += 1 << esz;
+ mem_off += N << msz;
+ } while (reg_off & 63);
+ } while (reg_off <= reg_last);
+ }
+}
+
+static inline QEMU_ALWAYS_INLINE
+void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr,
+ uint32_t desc, const uintptr_t ra,
+ const int esz, const int msz, const int N,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+ int bit55 = extract64(addr, 55, 1);
+
+ /* Remove mtedesc from the normal sve descriptor. */
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+
+ /* Perform gross MTE suppression early. */
+ if (!tbi_check(mtedesc, bit55) ||
+ tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
+ mtedesc = 0;
+ }
+
+ sve_stN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn);
+}
+
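+/*
+ * Expand the ST1..ST4 helpers. For example, DO_STN_2(2, ss, MO_32, MO_32)
+ * emits sve_st2ss_le_r, sve_st2ss_be_r and their _mte variants, each
+ * forwarding to sve_stN_r with N=2 and 32-bit elements.
+ */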
+#define DO_STN_1(N, NAME, ESZ) \
+void HELPER(sve_st##N##NAME##_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, 0, \
+ sve_st1##NAME##_host, sve_st1##NAME##_tlb); \
+} \
+void HELPER(sve_st##N##NAME##_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, \
+ sve_st1##NAME##_host, sve_st1##NAME##_tlb); \
+}
+
+#define DO_STN_2(N, NAME, ESZ, MSZ) \
+void HELPER(sve_st##N##NAME##_le_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \
+ sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \
+} \
+void HELPER(sve_st##N##NAME##_be_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \
+ sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \
+} \
+void HELPER(sve_st##N##NAME##_le_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \
+ sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \
+} \
+void HELPER(sve_st##N##NAME##_be_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \
+ sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \
+}
+
+DO_STN_1(1, bb, MO_8)
+DO_STN_1(1, bh, MO_16)
+DO_STN_1(1, bs, MO_32)
+DO_STN_1(1, bd, MO_64)
+DO_STN_1(2, bb, MO_8)
+DO_STN_1(3, bb, MO_8)
+DO_STN_1(4, bb, MO_8)
+
+DO_STN_2(1, hh, MO_16, MO_16)
+DO_STN_2(1, hs, MO_32, MO_16)
+DO_STN_2(1, hd, MO_64, MO_16)
+DO_STN_2(2, hh, MO_16, MO_16)
+DO_STN_2(3, hh, MO_16, MO_16)
+DO_STN_2(4, hh, MO_16, MO_16)
+
+DO_STN_2(1, ss, MO_32, MO_32)
+DO_STN_2(1, sd, MO_64, MO_32)
+DO_STN_2(2, ss, MO_32, MO_32)
+DO_STN_2(3, ss, MO_32, MO_32)
+DO_STN_2(4, ss, MO_32, MO_32)
+
+DO_STN_2(1, dd, MO_64, MO_64)
+DO_STN_2(2, dd, MO_64, MO_64)
+DO_STN_2(3, dd, MO_64, MO_64)
+DO_STN_2(4, dd, MO_64, MO_64)
+
+#undef DO_STN_1
+#undef DO_STN_2
+
+/*
+ * Loads with a vector index.
+ */
+
+/*
+ * Load the element at @reg + @reg_ofs, sign or zero-extend as needed.
+ */
+typedef target_ulong zreg_off_fn(void *reg, intptr_t reg_ofs);
+
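+/*
+ * The zsu/zss variants zero- or sign-extend a 32-bit offset and zd uses
+ * the full 64-bit value; the _s/_d suffix gives the element size of the
+ * vector holding the offsets.
+ */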
+static target_ulong off_zsu_s(void *reg, intptr_t reg_ofs)
+{
+ return *(uint32_t *)(reg + H1_4(reg_ofs));
+}
+
+static target_ulong off_zss_s(void *reg, intptr_t reg_ofs)
+{
+ return *(int32_t *)(reg + H1_4(reg_ofs));
+}
+
+static target_ulong off_zsu_d(void *reg, intptr_t reg_ofs)
+{
+ return (uint32_t)*(uint64_t *)(reg + reg_ofs);
+}
+
+static target_ulong off_zss_d(void *reg, intptr_t reg_ofs)
+{
+ return (int32_t)*(uint64_t *)(reg + reg_ofs);
+}
+
+static target_ulong off_zd_d(void *reg, intptr_t reg_ofs)
+{
+ return *(uint64_t *)(reg + reg_ofs);
+}
+
+static inline QEMU_ALWAYS_INLINE
+void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t retaddr,
+ uint32_t mtedesc, int esize, int msize,
+ zreg_off_fn *off_fn,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ const int mmu_idx = arm_env_mmu_index(env);
+ const intptr_t reg_max = simd_oprsz(desc);
+ const int scale = simd_data(desc);
+ ARMVectorReg scratch;
+ intptr_t reg_off;
+ SVEHostPage info, info2;
+
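+ /*
+ * Gather into a zeroed scratch register first, so that a fault taken
+ * partway through leaves the architectural destination unmodified;
+ * the result is copied to vd only after every element has been probed.
+ */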
+ memset(&scratch, 0, reg_max);
+ reg_off = 0;
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if (likely(pg & 1)) {
+ target_ulong addr = base + (off_fn(vm, reg_off) << scale);
+ target_ulong in_page = -(addr | TARGET_PAGE_MASK);
+
+ sve_probe_page(&info, false, env, addr, 0, MMU_DATA_LOAD,
+ mmu_idx, retaddr);
+
+ if (likely(in_page >= msize)) {
+ if (unlikely(info.flags & TLB_WATCHPOINT)) {
+ cpu_check_watchpoint(env_cpu(env), addr, msize,
+ info.attrs, BP_MEM_READ, retaddr);
+ }
+ if (mtedesc && info.tagged) {
+ mte_check(env, mtedesc, addr, retaddr);
+ }
+ if (unlikely(info.flags & TLB_MMIO)) {
+ tlb_fn(env, &scratch, reg_off, addr, retaddr);
+ } else {
+ host_fn(&scratch, reg_off, info.host);
+ }
+ } else {
+ /* Element crosses the page boundary. */
+ sve_probe_page(&info2, false, env, addr + in_page, 0,
+ MMU_DATA_LOAD, mmu_idx, retaddr);
+ if (unlikely((info.flags | info2.flags) & TLB_WATCHPOINT)) {
+ cpu_check_watchpoint(env_cpu(env), addr,
+ msize, info.attrs,
+ BP_MEM_READ, retaddr);
+ }
+ if (mtedesc && info.tagged) {
+ mte_check(env, mtedesc, addr, retaddr);
+ }
+ tlb_fn(env, &scratch, reg_off, addr, retaddr);
+ }
+ }
+ reg_off += esize;
+ pg >>= esize;
+ } while (reg_off & 63);
+ } while (reg_off < reg_max);
+
+ /* Wait until all exceptions have been raised to write back. */
+ memcpy(vd, &scratch, reg_max);
+}
+
+static inline QEMU_ALWAYS_INLINE
+void sve_ld1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t retaddr,
+ int esize, int msize, zreg_off_fn *off_fn,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+ /* Remove mtedesc from the normal sve descriptor. */
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+
+ /*
+ * ??? TODO: For the 32-bit offset extractions, base + ofs cannot
+ * offset base entirely over the address space hole to change the
+ * pointer tag, or change the bit55 selector. So we could examine
+ * TBI + TCMA here, as we do for sve_ldN_r_mte().
+ */
+ sve_ld1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc,
+ esize, msize, off_fn, host_fn, tlb_fn);
+}
+
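+/*
+ * Expand the gather-load helpers for each combination of memory element
+ * and offset extraction. For example, DO_LD1_ZPZ_D(bdu, zss, MO_8) emits
+ * sve_ldbdu_zss and sve_ldbdu_zss_mte: bytes zero-extended into 64-bit
+ * elements, addressed with sign-extended 32-bit offsets.
+ */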
+#define DO_LD1_ZPZ_S(MEM, OFS, MSZ) \
+void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \
+ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+} \
+void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \
+ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+}
+
+#define DO_LD1_ZPZ_D(MEM, OFS, MSZ) \
+void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \
+ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+} \
+void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \
+ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+}
+
+DO_LD1_ZPZ_S(bsu, zsu, MO_8)
+DO_LD1_ZPZ_S(bsu, zss, MO_8)
+DO_LD1_ZPZ_D(bdu, zsu, MO_8)
+DO_LD1_ZPZ_D(bdu, zss, MO_8)
+DO_LD1_ZPZ_D(bdu, zd, MO_8)
+
+DO_LD1_ZPZ_S(bss, zsu, MO_8)
+DO_LD1_ZPZ_S(bss, zss, MO_8)
+DO_LD1_ZPZ_D(bds, zsu, MO_8)
+DO_LD1_ZPZ_D(bds, zss, MO_8)
+DO_LD1_ZPZ_D(bds, zd, MO_8)
+
+DO_LD1_ZPZ_S(hsu_le, zsu, MO_16)
+DO_LD1_ZPZ_S(hsu_le, zss, MO_16)
+DO_LD1_ZPZ_D(hdu_le, zsu, MO_16)
+DO_LD1_ZPZ_D(hdu_le, zss, MO_16)
+DO_LD1_ZPZ_D(hdu_le, zd, MO_16)
+
+DO_LD1_ZPZ_S(hsu_be, zsu, MO_16)
+DO_LD1_ZPZ_S(hsu_be, zss, MO_16)
+DO_LD1_ZPZ_D(hdu_be, zsu, MO_16)
+DO_LD1_ZPZ_D(hdu_be, zss, MO_16)
+DO_LD1_ZPZ_D(hdu_be, zd, MO_16)
+
+DO_LD1_ZPZ_S(hss_le, zsu, MO_16)
+DO_LD1_ZPZ_S(hss_le, zss, MO_16)
+DO_LD1_ZPZ_D(hds_le, zsu, MO_16)
+DO_LD1_ZPZ_D(hds_le, zss, MO_16)
+DO_LD1_ZPZ_D(hds_le, zd, MO_16)
+
+DO_LD1_ZPZ_S(hss_be, zsu, MO_16)
+DO_LD1_ZPZ_S(hss_be, zss, MO_16)
+DO_LD1_ZPZ_D(hds_be, zsu, MO_16)
+DO_LD1_ZPZ_D(hds_be, zss, MO_16)
+DO_LD1_ZPZ_D(hds_be, zd, MO_16)
+
+DO_LD1_ZPZ_S(ss_le, zsu, MO_32)
+DO_LD1_ZPZ_S(ss_le, zss, MO_32)
+DO_LD1_ZPZ_D(sdu_le, zsu, MO_32)
+DO_LD1_ZPZ_D(sdu_le, zss, MO_32)
+DO_LD1_ZPZ_D(sdu_le, zd, MO_32)
+
+DO_LD1_ZPZ_S(ss_be, zsu, MO_32)
+DO_LD1_ZPZ_S(ss_be, zss, MO_32)
+DO_LD1_ZPZ_D(sdu_be, zsu, MO_32)
+DO_LD1_ZPZ_D(sdu_be, zss, MO_32)
+DO_LD1_ZPZ_D(sdu_be, zd, MO_32)
+
+DO_LD1_ZPZ_D(sds_le, zsu, MO_32)
+DO_LD1_ZPZ_D(sds_le, zss, MO_32)
+DO_LD1_ZPZ_D(sds_le, zd, MO_32)
+
+DO_LD1_ZPZ_D(sds_be, zsu, MO_32)
+DO_LD1_ZPZ_D(sds_be, zss, MO_32)
+DO_LD1_ZPZ_D(sds_be, zd, MO_32)
+
+DO_LD1_ZPZ_D(dd_le, zsu, MO_64)
+DO_LD1_ZPZ_D(dd_le, zss, MO_64)
+DO_LD1_ZPZ_D(dd_le, zd, MO_64)
+
+DO_LD1_ZPZ_D(dd_be, zsu, MO_64)
+DO_LD1_ZPZ_D(dd_be, zss, MO_64)
+DO_LD1_ZPZ_D(dd_be, zd, MO_64)
+
+#undef DO_LD1_ZPZ_S
+#undef DO_LD1_ZPZ_D
+
+/* First fault loads with a vector index. */
+
+/*
+ * Common helpers for all gather first-faulting loads.
+ */
+
+static inline QEMU_ALWAYS_INLINE
+void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t retaddr,
+ uint32_t mtedesc, const int esz, const int msz,
+ zreg_off_fn *off_fn,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ const int mmu_idx = arm_env_mmu_index(env);
+ const intptr_t reg_max = simd_oprsz(desc);
+ const int scale = simd_data(desc);
+ const int esize = 1 << esz;
+ const int msize = 1 << msz;
+ intptr_t reg_off;
+ SVEHostPage info;
+ target_ulong addr, in_page;
+ ARMVectorReg scratch;
+
+ /* Skip to the first true predicate. */
+ reg_off = find_next_active(vg, 0, reg_max, esz);
+ if (unlikely(reg_off >= reg_max)) {
+ /* The entire predicate was false; no load occurs. */
+ memset(vd, 0, reg_max);
+ return;
+ }
+
+ /* Protect against overlap between vd and vm. */
+ if (unlikely(vd == vm)) {
+ vm = memcpy(&scratch, vm, reg_max);
+ }
+
+ /*
+ * Probe the first element, allowing faults.
+ */
+ addr = base + (off_fn(vm, reg_off) << scale);
+ if (mtedesc) {
+ mte_check(env, mtedesc, addr, retaddr);
+ }
+ tlb_fn(env, vd, reg_off, addr, retaddr);
+
+ /* After any fault, zero the other elements. */
+ swap_memzero(vd, reg_off);
+ reg_off += esize;
+ swap_memzero(vd + reg_off, reg_max - reg_off);
+
+ /*
+ * Probe the remaining elements, not allowing faults.
+ */
+ while (reg_off < reg_max) {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if (likely((pg >> (reg_off & 63)) & 1)) {
+ addr = base + (off_fn(vm, reg_off) << scale);
+ in_page = -(addr | TARGET_PAGE_MASK);
+
+ if (unlikely(in_page < msize)) {
+ /* Stop if the element crosses a page boundary. */
+ goto fault;
+ }
+
+ sve_probe_page(&info, true, env, addr, 0, MMU_DATA_LOAD,
+ mmu_idx, retaddr);
+ if (unlikely(info.flags & (TLB_INVALID_MASK | TLB_MMIO))) {
+ goto fault;
+ }
+ if (unlikely(info.flags & TLB_WATCHPOINT) &&
+ (cpu_watchpoint_address_matches
+ (env_cpu(env), addr, msize) & BP_MEM_READ)) {
+ goto fault;
+ }
+ if (mtedesc && info.tagged && !mte_probe(env, mtedesc, addr)) {
+ goto fault;
+ }
+
+ host_fn(vd, reg_off, info.host);
+ }
+ reg_off += esize;
+ } while (reg_off & 63);
+ }
+ return;
+
+ fault:
+ record_fault(env, reg_off, reg_max);
+}
+
+static inline QEMU_ALWAYS_INLINE
+void sve_ldff1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t retaddr,
+ const int esz, const int msz,
+ zreg_off_fn *off_fn,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+ /* Remove mtedesc from the normal sve descriptor. */
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+
+ /*
+ * ??? TODO: For the 32-bit offset extractions, base + ofs cannot
+ * offset base entirely over the address space hole to change the
+ * pointer tag, or change the bit55 selector. So we could examine
+ * TBI + TCMA here, as we do for sve_ldN_r_mte().
+ */
+ sve_ldff1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc,
+ esz, msz, off_fn, host_fn, tlb_fn);
+}
+
+#define DO_LDFF1_ZPZ_S(MEM, OFS, MSZ) \
+void HELPER(sve_ldff##MEM##_##OFS) \
+ (CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_32, MSZ, \
+ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+} \
+void HELPER(sve_ldff##MEM##_##OFS##_mte) \
+ (CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_32, MSZ, \
+ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+}
+
+#define DO_LDFF1_ZPZ_D(MEM, OFS, MSZ) \
+void HELPER(sve_ldff##MEM##_##OFS) \
+ (CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_64, MSZ, \
+ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+} \
+void HELPER(sve_ldff##MEM##_##OFS##_mte) \
+ (CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_64, MSZ, \
+ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+}
+
+DO_LDFF1_ZPZ_S(bsu, zsu, MO_8)
+DO_LDFF1_ZPZ_S(bsu, zss, MO_8)
+DO_LDFF1_ZPZ_D(bdu, zsu, MO_8)
+DO_LDFF1_ZPZ_D(bdu, zss, MO_8)
+DO_LDFF1_ZPZ_D(bdu, zd, MO_8)
+
+DO_LDFF1_ZPZ_S(bss, zsu, MO_8)
+DO_LDFF1_ZPZ_S(bss, zss, MO_8)
+DO_LDFF1_ZPZ_D(bds, zsu, MO_8)
+DO_LDFF1_ZPZ_D(bds, zss, MO_8)
+DO_LDFF1_ZPZ_D(bds, zd, MO_8)
+
+DO_LDFF1_ZPZ_S(hsu_le, zsu, MO_16)
+DO_LDFF1_ZPZ_S(hsu_le, zss, MO_16)
+DO_LDFF1_ZPZ_D(hdu_le, zsu, MO_16)
+DO_LDFF1_ZPZ_D(hdu_le, zss, MO_16)
+DO_LDFF1_ZPZ_D(hdu_le, zd, MO_16)
+
+DO_LDFF1_ZPZ_S(hsu_be, zsu, MO_16)
+DO_LDFF1_ZPZ_S(hsu_be, zss, MO_16)
+DO_LDFF1_ZPZ_D(hdu_be, zsu, MO_16)
+DO_LDFF1_ZPZ_D(hdu_be, zss, MO_16)
+DO_LDFF1_ZPZ_D(hdu_be, zd, MO_16)
+
+DO_LDFF1_ZPZ_S(hss_le, zsu, MO_16)
+DO_LDFF1_ZPZ_S(hss_le, zss, MO_16)
+DO_LDFF1_ZPZ_D(hds_le, zsu, MO_16)
+DO_LDFF1_ZPZ_D(hds_le, zss, MO_16)
+DO_LDFF1_ZPZ_D(hds_le, zd, MO_16)
+
+DO_LDFF1_ZPZ_S(hss_be, zsu, MO_16)
+DO_LDFF1_ZPZ_S(hss_be, zss, MO_16)
+DO_LDFF1_ZPZ_D(hds_be, zsu, MO_16)
+DO_LDFF1_ZPZ_D(hds_be, zss, MO_16)
+DO_LDFF1_ZPZ_D(hds_be, zd, MO_16)
+
+DO_LDFF1_ZPZ_S(ss_le, zsu, MO_32)
+DO_LDFF1_ZPZ_S(ss_le, zss, MO_32)
+DO_LDFF1_ZPZ_D(sdu_le, zsu, MO_32)
+DO_LDFF1_ZPZ_D(sdu_le, zss, MO_32)
+DO_LDFF1_ZPZ_D(sdu_le, zd, MO_32)
+
+DO_LDFF1_ZPZ_S(ss_be, zsu, MO_32)
+DO_LDFF1_ZPZ_S(ss_be, zss, MO_32)
+DO_LDFF1_ZPZ_D(sdu_be, zsu, MO_32)
+DO_LDFF1_ZPZ_D(sdu_be, zss, MO_32)
+DO_LDFF1_ZPZ_D(sdu_be, zd, MO_32)
+
+DO_LDFF1_ZPZ_D(sds_le, zsu, MO_32)
+DO_LDFF1_ZPZ_D(sds_le, zss, MO_32)
+DO_LDFF1_ZPZ_D(sds_le, zd, MO_32)
+
+DO_LDFF1_ZPZ_D(sds_be, zsu, MO_32)
+DO_LDFF1_ZPZ_D(sds_be, zss, MO_32)
+DO_LDFF1_ZPZ_D(sds_be, zd, MO_32)
+
+DO_LDFF1_ZPZ_D(dd_le, zsu, MO_64)
+DO_LDFF1_ZPZ_D(dd_le, zss, MO_64)
+DO_LDFF1_ZPZ_D(dd_le, zd, MO_64)
+
+DO_LDFF1_ZPZ_D(dd_be, zsu, MO_64)
+DO_LDFF1_ZPZ_D(dd_be, zss, MO_64)
+DO_LDFF1_ZPZ_D(dd_be, zd, MO_64)
+
+/* Stores with a vector index. */
+
+static inline QEMU_ALWAYS_INLINE
+void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t retaddr,
+ uint32_t mtedesc, int esize, int msize,
+ zreg_off_fn *off_fn,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ const int mmu_idx = arm_env_mmu_index(env);
+ const intptr_t reg_max = simd_oprsz(desc);
+ const int scale = simd_data(desc);
+ void *host[ARM_MAX_VQ * 4];
+ intptr_t reg_off, i;
+ SVEHostPage info, info2;
+
+ /*
+ * Probe all of the elements for host addresses and flags.
+ */
+ i = reg_off = 0;
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ target_ulong addr = base + (off_fn(vm, reg_off) << scale);
+ target_ulong in_page = -(addr | TARGET_PAGE_MASK);
+
+ host[i] = NULL;
+ if (likely((pg >> (reg_off & 63)) & 1)) {
+ if (likely(in_page >= msize)) {
+ sve_probe_page(&info, false, env, addr, 0, MMU_DATA_STORE,
+ mmu_idx, retaddr);
+ if (!(info.flags & TLB_MMIO)) {
+ host[i] = info.host;
+ }
+ } else {
+ /*
+ * Element crosses the page boundary.
+ * Probe both pages, but do not record the host address,
+ * so that we use the slow path.
+ */
+ sve_probe_page(&info, false, env, addr, 0,
+ MMU_DATA_STORE, mmu_idx, retaddr);
+ sve_probe_page(&info2, false, env, addr + in_page, 0,
+ MMU_DATA_STORE, mmu_idx, retaddr);
+ info.flags |= info2.flags;
+ }
+
+ if (unlikely(info.flags & TLB_WATCHPOINT)) {
+ cpu_check_watchpoint(env_cpu(env), addr, msize,
+ info.attrs, BP_MEM_WRITE, retaddr);
+ }
+
+ if (mtedesc && info.tagged) {
+ mte_check(env, mtedesc, addr, retaddr);
+ }
+ }
+ i += 1;
+ reg_off += esize;
+ } while (reg_off & 63);
+ } while (reg_off < reg_max);
+
+ /*
+ * Now that we have recognized all exceptions except SyncExternal
+ * (from TLB_MMIO), which we cannot avoid, perform all of the stores.
+ *
+ * Note for the common case of an element in RAM, not crossing a page
+ * boundary, we have stored the host address in host[]. This doubles
+ * as a first-level check against the predicate, since only enabled
+ * elements have non-null host addresses.
+ */
+ i = reg_off = 0;
+ do {
+ void *h = host[i];
+ if (likely(h != NULL)) {
+ host_fn(vd, reg_off, h);
+ } else if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) {
+ target_ulong addr = base + (off_fn(vm, reg_off) << scale);
+ tlb_fn(env, vd, reg_off, addr, retaddr);
+ }
+ i += 1;
+ reg_off += esize;
+ } while (reg_off < reg_max);
+}
+
+static inline QEMU_ALWAYS_INLINE
+void sve_st1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t retaddr,
+ int esize, int msize, zreg_off_fn *off_fn,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+ /* Remove mtedesc from the normal sve descriptor. */
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+
+ /*
+ * ??? TODO: For the 32-bit offset extractions, base + ofs cannot
+ * offset base entirely over the address space hole to change the
+ * pointer tag, or change the bit55 selector. So we could examine
+ * TBI + TCMA here, as we do for sve_ldN_r_mte().
+ */
+ sve_st1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc,
+ esize, msize, off_fn, host_fn, tlb_fn);
+}
+
+#define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \
+void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \
+ off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
+} \
+void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \
+ off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
+}
+
+#define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \
+void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \
+ off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
+} \
+void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \
+ off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
+}
+
+DO_ST1_ZPZ_S(bs, zsu, MO_8)
+DO_ST1_ZPZ_S(hs_le, zsu, MO_16)
+DO_ST1_ZPZ_S(hs_be, zsu, MO_16)
+DO_ST1_ZPZ_S(ss_le, zsu, MO_32)
+DO_ST1_ZPZ_S(ss_be, zsu, MO_32)
+
+DO_ST1_ZPZ_S(bs, zss, MO_8)
+DO_ST1_ZPZ_S(hs_le, zss, MO_16)
+DO_ST1_ZPZ_S(hs_be, zss, MO_16)
+DO_ST1_ZPZ_S(ss_le, zss, MO_32)
+DO_ST1_ZPZ_S(ss_be, zss, MO_32)
+
+DO_ST1_ZPZ_D(bd, zsu, MO_8)
+DO_ST1_ZPZ_D(hd_le, zsu, MO_16)
+DO_ST1_ZPZ_D(hd_be, zsu, MO_16)
+DO_ST1_ZPZ_D(sd_le, zsu, MO_32)
+DO_ST1_ZPZ_D(sd_be, zsu, MO_32)
+DO_ST1_ZPZ_D(dd_le, zsu, MO_64)
+DO_ST1_ZPZ_D(dd_be, zsu, MO_64)
+
+DO_ST1_ZPZ_D(bd, zss, MO_8)
+DO_ST1_ZPZ_D(hd_le, zss, MO_16)
+DO_ST1_ZPZ_D(hd_be, zss, MO_16)
+DO_ST1_ZPZ_D(sd_le, zss, MO_32)
+DO_ST1_ZPZ_D(sd_be, zss, MO_32)
+DO_ST1_ZPZ_D(dd_le, zss, MO_64)
+DO_ST1_ZPZ_D(dd_be, zss, MO_64)
+
+DO_ST1_ZPZ_D(bd, zd, MO_8)
+DO_ST1_ZPZ_D(hd_le, zd, MO_16)
+DO_ST1_ZPZ_D(hd_be, zd, MO_16)
+DO_ST1_ZPZ_D(sd_le, zd, MO_32)
+DO_ST1_ZPZ_D(sd_be, zd, MO_32)
+DO_ST1_ZPZ_D(dd_le, zd, MO_64)
+DO_ST1_ZPZ_D(dd_be, zd, MO_64)
+
+#undef DO_ST1_ZPZ_S
+#undef DO_ST1_ZPZ_D
+
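+/*
+ * SVE2 bitwise ternary operations, working on 64-bit chunks. EOR3 is a
+ * three-way XOR and BCAX is bit-clear-and-XOR; BSL1N/BSL2N invert the
+ * first/second operand of a bitwise select on k, and NBSL inverts the
+ * result of a plain select.
+ */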
+void HELPER(sve2_eor3)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = n[i] ^ m[i] ^ k[i];
+ }
+}
+
+void HELPER(sve2_bcax)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = n[i] ^ (m[i] & ~k[i]);
+ }
+}
+
+void HELPER(sve2_bsl1n)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = (~n[i] & k[i]) | (m[i] & ~k[i]);
+ }
+}
+
+void HELPER(sve2_bsl2n)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = (n[i] & k[i]) | (~m[i] & ~k[i]);
+ }
+}
+
+void HELPER(sve2_nbsl)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = ~((n[i] & k[i]) | (m[i] & ~k[i]));
+ }
+}
+
+/*
+ * Returns true if m0 or m1 contains the low uint8_t/uint16_t in n.
+ * See hasless(v,1) from
+ * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+ */
+static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
+{
+ int bits = 8 << esz;
+ uint64_t ones = dup_const(esz, 1);
+ uint64_t signs = ones << (bits - 1);
+ uint64_t cmp0, cmp1;
+
+ cmp1 = dup_const(esz, n);
+ cmp0 = cmp1 ^ m0;
+ cmp1 = cmp1 ^ m1;
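+ /*
+ * Lanes equal to n are now zero. (x - ones) & ~x & signs is nonzero
+ * exactly when x contains a zero lane, so the value returned below is
+ * true iff n matches some element of m0 or m1.
+ */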
+ cmp0 = (cmp0 - ones) & ~cmp0;
+ cmp1 = (cmp1 - ones) & ~cmp1;
+ return (cmp0 | cmp1) & signs;
+}
+
+static inline uint32_t do_match(void *vd, void *vn, void *vm, void *vg,
+ uint32_t desc, int esz, bool nmatch)
+{
+ uint16_t esz_mask = pred_esz_masks[esz];
+ intptr_t opr_sz = simd_oprsz(desc);
+ uint32_t flags = PREDTEST_INIT;
+ intptr_t i, j, k;
+
+ for (i = 0; i < opr_sz; i += 16) {
+ uint64_t m0 = *(uint64_t *)(vm + i);
+ uint64_t m1 = *(uint64_t *)(vm + i + 8);
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)) & esz_mask;
+ uint16_t out = 0;
+
+ for (j = 0; j < 16; j += 8) {
+ uint64_t n = *(uint64_t *)(vn + i + j);
+
+ for (k = 0; k < 8; k += 1 << esz) {
+ if (pg & (1 << (j + k))) {
+ bool o = do_match2(n >> (k * 8), m0, m1, esz);
+ out |= (o ^ nmatch) << (j + k);
+ }
+ }
+ }
+ *(uint16_t *)(vd + H1_2(i >> 3)) = out;
+ flags = iter_predtest_fwd(out, pg, flags);
+ }
+ return flags;
+}
+
+#define DO_PPZZ_MATCH(NAME, ESZ, INV) \
+uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ return do_match(vd, vn, vm, vg, desc, ESZ, INV); \
+}
+
+DO_PPZZ_MATCH(sve2_match_ppzz_b, MO_8, false)
+DO_PPZZ_MATCH(sve2_match_ppzz_h, MO_16, false)
+
+DO_PPZZ_MATCH(sve2_nmatch_ppzz_b, MO_8, true)
+DO_PPZZ_MATCH(sve2_nmatch_ppzz_h, MO_16, true)
+
+#undef DO_PPZZ_MATCH
+
+void HELPER(sve2_histcnt_s)(void *vd, void *vn, void *vm, void *vg,
+ uint32_t desc)
+{
+ ARMVectorReg scratch;
+ intptr_t i, j;
+ intptr_t opr_sz = simd_oprsz(desc);
+ uint32_t *d = vd, *n = vn, *m = vm;
+ uint8_t *pg = vg;
+
+ if (d == n) {
+ n = memcpy(&scratch, n, opr_sz);
+ if (d == m) {
+ m = n;
+ }
+ } else if (d == m) {
+ m = memcpy(&scratch, m, opr_sz);
+ }
+
+ for (i = 0; i < opr_sz; i += 4) {
+ uint64_t count = 0;
+ uint8_t pred;
+
+ pred = pg[H1(i >> 3)] >> (i & 7);
+ if (pred & 1) {
+ uint32_t nn = n[H4(i >> 2)];
+
+ for (j = 0; j <= i; j += 4) {
+ pred = pg[H1(j >> 3)] >> (j & 7);
+ if ((pred & 1) && nn == m[H4(j >> 2)]) {
+ ++count;
+ }
+ }
+ }
+ d[H4(i >> 2)] = count;
+ }
+}
+
+void HELPER(sve2_histcnt_d)(void *vd, void *vn, void *vm, void *vg,
+ uint32_t desc)
+{
+ ARMVectorReg scratch;
+ intptr_t i, j;
+ intptr_t opr_sz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint8_t *pg = vg;
+
+ if (d == n) {
+ n = memcpy(&scratch, n, opr_sz);
+ if (d == m) {
+ m = n;
+ }
+ } else if (d == m) {
+ m = memcpy(&scratch, m, opr_sz);
+ }
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ uint64_t count = 0;
+ if (pg[H1(i)] & 1) {
+ uint64_t nn = n[i];
+ for (j = 0; j <= i; ++j) {
+ if ((pg[H1(j)] & 1) && nn == m[j]) {
+ ++count;
+ }
+ }
+ }
+ d[i] = count;
+ }
+}
+
+/*
+ * Returns the number of bytes in m0 and m1 that match n.
+ * Unlike do_match2 we don't just need true/false; we need an exact count.
+ * This requires two extra logical operations.
+ */
+static inline uint64_t do_histseg_cnt(uint8_t n, uint64_t m0, uint64_t m1)
+{
+ const uint64_t mask = dup_const(MO_8, 0x7f);
+ uint64_t cmp0, cmp1;
+
+ cmp1 = dup_const(MO_8, n);
+ cmp0 = cmp1 ^ m0;
+ cmp1 = cmp1 ^ m1;
+
+ /*
+ * 1: clear msb of each byte to avoid carry to next byte (& mask)
+ * 2: carry in to msb if byte != 0 (+ mask)
+ * 3: set msb if cmp has msb set (| cmp)
+ * 4: set ~msb to ignore them (| mask)
+ * We now have 0xff for byte != 0 or 0x7f for byte == 0.
+ * 5: invert, resulting in 0x80 if and only if byte == 0.
+ */
+ cmp0 = ~(((cmp0 & mask) + mask) | cmp0 | mask);
+ cmp1 = ~(((cmp1 & mask) + mask) | cmp1 | mask);
+
+ /*
+ * Combine the two compares in a way that the bits do
+ * not overlap, and so preserves the count of set bits.
+ * If the host has an efficient instruction for ctpop,
+ * then ctpop(x) + ctpop(y) has the same number of
+ * operations as ctpop(x | (y >> 1)). If the host does
+ * not have an efficient ctpop, then we only want to
+ * use it once.
+ */
+ return ctpop64(cmp0 | (cmp1 >> 1));
+}
+
+void HELPER(sve2_histseg)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, j;
+ intptr_t opr_sz = simd_oprsz(desc);
+
+ for (i = 0; i < opr_sz; i += 16) {
+ uint64_t n0 = *(uint64_t *)(vn + i);
+ uint64_t m0 = *(uint64_t *)(vm + i);
+ uint64_t n1 = *(uint64_t *)(vn + i + 8);
+ uint64_t m1 = *(uint64_t *)(vm + i + 8);
+ uint64_t out0 = 0;
+ uint64_t out1 = 0;
+
+ for (j = 0; j < 64; j += 8) {
+ uint64_t cnt0 = do_histseg_cnt(n0 >> j, m0, m1);
+ uint64_t cnt1 = do_histseg_cnt(n1 >> j, m0, m1);
+ out0 |= cnt0 << j;
+ out1 |= cnt1 << j;
+ }
+
+ *(uint64_t *)(vd + i) = out0;
+ *(uint64_t *)(vd + i + 8) = out1;
+ }
+}
+
+void HELPER(sve2_xar_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ int shr = simd_data(desc);
+ int shl = 8 - shr;
+ uint64_t mask = dup_const(MO_8, 0xff >> shr);
+ uint64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz; ++i) {
+ uint64_t t = n[i] ^ m[i];
+ d[i] = ((t >> shr) & mask) | ((t << shl) & ~mask);
+ }
+}
+
+void HELPER(sve2_xar_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ int shr = simd_data(desc);
+ int shl = 16 - shr;
+ uint64_t mask = dup_const(MO_16, 0xffff >> shr);
+ uint64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz; ++i) {
+ uint64_t t = n[i] ^ m[i];
+ d[i] = ((t >> shr) & mask) | ((t << shl) & ~mask);
+ }
+}
+
+void HELPER(sve2_xar_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 4;
+ int shr = simd_data(desc);
+ uint32_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = ror32(n[i] ^ m[i], shr);
+ }
+}
+
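+/*
+ * FMMLA: each group of four elements holds a 2x2 matrix in row-major
+ * order. Compute D = A + N * M^T, rounding each product and each sum
+ * individually (the operations are not fused).
+ */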
+void HELPER(fmmla_s)(void *vd, void *vn, void *vm, void *va,
+ void *status, uint32_t desc)
+{
+ intptr_t s, opr_sz = simd_oprsz(desc) / (sizeof(float32) * 4);
+
+ for (s = 0; s < opr_sz; ++s) {
+ float32 *n = vn + s * sizeof(float32) * 4;
+ float32 *m = vm + s * sizeof(float32) * 4;
+ float32 *a = va + s * sizeof(float32) * 4;
+ float32 *d = vd + s * sizeof(float32) * 4;
+ float32 n00 = n[H4(0)], n01 = n[H4(1)];
+ float32 n10 = n[H4(2)], n11 = n[H4(3)];
+ float32 m00 = m[H4(0)], m01 = m[H4(1)];
+ float32 m10 = m[H4(2)], m11 = m[H4(3)];
+ float32 p0, p1;
+
+ /* i = 0, j = 0 */
+ p0 = float32_mul(n00, m00, status);
+ p1 = float32_mul(n01, m01, status);
+ d[H4(0)] = float32_add(a[H4(0)], float32_add(p0, p1, status), status);
+
+ /* i = 0, j = 1 */
+ p0 = float32_mul(n00, m10, status);
+ p1 = float32_mul(n01, m11, status);
+ d[H4(1)] = float32_add(a[H4(1)], float32_add(p0, p1, status), status);
+
+ /* i = 1, j = 0 */
+ p0 = float32_mul(n10, m00, status);
+ p1 = float32_mul(n11, m01, status);
+ d[H4(2)] = float32_add(a[H4(2)], float32_add(p0, p1, status), status);
+
+ /* i = 1, j = 1 */
+ p0 = float32_mul(n10, m10, status);
+ p1 = float32_mul(n11, m11, status);
+ d[H4(3)] = float32_add(a[H4(3)], float32_add(p0, p1, status), status);
+ }
+}
+
+void HELPER(fmmla_d)(void *vd, void *vn, void *vm, void *va,
+ void *status, uint32_t desc)
+{
+ intptr_t s, opr_sz = simd_oprsz(desc) / (sizeof(float64) * 4);
+
+ for (s = 0; s < opr_sz; ++s) {
+ float64 *n = vn + s * sizeof(float64) * 4;
+ float64 *m = vm + s * sizeof(float64) * 4;
+ float64 *a = va + s * sizeof(float64) * 4;
+ float64 *d = vd + s * sizeof(float64) * 4;
+ float64 n00 = n[0], n01 = n[1], n10 = n[2], n11 = n[3];
+ float64 m00 = m[0], m01 = m[1], m10 = m[2], m11 = m[3];
+ float64 p0, p1;
+
+ /* i = 0, j = 0 */
+ p0 = float64_mul(n00, m00, status);
+ p1 = float64_mul(n01, m01, status);
+ d[0] = float64_add(a[0], float64_add(p0, p1, status), status);
+
+ /* i = 0, j = 1 */
+ p0 = float64_mul(n00, m10, status);
+ p1 = float64_mul(n01, m11, status);
+ d[1] = float64_add(a[1], float64_add(p0, p1, status), status);
+
+ /* i = 1, j = 0 */
+ p0 = float64_mul(n10, m00, status);
+ p1 = float64_mul(n11, m01, status);
+ d[2] = float64_add(a[2], float64_add(p0, p1, status), status);
+
+ /* i = 1, j = 1 */
+ p0 = float64_mul(n10, m10, status);
+ p1 = float64_mul(n11, m11, status);
+ d[3] = float64_add(a[3], float64_add(p0, p1, status), status);
+ }
+}
+
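+/*
+ * FCVTNT writes each narrowed result into the top half of the wide
+ * element it was converted from, leaving the bottom half unchanged;
+ * FCVTLT widens from those top halves.
+ */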
+#define DO_FCVTNT(NAME, TYPEW, TYPEN, HW, HN, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \
+{ \
+ intptr_t i = simd_oprsz(desc); \
+ uint64_t *g = vg; \
+ do { \
+ uint64_t pg = g[(i - 1) >> 6]; \
+ do { \
+ i -= sizeof(TYPEW); \
+ if (likely((pg >> (i & 63)) & 1)) { \
+ TYPEW nn = *(TYPEW *)(vn + HW(i)); \
+ *(TYPEN *)(vd + HN(i + sizeof(TYPEN))) = OP(nn, status); \
+ } \
+ } while (i & 63); \
+ } while (i != 0); \
+}
+
+DO_FCVTNT(sve_bfcvtnt, uint32_t, uint16_t, H1_4, H1_2, float32_to_bfloat16)
+DO_FCVTNT(sve2_fcvtnt_sh, uint32_t, uint16_t, H1_4, H1_2, sve_f32_to_f16)
+DO_FCVTNT(sve2_fcvtnt_ds, uint64_t, uint32_t, H1_8, H1_4, float64_to_float32)
+
+#define DO_FCVTLT(NAME, TYPEW, TYPEN, HW, HN, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vg, void *status, uint32_t desc) \
+{ \
+ intptr_t i = simd_oprsz(desc); \
+ uint64_t *g = vg; \
+ do { \
+ uint64_t pg = g[(i - 1) >> 6]; \
+ do { \
+ i -= sizeof(TYPEW); \
+ if (likely((pg >> (i & 63)) & 1)) { \
+ TYPEN nn = *(TYPEN *)(vn + HN(i + sizeof(TYPEN))); \
+ *(TYPEW *)(vd + HW(i)) = OP(nn, status); \
+ } \
+ } while (i & 63); \
+ } while (i != 0); \
+}
+
+DO_FCVTLT(sve2_fcvtlt_hs, uint32_t, uint16_t, H1_4, H1_2, sve_f16_to_f32)
+DO_FCVTLT(sve2_fcvtlt_sd, uint64_t, uint32_t, H1_8, H1_4, float32_to_float64)
+
+#undef DO_FCVTLT
+#undef DO_FCVTNT
diff --git a/target/arm/tcg/sve_ldst_internal.h b/target/arm/tcg/sve_ldst_internal.h
new file mode 100644
index 0000000000..4f159ec4ad
--- /dev/null
+++ b/target/arm/tcg/sve_ldst_internal.h
@@ -0,0 +1,222 @@
+/*
+ * ARM SVE Load/Store Helpers
+ *
+ * Copyright (c) 2018-2022 Linaro
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TARGET_ARM_SVE_LDST_INTERNAL_H
+#define TARGET_ARM_SVE_LDST_INTERNAL_H
+
+#include "exec/cpu_ldst.h"
+
+/*
+ * Load one element into @vd + @reg_off from @host.
+ * The controlling predicate is known to be true.
+ */
+typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host);
+
+/*
+ * Load one element into @vd + @reg_off from (@env, @vaddr, @ra).
+ * The controlling predicate is known to be true.
+ */
+typedef void sve_ldst1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off,
+ target_ulong vaddr, uintptr_t retaddr);
+
+/*
+ * Generate the above primitives.
+ */
+
+#define DO_LD_HOST(NAME, H, TYPEE, TYPEM, HOST) \
+static inline void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \
+{ TYPEM val = HOST(host); *(TYPEE *)(vd + H(reg_off)) = val; }
+
+#define DO_ST_HOST(NAME, H, TYPEE, TYPEM, HOST) \
+static inline void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \
+{ TYPEM val = *(TYPEE *)(vd + H(reg_off)); HOST(host, val); }
+
+#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, TLB) \
+static inline void sve_##NAME##_tlb(CPUARMState *env, void *vd, \
+ intptr_t reg_off, target_ulong addr, uintptr_t ra) \
+{ \
+ TYPEM val = TLB(env, useronly_clean_ptr(addr), ra); \
+ *(TYPEE *)(vd + H(reg_off)) = val; \
+}
+
+#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \
+static inline void sve_##NAME##_tlb(CPUARMState *env, void *vd, \
+ intptr_t reg_off, target_ulong addr, uintptr_t ra) \
+{ \
+ TYPEM val = *(TYPEE *)(vd + H(reg_off)); \
+ TLB(env, useronly_clean_ptr(addr), val, ra); \
+}
+
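+/*
+ * The primitive names encode <memory size><register size><extension>:
+ * e.g. ld1bhu loads one byte and zero-extends it into a 16-bit ("h")
+ * register element, while ld1bds sign-extends a byte into 64 bits.
+ */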
+#define DO_LD_PRIM_1(NAME, H, TE, TM) \
+ DO_LD_HOST(NAME, H, TE, TM, ldub_p) \
+ DO_LD_TLB(NAME, H, TE, TM, cpu_ldub_data_ra)
+
+DO_LD_PRIM_1(ld1bb, H1, uint8_t, uint8_t)
+DO_LD_PRIM_1(ld1bhu, H1_2, uint16_t, uint8_t)
+DO_LD_PRIM_1(ld1bhs, H1_2, uint16_t, int8_t)
+DO_LD_PRIM_1(ld1bsu, H1_4, uint32_t, uint8_t)
+DO_LD_PRIM_1(ld1bss, H1_4, uint32_t, int8_t)
+DO_LD_PRIM_1(ld1bdu, H1_8, uint64_t, uint8_t)
+DO_LD_PRIM_1(ld1bds, H1_8, uint64_t, int8_t)
+
+#define DO_ST_PRIM_1(NAME, H, TE, TM) \
+ DO_ST_HOST(st1##NAME, H, TE, TM, stb_p) \
+ DO_ST_TLB(st1##NAME, H, TE, TM, cpu_stb_data_ra)
+
+DO_ST_PRIM_1(bb, H1, uint8_t, uint8_t)
+DO_ST_PRIM_1(bh, H1_2, uint16_t, uint8_t)
+DO_ST_PRIM_1(bs, H1_4, uint32_t, uint8_t)
+DO_ST_PRIM_1(bd, H1_8, uint64_t, uint8_t)
+
+#define DO_LD_PRIM_2(NAME, H, TE, TM, LD) \
+ DO_LD_HOST(ld1##NAME##_be, H, TE, TM, LD##_be_p) \
+ DO_LD_HOST(ld1##NAME##_le, H, TE, TM, LD##_le_p) \
+ DO_LD_TLB(ld1##NAME##_be, H, TE, TM, cpu_##LD##_be_data_ra) \
+ DO_LD_TLB(ld1##NAME##_le, H, TE, TM, cpu_##LD##_le_data_ra)
+
+#define DO_ST_PRIM_2(NAME, H, TE, TM, ST) \
+ DO_ST_HOST(st1##NAME##_be, H, TE, TM, ST##_be_p) \
+ DO_ST_HOST(st1##NAME##_le, H, TE, TM, ST##_le_p) \
+ DO_ST_TLB(st1##NAME##_be, H, TE, TM, cpu_##ST##_be_data_ra) \
+ DO_ST_TLB(st1##NAME##_le, H, TE, TM, cpu_##ST##_le_data_ra)
+
+DO_LD_PRIM_2(hh, H1_2, uint16_t, uint16_t, lduw)
+DO_LD_PRIM_2(hsu, H1_4, uint32_t, uint16_t, lduw)
+DO_LD_PRIM_2(hss, H1_4, uint32_t, int16_t, lduw)
+DO_LD_PRIM_2(hdu, H1_8, uint64_t, uint16_t, lduw)
+DO_LD_PRIM_2(hds, H1_8, uint64_t, int16_t, lduw)
+
+DO_ST_PRIM_2(hh, H1_2, uint16_t, uint16_t, stw)
+DO_ST_PRIM_2(hs, H1_4, uint32_t, uint16_t, stw)
+DO_ST_PRIM_2(hd, H1_8, uint64_t, uint16_t, stw)
+
+DO_LD_PRIM_2(ss, H1_4, uint32_t, uint32_t, ldl)
+DO_LD_PRIM_2(sdu, H1_8, uint64_t, uint32_t, ldl)
+DO_LD_PRIM_2(sds, H1_8, uint64_t, int32_t, ldl)
+
+DO_ST_PRIM_2(ss, H1_4, uint32_t, uint32_t, stl)
+DO_ST_PRIM_2(sd, H1_8, uint64_t, uint32_t, stl)
+
+DO_LD_PRIM_2(dd, H1_8, uint64_t, uint64_t, ldq)
+DO_ST_PRIM_2(dd, H1_8, uint64_t, uint64_t, stq)
+
+#undef DO_LD_TLB
+#undef DO_ST_TLB
+#undef DO_LD_HOST
+#undef DO_LD_PRIM_1
+#undef DO_ST_PRIM_1
+#undef DO_LD_PRIM_2
+#undef DO_ST_PRIM_2
+
+/*
+ * Resolve the guest virtual address to info->host and info->flags.
+ * If @nofault, return false if the page is invalid, otherwise
+ * exit via page fault exception.
+ */
+
+typedef struct {
+ void *host;
+ int flags;
+ MemTxAttrs attrs;
+ bool tagged;
+} SVEHostPage;
+
+bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env,
+ target_ulong addr, int mem_off, MMUAccessType access_type,
+ int mmu_idx, uintptr_t retaddr);
+
+/*
+ * Analyse contiguous data, protected by a governing predicate.
+ */
+
+typedef enum {
+ FAULT_NO,
+ FAULT_FIRST,
+ FAULT_ALL,
+} SVEContFault;
+
+typedef struct {
+ /*
+ * First and last element wholly contained within the two pages.
+ * mem_off_first[0] and reg_off_first[0] are always set >= 0.
+ * reg_off_last[0] may be < 0 if the first element crosses pages.
+ * All of mem_off_first[1], reg_off_first[1] and reg_off_last[1]
+ * are set >= 0 only if there are complete elements on a second page.
+ *
+ * The reg_off_* offsets are relative to the internal vector register.
+ * The mem_off_first offset is relative to the memory address; the
+ * two offsets are different when a load operation extends, a store
+ * operation truncates, or for multi-register operations.
+ */
+ int16_t mem_off_first[2];
+ int16_t reg_off_first[2];
+ int16_t reg_off_last[2];
+
+ /*
+ * One element that is misaligned and spans both pages,
+ * or -1 if there is no such active element.
+ */
+ int16_t mem_off_split;
+ int16_t reg_off_split;
+
+ /*
+ * The byte offset at which the entire operation crosses a page boundary.
+ * Set >= 0 if and only if the entire operation spans two pages.
+ */
+ int16_t page_split;
+
+ /* TLB data for the two pages. */
+ SVEHostPage page[2];
+} SVEContLdSt;
+
+/*
+ * Find first active element on each page, and a loose bound for the
+ * final element on each page. Identify any single element that spans
+ * the page boundary. Return true if there are any active elements.
+ */
+bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, uint64_t *vg,
+ intptr_t reg_max, int esz, int msize);
+
+/*
+ * Resolve the guest virtual addresses to info->page[].
+ * Control the generation of page faults with @fault. Return false if
+ * there is no work to do, which can only happen with @fault == FAULT_NO.
+ */
+bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault,
+ CPUARMState *env, target_ulong addr,
+ MMUAccessType access_type, uintptr_t retaddr);
+
+#ifdef CONFIG_USER_ONLY
+static inline void
+sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, uint64_t *vg,
+ target_ulong addr, int esize, int msize,
+ int wp_access, uintptr_t retaddr)
+{ }
+#else
+void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env,
+ uint64_t *vg, target_ulong addr,
+ int esize, int msize, int wp_access,
+ uintptr_t retaddr);
+#endif
+
+void sve_cont_ldst_mte_check(SVEContLdSt *info, CPUARMState *env, uint64_t *vg,
+ target_ulong addr, int esize, int msize,
+ uint32_t mtedesc, uintptr_t ra);
+
+#endif /* TARGET_ARM_SVE_LDST_INTERNAL_H */
diff --git a/target/arm/tcg/t16.decode b/target/arm/tcg/t16.decode
new file mode 100644
index 0000000000..646c74929d
--- /dev/null
+++ b/target/arm/tcg/t16.decode
@@ -0,0 +1,281 @@
+# Thumb1 instructions
+#
+# Copyright (c) 2019 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+&empty !extern
+&s_rrr_shi !extern s rd rn rm shim shty
+&s_rrr_shr !extern s rn rd rm rs shty
+&s_rri_rot !extern s rn rd imm rot
+&s_rrrr !extern s rd rn rm ra
+&rrr_rot !extern rd rn rm rot
+&rr !extern rd rm
+&ri !extern rd imm
+&r !extern rm
+&i !extern imm
+&ldst_rr !extern p w u rn rt rm shimm shtype
+&ldst_ri !extern p w u rn rt imm
+&ldst_block !extern rn i b u w list
+&setend !extern E
+&cps !extern mode imod M A I F
+&ci !extern cond imm
+
+# Set S if the instruction is outside of an IT block.
+%s !function=t16_setflags
+
+# Data-processing (two low registers)
+
+%reg_0 0:3
+
+@lll_noshr ...... .... rm:3 rd:3 \
+ &s_rrr_shi %s rn=%reg_0 shim=0 shty=0
+@xll_noshr ...... .... rm:3 rn:3 \
+ &s_rrr_shi s=1 rd=0 shim=0 shty=0
+@lxl_shr ...... .... rs:3 rd:3 \
+ &s_rrr_shr %s rm=%reg_0 rn=0
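+
+# In @lll_noshr the low three bits name both Rd and Rn (Rdn) with no
+# shift applied; in @lxl_shr they name both Rd and the shifted register
+# Rm, with the shift amount taken from Rs.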
+
+AND_rrri 010000 0000 ... ... @lll_noshr
+EOR_rrri 010000 0001 ... ... @lll_noshr
+MOV_rxrr 010000 0010 ... ... @lxl_shr shty=0 # LSL
+MOV_rxrr 010000 0011 ... ... @lxl_shr shty=1 # LSR
+MOV_rxrr 010000 0100 ... ... @lxl_shr shty=2 # ASR
+ADC_rrri 010000 0101 ... ... @lll_noshr
+SBC_rrri 010000 0110 ... ... @lll_noshr
+MOV_rxrr 010000 0111 ... ... @lxl_shr shty=3 # ROR
+TST_xrri 010000 1000 ... ... @xll_noshr
+RSB_rri 010000 1001 rn:3 rd:3 &s_rri_rot %s imm=0 rot=0
+CMP_xrri 010000 1010 ... ... @xll_noshr
+CMN_xrri 010000 1011 ... ... @xll_noshr
+ORR_rrri 010000 1100 ... ... @lll_noshr
+MUL 010000 1101 rn:3 rd:3 &s_rrrr %s rm=%reg_0 ra=0
+BIC_rrri 010000 1110 ... ... @lll_noshr
+MVN_rxri 010000 1111 ... ... @lll_noshr
+
+# Load/store (register offset)
+
+@ldst_rr ....... rm:3 rn:3 rt:3 \
+ &ldst_rr p=1 w=0 u=1 shimm=0 shtype=0
+
+STR_rr 0101 000 ... ... ... @ldst_rr
+STRH_rr 0101 001 ... ... ... @ldst_rr
+STRB_rr 0101 010 ... ... ... @ldst_rr
+LDRSB_rr 0101 011 ... ... ... @ldst_rr
+LDR_rr 0101 100 ... ... ... @ldst_rr
+LDRH_rr 0101 101 ... ... ... @ldst_rr
+LDRB_rr 0101 110 ... ... ... @ldst_rr
+LDRSH_rr 0101 111 ... ... ... @ldst_rr
+
+# Load/store word/byte (immediate offset)
+
+%imm5_6x4 6:5 !function=times_4
+
+@ldst_ri_1 ..... imm:5 rn:3 rt:3 \
+ &ldst_ri p=1 w=0 u=1
+@ldst_ri_4 ..... ..... rn:3 rt:3 \
+ &ldst_ri p=1 w=0 u=1 imm=%imm5_6x4
+
+STR_ri 01100 ..... ... ... @ldst_ri_4
+LDR_ri 01101 ..... ... ... @ldst_ri_4
+STRB_ri 01110 ..... ... ... @ldst_ri_1
+LDRB_ri 01111 ..... ... ... @ldst_ri_1
+
+# Load/store halfword (immediate offset)
+
+%imm5_6x2 6:5 !function=times_2
+@ldst_ri_2 ..... ..... rn:3 rt:3 \
+ &ldst_ri p=1 w=0 u=1 imm=%imm5_6x2
+
+STRH_ri 10000 ..... ... ... @ldst_ri_2
+LDRH_ri 10001 ..... ... ... @ldst_ri_2
+
+# Load/store (SP-relative)
+
+%imm8_0x4 0:8 !function=times_4
+@ldst_spec_i ..... rt:3 ........ \
+ &ldst_ri p=1 w=0 u=1 imm=%imm8_0x4
+
+STR_ri 10010 ... ........ @ldst_spec_i rn=13
+LDR_ri 10011 ... ........ @ldst_spec_i rn=13
+
+# Load (PC-relative)
+
+LDR_ri 01001 ... ........ @ldst_spec_i rn=15
+
+# Add PC/SP (immediate)
+
+ADR 10100 rd:3 ........ imm=%imm8_0x4
+ADD_rri 10101 rd:3 ........ \
+ &s_rri_rot rn=13 s=0 rot=0 imm=%imm8_0x4 # SP
+
+# Load/store multiple
+
+@ldstm ..... rn:3 list:8 &ldst_block i=1 b=0 u=0 w=1
+
+STM 11000 ... ........ @ldstm
+LDM_t16 11001 ... ........ @ldstm
+
+# Shift (immediate)
+
+@shift_i ..... shim:5 rm:3 rd:3 &s_rrr_shi %s rn=%reg_0
+
+MOV_rxri 000 00 ..... ... ... @shift_i shty=0 # LSL
+MOV_rxri 000 01 ..... ... ... @shift_i shty=1 # LSR
+MOV_rxri 000 10 ..... ... ... @shift_i shty=2 # ASR
+
+# Add/subtract (three low registers)
+
+@addsub_3 ....... rm:3 rn:3 rd:3 \
+ &s_rrr_shi %s shim=0 shty=0
+
+ADD_rrri 0001100 ... ... ... @addsub_3
+SUB_rrri 0001101 ... ... ... @addsub_3
+
+# Add/subtract (two low registers and immediate)
+
+@addsub_2i ....... imm:3 rn:3 rd:3 \
+ &s_rri_rot %s rot=0
+
+ADD_rri 0001 110 ... ... ... @addsub_2i
+SUB_rri 0001 111 ... ... ... @addsub_2i
+
+# Add, subtract, compare, move (one low register and immediate)
+
+%reg_8 8:3
+@arith_1i ..... rd:3 imm:8 \
+ &s_rri_rot rot=0 rn=%reg_8
+
+MOV_rxi 00100 ... ........ @arith_1i %s
+CMP_xri 00101 ... ........ @arith_1i s=1
+ADD_rri 00110 ... ........ @arith_1i %s
+SUB_rri 00111 ... ........ @arith_1i %s
+
+# Add, compare, move (two high registers)
+
+%reg_0_7 7:1 0:3
+@addsub_2h .... .... . rm:4 ... \
+ &s_rrr_shi rd=%reg_0_7 rn=%reg_0_7 shim=0 shty=0
+
+ADD_rrri 0100 0100 . .... ... @addsub_2h s=0
+CMP_xrri 0100 0101 . .... ... @addsub_2h s=1
+MOV_rxri 0100 0110 . .... ... @addsub_2h s=0
+
+# Adjust SP (immediate)
+
+%imm7_0x4 0:7 !function=times_4
+@addsub_sp_i .... .... . ....... \
+ &s_rri_rot s=0 rd=13 rn=13 rot=0 imm=%imm7_0x4
+
+ADD_rri 1011 0000 0 ....... @addsub_sp_i
+SUB_rri 1011 0000 1 ....... @addsub_sp_i
+
+# Branch and exchange
+
+@branchr .... .... . rm:4 ... &r
+
+BX 0100 0111 0 .... 000 @branchr
+BLX_r 0100 0111 1 .... 000 @branchr
+BXNS 0100 0111 0 .... 100 @branchr
+BLXNS 0100 0111 1 .... 100 @branchr
+
+# Extend
+
+@extend .... .... .. rm:3 rd:3 &rrr_rot rn=15 rot=0
+
+SXTAH 1011 0010 00 ... ... @extend
+SXTAB 1011 0010 01 ... ... @extend
+UXTAH 1011 0010 10 ... ... @extend
+UXTAB 1011 0010 11 ... ... @extend
+
+# Change processor state
+
+%imod 4:1 !function=plus_2
+
+SETEND 1011 0110 010 1 E:1 000 &setend
+{
+ CPS 1011 0110 011 . 0 A:1 I:1 F:1 &cps mode=0 M=0 %imod
+ CPS_v7m 1011 0110 011 im:1 00 I:1 F:1
+}
+
+# Reverse bytes
+
+@rdm .... .... .. rm:3 rd:3 &rr
+
+REV 1011 1010 00 ... ... @rdm
+REV16 1011 1010 01 ... ... @rdm
+REVSH 1011 1010 11 ... ... @rdm
+
+# Hints
+
+{
+ {
+ YIELD 1011 1111 0001 0000
+ WFE 1011 1111 0010 0000
+ WFI 1011 1111 0011 0000
+
+ # TODO: Implement SEV, SEVL; may help SMP performance.
+ # SEV 1011 1111 0100 0000
+ # SEVL 1011 1111 0101 0000
+
+ # The canonical nop has the second nibble as 0000, but the whole of the
+ # rest of the space is a reserved hint which behaves as a nop.
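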
+ NOP 1011 1111 ---- 0000
+ }
+ IT 1011 1111 cond_mask:8
+}
+
+# Miscellaneous 16-bit instructions
+
+%imm6_9_3 9:1 3:5 !function=times_2
+
+HLT 1011 1010 10 imm:6 &i
+BKPT 1011 1110 imm:8 &i
+CBZ 1011 nz:1 0.1 ..... rn:3 imm=%imm6_9_3
+
+# Push and Pop
+
+%push_list 0:9 !function=t16_push_list
+%pop_list 0:9 !function=t16_pop_list
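+# Bits [7:0] of the 9-bit field select r0-r7; bit 8 selects LR for PUSH and
+# PC for POP (per the t16_push_list/t16_pop_list helpers).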
+
+STM 1011 010 ......... \
+ &ldst_block i=0 b=1 u=0 w=1 rn=13 list=%push_list
+LDM_t16 1011 110 ......... \
+ &ldst_block i=1 b=0 u=0 w=1 rn=13 list=%pop_list
+
+# Conditional branches, Supervisor call
+
+%imm8_0x2 0:s8 !function=times_2
+
+{
+ UDF 1101 1110 ---- ----
+ SVC 1101 1111 imm:8 &i
+ B_cond_thumb 1101 cond:4 ........ &ci imm=%imm8_0x2
+}
+
+# Unconditional Branch
+
+%imm11_0x2 0:s11 !function=times_2
+
+B 11100 ........... &i imm=%imm11_0x2
+
+# thumb_insn_is_16bit() ensures we won't be decoding these as
+# T16 instructions for a Thumb2 CPU, so these patterns must be
+# a Thumb1 split BL/BLX.
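+# The prefix carries the sign-extended upper half of the offset (shifted left
+# by 12) and the suffix the low 11 bits scaled by 2; as in the original Thumb1
+# two-instruction sequence, LR holds the partial result between the halves.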
+BLX_suffix 11101 imm:11 &i
+BL_BLX_prefix 11110 imm:s11 &i
+BL_suffix 11111 imm:11 &i
diff --git a/target/arm/tcg/t32.decode b/target/arm/tcg/t32.decode
new file mode 100644
index 0000000000..f21ad0167a
--- /dev/null
+++ b/target/arm/tcg/t32.decode
@@ -0,0 +1,753 @@
+# Thumb2 instructions
+#
+# Copyright (c) 2019 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+&empty !extern
+&s_rrr_shi !extern s rd rn rm shim shty
+&s_rrr_shr !extern s rn rd rm rs shty
+&s_rri_rot !extern s rn rd imm rot
+&s_rrrr !extern s rd rn rm ra
+&rrrr !extern rd rn rm ra
+&rrr_rot !extern rd rn rm rot
+&rrr !extern rd rn rm
+&rr !extern rd rm
+&ri !extern rd imm
+&r !extern rm
+&i !extern imm
+&msr_reg !extern rn r mask
+&mrs_reg !extern rd r
+&msr_bank !extern rn r sysm
+&mrs_bank !extern rd r sysm
+&ldst_rr !extern p w u rn rt rm shimm shtype
+&ldst_ri !extern p w u rn rt imm
+&ldst_block !extern rn i b u w list
+&strex !extern rn rd rt rt2 imm
+&ldrex !extern rn rt rt2 imm
+&bfx !extern rd rn lsb widthm1
+&bfi !extern rd rn lsb msb
+&sat !extern rd rn satimm imm sh
+&pkh !extern rd rn rm imm tb
+&cps !extern mode imod M A I F
+&mcr !extern cp opc1 crn crm opc2 rt
+&mcrr !extern cp opc1 crm rt rt2
+
+&mve_shl_ri rdalo rdahi shim
+&mve_shl_rr rdalo rdahi rm
+&mve_sh_ri rda shim
+&mve_sh_rr rda rm
+
+# rdahi: bits [3:1] from insn, bit 0 is 1
+# rdalo: bits [3:1] from insn, bit 0 is 0
+%rdahi_9 9:3 !function=times_2_plus_1
+%rdalo_17 17:3 !function=times_2
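+# For example a 3-bit field value of 0b010 gives r5 for rdahi (2 * 2 + 1) and
+# r4 for rdalo, so the 64-bit operand always lives in an even/odd pair.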
+
+# Data-processing (register)
+
+%imm5_12_6 12:3 6:2
+
+@s_rrr_shi ....... .... s:1 rn:4 .... rd:4 .. shty:2 rm:4 \
+ &s_rrr_shi shim=%imm5_12_6
+@s_rxr_shi ....... .... s:1 .... .... rd:4 .. shty:2 rm:4 \
+ &s_rrr_shi shim=%imm5_12_6 rn=0
+@S_xrr_shi ....... .... . rn:4 .... .... .. shty:2 rm:4 \
+ &s_rrr_shi shim=%imm5_12_6 s=1 rd=0
+
+@mve_shl_ri ....... .... . ... . . ... ... . .. .. .... \
+ &mve_shl_ri shim=%imm5_12_6 rdalo=%rdalo_17 rdahi=%rdahi_9
+@mve_shl_rr ....... .... . ... . rm:4 ... . .. .. .... \
+ &mve_shl_rr rdalo=%rdalo_17 rdahi=%rdahi_9
+@mve_sh_ri ....... .... . rda:4 . ... ... . .. .. .... \
+ &mve_sh_ri shim=%imm5_12_6
+@mve_sh_rr ....... .... . rda:4 rm:4 .... .... .... &mve_sh_rr
+
+{
+ TST_xrri 1110101 0000 1 .... 0 ... 1111 .... .... @S_xrr_shi
+ AND_rrri 1110101 0000 . .... 0 ... .... .... .... @s_rrr_shi
+}
+BIC_rrri 1110101 0001 . .... 0 ... .... .... .... @s_rrr_shi
+{
+ # The v8.1M MVE shift insns overlap in encoding with MOVS/ORRS
+ # and are distinguished by having Rm==13 or 15. Those are UNPREDICTABLE
+ # cases for MOVS/ORRS. We decode the MVE cases first, ensuring that
+ # they explicitly call unallocated_encoding() for cases that must UNDEF
+ # (eg "using a new shift insn on a v8.1M CPU without MVE"), and letting
+ # the rest fall through (where ORR_rrri and MOV_rxri will end up
+ # handling them as r13 and r15 accesses with the same semantics as A32).
+ [
+ {
+ UQSHL_ri 1110101 0010 1 .... 0 ... 1111 .. 00 1111 @mve_sh_ri
+ LSLL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 00 1111 @mve_shl_ri
+ UQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 00 1111 @mve_shl_ri
+ }
+
+ {
+ URSHR_ri 1110101 0010 1 .... 0 ... 1111 .. 01 1111 @mve_sh_ri
+ LSRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 01 1111 @mve_shl_ri
+ URSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 01 1111 @mve_shl_ri
+ }
+
+ {
+ SRSHR_ri 1110101 0010 1 .... 0 ... 1111 .. 10 1111 @mve_sh_ri
+ ASRL_ri 1110101 0010 1 ... 0 0 ... ... 1 .. 10 1111 @mve_shl_ri
+ SRSHRL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 10 1111 @mve_shl_ri
+ }
+
+ {
+ SQSHL_ri 1110101 0010 1 .... 0 ... 1111 .. 11 1111 @mve_sh_ri
+ SQSHLL_ri 1110101 0010 1 ... 1 0 ... ... 1 .. 11 1111 @mve_shl_ri
+ }
+
+ {
+ UQRSHL_rr 1110101 0010 1 .... .... 1111 0000 1101 @mve_sh_rr
+ LSLL_rr 1110101 0010 1 ... 0 .... ... 1 0000 1101 @mve_shl_rr
+ UQRSHLL64_rr 1110101 0010 1 ... 1 .... ... 1 0000 1101 @mve_shl_rr
+ }
+
+ {
+ SQRSHR_rr 1110101 0010 1 .... .... 1111 0010 1101 @mve_sh_rr
+ ASRL_rr 1110101 0010 1 ... 0 .... ... 1 0010 1101 @mve_shl_rr
+ SQRSHRL64_rr 1110101 0010 1 ... 1 .... ... 1 0010 1101 @mve_shl_rr
+ }
+
+ UQRSHLL48_rr 1110101 0010 1 ... 1 .... ... 1 1000 1101 @mve_shl_rr
+ SQRSHRL48_rr 1110101 0010 1 ... 1 .... ... 1 1010 1101 @mve_shl_rr
+ ]
+
+ MOV_rxri 1110101 0010 . 1111 0 ... .... .... .... @s_rxr_shi
+ ORR_rrri 1110101 0010 . .... 0 ... .... .... .... @s_rrr_shi
+
+ # v8.1M CSEL and friends
+ CSEL 1110101 0010 1 rn:4 10 op:2 rd:4 fcond:4 rm:4
+}
+{
+ MVN_rxri 1110101 0011 . 1111 0 ... .... .... .... @s_rxr_shi
+ ORN_rrri 1110101 0011 . .... 0 ... .... .... .... @s_rrr_shi
+}
+{
+ TEQ_xrri 1110101 0100 1 .... 0 ... 1111 .... .... @S_xrr_shi
+ EOR_rrri 1110101 0100 . .... 0 ... .... .... .... @s_rrr_shi
+}
+PKH 1110101 0110 0 rn:4 0 ... rd:4 .. tb:1 0 rm:4 \
+ &pkh imm=%imm5_12_6
+{
+ CMN_xrri 1110101 1000 1 .... 0 ... 1111 .... .... @S_xrr_shi
+ ADD_rrri 1110101 1000 . .... 0 ... .... .... .... @s_rrr_shi
+}
+ADC_rrri 1110101 1010 . .... 0 ... .... .... .... @s_rrr_shi
+SBC_rrri 1110101 1011 . .... 0 ... .... .... .... @s_rrr_shi
+{
+ CMP_xrri 1110101 1101 1 .... 0 ... 1111 .... .... @S_xrr_shi
+ SUB_rrri 1110101 1101 . .... 0 ... .... .... .... @s_rrr_shi
+}
+RSB_rrri 1110101 1110 . .... 0 ... .... .... .... @s_rrr_shi
+
+# Data-processing (register-shifted register)
+
+MOV_rxrr 1111 1010 0 shty:2 s:1 rm:4 1111 rd:4 0000 rs:4 \
+ &s_rrr_shr rn=0
+
+# Data-processing (immediate)
+
+%t32extrot 26:1 12:3 0:8 !function=t32_expandimm_rot
+%t32extimm 26:1 12:3 0:8 !function=t32_expandimm_imm
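+# The combined effect is the usual T32 ThumbExpandImm: imm12<11:10> == 00
+# selects an 8-bit payload replicated as 000000XY, 00XY00XY, XY00XY00 or
+# XYXYXYXY with no rotation; anything else is a rotated 8-bit constant.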
+
+@s_rri_rot ....... .... s:1 rn:4 . ... rd:4 ........ \
+ &s_rri_rot imm=%t32extimm rot=%t32extrot
+@s_rxi_rot ....... .... s:1 .... . ... rd:4 ........ \
+ &s_rri_rot imm=%t32extimm rot=%t32extrot rn=0
+@S_xri_rot ....... .... . rn:4 . ... .... ........ \
+ &s_rri_rot imm=%t32extimm rot=%t32extrot s=1 rd=0
+
+{
+ TST_xri 1111 0.0 0000 1 .... 0 ... 1111 ........ @S_xri_rot
+ AND_rri 1111 0.0 0000 . .... 0 ... .... ........ @s_rri_rot
+}
+BIC_rri 1111 0.0 0001 . .... 0 ... .... ........ @s_rri_rot
+{
+ MOV_rxi 1111 0.0 0010 . 1111 0 ... .... ........ @s_rxi_rot
+ ORR_rri 1111 0.0 0010 . .... 0 ... .... ........ @s_rri_rot
+}
+{
+ MVN_rxi 1111 0.0 0011 . 1111 0 ... .... ........ @s_rxi_rot
+ ORN_rri 1111 0.0 0011 . .... 0 ... .... ........ @s_rri_rot
+}
+{
+ TEQ_xri 1111 0.0 0100 1 .... 0 ... 1111 ........ @S_xri_rot
+ EOR_rri 1111 0.0 0100 . .... 0 ... .... ........ @s_rri_rot
+}
+{
+ CMN_xri 1111 0.0 1000 1 .... 0 ... 1111 ........ @S_xri_rot
+ ADD_rri 1111 0.0 1000 . .... 0 ... .... ........ @s_rri_rot
+}
+ADC_rri 1111 0.0 1010 . .... 0 ... .... ........ @s_rri_rot
+SBC_rri 1111 0.0 1011 . .... 0 ... .... ........ @s_rri_rot
+{
+ CMP_xri 1111 0.0 1101 1 .... 0 ... 1111 ........ @S_xri_rot
+ SUB_rri 1111 0.0 1101 . .... 0 ... .... ........ @s_rri_rot
+}
+RSB_rri 1111 0.0 1110 . .... 0 ... .... ........ @s_rri_rot
+
+# Data processing (plain binary immediate)
+
+%imm12_26_12_0 26:1 12:3 0:8
+%neg12_26_12_0 26:1 12:3 0:8 !function=negate
+@s0_rri_12 .... ... .... . rn:4 . ... rd:4 ........ \
+ &s_rri_rot imm=%imm12_26_12_0 rot=0 s=0
+
+{
+ ADR 1111 0.1 0000 0 1111 0 ... rd:4 ........ \
+ &ri imm=%imm12_26_12_0
+ ADD_rri 1111 0.1 0000 0 .... 0 ... .... ........ @s0_rri_12
+}
+{
+ ADR 1111 0.1 0101 0 1111 0 ... rd:4 ........ \
+ &ri imm=%neg12_26_12_0
+ SUB_rri 1111 0.1 0101 0 .... 0 ... .... ........ @s0_rri_12
+}
+
+# Move Wide
+
+%imm16_26_16_12_0 16:4 26:1 12:3 0:8
+@mov16 .... .... .... .... .... rd:4 .... .... \
+ &ri imm=%imm16_26_16_12_0
+
+MOVW 1111 0.10 0100 .... 0 ... .... ........ @mov16
+MOVT 1111 0.10 1100 .... 0 ... .... ........ @mov16
+
+# Saturate, bitfield
+
+@sat .... .... .. sh:1 . rn:4 . ... rd:4 .. . satimm:5 \
+ &sat imm=%imm5_12_6
+@sat16 .... .... .. . . rn:4 . ... rd:4 .. . satimm:5 \
+ &sat sh=0 imm=0
+
+{
+ SSAT16 1111 0011 001 0 .... 0 000 .... 00 0 ..... @sat16
+ SSAT 1111 0011 00. 0 .... 0 ... .... .. 0 ..... @sat
+}
+{
+ USAT16 1111 0011 101 0 .... 0 000 .... 00 0 ..... @sat16
+ USAT 1111 0011 10. 0 .... 0 ... .... .. 0 ..... @sat
+}
+
+@bfx .... .... ... . rn:4 . ... rd:4 .. . widthm1:5 \
+ &bfx lsb=%imm5_12_6
+@bfi .... .... ... . rn:4 . ... rd:4 .. . msb:5 \
+ &bfi lsb=%imm5_12_6
+
+SBFX 1111 0011 010 0 .... 0 ... .... ..0..... @bfx
+UBFX 1111 0011 110 0 .... 0 ... .... ..0..... @bfx
+
+# bfc is bfi w/ rn=15
+BFCI 1111 0011 011 0 .... 0 ... .... ..0..... @bfi
+
+# Multiply and multiply accumulate
+
+@s0_rnadm .... .... .... rn:4 ra:4 rd:4 .... rm:4 &s_rrrr s=0
+@s0_rn0dm .... .... .... rn:4 .... rd:4 .... rm:4 &s_rrrr ra=0 s=0
+@rnadm .... .... .... rn:4 ra:4 rd:4 .... rm:4 &rrrr
+@rn0dm .... .... .... rn:4 .... rd:4 .... rm:4 &rrrr ra=0
+@rndm .... .... .... rn:4 .... rd:4 .... rm:4 &rrr
+@rdm .... .... .... .... .... rd:4 .... rm:4 &rr
+
+{
+ MUL 1111 1011 0000 .... 1111 .... 0000 .... @s0_rn0dm
+ MLA 1111 1011 0000 .... .... .... 0000 .... @s0_rnadm
+}
+MLS 1111 1011 0000 .... .... .... 0001 .... @rnadm
+SMULL 1111 1011 1000 .... .... .... 0000 .... @s0_rnadm
+UMULL 1111 1011 1010 .... .... .... 0000 .... @s0_rnadm
+SMLAL 1111 1011 1100 .... .... .... 0000 .... @s0_rnadm
+UMLAL 1111 1011 1110 .... .... .... 0000 .... @s0_rnadm
+UMAAL 1111 1011 1110 .... .... .... 0110 .... @rnadm
+{
+ SMULWB 1111 1011 0011 .... 1111 .... 0000 .... @rn0dm
+ SMLAWB 1111 1011 0011 .... .... .... 0000 .... @rnadm
+}
+{
+ SMULWT 1111 1011 0011 .... 1111 .... 0001 .... @rn0dm
+ SMLAWT 1111 1011 0011 .... .... .... 0001 .... @rnadm
+}
+{
+ SMULBB 1111 1011 0001 .... 1111 .... 0000 .... @rn0dm
+ SMLABB 1111 1011 0001 .... .... .... 0000 .... @rnadm
+}
+{
+ SMULBT 1111 1011 0001 .... 1111 .... 0001 .... @rn0dm
+ SMLABT 1111 1011 0001 .... .... .... 0001 .... @rnadm
+}
+{
+ SMULTB 1111 1011 0001 .... 1111 .... 0010 .... @rn0dm
+ SMLATB 1111 1011 0001 .... .... .... 0010 .... @rnadm
+}
+{
+ SMULTT 1111 1011 0001 .... 1111 .... 0011 .... @rn0dm
+ SMLATT 1111 1011 0001 .... .... .... 0011 .... @rnadm
+}
+SMLALBB 1111 1011 1100 .... .... .... 1000 .... @rnadm
+SMLALBT 1111 1011 1100 .... .... .... 1001 .... @rnadm
+SMLALTB 1111 1011 1100 .... .... .... 1010 .... @rnadm
+SMLALTT 1111 1011 1100 .... .... .... 1011 .... @rnadm
+
+# usad8 is usada8 w/ ra=15
+USADA8 1111 1011 0111 .... .... .... 0000 .... @rnadm
+
+SMLAD 1111 1011 0010 .... .... .... 0000 .... @rnadm
+SMLADX 1111 1011 0010 .... .... .... 0001 .... @rnadm
+SMLSD 1111 1011 0100 .... .... .... 0000 .... @rnadm
+SMLSDX 1111 1011 0100 .... .... .... 0001 .... @rnadm
+
+SMLALD 1111 1011 1100 .... .... .... 1100 .... @rnadm
+SMLALDX 1111 1011 1100 .... .... .... 1101 .... @rnadm
+SMLSLD 1111 1011 1101 .... .... .... 1100 .... @rnadm
+SMLSLDX 1111 1011 1101 .... .... .... 1101 .... @rnadm
+
+SMMLA 1111 1011 0101 .... .... .... 0000 .... @rnadm
+SMMLAR 1111 1011 0101 .... .... .... 0001 .... @rnadm
+SMMLS 1111 1011 0110 .... .... .... 0000 .... @rnadm
+SMMLSR 1111 1011 0110 .... .... .... 0001 .... @rnadm
+
+SDIV 1111 1011 1001 .... 1111 .... 1111 .... @rndm
+UDIV 1111 1011 1011 .... 1111 .... 1111 .... @rndm
+
+# Data-processing (two source registers)
+
+QADD 1111 1010 1000 .... 1111 .... 1000 .... @rndm
+QSUB 1111 1010 1000 .... 1111 .... 1010 .... @rndm
+QDADD 1111 1010 1000 .... 1111 .... 1001 .... @rndm
+QDSUB 1111 1010 1000 .... 1111 .... 1011 .... @rndm
+
+CRC32B 1111 1010 1100 .... 1111 .... 1000 .... @rndm
+CRC32H 1111 1010 1100 .... 1111 .... 1001 .... @rndm
+CRC32W 1111 1010 1100 .... 1111 .... 1010 .... @rndm
+CRC32CB 1111 1010 1101 .... 1111 .... 1000 .... @rndm
+CRC32CH 1111 1010 1101 .... 1111 .... 1001 .... @rndm
+CRC32CW 1111 1010 1101 .... 1111 .... 1010 .... @rndm
+
+SEL 1111 1010 1010 .... 1111 .... 1000 .... @rndm
+
+# Note rn != rm is CONSTRAINED UNPREDICTABLE; we choose to ignore rn.
+REV 1111 1010 1001 ---- 1111 .... 1000 .... @rdm
+REV16 1111 1010 1001 ---- 1111 .... 1001 .... @rdm
+RBIT 1111 1010 1001 ---- 1111 .... 1010 .... @rdm
+REVSH 1111 1010 1001 ---- 1111 .... 1011 .... @rdm
+CLZ 1111 1010 1011 ---- 1111 .... 1000 .... @rdm
+
+# Branches and miscellaneous control
+
+%msr_sysm 4:1 8:4
+%mrs_sysm 4:1 16:4
+%imm16_16_0 16:4 0:12
+%imm21 26:s1 11:1 13:1 16:6 0:11 !function=times_2
+&ci cond imm
+
+{
+ # Group insn[25:23] = 111, which is cond=111x for the branch below,
+ # or unconditional, which would be illegal for the branch.
+ [
+ # Hints, and CPS
+ {
+ [
+ YIELD 1111 0011 1010 1111 1000 0000 0000 0001
+ WFE 1111 0011 1010 1111 1000 0000 0000 0010
+ WFI 1111 0011 1010 1111 1000 0000 0000 0011
+
+ # TODO: Implement SEV, SEVL; may help SMP performance.
+ # SEV 1111 0011 1010 1111 1000 0000 0000 0100
+ # SEVL 1111 0011 1010 1111 1000 0000 0000 0101
+
+ ESB 1111 0011 1010 1111 1000 0000 0001 0000
+ ]
+
+ # The canonical nop ends in 0000 0000, but the whole rest
+ # of the space is "reserved hint, behaves as nop".
+ NOP 1111 0011 1010 1111 1000 0000 ---- ----
+
+ # If imod == '00' && M == '0' then SEE "Hint instructions", above.
+ CPS 1111 0011 1010 1111 1000 0 imod:2 M:1 A:1 I:1 F:1 mode:5 \
+ &cps
+ }
+
+ # Miscellaneous control
+ CLREX 1111 0011 1011 1111 1000 1111 0010 1111
+ DSB 1111 0011 1011 1111 1000 1111 0100 ----
+ DMB 1111 0011 1011 1111 1000 1111 0101 ----
+ ISB 1111 0011 1011 1111 1000 1111 0110 ----
+ SB 1111 0011 1011 1111 1000 1111 0111 0000
+
+ # Note that the v7m insn overlaps both the normal and banked insn.
+ {
+ MRS_bank 1111 0011 111 r:1 .... 1000 rd:4 001. 0000 \
+ &mrs_bank sysm=%mrs_sysm
+ MRS_reg 1111 0011 111 r:1 1111 1000 rd:4 0000 0000 &mrs_reg
+ MRS_v7m 1111 0011 111 0 1111 1000 rd:4 sysm:8
+ }
+ {
+ MSR_bank 1111 0011 100 r:1 rn:4 1000 .... 001. 0000 \
+ &msr_bank sysm=%msr_sysm
+ MSR_reg 1111 0011 100 r:1 rn:4 1000 mask:4 0000 0000 &msr_reg
+ MSR_v7m 1111 0011 100 0 rn:4 1000 mask:2 00 sysm:8
+ }
+ BXJ 1111 0011 1100 rm:4 1000 1111 0000 0000 &r
+ {
+ # At v6T2, this is the T5 encoding of SUBS PC, LR, #IMM, and works as for
+ # every other encoding of SUBS. With v7VE, IMM=0 is redefined as ERET.
+ # The distinction between the two only matters for Hyp mode.
+ ERET 1111 0011 1101 1110 1000 1111 0000 0000
+ SUB_rri 1111 0011 1101 1110 1000 1111 imm:8 \
+ &s_rri_rot rot=0 s=1 rd=15 rn=14
+ }
+ SMC 1111 0111 1111 imm:4 1000 0000 0000 0000 &i
+ HVC 1111 0111 1110 .... 1000 .... .... .... \
+ &i imm=%imm16_16_0
+ UDF 1111 0111 1111 ---- 1010 ---- ---- ----
+ ]
+ B_cond_thumb 1111 0. cond:4 ...... 10.0 ............ &ci imm=%imm21
+}
+
+# Load/store (register, immediate, literal)
+
+@ldst_rr .... .... .... rn:4 rt:4 ...... shimm:2 rm:4 \
+ &ldst_rr p=1 w=0 u=1 shtype=0
+@ldst_ri_idx .... .... .... rn:4 rt:4 . p:1 u:1 . imm:8 \
+ &ldst_ri w=1
+@ldst_ri_neg .... .... .... rn:4 rt:4 .... imm:8 \
+ &ldst_ri p=1 w=0 u=0
+@ldst_ri_unp .... .... .... rn:4 rt:4 .... imm:8 \
+ &ldst_ri p=1 w=0 u=1
+@ldst_ri_pos .... .... .... rn:4 rt:4 imm:12 \
+ &ldst_ri p=1 w=0 u=1
+@ldst_ri_lit .... .... u:1 ... .... rt:4 imm:12 \
+ &ldst_ri p=1 w=0 rn=15
+
+STRB_rr 1111 1000 0000 .... .... 000000 .. .... @ldst_rr
+STRB_ri 1111 1000 0000 .... .... 1..1 ........ @ldst_ri_idx
+STRB_ri 1111 1000 0000 .... .... 1100 ........ @ldst_ri_neg
+STRBT_ri 1111 1000 0000 .... .... 1110 ........ @ldst_ri_unp
+STRB_ri 1111 1000 1000 .... .... ............ @ldst_ri_pos
+
+STRH_rr 1111 1000 0010 .... .... 000000 .. .... @ldst_rr
+STRH_ri 1111 1000 0010 .... .... 1..1 ........ @ldst_ri_idx
+STRH_ri 1111 1000 0010 .... .... 1100 ........ @ldst_ri_neg
+STRHT_ri 1111 1000 0010 .... .... 1110 ........ @ldst_ri_unp
+STRH_ri 1111 1000 1010 .... .... ............ @ldst_ri_pos
+
+STR_rr 1111 1000 0100 .... .... 000000 .. .... @ldst_rr
+STR_ri 1111 1000 0100 .... .... 1..1 ........ @ldst_ri_idx
+STR_ri 1111 1000 0100 .... .... 1100 ........ @ldst_ri_neg
+STRT_ri 1111 1000 0100 .... .... 1110 ........ @ldst_ri_unp
+STR_ri 1111 1000 1100 .... .... ............ @ldst_ri_pos
+
+# Note that Load, unsigned (literal) overlaps all other load encodings.
+{
+ {
+ NOP 1111 1000 -001 1111 1111 ------------ # PLD
+ LDRB_ri 1111 1000 .001 1111 .... ............ @ldst_ri_lit
+ }
+ {
+ NOP 1111 1000 1001 ---- 1111 ------------ # PLD
+ LDRB_ri 1111 1000 1001 .... .... ............ @ldst_ri_pos
+ }
+ LDRB_ri 1111 1000 0001 .... .... 1..1 ........ @ldst_ri_idx
+ {
+ NOP 1111 1000 0001 ---- 1111 1100 -------- # PLD
+ LDRB_ri 1111 1000 0001 .... .... 1100 ........ @ldst_ri_neg
+ }
+ LDRBT_ri 1111 1000 0001 .... .... 1110 ........ @ldst_ri_unp
+ {
+ NOP 1111 1000 0001 ---- 1111 000000 -- ---- # PLD
+ LDRB_rr 1111 1000 0001 .... .... 000000 .. .... @ldst_rr
+ }
+}
+{
+ {
+ NOP 1111 1000 -011 1111 1111 ------------ # PLD
+ LDRH_ri 1111 1000 .011 1111 .... ............ @ldst_ri_lit
+ }
+ {
+ NOP 1111 1000 1011 ---- 1111 ------------ # PLDW
+ LDRH_ri 1111 1000 1011 .... .... ............ @ldst_ri_pos
+ }
+ LDRH_ri 1111 1000 0011 .... .... 1..1 ........ @ldst_ri_idx
+ {
+ NOP 1111 1000 0011 ---- 1111 1100 -------- # PLDW
+ LDRH_ri 1111 1000 0011 .... .... 1100 ........ @ldst_ri_neg
+ }
+ LDRHT_ri 1111 1000 0011 .... .... 1110 ........ @ldst_ri_unp
+ {
+ NOP 1111 1000 0011 ---- 1111 000000 -- ---- # PLDW
+ LDRH_rr 1111 1000 0011 .... .... 000000 .. .... @ldst_rr
+ }
+}
+{
+ LDR_ri 1111 1000 .101 1111 .... ............ @ldst_ri_lit
+ LDR_ri 1111 1000 1101 .... .... ............ @ldst_ri_pos
+ LDR_ri 1111 1000 0101 .... .... 1..1 ........ @ldst_ri_idx
+ LDR_ri 1111 1000 0101 .... .... 1100 ........ @ldst_ri_neg
+ LDRT_ri 1111 1000 0101 .... .... 1110 ........ @ldst_ri_unp
+ LDR_rr 1111 1000 0101 .... .... 000000 .. .... @ldst_rr
+}
+# NOPs here are PLI.
+{
+ {
+ NOP 1111 1001 -001 1111 1111 ------------
+ LDRSB_ri 1111 1001 .001 1111 .... ............ @ldst_ri_lit
+ }
+ {
+ NOP 1111 1001 1001 ---- 1111 ------------
+ LDRSB_ri 1111 1001 1001 .... .... ............ @ldst_ri_pos
+ }
+ LDRSB_ri 1111 1001 0001 .... .... 1..1 ........ @ldst_ri_idx
+ {
+ NOP 1111 1001 0001 ---- 1111 1100 --------
+ LDRSB_ri 1111 1001 0001 .... .... 1100 ........ @ldst_ri_neg
+ }
+ LDRSBT_ri 1111 1001 0001 .... .... 1110 ........ @ldst_ri_unp
+ {
+ NOP 1111 1001 0001 ---- 1111 000000 -- ----
+ LDRSB_rr 1111 1001 0001 .... .... 000000 .. .... @ldst_rr
+ }
+}
+# NOPs here are unallocated memory hints, treated as NOP.
+{
+ {
+ NOP 1111 1001 -011 1111 1111 ------------
+ LDRSH_ri 1111 1001 .011 1111 .... ............ @ldst_ri_lit
+ }
+ {
+ NOP 1111 1001 1011 ---- 1111 ------------
+ LDRSH_ri 1111 1001 1011 .... .... ............ @ldst_ri_pos
+ }
+ LDRSH_ri 1111 1001 0011 .... .... 1..1 ........ @ldst_ri_idx
+ {
+ NOP 1111 1001 0011 ---- 1111 1100 --------
+ LDRSH_ri 1111 1001 0011 .... .... 1100 ........ @ldst_ri_neg
+ }
+ LDRSHT_ri 1111 1001 0011 .... .... 1110 ........ @ldst_ri_unp
+ {
+ NOP 1111 1001 0011 ---- 1111 000000 -- ----
+ LDRSH_rr 1111 1001 0011 .... .... 000000 .. .... @ldst_rr
+ }
+}
+
+%imm8x4 0:8 !function=times_4
+&ldst_ri2 p w u rn rt rt2 imm
+@ldstd_ri8 .... .... u:1 ... rn:4 rt:4 rt2:4 ........ \
+ &ldst_ri2 imm=%imm8x4
+
+STRD_ri_t32 1110 1000 .110 .... .... .... ........ @ldstd_ri8 w=1 p=0
+LDRD_ri_t32 1110 1000 .111 .... .... .... ........ @ldstd_ri8 w=1 p=0
+
+STRD_ri_t32 1110 1001 .100 .... .... .... ........ @ldstd_ri8 w=0 p=1
+LDRD_ri_t32 1110 1001 .101 .... .... .... ........ @ldstd_ri8 w=0 p=1
+
+STRD_ri_t32 1110 1001 .110 .... .... .... ........ @ldstd_ri8 w=1 p=1
+{
+ SG 1110 1001 0111 1111 1110 1001 01111111
+ LDRD_ri_t32 1110 1001 .111 .... .... .... ........ @ldstd_ri8 w=1 p=1
+}
+
+# Load/Store Exclusive, Load-Acquire/Store-Release, and Table Branch
+
+@strex_i .... .... .... rn:4 rt:4 rd:4 .... .... \
+ &strex rt2=15 imm=%imm8x4
+@strex_0 .... .... .... rn:4 rt:4 .... .... rd:4 \
+ &strex rt2=15 imm=0
+@strex_d .... .... .... rn:4 rt:4 rt2:4 .... rd:4 \
+ &strex imm=0
+
+@ldrex_i .... .... .... rn:4 rt:4 .... .... .... \
+ &ldrex rt2=15 imm=%imm8x4
+@ldrex_0 .... .... .... rn:4 rt:4 .... .... .... \
+ &ldrex rt2=15 imm=0
+@ldrex_d .... .... .... rn:4 rt:4 rt2:4 .... .... \
+ &ldrex imm=0
+
+{
+ TT 1110 1000 0100 rn:4 1111 rd:4 A:1 T:1 000000
+ STREX 1110 1000 0100 .... .... .... .... .... @strex_i
+}
+STREXB 1110 1000 1100 .... .... 1111 0100 .... @strex_0
+STREXH 1110 1000 1100 .... .... 1111 0101 .... @strex_0
+STREXD_t32 1110 1000 1100 .... .... .... 0111 .... @strex_d
+
+STLEX 1110 1000 1100 .... .... 1111 1110 .... @strex_0
+STLEXB 1110 1000 1100 .... .... 1111 1100 .... @strex_0
+STLEXH 1110 1000 1100 .... .... 1111 1101 .... @strex_0
+STLEXD_t32 1110 1000 1100 .... .... .... 1111 .... @strex_d
+
+STL 1110 1000 1100 .... .... 1111 1010 1111 @ldrex_0
+STLB 1110 1000 1100 .... .... 1111 1000 1111 @ldrex_0
+STLH 1110 1000 1100 .... .... 1111 1001 1111 @ldrex_0
+
+LDREX 1110 1000 0101 .... .... 1111 .... .... @ldrex_i
+LDREXB 1110 1000 1101 .... .... 1111 0100 1111 @ldrex_0
+LDREXH 1110 1000 1101 .... .... 1111 0101 1111 @ldrex_0
+LDREXD_t32 1110 1000 1101 .... .... .... 0111 1111 @ldrex_d
+
+LDAEX 1110 1000 1101 .... .... 1111 1110 1111 @ldrex_0
+LDAEXB 1110 1000 1101 .... .... 1111 1100 1111 @ldrex_0
+LDAEXH 1110 1000 1101 .... .... 1111 1101 1111 @ldrex_0
+LDAEXD_t32 1110 1000 1101 .... .... .... 1111 1111 @ldrex_d
+
+LDA 1110 1000 1101 .... .... 1111 1010 1111 @ldrex_0
+LDAB 1110 1000 1101 .... .... 1111 1000 1111 @ldrex_0
+LDAH 1110 1000 1101 .... .... 1111 1001 1111 @ldrex_0
+
+&tbranch rn rm
+@tbranch .... .... .... rn:4 .... .... .... rm:4 &tbranch
+
+TBB 1110 1000 1101 .... 1111 0000 0000 .... @tbranch
+TBH 1110 1000 1101 .... 1111 0000 0001 .... @tbranch
+
+# Parallel addition and subtraction
+
+SADD8 1111 1010 1000 .... 1111 .... 0000 .... @rndm
+QADD8 1111 1010 1000 .... 1111 .... 0001 .... @rndm
+SHADD8 1111 1010 1000 .... 1111 .... 0010 .... @rndm
+UADD8 1111 1010 1000 .... 1111 .... 0100 .... @rndm
+UQADD8 1111 1010 1000 .... 1111 .... 0101 .... @rndm
+UHADD8 1111 1010 1000 .... 1111 .... 0110 .... @rndm
+
+SADD16 1111 1010 1001 .... 1111 .... 0000 .... @rndm
+QADD16 1111 1010 1001 .... 1111 .... 0001 .... @rndm
+SHADD16 1111 1010 1001 .... 1111 .... 0010 .... @rndm
+UADD16 1111 1010 1001 .... 1111 .... 0100 .... @rndm
+UQADD16 1111 1010 1001 .... 1111 .... 0101 .... @rndm
+UHADD16 1111 1010 1001 .... 1111 .... 0110 .... @rndm
+
+SASX 1111 1010 1010 .... 1111 .... 0000 .... @rndm
+QASX 1111 1010 1010 .... 1111 .... 0001 .... @rndm
+SHASX 1111 1010 1010 .... 1111 .... 0010 .... @rndm
+UASX 1111 1010 1010 .... 1111 .... 0100 .... @rndm
+UQASX 1111 1010 1010 .... 1111 .... 0101 .... @rndm
+UHASX 1111 1010 1010 .... 1111 .... 0110 .... @rndm
+
+SSUB8 1111 1010 1100 .... 1111 .... 0000 .... @rndm
+QSUB8 1111 1010 1100 .... 1111 .... 0001 .... @rndm
+SHSUB8 1111 1010 1100 .... 1111 .... 0010 .... @rndm
+USUB8 1111 1010 1100 .... 1111 .... 0100 .... @rndm
+UQSUB8 1111 1010 1100 .... 1111 .... 0101 .... @rndm
+UHSUB8 1111 1010 1100 .... 1111 .... 0110 .... @rndm
+
+SSUB16 1111 1010 1101 .... 1111 .... 0000 .... @rndm
+QSUB16 1111 1010 1101 .... 1111 .... 0001 .... @rndm
+SHSUB16 1111 1010 1101 .... 1111 .... 0010 .... @rndm
+USUB16 1111 1010 1101 .... 1111 .... 0100 .... @rndm
+UQSUB16 1111 1010 1101 .... 1111 .... 0101 .... @rndm
+UHSUB16 1111 1010 1101 .... 1111 .... 0110 .... @rndm
+
+SSAX 1111 1010 1110 .... 1111 .... 0000 .... @rndm
+QSAX 1111 1010 1110 .... 1111 .... 0001 .... @rndm
+SHSAX 1111 1010 1110 .... 1111 .... 0010 .... @rndm
+USAX 1111 1010 1110 .... 1111 .... 0100 .... @rndm
+UQSAX 1111 1010 1110 .... 1111 .... 0101 .... @rndm
+UHSAX 1111 1010 1110 .... 1111 .... 0110 .... @rndm
+
+# Register extends
+
+@rrr_rot .... .... .... rn:4 .... rd:4 .. rot:2 rm:4 &rrr_rot
+
+SXTAH 1111 1010 0000 .... 1111 .... 10.. .... @rrr_rot
+UXTAH 1111 1010 0001 .... 1111 .... 10.. .... @rrr_rot
+SXTAB16 1111 1010 0010 .... 1111 .... 10.. .... @rrr_rot
+UXTAB16 1111 1010 0011 .... 1111 .... 10.. .... @rrr_rot
+SXTAB 1111 1010 0100 .... 1111 .... 10.. .... @rrr_rot
+UXTAB 1111 1010 0101 .... 1111 .... 10.. .... @rrr_rot
+
+# Load/store multiple
+
+@ldstm .... .... .. w:1 . rn:4 list:16 &ldst_block u=0
+
+STM_t32 1110 1000 10.0 .... ................ @ldstm i=1 b=0
+STM_t32 1110 1001 00.0 .... ................ @ldstm i=0 b=1
+{
+ # Rn=15 UNDEFs for LDM; M-profile CLRM uses that encoding
+ CLRM 1110 1000 1001 1111 list:16
+ LDM_t32 1110 1000 10.1 .... ................ @ldstm i=1 b=0
+}
+LDM_t32 1110 1001 00.1 .... ................ @ldstm i=0 b=1
+
+&rfe !extern rn w pu
+@rfe .... .... .. w:1 . rn:4 ................ &rfe
+
+RFE 1110 1000 00.1 .... 1100000000000000 @rfe pu=2
+RFE 1110 1001 10.1 .... 1100000000000000 @rfe pu=1
+
+&srs !extern mode w pu
+@srs .... .... .. w:1 . .... ........... mode:5 &srs
+
+SRS 1110 1000 00.0 1101 1100 0000 000. .... @srs pu=2
+SRS 1110 1001 10.0 1101 1100 0000 000. .... @srs pu=1
+
+# Coprocessor instructions
+
+# We decode MCR, MRC, MRRC and MCRR only, because for QEMU the
+# other coprocessor instructions always UNDEF.
+# The trans_ functions for these will ignore cp values 8..13 for v7 or
+# earlier, and 0..13 for v8 and later, because those areas of the
+# encoding space may be used for other things, such as VFP or Neon.
+
+@mcr .... .... opc1:3 . crn:4 rt:4 cp:4 opc2:3 . crm:4
+@mcrr .... .... .... rt2:4 rt:4 cp:4 opc1:4 crm:4
+
+MCRR 1110 1100 0100 .... .... .... .... .... @mcrr
+MRRC 1110 1100 0101 .... .... .... .... .... @mcrr
+
+MCR 1110 1110 ... 0 .... .... .... ... 1 .... @mcr
+MRC 1110 1110 ... 1 .... .... .... ... 1 .... @mcr
+
+# Branches
+
+%imm24 26:s1 13:1 11:1 16:10 0:11 !function=t32_branch24
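+# The fields above are S, J1, J2, imm10 and imm11; t32_branch24 applies the
+# usual I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S) recombination and scales the
+# result by 2 to give a byte offset.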
+@branch24 ................................ &i imm=%imm24
+
+B 1111 0. .......... 10.1 ............ @branch24
+BL 1111 0. .......... 11.1 ............ @branch24
+{
+ # BLX_i is non-M-profile only
+ BLX_i 1111 0. .......... 11.0 ............ @branch24
+ # M-profile only: loop and branch insns
+ [
+ # All these BF insns have boff != 0b0000; we NOP them all
+ BF 1111 0 boff:4 ------- 1100 - ---------- 1 # BFL
+ BF 1111 0 boff:4 0 ------ 1110 - ---------- 1 # BFCSEL
+ BF 1111 0 boff:4 10 ----- 1110 - ---------- 1 # BF
+ BF 1111 0 boff:4 11 ----- 1110 0 0000000000 1 # BFX, BFLX
+ ]
+ [
+ # LE and WLS immediate
+ %lob_imm 1:10 11:1 !function=times_2
+
+ DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 size=4
+ WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm size=4
+ {
+ LE 1111 0 0000 0 f:1 tp:1 1111 1100 . .......... 1 imm=%lob_imm
+ # This is WLSTP
+ WLS 1111 0 0000 0 size:2 rn:4 1100 . .......... 1 imm=%lob_imm
+ }
+ {
+ LCTP 1111 0 0000 000 1111 1110 0000 0000 0001
+ # This is DLSTP
+ DLS 1111 0 0000 0 size:2 rn:4 1110 0000 0000 0001
+ }
+ VCTP 1111 0 0000 0 size:2 rn:4 1110 1000 0000 0001
+ ]
+}
diff --git a/target/arm/tcg/tlb_helper.c b/target/arm/tcg/tlb_helper.c
new file mode 100644
index 0000000000..885bf4ec14
--- /dev/null
+++ b/target/arm/tcg/tlb_helper.c
@@ -0,0 +1,398 @@
+/*
+ * ARM TLB (Translation lookaside buffer) helpers.
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "cpu-features.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+
+
+/*
+ * Returns true if the stage 1 translation regime is using LPAE format page
+ * tables. Used when raising alignment exceptions, whose FSR changes depending
+ * on whether the long or short descriptor format is in use.
+ */
+bool arm_s1_regime_using_lpae_format(CPUARMState *env, ARMMMUIdx mmu_idx)
+{
+ mmu_idx = stage_1_mmu_idx(mmu_idx);
+ return regime_using_lpae_format(env, mmu_idx);
+}
+
+static inline uint32_t merge_syn_data_abort(uint32_t template_syn,
+ ARMMMUFaultInfo *fi,
+ unsigned int target_el,
+ bool same_el, bool is_write,
+ int fsc)
+{
+ uint32_t syn;
+
+ /*
+ * ISV is only set for stage-2 data aborts routed to EL2 and
+ * never for stage-1 page table walks faulting on stage 2
+ * or for stage-1 faults.
+ *
+ * Furthermore, ISV is only set for certain kinds of load/stores.
+ * If the template syndrome does not have ISV set, we should leave
+ * it cleared.
+ *
+ * See ARMv8 specs, D7-1974:
+ * ISS encoding for an exception from a Data Abort, the
+ * ISV field.
+ *
+ * TODO: FEAT_LS64/FEAT_LS64_V/FEAT_LS64_ACCDATA: Translation,
+ * Access Flag, and Permission faults caused by LD64B, ST64B,
+ * ST64BV, or ST64BV0 insns report syndrome info even for stage-1
+ * faults and regardless of the target EL.
+ */
+ if (template_syn & ARM_EL_VNCR) {
+ /*
+ * FEAT_NV2 faults on accesses via VNCR_EL2 are a special case:
+ * they are always reported as "same EL", even though we are going
+ * from EL1 to EL2.
+ */
+ assert(!fi->stage2);
+ syn = syn_data_abort_vncr(fi->ea, is_write, fsc);
+ } else if (!(template_syn & ARM_EL_ISV) || target_el != 2
+ || fi->s1ptw || !fi->stage2) {
+ syn = syn_data_abort_no_iss(same_el, 0,
+ fi->ea, 0, fi->s1ptw, is_write, fsc);
+ } else {
+ /*
+ * Fields: IL, ISV, SAS, SSE, SRT, SF and AR come from the template
+ * syndrome created at translation time.
+ * Now we create the runtime syndrome with the remaining fields.
+ */
+ syn = syn_data_abort_with_iss(same_el,
+ 0, 0, 0, 0, 0,
+ fi->ea, 0, fi->s1ptw, is_write, fsc,
+ true);
+ /* Merge the runtime syndrome with the template syndrome. */
+ syn |= template_syn;
+ }
+ return syn;
+}
+
+static uint32_t compute_fsr_fsc(CPUARMState *env, ARMMMUFaultInfo *fi,
+ int target_el, int mmu_idx, uint32_t *ret_fsc)
+{
+ ARMMMUIdx arm_mmu_idx = core_to_arm_mmu_idx(env, mmu_idx);
+ uint32_t fsr, fsc;
+
+ /*
+ * For M-profile there is no guest-facing FSR. We compute a
+ * short-form value for env->exception.fsr which we will then
+ * examine in arm_v7m_cpu_do_interrupt(). In theory we could
+ * use the LPAE format instead as long as both bits of code agree
+ * (and arm_fi_to_lfsc() handled the M-profile specific
+ * ARMFault_QEMU_NSCExec and ARMFault_QEMU_SFault cases).
+ */
+ if (!arm_feature(env, ARM_FEATURE_M) &&
+ (target_el == 2 || arm_el_is_aa64(env, target_el) ||
+ arm_s1_regime_using_lpae_format(env, arm_mmu_idx))) {
+ /*
+ * LPAE format fault status register : bottom 6 bits are
+ * status code in the same form as needed for syndrome
+ */
+ fsr = arm_fi_to_lfsc(fi);
+ fsc = extract32(fsr, 0, 6);
+ } else {
+ fsr = arm_fi_to_sfsc(fi);
+ /*
+ * Short format FSR : this fault will never actually be reported
+ * to an EL that uses a syndrome register. Use a (currently)
+ * reserved FSR code in case the constructed syndrome does leak
+ * into the guest somehow.
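+ * (For example, an alignment fault is reported as fsr == fsc == 0x21 via
+ * the LPAE path above, but as FS == 0x01 plus fsc == 0x3f here.)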
+ */
+ fsc = 0x3f;
+ }
+
+ *ret_fsc = fsc;
+ return fsr;
+}
+
+static bool report_as_gpc_exception(ARMCPU *cpu, int current_el,
+ ARMMMUFaultInfo *fi)
+{
+ bool ret;
+
+ switch (fi->gpcf) {
+ case GPCF_None:
+ return false;
+ case GPCF_AddressSize:
+ case GPCF_Walk:
+ case GPCF_EABT:
+ /* R_PYTGX: GPT faults are reported as GPC. */
+ ret = true;
+ break;
+ case GPCF_Fail:
+ /*
+ * R_BLYPM: A GPF at EL3 is reported as insn or data abort.
+ * R_VBZMW, R_LXHQR: A GPF at EL[0-2] is reported as a GPC
+ * if SCR_EL3.GPF is set, otherwise an insn or data abort.
+ */
+ ret = (cpu->env.cp15.scr_el3 & SCR_GPF) && current_el != 3;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ assert(cpu_isar_feature(aa64_rme, cpu));
+ assert(fi->type == ARMFault_GPCFOnWalk ||
+ fi->type == ARMFault_GPCFOnOutput);
+ if (fi->gpcf == GPCF_AddressSize) {
+ assert(fi->level == 0);
+ } else {
+ assert(fi->level >= 0 && fi->level <= 1);
+ }
+
+ return ret;
+}
+
+static unsigned encode_gpcsc(ARMMMUFaultInfo *fi)
+{
+ static uint8_t const gpcsc[] = {
+ [GPCF_AddressSize] = 0b000000,
+ [GPCF_Walk] = 0b000100,
+ [GPCF_Fail] = 0b001100,
+ [GPCF_EABT] = 0b010100,
+ };
+
+ /* Note that we've validated fi->gpcf and fi->level above. */
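+ /* For example, a GPT walk fault at level 1 encodes as 0b000101. */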
+ return gpcsc[fi->gpcf] | fi->level;
+}
+
+static G_NORETURN
+void arm_deliver_fault(ARMCPU *cpu, vaddr addr,
+ MMUAccessType access_type,
+ int mmu_idx, ARMMMUFaultInfo *fi)
+{
+ CPUARMState *env = &cpu->env;
+ int target_el = exception_target_el(env);
+ int current_el = arm_current_el(env);
+ bool same_el;
+ uint32_t syn, exc, fsr, fsc;
+ /*
+ * We know this must be a data or insn abort, and that
+ * env->exception.syndrome contains the template syndrome set
+ * up at translate time. So we can check only the VNCR bit
+ * (and indeed syndrome does not have the EC field in it,
+ * because we masked that out in disas_set_insn_syndrome())
+ */
+ bool is_vncr = (access_type != MMU_INST_FETCH) &&
+ (env->exception.syndrome & ARM_EL_VNCR);
+
+ if (is_vncr) {
+ /* FEAT_NV2 faults on accesses via VNCR_EL2 go to EL2 */
+ target_el = 2;
+ }
+
+ if (report_as_gpc_exception(cpu, current_el, fi)) {
+ target_el = 3;
+
+ fsr = compute_fsr_fsc(env, fi, target_el, mmu_idx, &fsc);
+
+ syn = syn_gpc(fi->stage2 && fi->type == ARMFault_GPCFOnWalk,
+ access_type == MMU_INST_FETCH,
+ encode_gpcsc(fi), is_vncr,
+ 0, fi->s1ptw,
+ access_type == MMU_DATA_STORE, fsc);
+
+ env->cp15.mfar_el3 = fi->paddr;
+ switch (fi->paddr_space) {
+ case ARMSS_Secure:
+ break;
+ case ARMSS_NonSecure:
+ env->cp15.mfar_el3 |= R_MFAR_NS_MASK;
+ break;
+ case ARMSS_Root:
+ env->cp15.mfar_el3 |= R_MFAR_NSE_MASK;
+ break;
+ case ARMSS_Realm:
+ env->cp15.mfar_el3 |= R_MFAR_NSE_MASK | R_MFAR_NS_MASK;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ exc = EXCP_GPC;
+ goto do_raise;
+ }
+
+ /* If SCR_EL3.GPF is unset, GPF may still be routed to EL2. */
+ if (fi->gpcf == GPCF_Fail && target_el < 2) {
+ if (arm_hcr_el2_eff(env) & HCR_GPF) {
+ target_el = 2;
+ }
+ }
+
+ if (fi->stage2) {
+ target_el = 2;
+ env->cp15.hpfar_el2 = extract64(fi->s2addr, 12, 47) << 4;
+ if (arm_is_secure_below_el3(env) && fi->s1ns) {
+ env->cp15.hpfar_el2 |= HPFAR_NS;
+ }
+ }
+
+ same_el = current_el == target_el;
+ fsr = compute_fsr_fsc(env, fi, target_el, mmu_idx, &fsc);
+
+ if (access_type == MMU_INST_FETCH) {
+ syn = syn_insn_abort(same_el, fi->ea, fi->s1ptw, fsc);
+ exc = EXCP_PREFETCH_ABORT;
+ } else {
+ syn = merge_syn_data_abort(env->exception.syndrome, fi, target_el,
+ same_el, access_type == MMU_DATA_STORE,
+ fsc);
+ if (access_type == MMU_DATA_STORE
+ && arm_feature(env, ARM_FEATURE_V6)) {
+ fsr |= (1 << 11);
+ }
+ exc = EXCP_DATA_ABORT;
+ }
+
+ do_raise:
+ env->exception.vaddress = addr;
+ env->exception.fsr = fsr;
+ raise_exception(env, exc, syn, target_el);
+}
+
+/* Raise a data fault alignment exception for the specified virtual address */
+void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
+ MMUAccessType access_type,
+ int mmu_idx, uintptr_t retaddr)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ ARMMMUFaultInfo fi = {};
+
+ /* now we have a real cpu fault */
+ cpu_restore_state(cs, retaddr);
+
+ fi.type = ARMFault_Alignment;
+ arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi);
+}
+
+void helper_exception_pc_alignment(CPUARMState *env, target_ulong pc)
+{
+ ARMMMUFaultInfo fi = { .type = ARMFault_Alignment };
+ int target_el = exception_target_el(env);
+ int mmu_idx = arm_env_mmu_index(env);
+ uint32_t fsc;
+
+ env->exception.vaddress = pc;
+
+ /*
+ * Note that the fsc is not applicable to this exception,
+ * since the syndrome is pcalignment, not insn_abort.
+ */
+ env->exception.fsr = compute_fsr_fsc(env, &fi, target_el, mmu_idx, &fsc);
+ raise_exception(env, EXCP_PREFETCH_ABORT, syn_pcalignment(), target_el);
+}
+
+#if !defined(CONFIG_USER_ONLY)
+
+/*
+ * arm_cpu_do_transaction_failed: handle a memory system error response
+ * (eg "no device/memory present at address") by raising an external abort
+ * exception
+ */
+void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr,
+ vaddr addr, unsigned size,
+ MMUAccessType access_type,
+ int mmu_idx, MemTxAttrs attrs,
+ MemTxResult response, uintptr_t retaddr)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ ARMMMUFaultInfo fi = {};
+
+ /* now we have a real cpu fault */
+ cpu_restore_state(cs, retaddr);
+
+ fi.ea = arm_extabort_type(response);
+ fi.type = ARMFault_SyncExternal;
+ arm_deliver_fault(cpu, addr, access_type, mmu_idx, &fi);
+}
+
+bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+ MMUAccessType access_type, int mmu_idx,
+ bool probe, uintptr_t retaddr)
+{
+ ARMCPU *cpu = ARM_CPU(cs);
+ GetPhysAddrResult res = {};
+ ARMMMUFaultInfo local_fi, *fi;
+ int ret;
+
+ /*
+ * Allow S1_ptw_translate to see any fault generated here.
+ * Since this may recurse, read and clear.
+ */
+ fi = cpu->env.tlb_fi;
+ if (fi) {
+ cpu->env.tlb_fi = NULL;
+ } else {
+ fi = memset(&local_fi, 0, sizeof(local_fi));
+ }
+
+ /*
+ * Walk the page table and (if the mapping exists) add the page
+ * to the TLB. On success, return true. Otherwise, if probing,
+ * return false. Otherwise populate fsr with ARM DFSR/IFSR fault
+ * register format, and signal the fault.
+ */
+ ret = get_phys_addr(&cpu->env, address, access_type,
+ core_to_arm_mmu_idx(&cpu->env, mmu_idx),
+ &res, fi);
+ if (likely(!ret)) {
+ /*
+ * Map a single [sub]page. Regions smaller than our declared
+ * target page size are handled specially, so for those we
+ * pass in the exact addresses.
+ */
+ if (res.f.lg_page_size >= TARGET_PAGE_BITS) {
+ res.f.phys_addr &= TARGET_PAGE_MASK;
+ address &= TARGET_PAGE_MASK;
+ }
+
+ res.f.extra.arm.pte_attrs = res.cacheattrs.attrs;
+ res.f.extra.arm.shareability = res.cacheattrs.shareability;
+
+ tlb_set_page_full(cs, mmu_idx, address, &res.f);
+ return true;
+ } else if (probe) {
+ return false;
+ } else {
+ /* now we have a real cpu fault */
+ cpu_restore_state(cs, retaddr);
+ arm_deliver_fault(cpu, address, access_type, mmu_idx, fi);
+ }
+}
+#else
+void arm_cpu_record_sigsegv(CPUState *cs, vaddr addr,
+ MMUAccessType access_type,
+ bool maperr, uintptr_t ra)
+{
+ ARMMMUFaultInfo fi = {
+ .type = maperr ? ARMFault_Translation : ARMFault_Permission,
+ .level = 3,
+ };
+ ARMCPU *cpu = ARM_CPU(cs);
+
+ /*
+ * We report both ESR and FAR to signal handlers.
+ * For now, it's easiest to deliver the fault normally.
+ */
+ cpu_restore_state(cs, ra);
+ arm_deliver_fault(cpu, addr, access_type, MMU_USER_IDX, &fi);
+}
+
+void arm_cpu_record_sigbus(CPUState *cs, vaddr addr,
+ MMUAccessType access_type, uintptr_t ra)
+{
+ arm_cpu_do_unaligned_access(cs, addr, access_type, MMU_USER_IDX, ra);
+}
+#endif /* !defined(CONFIG_USER_ONLY) */
diff --git a/target/arm/tcg/translate-a32.h b/target/arm/tcg/translate-a32.h
new file mode 100644
index 0000000000..19de6e0a1a
--- /dev/null
+++ b/target/arm/tcg/translate-a32.h
@@ -0,0 +1,167 @@
+/*
+ * AArch32 translation, common definitions.
+ *
+ * Copyright (c) 2021 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TARGET_ARM_TRANSLATE_A32_H
+#define TARGET_ARM_TRANSLATE_A32_H
+
+/* Prototypes for autogenerated disassembler functions */
+bool disas_m_nocp(DisasContext *dc, uint32_t insn);
+bool disas_mve(DisasContext *dc, uint32_t insn);
+bool disas_vfp(DisasContext *s, uint32_t insn);
+bool disas_vfp_uncond(DisasContext *s, uint32_t insn);
+bool disas_neon_dp(DisasContext *s, uint32_t insn);
+bool disas_neon_ls(DisasContext *s, uint32_t insn);
+bool disas_neon_shared(DisasContext *s, uint32_t insn);
+
+void load_reg_var(DisasContext *s, TCGv_i32 var, int reg);
+void arm_gen_condlabel(DisasContext *s);
+bool vfp_access_check(DisasContext *s);
+bool vfp_access_check_m(DisasContext *s, bool skip_context_update);
+void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop);
+void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop);
+void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop);
+void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop);
+TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs);
+void gen_set_cpsr(TCGv_i32 var, uint32_t mask);
+void gen_set_condexec(DisasContext *s);
+void gen_update_pc(DisasContext *s, target_long diff);
+void gen_lookup_tb(DisasContext *s);
+long vfp_reg_offset(bool dp, unsigned reg);
+long neon_full_reg_offset(unsigned reg);
+long neon_element_offset(int reg, int element, MemOp memop);
+void gen_rev16(TCGv_i32 dest, TCGv_i32 var);
+void clear_eci_state(DisasContext *s);
+bool mve_eci_check(DisasContext *s);
+void mve_update_eci(DisasContext *s);
+void mve_update_and_store_eci(DisasContext *s);
+bool mve_skip_vmov(DisasContext *s, int vn, int index, int size);
+
+static inline TCGv_i32 load_cpu_offset(int offset)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_ld_i32(tmp, tcg_env, offset);
+ return tmp;
+}
+
+/* Load from a 32-bit field to a TCGv_i32 */
+#define load_cpu_field(name) \
+ ({ \
+ QEMU_BUILD_BUG_ON(sizeof_field(CPUARMState, name) != 4); \
+ load_cpu_offset(offsetof(CPUARMState, name)); \
+ })
+
+/* Load from the low half of a 64-bit field to a TCGv_i32 */
+#define load_cpu_field_low32(name) \
+ ({ \
+ QEMU_BUILD_BUG_ON(sizeof_field(CPUARMState, name) != 8); \
+ load_cpu_offset(offsetoflow32(CPUARMState, name)); \
+ })
+
+void store_cpu_offset(TCGv_i32 var, int offset, int size);
+
+#define store_cpu_field(val, name) \
+ ({ \
+ QEMU_BUILD_BUG_ON(sizeof_field(CPUARMState, name) != 4 \
+ && sizeof_field(CPUARMState, name) != 1); \
+ store_cpu_offset(val, offsetof(CPUARMState, name), \
+ sizeof_field(CPUARMState, name)); \
+ })
+
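+/*
+ * Typical use (illustrative): store_cpu_field_constant(0, condexec_bits)
+ * stores constant zero into the 32-bit condexec_bits field of CPUARMState.
+ */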
+#define store_cpu_field_constant(val, name) \
+ store_cpu_field(tcg_constant_i32(val), name)
+
+/* Create a new temporary and set it to the value of a CPU register. */
+static inline TCGv_i32 load_reg(DisasContext *s, int reg)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ load_reg_var(s, tmp, reg);
+ return tmp;
+}
+
+void store_reg(DisasContext *s, int reg, TCGv_i32 var);
+
+void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
+ TCGv_i32 a32, int index, MemOp opc);
+void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
+ TCGv_i32 a32, int index, MemOp opc);
+void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
+ TCGv_i32 a32, int index, MemOp opc);
+void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
+ TCGv_i32 a32, int index, MemOp opc);
+void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
+ int index, MemOp opc);
+void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
+ int index, MemOp opc);
+void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
+ int index, MemOp opc);
+void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
+ int index, MemOp opc);
+
+#define DO_GEN_LD(SUFF, OPC) \
+ static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
+ TCGv_i32 a32, int index) \
+ { \
+ gen_aa32_ld_i32(s, val, a32, index, OPC); \
+ }
+
+#define DO_GEN_ST(SUFF, OPC) \
+ static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
+ TCGv_i32 a32, int index) \
+ { \
+ gen_aa32_st_i32(s, val, a32, index, OPC); \
+ }
+
+static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
+ TCGv_i32 a32, int index)
+{
+ gen_aa32_ld_i64(s, val, a32, index, MO_UQ);
+}
+
+static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
+ TCGv_i32 a32, int index)
+{
+ gen_aa32_st_i64(s, val, a32, index, MO_UQ);
+}
+
+DO_GEN_LD(8u, MO_UB)
+DO_GEN_LD(16u, MO_UW)
+DO_GEN_LD(32u, MO_UL)
+DO_GEN_ST(8, MO_UB)
+DO_GEN_ST(16, MO_UW)
+DO_GEN_ST(32, MO_UL)
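+
+/*
+ * The invocations above expand to gen_aa32_ld8u/ld16u/ld32u and
+ * gen_aa32_st8/st16/st32, thin wrappers around gen_aa32_ld_i32/gen_aa32_st_i32.
+ */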
+
+#undef DO_GEN_LD
+#undef DO_GEN_ST
+
+#if defined(CONFIG_USER_ONLY)
+#define IS_USER(s) 1
+#else
+#define IS_USER(s) (s->user)
+#endif
+
+/* Set NZCV flags from the high 4 bits of var. */
+#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
+
+/* Swap low and high halfwords. */
+static inline void gen_swap_half(TCGv_i32 dest, TCGv_i32 var)
+{
+ tcg_gen_rotri_i32(dest, var, 16);
+}
+
+#endif
diff --git a/target/arm/translate-a64.c b/target/arm/tcg/translate-a64.c
index 8a24278d79..2666d52711 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -6,7 +6,7 @@
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -18,25 +18,14 @@
*/
#include "qemu/osdep.h"
-#include "cpu.h"
#include "exec/exec-all.h"
-#include "tcg-op.h"
-#include "tcg-op-gvec.h"
-#include "qemu/log.h"
-#include "arm_ldst.h"
#include "translate.h"
-#include "internals.h"
-#include "qemu/host-utils.h"
-
-#include "exec/semihost.h"
-#include "exec/gen-icount.h"
-
-#include "exec/helper-proto.h"
-#include "exec/helper-gen.h"
-#include "exec/log.h"
-
-#include "trace-tcg.h"
#include "translate-a64.h"
+#include "qemu/log.h"
+#include "disas/disas.h"
+#include "arm_ldst.h"
+#include "semihosting/semihost.h"
+#include "cpregs.h"
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;
@@ -58,6 +47,35 @@ enum a64_shift_type {
A64_SHIFT_TYPE_ROR = 3
};
+/*
+ * Helpers for extracting complex instruction fields
+ */
+
+/*
+ * For load/store with an unsigned 12 bit immediate scaled by the element
+ * size. The input has the immediate field in bits [14:3] and the element
+ * size in [2:0].
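+ * For example, a field value of (0x10 << 3) | 3 (immediate 0x10, 8-byte
+ * element) scales to a byte offset of 0x80.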
+ */
+static int uimm_scaled(DisasContext *s, int x)
+{
+ unsigned imm = x >> 3;
+ unsigned scale = extract32(x, 0, 3);
+ return imm << scale;
+}
+
+/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
+static int scale_by_log2_tag_granule(DisasContext *s, int x)
+{
+ return x << LOG2_TAG_GRANULE;
+}
+
+/*
+ * Include the generated decoders.
+ */
+
+#include "decode-sme-fa64.c.inc"
+#include "decode-a64.c.inc"
+
/* Table based decoder typedefs - used when the relevant bits for decode
* are too awkwardly scattered across the instruction (eg SIMD).
*/
@@ -69,264 +87,340 @@ typedef struct AArch64DecodeTable {
AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
-/* Function prototype for gen_ functions for calling Neon helpers */
-typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
-typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
-typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
-typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
-typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
-typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
-typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
-typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
-typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
-typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
-typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
-typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
-typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
-typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
-typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
-
/* initialize TCG globals. */
void a64_translate_init(void)
{
int i;
- cpu_pc = tcg_global_mem_new_i64(cpu_env,
+ cpu_pc = tcg_global_mem_new_i64(tcg_env,
offsetof(CPUARMState, pc),
"pc");
for (i = 0; i < 32; i++) {
- cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
+ cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
offsetof(CPUARMState, xregs[i]),
regnames[i]);
}
- cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
+ cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
offsetof(CPUARMState, exclusive_high), "exclusive_high");
}
-static inline int get_a64_user_mem_index(DisasContext *s)
+/*
+ * Return the core mmu_idx to use for A64 load/store insns which
+ * have a "unprivileged load/store" variant. Those insns access
+ * EL0 if executed from an EL which has control over EL0 (usually
+ * EL1) but behave like normal loads and stores if executed from
+ * elsewhere (eg EL3).
+ *
+ * @unpriv : true for the unprivileged encoding; false for the
+ * normal encoding (in which case we will return the same
+ * thing as get_mem_index()).
+ */
+static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
{
- /* Return the core mmu_idx to use for A64 "unprivileged load/store" insns:
- * if EL1, access as if EL0; otherwise access at current EL
+ /*
+ * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
+ * which is the usual mmu_idx for this cpu state.
*/
- ARMMMUIdx useridx;
+ ARMMMUIdx useridx = s->mmu_idx;
- switch (s->mmu_idx) {
- case ARMMMUIdx_S12NSE1:
- useridx = ARMMMUIdx_S12NSE0;
- break;
- case ARMMMUIdx_S1SE1:
- useridx = ARMMMUIdx_S1SE0;
- break;
- case ARMMMUIdx_S2NS:
- g_assert_not_reached();
- default:
- useridx = s->mmu_idx;
- break;
+ if (unpriv && s->unpriv) {
+ /*
+ * We have pre-computed the condition for AccType_UNPRIV.
+ * Therefore we should never get here with a mmu_idx for
+ * which we do not know the corresponding user mmu_idx.
+ */
+ switch (useridx) {
+ case ARMMMUIdx_E10_1:
+ case ARMMMUIdx_E10_1_PAN:
+ useridx = ARMMMUIdx_E10_0;
+ break;
+ case ARMMMUIdx_E20_2:
+ case ARMMMUIdx_E20_2_PAN:
+ useridx = ARMMMUIdx_E20_0;
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
return arm_to_core_mmu_idx(useridx);
}
-void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
- fprintf_function cpu_fprintf, int flags)
+static void set_btype_raw(int val)
{
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
- uint32_t psr = pstate_read(env);
- int i;
- int el = arm_current_el(env);
- const char *ns_status;
-
- cpu_fprintf(f, " PC=%016" PRIx64 " ", env->pc);
- for (i = 0; i < 32; i++) {
- if (i == 31) {
- cpu_fprintf(f, " SP=%016" PRIx64 "\n", env->xregs[i]);
- } else {
- cpu_fprintf(f, "X%02d=%016" PRIx64 "%s", i, env->xregs[i],
- (i + 2) % 3 ? " " : "\n");
- }
- }
+ tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
+ offsetof(CPUARMState, btype));
+}
- if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
- ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
- } else {
- ns_status = "";
- }
- cpu_fprintf(f, "PSTATE=%08x %c%c%c%c %sEL%d%c",
- psr,
- psr & PSTATE_N ? 'N' : '-',
- psr & PSTATE_Z ? 'Z' : '-',
- psr & PSTATE_C ? 'C' : '-',
- psr & PSTATE_V ? 'V' : '-',
- ns_status,
- el,
- psr & PSTATE_SP ? 'h' : 't');
+static void set_btype(DisasContext *s, int val)
+{
+ /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
+ tcg_debug_assert(val >= 1 && val <= 3);
+ set_btype_raw(val);
+ s->btype = -1;
+}
- if (!(flags & CPU_DUMP_FPU)) {
- cpu_fprintf(f, "\n");
- return;
- }
- if (fp_exception_el(env, el) != 0) {
- cpu_fprintf(f, " FPU disabled\n");
- return;
+static void reset_btype(DisasContext *s)
+{
+ if (s->btype != 0) {
+ set_btype_raw(0);
+ s->btype = 0;
}
- cpu_fprintf(f, " FPCR=%08x FPSR=%08x\n",
- vfp_get_fpcr(env), vfp_get_fpsr(env));
-
- if (arm_feature(env, ARM_FEATURE_SVE) && sve_exception_el(env, el) == 0) {
- int j, zcr_len = sve_zcr_len_for_el(env, el);
+}
- for (i = 0; i <= FFR_PRED_NUM; i++) {
- bool eol;
- if (i == FFR_PRED_NUM) {
- cpu_fprintf(f, "FFR=");
- /* It's last, so end the line. */
- eol = true;
- } else {
- cpu_fprintf(f, "P%02d=", i);
- switch (zcr_len) {
- case 0:
- eol = i % 8 == 7;
- break;
- case 1:
- eol = i % 6 == 5;
- break;
- case 2:
- case 3:
- eol = i % 3 == 2;
- break;
- default:
- /* More than one quadword per predicate. */
- eol = true;
- break;
- }
- }
- for (j = zcr_len / 4; j >= 0; j--) {
- int digits;
- if (j * 4 + 4 <= zcr_len + 1) {
- digits = 16;
- } else {
- digits = (zcr_len % 4 + 1) * 4;
- }
- cpu_fprintf(f, "%0*" PRIx64 "%s", digits,
- env->vfp.pregs[i].p[j],
- j ? ":" : eol ? "\n" : " ");
- }
- }
-
- for (i = 0; i < 32; i++) {
- if (zcr_len == 0) {
- cpu_fprintf(f, "Z%02d=%016" PRIx64 ":%016" PRIx64 "%s",
- i, env->vfp.zregs[i].d[1],
- env->vfp.zregs[i].d[0], i & 1 ? "\n" : " ");
- } else if (zcr_len == 1) {
- cpu_fprintf(f, "Z%02d=%016" PRIx64 ":%016" PRIx64
- ":%016" PRIx64 ":%016" PRIx64 "\n",
- i, env->vfp.zregs[i].d[3], env->vfp.zregs[i].d[2],
- env->vfp.zregs[i].d[1], env->vfp.zregs[i].d[0]);
- } else {
- for (j = zcr_len; j >= 0; j--) {
- bool odd = (zcr_len - j) % 2 != 0;
- if (j == zcr_len) {
- cpu_fprintf(f, "Z%02d[%x-%x]=", i, j, j - 1);
- } else if (!odd) {
- if (j > 0) {
- cpu_fprintf(f, " [%x-%x]=", j, j - 1);
- } else {
- cpu_fprintf(f, " [%x]=", j);
- }
- }
- cpu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%s",
- env->vfp.zregs[i].d[j * 2 + 1],
- env->vfp.zregs[i].d[j * 2],
- odd || j == 0 ? "\n" : ":");
- }
- }
- }
+static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
+{
+ assert(s->pc_save != -1);
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
+ tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
} else {
- for (i = 0; i < 32; i++) {
- uint64_t *q = aa64_vfp_qreg(env, i);
- cpu_fprintf(f, "Q%02d=%016" PRIx64 ":%016" PRIx64 "%s",
- i, q[1], q[0], (i & 1 ? "\n" : " "));
- }
+ tcg_gen_movi_i64(dest, s->pc_curr + diff);
}
}
-void gen_a64_set_pc_im(uint64_t val)
+void gen_a64_update_pc(DisasContext *s, target_long diff)
{
- tcg_gen_movi_i64(cpu_pc, val);
+ gen_pc_plus_diff(s, cpu_pc, diff);
+ s->pc_save = s->pc_curr + diff;
}
-/* Load the PC from a generic TCG variable.
+/*
+ * Handle Top Byte Ignore (TBI) bits.
*
- * If address tagging is enabled via the TCR TBI bits, then loading
- * an address into the PC will clear out any tag in the it:
+ * If address tagging is enabled via the TCR TBI bits:
* + for EL2 and EL3 there is only one TBI bit, and if it is set
* then the address is zero-extended, clearing bits [63:56]
* + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
- * and TBI1 controls addressses with bit 55 == 1.
+ * and TBI1 controls addresses with bit 55 == 1.
* If the appropriate TBI bit is set for the address then
* the address is sign-extended from bit 55 into bits [63:56]
*
- * We can avoid doing this for relative-branches, because the
- * PC + offset can never overflow into the tag bits (assuming
- * that virtual addresses are less than 56 bits wide, as they
- * are currently), but we must handle it for branch-to-register.
+ * Here we have concatenated TBI{1,0} into tbi.
*/
-static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
+static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
+ TCGv_i64 src, int tbi)
{
+ if (tbi == 0) {
+ /* Load unmodified address */
+ tcg_gen_mov_i64(dst, src);
+ } else if (!regime_has_2_ranges(s->mmu_idx)) {
+ /* Force tag byte to all zero */
+ tcg_gen_extract_i64(dst, src, 0, 56);
+ } else {
+ /* Sign-extend from bit 55. */
+ tcg_gen_sextract_i64(dst, src, 0, 56);
- if (s->current_el <= 1) {
- /* Test if NEITHER or BOTH TBI values are set. If so, no need to
- * examine bit 55 of address, can just generate code.
- * If mixed, then test via generated code
- */
- if (s->tbi0 && s->tbi1) {
- TCGv_i64 tmp_reg = tcg_temp_new_i64();
- /* Both bits set, sign extension from bit 55 into [63:56] will
- * cover both cases
- */
- tcg_gen_shli_i64(tmp_reg, src, 8);
- tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);
- tcg_temp_free_i64(tmp_reg);
- } else if (!s->tbi0 && !s->tbi1) {
- /* Neither bit set, just load it as-is */
- tcg_gen_mov_i64(cpu_pc, src);
- } else {
- TCGv_i64 tcg_tmpval = tcg_temp_new_i64();
- TCGv_i64 tcg_bit55 = tcg_temp_new_i64();
- TCGv_i64 tcg_zero = tcg_const_i64(0);
-
- tcg_gen_andi_i64(tcg_bit55, src, (1ull << 55));
-
- if (s->tbi0) {
- /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */
- tcg_gen_andi_i64(tcg_tmpval, src,
- 0x00FFFFFFFFFFFFFFull);
- tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,
- tcg_tmpval, src);
- } else {
- /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */
- tcg_gen_ori_i64(tcg_tmpval, src,
- 0xFF00000000000000ull);
- tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,
- tcg_tmpval, src);
- }
- tcg_temp_free_i64(tcg_zero);
- tcg_temp_free_i64(tcg_bit55);
- tcg_temp_free_i64(tcg_tmpval);
- }
- } else { /* EL > 1 */
- if (s->tbi0) {
- /* Force tag byte to all zero */
- tcg_gen_andi_i64(cpu_pc, src, 0x00FFFFFFFFFFFFFFull);
- } else {
- /* Load unmodified address */
- tcg_gen_mov_i64(cpu_pc, src);
+ switch (tbi) {
+ case 1:
+ /* tbi0 but !tbi1: only use the extension if positive */
+ tcg_gen_and_i64(dst, dst, src);
+ break;
+ case 2:
+ /* !tbi0 but tbi1: only use the extension if negative */
+ tcg_gen_or_i64(dst, dst, src);
+ break;
+ case 3:
+ /* tbi0 and tbi1: always use the extension */
+ break;
+ default:
+ g_assert_not_reached();
}
}
}
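
In plain C, the three cases handled above come down to the following arithmetic. A minimal standalone sketch, assuming a hypothetical helper name and ordinary uint64_t values rather than TCG temporaries:

#include <stdint.h>

/* Illustrative only: apply the TBI rule to a 64-bit virtual address. */
static uint64_t tbi_apply(uint64_t addr, int tbi, int two_ranges)
{
    uint64_t ext;

    if (tbi == 0) {
        return addr;                          /* tag byte is not ignored */
    }
    if (!two_ranges) {
        return addr & 0x00ffffffffffffffull;  /* EL2/EL3: clear bits [63:56] */
    }
    /* EL0/EL1: sign-extend from bit 55 into bits [63:56]. */
    ext = (addr & (1ull << 55)) ? (addr | 0xff00000000000000ull)
                                : (addr & 0x00ffffffffffffffull);
    switch (tbi) {
    case 1:  return ext & addr;  /* TBI0 only: extension applies when bit 55 == 0 */
    case 2:  return ext | addr;  /* TBI1 only: extension applies when bit 55 == 1 */
    default: return ext;         /* TBI0 and TBI1: always use the extension */
    }
}
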
+static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
+{
+ /*
+ * If address tagging is enabled for instructions via the TCR TBI bits,
+ * then loading an address into the PC will clear out any tag.
+ */
+ gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
+ s->pc_save = -1;
+}
+
+/*
+ * Handle MTE and/or TBI.
+ *
+ * For TBI, ideally, we would do nothing. Proper behaviour on fault is
+ * for the tag to be present in the FAR_ELx register. But for user-only
+ * mode we do not have a TLB with which to implement this, so we must
+ * remove the top byte now.
+ *
+ * Always return a fresh temporary that we can increment independently
+ * of the write-back address.
+ */
+
+TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
+{
+ TCGv_i64 clean = tcg_temp_new_i64();
+#ifdef CONFIG_USER_ONLY
+ gen_top_byte_ignore(s, clean, addr, s->tbid);
+#else
+ tcg_gen_mov_i64(clean, addr);
+#endif
+ return clean;
+}
+
+/* Insert a zero tag into src, with the result at dst. */
+static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
+{
+ tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
+}
+
+static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
+ MMUAccessType acc, int log2_size)
+{
+ gen_helper_probe_access(tcg_env, ptr,
+ tcg_constant_i32(acc),
+ tcg_constant_i32(get_mem_index(s)),
+ tcg_constant_i32(1 << log2_size));
+}
+
+/*
+ * For MTE, check a single logical or atomic access. This probes a single
+ * address, the exact one specified. The size and alignment of the access
+ * is not relevant to MTE, per se, but watchpoints do require the size,
+ * and we want to recognize those before making any other changes to state.
+ */
+static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
+ bool is_write, bool tag_checked,
+ MemOp memop, bool is_unpriv,
+ int core_idx)
+{
+ if (tag_checked && s->mte_active[is_unpriv]) {
+ TCGv_i64 ret;
+ int desc = 0;
+
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
+ desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop));
+ desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
+
+ ret = tcg_temp_new_i64();
+ gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
+
+ return ret;
+ }
+ return clean_data_tbi(s, addr);
+}
+
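
The descriptor built above is a 32-bit word of packed fields; each FIELD_DP32() call deposits one value into a named bit range. A minimal standalone sketch of that deposit operation, using made-up field positions (the real MTEDESC layout lives in the target headers and is not reproduced here):

#include <stdint.h>

/* Illustrative only: deposit LEN bits of VAL into WORD at bit POS. */
static uint32_t deposit32_sketch(uint32_t word, int pos, int len, uint32_t val)
{
    uint32_t mask = ((1u << len) - 1) << pos;
    return (word & ~mask) | ((val << pos) & mask);
}

/* Hypothetical field positions, for illustration only. */
enum { EX_MIDX_POS = 0, EX_MIDX_LEN = 4, EX_WRITE_POS = 8, EX_WRITE_LEN = 1 };

static uint32_t build_desc_example(int mem_index, int is_write)
{
    uint32_t desc = 0;
    desc = deposit32_sketch(desc, EX_MIDX_POS, EX_MIDX_LEN, mem_index);
    desc = deposit32_sketch(desc, EX_WRITE_POS, EX_WRITE_LEN, is_write);
    return desc;
}
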
+TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
+ bool tag_checked, MemOp memop)
+{
+ return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
+ false, get_mem_index(s));
+}
+
+/*
+ * For MTE, check multiple logical sequential accesses.
+ */
+TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
+ bool tag_checked, int total_size, MemOp single_mop)
+{
+ if (tag_checked && s->mte_active[0]) {
+ TCGv_i64 ret;
+ int desc = 0;
+
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
+ desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop));
+ desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
+
+ ret = tcg_temp_new_i64();
+ gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
+
+ return ret;
+ }
+ return clean_data_tbi(s, addr);
+}
+
+/*
+ * Generate the special alignment check that applies to AccType_ATOMIC
+ * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
+ * naturally aligned, but it must not cross a 16-byte boundary.
+ * See AArch64.CheckAlignment().
+ */
+static void check_lse2_align(DisasContext *s, int rn, int imm,
+ bool is_write, MemOp mop)
+{
+ TCGv_i32 tmp;
+ TCGv_i64 addr;
+ TCGLabel *over_label;
+ MMUAccessType type;
+ int mmu_idx;
+
+ tmp = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
+ tcg_gen_addi_i32(tmp, tmp, imm & 15);
+ tcg_gen_andi_i32(tmp, tmp, 15);
+ tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
+
+ over_label = gen_new_label();
+ tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
+
+ addr = tcg_temp_new_i64();
+ tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
+
+ type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
+ mmu_idx = get_mem_index(s);
+ gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
+ tcg_constant_i32(mmu_idx));
+
+ gen_set_label(over_label);
+}
+
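
The branch generated above is the translation-time form of a simple predicate. A minimal standalone sketch of the FEAT_LSE2 rule, with an illustrative helper name:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative only: an atomic/ordered access may be unaligned under
 * FEAT_LSE2, but it must not cross a 16-byte boundary. */
static bool lse2_access_ok(uint64_t addr, unsigned size_bytes)
{
    return (addr & 15) + size_bytes <= 16;
}
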
+/* Handle the alignment check for AccType_ATOMIC instructions. */
+static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
+{
+ MemOp size = mop & MO_SIZE;
+
+ if (size == MO_8) {
+ return mop;
+ }
+
+ /*
+ * If size == MO_128, this is an LDXP, and the operation is single-copy
+ * atomic for each doubleword, not the entire quadword; it still must
+ * be quadword aligned.
+ */
+ if (size == MO_128) {
+ return finalize_memop_atom(s, MO_128 | MO_ALIGN,
+ MO_ATOM_IFALIGN_PAIR);
+ }
+ if (dc_isar_feature(aa64_lse2, s)) {
+ check_lse2_align(s, rn, 0, true, mop);
+ } else {
+ mop |= MO_ALIGN;
+ }
+ return finalize_memop(s, mop);
+}
+
+/* Handle the alignment check for AccType_ORDERED instructions. */
+static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
+ bool is_write, MemOp mop)
+{
+ MemOp size = mop & MO_SIZE;
+
+ if (size == MO_8) {
+ return mop;
+ }
+ if (size == MO_128) {
+ return finalize_memop_atom(s, MO_128 | MO_ALIGN,
+ MO_ATOM_IFALIGN_PAIR);
+ }
+ if (!dc_isar_feature(aa64_lse2, s)) {
+ mop |= MO_ALIGN;
+ } else if (!s->naa) {
+ check_lse2_align(s, rn, imm, is_write, mop);
+ }
+ return finalize_memop(s, mop);
+}
+
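
Taken together, the two helpers above select one of three outcomes per access. A minimal standalone summary of that decision, with illustrative names (0 = no check, 1 = natural alignment required, 2 = 16-byte-boundary check):

#include <stdbool.h>

/* Illustrative only: alignment requirement chosen for an AccType_ATOMIC
 * (is_ordered == false) or AccType_ORDERED (is_ordered == true) access.
 * size_log2 is log2 of the access size in bytes (4 means 128 bits). */
static int align_kind(int size_log2, bool have_lse2, bool naa, bool is_ordered)
{
    if (size_log2 == 0) {
        return 0;              /* byte accesses need no check */
    }
    if (size_log2 == 4) {
        return 1;              /* 128-bit pairs must be 16-byte aligned */
    }
    if (!have_lse2) {
        return 1;              /* without FEAT_LSE2: natural alignment */
    }
    if (is_ordered && naa) {
        return 0;              /* LSE2 with the nAA control set: no check */
    }
    return 2;                  /* LSE2: must not cross a 16-byte boundary */
}
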
typedef struct DisasCompare64 {
TCGCond cond;
TCGv_i64 value;
@@ -338,80 +432,40 @@ static void a64_test_cc(DisasCompare64 *c64, int cc)
arm_test_cc(&c32, cc);
- /* Sign-extend the 32-bit value so that the GE/LT comparisons work
- * properly. The NE/EQ comparisons are also fine with this choice. */
+ /*
+ * Sign-extend the 32-bit value so that the GE/LT comparisons work
+ * properly. The NE/EQ comparisons are also fine with this choice.
+ */
c64->cond = c32.cond;
c64->value = tcg_temp_new_i64();
tcg_gen_ext_i32_i64(c64->value, c32.value);
-
- arm_free_cc(&c32);
}
-static void a64_free_cc(DisasCompare64 *c64)
+static void gen_rebuild_hflags(DisasContext *s)
{
- tcg_temp_free_i64(c64->value);
+ gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}
static void gen_exception_internal(int excp)
{
- TCGv_i32 tcg_excp = tcg_const_i32(excp);
-
assert(excp_is_internal(excp));
- gen_helper_exception_internal(cpu_env, tcg_excp);
- tcg_temp_free_i32(tcg_excp);
+ gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}
-static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
+static void gen_exception_internal_insn(DisasContext *s, int excp)
{
- TCGv_i32 tcg_excp = tcg_const_i32(excp);
- TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
- TCGv_i32 tcg_el = tcg_const_i32(target_el);
-
- gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
- tcg_syn, tcg_el);
- tcg_temp_free_i32(tcg_el);
- tcg_temp_free_i32(tcg_syn);
- tcg_temp_free_i32(tcg_excp);
-}
-
-static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
-{
- gen_a64_set_pc_im(s->pc - offset);
+ gen_a64_update_pc(s, 0);
gen_exception_internal(excp);
s->base.is_jmp = DISAS_NORETURN;
}
-static void gen_exception_insn(DisasContext *s, int offset, int excp,
- uint32_t syndrome, uint32_t target_el)
-{
- gen_a64_set_pc_im(s->pc - offset);
- gen_exception(excp, syndrome, target_el);
- s->base.is_jmp = DISAS_NORETURN;
-}
-
-static void gen_exception_bkpt_insn(DisasContext *s, int offset,
- uint32_t syndrome)
+static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
- TCGv_i32 tcg_syn;
-
- gen_a64_set_pc_im(s->pc - offset);
- tcg_syn = tcg_const_i32(syndrome);
- gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
- tcg_temp_free_i32(tcg_syn);
+ gen_a64_update_pc(s, 0);
+ gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
s->base.is_jmp = DISAS_NORETURN;
}
-static void gen_ss_advance(DisasContext *s)
-{
- /* If the singlestep state is Active-not-pending, advance to
- * Active-pending.
- */
- if (s->ss_active) {
- s->pstate_ss = 0;
- gen_helper_clear_pstate_ss(cpu_env);
- }
-}
-
static void gen_step_complete_exception(DisasContext *s)
{
/* We just completed step of an insn. Move from Active-not-pending
@@ -424,47 +478,42 @@ static void gen_step_complete_exception(DisasContext *s)
* of the exception, and our syndrome information is always correct.
*/
gen_ss_advance(s);
- gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
- default_exception_el(s));
+ gen_swstep_exception(s, 1, s->is_ldex);
s->base.is_jmp = DISAS_NORETURN;
}
-static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
+static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
- /* No direct tb linking with singlestep (either QEMU's or the ARM
- * debug architecture kind) or deterministic io
- */
- if (s->base.singlestep_enabled || s->ss_active ||
- (tb_cflags(s->base.tb) & CF_LAST_IO)) {
- return false;
- }
-
-#ifndef CONFIG_USER_ONLY
- /* Only link tbs from inside the same guest page */
- if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
+ if (s->ss_active) {
return false;
}
-#endif
-
- return true;
+ return translator_use_goto_tb(&s->base, dest);
}
-static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
+static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
- TranslationBlock *tb;
-
- tb = s->base.tb;
- if (use_goto_tb(s, n, dest)) {
- tcg_gen_goto_tb(n);
- gen_a64_set_pc_im(dest);
- tcg_gen_exit_tb(tb, n);
+ if (use_goto_tb(s, s->pc_curr + diff)) {
+ /*
+ * For pcrel, the pc must always be up-to-date on entry to
+ * the linked TB, so that it can use simple additions for all
+ * further adjustments. For !pcrel, the linked TB is compiled
+ * to know its full virtual address, so we can delay the
+ * update to pc to the unlinked path. A long chain of links
+ * can thus avoid many updates to the PC.
+ */
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
+ gen_a64_update_pc(s, diff);
+ tcg_gen_goto_tb(n);
+ } else {
+ tcg_gen_goto_tb(n);
+ gen_a64_update_pc(s, diff);
+ }
+ tcg_gen_exit_tb(s->base.tb, n);
s->base.is_jmp = DISAS_NORETURN;
} else {
- gen_a64_set_pc_im(dest);
+ gen_a64_update_pc(s, diff);
if (s->ss_active) {
gen_step_complete_exception(s);
- } else if (s->base.singlestep_enabled) {
- gen_exception_internal(EXCP_DEBUG);
} else {
tcg_gen_lookup_and_goto_ptr();
s->base.is_jmp = DISAS_NORETURN;
@@ -472,43 +521,6 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
}
}
-void unallocated_encoding(DisasContext *s)
-{
- /* Unallocated and reserved encodings are uncategorized */
- gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
- default_exception_el(s));
-}
-
-static void init_tmp_a64_array(DisasContext *s)
-{
-#ifdef CONFIG_DEBUG_TCG
- memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
-#endif
- s->tmp_a64_count = 0;
-}
-
-static void free_tmp_a64(DisasContext *s)
-{
- int i;
- for (i = 0; i < s->tmp_a64_count; i++) {
- tcg_temp_free_i64(s->tmp_a64[i]);
- }
- init_tmp_a64_array(s);
-}
-
-TCGv_i64 new_tmp_a64(DisasContext *s)
-{
- assert(s->tmp_a64_count < TMP_A64_MAX);
- return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
-}
-
-TCGv_i64 new_tmp_a64_zero(DisasContext *s)
-{
- TCGv_i64 t = new_tmp_a64(s);
- tcg_gen_movi_i64(t, 0);
- return t;
-}
-
/*
* Register access functions
*
@@ -527,7 +539,9 @@ TCGv_i64 new_tmp_a64_zero(DisasContext *s)
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
if (reg == 31) {
- return new_tmp_a64_zero(s);
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_movi_i64(t, 0);
+ return t;
} else {
return cpu_X[reg];
}
@@ -545,7 +559,7 @@ TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
*/
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
- TCGv_i64 v = new_tmp_a64(s);
+ TCGv_i64 v = tcg_temp_new_i64();
if (reg != 31) {
if (sf) {
tcg_gen_mov_i64(v, cpu_X[reg]);
@@ -560,7 +574,7 @@ TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
- TCGv_i64 v = new_tmp_a64(s);
+ TCGv_i64 v = tcg_temp_new_i64();
if (sf) {
tcg_gen_mov_i64(v, cpu_X[reg]);
} else {
@@ -574,7 +588,7 @@ TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
* Dn, Sn, Hn or Bn).
* (Note that this is not the same mapping as for A32; see cpu.h)
*/
-static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
+static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
return vec_reg_offset(s, regno, 0, size);
}
@@ -595,7 +609,7 @@ static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
TCGv_i64 v = tcg_temp_new_i64();
- tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
+ tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
return v;
}
@@ -603,7 +617,7 @@ static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
TCGv_i32 v = tcg_temp_new_i32();
- tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
+ tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
return v;
}
@@ -611,7 +625,7 @@ static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
TCGv_i32 v = tcg_temp_new_i32();
- tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
+ tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
return v;
}
@@ -623,21 +637,15 @@ static void clear_vec_high(DisasContext *s, bool is_q, int rd)
unsigned ofs = fp_reg_offset(s, rd, MO_64);
unsigned vsz = vec_full_reg_size(s);
- if (!is_q) {
- TCGv_i64 tcg_zero = tcg_const_i64(0);
- tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
- tcg_temp_free_i64(tcg_zero);
- }
- if (vsz > 16) {
- tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
- }
+ /* Nop move, with side effect of clearing the tail. */
+ tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}
void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
unsigned ofs = fp_reg_offset(s, reg, MO_64);
- tcg_gen_st_i64(v, cpu_env, ofs);
+ tcg_gen_st_i64(v, tcg_env, ofs);
clear_vec_high(s, false, reg);
}
@@ -647,26 +655,6 @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
tcg_gen_extu_i32_i64(tmp, v);
write_fp_dreg(s, reg, tmp);
- tcg_temp_free_i64(tmp);
-}
-
-TCGv_ptr get_fpstatus_ptr(bool is_f16)
-{
- TCGv_ptr statusptr = tcg_temp_new_ptr();
- int offset;
-
- /* In A64 all instructions (both FP and Neon) use the FPCR; there
- * is no equivalent of the A32 Neon "standard FPSCR value".
- * However half-precision operations operate under a different
- * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status.
- */
- if (is_f16) {
- offset = offsetof(CPUARMState, vfp.fp_status_f16);
- } else {
- offset = offsetof(CPUARMState, vfp.fp_status);
- }
- tcg_gen_addi_ptr(statusptr, cpu_env, offset);
- return statusptr;
}
/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
@@ -695,23 +683,22 @@ static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}
-/* Expand a 2-operand + immediate AdvSIMD vector operation using
- * an op descriptor.
- */
-static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
- int rn, int64_t imm, const GVecGen2i *gvec_op)
+/* Expand a 4-operand AdvSIMD vector operation using an expander function. */
+static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
+ int rx, GVecGen4Fn *gvec_fn, int vece)
{
- tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
- is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
+ gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
+ is_q ? 16 : 8, vec_full_reg_size(s));
}
-/* Expand a 3-operand AdvSIMD vector operation using an op descriptor. */
-static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
- int rn, int rm, const GVecGen3 *gvec_op)
+/* Expand a 2-operand operation using an out-of-line helper. */
+static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
+ int rn, int data, gen_helper_gvec_2 *fn)
{
- tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm), is_q ? 16 : 8,
- vec_full_reg_size(s), gvec_op);
+ tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
/* Expand a 3-operand operation using an out-of-line helper. */
@@ -724,31 +711,58 @@ static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
-/* Expand a 3-operand + env pointer operation using
+/* Expand a 3-operand + fpstatus pointer + simd data value operation using
* an out-of-line helper.
*/
-static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
- int rn, int rm, gen_helper_gvec_3_ptr *fn)
+static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
+ int rm, bool is_fp16, int data,
+ gen_helper_gvec_3_ptr *fn)
{
+ TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm), cpu_env,
+ vec_full_reg_offset(s, rm), fpst,
+ is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
+}
+
+/* Expand a 3-operand + qc + operation using an out-of-line helper. */
+static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
+ int rm, gen_helper_gvec_3_ptr *fn)
+{
+ TCGv_ptr qc_ptr = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), qc_ptr,
is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
}
-/* Expand a 3-operand + fpstatus pointer + simd data value operation using
+/* Expand a 4-operand operation using an out-of-line helper. */
+static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
+ int rm, int ra, int data, gen_helper_gvec_4 *fn)
+{
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, ra),
+ is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
+}
+
+/*
+ * Expand a 4-operand + fpstatus pointer + simd data value operation using
* an out-of-line helper.
*/
-static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
- int rm, bool is_fp16, int data,
- gen_helper_gvec_3_ptr *fn)
+static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
+ int rm, int ra, bool is_fp16, int data,
+ gen_helper_gvec_4_ptr *fn)
{
- TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
- tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
+ tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm), fpst,
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, ra), fpst,
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
- tcg_temp_free_ptr(fpst);
}
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
@@ -774,96 +788,102 @@ static inline void gen_logic_CC(int sf, TCGv_i64 result)
}
/* dest = T0 + T1; compute C, N, V and Z flags */
-static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
+static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
- if (sf) {
- TCGv_i64 result, flag, tmp;
- result = tcg_temp_new_i64();
- flag = tcg_temp_new_i64();
- tmp = tcg_temp_new_i64();
+ TCGv_i64 result, flag, tmp;
+ result = tcg_temp_new_i64();
+ flag = tcg_temp_new_i64();
+ tmp = tcg_temp_new_i64();
- tcg_gen_movi_i64(tmp, 0);
- tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
+ tcg_gen_movi_i64(tmp, 0);
+ tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
- tcg_gen_extrl_i64_i32(cpu_CF, flag);
+ tcg_gen_extrl_i64_i32(cpu_CF, flag);
- gen_set_NZ64(result);
+ gen_set_NZ64(result);
- tcg_gen_xor_i64(flag, result, t0);
- tcg_gen_xor_i64(tmp, t0, t1);
- tcg_gen_andc_i64(flag, flag, tmp);
- tcg_temp_free_i64(tmp);
- tcg_gen_extrh_i64_i32(cpu_VF, flag);
+ tcg_gen_xor_i64(flag, result, t0);
+ tcg_gen_xor_i64(tmp, t0, t1);
+ tcg_gen_andc_i64(flag, flag, tmp);
+ tcg_gen_extrh_i64_i32(cpu_VF, flag);
- tcg_gen_mov_i64(dest, result);
- tcg_temp_free_i64(result);
- tcg_temp_free_i64(flag);
- } else {
- /* 32 bit arithmetic */
- TCGv_i32 t0_32 = tcg_temp_new_i32();
- TCGv_i32 t1_32 = tcg_temp_new_i32();
- TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_mov_i64(dest, result);
+}
- tcg_gen_movi_i32(tmp, 0);
- tcg_gen_extrl_i64_i32(t0_32, t0);
- tcg_gen_extrl_i64_i32(t1_32, t1);
- tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
- tcg_gen_mov_i32(cpu_ZF, cpu_NF);
- tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
- tcg_gen_xor_i32(tmp, t0_32, t1_32);
- tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
- tcg_gen_extu_i32_i64(dest, cpu_NF);
+static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
+{
+ TCGv_i32 t0_32 = tcg_temp_new_i32();
+ TCGv_i32 t1_32 = tcg_temp_new_i32();
+ TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(t0_32);
- tcg_temp_free_i32(t1_32);
+ tcg_gen_movi_i32(tmp, 0);
+ tcg_gen_extrl_i64_i32(t0_32, t0);
+ tcg_gen_extrl_i64_i32(t1_32, t1);
+ tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
+ tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+ tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
+ tcg_gen_xor_i32(tmp, t0_32, t1_32);
+ tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
+ tcg_gen_extu_i32_i64(dest, cpu_NF);
+}
+
+static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
+{
+ if (sf) {
+ gen_add64_CC(dest, t0, t1);
+ } else {
+ gen_add32_CC(dest, t0, t1);
}
}
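
The TCG sequence above maps onto ordinary integer arithmetic. A minimal standalone sketch of the 64-bit case, with illustrative names:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative only: NZCV for a 64-bit ADDS, mirroring gen_add64_CC. */
static void adds64_flags(uint64_t t0, uint64_t t1, uint64_t *result,
                         bool *n, bool *z, bool *c, bool *v)
{
    uint64_t res = t0 + t1;

    *result = res;
    *n = res >> 63;                          /* negative */
    *z = (res == 0);                         /* zero */
    *c = res < t0;                           /* unsigned carry out */
    *v = (~(t0 ^ t1) & (t0 ^ res)) >> 63;    /* signed overflow */
}
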
/* dest = T0 - T1; compute C, N, V and Z flags */
-static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
+static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
- if (sf) {
- /* 64 bit arithmetic */
- TCGv_i64 result, flag, tmp;
+ /* 64 bit arithmetic */
+ TCGv_i64 result, flag, tmp;
- result = tcg_temp_new_i64();
- flag = tcg_temp_new_i64();
- tcg_gen_sub_i64(result, t0, t1);
+ result = tcg_temp_new_i64();
+ flag = tcg_temp_new_i64();
+ tcg_gen_sub_i64(result, t0, t1);
- gen_set_NZ64(result);
+ gen_set_NZ64(result);
- tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
- tcg_gen_extrl_i64_i32(cpu_CF, flag);
+ tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
+ tcg_gen_extrl_i64_i32(cpu_CF, flag);
- tcg_gen_xor_i64(flag, result, t0);
- tmp = tcg_temp_new_i64();
- tcg_gen_xor_i64(tmp, t0, t1);
- tcg_gen_and_i64(flag, flag, tmp);
- tcg_temp_free_i64(tmp);
- tcg_gen_extrh_i64_i32(cpu_VF, flag);
- tcg_gen_mov_i64(dest, result);
- tcg_temp_free_i64(flag);
- tcg_temp_free_i64(result);
- } else {
- /* 32 bit arithmetic */
- TCGv_i32 t0_32 = tcg_temp_new_i32();
- TCGv_i32 t1_32 = tcg_temp_new_i32();
- TCGv_i32 tmp;
+ tcg_gen_xor_i64(flag, result, t0);
+ tmp = tcg_temp_new_i64();
+ tcg_gen_xor_i64(tmp, t0, t1);
+ tcg_gen_and_i64(flag, flag, tmp);
+ tcg_gen_extrh_i64_i32(cpu_VF, flag);
+ tcg_gen_mov_i64(dest, result);
+}
- tcg_gen_extrl_i64_i32(t0_32, t0);
- tcg_gen_extrl_i64_i32(t1_32, t1);
- tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
- tcg_gen_mov_i32(cpu_ZF, cpu_NF);
- tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
- tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
- tmp = tcg_temp_new_i32();
- tcg_gen_xor_i32(tmp, t0_32, t1_32);
- tcg_temp_free_i32(t0_32);
- tcg_temp_free_i32(t1_32);
- tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
- tcg_temp_free_i32(tmp);
- tcg_gen_extu_i32_i64(dest, cpu_NF);
+static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
+{
+ /* 32 bit arithmetic */
+ TCGv_i32 t0_32 = tcg_temp_new_i32();
+ TCGv_i32 t1_32 = tcg_temp_new_i32();
+ TCGv_i32 tmp;
+
+ tcg_gen_extrl_i64_i32(t0_32, t0);
+ tcg_gen_extrl_i64_i32(t1_32, t1);
+ tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
+ tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+ tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
+ tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
+ tmp = tcg_temp_new_i32();
+ tcg_gen_xor_i32(tmp, t0_32, t1_32);
+ tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
+ tcg_gen_extu_i32_i64(dest, cpu_NF);
+}
+
+static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
+{
+ if (sf) {
+ gen_sub64_CC(dest, t0, t1);
+ } else {
+ gen_sub32_CC(dest, t0, t1);
}
}
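
For comparison, the 64-bit SUBS case differs only in how C and V are formed (carry means "no borrow"). A minimal standalone sketch, with illustrative names:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative only: NZCV for a 64-bit SUBS, mirroring gen_sub64_CC. */
static void subs64_flags(uint64_t t0, uint64_t t1, uint64_t *result,
                         bool *n, bool *z, bool *c, bool *v)
{
    uint64_t res = t0 - t1;

    *result = res;
    *n = res >> 63;                          /* negative */
    *z = (res == 0);                         /* zero */
    *c = t0 >= t1;                           /* carry set when no borrow */
    *v = ((t0 ^ t1) & (t0 ^ res)) >> 63;     /* signed overflow */
}
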
@@ -874,7 +894,6 @@ static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
tcg_gen_extu_i32_i64(flag, cpu_CF);
tcg_gen_add_i64(dest, t0, t1);
tcg_gen_add_i64(dest, dest, flag);
- tcg_temp_free_i64(flag);
if (!sf) {
tcg_gen_ext32u_i64(dest, dest);
@@ -885,15 +904,15 @@ static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
if (sf) {
- TCGv_i64 result, cf_64, vf_64, tmp;
- result = tcg_temp_new_i64();
- cf_64 = tcg_temp_new_i64();
- vf_64 = tcg_temp_new_i64();
- tmp = tcg_const_i64(0);
+ TCGv_i64 result = tcg_temp_new_i64();
+ TCGv_i64 cf_64 = tcg_temp_new_i64();
+ TCGv_i64 vf_64 = tcg_temp_new_i64();
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ TCGv_i64 zero = tcg_constant_i64(0);
tcg_gen_extu_i32_i64(cf_64, cpu_CF);
- tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
- tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
+ tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
+ tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
gen_set_NZ64(result);
@@ -903,31 +922,22 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
tcg_gen_mov_i64(dest, result);
-
- tcg_temp_free_i64(tmp);
- tcg_temp_free_i64(vf_64);
- tcg_temp_free_i64(cf_64);
- tcg_temp_free_i64(result);
} else {
- TCGv_i32 t0_32, t1_32, tmp;
- t0_32 = tcg_temp_new_i32();
- t1_32 = tcg_temp_new_i32();
- tmp = tcg_const_i32(0);
+ TCGv_i32 t0_32 = tcg_temp_new_i32();
+ TCGv_i32 t1_32 = tcg_temp_new_i32();
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ TCGv_i32 zero = tcg_constant_i32(0);
tcg_gen_extrl_i64_i32(t0_32, t0);
tcg_gen_extrl_i64_i32(t1_32, t1);
- tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
- tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
+ tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
+ tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);
tcg_gen_mov_i32(cpu_ZF, cpu_NF);
tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
tcg_gen_xor_i32(tmp, t0_32, t1_32);
tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
tcg_gen_extu_i32_i64(dest, cpu_NF);
-
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(t1_32);
- tcg_temp_free_i32(t0_32);
}
}
@@ -939,19 +949,18 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
* Store from GPR register to memory.
*/
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
- TCGv_i64 tcg_addr, int size, int memidx,
+ TCGv_i64 tcg_addr, MemOp memop, int memidx,
bool iss_valid,
unsigned int iss_srt,
bool iss_sf, bool iss_ar)
{
- g_assert(size <= 3);
- tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
+ tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
if (iss_valid) {
uint32_t syn;
syn = syn_data_abort_with_iss(0,
- size,
+ (memop & MO_SIZE),
false,
iss_srt,
iss_sf,
@@ -962,37 +971,27 @@ static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
}
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
- TCGv_i64 tcg_addr, int size,
+ TCGv_i64 tcg_addr, MemOp memop,
bool iss_valid,
unsigned int iss_srt,
bool iss_sf, bool iss_ar)
{
- do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
+ do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
iss_valid, iss_srt, iss_sf, iss_ar);
}
/*
* Load from memory to GPR register
*/
-static void do_gpr_ld_memidx(DisasContext *s,
- TCGv_i64 dest, TCGv_i64 tcg_addr,
- int size, bool is_signed,
- bool extend, int memidx,
+static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
+ MemOp memop, bool extend, int memidx,
bool iss_valid, unsigned int iss_srt,
bool iss_sf, bool iss_ar)
{
- TCGMemOp memop = s->be_data + size;
-
- g_assert(size <= 3);
-
- if (is_signed) {
- memop += MO_SIGN;
- }
-
tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
- if (extend && is_signed) {
- g_assert(size < 3);
+ if (extend && (memop & MO_SIGN)) {
+ g_assert((memop & MO_SIZE) <= MO_32);
tcg_gen_ext32u_i64(dest, dest);
}
@@ -1000,8 +999,8 @@ static void do_gpr_ld_memidx(DisasContext *s,
uint32_t syn;
syn = syn_data_abort_with_iss(0,
- size,
- is_signed,
+ (memop & MO_SIZE),
+ (memop & MO_SIGN) != 0,
iss_srt,
iss_sf,
iss_ar,
@@ -1010,79 +1009,64 @@ static void do_gpr_ld_memidx(DisasContext *s,
}
}
-static void do_gpr_ld(DisasContext *s,
- TCGv_i64 dest, TCGv_i64 tcg_addr,
- int size, bool is_signed, bool extend,
+static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
+ MemOp memop, bool extend,
bool iss_valid, unsigned int iss_srt,
bool iss_sf, bool iss_ar)
{
- do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
- get_mem_index(s),
+ do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
iss_valid, iss_srt, iss_sf, iss_ar);
}
/*
* Store from FP register to memory
*/
-static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
+static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
/* This writes the bottom N bits of a 128 bit wide vector to memory */
- TCGv_i64 tmp = tcg_temp_new_i64();
- tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
- if (size < 4) {
- tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
- s->be_data + size);
+ TCGv_i64 tmplo = tcg_temp_new_i64();
+
+ tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));
+
+ if ((mop & MO_SIZE) < MO_128) {
+ tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
} else {
- bool be = s->be_data == MO_BE;
- TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
+ TCGv_i64 tmphi = tcg_temp_new_i64();
+ TCGv_i128 t16 = tcg_temp_new_i128();
- tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
- tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
- s->be_data | MO_Q);
- tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
- tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
- s->be_data | MO_Q);
- tcg_temp_free_i64(tcg_hiaddr);
- }
+ tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
+ tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
- tcg_temp_free_i64(tmp);
+ tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
+ }
}
/*
* Load from memory to FP register
*/
-static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
+static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
/* This always zero-extends and writes to a full 128 bit wide vector */
TCGv_i64 tmplo = tcg_temp_new_i64();
- TCGv_i64 tmphi;
+ TCGv_i64 tmphi = NULL;
- if (size < 4) {
- TCGMemOp memop = s->be_data + size;
- tmphi = tcg_const_i64(0);
- tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
+ if ((mop & MO_SIZE) < MO_128) {
+ tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
} else {
- bool be = s->be_data == MO_BE;
- TCGv_i64 tcg_hiaddr;
+ TCGv_i128 t16 = tcg_temp_new_i128();
- tmphi = tcg_temp_new_i64();
- tcg_hiaddr = tcg_temp_new_i64();
+ tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
- tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
- tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
- s->be_data | MO_Q);
- tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
- s->be_data | MO_Q);
- tcg_temp_free_i64(tcg_hiaddr);
+ tmphi = tcg_temp_new_i64();
+ tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
}
- tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
- tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
+ tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));
- tcg_temp_free_i64(tmplo);
- tcg_temp_free_i64(tmphi);
-
- clear_vec_high(s, true, destidx);
+ if (tmphi) {
+ tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
+ }
+ clear_vec_high(s, tmphi != NULL, destidx);
}
/*
@@ -1099,31 +1083,31 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
- int element, TCGMemOp memop)
+ int element, MemOp memop)
{
int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
- switch (memop) {
+ switch ((unsigned)memop) {
case MO_8:
- tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
+ tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
break;
case MO_16:
- tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
+ tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
break;
case MO_32:
- tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
+ tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
break;
case MO_8|MO_SIGN:
- tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
+ tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
break;
case MO_16|MO_SIGN:
- tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
+ tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
break;
case MO_32|MO_SIGN:
- tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
+ tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
break;
case MO_64:
case MO_64|MO_SIGN:
- tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
+ tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
break;
default:
g_assert_not_reached();
@@ -1131,25 +1115,25 @@ static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
}
static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
- int element, TCGMemOp memop)
+ int element, MemOp memop)
{
int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
switch (memop) {
case MO_8:
- tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
+ tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
break;
case MO_16:
- tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
+ tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
break;
case MO_8|MO_SIGN:
- tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
+ tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
break;
case MO_16|MO_SIGN:
- tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
+ tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
break;
case MO_32:
case MO_32|MO_SIGN:
- tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
+ tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
break;
default:
g_assert_not_reached();
@@ -1158,21 +1142,21 @@ static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
- int element, TCGMemOp memop)
+ int element, MemOp memop)
{
int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
switch (memop) {
case MO_8:
- tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
+ tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
break;
case MO_16:
- tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
+ tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
break;
case MO_32:
- tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
+ tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
break;
case MO_64:
- tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
+ tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
break;
default:
g_assert_not_reached();
@@ -1180,18 +1164,18 @@ static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
}
static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
- int destidx, int element, TCGMemOp memop)
+ int destidx, int element, MemOp memop)
{
int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
switch (memop) {
case MO_8:
- tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
+ tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
break;
case MO_16:
- tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
+ tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
break;
case MO_32:
- tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
+ tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
break;
default:
g_assert_not_reached();
@@ -1200,28 +1184,22 @@ static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
- TCGv_i64 tcg_addr, int size)
+ TCGv_i64 tcg_addr, MemOp mop)
{
- TCGMemOp memop = s->be_data + size;
TCGv_i64 tcg_tmp = tcg_temp_new_i64();
- read_vec_element(s, tcg_tmp, srcidx, element, size);
- tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
-
- tcg_temp_free_i64(tcg_tmp);
+ read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
+ tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}
/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
- TCGv_i64 tcg_addr, int size)
+ TCGv_i64 tcg_addr, MemOp mop)
{
- TCGMemOp memop = s->be_data + size;
TCGv_i64 tcg_tmp = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
- write_vec_element(s, tcg_tmp, destidx, element, size);
-
- tcg_temp_free_i64(tcg_tmp);
+ tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
+ write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}
/* Check that FP/Neon access is enabled. If it is, return
@@ -1231,31 +1209,109 @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
* unallocated-encoding checks (otherwise the syndrome information
* for the resulting exception will be incorrect).
*/
-static inline bool fp_access_check(DisasContext *s)
+static bool fp_access_check_only(DisasContext *s)
{
- assert(!s->fp_access_checked);
- s->fp_access_checked = true;
+ if (s->fp_excp_el) {
+ assert(!s->fp_access_checked);
+ s->fp_access_checked = true;
- if (!s->fp_excp_el) {
- return true;
+ gen_exception_insn_el(s, 0, EXCP_UDEF,
+ syn_fp_access_trap(1, 0xe, false, 0),
+ s->fp_excp_el);
+ return false;
}
+ s->fp_access_checked = true;
+ return true;
+}
- gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
- s->fp_excp_el);
- return false;
+static bool fp_access_check(DisasContext *s)
+{
+ if (!fp_access_check_only(s)) {
+ return false;
+ }
+ if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
+ gen_exception_insn(s, 0, EXCP_UDEF,
+ syn_smetrap(SME_ET_Streaming, false));
+ return false;
+ }
+ return true;
}
-/* Check that SVE access is enabled. If it is, return true.
+/*
+ * Check that SVE access is enabled. If it is, return true.
* If not, emit code to generate an appropriate exception and return false.
+ * This function corresponds to CheckSVEEnabled().
*/
bool sve_access_check(DisasContext *s)
{
- if (s->sve_excp_el) {
- gen_exception_insn(s, 4, EXCP_UDEF, syn_sve_access_trap(),
- s->sve_excp_el);
- return false;
+ if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
+ assert(dc_isar_feature(aa64_sme, s));
+ if (!sme_sm_enabled_check(s)) {
+ goto fail_exit;
+ }
+ } else if (s->sve_excp_el) {
+ gen_exception_insn_el(s, 0, EXCP_UDEF,
+ syn_sve_access_trap(), s->sve_excp_el);
+ goto fail_exit;
}
+ s->sve_access_checked = true;
return fp_access_check(s);
+
+ fail_exit:
+ /* Assert that we only raise one exception per instruction. */
+ assert(!s->sve_access_checked);
+ s->sve_access_checked = true;
+ return false;
+}
+
+/*
+ * Check that SME access is enabled, raise an exception if not.
+ * Note that this function corresponds to CheckSMEAccess and is
+ * only used directly for cpregs.
+ */
+static bool sme_access_check(DisasContext *s)
+{
+ if (s->sme_excp_el) {
+ gen_exception_insn_el(s, 0, EXCP_UDEF,
+ syn_smetrap(SME_ET_AccessTrap, false),
+ s->sme_excp_el);
+ return false;
+ }
+ return true;
+}
+
+/* This function corresponds to CheckSMEEnabled. */
+bool sme_enabled_check(DisasContext *s)
+{
+ /*
+ * Note that unlike sve_excp_el, we have not constrained sme_excp_el
+ * to be zero when fp_excp_el has priority. This is because we need
+ * sme_excp_el by itself for cpregs access checks.
+ */
+ if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
+ s->fp_access_checked = true;
+ return sme_access_check(s);
+ }
+ return fp_access_check_only(s);
+}
+
+/* Common subroutine for CheckSMEAnd*Enabled. */
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
+{
+ if (!sme_enabled_check(s)) {
+ return false;
+ }
+ if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
+ gen_exception_insn(s, 0, EXCP_UDEF,
+ syn_smetrap(SME_ET_NotStreaming, false));
+ return false;
+ }
+ if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
+ gen_exception_insn(s, 0, EXCP_UDEF,
+ syn_smetrap(SME_ET_InactiveZA, false));
+ return false;
+ }
+ return true;
}
/*
@@ -1269,41 +1325,8 @@ static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
int extsize = extract32(option, 0, 2);
bool is_signed = extract32(option, 2, 1);
- if (is_signed) {
- switch (extsize) {
- case 0:
- tcg_gen_ext8s_i64(tcg_out, tcg_in);
- break;
- case 1:
- tcg_gen_ext16s_i64(tcg_out, tcg_in);
- break;
- case 2:
- tcg_gen_ext32s_i64(tcg_out, tcg_in);
- break;
- case 3:
- tcg_gen_mov_i64(tcg_out, tcg_in);
- break;
- }
- } else {
- switch (extsize) {
- case 0:
- tcg_gen_ext8u_i64(tcg_out, tcg_in);
- break;
- case 1:
- tcg_gen_ext16u_i64(tcg_out, tcg_in);
- break;
- case 2:
- tcg_gen_ext32u_i64(tcg_out, tcg_in);
- break;
- case 3:
- tcg_gen_mov_i64(tcg_out, tcg_in);
- break;
- }
- }
-
- if (shift) {
- tcg_gen_shli_i64(tcg_out, tcg_out, shift);
- }
+ tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
+ tcg_gen_shli_i64(tcg_out, tcg_out, shift);
}
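
The single tcg_gen_ext_i64() call above covers all eight <option> encodings (UXTB..SXTX). A minimal standalone sketch of the full extended-register operand, i.e. extend then shift left by 0..4, with illustrative names:

#include <stdint.h>

/* Illustrative only: compute the AArch64 extended-register operand
 * <Rm>, <extend> #<shift>, as in ext_and_shift_reg above. */
static uint64_t ext_and_shift(uint64_t in, int option, int shift)
{
    int extsize = option & 3;              /* 0=byte, 1=half, 2=word, 3=dword */
    int is_signed = (option >> 2) & 1;
    int bits = 8 << extsize;
    uint64_t mask = (bits == 64) ? ~0ull : (1ull << bits) - 1;
    uint64_t out = in & mask;

    if (is_signed && bits < 64 && (out & (1ull << (bits - 1)))) {
        out |= ~mask;                      /* sign-extend into the high bits */
    }
    return out << shift;                   /* shift is 0..4 */
}
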
static inline void gen_check_sp_alignment(DisasContext *s)
@@ -1352,234 +1375,684 @@ static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
* match up with those in the manual.
*/
-/* Unconditional branch (immediate)
- * 31 30 26 25 0
- * +----+-----------+-------------------------------------+
- * | op | 0 0 1 0 1 | imm26 |
- * +----+-----------+-------------------------------------+
- */
-static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
+static bool trans_B(DisasContext *s, arg_i *a)
{
- uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
+ reset_btype(s);
+ gen_goto_tb(s, 0, a->imm);
+ return true;
+}
- if (insn & (1U << 31)) {
- /* BL Branch with link */
- tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
- }
+static bool trans_BL(DisasContext *s, arg_i *a)
+{
+ gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
+ reset_btype(s);
+ gen_goto_tb(s, 0, a->imm);
+ return true;
+}
+
+static bool trans_CBZ(DisasContext *s, arg_cbz *a)
+{
+ DisasLabel match;
+ TCGv_i64 tcg_cmp;
- /* B Branch / BL Branch with link */
- gen_goto_tb(s, 0, addr);
+ tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
+ reset_btype(s);
+
+ match = gen_disas_label(s);
+ tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
+ tcg_cmp, 0, match.label);
+ gen_goto_tb(s, 0, 4);
+ set_disas_label(s, match);
+ gen_goto_tb(s, 1, a->imm);
+ return true;
}
-/* Compare and branch (immediate)
- * 31 30 25 24 23 5 4 0
- * +----+-------------+----+---------------------+--------+
- * | sf | 0 1 1 0 1 0 | op | imm19 | Rt |
- * +----+-------------+----+---------------------+--------+
- */
-static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
+static bool trans_TBZ(DisasContext *s, arg_tbz *a)
{
- unsigned int sf, op, rt;
- uint64_t addr;
- TCGLabel *label_match;
+ DisasLabel match;
TCGv_i64 tcg_cmp;
- sf = extract32(insn, 31, 1);
- op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
- rt = extract32(insn, 0, 5);
- addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
+ tcg_cmp = tcg_temp_new_i64();
+ tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
- tcg_cmp = read_cpu_reg(s, rt, sf);
- label_match = gen_new_label();
+ reset_btype(s);
- tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
- tcg_cmp, 0, label_match);
+ match = gen_disas_label(s);
+ tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
+ tcg_cmp, 0, match.label);
+ gen_goto_tb(s, 0, 4);
+ set_disas_label(s, match);
+ gen_goto_tb(s, 1, a->imm);
+ return true;
+}
- gen_goto_tb(s, 0, s->pc);
- gen_set_label(label_match);
- gen_goto_tb(s, 1, addr);
+static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
+{
+ /* BC.cond is only present with FEAT_HBC */
+ if (a->c && !dc_isar_feature(aa64_hbc, s)) {
+ return false;
+ }
+ reset_btype(s);
+ if (a->cond < 0x0e) {
+ /* genuinely conditional branches */
+ DisasLabel match = gen_disas_label(s);
+ arm_gen_test_cc(a->cond, match.label);
+ gen_goto_tb(s, 0, 4);
+ set_disas_label(s, match);
+ gen_goto_tb(s, 1, a->imm);
+ } else {
+ /* 0xe and 0xf are both "always" conditions */
+ gen_goto_tb(s, 0, a->imm);
+ }
+ return true;
}
-/* Test and branch (immediate)
- * 31 30 25 24 23 19 18 5 4 0
- * +----+-------------+----+-------+-------------+------+
- * | b5 | 0 1 1 0 1 1 | op | b40 | imm14 | Rt |
- * +----+-------------+----+-------+-------------+------+
- */
-static void disas_test_b_imm(DisasContext *s, uint32_t insn)
+static void set_btype_for_br(DisasContext *s, int rn)
{
- unsigned int bit_pos, op, rt;
- uint64_t addr;
- TCGLabel *label_match;
- TCGv_i64 tcg_cmp;
+ if (dc_isar_feature(aa64_bti, s)) {
+ /* BR to {x16,x17} or !guard -> 1, else 3. */
+ set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
+ }
+}
- bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
- op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
- addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
- rt = extract32(insn, 0, 5);
+static void set_btype_for_blr(DisasContext *s)
+{
+ if (dc_isar_feature(aa64_bti, s)) {
+ /* BLR sets BTYPE to 2, regardless of source guarded page. */
+ set_btype(s, 2);
+ }
+}
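
The two helpers above encode the FEAT_BTI rule for the BTYPE value that an indirect branch records. A minimal standalone sketch of that mapping, with illustrative names:

#include <stdbool.h>

/* Illustrative only: BTYPE recorded by BR/BLR when FEAT_BTI is present. */
static int btype_for_indirect_branch(bool is_blr, int rn, bool guarded_page)
{
    if (is_blr) {
        return 2;   /* BLR always records BTYPE 2 */
    }
    /* BR via x16/x17, or from a non-guarded page, records 1; otherwise 3. */
    return (rn == 16 || rn == 17 || !guarded_page) ? 1 : 3;
}
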
- tcg_cmp = tcg_temp_new_i64();
- tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
- label_match = gen_new_label();
- tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
- tcg_cmp, 0, label_match);
- tcg_temp_free_i64(tcg_cmp);
- gen_goto_tb(s, 0, s->pc);
- gen_set_label(label_match);
- gen_goto_tb(s, 1, addr);
-}
-
-/* Conditional branch (immediate)
- * 31 25 24 23 5 4 3 0
- * +---------------+----+---------------------+----+------+
- * | 0 1 0 1 0 1 0 | o1 | imm19 | o0 | cond |
- * +---------------+----+---------------------+----+------+
- */
-static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
+static bool trans_BR(DisasContext *s, arg_r *a)
{
- unsigned int cond;
- uint64_t addr;
+ gen_a64_set_pc(s, cpu_reg(s, a->rn));
+ set_btype_for_br(s, a->rn);
+ s->base.is_jmp = DISAS_JUMP;
+ return true;
+}
- if ((insn & (1 << 4)) || (insn & (1 << 24))) {
- unallocated_encoding(s);
- return;
+static bool trans_BLR(DisasContext *s, arg_r *a)
+{
+ TCGv_i64 dst = cpu_reg(s, a->rn);
+ TCGv_i64 lr = cpu_reg(s, 30);
+ if (dst == lr) {
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ tcg_gen_mov_i64(tmp, dst);
+ dst = tmp;
}
- addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
- cond = extract32(insn, 0, 4);
+ gen_pc_plus_diff(s, lr, curr_insn_len(s));
+ gen_a64_set_pc(s, dst);
+ set_btype_for_blr(s);
+ s->base.is_jmp = DISAS_JUMP;
+ return true;
+}
- if (cond < 0x0e) {
- /* genuinely conditional branches */
- TCGLabel *label_match = gen_new_label();
- arm_gen_test_cc(cond, label_match);
- gen_goto_tb(s, 0, s->pc);
- gen_set_label(label_match);
- gen_goto_tb(s, 1, addr);
+static bool trans_RET(DisasContext *s, arg_r *a)
+{
+ gen_a64_set_pc(s, cpu_reg(s, a->rn));
+ s->base.is_jmp = DISAS_JUMP;
+ return true;
+}
+
+static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
+ TCGv_i64 modifier, bool use_key_a)
+{
+ TCGv_i64 truedst;
+ /*
+ * Return the branch target for a BRAA/RETA/etc, which is either
+ * just the destination dst, or that value with the pauth check
+ * done and the code removed from the high bits.
+ */
+ if (!s->pauth_active) {
+ return dst;
+ }
+
+ truedst = tcg_temp_new_i64();
+ if (use_key_a) {
+ gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
} else {
- /* 0xe and 0xf are both "always" conditions */
- gen_goto_tb(s, 0, addr);
+ gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
}
+ return truedst;
}
-/* HINT instruction group, including various allocated HINTs */
-static void handle_hint(DisasContext *s, uint32_t insn,
- unsigned int op1, unsigned int op2, unsigned int crm)
+static bool trans_BRAZ(DisasContext *s, arg_braz *a)
{
- unsigned int selector = crm << 3 | op2;
+ TCGv_i64 dst;
- if (op1 != 3) {
- unallocated_encoding(s);
- return;
+ if (!dc_isar_feature(aa64_pauth, s)) {
+ return false;
}
- switch (selector) {
- case 0: /* NOP */
- return;
- case 3: /* WFI */
- s->base.is_jmp = DISAS_WFI;
- return;
- /* When running in MTTCG we don't generate jumps to the yield and
- * WFE helpers as it won't affect the scheduling of other vCPUs.
- * If we wanted to more completely model WFE/SEV so we don't busy
- * spin unnecessarily we would need to do something more involved.
+ dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
+ gen_a64_set_pc(s, dst);
+ set_btype_for_br(s, a->rn);
+ s->base.is_jmp = DISAS_JUMP;
+ return true;
+}
+
+static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
+{
+ TCGv_i64 dst, lr;
+
+ if (!dc_isar_feature(aa64_pauth, s)) {
+ return false;
+ }
+
+ dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
+ lr = cpu_reg(s, 30);
+ if (dst == lr) {
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ tcg_gen_mov_i64(tmp, dst);
+ dst = tmp;
+ }
+ gen_pc_plus_diff(s, lr, curr_insn_len(s));
+ gen_a64_set_pc(s, dst);
+ set_btype_for_blr(s);
+ s->base.is_jmp = DISAS_JUMP;
+ return true;
+}
+
+static bool trans_RETA(DisasContext *s, arg_reta *a)
+{
+ TCGv_i64 dst;
+
+ dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
+ gen_a64_set_pc(s, dst);
+ s->base.is_jmp = DISAS_JUMP;
+ return true;
+}
+
+static bool trans_BRA(DisasContext *s, arg_bra *a)
+{
+ TCGv_i64 dst;
+
+ if (!dc_isar_feature(aa64_pauth, s)) {
+ return false;
+ }
+ dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
+ gen_a64_set_pc(s, dst);
+ set_btype_for_br(s, a->rn);
+ s->base.is_jmp = DISAS_JUMP;
+ return true;
+}
+
+static bool trans_BLRA(DisasContext *s, arg_bra *a)
+{
+ TCGv_i64 dst, lr;
+
+ if (!dc_isar_feature(aa64_pauth, s)) {
+ return false;
+ }
+ dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
+ lr = cpu_reg(s, 30);
+ if (dst == lr) {
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ tcg_gen_mov_i64(tmp, dst);
+ dst = tmp;
+ }
+ gen_pc_plus_diff(s, lr, curr_insn_len(s));
+ gen_a64_set_pc(s, dst);
+ set_btype_for_blr(s);
+ s->base.is_jmp = DISAS_JUMP;
+ return true;
+}
+
+static bool trans_ERET(DisasContext *s, arg_ERET *a)
+{
+ TCGv_i64 dst;
+
+ if (s->current_el == 0) {
+ return false;
+ }
+ if (s->trap_eret) {
+ gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
+ return true;
+ }
+ dst = tcg_temp_new_i64();
+ tcg_gen_ld_i64(dst, tcg_env,
+ offsetof(CPUARMState, elr_el[s->current_el]));
+
+ translator_io_start(&s->base);
+
+ gen_helper_exception_return(tcg_env, dst);
+ /* Must exit loop to check un-masked IRQs */
+ s->base.is_jmp = DISAS_EXIT;
+ return true;
+}
+
+static bool trans_ERETA(DisasContext *s, arg_reta *a)
+{
+ TCGv_i64 dst;
+
+ if (!dc_isar_feature(aa64_pauth, s)) {
+ return false;
+ }
+ if (s->current_el == 0) {
+ return false;
+ }
+ /* The FGT trap takes precedence over an auth trap. */
+ if (s->trap_eret) {
+ gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
+ return true;
+ }
+ dst = tcg_temp_new_i64();
+ tcg_gen_ld_i64(dst, tcg_env,
+ offsetof(CPUARMState, elr_el[s->current_el]));
+
+ dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
+
+ translator_io_start(&s->base);
+
+ gen_helper_exception_return(tcg_env, dst);
+ /* Must exit loop to check un-masked IRQs */
+ s->base.is_jmp = DISAS_EXIT;
+ return true;
+}
+
+static bool trans_NOP(DisasContext *s, arg_NOP *a)
+{
+ return true;
+}
+
+static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
+{
+ /*
+ * When running in MTTCG we don't generate jumps to the yield and
+ * WFE helpers as it won't affect the scheduling of other vCPUs.
+ * If we wanted to more completely model WFE/SEV so we don't busy
+ * spin unnecessarily we would need to do something more involved.
+ */
+ if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+ s->base.is_jmp = DISAS_YIELD;
+ }
+ return true;
+}
+
+static bool trans_WFI(DisasContext *s, arg_WFI *a)
+{
+ s->base.is_jmp = DISAS_WFI;
+ return true;
+}
+
+static bool trans_WFE(DisasContext *s, arg_WFI *a)
+{
+ /*
+ * When running in MTTCG we don't generate jumps to the yield and
+ * WFE helpers as it won't affect the scheduling of other vCPUs.
+ * If we wanted to more completely model WFE/SEV so we don't busy
+ * spin unnecessarily we would need to do something more involved.
+ */
+ if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+ s->base.is_jmp = DISAS_WFE;
+ }
+ return true;
+}
+
+static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
+{
+ if (s->pauth_active) {
+ gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
+ }
+ return true;
+}
+
+static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
+{
+ if (s->pauth_active) {
+ gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
+ }
+ return true;
+}
+
+static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
+{
+ if (s->pauth_active) {
+ gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
+ }
+ return true;
+}
+
+static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
+{
+ if (s->pauth_active) {
+ gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
+ }
+ return true;
+}
+
+static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
+{
+ if (s->pauth_active) {
+ gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
+ }
+ return true;
+}
+
+static bool trans_ESB(DisasContext *s, arg_ESB *a)
+{
+ /* Without RAS, we must implement this as NOP. */
+ if (dc_isar_feature(aa64_ras, s)) {
+ /*
+ * QEMU does not have a source of physical SErrors,
+ * so we are only concerned with virtual SErrors.
+ * The pseudocode in the ARM for this case is
+ * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
+ * AArch64.vESBOperation();
+ * Most of the condition can be evaluated at translation time.
+ * Test for EL2 present, and defer test for SEL2 to runtime.
*/
- case 1: /* YIELD */
- if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
- s->base.is_jmp = DISAS_YIELD;
+ if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
+ gen_helper_vesb(tcg_env);
}
- return;
- case 2: /* WFE */
- if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
- s->base.is_jmp = DISAS_WFE;
- }
- return;
- case 4: /* SEV */
- case 5: /* SEVL */
- /* we treat all as NOP at least for now */
- return;
- default:
- /* default specified as NOP equivalent */
- return;
}
+ return true;
+}
+
+static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
+{
+ if (s->pauth_active) {
+ gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
+ }
+ return true;
+}
+
+static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
+{
+ if (s->pauth_active) {
+ gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
+ }
+ return true;
+}
+
+static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
+{
+ if (s->pauth_active) {
+ gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
+ }
+ return true;
+}
+
+static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
+{
+ if (s->pauth_active) {
+ gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
+ }
+ return true;
+}
+
+static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
+{
+ if (s->pauth_active) {
+ gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
+ }
+ return true;
+}
+
+static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
+{
+ if (s->pauth_active) {
+ gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
+ }
+ return true;
+}
+
+static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
+{
+ if (s->pauth_active) {
+ gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
+ }
+ return true;
+}
+
+static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
+{
+ if (s->pauth_active) {
+ gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
+ }
+ return true;
}
-static void gen_clrex(DisasContext *s, uint32_t insn)
+static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
{
tcg_gen_movi_i64(cpu_exclusive_addr, -1);
+ return true;
}
-/* CLREX, DSB, DMB, ISB */
-static void handle_sync(DisasContext *s, uint32_t insn,
- unsigned int op1, unsigned int op2, unsigned int crm)
+static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
{
+ /* We handle DSB and DMB the same way */
TCGBar bar;
- if (op1 != 3) {
- unallocated_encoding(s);
- return;
+ switch (a->types) {
+ case 1: /* MBReqTypes_Reads */
+ bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
+ break;
+ case 2: /* MBReqTypes_Writes */
+ bar = TCG_BAR_SC | TCG_MO_ST_ST;
+ break;
+ default: /* MBReqTypes_All */
+ bar = TCG_BAR_SC | TCG_MO_ALL;
+ break;
}
+ tcg_gen_mb(bar);
+ return true;
+}
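
For reference, my reading of the barrier encoding (not spelled out in the patch): a->types is CRm<1:0> of the DMB/DSB instruction, so the cases above work out as, for example:

    dmb ishld  ->  types = 1  ->  TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST
    dmb ishst  ->  types = 2  ->  TCG_BAR_SC | TCG_MO_ST_ST
    dmb ish    ->  types = 3  ->  TCG_BAR_SC | TCG_MO_ALL
    dsb sy     ->  types = 3  ->  TCG_BAR_SC | TCG_MO_ALL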
- switch (op2) {
- case 2: /* CLREX */
- gen_clrex(s, insn);
- return;
- case 4: /* DSB */
- case 5: /* DMB */
- switch (crm & 3) {
- case 1: /* MBReqTypes_Reads */
- bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
- break;
- case 2: /* MBReqTypes_Writes */
- bar = TCG_BAR_SC | TCG_MO_ST_ST;
- break;
- default: /* MBReqTypes_All */
- bar = TCG_BAR_SC | TCG_MO_ALL;
- break;
- }
- tcg_gen_mb(bar);
- return;
- case 6: /* ISB */
- /* We need to break the TB after this insn to execute
- * a self-modified code correctly and also to take
- * any pending interrupts immediately.
- */
- gen_goto_tb(s, 0, s->pc);
- return;
- default:
- unallocated_encoding(s);
- return;
+static bool trans_ISB(DisasContext *s, arg_ISB *a)
+{
+ /*
+ * We need to break the TB after this insn to execute
+ * self-modifying code correctly and also to take
+ * any pending interrupts immediately.
+ */
+ reset_btype(s);
+ gen_goto_tb(s, 0, 4);
+ return true;
+}
+
+static bool trans_SB(DisasContext *s, arg_SB *a)
+{
+ if (!dc_isar_feature(aa64_sb, s)) {
+ return false;
}
+ /*
+ * TODO: There is no speculation barrier opcode for TCG;
+ * MB and end the TB instead.
+ */
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+ gen_goto_tb(s, 0, 4);
+ return true;
}
-/* MSR (immediate) - move immediate to processor state field */
-static void handle_msr_i(DisasContext *s, uint32_t insn,
- unsigned int op1, unsigned int op2, unsigned int crm)
+static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
{
- int op = op1 << 3 | op2;
- switch (op) {
- case 0x05: /* SPSel */
- if (s->current_el == 0) {
- unallocated_encoding(s);
- return;
+ if (!dc_isar_feature(aa64_condm_4, s)) {
+ return false;
+ }
+ tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
+ return true;
+}
+
+static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
+{
+ TCGv_i32 z;
+
+ if (!dc_isar_feature(aa64_condm_5, s)) {
+ return false;
+ }
+
+ z = tcg_temp_new_i32();
+
+ tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
+
+ /*
+ * (!C & !Z) << 31
+ * (!(C | Z)) << 31
+ * ~((C | Z) << 31)
+ * ~-(C | Z)
+ * (C | Z) - 1
+ */
+ tcg_gen_or_i32(cpu_NF, cpu_CF, z);
+ tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
+
+ /* !(Z & C) */
+ tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
+ tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
+
+ /* (!C & Z) << 31 -> -(Z & ~C) */
+ tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
+ tcg_gen_neg_i32(cpu_VF, cpu_VF);
+
+ /* C | Z */
+ tcg_gen_or_i32(cpu_CF, cpu_CF, z);
+
+ return true;
+}
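
The chain of equalities in the comment above can be checked in isolation; a minimal standalone sketch (illustrative only, helper name hypothetical, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    /* Check that bit 31 of ((c | z) - 1) equals (!c && !z) << 31 for the
     * four possible 0/1 combinations, which is the identity trans_XAFLAG
     * uses to compute the new NF value without a branch. */
    static void xaflag_nf_identity_check(void)
    {
        for (uint32_t c = 0; c <= 1; c++) {
            for (uint32_t z = 0; z <= 1; z++) {
                uint32_t lhs = ((!c && !z) ? 1u : 0u) << 31;
                uint32_t rhs = ((c | z) - 1u) & (1u << 31);
                assert(lhs == rhs);
            }
        }
    }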
+
+static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
+{
+ if (!dc_isar_feature(aa64_condm_5, s)) {
+ return false;
+ }
+
+ tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */
+ tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */
+
+ /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
+ tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
+
+ tcg_gen_movi_i32(cpu_NF, 0);
+ tcg_gen_movi_i32(cpu_VF, 0);
+
+ return true;
+}
+
+static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
+{
+ if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
+ return false;
+ }
+ if (a->imm & 1) {
+ set_pstate_bits(PSTATE_UAO);
+ } else {
+ clear_pstate_bits(PSTATE_UAO);
+ }
+ gen_rebuild_hflags(s);
+ s->base.is_jmp = DISAS_TOO_MANY;
+ return true;
+}
+
+static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
+{
+ if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
+ return false;
+ }
+ if (a->imm & 1) {
+ set_pstate_bits(PSTATE_PAN);
+ } else {
+ clear_pstate_bits(PSTATE_PAN);
+ }
+ gen_rebuild_hflags(s);
+ s->base.is_jmp = DISAS_TOO_MANY;
+ return true;
+}
+
+static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
+{
+ if (s->current_el == 0) {
+ return false;
+ }
+ gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
+ s->base.is_jmp = DISAS_TOO_MANY;
+ return true;
+}
+
+static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
+{
+ if (!dc_isar_feature(aa64_ssbs, s)) {
+ return false;
+ }
+ if (a->imm & 1) {
+ set_pstate_bits(PSTATE_SSBS);
+ } else {
+ clear_pstate_bits(PSTATE_SSBS);
+ }
+ /* Don't need to rebuild hflags since SSBS is a nop */
+ s->base.is_jmp = DISAS_TOO_MANY;
+ return true;
+}
+
+static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
+{
+ if (!dc_isar_feature(aa64_dit, s)) {
+ return false;
+ }
+ if (a->imm & 1) {
+ set_pstate_bits(PSTATE_DIT);
+ } else {
+ clear_pstate_bits(PSTATE_DIT);
+ }
+ /* There's no need to rebuild hflags because DIT is a nop */
+ s->base.is_jmp = DISAS_TOO_MANY;
+ return true;
+}
+
+static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
+{
+ if (dc_isar_feature(aa64_mte, s)) {
+ /* Full MTE is enabled -- set the TCO bit as directed. */
+ if (a->imm & 1) {
+ set_pstate_bits(PSTATE_TCO);
+ } else {
+ clear_pstate_bits(PSTATE_TCO);
}
- /* fall through */
- case 0x1e: /* DAIFSet */
- case 0x1f: /* DAIFClear */
- {
- TCGv_i32 tcg_imm = tcg_const_i32(crm);
- TCGv_i32 tcg_op = tcg_const_i32(op);
- gen_a64_set_pc_im(s->pc - 4);
- gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
- tcg_temp_free_i32(tcg_imm);
- tcg_temp_free_i32(tcg_op);
- /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */
- gen_a64_set_pc_im(s->pc);
- s->base.is_jmp = (op == 0x1f ? DISAS_EXIT : DISAS_JUMP);
- break;
+ gen_rebuild_hflags(s);
+ /* Many factors, including TCO, go into MTE_ACTIVE. */
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
+ return true;
+ } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
+ /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */
+ return true;
+ } else {
+ /* Insn not present */
+ return false;
}
- default:
- unallocated_encoding(s);
- return;
+}
+
+static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
+{
+ gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
+ s->base.is_jmp = DISAS_TOO_MANY;
+ return true;
+}
+
+static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
+{
+ gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
+ /* Exit the cpu loop to re-evaluate pending IRQs. */
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
+ return true;
+}
+
+static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
+{
+ if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
+ return false;
}
+ if (sme_access_check(s)) {
+ int old = s->pstate_sm | (s->pstate_za << 1);
+ int new = a->imm * 3;
+
+ if ((old ^ new) & a->mask) {
+ /* At least one bit changes. */
+ gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
+ tcg_constant_i32(a->mask));
+ s->base.is_jmp = DISAS_TOO_MANY;
+ }
+ }
+ return true;
}
static void gen_get_nzcv(TCGv_i64 tcg_rt)
@@ -1599,13 +2072,9 @@ static void gen_get_nzcv(TCGv_i64 tcg_rt)
tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
/* generate result */
tcg_gen_extu_i32_i64(tcg_rt, nzcv);
-
- tcg_temp_free_i32(nzcv);
- tcg_temp_free_i32(tmp);
}
static void gen_set_nzcv(TCGv_i64 tcg_rt)
-
{
TCGv_i32 nzcv = tcg_temp_new_i32();
@@ -1623,7 +2092,29 @@ static void gen_set_nzcv(TCGv_i64 tcg_rt)
/* bit 28, V */
tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
- tcg_temp_free_i32(nzcv);
+}
+
+static void gen_sysreg_undef(DisasContext *s, bool isread,
+ uint8_t op0, uint8_t op1, uint8_t op2,
+ uint8_t crn, uint8_t crm, uint8_t rt)
+{
+ /*
+ * Generate code to emit an UNDEF with correct syndrome
+ * information for a failed system register access.
+ * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
+ * but if FEAT_IDST is implemented then read accesses to registers
+ * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
+ * syndrome.
+ */
+ uint32_t syndrome;
+
+ if (isread && dc_isar_feature(aa64_ids, s) &&
+ arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
+ syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
+ } else {
+ syndrome = syn_uncategorized();
+ }
+ gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
}
/* MRS - move from system register
@@ -1633,16 +2124,38 @@ static void gen_set_nzcv(TCGv_i64 tcg_rt)
* These are all essentially the same insn in 'read' and 'write'
* versions, with varying op0 fields.
*/
-static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
+static void handle_sys(DisasContext *s, bool isread,
unsigned int op0, unsigned int op1, unsigned int op2,
unsigned int crn, unsigned int crm, unsigned int rt)
{
- const ARMCPRegInfo *ri;
+ uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
+ crn, crm, op0, op1, op2);
+ const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
+ bool need_exit_tb = false;
+ bool nv_trap_to_el2 = false;
+ bool nv_redirect_reg = false;
+ bool skip_fp_access_checks = false;
+ bool nv2_mem_redirect = false;
+ TCGv_ptr tcg_ri = NULL;
TCGv_i64 tcg_rt;
+ uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
- ri = get_arm_cp_reginfo(s->cp_regs,
- ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
- crn, crm, op0, op1, op2));
+ if (crn == 11 || crn == 15) {
+ /*
+ * Check for TIDCP trap, which must take precedence over
+ * the UNDEF for "no such register" etc.
+ */
+ switch (s->current_el) {
+ case 0:
+ if (dc_isar_feature(aa64_tidcp1, s)) {
+ gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
+ }
+ break;
+ case 1:
+ gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
+ break;
+ }
+ }
if (!ri) {
/* Unknown register; this might be a guest error or a QEMU
@@ -1651,37 +2164,159 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
"system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
isread ? "read" : "write", op0, op1, crn, crm, op2);
- unallocated_encoding(s);
+ gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
return;
}
+ if (s->nv2 && ri->nv2_redirect_offset) {
+ /*
+ * Some registers always redirect to memory; some only do so if
+ * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
+ * pairs which share an offset; see the table in R_CSRPQ).
+ */
+ if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
+ nv2_mem_redirect = s->nv1;
+ } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
+ nv2_mem_redirect = !s->nv1;
+ } else {
+ nv2_mem_redirect = true;
+ }
+ }
+
/* Check access permissions */
if (!cp_access_ok(s->current_el, ri, isread)) {
- unallocated_encoding(s);
- return;
+ /*
+ * FEAT_NV/NV2 handling does not do the usual FP access checks
+ * for registers only accessible at EL2 (though it *does* do them
+ * for registers accessible at EL1).
+ */
+ skip_fp_access_checks = true;
+ if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
+ /*
+ * This is one of the few EL2 registers which should redirect
+ * to the equivalent EL1 register. We do that after running
+ * the EL2 register's accessfn.
+ */
+ nv_redirect_reg = true;
+ assert(!nv2_mem_redirect);
+ } else if (nv2_mem_redirect) {
+ /*
+ * NV2 redirect-to-memory takes precedence over trap to EL2 or
+ * UNDEF to EL1.
+ */
+ } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
+ /*
+ * This register / instruction exists and is an EL2 register, so
+ * we must trap to EL2 if accessed in nested virtualization EL1
+ * instead of UNDEFing. We'll do that after the usual access checks.
+ * (This makes a difference only for a couple of registers like
+ * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
+ * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
+ * an accessfn which does nothing when called from EL1, because
+ * the trap-to-EL3 controls which would apply to that register
+ * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
+ */
+ nv_trap_to_el2 = true;
+ } else {
+ gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
+ return;
+ }
}
- if (ri->accessfn) {
+ if (ri->accessfn || (ri->fgt && s->fgt_active)) {
/* Emit code to perform further access permissions checks at
* runtime; this may result in an exception.
*/
- TCGv_ptr tmpptr;
- TCGv_i32 tcg_syn, tcg_isread;
- uint32_t syndrome;
+ gen_a64_update_pc(s, 0);
+ tcg_ri = tcg_temp_new_ptr();
+ gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
+ tcg_constant_i32(key),
+ tcg_constant_i32(syndrome),
+ tcg_constant_i32(isread));
+ } else if (ri->type & ARM_CP_RAISES_EXC) {
+ /*
+ * The readfn or writefn might raise an exception;
+ * synchronize the CPU state in case it does.
+ */
+ gen_a64_update_pc(s, 0);
+ }
- gen_a64_set_pc_im(s->pc - 4);
- tmpptr = tcg_const_ptr(ri);
- syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
- tcg_syn = tcg_const_i32(syndrome);
- tcg_isread = tcg_const_i32(isread);
- gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
- tcg_temp_free_ptr(tmpptr);
- tcg_temp_free_i32(tcg_syn);
- tcg_temp_free_i32(tcg_isread);
+ if (!skip_fp_access_checks) {
+ if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
+ return;
+ } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
+ return;
+ } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
+ return;
+ }
+ }
+
+ if (nv_trap_to_el2) {
+ gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
+ return;
+ }
+
+ if (nv_redirect_reg) {
+ /*
+ * FEAT_NV2 redirection of an EL2 register to an EL1 register.
+ * Conveniently in all cases the encoding of the EL1 register is
+ * identical to the EL2 register except that opc1 is 0.
+ * Get the reginfo for the EL1 register to use for the actual access.
+ * We don't use the EL1 register's access function, and
+ * fine-grained-traps on EL1 also do not apply here.
+ */
+ key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
+ crn, crm, op0, 0, op2);
+ ri = get_arm_cp_reginfo(s->cp_regs, key);
+ assert(ri);
+ assert(cp_access_ok(s->current_el, ri, isread));
+ /*
+ * We might not have done an update_pc earlier, so check we don't
+ * need it. We could support this in future if necessary.
+ */
+ assert(!(ri->type & ARM_CP_RAISES_EXC));
+ }
+
+ if (nv2_mem_redirect) {
+ /*
+ * This system register is being redirected into an EL2 memory access.
+ * This means it is not an IO operation, doesn't change hflags,
+ * and need not end the TB, because it has no side effects.
+ *
+ * The access is 64-bit single copy atomic, guaranteed aligned because
+ * of the definition of VCNR_EL2. Its endianness depends on
+ * SCTLR_EL2.EE, not on the data endianness of EL1.
+ * It is done under either the EL2 translation regime or the EL2&0
+ * translation regime, depending on HCR_EL2.E2H. It behaves as if
+ * PSTATE.PAN is 0.
+ */
+ TCGv_i64 ptr = tcg_temp_new_i64();
+ MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
+ ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
+ int memidx = arm_to_core_mmu_idx(armmemidx);
+ uint32_t syn;
+
+ mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
+
+ tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
+ tcg_gen_addi_i64(ptr, ptr,
+ (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
+ tcg_rt = cpu_reg(s, rt);
+
+ syn = syn_data_abort_vncr(0, !isread, 0);
+ disas_set_insn_syndrome(s, syn);
+ if (isread) {
+ tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
+ } else {
+ tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
+ }
+ return;
}
/* Handle special cases first */
- switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
+ switch (ri->type & ARM_CP_SPECIAL_MASK) {
+ case 0:
+ break;
case ARM_CP_NOP:
return;
case ARM_CP_NZCV:
@@ -1693,28 +2328,78 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
}
return;
case ARM_CP_CURRENTEL:
- /* Reads as current EL value from pstate, which is
+ {
+ /*
+ * Reads as current EL value from pstate, which is
* guaranteed to be constant by the tb flags.
+ * For nested virt we should report EL2.
*/
+ int el = s->nv ? 2 : s->current_el;
tcg_rt = cpu_reg(s, rt);
- tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
+ tcg_gen_movi_i64(tcg_rt, el << 2);
return;
+ }
case ARM_CP_DC_ZVA:
/* Writes clear the aligned block of memory which rt points into. */
- tcg_rt = cpu_reg(s, rt);
- gen_helper_dc_zva(cpu_env, tcg_rt);
+ if (s->mte_active[0]) {
+ int desc = 0;
+
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
+
+ tcg_rt = tcg_temp_new_i64();
+ gen_helper_mte_check_zva(tcg_rt, tcg_env,
+ tcg_constant_i32(desc), cpu_reg(s, rt));
+ } else {
+ tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
+ }
+ gen_helper_dc_zva(tcg_env, tcg_rt);
return;
- default:
- break;
- }
- if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
+ case ARM_CP_DC_GVA:
+ {
+ TCGv_i64 clean_addr, tag;
+
+ /*
+ * DC_GVA, like DC_ZVA, requires that we supply the original
+ * pointer for an invalid page. Probe that address first.
+ */
+ tcg_rt = cpu_reg(s, rt);
+ clean_addr = clean_data_tbi(s, tcg_rt);
+ gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
+
+ if (s->ata[0]) {
+ /* Extract the tag from the register to match STZGM. */
+ tag = tcg_temp_new_i64();
+ tcg_gen_shri_i64(tag, tcg_rt, 56);
+ gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
+ }
+ }
return;
- } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
+ case ARM_CP_DC_GZVA:
+ {
+ TCGv_i64 clean_addr, tag;
+
+ /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
+ tcg_rt = cpu_reg(s, rt);
+ clean_addr = clean_data_tbi(s, tcg_rt);
+ gen_helper_dc_zva(tcg_env, clean_addr);
+
+ if (s->ata[0]) {
+ /* Extract the tag from the register to match STZGM. */
+ tag = tcg_temp_new_i64();
+ tcg_gen_shri_i64(tag, tcg_rt, 56);
+ gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
+ }
+ }
return;
+ default:
+ g_assert_not_reached();
}
- if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
- gen_io_start();
+ if (ri->type & ARM_CP_IO) {
+ /* I/O operations must end the TB here (whether read or write) */
+ need_exit_tb = translator_io_start(&s->base);
}
tcg_rt = cpu_reg(s, rt);
@@ -1723,279 +2408,124 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
if (ri->type & ARM_CP_CONST) {
tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
} else if (ri->readfn) {
- TCGv_ptr tmpptr;
- tmpptr = tcg_const_ptr(ri);
- gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
- tcg_temp_free_ptr(tmpptr);
+ if (!tcg_ri) {
+ tcg_ri = gen_lookup_cp_reg(key);
+ }
+ gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
} else {
- tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
+ tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
}
} else {
if (ri->type & ARM_CP_CONST) {
/* If not forbidden by access permissions, treat as WI */
return;
} else if (ri->writefn) {
- TCGv_ptr tmpptr;
- tmpptr = tcg_const_ptr(ri);
- gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
- tcg_temp_free_ptr(tmpptr);
+ if (!tcg_ri) {
+ tcg_ri = gen_lookup_cp_reg(key);
+ }
+ gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
} else {
- tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
+ tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
}
}
- if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
- /* I/O operations must end the TB here (whether read or write) */
- gen_io_end();
- s->base.is_jmp = DISAS_UPDATE;
- } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
- /* We default to ending the TB on a coprocessor register write,
+ if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
+ /*
+ * A write to any coprocessor register that ends a TB
+ * must rebuild the hflags for the next TB.
+ */
+ gen_rebuild_hflags(s);
+ /*
+ * We default to ending the TB on a coprocessor register write,
* but allow this to be suppressed by the register definition
* (usually only necessary to work around guest bugs).
*/
- s->base.is_jmp = DISAS_UPDATE;
+ need_exit_tb = true;
}
-}
-
-/* System
- * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0
- * +---------------------+---+-----+-----+-------+-------+-----+------+
- * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt |
- * +---------------------+---+-----+-----+-------+-------+-----+------+
- */
-static void disas_system(DisasContext *s, uint32_t insn)
-{
- unsigned int l, op0, op1, crn, crm, op2, rt;
- l = extract32(insn, 21, 1);
- op0 = extract32(insn, 19, 2);
- op1 = extract32(insn, 16, 3);
- crn = extract32(insn, 12, 4);
- crm = extract32(insn, 8, 4);
- op2 = extract32(insn, 5, 3);
- rt = extract32(insn, 0, 5);
-
- if (op0 == 0) {
- if (l || rt != 31) {
- unallocated_encoding(s);
- return;
- }
- switch (crn) {
- case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
- handle_hint(s, insn, op1, op2, crm);
- break;
- case 3: /* CLREX, DSB, DMB, ISB */
- handle_sync(s, insn, op1, op2, crm);
- break;
- case 4: /* MSR (immediate) */
- handle_msr_i(s, insn, op1, op2, crm);
- break;
- default:
- unallocated_encoding(s);
- break;
- }
- return;
+ if (need_exit_tb) {
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
}
- handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
}
-/* Exception generation
- *
- * 31 24 23 21 20 5 4 2 1 0
- * +-----------------+-----+------------------------+-----+----+
- * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL |
- * +-----------------------+------------------------+----------+
- */
-static void disas_exc(DisasContext *s, uint32_t insn)
+static bool trans_SYS(DisasContext *s, arg_SYS *a)
{
- int opc = extract32(insn, 21, 3);
- int op2_ll = extract32(insn, 0, 5);
- int imm16 = extract32(insn, 5, 16);
- TCGv_i32 tmp;
+ handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
+ return true;
+}
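
As a concrete example of the fields handle_sys() receives (standard architectural encoding, not specific to this patch): mrs x0, cntvct_el0 decodes as l = 1 (read), op0 = 3, op1 = 3, crn = 14, crm = 0, op2 = 2, rt = 0.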
- switch (opc) {
- case 0:
- /* For SVC, HVC and SMC we advance the single-step state
- * machine before taking the exception. This is architecturally
- * mandated, to ensure that single-stepping a system call
- * instruction works properly.
- */
- switch (op2_ll) {
- case 1: /* SVC */
- gen_ss_advance(s);
- gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
- default_exception_el(s));
- break;
- case 2: /* HVC */
- if (s->current_el == 0) {
- unallocated_encoding(s);
- break;
- }
- /* The pre HVC helper handles cases when HVC gets trapped
- * as an undefined insn by runtime configuration.
- */
- gen_a64_set_pc_im(s->pc - 4);
- gen_helper_pre_hvc(cpu_env);
- gen_ss_advance(s);
- gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
- break;
- case 3: /* SMC */
- if (s->current_el == 0) {
- unallocated_encoding(s);
- break;
- }
- gen_a64_set_pc_im(s->pc - 4);
- tmp = tcg_const_i32(syn_aa64_smc(imm16));
- gen_helper_pre_smc(cpu_env, tmp);
- tcg_temp_free_i32(tmp);
- gen_ss_advance(s);
- gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
- break;
- default:
- unallocated_encoding(s);
- break;
- }
- break;
- case 1:
- if (op2_ll != 0) {
- unallocated_encoding(s);
- break;
- }
- /* BRK */
- gen_exception_bkpt_insn(s, 4, syn_aa64_bkpt(imm16));
- break;
- case 2:
- if (op2_ll != 0) {
- unallocated_encoding(s);
- break;
- }
- /* HLT. This has two purposes.
- * Architecturally, it is an external halting debug instruction.
- * Since QEMU doesn't implement external debug, we treat this as
- * it is required for halting debug disabled: it will UNDEF.
- * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
- */
- if (semihosting_enabled() && imm16 == 0xf000) {
-#ifndef CONFIG_USER_ONLY
- /* In system mode, don't allow userspace access to semihosting,
- * to provide some semblance of security (and for consistency
- * with our 32-bit semihosting).
- */
- if (s->current_el == 0) {
- unsupported_encoding(s, insn);
- break;
- }
-#endif
- gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
- } else {
- unsupported_encoding(s, insn);
- }
- break;
- case 5:
- if (op2_ll < 1 || op2_ll > 3) {
- unallocated_encoding(s);
- break;
- }
- /* DCPS1, DCPS2, DCPS3 */
- unsupported_encoding(s, insn);
- break;
- default:
- unallocated_encoding(s);
- break;
+static bool trans_SVC(DisasContext *s, arg_i *a)
+{
+ /*
+ * For SVC, HVC and SMC we advance the single-step state
+ * machine before taking the exception. This is architecturally
+ * mandated, to ensure that single-stepping a system call
+ * instruction works properly.
+ */
+ uint32_t syndrome = syn_aa64_svc(a->imm);
+ if (s->fgt_svc) {
+ gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
+ return true;
}
+ gen_ss_advance(s);
+ gen_exception_insn(s, 4, EXCP_SWI, syndrome);
+ return true;
}
-/* Unconditional branch (register)
- * 31 25 24 21 20 16 15 10 9 5 4 0
- * +---------------+-------+-------+-------+------+-------+
- * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 |
- * +---------------+-------+-------+-------+------+-------+
- */
-static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
+static bool trans_HVC(DisasContext *s, arg_i *a)
{
- unsigned int opc, op2, op3, rn, op4;
+ int target_el = s->current_el == 3 ? 3 : 2;
- opc = extract32(insn, 21, 4);
- op2 = extract32(insn, 16, 5);
- op3 = extract32(insn, 10, 6);
- rn = extract32(insn, 5, 5);
- op4 = extract32(insn, 0, 5);
-
- if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
+ if (s->current_el == 0) {
unallocated_encoding(s);
- return;
+ return true;
}
+ /*
+ * The pre HVC helper handles cases when HVC gets trapped
+ * as an undefined insn by runtime configuration.
+ */
+ gen_a64_update_pc(s, 0);
+ gen_helper_pre_hvc(tcg_env);
+ /* Architecture requires ss advance before we do the actual work */
+ gen_ss_advance(s);
+ gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
+ return true;
+}
- switch (opc) {
- case 0: /* BR */
- case 1: /* BLR */
- case 2: /* RET */
- gen_a64_set_pc(s, cpu_reg(s, rn));
- /* BLR also needs to load return address */
- if (opc == 1) {
- tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
- }
- break;
- case 4: /* ERET */
- if (s->current_el == 0) {
- unallocated_encoding(s);
- return;
- }
- if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
- gen_io_start();
- }
- gen_helper_exception_return(cpu_env);
- if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
- gen_io_end();
- }
- /* Must exit loop to check un-masked IRQs */
- s->base.is_jmp = DISAS_EXIT;
- return;
- case 5: /* DRPS */
- if (rn != 0x1f) {
- unallocated_encoding(s);
- } else {
- unsupported_encoding(s, insn);
- }
- return;
- default:
+static bool trans_SMC(DisasContext *s, arg_i *a)
+{
+ if (s->current_el == 0) {
unallocated_encoding(s);
- return;
+ return true;
}
+ gen_a64_update_pc(s, 0);
+ gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
+ /* Architecture requires ss advance before we do the actual work */
+ gen_ss_advance(s);
+ gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
+ return true;
+}
- s->base.is_jmp = DISAS_JUMP;
+static bool trans_BRK(DisasContext *s, arg_i *a)
+{
+ gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
+ return true;
}
-/* Branches, exception generating and system instructions */
-static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
+static bool trans_HLT(DisasContext *s, arg_i *a)
{
- switch (extract32(insn, 25, 7)) {
- case 0x0a: case 0x0b:
- case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
- disas_uncond_b_imm(s, insn);
- break;
- case 0x1a: case 0x5a: /* Compare & branch (immediate) */
- disas_comp_b_imm(s, insn);
- break;
- case 0x1b: case 0x5b: /* Test & branch (immediate) */
- disas_test_b_imm(s, insn);
- break;
- case 0x2a: /* Conditional branch (immediate) */
- disas_cond_b_imm(s, insn);
- break;
- case 0x6a: /* Exception generation / System */
- if (insn & (1 << 24)) {
- disas_system(s, insn);
- } else {
- disas_exc(s, insn);
- }
- break;
- case 0x6b: /* Unconditional branch (register) */
- disas_uncond_b_reg(s, insn);
- break;
- default:
+ /*
+ * HLT. This has two purposes.
+ * Architecturally, it is an external halting debug instruction.
+     * Since QEMU doesn't implement external debug, we treat it as the
+     * architecture requires when halting debug is disabled: it will UNDEF.
+ * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
+ */
+ if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
+ gen_exception_internal_insn(s, EXCP_SEMIHOST);
+ } else {
unallocated_encoding(s);
- break;
}
+ return true;
}
/*
@@ -2009,19 +2539,22 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
* races in multi-threaded linux-user and when MTTCG softmmu is
* enabled.
*/
-static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
- TCGv_i64 addr, int size, bool is_pair)
+static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
+ int size, bool is_pair)
{
int idx = get_mem_index(s);
- TCGMemOp memop = s->be_data;
+ TCGv_i64 dirty_addr, clean_addr;
+ MemOp memop = check_atomic_align(s, rn, size + is_pair);
+
+ s->is_ldex = true;
+ dirty_addr = cpu_reg_sp(s, rn);
+ clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
g_assert(size <= 3);
if (is_pair) {
g_assert(size >= 2);
if (size == 2) {
- /* The pair must be single-copy atomic for the doubleword. */
- memop |= MO_64 | MO_ALIGN;
- tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
+ tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
if (s->be_data == MO_LE) {
tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
@@ -2030,30 +2563,29 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
}
} else {
- /* The pair must be single-copy atomic for *each* doubleword, not
- the entire quadword, however it must be quadword aligned. */
- memop |= MO_64;
- tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
- memop | MO_ALIGN_16);
+ TCGv_i128 t16 = tcg_temp_new_i128();
- TCGv_i64 addr2 = tcg_temp_new_i64();
- tcg_gen_addi_i64(addr2, addr, 8);
- tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
- tcg_temp_free_i64(addr2);
+ tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
+ if (s->be_data == MO_LE) {
+ tcg_gen_extr_i128_i64(cpu_exclusive_val,
+ cpu_exclusive_high, t16);
+ } else {
+ tcg_gen_extr_i128_i64(cpu_exclusive_high,
+ cpu_exclusive_val, t16);
+ }
tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
}
} else {
- memop |= size | MO_ALIGN;
- tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
+ tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
}
- tcg_gen_mov_i64(cpu_exclusive_addr, addr);
+ tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
}
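
For context, the monitor state recorded here is what makes the guest's usual load-exclusive/store-exclusive retry loop work; an illustrative guest-side sequence, written as a C-style comment (not from the patch):

    /*
     * retry:
     *     ldxr  w0, [x1]        // records [x1] in the exclusive monitor
     *     add   w0, w0, #1
     *     stxr  w2, w0, [x1]    // w2 = 0 only if the monitor still covers [x1]
     *     cbnz  w2, retry
     */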
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
- TCGv_i64 addr, int size, int is_pair)
+ int rn, int size, int is_pair)
{
/* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
* && (!is_pair || env->exclusive_high == [addr + datasize])) {
@@ -2069,9 +2601,46 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
*/
TCGLabel *fail_label = gen_new_label();
TCGLabel *done_label = gen_new_label();
- TCGv_i64 tmp;
+ TCGv_i64 tmp, clean_addr;
+ MemOp memop;
+
+ /*
+ * FIXME: We are out of spec here. We have recorded only the address
+ * from load_exclusive, not the entire range, and we assume that the
+ * size of the access on both sides match. The architecture allows the
+ * store to be smaller than the load, so long as the stored bytes are
+ * within the range recorded by the load.
+ */
- tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
+ /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
+ clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+ tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
+
+ /*
+ * The write, and any associated faults, only happen if the virtual
+ * and physical addresses pass the exclusive monitor check. These
+ * faults are exceedingly unlikely, because normally the guest uses
+ * the exact same address register for the load_exclusive, and we
+ * would have recognized these faults there.
+ *
+ * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
+ * unaligned 4-byte write within the range of an aligned 8-byte load.
+ * With LSE2, the store would need to cross a 16-byte boundary when the
+ * load did not, which would mean the store is outside the range
+ * recorded for the monitor, which would have failed a corrected monitor
+ * check above. For now, we assume no size change and retain the
+ * MO_ALIGN to let tcg know what we checked in the load_exclusive.
+ *
+ * It is possible to trigger an MTE fault, by performing the load with
+ * a virtual address with a valid tag and performing the store with the
+ * same virtual address and a different invalid tag.
+ */
+ memop = size + is_pair;
+ if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
+ memop |= MO_ALIGN;
+ }
+ memop = finalize_memop(s, memop);
+ gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
tmp = tcg_temp_new_i64();
if (is_pair) {
@@ -2083,38 +2652,46 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
}
tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
cpu_exclusive_val, tmp,
- get_mem_index(s),
- MO_64 | MO_ALIGN | s->be_data);
+ get_mem_index(s), memop);
tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
- } else if (s->be_data == MO_LE) {
- if (tb_cflags(s->base.tb) & CF_PARALLEL) {
- gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
- cpu_exclusive_addr,
- cpu_reg(s, rt),
- cpu_reg(s, rt2));
+ } else {
+ TCGv_i128 t16 = tcg_temp_new_i128();
+ TCGv_i128 c16 = tcg_temp_new_i128();
+ TCGv_i64 a, b;
+
+ if (s->be_data == MO_LE) {
+ tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
+ tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
+ cpu_exclusive_high);
} else {
- gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
- cpu_reg(s, rt), cpu_reg(s, rt2));
+ tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
+ tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
+ cpu_exclusive_val);
}
- } else {
- if (tb_cflags(s->base.tb) & CF_PARALLEL) {
- gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
- cpu_exclusive_addr,
- cpu_reg(s, rt),
- cpu_reg(s, rt2));
+
+ tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
+ get_mem_index(s), memop);
+
+ a = tcg_temp_new_i64();
+ b = tcg_temp_new_i64();
+ if (s->be_data == MO_LE) {
+ tcg_gen_extr_i128_i64(a, b, t16);
} else {
- gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
- cpu_reg(s, rt), cpu_reg(s, rt2));
+ tcg_gen_extr_i128_i64(b, a, t16);
}
+
+ tcg_gen_xor_i64(a, a, cpu_exclusive_val);
+ tcg_gen_xor_i64(b, b, cpu_exclusive_high);
+ tcg_gen_or_i64(tmp, a, b);
+
+ tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
}
} else {
tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
- cpu_reg(s, rt), get_mem_index(s),
- size | MO_ALIGN | s->be_data);
+ cpu_reg(s, rt), get_mem_index(s), memop);
tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
}
tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
- tcg_temp_free_i64(tmp);
tcg_gen_br(done_label);
gen_set_label(fail_label);
@@ -2129,13 +2706,16 @@ static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
TCGv_i64 tcg_rs = cpu_reg(s, rs);
TCGv_i64 tcg_rt = cpu_reg(s, rt);
int memidx = get_mem_index(s);
- TCGv_i64 addr = cpu_reg_sp(s, rn);
+ TCGv_i64 clean_addr;
+ MemOp memop;
if (rn == 31) {
gen_check_sp_alignment(s);
}
- tcg_gen_atomic_cmpxchg_i64(tcg_rs, addr, tcg_rs, tcg_rt, memidx,
- size | MO_ALIGN | s->be_data);
+ memop = check_atomic_align(s, rn, size);
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
+ tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
+ memidx, memop);
}
static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
@@ -2145,13 +2725,18 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
TCGv_i64 s2 = cpu_reg(s, rs + 1);
TCGv_i64 t1 = cpu_reg(s, rt);
TCGv_i64 t2 = cpu_reg(s, rt + 1);
- TCGv_i64 addr = cpu_reg_sp(s, rn);
+ TCGv_i64 clean_addr;
int memidx = get_mem_index(s);
+ MemOp memop;
if (rn == 31) {
gen_check_sp_alignment(s);
}
+ /* This is a single atomic access, despite the "pair". */
+ memop = check_atomic_align(s, rn, size + 1);
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
+
if (size == 2) {
TCGv_i64 cmp = tcg_temp_new_i64();
TCGv_i64 val = tcg_temp_new_i64();
@@ -2164,1236 +2749,1503 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
tcg_gen_concat32_i64(cmp, s2, s1);
}
- tcg_gen_atomic_cmpxchg_i64(cmp, addr, cmp, val, memidx,
- MO_64 | MO_ALIGN | s->be_data);
- tcg_temp_free_i64(val);
+ tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
if (s->be_data == MO_LE) {
tcg_gen_extr32_i64(s1, s2, cmp);
} else {
tcg_gen_extr32_i64(s2, s1, cmp);
}
- tcg_temp_free_i64(cmp);
- } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
- TCGv_i32 tcg_rs = tcg_const_i32(rs);
+ } else {
+ TCGv_i128 cmp = tcg_temp_new_i128();
+ TCGv_i128 val = tcg_temp_new_i128();
if (s->be_data == MO_LE) {
- gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+ tcg_gen_concat_i64_i128(val, t1, t2);
+ tcg_gen_concat_i64_i128(cmp, s1, s2);
} else {
- gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+ tcg_gen_concat_i64_i128(val, t2, t1);
+ tcg_gen_concat_i64_i128(cmp, s2, s1);
}
- tcg_temp_free_i32(tcg_rs);
- } else {
- TCGv_i64 d1 = tcg_temp_new_i64();
- TCGv_i64 d2 = tcg_temp_new_i64();
- TCGv_i64 a2 = tcg_temp_new_i64();
- TCGv_i64 c1 = tcg_temp_new_i64();
- TCGv_i64 c2 = tcg_temp_new_i64();
- TCGv_i64 zero = tcg_const_i64(0);
-
- /* Load the two words, in memory order. */
- tcg_gen_qemu_ld_i64(d1, addr, memidx,
- MO_64 | MO_ALIGN_16 | s->be_data);
- tcg_gen_addi_i64(a2, addr, 8);
- tcg_gen_qemu_ld_i64(d2, addr, memidx, MO_64 | s->be_data);
-
- /* Compare the two words, also in memory order. */
- tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
- tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
- tcg_gen_and_i64(c2, c2, c1);
-
- /* If compare equal, write back new data, else write back old data. */
- tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
- tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
- tcg_gen_qemu_st_i64(c1, addr, memidx, MO_64 | s->be_data);
- tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
- tcg_temp_free_i64(a2);
- tcg_temp_free_i64(c1);
- tcg_temp_free_i64(c2);
- tcg_temp_free_i64(zero);
-
- /* Write back the data from memory to Rs. */
- tcg_gen_mov_i64(s1, d1);
- tcg_gen_mov_i64(s2, d2);
- tcg_temp_free_i64(d1);
- tcg_temp_free_i64(d2);
- }
-}
-
-/* Update the Sixty-Four bit (SF) registersize. This logic is derived
+
+ tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
+
+ if (s->be_data == MO_LE) {
+ tcg_gen_extr_i128_i64(s1, s2, cmp);
+ } else {
+ tcg_gen_extr_i128_i64(s2, s1, cmp);
+ }
+ }
+}
+
+/*
+ * Compute the ISS.SF bit for syndrome information if an exception
+ * is taken on a load or store. This indicates whether the instruction
+ * is accessing a 32-bit or 64-bit register. This logic is derived
* from the ARMv8 specs for LDR (Shared decode for all encodings).
*/
-static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
+static bool ldst_iss_sf(int size, bool sign, bool ext)
{
- int opc0 = extract32(opc, 0, 1);
- int regsize;
- if (is_signed) {
- regsize = opc0 ? 32 : 64;
+ if (sign) {
+ /*
+ * Signed loads are 64 bit results if we are not going to
+ * do a zero-extend from 32 to 64 after the load.
+ * (For a store, sign and ext are always false.)
+ */
+ return !ext;
} else {
- regsize = size == 3 ? 64 : 32;
+ /* Unsigned loads/stores work at the specified size */
+ return size == MO_64;
}
- return regsize == 64;
}
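
A few worked cases of the rule above (my reading; the SF values follow the Arm ARM definition of ISS.SF as the width of the transfer register):

    ldrb  w0, [x1]   ->  size = MO_8,  sign = 0, ext = 0  ->  SF = 0
    ldr   x0, [x1]   ->  size = MO_64, sign = 0, ext = 0  ->  SF = 1
    ldrsw x0, [x1]   ->  size = MO_32, sign = 1, ext = 0  ->  SF = 1
    ldrsb w0, [x1]   ->  size = MO_8,  sign = 1, ext = 1  ->  SF = 0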
-/* Load/store exclusive
- *
- * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0
- * +-----+-------------+----+---+----+------+----+-------+------+------+
- * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt |
- * +-----+-------------+----+---+----+------+----+-------+------+------+
- *
- * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
- * L: 0 -> store, 1 -> load
- * o2: 0 -> exclusive, 1 -> not
- * o1: 0 -> single register, 1 -> register pair
- * o0: 1 -> load-acquire/store-release, 0 -> not
- */
-static void disas_ldst_excl(DisasContext *s, uint32_t insn)
+static bool trans_STXR(DisasContext *s, arg_stxr *a)
{
- int rt = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int rt2 = extract32(insn, 10, 5);
- int rs = extract32(insn, 16, 5);
- int is_lasr = extract32(insn, 15, 1);
- int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
- int size = extract32(insn, 30, 2);
- TCGv_i64 tcg_addr;
-
- switch (o2_L_o1_o0) {
- case 0x0: /* STXR */
- case 0x1: /* STLXR */
- if (rn == 31) {
- gen_check_sp_alignment(s);
- }
- if (is_lasr) {
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
- }
- tcg_addr = read_cpu_reg_sp(s, rn, 1);
- gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, false);
- return;
-
- case 0x4: /* LDXR */
- case 0x5: /* LDAXR */
- if (rn == 31) {
- gen_check_sp_alignment(s);
- }
- tcg_addr = read_cpu_reg_sp(s, rn, 1);
- s->is_ldex = true;
- gen_load_exclusive(s, rt, rt2, tcg_addr, size, false);
- if (is_lasr) {
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
- }
- return;
-
- case 0x9: /* STLR */
- /* Generate ISS for non-exclusive accesses including LASR. */
- if (rn == 31) {
- gen_check_sp_alignment(s);
- }
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ if (a->lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
- tcg_addr = read_cpu_reg_sp(s, rn, 1);
- do_gpr_st(s, cpu_reg(s, rt), tcg_addr, size, true, rt,
- disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
- return;
+ }
+ gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
+ return true;
+}
- case 0xd: /* LDAR */
- /* Generate ISS for non-exclusive accesses including LASR. */
- if (rn == 31) {
- gen_check_sp_alignment(s);
- }
- tcg_addr = read_cpu_reg_sp(s, rn, 1);
- do_gpr_ld(s, cpu_reg(s, rt), tcg_addr, size, false, false, true, rt,
- disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
+static bool trans_LDXR(DisasContext *s, arg_stxr *a)
+{
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
+ if (a->lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
- return;
-
- case 0x2: case 0x3: /* CASP / STXP */
- if (size & 2) { /* STXP / STLXP */
- if (rn == 31) {
- gen_check_sp_alignment(s);
- }
- if (is_lasr) {
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
- }
- tcg_addr = read_cpu_reg_sp(s, rn, 1);
- gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true);
- return;
- }
- if (rt2 == 31
- && ((rt | rs) & 1) == 0
- && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
- /* CASP / CASPL */
- gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
- return;
- }
- break;
+ }
+ return true;
+}
- case 0x6: case 0x7: /* CASPA / LDXP */
- if (size & 2) { /* LDXP / LDAXP */
- if (rn == 31) {
- gen_check_sp_alignment(s);
- }
- tcg_addr = read_cpu_reg_sp(s, rn, 1);
- s->is_ldex = true;
- gen_load_exclusive(s, rt, rt2, tcg_addr, size, true);
- if (is_lasr) {
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
- }
- return;
- }
- if (rt2 == 31
- && ((rt | rs) & 1) == 0
- && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
- /* CASPA / CASPAL */
- gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
- return;
- }
- break;
+static bool trans_STLR(DisasContext *s, arg_stlr *a)
+{
+ TCGv_i64 clean_addr;
+ MemOp memop;
+ bool iss_sf = ldst_iss_sf(a->sz, false, false);
- case 0xa: /* CAS */
- case 0xb: /* CASL */
- case 0xe: /* CASA */
- case 0xf: /* CASAL */
- if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
- gen_compare_and_swap(s, rs, rt, rn, size);
- return;
- }
- break;
+ /*
+ * StoreLORelease is the same as Store-Release for QEMU, but
+ * needs the feature-test.
+ */
+ if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
+ return false;
}
- unallocated_encoding(s);
+ /* Generate ISS for non-exclusive accesses including LASR. */
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ memop = check_ordered_align(s, a->rn, 0, true, a->sz);
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
+ true, a->rn != 31, memop);
+ do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
+ iss_sf, a->lasr);
+ return true;
}
-/*
- * Load register (literal)
- *
- * 31 30 29 27 26 25 24 23 5 4 0
- * +-----+-------+---+-----+-------------------+-------+
- * | opc | 0 1 1 | V | 0 0 | imm19 | Rt |
- * +-----+-------+---+-----+-------------------+-------+
- *
- * V: 1 -> vector (simd/fp)
- * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
- * 10-> 32 bit signed, 11 -> prefetch
- * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
- */
-static void disas_ld_lit(DisasContext *s, uint32_t insn)
+static bool trans_LDAR(DisasContext *s, arg_stlr *a)
{
- int rt = extract32(insn, 0, 5);
- int64_t imm = sextract32(insn, 5, 19) << 2;
- bool is_vector = extract32(insn, 26, 1);
- int opc = extract32(insn, 30, 2);
- bool is_signed = false;
- int size = 2;
- TCGv_i64 tcg_rt, tcg_addr;
+ TCGv_i64 clean_addr;
+ MemOp memop;
+ bool iss_sf = ldst_iss_sf(a->sz, false, false);
- if (is_vector) {
- if (opc == 3) {
- unallocated_encoding(s);
- return;
- }
- size = 2 + opc;
- if (!fp_access_check(s)) {
- return;
- }
- } else {
- if (opc == 3) {
- /* PRFM (literal) : prefetch */
- return;
- }
- size = 2 + extract32(opc, 0, 1);
- is_signed = extract32(opc, 1, 1);
+ /* LoadLOAcquire is the same as Load-Acquire for QEMU. */
+ if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
+ return false;
}
+ /* Generate ISS for non-exclusive accesses including LASR. */
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ memop = check_ordered_align(s, a->rn, 0, false, a->sz);
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
+ false, a->rn != 31, memop);
+ do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
+ a->rt, iss_sf, a->lasr);
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ return true;
+}
- tcg_rt = cpu_reg(s, rt);
+static bool trans_STXP(DisasContext *s, arg_stxr *a)
+{
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ if (a->lasr) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ }
+ gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
+ return true;
+}
- tcg_addr = tcg_const_i64((s->pc - 4) + imm);
- if (is_vector) {
- do_fp_ld(s, rt, tcg_addr, size);
- } else {
- /* Only unsigned 32bit loads target 32bit registers. */
- bool iss_sf = opc != 0;
+static bool trans_LDXP(DisasContext *s, arg_stxr *a)
+{
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
+ if (a->lasr) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ }
+ return true;
+}
- do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
- true, rt, iss_sf, false);
+static bool trans_CASP(DisasContext *s, arg_CASP *a)
+{
+ if (!dc_isar_feature(aa64_atomics, s)) {
+ return false;
+ }
+ if (((a->rt | a->rs) & 1) != 0) {
+ return false;
}
- tcg_temp_free_i64(tcg_addr);
+
+ gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
+ return true;
}
-/*
- * LDNP (Load Pair - non-temporal hint)
- * LDP (Load Pair - non vector)
- * LDPSW (Load Pair Signed Word - non vector)
- * STNP (Store Pair - non-temporal hint)
- * STP (Store Pair - non vector)
- * LDNP (Load Pair of SIMD&FP - non-temporal hint)
- * LDP (Load Pair of SIMD&FP)
- * STNP (Store Pair of SIMD&FP - non-temporal hint)
- * STP (Store Pair of SIMD&FP)
- *
- * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0
- * +-----+-------+---+---+-------+---+-----------------------------+
- * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt |
- * +-----+-------+---+---+-------+---+-------+-------+------+------+
- *
- * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit
- * LDPSW 01
- * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
- * V: 0 -> GPR, 1 -> Vector
- * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
- * 10 -> signed offset, 11 -> pre-index
- * L: 0 -> Store 1 -> Load
- *
- * Rt, Rt2 = GPR or SIMD registers to be stored
- * Rn = general purpose register containing address
- * imm7 = signed offset (multiple of 4 or 8 depending on size)
- */
-static void disas_ldst_pair(DisasContext *s, uint32_t insn)
+static bool trans_CAS(DisasContext *s, arg_CAS *a)
{
- int rt = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int rt2 = extract32(insn, 10, 5);
- uint64_t offset = sextract64(insn, 15, 7);
- int index = extract32(insn, 23, 2);
- bool is_vector = extract32(insn, 26, 1);
- bool is_load = extract32(insn, 22, 1);
- int opc = extract32(insn, 30, 2);
+ if (!dc_isar_feature(aa64_atomics, s)) {
+ return false;
+ }
+ gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
+ return true;
+}
- bool is_signed = false;
- bool postindex = false;
- bool wback = false;
+static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
+{
+ bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
+ TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
+ TCGv_i64 clean_addr = tcg_temp_new_i64();
+ MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
- TCGv_i64 tcg_addr; /* calculated address */
- int size;
+ gen_pc_plus_diff(s, clean_addr, a->imm);
+ do_gpr_ld(s, tcg_rt, clean_addr, memop,
+ false, true, a->rt, iss_sf, false);
+ return true;
+}
- if (opc == 3) {
- unallocated_encoding(s);
- return;
- }
+static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
+{
+ /* Load register (literal), vector version */
+ TCGv_i64 clean_addr;
+ MemOp memop;
- if (is_vector) {
- size = 2 + opc;
- } else {
- size = 2 + extract32(opc, 1, 1);
- is_signed = extract32(opc, 0, 1);
- if (!is_load && is_signed) {
- unallocated_encoding(s);
- return;
- }
+ if (!fp_access_check(s)) {
+ return true;
}
+ memop = finalize_memop_asimd(s, a->sz);
+ clean_addr = tcg_temp_new_i64();
+ gen_pc_plus_diff(s, clean_addr, a->imm);
+ do_fp_ld(s, a->rt, clean_addr, memop);
+ return true;
+}
- switch (index) {
- case 1: /* post-index */
- postindex = true;
- wback = true;
- break;
- case 0:
- /* signed offset with "non-temporal" hint. Since we don't emulate
- * caches we don't care about hints to the cache system about
- * data access patterns, and handle this identically to plain
- * signed offset.
- */
- if (is_signed) {
- /* There is no non-temporal-hint version of LDPSW */
- unallocated_encoding(s);
- return;
- }
- postindex = false;
- break;
- case 2: /* signed offset, rn not updated */
- postindex = false;
- break;
- case 3: /* pre-index */
- postindex = false;
- wback = true;
- break;
+static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
+ TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
+ uint64_t offset, bool is_store, MemOp mop)
+{
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
}
- if (is_vector && !fp_access_check(s)) {
- return;
+ *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
+ if (!a->p) {
+ tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
}
- offset <<= size;
+ *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
+ (a->w || a->rn != 31), 2 << a->sz, mop);
+}
- if (rn == 31) {
- gen_check_sp_alignment(s);
+static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
+ TCGv_i64 dirty_addr, uint64_t offset)
+{
+ if (a->w) {
+ if (a->p) {
+ tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
+ }
+ tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
}
+}
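
Reading the flags these two helpers consume (p = post-index, w = writeback, which is my interpretation of the decode rather than something stated here), the three addressing forms work out as:

    stp x0, x1, [sp, #16]    ->  p = 0, w = 0:  access at sp + 16, no writeback
    stp x0, x1, [sp, #16]!   ->  p = 0, w = 1:  access at sp + 16, then sp += 16
    stp x0, x1, [sp], #16    ->  p = 1, w = 1:  access at sp,      then sp += 16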
- tcg_addr = read_cpu_reg_sp(s, rn, 1);
+static bool trans_STP(DisasContext *s, arg_ldstpair *a)
+{
+ uint64_t offset = a->imm << a->sz;
+ TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
+ MemOp mop = finalize_memop(s, a->sz);
- if (!postindex) {
- tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
+ op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
+ tcg_rt = cpu_reg(s, a->rt);
+ tcg_rt2 = cpu_reg(s, a->rt2);
+ /*
+ * We built mop above for the single logical access -- rebuild it
+ * now for the paired operation.
+ *
+ * With LSE2, non-sign-extending pairs are treated atomically if
+ * aligned, and if unaligned one of the pair will be completely
+ * within a 16-byte block and that element will be atomic.
+ * Otherwise each element is separately atomic.
+ * In all cases, issue one operation with the correct atomicity.
+ */
+ mop = a->sz + 1;
+ if (s->align_mem) {
+ mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
}
+ mop = finalize_memop_pair(s, mop);
+ if (a->sz == 2) {
+ TCGv_i64 tmp = tcg_temp_new_i64();
- if (is_vector) {
- if (is_load) {
- do_fp_ld(s, rt, tcg_addr, size);
+ if (s->be_data == MO_LE) {
+ tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
} else {
- do_fp_st(s, rt, tcg_addr, size);
+ tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
}
- tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
- if (is_load) {
- do_fp_ld(s, rt2, tcg_addr, size);
+ tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
+ } else {
+ TCGv_i128 tmp = tcg_temp_new_i128();
+
+ if (s->be_data == MO_LE) {
+ tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
} else {
- do_fp_st(s, rt2, tcg_addr, size);
+ tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
}
- } else {
- TCGv_i64 tcg_rt = cpu_reg(s, rt);
- TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
+ tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
+ }
+ op_addr_ldstpair_post(s, a, dirty_addr, offset);
+ return true;
+}
- if (is_load) {
- TCGv_i64 tmp = tcg_temp_new_i64();
+static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
+{
+ uint64_t offset = a->imm << a->sz;
+ TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
+ MemOp mop = finalize_memop(s, a->sz);
- /* Do not modify tcg_rt before recognizing any exception
- * from the second load.
- */
- do_gpr_ld(s, tmp, tcg_addr, size, is_signed, false,
- false, 0, false, false);
- tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
- do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
- false, 0, false, false);
-
- tcg_gen_mov_i64(tcg_rt, tmp);
- tcg_temp_free_i64(tmp);
+ op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
+ tcg_rt = cpu_reg(s, a->rt);
+ tcg_rt2 = cpu_reg(s, a->rt2);
+
+ /*
+ * We built mop above for the single logical access -- rebuild it
+ * now for the paired operation.
+ *
+ * With LSE2, non-sign-extending pairs are treated atomically if
+ * aligned, and if unaligned one of the pair will be completely
+ * within a 16-byte block and that element will be atomic.
+ * Otherwise each element is separately atomic.
+ * In all cases, issue one operation with the correct atomicity.
+ *
+ * This treats sign-extending loads like zero-extending loads,
+ * since that reuses the most code below.
+ */
+ mop = a->sz + 1;
+ if (s->align_mem) {
+ mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
+ }
+ mop = finalize_memop_pair(s, mop);
+ if (a->sz == 2) {
+ int o2 = s->be_data == MO_LE ? 32 : 0;
+ int o1 = o2 ^ 32;
+
+ tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
+ if (a->sign) {
+ tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
+ tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
} else {
- do_gpr_st(s, tcg_rt, tcg_addr, size,
- false, 0, false, false);
- tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
- do_gpr_st(s, tcg_rt2, tcg_addr, size,
- false, 0, false, false);
+ tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
+ tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
}
- }
+ } else {
+ TCGv_i128 tmp = tcg_temp_new_i128();
- if (wback) {
- if (postindex) {
- tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
+ tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
+ if (s->be_data == MO_LE) {
+ tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
} else {
- tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
+ tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
}
- tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
}
+ op_addr_ldstpair_post(s, a, dirty_addr, offset);
+ return true;
}
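
For the a->sz == 2 case above, trans_LDP performs one 64-bit access and then carves out the two 32-bit halves, picking the half offsets by endianness (o2 is 32 for little-endian data, 0 for big-endian, and o1 = o2 ^ 32). Below is a minimal host-side sketch of that split in plain C -- not the TCG API, and the function name is purely illustrative:

    #include <stdint.h>
    #include <stdio.h>

    /* Split one 64-bit pair load into Rt/Rt2, mirroring the o1/o2 choice above. */
    static void split_pair32(uint64_t pair, int big_endian, int sign,
                             uint64_t *rt, uint64_t *rt2)
    {
        int o2 = big_endian ? 0 : 32;   /* bit offset of Rt2's half */
        int o1 = o2 ^ 32;               /* bit offset of Rt's half */

        if (sign) {                     /* LDPSW: each word is sign-extended */
            *rt2 = (uint64_t)(int64_t)(int32_t)(pair >> o2);
            *rt  = (uint64_t)(int64_t)(int32_t)(pair >> o1);
        } else {                        /* LDP Wt, Wt2: zero-extended */
            *rt2 = (uint32_t)(pair >> o2);
            *rt  = (uint32_t)(pair >> o1);
        }
    }

    int main(void)
    {
        uint64_t rt, rt2;
        /* little-endian LDPSW: Rt gets the low word, Rt2 the high word */
        split_pair32(0x8000000111111111ull, 0, 1, &rt, &rt2);
        printf("rt=%#llx rt2=%#llx\n",
               (unsigned long long)rt, (unsigned long long)rt2);
        return 0;
    }

This prints rt=0x11111111 and rt2=0xffffffff80000001, matching LDPSW's per-word sign extension.
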
-/*
- * Load/store (immediate post-indexed)
- * Load/store (immediate pre-indexed)
- * Load/store (unscaled immediate)
- *
- * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0
- * +----+-------+---+-----+-----+---+--------+-----+------+------+
- * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt |
- * +----+-------+---+-----+-----+---+--------+-----+------+------+
- *
- * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
- 10 -> unprivileged
- * V = 0 -> non-vector
- * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
- * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
- */
-static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
- int opc,
- int size,
- int rt,
- bool is_vector)
+static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
{
- int rn = extract32(insn, 5, 5);
- int imm9 = sextract32(insn, 12, 9);
- int idx = extract32(insn, 10, 2);
- bool is_signed = false;
- bool is_store = false;
- bool is_extended = false;
- bool is_unpriv = (idx == 2);
- bool iss_valid = !is_vector;
- bool post_index;
- bool writeback;
-
- TCGv_i64 tcg_addr;
-
- if (is_vector) {
- size |= (opc & 2) << 1;
- if (size > 4 || is_unpriv) {
- unallocated_encoding(s);
- return;
- }
- is_store = ((opc & 1) == 0);
- if (!fp_access_check(s)) {
- return;
- }
- } else {
- if (size == 3 && opc == 2) {
- /* PRFM - prefetch */
- if (is_unpriv) {
- unallocated_encoding(s);
- return;
- }
- return;
- }
- if (opc == 3 && size > 1) {
- unallocated_encoding(s);
- return;
- }
- is_store = (opc == 0);
- is_signed = extract32(opc, 1, 1);
- is_extended = (size < 3) && extract32(opc, 0, 1);
+ uint64_t offset = a->imm << a->sz;
+ TCGv_i64 clean_addr, dirty_addr;
+ MemOp mop;
+
+ if (!fp_access_check(s)) {
+ return true;
}
- switch (idx) {
- case 0:
- case 2:
- post_index = false;
- writeback = false;
- break;
- case 1:
- post_index = true;
- writeback = true;
- break;
- case 3:
- post_index = false;
- writeback = true;
- break;
- default:
- g_assert_not_reached();
+ /* LSE2 does not merge FP pairs; leave these as separate operations. */
+ mop = finalize_memop_asimd(s, a->sz);
+ op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
+ do_fp_st(s, a->rt, clean_addr, mop);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
+ do_fp_st(s, a->rt2, clean_addr, mop);
+ op_addr_ldstpair_post(s, a, dirty_addr, offset);
+ return true;
+}
+
+static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
+{
+ uint64_t offset = a->imm << a->sz;
+ TCGv_i64 clean_addr, dirty_addr;
+ MemOp mop;
+
+ if (!fp_access_check(s)) {
+ return true;
}
- if (rn == 31) {
+ /* LSE2 does not merge FP pairs; leave these as separate operations. */
+ mop = finalize_memop_asimd(s, a->sz);
+ op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
+ do_fp_ld(s, a->rt, clean_addr, mop);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
+ do_fp_ld(s, a->rt2, clean_addr, mop);
+ op_addr_ldstpair_post(s, a, dirty_addr, offset);
+ return true;
+}
+
+static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
+{
+ TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
+ uint64_t offset = a->imm << LOG2_TAG_GRANULE;
+ MemOp mop;
+ TCGv_i128 tmp;
+
+ /* STGP only comes in one size. */
+ tcg_debug_assert(a->sz == MO_64);
+
+ if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
+ return false;
+ }
+
+ if (a->rn == 31) {
gen_check_sp_alignment(s);
}
- tcg_addr = read_cpu_reg_sp(s, rn, 1);
- if (!post_index) {
- tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
+ dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
+ if (!a->p) {
+ tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
}
- if (is_vector) {
- if (is_store) {
- do_fp_st(s, rt, tcg_addr, size);
- } else {
- do_fp_ld(s, rt, tcg_addr, size);
- }
+ clean_addr = clean_data_tbi(s, dirty_addr);
+ tcg_rt = cpu_reg(s, a->rt);
+ tcg_rt2 = cpu_reg(s, a->rt2);
+
+ /*
+ * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
+ * and one tag operation. We implement it as one single aligned 16-byte
+ * memory operation for convenience. Note that the alignment ensures
+ * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
+ */
+ mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
+
+ tmp = tcg_temp_new_i128();
+ if (s->be_data == MO_LE) {
+ tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
} else {
- TCGv_i64 tcg_rt = cpu_reg(s, rt);
- int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
- bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
+ tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
+ }
+ tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
- if (is_store) {
- do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
- iss_valid, rt, iss_sf, false);
+ /* Perform the tag store, if tag access enabled. */
+ if (s->ata[0]) {
+ if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+ gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
} else {
- do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
- is_signed, is_extended, memidx,
- iss_valid, rt, iss_sf, false);
+ gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
}
}
- if (writeback) {
- TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
- if (post_index) {
- tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
+ op_addr_ldstpair_post(s, a, dirty_addr, offset);
+ return true;
+}
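
The immediate of STGP (and of the tag loads/stores later in this file) is scaled by LOG2_TAG_GRANULE, and the allocation tag covers the 16-byte granule containing the access. A small arithmetic sketch, assuming the usual MTE granule of 16 bytes (LOG2_TAG_GRANULE == 4):

    #include <stdint.h>
    #include <stdio.h>

    #define LOG2_TAG_GRANULE 4                        /* assumed: 16-byte MTE granule */
    #define TAG_GRANULE (1u << LOG2_TAG_GRANULE)

    int main(void)
    {
        int64_t imm7 = -3;                            /* signed 7-bit immediate */
        int64_t offset = imm7 * TAG_GRANULE;          /* scaled byte offset: -48 */
        uint64_t addr = 0x10020 + offset;             /* 0xfff0: granule-aligned */
        uint64_t granule = addr & ~(uint64_t)(TAG_GRANULE - 1);

        printf("offset=%lld addr=%#llx tag granule=%#llx\n",
               (long long)offset, (unsigned long long)addr,
               (unsigned long long)granule);
        return 0;
    }
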
+
+static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
+ TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
+ uint64_t offset, bool is_store, MemOp mop)
+{
+ int memidx;
+
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+
+ *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
+ if (!a->p) {
+ tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
+ }
+ memidx = get_a64_user_mem_index(s, a->unpriv);
+ *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
+ a->w || a->rn != 31,
+ mop, a->unpriv, memidx);
+}
+
+static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
+ TCGv_i64 dirty_addr, uint64_t offset)
+{
+ if (a->w) {
+ if (a->p) {
+ tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
}
- tcg_gen_mov_i64(tcg_rn, tcg_addr);
+ tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
}
}
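
Taken together, op_addr_ldst_imm_pre/_post encode the three immediate addressing forms: when a->p is clear the offset is applied before the access (the plain offset forms, or pre-index if a->w is also set), and when a->p is set the offset is folded in only at writeback time (post-index). A compact host-side model of that arithmetic, with p/w carrying the same meaning as in the decode (the names are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    /* Returns the address used for the access; *wb is the value (if any)
     * written back to the base register. */
    static uint64_t addr_mode(uint64_t base, int64_t imm, int p, int w, uint64_t *wb)
    {
        uint64_t access = p ? base : base + imm;   /* post-index uses the raw base */
        if (w) {
            *wb = p ? base + imm : access;         /* post-index adds imm at writeback */
        } else {
            *wb = base;                            /* no writeback: base unchanged */
        }
        return access;
    }

    int main(void)
    {
        uint64_t wb;
        printf("offset:     %#llx\n",
               (unsigned long long)addr_mode(0x1000, 8, 0, 0, &wb));
        printf("pre-index:  %#llx (wb %#llx)\n",
               (unsigned long long)addr_mode(0x1000, 8, 0, 1, &wb),
               (unsigned long long)wb);
        printf("post-index: %#llx (wb %#llx)\n",
               (unsigned long long)addr_mode(0x1000, 8, 1, 1, &wb),
               (unsigned long long)wb);
        return 0;
    }
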
-/*
- * Load/store (register offset)
- *
- * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0
- * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
- * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt |
- * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
- *
- * For non-vector:
- * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
- * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
- * For vector:
- * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
- * opc<0>: 0 -> store, 1 -> load
- * V: 1 -> vector/simd
- * opt: extend encoding (see DecodeRegExtend)
- * S: if S=1 then scale (essentially index by sizeof(size))
- * Rt: register to transfer into/out of
- * Rn: address register or SP for base
- * Rm: offset register or ZR for offset
- */
-static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
- int opc,
- int size,
- int rt,
- bool is_vector)
+static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
{
- int rn = extract32(insn, 5, 5);
- int shift = extract32(insn, 12, 1);
- int rm = extract32(insn, 16, 5);
- int opt = extract32(insn, 13, 3);
- bool is_signed = false;
- bool is_store = false;
- bool is_extended = false;
+ bool iss_sf, iss_valid = !a->w;
+ TCGv_i64 clean_addr, dirty_addr, tcg_rt;
+ int memidx = get_a64_user_mem_index(s, a->unpriv);
+ MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
- TCGv_i64 tcg_rm;
- TCGv_i64 tcg_addr;
+ op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
- if (extract32(opt, 1, 1) == 0) {
- unallocated_encoding(s);
- return;
+ tcg_rt = cpu_reg(s, a->rt);
+ iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
+
+ do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
+ iss_valid, a->rt, iss_sf, false);
+ op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
+ return true;
+}
+
+static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
+{
+ bool iss_sf, iss_valid = !a->w;
+ TCGv_i64 clean_addr, dirty_addr, tcg_rt;
+ int memidx = get_a64_user_mem_index(s, a->unpriv);
+ MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
+
+ op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
+
+ tcg_rt = cpu_reg(s, a->rt);
+ iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
+
+ do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
+ a->ext, memidx, iss_valid, a->rt, iss_sf, false);
+ op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
+ return true;
+}
+
+static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
+{
+ TCGv_i64 clean_addr, dirty_addr;
+ MemOp mop;
+
+ if (!fp_access_check(s)) {
+ return true;
}
+ mop = finalize_memop_asimd(s, a->sz);
+ op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
+ do_fp_st(s, a->rt, clean_addr, mop);
+ op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
+ return true;
+}
- if (is_vector) {
- size |= (opc & 2) << 1;
- if (size > 4) {
- unallocated_encoding(s);
- return;
- }
- is_store = !extract32(opc, 0, 1);
- if (!fp_access_check(s)) {
- return;
- }
- } else {
- if (size == 3 && opc == 2) {
- /* PRFM - prefetch */
- return;
- }
- if (opc == 3 && size > 1) {
- unallocated_encoding(s);
- return;
- }
- is_store = (opc == 0);
- is_signed = extract32(opc, 1, 1);
- is_extended = (size < 3) && extract32(opc, 0, 1);
+static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
+{
+ TCGv_i64 clean_addr, dirty_addr;
+ MemOp mop;
+
+ if (!fp_access_check(s)) {
+ return true;
}
+ mop = finalize_memop_asimd(s, a->sz);
+ op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
+ do_fp_ld(s, a->rt, clean_addr, mop);
+ op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
+ return true;
+}
- if (rn == 31) {
+static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
+ TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
+ bool is_store, MemOp memop)
+{
+ TCGv_i64 tcg_rm;
+
+ if (a->rn == 31) {
gen_check_sp_alignment(s);
}
- tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
- tcg_rm = read_cpu_reg(s, rm, 1);
- ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
+ tcg_rm = read_cpu_reg(s, a->rm, 1);
+ ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
- tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
+ tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
+ *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
+}
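
ext_and_shift_reg applies the DecodeRegExtend option encoded in a->opt and, when a->s is set, shifts the extended offset left by the access size. A hedged standalone equivalent (option values follow the A64 DecodeRegExtend pseudocode; only the option<1> == 1 forms accepted by trans_LDR/trans_STR above are handled):

    #include <stdint.h>
    #include <stdio.h>

    /* option: 2=UXTW, 3=LSL/UXTX, 6=SXTW, 7=SXTX; shift = size when S=1, else 0 */
    static uint64_t ext_and_shift(uint64_t rm, int option, int shift)
    {
        uint64_t v;

        switch (option) {
        case 2: v = (uint32_t)rm;                   break;  /* UXTW */
        case 3: v = rm;                             break;  /* LSL (UXTX) */
        case 6: v = (uint64_t)(int64_t)(int32_t)rm; break;  /* SXTW */
        case 7: v = rm;                             break;  /* SXTX */
        default: return 0;                                  /* not valid here */
        }
        return v << shift;
    }

    int main(void)
    {
        /* e.g. LDR x0, [x1, w2, SXTW #3]: a negative W offset scaled by 8 */
        uint64_t off = ext_and_shift(0xfffffffcu, 6, 3);    /* (int32_t)-4 << 3 = -32 */
        printf("offset = %lld\n", (long long)off);
        return 0;
    }
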
- if (is_vector) {
- if (is_store) {
- do_fp_st(s, rt, tcg_addr, size);
- } else {
- do_fp_ld(s, rt, tcg_addr, size);
- }
- } else {
- TCGv_i64 tcg_rt = cpu_reg(s, rt);
- bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
- if (is_store) {
- do_gpr_st(s, tcg_rt, tcg_addr, size,
- true, rt, iss_sf, false);
- } else {
- do_gpr_ld(s, tcg_rt, tcg_addr, size,
- is_signed, is_extended,
- true, rt, iss_sf, false);
- }
+static bool trans_LDR(DisasContext *s, arg_ldst *a)
+{
+ TCGv_i64 clean_addr, dirty_addr, tcg_rt;
+ bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
+ MemOp memop;
+
+ if (extract32(a->opt, 1, 1) == 0) {
+ return false;
}
+
+ memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
+ op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
+ tcg_rt = cpu_reg(s, a->rt);
+ do_gpr_ld(s, tcg_rt, clean_addr, memop,
+ a->ext, true, a->rt, iss_sf, false);
+ return true;
}
-/*
- * Load/store (unsigned immediate)
- *
- * 31 30 29 27 26 25 24 23 22 21 10 9 5
- * +----+-------+---+-----+-----+------------+-------+------+
- * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt |
- * +----+-------+---+-----+-----+------------+-------+------+
- *
- * For non-vector:
- * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
- * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
- * For vector:
- * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
- * opc<0>: 0 -> store, 1 -> load
- * Rn: base address register (inc SP)
- * Rt: target register
- */
-static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
- int opc,
- int size,
- int rt,
- bool is_vector)
+static bool trans_STR(DisasContext *s, arg_ldst *a)
{
- int rn = extract32(insn, 5, 5);
- unsigned int imm12 = extract32(insn, 10, 12);
- unsigned int offset;
+ TCGv_i64 clean_addr, dirty_addr, tcg_rt;
+ bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
+ MemOp memop;
- TCGv_i64 tcg_addr;
+ if (extract32(a->opt, 1, 1) == 0) {
+ return false;
+ }
- bool is_store;
- bool is_signed = false;
- bool is_extended = false;
+ memop = finalize_memop(s, a->sz);
+ op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
+ tcg_rt = cpu_reg(s, a->rt);
+ do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
+ return true;
+}
- if (is_vector) {
- size |= (opc & 2) << 1;
- if (size > 4) {
- unallocated_encoding(s);
- return;
- }
- is_store = !extract32(opc, 0, 1);
- if (!fp_access_check(s)) {
- return;
- }
- } else {
- if (size == 3 && opc == 2) {
- /* PRFM - prefetch */
- return;
- }
- if (opc == 3 && size > 1) {
- unallocated_encoding(s);
- return;
- }
- is_store = (opc == 0);
- is_signed = extract32(opc, 1, 1);
- is_extended = (size < 3) && extract32(opc, 0, 1);
+static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
+{
+ TCGv_i64 clean_addr, dirty_addr;
+ MemOp memop;
+
+ if (extract32(a->opt, 1, 1) == 0) {
+ return false;
}
- if (rn == 31) {
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ memop = finalize_memop_asimd(s, a->sz);
+ op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
+ do_fp_ld(s, a->rt, clean_addr, memop);
+ return true;
+}
+
+static bool trans_STR_v(DisasContext *s, arg_ldst *a)
+{
+ TCGv_i64 clean_addr, dirty_addr;
+ MemOp memop;
+
+ if (extract32(a->opt, 1, 1) == 0) {
+ return false;
+ }
+
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ memop = finalize_memop_asimd(s, a->sz);
+ op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
+ do_fp_st(s, a->rt, clean_addr, memop);
+ return true;
+}
+
+static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
+ int sign, bool invert)
+{
+ MemOp mop = a->sz | sign;
+ TCGv_i64 clean_addr, tcg_rs, tcg_rt;
+
+ if (a->rn == 31) {
gen_check_sp_alignment(s);
}
- tcg_addr = read_cpu_reg_sp(s, rn, 1);
- offset = imm12 << size;
- tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
+ mop = check_atomic_align(s, a->rn, mop);
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
+ a->rn != 31, mop);
+ tcg_rs = read_cpu_reg(s, a->rs, true);
+ tcg_rt = cpu_reg(s, a->rt);
+ if (invert) {
+ tcg_gen_not_i64(tcg_rs, tcg_rs);
+ }
+ /*
+ * The tcg atomic primitives are all full barriers. Therefore we
+ * can ignore the Acquire and Release bits of this instruction.
+ */
+ fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
- if (is_vector) {
- if (is_store) {
- do_fp_st(s, rt, tcg_addr, size);
- } else {
- do_fp_ld(s, rt, tcg_addr, size);
- }
- } else {
- TCGv_i64 tcg_rt = cpu_reg(s, rt);
- bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
- if (is_store) {
- do_gpr_st(s, tcg_rt, tcg_addr, size,
- true, rt, iss_sf, false);
- } else {
- do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
- true, rt, iss_sf, false);
+ if (mop & MO_SIGN) {
+ switch (a->sz) {
+ case MO_8:
+ tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
+ break;
+ case MO_16:
+ tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
+ break;
+ case MO_32:
+ tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
+ break;
+ case MO_64:
+ break;
+ default:
+ g_assert_not_reached();
}
}
+ return true;
}
-/* Atomic memory operations
- *
- * 31 30 27 26 24 22 21 16 15 12 10 5 0
- * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
- * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt |
- * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+
- *
- * Rt: the result register
- * Rn: base address or SP
- * Rs: the source register for the operation
- * V: vector flag (always 0 as of v8.3)
- * A: acquire flag
- * R: release flag
- */
-static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
- int size, int rt, bool is_vector)
+TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
+TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
+TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
+TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
+TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
+TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
+TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
+TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
+TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
+
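
Note how the MO_SIGN argument only widens the comparison for LDSMAX/LDSMIN: the old value returned in Rt is still zero-extended afterwards by the switch at the end of do_atomic_ld, as a W-register result must be. A host-side model of 8-bit LDSMIN using C11 atomics as a stand-in for the TCG primitives (a sketch, not the emitted code):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* 8-bit LDSMIN: signed min in memory, old value returned zero-extended. */
    static uint64_t ldsminb(_Atomic uint8_t *mem, uint8_t rs)
    {
        uint8_t old = atomic_load(mem);
        while (1) {
            uint8_t min = ((int8_t)rs < (int8_t)old) ? rs : old;
            if (atomic_compare_exchange_weak(mem, &old, min)) {
                break;                  /* 'old' now holds the previous value */
            }
        }
        return old;                     /* zero-extended into the X register */
    }

    int main(void)
    {
        _Atomic uint8_t m = 0x10;       /* +16 */
        uint64_t rt = ldsminb(&m, 0xf0);/* -16 signed, so it wins */
        printf("rt=%#llx mem=%#x\n",
               (unsigned long long)rt, (unsigned)atomic_load(&m));
        return 0;
    }

With these values it prints rt=0x10 mem=0xf0: the signed comparison picks 0xf0 (-16), while Rt receives the old byte zero-extended.
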
+static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
{
- int rs = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int o3_opc = extract32(insn, 12, 4);
- int feature = ARM_FEATURE_V8_ATOMICS;
- TCGv_i64 tcg_rn, tcg_rs;
- AtomicThreeOpFn *fn;
+ bool iss_sf = ldst_iss_sf(a->sz, false, false);
+ TCGv_i64 clean_addr;
+ MemOp mop;
- if (is_vector) {
- unallocated_encoding(s);
- return;
+ if (!dc_isar_feature(aa64_atomics, s) ||
+ !dc_isar_feature(aa64_rcpc_8_3, s)) {
+ return false;
}
- switch (o3_opc) {
- case 000: /* LDADD */
- fn = tcg_gen_atomic_fetch_add_i64;
- break;
- case 001: /* LDCLR */
- fn = tcg_gen_atomic_fetch_and_i64;
- break;
- case 002: /* LDEOR */
- fn = tcg_gen_atomic_fetch_xor_i64;
- break;
- case 003: /* LDSET */
- fn = tcg_gen_atomic_fetch_or_i64;
- break;
- case 004: /* LDSMAX */
- fn = tcg_gen_atomic_fetch_smax_i64;
- break;
- case 005: /* LDSMIN */
- fn = tcg_gen_atomic_fetch_smin_i64;
- break;
- case 006: /* LDUMAX */
- fn = tcg_gen_atomic_fetch_umax_i64;
- break;
- case 007: /* LDUMIN */
- fn = tcg_gen_atomic_fetch_umin_i64;
- break;
- case 010: /* SWP */
- fn = tcg_gen_atomic_xchg_i64;
- break;
- default:
- unallocated_encoding(s);
- return;
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
}
- if (!arm_dc_feature(s, feature)) {
- unallocated_encoding(s);
- return;
+ mop = check_atomic_align(s, a->rn, a->sz);
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
+ a->rn != 31, mop);
+ /*
+ * LDAPR* are a special case because they are a simple load, not a
+ * fetch-and-do-something op.
+ * The architectural consistency requirements here are weaker than
+ * full load-acquire (we only need "load-acquire processor consistent"),
+ * but we choose to implement them as full LDAQ.
+ */
+ do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
+ true, a->rt, iss_sf, true);
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ return true;
+}
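
The "implement Load-AcquirePC as a full acquire" note is the usual trick of substituting a stronger ordering when the weaker one has no direct primitive; the result is still architecturally correct, just possibly slower. The same idea in portable C11, purely as an illustration:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* A plain acquire load is strictly stronger than the acquire-PC
     * ordering LDAPR actually requires, so it is a valid implementation. */
    static uint64_t load_acquire_pc(_Atomic uint64_t *p)
    {
        return atomic_load_explicit(p, memory_order_acquire);
    }

    int main(void)
    {
        _Atomic uint64_t x = 42;
        printf("%llu\n", (unsigned long long)load_acquire_pc(&x));
        return 0;
    }
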
+
+static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
+{
+ TCGv_i64 clean_addr, dirty_addr, tcg_rt;
+ MemOp memop;
+
+ /* Load with pointer authentication */
+ if (!dc_isar_feature(aa64_pauth, s)) {
+ return false;
}
- if (rn == 31) {
+ if (a->rn == 31) {
gen_check_sp_alignment(s);
}
- tcg_rn = cpu_reg_sp(s, rn);
- tcg_rs = read_cpu_reg(s, rs, true);
+ dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
- if (o3_opc == 1) { /* LDCLR */
- tcg_gen_not_i64(tcg_rs, tcg_rs);
+ if (s->pauth_active) {
+ if (!a->m) {
+ gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
+ tcg_constant_i64(0));
+ } else {
+ gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
+ tcg_constant_i64(0));
+ }
}
- /* The tcg atomic primitives are all full barriers. Therefore we
- * can ignore the Acquire and Release bits of this instruction.
+ tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
+
+ memop = finalize_memop(s, MO_64);
+
+ /* Note that "clean" and "dirty" here refer to TBI not PAC. */
+ clean_addr = gen_mte_check1(s, dirty_addr, false,
+ a->w || a->rn != 31, memop);
+
+ tcg_rt = cpu_reg(s, a->rt);
+ do_gpr_ld(s, tcg_rt, clean_addr, memop,
+ /* extend */ false, /* iss_valid */ !a->w,
+ /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
+
+ if (a->w) {
+ tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
+ }
+ return true;
+}
+
+static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
+{
+ TCGv_i64 clean_addr, dirty_addr;
+ MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
+ bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
+
+ if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
+ return false;
+ }
+
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+
+ mop = check_ordered_align(s, a->rn, a->imm, false, mop);
+ dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
+ tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
+ clean_addr = clean_data_tbi(s, dirty_addr);
+
+ /*
+ * Load-AcquirePC semantics; we implement as the slightly more
+ * restrictive Load-Acquire.
*/
- fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
- s->be_data | size | MO_ALIGN);
+ do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
+ a->rt, iss_sf, true);
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ return true;
}
-/* Load/store register (all forms) */
-static void disas_ldst_reg(DisasContext *s, uint32_t insn)
+static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
{
- int rt = extract32(insn, 0, 5);
- int opc = extract32(insn, 22, 2);
- bool is_vector = extract32(insn, 26, 1);
- int size = extract32(insn, 30, 2);
+ TCGv_i64 clean_addr, dirty_addr;
+ MemOp mop = a->sz;
+ bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
- switch (extract32(insn, 24, 2)) {
- case 0:
- if (extract32(insn, 21, 1) == 0) {
- /* Load/store register (unscaled immediate)
- * Load/store immediate pre/post-indexed
- * Load/store register unprivileged
- */
- disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
- return;
- }
- switch (extract32(insn, 10, 2)) {
- case 0:
- disas_ldst_atomic(s, insn, size, rt, is_vector);
- return;
- case 2:
- disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
- return;
- }
- break;
- case 1:
- disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
- return;
+ if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
+ return false;
+ }
+
+ /* TODO: ARMv8.4-LSE SCTLR.nAA */
+
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
}
- unallocated_encoding(s);
+
+ mop = check_ordered_align(s, a->rn, a->imm, true, mop);
+ dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
+ tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
+ clean_addr = clean_data_tbi(s, dirty_addr);
+
+ /* Store-Release semantics */
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
+ return true;
}
-/* AdvSIMD load/store multiple structures
- *
- * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0
- * +---+---+---------------+---+-------------+--------+------+------+------+
- * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt |
- * +---+---+---------------+---+-------------+--------+------+------+------+
- *
- * AdvSIMD load/store multiple structures (post-indexed)
- *
- * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0
- * +---+---+---------------+---+---+---------+--------+------+------+------+
- * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt |
- * +---+---+---------------+---+---+---------+--------+------+------+------+
- *
- * Rt: first (or only) SIMD&FP register to be transferred
- * Rn: base address or SP
- * Rm (post-index only): post-index register (when !31) or size dependent #imm
- */
-static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
+static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
{
- int rt = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int size = extract32(insn, 10, 2);
- int opcode = extract32(insn, 12, 4);
- bool is_store = !extract32(insn, 22, 1);
- bool is_postidx = extract32(insn, 23, 1);
- bool is_q = extract32(insn, 30, 1);
- TCGv_i64 tcg_addr, tcg_rn;
+ TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
+ MemOp endian, align, mop;
- int ebytes = 1 << size;
- int elements = (is_q ? 128 : 64) / (8 << size);
- int rpt; /* num iterations */
- int selem; /* structure elements */
+ int total; /* total bytes */
+ int elements; /* elements per vector */
int r;
+ int size = a->sz;
- if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
- unallocated_encoding(s);
- return;
+ if (!a->p && a->rm != 0) {
+ /* For non-postindexed accesses the Rm field must be 0 */
+ return false;
+ }
+ if (size == 3 && !a->q && a->selem != 1) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
}
- /* From the shared decode logic */
- switch (opcode) {
- case 0x0:
- rpt = 1;
- selem = 4;
- break;
- case 0x2:
- rpt = 4;
- selem = 1;
- break;
- case 0x4:
- rpt = 1;
- selem = 3;
- break;
- case 0x6:
- rpt = 3;
- selem = 1;
- break;
- case 0x7:
- rpt = 1;
- selem = 1;
- break;
- case 0x8:
- rpt = 1;
- selem = 2;
- break;
- case 0xa:
- rpt = 2;
- selem = 1;
- break;
- default:
- unallocated_encoding(s);
- return;
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
}
- if (size == 3 && !is_q && selem != 1) {
- /* reserved */
- unallocated_encoding(s);
- return;
+ /* For our purposes, bytes are always little-endian. */
+ endian = s->be_data;
+ if (size == 0) {
+ endian = MO_LE;
+ }
+
+ total = a->rpt * a->selem * (a->q ? 16 : 8);
+ tcg_rn = cpu_reg_sp(s, a->rn);
+
+ /*
+ * Issue the MTE check vs the logical repeat count, before we
+ * promote consecutive little-endian elements below.
+ */
+ clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
+ finalize_memop_asimd(s, size));
+
+ /*
+ * Consecutive little-endian elements from a single register
+ * can be promoted to a larger little-endian operation.
+ */
+ align = MO_ALIGN;
+ if (a->selem == 1 && endian == MO_LE) {
+ align = pow2_align(size);
+ size = 3;
+ }
+ if (!s->align_mem) {
+ align = 0;
+ }
+ mop = endian | size | align;
+
+ elements = (a->q ? 16 : 8) >> size;
+ tcg_ebytes = tcg_constant_i64(1 << size);
+ for (r = 0; r < a->rpt; r++) {
+ int e;
+ for (e = 0; e < elements; e++) {
+ int xs;
+ for (xs = 0; xs < a->selem; xs++) {
+ int tt = (a->rt + r + xs) % 32;
+ do_vec_ld(s, tt, e, clean_addr, mop);
+ tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
+ }
+ }
+ }
+
+ /*
+ * For non-quad operations, setting a slice of the low 64 bits of
+ * the register clears the high 64 bits (in the ARM ARM pseudocode
+ * this is implicit in the fact that 'rval' is a 64 bit wide
+ * variable). For quad operations, we might still need to zero
+ * the high bits of SVE.
+ */
+ for (r = 0; r < a->rpt * a->selem; r++) {
+ int tt = (a->rt + r) % 32;
+ clear_vec_high(s, a->q, tt);
+ }
+
+ if (a->p) {
+ if (a->rm == 31) {
+ tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
+ } else {
+ tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
+ }
}
+ return true;
+}
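
The bookkeeping in trans_LD_mult is easiest to see with numbers: 'total' is the byte count covered by the single MTE check, 'elements' is per register, and when selem == 1 with little-endian data the per-element size is promoted to 64 bits so each register is filled with one or two loads. A small plain-C sketch mirroring those expressions:

    #include <stdio.h>

    int main(void)
    {
        /* e.g. LD1 {v0.16b-v3.16b}, [x0]: rpt=4, selem=1, q=1, size=0 */
        int rpt = 4, selem = 1, q = 1, size = 0, little_endian = 1;

        int total = rpt * selem * (q ? 16 : 8);      /* bytes covered by the MTE check */
        if (selem == 1 && little_endian) {
            size = 3;                                /* promote to 64-bit accesses */
        }
        int elements = (q ? 16 : 8) >> size;         /* loads issued per register */

        printf("total=%d bytes, %d x %d-byte loads per register\n",
               total, elements, 1 << size);
        return 0;
    }
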
+
+static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
+{
+ TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
+ MemOp endian, align, mop;
+ int total; /* total bytes */
+ int elements; /* elements per vector */
+ int r;
+ int size = a->sz;
+
+ if (!a->p && a->rm != 0) {
+ /* For non-postindexed accesses the Rm field must be 0 */
+ return false;
+ }
+ if (size == 3 && !a->q && a->selem != 1) {
+ return false;
+ }
if (!fp_access_check(s)) {
- return;
+ return true;
}
- if (rn == 31) {
+ if (a->rn == 31) {
gen_check_sp_alignment(s);
}
- tcg_rn = cpu_reg_sp(s, rn);
- tcg_addr = tcg_temp_new_i64();
- tcg_gen_mov_i64(tcg_addr, tcg_rn);
+ /* For our purposes, bytes are always little-endian. */
+ endian = s->be_data;
+ if (size == 0) {
+ endian = MO_LE;
+ }
+
+ total = a->rpt * a->selem * (a->q ? 16 : 8);
+ tcg_rn = cpu_reg_sp(s, a->rn);
- for (r = 0; r < rpt; r++) {
+ /*
+ * Issue the MTE check vs the logical repeat count, before we
+ * promote consecutive little-endian elements below.
+ */
+ clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
+ finalize_memop_asimd(s, size));
+
+ /*
+ * Consecutive little-endian elements from a single register
+ * can be promoted to a larger little-endian operation.
+ */
+ align = MO_ALIGN;
+ if (a->selem == 1 && endian == MO_LE) {
+ align = pow2_align(size);
+ size = 3;
+ }
+ if (!s->align_mem) {
+ align = 0;
+ }
+ mop = endian | size | align;
+
+ elements = (a->q ? 16 : 8) >> size;
+ tcg_ebytes = tcg_constant_i64(1 << size);
+ for (r = 0; r < a->rpt; r++) {
int e;
for (e = 0; e < elements; e++) {
- int tt = (rt + r) % 32;
int xs;
- for (xs = 0; xs < selem; xs++) {
- if (is_store) {
- do_vec_st(s, tt, e, tcg_addr, size);
- } else {
- do_vec_ld(s, tt, e, tcg_addr, size);
-
- /* For non-quad operations, setting a slice of the low
- * 64 bits of the register clears the high 64 bits (in
- * the ARM ARM pseudocode this is implicit in the fact
- * that 'rval' is a 64 bit wide variable).
- * For quad operations, we might still need to zero the
- * high bits of SVE. We optimize by noticing that we only
- * need to do this the first time we touch a register.
- */
- if (e == 0 && (r == 0 || xs == selem - 1)) {
- clear_vec_high(s, is_q, tt);
- }
- }
- tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
- tt = (tt + 1) % 32;
+ for (xs = 0; xs < a->selem; xs++) {
+ int tt = (a->rt + r + xs) % 32;
+ do_vec_st(s, tt, e, clean_addr, mop);
+ tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
}
}
}
- if (is_postidx) {
- int rm = extract32(insn, 16, 5);
- if (rm == 31) {
- tcg_gen_mov_i64(tcg_rn, tcg_addr);
+ if (a->p) {
+ if (a->rm == 31) {
+ tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
} else {
- tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
+ tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
}
}
- tcg_temp_free_i64(tcg_addr);
+ return true;
}
-/* AdvSIMD load/store single structure
- *
- * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
- * +---+---+---------------+-----+-----------+-----+---+------+------+------+
- * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt |
- * +---+---+---------------+-----+-----------+-----+---+------+------+------+
- *
- * AdvSIMD load/store single structure (post-indexed)
- *
- * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
- * +---+---+---------------+-----+-----------+-----+---+------+------+------+
- * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt |
- * +---+---+---------------+-----+-----------+-----+---+------+------+------+
- *
- * Rt: first (or only) SIMD&FP register to be transferred
- * Rn: base address or SP
- * Rm (post-index only): post-index register (when !31) or size dependent #imm
- * index = encoded in Q:S:size dependent on size
- *
- * lane_size = encoded in R, opc
- * transfer width = encoded in opc, S, size
- */
-static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
+static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
{
- int rt = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int size = extract32(insn, 10, 2);
- int S = extract32(insn, 12, 1);
- int opc = extract32(insn, 13, 3);
- int R = extract32(insn, 21, 1);
- int is_load = extract32(insn, 22, 1);
- int is_postidx = extract32(insn, 23, 1);
- int is_q = extract32(insn, 30, 1);
+ int xs, total, rt;
+ TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
+ MemOp mop;
- int scale = extract32(opc, 1, 2);
- int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
- bool replicate = false;
- int index = is_q << 3 | S << 2 | size;
- int ebytes, xs;
- TCGv_i64 tcg_addr, tcg_rn;
+ if (!a->p && a->rm != 0) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
- switch (scale) {
- case 3:
- if (!is_load || S) {
- unallocated_encoding(s);
- return;
- }
- scale = size;
- replicate = true;
- break;
- case 0:
- break;
- case 1:
- if (extract32(size, 0, 1)) {
- unallocated_encoding(s);
- return;
- }
- index >>= 1;
- break;
- case 2:
- if (extract32(size, 1, 1)) {
- unallocated_encoding(s);
- return;
- }
- if (!extract32(size, 0, 1)) {
- index >>= 2;
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+
+ total = a->selem << a->scale;
+ tcg_rn = cpu_reg_sp(s, a->rn);
+
+ mop = finalize_memop_asimd(s, a->scale);
+ clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
+ total, mop);
+
+ tcg_ebytes = tcg_constant_i64(1 << a->scale);
+ for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
+ do_vec_st(s, rt, a->index, clean_addr, mop);
+ tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
+ }
+
+ if (a->p) {
+ if (a->rm == 31) {
+ tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
} else {
- if (S) {
- unallocated_encoding(s);
- return;
- }
- index >>= 3;
- scale = 3;
+ tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
}
- break;
- default:
- g_assert_not_reached();
}
+ return true;
+}
+
+static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
+{
+ int xs, total, rt;
+ TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
+ MemOp mop;
+
+ if (!a->p && a->rm != 0) {
+ return false;
+ }
if (!fp_access_check(s)) {
- return;
+ return true;
}
- ebytes = 1 << scale;
-
- if (rn == 31) {
+ if (a->rn == 31) {
gen_check_sp_alignment(s);
}
- tcg_rn = cpu_reg_sp(s, rn);
- tcg_addr = tcg_temp_new_i64();
- tcg_gen_mov_i64(tcg_addr, tcg_rn);
+ total = a->selem << a->scale;
+ tcg_rn = cpu_reg_sp(s, a->rn);
- for (xs = 0; xs < selem; xs++) {
- if (replicate) {
- /* Load and replicate to all elements */
- uint64_t mulconst;
- TCGv_i64 tcg_tmp = tcg_temp_new_i64();
+ mop = finalize_memop_asimd(s, a->scale);
+ clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
+ total, mop);
- tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
- get_mem_index(s), s->be_data + scale);
- switch (scale) {
- case 0:
- mulconst = 0x0101010101010101ULL;
- break;
- case 1:
- mulconst = 0x0001000100010001ULL;
- break;
- case 2:
- mulconst = 0x0000000100000001ULL;
- break;
- case 3:
- mulconst = 0;
- break;
- default:
- g_assert_not_reached();
- }
- if (mulconst) {
- tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
- }
- write_vec_element(s, tcg_tmp, rt, 0, MO_64);
- if (is_q) {
- write_vec_element(s, tcg_tmp, rt, 1, MO_64);
- }
- tcg_temp_free_i64(tcg_tmp);
- clear_vec_high(s, is_q, rt);
+ tcg_ebytes = tcg_constant_i64(1 << a->scale);
+ for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
+ do_vec_ld(s, rt, a->index, clean_addr, mop);
+ tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
+ }
+
+ if (a->p) {
+ if (a->rm == 31) {
+ tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
} else {
- /* Load/store one element per register */
- if (is_load) {
- do_vec_ld(s, rt, index, tcg_addr, scale);
- } else {
- do_vec_st(s, rt, index, tcg_addr, scale);
- }
+ tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
}
- tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
- rt = (rt + 1) % 32;
}
+ return true;
+}
- if (is_postidx) {
- int rm = extract32(insn, 16, 5);
- if (rm == 31) {
- tcg_gen_mov_i64(tcg_rn, tcg_addr);
+static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
+{
+ int xs, total, rt;
+ TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
+ MemOp mop;
+
+ if (!a->p && a->rm != 0) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+
+ total = a->selem << a->scale;
+ tcg_rn = cpu_reg_sp(s, a->rn);
+
+ mop = finalize_memop_asimd(s, a->scale);
+ clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
+ total, mop);
+
+ tcg_ebytes = tcg_constant_i64(1 << a->scale);
+ for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
+ /* Load and replicate to all elements */
+ TCGv_i64 tcg_tmp = tcg_temp_new_i64();
+
+ tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
+ tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
+ (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
+ tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
+ }
+
+ if (a->p) {
+ if (a->rm == 31) {
+ tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
} else {
- tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
+ tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
}
}
- tcg_temp_free_i64(tcg_addr);
+ return true;
}
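
The old load-and-replicate path spread the loaded element across the register by multiplying it with a 0x0101...-style constant; the new code lets tcg_gen_gvec_dup_i64 do the same job. The multiplication trick itself is easy to demonstrate standalone:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Replicate an 8-bit element into all eight byte lanes of a 64-bit value. */
        uint64_t elem = 0xab;
        uint64_t bytes = elem * 0x0101010101010101ull;

        /* Same idea for 16-bit and 32-bit elements. */
        uint64_t halves = 0x1234ull * 0x0001000100010001ull;
        uint64_t words = 0xdeadbeefull * 0x0000000100000001ull;

        printf("%016llx %016llx %016llx\n",
               (unsigned long long)bytes,
               (unsigned long long)halves,
               (unsigned long long)words);
        return 0;
    }
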
-/* Loads and stores */
-static void disas_ldst(DisasContext *s, uint32_t insn)
+static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
{
- switch (extract32(insn, 24, 6)) {
- case 0x08: /* Load/store exclusive */
- disas_ldst_excl(s, insn);
- break;
- case 0x18: case 0x1c: /* Load register (literal) */
- disas_ld_lit(s, insn);
- break;
- case 0x28: case 0x29:
- case 0x2c: case 0x2d: /* Load/store pair (all forms) */
- disas_ldst_pair(s, insn);
- break;
- case 0x38: case 0x39:
- case 0x3c: case 0x3d: /* Load/store register (all forms) */
- disas_ldst_reg(s, insn);
- break;
- case 0x0c: /* AdvSIMD load/store multiple structures */
- disas_ldst_multiple_struct(s, insn);
- break;
- case 0x0d: /* AdvSIMD load/store single structure */
- disas_ldst_single_struct(s, insn);
- break;
- default:
- unallocated_encoding(s);
- break;
+ TCGv_i64 addr, clean_addr, tcg_rt;
+ int size = 4 << s->dcz_blocksize;
+
+ if (!dc_isar_feature(aa64_mte, s)) {
+ return false;
+ }
+ if (s->current_el == 0) {
+ return false;
+ }
+
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+
+ addr = read_cpu_reg_sp(s, a->rn, true);
+ tcg_gen_addi_i64(addr, addr, a->imm);
+ tcg_rt = cpu_reg(s, a->rt);
+
+ if (s->ata[0]) {
+ gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
}
+ /*
+ * The non-tags portion of STZGM is mostly like DC_ZVA,
+ * except the alignment happens before the access.
+ */
+ clean_addr = clean_data_tbi(s, addr);
+ tcg_gen_andi_i64(clean_addr, clean_addr, -size);
+ gen_helper_dc_zva(tcg_env, clean_addr);
+ return true;
}
-/* PC-rel. addressing
- * 31 30 29 28 24 23 5 4 0
- * +----+-------+-----------+-------------------+------+
- * | op | immlo | 1 0 0 0 0 | immhi | Rd |
- * +----+-------+-----------+-------------------+------+
- */
-static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
+static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
{
- unsigned int page, rd;
- uint64_t base;
- uint64_t offset;
+ TCGv_i64 addr, clean_addr, tcg_rt;
- page = extract32(insn, 31, 1);
- /* SignExtend(immhi:immlo) -> offset */
- offset = sextract64(insn, 5, 19);
- offset = offset << 2 | extract32(insn, 29, 2);
- rd = extract32(insn, 0, 5);
- base = s->pc - 4;
+ if (!dc_isar_feature(aa64_mte, s)) {
+ return false;
+ }
+ if (s->current_el == 0) {
+ return false;
+ }
- if (page) {
- /* ADRP (page based) */
- base &= ~0xfff;
- offset <<= 12;
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
}
- tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
+ addr = read_cpu_reg_sp(s, a->rn, true);
+ tcg_gen_addi_i64(addr, addr, a->imm);
+ tcg_rt = cpu_reg(s, a->rt);
+
+ if (s->ata[0]) {
+ gen_helper_stgm(tcg_env, addr, tcg_rt);
+ } else {
+ MMUAccessType acc = MMU_DATA_STORE;
+ int size = 4 << s->gm_blocksize;
+
+ clean_addr = clean_data_tbi(s, addr);
+ tcg_gen_andi_i64(clean_addr, clean_addr, -size);
+ gen_probe_access(s, clean_addr, acc, size);
+ }
+ return true;
}
-/*
- * Add/subtract (immediate)
- *
- * 31 30 29 28 24 23 22 21 10 9 5 4 0
- * +--+--+--+-----------+-----+-------------+-----+-----+
- * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd |
- * +--+--+--+-----------+-----+-------------+-----+-----+
- *
- * sf: 0 -> 32bit, 1 -> 64bit
- * op: 0 -> add , 1 -> sub
- * S: 1 -> set flags
- * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
- */
-static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
+static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- uint64_t imm = extract32(insn, 10, 12);
- int shift = extract32(insn, 22, 2);
- bool setflags = extract32(insn, 29, 1);
- bool sub_op = extract32(insn, 30, 1);
- bool is_64bit = extract32(insn, 31, 1);
+ TCGv_i64 addr, clean_addr, tcg_rt;
- TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
- TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
- TCGv_i64 tcg_result;
+ if (!dc_isar_feature(aa64_mte, s)) {
+ return false;
+ }
+ if (s->current_el == 0) {
+ return false;
+ }
- switch (shift) {
- case 0x0:
- break;
- case 0x1:
- imm <<= 12;
- break;
- default:
- unallocated_encoding(s);
- return;
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
}
- tcg_result = tcg_temp_new_i64();
- if (!setflags) {
- if (sub_op) {
- tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
+ addr = read_cpu_reg_sp(s, a->rn, true);
+ tcg_gen_addi_i64(addr, addr, a->imm);
+ tcg_rt = cpu_reg(s, a->rt);
+
+ if (s->ata[0]) {
+ gen_helper_ldgm(tcg_rt, tcg_env, addr);
+ } else {
+ MMUAccessType acc = MMU_DATA_LOAD;
+ int size = 4 << s->gm_blocksize;
+
+ clean_addr = clean_data_tbi(s, addr);
+ tcg_gen_andi_i64(clean_addr, clean_addr, -size);
+ gen_probe_access(s, clean_addr, acc, size);
+ /* The result tags are zeros. */
+ tcg_gen_movi_i64(tcg_rt, 0);
+ }
+ return true;
+}
+
+static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
+{
+ TCGv_i64 addr, clean_addr, tcg_rt;
+
+ if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
+ return false;
+ }
+
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+
+ addr = read_cpu_reg_sp(s, a->rn, true);
+ if (!a->p) {
+ /* pre-index or signed offset */
+ tcg_gen_addi_i64(addr, addr, a->imm);
+ }
+
+ tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
+ tcg_rt = cpu_reg(s, a->rt);
+ if (s->ata[0]) {
+ gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
+ } else {
+ /*
+         * Tag access disabled: we must check for aborts on the load
+         * from [rn+offset], and then insert a 0 tag into rt.
+ */
+ clean_addr = clean_data_tbi(s, addr);
+ gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
+ gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
+ }
+
+ if (a->w) {
+ /* pre-index or post-index */
+ if (a->p) {
+ /* post-index */
+ tcg_gen_addi_i64(addr, addr, a->imm);
+ }
+ tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
+ }
+ return true;
+}
+
+static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
+{
+ TCGv_i64 addr, tcg_rt;
+
+ if (a->rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+
+ addr = read_cpu_reg_sp(s, a->rn, true);
+ if (!a->p) {
+ /* pre-index or signed offset */
+ tcg_gen_addi_i64(addr, addr, a->imm);
+ }
+ tcg_rt = cpu_reg_sp(s, a->rt);
+ if (!s->ata[0]) {
+ /*
+ * For STG and ST2G, we need to check alignment and probe memory.
+ * TODO: For STZG and STZ2G, we could rely on the stores below,
+ * at least for system mode; user-only won't enforce alignment.
+ */
+ if (is_pair) {
+ gen_helper_st2g_stub(tcg_env, addr);
+ } else {
+ gen_helper_stg_stub(tcg_env, addr);
+ }
+ } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+ if (is_pair) {
+ gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
} else {
- tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
+ gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
}
} else {
- TCGv_i64 tcg_imm = tcg_const_i64(imm);
- if (sub_op) {
- gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
+ if (is_pair) {
+ gen_helper_st2g(tcg_env, addr, tcg_rt);
} else {
- gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
+ gen_helper_stg(tcg_env, addr, tcg_rt);
}
- tcg_temp_free_i64(tcg_imm);
}
- if (is_64bit) {
- tcg_gen_mov_i64(tcg_rd, tcg_result);
- } else {
- tcg_gen_ext32u_i64(tcg_rd, tcg_result);
+ if (is_zero) {
+ TCGv_i64 clean_addr = clean_data_tbi(s, addr);
+ TCGv_i64 zero64 = tcg_constant_i64(0);
+ TCGv_i128 zero128 = tcg_temp_new_i128();
+ int mem_index = get_mem_index(s);
+ MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
+
+ tcg_gen_concat_i64_i128(zero128, zero64, zero64);
+
+ /* This is 1 or 2 atomic 16-byte operations. */
+ tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
+ if (is_pair) {
+ tcg_gen_addi_i64(clean_addr, clean_addr, 16);
+ tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
+ }
+ }
+
+ if (a->w) {
+ /* pre-index or post-index */
+ if (a->p) {
+ /* post-index */
+ tcg_gen_addi_i64(addr, addr, a->imm);
+ }
+ tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
+ }
+ return true;
+}
+
+TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
+TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
+TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
+TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
+
+typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
+
+static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
+ bool is_setg, SetFn fn)
+{
+ int memidx;
+ uint32_t syndrome, desc = 0;
+
+ if (is_setg && !dc_isar_feature(aa64_mte, s)) {
+ return false;
+ }
+
+ /*
+ * UNPREDICTABLE cases: we choose to UNDEF, which allows
+ * us to pull this check before the CheckMOPSEnabled() test
+ * (which we do in the helper function)
+ */
+ if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
+ a->rd == 31 || a->rn == 31) {
+ return false;
+ }
+
+ memidx = get_a64_user_mem_index(s, a->unpriv);
+
+ /*
+ * We pass option_a == true, matching our implementation;
+ * we pass wrong_option == false: helper function may set that bit.
+ */
+ syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
+ is_epilogue, false, true, a->rd, a->rs, a->rn);
+
+ if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
+ /* We may need to do MTE tag checking, so assemble the descriptor */
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
+ /* SIZEM1 and ALIGN we leave 0 (byte write) */
+ }
+ /* The helper function always needs the memidx even with MTE disabled */
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
+
+ /*
+ * The helper needs the register numbers, but since they're in
+ * the syndrome anyway, we let it extract them from there rather
+ * than passing in an extra three integer arguments.
+ */
+ fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
+ return true;
+}
+
+TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
+TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
+TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
+TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
+TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
+TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
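
SETP/SETM/SETE (and the SETG*/CPY* families) are the prologue, main and epilogue steps of one architecturally defined memset/memcpy loop; the intermediate state between steps lives entirely in the Xd/Xn/Xs registers, which is why the helper only needs the syndrome plus a memory-index descriptor. A rough structural analogy in plain C -- a sketch of the three-phase split, not the architected algorithm or its register encoding:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* Analogy only: split one memset into three resumable phases,
     * with the remaining (dst, len) carried between them like Xd/Xn. */
    struct setstate { unsigned char *dst; size_t len; };

    static void set_prologue(struct setstate *st, int c)   /* align the head */
    {
        size_t head = (16 - ((uintptr_t)st->dst & 15)) & 15;
        if (head > st->len) {
            head = st->len;
        }
        memset(st->dst, c, head);
        st->dst += head;
        st->len -= head;
    }

    static void set_main(struct setstate *st, int c)        /* bulk, aligned */
    {
        size_t bulk = st->len & ~(size_t)15;
        memset(st->dst, c, bulk);
        st->dst += bulk;
        st->len -= bulk;
    }

    static void set_epilogue(struct setstate *st, int c)    /* tail */
    {
        memset(st->dst, c, st->len);
        st->dst += st->len;
        st->len = 0;
    }

    int main(void)
    {
        unsigned char buf[100];
        struct setstate st = { buf + 3, 90 };
        set_prologue(&st, 0x5a);
        set_main(&st, 0x5a);
        set_epilogue(&st, 0x5a);
        printf("remaining=%zu\n", st.len);
        return 0;
    }
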
+
+typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
+
+static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
+{
+ int rmemidx, wmemidx;
+ uint32_t syndrome, rdesc = 0, wdesc = 0;
+ bool wunpriv = extract32(a->options, 0, 1);
+ bool runpriv = extract32(a->options, 1, 1);
+
+ /*
+ * UNPREDICTABLE cases: we choose to UNDEF, which allows
+ * us to pull this check before the CheckMOPSEnabled() test
+ * (which we do in the helper function)
+ */
+ if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
+ a->rd == 31 || a->rs == 31 || a->rn == 31) {
+ return false;
+ }
+
+ rmemidx = get_a64_user_mem_index(s, runpriv);
+ wmemidx = get_a64_user_mem_index(s, wunpriv);
+
+ /*
+ * We pass option_a == true, matching our implementation;
+ * we pass wrong_option == false: helper function may set that bit.
+ */
+ syndrome = syn_mop(false, false, a->options, is_epilogue,
+ false, true, a->rd, a->rs, a->rn);
+
+ /* If we need to do MTE tag checking, assemble the descriptors */
+ if (s->mte_active[runpriv]) {
+ rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
+ rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
+ }
+ if (s->mte_active[wunpriv]) {
+ wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
+ wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
+ wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
+ }
+ /* The helper function needs these parts of the descriptor regardless */
+ rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
+ wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
+
+ /*
+ * The helper needs the register numbers, but since they're in
+ * the syndrome anyway, we let it extract them from there rather
+ * than passing in an extra three integer arguments.
+ */
+ fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
+ tcg_constant_i32(rdesc));
+ return true;
+}
+
+TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
+TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
+TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
+TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
+TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
+TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
+
+typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
+
+static bool gen_rri(DisasContext *s, arg_rri_sf *a,
+ bool rd_sp, bool rn_sp, ArithTwoOp *fn)
+{
+ TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
+ TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
+ TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
+
+ fn(tcg_rd, tcg_rn, tcg_imm);
+ if (!a->sf) {
+ tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+ }
+ return true;
+}
+
+/*
+ * PC-rel. addressing
+ */
+
+static bool trans_ADR(DisasContext *s, arg_ri *a)
+{
+ gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
+ return true;
+}
+
+static bool trans_ADRP(DisasContext *s, arg_ri *a)
+{
+ int64_t offset = (int64_t)a->imm << 12;
+
+ /* The page offset is ok for CF_PCREL. */
+ offset -= s->pc_curr & 0xfff;
+ gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
+ return true;
+}
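
trans_ADRP subtracts the low 12 bits of the current PC so that gen_pc_plus_diff produces the page-aligned result the instruction defines: Rd = (PC & ~0xfff) + (imm << 12). A short worked check of that identity:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t pc = 0x400123456ull;
        int64_t imm = 5;                                      /* signed page count */

        int64_t diff = (imm << 12) - (int64_t)(pc & 0xfff);   /* what the code adds to PC */
        uint64_t rd = pc + diff;
        uint64_t ref = (pc & ~(uint64_t)0xfff) + ((uint64_t)imm << 12);

        printf("rd=%#llx ref=%#llx %s\n",
               (unsigned long long)rd, (unsigned long long)ref,
               rd == ref ? "(match)" : "(mismatch)");
        return 0;
    }
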
+
+/*
+ * Add/subtract (immediate)
+ */
+TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
+TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
+TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
+TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
+
+/*
+ * Add/subtract (immediate, with tags)
+ */
+
+static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
+ bool sub_op)
+{
+ TCGv_i64 tcg_rn, tcg_rd;
+ int imm;
+
+ imm = a->uimm6 << LOG2_TAG_GRANULE;
+ if (sub_op) {
+ imm = -imm;
}
- tcg_temp_free_i64(tcg_result);
+ tcg_rn = cpu_reg_sp(s, a->rn);
+ tcg_rd = cpu_reg_sp(s, a->rd);
+
+ if (s->ata[0]) {
+ gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
+ tcg_constant_i32(imm),
+ tcg_constant_i32(a->uimm4));
+ } else {
+ tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
+ gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
+ }
+ return true;
}
+TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
+TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
+
/* The input should be a value in the bottom e bits (with higher
* bits zero); returns that value replicated into every element
* of size e in a 64 bit integer.
@@ -3408,14 +4260,12 @@ static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
return mask;
}
-/* Return a value with the bottom len bits set (where 0 < len <= 64) */
-static inline uint64_t bitmask64(unsigned int length)
-{
- assert(length > 0 && length <= 64);
- return ~0ULL >> (64 - length);
-}
+/*
+ * Logical (immediate)
+ */
-/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
+/*
+ * Simplified variant of pseudocode DecodeBitMasks() for the case where we
* only require the wmask. Returns false if the imms/immr/immn are a reserved
* value (ie should cause a guest UNDEF exception), and true if they are
* valid, in which case the decoded bit pattern is written to result.
@@ -3470,10 +4320,10 @@ bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
/* Create the value of one element: s+1 set bits rotated
* by r within the element (which is e bits wide)...
*/
- mask = bitmask64(s + 1);
+ mask = MAKE_64BIT_MASK(0, s + 1);
if (r) {
mask = (mask >> r) | (mask << (e - r));
- mask &= bitmask64(e);
+ mask &= MAKE_64BIT_MASK(0, e);
}
/* ...then replicate the element over the whole 64 bit value */
mask = bitfield_replicate(mask, e);
@@ -3481,295 +4331,215 @@ bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
return true;
}
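
Replacing bitmask64() with MAKE_64BIT_MASK() does not change the construction: take s+1 set bits, rotate them right by r within an e-bit element, then replicate that element across the 64-bit value. A standalone version of the same steps, with the replication written as a shift-or loop equivalent to bitfield_replicate():

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t make_mask(unsigned len)            /* MAKE_64BIT_MASK(0, len) */
    {
        return len >= 64 ? ~0ull : (1ull << len) - 1;
    }

    /* Build the wmask for one (e, s, r) triple: s+1 set bits rotated right
     * by r within an e-bit element, replicated across 64 bits. */
    static uint64_t logic_imm_element(unsigned e, unsigned s, unsigned r)
    {
        uint64_t mask = make_mask(s + 1);

        if (r) {
            mask = (mask >> r) | (mask << (e - r));
            mask &= make_mask(e);
        }
        while (e < 64) {                                /* bitfield_replicate() */
            mask |= mask << e;
            e *= 2;
        }
        return mask;
    }

    int main(void)
    {
        /* e=8, s=0, r=7: one set bit rotated to bit 1 of each byte -> 0x0202... */
        printf("%016llx\n", (unsigned long long)logic_imm_element(8, 0, 7));
        /* e=64, s=31, r=0: the low 32 bits set -> 0x00000000ffffffff */
        printf("%016llx\n", (unsigned long long)logic_imm_element(64, 31, 0));
        return 0;
    }
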
-/* Logical (immediate)
- * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
- * +----+-----+-------------+---+------+------+------+------+
- * | sf | opc | 1 0 0 1 0 0 | N | immr | imms | Rn | Rd |
- * +----+-----+-------------+---+------+------+------+------+
- */
-static void disas_logic_imm(DisasContext *s, uint32_t insn)
+static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
+ void (*fn)(TCGv_i64, TCGv_i64, int64_t))
{
- unsigned int sf, opc, is_n, immr, imms, rn, rd;
TCGv_i64 tcg_rd, tcg_rn;
- uint64_t wmask;
- bool is_and = false;
-
- sf = extract32(insn, 31, 1);
- opc = extract32(insn, 29, 2);
- is_n = extract32(insn, 22, 1);
- immr = extract32(insn, 16, 6);
- imms = extract32(insn, 10, 6);
- rn = extract32(insn, 5, 5);
- rd = extract32(insn, 0, 5);
-
- if (!sf && is_n) {
- unallocated_encoding(s);
- return;
- }
+ uint64_t imm;
- if (opc == 0x3) { /* ANDS */
- tcg_rd = cpu_reg(s, rd);
- } else {
- tcg_rd = cpu_reg_sp(s, rd);
+ /* Some immediate field values are reserved. */
+ if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
+ extract32(a->dbm, 0, 6),
+ extract32(a->dbm, 6, 6))) {
+ return false;
}
- tcg_rn = cpu_reg(s, rn);
-
- if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
- /* some immediate field values are reserved */
- unallocated_encoding(s);
- return;
+ if (!a->sf) {
+ imm &= 0xffffffffull;
}
- if (!sf) {
- wmask &= 0xffffffff;
- }
+ tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
+ tcg_rn = cpu_reg(s, a->rn);
- switch (opc) {
- case 0x3: /* ANDS */
- case 0x0: /* AND */
- tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
- is_and = true;
- break;
- case 0x1: /* ORR */
- tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
- break;
- case 0x2: /* EOR */
- tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
- break;
- default:
- assert(FALSE); /* must handle all above */
- break;
+ fn(tcg_rd, tcg_rn, imm);
+ if (set_cc) {
+ gen_logic_CC(a->sf, tcg_rd);
}
-
- if (!sf && !is_and) {
- /* zero extend final result; we know we can skip this for AND
- * since the immediate had the high 32 bits clear.
- */
+ if (!a->sf) {
tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
}
-
- if (opc == 3) { /* ANDS */
- gen_logic_CC(sf, tcg_rd);
- }
+ return true;
}
+TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
+TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
+TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
+TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
+
/*
* Move wide (immediate)
- *
- * 31 30 29 28 23 22 21 20 5 4 0
- * +--+-----+-------------+-----+----------------+------+
- * |sf| opc | 1 0 0 1 0 1 | hw | imm16 | Rd |
- * +--+-----+-------------+-----+----------------+------+
- *
- * sf: 0 -> 32 bit, 1 -> 64 bit
- * opc: 00 -> N, 10 -> Z, 11 -> K
- * hw: shift/16 (0,16, and sf only 32, 48)
*/
-static void disas_movw_imm(DisasContext *s, uint32_t insn)
+
+static bool trans_MOVZ(DisasContext *s, arg_movw *a)
{
- int rd = extract32(insn, 0, 5);
- uint64_t imm = extract32(insn, 5, 16);
- int sf = extract32(insn, 31, 1);
- int opc = extract32(insn, 29, 2);
- int pos = extract32(insn, 21, 2) << 4;
- TCGv_i64 tcg_rd = cpu_reg(s, rd);
- TCGv_i64 tcg_imm;
+ int pos = a->hw << 4;
+ tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
+ return true;
+}
- if (!sf && (pos >= 32)) {
- unallocated_encoding(s);
- return;
+static bool trans_MOVN(DisasContext *s, arg_movw *a)
+{
+ int pos = a->hw << 4;
+ uint64_t imm = a->imm;
+
+ imm = ~(imm << pos);
+ if (!a->sf) {
+ imm = (uint32_t)imm;
}
+ tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
+ return true;
+}
- switch (opc) {
- case 0: /* MOVN */
- case 2: /* MOVZ */
- imm <<= pos;
- if (opc == 0) {
- imm = ~imm;
- }
- if (!sf) {
- imm &= 0xffffffffu;
- }
- tcg_gen_movi_i64(tcg_rd, imm);
- break;
- case 3: /* MOVK */
- tcg_imm = tcg_const_i64(imm);
- tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
- tcg_temp_free_i64(tcg_imm);
- if (!sf) {
- tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
- }
- break;
- default:
- unallocated_encoding(s);
- break;
+static bool trans_MOVK(DisasContext *s, arg_movw *a)
+{
+ int pos = a->hw << 4;
+ TCGv_i64 tcg_rd, tcg_im;
+
+ tcg_rd = cpu_reg(s, a->rd);
+ tcg_im = tcg_constant_i64(a->imm);
+ tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
+ if (!a->sf) {
+ tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
}
+ return true;
}
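
The three move-wide forms compose as expected: MOVZ places a 16-bit immediate at hw*16 with everything else zero, MOVN is the bitwise NOT of that (truncated to 32 bits when !sf), and MOVK overwrites only the selected 16-bit lane. A worked example building a 64-bit constant the way a compiler would, in plain C:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t movz(uint64_t imm16, int hw) { return imm16 << (hw * 16); }

    static uint64_t movn(uint64_t imm16, int hw, int sf)
    {
        uint64_t v = ~(imm16 << (hw * 16));
        return sf ? v : (uint32_t)v;
    }

    static uint64_t movk(uint64_t rd, uint64_t imm16, int hw)
    {
        int pos = hw * 16;
        return (rd & ~(0xffffull << pos)) | (imm16 << pos);
    }

    int main(void)
    {
        /* x0 = 0x123456789abcdef0 via MOVZ + 3 x MOVK */
        uint64_t x0 = movz(0xdef0, 0);
        x0 = movk(x0, 0x9abc, 1);
        x0 = movk(x0, 0x5678, 2);
        x0 = movk(x0, 0x1234, 3);
        printf("%016llx\n", (unsigned long long)x0);

        /* MOVN w1, #0: 32-bit all-ones, zero-extended to 0x00000000ffffffff */
        printf("%016llx\n", (unsigned long long)movn(0, 0, 0));
        return 0;
    }
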
-/* Bitfield
- * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
- * +----+-----+-------------+---+------+------+------+------+
- * | sf | opc | 1 0 0 1 1 0 | N | immr | imms | Rn | Rd |
- * +----+-----+-------------+---+------+------+------+------+
+/*
+ * Bitfield
*/
-static void disas_bitfield(DisasContext *s, uint32_t insn)
+
+static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
{
- unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
- TCGv_i64 tcg_rd, tcg_tmp;
+ TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
+ TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
+ unsigned int bitsize = a->sf ? 64 : 32;
+ unsigned int ri = a->immr;
+ unsigned int si = a->imms;
+ unsigned int pos, len;
- sf = extract32(insn, 31, 1);
- opc = extract32(insn, 29, 2);
- n = extract32(insn, 22, 1);
- ri = extract32(insn, 16, 6);
- si = extract32(insn, 10, 6);
- rn = extract32(insn, 5, 5);
- rd = extract32(insn, 0, 5);
- bitsize = sf ? 64 : 32;
+ if (si >= ri) {
+ /* Wd<s-r:0> = Wn<s:r> */
+ len = (si - ri) + 1;
+ tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
+ if (!a->sf) {
+ tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+ }
+ } else {
+ /* Wd<32+s-r,32-r> = Wn<s:0> */
+ len = si + 1;
+ pos = (bitsize - ri) & (bitsize - 1);
- if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
- unallocated_encoding(s);
- return;
+ if (len < ri) {
+ /*
+ * Sign extend the destination field from len to fill the
+ * balance of the word. Let the deposit below insert all
+ * of those sign bits.
+ */
+ tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
+ len = ri;
+ }
+
+ /*
+ * We start with zero, and we haven't modified any bits outside
+ * bitsize, therefore no final zero-extension is needed for !sf.
+ */
+ tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
}
+ return true;
+}
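/*
 * Illustrative aliases: with sf == 0, immr == 0, imms == 7 this is SXTB
 * (si >= ri, len == 8, a plain sign-extract of the low byte); when
 * immr > imms it takes the SBFIZ-style deposit path above instead.
 */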
- tcg_rd = cpu_reg(s, rd);
+static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
+{
+ TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
+ TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
+ unsigned int bitsize = a->sf ? 64 : 32;
+ unsigned int ri = a->immr;
+ unsigned int si = a->imms;
+ unsigned int pos, len;
- /* Suppress the zero-extend for !sf. Since RI and SI are constrained
- to be smaller than bitsize, we'll never reference data outside the
- low 32-bits anyway. */
- tcg_tmp = read_cpu_reg(s, rn, 1);
+
- /* Recognize simple(r) extractions. */
if (si >= ri) {
/* Wd<s-r:0> = Wn<s:r> */
len = (si - ri) + 1;
- if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
- tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
- goto done;
- } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
- tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
- return;
- }
- /* opc == 1, BFXIL fall through to deposit */
- tcg_gen_extract_i64(tcg_tmp, tcg_tmp, ri, len);
- pos = 0;
+ tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
} else {
- /* Handle the ri > si case with a deposit
- * Wd<32+s-r,32-r> = Wn<s:0>
- */
+ /* Wd<32+s-r,32-r> = Wn<s:0> */
len = si + 1;
pos = (bitsize - ri) & (bitsize - 1);
+ tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
}
+ return true;
+}
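/*
 * Illustrative alias: "LSR w0, w1, #4" is UBFM with immr == 4, imms == 31,
 * so si >= ri, len == 28 and the extract pulls bits <31:4> down to bit 0.
 */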
- if (opc == 0 && len < ri) {
- /* SBFM: sign extend the destination field from len to fill
- the balance of the word. Let the deposit below insert all
- of those sign bits. */
- tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
- len = ri;
- }
+static bool trans_BFM(DisasContext *s, arg_BFM *a)
+{
+ TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
+ TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
+ unsigned int bitsize = a->sf ? 64 : 32;
+ unsigned int ri = a->immr;
+ unsigned int si = a->imms;
+ unsigned int pos, len;
- if (opc == 1) { /* BFM, BFXIL */
- tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
+
+ if (si >= ri) {
+ /* Wd<s-r:0> = Wn<s:r> */
+ tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
+ len = (si - ri) + 1;
+ pos = 0;
} else {
- /* SBFM or UBFM: We start with zero, and we haven't modified
- any bits outside bitsize, therefore the zero-extension
- below is unneeded. */
- tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
- return;
+ /* Wd<32+s-r,32-r> = Wn<s:0> */
+ len = si + 1;
+ pos = (bitsize - ri) & (bitsize - 1);
}
- done:
- if (!sf) { /* zero extend final result */
+ tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
+ if (!a->sf) {
tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
}
+ return true;
}
-/* Extract
- * 31 30 29 28 23 22 21 20 16 15 10 9 5 4 0
- * +----+------+-------------+---+----+------+--------+------+------+
- * | sf | op21 | 1 0 0 1 1 1 | N | o0 | Rm | imms | Rn | Rd |
- * +----+------+-------------+---+----+------+--------+------+------+
- */
-static void disas_extract(DisasContext *s, uint32_t insn)
+static bool trans_EXTR(DisasContext *s, arg_extract *a)
{
- unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
+ TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
- sf = extract32(insn, 31, 1);
- n = extract32(insn, 22, 1);
- rm = extract32(insn, 16, 5);
- imm = extract32(insn, 10, 6);
- rn = extract32(insn, 5, 5);
- rd = extract32(insn, 0, 5);
- op21 = extract32(insn, 29, 2);
- op0 = extract32(insn, 21, 1);
- bitsize = sf ? 64 : 32;
+ tcg_rd = cpu_reg(s, a->rd);
- if (sf != n || op21 || op0 || imm >= bitsize) {
- unallocated_encoding(s);
+ if (unlikely(a->imm == 0)) {
+ /*
+ * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
+ * so an extract from bit 0 is a special case.
+ */
+ if (a->sf) {
+ tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
+ } else {
+ tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
+ }
} else {
- TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
+ tcg_rm = cpu_reg(s, a->rm);
+ tcg_rn = cpu_reg(s, a->rn);
- tcg_rd = cpu_reg(s, rd);
+ if (a->sf) {
+ /* Specialization to ROR happens in EXTRACT2. */
+ tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
+ } else {
+ TCGv_i32 t0 = tcg_temp_new_i32();
- if (unlikely(imm == 0)) {
- /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
- * so an extract from bit 0 is a special case.
- */
- if (sf) {
- tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
+ tcg_gen_extrl_i64_i32(t0, tcg_rm);
+ if (a->rm == a->rn) {
+ tcg_gen_rotri_i32(t0, t0, a->imm);
} else {
- tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
- }
- } else if (rm == rn) { /* ROR */
- tcg_rm = cpu_reg(s, rm);
- if (sf) {
- tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
- } else {
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(tmp, tcg_rm);
- tcg_gen_rotri_i32(tmp, tmp, imm);
- tcg_gen_extu_i32_i64(tcg_rd, tmp);
- tcg_temp_free_i32(tmp);
- }
- } else {
- tcg_rm = read_cpu_reg(s, rm, sf);
- tcg_rn = read_cpu_reg(s, rn, sf);
- tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
- tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
- tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
- if (!sf) {
- tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t1, tcg_rn);
+ tcg_gen_extract2_i32(t0, t0, t1, a->imm);
}
+ tcg_gen_extu_i32_i64(tcg_rd, t0);
}
}
-}
-
-/* Data processing - immediate */
-static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
-{
- switch (extract32(insn, 23, 6)) {
- case 0x20: case 0x21: /* PC-rel. addressing */
- disas_pc_rel_adr(s, insn);
- break;
- case 0x22: case 0x23: /* Add/subtract (immediate) */
- disas_add_sub_imm(s, insn);
- break;
- case 0x24: /* Logical (immediate) */
- disas_logic_imm(s, insn);
- break;
- case 0x25: /* Move wide (immediate) */
- disas_movw_imm(s, insn);
- break;
- case 0x26: /* Bitfield */
- disas_bitfield(s, insn);
- break;
- case 0x27: /* Extract */
- disas_extract(s, insn);
- break;
- default:
- unallocated_encoding(s);
- break;
- }
+ return true;
}
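/*
 * Illustrative alias: when rm == rn, EXTR is ROR; e.g. "ROR w0, w1, #8"
 * takes the 32-bit rotri_i32 path above, while the 64-bit form and the
 * 32-bit two-register case both go through extract2.
 */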
/* Shift a TCGv src by TCGv shift_amount, put result in dst.
@@ -3804,8 +4574,6 @@ static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
tcg_gen_extrl_i64_i32(t1, shift_amount);
tcg_gen_rotr_i32(t0, t0, t1);
tcg_gen_extu_i32_i64(dst, t0);
- tcg_temp_free_i32(t0);
- tcg_temp_free_i32(t1);
}
break;
default:
@@ -3830,11 +4598,7 @@ static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
if (shift_i == 0) {
tcg_gen_mov_i64(dst, src);
} else {
- TCGv_i64 shift_const;
-
- shift_const = tcg_const_i64(shift_i);
- shift_reg(dst, src, sf, shift_type, shift_const);
- tcg_temp_free_i64(shift_const);
+ shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
}
}
@@ -3952,6 +4716,7 @@ static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
int imm3 = extract32(insn, 10, 3);
int option = extract32(insn, 13, 3);
int rm = extract32(insn, 16, 5);
+ int opt = extract32(insn, 22, 2);
bool setflags = extract32(insn, 29, 1);
bool sub_op = extract32(insn, 30, 1);
bool sf = extract32(insn, 31, 1);
@@ -3960,7 +4725,7 @@ static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
TCGv_i64 tcg_rd;
TCGv_i64 tcg_result;
- if (imm3 > 4) {
+ if (imm3 > 4 || opt != 0) {
unallocated_encoding(s);
return;
}
@@ -3997,8 +4762,6 @@ static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
} else {
tcg_gen_ext32u_i64(tcg_rd, tcg_result);
}
-
- tcg_temp_free_i64(tcg_result);
}
/*
@@ -4061,8 +4824,6 @@ static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
} else {
tcg_gen_ext32u_i64(tcg_rd, tcg_result);
}
-
- tcg_temp_free_i64(tcg_result);
}
/* Data-processing (3 source)
@@ -4120,8 +4881,6 @@ static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
} else {
tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
}
-
- tcg_temp_free_i64(low_bits);
return;
}
@@ -4157,18 +4916,13 @@ static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
if (!sf) {
tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
}
-
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
- tcg_temp_free_i64(tcg_tmp);
}
/* Add/subtract (with carry)
- * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0
- * +--+--+--+------------------------+------+---------+------+-----+
- * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | opcode2 | Rn | Rd |
- * +--+--+--+------------------------+------+---------+------+-----+
- * [000000]
+ * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0
+ * +--+--+--+------------------------+------+-------------+------+-----+
+ * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd |
+ * +--+--+--+------------------------+------+-------------+------+-----+
*/
static void disas_adc_sbc(DisasContext *s, uint32_t insn)
@@ -4176,11 +4930,6 @@ static void disas_adc_sbc(DisasContext *s, uint32_t insn)
unsigned int sf, op, setflags, rm, rn, rd;
TCGv_i64 tcg_y, tcg_rn, tcg_rd;
- if (extract32(insn, 10, 6) != 0) {
- unallocated_encoding(s);
- return;
- }
-
sf = extract32(insn, 31, 1);
op = extract32(insn, 30, 1);
setflags = extract32(insn, 29, 1);
@@ -4192,7 +4941,7 @@ static void disas_adc_sbc(DisasContext *s, uint32_t insn)
tcg_rn = cpu_reg(s, rn);
if (op) {
- tcg_y = new_tmp_a64(s);
+ tcg_y = tcg_temp_new_i64();
tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
} else {
tcg_y = cpu_reg(s, rm);
@@ -4205,6 +4954,81 @@ static void disas_adc_sbc(DisasContext *s, uint32_t insn)
}
}
+/*
+ * Rotate right into flags
+ * 31 30 29 21 15 10 5 4 0
+ * +--+--+--+-----------------+--------+-----------+------+--+------+
+ * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask |
+ * +--+--+--+-----------------+--------+-----------+------+--+------+
+ */
+static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
+{
+ int mask = extract32(insn, 0, 4);
+ int o2 = extract32(insn, 4, 1);
+ int rn = extract32(insn, 5, 5);
+ int imm6 = extract32(insn, 15, 6);
+ int sf_op_s = extract32(insn, 29, 3);
+ TCGv_i64 tcg_rn;
+ TCGv_i32 nzcv;
+
+ if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ tcg_rn = read_cpu_reg(s, rn, 1);
+ tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
+
+ nzcv = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
+
+ if (mask & 8) { /* N */
+ tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
+ }
+ if (mask & 4) { /* Z */
+ tcg_gen_not_i32(cpu_ZF, nzcv);
+ tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
+ }
+ if (mask & 2) { /* C */
+ tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
+ }
+ if (mask & 1) { /* V */
+ tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
+ }
+}
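/*
 * This is RMIF (FEAT_FlagM). Worked example: "RMIF x1, #60, #2" rotates x1
 * right by 60 and, because only mask bit 1 is set, copies bit 1 of the
 * rotated value into C while leaving N, Z and V unchanged.
 */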
+
+/*
+ * Evaluate into flags
+ * 31 30 29 21 15 14 10 5 4 0
+ * +--+--+--+-----------------+---------+----+---------+------+--+------+
+ * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 | Rn |o3| mask |
+ * +--+--+--+-----------------+---------+----+---------+------+--+------+
+ */
+static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
+{
+ int o3_mask = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int o2 = extract32(insn, 15, 6);
+ int sz = extract32(insn, 14, 1);
+ int sf_op_s = extract32(insn, 29, 3);
+ TCGv_i32 tmp;
+ int shift;
+
+ if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
+ !dc_isar_feature(aa64_condm_4, s)) {
+ unallocated_encoding(s);
+ return;
+ }
+ shift = sz ? 16 : 24; /* SETF16 or SETF8 */
+
+ tmp = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
+ tcg_gen_shli_i32(cpu_NF, tmp, shift);
+ tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
+ tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+ tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
+}
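/*
 * SETF8/SETF16 (FEAT_FlagM) treat the low 8 or 16 bits of Wn as a result of
 * that width: e.g. SETF8 with Wn == 0x80 gives N=1, Z=0 and
 * V = Wn<8> ^ Wn<7> = 1, leaving C untouched.
 */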
+
/* Conditional compare (immediate / register)
* 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
* +--+--+--+------------------------+--------+------+----+--+------+--+-----+
@@ -4239,11 +5063,10 @@ static void disas_cc(DisasContext *s, uint32_t insn)
tcg_t0 = tcg_temp_new_i32();
arm_test_cc(&c, cond);
tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
- arm_free_cc(&c);
/* Load the arguments for the new comparison. */
if (is_imm) {
- tcg_y = new_tmp_a64(s);
+ tcg_y = tcg_temp_new_i64();
tcg_gen_movi_i64(tcg_y, y);
} else {
tcg_y = cpu_reg(s, y);
@@ -4257,7 +5080,6 @@ static void disas_cc(DisasContext *s, uint32_t insn)
} else {
gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
}
- tcg_temp_free_i64(tcg_tmp);
/* If COND was false, force the flags to #nzcv. Compute two masks
* to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
@@ -4305,9 +5127,6 @@ static void disas_cc(DisasContext *s, uint32_t insn)
tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
}
}
- tcg_temp_free_i32(tcg_t0);
- tcg_temp_free_i32(tcg_t1);
- tcg_temp_free_i32(tcg_t2);
}
/* Conditional select
@@ -4338,13 +5157,16 @@ static void disas_cond_select(DisasContext *s, uint32_t insn)
tcg_rd = cpu_reg(s, rd);
a64_test_cc(&c, cond);
- zero = tcg_const_i64(0);
+ zero = tcg_constant_i64(0);
if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
/* CSET & CSETM. */
- tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
if (else_inv) {
- tcg_gen_neg_i64(tcg_rd, tcg_rd);
+ tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
+ tcg_rd, c.value, zero);
+ } else {
+ tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
+ tcg_rd, c.value, zero);
}
} else {
TCGv_i64 t_true = cpu_reg(s, rn);
@@ -4359,9 +5181,6 @@ static void disas_cond_select(DisasContext *s, uint32_t insn)
tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
}
- tcg_temp_free_i64(zero);
- a64_free_cc(&c);
-
if (!sf) {
tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
}
@@ -4381,7 +5200,6 @@ static void handle_clz(DisasContext *s, unsigned int sf,
tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
- tcg_temp_free_i32(tcg_tmp32);
}
}
@@ -4399,7 +5217,6 @@ static void handle_cls(DisasContext *s, unsigned int sf,
tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
- tcg_temp_free_i32(tcg_tmp32);
}
}
@@ -4417,7 +5234,6 @@ static void handle_rbit(DisasContext *s, unsigned int sf,
tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
gen_helper_rbit(tcg_tmp32, tcg_tmp32);
tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
- tcg_temp_free_i32(tcg_tmp32);
}
}
@@ -4439,22 +5255,13 @@ static void handle_rev32(DisasContext *s, unsigned int sf,
unsigned int rn, unsigned int rd)
{
TCGv_i64 tcg_rd = cpu_reg(s, rd);
+ TCGv_i64 tcg_rn = cpu_reg(s, rn);
if (sf) {
- TCGv_i64 tcg_tmp = tcg_temp_new_i64();
- TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
-
- /* bswap32_i64 requires zero high word */
- tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
- tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
- tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
- tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
- tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
-
- tcg_temp_free_i64(tcg_tmp);
+ tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
+ tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
} else {
- tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
- tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
+ tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
}
}
@@ -4465,16 +5272,13 @@ static void handle_rev16(DisasContext *s, unsigned int sf,
TCGv_i64 tcg_rd = cpu_reg(s, rd);
TCGv_i64 tcg_tmp = tcg_temp_new_i64();
TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
- TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
+ TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
-
- tcg_temp_free_i64(mask);
- tcg_temp_free_i64(tcg_tmp);
}
/* Data-processing (1 source)
@@ -4485,38 +5289,197 @@ static void handle_rev16(DisasContext *s, unsigned int sf,
*/
static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
{
- unsigned int sf, opcode, rn, rd;
+ unsigned int sf, opcode, opcode2, rn, rd;
+ TCGv_i64 tcg_rd;
- if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
+ if (extract32(insn, 29, 1)) {
unallocated_encoding(s);
return;
}
sf = extract32(insn, 31, 1);
opcode = extract32(insn, 10, 6);
+ opcode2 = extract32(insn, 16, 5);
rn = extract32(insn, 5, 5);
rd = extract32(insn, 0, 5);
- switch (opcode) {
- case 0: /* RBIT */
+#define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
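/* For example MAP(1, 0x01, 0x00) == 0x81, matching the PACIA case below. */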
+
+ switch (MAP(sf, opcode2, opcode)) {
+ case MAP(0, 0x00, 0x00): /* RBIT */
+ case MAP(1, 0x00, 0x00):
handle_rbit(s, sf, rn, rd);
break;
- case 1: /* REV16 */
+ case MAP(0, 0x00, 0x01): /* REV16 */
+ case MAP(1, 0x00, 0x01):
handle_rev16(s, sf, rn, rd);
break;
- case 2: /* REV32 */
+ case MAP(0, 0x00, 0x02): /* REV/REV32 */
+ case MAP(1, 0x00, 0x02):
handle_rev32(s, sf, rn, rd);
break;
- case 3: /* REV64 */
+ case MAP(1, 0x00, 0x03): /* REV64 */
handle_rev64(s, sf, rn, rd);
break;
- case 4: /* CLZ */
+ case MAP(0, 0x00, 0x04): /* CLZ */
+ case MAP(1, 0x00, 0x04):
handle_clz(s, sf, rn, rd);
break;
- case 5: /* CLS */
+ case MAP(0, 0x00, 0x05): /* CLS */
+ case MAP(1, 0x00, 0x05):
handle_cls(s, sf, rn, rd);
break;
+ case MAP(1, 0x01, 0x00): /* PACIA */
+ if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
+ } else if (!dc_isar_feature(aa64_pauth, s)) {
+ goto do_unallocated;
+ }
+ break;
+ case MAP(1, 0x01, 0x01): /* PACIB */
+ if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
+ } else if (!dc_isar_feature(aa64_pauth, s)) {
+ goto do_unallocated;
+ }
+ break;
+ case MAP(1, 0x01, 0x02): /* PACDA */
+ if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
+ } else if (!dc_isar_feature(aa64_pauth, s)) {
+ goto do_unallocated;
+ }
+ break;
+ case MAP(1, 0x01, 0x03): /* PACDB */
+ if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
+ } else if (!dc_isar_feature(aa64_pauth, s)) {
+ goto do_unallocated;
+ }
+ break;
+ case MAP(1, 0x01, 0x04): /* AUTIA */
+ if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
+ } else if (!dc_isar_feature(aa64_pauth, s)) {
+ goto do_unallocated;
+ }
+ break;
+ case MAP(1, 0x01, 0x05): /* AUTIB */
+ if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
+ } else if (!dc_isar_feature(aa64_pauth, s)) {
+ goto do_unallocated;
+ }
+ break;
+ case MAP(1, 0x01, 0x06): /* AUTDA */
+ if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
+ } else if (!dc_isar_feature(aa64_pauth, s)) {
+ goto do_unallocated;
+ }
+ break;
+ case MAP(1, 0x01, 0x07): /* AUTDB */
+ if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
+ } else if (!dc_isar_feature(aa64_pauth, s)) {
+ goto do_unallocated;
+ }
+ break;
+ case MAP(1, 0x01, 0x08): /* PACIZA */
+ if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
+ goto do_unallocated;
+ } else if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
+ }
+ break;
+ case MAP(1, 0x01, 0x09): /* PACIZB */
+ if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
+ goto do_unallocated;
+ } else if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
+ }
+ break;
+ case MAP(1, 0x01, 0x0a): /* PACDZA */
+ if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
+ goto do_unallocated;
+ } else if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
+ }
+ break;
+ case MAP(1, 0x01, 0x0b): /* PACDZB */
+ if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
+ goto do_unallocated;
+ } else if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
+ }
+ break;
+ case MAP(1, 0x01, 0x0c): /* AUTIZA */
+ if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
+ goto do_unallocated;
+ } else if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
+ }
+ break;
+ case MAP(1, 0x01, 0x0d): /* AUTIZB */
+ if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
+ goto do_unallocated;
+ } else if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
+ }
+ break;
+ case MAP(1, 0x01, 0x0e): /* AUTDZA */
+ if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
+ goto do_unallocated;
+ } else if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
+ }
+ break;
+ case MAP(1, 0x01, 0x0f): /* AUTDZB */
+ if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
+ goto do_unallocated;
+ } else if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
+ }
+ break;
+ case MAP(1, 0x01, 0x10): /* XPACI */
+ if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
+ goto do_unallocated;
+ } else if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd);
+ }
+ break;
+ case MAP(1, 0x01, 0x11): /* XPACD */
+ if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
+ goto do_unallocated;
+ } else if (s->pauth_active) {
+ tcg_rd = cpu_reg(s, rd);
+ gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd);
+ }
+ break;
+ default:
+ do_unallocated:
+ unallocated_encoding(s);
+ break;
}
+
+#undef MAP
}
static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
@@ -4526,8 +5489,8 @@ static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
tcg_rd = cpu_reg(s, rd);
if (!sf && is_signed) {
- tcg_n = new_tmp_a64(s);
- tcg_m = new_tmp_a64(s);
+ tcg_n = tcg_temp_new_i64();
+ tcg_m = tcg_temp_new_i64();
tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
} else {
@@ -4557,7 +5520,6 @@ static void handle_shift_reg(DisasContext *s,
tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
- tcg_temp_free_i64(tcg_shift);
}
/* CRC32[BHWX], CRC32C[BHWX] */
@@ -4568,7 +5530,7 @@ static void handle_crc32(DisasContext *s,
TCGv_i64 tcg_acc, tcg_val;
TCGv_i32 tcg_bytes;
- if (!arm_dc_feature(s, ARM_FEATURE_CRC)
+ if (!dc_isar_feature(aa64_crc32, s)
|| (sf == 1 && sz != 3)
|| (sf == 0 && sz == 3)) {
unallocated_encoding(s);
@@ -4592,20 +5554,18 @@ static void handle_crc32(DisasContext *s,
default:
g_assert_not_reached();
}
- tcg_val = new_tmp_a64(s);
+ tcg_val = tcg_temp_new_i64();
tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
}
tcg_acc = cpu_reg(s, rn);
- tcg_bytes = tcg_const_i32(1 << sz);
+ tcg_bytes = tcg_constant_i32(1 << sz);
if (crc32c) {
gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
} else {
gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
}
-
- tcg_temp_free_i32(tcg_bytes);
}
/* Data-processing (2 source)
@@ -4616,25 +5576,68 @@ static void handle_crc32(DisasContext *s,
*/
static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
{
- unsigned int sf, rm, opcode, rn, rd;
+ unsigned int sf, rm, opcode, rn, rd, setflag;
sf = extract32(insn, 31, 1);
+ setflag = extract32(insn, 29, 1);
rm = extract32(insn, 16, 5);
opcode = extract32(insn, 10, 6);
rn = extract32(insn, 5, 5);
rd = extract32(insn, 0, 5);
- if (extract32(insn, 29, 1)) {
+ if (setflag && opcode != 0) {
unallocated_encoding(s);
return;
}
switch (opcode) {
+ case 0: /* SUBP(S) */
+ if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
+ goto do_unallocated;
+ } else {
+ TCGv_i64 tcg_n, tcg_m, tcg_d;
+
+ tcg_n = read_cpu_reg_sp(s, rn, true);
+ tcg_m = read_cpu_reg_sp(s, rm, true);
+ tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
+ tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
+ tcg_d = cpu_reg(s, rd);
+
+ if (setflag) {
+ gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
+ } else {
+ tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
+ }
+ }
+ break;
case 2: /* UDIV */
handle_div(s, false, sf, rm, rn, rd);
break;
case 3: /* SDIV */
handle_div(s, true, sf, rm, rn, rd);
break;
+ case 4: /* IRG */
+ if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
+ goto do_unallocated;
+ }
+ if (s->ata[0]) {
+ gen_helper_irg(cpu_reg_sp(s, rd), tcg_env,
+ cpu_reg_sp(s, rn), cpu_reg(s, rm));
+ } else {
+ gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
+ cpu_reg_sp(s, rn));
+ }
+ break;
+ case 5: /* GMI */
+ if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
+ goto do_unallocated;
+ } else {
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
+ tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
+ tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
+ }
+ break;
case 8: /* LSLV */
handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
break;
@@ -4647,6 +5650,13 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
case 11: /* RORV */
handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
break;
+ case 12: /* PACGA */
+ if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
+ goto do_unallocated;
+ }
+ gen_helper_pacga(cpu_reg(s, rd), tcg_env,
+ cpu_reg(s, rn), cpu_reg_sp(s, rm));
+ break;
case 16:
case 17:
case 18:
@@ -4662,52 +5672,87 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
break;
}
default:
+ do_unallocated:
unallocated_encoding(s);
break;
}
}
-/* Data processing - register */
+/*
+ * Data processing - register
+ * 31 30 29 28 25 21 20 16 10 0
+ * +--+---+--+---+-------+-----+-------+-------+---------+
+ * | |op0| |op1| 1 0 1 | op2 | | op3 | |
+ * +--+---+--+---+-------+-----+-------+-------+---------+
+ */
static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
{
- switch (extract32(insn, 24, 5)) {
- case 0x0a: /* Logical (shifted register) */
- disas_logic_reg(s, insn);
- break;
- case 0x0b: /* Add/subtract */
- if (insn & (1 << 21)) { /* (extended register) */
- disas_add_sub_ext_reg(s, insn);
+ int op0 = extract32(insn, 30, 1);
+ int op1 = extract32(insn, 28, 1);
+ int op2 = extract32(insn, 21, 4);
+ int op3 = extract32(insn, 10, 6);
+
+ if (!op1) {
+ if (op2 & 8) {
+ if (op2 & 1) {
+ /* Add/sub (extended register) */
+ disas_add_sub_ext_reg(s, insn);
+ } else {
+ /* Add/sub (shifted register) */
+ disas_add_sub_reg(s, insn);
+ }
} else {
- disas_add_sub_reg(s, insn);
+ /* Logical (shifted register) */
+ disas_logic_reg(s, insn);
}
- break;
- case 0x1b: /* Data-processing (3 source) */
- disas_data_proc_3src(s, insn);
- break;
- case 0x1a:
- switch (extract32(insn, 21, 3)) {
- case 0x0: /* Add/subtract (with carry) */
+ return;
+ }
+
+ switch (op2) {
+ case 0x0:
+ switch (op3) {
+ case 0x00: /* Add/subtract (with carry) */
disas_adc_sbc(s, insn);
break;
- case 0x2: /* Conditional compare */
- disas_cc(s, insn); /* both imm and reg forms */
- break;
- case 0x4: /* Conditional select */
- disas_cond_select(s, insn);
+
+ case 0x01: /* Rotate right into flags */
+ case 0x21:
+ disas_rotate_right_into_flags(s, insn);
break;
- case 0x6: /* Data-processing */
- if (insn & (1 << 30)) { /* (1 source) */
- disas_data_proc_1src(s, insn);
- } else { /* (2 source) */
- disas_data_proc_2src(s, insn);
- }
+
+ case 0x02: /* Evaluate into flags */
+ case 0x12:
+ case 0x22:
+ case 0x32:
+ disas_evaluate_into_flags(s, insn);
break;
+
default:
- unallocated_encoding(s);
- break;
+ goto do_unallocated;
+ }
+ break;
+
+ case 0x2: /* Conditional compare */
+ disas_cc(s, insn); /* both imm and reg forms */
+ break;
+
+ case 0x4: /* Conditional select */
+ disas_cond_select(s, insn);
+ break;
+
+ case 0x6: /* Data-processing */
+ if (op0) { /* (1 source) */
+ disas_data_proc_1src(s, insn);
+ } else { /* (2 source) */
+ disas_data_proc_2src(s, insn);
}
break;
+ case 0x8 ... 0xf: /* (3 source) */
+ disas_data_proc_3src(s, insn);
+ break;
+
default:
+ do_unallocated:
unallocated_encoding(s);
break;
}
@@ -4718,14 +5763,14 @@ static void handle_fp_compare(DisasContext *s, int size,
bool cmp_with_zero, bool signal_all_nans)
{
TCGv_i64 tcg_flags = tcg_temp_new_i64();
- TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
+ TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
if (size == MO_64) {
TCGv_i64 tcg_vn, tcg_vm;
tcg_vn = read_fp_dreg(s, rn);
if (cmp_with_zero) {
- tcg_vm = tcg_const_i64(0);
+ tcg_vm = tcg_constant_i64(0);
} else {
tcg_vm = read_fp_dreg(s, rm);
}
@@ -4734,8 +5779,6 @@ static void handle_fp_compare(DisasContext *s, int size,
} else {
gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
}
- tcg_temp_free_i64(tcg_vn);
- tcg_temp_free_i64(tcg_vm);
} else {
TCGv_i32 tcg_vn = tcg_temp_new_i32();
TCGv_i32 tcg_vm = tcg_temp_new_i32();
@@ -4765,16 +5808,9 @@ static void handle_fp_compare(DisasContext *s, int size,
default:
g_assert_not_reached();
}
-
- tcg_temp_free_i32(tcg_vn);
- tcg_temp_free_i32(tcg_vm);
}
- tcg_temp_free_ptr(fpst);
-
gen_set_nzcv(tcg_flags);
-
- tcg_temp_free_i64(tcg_flags);
}
/* Floating point compare
@@ -4810,7 +5846,7 @@ static void disas_fp_compare(DisasContext *s, uint32_t insn)
break;
case 3:
size = MO_16;
- if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (dc_isar_feature(aa64_fp16, s)) {
break;
}
/* fallthru */
@@ -4835,7 +5871,6 @@ static void disas_fp_compare(DisasContext *s, uint32_t insn)
static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
{
unsigned int mos, type, rm, cond, rn, op, nzcv;
- TCGv_i64 tcg_flags;
TCGLabel *label_continue = NULL;
int size;
@@ -4861,7 +5896,7 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
break;
case 3:
size = MO_16;
- if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (dc_isar_feature(aa64_fp16, s)) {
break;
}
/* fallthru */
@@ -4879,9 +5914,7 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
label_continue = gen_new_label();
arm_gen_test_cc(cond, label_match);
/* nomatch: */
- tcg_flags = tcg_const_i64(nzcv << 28);
- gen_set_nzcv(tcg_flags);
- tcg_temp_free_i64(tcg_flags);
+ gen_set_nzcv(tcg_constant_i64(nzcv << 28));
tcg_gen_br(label_continue);
gen_set_label(label_match);
}
@@ -4902,9 +5935,9 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
static void disas_fp_csel(DisasContext *s, uint32_t insn)
{
unsigned int mos, type, rm, cond, rn, rd;
- TCGv_i64 t_true, t_false, t_zero;
+ TCGv_i64 t_true, t_false;
DisasCompare64 c;
- TCGMemOp sz;
+ MemOp sz;
mos = extract32(insn, 29, 3);
type = extract32(insn, 22, 2);
@@ -4927,7 +5960,7 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
break;
case 3:
sz = MO_16;
- if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (dc_isar_feature(aa64_fp16, s)) {
break;
}
/* fallthru */
@@ -4947,16 +5980,12 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
read_vec_element(s, t_false, rm, 0, sz);
a64_test_cc(&c, cond);
- t_zero = tcg_const_i64(0);
- tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
- tcg_temp_free_i64(t_zero);
- tcg_temp_free_i64(t_false);
- a64_free_cc(&c);
+ tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
+ t_true, t_false);
/* Note that sregs & hregs write back zeros to the high bits,
and we've already done the zero-extension. */
write_fp_dreg(s, rd, t_true);
- tcg_temp_free_i64(t_true);
}
/* Floating-point data-processing (1 source) - half precision */
@@ -4977,7 +6006,7 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
break;
case 0x3: /* FSQRT */
- fpst = get_fpstatus_ptr(true);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
break;
case 0x8: /* FRINTN */
@@ -4986,99 +6015,108 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
case 0xb: /* FRINTZ */
case 0xc: /* FRINTA */
{
- TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
- fpst = get_fpstatus_ptr(true);
+ TCGv_i32 tcg_rmode;
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ tcg_rmode = gen_set_rmode(opcode & 7, fpst);
gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
-
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- tcg_temp_free_i32(tcg_rmode);
+ gen_restore_rmode(tcg_rmode, fpst);
break;
}
case 0xe: /* FRINTX */
- fpst = get_fpstatus_ptr(true);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
break;
case 0xf: /* FRINTI */
- fpst = get_fpstatus_ptr(true);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
break;
default:
- abort();
+ g_assert_not_reached();
}
write_fp_sreg(s, rd, tcg_res);
-
- if (fpst) {
- tcg_temp_free_ptr(fpst);
- }
- tcg_temp_free_i32(tcg_op);
- tcg_temp_free_i32(tcg_res);
}
/* Floating-point data-processing (1 source) - single precision */
static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
{
+ void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
+ TCGv_i32 tcg_op, tcg_res;
TCGv_ptr fpst;
- TCGv_i32 tcg_op;
- TCGv_i32 tcg_res;
+ int rmode = -1;
- fpst = get_fpstatus_ptr(false);
tcg_op = read_fp_sreg(s, rn);
tcg_res = tcg_temp_new_i32();
switch (opcode) {
case 0x0: /* FMOV */
tcg_gen_mov_i32(tcg_res, tcg_op);
- break;
+ goto done;
case 0x1: /* FABS */
gen_helper_vfp_abss(tcg_res, tcg_op);
- break;
+ goto done;
case 0x2: /* FNEG */
gen_helper_vfp_negs(tcg_res, tcg_op);
- break;
+ goto done;
case 0x3: /* FSQRT */
- gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
+ gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
+ goto done;
+ case 0x6: /* BFCVT */
+ gen_fpst = gen_helper_bfcvt;
break;
case 0x8: /* FRINTN */
case 0x9: /* FRINTP */
case 0xa: /* FRINTM */
case 0xb: /* FRINTZ */
case 0xc: /* FRINTA */
- {
- TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
-
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- gen_helper_rints(tcg_res, tcg_op, fpst);
-
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- tcg_temp_free_i32(tcg_rmode);
+ rmode = opcode & 7;
+ gen_fpst = gen_helper_rints;
break;
- }
case 0xe: /* FRINTX */
- gen_helper_rints_exact(tcg_res, tcg_op, fpst);
+ gen_fpst = gen_helper_rints_exact;
break;
case 0xf: /* FRINTI */
- gen_helper_rints(tcg_res, tcg_op, fpst);
+ gen_fpst = gen_helper_rints;
+ break;
+ case 0x10: /* FRINT32Z */
+ rmode = FPROUNDING_ZERO;
+ gen_fpst = gen_helper_frint32_s;
+ break;
+ case 0x11: /* FRINT32X */
+ gen_fpst = gen_helper_frint32_s;
+ break;
+ case 0x12: /* FRINT64Z */
+ rmode = FPROUNDING_ZERO;
+ gen_fpst = gen_helper_frint64_s;
+ break;
+ case 0x13: /* FRINT64X */
+ gen_fpst = gen_helper_frint64_s;
break;
default:
- abort();
+ g_assert_not_reached();
}
- write_fp_sreg(s, rd, tcg_res);
+ fpst = fpstatus_ptr(FPST_FPCR);
+ if (rmode >= 0) {
+ TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
+ gen_fpst(tcg_res, tcg_op, fpst);
+ gen_restore_rmode(tcg_rmode, fpst);
+ } else {
+ gen_fpst(tcg_res, tcg_op, fpst);
+ }
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i32(tcg_op);
- tcg_temp_free_i32(tcg_res);
+ done:
+ write_fp_sreg(s, rd, tcg_res);
}
/* Floating-point data-processing (1 source) - double precision */
static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
{
+ void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
+ TCGv_i64 tcg_op, tcg_res;
TCGv_ptr fpst;
- TCGv_i64 tcg_op;
- TCGv_i64 tcg_res;
+ int rmode = -1;
switch (opcode) {
case 0x0: /* FMOV */
@@ -5086,50 +6124,62 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
return;
}
- fpst = get_fpstatus_ptr(false);
tcg_op = read_fp_dreg(s, rn);
tcg_res = tcg_temp_new_i64();
switch (opcode) {
case 0x1: /* FABS */
gen_helper_vfp_absd(tcg_res, tcg_op);
- break;
+ goto done;
case 0x2: /* FNEG */
gen_helper_vfp_negd(tcg_res, tcg_op);
- break;
+ goto done;
case 0x3: /* FSQRT */
- gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
- break;
+ gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env);
+ goto done;
case 0x8: /* FRINTN */
case 0x9: /* FRINTP */
case 0xa: /* FRINTM */
case 0xb: /* FRINTZ */
case 0xc: /* FRINTA */
- {
- TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
-
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- gen_helper_rintd(tcg_res, tcg_op, fpst);
-
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- tcg_temp_free_i32(tcg_rmode);
+ rmode = opcode & 7;
+ gen_fpst = gen_helper_rintd;
break;
- }
case 0xe: /* FRINTX */
- gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
+ gen_fpst = gen_helper_rintd_exact;
break;
case 0xf: /* FRINTI */
- gen_helper_rintd(tcg_res, tcg_op, fpst);
+ gen_fpst = gen_helper_rintd;
+ break;
+ case 0x10: /* FRINT32Z */
+ rmode = FPROUNDING_ZERO;
+ gen_fpst = gen_helper_frint32_d;
+ break;
+ case 0x11: /* FRINT32X */
+ gen_fpst = gen_helper_frint32_d;
+ break;
+ case 0x12: /* FRINT64Z */
+ rmode = FPROUNDING_ZERO;
+ gen_fpst = gen_helper_frint64_d;
+ break;
+ case 0x13: /* FRINT64X */
+ gen_fpst = gen_helper_frint64_d;
break;
default:
- abort();
+ g_assert_not_reached();
}
- write_fp_dreg(s, rd, tcg_res);
+ fpst = fpstatus_ptr(FPST_FPCR);
+ if (rmode >= 0) {
+ TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
+ gen_fpst(tcg_res, tcg_op, fpst);
+ gen_restore_rmode(tcg_rmode, fpst);
+ } else {
+ gen_fpst(tcg_res, tcg_op, fpst);
+ }
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i64(tcg_op);
- tcg_temp_free_i64(tcg_res);
+ done:
+ write_fp_dreg(s, rd, tcg_res);
}
static void handle_fp_fcvt(DisasContext *s, int opcode,
@@ -5142,23 +6192,18 @@ static void handle_fp_fcvt(DisasContext *s, int opcode,
if (dtype == 1) {
/* Single to double */
TCGv_i64 tcg_rd = tcg_temp_new_i64();
- gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
+ gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env);
write_fp_dreg(s, rd, tcg_rd);
- tcg_temp_free_i64(tcg_rd);
} else {
/* Single to half */
TCGv_i32 tcg_rd = tcg_temp_new_i32();
TCGv_i32 ahp = get_ahp_flag();
- TCGv_ptr fpst = get_fpstatus_ptr(false);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
/* write_fp_sreg is OK here because top half of tcg_rd is zero */
write_fp_sreg(s, rd, tcg_rd);
- tcg_temp_free_i32(tcg_rd);
- tcg_temp_free_i32(ahp);
- tcg_temp_free_ptr(fpst);
}
- tcg_temp_free_i32(tcg_rn);
break;
}
case 0x1:
@@ -5167,25 +6212,21 @@ static void handle_fp_fcvt(DisasContext *s, int opcode,
TCGv_i32 tcg_rd = tcg_temp_new_i32();
if (dtype == 0) {
/* Double to single */
- gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
+ gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env);
} else {
- TCGv_ptr fpst = get_fpstatus_ptr(false);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
TCGv_i32 ahp = get_ahp_flag();
/* Double to half */
gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
/* write_fp_sreg is OK here because top half of tcg_rd is zero */
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i32(ahp);
}
write_fp_sreg(s, rd, tcg_rd);
- tcg_temp_free_i32(tcg_rd);
- tcg_temp_free_i64(tcg_rn);
break;
}
case 0x3:
{
TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
- TCGv_ptr tcg_fpst = get_fpstatus_ptr(false);
+ TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
TCGv_i32 tcg_ahp = get_ahp_flag();
tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
if (dtype == 0) {
@@ -5193,21 +6234,16 @@ static void handle_fp_fcvt(DisasContext *s, int opcode,
TCGv_i32 tcg_rd = tcg_temp_new_i32();
gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
write_fp_sreg(s, rd, tcg_rd);
- tcg_temp_free_ptr(tcg_fpst);
- tcg_temp_free_i32(tcg_ahp);
- tcg_temp_free_i32(tcg_rd);
} else {
/* Half to double */
TCGv_i64 tcg_rd = tcg_temp_new_i64();
gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
write_fp_dreg(s, rd, tcg_rd);
- tcg_temp_free_i64(tcg_rd);
}
- tcg_temp_free_i32(tcg_rn);
break;
}
default:
- abort();
+ g_assert_not_reached();
}
}
@@ -5219,19 +6255,23 @@ static void handle_fp_fcvt(DisasContext *s, int opcode,
*/
static void disas_fp_1src(DisasContext *s, uint32_t insn)
{
+ int mos = extract32(insn, 29, 3);
int type = extract32(insn, 22, 2);
int opcode = extract32(insn, 15, 6);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
+ if (mos) {
+ goto do_unallocated;
+ }
+
switch (opcode) {
case 0x4: case 0x5: case 0x7:
{
/* FCVT between half, single and double precision */
int dtype = extract32(opcode, 0, 2);
if (type == 2 || dtype == type) {
- unallocated_encoding(s);
- return;
+ goto do_unallocated;
}
if (!fp_access_check(s)) {
return;
@@ -5240,6 +6280,12 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
break;
}
+
+ case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
+ if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
+ goto do_unallocated;
+ }
+ /* fall through */
case 0x0 ... 0x3:
case 0x8 ... 0xc:
case 0xe ... 0xf:
@@ -5249,33 +6295,47 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
if (!fp_access_check(s)) {
return;
}
-
handle_fp_1src_single(s, opcode, rd, rn);
break;
case 1:
if (!fp_access_check(s)) {
return;
}
-
handle_fp_1src_double(s, opcode, rd, rn);
break;
case 3:
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
- unallocated_encoding(s);
- return;
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ goto do_unallocated;
}
if (!fp_access_check(s)) {
return;
}
-
handle_fp_1src_half(s, opcode, rd, rn);
break;
default:
- unallocated_encoding(s);
+ goto do_unallocated;
+ }
+ break;
+
+ case 0x6:
+ switch (type) {
+ case 1: /* BFCVT */
+ if (!dc_isar_feature(aa64_bf16, s)) {
+ goto do_unallocated;
+ }
+ if (!fp_access_check(s)) {
+ return;
+ }
+ handle_fp_1src_single(s, opcode, rd, rn);
+ break;
+ default:
+ goto do_unallocated;
}
break;
+
default:
+ do_unallocated:
unallocated_encoding(s);
break;
}
@@ -5291,7 +6351,7 @@ static void handle_fp_2src_single(DisasContext *s, int opcode,
TCGv_ptr fpst;
tcg_res = tcg_temp_new_i32();
- fpst = get_fpstatus_ptr(false);
+ fpst = fpstatus_ptr(FPST_FPCR);
tcg_op1 = read_fp_sreg(s, rn);
tcg_op2 = read_fp_sreg(s, rm);
@@ -5327,11 +6387,6 @@ static void handle_fp_2src_single(DisasContext *s, int opcode,
}
write_fp_sreg(s, rd, tcg_res);
-
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i32(tcg_op1);
- tcg_temp_free_i32(tcg_op2);
- tcg_temp_free_i32(tcg_res);
}
/* Floating-point data-processing (2 source) - double precision */
@@ -5344,7 +6399,7 @@ static void handle_fp_2src_double(DisasContext *s, int opcode,
TCGv_ptr fpst;
tcg_res = tcg_temp_new_i64();
- fpst = get_fpstatus_ptr(false);
+ fpst = fpstatus_ptr(FPST_FPCR);
tcg_op1 = read_fp_dreg(s, rn);
tcg_op2 = read_fp_dreg(s, rm);
@@ -5380,11 +6435,6 @@ static void handle_fp_2src_double(DisasContext *s, int opcode,
}
write_fp_dreg(s, rd, tcg_res);
-
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
- tcg_temp_free_i64(tcg_res);
}
/* Floating-point data-processing (2 source) - half precision */
@@ -5397,7 +6447,7 @@ static void handle_fp_2src_half(DisasContext *s, int opcode,
TCGv_ptr fpst;
tcg_res = tcg_temp_new_i32();
- fpst = get_fpstatus_ptr(true);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
tcg_op1 = read_fp_hreg(s, rn);
tcg_op2 = read_fp_hreg(s, rm);
@@ -5435,11 +6485,6 @@ static void handle_fp_2src_half(DisasContext *s, int opcode,
}
write_fp_sreg(s, rd, tcg_res);
-
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i32(tcg_op1);
- tcg_temp_free_i32(tcg_op2);
- tcg_temp_free_i32(tcg_res);
}
/* Floating point data-processing (2 source)
@@ -5450,13 +6495,14 @@ static void handle_fp_2src_half(DisasContext *s, int opcode,
*/
static void disas_fp_2src(DisasContext *s, uint32_t insn)
{
+ int mos = extract32(insn, 29, 3);
int type = extract32(insn, 22, 2);
int rd = extract32(insn, 0, 5);
int rn = extract32(insn, 5, 5);
int rm = extract32(insn, 16, 5);
int opcode = extract32(insn, 12, 4);
- if (opcode > 8) {
+ if (opcode > 8 || mos) {
unallocated_encoding(s);
return;
}
@@ -5475,7 +6521,7 @@ static void disas_fp_2src(DisasContext *s, uint32_t insn)
handle_fp_2src_double(s, opcode, rd, rn, rm);
break;
case 3:
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (!dc_isar_feature(aa64_fp16, s)) {
unallocated_encoding(s);
return;
}
@@ -5495,7 +6541,7 @@ static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
{
TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
TCGv_i32 tcg_res = tcg_temp_new_i32();
- TCGv_ptr fpst = get_fpstatus_ptr(false);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
tcg_op1 = read_fp_sreg(s, rn);
tcg_op2 = read_fp_sreg(s, rm);
@@ -5519,12 +6565,6 @@ static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
write_fp_sreg(s, rd, tcg_res);
-
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i32(tcg_op1);
- tcg_temp_free_i32(tcg_op2);
- tcg_temp_free_i32(tcg_op3);
- tcg_temp_free_i32(tcg_res);
}
/* Floating-point data-processing (3 source) - double precision */
@@ -5533,7 +6573,7 @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
{
TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
TCGv_i64 tcg_res = tcg_temp_new_i64();
- TCGv_ptr fpst = get_fpstatus_ptr(false);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
tcg_op1 = read_fp_dreg(s, rn);
tcg_op2 = read_fp_dreg(s, rm);
@@ -5557,12 +6597,6 @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
write_fp_dreg(s, rd, tcg_res);
-
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
- tcg_temp_free_i64(tcg_op3);
- tcg_temp_free_i64(tcg_res);
}
/* Floating-point data-processing (3 source) - half precision */
@@ -5571,7 +6605,7 @@ static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
{
TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
TCGv_i32 tcg_res = tcg_temp_new_i32();
- TCGv_ptr fpst = get_fpstatus_ptr(true);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
tcg_op1 = read_fp_hreg(s, rn);
tcg_op2 = read_fp_hreg(s, rm);
@@ -5595,12 +6629,6 @@ static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
write_fp_sreg(s, rd, tcg_res);
-
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i32(tcg_op1);
- tcg_temp_free_i32(tcg_op2);
- tcg_temp_free_i32(tcg_op3);
- tcg_temp_free_i32(tcg_res);
}
/* Floating point data-processing (3 source)
@@ -5611,6 +6639,7 @@ static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
*/
static void disas_fp_3src(DisasContext *s, uint32_t insn)
{
+ int mos = extract32(insn, 29, 3);
int type = extract32(insn, 22, 2);
int rd = extract32(insn, 0, 5);
int rn = extract32(insn, 5, 5);
@@ -5619,6 +6648,11 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn)
bool o0 = extract32(insn, 15, 1);
bool o1 = extract32(insn, 21, 1);
+ if (mos) {
+ unallocated_encoding(s);
+ return;
+ }
+
switch (type) {
case 0:
if (!fp_access_check(s)) {
@@ -5633,7 +6667,7 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn)
handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
break;
case 3:
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (!dc_isar_feature(aa64_fp16, s)) {
unallocated_encoding(s);
return;
}
@@ -5647,38 +6681,6 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn)
}
}
-/* The imm8 encodes the sign bit, enough bits to represent an exponent in
- * the range 01....1xx to 10....0xx, and the most significant 4 bits of
- * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
- */
-uint64_t vfp_expand_imm(int size, uint8_t imm8)
-{
- uint64_t imm;
-
- switch (size) {
- case MO_64:
- imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
- (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
- extract32(imm8, 0, 6);
- imm <<= 48;
- break;
- case MO_32:
- imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
- (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
- (extract32(imm8, 0, 6) << 3);
- imm <<= 16;
- break;
- case MO_16:
- imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
- (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
- (extract32(imm8, 0, 6) << 6);
- break;
- default:
- g_assert_not_reached();
- }
- return imm;
-}
-
/* Floating point immediate
* 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0
* +---+---+---+-----------+------+---+------------+-------+------+------+
@@ -5688,11 +6690,17 @@ uint64_t vfp_expand_imm(int size, uint8_t imm8)
static void disas_fp_imm(DisasContext *s, uint32_t insn)
{
int rd = extract32(insn, 0, 5);
+ int imm5 = extract32(insn, 5, 5);
int imm8 = extract32(insn, 13, 8);
int type = extract32(insn, 22, 2);
+ int mos = extract32(insn, 29, 3);
uint64_t imm;
- TCGv_i64 tcg_res;
- TCGMemOp sz;
+ MemOp sz;
+
+ if (mos || imm5) {
+ unallocated_encoding(s);
+ return;
+ }
switch (type) {
case 0:
@@ -5703,7 +6711,7 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn)
break;
case 3:
sz = MO_16;
- if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (dc_isar_feature(aa64_fp16, s)) {
break;
}
/* fallthru */
@@ -5717,10 +6725,7 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn)
}
imm = vfp_expand_imm(sz, imm8);
-
- tcg_res = tcg_const_i64(imm);
- write_fp_dreg(s, rd, tcg_res);
- tcg_temp_free_i64(tcg_res);
+ write_fp_dreg(s, rd, tcg_constant_i64(imm));
}
/* Handle floating point <=> fixed point conversions. Note that we can
@@ -5736,14 +6741,14 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
TCGv_i32 tcg_shift, tcg_single;
TCGv_i64 tcg_double;
- tcg_fpstatus = get_fpstatus_ptr(type == 3);
+ tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
- tcg_shift = tcg_const_i32(64 - scale);
+ tcg_shift = tcg_constant_i32(64 - scale);
if (itof) {
TCGv_i64 tcg_int = cpu_reg(s, rn);
if (!sf) {
- TCGv_i64 tcg_extend = new_tmp_a64(s);
+ TCGv_i64 tcg_extend = tcg_temp_new_i64();
if (is_signed) {
tcg_gen_ext32s_i64(tcg_extend, tcg_int);
@@ -5765,7 +6770,6 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
tcg_shift, tcg_fpstatus);
}
write_fp_dreg(s, rd, tcg_double);
- tcg_temp_free_i64(tcg_double);
break;
case 0: /* float32 */
@@ -5778,7 +6782,6 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
tcg_shift, tcg_fpstatus);
}
write_fp_sreg(s, rd, tcg_single);
- tcg_temp_free_i32(tcg_single);
break;
case 3: /* float16 */
@@ -5791,7 +6794,6 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
tcg_shift, tcg_fpstatus);
}
write_fp_sreg(s, rd, tcg_single);
- tcg_temp_free_i32(tcg_single);
break;
default:
@@ -5808,9 +6810,7 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
rmode = FPROUNDING_TIEAWAY;
}
- tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
-
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
+ tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
switch (type) {
case 1: /* float64 */
@@ -5835,7 +6835,6 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
if (!sf) {
tcg_gen_ext32u_i64(tcg_int, tcg_int);
}
- tcg_temp_free_i64(tcg_double);
break;
case 0: /* float32 */
@@ -5858,9 +6857,7 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
tcg_shift, tcg_fpstatus);
}
tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
- tcg_temp_free_i32(tcg_dest);
}
- tcg_temp_free_i32(tcg_single);
break;
case 3: /* float16 */
@@ -5883,21 +6880,15 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
tcg_shift, tcg_fpstatus);
}
tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
- tcg_temp_free_i32(tcg_dest);
}
- tcg_temp_free_i32(tcg_single);
break;
default:
g_assert_not_reached();
}
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
- tcg_temp_free_i32(tcg_rmode);
+ gen_restore_rmode(tcg_rmode, tcg_fpstatus);
}
-
- tcg_temp_free_ptr(tcg_fpstatus);
- tcg_temp_free_i32(tcg_shift);
}
/* Floating point <-> fixed point conversions
@@ -5928,7 +6919,7 @@ static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
case 1: /* float64 */
break;
case 3: /* float16 */
- if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (dc_isar_feature(aa64_fp16, s)) {
break;
}
/* fallthru */
@@ -5974,7 +6965,6 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
tmp = tcg_temp_new_i64();
tcg_gen_ext32u_i64(tmp, tcg_rn);
write_fp_dreg(s, rd, tmp);
- tcg_temp_free_i64(tmp);
break;
case 1:
/* 64 bit */
@@ -5982,7 +6972,7 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
break;
case 2:
/* 64 bit to top half. */
- tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
+ tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd));
clear_vec_high(s, true, rd);
break;
case 3:
@@ -5990,7 +6980,6 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
tmp = tcg_temp_new_i64();
tcg_gen_ext16u_i64(tmp, tcg_rn);
write_fp_dreg(s, rd, tmp);
- tcg_temp_free_i64(tmp);
break;
default:
g_assert_not_reached();
@@ -6001,19 +6990,19 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
switch (type) {
case 0:
/* 32 bit */
- tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
+ tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32));
break;
case 1:
/* 64 bit */
- tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
+ tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64));
break;
case 2:
/* 64 bits from top half */
- tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
+ tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn));
break;
case 3:
/* 16 bit */
- tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
+ tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16));
break;
default:
g_assert_not_reached();
@@ -6021,6 +7010,20 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
}
}
+static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
+{
+ TCGv_i64 t = read_fp_dreg(s, rn);
+ TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
+
+ gen_helper_fjcvtzs(t, t, fpstatus);
+
+ tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
+ tcg_gen_extrh_i64_i32(cpu_ZF, t);
+ tcg_gen_movi_i32(cpu_CF, 0);
+ tcg_gen_movi_i32(cpu_NF, 0);
+ tcg_gen_movi_i32(cpu_VF, 0);
+}
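/*
 * FJCVTZS (FEAT_JSCVT): the low 32 bits of the helper result hold the
 * JavaScript-style truncated conversion written to Wd; the high half feeds
 * ZF, so Z is set only when the conversion was exact, and N, C and V are
 * cleared.
 */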
+
/* Floating point <-> integer conversions
* 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
* +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
@@ -6036,68 +7039,80 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
int type = extract32(insn, 22, 2);
bool sbit = extract32(insn, 29, 1);
bool sf = extract32(insn, 31, 1);
+ bool itof = false;
if (sbit) {
- unallocated_encoding(s);
- return;
+ goto do_unallocated;
}
- if (opcode > 5) {
- /* FMOV */
- bool itof = opcode & 1;
-
- if (rmode >= 2) {
- unallocated_encoding(s);
- return;
+ switch (opcode) {
+ case 2: /* SCVTF */
+ case 3: /* UCVTF */
+ itof = true;
+ /* fallthru */
+ case 4: /* FCVTAS */
+ case 5: /* FCVTAU */
+ if (rmode != 0) {
+ goto do_unallocated;
}
-
- switch (sf << 3 | type << 1 | rmode) {
- case 0x0: /* 32 bit */
- case 0xa: /* 64 bit */
- case 0xd: /* 64 bit to top half of quad */
+ /* fallthru */
+ case 0: /* FCVT[NPMZ]S */
+ case 1: /* FCVT[NPMZ]U */
+ switch (type) {
+ case 0: /* float32 */
+ case 1: /* float64 */
break;
- case 0x6: /* 16-bit float, 32-bit int */
- case 0xe: /* 16-bit float, 64-bit int */
- if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
- break;
+ case 3: /* float16 */
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ goto do_unallocated;
}
- /* fallthru */
+ break;
default:
- /* all other sf/type/rmode combinations are invalid */
- unallocated_encoding(s);
- return;
+ goto do_unallocated;
}
-
if (!fp_access_check(s)) {
return;
}
- handle_fmov(s, rd, rn, type, itof);
- } else {
- /* actual FP conversions */
- bool itof = extract32(opcode, 1, 1);
+ handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
+ break;
- if (rmode != 0 && opcode > 1) {
- unallocated_encoding(s);
- return;
- }
- switch (type) {
- case 0: /* float32 */
- case 1: /* float64 */
- break;
- case 3: /* float16 */
- if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
- break;
+ default:
+ switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
+ case 0b01100110: /* FMOV half <-> 32-bit int */
+ case 0b01100111:
+ case 0b11100110: /* FMOV half <-> 64-bit int */
+ case 0b11100111:
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ goto do_unallocated;
}
/* fallthru */
+ case 0b00000110: /* FMOV 32-bit */
+ case 0b00000111:
+ case 0b10100110: /* FMOV 64-bit */
+ case 0b10100111:
+ case 0b11001110: /* FMOV top half of 128-bit */
+ case 0b11001111:
+ if (!fp_access_check(s)) {
+ return;
+ }
+ itof = opcode & 1;
+ handle_fmov(s, rd, rn, type, itof);
+ break;
+
+ case 0b00111110: /* FJCVTZS */
+ if (!dc_isar_feature(aa64_jscvt, s)) {
+ goto do_unallocated;
+ } else if (fp_access_check(s)) {
+ handle_fjcvtzs(s, rd, rn);
+ }
+ break;
+
default:
+ do_unallocated:
unallocated_encoding(s);
return;
}
-
- if (!fp_access_check(s)) {
- return;
- }
- handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
+ break;
}
}
@@ -6171,8 +7186,6 @@ static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
tcg_gen_shri_i64(tcg_right, tcg_right, pos);
tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
-
- tcg_temp_free_i64(tcg_tmp);
}
/* EXT
@@ -6214,7 +7227,6 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
read_vec_element(s, tcg_resh, rm, 0, MO_64);
do_ext64(s, tcg_resh, tcg_resl, pos);
}
- tcg_gen_movi_i64(tcg_resh, 0);
} else {
TCGv_i64 tcg_hh;
typedef struct {
@@ -6238,14 +7250,14 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
tcg_hh = tcg_temp_new_i64();
read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
do_ext64(s, tcg_hh, tcg_resh, pos);
- tcg_temp_free_i64(tcg_hh);
}
}
write_vec_element(s, tcg_resl, rd, 0, MO_64);
- tcg_temp_free_i64(tcg_resl);
- write_vec_element(s, tcg_resh, rd, 1, MO_64);
- tcg_temp_free_i64(tcg_resh);
+ if (is_q) {
+ write_vec_element(s, tcg_resh, rd, 1, MO_64);
+ }
+ clear_vec_high(s, is_q, rd);
}
/* TBL/TBX
@@ -6261,10 +7273,8 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
int rm = extract32(insn, 16, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- int is_tblx = extract32(insn, 12, 1);
- int len = extract32(insn, 13, 2);
- TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
- TCGv_i32 tcg_regno, tcg_numregs;
+ int is_tbx = extract32(insn, 12, 1);
+ int len = (extract32(insn, 13, 2) + 1) * 16;
if (op2 != 0) {
unallocated_encoding(s);
@@ -6275,45 +7285,11 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
return;
}
- /* This does a table lookup: for every byte element in the input
- * we index into a table formed from up to four vector registers,
- * and then the output is the result of the lookups. Our helper
- * function does the lookup operation for a single 64 bit part of
- * the input.
- */
- tcg_resl = tcg_temp_new_i64();
- tcg_resh = tcg_temp_new_i64();
-
- if (is_tblx) {
- read_vec_element(s, tcg_resl, rd, 0, MO_64);
- } else {
- tcg_gen_movi_i64(tcg_resl, 0);
- }
- if (is_tblx && is_q) {
- read_vec_element(s, tcg_resh, rd, 1, MO_64);
- } else {
- tcg_gen_movi_i64(tcg_resh, 0);
- }
-
- tcg_idx = tcg_temp_new_i64();
- tcg_regno = tcg_const_i32(rn);
- tcg_numregs = tcg_const_i32(len + 1);
- read_vec_element(s, tcg_idx, rm, 0, MO_64);
- gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
- tcg_regno, tcg_numregs);
- if (is_q) {
- read_vec_element(s, tcg_idx, rm, 1, MO_64);
- gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
- tcg_regno, tcg_numregs);
- }
- tcg_temp_free_i64(tcg_idx);
- tcg_temp_free_i32(tcg_regno);
- tcg_temp_free_i32(tcg_numregs);
-
- write_vec_element(s, tcg_resl, rd, 0, MO_64);
- tcg_temp_free_i64(tcg_resl);
- write_vec_element(s, tcg_resh, rd, 1, MO_64);
- tcg_temp_free_i64(tcg_resh);
+ tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rm), tcg_env,
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ (len << 6) | (is_tbx << 5) | rn,
+ gen_helper_simd_tblx);
}
/* ZIP/UZP/TRN
@@ -6335,10 +7311,10 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
bool part = extract32(insn, 14, 1);
bool is_q = extract32(insn, 30, 1);
int esize = 8 << size;
- int i, ofs;
+ int i;
int datasize = is_q ? 128 : 64;
int elements = datasize / esize;
- TCGv_i64 tcg_res, tcg_resl, tcg_resh;
+ TCGv_i64 tcg_res[2], tcg_ele;
if (opcode == 0 || (size == 3 && !is_q)) {
unallocated_encoding(s);
@@ -6349,37 +7325,39 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
return;
}
- tcg_resl = tcg_const_i64(0);
- tcg_resh = tcg_const_i64(0);
- tcg_res = tcg_temp_new_i64();
+ tcg_res[0] = tcg_temp_new_i64();
+ tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
+ tcg_ele = tcg_temp_new_i64();
for (i = 0; i < elements; i++) {
+ int o, w;
+
switch (opcode) {
case 1: /* UZP1/2 */
{
int midpoint = elements / 2;
if (i < midpoint) {
- read_vec_element(s, tcg_res, rn, 2 * i + part, size);
+ read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
} else {
- read_vec_element(s, tcg_res, rm,
+ read_vec_element(s, tcg_ele, rm,
2 * (i - midpoint) + part, size);
}
break;
}
case 2: /* TRN1/2 */
if (i & 1) {
- read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
+ read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
} else {
- read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
+ read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
}
break;
case 3: /* ZIP1/2 */
{
int base = part * elements / 2;
if (i & 1) {
- read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
+ read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
} else {
- read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
+ read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
}
break;
}
@@ -6387,22 +7365,20 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
g_assert_not_reached();
}
- ofs = i * esize;
- if (ofs < 64) {
- tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
- tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
+ w = (i * esize) / 64;
+ o = (i * esize) % 64;
+ if (o == 0) {
+ tcg_gen_mov_i64(tcg_res[w], tcg_ele);
} else {
- tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
- tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
+ tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
+ tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
}
}
- tcg_temp_free_i64(tcg_res);
-
- write_vec_element(s, tcg_resl, rd, 0, MO_64);
- tcg_temp_free_i64(tcg_resl);
- write_vec_element(s, tcg_resh, rd, 1, MO_64);
- tcg_temp_free_i64(tcg_resh);
+ for (i = 0; i <= is_q; ++i) {
+ write_vec_element(s, tcg_res[i], rd, i, MO_64);
+ }
+ clear_vec_high(s, is_q, rd);
}
/*
@@ -6420,7 +7396,7 @@ static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
{
if (esize == size) {
int element;
- TCGMemOp msize = esize == 16 ? MO_16 : MO_32;
+ MemOp msize = esize == 16 ? MO_16 : MO_32;
TCGv_i32 tcg_elem;
/* We should have one register left here */
@@ -6470,9 +7446,6 @@ static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
default:
g_assert_not_reached();
}
-
- tcg_temp_free_i32(tcg_hi);
- tcg_temp_free_i32(tcg_lo);
return tcg_res;
}
}
@@ -6522,7 +7495,7 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
*/
is_min = extract32(size, 1, 1);
is_fp = true;
- if (!is_u && arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (!is_u && dc_isar_feature(aa64_fp16, s)) {
size = 1;
} else if (!is_u || !is_q || extract32(size, 0, 1)) {
unallocated_encoding(s);
@@ -6594,18 +7567,14 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
* Note that correct NaN propagation requires that we do these
* operations in exactly the order specified by the pseudocode.
*/
- TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
+ TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
int fpopcode = opcode | is_min << 4 | is_u << 5;
int vmap = (1 << elements) - 1;
TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
(is_q ? 128 : 64), vmap, fpst);
tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
- tcg_temp_free_i32(tcg_res32);
- tcg_temp_free_ptr(fpst);
}
- tcg_temp_free_i64(tcg_elt);
-
/* Now truncate the result to the width required for the final output */
if (opcode == 0x03) {
/* SADDLV, UADDLV: result is 2*esize */
@@ -6629,7 +7598,6 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
}
write_fp_dreg(s, rd, tcg_res);
- tcg_temp_free_i64(tcg_res);
}
/* DUP (Element, Vector)
@@ -6645,7 +7613,7 @@ static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
int imm5)
{
int size = ctz32(imm5);
- int index = imm5 >> (size + 1);
+ int index;
if (size > 3 || (size == 3 && !is_q)) {
unallocated_encoding(s);
@@ -6656,6 +7624,7 @@ static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
return;
}
+ index = imm5 >> (size + 1);
tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
vec_reg_offset(s, rn, index, size),
is_q ? 16 : 8, vec_full_reg_size(s));
@@ -6691,7 +7660,6 @@ static void handle_simd_dupes(DisasContext *s, int rd, int rn,
tmp = tcg_temp_new_i64();
read_vec_element(s, tmp, rn, index, size);
write_fp_dreg(s, rd, tmp);
- tcg_temp_free_i64(tmp);
}
/* DUP (General)
@@ -6759,7 +7727,8 @@ static void handle_simd_inse(DisasContext *s, int rd, int rn,
read_vec_element(s, tmp, rn, src_index, size);
write_vec_element(s, tmp, rd, dst_index, size);
- tcg_temp_free_i64(tmp);
+ /* INS is considered a 128-bit write for SVE. */
+ clear_vec_high(s, true, rd);
}
@@ -6789,6 +7758,9 @@ static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
idx = extract32(imm5, 1 + size, 4 - size);
write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
+
+ /* INS is considered a 128-bit write for SVE. */
+ clear_vec_high(s, true, rd);
}
/*
@@ -6908,8 +7880,6 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
{
int rd = extract32(insn, 0, 5);
int cmode = extract32(insn, 12, 4);
- int cmode_3_1 = extract32(cmode, 1, 3);
- int cmode_0 = extract32(cmode, 0, 1);
int o2 = extract32(insn, 11, 1);
uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
bool is_neg = extract32(insn, 29, 1);
@@ -6918,7 +7888,7 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
/* Check for FMOV (vector, immediate) - half-precision */
- if (!(arm_dc_feature(s, ARM_FEATURE_V8_FP16) && o2 && cmode == 0xf)) {
+ if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
unallocated_encoding(s);
return;
}
@@ -6928,90 +7898,19 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
return;
}
- /* See AdvSIMDExpandImm() in ARM ARM */
- switch (cmode_3_1) {
- case 0: /* Replicate(Zeros(24):imm8, 2) */
- case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
- case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
- case 3: /* Replicate(imm8:Zeros(24), 2) */
- {
- int shift = cmode_3_1 * 8;
- imm = bitfield_replicate(abcdefgh << shift, 32);
- break;
- }
- case 4: /* Replicate(Zeros(8):imm8, 4) */
- case 5: /* Replicate(imm8:Zeros(8), 4) */
- {
- int shift = (cmode_3_1 & 0x1) * 8;
- imm = bitfield_replicate(abcdefgh << shift, 16);
- break;
- }
- case 6:
- if (cmode_0) {
- /* Replicate(Zeros(8):imm8:Ones(16), 2) */
- imm = (abcdefgh << 16) | 0xffff;
- } else {
- /* Replicate(Zeros(16):imm8:Ones(8), 2) */
- imm = (abcdefgh << 8) | 0xff;
- }
- imm = bitfield_replicate(imm, 32);
- break;
- case 7:
- if (!cmode_0 && !is_neg) {
- imm = bitfield_replicate(abcdefgh, 8);
- } else if (!cmode_0 && is_neg) {
- int i;
- imm = 0;
- for (i = 0; i < 8; i++) {
- if ((abcdefgh) & (1 << i)) {
- imm |= 0xffULL << (i * 8);
- }
- }
- } else if (cmode_0) {
- if (is_neg) {
- imm = (abcdefgh & 0x3f) << 48;
- if (abcdefgh & 0x80) {
- imm |= 0x8000000000000000ULL;
- }
- if (abcdefgh & 0x40) {
- imm |= 0x3fc0000000000000ULL;
- } else {
- imm |= 0x4000000000000000ULL;
- }
- } else {
- if (o2) {
- /* FMOV (vector, immediate) - half-precision */
- imm = vfp_expand_imm(MO_16, abcdefgh);
- /* now duplicate across the lanes */
- imm = bitfield_replicate(imm, 16);
- } else {
- imm = (abcdefgh & 0x3f) << 19;
- if (abcdefgh & 0x80) {
- imm |= 0x80000000;
- }
- if (abcdefgh & 0x40) {
- imm |= 0x3e000000;
- } else {
- imm |= 0x40000000;
- }
- imm |= (imm << 32);
- }
- }
- }
- break;
- default:
- fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
- g_assert_not_reached();
- }
-
- if (cmode_3_1 != 7 && is_neg) {
- imm = ~imm;
+ if (cmode == 15 && o2 && !is_neg) {
+ /* FMOV (vector, immediate) - half-precision */
+ imm = vfp_expand_imm(MO_16, abcdefgh);
+ /* now duplicate across the lanes */
+ imm = dup_const(MO_16, imm);
+ } else {
+ imm = asimd_imm_const(abcdefgh, cmode, is_neg);
}
if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
/* MOVI or MVNI, with MVNI negation handled above. */
- tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
- vec_full_reg_size(s), imm);
+ tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
+ vec_full_reg_size(s), imm);
} else {
/* ORR or BIC, with BIC negation to AND handled above. */
if (is_neg) {
@@ -7085,7 +7984,7 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
case 0x2f: /* FMINP */
        /* FP op, size[0] is 32 or 64 bit */
if (!u) {
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (!dc_isar_feature(aa64_fp16, s)) {
unallocated_encoding(s);
return;
} else {
@@ -7099,7 +7998,7 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
return;
}
- fpst = get_fpstatus_ptr(size == MO_16);
+ fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
break;
default:
unallocated_encoding(s);
@@ -7138,10 +8037,6 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
}
write_fp_dreg(s, rd, tcg_res);
-
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
- tcg_temp_free_i64(tcg_res);
} else {
TCGv_i32 tcg_op1 = tcg_temp_new_i32();
TCGv_i32 tcg_op2 = tcg_temp_new_i32();
@@ -7193,14 +8088,6 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
}
write_fp_sreg(s, rd, tcg_res);
-
- tcg_temp_free_i32(tcg_op1);
- tcg_temp_free_i32(tcg_op2);
- tcg_temp_free_i32(tcg_res);
- }
-
- if (fpst) {
- tcg_temp_free_ptr(fpst);
}
}
@@ -7234,7 +8121,7 @@ static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
/* Deal with the rounding step */
if (round) {
if (extended_result) {
- TCGv_i64 tcg_zero = tcg_const_i64(0);
+ TCGv_i64 tcg_zero = tcg_constant_i64(0);
if (!is_u) {
/* take care of sign extending tcg_res */
tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
@@ -7246,7 +8133,6 @@ static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
tcg_src, tcg_zero,
tcg_rnd, tcg_zero);
}
- tcg_temp_free_i64(tcg_zero);
} else {
tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
}
@@ -7286,10 +8172,6 @@ static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
} else {
tcg_gen_mov_i64(tcg_res, tcg_src);
}
-
- if (extended_result) {
- tcg_temp_free_i64(tcg_src_hi);
- }
}
/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
@@ -7332,8 +8214,7 @@ static void handle_scalar_simd_shri(DisasContext *s,
}
if (round) {
- uint64_t round_const = 1ULL << (shift - 1);
- tcg_round = tcg_const_i64(round_const);
+ tcg_round = tcg_constant_i64(1ULL << (shift - 1));
} else {
tcg_round = NULL;
}
@@ -7356,12 +8237,6 @@ static void handle_scalar_simd_shri(DisasContext *s,
}
write_fp_dreg(s, rd, tcg_rd);
-
- tcg_temp_free_i64(tcg_rn);
- tcg_temp_free_i64(tcg_rd);
- if (round) {
- tcg_temp_free_i64(tcg_round);
- }
}
/* SHL/SLI - Scalar shift left */
@@ -7372,8 +8247,8 @@ static void handle_scalar_simd_shli(DisasContext *s, bool insert,
int size = 32 - clz32(immh) - 1;
int immhb = immh << 3 | immb;
int shift = immhb - (8 << size);
- TCGv_i64 tcg_rn = new_tmp_a64(s);
- TCGv_i64 tcg_rd = new_tmp_a64(s);
+ TCGv_i64 tcg_rn;
+ TCGv_i64 tcg_rd;
if (!extract32(immh, 3, 1)) {
unallocated_encoding(s);
@@ -7394,9 +8269,6 @@ static void handle_scalar_simd_shli(DisasContext *s, bool insert,
}
write_fp_dreg(s, rd, tcg_rd);
-
- tcg_temp_free_i64(tcg_rn);
- tcg_temp_free_i64(tcg_rd);
}
/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
@@ -7412,7 +8284,7 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
int shift = (2 * esize) - immhb;
int elements = is_scalar ? 1 : (64 / esize);
bool round = extract32(opcode, 0, 1);
- TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
+ MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
TCGv_i64 tcg_rn, tcg_rd, tcg_round;
TCGv_i32 tcg_rd_narrowed;
TCGv_i64 tcg_final;
@@ -7456,11 +8328,10 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
tcg_rn = tcg_temp_new_i64();
tcg_rd = tcg_temp_new_i64();
tcg_rd_narrowed = tcg_temp_new_i32();
- tcg_final = tcg_const_i64(0);
+ tcg_final = tcg_temp_new_i64();
if (round) {
- uint64_t round_const = 1ULL << (shift - 1);
- tcg_round = tcg_const_i64(round_const);
+ tcg_round = tcg_constant_i64(1ULL << (shift - 1));
} else {
tcg_round = NULL;
}
@@ -7469,9 +8340,13 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
read_vec_element(s, tcg_rn, rn, i, ldop);
handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
false, is_u_shift, size+1, shift);
- narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
+ narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd);
tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
- tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
+ if (i == 0) {
+ tcg_gen_extract_i64(tcg_final, tcg_rd, 0, esize);
+ } else {
+ tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
+ }
}
if (!is_q) {
@@ -7479,15 +8354,6 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
} else {
write_vec_element(s, tcg_final, rd, 1, MO_64);
}
-
- if (round) {
- tcg_temp_free_i64(tcg_round);
- }
- tcg_temp_free_i64(tcg_rn);
- tcg_temp_free_i64(tcg_rd);
- tcg_temp_free_i32(tcg_rd_narrowed);
- tcg_temp_free_i64(tcg_final);
-
clear_vec_high(s, is_q, rd);
}
@@ -7534,7 +8400,7 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
}
if (size == 3) {
- TCGv_i64 tcg_shift = tcg_const_i64(shift);
+ TCGv_i64 tcg_shift = tcg_constant_i64(shift);
static NeonGenTwo64OpEnvFn * const fns[2][2] = {
{ gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
{ NULL, gen_helper_neon_qshl_u64 },
@@ -7546,15 +8412,12 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
TCGv_i64 tcg_op = tcg_temp_new_i64();
read_vec_element(s, tcg_op, rn, pass, MO_64);
- genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
+ genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
write_vec_element(s, tcg_op, rd, pass, MO_64);
-
- tcg_temp_free_i64(tcg_op);
}
- tcg_temp_free_i64(tcg_shift);
clear_vec_high(s, is_q, rd);
} else {
- TCGv_i32 tcg_shift = tcg_const_i32(shift);
+ TCGv_i32 tcg_shift = tcg_constant_i32(shift);
static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
{
{ gen_helper_neon_qshl_s8,
@@ -7571,14 +8434,14 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
}
};
NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
- TCGMemOp memop = scalar ? size : MO_32;
+ MemOp memop = scalar ? size : MO_32;
int maxpass = scalar ? 1 : is_q ? 4 : 2;
for (pass = 0; pass < maxpass; pass++) {
TCGv_i32 tcg_op = tcg_temp_new_i32();
read_vec_element_i32(s, tcg_op, rn, pass, memop);
- genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
+ genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
if (scalar) {
switch (size) {
case 0:
@@ -7596,10 +8459,7 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
} else {
write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
}
-
- tcg_temp_free_i32(tcg_op);
}
- tcg_temp_free_i32(tcg_shift);
if (!scalar) {
clear_vec_high(s, is_q, rd);
@@ -7612,14 +8472,14 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
int elements, int is_signed,
int fracbits, int size)
{
- TCGv_ptr tcg_fpst = get_fpstatus_ptr(size == MO_16);
+ TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
TCGv_i32 tcg_shift = NULL;
- TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
+ MemOp mop = size | (is_signed ? MO_SIGN : 0);
int pass;
if (fracbits || size == MO_64) {
- tcg_shift = tcg_const_i32(fracbits);
+ tcg_shift = tcg_constant_i32(fracbits);
}
if (size == MO_64) {
@@ -7642,10 +8502,6 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
write_vec_element(s, tcg_double, rd, pass, MO_64);
}
}
-
- tcg_temp_free_i64(tcg_int64);
- tcg_temp_free_i64(tcg_double);
-
} else {
TCGv_i32 tcg_int32 = tcg_temp_new_i32();
TCGv_i32 tcg_float = tcg_temp_new_i32();
@@ -7698,14 +8554,6 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
write_vec_element_i32(s, tcg_float, rd, pass, size);
}
}
-
- tcg_temp_free_i32(tcg_int32);
- tcg_temp_free_i32(tcg_float);
- }
-
- tcg_temp_free_ptr(tcg_fpst);
- if (tcg_shift) {
- tcg_temp_free_i32(tcg_shift);
}
clear_vec_high(s, elements << size == 16, rd);
@@ -7730,7 +8578,7 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
size = MO_32;
} else if (immh & 2) {
size = MO_16;
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (!dc_isar_feature(aa64_fp16, s)) {
unallocated_encoding(s);
return;
}
@@ -7775,7 +8623,7 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
size = MO_32;
} else if (immh & 0x2) {
size = MO_16;
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (!dc_isar_feature(aa64_fp16, s)) {
unallocated_encoding(s);
return;
}
@@ -7792,11 +8640,10 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
assert(!(is_scalar && is_q));
- tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
- tcg_fpstatus = get_fpstatus_ptr(size == MO_16);
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
+ tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
fracbits = (16 << size) - immhb;
- tcg_shift = tcg_const_i32(fracbits);
+ tcg_shift = tcg_constant_i32(fracbits);
if (size == MO_64) {
int maxpass = is_scalar ? 1 : 2;
@@ -7811,7 +8658,6 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
}
write_vec_element(s, tcg_op, rd, pass, MO_64);
- tcg_temp_free_i64(tcg_op);
}
clear_vec_high(s, is_q, rd);
} else {
@@ -7847,17 +8693,13 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
} else {
write_vec_element_i32(s, tcg_op, rd, pass, size);
}
- tcg_temp_free_i32(tcg_op);
}
if (!is_scalar) {
clear_vec_high(s, is_q, rd);
}
}
- tcg_temp_free_ptr(tcg_fpstatus);
- tcg_temp_free_i32(tcg_shift);
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
- tcg_temp_free_i32(tcg_rmode);
+ gen_restore_rmode(tcg_rmode, tcg_fpstatus);
}
/* AdvSIMD scalar shift by immediate
@@ -7982,7 +8824,7 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
- gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
+ gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env, tcg_res, tcg_res);
switch (opcode) {
case 0xd: /* SQDMULL, SQDMULL2 */
@@ -7992,7 +8834,7 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
/* fall through */
case 0x9: /* SQDMLAL, SQDMLAL2 */
read_vec_element(s, tcg_op1, rd, 0, MO_64);
- gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
+ gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env,
tcg_res, tcg_op1);
break;
default:
@@ -8000,17 +8842,13 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
}
write_fp_dreg(s, rd, tcg_res);
-
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
- tcg_temp_free_i64(tcg_res);
} else {
TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
TCGv_i64 tcg_res = tcg_temp_new_i64();
gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
- gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
+ gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env, tcg_res, tcg_res);
switch (opcode) {
case 0xd: /* SQDMULL, SQDMULL2 */
@@ -8022,9 +8860,8 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
{
TCGv_i64 tcg_op3 = tcg_temp_new_i64();
read_vec_element(s, tcg_op3, rd, 0, MO_32);
- gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
+ gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env,
tcg_res, tcg_op3);
- tcg_temp_free_i64(tcg_op3);
break;
}
default:
@@ -8033,35 +8870,9 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
tcg_gen_ext32u_i64(tcg_res, tcg_res);
write_fp_dreg(s, rd, tcg_res);
-
- tcg_temp_free_i32(tcg_op1);
- tcg_temp_free_i32(tcg_op2);
- tcg_temp_free_i64(tcg_res);
}
}
-/* CMTST : test is "if (X & Y != 0)". */
-static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- tcg_gen_and_i32(d, a, b);
- tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
- tcg_gen_neg_i32(d, d);
-}
-
-static void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
- tcg_gen_and_i64(d, a, b);
- tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
- tcg_gen_neg_i64(d, d);
-}
-
-static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
- tcg_gen_and_vec(vece, d, a, b);
- tcg_gen_dupi_vec(vece, a, 0);
- tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
-}
-
static void handle_3same_64(DisasContext *s, int opcode, bool u,
TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
{
@@ -8075,26 +8886,23 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u,
switch (opcode) {
case 0x1: /* SQADD */
if (u) {
- gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ gen_helper_neon_qadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
} else {
- gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ gen_helper_neon_qadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
}
break;
case 0x5: /* SQSUB */
if (u) {
- gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ gen_helper_neon_qsub_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
} else {
- gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ gen_helper_neon_qsub_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
}
break;
case 0x6: /* CMGT, CMHI */
- /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
- * We implement this using setcond (test) and then negating.
- */
cond = u ? TCG_COND_GTU : TCG_COND_GT;
do_cmop:
- tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
- tcg_gen_neg_i64(tcg_rd, tcg_rd);
+ /* 64 bit integer comparison, result = test ? -1 : 0. */
+ tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
break;
case 0x7: /* CMGE, CMHS */
cond = u ? TCG_COND_GEU : TCG_COND_GE;
@@ -8108,16 +8916,16 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u,
break;
case 0x8: /* SSHL, USHL */
if (u) {
- gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
+ gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
} else {
- gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
+ gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
}
break;
case 0x9: /* SQSHL, UQSHL */
if (u) {
- gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ gen_helper_neon_qshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
} else {
- gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ gen_helper_neon_qshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
}
break;
case 0xa: /* SRSHL, URSHL */
@@ -8129,9 +8937,9 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u,
break;
case 0xb: /* SQRSHL, UQRSHL */
if (u) {
- gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ gen_helper_neon_qrshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
} else {
- gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ gen_helper_neon_qrshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm);
}
break;
case 0x10: /* ADD, SUB */
@@ -8154,7 +8962,7 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
int fpopcode, int rd, int rn, int rm)
{
int pass;
- TCGv_ptr fpst = get_fpstatus_ptr(false);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
for (pass = 0; pass < elements; pass++) {
if (size) {
@@ -8233,10 +9041,6 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
}
write_vec_element(s, tcg_res, rd, pass, MO_64);
-
- tcg_temp_free_i64(tcg_res);
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
} else {
/* Single */
TCGv_i32 tcg_op1 = tcg_temp_new_i32();
@@ -8318,19 +9122,12 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
write_vec_element(s, tcg_tmp, rd, pass, MO_64);
- tcg_temp_free_i64(tcg_tmp);
} else {
write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
}
-
- tcg_temp_free_i32(tcg_res);
- tcg_temp_free_i32(tcg_op1);
- tcg_temp_free_i32(tcg_op2);
}
}
- tcg_temp_free_ptr(fpst);
-
clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
}
@@ -8416,8 +9213,6 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
- tcg_temp_free_i64(tcg_rn);
- tcg_temp_free_i64(tcg_rm);
} else {
/* Do a single operation on the lowest element in the vector.
* We use the standard Neon helpers and rely on 0 OP 0 == 0 with
@@ -8488,16 +9283,11 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
g_assert_not_reached();
}
- genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
+ genenvfn(tcg_rd32, tcg_env, tcg_rn, tcg_rm);
tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
- tcg_temp_free_i32(tcg_rd32);
- tcg_temp_free_i32(tcg_rn);
- tcg_temp_free_i32(tcg_rm);
}
write_fp_dreg(s, rd, tcg_rd);
-
- tcg_temp_free_i64(tcg_rd);
}
/* AdvSIMD scalar three same FP16
@@ -8539,7 +9329,7 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
return;
}
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (!dc_isar_feature(aa64_fp16, s)) {
unallocated_encoding(s);
}
@@ -8547,7 +9337,7 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
return;
}
- fpst = get_fpstatus_ptr(true);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
tcg_op1 = read_fp_hreg(s, rn);
tcg_op2 = read_fp_hreg(s, rm);
@@ -8587,12 +9377,6 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
}
write_fp_sreg(s, rd, tcg_res);
-
-
- tcg_temp_free_i32(tcg_res);
- tcg_temp_free_i32(tcg_op1);
- tcg_temp_free_i32(tcg_op2);
- tcg_temp_free_ptr(fpst);
}
/* AdvSIMD scalar three same extra
@@ -8612,7 +9396,7 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
bool u = extract32(insn, 29, 1);
TCGv_i32 ele1, ele2, ele3;
TCGv_i64 res;
- int feature;
+ bool feature;
switch (u * 16 + opcode) {
case 0x10: /* SQRDMLAH (vector) */
@@ -8621,13 +9405,13 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
unallocated_encoding(s);
return;
}
- feature = ARM_FEATURE_V8_RDM;
+ feature = dc_isar_feature(aa64_rdm, s);
break;
default:
unallocated_encoding(s);
return;
}
- if (!arm_dc_feature(s, feature)) {
+ if (!feature) {
unallocated_encoding(s);
return;
}
@@ -8652,30 +9436,25 @@ static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
switch (opcode) {
case 0x0: /* SQRDMLAH */
if (size == 1) {
- gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
+ gen_helper_neon_qrdmlah_s16(ele3, tcg_env, ele1, ele2, ele3);
} else {
- gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
+ gen_helper_neon_qrdmlah_s32(ele3, tcg_env, ele1, ele2, ele3);
}
break;
case 0x1: /* SQRDMLSH */
if (size == 1) {
- gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
+ gen_helper_neon_qrdmlsh_s16(ele3, tcg_env, ele1, ele2, ele3);
} else {
- gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
+ gen_helper_neon_qrdmlsh_s32(ele3, tcg_env, ele1, ele2, ele3);
}
break;
default:
g_assert_not_reached();
}
- tcg_temp_free_i32(ele1);
- tcg_temp_free_i32(ele2);
res = tcg_temp_new_i64();
tcg_gen_extu_i32_i64(res, ele3);
- tcg_temp_free_i32(ele3);
-
write_fp_dreg(s, rd, res);
- tcg_temp_free_i64(res);
}
static void handle_2misc_64(DisasContext *s, int opcode, bool u,
@@ -8706,20 +9485,16 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u,
break;
case 0x7: /* SQABS, SQNEG */
if (u) {
- gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
+ gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn);
} else {
- gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
+ gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn);
}
break;
case 0xa: /* CMLT */
- /* 64 bit integer comparison against zero, result is
- * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
- * subtracting 1.
- */
cond = TCG_COND_LT;
do_cmop:
- tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
- tcg_gen_neg_i64(tcg_rd, tcg_rd);
+ /* 64 bit integer comparison against zero, result is test ? -1 : 0. */
+ tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0));
break;
case 0x8: /* CMGT, CMGE */
cond = u ? TCG_COND_GE : TCG_COND_GT;
@@ -8731,11 +9506,7 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u,
if (u) {
tcg_gen_neg_i64(tcg_rd, tcg_rn);
} else {
- TCGv_i64 tcg_zero = tcg_const_i64(0);
- tcg_gen_neg_i64(tcg_rd, tcg_rn);
- tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
- tcg_rn, tcg_rd);
- tcg_temp_free_i64(tcg_zero);
+ tcg_gen_abs_i64(tcg_rd, tcg_rn);
}
break;
case 0x2f: /* FABS */
@@ -8745,30 +9516,22 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u,
gen_helper_vfp_negd(tcg_rd, tcg_rn);
break;
case 0x7f: /* FSQRT */
- gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
+ gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env);
break;
case 0x1a: /* FCVTNS */
case 0x1b: /* FCVTMS */
case 0x1c: /* FCVTAS */
case 0x3a: /* FCVTPS */
case 0x3b: /* FCVTZS */
- {
- TCGv_i32 tcg_shift = tcg_const_i32(0);
- gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
- tcg_temp_free_i32(tcg_shift);
+ gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
break;
- }
case 0x5a: /* FCVTNU */
case 0x5b: /* FCVTMU */
case 0x5c: /* FCVTAU */
case 0x7a: /* FCVTPU */
case 0x7b: /* FCVTZU */
- {
- TCGv_i32 tcg_shift = tcg_const_i32(0);
- gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
- tcg_temp_free_i32(tcg_shift);
+ gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
break;
- }
case 0x18: /* FRINTN */
case 0x19: /* FRINTM */
case 0x38: /* FRINTP */
@@ -8780,6 +9543,14 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u,
case 0x59: /* FRINTX */
gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
break;
+ case 0x1e: /* FRINT32Z */
+ case 0x5e: /* FRINT32X */
+ gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
+ break;
+ case 0x1f: /* FRINT64Z */
+ case 0x5f: /* FRINT64X */
+ gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
+ break;
default:
g_assert_not_reached();
}
@@ -8796,13 +9567,13 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
return;
}
- fpst = get_fpstatus_ptr(size == MO_16);
+ fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
if (is_double) {
TCGv_i64 tcg_op = tcg_temp_new_i64();
- TCGv_i64 tcg_zero = tcg_const_i64(0);
+ TCGv_i64 tcg_zero = tcg_constant_i64(0);
TCGv_i64 tcg_res = tcg_temp_new_i64();
- NeonGenTwoDoubleOPFn *genfn;
+ NeonGenTwoDoubleOpFn *genfn;
bool swap = false;
int pass;
@@ -8835,16 +9606,13 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
}
write_vec_element(s, tcg_res, rd, pass, MO_64);
}
- tcg_temp_free_i64(tcg_res);
- tcg_temp_free_i64(tcg_zero);
- tcg_temp_free_i64(tcg_op);
clear_vec_high(s, !is_scalar, rd);
} else {
TCGv_i32 tcg_op = tcg_temp_new_i32();
- TCGv_i32 tcg_zero = tcg_const_i32(0);
+ TCGv_i32 tcg_zero = tcg_constant_i32(0);
TCGv_i32 tcg_res = tcg_temp_new_i32();
- NeonGenTwoSingleOPFn *genfn;
+ NeonGenTwoSingleOpFn *genfn;
bool swap = false;
int pass, maxpasses;
@@ -8910,15 +9678,11 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
write_vec_element_i32(s, tcg_res, rd, pass, size);
}
}
- tcg_temp_free_i32(tcg_res);
- tcg_temp_free_i32(tcg_zero);
- tcg_temp_free_i32(tcg_op);
+
if (!is_scalar) {
clear_vec_high(s, is_q, rd);
}
}
-
- tcg_temp_free_ptr(fpst);
}
static void handle_2misc_reciprocal(DisasContext *s, int opcode,
@@ -8926,7 +9690,7 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
int size, int rn, int rd)
{
bool is_double = (size == 3);
- TCGv_ptr fpst = get_fpstatus_ptr(false);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
if (is_double) {
TCGv_i64 tcg_op = tcg_temp_new_i64();
@@ -8950,8 +9714,6 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
}
write_vec_element(s, tcg_res, rd, pass, MO_64);
}
- tcg_temp_free_i64(tcg_res);
- tcg_temp_free_i64(tcg_op);
clear_vec_high(s, !is_scalar, rd);
} else {
TCGv_i32 tcg_op = tcg_temp_new_i32();
@@ -8969,7 +9731,7 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
switch (opcode) {
case 0x3c: /* URECPE */
- gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
+ gen_helper_recpe_u32(tcg_res, tcg_op);
break;
case 0x3d: /* FRECPE */
gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
@@ -8990,13 +9752,10 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
}
}
- tcg_temp_free_i32(tcg_res);
- tcg_temp_free_i32(tcg_op);
if (!is_scalar) {
clear_vec_high(s, is_q, rd);
}
}
- tcg_temp_free_ptr(fpst);
}
static void handle_2misc_narrow(DisasContext *s, bool scalar,
@@ -9012,7 +9771,7 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar,
int passes = scalar ? 1 : 2;
if (scalar) {
- tcg_res[1] = tcg_const_i32(0);
+ tcg_res[1] = tcg_constant_i32(0);
}
for (pass = 0; pass < passes; pass++) {
@@ -9063,21 +9822,23 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar,
case 0x16: /* FCVTN, FCVTN2 */
/* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
if (size == 2) {
- gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
+ gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, tcg_env);
} else {
TCGv_i32 tcg_lo = tcg_temp_new_i32();
TCGv_i32 tcg_hi = tcg_temp_new_i32();
- TCGv_ptr fpst = get_fpstatus_ptr(false);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
TCGv_i32 ahp = get_ahp_flag();
tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
- tcg_temp_free_i32(tcg_lo);
- tcg_temp_free_i32(tcg_hi);
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i32(ahp);
+ }
+ break;
+ case 0x36: /* BFCVTN, BFCVTN2 */
+ {
+ TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
+ gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
}
break;
case 0x56: /* FCVTXN, FCVTXN2 */
@@ -9085,7 +9846,7 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar,
* with von Neumann rounding (round to odd)
*/
assert(size == 2);
- gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
+ gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, tcg_env);
break;
default:
g_assert_not_reached();
@@ -9094,15 +9855,12 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar,
if (genfn) {
genfn(tcg_res[pass], tcg_op);
} else if (genenvfn) {
- genenvfn(tcg_res[pass], cpu_env, tcg_op);
+ genenvfn(tcg_res[pass], tcg_env, tcg_op);
}
-
- tcg_temp_free_i64(tcg_op);
}
for (pass = 0; pass < 2; pass++) {
write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
- tcg_temp_free_i32(tcg_res[pass]);
}
clear_vec_high(s, is_q, rd);
}
@@ -9123,14 +9881,12 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
read_vec_element(s, tcg_rd, rd, pass, MO_64);
if (is_u) { /* USQADD */
- gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
+ gen_helper_neon_uqadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rd);
} else { /* SUQADD */
- gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
+ gen_helper_neon_sqadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rd);
}
write_vec_element(s, tcg_rd, rd, pass, MO_64);
}
- tcg_temp_free_i64(tcg_rd);
- tcg_temp_free_i64(tcg_rn);
clear_vec_high(s, !is_scalar, rd);
} else {
TCGv_i32 tcg_rn = tcg_temp_new_i32();
@@ -9155,13 +9911,13 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
if (is_u) { /* USQADD */
switch (size) {
case 0:
- gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
+ gen_helper_neon_uqadd_s8(tcg_rd, tcg_env, tcg_rn, tcg_rd);
break;
case 1:
- gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
+ gen_helper_neon_uqadd_s16(tcg_rd, tcg_env, tcg_rn, tcg_rd);
break;
case 2:
- gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
+ gen_helper_neon_uqadd_s32(tcg_rd, tcg_env, tcg_rn, tcg_rd);
break;
default:
g_assert_not_reached();
@@ -9169,13 +9925,13 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
} else { /* SUQADD */
switch (size) {
case 0:
- gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
+ gen_helper_neon_sqadd_u8(tcg_rd, tcg_env, tcg_rn, tcg_rd);
break;
case 1:
- gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
+ gen_helper_neon_sqadd_u16(tcg_rd, tcg_env, tcg_rn, tcg_rd);
break;
case 2:
- gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
+ gen_helper_neon_sqadd_u32(tcg_rd, tcg_env, tcg_rn, tcg_rd);
break;
default:
g_assert_not_reached();
@@ -9183,14 +9939,10 @@ static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
}
if (is_scalar) {
- TCGv_i64 tcg_zero = tcg_const_i64(0);
- write_vec_element(s, tcg_zero, rd, 0, MO_64);
- tcg_temp_free_i64(tcg_zero);
+ write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
}
write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
}
- tcg_temp_free_i32(tcg_rd);
- tcg_temp_free_i32(tcg_rn);
clear_vec_high(s, is_q, rd);
}
}
@@ -9328,12 +10080,11 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
}
if (is_fcvt) {
- tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
- tcg_fpstatus = get_fpstatus_ptr(false);
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
+ tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
+ tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
} else {
- tcg_rmode = NULL;
tcg_fpstatus = NULL;
+ tcg_rmode = NULL;
}
if (size == 3) {
@@ -9342,8 +10093,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
write_fp_dreg(s, rd, tcg_rd);
- tcg_temp_free_i64(tcg_rd);
- tcg_temp_free_i64(tcg_rn);
} else {
TCGv_i32 tcg_rn = tcg_temp_new_i32();
TCGv_i32 tcg_rd = tcg_temp_new_i32();
@@ -9360,7 +10109,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
{ gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
};
genfn = fns[size][u];
- genfn(tcg_rd, cpu_env, tcg_rn);
+ genfn(tcg_rd, tcg_env, tcg_rn);
break;
}
case 0x1a: /* FCVTNS */
@@ -9368,237 +10117,37 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
case 0x1c: /* FCVTAS */
case 0x3a: /* FCVTPS */
case 0x3b: /* FCVTZS */
- {
- TCGv_i32 tcg_shift = tcg_const_i32(0);
- gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
- tcg_temp_free_i32(tcg_shift);
+ gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
+ tcg_fpstatus);
break;
- }
case 0x5a: /* FCVTNU */
case 0x5b: /* FCVTMU */
case 0x5c: /* FCVTAU */
case 0x7a: /* FCVTPU */
case 0x7b: /* FCVTZU */
- {
- TCGv_i32 tcg_shift = tcg_const_i32(0);
- gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
- tcg_temp_free_i32(tcg_shift);
+ gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
+ tcg_fpstatus);
break;
- }
default:
g_assert_not_reached();
}
write_fp_sreg(s, rd, tcg_rd);
- tcg_temp_free_i32(tcg_rd);
- tcg_temp_free_i32(tcg_rn);
}
if (is_fcvt) {
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
- tcg_temp_free_i32(tcg_rmode);
- tcg_temp_free_ptr(tcg_fpstatus);
+ gen_restore_rmode(tcg_rmode, tcg_fpstatus);
}
}
-static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_vec_sar8i_i64(a, a, shift);
- tcg_gen_vec_add8_i64(d, d, a);
-}
-
-static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_vec_sar16i_i64(a, a, shift);
- tcg_gen_vec_add16_i64(d, d, a);
-}
-
-static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
- tcg_gen_sari_i32(a, a, shift);
- tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_sari_i64(a, a, shift);
- tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
- tcg_gen_sari_vec(vece, a, a, sh);
- tcg_gen_add_vec(vece, d, d, a);
-}
-
-static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_vec_shr8i_i64(a, a, shift);
- tcg_gen_vec_add8_i64(d, d, a);
-}
-
-static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_vec_shr16i_i64(a, a, shift);
- tcg_gen_vec_add16_i64(d, d, a);
-}
-
-static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
- tcg_gen_shri_i32(a, a, shift);
- tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_shri_i64(a, a, shift);
- tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
- tcg_gen_shri_vec(vece, a, a, sh);
- tcg_gen_add_vec(vece, d, d, a);
-}
-
-static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- uint64_t mask = dup_const(MO_8, 0xff >> shift);
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_shri_i64(t, a, shift);
- tcg_gen_andi_i64(t, t, mask);
- tcg_gen_andi_i64(d, d, ~mask);
- tcg_gen_or_i64(d, d, t);
- tcg_temp_free_i64(t);
-}
-
-static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- uint64_t mask = dup_const(MO_16, 0xffff >> shift);
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_shri_i64(t, a, shift);
- tcg_gen_andi_i64(t, t, mask);
- tcg_gen_andi_i64(d, d, ~mask);
- tcg_gen_or_i64(d, d, t);
- tcg_temp_free_i64(t);
-}
-
-static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
- tcg_gen_shri_i32(a, a, shift);
- tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
-}
-
-static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_shri_i64(a, a, shift);
- tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
-}
-
-static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
- uint64_t mask = (2ull << ((8 << vece) - 1)) - 1;
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
-
- tcg_gen_dupi_vec(vece, m, mask ^ (mask >> sh));
- tcg_gen_shri_vec(vece, t, a, sh);
- tcg_gen_and_vec(vece, d, d, m);
- tcg_gen_or_vec(vece, d, d, t);
-
- tcg_temp_free_vec(t);
- tcg_temp_free_vec(m);
-}
-
/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
int immh, int immb, int opcode, int rn, int rd)
{
- static const GVecGen2i ssra_op[4] = {
- { .fni8 = gen_ssra8_i64,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opc = INDEX_op_sari_vec,
- .vece = MO_8 },
- { .fni8 = gen_ssra16_i64,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opc = INDEX_op_sari_vec,
- .vece = MO_16 },
- { .fni4 = gen_ssra32_i32,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opc = INDEX_op_sari_vec,
- .vece = MO_32 },
- { .fni8 = gen_ssra64_i64,
- .fniv = gen_ssra_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opc = INDEX_op_sari_vec,
- .vece = MO_64 },
- };
- static const GVecGen2i usra_op[4] = {
- { .fni8 = gen_usra8_i64,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opc = INDEX_op_shri_vec,
- .vece = MO_8, },
- { .fni8 = gen_usra16_i64,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opc = INDEX_op_shri_vec,
- .vece = MO_16, },
- { .fni4 = gen_usra32_i32,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opc = INDEX_op_shri_vec,
- .vece = MO_32, },
- { .fni8 = gen_usra64_i64,
- .fniv = gen_usra_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opc = INDEX_op_shri_vec,
- .vece = MO_64, },
- };
- static const GVecGen2i sri_op[4] = {
- { .fni8 = gen_shr8_ins_i64,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opc = INDEX_op_shri_vec,
- .vece = MO_8 },
- { .fni8 = gen_shr16_ins_i64,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opc = INDEX_op_shri_vec,
- .vece = MO_16 },
- { .fni4 = gen_shr32_ins_i32,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opc = INDEX_op_shri_vec,
- .vece = MO_32 },
- { .fni8 = gen_shr64_ins_i64,
- .fniv = gen_shr_ins_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opc = INDEX_op_shri_vec,
- .vece = MO_64 },
- };
-
int size = 32 - clz32(immh) - 1;
int immhb = immh << 3 | immb;
int shift = 2 * (8 << size) - immhb;
- bool accumulate = false;
- int dsize = is_q ? 128 : 64;
- int esize = 8 << size;
- int elements = dsize/esize;
- TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
- TCGv_i64 tcg_rn = new_tmp_a64(s);
- TCGv_i64 tcg_rd = new_tmp_a64(s);
- TCGv_i64 tcg_round;
- uint64_t round_const;
- int i;
+ GVecGen2iFn *gvec_fn;
if (extract32(immh, 3, 1) && !is_q) {
unallocated_encoding(s);
@@ -9612,164 +10161,58 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
switch (opcode) {
case 0x02: /* SSRA / USRA (accumulate) */
- if (is_u) {
- /* Shift count same as element size produces zero to add. */
- if (shift == 8 << size) {
- goto done;
- }
- gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
- } else {
- /* Shift count same as element size produces all sign to add. */
- if (shift == 8 << size) {
- shift -= 1;
- }
- gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
- }
- return;
+ gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
+ break;
+
case 0x08: /* SRI */
- /* Shift count same as element size is valid but does nothing. */
- if (shift == 8 << size) {
- goto done;
- }
- gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
- return;
+ gvec_fn = gen_gvec_sri;
+ break;
case 0x00: /* SSHR / USHR */
if (is_u) {
if (shift == 8 << size) {
/* Shift count the same size as element size produces zero. */
- tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd),
- is_q ? 16 : 8, vec_full_reg_size(s), 0);
- } else {
- gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
+ tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
+ is_q ? 16 : 8, vec_full_reg_size(s), 0);
+ return;
}
+ gvec_fn = tcg_gen_gvec_shri;
} else {
/* Shift count the same size as element size produces all sign. */
if (shift == 8 << size) {
shift -= 1;
}
- gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size);
+ gvec_fn = tcg_gen_gvec_sari;
}
- return;
+ break;
case 0x04: /* SRSHR / URSHR (rounding) */
+ gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
break;
+
case 0x06: /* SRSRA / URSRA (accum + rounding) */
- accumulate = true;
+ gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
break;
+
default:
g_assert_not_reached();
}
- round_const = 1ULL << (shift - 1);
- tcg_round = tcg_const_i64(round_const);
-
- for (i = 0; i < elements; i++) {
- read_vec_element(s, tcg_rn, rn, i, memop);
- if (accumulate) {
- read_vec_element(s, tcg_rd, rd, i, memop);
- }
-
- handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
- accumulate, is_u, size, shift);
-
- write_vec_element(s, tcg_rd, rd, i, size);
- }
- tcg_temp_free_i64(tcg_round);
-
- done:
- clear_vec_high(s, is_q, rd);
-}
-
-static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- uint64_t mask = dup_const(MO_8, 0xff << shift);
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_shli_i64(t, a, shift);
- tcg_gen_andi_i64(t, t, mask);
- tcg_gen_andi_i64(d, d, ~mask);
- tcg_gen_or_i64(d, d, t);
- tcg_temp_free_i64(t);
-}
-
-static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- uint64_t mask = dup_const(MO_16, 0xffff << shift);
- TCGv_i64 t = tcg_temp_new_i64();
-
- tcg_gen_shli_i64(t, a, shift);
- tcg_gen_andi_i64(t, t, mask);
- tcg_gen_andi_i64(d, d, ~mask);
- tcg_gen_or_i64(d, d, t);
- tcg_temp_free_i64(t);
-}
-
-static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
- tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
-}
-
-static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
-}
-
-static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
- uint64_t mask = (1ull << sh) - 1;
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
-
- tcg_gen_dupi_vec(vece, m, mask);
- tcg_gen_shli_vec(vece, t, a, sh);
- tcg_gen_and_vec(vece, d, d, m);
- tcg_gen_or_vec(vece, d, d, t);
-
- tcg_temp_free_vec(t);
- tcg_temp_free_vec(m);
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
}
/* SHL/SLI - Vector shift left */
static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
int immh, int immb, int opcode, int rn, int rd)
{
- static const GVecGen2i shi_op[4] = {
- { .fni8 = gen_shl8_ins_i64,
- .fniv = gen_shl_ins_vec,
- .opc = INDEX_op_shli_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .vece = MO_8 },
- { .fni8 = gen_shl16_ins_i64,
- .fniv = gen_shl_ins_vec,
- .opc = INDEX_op_shli_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .vece = MO_16 },
- { .fni4 = gen_shl32_ins_i32,
- .fniv = gen_shl_ins_vec,
- .opc = INDEX_op_shli_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .vece = MO_32 },
- { .fni8 = gen_shl64_ins_i64,
- .fniv = gen_shl_ins_vec,
- .opc = INDEX_op_shli_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .vece = MO_64 },
- };
int size = 32 - clz32(immh) - 1;
int immhb = immh << 3 | immb;
int shift = immhb - (8 << size);
- if (extract32(immh, 3, 1) && !is_q) {
- unallocated_encoding(s);
- return;
- }
+ /* Range of size is limited by decode: immh is a non-zero 4 bit field */
+ assert(size >= 0 && size <= 3);
- if (size > 3 && !is_q) {
+ if (extract32(immh, 3, 1) && !is_q) {
unallocated_encoding(s);
return;
}
@@ -9779,7 +10222,7 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
}
if (insert) {
- gen_gvec_op2i(s, is_q, rd, rn, shift, &shi_op[size]);
+ gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
} else {
gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
}
@@ -9795,8 +10238,8 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
int dsize = 64;
int esize = 8 << size;
int elements = dsize/esize;
- TCGv_i64 tcg_rn = new_tmp_a64(s);
- TCGv_i64 tcg_rd = new_tmp_a64(s);
+ TCGv_i64 tcg_rn = tcg_temp_new_i64();
+ TCGv_i64 tcg_rd = tcg_temp_new_i64();
int i;
if (size >= 3) {
@@ -9852,8 +10295,7 @@ static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
if (round) {
- uint64_t round_const = 1ULL << (shift - 1);
- tcg_round = tcg_const_i64(round_const);
+ tcg_round = tcg_constant_i64(1ULL << (shift - 1));
} else {
tcg_round = NULL;
}
@@ -9871,12 +10313,6 @@ static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
} else {
write_vec_element(s, tcg_final, rd, 1, MO_64);
}
- if (round) {
- tcg_temp_free_i64(tcg_round);
- }
- tcg_temp_free_i64(tcg_rn);
- tcg_temp_free_i64(tcg_rd);
- tcg_temp_free_i64(tcg_final);
clear_vec_high(s, is_q, rd);
}
@@ -9898,6 +10334,9 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
bool is_u = extract32(insn, 29, 1);
bool is_q = extract32(insn, 30, 1);
+ /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
+ assert(immh != 0);
+
switch (opcode) {
case 0x08: /* SRI */
if (!is_u) {
@@ -10013,7 +10452,7 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
TCGv_i64 tcg_op1 = tcg_temp_new_i64();
TCGv_i64 tcg_op2 = tcg_temp_new_i64();
TCGv_i64 tcg_passres;
- TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
+ MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
int elt = pass + is_q * 2;
@@ -10044,8 +10483,6 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
tcg_passres,
tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
- tcg_temp_free_i64(tcg_tmp1);
- tcg_temp_free_i64(tcg_tmp2);
break;
}
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
@@ -10057,7 +10494,7 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
case 11: /* SQDMLSL, SQDMLSL2 */
case 13: /* SQDMULL, SQDMULL2 */
tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
- gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
+ gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env,
tcg_passres, tcg_passres);
break;
default:
@@ -10069,20 +10506,13 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
if (accop < 0) {
tcg_gen_neg_i64(tcg_passres, tcg_passres);
}
- gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
+ gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env,
tcg_res[pass], tcg_passres);
} else if (accop > 0) {
tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
} else if (accop < 0) {
tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
}
-
- if (accop != 0) {
- tcg_temp_free_i64(tcg_passres);
- }
-
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
}
} else {
/* size 0 or 1, generally helper functions */
@@ -10116,7 +10546,6 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
widenfn(tcg_passres, tcg_op1);
gen_neon_addl(size, (opcode == 2), tcg_passres,
tcg_passres, tcg_op2_64);
- tcg_temp_free_i64(tcg_op2_64);
break;
}
case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
@@ -10157,18 +10586,12 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
case 13: /* SQDMULL, SQDMULL2 */
assert(size == 1);
gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
- gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
+ gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env,
tcg_passres, tcg_passres);
break;
- case 14: /* PMULL */
- assert(size == 0);
- gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
- break;
default:
g_assert_not_reached();
}
- tcg_temp_free_i32(tcg_op1);
- tcg_temp_free_i32(tcg_op2);
if (accop != 0) {
if (opcode == 9 || opcode == 11) {
@@ -10176,22 +10599,19 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
if (accop < 0) {
gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
}
- gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
+ gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env,
tcg_res[pass],
tcg_passres);
} else {
gen_neon_addl(size, (accop < 0), tcg_res[pass],
tcg_res[pass], tcg_passres);
}
- tcg_temp_free_i64(tcg_passres);
}
}
}
write_vec_element(s, tcg_res[0], rd, 0, MO_64);
write_vec_element(s, tcg_res[1], rd, 1, MO_64);
- tcg_temp_free_i64(tcg_res[0]);
- tcg_temp_free_i64(tcg_res[1]);
}
static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
@@ -10215,17 +10635,13 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
read_vec_element(s, tcg_op1, rn, pass, MO_64);
read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
widenfn(tcg_op2_wide, tcg_op2);
- tcg_temp_free_i32(tcg_op2);
tcg_res[pass] = tcg_temp_new_i64();
gen_neon_addl(size, (opcode == 3),
tcg_res[pass], tcg_op1, tcg_op2_wide);
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2_wide);
}
for (pass = 0; pass < 2; pass++) {
write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
- tcg_temp_free_i64(tcg_res[pass]);
}
}
@@ -10260,45 +10676,16 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
-
tcg_res[pass] = tcg_temp_new_i32();
gennarrow(tcg_res[pass], tcg_wideres);
- tcg_temp_free_i64(tcg_wideres);
}
for (pass = 0; pass < 2; pass++) {
write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
- tcg_temp_free_i32(tcg_res[pass]);
}
clear_vec_high(s, is_q, rd);
}
-static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
-{
- /* PMULL of 64 x 64 -> 128 is an odd special case because it
- * is the only three-reg-diff instruction which produces a
- * 128-bit wide result from a single operation. However since
- * it's possible to calculate the two halves more or less
- * separately we just use two helper calls.
- */
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- TCGv_i64 tcg_res = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op1, rn, is_q, MO_64);
- read_vec_element(s, tcg_op2, rm, is_q, MO_64);
- gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
- write_vec_element(s, tcg_res, rd, 0, MO_64);
- gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
- write_vec_element(s, tcg_res, rd, 1, MO_64);
-
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
- tcg_temp_free_i64(tcg_res);
-}
-
/* AdvSIMD three different
* 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
* +---+---+---+-----------+------+---+------+--------+-----+------+------+
@@ -10351,22 +10738,38 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
break;
case 14: /* PMULL, PMULL2 */
- if (is_u || size == 1 || size == 2) {
+ if (is_u) {
unallocated_encoding(s);
return;
}
- if (size == 3) {
- if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
+ switch (size) {
+ case 0: /* PMULL.P8 */
+ if (!fp_access_check(s)) {
+ return;
+ }
+ /* The Q field specifies lo/hi half input for this insn. */
+ gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
+ gen_helper_neon_pmull_h);
+ break;
+
+ case 3: /* PMULL.P64 */
+ if (!dc_isar_feature(aa64_pmull, s)) {
unallocated_encoding(s);
return;
}
if (!fp_access_check(s)) {
return;
}
- handle_pmull_64(s, is_q, rd, rn, rm);
- return;
+ /* The Q field specifies lo/hi half input for this insn. */
+ gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
+ gen_helper_gvec_pmull_q);
+ break;
+
+ default:
+ unallocated_encoding(s);
+ break;
}
- goto is_widening;
+ return;
case 9: /* SQDMLAL, SQDMLAL2 */
case 11: /* SQDMLSL, SQDMLSL2 */
case 13: /* SQDMULL, SQDMULL2 */
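/*
 * Reference sketch (not from the patch) of the 64x64->128 carry-less
 * multiply that the PMULL.P64 path above now delegates to the
 * gen_helper_gvec_pmull_q out-of-line helper.  clmul_64 is a
 * hypothetical name used only for illustration.
 */
static inline void clmul_64(uint64_t n, uint64_t m,
                            uint64_t *lo, uint64_t *hi)
{
    uint64_t l = 0, h = 0;
    int i;

    for (i = 0; i < 64; i++) {
        if (m & (1ULL << i)) {
            /* XOR (carry-less add) of n shifted into the 128-bit result */
            l ^= n << i;
            h ^= i ? n >> (64 - i) : 0;
        }
    }
    *lo = l;
    *hi = h;
}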
@@ -10387,7 +10790,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
- is_widening:
if (!fp_access_check(s)) {
return;
}
@@ -10401,70 +10803,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
}
}
-static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
- tcg_gen_xor_i64(rn, rn, rm);
- tcg_gen_and_i64(rn, rn, rd);
- tcg_gen_xor_i64(rd, rm, rn);
-}
-
-static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
- tcg_gen_xor_i64(rn, rn, rd);
- tcg_gen_and_i64(rn, rn, rm);
- tcg_gen_xor_i64(rd, rd, rn);
-}
-
-static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
-{
- tcg_gen_xor_i64(rn, rn, rd);
- tcg_gen_andc_i64(rn, rn, rm);
- tcg_gen_xor_i64(rd, rd, rn);
-}
-
-static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
- tcg_gen_xor_vec(vece, rn, rn, rm);
- tcg_gen_and_vec(vece, rn, rn, rd);
- tcg_gen_xor_vec(vece, rd, rm, rn);
-}
-
-static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
- tcg_gen_xor_vec(vece, rn, rn, rd);
- tcg_gen_and_vec(vece, rn, rn, rm);
- tcg_gen_xor_vec(vece, rd, rd, rn);
-}
-
-static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
-{
- tcg_gen_xor_vec(vece, rn, rn, rd);
- tcg_gen_andc_vec(vece, rn, rn, rm);
- tcg_gen_xor_vec(vece, rd, rd, rn);
-}
-
/* Logic op (opcode == 3) subgroup of C3.6.16. */
static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
{
- static const GVecGen3 bsl_op = {
- .fni8 = gen_bsl_i64,
- .fniv = gen_bsl_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true
- };
- static const GVecGen3 bit_op = {
- .fni8 = gen_bit_i64,
- .fniv = gen_bit_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true
- };
- static const GVecGen3 bif_op = {
- .fni8 = gen_bif_i64,
- .fniv = gen_bif_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true
- };
-
int rd = extract32(insn, 0, 5);
int rn = extract32(insn, 5, 5);
int rm = extract32(insn, 16, 5);
@@ -10484,11 +10825,7 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
return;
case 2: /* ORR */
- if (rn == rm) { /* MOV */
- gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_mov, 0);
- } else {
- gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
- }
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
return;
case 3: /* ORN */
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
@@ -10498,13 +10835,13 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
return;
case 5: /* BSL bitwise select */
- gen_gvec_op3(s, is_q, rd, rn, rm, &bsl_op);
+ gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
return;
case 6: /* BIT, bitwise insert if true */
- gen_gvec_op3(s, is_q, rd, rn, rm, &bit_op);
+ gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
return;
case 7: /* BIF, bitwise insert if false */
- gen_gvec_op3(s, is_q, rd, rn, rm, &bif_op);
+ gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
return;
default:
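/*
 * Reference sketch (not from the patch): tcg_gen_gvec_bitsel computes
 * d = (b & a) | (c & ~a) per element, so the three insns above reduce
 * to one primitive with different operand orders:
 *   BSL: rd = bitsel(rd, rn, rm)   -- rd is the select mask
 *   BIT: rd = bitsel(rm, rn, rd)   -- copy rn bits where rm is set
 *   BIF: rd = bitsel(rm, rd, rn)   -- copy rn bits where rm is clear
 * bitsel64 below is an illustrative scalar model only.
 */
static inline uint64_t bitsel64(uint64_t sel, uint64_t a, uint64_t b)
{
    return (a & sel) | (b & ~sel);
}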
@@ -10525,7 +10862,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
/* Floating point operations need fpst */
if (opcode >= 0x58) {
- fpst = get_fpstatus_ptr(false);
+ fpst = fpstatus_ptr(FPST_FPCR);
} else {
fpst = NULL;
}
@@ -10571,14 +10908,10 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
default:
g_assert_not_reached();
}
-
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
}
for (pass = 0; pass < 2; pass++) {
write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
- tcg_temp_free_i64(tcg_res[pass]);
}
} else {
int maxpass = is_q ? 4 : 2;
@@ -10650,21 +10983,13 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
if (genfn) {
genfn(tcg_res[pass], tcg_op1, tcg_op2);
}
-
- tcg_temp_free_i32(tcg_op1);
- tcg_temp_free_i32(tcg_op2);
}
for (pass = 0; pass < maxpass; pass++) {
write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
- tcg_temp_free_i32(tcg_res[pass]);
}
clear_vec_high(s, is_q, rd);
}
-
- if (fpst) {
- tcg_temp_free_ptr(fpst);
- }
}
/* Floating point op subgroup of C3.6.16. */
@@ -10727,140 +11052,38 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
if (!fp_access_check(s)) {
return;
}
-
handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
return;
+
+ case 0x1d: /* FMLAL */
+ case 0x3d: /* FMLSL */
+ case 0x59: /* FMLAL2 */
+ case 0x79: /* FMLSL2 */
+ if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
+ unallocated_encoding(s);
+ return;
+ }
+ if (fp_access_check(s)) {
+ int is_s = extract32(insn, 23, 1);
+ int is_2 = extract32(insn, 29, 1);
+ int data = (is_2 << 1) | is_s;
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), tcg_env,
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ data, gen_helper_gvec_fmlal_a64);
+ }
+ return;
+
default:
unallocated_encoding(s);
return;
}
}
-static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- gen_helper_neon_mul_u8(a, a, b);
- gen_helper_neon_add_u8(d, d, a);
-}
-
-static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- gen_helper_neon_mul_u16(a, a, b);
- gen_helper_neon_add_u16(d, d, a);
-}
-
-static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- tcg_gen_mul_i32(a, a, b);
- tcg_gen_add_i32(d, d, a);
-}
-
-static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
- tcg_gen_mul_i64(a, a, b);
- tcg_gen_add_i64(d, d, a);
-}
-
-static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
- tcg_gen_mul_vec(vece, a, a, b);
- tcg_gen_add_vec(vece, d, d, a);
-}
-
-static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- gen_helper_neon_mul_u8(a, a, b);
- gen_helper_neon_sub_u8(d, d, a);
-}
-
-static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- gen_helper_neon_mul_u16(a, a, b);
- gen_helper_neon_sub_u16(d, d, a);
-}
-
-static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- tcg_gen_mul_i32(a, a, b);
- tcg_gen_sub_i32(d, d, a);
-}
-
-static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
- tcg_gen_mul_i64(a, a, b);
- tcg_gen_sub_i64(d, d, a);
-}
-
-static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
- tcg_gen_mul_vec(vece, a, a, b);
- tcg_gen_sub_vec(vece, d, d, a);
-}
-
/* Integer op subgroup of C3.6.16. */
static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
{
- static const GVecGen3 cmtst_op[4] = {
- { .fni4 = gen_helper_neon_tst_u8,
- .fniv = gen_cmtst_vec,
- .vece = MO_8 },
- { .fni4 = gen_helper_neon_tst_u16,
- .fniv = gen_cmtst_vec,
- .vece = MO_16 },
- { .fni4 = gen_cmtst_i32,
- .fniv = gen_cmtst_vec,
- .vece = MO_32 },
- { .fni8 = gen_cmtst_i64,
- .fniv = gen_cmtst_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .vece = MO_64 },
- };
- static const GVecGen3 mla_op[4] = {
- { .fni4 = gen_mla8_i32,
- .fniv = gen_mla_vec,
- .opc = INDEX_op_mul_vec,
- .load_dest = true,
- .vece = MO_8 },
- { .fni4 = gen_mla16_i32,
- .fniv = gen_mla_vec,
- .opc = INDEX_op_mul_vec,
- .load_dest = true,
- .vece = MO_16 },
- { .fni4 = gen_mla32_i32,
- .fniv = gen_mla_vec,
- .opc = INDEX_op_mul_vec,
- .load_dest = true,
- .vece = MO_32 },
- { .fni8 = gen_mla64_i64,
- .fniv = gen_mla_vec,
- .opc = INDEX_op_mul_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .vece = MO_64 },
- };
- static const GVecGen3 mls_op[4] = {
- { .fni4 = gen_mls8_i32,
- .fniv = gen_mls_vec,
- .opc = INDEX_op_mul_vec,
- .load_dest = true,
- .vece = MO_8 },
- { .fni4 = gen_mls16_i32,
- .fniv = gen_mls_vec,
- .opc = INDEX_op_mul_vec,
- .load_dest = true,
- .vece = MO_16 },
- { .fni4 = gen_mls32_i32,
- .fniv = gen_mls_vec,
- .opc = INDEX_op_mul_vec,
- .load_dest = true,
- .vece = MO_32 },
- { .fni8 = gen_mls64_i64,
- .fniv = gen_mls_vec,
- .opc = INDEX_op_mul_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .vece = MO_64 },
- };
-
int is_q = extract32(insn, 30, 1);
int u = extract32(insn, 29, 1);
int size = extract32(insn, 22, 2);
@@ -10910,6 +11133,55 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
}
switch (opcode) {
+ case 0x01: /* SQADD, UQADD */
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
+ }
+ return;
+ case 0x05: /* SQSUB, UQSUB */
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
+ }
+ return;
+ case 0x08: /* SSHL, USHL */
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
+ }
+ return;
+ case 0x0c: /* SMAX, UMAX */
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
+ }
+ return;
+ case 0x0d: /* SMIN, UMIN */
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
+ }
+ return;
+ case 0xe: /* SABD, UABD */
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
+ }
+ return;
+ case 0xf: /* SABA, UABA */
+ if (u) {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
+ } else {
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
+ }
+ return;
case 0x10: /* ADD, SUB */
if (u) {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
@@ -10920,19 +11192,29 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
case 0x13: /* MUL, PMUL */
if (!u) { /* MUL */
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
- return;
+ } else { /* PMUL */
+ gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
}
- break;
+ return;
case 0x12: /* MLA, MLS */
if (u) {
- gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
} else {
- gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
+ }
+ return;
+ case 0x16: /* SQDMULH, SQRDMULH */
+ {
+ static gen_helper_gvec_3_ptr * const fns[2][2] = {
+ { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
+ { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
+ };
+ gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
}
return;
case 0x11:
if (!u) { /* CMTST */
- gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
return;
}
/* else CMEQ */
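/*
 * Reference sketch (not from the patch) of the per-element operation
 * behind the SQDMULH/SQRDMULH helpers selected above, shown for 16-bit
 * lanes: doubling multiply, keep the high half (with optional
 * rounding), saturate on overflow (the real helpers also set QC).
 * The name sqrdmulh_h16 is illustrative only.
 */
static inline int16_t sqrdmulh_h16(int16_t a, int16_t b, bool round)
{
    int64_t p = 2 * (int64_t)a * b + (round ? 1 << 15 : 0);

    p >>= 16;
    if (p > INT16_MAX) {
        p = INT16_MAX;
    } else if (p < INT16_MIN) {
        p = INT16_MIN;
    }
    return p;
}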
@@ -10964,10 +11246,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
write_vec_element(s, tcg_res, rd, pass, MO_64);
-
- tcg_temp_free_i64(tcg_res);
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
}
} else {
for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
@@ -10991,16 +11269,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genfn = fns[size][u];
break;
}
- case 0x1: /* SQADD, UQADD */
- {
- static NeonGenTwoOpEnvFn * const fns[3][2] = {
- { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
- { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
- { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
- };
- genenvfn = fns[size][u];
- break;
- }
case 0x2: /* SRHADD, URHADD */
{
static NeonGenTwoOpFn * const fns[3][2] = {
@@ -11021,26 +11289,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genfn = fns[size][u];
break;
}
- case 0x5: /* SQSUB, UQSUB */
- {
- static NeonGenTwoOpEnvFn * const fns[3][2] = {
- { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
- { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
- { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
- };
- genenvfn = fns[size][u];
- break;
- }
- case 0x8: /* SSHL, USHL */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
- { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
- { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
- };
- genfn = fns[size][u];
- break;
- }
case 0x9: /* SQSHL, UQSHL */
{
static NeonGenTwoOpEnvFn * const fns[3][2] = {
@@ -11071,80 +11319,17 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genenvfn = fns[size][u];
break;
}
- case 0xc: /* SMAX, UMAX */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
- { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
- { tcg_gen_smax_i32, tcg_gen_umax_i32 },
- };
- genfn = fns[size][u];
- break;
- }
-
- case 0xd: /* SMIN, UMIN */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
- { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
- { tcg_gen_smin_i32, tcg_gen_umin_i32 },
- };
- genfn = fns[size][u];
- break;
- }
- case 0xe: /* SABD, UABD */
- case 0xf: /* SABA, UABA */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
- { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
- { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
- };
- genfn = fns[size][u];
- break;
- }
- case 0x13: /* MUL, PMUL */
- assert(u); /* PMUL */
- assert(size == 0);
- genfn = gen_helper_neon_mul_p8;
- break;
- case 0x16: /* SQDMULH, SQRDMULH */
- {
- static NeonGenTwoOpEnvFn * const fns[2][2] = {
- { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
- { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
- };
- assert(size == 1 || size == 2);
- genenvfn = fns[size - 1][u];
- break;
- }
default:
g_assert_not_reached();
}
if (genenvfn) {
- genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
+ genenvfn(tcg_res, tcg_env, tcg_op1, tcg_op2);
} else {
genfn(tcg_res, tcg_op1, tcg_op2);
}
- if (opcode == 0xf) {
- /* SABA, UABA: accumulating ops */
- static NeonGenTwoOpFn * const fns[3] = {
- gen_helper_neon_add_u8,
- gen_helper_neon_add_u16,
- tcg_gen_add_i32,
- };
-
- read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
- fns[size](tcg_res, tcg_op1, tcg_res);
- }
-
write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
-
- tcg_temp_free_i32(tcg_res);
- tcg_temp_free_i32(tcg_op1);
- tcg_temp_free_i32(tcg_op2);
}
}
clear_vec_high(s, is_q, rd);
@@ -11213,38 +11398,46 @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
*/
static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
{
- int opcode, fpopcode;
- int is_q, u, a, rm, rn, rd;
- int datasize, elements;
- int pass;
- TCGv_ptr fpst;
- bool pairwise = false;
-
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- /* For these floating point ops, the U, a and opcode bits
+ int opcode = extract32(insn, 11, 3);
+ int u = extract32(insn, 29, 1);
+ int a = extract32(insn, 23, 1);
+ int is_q = extract32(insn, 30, 1);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ /*
+ * For these floating point ops, the U, a and opcode bits
* together indicate the operation.
*/
- opcode = extract32(insn, 11, 3);
- u = extract32(insn, 29, 1);
- a = extract32(insn, 23, 1);
- is_q = extract32(insn, 30, 1);
- rm = extract32(insn, 16, 5);
- rn = extract32(insn, 5, 5);
- rd = extract32(insn, 0, 5);
-
- fpopcode = opcode | (a << 3) | (u << 4);
- datasize = is_q ? 128 : 64;
- elements = datasize / 16;
+ int fpopcode = opcode | (a << 3) | (u << 4);
+ int datasize = is_q ? 128 : 64;
+ int elements = datasize / 16;
+ bool pairwise;
+ TCGv_ptr fpst;
+ int pass;
switch (fpopcode) {
+ case 0x0: /* FMAXNM */
+ case 0x1: /* FMLA */
+ case 0x2: /* FADD */
+ case 0x3: /* FMULX */
+ case 0x4: /* FCMEQ */
+ case 0x6: /* FMAX */
+ case 0x7: /* FRECPS */
+ case 0x8: /* FMINNM */
+ case 0x9: /* FMLS */
+ case 0xa: /* FSUB */
+ case 0xe: /* FMIN */
+ case 0xf: /* FRSQRTS */
+ case 0x13: /* FMUL */
+ case 0x14: /* FCMGE */
+ case 0x15: /* FACGE */
+ case 0x17: /* FDIV */
+ case 0x1a: /* FABD */
+ case 0x1c: /* FCMGT */
+ case 0x1d: /* FACGT */
+ pairwise = false;
+ break;
case 0x10: /* FMAXNMP */
case 0x12: /* FADDP */
case 0x16: /* FMAXP */
@@ -11252,9 +11445,21 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
case 0x1e: /* FMINP */
pairwise = true;
break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!dc_isar_feature(aa64_fp16, s)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!fp_access_check(s)) {
+ return;
}
- fpst = get_fpstatus_ptr(true);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
if (pairwise) {
int maxpass = is_q ? 8 : 4;
@@ -11295,12 +11500,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
for (pass = 0; pass < maxpass; pass++) {
write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
- tcg_temp_free_i32(tcg_res[pass]);
}
-
- tcg_temp_free_i32(tcg_op1);
- tcg_temp_free_i32(tcg_op2);
-
} else {
for (pass = 0; pass < elements; pass++) {
TCGv_i32 tcg_op1 = tcg_temp_new_i32();
@@ -11376,20 +11576,13 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
default:
- fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
- __func__, insn, fpopcode, s->pc);
g_assert_not_reached();
}
write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
- tcg_temp_free_i32(tcg_res);
- tcg_temp_free_i32(tcg_op1);
- tcg_temp_free_i32(tcg_op2);
}
}
- tcg_temp_free_ptr(fpst);
-
clear_vec_high(s, is_q, rd);
}
@@ -11408,7 +11601,8 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
int size = extract32(insn, 22, 2);
bool u = extract32(insn, 29, 1);
bool is_q = extract32(insn, 30, 1);
- int feature, rot;
+ bool feature;
+ int rot;
switch (u * 16 + opcode) {
case 0x10: /* SQRDMLAH (vector) */
@@ -11417,7 +11611,7 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
- feature = ARM_FEATURE_V8_RDM;
+ feature = dc_isar_feature(aa64_rdm, s);
break;
case 0x02: /* SDOT (vector) */
case 0x12: /* UDOT (vector) */
@@ -11425,7 +11619,23 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
- feature = ARM_FEATURE_V8_DOTPROD;
+ feature = dc_isar_feature(aa64_dp, s);
+ break;
+ case 0x03: /* USDOT */
+ if (size != MO_32) {
+ unallocated_encoding(s);
+ return;
+ }
+ feature = dc_isar_feature(aa64_i8mm, s);
+ break;
+ case 0x04: /* SMMLA */
+ case 0x14: /* UMMLA */
+ case 0x05: /* USMMLA */
+ if (!is_q || size != MO_32) {
+ unallocated_encoding(s);
+ return;
+ }
+ feature = dc_isar_feature(aa64_i8mm, s);
break;
case 0x18: /* FCMLA, #0 */
case 0x19: /* FCMLA, #90 */
@@ -11434,18 +11644,36 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
case 0x1c: /* FCADD, #90 */
case 0x1e: /* FCADD, #270 */
if (size == 0
- || (size == 1 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))
+ || (size == 1 && !dc_isar_feature(aa64_fp16, s))
|| (size == 3 && !is_q)) {
unallocated_encoding(s);
return;
}
- feature = ARM_FEATURE_V8_FCMA;
+ feature = dc_isar_feature(aa64_fcma, s);
+ break;
+ case 0x1d: /* BFMMLA */
+ if (size != MO_16 || !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ feature = dc_isar_feature(aa64_bf16, s);
+ break;
+ case 0x1f:
+ switch (size) {
+ case 1: /* BFDOT */
+ case 3: /* BFMLAL{B,T} */
+ feature = dc_isar_feature(aa64_bf16, s);
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
break;
default:
unallocated_encoding(s);
return;
}
- if (!arm_dc_feature(s, feature)) {
+ if (!feature) {
unallocated_encoding(s);
return;
}
@@ -11455,36 +11683,31 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
switch (opcode) {
case 0x0: /* SQRDMLAH (vector) */
- switch (size) {
- case 1:
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16);
- break;
- case 2:
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32);
- break;
- default:
- g_assert_not_reached();
- }
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
return;
case 0x1: /* SQRDMLSH (vector) */
- switch (size) {
- case 1:
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16);
- break;
- case 2:
- gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32);
- break;
- default:
- g_assert_not_reached();
- }
+ gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
return;
case 0x2: /* SDOT / UDOT */
- gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0,
+ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
return;
+ case 0x3: /* USDOT */
+ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
+ return;
+
+ case 0x04: /* SMMLA, UMMLA */
+ gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
+ u ? gen_helper_gvec_ummla_b
+ : gen_helper_gvec_smmla_b);
+ return;
+ case 0x05: /* USMMLA */
+ gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
+ return;
+
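/*
 * Reference sketch (not from the patch) of the per-lane dot product
 * behind the SDOT/UDOT/USDOT helpers above: each 32-bit destination
 * lane accumulates four byte products taken from the same lane of the
 * sources; the destination is now passed as a fourth operand so the
 * accumulation happens inside the helper.  sdot_lane is illustrative.
 */
static inline int32_t sdot_lane(int32_t acc, const int8_t n[4],
                                const int8_t m[4])
{
    int i;

    for (i = 0; i < 4; i++) {
        acc += n[i] * m[i];
    }
    return acc;
}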
case 0x8: /* FCMLA, #0 */
case 0x9: /* FCMLA, #90 */
case 0xa: /* FCMLA, #180 */
@@ -11492,15 +11715,15 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
rot = extract32(opcode, 0, 2);
switch (size) {
case 1:
- gen_gvec_op3_fpst(s, is_q, rd, rn, rm, true, rot,
+ gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
gen_helper_gvec_fcmlah);
break;
case 2:
- gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
+ gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
gen_helper_gvec_fcmlas);
break;
case 3:
- gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
+ gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
gen_helper_gvec_fcmlad);
break;
default:
@@ -11529,6 +11752,23 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
}
return;
+ case 0xd: /* BFMMLA */
+ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
+ return;
+ case 0xf:
+ switch (size) {
+ case 1: /* BFDOT */
+ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
+ break;
+ case 3: /* BFMLAL{B,T} */
+ gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
+ gen_helper_gvec_bfmlal);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return;
+
default:
g_assert_not_reached();
}
@@ -11553,18 +11793,16 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
tcg_res[pass] = tcg_temp_new_i64();
read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
- gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
- tcg_temp_free_i32(tcg_op);
+ gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env);
}
for (pass = 0; pass < 2; pass++) {
write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
- tcg_temp_free_i64(tcg_res[pass]);
}
} else {
/* 16 -> 32 bit fp conversion */
int srcelt = is_q ? 4 : 0;
TCGv_i32 tcg_res[4];
- TCGv_ptr fpst = get_fpstatus_ptr(false);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
TCGv_i32 ahp = get_ahp_flag();
for (pass = 0; pass < 4; pass++) {
@@ -11576,11 +11814,7 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
}
for (pass = 0; pass < 4; pass++) {
write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
- tcg_temp_free_i32(tcg_res[pass]);
}
-
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i32(ahp);
}
}
@@ -11612,10 +11846,10 @@ static void handle_rev(DisasContext *s, int opcode, bool u,
read_vec_element(s, tcg_tmp, rn, i, grp_size);
switch (grp_size) {
case MO_16:
- tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
+ tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
break;
case MO_32:
- tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
+ tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
break;
case MO_64:
tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
@@ -11624,7 +11858,6 @@ static void handle_rev(DisasContext *s, int opcode, bool u,
g_assert_not_reached();
}
write_vec_element(s, tcg_tmp, rd, i, grp_size);
- tcg_temp_free_i64(tcg_tmp);
}
clear_vec_high(s, is_q, rd);
} else {
@@ -11632,26 +11865,26 @@ static void handle_rev(DisasContext *s, int opcode, bool u,
int esize = 8 << size;
int elements = dsize / esize;
TCGv_i64 tcg_rn = tcg_temp_new_i64();
- TCGv_i64 tcg_rd = tcg_const_i64(0);
- TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
+ TCGv_i64 tcg_rd[2];
+
+ for (i = 0; i < 2; i++) {
+ tcg_rd[i] = tcg_temp_new_i64();
+ tcg_gen_movi_i64(tcg_rd[i], 0);
+ }
for (i = 0; i < elements; i++) {
int e_rev = (i & 0xf) ^ revmask;
- int off = e_rev * esize;
+ int w = (e_rev * esize) / 64;
+ int o = (e_rev * esize) % 64;
+
read_vec_element(s, tcg_rn, rn, i, size);
- if (off >= 64) {
- tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
- tcg_rn, off - 64, esize);
- } else {
- tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
- }
+ tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
}
- write_vec_element(s, tcg_rd, rd, 0, MO_64);
- write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
- tcg_temp_free_i64(tcg_rd_hi);
- tcg_temp_free_i64(tcg_rd);
- tcg_temp_free_i64(tcg_rn);
+ for (i = 0; i < 2; i++) {
+ write_vec_element(s, tcg_rd[i], rd, i, MO_64);
+ }
+ clear_vec_high(s, true, rd);
}
}
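/*
 * Reference sketch (not from the patch) of the indexing used in the
 * rewritten loop above: each source element i is redirected to element
 * i ^ revmask of the 128-bit destination, which is kept as two 64-bit
 * accumulators selected by w with bit offset o.  rev_place is an
 * illustrative name only.
 */
static inline void rev_place(uint64_t rd[2], uint64_t elt,
                             int i, int revmask, int esize)
{
    int e_rev = (i & 0xf) ^ revmask;
    int w = (e_rev * esize) / 64;
    int o = (e_rev * esize) % 64;
    uint64_t mask = (esize < 64 ? (1ULL << esize) : 0) - 1;

    rd[w] = (rd[w] & ~(mask << o)) | ((elt & mask) << o);
}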
@@ -11670,7 +11903,7 @@ static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
if (size == 2) {
/* 32 + 32 -> 64 op */
- TCGMemOp memop = size + (u ? 0 : MO_SIGN);
+ MemOp memop = size + (u ? 0 : MO_SIGN);
for (pass = 0; pass < maxpass; pass++) {
TCGv_i64 tcg_op1 = tcg_temp_new_i64();
@@ -11685,15 +11918,12 @@ static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
read_vec_element(s, tcg_op1, rd, pass, MO_64);
tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
}
-
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
}
} else {
for (pass = 0; pass < maxpass; pass++) {
TCGv_i64 tcg_op = tcg_temp_new_i64();
- NeonGenOneOpFn *genfn;
- static NeonGenOneOpFn * const fns[2][2] = {
+ NeonGenOne64OpFn *genfn;
+ static NeonGenOne64OpFn * const fns[2][2] = {
{ gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
{ gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
};
@@ -11715,15 +11945,13 @@ static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
tcg_res[pass], tcg_op);
}
}
- tcg_temp_free_i64(tcg_op);
}
}
if (!is_q) {
- tcg_res[1] = tcg_const_i64(0);
+ tcg_res[1] = tcg_constant_i64(0);
}
for (pass = 0; pass < 2; pass++) {
write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
- tcg_temp_free_i64(tcg_res[pass]);
}
}
@@ -11747,13 +11975,10 @@ static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
tcg_res[pass] = tcg_temp_new_i64();
widenfn(tcg_res[pass], tcg_op);
tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
-
- tcg_temp_free_i32(tcg_op);
}
for (pass = 0; pass < 2; pass++) {
write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
- tcg_temp_free_i64(tcg_res[pass]);
}
}
@@ -11772,7 +11997,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
bool need_fpstatus = false;
- bool need_rmode = false;
int rmode = -1;
TCGv_i32 tcg_rmode;
TCGv_ptr tcg_fpstatus;
@@ -11865,8 +12089,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
}
break;
case 0xc ... 0xf:
- case 0x16 ... 0x1d:
- case 0x1f:
+ case 0x16 ... 0x1f:
{
/* Floating point: U, size[1] and opcode indicate operation;
* size[0] indicates single or double precision.
@@ -11923,7 +12146,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
case 0x7a: /* FCVTPU */
case 0x7b: /* FCVTZU */
need_fpstatus = true;
- need_rmode = true;
rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
if (size == 3 && !is_q) {
unallocated_encoding(s);
@@ -11933,7 +12155,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
case 0x5c: /* FCVTAU */
case 0x1c: /* FCVTAS */
need_fpstatus = true;
- need_rmode = true;
rmode = FPROUNDING_TIEAWAY;
if (size == 3 && !is_q) {
unallocated_encoding(s);
@@ -11972,6 +12193,16 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
}
handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
return;
+ case 0x36: /* BFCVTN, BFCVTN2 */
+ if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
+ unallocated_encoding(s);
+ return;
+ }
+ if (!fp_access_check(s)) {
+ return;
+ }
+ handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
+ return;
case 0x17: /* FCVTL, FCVTL2 */
if (!fp_access_check(s)) {
return;
@@ -11982,7 +12213,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
case 0x19: /* FRINTM */
case 0x38: /* FRINTP */
case 0x39: /* FRINTZ */
- need_rmode = true;
rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
/* fall through */
case 0x59: /* FRINTX */
@@ -11994,7 +12224,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
}
break;
case 0x58: /* FRINTA */
- need_rmode = true;
rmode = FPROUNDING_TIEAWAY;
need_fpstatus = true;
if (size == 3 && !is_q) {
@@ -12007,7 +12236,18 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
+ break;
+ case 0x1e: /* FRINT32Z */
+ case 0x1f: /* FRINT64Z */
+ rmode = FPROUNDING_ZERO;
+ /* fall through */
+ case 0x5e: /* FRINT32X */
+ case 0x5f: /* FRINT64X */
need_fpstatus = true;
+ if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
+ unallocated_encoding(s);
+ return;
+ }
break;
default:
unallocated_encoding(s);
@@ -12024,14 +12264,13 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
return;
}
- if (need_fpstatus || need_rmode) {
- tcg_fpstatus = get_fpstatus_ptr(false);
+ if (need_fpstatus || rmode >= 0) {
+ tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
} else {
tcg_fpstatus = NULL;
}
- if (need_rmode) {
- tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
+ if (rmode >= 0) {
+ tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
} else {
tcg_rmode = NULL;
}
@@ -12043,12 +12282,30 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
return;
}
break;
+ case 0x8: /* CMGT, CMGE */
+ if (u) {
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
+ } else {
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
+ }
+ return;
+ case 0x9: /* CMEQ, CMLE */
+ if (u) {
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
+ } else {
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
+ }
+ return;
+ case 0xa: /* CMLT */
+ gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
+ return;
case 0xb:
- if (u) { /* NEG */
+ if (u) { /* ABS, NEG */
gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
- return;
+ } else {
+ gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
}
- break;
+ return;
}
if (size == 3) {
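/*
 * Per-element semantics of the new gvec compare-against-zero cases
 * above (reference only, 32-bit lane shown): the result is all ones
 * when the comparison holds and all zeroes otherwise.
 */
static inline int32_t cmp0_s32(int32_t x, int op)
{
    switch (op) {
    case 0: return x >  0 ? -1 : 0;    /* CMGT #0 */
    case 1: return x >= 0 ? -1 : 0;    /* CMGE #0 */
    case 2: return x == 0 ? -1 : 0;    /* CMEQ #0 */
    case 3: return x <= 0 ? -1 : 0;    /* CMLE #0 */
    default: return x <  0 ? -1 : 0;   /* CMLT #0 */
    }
}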
@@ -12069,9 +12326,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
tcg_rmode, tcg_fpstatus);
write_vec_element(s, tcg_res, rd, pass, MO_64);
-
- tcg_temp_free_i64(tcg_res);
- tcg_temp_free_i64(tcg_op);
}
} else {
int pass;
@@ -12079,29 +12333,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
TCGv_i32 tcg_op = tcg_temp_new_i32();
TCGv_i32 tcg_res = tcg_temp_new_i32();
- TCGCond cond;
read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
if (size == 2) {
/* Special cases for 32 bit elements */
switch (opcode) {
- case 0xa: /* CMLT */
- /* 32 bit integer comparison against zero, result is
- * test ? (2^32 - 1) : 0. We implement via setcond(test)
- * and inverting.
- */
- cond = TCG_COND_LT;
- do_cmop:
- tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
- tcg_gen_neg_i32(tcg_res, tcg_res);
- break;
- case 0x8: /* CMGT, CMGE */
- cond = u ? TCG_COND_GE : TCG_COND_GT;
- goto do_cmop;
- case 0x9: /* CMEQ, CMLE */
- cond = u ? TCG_COND_LE : TCG_COND_EQ;
- goto do_cmop;
case 0x4: /* CLS */
if (u) {
tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
@@ -12111,20 +12348,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
break;
case 0x7: /* SQABS, SQNEG */
if (u) {
- gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
- } else {
- gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
- }
- break;
- case 0xb: /* ABS, NEG */
- if (u) {
- tcg_gen_neg_i32(tcg_res, tcg_op);
+ gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op);
} else {
- TCGv_i32 tcg_zero = tcg_const_i32(0);
- tcg_gen_neg_i32(tcg_res, tcg_op);
- tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
- tcg_zero, tcg_op, tcg_res);
- tcg_temp_free_i32(tcg_zero);
+ gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op);
}
break;
case 0x2f: /* FABS */
@@ -12134,32 +12360,24 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
gen_helper_vfp_negs(tcg_res, tcg_op);
break;
case 0x7f: /* FSQRT */
- gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
+ gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
break;
case 0x1a: /* FCVTNS */
case 0x1b: /* FCVTMS */
case 0x1c: /* FCVTAS */
case 0x3a: /* FCVTPS */
case 0x3b: /* FCVTZS */
- {
- TCGv_i32 tcg_shift = tcg_const_i32(0);
gen_helper_vfp_tosls(tcg_res, tcg_op,
- tcg_shift, tcg_fpstatus);
- tcg_temp_free_i32(tcg_shift);
+ tcg_constant_i32(0), tcg_fpstatus);
break;
- }
case 0x5a: /* FCVTNU */
case 0x5b: /* FCVTMU */
case 0x5c: /* FCVTAU */
case 0x7a: /* FCVTPU */
case 0x7b: /* FCVTZU */
- {
- TCGv_i32 tcg_shift = tcg_const_i32(0);
gen_helper_vfp_touls(tcg_res, tcg_op,
- tcg_shift, tcg_fpstatus);
- tcg_temp_free_i32(tcg_shift);
+ tcg_constant_i32(0), tcg_fpstatus);
break;
- }
case 0x18: /* FRINTN */
case 0x19: /* FRINTM */
case 0x38: /* FRINTP */
@@ -12172,7 +12390,15 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
break;
case 0x7c: /* URSQRTE */
- gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
+ gen_helper_rsqrte_u32(tcg_res, tcg_op);
+ break;
+ case 0x1e: /* FRINT32Z */
+ case 0x5e: /* FRINT32X */
+ gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x1f: /* FRINT64Z */
+ case 0x5f: /* FRINT64X */
+ gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
break;
default:
g_assert_not_reached();
@@ -12198,56 +12424,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
{ gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
};
genfn = fns[size][u];
- genfn(tcg_res, cpu_env, tcg_op);
- break;
- }
- case 0x8: /* CMGT, CMGE */
- case 0x9: /* CMEQ, CMLE */
- case 0xa: /* CMLT */
- {
- static NeonGenTwoOpFn * const fns[3][2] = {
- { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
- { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
- { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
- };
- NeonGenTwoOpFn *genfn;
- int comp;
- bool reverse;
- TCGv_i32 tcg_zero = tcg_const_i32(0);
-
- /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
- comp = (opcode - 0x8) * 2 + u;
- /* ...but LE, LT are implemented as reverse GE, GT */
- reverse = (comp > 2);
- if (reverse) {
- comp = 4 - comp;
- }
- genfn = fns[comp][size];
- if (reverse) {
- genfn(tcg_res, tcg_zero, tcg_op);
- } else {
- genfn(tcg_res, tcg_op, tcg_zero);
- }
- tcg_temp_free_i32(tcg_zero);
+ genfn(tcg_res, tcg_env, tcg_op);
break;
}
- case 0xb: /* ABS, NEG */
- if (u) {
- TCGv_i32 tcg_zero = tcg_const_i32(0);
- if (size) {
- gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
- } else {
- gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
- }
- tcg_temp_free_i32(tcg_zero);
- } else {
- if (size) {
- gen_helper_neon_abs_s16(tcg_res, tcg_op);
- } else {
- gen_helper_neon_abs_s8(tcg_res, tcg_op);
- }
- }
- break;
case 0x4: /* CLS, CLZ */
if (u) {
if (size == 0) {
@@ -12269,19 +12448,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
}
write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
-
- tcg_temp_free_i32(tcg_res);
- tcg_temp_free_i32(tcg_op);
}
}
clear_vec_high(s, is_q, rd);
- if (need_rmode) {
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
- tcg_temp_free_i32(tcg_rmode);
- }
- if (need_fpstatus) {
- tcg_temp_free_ptr(tcg_fpstatus);
+ if (tcg_rmode) {
+ gen_restore_rmode(tcg_rmode, tcg_fpstatus);
}
}
@@ -12310,11 +12482,10 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
int pass;
TCGv_i32 tcg_rmode = NULL;
TCGv_ptr tcg_fpstatus = NULL;
- bool need_rmode = false;
bool need_fpst = true;
- int rmode;
+ int rmode = -1;
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (!dc_isar_feature(aa64_fp16, s)) {
unallocated_encoding(s);
return;
}
@@ -12331,9 +12502,6 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
fpop = deposit32(opcode, 5, 1, a);
fpop = deposit32(fpop, 6, 1, u);
- rd = extract32(insn, 0, 5);
- rn = extract32(insn, 5, 5);
-
switch (fpop) {
case 0x1d: /* SCVTF */
case 0x5d: /* UCVTF */
@@ -12364,27 +12532,22 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
case 0x3f: /* FRECPX */
break;
case 0x18: /* FRINTN */
- need_rmode = true;
only_in_vector = true;
rmode = FPROUNDING_TIEEVEN;
break;
case 0x19: /* FRINTM */
- need_rmode = true;
only_in_vector = true;
rmode = FPROUNDING_NEGINF;
break;
case 0x38: /* FRINTP */
- need_rmode = true;
only_in_vector = true;
rmode = FPROUNDING_POSINF;
break;
case 0x39: /* FRINTZ */
- need_rmode = true;
only_in_vector = true;
rmode = FPROUNDING_ZERO;
break;
case 0x58: /* FRINTA */
- need_rmode = true;
only_in_vector = true;
rmode = FPROUNDING_TIEAWAY;
break;
@@ -12394,43 +12557,33 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
/* current rounding mode */
break;
case 0x1a: /* FCVTNS */
- need_rmode = true;
rmode = FPROUNDING_TIEEVEN;
break;
case 0x1b: /* FCVTMS */
- need_rmode = true;
rmode = FPROUNDING_NEGINF;
break;
case 0x1c: /* FCVTAS */
- need_rmode = true;
rmode = FPROUNDING_TIEAWAY;
break;
case 0x3a: /* FCVTPS */
- need_rmode = true;
rmode = FPROUNDING_POSINF;
break;
case 0x3b: /* FCVTZS */
- need_rmode = true;
rmode = FPROUNDING_ZERO;
break;
case 0x5a: /* FCVTNU */
- need_rmode = true;
rmode = FPROUNDING_TIEEVEN;
break;
case 0x5b: /* FCVTMU */
- need_rmode = true;
rmode = FPROUNDING_NEGINF;
break;
case 0x5c: /* FCVTAU */
- need_rmode = true;
rmode = FPROUNDING_TIEAWAY;
break;
case 0x7a: /* FCVTPU */
- need_rmode = true;
rmode = FPROUNDING_POSINF;
break;
case 0x7b: /* FCVTZU */
- need_rmode = true;
rmode = FPROUNDING_ZERO;
break;
case 0x2f: /* FABS */
@@ -12441,8 +12594,8 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
case 0x7f: /* FSQRT (vector) */
break;
default:
- fprintf(stderr, "%s: insn %#04x fpop %#2x\n", __func__, insn, fpop);
- g_assert_not_reached();
+ unallocated_encoding(s);
+ return;
}
@@ -12463,13 +12616,12 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
return;
}
- if (need_rmode || need_fpst) {
- tcg_fpstatus = get_fpstatus_ptr(true);
+ if (rmode >= 0 || need_fpst) {
+ tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
}
- if (need_rmode) {
- tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
+ if (rmode >= 0) {
+ tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
}
if (is_scalar) {
@@ -12510,9 +12662,6 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
/* limit any sign extension going on */
tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
write_fp_sreg(s, rd, tcg_res);
-
- tcg_temp_free_i32(tcg_res);
- tcg_temp_free_i32(tcg_op);
} else {
for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
TCGv_i32 tcg_op = tcg_temp_new_i32();
@@ -12566,21 +12715,13 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
}
write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
-
- tcg_temp_free_i32(tcg_res);
- tcg_temp_free_i32(tcg_op);
}
clear_vec_high(s, is_q, rd);
}
if (tcg_rmode) {
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
- tcg_temp_free_i32(tcg_rmode);
- }
-
- if (tcg_fpstatus) {
- tcg_temp_free_ptr(tcg_fpstatus);
+ gen_restore_rmode(tcg_rmode, tcg_fpstatus);
}
}
@@ -12659,14 +12800,44 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
break;
case 0x1d: /* SQRDMLAH */
case 0x1f: /* SQRDMLSH */
- if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
+ if (!dc_isar_feature(aa64_rdm, s)) {
unallocated_encoding(s);
return;
}
break;
case 0x0e: /* SDOT */
case 0x1e: /* UDOT */
- if (size != MO_32 || !arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
+ if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x0f:
+ switch (size) {
+ case 0: /* SUDOT */
+ case 2: /* USDOT */
+ if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
+ unallocated_encoding(s);
+ return;
+ }
+ size = MO_32;
+ break;
+ case 1: /* BFDOT */
+ if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
+ unallocated_encoding(s);
+ return;
+ }
+ size = MO_32;
+ break;
+ case 3: /* BFMLAL{B,T} */
+ if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* can't set is_fp without other incorrect size checks */
+ size = MO_16;
+ break;
+ default:
unallocated_encoding(s);
return;
}
@@ -12675,12 +12846,23 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
case 0x13: /* FCMLA #90 */
case 0x15: /* FCMLA #180 */
case 0x17: /* FCMLA #270 */
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) {
+ if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
unallocated_encoding(s);
return;
}
is_fp = 2;
break;
+ case 0x00: /* FMLAL */
+ case 0x04: /* FMLSL */
+ case 0x18: /* FMLAL2 */
+ case 0x1c: /* FMLSL2 */
+ if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
+ unallocated_encoding(s);
+ return;
+ }
+ size = MO_16;
+ /* is_fp, but we pass tcg_env not fp_status. */
+ break;
default:
unallocated_encoding(s);
return;
@@ -12688,7 +12870,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
switch (is_fp) {
case 1: /* normal fp */
- /* convert insn encoded size to TCGMemOp size */
+ /* convert insn encoded size to MemOp size */
switch (size) {
case 0: /* half-precision */
size = MO_16;
@@ -12705,7 +12887,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
case 2: /* complex fp */
/* Each indexable element is a complex pair. */
- size <<= 1;
+ size += 1;
switch (size) {
case MO_32:
if (h && !is_q) {
@@ -12731,12 +12913,12 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
break;
}
- if (is_fp16 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
unallocated_encoding(s);
return;
}
- /* Given TCGMemOp size, adjust register and indexing. */
+ /* Given MemOp size, adjust register and indexing. */
switch (size) {
case MO_16:
index = h << 2 | l << 1 | m;
@@ -12762,7 +12944,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
if (is_fp) {
- fpst = get_fpstatus_ptr(is_fp16);
+ fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
} else {
fpst = NULL;
}
@@ -12770,10 +12952,30 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
switch (16 * u + opcode) {
case 0x0e: /* SDOT */
case 0x1e: /* UDOT */
- gen_gvec_op3_ool(s, is_q, rd, rn, rm, index,
+ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
u ? gen_helper_gvec_udot_idx_b
: gen_helper_gvec_sdot_idx_b);
return;
+ case 0x0f:
+ switch (extract32(insn, 22, 2)) {
+ case 0: /* SUDOT */
+ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
+ gen_helper_gvec_sudot_idx_b);
+ return;
+ case 1: /* BFDOT */
+ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
+ gen_helper_gvec_bfdot_idx);
+ return;
+ case 2: /* USDOT */
+ gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
+ gen_helper_gvec_usdot_idx_b);
+ return;
+ case 3: /* BFMLAL{B,T} */
+ gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
+ gen_helper_gvec_bfmlal_idx);
+ return;
+ }
+ g_assert_not_reached();
case 0x11: /* FCMLA #0 */
case 0x13: /* FCMLA #90 */
case 0x15: /* FCMLA #180 */
@@ -12781,16 +12983,82 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
{
int rot = extract32(insn, 13, 2);
int data = (index << 2) | rot;
- tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm), fpst,
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, rd), fpst,
is_q ? 16 : 8, vec_full_reg_size(s), data,
size == MO_64
? gen_helper_gvec_fcmlas_idx
: gen_helper_gvec_fcmlah_idx);
- tcg_temp_free_ptr(fpst);
}
return;
+
+ case 0x00: /* FMLAL */
+ case 0x04: /* FMLSL */
+ case 0x18: /* FMLAL2 */
+ case 0x1c: /* FMLSL2 */
+ {
+ int is_s = extract32(opcode, 2, 1);
+ int is_2 = u;
+ int data = (index << 2) | (is_2 << 1) | is_s;
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), tcg_env,
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ data, gen_helper_gvec_fmlal_idx_a64);
+ }
+ return;
+
+ case 0x08: /* MUL */
+ if (!is_long && !is_scalar) {
+ static gen_helper_gvec_3 * const fns[3] = {
+ gen_helper_gvec_mul_idx_h,
+ gen_helper_gvec_mul_idx_s,
+ gen_helper_gvec_mul_idx_d,
+ };
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ index, fns[size - 1]);
+ return;
+ }
+ break;
+
+ case 0x10: /* MLA */
+ if (!is_long && !is_scalar) {
+ static gen_helper_gvec_4 * const fns[3] = {
+ gen_helper_gvec_mla_idx_h,
+ gen_helper_gvec_mla_idx_s,
+ gen_helper_gvec_mla_idx_d,
+ };
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, rd),
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ index, fns[size - 1]);
+ return;
+ }
+ break;
+
+ case 0x14: /* MLS */
+ if (!is_long && !is_scalar) {
+ static gen_helper_gvec_4 * const fns[3] = {
+ gen_helper_gvec_mls_idx_h,
+ gen_helper_gvec_mls_idx_s,
+ gen_helper_gvec_mls_idx_d,
+ };
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, rd),
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ index, fns[size - 1]);
+ return;
+ }
+ break;
}
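/*
 * Reference sketch (not from the patch) of the by-element MLA wired up
 * above for 32-bit lanes: every lane of rn is multiplied by the single
 * lane of rm selected by the index, then accumulated into rd.
 * mla_idx_s32 is an illustrative name only; MUL and MLS differ only in
 * dropping or negating the accumulation.
 */
static inline void mla_idx_s32(int32_t d[4], const int32_t n[4],
                               const int32_t m[4], int index)
{
    int i;

    for (i = 0; i < 4; i++) {
        d[i] += n[i] * m[index];
    }
}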
if (size == 3) {
@@ -12827,11 +13095,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
write_vec_element(s, tcg_res, rd, pass, MO_64);
- tcg_temp_free_i64(tcg_op);
- tcg_temp_free_i64(tcg_res);
}
- tcg_temp_free_i64(tcg_idx);
clear_vec_high(s, !is_scalar, rd);
} else if (!is_long) {
/* 32 bit floating point, or 16 or 32 bit integer.
@@ -12958,19 +13223,19 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
break;
case 0x0c: /* SQDMULH */
if (size == 1) {
- gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
+ gen_helper_neon_qdmulh_s16(tcg_res, tcg_env,
tcg_op, tcg_idx);
} else {
- gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
+ gen_helper_neon_qdmulh_s32(tcg_res, tcg_env,
tcg_op, tcg_idx);
}
break;
case 0x0d: /* SQRDMULH */
if (size == 1) {
- gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
+ gen_helper_neon_qrdmulh_s16(tcg_res, tcg_env,
tcg_op, tcg_idx);
} else {
- gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
+ gen_helper_neon_qrdmulh_s32(tcg_res, tcg_env,
tcg_op, tcg_idx);
}
break;
@@ -12978,10 +13243,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
read_vec_element_i32(s, tcg_res, rd, pass,
is_scalar ? size : MO_32);
if (size == 1) {
- gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
+ gen_helper_neon_qrdmlah_s16(tcg_res, tcg_env,
tcg_op, tcg_idx, tcg_res);
} else {
- gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
+ gen_helper_neon_qrdmlah_s32(tcg_res, tcg_env,
tcg_op, tcg_idx, tcg_res);
}
break;
@@ -12989,10 +13254,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
read_vec_element_i32(s, tcg_res, rd, pass,
is_scalar ? size : MO_32);
if (size == 1) {
- gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
+ gen_helper_neon_qrdmlsh_s16(tcg_res, tcg_env,
tcg_op, tcg_idx, tcg_res);
} else {
- gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
+ gen_helper_neon_qrdmlsh_s32(tcg_res, tcg_env,
tcg_op, tcg_idx, tcg_res);
}
break;
@@ -13005,19 +13270,15 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
} else {
write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
}
-
- tcg_temp_free_i32(tcg_op);
- tcg_temp_free_i32(tcg_res);
}
- tcg_temp_free_i32(tcg_idx);
clear_vec_high(s, is_q, rd);
} else {
/* long ops: 16x16->32 or 32x32->64 */
TCGv_i64 tcg_res[2];
int pass;
bool satop = extract32(opcode, 0, 1);
- TCGMemOp memop = MO_32;
+ MemOp memop = MO_32;
if (satop || !u) {
memop |= MO_SIGN;
@@ -13051,11 +13312,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
- tcg_temp_free_i64(tcg_op);
if (satop) {
/* saturating, doubling */
- gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
+ gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env,
tcg_passres, tcg_passres);
}
@@ -13077,16 +13337,14 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
tcg_gen_neg_i64(tcg_passres, tcg_passres);
/* fall through */
case 0x3: /* SQDMLAL, SQDMLAL2 */
- gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
+ gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env,
tcg_res[pass],
tcg_passres);
break;
default:
g_assert_not_reached();
}
- tcg_temp_free_i64(tcg_passres);
}
- tcg_temp_free_i64(tcg_idx);
clear_vec_high(s, !is_scalar, rd);
} else {
@@ -13129,10 +13387,9 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
}
if (satop) {
- gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
+ gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env,
tcg_passres, tcg_passres);
}
- tcg_temp_free_i32(tcg_op);
if (opcode == 0xa || opcode == 0xb) {
continue;
@@ -13154,16 +13411,14 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
/* fall through */
case 0x3: /* SQDMLAL, SQDMLAL2 */
- gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
+ gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env,
tcg_res[pass],
tcg_passres);
break;
default:
g_assert_not_reached();
}
- tcg_temp_free_i64(tcg_passres);
}
- tcg_temp_free_i32(tcg_idx);
if (is_scalar) {
tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
@@ -13171,18 +13426,13 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
}
if (is_scalar) {
- tcg_res[1] = tcg_const_i64(0);
+ tcg_res[1] = tcg_constant_i64(0);
}
for (pass = 0; pass < 2; pass++) {
write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
- tcg_temp_free_i64(tcg_res[pass]);
}
}
-
- if (fpst) {
- tcg_temp_free_ptr(fpst);
- }
}
/* Crypto AES
@@ -13197,33 +13447,26 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn)
int opcode = extract32(insn, 12, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- int decrypt;
- TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
- TCGv_i32 tcg_decrypt;
- CryptoThreeOpIntFn *genfn;
+ gen_helper_gvec_2 *genfn2 = NULL;
+ gen_helper_gvec_3 *genfn3 = NULL;
- if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
- || size != 0) {
+ if (!dc_isar_feature(aa64_aes, s) || size != 0) {
unallocated_encoding(s);
return;
}
switch (opcode) {
case 0x4: /* AESE */
- decrypt = 0;
- genfn = gen_helper_crypto_aese;
+ genfn3 = gen_helper_crypto_aese;
break;
case 0x6: /* AESMC */
- decrypt = 0;
- genfn = gen_helper_crypto_aesmc;
+ genfn2 = gen_helper_crypto_aesmc;
break;
case 0x5: /* AESD */
- decrypt = 1;
- genfn = gen_helper_crypto_aese;
+ genfn3 = gen_helper_crypto_aesd;
break;
case 0x7: /* AESIMC */
- decrypt = 1;
- genfn = gen_helper_crypto_aesmc;
+ genfn2 = gen_helper_crypto_aesimc;
break;
default:
unallocated_encoding(s);
@@ -13233,16 +13476,11 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn)
if (!fp_access_check(s)) {
return;
}
-
- tcg_rd_ptr = vec_full_reg_ptr(s, rd);
- tcg_rn_ptr = vec_full_reg_ptr(s, rn);
- tcg_decrypt = tcg_const_i32(decrypt);
-
- genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt);
-
- tcg_temp_free_ptr(tcg_rd_ptr);
- tcg_temp_free_ptr(tcg_rn_ptr);
- tcg_temp_free_i32(tcg_decrypt);
+ if (genfn2) {
+ gen_gvec_op2_ool(s, true, rd, rn, 0, genfn2);
+ } else {
+ gen_gvec_op3_ool(s, true, rd, rd, rn, 0, genfn3);
+ }
}
/* Crypto three-reg SHA
@@ -13258,9 +13496,8 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
int rm = extract32(insn, 16, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- CryptoThreeOpFn *genfn;
- TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
- int feature = ARM_FEATURE_V8_SHA256;
+ gen_helper_gvec_3 *genfn;
+ bool feature;
if (size != 0) {
unallocated_encoding(s);
@@ -13269,27 +13506,39 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
switch (opcode) {
case 0: /* SHA1C */
+ genfn = gen_helper_crypto_sha1c;
+ feature = dc_isar_feature(aa64_sha1, s);
+ break;
case 1: /* SHA1P */
+ genfn = gen_helper_crypto_sha1p;
+ feature = dc_isar_feature(aa64_sha1, s);
+ break;
case 2: /* SHA1M */
+ genfn = gen_helper_crypto_sha1m;
+ feature = dc_isar_feature(aa64_sha1, s);
+ break;
case 3: /* SHA1SU0 */
- genfn = NULL;
- feature = ARM_FEATURE_V8_SHA1;
+ genfn = gen_helper_crypto_sha1su0;
+ feature = dc_isar_feature(aa64_sha1, s);
break;
case 4: /* SHA256H */
genfn = gen_helper_crypto_sha256h;
+ feature = dc_isar_feature(aa64_sha256, s);
break;
case 5: /* SHA256H2 */
genfn = gen_helper_crypto_sha256h2;
+ feature = dc_isar_feature(aa64_sha256, s);
break;
case 6: /* SHA256SU1 */
genfn = gen_helper_crypto_sha256su1;
+ feature = dc_isar_feature(aa64_sha256, s);
break;
default:
unallocated_encoding(s);
return;
}
- if (!arm_dc_feature(s, feature)) {
+ if (!feature) {
unallocated_encoding(s);
return;
}
@@ -13297,24 +13546,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
if (!fp_access_check(s)) {
return;
}
-
- tcg_rd_ptr = vec_full_reg_ptr(s, rd);
- tcg_rn_ptr = vec_full_reg_ptr(s, rn);
- tcg_rm_ptr = vec_full_reg_ptr(s, rm);
-
- if (genfn) {
- genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
- } else {
- TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
-
- gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
- tcg_rm_ptr, tcg_opcode);
- tcg_temp_free_i32(tcg_opcode);
- }
-
- tcg_temp_free_ptr(tcg_rd_ptr);
- tcg_temp_free_ptr(tcg_rn_ptr);
- tcg_temp_free_ptr(tcg_rm_ptr);
+ gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
}
/* Crypto two-reg SHA
@@ -13329,9 +13561,8 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
int opcode = extract32(insn, 12, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- CryptoTwoOpFn *genfn;
- int feature;
- TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
+ gen_helper_gvec_2 *genfn;
+ bool feature;
if (size != 0) {
unallocated_encoding(s);
@@ -13340,15 +13571,15 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
switch (opcode) {
case 0: /* SHA1H */
- feature = ARM_FEATURE_V8_SHA1;
+ feature = dc_isar_feature(aa64_sha1, s);
genfn = gen_helper_crypto_sha1h;
break;
case 1: /* SHA1SU1 */
- feature = ARM_FEATURE_V8_SHA1;
+ feature = dc_isar_feature(aa64_sha1, s);
genfn = gen_helper_crypto_sha1su1;
break;
case 2: /* SHA256SU0 */
- feature = ARM_FEATURE_V8_SHA256;
+ feature = dc_isar_feature(aa64_sha256, s);
genfn = gen_helper_crypto_sha256su0;
break;
default:
@@ -13356,7 +13587,7 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
return;
}
- if (!arm_dc_feature(s, feature)) {
+ if (!feature) {
unallocated_encoding(s);
return;
}
@@ -13364,14 +13595,33 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
if (!fp_access_check(s)) {
return;
}
+ gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
+}
- tcg_rd_ptr = vec_full_reg_ptr(s, rd);
- tcg_rn_ptr = vec_full_reg_ptr(s, rn);
+static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
+{
+ tcg_gen_rotli_i64(d, m, 1);
+ tcg_gen_xor_i64(d, d, n);
+}
- genfn(tcg_rd_ptr, tcg_rn_ptr);
+static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
+{
+ tcg_gen_rotli_vec(vece, d, m, 1);
+ tcg_gen_xor_vec(vece, d, d, n);
+}
- tcg_temp_free_ptr(tcg_rd_ptr);
- tcg_temp_free_ptr(tcg_rn_ptr);
+void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
+ static const GVecGen3 op = {
+ .fni8 = gen_rax1_i64,
+ .fniv = gen_rax1_vec,
+ .opt_opc = vecop_list,
+ .fno = gen_helper_crypto_rax1,
+ .vece = MO_64,
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
}
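
Per 64-bit lane, RAX1 is simply n XOR (m rotated left by one), which is what both the fni8 and fniv expanders above emit. As a rough reference for that per-lane semantics, a minimal standalone plain-C sketch (not a QEMU helper; rotl64 is an illustrative local function):

#include <stdint.h>

/* Rotate left by r bits, 1 <= r <= 63. */
static uint64_t rotl64(uint64_t x, unsigned r)
{
    return (x << r) | (x >> (64 - r));
}

/* Reference semantics of one RAX1 lane: d = n ^ rol(m, 1). */
static uint64_t rax1_lane(uint64_t n, uint64_t m)
{
    return n ^ rotl64(m, 1);
}

int main(void)
{
    /* rol(1, 1) == 2, so rax1_lane(0, 1) must be 2. */
    return rax1_lane(0, 1) == 2 ? 0 : 1;
}

The gvec expansion applies this independently to every 64-bit element of the operands.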
/* Crypto three-reg SHA512
@@ -13387,41 +13637,44 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
int rm = extract32(insn, 16, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- int feature;
- CryptoThreeOpFn *genfn;
+ bool feature;
+ gen_helper_gvec_3 *oolfn = NULL;
+ GVecGen3Fn *gvecfn = NULL;
if (o == 0) {
switch (opcode) {
case 0: /* SHA512H */
- feature = ARM_FEATURE_V8_SHA512;
- genfn = gen_helper_crypto_sha512h;
+ feature = dc_isar_feature(aa64_sha512, s);
+ oolfn = gen_helper_crypto_sha512h;
break;
case 1: /* SHA512H2 */
- feature = ARM_FEATURE_V8_SHA512;
- genfn = gen_helper_crypto_sha512h2;
+ feature = dc_isar_feature(aa64_sha512, s);
+ oolfn = gen_helper_crypto_sha512h2;
break;
case 2: /* SHA512SU1 */
- feature = ARM_FEATURE_V8_SHA512;
- genfn = gen_helper_crypto_sha512su1;
+ feature = dc_isar_feature(aa64_sha512, s);
+ oolfn = gen_helper_crypto_sha512su1;
break;
case 3: /* RAX1 */
- feature = ARM_FEATURE_V8_SHA3;
- genfn = NULL;
+ feature = dc_isar_feature(aa64_sha3, s);
+ gvecfn = gen_gvec_rax1;
break;
+ default:
+ g_assert_not_reached();
}
} else {
switch (opcode) {
case 0: /* SM3PARTW1 */
- feature = ARM_FEATURE_V8_SM3;
- genfn = gen_helper_crypto_sm3partw1;
+ feature = dc_isar_feature(aa64_sm3, s);
+ oolfn = gen_helper_crypto_sm3partw1;
break;
case 1: /* SM3PARTW2 */
- feature = ARM_FEATURE_V8_SM3;
- genfn = gen_helper_crypto_sm3partw2;
+ feature = dc_isar_feature(aa64_sm3, s);
+ oolfn = gen_helper_crypto_sm3partw2;
break;
case 2: /* SM4EKEY */
- feature = ARM_FEATURE_V8_SM4;
- genfn = gen_helper_crypto_sm4ekey;
+ feature = dc_isar_feature(aa64_sm4, s);
+ oolfn = gen_helper_crypto_sm4ekey;
break;
default:
unallocated_encoding(s);
@@ -13429,7 +13682,7 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
}
}
- if (!arm_dc_feature(s, feature)) {
+ if (!feature) {
unallocated_encoding(s);
return;
}
@@ -13438,41 +13691,10 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
return;
}
- if (genfn) {
- TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
-
- tcg_rd_ptr = vec_full_reg_ptr(s, rd);
- tcg_rn_ptr = vec_full_reg_ptr(s, rn);
- tcg_rm_ptr = vec_full_reg_ptr(s, rm);
-
- genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
-
- tcg_temp_free_ptr(tcg_rd_ptr);
- tcg_temp_free_ptr(tcg_rn_ptr);
- tcg_temp_free_ptr(tcg_rm_ptr);
+ if (oolfn) {
+ gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
} else {
- TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
- int pass;
-
- tcg_op1 = tcg_temp_new_i64();
- tcg_op2 = tcg_temp_new_i64();
- tcg_res[0] = tcg_temp_new_i64();
- tcg_res[1] = tcg_temp_new_i64();
-
- for (pass = 0; pass < 2; pass++) {
- read_vec_element(s, tcg_op1, rn, pass, MO_64);
- read_vec_element(s, tcg_op2, rm, pass, MO_64);
-
- tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1);
- tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
- }
- write_vec_element(s, tcg_res[0], rd, 0, MO_64);
- write_vec_element(s, tcg_res[1], rd, 1, MO_64);
-
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
- tcg_temp_free_i64(tcg_res[0]);
- tcg_temp_free_i64(tcg_res[1]);
+ gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
}
}
@@ -13487,25 +13709,21 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
int opcode = extract32(insn, 10, 2);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
- int feature;
- CryptoTwoOpFn *genfn;
+ bool feature;
switch (opcode) {
case 0: /* SHA512SU0 */
- feature = ARM_FEATURE_V8_SHA512;
- genfn = gen_helper_crypto_sha512su0;
+ feature = dc_isar_feature(aa64_sha512, s);
break;
case 1: /* SM4E */
- feature = ARM_FEATURE_V8_SM4;
- genfn = gen_helper_crypto_sm4e;
+ feature = dc_isar_feature(aa64_sm4, s);
break;
default:
unallocated_encoding(s);
return;
}
- if (!arm_dc_feature(s, feature)) {
+ if (!feature) {
unallocated_encoding(s);
return;
}
@@ -13514,13 +13732,16 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
return;
}
- tcg_rd_ptr = vec_full_reg_ptr(s, rd);
- tcg_rn_ptr = vec_full_reg_ptr(s, rn);
-
- genfn(tcg_rd_ptr, tcg_rn_ptr);
-
- tcg_temp_free_ptr(tcg_rd_ptr);
- tcg_temp_free_ptr(tcg_rn_ptr);
+ switch (opcode) {
+ case 0: /* SHA512SU0 */
+ gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
+ break;
+ case 1: /* SM4E */
+ gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
/* Crypto four-register
@@ -13536,22 +13757,22 @@ static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
int ra = extract32(insn, 10, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- int feature;
+ bool feature;
switch (op0) {
case 0: /* EOR3 */
case 1: /* BCAX */
- feature = ARM_FEATURE_V8_SHA3;
+ feature = dc_isar_feature(aa64_sha3, s);
break;
case 2: /* SM3SS1 */
- feature = ARM_FEATURE_V8_SM3;
+ feature = dc_isar_feature(aa64_sm3, s);
break;
default:
unallocated_encoding(s);
return;
}
- if (!arm_dc_feature(s, feature)) {
+ if (!feature) {
unallocated_encoding(s);
return;
}
@@ -13586,12 +13807,6 @@ static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
}
write_vec_element(s, tcg_res[0], rd, 0, MO_64);
write_vec_element(s, tcg_res[1], rd, 1, MO_64);
-
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
- tcg_temp_free_i64(tcg_op3);
- tcg_temp_free_i64(tcg_res[0]);
- tcg_temp_free_i64(tcg_res[1]);
} else {
TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
@@ -13599,7 +13814,7 @@ static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
tcg_op2 = tcg_temp_new_i32();
tcg_op3 = tcg_temp_new_i32();
tcg_res = tcg_temp_new_i32();
- tcg_zero = tcg_const_i32(0);
+ tcg_zero = tcg_constant_i32(0);
read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
@@ -13614,12 +13829,6 @@ static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
-
- tcg_temp_free_i32(tcg_op1);
- tcg_temp_free_i32(tcg_op2);
- tcg_temp_free_i32(tcg_op3);
- tcg_temp_free_i32(tcg_res);
- tcg_temp_free_i32(tcg_zero);
}
}
@@ -13635,10 +13844,8 @@ static void disas_crypto_xar(DisasContext *s, uint32_t insn)
int imm6 = extract32(insn, 10, 6);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
- int pass;
- if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA3)) {
+ if (!dc_isar_feature(aa64_sha3, s)) {
unallocated_encoding(s);
return;
}
@@ -13647,25 +13854,10 @@ static void disas_crypto_xar(DisasContext *s, uint32_t insn)
return;
}
- tcg_op1 = tcg_temp_new_i64();
- tcg_op2 = tcg_temp_new_i64();
- tcg_res[0] = tcg_temp_new_i64();
- tcg_res[1] = tcg_temp_new_i64();
-
- for (pass = 0; pass < 2; pass++) {
- read_vec_element(s, tcg_op1, rn, pass, MO_64);
- read_vec_element(s, tcg_op2, rm, pass, MO_64);
-
- tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
- tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6);
- }
- write_vec_element(s, tcg_res[0], rd, 0, MO_64);
- write_vec_element(s, tcg_res[1], rd, 1, MO_64);
-
- tcg_temp_free_i64(tcg_op1);
- tcg_temp_free_i64(tcg_op2);
- tcg_temp_free_i64(tcg_res[0]);
- tcg_temp_free_i64(tcg_res[1]);
+ gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), imm6, 16,
+ vec_full_reg_size(s));
}
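
XAR now uses a gvec expansion instead of the removed per-pass loop, but the per-lane result is unchanged: exclusive-or the two 64-bit elements, then rotate right by imm6. A hedged plain-C restatement of that lane operation (a sketch, not the QEMU gen_gvec_xar itself):

#include <stdint.h>

/* Rotate right by r bits; r may be 0..63. */
static uint64_t rotr64(uint64_t x, unsigned r)
{
    r &= 63;
    return r ? (x >> r) | (x << (64 - r)) : x;
}

/* Reference semantics of one XAR lane: d = ror(n ^ m, imm6). */
static uint64_t xar_lane(uint64_t n, uint64_t m, unsigned imm6)
{
    return rotr64(n ^ m, imm6);
}

int main(void)
{
    /* (0xff00 ^ 0) rotated right by 8 is 0xff. */
    return xar_lane(0xff00ULL, 0, 8) == 0xffULL ? 0 : 1;
}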
/* Crypto three-reg imm2
@@ -13676,15 +13868,17 @@ static void disas_crypto_xar(DisasContext *s, uint32_t insn)
*/
static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
+ gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
+ };
int opcode = extract32(insn, 10, 2);
int imm2 = extract32(insn, 12, 2);
int rm = extract32(insn, 16, 5);
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
- TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
- TCGv_i32 tcg_imm2, tcg_opcode;
- if (!arm_dc_feature(s, ARM_FEATURE_V8_SM3)) {
+ if (!dc_isar_feature(aa64_sm3, s)) {
unallocated_encoding(s);
return;
}
@@ -13693,20 +13887,7 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
return;
}
- tcg_rd_ptr = vec_full_reg_ptr(s, rd);
- tcg_rn_ptr = vec_full_reg_ptr(s, rn);
- tcg_rm_ptr = vec_full_reg_ptr(s, rm);
- tcg_imm2 = tcg_const_i32(imm2);
- tcg_opcode = tcg_const_i32(opcode);
-
- gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2,
- tcg_opcode);
-
- tcg_temp_free_ptr(tcg_rd_ptr);
- tcg_temp_free_ptr(tcg_rn_ptr);
- tcg_temp_free_ptr(tcg_rm_ptr);
- tcg_temp_free_i32(tcg_imm2);
- tcg_temp_free_i32(tcg_opcode);
+ gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
}
/* C3.6 Data processing - SIMD, inc Crypto
@@ -13776,38 +13957,104 @@ static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
}
}
-/* C3.1 A64 instruction index by encoding */
-static void disas_a64_insn(CPUARMState *env, DisasContext *s)
+static bool trans_OK(DisasContext *s, arg_OK *a)
{
- uint32_t insn;
+ return true;
+}
- insn = arm_ldl_code(env, s->pc, s->sctlr_b);
- s->insn = insn;
- s->pc += 4;
+static bool trans_FAIL(DisasContext *s, arg_OK *a)
+{
+ s->is_nonstreaming = true;
+ return true;
+}
- s->fp_access_checked = false;
+/**
+ * is_guarded_page:
+ * @env: The cpu environment
+ * @s: The DisasContext
+ *
+ * Return true if the page is guarded.
+ */
+static bool is_guarded_page(CPUARMState *env, DisasContext *s)
+{
+ uint64_t addr = s->base.pc_first;
+#ifdef CONFIG_USER_ONLY
+ return page_get_flags(addr) & PAGE_BTI;
+#else
+ CPUTLBEntryFull *full;
+ void *host;
+ int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
+ int flags;
+
+ /*
+ * We test this immediately after reading an insn, which means
+ * that the TLB entry must be present and valid, and thus this
+ * access will never raise an exception.
+ */
+ flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
+ false, &host, &full, 0);
+ assert(!(flags & TLB_INVALID_MASK));
- switch (extract32(insn, 25, 4)) {
- case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
- unallocated_encoding(s);
- break;
- case 0x2:
- if (!arm_dc_feature(s, ARM_FEATURE_SVE) || !disas_sve(s, insn)) {
- unallocated_encoding(s);
+ return full->extra.arm.guarded;
+#endif
+}
+
+/**
+ * btype_destination_ok:
+ * @insn: The instruction at the branch destination
+ * @bt: SCTLR_ELx.BT
+ * @btype: PSTATE.BTYPE, which is known to be non-zero

+ *
+ * On a guarded page, there are a limited number of insns
+ * that may be present at the branch target:
+ * - branch target identifiers,
+ * - paciasp, pacibsp,
+ * - BRK insn
+ * - HLT insn
+ * Anything else causes a Branch Target Exception.
+ *
+ * Return true if the branch is compatible, false to raise BTITRAP.
+ */
+static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
+{
+ if ((insn & 0xfffff01fu) == 0xd503201fu) {
+ /* HINT space */
+ switch (extract32(insn, 5, 7)) {
+ case 0b011001: /* PACIASP */
+ case 0b011011: /* PACIBSP */
+ /*
+ * If SCTLR_ELx.BT, then PACI*SP are not compatible
+ * with btype == 3. Otherwise all btype are ok.
+ */
+ return !bt || btype != 3;
+ case 0b100000: /* BTI */
+ /* Not compatible with any btype. */
+ return false;
+ case 0b100010: /* BTI c */
+ /* Not compatible with btype == 3 */
+ return btype != 3;
+ case 0b100100: /* BTI j */
+ /* Not compatible with btype == 2 */
+ return btype != 2;
+ case 0b100110: /* BTI jc */
+ /* Compatible with any btype. */
+ return true;
}
- break;
- case 0x8: case 0x9: /* Data processing - immediate */
- disas_data_proc_imm(s, insn);
- break;
- case 0xa: case 0xb: /* Branch, exception generation and system insns */
- disas_b_exc_sys(s, insn);
- break;
- case 0x4:
- case 0x6:
- case 0xc:
- case 0xe: /* Loads and stores */
- disas_ldst(s, insn);
- break;
+ } else {
+ switch (insn & 0xffe0001fu) {
+ case 0xd4200000u: /* BRK */
+ case 0xd4400000u: /* HLT */
+ /* Give priority to the breakpoint exception. */
+ return true;
+ }
+ }
+ return false;
+}
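+
To make the rules above concrete, the following standalone sketch re-states the HINT-space classification for the SCTLR_ELx.BT == 0 case and checks a few well-known encodings (the helper name and the restriction to BT == 0 are assumptions made for illustration; the function above remains the authoritative check):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* HINT-space BTI compatibility with SCTLR_ELx.BT clear (sketch only). */
static bool bti_hint_ok(uint32_t insn, int btype)
{
    if ((insn & 0xfffff01fu) != 0xd503201fu) {
        return false;                      /* not in the HINT space */
    }
    switch ((insn >> 5) & 0x7f) {          /* op bits [11:5] */
    case 0x19:                             /* PACIASP */
    case 0x1b:                             /* PACIBSP: any btype when BT == 0 */
        return true;
    case 0x22:                             /* BTI c */
        return btype != 3;
    case 0x24:                             /* BTI j */
        return btype != 2;
    case 0x26:                             /* BTI jc */
        return true;
    default:                               /* BTI (no operand), NOP, other hints */
        return false;
    }
}

int main(void)
{
    assert(bti_hint_ok(0xd503233fu, 3));   /* PACIASP accepted            */
    assert(bti_hint_ok(0xd503245fu, 1));   /* BTI c with btype == 1       */
    assert(!bti_hint_ok(0xd503245fu, 3));  /* BTI c rejects btype == 3    */
    assert(!bti_hint_ok(0xd503201fu, 1));  /* a plain NOP is not a target */
    return 0;
}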
+
+/* C3.1 A64 instruction index by encoding */
+static void disas_a64_legacy(DisasContext *s, uint32_t insn)
+{
+ switch (extract32(insn, 25, 4)) {
case 0x5:
case 0xd: /* Data processing - register */
disas_data_proc_reg(s, insn);
@@ -13817,50 +14064,78 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s)
disas_data_proc_simd_fp(s, insn);
break;
default:
- assert(FALSE); /* all 15 cases should be handled above */
+ unallocated_encoding(s);
break;
}
-
- /* if we allocated any temporaries, free them here */
- free_tmp_a64(s);
}
static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
CPUState *cpu)
{
DisasContext *dc = container_of(dcbase, DisasContext, base);
- CPUARMState *env = cpu->env_ptr;
- ARMCPU *arm_cpu = arm_env_get_cpu(env);
- int bound;
+ CPUARMState *env = cpu_env(cpu);
+ ARMCPU *arm_cpu = env_archcpu(env);
+ CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
+ int bound, core_mmu_idx;
- dc->pc = dc->base.pc_first;
+ dc->isar = &arm_cpu->isar;
dc->condjmp = 0;
-
- dc->aarch64 = 1;
- /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
- * there is no secure EL1, so we route exceptions to EL3.
- */
- dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
- !arm_el_is_aa64(env, 3);
- dc->thumb = 0;
+ dc->pc_save = dc->base.pc_first;
+ dc->aarch64 = true;
+ dc->thumb = false;
dc->sctlr_b = 0;
- dc->be_data = ARM_TBFLAG_BE_DATA(dc->base.tb->flags) ? MO_BE : MO_LE;
+ dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
dc->condexec_mask = 0;
dc->condexec_cond = 0;
- dc->mmu_idx = core_to_arm_mmu_idx(env, ARM_TBFLAG_MMUIDX(dc->base.tb->flags));
- dc->tbi0 = ARM_TBFLAG_TBI0(dc->base.tb->flags);
- dc->tbi1 = ARM_TBFLAG_TBI1(dc->base.tb->flags);
+ core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
+ dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
+ dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
+ dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
+ dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
dc->user = (dc->current_el == 0);
#endif
- dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(dc->base.tb->flags);
- dc->sve_excp_el = ARM_TBFLAG_SVEEXC_EL(dc->base.tb->flags);
- dc->sve_len = (ARM_TBFLAG_ZCR_LEN(dc->base.tb->flags) + 1) * 16;
+ dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
+ dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
+ dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
+ dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
+ dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
+ dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
+ dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
+ dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
+ dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
+ dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
+ dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
+ dc->bt = EX_TBFLAG_A64(tb_flags, BT);
+ dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
+ dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
+ dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
+ dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
+ dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
+ dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
+ dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
+ dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
+ dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
+ dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
+ dc->nv = EX_TBFLAG_A64(tb_flags, NV);
+ dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
+ dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
+ dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
+ dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
dc->vec_len = 0;
dc->vec_stride = 0;
dc->cp_regs = arm_cpu->cp_regs;
dc->features = env->features;
+ dc->dcz_blocksize = arm_cpu->dcz_blocksize;
+ dc->gm_blocksize = arm_cpu->gm_blocksize;
+
+#ifdef CONFIG_USER_ONLY
+ /* In sve_probe_page, we assume TBI is enabled. */
+ tcg_debug_assert(dc->tbid & 1);
+#endif
+
+ dc->lse2 = dc_isar_feature(aa64_lse2, dc);
/* Single step state. The code-generation logic here is:
* SS_ACTIVE == 0:
@@ -13877,10 +14152,9 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
* emit code to generate a software step exception
* end the TB
*/
- dc->ss_active = ARM_TBFLAG_SS_ACTIVE(dc->base.tb->flags);
- dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(dc->base.tb->flags);
+ dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
+ dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
dc->is_ldex = false;
- dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
/* Bound the number of insns to execute to those left on the page. */
bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
@@ -13890,53 +14164,33 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
bound = 1;
}
dc->base.max_insns = MIN(dc->base.max_insns, bound);
-
- init_tmp_a64_array(dc);
}
static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
- tcg_clear_temp_count();
}
static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
DisasContext *dc = container_of(dcbase, DisasContext, base);
+ target_ulong pc_arg = dc->base.pc_next;
- tcg_gen_insn_start(dc->pc, 0, 0);
- dc->insn_start = tcg_last_op();
-}
-
-static bool aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
- const CPUBreakpoint *bp)
-{
- DisasContext *dc = container_of(dcbase, DisasContext, base);
-
- if (bp->flags & BP_CPU) {
- gen_a64_set_pc_im(dc->pc);
- gen_helper_check_breakpoints(cpu_env);
- /* End the TB early; it likely won't be executed */
- dc->base.is_jmp = DISAS_TOO_MANY;
- } else {
- gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
- /* The address covered by the breakpoint must be
- included in [tb->pc, tb->pc + tb->size) in order
- to for it to be properly cleared -- thus we
- increment the PC here so that the logic setting
- tb->size below does the right thing. */
- dc->pc += 4;
- dc->base.is_jmp = DISAS_NORETURN;
+ if (tb_cflags(dcbase->tb) & CF_PCREL) {
+ pc_arg &= ~TARGET_PAGE_MASK;
}
-
- return true;
+ tcg_gen_insn_start(pc_arg, 0, 0);
+ dc->insn_start_updated = false;
}
static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
- DisasContext *dc = container_of(dcbase, DisasContext, base);
- CPUARMState *env = cpu->env_ptr;
+ DisasContext *s = container_of(dcbase, DisasContext, base);
+ CPUARMState *env = cpu_env(cpu);
+ uint64_t pc = s->base.pc_next;
+ uint32_t insn;
- if (dc->ss_active && !dc->pstate_ss) {
+ /* Singlestep exceptions have the highest priority. */
+ if (s->ss_active && !s->pstate_ss) {
/* Singlestep state is Active-pending.
* If we're in this state at the start of a TB then either
* a) we just took an exception to an EL which is being debugged
@@ -13947,23 +14201,105 @@ static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
* "did not step an insn" case, and so the syndrome ISV and EX
* bits should be zero.
*/
- assert(dc->base.num_insns == 1);
- gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
- default_exception_el(dc));
- dc->base.is_jmp = DISAS_NORETURN;
- } else {
- disas_a64_insn(env, dc);
+ assert(s->base.num_insns == 1);
+ gen_swstep_exception(s, 0, 0);
+ s->base.is_jmp = DISAS_NORETURN;
+ s->base.pc_next = pc + 4;
+ return;
+ }
+
+ if (pc & 3) {
+ /*
+ * PC alignment fault. This has priority over the instruction abort
+ * that we would receive from a translation fault via arm_ldl_code.
+ * This should only be possible after an indirect branch, at the
+ * start of the TB.
+ */
+ assert(s->base.num_insns == 1);
+ gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
+ s->base.is_jmp = DISAS_NORETURN;
+ s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
+ return;
}
- dc->base.pc_next = dc->pc;
- translator_loop_temp_check(&dc->base);
+ s->pc_curr = pc;
+ insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
+ s->insn = insn;
+ s->base.pc_next = pc + 4;
+
+ s->fp_access_checked = false;
+ s->sve_access_checked = false;
+
+ if (s->pstate_il) {
+ /*
+ * Illegal execution state. This has priority over BTI
+ * exceptions, but comes after instruction abort exceptions.
+ */
+ gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
+ return;
+ }
+
+ if (dc_isar_feature(aa64_bti, s)) {
+ if (s->base.num_insns == 1) {
+ /*
+ * At the first insn of the TB, compute s->guarded_page.
+ * We delayed computing this until successfully reading
+ * the first insn of the TB, above. This (mostly) ensures
+ * that the softmmu tlb entry has been populated, and the
+ * page table GP bit is available.
+ *
+ * Note that we need to compute this even if btype == 0,
+ * because this value is used for BR instructions later
+ * where ENV is not available.
+ */
+ s->guarded_page = is_guarded_page(env, s);
+
+ /* First insn can have btype set to non-zero. */
+ tcg_debug_assert(s->btype >= 0);
+
+ /*
+ * Note that the Branch Target Exception has fairly high
+ * priority -- below debugging exceptions but above most
+ * everything else. This allows us to handle this now
+ * instead of waiting until the insn is otherwise decoded.
+ */
+ if (s->btype != 0
+ && s->guarded_page
+ && !btype_destination_ok(insn, s->bt, s->btype)) {
+ gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
+ return;
+ }
+ } else {
+ /* Not the first insn: btype must be 0. */
+ tcg_debug_assert(s->btype == 0);
+ }
+ }
+
+ s->is_nonstreaming = false;
+ if (s->sme_trap_nonstreaming) {
+ disas_sme_fa64(s, insn);
+ }
+
+ if (!disas_a64(s, insn) &&
+ !disas_sme(s, insn) &&
+ !disas_sve(s, insn)) {
+ disas_a64_legacy(s, insn);
+ }
+
+ /*
+ * After execution of most insns, btype is reset to 0.
+ * Note that we set btype == -1 when the insn sets btype.
+ */
+ if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
+ reset_btype(s);
+ }
}
static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
DisasContext *dc = container_of(dcbase, DisasContext, base);
- if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
+ if (unlikely(dc->ss_active)) {
/* Note that this means single stepping WFI doesn't halt the CPU.
* For conditional branch insns this is harmless unreachable code as
* gen_goto_tb() has already handled emitting the debug exception
@@ -13971,15 +14307,11 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
*/
switch (dc->base.is_jmp) {
default:
- gen_a64_set_pc_im(dc->pc);
+ gen_a64_update_pc(dc, 4);
/* fall through */
case DISAS_EXIT:
case DISAS_JUMP:
- if (dc->base.singlestep_enabled) {
- gen_exception_internal(EXCP_DEBUG);
- } else {
- gen_step_complete_exception(dc);
- }
+ gen_step_complete_exception(dc);
break;
case DISAS_NORETURN:
break;
@@ -13988,15 +14320,18 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
switch (dc->base.is_jmp) {
case DISAS_NEXT:
case DISAS_TOO_MANY:
- gen_goto_tb(dc, 1, dc->pc);
+ gen_goto_tb(dc, 1, 4);
break;
default:
- case DISAS_UPDATE:
- gen_a64_set_pc_im(dc->pc);
+ case DISAS_UPDATE_EXIT:
+ gen_a64_update_pc(dc, 4);
/* fall through */
case DISAS_EXIT:
tcg_gen_exit_tb(NULL, 0);
break;
+ case DISAS_UPDATE_NOCHAIN:
+ gen_a64_update_pc(dc, 4);
+ /* fall through */
case DISAS_JUMP:
tcg_gen_lookup_and_goto_ptr();
break;
@@ -14004,50 +14339,43 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
case DISAS_SWI:
break;
case DISAS_WFE:
- gen_a64_set_pc_im(dc->pc);
- gen_helper_wfe(cpu_env);
+ gen_a64_update_pc(dc, 4);
+ gen_helper_wfe(tcg_env);
break;
case DISAS_YIELD:
- gen_a64_set_pc_im(dc->pc);
- gen_helper_yield(cpu_env);
+ gen_a64_update_pc(dc, 4);
+ gen_helper_yield(tcg_env);
break;
case DISAS_WFI:
- {
- /* This is a special case because we don't want to just halt the CPU
- * if trying to debug across a WFI.
+ /*
+ * This is a special case because we don't want to just halt
+ * the CPU if trying to debug across a WFI.
*/
- TCGv_i32 tmp = tcg_const_i32(4);
-
- gen_a64_set_pc_im(dc->pc);
- gen_helper_wfi(cpu_env, tmp);
- tcg_temp_free_i32(tmp);
- /* The helper doesn't necessarily throw an exception, but we
+ gen_a64_update_pc(dc, 4);
+ gen_helper_wfi(tcg_env, tcg_constant_i32(4));
+ /*
+ * The helper doesn't necessarily throw an exception, but we
* must go back to the main loop to check for interrupts anyway.
*/
tcg_gen_exit_tb(NULL, 0);
break;
}
- }
}
-
- /* Functions above can change dc->pc, so re-align db->pc_next */
- dc->base.pc_next = dc->pc;
}
static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
- CPUState *cpu)
+ CPUState *cpu, FILE *logfile)
{
DisasContext *dc = container_of(dcbase, DisasContext, base);
- qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
- log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
+ fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
+ target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
}
const TranslatorOps aarch64_translator_ops = {
.init_disas_context = aarch64_tr_init_disas_context,
.tb_start = aarch64_tr_tb_start,
.insn_start = aarch64_tr_insn_start,
- .breakpoint_check = aarch64_tr_breakpoint_check,
.translate_insn = aarch64_tr_translate_insn,
.tb_stop = aarch64_tr_tb_stop,
.disas_log = aarch64_tr_disas_log,
diff --git a/target/arm/translate-a64.h b/target/arm/tcg/translate-a64.h
index 63d958cf50..7b811b8ac5 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/tcg/translate-a64.h
@@ -4,7 +4,7 @@
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -18,29 +18,42 @@
#ifndef TARGET_ARM_TRANSLATE_A64_H
#define TARGET_ARM_TRANSLATE_A64_H
-void unallocated_encoding(DisasContext *s);
-
-#define unsupported_encoding(s, insn) \
- do { \
- qemu_log_mask(LOG_UNIMP, \
- "%s:%d: unsupported instruction encoding 0x%08x " \
- "at pc=%016" PRIx64 "\n", \
- __FILE__, __LINE__, insn, s->pc - 4); \
- unallocated_encoding(s); \
- } while (0)
-
-TCGv_i64 new_tmp_a64(DisasContext *s);
-TCGv_i64 new_tmp_a64_zero(DisasContext *s);
TCGv_i64 cpu_reg(DisasContext *s, int reg);
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg);
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf);
TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf);
void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
-TCGv_ptr get_fpstatus_ptr(bool);
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
unsigned int imms, unsigned int immr);
-uint64_t vfp_expand_imm(int size, uint8_t imm8);
bool sve_access_check(DisasContext *s);
+bool sme_enabled_check(DisasContext *s);
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned);
+uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs,
+ uint32_t msz, bool is_write, uint32_t data);
+
+/* This function corresponds to CheckStreamingSVEEnabled. */
+static inline bool sme_sm_enabled_check(DisasContext *s)
+{
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK);
+}
+
+/* This function corresponds to CheckSMEAndZAEnabled. */
+static inline bool sme_za_enabled_check(DisasContext *s)
+{
+ return sme_enabled_check_with_svcr(s, R_SVCR_ZA_MASK);
+}
+
+/* Note that this function corresponds to CheckStreamingSVEAndZAEnabled. */
+static inline bool sme_smza_enabled_check(DisasContext *s)
+{
+ return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK | R_SVCR_ZA_MASK);
+}
+
+TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr);
+TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
+ bool tag_checked, MemOp memop);
+TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
+ bool tag_checked, int total_size, MemOp memop);
/* We should have at some point before trying to access an FP register
* done the necessary access check, so assert that
@@ -65,11 +78,11 @@ static inline void assert_fp_access_checked(DisasContext *s)
* the FP/vector register Qn.
*/
static inline int vec_reg_offset(DisasContext *s, int regno,
- int element, TCGMemOp size)
+ int element, MemOp size)
{
int element_size = 1 << size;
int offs = element * element_size;
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
/* This is complicated slightly because vfp.zregs[n].d[0] is
* still the lowest and vfp.zregs[n].d[15] the highest of the
* 256 byte vector, even on big endian systems.
@@ -104,23 +117,84 @@ static inline int vec_full_reg_offset(DisasContext *s, int regno)
static inline TCGv_ptr vec_full_reg_ptr(DisasContext *s, int regno)
{
TCGv_ptr ret = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(ret, cpu_env, vec_full_reg_offset(s, regno));
+ tcg_gen_addi_ptr(ret, tcg_env, vec_full_reg_offset(s, regno));
return ret;
}
/* Return the byte size of the "whole" vector register, VL / 8. */
static inline int vec_full_reg_size(DisasContext *s)
{
- return s->sve_len;
+ return s->vl;
+}
+
+/* Return the byte size of the vector register, SVL / 8. */
+static inline int streaming_vec_reg_size(DisasContext *s)
+{
+ return s->svl;
+}
+
+/*
+ * Return the offset into CPUARMState of the predicate vector register Pn.
+ * Note for this purpose, FFR is P16.
+ */
+static inline int pred_full_reg_offset(DisasContext *s, int regno)
+{
+ return offsetof(CPUARMState, vfp.pregs[regno]);
+}
+
+/* Return the byte size of the whole predicate register, VL / 64. */
+static inline int pred_full_reg_size(DisasContext *s)
+{
+ return s->vl >> 3;
+}
+
+/* Return the byte size of the predicate register, SVL / 64. */
+static inline int streaming_pred_reg_size(DisasContext *s)
+{
+ return s->svl >> 3;
+}
+
+/*
+ * Round up the size of a register to a size allowed by
+ * the tcg vector infrastructure. Any operation which uses this
+ * size may assume that the bits above pred_full_reg_size are zero,
+ * and must leave them the same way.
+ *
+ * Note that this is not needed for the vector registers as they
+ * are always properly sized for tcg vectors.
+ */
+static inline int size_for_gvec(int size)
+{
+ if (size <= 8) {
+ return 8;
+ } else {
+ return QEMU_ALIGN_UP(size, 16);
+ }
+}
+
+static inline int pred_gvec_reg_size(DisasContext *s)
+{
+ return size_for_gvec(pred_full_reg_size(s));
+}
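+
As a quick sanity check of the rounding rule above, a standalone sketch using a few legal SVE vector lengths; size_for_gvec_demo mirrors the inline size_for_gvec(), and the VL figures in the comments are illustrative:

#include <assert.h>

/* Mirrors size_for_gvec(): at least 8, otherwise padded to a multiple of 16. */
static int size_for_gvec_demo(int size)
{
    return size <= 8 ? 8 : (size + 15) & ~15;
}

int main(void)
{
    assert(size_for_gvec_demo(2) == 8);    /* VL = 128 bits -> 2-byte predicate   */
    assert(size_for_gvec_demo(8) == 8);    /* VL = 512 bits -> 8-byte predicate   */
    assert(size_for_gvec_demo(20) == 32);  /* VL = 1280 bits -> 20 rounds up      */
    assert(size_for_gvec_demo(32) == 32);  /* VL = 2048 bits -> already a multiple */
    return 0;
}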
+
+/* Return a newly allocated pointer to the predicate register. */
+static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
+{
+ TCGv_ptr ret = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(ret, tcg_env, pred_full_reg_offset(s, regno));
+ return ret;
}
bool disas_sve(DisasContext *, uint32_t);
+bool disas_sme(DisasContext *, uint32_t);
+
+void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, int64_t shift,
+ uint32_t opr_sz, uint32_t max_sz);
-/* Note that the gvec expanders operate on offsets + sizes. */
-typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
-typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
- uint32_t, uint32_t);
-typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
- uint32_t, uint32_t, uint32_t);
+void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
+void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm);
#endif /* TARGET_ARM_TRANSLATE_A64_H */
diff --git a/target/arm/tcg/translate-m-nocp.c b/target/arm/tcg/translate-m-nocp.c
new file mode 100644
index 0000000000..f564d06ccf
--- /dev/null
+++ b/target/arm/tcg/translate-m-nocp.c
@@ -0,0 +1,766 @@
+/*
+ * ARM translation: M-profile NOCP special-case instructions
+ *
+ * Copyright (c) 2020 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "translate.h"
+#include "translate-a32.h"
+
+#include "decode-m-nocp.c.inc"
+
+/*
+ * The decode of VLLDM and VLSTM is nonstandard because:
+ * * if there is no FPU then these insns must NOP in
+ * Secure state and UNDEF in Nonsecure state
+ * * if there is an FPU then these insns do not have
+ * the usual behaviour that vfp_access_check() provides of
+ * being controlled by CPACR/NSACR enable bits or the
+ * lazy-stacking logic.
+ */
+static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
+{
+ TCGv_i32 fptr;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_M) ||
+ !arm_dc_feature(s, ARM_FEATURE_V8)) {
+ return false;
+ }
+
+ if (a->op) {
+ /*
+ * T2 encoding ({D0-D31} reglist): v8.1M and up. We choose not
+ * to take the IMPDEF option to make memory accesses to the stack
+ * slots that correspond to the D16-D31 registers (discarding
+ * read data and writing UNKNOWN values), so for us the T2
+ * encoding behaves identically to the T1 encoding.
+ */
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ return false;
+ }
+ } else {
+ /*
+ * T1 encoding ({D0-D15} reglist); undef if we have 32 Dregs.
+ * This is currently architecturally impossible, but we add the
+ * check to stay in line with the pseudocode. Note that we must
+ * emit code for the UNDEF so it takes precedence over the NOCP.
+ */
+ if (dc_isar_feature(aa32_simd_r32, s)) {
+ unallocated_encoding(s);
+ return true;
+ }
+ }
+
+ /*
+ * If not secure, UNDEF. We must emit code for this
+ * rather than returning false so that this takes
+ * precedence over the m-nocp.decode NOCP fallback.
+ */
+ if (!s->v8m_secure) {
+ unallocated_encoding(s);
+ return true;
+ }
+
+ s->eci_handled = true;
+
+ /* If no fpu, NOP. */
+ if (!dc_isar_feature(aa32_vfp, s)) {
+ clear_eci_state(s);
+ return true;
+ }
+
+ fptr = load_reg(s, a->rn);
+ if (a->l) {
+ gen_helper_v7m_vlldm(tcg_env, fptr);
+ } else {
+ gen_helper_v7m_vlstm(tcg_env, fptr);
+ }
+
+ clear_eci_state(s);
+
+ /*
+ * End the TB, because we have updated FP control bits,
+ * and possibly VPR or LTPSIZE.
+ */
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
+ return true;
+}
+
+static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a)
+{
+ int btmreg, topreg;
+ TCGv_i64 zero;
+ TCGv_i32 aspen, sfpa;
+
+ if (!dc_isar_feature(aa32_m_sec_state, s)) {
+ /* Before v8.1M, fall through in decode to NOCP check */
+ return false;
+ }
+
+ /* Explicitly UNDEF because this takes precedence over NOCP */
+ if (!arm_dc_feature(s, ARM_FEATURE_M_MAIN) || !s->v8m_secure) {
+ unallocated_encoding(s);
+ return true;
+ }
+
+ s->eci_handled = true;
+
+ if (!dc_isar_feature(aa32_vfp_simd, s)) {
+ /* NOP if we have neither FP nor MVE */
+ clear_eci_state(s);
+ return true;
+ }
+
+ /*
+ * If FPCCR.ASPEN != 0 && CONTROL_S.SFPA == 0 then there is no
+ * active floating point context so we must NOP (without doing
+ * any lazy state preservation or the NOCP check).
+ */
+ aspen = load_cpu_field(v7m.fpccr[M_REG_S]);
+ sfpa = load_cpu_field(v7m.control[M_REG_S]);
+ tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
+ tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
+ tcg_gen_andi_i32(sfpa, sfpa, R_V7M_CONTROL_SFPA_MASK);
+ tcg_gen_or_i32(sfpa, sfpa, aspen);
+ arm_gen_condlabel(s);
+ tcg_gen_brcondi_i32(TCG_COND_EQ, sfpa, 0, s->condlabel.label);
+
+ if (s->fp_excp_el != 0) {
+ gen_exception_insn_el(s, 0, EXCP_NOCP,
+ syn_uncategorized(), s->fp_excp_el);
+ return true;
+ }
+
+ topreg = a->vd + a->imm - 1;
+ btmreg = a->vd;
+
+ /* Convert to Sreg numbers if the insn specified the reglist in Dregs */
+ if (a->size == 3) {
+ topreg = topreg * 2 + 1;
+ btmreg *= 2;
+ }
+
+ if (topreg > 63 || (topreg > 31 && !(topreg & 1))) {
+ /* UNPREDICTABLE: we choose to undef */
+ unallocated_encoding(s);
+ return true;
+ }
+
+ /* Silently ignore requests to clear D16-D31 if they don't exist */
+ if (topreg > 31 && !dc_isar_feature(aa32_simd_r32, s)) {
+ topreg = 31;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /* Zero the Sregs from btmreg to topreg inclusive. */
+ zero = tcg_constant_i64(0);
+ if (btmreg & 1) {
+ write_neon_element64(zero, btmreg >> 1, 1, MO_32);
+ btmreg++;
+ }
+ for (; btmreg + 1 <= topreg; btmreg += 2) {
+ write_neon_element64(zero, btmreg >> 1, 0, MO_64);
+ }
+ if (btmreg == topreg) {
+ write_neon_element64(zero, btmreg >> 1, 0, MO_32);
+ btmreg++;
+ }
+ assert(btmreg == topreg + 1);
+ if (dc_isar_feature(aa32_mve, s)) {
+ store_cpu_field(tcg_constant_i32(0), v7m.vpr);
+ }
+
+ clear_eci_state(s);
+ return true;
+}
+
+/*
+ * M-profile provides two different sets of instructions that can
+ * access floating point system registers: VMSR/VMRS (which move
+ * to/from a general purpose register) and VLDR/VSTR sysreg (which
+ * move directly to/from memory). In some cases there are also side
+ * effects which must happen after any write to memory (which could
+ * cause an exception). So we implement the common logic for the
+ * sysreg access in gen_M_fp_sysreg_write() and gen_M_fp_sysreg_read(),
+ * which take pointers to callback functions which will perform the
+ * actual "read/write general purpose register" and "read/write
+ * memory" operations.
+ */
+
+/*
+ * Emit code to store the sysreg to its final destination; frees the
+ * TCG temp 'value' it is passed. do_access is true to do the store,
+ * and false to skip it and only perform side-effects like base
+ * register writeback.
+ */
+typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value,
+ bool do_access);
+/*
+ * Emit code to load the value to be copied to the sysreg; returns
+ * a new TCG temporary. do_access is true to do the store,
+ * and false to skip it and only perform side-effects like base
+ * register writeback.
+ */
+typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque,
+ bool do_access);
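+
The callback split described above keeps one copy of the sysreg logic while letting VMSR/VMRS and VLDR/VSTR sysreg supply their own destinations. In miniature, and with entirely illustrative names, the shape of that pattern is:

#include <stdint.h>
#include <stdio.h>

typedef void demo_storefn(void *opaque, uint32_t value);

/* Common sysreg-read path: shared checks and side effects would live here,
 * while the callback decides where the value ends up. */
static void demo_sysreg_read(uint32_t sysreg_value,
                             demo_storefn *storefn, void *opaque)
{
    storefn(opaque, sysreg_value);
}

static void demo_store_to_gpr(void *opaque, uint32_t value)
{
    *(uint32_t *)opaque = value;           /* "write a general-purpose register" */
}

static void demo_store_to_stream(void *opaque, uint32_t value)
{
    fprintf((FILE *)opaque, "%08x\n", value);  /* stands in for "write to memory" */
}

int main(void)
{
    uint32_t gpr = 0;
    demo_sysreg_read(0x12345678u, demo_store_to_gpr, &gpr);
    demo_sysreg_read(gpr, demo_store_to_stream, stdout);
    return 0;
}

Both calls exercise the same core path with different destinations, which is the property gen_M_fp_sysreg_read() and gen_M_fp_sysreg_write() rely on.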
+
+/* Common decode/access checks for fp sysreg read/write */
+typedef enum FPSysRegCheckResult {
+ FPSysRegCheckFailed, /* caller should return false */
+ FPSysRegCheckDone, /* caller should return true */
+ FPSysRegCheckContinue, /* caller should continue generating code */
+} FPSysRegCheckResult;
+
+static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
+{
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
+ return FPSysRegCheckFailed;
+ }
+
+ switch (regno) {
+ case ARM_VFP_FPSCR:
+ case QEMU_VFP_FPSCR_NZCV:
+ break;
+ case ARM_VFP_FPSCR_NZCVQC:
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ return FPSysRegCheckFailed;
+ }
+ break;
+ case ARM_VFP_FPCXT_S:
+ case ARM_VFP_FPCXT_NS:
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ return FPSysRegCheckFailed;
+ }
+ if (!s->v8m_secure) {
+ return FPSysRegCheckFailed;
+ }
+ break;
+ case ARM_VFP_VPR:
+ case ARM_VFP_P0:
+ if (!dc_isar_feature(aa32_mve, s)) {
+ return FPSysRegCheckFailed;
+ }
+ break;
+ default:
+ return FPSysRegCheckFailed;
+ }
+
+ /*
+ * FPCXT_NS is a special case: it has specific handling for
+ * "current FP state is inactive", and must do the PreserveFPState()
+ * but not the usual full set of actions done by ExecuteFPCheck().
+ * So we don't call vfp_access_check() and the callers must handle this.
+ */
+ if (regno != ARM_VFP_FPCXT_NS && !vfp_access_check(s)) {
+ return FPSysRegCheckDone;
+ }
+ return FPSysRegCheckContinue;
+}
+
+static void gen_branch_fpInactive(DisasContext *s, TCGCond cond,
+ TCGLabel *label)
+{
+ /*
+ * FPCXT_NS is a special case: it has specific handling for
+ * "current FP state is inactive", and must do the PreserveFPState()
+ * but not the usual full set of actions done by ExecuteFPCheck().
+ * We don't have a TB flag that matches the fpInactive check, so we
+ * do it at runtime as we don't expect FPCXT_NS accesses to be frequent.
+ *
+ * Emit code that checks fpInactive and does a conditional
+ * branch to label based on it:
+ * if cond is TCG_COND_NE then branch if fpInactive != 0 (ie if inactive)
+ * if cond is TCG_COND_EQ then branch if fpInactive == 0 (ie if active)
+ */
+ assert(cond == TCG_COND_EQ || cond == TCG_COND_NE);
+
+ /* fpInactive = FPCCR_NS.ASPEN == 1 && CONTROL.FPCA == 0 */
+ TCGv_i32 aspen, fpca;
+ aspen = load_cpu_field(v7m.fpccr[M_REG_NS]);
+ fpca = load_cpu_field(v7m.control[M_REG_S]);
+ tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
+ tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
+ tcg_gen_andi_i32(fpca, fpca, R_V7M_CONTROL_FPCA_MASK);
+ tcg_gen_or_i32(fpca, fpca, aspen);
+ tcg_gen_brcondi_i32(tcg_invert_cond(cond), fpca, 0, label);
+}
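+
Stated directly, the value the TCG sequence above computes is zero exactly when fpInactive holds. A plain-C restatement of the predicate (the mask parameters stand in for the R_V7M_* constants; a sketch, not the runtime check):

#include <stdbool.h>
#include <stdint.h>

/* fpInactive = FPCCR_NS.ASPEN == 1 && CONTROL.FPCA == 0 */
static bool m_fp_inactive(uint32_t fpccr_ns, uint32_t control,
                          uint32_t aspen_mask, uint32_t fpca_mask)
{
    bool aspen = (fpccr_ns & aspen_mask) != 0;
    bool fpca = (control & fpca_mask) != 0;
    return aspen && !fpca;
}

int main(void)
{
    /* ASPEN set, FPCA clear -> inactive (illustrative mask values). */
    return m_fp_inactive(1u << 31, 0, 1u << 31, 1u << 2) ? 0 : 1;
}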
+
+static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
+ fp_sysreg_loadfn *loadfn,
+ void *opaque)
+{
+ /* Do a write to an M-profile floating point system register */
+ TCGv_i32 tmp;
+ TCGLabel *lab_end = NULL;
+
+ switch (fp_sysreg_checks(s, regno)) {
+ case FPSysRegCheckFailed:
+ return false;
+ case FPSysRegCheckDone:
+ return true;
+ case FPSysRegCheckContinue:
+ break;
+ }
+
+ switch (regno) {
+ case ARM_VFP_FPSCR:
+ tmp = loadfn(s, opaque, true);
+ gen_helper_vfp_set_fpscr(tcg_env, tmp);
+ gen_lookup_tb(s);
+ break;
+ case ARM_VFP_FPSCR_NZCVQC:
+ {
+ TCGv_i32 fpscr;
+ tmp = loadfn(s, opaque, true);
+ if (dc_isar_feature(aa32_mve, s)) {
+ /* QC is only present for MVE; otherwise RES0 */
+ TCGv_i32 qc = tcg_temp_new_i32();
+ tcg_gen_andi_i32(qc, tmp, FPCR_QC);
+ /*
+ * The 4 vfp.qc[] fields need only be "zero" vs "non-zero";
+ * here writing the same value into all elements is simplest.
+ */
+ tcg_gen_gvec_dup_i32(MO_32, offsetof(CPUARMState, vfp.qc),
+ 16, 16, qc);
+ }
+ tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
+ fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
+ tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
+ tcg_gen_or_i32(fpscr, fpscr, tmp);
+ store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
+ break;
+ }
+ case ARM_VFP_FPCXT_NS:
+ {
+ TCGLabel *lab_active = gen_new_label();
+
+ lab_end = gen_new_label();
+ gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
+ /*
+ * fpInactive case: write is a NOP, so only do side effects
+ * like register writeback before we branch to end
+ */
+ loadfn(s, opaque, false);
+ tcg_gen_br(lab_end);
+
+ gen_set_label(lab_active);
+ /*
+ * !fpInactive: if FPU disabled, take NOCP exception;
+ * otherwise PreserveFPState(), and then FPCXT_NS writes
+ * behave the same as FPCXT_S writes.
+ */
+ if (!vfp_access_check_m(s, true)) {
+ /*
+ * This was only a conditional exception, so override
+ * gen_exception_insn_el()'s default to DISAS_NORETURN
+ */
+ s->base.is_jmp = DISAS_NEXT;
+ break;
+ }
+ }
+ /* fall through */
+ case ARM_VFP_FPCXT_S:
+ {
+ TCGv_i32 sfpa, control;
+ /*
+ * Set FPSCR and CONTROL.SFPA from value; the new FPSCR takes
+ * bits [27:0] from value and zeroes bits [31:28].
+ */
+ tmp = loadfn(s, opaque, true);
+ sfpa = tcg_temp_new_i32();
+ tcg_gen_shri_i32(sfpa, tmp, 31);
+ control = load_cpu_field(v7m.control[M_REG_S]);
+ tcg_gen_deposit_i32(control, control, sfpa,
+ R_V7M_CONTROL_SFPA_SHIFT, 1);
+ store_cpu_field(control, v7m.control[M_REG_S]);
+ tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
+ gen_helper_vfp_set_fpscr(tcg_env, tmp);
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
+ break;
+ }
+ case ARM_VFP_VPR:
+ /* Behaves as NOP if not privileged */
+ if (IS_USER(s)) {
+ loadfn(s, opaque, false);
+ break;
+ }
+ tmp = loadfn(s, opaque, true);
+ store_cpu_field(tmp, v7m.vpr);
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
+ break;
+ case ARM_VFP_P0:
+ {
+ TCGv_i32 vpr;
+ tmp = loadfn(s, opaque, true);
+ vpr = load_cpu_field(v7m.vpr);
+ tcg_gen_deposit_i32(vpr, vpr, tmp,
+ R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
+ store_cpu_field(vpr, v7m.vpr);
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
+ break;
+ }
+ default:
+ g_assert_not_reached();
+ }
+ if (lab_end) {
+ gen_set_label(lab_end);
+ }
+ return true;
+}
+
+static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
+ fp_sysreg_storefn *storefn,
+ void *opaque)
+{
+ /* Do a read from an M-profile floating point system register */
+ TCGv_i32 tmp;
+ TCGLabel *lab_end = NULL;
+ bool lookup_tb = false;
+
+ switch (fp_sysreg_checks(s, regno)) {
+ case FPSysRegCheckFailed:
+ return false;
+ case FPSysRegCheckDone:
+ return true;
+ case FPSysRegCheckContinue:
+ break;
+ }
+
+ if (regno == ARM_VFP_FPSCR_NZCVQC && !dc_isar_feature(aa32_mve, s)) {
+ /* QC is RES0 without MVE, so NZCVQC simplifies to NZCV */
+ regno = QEMU_VFP_FPSCR_NZCV;
+ }
+
+ switch (regno) {
+ case ARM_VFP_FPSCR:
+ tmp = tcg_temp_new_i32();
+ gen_helper_vfp_get_fpscr(tmp, tcg_env);
+ storefn(s, opaque, tmp, true);
+ break;
+ case ARM_VFP_FPSCR_NZCVQC:
+ tmp = tcg_temp_new_i32();
+ gen_helper_vfp_get_fpscr(tmp, tcg_env);
+ tcg_gen_andi_i32(tmp, tmp, FPCR_NZCVQC_MASK);
+ storefn(s, opaque, tmp, true);
+ break;
+ case QEMU_VFP_FPSCR_NZCV:
+ /*
+ * Read just NZCV; this is a special case to avoid the
+ * helper call for the "VMRS to CPSR.NZCV" insn.
+ */
+ tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
+ tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
+ storefn(s, opaque, tmp, true);
+ break;
+ case ARM_VFP_FPCXT_S:
+ {
+ TCGv_i32 control, sfpa, fpscr;
+ /* Bits [27:0] from FPSCR, bit [31] from CONTROL.SFPA */
+ tmp = tcg_temp_new_i32();
+ sfpa = tcg_temp_new_i32();
+ gen_helper_vfp_get_fpscr(tmp, tcg_env);
+ tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
+ control = load_cpu_field(v7m.control[M_REG_S]);
+ tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
+ tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
+ tcg_gen_or_i32(tmp, tmp, sfpa);
+ /*
+ * Store result before updating FPSCR etc, in case
+ * it is a memory write which causes an exception.
+ */
+ storefn(s, opaque, tmp, true);
+ /*
+ * Now we must reset FPSCR from FPDSCR_NS, and clear
+ * CONTROL.SFPA; so we'll end the TB here.
+ */
+ tcg_gen_andi_i32(control, control, ~R_V7M_CONTROL_SFPA_MASK);
+ store_cpu_field(control, v7m.control[M_REG_S]);
+ fpscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
+ gen_helper_vfp_set_fpscr(tcg_env, fpscr);
+ lookup_tb = true;
+ break;
+ }
+ case ARM_VFP_FPCXT_NS:
+ {
+ TCGv_i32 control, sfpa, fpscr, fpdscr;
+ TCGLabel *lab_active = gen_new_label();
+
+ lookup_tb = true;
+
+ gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
+ /* fpInactive case: reads as FPDSCR_NS */
+ tmp = load_cpu_field(v7m.fpdscr[M_REG_NS]);
+ storefn(s, opaque, tmp, true);
+ lab_end = gen_new_label();
+ tcg_gen_br(lab_end);
+
+ gen_set_label(lab_active);
+ /*
+ * !fpInactive: if FPU disabled, take NOCP exception;
+ * otherwise PreserveFPState(), and then FPCXT_NS
+ * reads the same as FPCXT_S.
+ */
+ if (!vfp_access_check_m(s, true)) {
+ /*
+ * This was only a conditional exception, so override
+ * gen_exception_insn_el()'s default to DISAS_NORETURN
+ */
+ s->base.is_jmp = DISAS_NEXT;
+ break;
+ }
+ tmp = tcg_temp_new_i32();
+ sfpa = tcg_temp_new_i32();
+ fpscr = tcg_temp_new_i32();
+ gen_helper_vfp_get_fpscr(fpscr, tcg_env);
+ tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK);
+ control = load_cpu_field(v7m.control[M_REG_S]);
+ tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
+ tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
+ tcg_gen_or_i32(tmp, tmp, sfpa);
+ /* Store result before updating FPSCR, in case it faults */
+ storefn(s, opaque, tmp, true);
+ /* If SFPA is zero then set FPSCR from FPDSCR_NS */
+ fpdscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
+ tcg_gen_movcond_i32(TCG_COND_EQ, fpscr, sfpa, tcg_constant_i32(0),
+ fpdscr, fpscr);
+ gen_helper_vfp_set_fpscr(tcg_env, fpscr);
+ break;
+ }
+ case ARM_VFP_VPR:
+ /* Behaves as NOP if not privileged */
+ if (IS_USER(s)) {
+ storefn(s, opaque, NULL, false);
+ break;
+ }
+ tmp = load_cpu_field(v7m.vpr);
+ storefn(s, opaque, tmp, true);
+ break;
+ case ARM_VFP_P0:
+ tmp = load_cpu_field(v7m.vpr);
+ tcg_gen_extract_i32(tmp, tmp, R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
+ storefn(s, opaque, tmp, true);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (lab_end) {
+ gen_set_label(lab_end);
+ }
+ if (lookup_tb) {
+ gen_lookup_tb(s);
+ }
+ return true;
+}
+
+static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value,
+ bool do_access)
+{
+ arg_VMSR_VMRS *a = opaque;
+
+ if (!do_access) {
+ return;
+ }
+
+ if (a->rt == 15) {
+ /* Set the 4 flag bits in the CPSR */
+ gen_set_nzcv(value);
+ } else {
+ store_reg(s, a->rt, value);
+ }
+}
+
+static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque, bool do_access)
+{
+ arg_VMSR_VMRS *a = opaque;
+
+ if (!do_access) {
+ return NULL;
+ }
+ return load_reg(s, a->rt);
+}
+
+static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
+{
+ /*
+ * Accesses to R15 are UNPREDICTABLE; we choose to undef.
+ * FPSCR -> r15 is a special case which writes to the PSR flags;
+ * set a->reg to a special value to tell gen_M_fp_sysreg_read()
+ * we only care about the top 4 bits of FPSCR there.
+ */
+ if (a->rt == 15) {
+ if (a->l && a->reg == ARM_VFP_FPSCR) {
+ a->reg = QEMU_VFP_FPSCR_NZCV;
+ } else {
+ return false;
+ }
+ }
+
+ if (a->l) {
+ /* VMRS, move FP system register to gp register */
+ return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_gpr, a);
+ } else {
+ /* VMSR, move gp register to FP system register */
+ return gen_M_fp_sysreg_write(s, a->reg, gpr_to_fp_sysreg, a);
+ }
+}
+
+static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value,
+ bool do_access)
+{
+ arg_vldr_sysreg *a = opaque;
+ uint32_t offset = a->imm;
+ TCGv_i32 addr;
+
+ if (!a->a) {
+ offset = -offset;
+ }
+
+ if (!do_access && !a->w) {
+ return;
+ }
+
+ addr = load_reg(s, a->rn);
+ if (a->p) {
+ tcg_gen_addi_i32(addr, addr, offset);
+ }
+
+ if (s->v8m_stackcheck && a->rn == 13 && a->w) {
+ gen_helper_v8m_stackcheck(tcg_env, addr);
+ }
+
+ if (do_access) {
+ gen_aa32_st_i32(s, value, addr, get_mem_index(s),
+ MO_UL | MO_ALIGN | s->be_data);
+ }
+
+ if (a->w) {
+ /* writeback */
+ if (!a->p) {
+ tcg_gen_addi_i32(addr, addr, offset);
+ }
+ store_reg(s, a->rn, addr);
+ }
+}
+
+static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque,
+ bool do_access)
+{
+ arg_vldr_sysreg *a = opaque;
+ uint32_t offset = a->imm;
+ TCGv_i32 addr;
+ TCGv_i32 value = NULL;
+
+ if (!a->a) {
+ offset = -offset;
+ }
+
+ if (!do_access && !a->w) {
+ return NULL;
+ }
+
+ addr = load_reg(s, a->rn);
+ if (a->p) {
+ tcg_gen_addi_i32(addr, addr, offset);
+ }
+
+ if (s->v8m_stackcheck && a->rn == 13 && a->w) {
+ gen_helper_v8m_stackcheck(tcg_env, addr);
+ }
+
+ if (do_access) {
+ value = tcg_temp_new_i32();
+ gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
+ MO_UL | MO_ALIGN | s->be_data);
+ }
+
+ if (a->w) {
+ /* writeback */
+ if (!a->p) {
+ tcg_gen_addi_i32(addr, addr, offset);
+ }
+ store_reg(s, a->rn, addr);
+ }
+ return value;
+}
+
+static bool trans_VLDR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ return false;
+ }
+ if (a->rn == 15) {
+ return false;
+ }
+ return gen_M_fp_sysreg_write(s, a->reg, memory_to_fp_sysreg, a);
+}
+
+static bool trans_VSTR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ return false;
+ }
+ if (a->rn == 15) {
+ return false;
+ }
+ return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_memory, a);
+}
+
+static bool trans_NOCP(DisasContext *s, arg_nocp *a)
+{
+ /*
+ * Handle M-profile early check for disabled coprocessor:
+ * all we need to do here is emit the NOCP exception if
+ * the coprocessor is disabled. Otherwise we return false
+ * and the real VFP/etc decode will handle the insn.
+ */
+ assert(arm_dc_feature(s, ARM_FEATURE_M));
+
+ if (a->cp == 11) {
+ a->cp = 10;
+ }
+ if (arm_dc_feature(s, ARM_FEATURE_V8_1M) &&
+ (a->cp == 8 || a->cp == 9 || a->cp == 14 || a->cp == 15)) {
+ /* in v8.1M cp 8, 9, 14, 15 also are governed by the cp10 enable */
+ a->cp = 10;
+ }
+
+ if (a->cp != 10) {
+ gen_exception_insn(s, 0, EXCP_NOCP, syn_uncategorized());
+ return true;
+ }
+
+ if (s->fp_excp_el != 0) {
+ gen_exception_insn_el(s, 0, EXCP_NOCP,
+ syn_uncategorized(), s->fp_excp_el);
+ return true;
+ }
+
+ return false;
+}
+
+static bool trans_NOCP_8_1(DisasContext *s, arg_nocp *a)
+{
+ /* This range needs a coprocessor check for v8.1M and later only */
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ return false;
+ }
+ return trans_NOCP(s, a);
+}
diff --git a/target/arm/tcg/translate-mve.c b/target/arm/tcg/translate-mve.c
new file mode 100644
index 0000000000..b1a8d6a65c
--- /dev/null
+++ b/target/arm/tcg/translate-mve.c
@@ -0,0 +1,2258 @@
+/*
+ * ARM translation: M-profile MVE instructions
+ *
+ * Copyright (c) 2021 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "translate.h"
+#include "translate-a32.h"
+
+static inline int vidup_imm(DisasContext *s, int x)
+{
+ return 1 << x;
+}
+
+/* Include the generated decoder */
+#include "decode-mve.c.inc"
+
+typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
+typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
+typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
+typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
+typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
+typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
+typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
+typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
+typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
+
+/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
+static inline long mve_qreg_offset(unsigned reg)
+{
+ return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
+}
+
+static TCGv_ptr mve_qreg_ptr(unsigned reg)
+{
+ TCGv_ptr ret = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(ret, tcg_env, mve_qreg_offset(reg));
+ return ret;
+}
+
+static bool mve_no_predication(DisasContext *s)
+{
+ /*
+ * Return true if we are executing the entire MVE instruction
+ * with no predication or partial-execution, and so we can safely
+ * use an inline TCG vector implementation.
+ */
+ return s->eci == 0 && s->mve_no_pred;
+}
+
+static bool mve_check_qreg_bank(DisasContext *s, int qmask)
+{
+ /*
+ * Check whether Qregs are in range. For v8.1M only Q0..Q7
+ * are supported, see VFPSmallRegisterBank().
+ */
+ return qmask < 8;
+}
+
+bool mve_eci_check(DisasContext *s)
+{
+ /*
+ * This is a beatwise insn: check that ECI is valid (not a
+ * reserved value) and note that we are handling it.
+ * Return true if OK, false if we generated an exception.
+ */
+ s->eci_handled = true;
+ switch (s->eci) {
+ case ECI_NONE:
+ case ECI_A0:
+ case ECI_A0A1:
+ case ECI_A0A1A2:
+ case ECI_A0A1A2B0:
+ return true;
+ default:
+ /* Reserved value: INVSTATE UsageFault */
+ gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
+ return false;
+ }
+}
+
+void mve_update_eci(DisasContext *s)
+{
+ /*
+ * The helper function will always update the CPUState field,
+ * so we only need to update the DisasContext field.
+ */
+ if (s->eci) {
+ s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
+ }
+}
+
+void mve_update_and_store_eci(DisasContext *s)
+{
+ /*
+ * For insns which don't call a helper function that will call
+ * mve_advance_vpt(), this version updates s->eci and also stores
+ * it out to the CPUState field.
+ */
+ if (s->eci) {
+ mve_update_eci(s);
+ store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
+ }
+}
+
+static bool mve_skip_first_beat(DisasContext *s)
+{
+ /* Return true if PSR.ECI says we must skip the first beat of this insn */
+ switch (s->eci) {
+ case ECI_NONE:
+ return false;
+ case ECI_A0:
+ case ECI_A0A1:
+ case ECI_A0A1A2:
+ case ECI_A0A1A2B0:
+ return true;
+ default:
+ g_assert_not_reached();
+ }
+}
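+
+/*
+ * A minimal worked example of the ECI handling above (illustration
+ * only, kept out of the build): which beats a given ECI state says
+ * have already been completed. The BEAT_* names and the function are
+ * invented for this sketch; the ECI_* constants are the ones used in
+ * the switch statements above.
+ */
+#if 0
+enum { BEAT_A0 = 1, BEAT_A1 = 2, BEAT_A2 = 4, BEAT_B0 = 8 };
+
+static unsigned eci_completed_beats(int eci)
+{
+ switch (eci) {
+ case ECI_NONE:
+ return 0;
+ case ECI_A0:
+ return BEAT_A0;
+ case ECI_A0A1:
+ return BEAT_A0 | BEAT_A1;
+ case ECI_A0A1A2:
+ return BEAT_A0 | BEAT_A1 | BEAT_A2;
+ case ECI_A0A1A2B0:
+ return BEAT_A0 | BEAT_A1 | BEAT_A2 | BEAT_B0;
+ default:
+ /* reserved value: mve_eci_check() raises an INVSTATE UsageFault */
+ return 0;
+ }
+}
+#endif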
+
+static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
+ unsigned msize)
+{
+ TCGv_i32 addr;
+ uint32_t offset;
+ TCGv_ptr qreg;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qd) ||
+ !fn) {
+ return false;
+ }
+
+ /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
+ if (a->rn == 15 || (a->rn == 13 && a->w)) {
+ return false;
+ }
+
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ offset = a->imm << msize;
+ if (!a->a) {
+ offset = -offset;
+ }
+ addr = load_reg(s, a->rn);
+ if (a->p) {
+ tcg_gen_addi_i32(addr, addr, offset);
+ }
+
+ qreg = mve_qreg_ptr(a->qd);
+ fn(tcg_env, qreg, addr);
+
+ /*
+ * Writeback always happens after the last beat of the insn,
+ * regardless of predication
+ */
+ if (a->w) {
+ if (!a->p) {
+ tcg_gen_addi_i32(addr, addr, offset);
+ }
+ store_reg(s, a->rn, addr);
+ }
+ mve_update_eci(s);
+ return true;
+}
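+
+/*
+ * A scalar sketch of the P (pre-index) and W (writeback) handling in
+ * do_ldst() above (illustration only, kept out of the build; the
+ * function name is invented for this sketch):
+ */
+#if 0
+static uint32_t vldst_effective_address(uint32_t rn, int32_t offset,
+ bool p, bool w, uint32_t *rn_writeback)
+{
+ uint32_t ea = p ? rn + offset : rn; /* address used for the access */
+ *rn_writeback = w ? rn + offset : rn; /* Rn value after the last beat */
+ return ea;
+}
+#endif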
+
+static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
+{
+ static MVEGenLdStFn * const ldstfns[4][2] = {
+ { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
+ { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
+ { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
+ { NULL, NULL }
+ };
+ return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
+}
+
+#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE) \
+ static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a) \
+ { \
+ static MVEGenLdStFn * const ldstfns[2][2] = { \
+ { gen_helper_mve_##ST, gen_helper_mve_##SLD }, \
+ { NULL, gen_helper_mve_##ULD }, \
+ }; \
+ return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE); \
+ }
+
+DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
+DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
+DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
+
+static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
+{
+ TCGv_i32 addr;
+ TCGv_ptr qd, qm;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qd | a->qm) ||
+ !fn || a->rn == 15) {
+ /* Rn == 15 is UNPREDICTABLE */
+ return false;
+ }
+
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ addr = load_reg(s, a->rn);
+
+ qd = mve_qreg_ptr(a->qd);
+ qm = mve_qreg_ptr(a->qm);
+ fn(tcg_env, qd, qm, addr);
+ mve_update_eci(s);
+ return true;
+}
+
+/*
+ * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
+ * sign-extended to halfword elements in register". _os_ indicates that
+ * the offsets in Qm should be scaled by the element size.
+ */
+/* This macro is just to make the arrays more compact in these functions */
+#define F(N) gen_helper_mve_##N
+
+/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
+static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
+{
+ static MVEGenLdStSGFn * const fns[2][4][4] = { {
+ { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
+ { NULL, NULL, F(vldrh_sg_sw), NULL },
+ { NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL }
+ }, {
+ { NULL, NULL, NULL, NULL },
+ { NULL, NULL, F(vldrh_sg_os_sw), NULL },
+ { NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL }
+ }
+ };
+ if (a->qd == a->qm) {
+ return false; /* UNPREDICTABLE */
+ }
+ return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
+}
+
+static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
+{
+ static MVEGenLdStSGFn * const fns[2][4][4] = { {
+ { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
+ { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
+ { NULL, NULL, F(vldrw_sg_uw), NULL },
+ { NULL, NULL, NULL, F(vldrd_sg_ud) }
+ }, {
+ { NULL, NULL, NULL, NULL },
+ { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
+ { NULL, NULL, F(vldrw_sg_os_uw), NULL },
+ { NULL, NULL, NULL, F(vldrd_sg_os_ud) }
+ }
+ };
+ if (a->qd == a->qm) {
+ return false; /* UNPREDICTABLE */
+ }
+ return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
+}
+
+static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
+{
+ static MVEGenLdStSGFn * const fns[2][4][4] = { {
+ { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
+ { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
+ { NULL, NULL, F(vstrw_sg_uw), NULL },
+ { NULL, NULL, NULL, F(vstrd_sg_ud) }
+ }, {
+ { NULL, NULL, NULL, NULL },
+ { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
+ { NULL, NULL, F(vstrw_sg_os_uw), NULL },
+ { NULL, NULL, NULL, F(vstrd_sg_os_ud) }
+ }
+ };
+ return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
+}
+
+#undef F
+
+static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
+ MVEGenLdStSGFn *fn, unsigned msize)
+{
+ uint32_t offset;
+ TCGv_ptr qd, qm;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qd | a->qm) ||
+ !fn) {
+ return false;
+ }
+
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ offset = a->imm << msize;
+ if (!a->a) {
+ offset = -offset;
+ }
+
+ qd = mve_qreg_ptr(a->qd);
+ qm = mve_qreg_ptr(a->qm);
+ fn(tcg_env, qd, qm, tcg_constant_i32(offset));
+ mve_update_eci(s);
+ return true;
+}
+
+static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
+{
+ static MVEGenLdStSGFn * const fns[] = {
+ gen_helper_mve_vldrw_sg_uw,
+ gen_helper_mve_vldrw_sg_wb_uw,
+ };
+ if (a->qd == a->qm) {
+ return false; /* UNPREDICTABLE */
+ }
+ return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
+}
+
+static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
+{
+ static MVEGenLdStSGFn * const fns[] = {
+ gen_helper_mve_vldrd_sg_ud,
+ gen_helper_mve_vldrd_sg_wb_ud,
+ };
+ if (a->qd == a->qm) {
+ return false; /* UNPREDICTABLE */
+ }
+ return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
+}
+
+static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
+{
+ static MVEGenLdStSGFn * const fns[] = {
+ gen_helper_mve_vstrw_sg_uw,
+ gen_helper_mve_vstrw_sg_wb_uw,
+ };
+ return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
+}
+
+static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
+{
+ static MVEGenLdStSGFn * const fns[] = {
+ gen_helper_mve_vstrd_sg_ud,
+ gen_helper_mve_vstrd_sg_wb_ud,
+ };
+ return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
+}
+
+static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
+ int addrinc)
+{
+ TCGv_i32 rn;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qd) ||
+ !fn || (a->rn == 13 && a->w) || a->rn == 15) {
+ /* Variously UNPREDICTABLE or UNDEF or related-encoding */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ rn = load_reg(s, a->rn);
+ /*
+ * We pass the index of Qd, not a pointer, because the helper must
+ * access multiple Q registers starting at Qd and working up.
+ */
+ fn(tcg_env, tcg_constant_i32(a->qd), rn);
+
+ if (a->w) {
+ tcg_gen_addi_i32(rn, rn, addrinc);
+ store_reg(s, a->rn, rn);
+ }
+ mve_update_and_store_eci(s);
+ return true;
+}
+
+/* This macro is just to make the arrays more compact in these functions */
+#define F(N) gen_helper_mve_##N
+
+static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
+{
+ static MVEGenLdStIlFn * const fns[4][4] = {
+ { F(vld20b), F(vld20h), F(vld20w), NULL, },
+ { F(vld21b), F(vld21h), F(vld21w), NULL, },
+ { NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL },
+ };
+ if (a->qd > 6) {
+ return false;
+ }
+ return do_vldst_il(s, a, fns[a->pat][a->size], 32);
+}
+
+static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
+{
+ static MVEGenLdStIlFn * const fns[4][4] = {
+ { F(vld40b), F(vld40h), F(vld40w), NULL, },
+ { F(vld41b), F(vld41h), F(vld41w), NULL, },
+ { F(vld42b), F(vld42h), F(vld42w), NULL, },
+ { F(vld43b), F(vld43h), F(vld43w), NULL, },
+ };
+ if (a->qd > 4) {
+ return false;
+ }
+ return do_vldst_il(s, a, fns[a->pat][a->size], 64);
+}
+
+static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
+{
+ static MVEGenLdStIlFn * const fns[4][4] = {
+ { F(vst20b), F(vst20h), F(vst20w), NULL, },
+ { F(vst21b), F(vst21h), F(vst21w), NULL, },
+ { NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL },
+ };
+ if (a->qd > 6) {
+ return false;
+ }
+ return do_vldst_il(s, a, fns[a->pat][a->size], 32);
+}
+
+static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
+{
+ static MVEGenLdStIlFn * const fns[4][4] = {
+ { F(vst40b), F(vst40h), F(vst40w), NULL, },
+ { F(vst41b), F(vst41h), F(vst41w), NULL, },
+ { F(vst42b), F(vst42h), F(vst42w), NULL, },
+ { F(vst43b), F(vst43h), F(vst43w), NULL, },
+ };
+ if (a->qd > 4) {
+ return false;
+ }
+ return do_vldst_il(s, a, fns[a->pat][a->size], 64);
+}
+
+#undef F
+
+static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
+{
+ TCGv_ptr qd;
+ TCGv_i32 rt;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qd)) {
+ return false;
+ }
+ if (a->rt == 13 || a->rt == 15) {
+ /* UNPREDICTABLE; we choose to UNDEF */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ rt = load_reg(s, a->rt);
+ if (mve_no_predication(s)) {
+ tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt);
+ } else {
+ qd = mve_qreg_ptr(a->qd);
+ tcg_gen_dup_i32(a->size, rt, rt);
+ gen_helper_mve_vdup(tcg_env, qd, rt);
+ }
+ mve_update_eci(s);
+ return true;
+}
+
+static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn,
+ GVecGen2Fn vecfn)
+{
+ TCGv_ptr qd, qm;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qd | a->qm) ||
+ !fn) {
+ return false;
+ }
+
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ if (vecfn && mve_no_predication(s)) {
+ vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16);
+ } else {
+ qd = mve_qreg_ptr(a->qd);
+ qm = mve_qreg_ptr(a->qm);
+ fn(tcg_env, qd, qm);
+ }
+ mve_update_eci(s);
+ return true;
+}
+
+static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
+{
+ return do_1op_vec(s, a, fn, NULL);
+}
+
+#define DO_1OP_VEC(INSN, FN, VECFN) \
+ static bool trans_##INSN(DisasContext *s, arg_1op *a) \
+ { \
+ static MVEGenOneOpFn * const fns[] = { \
+ gen_helper_mve_##FN##b, \
+ gen_helper_mve_##FN##h, \
+ gen_helper_mve_##FN##w, \
+ NULL, \
+ }; \
+ return do_1op_vec(s, a, fns[a->size], VECFN); \
+ }
+
+#define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)
+
+DO_1OP(VCLZ, vclz)
+DO_1OP(VCLS, vcls)
+DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs)
+DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg)
+DO_1OP(VQABS, vqabs)
+DO_1OP(VQNEG, vqneg)
+DO_1OP(VMAXA, vmaxa)
+DO_1OP(VMINA, vmina)
+
+/*
+ * For simple float/int conversions we use the fixed-point
+ * conversion helpers with a zero shift count
+ */
+#define DO_VCVT(INSN, HFN, SFN) \
+ static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \
+ { \
+ gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0)); \
+ } \
+ static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \
+ { \
+ gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0)); \
+ } \
+ static bool trans_##INSN(DisasContext *s, arg_1op *a) \
+ { \
+ static MVEGenOneOpFn * const fns[] = { \
+ NULL, \
+ gen_##INSN##h, \
+ gen_##INSN##s, \
+ NULL, \
+ }; \
+ if (!dc_isar_feature(aa32_mve_fp, s)) { \
+ return false; \
+ } \
+ return do_1op(s, a, fns[a->size]); \
+ }
+
+DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
+DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
+DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
+DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)
+
+static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
+ ARMFPRounding rmode, bool u)
+{
+ /*
+ * Handle VCVT fp to int with specified rounding mode.
+ * This is a 1op fn but we must pass the rounding mode as
+ * an immediate to the helper.
+ */
+ TCGv_ptr qd, qm;
+ static MVEGenVCVTRmodeFn * const fns[4][2] = {
+ { NULL, NULL },
+ { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
+ { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
+ { NULL, NULL },
+ };
+ MVEGenVCVTRmodeFn *fn = fns[a->size][u];
+
+ if (!dc_isar_feature(aa32_mve_fp, s) ||
+ !mve_check_qreg_bank(s, a->qd | a->qm) ||
+ !fn) {
+ return false;
+ }
+
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ qd = mve_qreg_ptr(a->qd);
+ qm = mve_qreg_ptr(a->qm);
+ fn(tcg_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
+ mve_update_eci(s);
+ return true;
+}
+
+#define DO_VCVT_RMODE(INSN, RMODE, U) \
+ static bool trans_##INSN(DisasContext *s, arg_1op *a) \
+ { \
+ return do_vcvt_rmode(s, a, RMODE, U); \
+ } \
+
+DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
+DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
+DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
+DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
+DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
+DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
+DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
+DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)
+
+#define DO_VCVT_SH(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_1op *a) \
+ { \
+ if (!dc_isar_feature(aa32_mve_fp, s)) { \
+ return false; \
+ } \
+ return do_1op(s, a, gen_helper_mve_##FN); \
+ } \
+
+DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
+DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
+DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
+DO_VCVT_SH(VCVTT_HS, vcvtt_hs)
+
+#define DO_VRINT(INSN, RMODE) \
+ static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \
+ { \
+ gen_helper_mve_vrint_rm_h(env, qd, qm, \
+ tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
+ } \
+ static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \
+ { \
+ gen_helper_mve_vrint_rm_s(env, qd, qm, \
+ tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
+ } \
+ static bool trans_##INSN(DisasContext *s, arg_1op *a) \
+ { \
+ static MVEGenOneOpFn * const fns[] = { \
+ NULL, \
+ gen_##INSN##h, \
+ gen_##INSN##s, \
+ NULL, \
+ }; \
+ if (!dc_isar_feature(aa32_mve_fp, s)) { \
+ return false; \
+ } \
+ return do_1op(s, a, fns[a->size]); \
+ }
+
+DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
+DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
+DO_VRINT(VRINTZ, FPROUNDING_ZERO)
+DO_VRINT(VRINTM, FPROUNDING_NEGINF)
+DO_VRINT(VRINTP, FPROUNDING_POSINF)
+
+static bool trans_VRINTX(DisasContext *s, arg_1op *a)
+{
+ static MVEGenOneOpFn * const fns[] = {
+ NULL,
+ gen_helper_mve_vrintx_h,
+ gen_helper_mve_vrintx_s,
+ NULL,
+ };
+ if (!dc_isar_feature(aa32_mve_fp, s)) {
+ return false;
+ }
+ return do_1op(s, a, fns[a->size]);
+}
+
+/* Narrowing moves: only size 0 and 1 are valid */
+#define DO_VMOVN(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_1op *a) \
+ { \
+ static MVEGenOneOpFn * const fns[] = { \
+ gen_helper_mve_##FN##b, \
+ gen_helper_mve_##FN##h, \
+ NULL, \
+ NULL, \
+ }; \
+ return do_1op(s, a, fns[a->size]); \
+ }
+
+DO_VMOVN(VMOVNB, vmovnb)
+DO_VMOVN(VMOVNT, vmovnt)
+DO_VMOVN(VQMOVUNB, vqmovunb)
+DO_VMOVN(VQMOVUNT, vqmovunt)
+DO_VMOVN(VQMOVN_BS, vqmovnbs)
+DO_VMOVN(VQMOVN_TS, vqmovnts)
+DO_VMOVN(VQMOVN_BU, vqmovnbu)
+DO_VMOVN(VQMOVN_TU, vqmovntu)
+
+static bool trans_VREV16(DisasContext *s, arg_1op *a)
+{
+ static MVEGenOneOpFn * const fns[] = {
+ gen_helper_mve_vrev16b,
+ NULL,
+ NULL,
+ NULL,
+ };
+ return do_1op(s, a, fns[a->size]);
+}
+
+static bool trans_VREV32(DisasContext *s, arg_1op *a)
+{
+ static MVEGenOneOpFn * const fns[] = {
+ gen_helper_mve_vrev32b,
+ gen_helper_mve_vrev32h,
+ NULL,
+ NULL,
+ };
+ return do_1op(s, a, fns[a->size]);
+}
+
+static bool trans_VREV64(DisasContext *s, arg_1op *a)
+{
+ static MVEGenOneOpFn * const fns[] = {
+ gen_helper_mve_vrev64b,
+ gen_helper_mve_vrev64h,
+ gen_helper_mve_vrev64w,
+ NULL,
+ };
+ return do_1op(s, a, fns[a->size]);
+}
+
+static bool trans_VMVN(DisasContext *s, arg_1op *a)
+{
+ return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not);
+}
+
+static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
+{
+ static MVEGenOneOpFn * const fns[] = {
+ NULL,
+ gen_helper_mve_vfabsh,
+ gen_helper_mve_vfabss,
+ NULL,
+ };
+ if (!dc_isar_feature(aa32_mve_fp, s)) {
+ return false;
+ }
+ return do_1op(s, a, fns[a->size]);
+}
+
+static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
+{
+ static MVEGenOneOpFn * const fns[] = {
+ NULL,
+ gen_helper_mve_vfnegh,
+ gen_helper_mve_vfnegs,
+ NULL,
+ };
+ if (!dc_isar_feature(aa32_mve_fp, s)) {
+ return false;
+ }
+ return do_1op(s, a, fns[a->size]);
+}
+
+static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn,
+ GVecGen3Fn *vecfn)
+{
+ TCGv_ptr qd, qn, qm;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
+ !fn) {
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ if (vecfn && mve_no_predication(s)) {
+ vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn),
+ mve_qreg_offset(a->qm), 16, 16);
+ } else {
+ qd = mve_qreg_ptr(a->qd);
+ qn = mve_qreg_ptr(a->qn);
+ qm = mve_qreg_ptr(a->qm);
+ fn(tcg_env, qd, qn, qm);
+ }
+ mve_update_eci(s);
+ return true;
+}
+
+static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn)
+{
+ return do_2op_vec(s, a, fn, NULL);
+}
+
+#define DO_LOGIC(INSN, HELPER, VECFN) \
+ static bool trans_##INSN(DisasContext *s, arg_2op *a) \
+ { \
+ return do_2op_vec(s, a, HELPER, VECFN); \
+ }
+
+DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and)
+DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc)
+DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or)
+DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc)
+DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor)
+
+static bool trans_VPSEL(DisasContext *s, arg_2op *a)
+{
+ /* This insn updates predication bits */
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
+ return do_2op(s, a, gen_helper_mve_vpsel);
+}
+
+#define DO_2OP_VEC(INSN, FN, VECFN) \
+ static bool trans_##INSN(DisasContext *s, arg_2op *a) \
+ { \
+ static MVEGenTwoOpFn * const fns[] = { \
+ gen_helper_mve_##FN##b, \
+ gen_helper_mve_##FN##h, \
+ gen_helper_mve_##FN##w, \
+ NULL, \
+ }; \
+ return do_2op_vec(s, a, fns[a->size], VECFN); \
+ }
+
+#define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)
+
+DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add)
+DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub)
+DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul)
+DO_2OP(VMULH_S, vmulhs)
+DO_2OP(VMULH_U, vmulhu)
+DO_2OP(VRMULH_S, vrmulhs)
+DO_2OP(VRMULH_U, vrmulhu)
+DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax)
+DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax)
+DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin)
+DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin)
+DO_2OP(VABD_S, vabds)
+DO_2OP(VABD_U, vabdu)
+DO_2OP(VHADD_S, vhadds)
+DO_2OP(VHADD_U, vhaddu)
+DO_2OP(VHSUB_S, vhsubs)
+DO_2OP(VHSUB_U, vhsubu)
+DO_2OP(VMULL_BS, vmullbs)
+DO_2OP(VMULL_BU, vmullbu)
+DO_2OP(VMULL_TS, vmullts)
+DO_2OP(VMULL_TU, vmulltu)
+DO_2OP(VQDMULH, vqdmulh)
+DO_2OP(VQRDMULH, vqrdmulh)
+DO_2OP(VQADD_S, vqadds)
+DO_2OP(VQADD_U, vqaddu)
+DO_2OP(VQSUB_S, vqsubs)
+DO_2OP(VQSUB_U, vqsubu)
+DO_2OP(VSHL_S, vshls)
+DO_2OP(VSHL_U, vshlu)
+DO_2OP(VRSHL_S, vrshls)
+DO_2OP(VRSHL_U, vrshlu)
+DO_2OP(VQSHL_S, vqshls)
+DO_2OP(VQSHL_U, vqshlu)
+DO_2OP(VQRSHL_S, vqrshls)
+DO_2OP(VQRSHL_U, vqrshlu)
+DO_2OP(VQDMLADH, vqdmladh)
+DO_2OP(VQDMLADHX, vqdmladhx)
+DO_2OP(VQRDMLADH, vqrdmladh)
+DO_2OP(VQRDMLADHX, vqrdmladhx)
+DO_2OP(VQDMLSDH, vqdmlsdh)
+DO_2OP(VQDMLSDHX, vqdmlsdhx)
+DO_2OP(VQRDMLSDH, vqrdmlsdh)
+DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
+DO_2OP(VRHADD_S, vrhadds)
+DO_2OP(VRHADD_U, vrhaddu)
+/*
+ * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
+ * so we can reuse the DO_2OP macro. (Our implementation calculates the
+ * "expected" results in this case.) Similarly for VHCADD.
+ */
+DO_2OP(VCADD90, vcadd90)
+DO_2OP(VCADD270, vcadd270)
+DO_2OP(VHCADD90, vhcadd90)
+DO_2OP(VHCADD270, vhcadd270)
+
+static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
+{
+ static MVEGenTwoOpFn * const fns[] = {
+ NULL,
+ gen_helper_mve_vqdmullbh,
+ gen_helper_mve_vqdmullbw,
+ NULL,
+ };
+ if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
+ /* UNPREDICTABLE; we choose to undef */
+ return false;
+ }
+ return do_2op(s, a, fns[a->size]);
+}
+
+static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
+{
+ static MVEGenTwoOpFn * const fns[] = {
+ NULL,
+ gen_helper_mve_vqdmullth,
+ gen_helper_mve_vqdmulltw,
+ NULL,
+ };
+ if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
+ /* UNPREDICTABLE; we choose to undef */
+ return false;
+ }
+ return do_2op(s, a, fns[a->size]);
+}
+
+static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
+{
+ /*
+ * Note that a->size indicates the output size, ie VMULL.P8
+ * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
+ * is the 16x16->32 operation and a->size is MO_32.
+ */
+ static MVEGenTwoOpFn * const fns[] = {
+ NULL,
+ gen_helper_mve_vmullpbh,
+ gen_helper_mve_vmullpbw,
+ NULL,
+ };
+ return do_2op(s, a, fns[a->size]);
+}
+
+static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
+{
+ /* a->size is as for trans_VMULLP_B */
+ static MVEGenTwoOpFn * const fns[] = {
+ NULL,
+ gen_helper_mve_vmullpth,
+ gen_helper_mve_vmullptw,
+ NULL,
+ };
+ return do_2op(s, a, fns[a->size]);
+}
+
+/*
+ * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
+ * of the 32-bit elements in each lane of the input vectors, where the
+ * carry-out of each add is the carry-in of the next. The initial carry
+ * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
+ * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
+ * These insns are subject to beat-wise execution. Partial execution
+ * of an I=1 (initial carry input fixed) insn which does not
+ * execute the first beat must start with the current FPSCR.NZCV
+ * value, not the fixed constant input.
+ */
+static bool trans_VADC(DisasContext *s, arg_2op *a)
+{
+ return do_2op(s, a, gen_helper_mve_vadc);
+}
+
+static bool trans_VADCI(DisasContext *s, arg_2op *a)
+{
+ if (mve_skip_first_beat(s)) {
+ return trans_VADC(s, a);
+ }
+ return do_2op(s, a, gen_helper_mve_vadci);
+}
+
+static bool trans_VSBC(DisasContext *s, arg_2op *a)
+{
+ return do_2op(s, a, gen_helper_mve_vsbc);
+}
+
+static bool trans_VSBCI(DisasContext *s, arg_2op *a)
+{
+ if (mve_skip_first_beat(s)) {
+ return trans_VSBC(s, a);
+ }
+ return do_2op(s, a, gen_helper_mve_vsbci);
+}
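+
+/*
+ * A scalar model of the VADC carry chain described above, across the
+ * four 32-bit lanes of one vector (illustration only, kept out of the
+ * build; the name is invented for this sketch). carry starts as
+ * FPSCR.C for VADC, or fixed 0 for VADCI; VSBC is the same chain
+ * computed as n[e] + ~m[e] + carry, with an initial carry of 1 for VSBCI.
+ */
+#if 0
+static void vadc_model(uint32_t d[4], const uint32_t n[4],
+ const uint32_t m[4], bool *carry)
+{
+ for (int e = 0; e < 4; e++) {
+ uint64_t r = (uint64_t)n[e] + m[e] + (*carry ? 1 : 0);
+ d[e] = (uint32_t)r;
+ *carry = r >> 32; /* carry-out of this lane is the next lane's carry-in */
+ }
+ /* on completion *carry is written back to FPSCR.C */
+}
+#endif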
+
+#define DO_2OP_FP(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_2op *a) \
+ { \
+ static MVEGenTwoOpFn * const fns[] = { \
+ NULL, \
+ gen_helper_mve_##FN##h, \
+ gen_helper_mve_##FN##s, \
+ NULL, \
+ }; \
+ if (!dc_isar_feature(aa32_mve_fp, s)) { \
+ return false; \
+ } \
+ return do_2op(s, a, fns[a->size]); \
+ }
+
+DO_2OP_FP(VADD_fp, vfadd)
+DO_2OP_FP(VSUB_fp, vfsub)
+DO_2OP_FP(VMUL_fp, vfmul)
+DO_2OP_FP(VABD_fp, vfabd)
+DO_2OP_FP(VMAXNM, vmaxnm)
+DO_2OP_FP(VMINNM, vminnm)
+DO_2OP_FP(VCADD90_fp, vfcadd90)
+DO_2OP_FP(VCADD270_fp, vfcadd270)
+DO_2OP_FP(VFMA, vfma)
+DO_2OP_FP(VFMS, vfms)
+DO_2OP_FP(VCMUL0, vcmul0)
+DO_2OP_FP(VCMUL90, vcmul90)
+DO_2OP_FP(VCMUL180, vcmul180)
+DO_2OP_FP(VCMUL270, vcmul270)
+DO_2OP_FP(VCMLA0, vcmla0)
+DO_2OP_FP(VCMLA90, vcmla90)
+DO_2OP_FP(VCMLA180, vcmla180)
+DO_2OP_FP(VCMLA270, vcmla270)
+DO_2OP_FP(VMAXNMA, vmaxnma)
+DO_2OP_FP(VMINNMA, vminnma)
+
+static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
+ MVEGenTwoOpScalarFn fn)
+{
+ TCGv_ptr qd, qn;
+ TCGv_i32 rm;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qd | a->qn) ||
+ !fn) {
+ return false;
+ }
+ if (a->rm == 13 || a->rm == 15) {
+ /* UNPREDICTABLE */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ qd = mve_qreg_ptr(a->qd);
+ qn = mve_qreg_ptr(a->qn);
+ rm = load_reg(s, a->rm);
+ fn(tcg_env, qd, qn, rm);
+ mve_update_eci(s);
+ return true;
+}
+
+#define DO_2OP_SCALAR(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \
+ { \
+ static MVEGenTwoOpScalarFn * const fns[] = { \
+ gen_helper_mve_##FN##b, \
+ gen_helper_mve_##FN##h, \
+ gen_helper_mve_##FN##w, \
+ NULL, \
+ }; \
+ return do_2op_scalar(s, a, fns[a->size]); \
+ }
+
+DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
+DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
+DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
+DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
+DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
+DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
+DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
+DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
+DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
+DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
+DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
+DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
+DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
+DO_2OP_SCALAR(VBRSR, vbrsr)
+DO_2OP_SCALAR(VMLA, vmla)
+DO_2OP_SCALAR(VMLAS, vmlas)
+DO_2OP_SCALAR(VQDMLAH, vqdmlah)
+DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
+DO_2OP_SCALAR(VQDMLASH, vqdmlash)
+DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)
+
+static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
+{
+ static MVEGenTwoOpScalarFn * const fns[] = {
+ NULL,
+ gen_helper_mve_vqdmullb_scalarh,
+ gen_helper_mve_vqdmullb_scalarw,
+ NULL,
+ };
+ if (a->qd == a->qn && a->size == MO_32) {
+ /* UNPREDICTABLE; we choose to undef */
+ return false;
+ }
+ return do_2op_scalar(s, a, fns[a->size]);
+}
+
+static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
+{
+ static MVEGenTwoOpScalarFn * const fns[] = {
+ NULL,
+ gen_helper_mve_vqdmullt_scalarh,
+ gen_helper_mve_vqdmullt_scalarw,
+ NULL,
+ };
+ if (a->qd == a->qn && a->size == MO_32) {
+ /* UNPREDICTABLE; we choose to undef */
+ return false;
+ }
+ return do_2op_scalar(s, a, fns[a->size]);
+}
+
+
+#define DO_2OP_FP_SCALAR(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \
+ { \
+ static MVEGenTwoOpScalarFn * const fns[] = { \
+ NULL, \
+ gen_helper_mve_##FN##h, \
+ gen_helper_mve_##FN##s, \
+ NULL, \
+ }; \
+ if (!dc_isar_feature(aa32_mve_fp, s)) { \
+ return false; \
+ } \
+ return do_2op_scalar(s, a, fns[a->size]); \
+ }
+
+DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
+DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
+DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
+DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
+DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)
+
+static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
+ MVEGenLongDualAccOpFn *fn)
+{
+ TCGv_ptr qn, qm;
+ TCGv_i64 rda_i, rda_o;
+ TCGv_i32 rdalo, rdahi;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qn | a->qm) ||
+ !fn) {
+ return false;
+ }
+ /*
+ * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
+ * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
+ */
+ if (a->rdahi == 13 || a->rdahi == 15) {
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ qn = mve_qreg_ptr(a->qn);
+ qm = mve_qreg_ptr(a->qm);
+
+ /*
+ * This insn is subject to beat-wise execution. Partial execution
+ * of an A=0 (no-accumulate) insn which does not execute the first
+ * beat must start with the current rda value, not 0.
+ */
+ rda_o = tcg_temp_new_i64();
+ if (a->a || mve_skip_first_beat(s)) {
+ rda_i = rda_o;
+ rdalo = load_reg(s, a->rdalo);
+ rdahi = load_reg(s, a->rdahi);
+ tcg_gen_concat_i32_i64(rda_i, rdalo, rdahi);
+ } else {
+ rda_i = tcg_constant_i64(0);
+ }
+
+ fn(rda_o, tcg_env, qn, qm, rda_i);
+
+ rdalo = tcg_temp_new_i32();
+ rdahi = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(rdalo, rda_o);
+ tcg_gen_extrh_i64_i32(rdahi, rda_o);
+ store_reg(s, a->rdalo, rdalo);
+ store_reg(s, a->rdahi, rdahi);
+ mve_update_eci(s);
+ return true;
+}
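+
+/*
+ * How the 64-bit accumulator above is seeded, in scalar form
+ * (illustration only, kept out of the build; the name is invented and
+ * "resuming" stands for mve_skip_first_beat() returning true):
+ */
+#if 0
+static uint64_t long_acc_initial(bool acc, bool resuming,
+ uint32_t rdalo, uint32_t rdahi)
+{
+ if (acc || resuming) {
+ return ((uint64_t)rdahi << 32) | rdalo; /* continue from RdaHi:RdaLo */
+ }
+ return 0; /* A=0 and starting from the first beat: accumulate from zero */
+}
+#endif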
+
+static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
+{
+ static MVEGenLongDualAccOpFn * const fns[4][2] = {
+ { NULL, NULL },
+ { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
+ { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
+ { NULL, NULL },
+ };
+ return do_long_dual_acc(s, a, fns[a->size][a->x]);
+}
+
+static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
+{
+ static MVEGenLongDualAccOpFn * const fns[4][2] = {
+ { NULL, NULL },
+ { gen_helper_mve_vmlaldavuh, NULL },
+ { gen_helper_mve_vmlaldavuw, NULL },
+ { NULL, NULL },
+ };
+ return do_long_dual_acc(s, a, fns[a->size][a->x]);
+}
+
+static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
+{
+ static MVEGenLongDualAccOpFn * const fns[4][2] = {
+ { NULL, NULL },
+ { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
+ { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
+ { NULL, NULL },
+ };
+ return do_long_dual_acc(s, a, fns[a->size][a->x]);
+}
+
+static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
+{
+ static MVEGenLongDualAccOpFn * const fns[] = {
+ gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
+ };
+ return do_long_dual_acc(s, a, fns[a->x]);
+}
+
+static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
+{
+ static MVEGenLongDualAccOpFn * const fns[] = {
+ gen_helper_mve_vrmlaldavhuw, NULL,
+ };
+ return do_long_dual_acc(s, a, fns[a->x]);
+}
+
+static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
+{
+ static MVEGenLongDualAccOpFn * const fns[] = {
+ gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
+ };
+ return do_long_dual_acc(s, a, fns[a->x]);
+}
+
+static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
+{
+ TCGv_ptr qn, qm;
+ TCGv_i32 rda_i, rda_o;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qn) ||
+ !fn) {
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ qn = mve_qreg_ptr(a->qn);
+ qm = mve_qreg_ptr(a->qm);
+
+ /*
+ * This insn is subject to beat-wise execution. Partial execution
+ * of an A=0 (no-accumulate) insn which does not execute the first
+ * beat must start with the current rda value, not 0.
+ */
+ if (a->a || mve_skip_first_beat(s)) {
+ rda_o = rda_i = load_reg(s, a->rda);
+ } else {
+ rda_i = tcg_constant_i32(0);
+ rda_o = tcg_temp_new_i32();
+ }
+
+ fn(rda_o, tcg_env, qn, qm, rda_i);
+ store_reg(s, a->rda, rda_o);
+
+ mve_update_eci(s);
+ return true;
+}
+
+#define DO_DUAL_ACC(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_vmladav *a) \
+ { \
+ static MVEGenDualAccOpFn * const fns[4][2] = { \
+ { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb }, \
+ { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh }, \
+ { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw }, \
+ { NULL, NULL }, \
+ }; \
+ return do_dual_acc(s, a, fns[a->size][a->x]); \
+ }
+
+DO_DUAL_ACC(VMLADAV_S, vmladavs)
+DO_DUAL_ACC(VMLSDAV, vmlsdav)
+
+static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
+{
+ static MVEGenDualAccOpFn * const fns[4][2] = {
+ { gen_helper_mve_vmladavub, NULL },
+ { gen_helper_mve_vmladavuh, NULL },
+ { gen_helper_mve_vmladavuw, NULL },
+ { NULL, NULL },
+ };
+ return do_dual_acc(s, a, fns[a->size][a->x]);
+}
+
+static void gen_vpst(DisasContext *s, uint32_t mask)
+{
+ /*
+ * Set the VPR mask fields. We take advantage of MASK01 and MASK23
+ * being adjacent fields in the register.
+ *
+ * Updating the masks is not predicated, but it is subject to beat-wise
+ * execution, and the mask is updated on the odd-numbered beats.
+ * So if PSR.ECI says we should skip beat 1, we mustn't update the
+ * 01 mask field.
+ */
+ TCGv_i32 vpr = load_cpu_field(v7m.vpr);
+ switch (s->eci) {
+ case ECI_NONE:
+ case ECI_A0:
+ /* Update both 01 and 23 fields */
+ tcg_gen_deposit_i32(vpr, vpr,
+ tcg_constant_i32(mask | (mask << 4)),
+ R_V7M_VPR_MASK01_SHIFT,
+ R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
+ break;
+ case ECI_A0A1:
+ case ECI_A0A1A2:
+ case ECI_A0A1A2B0:
+ /* Update only the 23 mask field */
+ tcg_gen_deposit_i32(vpr, vpr,
+ tcg_constant_i32(mask),
+ R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ store_cpu_field(vpr, v7m.vpr);
+}
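+
+/*
+ * Scalar equivalent of the single deposit in gen_vpst() when both mask
+ * fields are updated (illustration only, kept out of the build; the
+ * name is invented, and the 0xff assumes the two fields are 4 bits
+ * each, as the "mask << 4" above implies):
+ */
+#if 0
+static uint32_t vpst_set_both_masks(uint32_t vpr, uint32_t mask)
+{
+ uint32_t fields = mask | (mask << 4); /* MASK01 low, MASK23 high */
+ uint32_t fieldmask = 0xff << R_V7M_VPR_MASK01_SHIFT;
+ return (vpr & ~fieldmask) | (fields << R_V7M_VPR_MASK01_SHIFT);
+}
+#endif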
+
+static bool trans_VPST(DisasContext *s, arg_VPST *a)
+{
+ /* mask == 0 is a "related encoding" */
+ if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+ gen_vpst(s, a->mask);
+ mve_update_and_store_eci(s);
+ return true;
+}
+
+static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
+{
+ /*
+ * Invert the predicate in VPR.P0. We have to call out to
+ * a helper because this insn itself is beatwise and can
+ * be predicated.
+ */
+ if (!dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ gen_helper_mve_vpnot(tcg_env);
+ /* This insn updates predication bits */
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
+ mve_update_eci(s);
+ return true;
+}
+
+static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
+{
+ /* VADDV: vector add across vector */
+ static MVEGenVADDVFn * const fns[4][2] = {
+ { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
+ { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
+ { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
+ { NULL, NULL }
+ };
+ TCGv_ptr qm;
+ TCGv_i32 rda_i, rda_o;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ a->size == 3) {
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * This insn is subject to beat-wise execution. Partial execution
+ * of an A=0 (no-accumulate) insn which does not execute the first
+ * beat must start with the current value of Rda, not zero.
+ */
+ if (a->a || mve_skip_first_beat(s)) {
+ /* Accumulate input from Rda */
+ rda_o = rda_i = load_reg(s, a->rda);
+ } else {
+ /* Accumulate starting at zero */
+ rda_i = tcg_constant_i32(0);
+ rda_o = tcg_temp_new_i32();
+ }
+
+ qm = mve_qreg_ptr(a->qm);
+ fns[a->size][a->u](rda_o, tcg_env, qm, rda_i);
+ store_reg(s, a->rda, rda_o);
+
+ mve_update_eci(s);
+ return true;
+}
+
+static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
+{
+ /*
+ * Vector Add Long Across Vector: accumulate the 32-bit
+ * elements of the vector into a 64-bit result stored in
+ * a pair of general-purpose registers.
+ * No need to check Qm's bank: it is only 3 bits in decode.
+ */
+ TCGv_ptr qm;
+ TCGv_i64 rda_i, rda_o;
+ TCGv_i32 rdalo, rdahi;
+
+ if (!dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+ /*
+ * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
+ * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
+ */
+ if (a->rdahi == 13 || a->rdahi == 15) {
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * This insn is subject to beat-wise execution. Partial execution
+ * of an A=0 (no-accumulate) insn which does not execute the first
+ * beat must start with the current value of RdaHi:RdaLo, not zero.
+ */
+ rda_o = tcg_temp_new_i64();
+ if (a->a || mve_skip_first_beat(s)) {
+ /* Accumulate input from RdaHi:RdaLo */
+ rda_i = rda_o;
+ rdalo = load_reg(s, a->rdalo);
+ rdahi = load_reg(s, a->rdahi);
+ tcg_gen_concat_i32_i64(rda_i, rdalo, rdahi);
+ } else {
+ /* Accumulate starting at zero */
+ rda_i = tcg_constant_i64(0);
+ }
+
+ qm = mve_qreg_ptr(a->qm);
+ if (a->u) {
+ gen_helper_mve_vaddlv_u(rda_o, tcg_env, qm, rda_i);
+ } else {
+ gen_helper_mve_vaddlv_s(rda_o, tcg_env, qm, rda_i);
+ }
+
+ rdalo = tcg_temp_new_i32();
+ rdahi = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(rdalo, rda_o);
+ tcg_gen_extrh_i64_i32(rdahi, rda_o);
+ store_reg(s, a->rdalo, rdalo);
+ store_reg(s, a->rdahi, rdahi);
+ mve_update_eci(s);
+ return true;
+}
+
+static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn,
+ GVecGen2iFn *vecfn)
+{
+ TCGv_ptr qd;
+ uint64_t imm;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qd) ||
+ !fn) {
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ imm = asimd_imm_const(a->imm, a->cmode, a->op);
+
+ if (vecfn && mve_no_predication(s)) {
+ vecfn(MO_64, mve_qreg_offset(a->qd), mve_qreg_offset(a->qd),
+ imm, 16, 16);
+ } else {
+ qd = mve_qreg_ptr(a->qd);
+ fn(tcg_env, qd, tcg_constant_i64(imm));
+ }
+ mve_update_eci(s);
+ return true;
+}
+
+static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c);
+}
+
+static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
+{
+ /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
+ MVEGenOneOpImmFn *fn;
+ GVecGen2iFn *vecfn;
+
+ if ((a->cmode & 1) && a->cmode < 12) {
+ if (a->op) {
+ /*
+ * For op=1, the immediate will be inverted by asimd_imm_const(),
+ * so the VBIC becomes a logical AND operation.
+ */
+ fn = gen_helper_mve_vandi;
+ vecfn = tcg_gen_gvec_andi;
+ } else {
+ fn = gen_helper_mve_vorri;
+ vecfn = tcg_gen_gvec_ori;
+ }
+ } else {
+ /* There is one unallocated cmode/op combination in this space */
+ if (a->cmode == 15 && a->op == 1) {
+ return false;
+ }
+ /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
+ fn = gen_helper_mve_vmovi;
+ vecfn = gen_gvec_vmovi;
+ }
+ return do_1imm(s, a, fn, vecfn);
+}
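+
+/*
+ * The per-lane identity behind the op=1 case above (illustration only,
+ * kept out of the build; the name is invented): asimd_imm_const()
+ * returns the immediate already inverted, so the VBIC reduces to an AND.
+ */
+#if 0
+static uint64_t vbic_lane(uint64_t lane, uint64_t imm)
+{
+ uint64_t inverted = ~imm; /* what asimd_imm_const() returns for op=1 */
+ return lane & inverted; /* identical to lane BIC imm */
+}
+#endif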
+
+static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
+ bool negateshift, GVecGen2iFn vecfn)
+{
+ TCGv_ptr qd, qm;
+ int shift = a->shift;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qd | a->qm) ||
+ !fn) {
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * When we handle a right shift insn using a left-shift helper
+ * which permits a negative shift count to indicate a right-shift,
+ * we must negate the shift count.
+ */
+ if (negateshift) {
+ shift = -shift;
+ }
+
+ if (vecfn && mve_no_predication(s)) {
+ vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm),
+ shift, 16, 16);
+ } else {
+ qd = mve_qreg_ptr(a->qd);
+ qm = mve_qreg_ptr(a->qm);
+ fn(tcg_env, qd, qm, tcg_constant_i32(shift));
+ }
+ mve_update_eci(s);
+ return true;
+}
+
+static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
+ bool negateshift)
+{
+ return do_2shift_vec(s, a, fn, negateshift, NULL);
+}
+
+#define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN) \
+ static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
+ { \
+ static MVEGenTwoOpShiftFn * const fns[] = { \
+ gen_helper_mve_##FN##b, \
+ gen_helper_mve_##FN##h, \
+ gen_helper_mve_##FN##w, \
+ NULL, \
+ }; \
+ return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN); \
+ }
+
+#define DO_2SHIFT(INSN, FN, NEGATESHIFT) \
+ DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)
+
+static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ /*
+ * We get here with a negated shift count, and we must handle
+ * shifts by the element size, which tcg_gen_gvec_sari() does not do.
+ */
+ shift = -shift;
+ if (shift == (8 << vece)) {
+ shift--;
+ }
+ tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz);
+}
+
+static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ /*
+ * We get here with a negated shift count, and we must handle
+ * shifts by the element size, which tcg_gen_gvec_shri() does not do.
+ */
+ shift = -shift;
+ if (shift == (8 << vece)) {
+ tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0);
+ } else {
+ tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz);
+ }
+}
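+
+/*
+ * Per-element view of the "shift by the element size" special cases
+ * handled above, shown for 8-bit lanes (illustration only, kept out of
+ * the build; the names are invented):
+ */
+#if 0
+static uint8_t ushr8(uint8_t x, unsigned shift) /* shift is 1..8 */
+{
+ /* an unsigned right shift by the full element size yields 0 */
+ return shift == 8 ? 0 : x >> shift;
+}
+
+static int8_t sshr8(int8_t x, unsigned shift) /* shift is 1..8 */
+{
+ /* a signed right shift by 8 gives the same result as a shift by 7 */
+ return x >> (shift == 8 ? 7 : shift);
+}
+#endif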
+
+DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli)
+DO_2SHIFT(VQSHLI_S, vqshli_s, false)
+DO_2SHIFT(VQSHLI_U, vqshli_u, false)
+DO_2SHIFT(VQSHLUI, vqshlui_s, false)
+/* These right shifts use a left-shift helper with negated shift count */
+DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s)
+DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u)
+DO_2SHIFT(VRSHRI_S, vrshli_s, true)
+DO_2SHIFT(VRSHRI_U, vrshli_u, true)
+
+DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri)
+DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli)
+
+#define DO_2SHIFT_FP(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
+ { \
+ if (!dc_isar_feature(aa32_mve_fp, s)) { \
+ return false; \
+ } \
+ return do_2shift(s, a, gen_helper_mve_##FN, false); \
+ }
+
+DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
+DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
+DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
+DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
+DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
+DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
+DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
+DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)
+
+static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
+ MVEGenTwoOpShiftFn *fn)
+{
+ TCGv_ptr qda;
+ TCGv_i32 rm;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qda) ||
+ a->rm == 13 || a->rm == 15 || !fn) {
+ /* Rm cases are UNPREDICTABLE */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ qda = mve_qreg_ptr(a->qda);
+ rm = load_reg(s, a->rm);
+ fn(tcg_env, qda, qda, rm);
+ mve_update_eci(s);
+ return true;
+}
+
+#define DO_2SHIFT_SCALAR(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \
+ { \
+ static MVEGenTwoOpShiftFn * const fns[] = { \
+ gen_helper_mve_##FN##b, \
+ gen_helper_mve_##FN##h, \
+ gen_helper_mve_##FN##w, \
+ NULL, \
+ }; \
+ return do_2shift_scalar(s, a, fns[a->size]); \
+ }
+
+DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
+DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
+DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
+DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
+DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
+DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
+DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
+DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)
+
+#define DO_VSHLL(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
+ { \
+ static MVEGenTwoOpShiftFn * const fns[] = { \
+ gen_helper_mve_##FN##b, \
+ gen_helper_mve_##FN##h, \
+ }; \
+ return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN); \
+ }
+
+/*
+ * For the VSHLL vector helpers, the vece is the size of the input
+ * (ie MO_8 or MO_16); the helpers want to work in the output size.
+ * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.)
+ */
+static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ unsigned ovece = vece + 1;
+ unsigned ibits = vece == MO_8 ? 8 : 16;
+ tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz);
+ tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
+}
+
+static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ unsigned ovece = vece + 1;
+ tcg_gen_gvec_andi(ovece, dofs, aofs,
+ ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz);
+ tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz);
+}
+
+static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ unsigned ovece = vece + 1;
+ unsigned ibits = vece == MO_8 ? 8 : 16;
+ if (shift == 0) {
+ tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_andi(ovece, dofs, aofs,
+ ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
+ tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
+ }
+}
+
+static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ unsigned ovece = vece + 1;
+ unsigned ibits = vece == MO_8 ? 8 : 16;
+ if (shift == 0) {
+ tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz);
+ } else {
+ tcg_gen_gvec_andi(ovece, dofs, aofs,
+ ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
+ tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
+ }
+}
+
+DO_VSHLL(VSHLL_BS, vshllbs)
+DO_VSHLL(VSHLL_BU, vshllbu)
+DO_VSHLL(VSHLL_TS, vshllts)
+DO_VSHLL(VSHLL_TU, vshlltu)
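+
+/*
+ * The identity used by do_gvec_vshllbs() above, shown for a single
+ * 16-bit output lane whose 8-bit input sits in the low byte
+ * (illustration only, kept out of the build; the name is invented and
+ * shift is 0..8):
+ */
+#if 0
+static int16_t vshllbs_lane(uint16_t in, unsigned shift)
+{
+ int16_t by_definition = (int8_t)in * (1 << shift); /* sign-extend, then shift */
+ int16_t by_gvec = (int16_t)(in << 8) >> (8 - shift); /* shli into the high byte, sari back down */
+ assert(by_definition == by_gvec);
+ return by_gvec;
+}
+#endif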
+
+#define DO_2SHIFT_N(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_2shift *a) \
+ { \
+ static MVEGenTwoOpShiftFn * const fns[] = { \
+ gen_helper_mve_##FN##b, \
+ gen_helper_mve_##FN##h, \
+ }; \
+ return do_2shift(s, a, fns[a->size], false); \
+ }
+
+DO_2SHIFT_N(VSHRNB, vshrnb)
+DO_2SHIFT_N(VSHRNT, vshrnt)
+DO_2SHIFT_N(VRSHRNB, vrshrnb)
+DO_2SHIFT_N(VRSHRNT, vrshrnt)
+DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
+DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
+DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
+DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
+DO_2SHIFT_N(VQSHRUNB, vqshrunb)
+DO_2SHIFT_N(VQSHRUNT, vqshrunt)
+DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
+DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
+DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
+DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
+DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
+DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)
+
+static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
+{
+ /*
+ * Whole Vector Left Shift with Carry. The carry is taken
+ * from a general purpose register and written back there.
+ * An imm of 0 means "shift by 32".
+ */
+ TCGv_ptr qd;
+ TCGv_i32 rdm;
+
+ if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
+ return false;
+ }
+ if (a->rdm == 13 || a->rdm == 15) {
+ /* CONSTRAINED UNPREDICTABLE: we UNDEF */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ qd = mve_qreg_ptr(a->qd);
+ rdm = load_reg(s, a->rdm);
+ gen_helper_mve_vshlc(rdm, tcg_env, qd, rdm, tcg_constant_i32(a->imm));
+ store_reg(s, a->rdm, rdm);
+ mve_update_eci(s);
+ return true;
+}
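+
+/*
+ * An unpredicated scalar model of the VSHLC operation above, treating
+ * the vector as four 32-bit words (illustration only, kept out of the
+ * build; the name is invented, and shift here is 1..32 with the
+ * encoding's imm of 0 standing for 32):
+ */
+#if 0
+static uint32_t vshlc_model(uint32_t q[4], uint32_t rdm, unsigned shift)
+{
+ for (int e = 0; e < 4; e++) {
+ uint64_t wide = ((uint64_t)q[e] << shift) | (rdm & ((1ull << shift) - 1));
+ rdm = (uint64_t)q[e] >> (32 - shift); /* bits shifted out become the next carry-in */
+ q[e] = (uint32_t)wide;
+ }
+ return rdm; /* written back to Rdm */
+}
+#endif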
+
+static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
+{
+ TCGv_ptr qd;
+ TCGv_i32 rn;
+
+ /*
+ * Vector increment/decrement and duplicate (VIDUP, VDDUP).
+ * This fills the vector with elements of successively increasing
+ * or decreasing values, starting from Rn.
+ */
+ if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
+ return false;
+ }
+ if (a->size == MO_64) {
+ /* size 0b11 is another encoding */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ qd = mve_qreg_ptr(a->qd);
+ rn = load_reg(s, a->rn);
+ fn(rn, tcg_env, qd, rn, tcg_constant_i32(a->imm));
+ store_reg(s, a->rn, rn);
+ mve_update_eci(s);
+ return true;
+}
+
+static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
+{
+ TCGv_ptr qd;
+ TCGv_i32 rn, rm;
+
+ /*
+ * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
+ * This fills the vector with elements of successively increasing
+ * or decreasing values, starting from Rn. Rm specifies a point where
+ * the count wraps back around to 0. The updated offset is written back
+ * to Rn.
+ */
+ if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
+ return false;
+ }
+ if (!fn || a->rm == 13 || a->rm == 15) {
+ /*
+ * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
+ * Rm == 15 is the related VIDUP/VDDUP encoding.
+ */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ qd = mve_qreg_ptr(a->qd);
+ rn = load_reg(s, a->rn);
+ rm = load_reg(s, a->rm);
+ fn(rn, tcg_env, qd, rn, rm, tcg_constant_i32(a->imm));
+ store_reg(s, a->rn, rn);
+ mve_update_eci(s);
+ return true;
+}
+
+static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
+{
+ static MVEGenVIDUPFn * const fns[] = {
+ gen_helper_mve_vidupb,
+ gen_helper_mve_viduph,
+ gen_helper_mve_vidupw,
+ NULL,
+ };
+ return do_vidup(s, a, fns[a->size]);
+}
+
+static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
+{
+ static MVEGenVIDUPFn * const fns[] = {
+ gen_helper_mve_vidupb,
+ gen_helper_mve_viduph,
+ gen_helper_mve_vidupw,
+ NULL,
+ };
+ /* VDDUP is just like VIDUP but with a negative immediate */
+ a->imm = -a->imm;
+ return do_vidup(s, a, fns[a->size]);
+}
+
+static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
+{
+ static MVEGenVIWDUPFn * const fns[] = {
+ gen_helper_mve_viwdupb,
+ gen_helper_mve_viwduph,
+ gen_helper_mve_viwdupw,
+ NULL,
+ };
+ return do_viwdup(s, a, fns[a->size]);
+}
+
+static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
+{
+ static MVEGenVIWDUPFn * const fns[] = {
+ gen_helper_mve_vdwdupb,
+ gen_helper_mve_vdwduph,
+ gen_helper_mve_vdwdupw,
+ NULL,
+ };
+ return do_viwdup(s, a, fns[a->size]);
+}
+
+static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
+{
+ TCGv_ptr qn, qm;
+
+ if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
+ !fn) {
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ qn = mve_qreg_ptr(a->qn);
+ qm = mve_qreg_ptr(a->qm);
+ fn(tcg_env, qn, qm);
+ if (a->mask) {
+ /* VPT */
+ gen_vpst(s, a->mask);
+ }
+ /* This insn updates predication bits */
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
+ mve_update_eci(s);
+ return true;
+}
+
+static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
+ MVEGenScalarCmpFn *fn)
+{
+ TCGv_ptr qn;
+ TCGv_i32 rm;
+
+ if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ qn = mve_qreg_ptr(a->qn);
+ if (a->rm == 15) {
+ /* Encoding Rm=0b1111 means "constant zero" */
+ rm = tcg_constant_i32(0);
+ } else {
+ rm = load_reg(s, a->rm);
+ }
+ fn(tcg_env, qn, rm);
+ if (a->mask) {
+ /* VPT */
+ gen_vpst(s, a->mask);
+ }
+ /* This insn updates predication bits */
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
+ mve_update_eci(s);
+ return true;
+}
+
+#define DO_VCMP(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \
+ { \
+ static MVEGenCmpFn * const fns[] = { \
+ gen_helper_mve_##FN##b, \
+ gen_helper_mve_##FN##h, \
+ gen_helper_mve_##FN##w, \
+ NULL, \
+ }; \
+ return do_vcmp(s, a, fns[a->size]); \
+ } \
+ static bool trans_##INSN##_scalar(DisasContext *s, \
+ arg_vcmp_scalar *a) \
+ { \
+ static MVEGenScalarCmpFn * const fns[] = { \
+ gen_helper_mve_##FN##_scalarb, \
+ gen_helper_mve_##FN##_scalarh, \
+ gen_helper_mve_##FN##_scalarw, \
+ NULL, \
+ }; \
+ return do_vcmp_scalar(s, a, fns[a->size]); \
+ }
+
+DO_VCMP(VCMPEQ, vcmpeq)
+DO_VCMP(VCMPNE, vcmpne)
+DO_VCMP(VCMPCS, vcmpcs)
+DO_VCMP(VCMPHI, vcmphi)
+DO_VCMP(VCMPGE, vcmpge)
+DO_VCMP(VCMPLT, vcmplt)
+DO_VCMP(VCMPGT, vcmpgt)
+DO_VCMP(VCMPLE, vcmple)
+
+#define DO_VCMP_FP(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \
+ { \
+ static MVEGenCmpFn * const fns[] = { \
+ NULL, \
+ gen_helper_mve_##FN##h, \
+ gen_helper_mve_##FN##s, \
+ NULL, \
+ }; \
+ if (!dc_isar_feature(aa32_mve_fp, s)) { \
+ return false; \
+ } \
+ return do_vcmp(s, a, fns[a->size]); \
+ } \
+ static bool trans_##INSN##_scalar(DisasContext *s, \
+ arg_vcmp_scalar *a) \
+ { \
+ static MVEGenScalarCmpFn * const fns[] = { \
+ NULL, \
+ gen_helper_mve_##FN##_scalarh, \
+ gen_helper_mve_##FN##_scalars, \
+ NULL, \
+ }; \
+ if (!dc_isar_feature(aa32_mve_fp, s)) { \
+ return false; \
+ } \
+ return do_vcmp_scalar(s, a, fns[a->size]); \
+ }
+
+DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
+DO_VCMP_FP(VCMPNE_fp, vfcmpne)
+DO_VCMP_FP(VCMPGE_fp, vfcmpge)
+DO_VCMP_FP(VCMPLT_fp, vfcmplt)
+DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
+DO_VCMP_FP(VCMPLE_fp, vfcmple)
+
+static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
+{
+ /*
+ * MIN/MAX operations across a vector: compute the min or
+ * max of the initial value in a general purpose register
+ * and all the elements in the vector, and store it back
+ * into the general purpose register.
+ */
+ TCGv_ptr qm;
+ TCGv_i32 rda;
+
+ if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
+ !fn || a->rda == 13 || a->rda == 15) {
+ /* Rda cases are UNPREDICTABLE */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ qm = mve_qreg_ptr(a->qm);
+ rda = load_reg(s, a->rda);
+ fn(rda, tcg_env, qm, rda);
+ store_reg(s, a->rda, rda);
+ mve_update_eci(s);
+ return true;
+}
+
+#define DO_VMAXV(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \
+ { \
+ static MVEGenVADDVFn * const fns[] = { \
+ gen_helper_mve_##FN##b, \
+ gen_helper_mve_##FN##h, \
+ gen_helper_mve_##FN##w, \
+ NULL, \
+ }; \
+ return do_vmaxv(s, a, fns[a->size]); \
+ }
+
+DO_VMAXV(VMAXV_S, vmaxvs)
+DO_VMAXV(VMAXV_U, vmaxvu)
+DO_VMAXV(VMAXAV, vmaxav)
+DO_VMAXV(VMINV_S, vminvs)
+DO_VMAXV(VMINV_U, vminvu)
+DO_VMAXV(VMINAV, vminav)
+
+#define DO_VMAXV_FP(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \
+ { \
+ static MVEGenVADDVFn * const fns[] = { \
+ NULL, \
+ gen_helper_mve_##FN##h, \
+ gen_helper_mve_##FN##s, \
+ NULL, \
+ }; \
+ if (!dc_isar_feature(aa32_mve_fp, s)) { \
+ return false; \
+ } \
+ return do_vmaxv(s, a, fns[a->size]); \
+ }
+
+DO_VMAXV_FP(VMAXNMV, vmaxnmv)
+DO_VMAXV_FP(VMINNMV, vminnmv)
+DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
+DO_VMAXV_FP(VMINNMAV, vminnmav)
+
+static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
+{
+ /* Absolute difference accumulated across vector */
+ TCGv_ptr qn, qm;
+ TCGv_i32 rda;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qm | a->qn) ||
+ !fn || a->rda == 13 || a->rda == 15) {
+ /* Rda cases are UNPREDICTABLE */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ qm = mve_qreg_ptr(a->qm);
+ qn = mve_qreg_ptr(a->qn);
+ rda = load_reg(s, a->rda);
+ fn(rda, tcg_env, qn, qm, rda);
+ store_reg(s, a->rda, rda);
+ mve_update_eci(s);
+ return true;
+}
+
+#define DO_VABAV(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_vabav *a) \
+ { \
+ static MVEGenVABAVFn * const fns[] = { \
+ gen_helper_mve_##FN##b, \
+ gen_helper_mve_##FN##h, \
+ gen_helper_mve_##FN##w, \
+ NULL, \
+ }; \
+ return do_vabav(s, a, fns[a->size]); \
+ }
+
+DO_VABAV(VABAV_S, vabavs)
+DO_VABAV(VABAV_U, vabavu)
+
+static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
+{
+ /*
+ * VMOV two 32-bit vector lanes to two general-purpose registers.
+ * This insn is not predicated but it is subject to beat-wise
+ * execution if it is not in an IT block. For us this means
+ * only that if PSR.ECI says we should not be executing the beat
+ * corresponding to the lane of the vector register being accessed
+ * then we should skip performing the move, and that we need to do
+ * the usual check for bad ECI state and advance of ECI state.
+ * (If PSR.ECI is non-zero then we cannot be in an IT block.)
+ */
+ TCGv_i32 tmp;
+ int vd;
+
+ if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
+ a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
+ a->rt == a->rt2) {
+ /* Rt/Rt2 cases are UNPREDICTABLE */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ /* Convert Qreg index to Dreg for read_neon_element32() etc */
+ vd = a->qd * 2;
+
+ if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
+ tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, vd, a->idx, MO_32);
+ store_reg(s, a->rt, tmp);
+ }
+ if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
+ tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, vd + 1, a->idx, MO_32);
+ store_reg(s, a->rt2, tmp);
+ }
+
+ mve_update_and_store_eci(s);
+ return true;
+}
+
+static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
+{
+ /*
+ * VMOV two general-purpose registers to two 32-bit vector lanes.
+ * This insn is not predicated but it is subject to beat-wise
+ * execution if it is not in an IT block. For us this means
+ * only that if PSR.ECI says we should not be executing the beat
+ * corresponding to the lane of the vector register being accessed
+ * then we should skip performing the move, and that we need to do
+ * the usual check for bad ECI state and advance of ECI state.
+ * (If PSR.ECI is non-zero then we cannot be in an IT block.)
+ */
+ TCGv_i32 tmp;
+ int vd;
+
+ if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
+ a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
+ /* Rt/Rt2 cases are UNPREDICTABLE */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ /* Convert Qreg idx to Dreg for read_neon_element32() etc */
+ vd = a->qd * 2;
+
+ if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
+ tmp = load_reg(s, a->rt);
+ write_neon_element32(tmp, vd, a->idx, MO_32);
+ }
+ if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
+ tmp = load_reg(s, a->rt2);
+ write_neon_element32(tmp, vd + 1, a->idx, MO_32);
+ }
+
+ mve_update_and_store_eci(s);
+ return true;
+}
diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c
new file mode 100644
index 0000000000..144f18ba22
--- /dev/null
+++ b/target/arm/tcg/translate-neon.c
@@ -0,0 +1,3931 @@
+/*
+ * ARM translation: AArch32 Neon instructions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2005-2007 CodeSourcery
+ * Copyright (c) 2007 OpenedHand, Ltd.
+ * Copyright (c) 2020 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "translate.h"
+#include "translate-a32.h"
+
+/* Include the generated Neon decoder */
+#include "decode-neon-dp.c.inc"
+#include "decode-neon-ls.c.inc"
+#include "decode-neon-shared.c.inc"
+
+static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
+{
+ TCGv_ptr ret = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(ret, tcg_env, vfp_reg_offset(dp, reg));
+ return ret;
+}
+
+static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
+{
+ long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
+
+ switch (mop) {
+ case MO_UB:
+ tcg_gen_ld8u_i32(var, tcg_env, offset);
+ break;
+ case MO_UW:
+ tcg_gen_ld16u_i32(var, tcg_env, offset);
+ break;
+ case MO_UL:
+ tcg_gen_ld_i32(var, tcg_env, offset);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
+{
+ long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
+
+ switch (mop) {
+ case MO_UB:
+ tcg_gen_ld8u_i64(var, tcg_env, offset);
+ break;
+ case MO_UW:
+ tcg_gen_ld16u_i64(var, tcg_env, offset);
+ break;
+ case MO_UL:
+ tcg_gen_ld32u_i64(var, tcg_env, offset);
+ break;
+ case MO_UQ:
+ tcg_gen_ld_i64(var, tcg_env, offset);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
+{
+ long offset = neon_element_offset(reg, ele, size);
+
+ switch (size) {
+ case MO_8:
+ tcg_gen_st8_i32(var, tcg_env, offset);
+ break;
+ case MO_16:
+ tcg_gen_st16_i32(var, tcg_env, offset);
+ break;
+ case MO_32:
+ tcg_gen_st_i32(var, tcg_env, offset);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
+{
+ long offset = neon_element_offset(reg, ele, size);
+
+ switch (size) {
+ case MO_8:
+ tcg_gen_st8_i64(var, tcg_env, offset);
+ break;
+ case MO_16:
+ tcg_gen_st16_i64(var, tcg_env, offset);
+ break;
+ case MO_32:
+ tcg_gen_st32_i64(var, tcg_env, offset);
+ break;
+ case MO_64:
+ tcg_gen_st_i64(var, tcg_env, offset);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
+ int data, gen_helper_gvec_4 *fn_gvec)
+{
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
+ return false;
+ }
+
+ /*
+ * UNDEF accesses to odd registers for each bit of Q.
+ * Q will be 0b111 when all inputs are Q-regs, or a subset of
+ * those bits when we have mixed Q- and D-reg inputs.
+ */
+ if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ int opr_sz = q ? 16 : 8;
+ tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd),
+ vfp_reg_offset(1, vn),
+ vfp_reg_offset(1, vm),
+ vfp_reg_offset(1, vd),
+ opr_sz, opr_sz, data, fn_gvec);
+ return true;
+}
+
+static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
+ int data, ARMFPStatusFlavour fp_flavour,
+ gen_helper_gvec_4_ptr *fn_gvec_ptr)
+{
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
+ return false;
+ }
+
+ /*
+ * UNDEF accesses to odd registers for each bit of Q.
+ * Q will be 0b111 when all inputs are Q-regs, or a subset of
+ * those bits when we have mixed Q- and D-reg inputs.
+ */
+ if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ int opr_sz = q ? 16 : 8;
+ TCGv_ptr fpst = fpstatus_ptr(fp_flavour);
+
+ tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
+ vfp_reg_offset(1, vn),
+ vfp_reg_offset(1, vm),
+ vfp_reg_offset(1, vd),
+ fpst, opr_sz, opr_sz, data, fn_gvec_ptr);
+ return true;
+}
+
+static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
+{
+ if (!dc_isar_feature(aa32_vcma, s)) {
+ return false;
+ }
+ if (a->size == MO_16) {
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+ return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
+ FPST_STD_F16, gen_helper_gvec_fcmlah);
+ }
+ return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
+ FPST_STD, gen_helper_gvec_fcmlas);
+}
+
+static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
+{
+ int opr_sz;
+ TCGv_ptr fpst;
+ gen_helper_gvec_3_ptr *fn_gvec_ptr;
+
+ if (!dc_isar_feature(aa32_vcma, s)
+ || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ opr_sz = (1 + a->q) * 8;
+ fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
+ fn_gvec_ptr = (a->size == MO_16) ?
+ gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
+ vfp_reg_offset(1, a->vn),
+ vfp_reg_offset(1, a->vm),
+ fpst, opr_sz, opr_sz, a->rot,
+ fn_gvec_ptr);
+ return true;
+}
+
+static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a)
+{
+ if (!dc_isar_feature(aa32_dp, s)) {
+ return false;
+ }
+ return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
+ gen_helper_gvec_sdot_b);
+}
+
+static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
+{
+ if (!dc_isar_feature(aa32_dp, s)) {
+ return false;
+ }
+ return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
+ gen_helper_gvec_udot_b);
+}
+
+static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
+{
+ if (!dc_isar_feature(aa32_i8mm, s)) {
+ return false;
+ }
+ return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
+ gen_helper_gvec_usdot_b);
+}
+
+static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
+{
+ if (!dc_isar_feature(aa32_bf16, s)) {
+ return false;
+ }
+ return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
+ gen_helper_gvec_bfdot);
+}
+
+static bool trans_VFML(DisasContext *s, arg_VFML *a)
+{
+ int opr_sz;
+
+ if (!dc_isar_feature(aa32_fhm, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ (a->vd & 0x10)) {
+ return false;
+ }
+
+ if (a->vd & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ opr_sz = (1 + a->q) * 8;
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
+ vfp_reg_offset(a->q, a->vn),
+ vfp_reg_offset(a->q, a->vm),
+ tcg_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
+ gen_helper_gvec_fmlal_a32);
+ return true;
+}
+
+static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
+{
+ int data = (a->index << 2) | a->rot;
+
+ if (!dc_isar_feature(aa32_vcma, s)) {
+ return false;
+ }
+ if (a->size == MO_16) {
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+ return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
+ FPST_STD_F16, gen_helper_gvec_fcmlah_idx);
+ }
+ return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
+ FPST_STD, gen_helper_gvec_fcmlas_idx);
+}
+
+static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a)
+{
+ if (!dc_isar_feature(aa32_dp, s)) {
+ return false;
+ }
+ return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
+ gen_helper_gvec_sdot_idx_b);
+}
+
+static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
+{
+ if (!dc_isar_feature(aa32_dp, s)) {
+ return false;
+ }
+ return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
+ gen_helper_gvec_udot_idx_b);
+}
+
+static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
+{
+ if (!dc_isar_feature(aa32_i8mm, s)) {
+ return false;
+ }
+ return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
+ gen_helper_gvec_usdot_idx_b);
+}
+
+static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
+{
+ if (!dc_isar_feature(aa32_i8mm, s)) {
+ return false;
+ }
+ return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
+ gen_helper_gvec_sudot_idx_b);
+}
+
+static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
+{
+ if (!dc_isar_feature(aa32_bf16, s)) {
+ return false;
+ }
+ return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
+ gen_helper_gvec_bfdot_idx);
+}
+
+static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
+{
+ int opr_sz;
+
+ if (!dc_isar_feature(aa32_fhm, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
+ return false;
+ }
+
+ if (a->vd & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ opr_sz = (1 + a->q) * 8;
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
+ vfp_reg_offset(a->q, a->vn),
+ vfp_reg_offset(a->q, a->rm),
+ tcg_env, opr_sz, opr_sz,
+ (a->index << 2) | a->s, /* is_2 == 0 */
+ gen_helper_gvec_fmlal_idx_a32);
+ return true;
+}
+
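+/*
+ * Neon load/store multiple structures: table indexed by the
+ * instruction's 'itype' field, giving the register count,
+ * interleave factor and register spacing for each layout.
+ */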
+static struct {
+ int nregs;
+ int interleave;
+ int spacing;
+} const neon_ls_element_type[11] = {
+ {1, 4, 1},
+ {1, 4, 2},
+ {4, 1, 1},
+ {2, 2, 2},
+ {1, 3, 1},
+ {1, 3, 2},
+ {3, 1, 1},
+ {1, 1, 1},
+ {1, 2, 1},
+ {1, 2, 2},
+ {2, 1, 1}
+};
+
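+/*
+ * Base register writeback for Neon load/store: Rm == 15 means no
+ * writeback, Rm == 13 means post-increment by the transfer size
+ * ('stride' bytes), and any other Rm means post-increment by the
+ * value of that register.
+ */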
+static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
+ int stride)
+{
+ if (rm != 15) {
+ TCGv_i32 base;
+
+ base = load_reg(s, rn);
+ if (rm == 13) {
+ tcg_gen_addi_i32(base, base, stride);
+ } else {
+ TCGv_i32 index;
+ index = load_reg(s, rm);
+ tcg_gen_add_i32(base, base, index);
+ }
+ store_reg(s, rn, base);
+ }
+}
+
+static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
+{
+ /* Neon load/store multiple structures */
+ int nregs, interleave, spacing, reg, n;
+ MemOp mop, align, endian;
+ int mmu_idx = get_mem_index(s);
+ int size = a->size;
+ TCGv_i64 tmp64;
+ TCGv_i32 addr;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+ if (a->itype > 10) {
+ return false;
+ }
+ /* Catch UNDEF cases for bad values of align field */
+ switch (a->itype & 0xc) {
+ case 4:
+ if (a->align >= 2) {
+ return false;
+ }
+ break;
+ case 8:
+ if (a->align == 3) {
+ return false;
+ }
+ break;
+ default:
+ break;
+ }
+ nregs = neon_ls_element_type[a->itype].nregs;
+ interleave = neon_ls_element_type[a->itype].interleave;
+ spacing = neon_ls_element_type[a->itype].spacing;
+ if (size == 3 && (interleave | spacing) != 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /* For our purposes, bytes are always little-endian. */
+ endian = s->be_data;
+ if (size == 0) {
+ endian = MO_LE;
+ }
+
+ /* Enforce alignment requested by the instruction */
+ if (a->align) {
+ align = pow2_align(a->align + 2); /* alignment of 4 << a->align bytes */
+ } else {
+ align = s->align_mem ? MO_ALIGN : 0;
+ }
+
+ /*
+ * Consecutive little-endian elements from a single register
+ * can be promoted to a larger little-endian operation.
+ */
+ if (interleave == 1 && endian == MO_LE) {
+ /* Retain any natural alignment. */
+ if (align == MO_ALIGN) {
+ align = pow2_align(size);
+ }
+ size = 3;
+ }
+
+ tmp64 = tcg_temp_new_i64();
+ addr = tcg_temp_new_i32();
+ load_reg_var(s, addr, a->rn);
+
+ mop = endian | size | align;
+ for (reg = 0; reg < nregs; reg++) {
+ for (n = 0; n < 8 >> size; n++) {
+ int xs;
+ for (xs = 0; xs < interleave; xs++) {
+ int tt = a->vd + reg + spacing * xs;
+
+ if (a->l) {
+ gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop);
+ neon_store_element64(tt, n, size, tmp64);
+ } else {
+ neon_load_element64(tmp64, tt, n, size);
+ gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop);
+ }
+ tcg_gen_addi_i32(addr, addr, 1 << size);
+
+ /* Subsequent memory operations inherit alignment */
+ mop &= ~MO_AMASK;
+ }
+ }
+ }
+
+ gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
+ return true;
+}
+
+static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
+{
+ /* Neon load single structure to all lanes */
+ int reg, stride, vec_size;
+ int vd = a->vd;
+ int size = a->size;
+ int nregs = a->n + 1;
+ TCGv_i32 addr, tmp;
+ MemOp mop, align;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+
+ align = 0;
+ if (size == 3) {
+ if (nregs != 4 || a->a == 0) {
+ return false;
+ }
+ /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
+ size = MO_32;
+ align = MO_ALIGN_16;
+ } else if (a->a) {
+ switch (nregs) {
+ case 1:
+ if (size == 0) {
+ return false;
+ }
+ align = MO_ALIGN;
+ break;
+ case 2:
+ align = pow2_align(size + 1);
+ break;
+ case 3:
+ return false;
+ case 4:
+ if (size == 2) {
+ align = pow2_align(3);
+ } else {
+ align = pow2_align(size + 2);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * VLD1 to all lanes: T bit indicates how many Dregs to write.
+ * VLD2/3/4 to all lanes: T bit indicates register stride.
+ */
+ stride = a->t ? 2 : 1;
+ vec_size = nregs == 1 ? stride * 8 : 8;
+ mop = size | align;
+ tmp = tcg_temp_new_i32();
+ addr = tcg_temp_new_i32();
+ load_reg_var(s, addr, a->rn);
+ for (reg = 0; reg < nregs; reg++) {
+ gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop);
+ if ((vd & 1) && vec_size == 16) {
+ /*
+ * We cannot write 16 bytes at once because the
+ * destination is unaligned.
+ */
+ tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
+ 8, 8, tmp);
+ tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
+ neon_full_reg_offset(vd), 8, 8);
+ } else {
+ tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
+ vec_size, vec_size, tmp);
+ }
+ tcg_gen_addi_i32(addr, addr, 1 << size);
+ vd += stride;
+
+ /* Subsequent memory operations inherit alignment */
+ mop &= ~MO_AMASK;
+ }
+
+ gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);
+
+ return true;
+}
+
+static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
+{
+ /* Neon load/store single structure to one lane */
+ int reg;
+ int nregs = a->n + 1;
+ int vd = a->vd;
+ TCGv_i32 addr, tmp;
+ MemOp mop;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+
+ /* Catch the UNDEF cases. This is unavoidably a bit messy. */
+ switch (nregs) {
+ case 1:
+ if (a->stride != 1) {
+ return false;
+ }
+ if (((a->align & (1 << a->size)) != 0) ||
+ (a->size == 2 && (a->align == 1 || a->align == 2))) {
+ return false;
+ }
+ break;
+ case 2:
+ if (a->size == 2 && (a->align & 2) != 0) {
+ return false;
+ }
+ break;
+ case 3:
+ if (a->align != 0) {
+ return false;
+ }
+ break;
+ case 4:
+ if (a->size == 2 && a->align == 3) {
+ return false;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ if ((vd + a->stride * (nregs - 1)) > 31) {
+ /*
+ * Attempts to write off the end of the register file are
+ * UNPREDICTABLE; we choose to UNDEF because otherwise we would
+ * access off the end of the array that holds the register data.
+ */
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /* Pick up SCTLR settings */
+ mop = finalize_memop(s, a->size);
+
+ if (a->align) {
+ MemOp align_op;
+
+ switch (nregs) {
+ case 1:
+ /* For VLD1, use natural alignment. */
+ align_op = MO_ALIGN;
+ break;
+ case 2:
+ /* For VLD2, use double alignment. */
+ align_op = pow2_align(a->size + 1);
+ break;
+ case 4:
+ if (a->size == MO_32) {
+ /*
+ * For VLD4.32, align = 1 is double alignment, align = 2 is
+ * quad alignment; align = 3 is rejected above.
+ */
+ align_op = pow2_align(a->size + a->align);
+ } else {
+ /* For VLD4.8 and VLD4.16, we want quad alignment. */
+ align_op = pow2_align(a->size + 2);
+ }
+ break;
+ default:
+ /* For VLD3, a non-zero alignment field was already rejected above. */
+ g_assert_not_reached();
+ }
+
+ mop = (mop & ~MO_AMASK) | align_op;
+ }
+
+ tmp = tcg_temp_new_i32();
+ addr = tcg_temp_new_i32();
+ load_reg_var(s, addr, a->rn);
+
+ for (reg = 0; reg < nregs; reg++) {
+ if (a->l) {
+ gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop);
+ neon_store_element(vd, a->reg_idx, a->size, tmp);
+ } else { /* Store */
+ neon_load_element(tmp, vd, a->reg_idx, a->size);
+ gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop);
+ }
+ vd += a->stride;
+ tcg_gen_addi_i32(addr, addr, 1 << a->size);
+
+ /* Subsequent memory operations inherit alignment */
+ mop &= ~MO_AMASK;
+ }
+
+ gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);
+
+ return true;
+}
+
+static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
+{
+ int vec_size = a->q ? 16 : 8;
+ int rd_ofs = neon_full_reg_offset(a->vd);
+ int rn_ofs = neon_full_reg_offset(a->vn);
+ int rm_ofs = neon_full_reg_offset(a->vm);
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
+ return true;
+}
+
+#define DO_3SAME(INSN, FUNC) \
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ return do_3same(s, a, FUNC); \
+ }
+
+DO_3SAME(VADD, tcg_gen_gvec_add)
+DO_3SAME(VSUB, tcg_gen_gvec_sub)
+DO_3SAME(VAND, tcg_gen_gvec_and)
+DO_3SAME(VBIC, tcg_gen_gvec_andc)
+DO_3SAME(VORR, tcg_gen_gvec_or)
+DO_3SAME(VORN, tcg_gen_gvec_orc)
+DO_3SAME(VEOR, tcg_gen_gvec_xor)
+DO_3SAME(VSHL_S, gen_gvec_sshl)
+DO_3SAME(VSHL_U, gen_gvec_ushl)
+DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
+DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
+DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
+DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
+
+/* These insns are all gvec_bitsel but with the inputs in various orders. */
+#define DO_3SAME_BITSEL(INSN, O1, O2, O3) \
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz); \
+ } \
+ DO_3SAME(INSN, gen_##INSN##_3s)
+
+DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
+DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
+DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
+
+#define DO_3SAME_NO_SZ_3(INSN, FUNC) \
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (a->size == 3) { \
+ return false; \
+ } \
+ return do_3same(s, a, FUNC); \
+ }
+
+DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
+DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
+DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
+DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
+DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
+DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
+DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
+DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
+DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
+DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
+DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
+DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
+
+#define DO_3SAME_CMP(INSN, COND) \
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
+ } \
+ DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)
+
+DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
+DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
+DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
+DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
+DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
+
+#define WRAP_OOL_FN(WRAPNAME, FUNC) \
+ static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \
+ uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \
+ { \
+ tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
+ }
+
+WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)
+
+static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
+{
+ if (a->size != 0) {
+ return false;
+ }
+ return do_3same(s, a, gen_VMUL_p_3s);
+}
+
+#define DO_VQRDMLAH(INSN, FUNC) \
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (!dc_isar_feature(aa32_rdm, s)) { \
+ return false; \
+ } \
+ if (a->size != 1 && a->size != 2) { \
+ return false; \
+ } \
+ return do_3same(s, a, FUNC); \
+ }
+
+DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
+DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
+
+#define DO_SHA1(NAME, FUNC) \
+ WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
+ static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (!dc_isar_feature(aa32_sha1, s)) { \
+ return false; \
+ } \
+ return do_3same(s, a, gen_##NAME##_3s); \
+ }
+
+DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
+DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
+DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
+DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)
+
+#define DO_SHA2(NAME, FUNC) \
+ WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
+ static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (!dc_isar_feature(aa32_sha2, s)) { \
+ return false; \
+ } \
+ return do_3same(s, a, gen_##NAME##_3s); \
+ }
+
+DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
+DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
+DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)
+
+#define DO_3SAME_64(INSN, FUNC) \
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ static const GVecGen3 op = { .fni8 = FUNC }; \
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \
+ } \
+ DO_3SAME(INSN, gen_##INSN##_3s)
+
+#define DO_3SAME_64_ENV(INSN, FUNC) \
+ static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \
+ { \
+ FUNC(d, tcg_env, n, m); \
+ } \
+ DO_3SAME_64(INSN, gen_##INSN##_elt)
+
+DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
+DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
+DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
+DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
+DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
+DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
+
+#define DO_3SAME_32(INSN, FUNC) \
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ static const GVecGen3 ops[4] = { \
+ { .fni4 = gen_helper_neon_##FUNC##8 }, \
+ { .fni4 = gen_helper_neon_##FUNC##16 }, \
+ { .fni4 = gen_helper_neon_##FUNC##32 }, \
+ { 0 }, \
+ }; \
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
+ } \
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (a->size > 2) { \
+ return false; \
+ } \
+ return do_3same(s, a, gen_##INSN##_3s); \
+ }
+
+/*
+ * Some helper functions need to be passed the tcg_env. In order
+ * to use those with the gvec APIs like tcg_gen_gvec_3() we need
+ * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
+ * and which call a NeonGenTwoOpEnvFn().
+ */
+#define WRAP_ENV_FN(WRAPNAME, FUNC) \
+ static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m) \
+ { \
+ FUNC(d, tcg_env, n, m); \
+ }
+
+#define DO_3SAME_32_ENV(INSN, FUNC) \
+ WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \
+ WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \
+ WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ static const GVecGen3 ops[4] = { \
+ { .fni4 = gen_##INSN##_tramp8 }, \
+ { .fni4 = gen_##INSN##_tramp16 }, \
+ { .fni4 = gen_##INSN##_tramp32 }, \
+ { 0 }, \
+ }; \
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
+ } \
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (a->size > 2) { \
+ return false; \
+ } \
+ return do_3same(s, a, gen_##INSN##_3s); \
+ }
+
+DO_3SAME_32(VHADD_S, hadd_s)
+DO_3SAME_32(VHADD_U, hadd_u)
+DO_3SAME_32(VHSUB_S, hsub_s)
+DO_3SAME_32(VHSUB_U, hsub_u)
+DO_3SAME_32(VRHADD_S, rhadd_s)
+DO_3SAME_32(VRHADD_U, rhadd_u)
+DO_3SAME_32(VRSHL_S, rshl_s)
+DO_3SAME_32(VRSHL_U, rshl_u)
+
+DO_3SAME_32_ENV(VQSHL_S, qshl_s)
+DO_3SAME_32_ENV(VQSHL_U, qshl_u)
+DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
+DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
+
+static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
+{
+ /* Operations handled pairwise 32 bits at a time */
+ TCGv_i32 tmp, tmp2, tmp3;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->size == 3) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ assert(a->q == 0); /* enforced by decode patterns */
+
+ /*
+ * Note that we have to be careful not to clobber the source operands
+ * in the "vm == vd" case by storing the result of the first pass too
+ * early. Since Q is 0 there are always just two passes, so instead
+ * of a complicated loop over each pass we just unroll.
+ */
+ tmp = tcg_temp_new_i32();
+ tmp2 = tcg_temp_new_i32();
+ tmp3 = tcg_temp_new_i32();
+
+ read_neon_element32(tmp, a->vn, 0, MO_32);
+ read_neon_element32(tmp2, a->vn, 1, MO_32);
+ fn(tmp, tmp, tmp2);
+
+ read_neon_element32(tmp3, a->vm, 0, MO_32);
+ read_neon_element32(tmp2, a->vm, 1, MO_32);
+ fn(tmp3, tmp3, tmp2);
+
+ write_neon_element32(tmp, a->vd, 0, MO_32);
+ write_neon_element32(tmp3, a->vd, 1, MO_32);
+
+ return true;
+}
+
+#define DO_3SAME_PAIR(INSN, func) \
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ static NeonGenTwoOpFn * const fns[] = { \
+ gen_helper_neon_##func##8, \
+ gen_helper_neon_##func##16, \
+ gen_helper_neon_##func##32, \
+ }; \
+ if (a->size > 2) { \
+ return false; \
+ } \
+ return do_3same_pair(s, a, fns[a->size]); \
+ }
+
+/* 32-bit pairwise ops end up the same as the elementwise versions. */
+#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
+#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
+#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
+#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
+#define gen_helper_neon_padd_u32 tcg_gen_add_i32
+
+DO_3SAME_PAIR(VPMAX_S, pmax_s)
+DO_3SAME_PAIR(VPMIN_S, pmin_s)
+DO_3SAME_PAIR(VPMAX_U, pmax_u)
+DO_3SAME_PAIR(VPMIN_U, pmin_u)
+DO_3SAME_PAIR(VPADD, padd_u)
+
+#define DO_3SAME_VQDMULH(INSN, FUNC) \
+ WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \
+ WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \
+ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ static const GVecGen3 ops[2] = { \
+ { .fni4 = gen_##INSN##_tramp16 }, \
+ { .fni4 = gen_##INSN##_tramp32 }, \
+ }; \
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
+ } \
+ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (a->size != 1 && a->size != 2) { \
+ return false; \
+ } \
+ return do_3same(s, a, gen_##INSN##_3s); \
+ }
+
+DO_3SAME_VQDMULH(VQDMULH, qdmulh)
+DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
+
+#define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC) \
+ static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rn_ofs, uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ TCGv_ptr fpst = fpstatus_ptr(FPST); \
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst, \
+ oprsz, maxsz, 0, FUNC); \
+ }
+
+#define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC) \
+ WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC) \
+ WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC) \
+ static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (a->size == MO_16) { \
+ if (!dc_isar_feature(aa32_fp16_arith, s)) { \
+ return false; \
+ } \
+ return do_3same(s, a, gen_##INSN##_fp16_3s); \
+ } \
+ return do_3same(s, a, gen_##INSN##_fp32_3s); \
+ }
+
+DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
+DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
+DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
+DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
+DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
+DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
+DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
+DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
+DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
+DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
+DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
+DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
+DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
+DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
+DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
+DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
+DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)
+
+WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
+WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
+WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
+WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)
+
+static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
+ return false;
+ }
+
+ if (a->size == MO_16) {
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+ return do_3same(s, a, gen_VMAXNM_fp16_3s);
+ }
+ return do_3same(s, a, gen_VMAXNM_fp32_3s);
+}
+
+static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
+ return false;
+ }
+
+ if (a->size == MO_16) {
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+ return do_3same(s, a, gen_VMINNM_fp16_3s);
+ }
+ return do_3same(s, a, gen_VMINNM_fp32_3s);
+}
+
+static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
+ gen_helper_gvec_3_ptr *fn)
+{
+ /* FP pairwise operations */
+ TCGv_ptr fpstatus;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ assert(a->q == 0); /* enforced by decode patterns */
+
+ fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
+ vfp_reg_offset(1, a->vn),
+ vfp_reg_offset(1, a->vm),
+ fpstatus, 8, 8, 0, fn);
+
+ return true;
+}
+
+/*
+ * For all the functions using this macro, size == 1 means fp16,
+ * which requires the FP16 arithmetic extension (checked below).
+ */
+#define DO_3S_FP_PAIR(INSN,FUNC) \
+ static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
+ { \
+ if (a->size == MO_16) { \
+ if (!dc_isar_feature(aa32_fp16_arith, s)) { \
+ return false; \
+ } \
+ return do_3same_fp_pair(s, a, FUNC##h); \
+ } \
+ return do_3same_fp_pair(s, a, FUNC##s); \
+ }
+
+DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
+DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
+DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)
+
+static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
+{
+ /* Handle a 2-reg-shift insn which can be vectorized. */
+ int vec_size = a->q ? 16 : 8;
+ int rd_ofs = neon_full_reg_offset(a->vd);
+ int rm_ofs = neon_full_reg_offset(a->vm);
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
+ return true;
+}
+
+#define DO_2SH(INSN, FUNC) \
+ static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
+ { \
+ return do_vector_2sh(s, a, FUNC); \
+ } \
+
+DO_2SH(VSHL, tcg_gen_gvec_shli)
+DO_2SH(VSLI, gen_gvec_sli)
+DO_2SH(VSRI, gen_gvec_sri)
+DO_2SH(VSRA_S, gen_gvec_ssra)
+DO_2SH(VSRA_U, gen_gvec_usra)
+DO_2SH(VRSHR_S, gen_gvec_srshr)
+DO_2SH(VRSHR_U, gen_gvec_urshr)
+DO_2SH(VRSRA_S, gen_gvec_srsra)
+DO_2SH(VRSRA_U, gen_gvec_ursra)
+
+static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
+{
+ /* Signed shift out of range results in all-sign-bits */
+ a->shift = MIN(a->shift, (8 << a->size) - 1);
+ return do_vector_2sh(s, a, tcg_gen_gvec_sari);
+}
+
+static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
+}
+
+static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
+{
+ /* Shift out of range is architecturally valid and results in zero. */
+ if (a->shift >= (8 << a->size)) {
+ return do_vector_2sh(s, a, gen_zero_rd_2sh);
+ } else {
+ return do_vector_2sh(s, a, tcg_gen_gvec_shri);
+ }
+}
+
+static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
+ NeonGenTwo64OpEnvFn *fn)
+{
+ /*
+ * 2-reg-and-shift operations, size == 3 case, where the
+ * function needs to be passed tcg_env.
+ */
+ TCGv_i64 constimm;
+ int pass;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * To avoid excessive duplication of ops we implement shift
+ * by immediate using the variable shift operations.
+ */
+ constimm = tcg_constant_i64(dup_const(a->size, a->shift));
+
+ for (pass = 0; pass < a->q + 1; pass++) {
+ TCGv_i64 tmp = tcg_temp_new_i64();
+
+ read_neon_element64(tmp, a->vm, pass, MO_64);
+ fn(tmp, tcg_env, tmp, constimm);
+ write_neon_element64(tmp, a->vd, pass, MO_64);
+ }
+ return true;
+}
+
+static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
+ NeonGenTwoOpEnvFn *fn)
+{
+ /*
+ * 2-reg-and-shift operations, size < 3 case, where the
+ * helper needs to be passed tcg_env.
+ */
+ TCGv_i32 constimm, tmp;
+ int pass;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * To avoid excessive duplication of ops we implement shift
+ * by immediate using the variable shift operations.
+ */
+ constimm = tcg_constant_i32(dup_const(a->size, a->shift));
+ tmp = tcg_temp_new_i32();
+
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+ read_neon_element32(tmp, a->vm, pass, MO_32);
+ fn(tmp, tcg_env, tmp, constimm);
+ write_neon_element32(tmp, a->vd, pass, MO_32);
+ }
+ return true;
+}
+
+#define DO_2SHIFT_ENV(INSN, FUNC) \
+ static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
+ { \
+ return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64); \
+ } \
+ static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
+ { \
+ static NeonGenTwoOpEnvFn * const fns[] = { \
+ gen_helper_neon_##FUNC##8, \
+ gen_helper_neon_##FUNC##16, \
+ gen_helper_neon_##FUNC##32, \
+ }; \
+ assert(a->size < ARRAY_SIZE(fns)); \
+ return do_2shift_env_32(s, a, fns[a->size]); \
+ }
+
+DO_2SHIFT_ENV(VQSHLU, qshlu_s)
+DO_2SHIFT_ENV(VQSHL_U, qshl_u)
+DO_2SHIFT_ENV(VQSHL_S, qshl_s)
+
+static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
+ NeonGenTwo64OpFn *shiftfn,
+ NeonGenNarrowEnvFn *narrowfn)
+{
+ /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
+ TCGv_i64 constimm, rm1, rm2;
+ TCGv_i32 rd;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->vm & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * This is always a right shift, and the shiftfn is always a
+ * left-shift helper, which thus needs the negated shift count.
+ */
+ constimm = tcg_constant_i64(-a->shift);
+ rm1 = tcg_temp_new_i64();
+ rm2 = tcg_temp_new_i64();
+ rd = tcg_temp_new_i32();
+
+ /* Load both inputs first to avoid potential overwrite if rm == rd */
+ read_neon_element64(rm1, a->vm, 0, MO_64);
+ read_neon_element64(rm2, a->vm, 1, MO_64);
+
+ shiftfn(rm1, rm1, constimm);
+ narrowfn(rd, tcg_env, rm1);
+ write_neon_element32(rd, a->vd, 0, MO_32);
+
+ shiftfn(rm2, rm2, constimm);
+ narrowfn(rd, tcg_env, rm2);
+ write_neon_element32(rd, a->vd, 1, MO_32);
+
+ return true;
+}
+
+static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
+ NeonGenTwoOpFn *shiftfn,
+ NeonGenNarrowEnvFn *narrowfn)
+{
+ /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
+ TCGv_i32 constimm, rm1, rm2, rm3, rm4;
+ TCGv_i64 rtmp;
+ uint32_t imm;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->vm & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * This is always a right shift, and the shiftfn is always a
+ * left-shift helper, which thus needs the negated shift count
+ * duplicated into each lane of the immediate value.
+ */
+ if (a->size == 1) {
+ imm = (uint16_t)(-a->shift);
+ imm |= imm << 16;
+ } else {
+ /* size == 2 */
+ imm = -a->shift;
+ }
+ constimm = tcg_constant_i32(imm);
+
+ /* Load all inputs first to avoid potential overwrite */
+ rm1 = tcg_temp_new_i32();
+ rm2 = tcg_temp_new_i32();
+ rm3 = tcg_temp_new_i32();
+ rm4 = tcg_temp_new_i32();
+ read_neon_element32(rm1, a->vm, 0, MO_32);
+ read_neon_element32(rm2, a->vm, 1, MO_32);
+ read_neon_element32(rm3, a->vm, 2, MO_32);
+ read_neon_element32(rm4, a->vm, 3, MO_32);
+ rtmp = tcg_temp_new_i64();
+
+ shiftfn(rm1, rm1, constimm);
+ shiftfn(rm2, rm2, constimm);
+
+ tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
+
+ narrowfn(rm1, tcg_env, rtmp);
+ write_neon_element32(rm1, a->vd, 0, MO_32);
+
+ shiftfn(rm3, rm3, constimm);
+ shiftfn(rm4, rm4, constimm);
+
+ tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
+
+ narrowfn(rm3, tcg_env, rtmp);
+ write_neon_element32(rm3, a->vd, 1, MO_32);
+ return true;
+}
+
+#define DO_2SN_64(INSN, FUNC, NARROWFUNC) \
+ static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
+ { \
+ return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC); \
+ }
+#define DO_2SN_32(INSN, FUNC, NARROWFUNC) \
+ static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
+ { \
+ return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC); \
+ }
+
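+/*
+ * The DO_2SN_* macros expect a NeonGenNarrowEnvFn; these wrappers
+ * adapt the plain narrowing operations by ignoring the unused
+ * 'env' argument.
+ */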
+static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+{
+ tcg_gen_extrl_i64_i32(dest, src);
+}
+
+static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+{
+ gen_helper_neon_narrow_u16(dest, src);
+}
+
+static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
+{
+ gen_helper_neon_narrow_u8(dest, src);
+}
+
+DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
+DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
+DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)
+
+DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
+DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
+DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)
+
+DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
+DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
+DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)
+
+DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
+DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
+DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
+DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
+DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
+DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)
+
+DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
+DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
+DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)
+
+DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
+DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
+DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)
+
+DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
+DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
+DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
+
+static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
+ NeonGenWidenFn *widenfn, bool u)
+{
+ TCGv_i64 tmp;
+ TCGv_i32 rm0, rm1;
+ uint64_t widen_mask = 0;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->vd & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * This is a widen-and-shift operation. The shift is always less
+ * than the width of the source type, so after widening the input
+ * vector we can simply shift the whole 64-bit widened register,
+ * and then clear the potential overflow bits resulting from left
+ * bits of the narrow input appearing as right bits of the left
+ * neighbour narrow input. Calculate a mask of bits to clear.
+ */
+ if ((a->shift != 0) && (a->size < 2 || u)) {
+ int esize = 8 << a->size;
+ widen_mask = MAKE_64BIT_MASK(0, esize);
+ widen_mask >>= esize - a->shift;
+ widen_mask = dup_const(a->size + 1, widen_mask);
+ }
+
+ rm0 = tcg_temp_new_i32();
+ rm1 = tcg_temp_new_i32();
+ read_neon_element32(rm0, a->vm, 0, MO_32);
+ read_neon_element32(rm1, a->vm, 1, MO_32);
+ tmp = tcg_temp_new_i64();
+
+ widenfn(tmp, rm0);
+ if (a->shift != 0) {
+ tcg_gen_shli_i64(tmp, tmp, a->shift);
+ tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
+ }
+ write_neon_element64(tmp, a->vd, 0, MO_64);
+
+ widenfn(tmp, rm1);
+ if (a->shift != 0) {
+ tcg_gen_shli_i64(tmp, tmp, a->shift);
+ tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
+ }
+ write_neon_element64(tmp, a->vd, 1, MO_64);
+ return true;
+}
+
+static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
+{
+ static NeonGenWidenFn * const widenfn[] = {
+ gen_helper_neon_widen_s8,
+ gen_helper_neon_widen_s16,
+ tcg_gen_ext_i32_i64,
+ };
+ return do_vshll_2sh(s, a, widenfn[a->size], false);
+}
+
+static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
+{
+ static NeonGenWidenFn * const widenfn[] = {
+ gen_helper_neon_widen_u8,
+ gen_helper_neon_widen_u16,
+ tcg_gen_extu_i32_i64,
+ };
+ return do_vshll_2sh(s, a, widenfn[a->size], true);
+}
+
+static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
+ gen_helper_gvec_2_ptr *fn)
+{
+ /* FP operations in 2-reg-and-shift group */
+ int vec_size = a->q ? 16 : 8;
+ int rd_ofs = neon_full_reg_offset(a->vd);
+ int rm_ofs = neon_full_reg_offset(a->vm);
+ TCGv_ptr fpst;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ if (a->size == MO_16) {
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
+ tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
+ return true;
+}
+
+#define DO_FP_2SH(INSN, FUNC) \
+ static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a) \
+ { \
+ return do_fp_2sh(s, a, FUNC); \
+ }
+
+DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
+DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
+DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
+DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)
+
+DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
+DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
+DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
+DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
+
+static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
+ GVecGen2iFn *fn)
+{
+ uint64_t imm;
+ int reg_ofs, vec_size;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+
+ if (a->vd & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ reg_ofs = neon_full_reg_offset(a->vd);
+ vec_size = a->q ? 16 : 8;
+ imm = asimd_imm_const(a->imm, a->cmode, a->op);
+
+ fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
+ return true;
+}
+
+static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
+ int64_t c, uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
+}
+
+static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
+{
+ /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
+ GVecGen2iFn *fn;
+
+ if ((a->cmode & 1) && a->cmode < 12) {
+ /* for op=1, the imm will be inverted, so BIC becomes AND. */
+ fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
+ } else {
+ /* There is one unallocated cmode/op combination in this space */
+ if (a->cmode == 15 && a->op == 1) {
+ return false;
+ }
+ fn = gen_VMOV_1r;
+ }
+ return do_1reg_imm(s, a, fn);
+}
+
+static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
+ NeonGenWidenFn *widenfn,
+ NeonGenTwo64OpFn *opfn,
+ int src1_mop, int src2_mop)
+{
+ /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
+ TCGv_i64 rn0_64, rn1_64, rm_64;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn) {
+ /* size == 3 case, which is an entirely different insn group */
+ return false;
+ }
+
+ if ((a->vd & 1) || (src1_mop == MO_UQ && (a->vn & 1))) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ rn0_64 = tcg_temp_new_i64();
+ rn1_64 = tcg_temp_new_i64();
+ rm_64 = tcg_temp_new_i64();
+
+ if (src1_mop >= 0) {
+ read_neon_element64(rn0_64, a->vn, 0, src1_mop);
+ } else {
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vn, 0, MO_32);
+ widenfn(rn0_64, tmp);
+ }
+ if (src2_mop >= 0) {
+ read_neon_element64(rm_64, a->vm, 0, src2_mop);
+ } else {
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vm, 0, MO_32);
+ widenfn(rm_64, tmp);
+ }
+
+ opfn(rn0_64, rn0_64, rm_64);
+
+ /*
+ * Load second pass inputs before storing the first pass result, to
+ * avoid incorrect results if a narrow input overlaps with the result.
+ */
+ if (src1_mop >= 0) {
+ read_neon_element64(rn1_64, a->vn, 1, src1_mop);
+ } else {
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vn, 1, MO_32);
+ widenfn(rn1_64, tmp);
+ }
+ if (src2_mop >= 0) {
+ read_neon_element64(rm_64, a->vm, 1, src2_mop);
+ } else {
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vm, 1, MO_32);
+ widenfn(rm_64, tmp);
+ }
+
+ write_neon_element64(rn0_64, a->vd, 0, MO_64);
+
+ opfn(rn1_64, rn1_64, rm_64);
+ write_neon_element64(rn1_64, a->vd, 1, MO_64);
+
+ return true;
+}
+
+#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN) \
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
+ { \
+ static NeonGenWidenFn * const widenfn[] = { \
+ gen_helper_neon_widen_##S##8, \
+ gen_helper_neon_widen_##S##16, \
+ NULL, NULL, \
+ }; \
+ static NeonGenTwo64OpFn * const addfn[] = { \
+ gen_helper_neon_##OP##l_u16, \
+ gen_helper_neon_##OP##l_u32, \
+ tcg_gen_##OP##_i64, \
+ NULL, \
+ }; \
+ int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1; \
+ return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size], \
+ SRC1WIDE ? MO_UQ : narrow_mop, \
+ narrow_mop); \
+ }
+
+DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
+DO_PREWIDEN(VADDL_U, u, add, false, 0)
+DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
+DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
+DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
+DO_PREWIDEN(VADDW_U, u, add, true, 0)
+DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
+DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
+
+static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
+ NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
+{
+ /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
+ TCGv_i64 rn_64, rm_64;
+ TCGv_i32 rd0, rd1;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn || !narrowfn) {
+ /* size == 3 case, which is an entirely different insn group */
+ return false;
+ }
+
+ if ((a->vn | a->vm) & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ rn_64 = tcg_temp_new_i64();
+ rm_64 = tcg_temp_new_i64();
+ rd0 = tcg_temp_new_i32();
+ rd1 = tcg_temp_new_i32();
+
+ read_neon_element64(rn_64, a->vn, 0, MO_64);
+ read_neon_element64(rm_64, a->vm, 0, MO_64);
+
+ opfn(rn_64, rn_64, rm_64);
+
+ narrowfn(rd0, rn_64);
+
+ read_neon_element64(rn_64, a->vn, 1, MO_64);
+ read_neon_element64(rm_64, a->vm, 1, MO_64);
+
+ opfn(rn_64, rn_64, rm_64);
+
+ narrowfn(rd1, rn_64);
+
+ write_neon_element32(rd0, a->vd, 0, MO_32);
+ write_neon_element32(rd1, a->vd, 1, MO_32);
+
+ return true;
+}
+
+#define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP) \
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
+ { \
+ static NeonGenTwo64OpFn * const addfn[] = { \
+ gen_helper_neon_##OP##l_u16, \
+ gen_helper_neon_##OP##l_u32, \
+ tcg_gen_##OP##_i64, \
+ NULL, \
+ }; \
+ static NeonGenNarrowFn * const narrowfn[] = { \
+ gen_helper_neon_##NARROWTYPE##_high_u8, \
+ gen_helper_neon_##NARROWTYPE##_high_u16, \
+ EXTOP, \
+ NULL, \
+ }; \
+ return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]); \
+ }
+
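+/* Rounding "high half" narrow: add the rounding constant first. */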
+static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
+{
+ tcg_gen_addi_i64(rn, rn, 1u << 31);
+ tcg_gen_extrh_i64_i32(rd, rn);
+}
+
+DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
+DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
+DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
+DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
+
+static bool do_long_3d(DisasContext *s, arg_3diff *a,
+ NeonGenTwoOpWidenFn *opfn,
+ NeonGenTwo64OpFn *accfn)
+{
+ /*
+ * 3-regs different lengths, long operations.
+ * These perform an operation on two inputs that returns a double-width
+ * result, and then possibly perform an accumulation operation of
+ * that result into the double-width destination.
+ */
+ TCGv_i64 rd0, rd1, tmp;
+ TCGv_i32 rn, rm;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn) {
+ /* size == 3 case, which is an entirely different insn group */
+ return false;
+ }
+
+ if (a->vd & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ rd0 = tcg_temp_new_i64();
+ rd1 = tcg_temp_new_i64();
+
+ rn = tcg_temp_new_i32();
+ rm = tcg_temp_new_i32();
+ read_neon_element32(rn, a->vn, 0, MO_32);
+ read_neon_element32(rm, a->vm, 0, MO_32);
+ opfn(rd0, rn, rm);
+
+ read_neon_element32(rn, a->vn, 1, MO_32);
+ read_neon_element32(rm, a->vm, 1, MO_32);
+ opfn(rd1, rn, rm);
+
+ /* Don't store results until after all loads: they might overlap */
+ if (accfn) {
+ tmp = tcg_temp_new_i64();
+ read_neon_element64(tmp, a->vd, 0, MO_64);
+ accfn(rd0, tmp, rd0);
+ read_neon_element64(tmp, a->vd, 1, MO_64);
+ accfn(rd1, tmp, rd1);
+ }
+
+ write_neon_element64(rd0, a->vd, 0, MO_64);
+ write_neon_element64(rd1, a->vd, 1, MO_64);
+
+ return true;
+}
+
+static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_abdl_s16,
+ gen_helper_neon_abdl_s32,
+ gen_helper_neon_abdl_s64,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_abdl_u16,
+ gen_helper_neon_abdl_u32,
+ gen_helper_neon_abdl_u64,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_abdl_s16,
+ gen_helper_neon_abdl_s32,
+ gen_helper_neon_abdl_s64,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const addfn[] = {
+ gen_helper_neon_addl_u16,
+ gen_helper_neon_addl_u32,
+ tcg_gen_add_i64,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
+}
+
+static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_abdl_u16,
+ gen_helper_neon_abdl_u32,
+ gen_helper_neon_abdl_u64,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const addfn[] = {
+ gen_helper_neon_addl_u16,
+ gen_helper_neon_addl_u32,
+ tcg_gen_add_i64,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
+}
+
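+/*
+ * Unlike the 8-bit and 16-bit cases, the 32-bit widening multiply is
+ * open-coded: build the 64-bit product from the lo/hi halves produced
+ * by tcg_gen_muls2/mulu2_i32 and concatenate them.
+ */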
+static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
+{
+ TCGv_i32 lo = tcg_temp_new_i32();
+ TCGv_i32 hi = tcg_temp_new_i32();
+
+ tcg_gen_muls2_i32(lo, hi, rn, rm);
+ tcg_gen_concat_i32_i64(rd, lo, hi);
+}
+
+static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
+{
+ TCGv_i32 lo = tcg_temp_new_i32();
+ TCGv_i32 hi = tcg_temp_new_i32();
+
+ tcg_gen_mulu2_i32(lo, hi, rn, rm);
+ tcg_gen_concat_i32_i64(rd, lo, hi);
+}
+
+static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_mull_s8,
+ gen_helper_neon_mull_s16,
+ gen_mull_s32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ gen_helper_neon_mull_u8,
+ gen_helper_neon_mull_u16,
+ gen_mull_u32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+#define DO_VMLAL(INSN, MULL, ACC) \
+ static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
+ { \
+ static NeonGenTwoOpWidenFn * const opfn[] = { \
+ gen_helper_neon_##MULL##8, \
+ gen_helper_neon_##MULL##16, \
+ gen_##MULL##32, \
+ NULL, \
+ }; \
+ static NeonGenTwo64OpFn * const accfn[] = { \
+ gen_helper_neon_##ACC##l_u16, \
+ gen_helper_neon_##ACC##l_u32, \
+ tcg_gen_##ACC##_i64, \
+ NULL, \
+ }; \
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]); \
+ }
+
+DO_VMLAL(VMLAL_S, mull_s, add)
+DO_VMLAL(VMLAL_U, mull_u, add)
+DO_VMLAL(VMLSL_S, mull_s, sub)
+DO_VMLAL(VMLSL_U, mull_u, sub)
+
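+/*
+ * VQDMULL: widening signed multiply, then double with saturation; the
+ * doubling is done by saturating-adding the widened product to itself.
+ */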
+static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
+{
+ gen_helper_neon_mull_s16(rd, rn, rm);
+ gen_helper_neon_addl_saturate_s32(rd, tcg_env, rd, rd);
+}
+
+static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
+{
+ gen_mull_s32(rd, rn, rm);
+ gen_helper_neon_addl_saturate_s64(rd, tcg_env, rd, rd);
+}
+
+static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], NULL);
+}
+
+static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ gen_helper_neon_addl_saturate_s32(rd, tcg_env, rn, rm);
+}
+
+static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ gen_helper_neon_addl_saturate_s64(rd, tcg_env, rn, rm);
+}
+
+static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const accfn[] = {
+ NULL,
+ gen_VQDMLAL_acc_16,
+ gen_VQDMLAL_acc_32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
+}
+
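+/*
+ * VQDMLSL accumulate step: negate the doubled product and reuse the
+ * saturating-add helper to perform the subtraction.
+ */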
+static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ gen_helper_neon_negl_u32(rm, rm);
+ gen_helper_neon_addl_saturate_s32(rd, tcg_env, rn, rm);
+}
+
+static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ tcg_gen_neg_i64(rm, rm);
+ gen_helper_neon_addl_saturate_s64(rd, tcg_env, rn, rm);
+}
+
+static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const accfn[] = {
+ NULL,
+ gen_VQDMLSL_acc_16,
+ gen_VQDMLSL_acc_32,
+ NULL,
+ };
+
+ return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
+}
+
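+/*
+ * VMULL.P8 / VMULL.P64: polynomial multiply. Size 0 is the 8-bit form;
+ * size 2 is the 64-bit form, which additionally requires the PMULL
+ * (crypto) feature. Both operate on the full vector via gvec helpers.
+ */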
+static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
+{
+ gen_helper_gvec_3 *fn_gvec;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->vd & 1) {
+ return false;
+ }
+
+ switch (a->size) {
+ case 0:
+ fn_gvec = gen_helper_neon_pmull_h;
+ break;
+ case 2:
+ if (!dc_isar_feature(aa32_pmull, s)) {
+ return false;
+ }
+ fn_gvec = gen_helper_gvec_pmull_q;
+ break;
+ default:
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
+ neon_full_reg_offset(a->vn),
+ neon_full_reg_offset(a->vm),
+ 16, 16, 0, fn_gvec);
+ return true;
+}
+
+static void gen_neon_dup_low16(TCGv_i32 var)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_ext16u_i32(var, var);
+ tcg_gen_shli_i32(tmp, var, 16);
+ tcg_gen_or_i32(var, var, tmp);
+}
+
+static void gen_neon_dup_high16(TCGv_i32 var)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_andi_i32(var, var, 0xffff0000);
+ tcg_gen_shri_i32(tmp, var, 16);
+ tcg_gen_or_i32(var, var, tmp);
+}
+
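+/*
+ * Load the scalar operand for the 2-reg-scalar insns. The reg argument
+ * is the M:Vm field, which encodes both a D register and an element
+ * index: for 32-bit scalars bits [3:0] select the register and bit 4
+ * the element; for 16-bit scalars bits [2:0] select the register,
+ * bit 4 the containing 32-bit word and bit 3 the half within it, which
+ * is then duplicated into both halves of the returned value.
+ */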
+static inline TCGv_i32 neon_get_scalar(int size, int reg)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ if (size == MO_16) {
+ read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
+ if (reg & 8) {
+ gen_neon_dup_high16(tmp);
+ } else {
+ gen_neon_dup_low16(tmp);
+ }
+ } else {
+ read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
+ }
+ return tmp;
+}
+
+static bool do_2scalar(DisasContext *s, arg_2scalar *a,
+ NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
+{
+ /*
+ * Two registers and a scalar: perform an operation between
+ * the input elements and the scalar, and then possibly
+ * perform an accumulation operation of that result into the
+ * destination.
+ */
+ TCGv_i32 scalar, tmp;
+ int pass;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn) {
+ /* Bad size (including size == 3, which is a different insn group) */
+ return false;
+ }
+
+ if (a->q && ((a->vd | a->vn) & 1)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ scalar = neon_get_scalar(a->size, a->vm);
+ tmp = tcg_temp_new_i32();
+
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+ read_neon_element32(tmp, a->vn, pass, MO_32);
+ opfn(tmp, tmp, scalar);
+ if (accfn) {
+ TCGv_i32 rd = tcg_temp_new_i32();
+ read_neon_element32(rd, a->vd, pass, MO_32);
+ accfn(tmp, rd, tmp);
+ }
+ write_neon_element32(tmp, a->vd, pass, MO_32);
+ }
+ return true;
+}
+
+static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mul_u16,
+ tcg_gen_mul_i32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mul_u16,
+ tcg_gen_mul_i32,
+ NULL,
+ };
+ static NeonGenTwoOpFn * const accfn[] = {
+ NULL,
+ gen_helper_neon_add_u16,
+ tcg_gen_add_i32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mul_u16,
+ tcg_gen_mul_i32,
+ NULL,
+ };
+ static NeonGenTwoOpFn * const accfn[] = {
+ NULL,
+ gen_helper_neon_sub_u16,
+ tcg_gen_sub_i32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
+ gen_helper_gvec_3_ptr *fn)
+{
+ /* Two registers and a scalar, using gvec */
+ int vec_size = a->q ? 16 : 8;
+ int rd_ofs = neon_full_reg_offset(a->vd);
+ int rn_ofs = neon_full_reg_offset(a->vn);
+ int rm_ofs;
+ int idx;
+ TCGv_ptr fpstatus;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!fn) {
+ /* Bad size (including size == 3, which is a different insn group) */
+ return false;
+ }
+
+ if (a->q && ((a->vd | a->vn) & 1)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /* a->vm is M:Vm, which encodes both register and index */
+ idx = extract32(a->vm, a->size + 2, 2);
+ a->vm = extract32(a->vm, 0, a->size + 2);
+ rm_ofs = neon_full_reg_offset(a->vm);
+
+ fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
+ vec_size, vec_size, idx, fn);
+ return true;
+}
+
+#define DO_VMUL_F_2sc(NAME, FUNC) \
+ static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a) \
+ { \
+ static gen_helper_gvec_3_ptr * const opfn[] = { \
+ NULL, \
+ gen_helper_##FUNC##_h, \
+ gen_helper_##FUNC##_s, \
+ NULL, \
+ }; \
+ if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
+ return false; \
+ } \
+ return do_2scalar_fp_vec(s, a, opfn[a->size]); \
+ }
+
+DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
+DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
+DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)
+
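+/*
+ * The saturating qdmulh/qrdmulh helpers take tcg_env (to update QC);
+ * wrap them to match the NeonGenTwoOpFn signature used by do_2scalar.
+ */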
+WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
+WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
+WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
+WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
+
+static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_VQDMULH_16,
+ gen_VQDMULH_32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpFn * const opfn[] = {
+ NULL,
+ gen_VQRDMULH_16,
+ gen_VQRDMULH_32,
+ NULL,
+ };
+
+ return do_2scalar(s, a, opfn[a->size], NULL);
+}
+
+static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
+ NeonGenThreeOpEnvFn *opfn)
+{
+ /*
+ * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn performs
+ * the multiply and the accumulation as a single fused, saturating
+ * operation, using a helper function that takes all of rd, rn and
+ * the scalar at once.
+ */
+ TCGv_i32 scalar, rn, rd;
+ int pass;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_rdm, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn) {
+ /* Bad size (including size == 3, which is a different insn group) */
+ return false;
+ }
+
+ if (a->q && ((a->vd | a->vn) & 1)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ scalar = neon_get_scalar(a->size, a->vm);
+ rn = tcg_temp_new_i32();
+ rd = tcg_temp_new_i32();
+
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+ read_neon_element32(rn, a->vn, pass, MO_32);
+ read_neon_element32(rd, a->vd, pass, MO_32);
+ opfn(rd, tcg_env, rn, scalar, rd);
+ write_neon_element32(rd, a->vd, pass, MO_32);
+ }
+ return true;
+}
+
+static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenThreeOpEnvFn *opfn[] = {
+ NULL,
+ gen_helper_neon_qrdmlah_s16,
+ gen_helper_neon_qrdmlah_s32,
+ NULL,
+ };
+ return do_vqrdmlah_2sc(s, a, opfn[a->size]);
+}
+
+static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenThreeOpEnvFn *opfn[] = {
+ NULL,
+ gen_helper_neon_qrdmlsh_s16,
+ gen_helper_neon_qrdmlsh_s32,
+ NULL,
+ };
+ return do_vqrdmlah_2sc(s, a, opfn[a->size]);
+}
+
+static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
+ NeonGenTwoOpWidenFn *opfn,
+ NeonGenTwo64OpFn *accfn)
+{
+ /*
+ * Two registers and a scalar, long operations: perform an
+ * operation on the input elements and the scalar which produces
+ * a double-width result, and then possibly perform an accumulation
+ * operation of that result into the destination.
+ */
+ TCGv_i32 scalar, rn;
+ TCGv_i64 rn0_64, rn1_64;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!opfn) {
+ /* Bad size (including size == 3, which is a different insn group) */
+ return false;
+ }
+
+ if (a->vd & 1) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ scalar = neon_get_scalar(a->size, a->vm);
+
+ /* Load all inputs before writing any outputs, in case of overlap */
+ rn = tcg_temp_new_i32();
+ read_neon_element32(rn, a->vn, 0, MO_32);
+ rn0_64 = tcg_temp_new_i64();
+ opfn(rn0_64, rn, scalar);
+
+ read_neon_element32(rn, a->vn, 1, MO_32);
+ rn1_64 = tcg_temp_new_i64();
+ opfn(rn1_64, rn, scalar);
+
+ if (accfn) {
+ TCGv_i64 t64 = tcg_temp_new_i64();
+ read_neon_element64(t64, a->vd, 0, MO_64);
+ accfn(rn0_64, t64, rn0_64);
+ read_neon_element64(t64, a->vd, 1, MO_64);
+ accfn(rn1_64, t64, rn1_64);
+ }
+
+ write_neon_element64(rn0_64, a->vd, 0, MO_64);
+ write_neon_element64(rn1_64, a->vd, 1, MO_64);
+ return true;
+}
+
+static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mull_s16,
+ gen_mull_s32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_helper_neon_mull_u16,
+ gen_mull_u32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
+}
+
+#define DO_VMLAL_2SC(INSN, MULL, ACC) \
+ static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a) \
+ { \
+ static NeonGenTwoOpWidenFn * const opfn[] = { \
+ NULL, \
+ gen_helper_neon_##MULL##16, \
+ gen_##MULL##32, \
+ NULL, \
+ }; \
+ static NeonGenTwo64OpFn * const accfn[] = { \
+ NULL, \
+ gen_helper_neon_##ACC##l_u32, \
+ tcg_gen_##ACC##_i64, \
+ NULL, \
+ }; \
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]); \
+ }
+
+DO_VMLAL_2SC(VMLAL_S, mull_s, add)
+DO_VMLAL_2SC(VMLAL_U, mull_u, add)
+DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
+DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
+
+static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], NULL);
+}
+
+static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const accfn[] = {
+ NULL,
+ gen_VQDMLAL_acc_16,
+ gen_VQDMLAL_acc_32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
+{
+ static NeonGenTwoOpWidenFn * const opfn[] = {
+ NULL,
+ gen_VQDMULL_16,
+ gen_VQDMULL_32,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const accfn[] = {
+ NULL,
+ gen_VQDMLSL_acc_16,
+ gen_VQDMLSL_acc_32,
+ NULL,
+ };
+
+ return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
+}
+
+static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn | a->vm | a->vd) & a->q) {
+ return false;
+ }
+
+ if (a->imm > 7 && !a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (!a->q) {
+ /* Extract 64 bits from <Vm:Vn> */
+ TCGv_i64 left, right, dest;
+
+ left = tcg_temp_new_i64();
+ right = tcg_temp_new_i64();
+ dest = tcg_temp_new_i64();
+
+ read_neon_element64(right, a->vn, 0, MO_64);
+ read_neon_element64(left, a->vm, 0, MO_64);
+ tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
+ write_neon_element64(dest, a->vd, 0, MO_64);
+ } else {
+ /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
+ TCGv_i64 left, middle, right, destleft, destright;
+
+ left = tcg_temp_new_i64();
+ middle = tcg_temp_new_i64();
+ right = tcg_temp_new_i64();
+ destleft = tcg_temp_new_i64();
+ destright = tcg_temp_new_i64();
+
+ if (a->imm < 8) {
+ read_neon_element64(right, a->vn, 0, MO_64);
+ read_neon_element64(middle, a->vn, 1, MO_64);
+ tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
+ read_neon_element64(left, a->vm, 0, MO_64);
+ tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
+ } else {
+ read_neon_element64(right, a->vn, 1, MO_64);
+ read_neon_element64(middle, a->vm, 0, MO_64);
+ tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
+ read_neon_element64(left, a->vm, 1, MO_64);
+ tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
+ }
+
+ write_neon_element64(destright, a->vd, 0, MO_64);
+ write_neon_element64(destleft, a->vd, 1, MO_64);
+ }
+ return true;
+}
+
+static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
+{
+ TCGv_i64 val, def;
+ TCGv_i32 desc;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vn + a->len + 1) > 32) {
+ /*
+ * This is UNPREDICTABLE; we choose to UNDEF to avoid the
+ * helper function running off the end of the register file.
+ */
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ desc = tcg_constant_i32((a->vn << 2) | a->len);
+ def = tcg_temp_new_i64();
+ if (a->op) {
+ read_neon_element64(def, a->vd, 0, MO_64);
+ } else {
+ tcg_gen_movi_i64(def, 0);
+ }
+ val = tcg_temp_new_i64();
+ read_neon_element64(val, a->vm, 0, MO_64);
+
+ gen_helper_neon_tbl(val, tcg_env, desc, val, def);
+ write_neon_element64(val, a->vd, 0, MO_64);
+ return true;
+}
+
+static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->vd & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
+ neon_element_offset(a->vm, a->index, a->size),
+ a->q ? 16 : 8, a->q ? 16 : 8);
+ return true;
+}
+
+static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
+{
+ int pass, half;
+ TCGv_i32 tmp[2];
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vd | a->vm) & a->q) {
+ return false;
+ }
+
+ if (a->size == 3) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp[0] = tcg_temp_new_i32();
+ tmp[1] = tcg_temp_new_i32();
+
+ for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
+ for (half = 0; half < 2; half++) {
+ read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
+ switch (a->size) {
+ case 0:
+ tcg_gen_bswap32_i32(tmp[half], tmp[half]);
+ break;
+ case 1:
+ gen_swap_half(tmp[half], tmp[half]);
+ break;
+ case 2:
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+ write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
+ write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
+ }
+ return true;
+}
+
+static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
+ NeonGenWidenFn *widenfn,
+ NeonGenTwo64OpFn *opfn,
+ NeonGenTwo64OpFn *accfn)
+{
+ /*
+ * Pairwise long operations: widen both halves of the pair,
+ * combine the pairs with the opfn, and then possibly accumulate
+ * into the destination with the accfn.
+ */
+ int pass;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vd | a->vm) & a->q) {
+ return false;
+ }
+
+ if (!widenfn) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ for (pass = 0; pass < a->q + 1; pass++) {
+ TCGv_i32 tmp;
+ TCGv_i64 rm0_64, rm1_64, rd_64;
+
+ rm0_64 = tcg_temp_new_i64();
+ rm1_64 = tcg_temp_new_i64();
+ rd_64 = tcg_temp_new_i64();
+
+ tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vm, pass * 2, MO_32);
+ widenfn(rm0_64, tmp);
+ read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
+ widenfn(rm1_64, tmp);
+
+ opfn(rd_64, rm0_64, rm1_64);
+
+ if (accfn) {
+ TCGv_i64 tmp64 = tcg_temp_new_i64();
+ read_neon_element64(tmp64, a->vd, pass, MO_64);
+ accfn(rd_64, tmp64, rd_64);
+ }
+ write_neon_element64(rd_64, a->vd, pass, MO_64);
+ }
+ return true;
+}
+
+static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
+{
+ static NeonGenWidenFn * const widenfn[] = {
+ gen_helper_neon_widen_s8,
+ gen_helper_neon_widen_s16,
+ tcg_gen_ext_i32_i64,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const opfn[] = {
+ gen_helper_neon_paddl_u16,
+ gen_helper_neon_paddl_u32,
+ tcg_gen_add_i64,
+ NULL,
+ };
+
+ return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
+}
+
+static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
+{
+ static NeonGenWidenFn * const widenfn[] = {
+ gen_helper_neon_widen_u8,
+ gen_helper_neon_widen_u16,
+ tcg_gen_extu_i32_i64,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const opfn[] = {
+ gen_helper_neon_paddl_u16,
+ gen_helper_neon_paddl_u32,
+ tcg_gen_add_i64,
+ NULL,
+ };
+
+ return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
+}
+
+static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
+{
+ static NeonGenWidenFn * const widenfn[] = {
+ gen_helper_neon_widen_s8,
+ gen_helper_neon_widen_s16,
+ tcg_gen_ext_i32_i64,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const opfn[] = {
+ gen_helper_neon_paddl_u16,
+ gen_helper_neon_paddl_u32,
+ tcg_gen_add_i64,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const accfn[] = {
+ gen_helper_neon_addl_u16,
+ gen_helper_neon_addl_u32,
+ tcg_gen_add_i64,
+ NULL,
+ };
+
+ return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
+ accfn[a->size]);
+}
+
+static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
+{
+ static NeonGenWidenFn * const widenfn[] = {
+ gen_helper_neon_widen_u8,
+ gen_helper_neon_widen_u16,
+ tcg_gen_extu_i32_i64,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const opfn[] = {
+ gen_helper_neon_paddl_u16,
+ gen_helper_neon_paddl_u32,
+ tcg_gen_add_i64,
+ NULL,
+ };
+ static NeonGenTwo64OpFn * const accfn[] = {
+ gen_helper_neon_addl_u16,
+ gen_helper_neon_addl_u32,
+ tcg_gen_add_i64,
+ NULL,
+ };
+
+ return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
+ accfn[a->size]);
+}
+
+typedef void ZipFn(TCGv_ptr, TCGv_ptr);
+
+static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
+ ZipFn *fn)
+{
+ TCGv_ptr pd, pm;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vd | a->vm) & a->q) {
+ return false;
+ }
+
+ if (!fn) {
+ /* Bad size or size/q combination */
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ pd = vfp_reg_ptr(true, a->vd);
+ pm = vfp_reg_ptr(true, a->vm);
+ fn(pd, pm);
+ return true;
+}
+
+static bool trans_VUZP(DisasContext *s, arg_2misc *a)
+{
+ static ZipFn * const fn[2][4] = {
+ {
+ gen_helper_neon_unzip8,
+ gen_helper_neon_unzip16,
+ NULL,
+ NULL,
+ }, {
+ gen_helper_neon_qunzip8,
+ gen_helper_neon_qunzip16,
+ gen_helper_neon_qunzip32,
+ NULL,
+ }
+ };
+ return do_zip_uzp(s, a, fn[a->q][a->size]);
+}
+
+static bool trans_VZIP(DisasContext *s, arg_2misc *a)
+{
+ static ZipFn * const fn[2][4] = {
+ {
+ gen_helper_neon_zip8,
+ gen_helper_neon_zip16,
+ NULL,
+ NULL,
+ }, {
+ gen_helper_neon_qzip8,
+ gen_helper_neon_qzip16,
+ gen_helper_neon_qzip32,
+ NULL,
+ }
+ };
+ return do_zip_uzp(s, a, fn[a->q][a->size]);
+}
+
+static bool do_vmovn(DisasContext *s, arg_2misc *a,
+ NeonGenNarrowEnvFn *narrowfn)
+{
+ TCGv_i64 rm;
+ TCGv_i32 rd0, rd1;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->vm & 1) {
+ return false;
+ }
+
+ if (!narrowfn) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ rm = tcg_temp_new_i64();
+ rd0 = tcg_temp_new_i32();
+ rd1 = tcg_temp_new_i32();
+
+ read_neon_element64(rm, a->vm, 0, MO_64);
+ narrowfn(rd0, tcg_env, rm);
+ read_neon_element64(rm, a->vm, 1, MO_64);
+ narrowfn(rd1, tcg_env, rm);
+ write_neon_element32(rd0, a->vd, 0, MO_32);
+ write_neon_element32(rd1, a->vd, 1, MO_32);
+ return true;
+}
+
+#define DO_VMOVN(INSN, FUNC) \
+ static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
+ { \
+ static NeonGenNarrowEnvFn * const narrowfn[] = { \
+ FUNC##8, \
+ FUNC##16, \
+ FUNC##32, \
+ NULL, \
+ }; \
+ return do_vmovn(s, a, narrowfn[a->size]); \
+ }
+
+DO_VMOVN(VMOVN, gen_neon_narrow_u)
+DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
+DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
+DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)
+
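+/*
+ * VSHLL (2-reg-misc form): widen each element of Dm and shift left by
+ * the element width. Plain zero-extension is used for the widening,
+ * since the shift moves every original bit into the top half of the
+ * doubled element anyway.
+ */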
+static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
+{
+ TCGv_i32 rm0, rm1;
+ TCGv_i64 rd;
+ static NeonGenWidenFn * const widenfns[] = {
+ gen_helper_neon_widen_u8,
+ gen_helper_neon_widen_u16,
+ tcg_gen_extu_i32_i64,
+ NULL,
+ };
+ NeonGenWidenFn *widenfn = widenfns[a->size];
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->vd & 1) {
+ return false;
+ }
+
+ if (!widenfn) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ rd = tcg_temp_new_i64();
+ rm0 = tcg_temp_new_i32();
+ rm1 = tcg_temp_new_i32();
+
+ read_neon_element32(rm0, a->vm, 0, MO_32);
+ read_neon_element32(rm1, a->vm, 1, MO_32);
+
+ widenfn(rd, rm0);
+ tcg_gen_shli_i64(rd, rd, 8 << a->size);
+ write_neon_element64(rd, a->vd, 0, MO_64);
+ widenfn(rd, rm1);
+ tcg_gen_shli_i64(rd, rd, 8 << a->size);
+ write_neon_element64(rd, a->vd, 1, MO_64);
+ return true;
+}
+
+static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i64 tmp;
+ TCGv_i32 dst0, dst1;
+
+ if (!dc_isar_feature(aa32_bf16, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vm & 1) || (a->size != 1)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = fpstatus_ptr(FPST_STD);
+ tmp = tcg_temp_new_i64();
+ dst0 = tcg_temp_new_i32();
+ dst1 = tcg_temp_new_i32();
+
+ read_neon_element64(tmp, a->vm, 0, MO_64);
+ gen_helper_bfcvt_pair(dst0, tmp, fpst);
+
+ read_neon_element64(tmp, a->vm, 1, MO_64);
+ gen_helper_bfcvt_pair(dst1, tmp, fpst);
+
+ write_neon_element32(dst0, a->vd, 0, MO_32);
+ write_neon_element32(dst1, a->vd, 1, MO_32);
+ return true;
+}
+
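+/*
+ * VCVT.F16.F32 Dd, Qm: convert four single-precision elements to
+ * half-precision, packed two per 32-bit word of Dd. All four inputs
+ * are read before the first write to Dd, since Dd may overlap Qm.
+ */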
+static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 ahp, tmp, tmp2, tmp3;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+ !dc_isar_feature(aa32_fp16_spconv, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vm & 1) || (a->size != 1)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = fpstatus_ptr(FPST_STD);
+ ahp = get_ahp_flag();
+ tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vm, 0, MO_32);
+ gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
+ tmp2 = tcg_temp_new_i32();
+ read_neon_element32(tmp2, a->vm, 1, MO_32);
+ gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
+ tcg_gen_shli_i32(tmp2, tmp2, 16);
+ tcg_gen_or_i32(tmp2, tmp2, tmp);
+ read_neon_element32(tmp, a->vm, 2, MO_32);
+ gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
+ tmp3 = tcg_temp_new_i32();
+ read_neon_element32(tmp3, a->vm, 3, MO_32);
+ write_neon_element32(tmp2, a->vd, 0, MO_32);
+ gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
+ tcg_gen_shli_i32(tmp3, tmp3, 16);
+ tcg_gen_or_i32(tmp3, tmp3, tmp);
+ write_neon_element32(tmp3, a->vd, 1, MO_32);
+ return true;
+}
+
+static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 ahp, tmp, tmp2, tmp3;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+ !dc_isar_feature(aa32_fp16_spconv, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vd & 1) || (a->size != 1)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = fpstatus_ptr(FPST_STD);
+ ahp = get_ahp_flag();
+ tmp3 = tcg_temp_new_i32();
+ tmp2 = tcg_temp_new_i32();
+ tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vm, 0, MO_32);
+ read_neon_element32(tmp2, a->vm, 1, MO_32);
+ tcg_gen_ext16u_i32(tmp3, tmp);
+ gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
+ write_neon_element32(tmp3, a->vd, 0, MO_32);
+ tcg_gen_shri_i32(tmp, tmp, 16);
+ gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
+ write_neon_element32(tmp, a->vd, 1, MO_32);
+ tcg_gen_ext16u_i32(tmp3, tmp2);
+ gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
+ write_neon_element32(tmp3, a->vd, 2, MO_32);
+ tcg_gen_shri_i32(tmp2, tmp2, 16);
+ gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
+ write_neon_element32(tmp2, a->vd, 3, MO_32);
+ return true;
+}
+
+static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
+{
+ int vec_size = a->q ? 16 : 8;
+ int rd_ofs = neon_full_reg_offset(a->vd);
+ int rm_ofs = neon_full_reg_offset(a->vm);
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->size == 3) {
+ return false;
+ }
+
+ if ((a->vd | a->vm) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);
+
+ return true;
+}
+
+#define DO_2MISC_VEC(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
+ { \
+ return do_2misc_vec(s, a, FN); \
+ }
+
+DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
+DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
+DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
+DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
+DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
+DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
+DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
+
+static bool trans_VMVN(DisasContext *s, arg_2misc *a)
+{
+ if (a->size != 0) {
+ return false;
+ }
+ return do_2misc_vec(s, a, tcg_gen_gvec_not);
+}
+
+#define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA) \
+ static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rm_ofs, uint32_t oprsz, \
+ uint32_t maxsz) \
+ { \
+ tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz, \
+ DATA, FUNC); \
+ }
+
+#define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA) \
+ static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rm_ofs, uint32_t oprsz, \
+ uint32_t maxsz) \
+ { \
+ tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC); \
+ }
+
+WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
+WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aesd, 0)
+WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
+WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesimc, 0)
+WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
+WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
+WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)
+
+#define DO_2M_CRYPTO(INSN, FEATURE, SIZE) \
+ static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
+ { \
+ if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) { \
+ return false; \
+ } \
+ return do_2misc_vec(s, a, gen_##INSN); \
+ }
+
+DO_2M_CRYPTO(AESE, aa32_aes, 0)
+DO_2M_CRYPTO(AESD, aa32_aes, 0)
+DO_2M_CRYPTO(AESMC, aa32_aes, 0)
+DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
+DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
+DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
+DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
+
+static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
+{
+ TCGv_i32 tmp;
+ int pass;
+
+ /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!fn) {
+ return false;
+ }
+
+ if ((a->vd | a->vm) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+ read_neon_element32(tmp, a->vm, pass, MO_32);
+ fn(tmp, tmp);
+ write_neon_element32(tmp, a->vd, pass, MO_32);
+ }
+ return true;
+}
+
+static bool trans_VREV32(DisasContext *s, arg_2misc *a)
+{
+ static NeonGenOneOpFn * const fn[] = {
+ tcg_gen_bswap32_i32,
+ gen_swap_half,
+ NULL,
+ NULL,
+ };
+ return do_2misc(s, a, fn[a->size]);
+}
+
+static bool trans_VREV16(DisasContext *s, arg_2misc *a)
+{
+ if (a->size != 0) {
+ return false;
+ }
+ return do_2misc(s, a, gen_rev16);
+}
+
+static bool trans_VCLS(DisasContext *s, arg_2misc *a)
+{
+ static NeonGenOneOpFn * const fn[] = {
+ gen_helper_neon_cls_s8,
+ gen_helper_neon_cls_s16,
+ gen_helper_neon_cls_s32,
+ NULL,
+ };
+ return do_2misc(s, a, fn[a->size]);
+}
+
+static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
+{
+ tcg_gen_clzi_i32(rd, rm, 32);
+}
+
+static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
+{
+ static NeonGenOneOpFn * const fn[] = {
+ gen_helper_neon_clz_u8,
+ gen_helper_neon_clz_u16,
+ do_VCLZ_32,
+ NULL,
+ };
+ return do_2misc(s, a, fn[a->size]);
+}
+
+static bool trans_VCNT(DisasContext *s, arg_2misc *a)
+{
+ if (a->size != 0) {
+ return false;
+ }
+ return do_2misc(s, a, gen_helper_neon_cnt_u8);
+}
+
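+/*
+ * Floating-point VABS and VNEG only touch the sign bit, so they are
+ * implemented as bitwise AND/XOR with the integer gvec ops and need
+ * no float_status.
+ */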
+static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
+ vece == MO_16 ? 0x7fff : 0x7fffffff,
+ oprsz, maxsz);
+}
+
+static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
+{
+ if (a->size == MO_16) {
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+ } else if (a->size != MO_32) {
+ return false;
+ }
+ return do_2misc_vec(s, a, gen_VABS_F);
+}
+
+static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
+ vece == MO_16 ? 0x8000 : 0x80000000,
+ oprsz, maxsz);
+}
+
+static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
+{
+ if (a->size == MO_16) {
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+ } else if (a->size != MO_32) {
+ return false;
+ }
+ return do_2misc_vec(s, a, gen_VNEG_F);
+}
+
+static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
+{
+ if (a->size != 2) {
+ return false;
+ }
+ return do_2misc(s, a, gen_helper_recpe_u32);
+}
+
+static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
+{
+ if (a->size != 2) {
+ return false;
+ }
+ return do_2misc(s, a, gen_helper_rsqrte_u32);
+}
+
+#define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \
+ static void WRAPNAME(TCGv_i32 d, TCGv_i32 m) \
+ { \
+ FUNC(d, tcg_env, m); \
+ }
+
+WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
+WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
+WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
+WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
+WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
+WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)
+
+static bool trans_VQABS(DisasContext *s, arg_2misc *a)
+{
+ static NeonGenOneOpFn * const fn[] = {
+ gen_VQABS_s8,
+ gen_VQABS_s16,
+ gen_VQABS_s32,
+ NULL,
+ };
+ return do_2misc(s, a, fn[a->size]);
+}
+
+static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
+{
+ static NeonGenOneOpFn * const fn[] = {
+ gen_VQNEG_s8,
+ gen_VQNEG_s16,
+ gen_VQNEG_s32,
+ NULL,
+ };
+ return do_2misc(s, a, fn[a->size]);
+}
+
+#define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC) \
+ static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ static gen_helper_gvec_2_ptr * const fns[4] = { \
+ NULL, HFUNC, SFUNC, NULL, \
+ }; \
+ TCGv_ptr fpst; \
+ fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD); \
+ tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0, \
+ fns[vece]); \
+ } \
+ static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
+ { \
+ if (a->size == MO_16) { \
+ if (!dc_isar_feature(aa32_fp16_arith, s)) { \
+ return false; \
+ } \
+ } else if (a->size != MO_32) { \
+ return false; \
+ } \
+ return do_2misc_vec(s, a, gen_##INSN); \
+ }
+
+DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
+DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
+DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
+DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
+DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
+DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
+DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
+DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
+DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
+DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
+DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)
+
+DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)
+
+static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
+ return false;
+ }
+ return trans_VRINTX_impl(s, a);
+}
+
+#define DO_VEC_RMODE(INSN, RMODE, OP) \
+ static void gen_##INSN(unsigned vece, uint32_t rd_ofs, \
+ uint32_t rm_ofs, \
+ uint32_t oprsz, uint32_t maxsz) \
+ { \
+ static gen_helper_gvec_2_ptr * const fns[4] = { \
+ NULL, \
+ gen_helper_gvec_##OP##h, \
+ gen_helper_gvec_##OP##s, \
+ NULL, \
+ }; \
+ TCGv_ptr fpst; \
+ fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD); \
+ tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, \
+ arm_rmode_to_sf(RMODE), fns[vece]); \
+ } \
+ static bool trans_##INSN(DisasContext *s, arg_2misc *a) \
+ { \
+ if (!arm_dc_feature(s, ARM_FEATURE_V8)) { \
+ return false; \
+ } \
+ if (a->size == MO_16) { \
+ if (!dc_isar_feature(aa32_fp16_arith, s)) { \
+ return false; \
+ } \
+ } else if (a->size != MO_32) { \
+ return false; \
+ } \
+ return do_2misc_vec(s, a, gen_##INSN); \
+ }
+
+DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
+DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
+DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
+DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
+DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
+DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
+DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
+DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)
+
+DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
+DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
+DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
+DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
+DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)
+
+static bool trans_VSWP(DisasContext *s, arg_2misc *a)
+{
+ TCGv_i64 rm, rd;
+ int pass;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (a->size != 0) {
+ return false;
+ }
+
+ if ((a->vd | a->vm) & a->q) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ rm = tcg_temp_new_i64();
+ rd = tcg_temp_new_i64();
+ for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
+ read_neon_element64(rm, a->vm, pass, MO_64);
+ read_neon_element64(rd, a->vd, pass, MO_64);
+ write_neon_element64(rm, a->vd, pass, MO_64);
+ write_neon_element64(rd, a->vm, pass, MO_64);
+ }
+ return true;
+}
+
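+/*
+ * 2x2 element transpose across a pair of 32-bit values, for the 8-bit
+ * and 16-bit element cases of VTRN; the 32-bit case is handled in
+ * trans_VTRN by exchanging whole 32-bit elements.
+ */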
+static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
+{
+ TCGv_i32 rd, tmp;
+
+ rd = tcg_temp_new_i32();
+ tmp = tcg_temp_new_i32();
+
+ tcg_gen_shli_i32(rd, t0, 8);
+ tcg_gen_andi_i32(rd, rd, 0xff00ff00);
+ tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
+ tcg_gen_or_i32(rd, rd, tmp);
+
+ tcg_gen_shri_i32(t1, t1, 8);
+ tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
+ tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
+ tcg_gen_or_i32(t1, t1, tmp);
+ tcg_gen_mov_i32(t0, rd);
+}
+
+static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
+{
+ TCGv_i32 rd, tmp;
+
+ rd = tcg_temp_new_i32();
+ tmp = tcg_temp_new_i32();
+
+ tcg_gen_shli_i32(rd, t0, 16);
+ tcg_gen_andi_i32(tmp, t1, 0xffff);
+ tcg_gen_or_i32(rd, rd, tmp);
+ tcg_gen_shri_i32(t1, t1, 16);
+ tcg_gen_andi_i32(tmp, t0, 0xffff0000);
+ tcg_gen_or_i32(t1, t1, tmp);
+ tcg_gen_mov_i32(t0, rd);
+}
+
+static bool trans_VTRN(DisasContext *s, arg_2misc *a)
+{
+ TCGv_i32 tmp, tmp2;
+ int pass;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vd | a->vm) & a->q) {
+ return false;
+ }
+
+ if (a->size == 3) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ tmp2 = tcg_temp_new_i32();
+ if (a->size == MO_32) {
+ for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
+ read_neon_element32(tmp, a->vm, pass, MO_32);
+ read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
+ write_neon_element32(tmp2, a->vm, pass, MO_32);
+ write_neon_element32(tmp, a->vd, pass + 1, MO_32);
+ }
+ } else {
+ for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
+ read_neon_element32(tmp, a->vm, pass, MO_32);
+ read_neon_element32(tmp2, a->vd, pass, MO_32);
+ if (a->size == MO_8) {
+ gen_neon_trn_u8(tmp, tmp2);
+ } else {
+ gen_neon_trn_u16(tmp, tmp2);
+ }
+ write_neon_element32(tmp2, a->vm, pass, MO_32);
+ write_neon_element32(tmp, a->vd, pass, MO_32);
+ }
+ }
+ return true;
+}
+
+static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a)
+{
+ if (!dc_isar_feature(aa32_i8mm, s)) {
+ return false;
+ }
+ return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
+ gen_helper_gvec_smmla_b);
+}
+
+static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a)
+{
+ if (!dc_isar_feature(aa32_i8mm, s)) {
+ return false;
+ }
+ return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
+ gen_helper_gvec_ummla_b);
+}
+
+static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a)
+{
+ if (!dc_isar_feature(aa32_i8mm, s)) {
+ return false;
+ }
+ return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
+ gen_helper_gvec_usmmla_b);
+}
+
+static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
+{
+ if (!dc_isar_feature(aa32_bf16, s)) {
+ return false;
+ }
+ return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
+ gen_helper_gvec_bfmmla);
+}
+
+static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
+{
+ if (!dc_isar_feature(aa32_bf16, s)) {
+ return false;
+ }
+ return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD,
+ gen_helper_gvec_bfmlal);
+}
+
+static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a)
+{
+ if (!dc_isar_feature(aa32_bf16, s)) {
+ return false;
+ }
+ return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm,
+ (a->index << 1) | a->q, FPST_STD,
+ gen_helper_gvec_bfmlal_idx);
+}
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
new file mode 100644
index 0000000000..46c7fce8b4
--- /dev/null
+++ b/target/arm/tcg/translate-sme.c
@@ -0,0 +1,343 @@
+/*
+ * AArch64 SME translation
+ *
+ * Copyright (c) 2022 Linaro, Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "translate.h"
+#include "translate-a64.h"
+
+/*
+ * Include the generated decoder.
+ */
+
+#include "decode-sme.c.inc"
+
+
+/*
+ * Resolve tile.size[index] to a host pointer, where tile and index
+ * are always decoded together, dependent on the element size.
+ */
+static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
+ int tile_index, bool vertical)
+{
+ int tile = tile_index >> (4 - esz);
+ int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz);
+ int pos, len, offset;
+ TCGv_i32 tmp;
+ TCGv_ptr addr;
+
+ /* Compute the final index, which is Rs+imm. */
+ tmp = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs));
+ tcg_gen_addi_i32(tmp, tmp, index);
+
+ /* Prepare a power-of-two modulo via extraction of @len bits. */
+ len = ctz32(streaming_vec_reg_size(s)) - esz;
+
+ if (vertical) {
+ /*
+ * Compute the byte offset of the index within the tile:
+ * (index % (svl / size)) * size
+ * = (index % (svl >> esz)) << esz
+ * Perform the power-of-two modulo via extraction of the low @len bits.
+ * Perform the multiply by shifting left by @pos bits.
+ * Perform these operations simultaneously via deposit into zero.
+ */
+ pos = esz;
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
+
+ /*
+ * For big-endian, adjust the indexed column byte offset within
+ * the uint64_t host words that make up env->zarray[].
+ */
+ if (HOST_BIG_ENDIAN && esz < MO_64) {
+ tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz));
+ }
+ } else {
+ /*
+ * Compute the byte offset of the index within the tile:
+ * (index % (svl / size)) * (size * sizeof(row))
+ * = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
+ */
+ pos = esz + ctz32(sizeof(ARMVectorReg));
+ tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
+
+ /* Row slices are always aligned and need no endian adjustment. */
+ }
+
+ /* The tile byte offset within env->zarray is the row. */
+ offset = tile * sizeof(ARMVectorReg);
+
+ /* Include the byte offset of zarray to make this relative to env. */
+ offset += offsetof(CPUARMState, zarray);
+ tcg_gen_addi_i32(tmp, tmp, offset);
+
+ /* Add the byte offset to env to produce the final pointer. */
+ addr = tcg_temp_new_ptr();
+ tcg_gen_ext_i32_ptr(addr, tmp);
+ tcg_gen_add_ptr(addr, addr, tcg_env);
+
+ return addr;
+}
+
+/*
+ * Resolve tile.size[0] to a host pointer.
+ * Used by e.g. outer product insns where we require the entire tile.
+ */
+static TCGv_ptr get_tile(DisasContext *s, int esz, int tile)
+{
+ TCGv_ptr addr = tcg_temp_new_ptr();
+ int offset;
+
+ offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, zarray);
+
+ tcg_gen_addi_ptr(addr, tcg_env, offset);
+ return addr;
+}
+
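+/*
+ * ZERO clears the ZA tiles selected by the immediate mask; all of the
+ * work is done in the helper, scaled by the current streaming vector
+ * length (SVL).
+ */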
+static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
+{
+ if (!dc_isar_feature(aa64_sme, s)) {
+ return false;
+ }
+ if (sme_za_enabled_check(s)) {
+ gen_helper_sme_zero(tcg_env, tcg_constant_i32(a->imm),
+ tcg_constant_i32(streaming_vec_reg_size(s)));
+ }
+ return true;
+}
+
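+/*
+ * MOVA to/from a ZA slice. A horizontal slice is a contiguous row of
+ * zarray, so it can be moved with the predicated SVE SEL helpers; a
+ * vertical slice has one element per row, so it needs the dedicated
+ * sme_mova helpers.
+ */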
+static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
+{
+ static gen_helper_gvec_4 * const h_fns[5] = {
+ gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
+ gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d,
+ gen_helper_sve_sel_zpzz_q
+ };
+ static gen_helper_gvec_3 * const cz_fns[5] = {
+ gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h,
+ gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d,
+ gen_helper_sme_mova_cz_q,
+ };
+ static gen_helper_gvec_3 * const zc_fns[5] = {
+ gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h,
+ gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d,
+ gen_helper_sme_mova_zc_q,
+ };
+
+ TCGv_ptr t_za, t_zr, t_pg;
+ TCGv_i32 t_desc;
+ int svl;
+
+ if (!dc_isar_feature(aa64_sme, s)) {
+ return false;
+ }
+ if (!sme_smza_enabled_check(s)) {
+ return true;
+ }
+
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
+ t_zr = vec_full_reg_ptr(s, a->zr);
+ t_pg = pred_full_reg_ptr(s, a->pg);
+
+ svl = streaming_vec_reg_size(s);
+ t_desc = tcg_constant_i32(simd_desc(svl, svl, 0));
+
+ if (a->v) {
+ /* Vertical slice -- use sme mova helpers. */
+ if (a->to_vec) {
+ zc_fns[a->esz](t_zr, t_za, t_pg, t_desc);
+ } else {
+ cz_fns[a->esz](t_za, t_zr, t_pg, t_desc);
+ }
+ } else {
+ /* Horizontal slice -- reuse sve sel helpers. */
+ if (a->to_vec) {
+ h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc);
+ } else {
+ h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc);
+ }
+ }
+ return true;
+}
+
+static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
+{
+ typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);
+
+ /*
+ * Indexed by [esz][be][v][mte][st], which is (except for load/store)
+ * also the order in which the elements appear in the function names,
+ * and so how we must concatenate the pieces.
+ */
+
+#define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
+#define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) }
+#define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) }
+#define FN_END(L, B) { FN_HV(L), FN_HV(B) }
+
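+ /*
+ * For example, fns[MO_16][0][0][0][0] expands to
+ * gen_helper_sme_ld1h_le_h, and fns[MO_16][1][1][1][1] to
+ * gen_helper_sme_st1h_be_v_mte.
+ */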
+ static GenLdSt1 * const fns[5][2][2][2][2] = {
+ FN_END(b, b),
+ FN_END(h_le, h_be),
+ FN_END(s_le, s_be),
+ FN_END(d_le, d_be),
+ FN_END(q_le, q_be),
+ };
+
+#undef FN_LS
+#undef FN_MTE
+#undef FN_HV
+#undef FN_END
+
+ TCGv_ptr t_za, t_pg;
+ TCGv_i64 addr;
+ uint32_t desc;
+ bool be = s->be_data == MO_BE;
+ bool mte = s->mte_active[0];
+
+ if (!dc_isar_feature(aa64_sme, s)) {
+ return false;
+ }
+ if (!sme_smza_enabled_check(s)) {
+ return true;
+ }
+
+ t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
+ t_pg = pred_full_reg_ptr(s, a->pg);
+ addr = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
+
+ if (!mte) {
+ addr = clean_data_tbi(s, addr);
+ }
+
+ desc = make_svemte_desc(s, streaming_vec_reg_size(s), 1, a->esz, a->st, 0);
+
+ fns[a->esz][be][a->v][mte][a->st](tcg_env, t_za, t_pg, addr,
+ tcg_constant_i32(desc));
+ return true;
+}
+
+typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
+
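+/*
+ * SME LDR/STR transfer one horizontal byte slice of ZA (SVL bytes), so
+ * we reuse the SVE vector LDR/STR code, with the selected row of
+ * zarray as the CPU-side buffer and an address of Xn plus imm * SVL.
+ */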
+static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
+{
+ int svl = streaming_vec_reg_size(s);
+ int imm = a->imm;
+ TCGv_ptr base;
+
+ if (!sme_za_enabled_check(s)) {
+ return true;
+ }
+
+ /* ZA[n] equates to ZA0H.B[n]. */
+ base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
+
+ fn(s, base, 0, svl, a->rn, imm * svl);
+ return true;
+}
+
+TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
+TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
+
+static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
+ gen_helper_gvec_4 *fn)
+{
+ int svl = streaming_vec_reg_size(s);
+ uint32_t desc = simd_desc(svl, svl, 0);
+ TCGv_ptr za, zn, pn, pm;
+
+ if (!sme_smza_enabled_check(s)) {
+ return true;
+ }
+
+ za = get_tile(s, esz, a->zad);
+ zn = vec_full_reg_ptr(s, a->zn);
+ pn = pred_full_reg_ptr(s, a->pn);
+ pm = pred_full_reg_ptr(s, a->pm);
+
+ fn(za, zn, pn, pm, tcg_constant_i32(desc));
+ return true;
+}
+
+TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
+TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
+TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
+TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
+
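+/*
+ * Outer product (MOPA/MOPS): a->sub is passed through simd_desc() so a
+ * single helper implements both the accumulating and the subtracting
+ * form.
+ */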
+static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
+ gen_helper_gvec_5 *fn)
+{
+ int svl = streaming_vec_reg_size(s);
+ uint32_t desc = simd_desc(svl, svl, a->sub);
+ TCGv_ptr za, zn, zm, pn, pm;
+
+ if (!sme_smza_enabled_check(s)) {
+ return true;
+ }
+
+ za = get_tile(s, esz, a->zad);
+ zn = vec_full_reg_ptr(s, a->zn);
+ zm = vec_full_reg_ptr(s, a->zm);
+ pn = pred_full_reg_ptr(s, a->pn);
+ pm = pred_full_reg_ptr(s, a->pm);
+
+ fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
+ return true;
+}
+
+static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
+ gen_helper_gvec_5_ptr *fn)
+{
+ int svl = streaming_vec_reg_size(s);
+ uint32_t desc = simd_desc(svl, svl, a->sub);
+ TCGv_ptr za, zn, zm, pn, pm, fpst;
+
+ if (!sme_smza_enabled_check(s)) {
+ return true;
+ }
+
+ za = get_tile(s, esz, a->zad);
+ zn = vec_full_reg_ptr(s, a->zn);
+ zm = vec_full_reg_ptr(s, a->zm);
+ pn = pred_full_reg_ptr(s, a->pn);
+ pm = pred_full_reg_ptr(s, a->pm);
+ fpst = fpstatus_ptr(FPST_FPCR);
+
+ fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
+ return true;
+}
+
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
+TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
+TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
+
+/* TODO: FEAT_EBF16 */
+TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
+
+TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
+TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
+TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s)
+TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s)
+
+TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d)
+TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d)
+TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d)
+TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d)
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
new file mode 100644
index 0000000000..ada05aa530
--- /dev/null
+++ b/target/arm/tcg/translate-sve.c
@@ -0,0 +1,7422 @@
+/*
+ * AArch64 SVE translation
+ *
+ * Copyright (c) 2018 Linaro, Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "translate.h"
+#include "translate-a64.h"
+#include "fpu/softfloat.h"
+
+
+typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
+ TCGv_i64, uint32_t, uint32_t);
+
+typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
+ TCGv_ptr, TCGv_i32);
+typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
+ TCGv_ptr, TCGv_ptr, TCGv_i32);
+
+typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
+typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
+ TCGv_ptr, TCGv_i64, TCGv_i32);
+
+/*
+ * Helpers for extracting complex instruction fields.
+ */
+
+/*
+ * See e.g. ASR (immediate, predicated).
+ * Returns -1 for unallocated encoding; diagnose later.
+ */
+static int tszimm_esz(DisasContext *s, int x)
+{
+ x >>= 3; /* discard imm3 */
+ return 31 - clz32(x);
+}
+
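+/*
+ * The combined tsz:imm3 field encodes the shift amount relative to the
+ * element size (esize = 8 << esz): right shifts are encoded as
+ * 2 * esize - shift, left shifts as esize + shift.
+ */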
+static int tszimm_shr(DisasContext *s, int x)
+{
+ return (16 << tszimm_esz(s, x)) - x;
+}
+
+/* See e.g. LSL (immediate, predicated). */
+static int tszimm_shl(DisasContext *s, int x)
+{
+ return x - (8 << tszimm_esz(s, x));
+}
+
+/* The SH bit is in bit 8. Extract the low 8 and shift. */
+static inline int expand_imm_sh8s(DisasContext *s, int x)
+{
+ return (int8_t)x << (x & 0x100 ? 8 : 0);
+}
+
+static inline int expand_imm_sh8u(DisasContext *s, int x)
+{
+ return (uint8_t)x << (x & 0x100 ? 8 : 0);
+}
+
+/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
+ * with unsigned data. C.f. SVE Memory Contiguous Load Group.
+ */
+static inline int msz_dtype(DisasContext *s, int msz)
+{
+ static const uint8_t dtype[4] = { 0, 5, 10, 15 };
+ return dtype[msz];
+}
+
+/*
+ * Include the generated decoder.
+ */
+
+#include "decode-sve.c.inc"
+
+/*
+ * Implement all of the translator functions referenced by the decoder.
+ */
+
+/* Invoke an out-of-line helper on 2 Zregs. */
+static bool gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
+ int rd, int rn, int data)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vsz, vsz, data, fn);
+ }
+ return true;
+}
+
+static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
+ int rd, int rn, int data,
+ ARMFPStatusFlavour flavour)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr status = fpstatus_ptr(flavour);
+
+ tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ status, vsz, vsz, data, fn);
+ }
+ return true;
+}
+
+static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
+ arg_rr_esz *a, int data)
+{
+ return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+}
+
+/* Invoke an out-of-line helper on 3 Zregs. */
+static bool gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
+ int rd, int rn, int rm, int data)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vsz, vsz, data, fn);
+ }
+ return true;
+}
+
+static bool gen_gvec_ool_arg_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
+ arg_rrr_esz *a, int data)
+{
+ return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
+}
+
+/* Invoke an out-of-line helper on 3 Zregs, plus float_status. */
+static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
+ int rd, int rn, int rm,
+ int data, ARMFPStatusFlavour flavour)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr status = fpstatus_ptr(flavour);
+
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ status, vsz, vsz, data, fn);
+ }
+ return true;
+}
+
+static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
+ arg_rrr_esz *a, int data)
+{
+ return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+}
+
+/* Invoke an out-of-line helper on 4 Zregs. */
+static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
+ int rd, int rn, int rm, int ra, int data)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, ra),
+ vsz, vsz, data, fn);
+ }
+ return true;
+}
+
+static bool gen_gvec_ool_arg_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
+ arg_rrrr_esz *a, int data)
+{
+ return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
+}
+
+static bool gen_gvec_ool_arg_zzxz(DisasContext *s, gen_helper_gvec_4 *fn,
+ arg_rrxr_esz *a)
+{
+ return gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
+}
+
+/* Invoke an out-of-line helper on 4 Zregs, plus a pointer. */
+static bool gen_gvec_ptr_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
+ int rd, int rn, int rm, int ra,
+ int data, TCGv_ptr ptr)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, ra),
+ ptr, vsz, vsz, data, fn);
+ }
+ return true;
+}
+
+static bool gen_gvec_fpst_zzzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
+ int rd, int rn, int rm, int ra,
+ int data, ARMFPStatusFlavour flavour)
+{
+ TCGv_ptr status = fpstatus_ptr(flavour);
+ bool ret = gen_gvec_ptr_zzzz(s, fn, rd, rn, rm, ra, data, status);
+ return ret;
+}
+
+/* Invoke an out-of-line helper on 4 Zregs, 1 Preg, plus fpst. */
+static bool gen_gvec_fpst_zzzzp(DisasContext *s, gen_helper_gvec_5_ptr *fn,
+ int rd, int rn, int rm, int ra, int pg,
+ int data, ARMFPStatusFlavour flavour)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr status = fpstatus_ptr(flavour);
+
+ tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, ra),
+ pred_full_reg_offset(s, pg),
+ status, vsz, vsz, data, fn);
+ }
+ return true;
+}
+
+/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
+static bool gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
+ int rd, int rn, int pg, int data)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ pred_full_reg_offset(s, pg),
+ vsz, vsz, data, fn);
+ }
+ return true;
+}
+
+static bool gen_gvec_ool_arg_zpz(DisasContext *s, gen_helper_gvec_3 *fn,
+ arg_rpr_esz *a, int data)
+{
+ return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, data);
+}
+
+static bool gen_gvec_ool_arg_zpzi(DisasContext *s, gen_helper_gvec_3 *fn,
+ arg_rpri_esz *a)
+{
+ return gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
+}
+
+static bool gen_gvec_fpst_zzp(DisasContext *s, gen_helper_gvec_3_ptr *fn,
+ int rd, int rn, int pg, int data,
+ ARMFPStatusFlavour flavour)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr status = fpstatus_ptr(flavour);
+
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ pred_full_reg_offset(s, pg),
+ status, vsz, vsz, data, fn);
+ }
+ return true;
+}
+
+static bool gen_gvec_fpst_arg_zpz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
+ arg_rpr_esz *a, int data,
+ ARMFPStatusFlavour flavour)
+{
+ return gen_gvec_fpst_zzp(s, fn, a->rd, a->rn, a->pg, data, flavour);
+}
+
+/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
+static bool gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
+ int rd, int rn, int rm, int pg, int data)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ pred_full_reg_offset(s, pg),
+ vsz, vsz, data, fn);
+ }
+ return true;
+}
+
+static bool gen_gvec_ool_arg_zpzz(DisasContext *s, gen_helper_gvec_4 *fn,
+ arg_rprr_esz *a, int data)
+{
+ return gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, data);
+}
+
+/* Invoke an out-of-line helper on 3 Zregs, a predicate, and float_status. */
+static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
+ int rd, int rn, int rm, int pg, int data,
+ ARMFPStatusFlavour flavour)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr status = fpstatus_ptr(flavour);
+
+ tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ pred_full_reg_offset(s, pg),
+ status, vsz, vsz, data, fn);
+ }
+ return true;
+}
+
+static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
+ arg_rprr_esz *a)
+{
+ return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+}
+
+/* Invoke a vector expander on two Zregs and an immediate. */
+static bool gen_gvec_fn_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
+ int esz, int rd, int rn, uint64_t imm)
+{
+ if (gvec_fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ gvec_fn(esz, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn), imm, vsz, vsz);
+ }
+ return true;
+}
+
+static bool gen_gvec_fn_arg_zzi(DisasContext *s, GVecGen2iFn *gvec_fn,
+ arg_rri_esz *a)
+{
+ if (a->esz < 0) {
+ /* Invalid tsz encoding -- see tszimm_esz. */
+ return false;
+ }
+ return gen_gvec_fn_zzi(s, gvec_fn, a->esz, a->rd, a->rn, a->imm);
+}
+
+/* Invoke a vector expander on three Zregs. */
+static bool gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
+ int esz, int rd, int rn, int rm)
+{
+ if (gvec_fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ gvec_fn(esz, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), vsz, vsz);
+ }
+ return true;
+}
+
+static bool gen_gvec_fn_arg_zzz(DisasContext *s, GVecGen3Fn *fn,
+ arg_rrr_esz *a)
+{
+ return gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
+}
+
+/* Invoke a vector expander on four Zregs. */
+static bool gen_gvec_fn_arg_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
+ arg_rrrr_esz *a)
+{
+ if (gvec_fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ vec_full_reg_offset(s, a->ra), vsz, vsz);
+ }
+ return true;
+}
+
+/* Invoke a vector move on two Zregs. */
+static bool do_mov_z(DisasContext *s, int rd, int rn)
+{
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_mov(MO_8, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn), vsz, vsz);
+ }
+ return true;
+}
+
+/* Initialize a Zreg with replications of a 64-bit immediate. */
+static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
+}
+
+/* Invoke a vector expander on three Pregs. */
+static bool gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
+ int rd, int rn, int rm)
+{
+ if (sve_access_check(s)) {
+ unsigned psz = pred_gvec_reg_size(s);
+ gvec_fn(MO_64, pred_full_reg_offset(s, rd),
+ pred_full_reg_offset(s, rn),
+ pred_full_reg_offset(s, rm), psz, psz);
+ }
+ return true;
+}
+
+/* Invoke a vector move on two Pregs. */
+static bool do_mov_p(DisasContext *s, int rd, int rn)
+{
+ if (sve_access_check(s)) {
+ unsigned psz = pred_gvec_reg_size(s);
+ tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
+ pred_full_reg_offset(s, rn), psz, psz);
+ }
+ return true;
+}
+
+/* Set the cpu flags as per a return from an SVE helper. */
+static void do_pred_flags(TCGv_i32 t)
+{
+ tcg_gen_mov_i32(cpu_NF, t);
+ tcg_gen_andi_i32(cpu_ZF, t, 2);
+ tcg_gen_andi_i32(cpu_CF, t, 1);
+ tcg_gen_movi_i32(cpu_VF, 0);
+}
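+
+/*
+ * Given QEMU's lazy NZCV representation (N is bit 31 of cpu_NF, Z is
+ * set when cpu_ZF is zero, cpu_CF holds C directly), the word returned
+ * by the predtest helpers carries N in bit 31, an "any active element
+ * set" indicator in bit 1, and C in bit 0; V is always clear.
+ */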
+
+/* Subroutines computing the ARM PredTest pseudofunction. */
+static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ gen_helper_sve_predtest1(t, d, g);
+ do_pred_flags(t);
+}
+
+static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
+{
+ TCGv_ptr dptr = tcg_temp_new_ptr();
+ TCGv_ptr gptr = tcg_temp_new_ptr();
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_addi_ptr(dptr, tcg_env, dofs);
+ tcg_gen_addi_ptr(gptr, tcg_env, gofs);
+
+ gen_helper_sve_predtest(t, dptr, gptr, tcg_constant_i32(words));
+
+ do_pred_flags(t);
+}
+
+/* For each element size, the bits within a predicate word that are active. */
+const uint64_t pred_esz_masks[5] = {
+ 0xffffffffffffffffull, 0x5555555555555555ull,
+ 0x1111111111111111ull, 0x0101010101010101ull,
+ 0x0001000100010001ull,
+};
+
+static bool trans_INVALID(DisasContext *s, arg_INVALID *a)
+{
+ unallocated_encoding(s);
+ return true;
+}
+
+/*
+ *** SVE Logical - Unpredicated Group
+ */
+
+TRANS_FEAT(AND_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_and, a)
+TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a)
+TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a)
+TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a)
+
+static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ uint64_t mask = dup_const(MO_8, 0xff >> sh);
+
+ tcg_gen_xor_i64(t, n, m);
+ tcg_gen_shri_i64(d, t, sh);
+ tcg_gen_shli_i64(t, t, 8 - sh);
+ tcg_gen_andi_i64(d, d, mask);
+ tcg_gen_andi_i64(t, t, ~mask);
+ tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ uint64_t mask = dup_const(MO_16, 0xffff >> sh);
+
+ tcg_gen_xor_i64(t, n, m);
+ tcg_gen_shri_i64(d, t, sh);
+ tcg_gen_shli_i64(t, t, 16 - sh);
+ tcg_gen_andi_i64(d, d, mask);
+ tcg_gen_andi_i64(t, t, ~mask);
+ tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
+{
+ tcg_gen_xor_i32(d, n, m);
+ tcg_gen_rotri_i32(d, d, sh);
+}
+
+static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
+{
+ tcg_gen_xor_i64(d, n, m);
+ tcg_gen_rotri_i64(d, d, sh);
+}
+
+static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, int64_t sh)
+{
+ tcg_gen_xor_vec(vece, d, n, m);
+ tcg_gen_rotri_vec(vece, d, d, sh);
+}
+
+void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, int64_t shift,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
+ static const GVecGen3i ops[4] = {
+ { .fni8 = gen_xar8_i64,
+ .fniv = gen_xar_vec,
+ .fno = gen_helper_sve2_xar_b,
+ .opt_opc = vecop,
+ .vece = MO_8 },
+ { .fni8 = gen_xar16_i64,
+ .fniv = gen_xar_vec,
+ .fno = gen_helper_sve2_xar_h,
+ .opt_opc = vecop,
+ .vece = MO_16 },
+ { .fni4 = gen_xar_i32,
+ .fniv = gen_xar_vec,
+ .fno = gen_helper_sve2_xar_s,
+ .opt_opc = vecop,
+ .vece = MO_32 },
+ { .fni8 = gen_xar_i64,
+ .fniv = gen_xar_vec,
+ .fno = gen_helper_gvec_xar_d,
+ .opt_opc = vecop,
+ .vece = MO_64 }
+ };
+ int esize = 8 << vece;
+
+ /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
+ tcg_debug_assert(shift >= 0);
+ tcg_debug_assert(shift <= esize);
+ shift &= esize - 1;
+
+ if (shift == 0) {
+ /* xar with no rotate devolves to xor. */
+ tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
+ shift, &ops[vece]);
+ }
+}
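+
+/*
+ * For example, with vece == MO_8 a shift of 8 (the SVE2 maximum) is
+ * masked down to 0 and becomes a plain XOR, while shifts of 1..7 go
+ * through gvec_3i: the host rotri_vec path, the inline per-64-bit-lane
+ * shri/shli/or expansion, or the out-of-line helper.
+ */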
+
+static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
+{
+ if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
+ }
+ return true;
+}
+
+static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
+{
+ tcg_gen_xor_i64(d, n, m);
+ tcg_gen_xor_i64(d, d, k);
+}
+
+static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec k)
+{
+ tcg_gen_xor_vec(vece, d, n, m);
+ tcg_gen_xor_vec(vece, d, d, k);
+}
+
+static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_eor3_i64,
+ .fniv = gen_eor3_vec,
+ .fno = gen_helper_sve2_eor3,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
+}
+
+TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a)
+
+static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
+{
+ tcg_gen_andc_i64(d, m, k);
+ tcg_gen_xor_i64(d, d, n);
+}
+
+static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec k)
+{
+ tcg_gen_andc_vec(vece, d, m, k);
+ tcg_gen_xor_vec(vece, d, d, n);
+}
+
+static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_bcax_i64,
+ .fniv = gen_bcax_vec,
+ .fno = gen_helper_sve2_bcax,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
+}
+
+TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a)
+
+static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ /* BSL differs from the generic bitsel in argument ordering. */
+ tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
+}
+
+TRANS_FEAT(BSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl, a)
+
+static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
+{
+ tcg_gen_andc_i64(n, k, n);
+ tcg_gen_andc_i64(m, m, k);
+ tcg_gen_or_i64(d, n, m);
+}
+
+static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec k)
+{
+ if (TCG_TARGET_HAS_bitsel_vec) {
+ tcg_gen_not_vec(vece, n, n);
+ tcg_gen_bitsel_vec(vece, d, k, n, m);
+ } else {
+ tcg_gen_andc_vec(vece, n, k, n);
+ tcg_gen_andc_vec(vece, m, m, k);
+ tcg_gen_or_vec(vece, d, n, m);
+ }
+}
+
+static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_bsl1n_i64,
+ .fniv = gen_bsl1n_vec,
+ .fno = gen_helper_sve2_bsl1n,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
+}
+
+TRANS_FEAT(BSL1N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl1n, a)
+
+static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
+{
+ /*
+ * Z[dn] = (n & k) | (~m & ~k)
+ *       = (n & k) | ~(m | k)
+ */
+ tcg_gen_and_i64(n, n, k);
+ if (TCG_TARGET_HAS_orc_i64) {
+ tcg_gen_or_i64(m, m, k);
+ tcg_gen_orc_i64(d, n, m);
+ } else {
+ tcg_gen_nor_i64(m, m, k);
+ tcg_gen_or_i64(d, n, m);
+ }
+}
+
+static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec k)
+{
+ if (TCG_TARGET_HAS_bitsel_vec) {
+ tcg_gen_not_vec(vece, m, m);
+ tcg_gen_bitsel_vec(vece, d, k, n, m);
+ } else {
+ tcg_gen_and_vec(vece, n, n, k);
+ tcg_gen_or_vec(vece, m, m, k);
+ tcg_gen_orc_vec(vece, d, n, m);
+ }
+}
+
+static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_bsl2n_i64,
+ .fniv = gen_bsl2n_vec,
+ .fno = gen_helper_sve2_bsl2n,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
+}
+
+TRANS_FEAT(BSL2N, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bsl2n, a)
+
+static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
+{
+ tcg_gen_and_i64(n, n, k);
+ tcg_gen_andc_i64(m, m, k);
+ tcg_gen_nor_i64(d, n, m);
+}
+
+static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec k)
+{
+ tcg_gen_bitsel_vec(vece, d, k, n, m);
+ tcg_gen_not_vec(vece, d, d);
+}
+
+static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_nbsl_i64,
+ .fniv = gen_nbsl_vec,
+ .fno = gen_helper_sve2_nbsl,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
+}
+
+TRANS_FEAT(NBSL, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_nbsl, a)
+
+/*
+ *** SVE Integer Arithmetic - Unpredicated Group
+ */
+
+TRANS_FEAT(ADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_add, a)
+TRANS_FEAT(SUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sub, a)
+TRANS_FEAT(SQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ssadd, a)
+TRANS_FEAT(SQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_sssub, a)
+TRANS_FEAT(UQADD_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_usadd, a)
+TRANS_FEAT(UQSUB_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_ussub, a)
+
+/*
+ *** SVE Integer Arithmetic - Binary Predicated Group
+ */
+
+/* Select active elements from Zn and inactive elements from Zm,
+ * storing the result in Zd.
+ */
+static bool do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
+{
+ static gen_helper_gvec_4 * const fns[4] = {
+ gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
+ gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
+ };
+ return gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
+}
+
+#define DO_ZPZZ(NAME, FEAT, name) \
+ static gen_helper_gvec_4 * const name##_zpzz_fns[4] = { \
+ gen_helper_##name##_zpzz_b, gen_helper_##name##_zpzz_h, \
+ gen_helper_##name##_zpzz_s, gen_helper_##name##_zpzz_d, \
+ }; \
+ TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpzz, \
+ name##_zpzz_fns[a->esz], a, 0)
+
+DO_ZPZZ(AND_zpzz, aa64_sve, sve_and)
+DO_ZPZZ(EOR_zpzz, aa64_sve, sve_eor)
+DO_ZPZZ(ORR_zpzz, aa64_sve, sve_orr)
+DO_ZPZZ(BIC_zpzz, aa64_sve, sve_bic)
+
+DO_ZPZZ(ADD_zpzz, aa64_sve, sve_add)
+DO_ZPZZ(SUB_zpzz, aa64_sve, sve_sub)
+
+DO_ZPZZ(SMAX_zpzz, aa64_sve, sve_smax)
+DO_ZPZZ(UMAX_zpzz, aa64_sve, sve_umax)
+DO_ZPZZ(SMIN_zpzz, aa64_sve, sve_smin)
+DO_ZPZZ(UMIN_zpzz, aa64_sve, sve_umin)
+DO_ZPZZ(SABD_zpzz, aa64_sve, sve_sabd)
+DO_ZPZZ(UABD_zpzz, aa64_sve, sve_uabd)
+
+DO_ZPZZ(MUL_zpzz, aa64_sve, sve_mul)
+DO_ZPZZ(SMULH_zpzz, aa64_sve, sve_smulh)
+DO_ZPZZ(UMULH_zpzz, aa64_sve, sve_umulh)
+
+DO_ZPZZ(ASR_zpzz, aa64_sve, sve_asr)
+DO_ZPZZ(LSR_zpzz, aa64_sve, sve_lsr)
+DO_ZPZZ(LSL_zpzz, aa64_sve, sve_lsl)
+
+static gen_helper_gvec_4 * const sdiv_fns[4] = {
+ NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
+};
+TRANS_FEAT(SDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, sdiv_fns[a->esz], a, 0)
+
+static gen_helper_gvec_4 * const udiv_fns[4] = {
+ NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
+};
+TRANS_FEAT(UDIV_zpzz, aa64_sve, gen_gvec_ool_arg_zpzz, udiv_fns[a->esz], a, 0)
+
+TRANS_FEAT(SEL_zpzz, aa64_sve, do_sel_z, a->rd, a->rn, a->rm, a->pg, a->esz)
+
+/*
+ *** SVE Integer Arithmetic - Unary Predicated Group
+ */
+
+#define DO_ZPZ(NAME, FEAT, name) \
+ static gen_helper_gvec_3 * const name##_fns[4] = { \
+ gen_helper_##name##_b, gen_helper_##name##_h, \
+ gen_helper_##name##_s, gen_helper_##name##_d, \
+ }; \
+ TRANS_FEAT(NAME, FEAT, gen_gvec_ool_arg_zpz, name##_fns[a->esz], a, 0)
+
+DO_ZPZ(CLS, aa64_sve, sve_cls)
+DO_ZPZ(CLZ, aa64_sve, sve_clz)
+DO_ZPZ(CNT_zpz, aa64_sve, sve_cnt_zpz)
+DO_ZPZ(CNOT, aa64_sve, sve_cnot)
+DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
+DO_ZPZ(ABS, aa64_sve, sve_abs)
+DO_ZPZ(NEG, aa64_sve, sve_neg)
+DO_ZPZ(RBIT, aa64_sve, sve_rbit)
+
+static gen_helper_gvec_3 * const fabs_fns[4] = {
+ NULL, gen_helper_sve_fabs_h,
+ gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
+};
+TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const fneg_fns[4] = {
+ NULL, gen_helper_sve_fneg_h,
+ gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
+};
+TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const sxtb_fns[4] = {
+ NULL, gen_helper_sve_sxtb_h,
+ gen_helper_sve_sxtb_s, gen_helper_sve_sxtb_d,
+};
+TRANS_FEAT(SXTB, aa64_sve, gen_gvec_ool_arg_zpz, sxtb_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const uxtb_fns[4] = {
+ NULL, gen_helper_sve_uxtb_h,
+ gen_helper_sve_uxtb_s, gen_helper_sve_uxtb_d,
+};
+TRANS_FEAT(UXTB, aa64_sve, gen_gvec_ool_arg_zpz, uxtb_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const sxth_fns[4] = {
+ NULL, NULL, gen_helper_sve_sxth_s, gen_helper_sve_sxth_d
+};
+TRANS_FEAT(SXTH, aa64_sve, gen_gvec_ool_arg_zpz, sxth_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const uxth_fns[4] = {
+ NULL, NULL, gen_helper_sve_uxth_s, gen_helper_sve_uxth_d
+};
+TRANS_FEAT(UXTH, aa64_sve, gen_gvec_ool_arg_zpz, uxth_fns[a->esz], a, 0)
+
+TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
+ a->esz == 3 ? gen_helper_sve_sxtw_d : NULL, a, 0)
+TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
+ a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
+
+/*
+ *** SVE Integer Reduction Group
+ */
+
+typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
+static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
+ gen_helper_gvec_reduc *fn)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr t_zn, t_pg;
+ TCGv_i32 desc;
+ TCGv_i64 temp;
+
+ if (fn == NULL) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
+ temp = tcg_temp_new_i64();
+ t_zn = tcg_temp_new_ptr();
+ t_pg = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
+ tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
+ fn(temp, t_zn, t_pg, desc);
+
+ write_fp_dreg(s, a->rd, temp);
+ return true;
+}
+
+#define DO_VPZ(NAME, name) \
+ static gen_helper_gvec_reduc * const name##_fns[4] = { \
+ gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
+ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
+ }; \
+ TRANS_FEAT(NAME, aa64_sve, do_vpz_ool, a, name##_fns[a->esz])
+
+DO_VPZ(ORV, orv)
+DO_VPZ(ANDV, andv)
+DO_VPZ(EORV, eorv)
+
+DO_VPZ(UADDV, uaddv)
+DO_VPZ(SMAXV, smaxv)
+DO_VPZ(UMAXV, umaxv)
+DO_VPZ(SMINV, sminv)
+DO_VPZ(UMINV, uminv)
+
+static gen_helper_gvec_reduc * const saddv_fns[4] = {
+ gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
+ gen_helper_sve_saddv_s, NULL
+};
+TRANS_FEAT(SADDV, aa64_sve, do_vpz_ool, a, saddv_fns[a->esz])
+
+#undef DO_VPZ
+
+/*
+ *** SVE Shift by Immediate - Predicated Group
+ */
+
+/*
+ * Copy Zn into Zd, storing zeros into inactive elements.
+ * If invert, store zeros into the active elements.
+ */
+static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
+ int esz, bool invert)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_sve_movz_b, gen_helper_sve_movz_h,
+ gen_helper_sve_movz_s, gen_helper_sve_movz_d,
+ };
+ return gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
+}
+
+static bool do_shift_zpzi(DisasContext *s, arg_rpri_esz *a, bool asr,
+ gen_helper_gvec_3 * const fns[4])
+{
+ int max;
+
+ if (a->esz < 0) {
+ /* Invalid tsz encoding -- see tszimm_esz. */
+ return false;
+ }
+
+ /*
+ * Shift by element size is architecturally valid.
+ * For arithmetic right-shift, it's the same as by one less.
+ * For logical shifts and ASRD, it is a zeroing operation.
+ */
+ max = 8 << a->esz;
+ if (a->imm >= max) {
+ if (asr) {
+ a->imm = max - 1;
+ } else {
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
+ }
+ }
+ return gen_gvec_ool_arg_zpzi(s, fns[a->esz], a);
+}
+
+static gen_helper_gvec_3 * const asr_zpzi_fns[4] = {
+ gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
+ gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
+};
+TRANS_FEAT(ASR_zpzi, aa64_sve, do_shift_zpzi, a, true, asr_zpzi_fns)
+
+static gen_helper_gvec_3 * const lsr_zpzi_fns[4] = {
+ gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
+ gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
+};
+TRANS_FEAT(LSR_zpzi, aa64_sve, do_shift_zpzi, a, false, lsr_zpzi_fns)
+
+static gen_helper_gvec_3 * const lsl_zpzi_fns[4] = {
+ gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
+ gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
+};
+TRANS_FEAT(LSL_zpzi, aa64_sve, do_shift_zpzi, a, false, lsl_zpzi_fns)
+
+static gen_helper_gvec_3 * const asrd_fns[4] = {
+ gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
+ gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
+};
+TRANS_FEAT(ASRD, aa64_sve, do_shift_zpzi, a, false, asrd_fns)
+
+static gen_helper_gvec_3 * const sqshl_zpzi_fns[4] = {
+ gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
+ gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
+};
+TRANS_FEAT(SQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
+ a->esz < 0 ? NULL : sqshl_zpzi_fns[a->esz], a)
+
+static gen_helper_gvec_3 * const uqshl_zpzi_fns[4] = {
+ gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
+ gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
+};
+TRANS_FEAT(UQSHL_zpzi, aa64_sve2, gen_gvec_ool_arg_zpzi,
+ a->esz < 0 ? NULL : uqshl_zpzi_fns[a->esz], a)
+
+static gen_helper_gvec_3 * const srshr_fns[4] = {
+ gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
+ gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
+};
+TRANS_FEAT(SRSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
+ a->esz < 0 ? NULL : srshr_fns[a->esz], a)
+
+static gen_helper_gvec_3 * const urshr_fns[4] = {
+ gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
+ gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
+};
+TRANS_FEAT(URSHR, aa64_sve2, gen_gvec_ool_arg_zpzi,
+ a->esz < 0 ? NULL : urshr_fns[a->esz], a)
+
+static gen_helper_gvec_3 * const sqshlu_fns[4] = {
+ gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
+ gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
+};
+TRANS_FEAT(SQSHLU, aa64_sve2, gen_gvec_ool_arg_zpzi,
+ a->esz < 0 ? NULL : sqshlu_fns[a->esz], a)
+
+/*
+ *** SVE Bitwise Shift - Predicated Group
+ */
+
+#define DO_ZPZW(NAME, name) \
+ static gen_helper_gvec_4 * const name##_zpzw_fns[4] = { \
+ gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
+ gen_helper_sve_##name##_zpzw_s, NULL \
+ }; \
+ TRANS_FEAT(NAME##_zpzw, aa64_sve, gen_gvec_ool_arg_zpzz, \
+ a->esz < 0 ? NULL : name##_zpzw_fns[a->esz], a, 0)
+
+DO_ZPZW(ASR, asr)
+DO_ZPZW(LSR, lsr)
+DO_ZPZW(LSL, lsl)
+
+#undef DO_ZPZW
+
+/*
+ *** SVE Bitwise Shift - Unpredicated Group
+ */
+
+static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
+ void (*gvec_fn)(unsigned, uint32_t, uint32_t,
+ int64_t, uint32_t, uint32_t))
+{
+ if (a->esz < 0) {
+ /* Invalid tsz encoding -- see tszimm_esz. */
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ /*
+ * Shift by element size is architecturally valid. For
+ * arithmetic right-shift, it's the same as by one less.
+ * Otherwise it is a zeroing operation.
+ */
+ if (a->imm >= 8 << a->esz) {
+ if (asr) {
+ a->imm = (8 << a->esz) - 1;
+ } else {
+ do_dupi_z(s, a->rd, 0);
+ return true;
+ }
+ }
+ gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
+ }
+ return true;
+}
+
+TRANS_FEAT(ASR_zzi, aa64_sve, do_shift_imm, a, true, tcg_gen_gvec_sari)
+TRANS_FEAT(LSR_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shri)
+TRANS_FEAT(LSL_zzi, aa64_sve, do_shift_imm, a, false, tcg_gen_gvec_shli)
+
+#define DO_ZZW(NAME, name) \
+ static gen_helper_gvec_3 * const name##_zzw_fns[4] = { \
+ gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
+ gen_helper_sve_##name##_zzw_s, NULL \
+ }; \
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_arg_zzz, \
+ name##_zzw_fns[a->esz], a, 0)
+
+DO_ZZW(ASR_zzw, asr)
+DO_ZZW(LSR_zzw, lsr)
+DO_ZZW(LSL_zzw, lsl)
+
+#undef DO_ZZW
+
+/*
+ *** SVE Integer Multiply-Add Group
+ */
+
+static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
+ gen_helper_gvec_5 *fn)
+{
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->ra),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ pred_full_reg_offset(s, a->pg),
+ vsz, vsz, 0, fn);
+ }
+ return true;
+}
+
+static gen_helper_gvec_5 * const mla_fns[4] = {
+ gen_helper_sve_mla_b, gen_helper_sve_mla_h,
+ gen_helper_sve_mla_s, gen_helper_sve_mla_d,
+};
+TRANS_FEAT(MLA, aa64_sve, do_zpzzz_ool, a, mla_fns[a->esz])
+
+static gen_helper_gvec_5 * const mls_fns[4] = {
+ gen_helper_sve_mls_b, gen_helper_sve_mls_h,
+ gen_helper_sve_mls_s, gen_helper_sve_mls_d,
+};
+TRANS_FEAT(MLS, aa64_sve, do_zpzzz_ool, a, mls_fns[a->esz])
+
+/*
+ *** SVE Index Generation Group
+ */
+
+static bool do_index(DisasContext *s, int esz, int rd,
+ TCGv_i64 start, TCGv_i64 incr)
+{
+ unsigned vsz;
+ TCGv_i32 desc;
+ TCGv_ptr t_zd;
+
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ vsz = vec_full_reg_size(s);
+ desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
+ t_zd = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd));
+ if (esz == 3) {
+ gen_helper_sve_index_d(t_zd, start, incr, desc);
+ } else {
+ typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
+ static index_fn * const fns[3] = {
+ gen_helper_sve_index_b,
+ gen_helper_sve_index_h,
+ gen_helper_sve_index_s,
+ };
+ TCGv_i32 s32 = tcg_temp_new_i32();
+ TCGv_i32 i32 = tcg_temp_new_i32();
+
+ tcg_gen_extrl_i64_i32(s32, start);
+ tcg_gen_extrl_i64_i32(i32, incr);
+ fns[esz](t_zd, s32, i32, desc);
+ }
+ return true;
+}
+
+TRANS_FEAT(INDEX_ii, aa64_sve, do_index, a->esz, a->rd,
+ tcg_constant_i64(a->imm1), tcg_constant_i64(a->imm2))
+TRANS_FEAT(INDEX_ir, aa64_sve, do_index, a->esz, a->rd,
+ tcg_constant_i64(a->imm), cpu_reg(s, a->rm))
+TRANS_FEAT(INDEX_ri, aa64_sve, do_index, a->esz, a->rd,
+ cpu_reg(s, a->rn), tcg_constant_i64(a->imm))
+TRANS_FEAT(INDEX_rr, aa64_sve, do_index, a->esz, a->rd,
+ cpu_reg(s, a->rn), cpu_reg(s, a->rm))
+
+/*
+ *** SVE Stack Allocation Group
+ */
+
+static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
+ tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
+ }
+ return true;
+}
+
+static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
+{
+ if (!dc_isar_feature(aa64_sme, s)) {
+ return false;
+ }
+ if (sme_enabled_check(s)) {
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
+ }
+ return true;
+}
+
+static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
+ tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
+ }
+ return true;
+}
+
+static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
+{
+ if (!dc_isar_feature(aa64_sme, s)) {
+ return false;
+ }
+ if (sme_enabled_check(s)) {
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
+ }
+ return true;
+}
+
+static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ TCGv_i64 reg = cpu_reg(s, a->rd);
+ tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
+ }
+ return true;
+}
+
+static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
+{
+ if (!dc_isar_feature(aa64_sme, s)) {
+ return false;
+ }
+ if (sme_enabled_check(s)) {
+ TCGv_i64 reg = cpu_reg(s, a->rd);
+ tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
+ }
+ return true;
+}
+
+/*
+ *** SVE Compute Vector Address Group
+ */
+
+static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
+{
+ return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
+}
+
+TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
+TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
+TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
+TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
+
+/*
+ *** SVE Integer Misc - Unpredicated Group
+ */
+
+static gen_helper_gvec_2 * const fexpa_fns[4] = {
+ NULL, gen_helper_sve_fexpa_h,
+ gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
+};
+TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
+ fexpa_fns[a->esz], a->rd, a->rn, 0)
+
+static gen_helper_gvec_3 * const ftssel_fns[4] = {
+ NULL, gen_helper_sve_ftssel_h,
+ gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
+};
+TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
+ ftssel_fns[a->esz], a, 0)
+
+/*
+ *** SVE Predicate Logical Operations Group
+ */
+
+static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
+ const GVecGen4 *gvec_op)
+{
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ unsigned psz = pred_gvec_reg_size(s);
+ int dofs = pred_full_reg_offset(s, a->rd);
+ int nofs = pred_full_reg_offset(s, a->rn);
+ int mofs = pred_full_reg_offset(s, a->rm);
+ int gofs = pred_full_reg_offset(s, a->pg);
+
+ if (!a->s) {
+ tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
+ return true;
+ }
+
+ if (psz == 8) {
+ /* Do the operation and the flags generation in temps. */
+ TCGv_i64 pd = tcg_temp_new_i64();
+ TCGv_i64 pn = tcg_temp_new_i64();
+ TCGv_i64 pm = tcg_temp_new_i64();
+ TCGv_i64 pg = tcg_temp_new_i64();
+
+ tcg_gen_ld_i64(pn, tcg_env, nofs);
+ tcg_gen_ld_i64(pm, tcg_env, mofs);
+ tcg_gen_ld_i64(pg, tcg_env, gofs);
+
+ gvec_op->fni8(pd, pn, pm, pg);
+ tcg_gen_st_i64(pd, tcg_env, dofs);
+
+ do_predtest1(pd, pg);
+ } else {
+ /* The operation and flags generation is large. The computation
+ * of the flags depends on the original contents of the guarding
+ * predicate. If the destination overwrites the guarding predicate,
+ * then the easiest way to get this right is to save a copy.
+ */
+ int tofs = gofs;
+ if (a->rd == a->pg) {
+ tofs = offsetof(CPUARMState, vfp.preg_tmp);
+ tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
+ }
+
+ tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
+ do_predtest(s, dofs, tofs, psz / 8);
+ }
+ return true;
+}
+
+static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_and_i64(pd, pn, pm);
+ tcg_gen_and_i64(pd, pd, pg);
+}
+
+static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_and_vec(vece, pd, pn, pm);
+ tcg_gen_and_vec(vece, pd, pd, pg);
+}
+
+static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_and_pg_i64,
+ .fniv = gen_and_pg_vec,
+ .fno = gen_helper_sve_and_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (!a->s) {
+ if (a->rn == a->rm) {
+ if (a->pg == a->rn) {
+ return do_mov_p(s, a->rd, a->rn);
+ }
+ return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
+ } else if (a->pg == a->rn || a->pg == a->rm) {
+ return gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
+ }
+ }
+ return do_pppp_flags(s, a, &op);
+}
+
+static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_andc_i64(pd, pn, pm);
+ tcg_gen_and_i64(pd, pd, pg);
+}
+
+static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_andc_vec(vece, pd, pn, pm);
+ tcg_gen_and_vec(vece, pd, pd, pg);
+}
+
+static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_bic_pg_i64,
+ .fniv = gen_bic_pg_vec,
+ .fno = gen_helper_sve_bic_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (!a->s && a->pg == a->rn) {
+ return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
+ }
+ return do_pppp_flags(s, a, &op);
+}
+
+static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_xor_i64(pd, pn, pm);
+ tcg_gen_and_i64(pd, pd, pg);
+}
+
+static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_xor_vec(vece, pd, pn, pm);
+ tcg_gen_and_vec(vece, pd, pd, pg);
+}
+
+static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_eor_pg_i64,
+ .fniv = gen_eor_pg_vec,
+ .fno = gen_helper_sve_eor_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ /* Alias NOT (predicate) is EOR Pd.B, Pg/Z, Pn.B, Pg.B */
+ if (!a->s && a->pg == a->rm) {
+ return gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->pg, a->rn);
+ }
+ return do_pppp_flags(s, a, &op);
+}
+
+static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
+{
+ if (a->s || !dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned psz = pred_gvec_reg_size(s);
+ tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
+ pred_full_reg_offset(s, a->pg),
+ pred_full_reg_offset(s, a->rn),
+ pred_full_reg_offset(s, a->rm), psz, psz);
+ }
+ return true;
+}
+
+static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_or_i64(pd, pn, pm);
+ tcg_gen_and_i64(pd, pd, pg);
+}
+
+static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_or_vec(vece, pd, pn, pm);
+ tcg_gen_and_vec(vece, pd, pd, pg);
+}
+
+static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_orr_pg_i64,
+ .fniv = gen_orr_pg_vec,
+ .fno = gen_helper_sve_orr_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (!a->s && a->pg == a->rn && a->rn == a->rm) {
+ return do_mov_p(s, a->rd, a->rn);
+ }
+ return do_pppp_flags(s, a, &op);
+}
+
+static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_orc_i64(pd, pn, pm);
+ tcg_gen_and_i64(pd, pd, pg);
+}
+
+static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_orc_vec(vece, pd, pn, pm);
+ tcg_gen_and_vec(vece, pd, pd, pg);
+}
+
+static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_orn_pg_i64,
+ .fniv = gen_orn_pg_vec,
+ .fno = gen_helper_sve_orn_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ return do_pppp_flags(s, a, &op);
+}
+
+static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_or_i64(pd, pn, pm);
+ tcg_gen_andc_i64(pd, pg, pd);
+}
+
+static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_or_vec(vece, pd, pn, pm);
+ tcg_gen_andc_vec(vece, pd, pg, pd);
+}
+
+static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_nor_pg_i64,
+ .fniv = gen_nor_pg_vec,
+ .fno = gen_helper_sve_nor_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ return do_pppp_flags(s, a, &op);
+}
+
+static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
+{
+ tcg_gen_and_i64(pd, pn, pm);
+ tcg_gen_andc_i64(pd, pg, pd);
+}
+
+static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
+ TCGv_vec pm, TCGv_vec pg)
+{
+ tcg_gen_and_vec(vece, pd, pn, pm);
+ tcg_gen_andc_vec(vece, pd, pg, pd);
+}
+
+static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_nand_pg_i64,
+ .fniv = gen_nand_pg_vec,
+ .fno = gen_helper_sve_nand_pppp,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ return do_pppp_flags(s, a, &op);
+}
+
+/*
+ *** SVE Predicate Misc Group
+ */
+
+static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ int nofs = pred_full_reg_offset(s, a->rn);
+ int gofs = pred_full_reg_offset(s, a->pg);
+ int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
+
+ if (words == 1) {
+ TCGv_i64 pn = tcg_temp_new_i64();
+ TCGv_i64 pg = tcg_temp_new_i64();
+
+ tcg_gen_ld_i64(pn, tcg_env, nofs);
+ tcg_gen_ld_i64(pg, tcg_env, gofs);
+ do_predtest1(pn, pg);
+ } else {
+ do_predtest(s, nofs, gofs, words);
+ }
+ }
+ return true;
+}
+
+/* See the ARM pseudocode DecodePredCount. */
+static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
+{
+ unsigned elements = fullsz >> esz;
+ unsigned bound;
+
+ switch (pattern) {
+ case 0x0: /* POW2 */
+ return pow2floor(elements);
+ case 0x1: /* VL1 */
+ case 0x2: /* VL2 */
+ case 0x3: /* VL3 */
+ case 0x4: /* VL4 */
+ case 0x5: /* VL5 */
+ case 0x6: /* VL6 */
+ case 0x7: /* VL7 */
+ case 0x8: /* VL8 */
+ bound = pattern;
+ break;
+ case 0x9: /* VL16 */
+ case 0xa: /* VL32 */
+ case 0xb: /* VL64 */
+ case 0xc: /* VL128 */
+ case 0xd: /* VL256 */
+ bound = 16 << (pattern - 9);
+ break;
+ case 0x1d: /* MUL4 */
+ return elements - elements % 4;
+ case 0x1e: /* MUL3 */
+ return elements - elements % 3;
+ case 0x1f: /* ALL */
+ return elements;
+ default: /* #uimm5 */
+ return 0;
+ }
+ return elements >= bound ? bound : 0;
+}
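+
+/*
+ * For example, with a 256-bit vector (fullsz == 32) and esz == MO_32
+ * there are 8 elements: POW2 and ALL give 8, VL7 gives 7, MUL3 gives
+ * 6, and a pattern larger than the vector (e.g. VL16) gives 0.
+ */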
+
+/* This handles all of the predicate initialization instructions,
+ * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
+ * so that decode_pred_count returns 0. For SETFFR, we will have
+ * set RD == 16 == FFR.
+ */
+static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
+{
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ unsigned fullsz = vec_full_reg_size(s);
+ unsigned ofs = pred_full_reg_offset(s, rd);
+ unsigned numelem, setsz, i;
+ uint64_t word, lastword;
+ TCGv_i64 t;
+
+ numelem = decode_pred_count(fullsz, pat, esz);
+
+ /* Determine what we must store into each bit, and how many. */
+ if (numelem == 0) {
+ lastword = word = 0;
+ setsz = fullsz;
+ } else {
+ setsz = numelem << esz;
+ lastword = word = pred_esz_masks[esz];
+ if (setsz % 64) {
+ lastword &= MAKE_64BIT_MASK(0, setsz % 64);
+ }
+ }
+
+ t = tcg_temp_new_i64();
+ if (fullsz <= 64) {
+ tcg_gen_movi_i64(t, lastword);
+ tcg_gen_st_i64(t, tcg_env, ofs);
+ goto done;
+ }
+
+ if (word == lastword) {
+ unsigned maxsz = size_for_gvec(fullsz / 8);
+ unsigned oprsz = size_for_gvec(setsz / 8);
+
+ if (oprsz * 8 == setsz) {
+ tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
+ goto done;
+ }
+ }
+
+ setsz /= 8;
+ fullsz /= 8;
+
+ tcg_gen_movi_i64(t, word);
+ for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
+ tcg_gen_st_i64(t, tcg_env, ofs + i);
+ }
+ if (lastword != word) {
+ tcg_gen_movi_i64(t, lastword);
+ tcg_gen_st_i64(t, tcg_env, ofs + i);
+ i += 8;
+ }
+ if (i < fullsz) {
+ tcg_gen_movi_i64(t, 0);
+ for (; i < fullsz; i += 8) {
+ tcg_gen_st_i64(t, tcg_env, ofs + i);
+ }
+ }
+
+ done:
+ /* PTRUES */
+ if (setflag) {
+ tcg_gen_movi_i32(cpu_NF, -(word != 0));
+ tcg_gen_movi_i32(cpu_CF, word == 0);
+ tcg_gen_movi_i32(cpu_VF, 0);
+ tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+ }
+ return true;
+}
+
+TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
+
+/* Note pat == 31 is #all, to set all elements. */
+TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
+ do_predset, 0, FFR_PRED_NUM, 31, false)
+
+/* Note pat == 32 is #unimp, to set no elements. */
+TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
+
+static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
+{
+ /* The path through do_pppp_flags is complicated enough to want to avoid
+ * duplication. Frob the arguments into the form of a predicated AND.
+ */
+ arg_rprr_s alt_a = {
+ .rd = a->rd, .pg = a->pg, .s = a->s,
+ .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
+ };
+
+ s->is_nonstreaming = true;
+ return trans_AND_pppp(s, &alt_a);
+}
+
+TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
+TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
+
+static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
+ void (*gen_fn)(TCGv_i32, TCGv_ptr,
+ TCGv_ptr, TCGv_i32))
+{
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ TCGv_ptr t_pd = tcg_temp_new_ptr();
+ TCGv_ptr t_pg = tcg_temp_new_ptr();
+ TCGv_i32 t;
+ unsigned desc = 0;
+
+ desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
+ desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
+
+ tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, a->rd));
+ tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->rn));
+ t = tcg_temp_new_i32();
+
+ gen_fn(t, t_pd, t_pg, tcg_constant_i32(desc));
+
+ do_pred_flags(t);
+ return true;
+}
+
+TRANS_FEAT(PFIRST, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pfirst)
+TRANS_FEAT(PNEXT, aa64_sve, do_pfirst_pnext, a, gen_helper_sve_pnext)
+
+/*
+ *** SVE Element Count Group
+ */
+
+/* Perform an inline saturating addition of a 32-bit value within
+ * a 64-bit register. The second operand is known to be positive,
+ * which halves the comparisons we must perform to bound the result.
+ */
+static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
+{
+ int64_t ibound;
+
+ /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
+ if (u) {
+ tcg_gen_ext32u_i64(reg, reg);
+ } else {
+ tcg_gen_ext32s_i64(reg, reg);
+ }
+ if (d) {
+ tcg_gen_sub_i64(reg, reg, val);
+ ibound = (u ? 0 : INT32_MIN);
+ tcg_gen_smax_i64(reg, reg, tcg_constant_i64(ibound));
+ } else {
+ tcg_gen_add_i64(reg, reg, val);
+ ibound = (u ? UINT32_MAX : INT32_MAX);
+ tcg_gen_smin_i64(reg, reg, tcg_constant_i64(ibound));
+ }
+}
+
+/* Similarly with 64-bit values. */
+static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t2;
+
+ if (u) {
+ if (d) {
+ tcg_gen_sub_i64(t0, reg, val);
+ t2 = tcg_constant_i64(0);
+ tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
+ } else {
+ tcg_gen_add_i64(t0, reg, val);
+ t2 = tcg_constant_i64(-1);
+ tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
+ }
+ } else {
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ if (d) {
+ /* Detect signed overflow for subtraction. */
+ tcg_gen_xor_i64(t0, reg, val);
+ tcg_gen_sub_i64(t1, reg, val);
+ tcg_gen_xor_i64(reg, reg, t1);
+ tcg_gen_and_i64(t0, t0, reg);
+
+ /* Bound the result. */
+ tcg_gen_movi_i64(reg, INT64_MIN);
+ t2 = tcg_constant_i64(0);
+ tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
+ } else {
+ /* Detect signed overflow for addition. */
+ tcg_gen_xor_i64(t0, reg, val);
+ tcg_gen_add_i64(reg, reg, val);
+ tcg_gen_xor_i64(t1, reg, val);
+ tcg_gen_andc_i64(t0, t1, t0);
+
+ /* Bound the result. */
+ tcg_gen_movi_i64(t1, INT64_MAX);
+ t2 = tcg_constant_i64(0);
+ tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
+ }
+ }
+}
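+
+/*
+ * The signed cases above use the usual xor test: subtraction overflows
+ * only when reg and val differ in sign and the result differs in sign
+ * from reg; addition overflows only when reg and val share a sign and
+ * the result does not.  Either way the condition lands in the sign bit
+ * of t0 for movcond to test, and since val is known to be positive the
+ * saturation value is INT64_MIN for subtraction and INT64_MAX for
+ * addition.
+ */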
+
+/* Similarly with a vector and a scalar operand. */
+static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
+ TCGv_i64 val, bool u, bool d)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr dptr, nptr;
+ TCGv_i32 t32, desc;
+ TCGv_i64 t64;
+
+ dptr = tcg_temp_new_ptr();
+ nptr = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(dptr, tcg_env, vec_full_reg_offset(s, rd));
+ tcg_gen_addi_ptr(nptr, tcg_env, vec_full_reg_offset(s, rn));
+ desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
+
+ switch (esz) {
+ case MO_8:
+ t32 = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t32, val);
+ if (d) {
+ tcg_gen_neg_i32(t32, t32);
+ }
+ if (u) {
+ gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
+ } else {
+ gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
+ }
+ break;
+
+ case MO_16:
+ t32 = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t32, val);
+ if (d) {
+ tcg_gen_neg_i32(t32, t32);
+ }
+ if (u) {
+ gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
+ } else {
+ gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
+ }
+ break;
+
+ case MO_32:
+ t64 = tcg_temp_new_i64();
+ if (d) {
+ tcg_gen_neg_i64(t64, val);
+ } else {
+ tcg_gen_mov_i64(t64, val);
+ }
+ if (u) {
+ gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
+ } else {
+ gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
+ }
+ break;
+
+ case MO_64:
+ if (u) {
+ if (d) {
+ gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
+ } else {
+ gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
+ }
+ } else if (d) {
+ t64 = tcg_temp_new_i64();
+ tcg_gen_neg_i64(t64, val);
+ gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
+ } else {
+ gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+}
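+
+/*
+ * Note that subtraction is handled by negating the (known positive)
+ * scalar and reusing the saturating-add helpers, except for unsigned
+ * 64-bit values, where a negated operand would be indistinguishable
+ * from a large unsigned addend and a dedicated uqsubi helper is used.
+ */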
+
+static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned fullsz = vec_full_reg_size(s);
+ unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
+ tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
+ }
+ return true;
+}
+
+static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned fullsz = vec_full_reg_size(s);
+ unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
+ int inc = numelem * a->imm * (a->d ? -1 : 1);
+ TCGv_i64 reg = cpu_reg(s, a->rd);
+
+ tcg_gen_addi_i64(reg, reg, inc);
+ }
+ return true;
+}
+
+static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ unsigned fullsz = vec_full_reg_size(s);
+ unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
+ int inc = numelem * a->imm;
+ TCGv_i64 reg = cpu_reg(s, a->rd);
+
+ /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
+ if (inc == 0) {
+ if (a->u) {
+ tcg_gen_ext32u_i64(reg, reg);
+ } else {
+ tcg_gen_ext32s_i64(reg, reg);
+ }
+ } else {
+ do_sat_addsub_32(reg, tcg_constant_i64(inc), a->u, a->d);
+ }
+ return true;
+}
+
+static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ unsigned fullsz = vec_full_reg_size(s);
+ unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
+ int inc = numelem * a->imm;
+ TCGv_i64 reg = cpu_reg(s, a->rd);
+
+ if (inc != 0) {
+ do_sat_addsub_64(reg, tcg_constant_i64(inc), a->u, a->d);
+ }
+ return true;
+}
+
+static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
+{
+ if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+
+ unsigned fullsz = vec_full_reg_size(s);
+ unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
+ int inc = numelem * a->imm;
+
+ if (inc != 0) {
+ if (sve_access_check(s)) {
+ tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ tcg_constant_i64(a->d ? -inc : inc),
+ fullsz, fullsz);
+ }
+ } else {
+ do_mov_z(s, a->rd, a->rn);
+ }
+ return true;
+}
+
+static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
+{
+ if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+
+ unsigned fullsz = vec_full_reg_size(s);
+ unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
+ int inc = numelem * a->imm;
+
+ if (inc != 0) {
+ if (sve_access_check(s)) {
+ do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
+ tcg_constant_i64(inc), a->u, a->d);
+ }
+ } else {
+ do_mov_z(s, a->rd, a->rn);
+ }
+ return true;
+}
+
+/*
+ *** SVE Bitwise Immediate Group
+ */
+
+static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
+{
+ uint64_t imm;
+ if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
+ extract32(a->dbm, 0, 6),
+ extract32(a->dbm, 6, 6))) {
+ return false;
+ }
+ return gen_gvec_fn_zzi(s, gvec_fn, MO_64, a->rd, a->rn, imm);
+}
+
+TRANS_FEAT(AND_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_andi)
+TRANS_FEAT(ORR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_ori)
+TRANS_FEAT(EOR_zzi, aa64_sve, do_zz_dbm, a, tcg_gen_gvec_xori)
+
+static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
+{
+ uint64_t imm;
+
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
+ extract32(a->dbm, 0, 6),
+ extract32(a->dbm, 6, 6))) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ do_dupi_z(s, a->rd, imm);
+ }
+ return true;
+}
+
+/*
+ *** SVE Integer Wide Immediate - Predicated Group
+ */
+
+/* Implement all merging copies. This is used for CPY (immediate),
+ * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
+ */
+static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
+ TCGv_i64 val)
+{
+ typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
+ static gen_cpy * const fns[4] = {
+ gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
+ gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
+ };
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
+ TCGv_ptr t_zd = tcg_temp_new_ptr();
+ TCGv_ptr t_zn = tcg_temp_new_ptr();
+ TCGv_ptr t_pg = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, rd));
+ tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, rn));
+ tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
+
+ fns[esz](t_zd, t_zn, t_pg, val, desc);
+}
+
+static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
+{
+ if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ /* Decode the VFP immediate. */
+ uint64_t imm = vfp_expand_imm(a->esz, a->imm);
+ do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(imm));
+ }
+ return true;
+}
+
+static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, tcg_constant_i64(a->imm));
+ }
+ return true;
+}
+
+static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
+{
+ static gen_helper_gvec_2i * const fns[4] = {
+ gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
+ gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
+ };
+
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
+ pred_full_reg_offset(s, a->pg),
+ tcg_constant_i64(a->imm),
+ vsz, vsz, 0, fns[a->esz]);
+ }
+ return true;
+}
+
+/*
+ *** SVE Permute Extract Group
+ */
+
+static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
+{
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ unsigned vsz = vec_full_reg_size(s);
+ unsigned n_ofs = imm >= vsz ? 0 : imm;
+ unsigned n_siz = vsz - n_ofs;
+ unsigned d = vec_full_reg_offset(s, rd);
+ unsigned n = vec_full_reg_offset(s, rn);
+ unsigned m = vec_full_reg_offset(s, rm);
+
+ /* Use host vector move insns if we have appropriate sizes
+ * and no unfortunate overlap.
+ */
+ if (m != d
+ && n_ofs == size_for_gvec(n_ofs)
+ && n_siz == size_for_gvec(n_siz)
+ && (d != n || n_siz <= n_ofs)) {
+ tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
+ if (n_ofs != 0) {
+ tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
+ }
+ } else {
+ tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
+ }
+ return true;
+}
+
+TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
+TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
+
+/*
+ *** SVE Permute - Unpredicated Group
+ */
+
+static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
+ vsz, vsz, cpu_reg_sp(s, a->rn));
+ }
+ return true;
+}
+
+static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if ((a->imm & 0x1f) == 0) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ unsigned dofs = vec_full_reg_offset(s, a->rd);
+ unsigned esz, index;
+
+ esz = ctz32(a->imm);
+ index = a->imm >> (esz + 1);
+
+ if ((index << esz) < vsz) {
+ unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
+ tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
+ } else {
+ /*
+ * While dup_mem handles 128-bit elements, dup_imm does not.
+ * Thankfully element size doesn't matter for splatting zero.
+ */
+ tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
+ }
+ }
+ return true;
+}
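+
+/*
+ * In trans_DUP_x above, a->imm is the concatenated imm2:tsz index field:
+ * the lowest set bit gives the element size and the bits above it give
+ * the element index.  E.g. imm = 0b0001001 yields esz = MO_8 and
+ * index = 4, i.e. splat byte element 4 of Zn; an index beyond the
+ * current vector length falls into the else branch and splats zero.
+ */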
+
+static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
+{
+ typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
+ static gen_insr * const fns[4] = {
+ gen_helper_sve_insr_b, gen_helper_sve_insr_h,
+ gen_helper_sve_insr_s, gen_helper_sve_insr_d,
+ };
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_i32 desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
+ TCGv_ptr t_zd = tcg_temp_new_ptr();
+ TCGv_ptr t_zn = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, a->rd));
+ tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
+
+ fns[a->esz](t_zd, t_zn, val, desc);
+}
+
+static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_ld_i64(t, tcg_env, vec_reg_offset(s, a->rm, 0, MO_64));
+ do_insr_i64(s, a, t);
+ }
+ return true;
+}
+
+static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ do_insr_i64(s, a, cpu_reg(s, a->rm));
+ }
+ return true;
+}
+
+static gen_helper_gvec_2 * const rev_fns[4] = {
+ gen_helper_sve_rev_b, gen_helper_sve_rev_h,
+ gen_helper_sve_rev_s, gen_helper_sve_rev_d
+};
+TRANS_FEAT(REV_v, aa64_sve, gen_gvec_ool_zz, rev_fns[a->esz], a->rd, a->rn, 0)
+
+static gen_helper_gvec_3 * const sve_tbl_fns[4] = {
+ gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
+ gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
+};
+TRANS_FEAT(TBL, aa64_sve, gen_gvec_ool_arg_zzz, sve_tbl_fns[a->esz], a, 0)
+
+static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
+ gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
+ gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
+};
+TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
+ a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)
+
+static gen_helper_gvec_3 * const tbx_fns[4] = {
+ gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
+ gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
+};
+TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
+
+static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
+{
+ static gen_helper_gvec_2 * const fns[4][2] = {
+ { NULL, NULL },
+ { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
+ { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
+ { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
+ };
+
+ if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn)
+ + (a->h ? vsz / 2 : 0),
+ vsz, vsz, 0, fns[a->esz][a->u]);
+ }
+ return true;
+}
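+
+/*
+ * For UNPK the helper widens each element of one half of Zn: a->h
+ * selects the high half (hence the vsz / 2 bias on the source offset
+ * above) and a->u selects the zero-extending rather than the
+ * sign-extending helper.
+ */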
+
+/*
+ *** SVE Permute - Predicates Group
+ */
+
+static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
+ gen_helper_gvec_3 *fn)
+{
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ unsigned vsz = pred_full_reg_size(s);
+
+ TCGv_ptr t_d = tcg_temp_new_ptr();
+ TCGv_ptr t_n = tcg_temp_new_ptr();
+ TCGv_ptr t_m = tcg_temp_new_ptr();
+ uint32_t desc = 0;
+
+ desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
+ desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
+ desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
+
+ tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd));
+ tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn));
+ tcg_gen_addi_ptr(t_m, tcg_env, pred_full_reg_offset(s, a->rm));
+
+ fn(t_d, t_n, t_m, tcg_constant_i32(desc));
+ return true;
+}
+
+static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
+ gen_helper_gvec_2 *fn)
+{
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ unsigned vsz = pred_full_reg_size(s);
+ TCGv_ptr t_d = tcg_temp_new_ptr();
+ TCGv_ptr t_n = tcg_temp_new_ptr();
+ uint32_t desc = 0;
+
+ tcg_gen_addi_ptr(t_d, tcg_env, pred_full_reg_offset(s, a->rd));
+ tcg_gen_addi_ptr(t_n, tcg_env, pred_full_reg_offset(s, a->rn));
+
+ desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
+ desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
+ desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
+
+ fn(t_d, t_n, tcg_constant_i32(desc));
+ return true;
+}
+
+TRANS_FEAT(ZIP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_zip_p)
+TRANS_FEAT(ZIP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_zip_p)
+TRANS_FEAT(UZP1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_uzp_p)
+TRANS_FEAT(UZP2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_uzp_p)
+TRANS_FEAT(TRN1_p, aa64_sve, do_perm_pred3, a, 0, gen_helper_sve_trn_p)
+TRANS_FEAT(TRN2_p, aa64_sve, do_perm_pred3, a, 1, gen_helper_sve_trn_p)
+
+TRANS_FEAT(REV_p, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_rev_p)
+TRANS_FEAT(PUNPKLO, aa64_sve, do_perm_pred2, a, 0, gen_helper_sve_punpk_p)
+TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
+
+/*
+ *** SVE Permute - Interleaving Group
+ */
+
+static gen_helper_gvec_3 * const zip_fns[4] = {
+ gen_helper_sve_zip_b, gen_helper_sve_zip_h,
+ gen_helper_sve_zip_s, gen_helper_sve_zip_d,
+};
+TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
+ zip_fns[a->esz], a, 0)
+TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
+ zip_fns[a->esz], a, vec_full_reg_size(s) / 2)
+
+TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
+ gen_helper_sve2_zip_q, a, 0)
+TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
+ gen_helper_sve2_zip_q, a,
+ QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
+
+static gen_helper_gvec_3 * const uzp_fns[4] = {
+ gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
+ gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
+};
+
+TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
+ uzp_fns[a->esz], a, 0)
+TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
+ uzp_fns[a->esz], a, 1 << a->esz)
+
+TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
+ gen_helper_sve2_uzp_q, a, 0)
+TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
+ gen_helper_sve2_uzp_q, a, 16)
+
+static gen_helper_gvec_3 * const trn_fns[4] = {
+ gen_helper_sve_trn_b, gen_helper_sve_trn_h,
+ gen_helper_sve_trn_s, gen_helper_sve_trn_d,
+};
+
+TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
+ trn_fns[a->esz], a, 0)
+TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
+ trn_fns[a->esz], a, 1 << a->esz)
+
+TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
+ gen_helper_sve2_trn_q, a, 0)
+TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
+ gen_helper_sve2_trn_q, a, 16)
+
+/*
+ *** SVE Permute Vector - Predicated Group
+ */
+
+static gen_helper_gvec_3 * const compact_fns[4] = {
+ NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
+};
+TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
+ compact_fns[a->esz], a, 0)
+
+/* Call the helper that computes the ARM LastActiveElement pseudocode
+ * function, scaled by the element size. This includes the not found
+ * indication; e.g. not found for esz=3 is -8.
+ */
+static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
+{
+ /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
+ * round up, as we do elsewhere, because we need the exact size.
+ */
+ TCGv_ptr t_p = tcg_temp_new_ptr();
+ unsigned desc = 0;
+
+ desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
+ desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
+
+ tcg_gen_addi_ptr(t_p, tcg_env, pred_full_reg_offset(s, pg));
+
+ gen_helper_sve_last_active_element(ret, t_p, tcg_constant_i32(desc));
+}
+
+/* Increment LAST to the offset of the next element in the vector,
+ * wrapping around to 0.
+ */
+static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
+{
+ unsigned vsz = vec_full_reg_size(s);
+
+ tcg_gen_addi_i32(last, last, 1 << esz);
+ if (is_power_of_2(vsz)) {
+ tcg_gen_andi_i32(last, last, vsz - 1);
+ } else {
+ TCGv_i32 max = tcg_constant_i32(vsz);
+ TCGv_i32 zero = tcg_constant_i32(0);
+ tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
+ }
+}
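+
+/*
+ * E.g. with a 384-bit vector (vsz = 48, not a power of two) and
+ * esz = MO_32, an offset of 44 increments to 48 and the movcond above
+ * wraps it back to 0; for power-of-two sizes the cheaper AND achieves
+ * the same wrap-around.
+ */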
+
+/* If LAST < 0, set LAST to the offset of the last element in the vector. */
+static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
+{
+ unsigned vsz = vec_full_reg_size(s);
+
+ if (is_power_of_2(vsz)) {
+ tcg_gen_andi_i32(last, last, vsz - 1);
+ } else {
+ TCGv_i32 max = tcg_constant_i32(vsz - (1 << esz));
+ TCGv_i32 zero = tcg_constant_i32(0);
+ tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
+ }
+}
+
+/* Load an unsigned element of ESZ from BASE+OFS. */
+static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
+{
+ TCGv_i64 r = tcg_temp_new_i64();
+
+ switch (esz) {
+ case 0:
+ tcg_gen_ld8u_i64(r, base, ofs);
+ break;
+ case 1:
+ tcg_gen_ld16u_i64(r, base, ofs);
+ break;
+ case 2:
+ tcg_gen_ld32u_i64(r, base, ofs);
+ break;
+ case 3:
+ tcg_gen_ld_i64(r, base, ofs);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return r;
+}
+
+/* Load an unsigned element of ESZ from RM[LAST]. */
+static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
+ int rm, int esz)
+{
+ TCGv_ptr p = tcg_temp_new_ptr();
+
+ /* Convert offset into vector into offset into ENV.
+ * The final adjustment for the vector register base
+ * is added via constant offset to the load.
+ */
+#if HOST_BIG_ENDIAN
+ /* Adjust for element ordering. See vec_reg_offset. */
+ if (esz < 3) {
+ tcg_gen_xori_i32(last, last, 8 - (1 << esz));
+ }
+#endif
+ tcg_gen_ext_i32_ptr(p, last);
+ tcg_gen_add_ptr(p, p, tcg_env);
+
+ return load_esz(p, vec_full_reg_offset(s, rm), esz);
+}
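+
+/*
+ * The HOST_BIG_ENDIAN adjustment above mirrors vec_reg_offset(): on a
+ * big-endian host the sub-doubleword elements sit at flipped offsets
+ * within each 64-bit storage unit, so e.g. byte element 3 is found at
+ * byte offset 3 ^ 7 = 4 of its doubleword; 64-bit elements need no
+ * adjustment.
+ */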
+
+/* Compute CLAST for a Zreg. */
+static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
+{
+ TCGv_i32 last;
+ TCGLabel *over;
+ TCGv_i64 ele;
+ unsigned vsz, esz = a->esz;
+
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ last = tcg_temp_new_i32();
+ over = gen_new_label();
+
+ find_last_active(s, last, esz, a->pg);
+
+ /* There is of course no movcond for a 2048-bit vector,
+ * so we must branch over the actual store.
+ */
+ tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
+
+ if (!before) {
+ incr_last_active(s, last, esz);
+ }
+
+ ele = load_last_active(s, last, a->rm, esz);
+
+ vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
+
+ /* If this insn used MOVPRFX, we may need a second move. */
+ if (a->rd != a->rn) {
+ TCGLabel *done = gen_new_label();
+ tcg_gen_br(done);
+
+ gen_set_label(over);
+ do_mov_z(s, a->rd, a->rn);
+
+ gen_set_label(done);
+ } else {
+ gen_set_label(over);
+ }
+ return true;
+}
+
+TRANS_FEAT(CLASTA_z, aa64_sve, do_clast_vector, a, false)
+TRANS_FEAT(CLASTB_z, aa64_sve, do_clast_vector, a, true)
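+
+/*
+ * CLASTB (before == true) copies the last active element itself, while
+ * CLASTA (before == false) advances past it first, so it copies the
+ * element after the last active one, wrapping to element 0 at the end
+ * of the vector.  With no active elements the branch above skips the
+ * dup and the destination keeps (or receives, via do_mov_z) the value
+ * of Zn.
+ */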
+
+/* Compute CLAST for a scalar. */
+static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
+ bool before, TCGv_i64 reg_val)
+{
+ TCGv_i32 last = tcg_temp_new_i32();
+ TCGv_i64 ele, cmp;
+
+ find_last_active(s, last, esz, pg);
+
+ /* Extend the original value of last prior to incrementing. */
+ cmp = tcg_temp_new_i64();
+ tcg_gen_ext_i32_i64(cmp, last);
+
+ if (!before) {
+ incr_last_active(s, last, esz);
+ }
+
+ /* The conceit here is that while last < 0 indicates not found, after
+ * adjusting for tcg_env->vfp.zregs[rm], it is still a valid address
+ * from which we can load garbage. We then discard the garbage with
+ * a conditional move.
+ */
+ ele = load_last_active(s, last, rm, esz);
+
+ tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, tcg_constant_i64(0),
+ ele, reg_val);
+}
+
+/* Compute CLAST for a Vreg. */
+static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
+{
+ if (sve_access_check(s)) {
+ int esz = a->esz;
+ int ofs = vec_reg_offset(s, a->rd, 0, esz);
+ TCGv_i64 reg = load_esz(tcg_env, ofs, esz);
+
+ do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
+ write_fp_dreg(s, a->rd, reg);
+ }
+ return true;
+}
+
+TRANS_FEAT(CLASTA_v, aa64_sve, do_clast_fp, a, false)
+TRANS_FEAT(CLASTB_v, aa64_sve, do_clast_fp, a, true)
+
+/* Compute CLAST for a Xreg. */
+static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
+{
+ TCGv_i64 reg;
+
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ reg = cpu_reg(s, a->rd);
+ switch (a->esz) {
+ case 0:
+ tcg_gen_ext8u_i64(reg, reg);
+ break;
+ case 1:
+ tcg_gen_ext16u_i64(reg, reg);
+ break;
+ case 2:
+ tcg_gen_ext32u_i64(reg, reg);
+ break;
+ case 3:
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
+ return true;
+}
+
+TRANS_FEAT(CLASTA_r, aa64_sve, do_clast_general, a, false)
+TRANS_FEAT(CLASTB_r, aa64_sve, do_clast_general, a, true)
+
+/* Compute LAST for a scalar. */
+static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
+ int pg, int rm, bool before)
+{
+ TCGv_i32 last = tcg_temp_new_i32();
+
+ find_last_active(s, last, esz, pg);
+ if (before) {
+ wrap_last_active(s, last, esz);
+ } else {
+ incr_last_active(s, last, esz);
+ }
+
+ return load_last_active(s, last, rm, esz);
+}
+
+/* Compute LAST for a Vreg. */
+static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
+{
+ if (sve_access_check(s)) {
+ TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
+ write_fp_dreg(s, a->rd, val);
+ }
+ return true;
+}
+
+TRANS_FEAT(LASTA_v, aa64_sve, do_last_fp, a, false)
+TRANS_FEAT(LASTB_v, aa64_sve, do_last_fp, a, true)
+
+/* Compute LAST for a Xreg. */
+static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
+{
+ if (sve_access_check(s)) {
+ TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
+ tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
+ }
+ return true;
+}
+
+TRANS_FEAT(LASTA_r, aa64_sve, do_last_general, a, false)
+TRANS_FEAT(LASTB_r, aa64_sve, do_last_general, a, true)
+
+static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
+ }
+ return true;
+}
+
+static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
+ TCGv_i64 t = load_esz(tcg_env, ofs, a->esz);
+ do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
+ }
+ return true;
+}
+
+static gen_helper_gvec_3 * const revb_fns[4] = {
+ NULL, gen_helper_sve_revb_h,
+ gen_helper_sve_revb_s, gen_helper_sve_revb_d,
+};
+TRANS_FEAT(REVB, aa64_sve, gen_gvec_ool_arg_zpz, revb_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const revh_fns[4] = {
+ NULL, NULL, gen_helper_sve_revh_s, gen_helper_sve_revh_d,
+};
+TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
+
+TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
+ a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
+
+TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
+
+TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
+ gen_helper_sve_splice, a, a->esz)
+
+TRANS_FEAT(SPLICE_sve2, aa64_sve2, gen_gvec_ool_zzzp, gen_helper_sve_splice,
+ a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz)
+
+/*
+ *** SVE Integer Compare - Vectors Group
+ */
+
+static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
+ gen_helper_gvec_flags_4 *gen_fn)
+{
+ TCGv_ptr pd, zn, zm, pg;
+ unsigned vsz;
+ TCGv_i32 t;
+
+ if (gen_fn == NULL) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ vsz = vec_full_reg_size(s);
+ t = tcg_temp_new_i32();
+ pd = tcg_temp_new_ptr();
+ zn = tcg_temp_new_ptr();
+ zm = tcg_temp_new_ptr();
+ pg = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd));
+ tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn));
+ tcg_gen_addi_ptr(zm, tcg_env, vec_full_reg_offset(s, a->rm));
+ tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg));
+
+ gen_fn(t, pd, zn, zm, pg, tcg_constant_i32(simd_desc(vsz, vsz, 0)));
+
+ do_pred_flags(t);
+ return true;
+}
+
+#define DO_PPZZ(NAME, name) \
+ static gen_helper_gvec_flags_4 * const name##_ppzz_fns[4] = { \
+ gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
+ gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
+ }; \
+ TRANS_FEAT(NAME##_ppzz, aa64_sve, do_ppzz_flags, \
+ a, name##_ppzz_fns[a->esz])
+
+DO_PPZZ(CMPEQ, cmpeq)
+DO_PPZZ(CMPNE, cmpne)
+DO_PPZZ(CMPGT, cmpgt)
+DO_PPZZ(CMPGE, cmpge)
+DO_PPZZ(CMPHI, cmphi)
+DO_PPZZ(CMPHS, cmphs)
+
+#undef DO_PPZZ
+
+#define DO_PPZW(NAME, name) \
+ static gen_helper_gvec_flags_4 * const name##_ppzw_fns[4] = { \
+ gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
+ gen_helper_sve_##name##_ppzw_s, NULL \
+ }; \
+ TRANS_FEAT(NAME##_ppzw, aa64_sve, do_ppzz_flags, \
+ a, name##_ppzw_fns[a->esz])
+
+DO_PPZW(CMPEQ, cmpeq)
+DO_PPZW(CMPNE, cmpne)
+DO_PPZW(CMPGT, cmpgt)
+DO_PPZW(CMPGE, cmpge)
+DO_PPZW(CMPHI, cmphi)
+DO_PPZW(CMPHS, cmphs)
+DO_PPZW(CMPLT, cmplt)
+DO_PPZW(CMPLE, cmple)
+DO_PPZW(CMPLO, cmplo)
+DO_PPZW(CMPLS, cmpls)
+
+#undef DO_PPZW
+
+/*
+ *** SVE Integer Compare - Immediate Groups
+ */
+
+static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
+ gen_helper_gvec_flags_3 *gen_fn)
+{
+ TCGv_ptr pd, zn, pg;
+ unsigned vsz;
+ TCGv_i32 t;
+
+ if (gen_fn == NULL) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ vsz = vec_full_reg_size(s);
+ t = tcg_temp_new_i32();
+ pd = tcg_temp_new_ptr();
+ zn = tcg_temp_new_ptr();
+ pg = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(pd, tcg_env, pred_full_reg_offset(s, a->rd));
+ tcg_gen_addi_ptr(zn, tcg_env, vec_full_reg_offset(s, a->rn));
+ tcg_gen_addi_ptr(pg, tcg_env, pred_full_reg_offset(s, a->pg));
+
+ gen_fn(t, pd, zn, pg, tcg_constant_i32(simd_desc(vsz, vsz, a->imm)));
+
+ do_pred_flags(t);
+ return true;
+}
+
+#define DO_PPZI(NAME, name) \
+ static gen_helper_gvec_flags_3 * const name##_ppzi_fns[4] = { \
+ gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
+ gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
+ }; \
+ TRANS_FEAT(NAME##_ppzi, aa64_sve, do_ppzi_flags, a, \
+ name##_ppzi_fns[a->esz])
+
+DO_PPZI(CMPEQ, cmpeq)
+DO_PPZI(CMPNE, cmpne)
+DO_PPZI(CMPGT, cmpgt)
+DO_PPZI(CMPGE, cmpge)
+DO_PPZI(CMPHI, cmphi)
+DO_PPZI(CMPHS, cmphs)
+DO_PPZI(CMPLT, cmplt)
+DO_PPZI(CMPLE, cmple)
+DO_PPZI(CMPLO, cmplo)
+DO_PPZI(CMPLS, cmpls)
+
+#undef DO_PPZI
+
+/*
+ *** SVE Partition Break Group
+ */
+
+static bool do_brk3(DisasContext *s, arg_rprr_s *a,
+ gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
+{
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ unsigned vsz = pred_full_reg_size(s);
+
+ /* Predicate sizes may be smaller and cannot use simd_desc. */
+ TCGv_ptr d = tcg_temp_new_ptr();
+ TCGv_ptr n = tcg_temp_new_ptr();
+ TCGv_ptr m = tcg_temp_new_ptr();
+ TCGv_ptr g = tcg_temp_new_ptr();
+ TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
+
+ tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd));
+ tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn));
+ tcg_gen_addi_ptr(m, tcg_env, pred_full_reg_offset(s, a->rm));
+ tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg));
+
+ if (a->s) {
+ TCGv_i32 t = tcg_temp_new_i32();
+ fn_s(t, d, n, m, g, desc);
+ do_pred_flags(t);
+ } else {
+ fn(d, n, m, g, desc);
+ }
+ return true;
+}
+
+static bool do_brk2(DisasContext *s, arg_rpr_s *a,
+ gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
+{
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ unsigned vsz = pred_full_reg_size(s);
+
+ /* Predicate sizes may be smaller and cannot use simd_desc. */
+ TCGv_ptr d = tcg_temp_new_ptr();
+ TCGv_ptr n = tcg_temp_new_ptr();
+ TCGv_ptr g = tcg_temp_new_ptr();
+ TCGv_i32 desc = tcg_constant_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));
+
+ tcg_gen_addi_ptr(d, tcg_env, pred_full_reg_offset(s, a->rd));
+ tcg_gen_addi_ptr(n, tcg_env, pred_full_reg_offset(s, a->rn));
+ tcg_gen_addi_ptr(g, tcg_env, pred_full_reg_offset(s, a->pg));
+
+ if (a->s) {
+ TCGv_i32 t = tcg_temp_new_i32();
+ fn_s(t, d, n, g, desc);
+ do_pred_flags(t);
+ } else {
+ fn(d, n, g, desc);
+ }
+ return true;
+}
+
+TRANS_FEAT(BRKPA, aa64_sve, do_brk3, a,
+ gen_helper_sve_brkpa, gen_helper_sve_brkpas)
+TRANS_FEAT(BRKPB, aa64_sve, do_brk3, a,
+ gen_helper_sve_brkpb, gen_helper_sve_brkpbs)
+
+TRANS_FEAT(BRKA_m, aa64_sve, do_brk2, a,
+ gen_helper_sve_brka_m, gen_helper_sve_brkas_m)
+TRANS_FEAT(BRKB_m, aa64_sve, do_brk2, a,
+ gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m)
+
+TRANS_FEAT(BRKA_z, aa64_sve, do_brk2, a,
+ gen_helper_sve_brka_z, gen_helper_sve_brkas_z)
+TRANS_FEAT(BRKB_z, aa64_sve, do_brk2, a,
+ gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z)
+
+TRANS_FEAT(BRKN, aa64_sve, do_brk2, a,
+ gen_helper_sve_brkn, gen_helper_sve_brkns)
+
+/*
+ *** SVE Predicate Count Group
+ */
+
+static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
+{
+ unsigned psz = pred_full_reg_size(s);
+
+ if (psz <= 8) {
+ uint64_t psz_mask;
+
+ tcg_gen_ld_i64(val, tcg_env, pred_full_reg_offset(s, pn));
+ if (pn != pg) {
+ TCGv_i64 g = tcg_temp_new_i64();
+ tcg_gen_ld_i64(g, tcg_env, pred_full_reg_offset(s, pg));
+ tcg_gen_and_i64(val, val, g);
+ }
+
+ /* Narrow the pred_esz_masks value to the predicate size, purely
+ * to reduce the size of the code generated here.
+ */
+ psz_mask = MAKE_64BIT_MASK(0, psz * 8);
+ tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
+
+ tcg_gen_ctpop_i64(val, val);
+ } else {
+ TCGv_ptr t_pn = tcg_temp_new_ptr();
+ TCGv_ptr t_pg = tcg_temp_new_ptr();
+ unsigned desc = 0;
+
+ desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
+ desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
+
+ tcg_gen_addi_ptr(t_pn, tcg_env, pred_full_reg_offset(s, pn));
+ tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
+
+ gen_helper_sve_cntp(val, t_pn, t_pg, tcg_constant_i32(desc));
+ }
+}
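+
+/*
+ * Worked example for the fast path: with a 256-bit vector the predicate
+ * register is 4 bytes, so it fits in a single 64-bit load; for
+ * esz == MO_32 the combined mask is pred_esz_masks[2] & 0xffffffff =
+ * 0x11111111, leaving one bit per active 32-bit element for the ctpop.
+ */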
+
+static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
+ }
+ return true;
+}
+
+static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ TCGv_i64 reg = cpu_reg(s, a->rd);
+ TCGv_i64 val = tcg_temp_new_i64();
+
+ do_cntp(s, val, a->esz, a->pg, a->pg);
+ if (a->d) {
+ tcg_gen_sub_i64(reg, reg, val);
+ } else {
+ tcg_gen_add_i64(reg, reg, val);
+ }
+ }
+ return true;
+}
+
+static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
+{
+ if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_i64 val = tcg_temp_new_i64();
+ GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
+
+ do_cntp(s, val, a->esz, a->pg, a->pg);
+ gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn), val, vsz, vsz);
+ }
+ return true;
+}
+
+static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ TCGv_i64 reg = cpu_reg(s, a->rd);
+ TCGv_i64 val = tcg_temp_new_i64();
+
+ do_cntp(s, val, a->esz, a->pg, a->pg);
+ do_sat_addsub_32(reg, val, a->u, a->d);
+ }
+ return true;
+}
+
+static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ TCGv_i64 reg = cpu_reg(s, a->rd);
+ TCGv_i64 val = tcg_temp_new_i64();
+
+ do_cntp(s, val, a->esz, a->pg, a->pg);
+ do_sat_addsub_64(reg, val, a->u, a->d);
+ }
+ return true;
+}
+
+static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
+{
+ if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ TCGv_i64 val = tcg_temp_new_i64();
+ do_cntp(s, val, a->esz, a->pg, a->pg);
+ do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
+ }
+ return true;
+}
+
+/*
+ *** SVE Integer Compare Scalars Group
+ */
+
+static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
+ TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
+ TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
+ TCGv_i64 cmp = tcg_temp_new_i64();
+
+ tcg_gen_setcond_i64(cond, cmp, rn, rm);
+ tcg_gen_extrl_i64_i32(cpu_NF, cmp);
+
+ /* VF = !NF & !CF. */
+ tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
+ tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
+
+ /* Both NF and VF actually look at bit 31. */
+ tcg_gen_neg_i32(cpu_NF, cpu_NF);
+ tcg_gen_neg_i32(cpu_VF, cpu_VF);
+ return true;
+}
+
+static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
+{
+ TCGv_i64 op0, op1, t0, t1, tmax;
+ TCGv_i32 t2;
+ TCGv_ptr ptr;
+ unsigned vsz = vec_full_reg_size(s);
+ unsigned desc = 0;
+ TCGCond cond;
+ uint64_t maxval;
+ /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
+ bool eq = a->eq == a->lt;
+
+ /* The greater-than conditions are all SVE2. */
+ if (a->lt
+ ? !dc_isar_feature(aa64_sve, s)
+ : !dc_isar_feature(aa64_sve2, s)) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ op0 = read_cpu_reg(s, a->rn, 1);
+ op1 = read_cpu_reg(s, a->rm, 1);
+
+ if (!a->sf) {
+ if (a->u) {
+ tcg_gen_ext32u_i64(op0, op0);
+ tcg_gen_ext32u_i64(op1, op1);
+ } else {
+ tcg_gen_ext32s_i64(op0, op0);
+ tcg_gen_ext32s_i64(op1, op1);
+ }
+ }
+
+ /* For the helper, reduce the various conditions to a single computation
+ * of the number of iterations for which the condition holds true.
+ */
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+
+ if (a->lt) {
+ tcg_gen_sub_i64(t0, op1, op0);
+ if (a->u) {
+ maxval = a->sf ? UINT64_MAX : UINT32_MAX;
+ cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
+ } else {
+ maxval = a->sf ? INT64_MAX : INT32_MAX;
+ cond = eq ? TCG_COND_LE : TCG_COND_LT;
+ }
+ } else {
+ tcg_gen_sub_i64(t0, op0, op1);
+ if (a->u) {
+ maxval = 0;
+ cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
+ } else {
+ maxval = a->sf ? INT64_MIN : INT32_MIN;
+ cond = eq ? TCG_COND_GE : TCG_COND_GT;
+ }
+ }
+
+ tmax = tcg_constant_i64(vsz >> a->esz);
+ if (eq) {
+ /* Equality means one more iteration. */
+ tcg_gen_addi_i64(t0, t0, 1);
+
+ /*
+ * For the less-than while, if op1 is maxval (the only case in which
+ * the addition above can overflow), then we produce an all-true
+ * predicate by setting the count to the vector length. This is
+ * because the pseudocode is described as an increment + compare
+ * loop, and the maximum integer would always compare true.
+ * Similarly, the greater-than while has the same issue with the
+ * minimum integer due to the decrement + compare loop.
+ */
+ tcg_gen_movi_i64(t1, maxval);
+ tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
+ }
+
+ /* Bound to the maximum. */
+ tcg_gen_umin_i64(t0, t0, tmax);
+
+ /* Set the count to zero if the condition is false. */
+ tcg_gen_movi_i64(t1, 0);
+ tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
+
+ /* Since we're bounded, pass as a 32-bit type. */
+ t2 = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t2, t0);
+
+ /* Scale elements to bits. */
+ tcg_gen_shli_i32(t2, t2, a->esz);
+
+ desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
+ desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
+
+ ptr = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd));
+
+ if (a->lt) {
+ gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
+ } else {
+ gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
+ }
+ do_pred_flags(t2);
+ return true;
+}
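+
+/*
+ * Worked example: WHILELT with op0 = 5, op1 = 8, 32-bit elements and a
+ * 256-bit vector gives t0 = 3 (the condition holds for 5, 6 and 7),
+ * bounded by tmax = 8 elements, so the helper makes the first three
+ * predicate elements active and clears the rest.
+ */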
+
+static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
+{
+ TCGv_i64 op0, op1, diff, t1, tmax;
+ TCGv_i32 t2;
+ TCGv_ptr ptr;
+ unsigned vsz = vec_full_reg_size(s);
+ unsigned desc = 0;
+
+ if (!dc_isar_feature(aa64_sve2, s)) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ op0 = read_cpu_reg(s, a->rn, 1);
+ op1 = read_cpu_reg(s, a->rm, 1);
+
+ tmax = tcg_constant_i64(vsz);
+ diff = tcg_temp_new_i64();
+
+ if (a->rw) {
+ /* WHILERW */
+ /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
+ t1 = tcg_temp_new_i64();
+ tcg_gen_sub_i64(diff, op0, op1);
+ tcg_gen_sub_i64(t1, op1, op0);
+ tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
+ /* Round down to a multiple of ESIZE. */
+ tcg_gen_andi_i64(diff, diff, -1 << a->esz);
+ /* If op1 == op0, diff == 0, and the condition is always true. */
+ tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
+ } else {
+ /* WHILEWR */
+ tcg_gen_sub_i64(diff, op1, op0);
+ /* Round down to a multiple of ESIZE. */
+ tcg_gen_andi_i64(diff, diff, -1 << a->esz);
+ /* If op0 >= op1, diff <= 0, the condition is always true. */
+ tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
+ }
+
+ /* Bound to the maximum. */
+ tcg_gen_umin_i64(diff, diff, tmax);
+
+ /* Since we're bounded, pass as a 32-bit type. */
+ t2 = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t2, diff);
+
+ desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
+ desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
+
+ ptr = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd));
+
+ gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
+ do_pred_flags(t2);
+ return true;
+}
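+
+/*
+ * E.g. WHILEWR with op0 = 0x1000, op1 = 0x1010 and 32-bit elements
+ * gives diff = 16 bytes, making the first four elements active: roughly
+ * speaking, those are the elements a vectorized loop can process
+ * without a write-after-read hazard between the two pointers.
+ */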
+
+/*
+ *** SVE Integer Wide Immediate - Unpredicated Group
+ */
+
+static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
+{
+ if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ int dofs = vec_full_reg_offset(s, a->rd);
+ uint64_t imm;
+
+ /* Decode the VFP immediate. */
+ imm = vfp_expand_imm(a->esz, a->imm);
+ tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
+ }
+ return true;
+}
+
+static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ int dofs = vec_full_reg_offset(s, a->rd);
+ tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
+ }
+ return true;
+}
+
+TRANS_FEAT(ADD_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_addi, a)
+
+static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
+{
+ a->imm = -a->imm;
+ return trans_ADD_zzi(s, a);
+}
+
+static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
+ static const GVecGen2s op[4] = {
+ { .fni8 = tcg_gen_vec_sub8_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_sve_subri_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8,
+ .scalar_first = true },
+ { .fni8 = tcg_gen_vec_sub16_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_sve_subri_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16,
+ .scalar_first = true },
+ { .fni4 = tcg_gen_sub_i32,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_sve_subri_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32,
+ .scalar_first = true },
+ { .fni8 = tcg_gen_sub_i64,
+ .fniv = tcg_gen_sub_vec,
+ .fno = gen_helper_sve_subri_d,
+ .opt_opc = vecop_list,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64,
+ .scalar_first = true }
+ };
+
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vsz, vsz, tcg_constant_i64(a->imm), &op[a->esz]);
+ }
+ return true;
+}
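+
+/*
+ * SUBR computes imm - Zn rather than Zn - imm; .scalar_first = true in
+ * the GVecGen2s descriptors above is what places the immediate on the
+ * left-hand side of the subtraction.
+ */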
+
+TRANS_FEAT(MUL_zzi, aa64_sve, gen_gvec_fn_arg_zzi, tcg_gen_gvec_muli, a)
+
+static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
+{
+ if (sve_access_check(s)) {
+ do_sat_addsub_vec(s, a->esz, a->rd, a->rn,
+ tcg_constant_i64(a->imm), u, d);
+ }
+ return true;
+}
+
+TRANS_FEAT(SQADD_zzi, aa64_sve, do_zzi_sat, a, false, false)
+TRANS_FEAT(UQADD_zzi, aa64_sve, do_zzi_sat, a, true, false)
+TRANS_FEAT(SQSUB_zzi, aa64_sve, do_zzi_sat, a, false, true)
+TRANS_FEAT(UQSUB_zzi, aa64_sve, do_zzi_sat, a, true, true)
+
+static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
+{
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ tcg_constant_i64(a->imm), vsz, vsz, 0, fn);
+ }
+ return true;
+}
+
+#define DO_ZZI(NAME, name) \
+ static gen_helper_gvec_2i * const name##i_fns[4] = { \
+ gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
+ gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
+ }; \
+ TRANS_FEAT(NAME##_zzi, aa64_sve, do_zzi_ool, a, name##i_fns[a->esz])
+
+DO_ZZI(SMAX, smax)
+DO_ZZI(UMAX, umax)
+DO_ZZI(SMIN, smin)
+DO_ZZI(UMIN, umin)
+
+#undef DO_ZZI
+
+static gen_helper_gvec_4 * const dot_fns[2][2] = {
+ { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
+ { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
+};
+TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
+ dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
+
+/*
+ * SVE Multiply - Indexed
+ */
+
+TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_sdot_idx_b, a)
+TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_sdot_idx_h, a)
+TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_udot_idx_b, a)
+TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_udot_idx_h, a)
+
+TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_sudot_idx_b, a)
+TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_usdot_idx_b, a)
+
+#define DO_SVE2_RRX(NAME, FUNC) \
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
+ a->rd, a->rn, a->rm, a->index)
+
+DO_SVE2_RRX(MUL_zzx_h, gen_helper_gvec_mul_idx_h)
+DO_SVE2_RRX(MUL_zzx_s, gen_helper_gvec_mul_idx_s)
+DO_SVE2_RRX(MUL_zzx_d, gen_helper_gvec_mul_idx_d)
+
+DO_SVE2_RRX(SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
+DO_SVE2_RRX(SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
+DO_SVE2_RRX(SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)
+
+DO_SVE2_RRX(SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
+DO_SVE2_RRX(SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
+DO_SVE2_RRX(SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)
+
+#undef DO_SVE2_RRX
+
+#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
+ a->rd, a->rn, a->rm, (a->index << 1) | TOP)
+
+DO_SVE2_RRX_TB(SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
+DO_SVE2_RRX_TB(SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
+DO_SVE2_RRX_TB(SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
+DO_SVE2_RRX_TB(SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)
+
+DO_SVE2_RRX_TB(SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
+DO_SVE2_RRX_TB(SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
+DO_SVE2_RRX_TB(SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
+DO_SVE2_RRX_TB(SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)
+
+DO_SVE2_RRX_TB(UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
+DO_SVE2_RRX_TB(UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
+DO_SVE2_RRX_TB(UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
+DO_SVE2_RRX_TB(UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)
+
+#undef DO_SVE2_RRX_TB
+
+#define DO_SVE2_RRXR(NAME, FUNC) \
+ TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzxz, FUNC, a)
+
+DO_SVE2_RRXR(MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
+DO_SVE2_RRXR(MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
+DO_SVE2_RRXR(MLA_zzxz_d, gen_helper_gvec_mla_idx_d)
+
+DO_SVE2_RRXR(MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
+DO_SVE2_RRXR(MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
+DO_SVE2_RRXR(MLS_zzxz_d, gen_helper_gvec_mls_idx_d)
+
+DO_SVE2_RRXR(SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
+DO_SVE2_RRXR(SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
+DO_SVE2_RRXR(SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)
+
+DO_SVE2_RRXR(SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
+DO_SVE2_RRXR(SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
+DO_SVE2_RRXR(SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)
+
+#undef DO_SVE2_RRXR
+
+#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
+ TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
+ a->rd, a->rn, a->rm, a->ra, (a->index << 1) | TOP)
+
+DO_SVE2_RRXR_TB(SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
+DO_SVE2_RRXR_TB(SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
+DO_SVE2_RRXR_TB(SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
+DO_SVE2_RRXR_TB(SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)
+
+DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
+DO_SVE2_RRXR_TB(SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
+DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
+DO_SVE2_RRXR_TB(SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)
+
+DO_SVE2_RRXR_TB(SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
+DO_SVE2_RRXR_TB(SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
+DO_SVE2_RRXR_TB(SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
+DO_SVE2_RRXR_TB(SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)
+
+DO_SVE2_RRXR_TB(UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
+DO_SVE2_RRXR_TB(UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
+DO_SVE2_RRXR_TB(UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
+DO_SVE2_RRXR_TB(UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)
+
+DO_SVE2_RRXR_TB(SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
+DO_SVE2_RRXR_TB(SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
+DO_SVE2_RRXR_TB(SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
+DO_SVE2_RRXR_TB(SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)
+
+DO_SVE2_RRXR_TB(UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
+DO_SVE2_RRXR_TB(UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
+DO_SVE2_RRXR_TB(UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
+DO_SVE2_RRXR_TB(UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)
+
+#undef DO_SVE2_RRXR_TB
+
+#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
+ TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_zzzz, FUNC, \
+ a->rd, a->rn, a->rm, a->ra, (a->index << 2) | a->rot)
+
+DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
+DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)
+
+DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
+DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)
+
+DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
+DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
+
+#undef DO_SVE2_RRXR_ROT
+
+/*
+ *** SVE Floating Point Multiply-Add Indexed Group
+ */
+
+static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
+{
+ static gen_helper_gvec_4_ptr * const fns[4] = {
+ NULL,
+ gen_helper_gvec_fmla_idx_h,
+ gen_helper_gvec_fmla_idx_s,
+ gen_helper_gvec_fmla_idx_d,
+ };
+ return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
+ (a->index << 1) | sub,
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+}
+
+TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
+TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
+
+/*
+ *** SVE Floating Point Multiply Indexed Group
+ */
+
+static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
+ NULL, gen_helper_gvec_fmul_idx_h,
+ gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
+};
+TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
+ fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+
+/*
+ *** SVE Floating Point Fast Reduction Group
+ */
+
+typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
+ TCGv_ptr, TCGv_i32);
+
+static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
+ gen_helper_fp_reduce *fn)
+{
+ unsigned vsz, p2vsz;
+ TCGv_i32 t_desc;
+ TCGv_ptr t_zn, t_pg, status;
+ TCGv_i64 temp;
+
+ if (fn == NULL) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ vsz = vec_full_reg_size(s);
+ p2vsz = pow2ceil(vsz);
+ t_desc = tcg_constant_i32(simd_desc(vsz, vsz, p2vsz));
+ temp = tcg_temp_new_i64();
+ t_zn = tcg_temp_new_ptr();
+ t_pg = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
+ tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
+ status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+
+ fn(temp, t_zn, t_pg, status, t_desc);
+
+ write_fp_dreg(s, a->rd, temp);
+ return true;
+}
+
+#define DO_VPZ(NAME, name) \
+ static gen_helper_fp_reduce * const name##_fns[4] = { \
+ NULL, gen_helper_sve_##name##_h, \
+ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
+ }; \
+ TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])
+
+DO_VPZ(FADDV, faddv)
+DO_VPZ(FMINNMV, fminnmv)
+DO_VPZ(FMAXNMV, fmaxnmv)
+DO_VPZ(FMINV, fminv)
+DO_VPZ(FMAXV, fmaxv)
+
+#undef DO_VPZ
+
+/*
+ *** SVE Floating Point Unary Operations - Unpredicated Group
+ */
+
+static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
+ NULL, gen_helper_gvec_frecpe_h,
+ gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
+};
+TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)
+
+static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
+ NULL, gen_helper_gvec_frsqrte_h,
+ gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
+};
+TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
+
+/*
+ *** SVE Floating Point Compare with Zero Group
+ */
+
+static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
+ gen_helper_gvec_3_ptr *fn)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr status =
+ fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+
+ tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ pred_full_reg_offset(s, a->pg),
+ status, vsz, vsz, 0, fn);
+ }
+ return true;
+}
+
+#define DO_PPZ(NAME, name) \
+ static gen_helper_gvec_3_ptr * const name##_fns[] = { \
+ NULL, gen_helper_sve_##name##_h, \
+ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
+ }; \
+ TRANS_FEAT(NAME, aa64_sve, do_ppz_fp, a, name##_fns[a->esz])
+
+DO_PPZ(FCMGE_ppz0, fcmge0)
+DO_PPZ(FCMGT_ppz0, fcmgt0)
+DO_PPZ(FCMLE_ppz0, fcmle0)
+DO_PPZ(FCMLT_ppz0, fcmlt0)
+DO_PPZ(FCMEQ_ppz0, fcmeq0)
+DO_PPZ(FCMNE_ppz0, fcmne0)
+
+#undef DO_PPZ
+
+/*
+ *** SVE floating-point trig multiply-add coefficient
+ */
+
+static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
+ NULL, gen_helper_sve_ftmad_h,
+ gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
+};
+TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
+ ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+
+/*
+ *** SVE Floating Point Accumulating Reduction Group
+ */
+
+static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
+{
+ typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
+ TCGv_ptr, TCGv_ptr, TCGv_i32);
+ static fadda_fn * const fns[3] = {
+ gen_helper_sve_fadda_h,
+ gen_helper_sve_fadda_s,
+ gen_helper_sve_fadda_d,
+ };
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr t_rm, t_pg, t_fpst;
+ TCGv_i64 t_val;
+ TCGv_i32 t_desc;
+
+ if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ t_val = load_esz(tcg_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
+ t_rm = tcg_temp_new_ptr();
+ t_pg = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm));
+ tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
+ t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
+
+ fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
+
+ write_fp_dreg(s, a->rd, t_val);
+ return true;
+}
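+
+/*
+ * FADDA is a strictly ordered reduction: the scalar start value is
+ * accumulated with each active element of Zm in element order, so it
+ * cannot be parallelised and uses a scalar-result helper rather than a
+ * gvec expansion; it is also a non-streaming insn, hence the explicit
+ * s->is_nonstreaming = true above.
+ */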
+
+/*
+ *** SVE Floating Point Arithmetic - Unpredicated Group
+ */
+
+#define DO_FP3(NAME, name) \
+ static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
+ NULL, gen_helper_gvec_##name##_h, \
+ gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
+ }; \
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
+
+DO_FP3(FADD_zzz, fadd)
+DO_FP3(FSUB_zzz, fsub)
+DO_FP3(FMUL_zzz, fmul)
+DO_FP3(FRECPS, recps)
+DO_FP3(FRSQRTS, rsqrts)
+
+#undef DO_FP3
+
+static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
+ NULL, gen_helper_gvec_ftsmul_h,
+ gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
+};
+TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
+ ftsmul_fns[a->esz], a, 0)
+
+/*
+ *** SVE Floating Point Arithmetic - Predicated Group
+ */
+
+#define DO_ZPZZ_FP(NAME, FEAT, name) \
+ static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
+ NULL, gen_helper_##name##_h, \
+ gen_helper_##name##_s, gen_helper_##name##_d \
+ }; \
+ TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
+
+DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
+DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
+DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
+DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
+DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
+DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
+DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
+DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
+DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
+DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
+DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
+
+typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
+ TCGv_i64, TCGv_ptr, TCGv_i32);
+
+static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
+ TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr t_zd, t_zn, t_pg, status;
+ TCGv_i32 desc;
+
+ t_zd = tcg_temp_new_ptr();
+ t_zn = tcg_temp_new_ptr();
+ t_pg = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd));
+ tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn));
+ tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
+
+ status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
+ desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
+ fn(t_zd, t_zn, t_pg, scalar, status, desc);
+}
+
+static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
+ gen_helper_sve_fp2scalar *fn)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16,
+ tcg_constant_i64(imm), fn);
+ }
+ return true;
+}
+
+#define DO_FP_IMM(NAME, name, const0, const1) \
+ static gen_helper_sve_fp2scalar * const name##_fns[4] = { \
+ NULL, gen_helper_sve_##name##_h, \
+ gen_helper_sve_##name##_s, \
+ gen_helper_sve_##name##_d \
+ }; \
+ static uint64_t const name##_const[4][2] = { \
+ { -1, -1 }, \
+ { float16_##const0, float16_##const1 }, \
+ { float32_##const0, float32_##const1 }, \
+ { float64_##const0, float64_##const1 }, \
+ }; \
+ TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
+ name##_const[a->esz][a->imm], name##_fns[a->esz])
+
+DO_FP_IMM(FADD, fadds, half, one)
+DO_FP_IMM(FSUB, fsubs, half, one)
+DO_FP_IMM(FMUL, fmuls, half, two)
+DO_FP_IMM(FSUBR, fsubrs, half, one)
+DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
+DO_FP_IMM(FMINNM, fminnms, zero, one)
+DO_FP_IMM(FMAX, fmaxs, zero, one)
+DO_FP_IMM(FMIN, fmins, zero, one)
+
+#undef DO_FP_IMM
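+
+/*
+ * For these zpzi forms the immediate is a single bit selecting between
+ * two fixed floating-point constants, which is what the *_const[esz][imm]
+ * tables above encode: e.g. FADD picks 0.5 or 1.0 and FMUL picks 0.5 or
+ * 2.0, in the half/single/double format chosen by a->esz.
+ */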
+
+static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
+ gen_helper_gvec_4_ptr *fn)
+{
+ if (fn == NULL) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ pred_full_reg_offset(s, a->pg),
+ status, vsz, vsz, 0, fn);
+ }
+ return true;
+}
+
+#define DO_FPCMP(NAME, name) \
+ static gen_helper_gvec_4_ptr * const name##_fns[4] = { \
+ NULL, gen_helper_sve_##name##_h, \
+ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
+ }; \
+ TRANS_FEAT(NAME##_ppzz, aa64_sve, do_fp_cmp, a, name##_fns[a->esz])
+
+DO_FPCMP(FCMGE, fcmge)
+DO_FPCMP(FCMGT, fcmgt)
+DO_FPCMP(FCMEQ, fcmeq)
+DO_FPCMP(FCMNE, fcmne)
+DO_FPCMP(FCMUO, fcmuo)
+DO_FPCMP(FACGE, facge)
+DO_FPCMP(FACGT, facgt)
+
+#undef DO_FPCMP
+
+static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
+ NULL, gen_helper_sve_fcadd_h,
+ gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
+};
+TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
+ a->rd, a->rn, a->rm, a->pg, a->rot,
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+
+#define DO_FMLA(NAME, name) \
+ static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
+ NULL, gen_helper_sve_##name##_h, \
+ gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
+ }; \
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
+ a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+
+DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
+DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
+DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
+DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
+
+#undef DO_FMLA
+
+static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
+ NULL, gen_helper_sve_fcmla_zpzzz_h,
+ gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
+};
+TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
+ a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+
+static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
+ NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
+};
+TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
+ a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+
+/*
+ *** SVE Floating Point Unary Operations Predicated Group
+ */
+
+TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR)
+TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR)
+
+TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_bfcvt, a, 0, FPST_FPCR)
+
+TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR)
+TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR)
+TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR)
+TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR)
+
+TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16)
+TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16)
+TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16)
+TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16)
+TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16)
+TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16)
+
+TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR)
+TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR)
+TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR)
+TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR)
+TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR)
+TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR)
+
+TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR)
+TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR)
+
+static gen_helper_gvec_3_ptr * const frint_fns[] = {
+ NULL,
+ gen_helper_sve_frint_h,
+ gen_helper_sve_frint_s,
+ gen_helper_sve_frint_d
+};
+TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
+ a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+
+static gen_helper_gvec_3_ptr * const frintx_fns[] = {
+ NULL,
+ gen_helper_sve_frintx_h,
+ gen_helper_sve_frintx_s,
+ gen_helper_sve_frintx_d
+};
+TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
+ a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+
+static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
+ ARMFPRounding mode, gen_helper_gvec_3_ptr *fn)
+{
+ unsigned vsz;
+ TCGv_i32 tmode;
+ TCGv_ptr status;
+
+ if (fn == NULL) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ vsz = vec_full_reg_size(s);
+ status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
+ tmode = gen_set_rmode(mode, status);
+
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ pred_full_reg_offset(s, a->pg),
+ status, vsz, vsz, 0, fn);
+
+ gen_restore_rmode(tmode, status);
+ return true;
+}
+
+TRANS_FEAT(FRINTN, aa64_sve, do_frint_mode, a,
+ FPROUNDING_TIEEVEN, frint_fns[a->esz])
+TRANS_FEAT(FRINTP, aa64_sve, do_frint_mode, a,
+ FPROUNDING_POSINF, frint_fns[a->esz])
+TRANS_FEAT(FRINTM, aa64_sve, do_frint_mode, a,
+ FPROUNDING_NEGINF, frint_fns[a->esz])
+TRANS_FEAT(FRINTZ, aa64_sve, do_frint_mode, a,
+ FPROUNDING_ZERO, frint_fns[a->esz])
+TRANS_FEAT(FRINTA, aa64_sve, do_frint_mode, a,
+ FPROUNDING_TIEAWAY, frint_fns[a->esz])
+
+static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
+ NULL, gen_helper_sve_frecpx_h,
+ gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
+};
+TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
+ a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+
+static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
+ NULL, gen_helper_sve_fsqrt_h,
+ gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
+};
+TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
+ a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+
+TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16)
+TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16)
+TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16)
+
+TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_scvt_ss, a, 0, FPST_FPCR)
+TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_scvt_ds, a, 0, FPST_FPCR)
+
+TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_scvt_sd, a, 0, FPST_FPCR)
+TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_scvt_dd, a, 0, FPST_FPCR)
+
+TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16)
+TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16)
+TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16)
+
+TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR)
+TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR)
+TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR)
+
+TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR)
+
+/*
+ *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
+ */
+
+/* Subroutine loading a vector register at VOFS of LEN bytes.
+ * The load should begin at the address Rn + IMM.
+ */
+
+void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
+ int len, int rn, int imm)
+{
+ int len_align = QEMU_ALIGN_DOWN(len, 16);
+ int len_remain = len % 16;
+ int nparts = len / 16 + ctpop8(len_remain);
+ int midx = get_mem_index(s);
+ TCGv_i64 dirty_addr, clean_addr, t0, t1;
+ TCGv_i128 t16;
+
+ dirty_addr = tcg_temp_new_i64();
+ tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
+ clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
+
+ /*
+ * Note that unpredicated load/store of vector/predicate registers
+ * are defined as a stream of bytes, which equates to little-endian
+ * operations on larger quantities.
+ * Attempt to keep code expansion to a minimum by limiting the
+ * amount of unrolling done.
+ */
+ if (nparts <= 4) {
+ int i;
+
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ t16 = tcg_temp_new_i128();
+
+ for (i = 0; i < len_align; i += 16) {
+ tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
+ MO_LE | MO_128 | MO_ATOM_NONE);
+ tcg_gen_extr_i128_i64(t0, t1, t16);
+ tcg_gen_st_i64(t0, base, vofs + i);
+ tcg_gen_st_i64(t1, base, vofs + i + 8);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 16);
+ }
+ } else {
+ TCGLabel *loop = gen_new_label();
+ TCGv_ptr tp, i = tcg_temp_new_ptr();
+
+ tcg_gen_movi_ptr(i, 0);
+ gen_set_label(loop);
+
+ t16 = tcg_temp_new_i128();
+ tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
+ MO_LE | MO_128 | MO_ATOM_NONE);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 16);
+
+ tp = tcg_temp_new_ptr();
+ tcg_gen_add_ptr(tp, base, i);
+ tcg_gen_addi_ptr(i, i, 16);
+
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_extr_i128_i64(t0, t1, t16);
+
+ tcg_gen_st_i64(t0, tp, vofs);
+ tcg_gen_st_i64(t1, tp, vofs + 8);
+
+ tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
+ }
+
+ /*
+ * Predicate register loads can be any multiple of 2.
+ * Note that we still store the entire 64-bit unit into tcg_env.
+ */
+ if (len_remain >= 8) {
+ t0 = tcg_temp_new_i64();
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
+ tcg_gen_st_i64(t0, base, vofs + len_align);
+ len_remain -= 8;
+ len_align += 8;
+ if (len_remain) {
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ }
+ }
+ if (len_remain) {
+ t0 = tcg_temp_new_i64();
+ switch (len_remain) {
+ case 2:
+ case 4:
+ case 8:
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
+ MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
+ break;
+
+ case 6:
+ t1 = tcg_temp_new_i64();
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 4);
+ tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
+ tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_gen_st_i64(t0, base, vofs + len_align);
+ }
+}
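+
+/*
+ * Worked example of the tail handling above: with a 384-bit vector
+ * (vsz = 48 bytes) a predicate register load has len = 6, so
+ * len_align = 0, len_remain = 6 and nparts = ctpop8(6) = 2.  The
+ * unrolled path is taken but loads no 16-byte chunks, and the
+ * "case 6" arm combines a 4-byte and a 2-byte little-endian load
+ * into the low 48 bits of the 64-bit unit stored back to tcg_env.
+ */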
+
+/* Similarly for stores. */
+void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
+ int len, int rn, int imm)
+{
+ int len_align = QEMU_ALIGN_DOWN(len, 16);
+ int len_remain = len % 16;
+ int nparts = len / 16 + ctpop8(len_remain);
+ int midx = get_mem_index(s);
+ TCGv_i64 dirty_addr, clean_addr, t0, t1;
+ TCGv_i128 t16;
+
+ dirty_addr = tcg_temp_new_i64();
+ tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
+ clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
+
+ /*
+ * Note that unpredicated load/store of vector/predicate registers
+ * are defined as a stream of bytes, which equates to little-endian
+ * operations on larger quantities. There is no nice way to force
+ * a little-endian store for aarch64_be-linux-user out of line.
+ *
+ * Attempt to keep code expansion to a minimum by limiting the
+ * amount of unrolling done.
+ */
+ if (nparts <= 4) {
+ int i;
+
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ t16 = tcg_temp_new_i128();
+ for (i = 0; i < len_align; i += 16) {
+ tcg_gen_ld_i64(t0, base, vofs + i);
+ tcg_gen_ld_i64(t1, base, vofs + i + 8);
+ tcg_gen_concat_i64_i128(t16, t0, t1);
+ tcg_gen_qemu_st_i128(t16, clean_addr, midx,
+ MO_LE | MO_128 | MO_ATOM_NONE);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 16);
+ }
+ } else {
+ TCGLabel *loop = gen_new_label();
+ TCGv_ptr tp, i = tcg_temp_new_ptr();
+
+ tcg_gen_movi_ptr(i, 0);
+ gen_set_label(loop);
+
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tp = tcg_temp_new_ptr();
+ tcg_gen_add_ptr(tp, base, i);
+ tcg_gen_ld_i64(t0, tp, vofs);
+ tcg_gen_ld_i64(t1, tp, vofs + 8);
+ tcg_gen_addi_ptr(i, i, 16);
+
+ t16 = tcg_temp_new_i128();
+ tcg_gen_concat_i64_i128(t16, t0, t1);
+
+ tcg_gen_qemu_st_i128(t16, clean_addr, midx,
+ MO_LE | MO_128 | MO_ATOM_NONE);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 16);
+
+ tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
+ }
+
+ /* Predicate register stores can be any multiple of 2. */
+ if (len_remain >= 8) {
+ t0 = tcg_temp_new_i64();
+ tcg_gen_ld_i64(t0, base, vofs + len_align);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
+ len_remain -= 8;
+ len_align += 8;
+ if (len_remain) {
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ }
+ }
+ if (len_remain) {
+ t0 = tcg_temp_new_i64();
+ tcg_gen_ld_i64(t0, base, vofs + len_align);
+
+ switch (len_remain) {
+ case 2:
+ case 4:
+ case 8:
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx,
+ MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
+ break;
+
+ case 6:
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 4);
+ tcg_gen_shri_i64(t0, t0, 32);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ }
+}
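+
+/*
+ * The store tail mirrors the load tail, except that the odd 6-byte case
+ * reuses a single 64-bit value: the low 32 bits are stored first, then
+ * the remaining 16 bits are produced by shifting that value right by 32.
+ */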
+
+static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ int size = vec_full_reg_size(s);
+ int off = vec_full_reg_offset(s, a->rd);
+ gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size);
+ }
+ return true;
+}
+
+static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ int size = pred_full_reg_size(s);
+ int off = pred_full_reg_offset(s, a->rd);
+ gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size);
+ }
+ return true;
+}
+
+static bool trans_STR_zri(DisasContext *s, arg_rri *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ int size = vec_full_reg_size(s);
+ int off = vec_full_reg_offset(s, a->rd);
+ gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size);
+ }
+ return true;
+}
+
+static bool trans_STR_pri(DisasContext *s, arg_rri *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ int size = pred_full_reg_size(s);
+ int off = pred_full_reg_offset(s, a->rd);
+ gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size);
+ }
+ return true;
+}
+
+/*
+ *** SVE Memory - Contiguous Load Group
+ */
+
+/* The memory mode of the dtype. */
+static const MemOp dtype_mop[16] = {
+ MO_UB, MO_UB, MO_UB, MO_UB,
+ MO_SL, MO_UW, MO_UW, MO_UW,
+ MO_SW, MO_SW, MO_UL, MO_UL,
+ MO_SB, MO_SB, MO_SB, MO_UQ
+};
+
+#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
+
+/* The vector element size of dtype. */
+static const uint8_t dtype_esz[16] = {
+ 0, 1, 2, 3,
+ 3, 1, 2, 3,
+ 3, 2, 2, 3,
+ 3, 2, 1, 3
+};
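+
+/*
+ * The 4-bit dtype field thus encodes both the in-memory element size and
+ * signedness (dtype_mop) and the destination vector element size
+ * (dtype_esz).  For example dtype == 4 is a sign-extending 32-bit load
+ * (MO_SL) into 64-bit elements (esz == 3), i.e. LD1SW to a .D
+ * destination, while dtype == 0 is a plain LD1B to a .B destination.
+ */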
+
+uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs,
+ uint32_t msz, bool is_write, uint32_t data)
+{
+ uint32_t sizem1;
+ uint32_t desc = 0;
+
+ /* Assert all of the data fits, with or without MTE enabled. */
+ assert(nregs >= 1 && nregs <= 4);
+ sizem1 = (nregs << msz) - 1;
+ assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT);
+ assert(data < 1u << SVE_MTEDESC_SHIFT);
+
+ if (s->mte_active[0]) {
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
+ desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1);
+ desc <<= SVE_MTEDESC_SHIFT;
+ }
+ return simd_desc(vsz, vsz, desc | data);
+}
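+
+/*
+ * The resulting descriptor packs three things into one 32-bit value:
+ * the simd_desc() vector-length fields, the caller's data (for example
+ * a destination register number) in the low SVE_MTEDESC_SHIFT bits of
+ * the data field, and, when MTE is active, the MTEDESC fields shifted
+ * above them.  With MTE inactive the MTEDESC portion is left zero and
+ * the helpers are expected to skip tag checking entirely.
+ */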
+
+static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
+ int dtype, uint32_t nregs, bool is_write,
+ gen_helper_gvec_mem *fn)
+{
+ TCGv_ptr t_pg;
+ uint32_t desc;
+
+ if (!s->mte_active[0]) {
+ addr = clean_data_tbi(s, addr);
+ }
+
+ /*
+ * For LD4, for example, there are not enough helper arguments to
+ * pass all 4 registers as pointers, so encode the register number
+ * into the data field.  For consistency, do this even for LD1.
+ */
+ desc = make_svemte_desc(s, vec_full_reg_size(s), nregs,
+ dtype_msz(dtype), is_write, zt);
+ t_pg = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
+ fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));
+}
+
+/* Indexed by [mte][be][dtype][nreg] */
+static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
+ { /* mte inactive, little-endian */
+ { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
+ gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
+ { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
+ gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
+ { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
+ gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
+ { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
+ gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
+
+ /* mte inactive, big-endian */
+ { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
+ gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
+ { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
+ gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
+ { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
+ gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
+ { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
+ gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
+
+ { /* mte active, little-endian */
+ { { gen_helper_sve_ld1bb_r_mte,
+ gen_helper_sve_ld2bb_r_mte,
+ gen_helper_sve_ld3bb_r_mte,
+ gen_helper_sve_ld4bb_r_mte },
+ { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hh_le_r_mte,
+ gen_helper_sve_ld2hh_le_r_mte,
+ gen_helper_sve_ld3hh_le_r_mte,
+ gen_helper_sve_ld4hh_le_r_mte },
+ { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1ss_le_r_mte,
+ gen_helper_sve_ld2ss_le_r_mte,
+ gen_helper_sve_ld3ss_le_r_mte,
+ gen_helper_sve_ld4ss_le_r_mte },
+ { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1dd_le_r_mte,
+ gen_helper_sve_ld2dd_le_r_mte,
+ gen_helper_sve_ld3dd_le_r_mte,
+ gen_helper_sve_ld4dd_le_r_mte } },
+
+ /* mte active, big-endian */
+ { { gen_helper_sve_ld1bb_r_mte,
+ gen_helper_sve_ld2bb_r_mte,
+ gen_helper_sve_ld3bb_r_mte,
+ gen_helper_sve_ld4bb_r_mte },
+ { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hh_be_r_mte,
+ gen_helper_sve_ld2hh_be_r_mte,
+ gen_helper_sve_ld3hh_be_r_mte,
+ gen_helper_sve_ld4hh_be_r_mte },
+ { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1ss_be_r_mte,
+ gen_helper_sve_ld2ss_be_r_mte,
+ gen_helper_sve_ld3ss_be_r_mte,
+ gen_helper_sve_ld4ss_be_r_mte },
+ { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1dd_be_r_mte,
+ gen_helper_sve_ld2dd_be_r_mte,
+ gen_helper_sve_ld3dd_be_r_mte,
+ gen_helper_sve_ld4dd_be_r_mte } } },
+};
+
+static void do_ld_zpa(DisasContext *s, int zt, int pg,
+ TCGv_i64 addr, int dtype, int nreg)
+{
+ gen_helper_gvec_mem *fn
+ = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
+
+ /*
+ * While there are holes in the table, they are not
+ * accessible via the instruction encoding.
+ */
+ assert(fn != NULL);
+ do_mem_zpa(s, zt, pg, addr, dtype, nreg + 1, false, fn);
+}
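+
+/*
+ * The helper naming convention in ldr_fns is ld<N><memsz><eltsz>[u|s]:
+ * e.g. gen_helper_sve_ld1bdu_r loads one register of bytes zero-extended
+ * into 64-bit elements, while gen_helper_sve_ld2hh_le_r loads a
+ * 2-register group of halfwords, little-endian.  The multi-register
+ * columns are populated only where the memory and element sizes match,
+ * since LD2/LD3/LD4 have no widening forms.
+ */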
+
+static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
+{
+ if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ TCGv_i64 addr = tcg_temp_new_i64();
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
+ do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
+ }
+ return true;
+}
+
+static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ int vsz = vec_full_reg_size(s);
+ int elements = vsz >> dtype_esz[a->dtype];
+ TCGv_i64 addr = tcg_temp_new_i64();
+
+ tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
+ (a->imm * elements * (a->nreg + 1))
+ << dtype_msz(a->dtype));
+ do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
+ }
+ return true;
+}
+
+static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
+{
+ static gen_helper_gvec_mem * const fns[2][2][16] = {
+ { /* mte inactive, little-endian */
+ { gen_helper_sve_ldff1bb_r,
+ gen_helper_sve_ldff1bhu_r,
+ gen_helper_sve_ldff1bsu_r,
+ gen_helper_sve_ldff1bdu_r,
+
+ gen_helper_sve_ldff1sds_le_r,
+ gen_helper_sve_ldff1hh_le_r,
+ gen_helper_sve_ldff1hsu_le_r,
+ gen_helper_sve_ldff1hdu_le_r,
+
+ gen_helper_sve_ldff1hds_le_r,
+ gen_helper_sve_ldff1hss_le_r,
+ gen_helper_sve_ldff1ss_le_r,
+ gen_helper_sve_ldff1sdu_le_r,
+
+ gen_helper_sve_ldff1bds_r,
+ gen_helper_sve_ldff1bss_r,
+ gen_helper_sve_ldff1bhs_r,
+ gen_helper_sve_ldff1dd_le_r },
+
+ /* mte inactive, big-endian */
+ { gen_helper_sve_ldff1bb_r,
+ gen_helper_sve_ldff1bhu_r,
+ gen_helper_sve_ldff1bsu_r,
+ gen_helper_sve_ldff1bdu_r,
+
+ gen_helper_sve_ldff1sds_be_r,
+ gen_helper_sve_ldff1hh_be_r,
+ gen_helper_sve_ldff1hsu_be_r,
+ gen_helper_sve_ldff1hdu_be_r,
+
+ gen_helper_sve_ldff1hds_be_r,
+ gen_helper_sve_ldff1hss_be_r,
+ gen_helper_sve_ldff1ss_be_r,
+ gen_helper_sve_ldff1sdu_be_r,
+
+ gen_helper_sve_ldff1bds_r,
+ gen_helper_sve_ldff1bss_r,
+ gen_helper_sve_ldff1bhs_r,
+ gen_helper_sve_ldff1dd_be_r } },
+
+ { /* mte active, little-endian */
+ { gen_helper_sve_ldff1bb_r_mte,
+ gen_helper_sve_ldff1bhu_r_mte,
+ gen_helper_sve_ldff1bsu_r_mte,
+ gen_helper_sve_ldff1bdu_r_mte,
+
+ gen_helper_sve_ldff1sds_le_r_mte,
+ gen_helper_sve_ldff1hh_le_r_mte,
+ gen_helper_sve_ldff1hsu_le_r_mte,
+ gen_helper_sve_ldff1hdu_le_r_mte,
+
+ gen_helper_sve_ldff1hds_le_r_mte,
+ gen_helper_sve_ldff1hss_le_r_mte,
+ gen_helper_sve_ldff1ss_le_r_mte,
+ gen_helper_sve_ldff1sdu_le_r_mte,
+
+ gen_helper_sve_ldff1bds_r_mte,
+ gen_helper_sve_ldff1bss_r_mte,
+ gen_helper_sve_ldff1bhs_r_mte,
+ gen_helper_sve_ldff1dd_le_r_mte },
+
+ /* mte active, big-endian */
+ { gen_helper_sve_ldff1bb_r_mte,
+ gen_helper_sve_ldff1bhu_r_mte,
+ gen_helper_sve_ldff1bsu_r_mte,
+ gen_helper_sve_ldff1bdu_r_mte,
+
+ gen_helper_sve_ldff1sds_be_r_mte,
+ gen_helper_sve_ldff1hh_be_r_mte,
+ gen_helper_sve_ldff1hsu_be_r_mte,
+ gen_helper_sve_ldff1hdu_be_r_mte,
+
+ gen_helper_sve_ldff1hds_be_r_mte,
+ gen_helper_sve_ldff1hss_be_r_mte,
+ gen_helper_sve_ldff1ss_be_r_mte,
+ gen_helper_sve_ldff1sdu_be_r_mte,
+
+ gen_helper_sve_ldff1bds_r_mte,
+ gen_helper_sve_ldff1bss_r_mte,
+ gen_helper_sve_ldff1bhs_r_mte,
+ gen_helper_sve_ldff1dd_be_r_mte } },
+ };
+
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ if (sve_access_check(s)) {
+ TCGv_i64 addr = tcg_temp_new_i64();
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
+ do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
+ fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
+ }
+ return true;
+}
+
+static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
+{
+ static gen_helper_gvec_mem * const fns[2][2][16] = {
+ { /* mte inactive, little-endian */
+ { gen_helper_sve_ldnf1bb_r,
+ gen_helper_sve_ldnf1bhu_r,
+ gen_helper_sve_ldnf1bsu_r,
+ gen_helper_sve_ldnf1bdu_r,
+
+ gen_helper_sve_ldnf1sds_le_r,
+ gen_helper_sve_ldnf1hh_le_r,
+ gen_helper_sve_ldnf1hsu_le_r,
+ gen_helper_sve_ldnf1hdu_le_r,
+
+ gen_helper_sve_ldnf1hds_le_r,
+ gen_helper_sve_ldnf1hss_le_r,
+ gen_helper_sve_ldnf1ss_le_r,
+ gen_helper_sve_ldnf1sdu_le_r,
+
+ gen_helper_sve_ldnf1bds_r,
+ gen_helper_sve_ldnf1bss_r,
+ gen_helper_sve_ldnf1bhs_r,
+ gen_helper_sve_ldnf1dd_le_r },
+
+ /* mte inactive, big-endian */
+ { gen_helper_sve_ldnf1bb_r,
+ gen_helper_sve_ldnf1bhu_r,
+ gen_helper_sve_ldnf1bsu_r,
+ gen_helper_sve_ldnf1bdu_r,
+
+ gen_helper_sve_ldnf1sds_be_r,
+ gen_helper_sve_ldnf1hh_be_r,
+ gen_helper_sve_ldnf1hsu_be_r,
+ gen_helper_sve_ldnf1hdu_be_r,
+
+ gen_helper_sve_ldnf1hds_be_r,
+ gen_helper_sve_ldnf1hss_be_r,
+ gen_helper_sve_ldnf1ss_be_r,
+ gen_helper_sve_ldnf1sdu_be_r,
+
+ gen_helper_sve_ldnf1bds_r,
+ gen_helper_sve_ldnf1bss_r,
+ gen_helper_sve_ldnf1bhs_r,
+ gen_helper_sve_ldnf1dd_be_r } },
+
+ { /* mte active, little-endian */
+ { gen_helper_sve_ldnf1bb_r_mte,
+ gen_helper_sve_ldnf1bhu_r_mte,
+ gen_helper_sve_ldnf1bsu_r_mte,
+ gen_helper_sve_ldnf1bdu_r_mte,
+
+ gen_helper_sve_ldnf1sds_le_r_mte,
+ gen_helper_sve_ldnf1hh_le_r_mte,
+ gen_helper_sve_ldnf1hsu_le_r_mte,
+ gen_helper_sve_ldnf1hdu_le_r_mte,
+
+ gen_helper_sve_ldnf1hds_le_r_mte,
+ gen_helper_sve_ldnf1hss_le_r_mte,
+ gen_helper_sve_ldnf1ss_le_r_mte,
+ gen_helper_sve_ldnf1sdu_le_r_mte,
+
+ gen_helper_sve_ldnf1bds_r_mte,
+ gen_helper_sve_ldnf1bss_r_mte,
+ gen_helper_sve_ldnf1bhs_r_mte,
+ gen_helper_sve_ldnf1dd_le_r_mte },
+
+ /* mte active, big-endian */
+ { gen_helper_sve_ldnf1bb_r_mte,
+ gen_helper_sve_ldnf1bhu_r_mte,
+ gen_helper_sve_ldnf1bsu_r_mte,
+ gen_helper_sve_ldnf1bdu_r_mte,
+
+ gen_helper_sve_ldnf1sds_be_r_mte,
+ gen_helper_sve_ldnf1hh_be_r_mte,
+ gen_helper_sve_ldnf1hsu_be_r_mte,
+ gen_helper_sve_ldnf1hdu_be_r_mte,
+
+ gen_helper_sve_ldnf1hds_be_r_mte,
+ gen_helper_sve_ldnf1hss_be_r_mte,
+ gen_helper_sve_ldnf1ss_be_r_mte,
+ gen_helper_sve_ldnf1sdu_be_r_mte,
+
+ gen_helper_sve_ldnf1bds_r_mte,
+ gen_helper_sve_ldnf1bss_r_mte,
+ gen_helper_sve_ldnf1bhs_r_mte,
+ gen_helper_sve_ldnf1dd_be_r_mte } },
+ };
+
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ if (sve_access_check(s)) {
+ int vsz = vec_full_reg_size(s);
+ int elements = vsz >> dtype_esz[a->dtype];
+ int off = (a->imm * elements) << dtype_msz(a->dtype);
+ TCGv_i64 addr = tcg_temp_new_i64();
+
+ tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
+ do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
+ fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
+ }
+ return true;
+}
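+
+/*
+ * At translate time LDFF1 and LDNF1 differ from ordinary contiguous
+ * loads only in the helper selected and in being marked non-streaming;
+ * the first-fault/non-fault behaviour itself (suppressing the fault and
+ * updating FFR for elements that could not be loaded) is implemented in
+ * the out-of-line helpers rather than here.
+ */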
+
+static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr t_pg;
+ int poff;
+ uint32_t desc;
+
+ /* Load the first quadword using the normal predicated load helpers. */
+ if (!s->mte_active[0]) {
+ addr = clean_data_tbi(s, addr);
+ }
+
+ poff = pred_full_reg_offset(s, pg);
+ if (vsz > 16) {
+ /*
+ * Zero-extend the first 16 bits of the predicate into a temporary.
+ * This avoids triggering an assert making sure we don't have bits
+ * set within a predicate beyond VQ, but we have lowered VQ to 1
+ * for this load operation.
+ */
+ TCGv_i64 tmp = tcg_temp_new_i64();
+#if HOST_BIG_ENDIAN
+ poff += 6;
+#endif
+ tcg_gen_ld16u_i64(tmp, tcg_env, poff);
+
+ poff = offsetof(CPUARMState, vfp.preg_tmp);
+ tcg_gen_st_i64(tmp, tcg_env, poff);
+ }
+
+ t_pg = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(t_pg, tcg_env, poff);
+
+ gen_helper_gvec_mem *fn
+ = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
+ desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt);
+ fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));
+
+ /* Replicate that first quadword. */
+ if (vsz > 16) {
+ int doff = vec_full_reg_offset(s, zt);
+ tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
+ }
+}
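+
+/*
+ * do_ldrq thus implements LD1RQ as a single predicated 16-byte load
+ * (with VQ effectively lowered to 1 via the preg_tmp copy above) whose
+ * result is broadcast to every quadword of the destination; the
+ * tcg_gen_gvec_dup_mem(4, ...) call uses vece = 4, i.e. a replication
+ * granule of 2^4 = 16 bytes.
+ */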
+
+static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
+{
+ if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ int msz = dtype_msz(a->dtype);
+ TCGv_i64 addr = tcg_temp_new_i64();
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
+ do_ldrq(s, a->rd, a->pg, addr, a->dtype);
+ }
+ return true;
+}
+
+static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ TCGv_i64 addr = tcg_temp_new_i64();
+ tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
+ do_ldrq(s, a->rd, a->pg, addr, a->dtype);
+ }
+ return true;
+}
+
+static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ unsigned vsz_r32;
+ TCGv_ptr t_pg;
+ int poff, doff;
+ uint32_t desc;
+
+ if (vsz < 32) {
+ /*
+ * Note that this UNDEFINED check comes after CheckSVEEnabled()
+ * in the ARM pseudocode, which is the sve_access_check() done
+ * in our caller; the caller must therefore not return false here.
+ */
+ unallocated_encoding(s);
+ return;
+ }
+
+ /* Load the first octaword using the normal predicated load helpers. */
+ if (!s->mte_active[0]) {
+ addr = clean_data_tbi(s, addr);
+ }
+
+ poff = pred_full_reg_offset(s, pg);
+ if (vsz > 32) {
+ /*
+ * Zero-extend the first 32 bits of the predicate into a temporary.
+ * This avoids triggering an assert making sure we don't have bits
+ * set within a predicate beyond VQ, but we have lowered VQ to 2
+ * for this load operation.
+ */
+ TCGv_i64 tmp = tcg_temp_new_i64();
+#if HOST_BIG_ENDIAN
+ poff += 4;
+#endif
+ tcg_gen_ld32u_i64(tmp, tcg_env, poff);
+
+ poff = offsetof(CPUARMState, vfp.preg_tmp);
+ tcg_gen_st_i64(tmp, tcg_env, poff);
+ }
+
+ t_pg = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(t_pg, tcg_env, poff);
+
+ gen_helper_gvec_mem *fn
+ = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
+ desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt);
+ fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));
+
+ /*
+ * Replicate that first octaword.
+ * The replication happens in units of 32; if the full vector size
+ * is not a multiple of 32, the final bits are zeroed.
+ */
+ doff = vec_full_reg_offset(s, zt);
+ vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
+ if (vsz >= 64) {
+ tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
+ }
+ vsz -= vsz_r32;
+ if (vsz) {
+ tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
+ }
+}
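+
+/*
+ * do_ldro (LD1RO) is the 32-byte analogue of do_ldrq, but because the
+ * vector length need not be a multiple of 32 the tail is zeroed rather
+ * than partially replicated.  E.g. with vsz = 48: vsz_r32 = 32, no
+ * dup_mem is emitted (vsz < 64), and the final 16 bytes of the
+ * destination are cleared by tcg_gen_gvec_dup_imm.
+ */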
+
+static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
+{
+ if (!dc_isar_feature(aa64_sve_f64mm, s)) {
+ return false;
+ }
+ if (a->rm == 31) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ if (sve_access_check(s)) {
+ TCGv_i64 addr = tcg_temp_new_i64();
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
+ do_ldro(s, a->rd, a->pg, addr, a->dtype);
+ }
+ return true;
+}
+
+static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
+{
+ if (!dc_isar_feature(aa64_sve_f64mm, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ if (sve_access_check(s)) {
+ TCGv_i64 addr = tcg_temp_new_i64();
+ tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
+ do_ldro(s, a->rd, a->pg, addr, a->dtype);
+ }
+ return true;
+}
+
+/* Load and broadcast element. */
+static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ unsigned psz = pred_full_reg_size(s);
+ unsigned esz = dtype_esz[a->dtype];
+ unsigned msz = dtype_msz(a->dtype);
+ TCGLabel *over;
+ TCGv_i64 temp, clean_addr;
+ MemOp memop;
+
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ over = gen_new_label();
+
+ /* If the guarding predicate has no bits set, no load occurs. */
+ if (psz <= 8) {
+ /*
+ * Reduce the pred_esz_masks value simply to reduce the
+ * size of the code generated here.
+ */
+ uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
+ temp = tcg_temp_new_i64();
+ tcg_gen_ld_i64(temp, tcg_env, pred_full_reg_offset(s, a->pg));
+ tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
+ tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
+ } else {
+ TCGv_i32 t32 = tcg_temp_new_i32();
+ find_last_active(s, t32, esz, a->pg);
+ tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
+ }
+
+ /* Load the data. */
+ temp = tcg_temp_new_i64();
+ tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
+
+ memop = finalize_memop(s, dtype_mop[a->dtype]);
+ clean_addr = gen_mte_check1(s, temp, false, true, memop);
+ tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop);
+
+ /* Broadcast to *all* elements. */
+ tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
+ vsz, vsz, temp);
+
+ /* Zero the inactive elements. */
+ gen_set_label(over);
+ return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
+}
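+
+/*
+ * Note that the "over" label skips only the load and broadcast: the
+ * final do_movz_zpz still runs, so inactive elements are zeroed and an
+ * all-false predicate leaves the destination register entirely zero
+ * without performing any memory access.
+ */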
+
+static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
+ int msz, int esz, int nreg)
+{
+ static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
+ { { { gen_helper_sve_st1bb_r,
+ gen_helper_sve_st1bh_r,
+ gen_helper_sve_st1bs_r,
+ gen_helper_sve_st1bd_r },
+ { NULL,
+ gen_helper_sve_st1hh_le_r,
+ gen_helper_sve_st1hs_le_r,
+ gen_helper_sve_st1hd_le_r },
+ { NULL, NULL,
+ gen_helper_sve_st1ss_le_r,
+ gen_helper_sve_st1sd_le_r },
+ { NULL, NULL, NULL,
+ gen_helper_sve_st1dd_le_r } },
+ { { gen_helper_sve_st1bb_r,
+ gen_helper_sve_st1bh_r,
+ gen_helper_sve_st1bs_r,
+ gen_helper_sve_st1bd_r },
+ { NULL,
+ gen_helper_sve_st1hh_be_r,
+ gen_helper_sve_st1hs_be_r,
+ gen_helper_sve_st1hd_be_r },
+ { NULL, NULL,
+ gen_helper_sve_st1ss_be_r,
+ gen_helper_sve_st1sd_be_r },
+ { NULL, NULL, NULL,
+ gen_helper_sve_st1dd_be_r } } },
+
+ { { { gen_helper_sve_st1bb_r_mte,
+ gen_helper_sve_st1bh_r_mte,
+ gen_helper_sve_st1bs_r_mte,
+ gen_helper_sve_st1bd_r_mte },
+ { NULL,
+ gen_helper_sve_st1hh_le_r_mte,
+ gen_helper_sve_st1hs_le_r_mte,
+ gen_helper_sve_st1hd_le_r_mte },
+ { NULL, NULL,
+ gen_helper_sve_st1ss_le_r_mte,
+ gen_helper_sve_st1sd_le_r_mte },
+ { NULL, NULL, NULL,
+ gen_helper_sve_st1dd_le_r_mte } },
+ { { gen_helper_sve_st1bb_r_mte,
+ gen_helper_sve_st1bh_r_mte,
+ gen_helper_sve_st1bs_r_mte,
+ gen_helper_sve_st1bd_r_mte },
+ { NULL,
+ gen_helper_sve_st1hh_be_r_mte,
+ gen_helper_sve_st1hs_be_r_mte,
+ gen_helper_sve_st1hd_be_r_mte },
+ { NULL, NULL,
+ gen_helper_sve_st1ss_be_r_mte,
+ gen_helper_sve_st1sd_be_r_mte },
+ { NULL, NULL, NULL,
+ gen_helper_sve_st1dd_be_r_mte } } },
+ };
+ static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
+ { { { gen_helper_sve_st2bb_r,
+ gen_helper_sve_st2hh_le_r,
+ gen_helper_sve_st2ss_le_r,
+ gen_helper_sve_st2dd_le_r },
+ { gen_helper_sve_st3bb_r,
+ gen_helper_sve_st3hh_le_r,
+ gen_helper_sve_st3ss_le_r,
+ gen_helper_sve_st3dd_le_r },
+ { gen_helper_sve_st4bb_r,
+ gen_helper_sve_st4hh_le_r,
+ gen_helper_sve_st4ss_le_r,
+ gen_helper_sve_st4dd_le_r } },
+ { { gen_helper_sve_st2bb_r,
+ gen_helper_sve_st2hh_be_r,
+ gen_helper_sve_st2ss_be_r,
+ gen_helper_sve_st2dd_be_r },
+ { gen_helper_sve_st3bb_r,
+ gen_helper_sve_st3hh_be_r,
+ gen_helper_sve_st3ss_be_r,
+ gen_helper_sve_st3dd_be_r },
+ { gen_helper_sve_st4bb_r,
+ gen_helper_sve_st4hh_be_r,
+ gen_helper_sve_st4ss_be_r,
+ gen_helper_sve_st4dd_be_r } } },
+ { { { gen_helper_sve_st2bb_r_mte,
+ gen_helper_sve_st2hh_le_r_mte,
+ gen_helper_sve_st2ss_le_r_mte,
+ gen_helper_sve_st2dd_le_r_mte },
+ { gen_helper_sve_st3bb_r_mte,
+ gen_helper_sve_st3hh_le_r_mte,
+ gen_helper_sve_st3ss_le_r_mte,
+ gen_helper_sve_st3dd_le_r_mte },
+ { gen_helper_sve_st4bb_r_mte,
+ gen_helper_sve_st4hh_le_r_mte,
+ gen_helper_sve_st4ss_le_r_mte,
+ gen_helper_sve_st4dd_le_r_mte } },
+ { { gen_helper_sve_st2bb_r_mte,
+ gen_helper_sve_st2hh_be_r_mte,
+ gen_helper_sve_st2ss_be_r_mte,
+ gen_helper_sve_st2dd_be_r_mte },
+ { gen_helper_sve_st3bb_r_mte,
+ gen_helper_sve_st3hh_be_r_mte,
+ gen_helper_sve_st3ss_be_r_mte,
+ gen_helper_sve_st3dd_be_r_mte },
+ { gen_helper_sve_st4bb_r_mte,
+ gen_helper_sve_st4hh_be_r_mte,
+ gen_helper_sve_st4ss_be_r_mte,
+ gen_helper_sve_st4dd_be_r_mte } } },
+ };
+ gen_helper_gvec_mem *fn;
+ int be = s->be_data == MO_BE;
+
+ if (nreg == 0) {
+ /* ST1 */
+ fn = fn_single[s->mte_active[0]][be][msz][esz];
+ } else {
+ /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
+ assert(msz == esz);
+ fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
+ }
+ assert(fn != NULL);
+ do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg + 1, true, fn);
+}
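+
+/*
+ * fn_single is indexed [msz][esz] and is upper-triangular: ST1 may
+ * truncate (msz <= esz, e.g. st1bh stores the low byte of each halfword
+ * element) but never widen, so the entries below the diagonal are NULL
+ * and are rejected beforehand by the msz > esz checks in the trans
+ * functions below.
+ */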
+
+static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (a->rm == 31 || a->msz > a->esz) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ TCGv_i64 addr = tcg_temp_new_i64();
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
+ do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
+ }
+ return true;
+}
+
+static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ if (a->msz > a->esz) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ int vsz = vec_full_reg_size(s);
+ int elements = vsz >> a->esz;
+ TCGv_i64 addr = tcg_temp_new_i64();
+
+ tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
+ (a->imm * elements * (a->nreg + 1)) << a->msz);
+ do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
+ }
+ return true;
+}
+
+/*
+ *** SVE gather loads / scatter stores
+ */
+
+static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
+ int scale, TCGv_i64 scalar, int msz, bool is_write,
+ gen_helper_gvec_mem_scatter *fn)
+{
+ TCGv_ptr t_zm = tcg_temp_new_ptr();
+ TCGv_ptr t_pg = tcg_temp_new_ptr();
+ TCGv_ptr t_zt = tcg_temp_new_ptr();
+ uint32_t desc;
+
+ tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
+ tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm));
+ tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt));
+
+ desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale);
+ fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
+}
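+
+/*
+ * For gathers and scatters the "data" slot of the descriptor carries the
+ * scale (0 or msz) to apply to each vector offset element rather than a
+ * register number, since the helper is handed pointers to all three
+ * registers directly and only needs the shift amount plus the scalar
+ * base passed in "scalar".
+ */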
+
+/* Indexed by [mte][be][ff][xs][u][msz]. */
+static gen_helper_gvec_mem_scatter * const
+gather_load_fn32[2][2][2][2][2][3] = {
+ { /* MTE Inactive */
+ { /* Little-endian */
+ { { { gen_helper_sve_ldbss_zsu,
+ gen_helper_sve_ldhss_le_zsu,
+ NULL, },
+ { gen_helper_sve_ldbsu_zsu,
+ gen_helper_sve_ldhsu_le_zsu,
+ gen_helper_sve_ldss_le_zsu, } },
+ { { gen_helper_sve_ldbss_zss,
+ gen_helper_sve_ldhss_le_zss,
+ NULL, },
+ { gen_helper_sve_ldbsu_zss,
+ gen_helper_sve_ldhsu_le_zss,
+ gen_helper_sve_ldss_le_zss, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbss_zsu,
+ gen_helper_sve_ldffhss_le_zsu,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zsu,
+ gen_helper_sve_ldffhsu_le_zsu,
+ gen_helper_sve_ldffss_le_zsu, } },
+ { { gen_helper_sve_ldffbss_zss,
+ gen_helper_sve_ldffhss_le_zss,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zss,
+ gen_helper_sve_ldffhsu_le_zss,
+ gen_helper_sve_ldffss_le_zss, } } } },
+
+ { /* Big-endian */
+ { { { gen_helper_sve_ldbss_zsu,
+ gen_helper_sve_ldhss_be_zsu,
+ NULL, },
+ { gen_helper_sve_ldbsu_zsu,
+ gen_helper_sve_ldhsu_be_zsu,
+ gen_helper_sve_ldss_be_zsu, } },
+ { { gen_helper_sve_ldbss_zss,
+ gen_helper_sve_ldhss_be_zss,
+ NULL, },
+ { gen_helper_sve_ldbsu_zss,
+ gen_helper_sve_ldhsu_be_zss,
+ gen_helper_sve_ldss_be_zss, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbss_zsu,
+ gen_helper_sve_ldffhss_be_zsu,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zsu,
+ gen_helper_sve_ldffhsu_be_zsu,
+ gen_helper_sve_ldffss_be_zsu, } },
+ { { gen_helper_sve_ldffbss_zss,
+ gen_helper_sve_ldffhss_be_zss,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zss,
+ gen_helper_sve_ldffhsu_be_zss,
+ gen_helper_sve_ldffss_be_zss, } } } } },
+ { /* MTE Active */
+ { /* Little-endian */
+ { { { gen_helper_sve_ldbss_zsu_mte,
+ gen_helper_sve_ldhss_le_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldbsu_zsu_mte,
+ gen_helper_sve_ldhsu_le_zsu_mte,
+ gen_helper_sve_ldss_le_zsu_mte, } },
+ { { gen_helper_sve_ldbss_zss_mte,
+ gen_helper_sve_ldhss_le_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldbsu_zss_mte,
+ gen_helper_sve_ldhsu_le_zss_mte,
+ gen_helper_sve_ldss_le_zss_mte, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbss_zsu_mte,
+ gen_helper_sve_ldffhss_le_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zsu_mte,
+ gen_helper_sve_ldffhsu_le_zsu_mte,
+ gen_helper_sve_ldffss_le_zsu_mte, } },
+ { { gen_helper_sve_ldffbss_zss_mte,
+ gen_helper_sve_ldffhss_le_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zss_mte,
+ gen_helper_sve_ldffhsu_le_zss_mte,
+ gen_helper_sve_ldffss_le_zss_mte, } } } },
+
+ { /* Big-endian */
+ { { { gen_helper_sve_ldbss_zsu_mte,
+ gen_helper_sve_ldhss_be_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldbsu_zsu_mte,
+ gen_helper_sve_ldhsu_be_zsu_mte,
+ gen_helper_sve_ldss_be_zsu_mte, } },
+ { { gen_helper_sve_ldbss_zss_mte,
+ gen_helper_sve_ldhss_be_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldbsu_zss_mte,
+ gen_helper_sve_ldhsu_be_zss_mte,
+ gen_helper_sve_ldss_be_zss_mte, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbss_zsu_mte,
+ gen_helper_sve_ldffhss_be_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zsu_mte,
+ gen_helper_sve_ldffhsu_be_zsu_mte,
+ gen_helper_sve_ldffss_be_zsu_mte, } },
+ { { gen_helper_sve_ldffbss_zss_mte,
+ gen_helper_sve_ldffhss_be_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zss_mte,
+ gen_helper_sve_ldffhsu_be_zss_mte,
+ gen_helper_sve_ldffss_be_zss_mte, } } } } },
+};
+
+/* Note that we overload xs=2 to indicate 64-bit offset. */
+static gen_helper_gvec_mem_scatter * const
+gather_load_fn64[2][2][2][3][2][4] = {
+ { /* MTE Inactive */
+ { /* Little-endian */
+ { { { gen_helper_sve_ldbds_zsu,
+ gen_helper_sve_ldhds_le_zsu,
+ gen_helper_sve_ldsds_le_zsu,
+ NULL, },
+ { gen_helper_sve_ldbdu_zsu,
+ gen_helper_sve_ldhdu_le_zsu,
+ gen_helper_sve_ldsdu_le_zsu,
+ gen_helper_sve_lddd_le_zsu, } },
+ { { gen_helper_sve_ldbds_zss,
+ gen_helper_sve_ldhds_le_zss,
+ gen_helper_sve_ldsds_le_zss,
+ NULL, },
+ { gen_helper_sve_ldbdu_zss,
+ gen_helper_sve_ldhdu_le_zss,
+ gen_helper_sve_ldsdu_le_zss,
+ gen_helper_sve_lddd_le_zss, } },
+ { { gen_helper_sve_ldbds_zd,
+ gen_helper_sve_ldhds_le_zd,
+ gen_helper_sve_ldsds_le_zd,
+ NULL, },
+ { gen_helper_sve_ldbdu_zd,
+ gen_helper_sve_ldhdu_le_zd,
+ gen_helper_sve_ldsdu_le_zd,
+ gen_helper_sve_lddd_le_zd, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbds_zsu,
+ gen_helper_sve_ldffhds_le_zsu,
+ gen_helper_sve_ldffsds_le_zsu,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zsu,
+ gen_helper_sve_ldffhdu_le_zsu,
+ gen_helper_sve_ldffsdu_le_zsu,
+ gen_helper_sve_ldffdd_le_zsu, } },
+ { { gen_helper_sve_ldffbds_zss,
+ gen_helper_sve_ldffhds_le_zss,
+ gen_helper_sve_ldffsds_le_zss,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zss,
+ gen_helper_sve_ldffhdu_le_zss,
+ gen_helper_sve_ldffsdu_le_zss,
+ gen_helper_sve_ldffdd_le_zss, } },
+ { { gen_helper_sve_ldffbds_zd,
+ gen_helper_sve_ldffhds_le_zd,
+ gen_helper_sve_ldffsds_le_zd,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zd,
+ gen_helper_sve_ldffhdu_le_zd,
+ gen_helper_sve_ldffsdu_le_zd,
+ gen_helper_sve_ldffdd_le_zd, } } } },
+ { /* Big-endian */
+ { { { gen_helper_sve_ldbds_zsu,
+ gen_helper_sve_ldhds_be_zsu,
+ gen_helper_sve_ldsds_be_zsu,
+ NULL, },
+ { gen_helper_sve_ldbdu_zsu,
+ gen_helper_sve_ldhdu_be_zsu,
+ gen_helper_sve_ldsdu_be_zsu,
+ gen_helper_sve_lddd_be_zsu, } },
+ { { gen_helper_sve_ldbds_zss,
+ gen_helper_sve_ldhds_be_zss,
+ gen_helper_sve_ldsds_be_zss,
+ NULL, },
+ { gen_helper_sve_ldbdu_zss,
+ gen_helper_sve_ldhdu_be_zss,
+ gen_helper_sve_ldsdu_be_zss,
+ gen_helper_sve_lddd_be_zss, } },
+ { { gen_helper_sve_ldbds_zd,
+ gen_helper_sve_ldhds_be_zd,
+ gen_helper_sve_ldsds_be_zd,
+ NULL, },
+ { gen_helper_sve_ldbdu_zd,
+ gen_helper_sve_ldhdu_be_zd,
+ gen_helper_sve_ldsdu_be_zd,
+ gen_helper_sve_lddd_be_zd, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbds_zsu,
+ gen_helper_sve_ldffhds_be_zsu,
+ gen_helper_sve_ldffsds_be_zsu,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zsu,
+ gen_helper_sve_ldffhdu_be_zsu,
+ gen_helper_sve_ldffsdu_be_zsu,
+ gen_helper_sve_ldffdd_be_zsu, } },
+ { { gen_helper_sve_ldffbds_zss,
+ gen_helper_sve_ldffhds_be_zss,
+ gen_helper_sve_ldffsds_be_zss,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zss,
+ gen_helper_sve_ldffhdu_be_zss,
+ gen_helper_sve_ldffsdu_be_zss,
+ gen_helper_sve_ldffdd_be_zss, } },
+ { { gen_helper_sve_ldffbds_zd,
+ gen_helper_sve_ldffhds_be_zd,
+ gen_helper_sve_ldffsds_be_zd,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zd,
+ gen_helper_sve_ldffhdu_be_zd,
+ gen_helper_sve_ldffsdu_be_zd,
+ gen_helper_sve_ldffdd_be_zd, } } } } },
+ { /* MTE Active */
+ { /* Little-endian */
+ { { { gen_helper_sve_ldbds_zsu_mte,
+ gen_helper_sve_ldhds_le_zsu_mte,
+ gen_helper_sve_ldsds_le_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldbdu_zsu_mte,
+ gen_helper_sve_ldhdu_le_zsu_mte,
+ gen_helper_sve_ldsdu_le_zsu_mte,
+ gen_helper_sve_lddd_le_zsu_mte, } },
+ { { gen_helper_sve_ldbds_zss_mte,
+ gen_helper_sve_ldhds_le_zss_mte,
+ gen_helper_sve_ldsds_le_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldbdu_zss_mte,
+ gen_helper_sve_ldhdu_le_zss_mte,
+ gen_helper_sve_ldsdu_le_zss_mte,
+ gen_helper_sve_lddd_le_zss_mte, } },
+ { { gen_helper_sve_ldbds_zd_mte,
+ gen_helper_sve_ldhds_le_zd_mte,
+ gen_helper_sve_ldsds_le_zd_mte,
+ NULL, },
+ { gen_helper_sve_ldbdu_zd_mte,
+ gen_helper_sve_ldhdu_le_zd_mte,
+ gen_helper_sve_ldsdu_le_zd_mte,
+ gen_helper_sve_lddd_le_zd_mte, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbds_zsu_mte,
+ gen_helper_sve_ldffhds_le_zsu_mte,
+ gen_helper_sve_ldffsds_le_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zsu_mte,
+ gen_helper_sve_ldffhdu_le_zsu_mte,
+ gen_helper_sve_ldffsdu_le_zsu_mte,
+ gen_helper_sve_ldffdd_le_zsu_mte, } },
+ { { gen_helper_sve_ldffbds_zss_mte,
+ gen_helper_sve_ldffhds_le_zss_mte,
+ gen_helper_sve_ldffsds_le_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zss_mte,
+ gen_helper_sve_ldffhdu_le_zss_mte,
+ gen_helper_sve_ldffsdu_le_zss_mte,
+ gen_helper_sve_ldffdd_le_zss_mte, } },
+ { { gen_helper_sve_ldffbds_zd_mte,
+ gen_helper_sve_ldffhds_le_zd_mte,
+ gen_helper_sve_ldffsds_le_zd_mte,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zd_mte,
+ gen_helper_sve_ldffhdu_le_zd_mte,
+ gen_helper_sve_ldffsdu_le_zd_mte,
+ gen_helper_sve_ldffdd_le_zd_mte, } } } },
+ { /* Big-endian */
+ { { { gen_helper_sve_ldbds_zsu_mte,
+ gen_helper_sve_ldhds_be_zsu_mte,
+ gen_helper_sve_ldsds_be_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldbdu_zsu_mte,
+ gen_helper_sve_ldhdu_be_zsu_mte,
+ gen_helper_sve_ldsdu_be_zsu_mte,
+ gen_helper_sve_lddd_be_zsu_mte, } },
+ { { gen_helper_sve_ldbds_zss_mte,
+ gen_helper_sve_ldhds_be_zss_mte,
+ gen_helper_sve_ldsds_be_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldbdu_zss_mte,
+ gen_helper_sve_ldhdu_be_zss_mte,
+ gen_helper_sve_ldsdu_be_zss_mte,
+ gen_helper_sve_lddd_be_zss_mte, } },
+ { { gen_helper_sve_ldbds_zd_mte,
+ gen_helper_sve_ldhds_be_zd_mte,
+ gen_helper_sve_ldsds_be_zd_mte,
+ NULL, },
+ { gen_helper_sve_ldbdu_zd_mte,
+ gen_helper_sve_ldhdu_be_zd_mte,
+ gen_helper_sve_ldsdu_be_zd_mte,
+ gen_helper_sve_lddd_be_zd_mte, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbds_zsu_mte,
+ gen_helper_sve_ldffhds_be_zsu_mte,
+ gen_helper_sve_ldffsds_be_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zsu_mte,
+ gen_helper_sve_ldffhdu_be_zsu_mte,
+ gen_helper_sve_ldffsdu_be_zsu_mte,
+ gen_helper_sve_ldffdd_be_zsu_mte, } },
+ { { gen_helper_sve_ldffbds_zss_mte,
+ gen_helper_sve_ldffhds_be_zss_mte,
+ gen_helper_sve_ldffsds_be_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zss_mte,
+ gen_helper_sve_ldffhdu_be_zss_mte,
+ gen_helper_sve_ldffsdu_be_zss_mte,
+ gen_helper_sve_ldffdd_be_zss_mte, } },
+ { { gen_helper_sve_ldffbds_zd_mte,
+ gen_helper_sve_ldffhds_be_zd_mte,
+ gen_helper_sve_ldffsds_be_zd_mte,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zd_mte,
+ gen_helper_sve_ldffhdu_be_zd_mte,
+ gen_helper_sve_ldffsdu_be_zd_mte,
+ gen_helper_sve_ldffdd_be_zd_mte, } } } } },
+};
+
+static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
+{
+ gen_helper_gvec_mem_scatter *fn = NULL;
+ bool be = s->be_data == MO_BE;
+ bool mte = s->mte_active[0];
+
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ switch (a->esz) {
+ case MO_32:
+ fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
+ break;
+ case MO_64:
+ fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
+ break;
+ }
+ assert(fn != NULL);
+
+ do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
+ cpu_reg_sp(s, a->rn), a->msz, false, fn);
+ return true;
+}
+
+static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
+{
+ gen_helper_gvec_mem_scatter *fn = NULL;
+ bool be = s->be_data == MO_BE;
+ bool mte = s->mte_active[0];
+
+ if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
+ return false;
+ }
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ switch (a->esz) {
+ case MO_32:
+ fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
+ break;
+ case MO_64:
+ fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
+ break;
+ }
+ assert(fn != NULL);
+
+ /*
+ * Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
+ * by loading the immediate into the scalar parameter.
+ */
+ do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
+ tcg_constant_i64(a->imm << a->msz), a->msz, false, fn);
+ return true;
+}
+
+static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
+{
+ gen_helper_gvec_mem_scatter *fn = NULL;
+ bool be = s->be_data == MO_BE;
+ bool mte = s->mte_active[0];
+
+ if (a->esz < a->msz + !a->u) {
+ return false;
+ }
+ if (!dc_isar_feature(aa64_sve2, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ switch (a->esz) {
+ case MO_32:
+ fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
+ break;
+ case MO_64:
+ fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
+ break;
+ }
+ assert(fn != NULL);
+
+ do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
+ cpu_reg(s, a->rm), a->msz, false, fn);
+ return true;
+}
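+
+/*
+ * SVE2 LDNT1 (vector plus scalar) reuses the gather tables with ff = 0,
+ * selecting the unsigned 32-bit offset column for .S and the 64-bit
+ * column (xs = 2) for .D: here the vector register Zn supplies the base
+ * addresses and the scalar Rm is the unscaled offset passed in the
+ * scalar parameter.
+ */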
+
+/* Indexed by [mte][be][xs][msz]. */
+static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
+ { /* MTE Inactive */
+ { /* Little-endian */
+ { gen_helper_sve_stbs_zsu,
+ gen_helper_sve_sths_le_zsu,
+ gen_helper_sve_stss_le_zsu, },
+ { gen_helper_sve_stbs_zss,
+ gen_helper_sve_sths_le_zss,
+ gen_helper_sve_stss_le_zss, } },
+ { /* Big-endian */
+ { gen_helper_sve_stbs_zsu,
+ gen_helper_sve_sths_be_zsu,
+ gen_helper_sve_stss_be_zsu, },
+ { gen_helper_sve_stbs_zss,
+ gen_helper_sve_sths_be_zss,
+ gen_helper_sve_stss_be_zss, } } },
+ { /* MTE Active */
+ { /* Little-endian */
+ { gen_helper_sve_stbs_zsu_mte,
+ gen_helper_sve_sths_le_zsu_mte,
+ gen_helper_sve_stss_le_zsu_mte, },
+ { gen_helper_sve_stbs_zss_mte,
+ gen_helper_sve_sths_le_zss_mte,
+ gen_helper_sve_stss_le_zss_mte, } },
+ { /* Big-endian */
+ { gen_helper_sve_stbs_zsu_mte,
+ gen_helper_sve_sths_be_zsu_mte,
+ gen_helper_sve_stss_be_zsu_mte, },
+ { gen_helper_sve_stbs_zss_mte,
+ gen_helper_sve_sths_be_zss_mte,
+ gen_helper_sve_stss_be_zss_mte, } } },
+};
+
+/* Note that we overload xs=2 to indicate 64-bit offset. */
+static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
+ { /* MTE Inactive */
+ { /* Little-endian */
+ { gen_helper_sve_stbd_zsu,
+ gen_helper_sve_sthd_le_zsu,
+ gen_helper_sve_stsd_le_zsu,
+ gen_helper_sve_stdd_le_zsu, },
+ { gen_helper_sve_stbd_zss,
+ gen_helper_sve_sthd_le_zss,
+ gen_helper_sve_stsd_le_zss,
+ gen_helper_sve_stdd_le_zss, },
+ { gen_helper_sve_stbd_zd,
+ gen_helper_sve_sthd_le_zd,
+ gen_helper_sve_stsd_le_zd,
+ gen_helper_sve_stdd_le_zd, } },
+ { /* Big-endian */
+ { gen_helper_sve_stbd_zsu,
+ gen_helper_sve_sthd_be_zsu,
+ gen_helper_sve_stsd_be_zsu,
+ gen_helper_sve_stdd_be_zsu, },
+ { gen_helper_sve_stbd_zss,
+ gen_helper_sve_sthd_be_zss,
+ gen_helper_sve_stsd_be_zss,
+ gen_helper_sve_stdd_be_zss, },
+ { gen_helper_sve_stbd_zd,
+ gen_helper_sve_sthd_be_zd,
+ gen_helper_sve_stsd_be_zd,
+ gen_helper_sve_stdd_be_zd, } } },
+ { /* MTE Active */
+ { /* Little-endian */
+ { gen_helper_sve_stbd_zsu_mte,
+ gen_helper_sve_sthd_le_zsu_mte,
+ gen_helper_sve_stsd_le_zsu_mte,
+ gen_helper_sve_stdd_le_zsu_mte, },
+ { gen_helper_sve_stbd_zss_mte,
+ gen_helper_sve_sthd_le_zss_mte,
+ gen_helper_sve_stsd_le_zss_mte,
+ gen_helper_sve_stdd_le_zss_mte, },
+ { gen_helper_sve_stbd_zd_mte,
+ gen_helper_sve_sthd_le_zd_mte,
+ gen_helper_sve_stsd_le_zd_mte,
+ gen_helper_sve_stdd_le_zd_mte, } },
+ { /* Big-endian */
+ { gen_helper_sve_stbd_zsu_mte,
+ gen_helper_sve_sthd_be_zsu_mte,
+ gen_helper_sve_stsd_be_zsu_mte,
+ gen_helper_sve_stdd_be_zsu_mte, },
+ { gen_helper_sve_stbd_zss_mte,
+ gen_helper_sve_sthd_be_zss_mte,
+ gen_helper_sve_stsd_be_zss_mte,
+ gen_helper_sve_stdd_be_zss_mte, },
+ { gen_helper_sve_stbd_zd_mte,
+ gen_helper_sve_sthd_be_zd_mte,
+ gen_helper_sve_stsd_be_zd_mte,
+ gen_helper_sve_stdd_be_zd_mte, } } },
+};
+
+static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
+{
+ gen_helper_gvec_mem_scatter *fn;
+ bool be = s->be_data == MO_BE;
+ bool mte = s->mte_active[0];
+
+ if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
+ return false;
+ }
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ if (!sve_access_check(s)) {
+ return true;
+ }
+ switch (a->esz) {
+ case MO_32:
+ fn = scatter_store_fn32[mte][be][a->xs][a->msz];
+ break;
+ case MO_64:
+ fn = scatter_store_fn64[mte][be][a->xs][a->msz];
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
+ cpu_reg_sp(s, a->rn), a->msz, true, fn);
+ return true;
+}
+
+static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
+{
+ gen_helper_gvec_mem_scatter *fn = NULL;
+ bool be = s->be_data == MO_BE;
+ bool mte = s->mte_active[0];
+
+ if (a->esz < a->msz) {
+ return false;
+ }
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ switch (a->esz) {
+ case MO_32:
+ fn = scatter_store_fn32[mte][be][0][a->msz];
+ break;
+ case MO_64:
+ fn = scatter_store_fn64[mte][be][2][a->msz];
+ break;
+ }
+ assert(fn != NULL);
+
+ /*
+ * Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
+ * by loading the immediate into the scalar parameter.
+ */
+ do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
+ tcg_constant_i64(a->imm << a->msz), a->msz, true, fn);
+ return true;
+}
+
+static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
+{
+ gen_helper_gvec_mem_scatter *fn;
+ bool be = s->be_data == MO_BE;
+ bool mte = s->mte_active[0];
+
+ if (a->esz < a->msz) {
+ return false;
+ }
+ if (!dc_isar_feature(aa64_sve2, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ switch (a->esz) {
+ case MO_32:
+ fn = scatter_store_fn32[mte][be][0][a->msz];
+ break;
+ case MO_64:
+ fn = scatter_store_fn64[mte][be][2][a->msz];
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
+ cpu_reg(s, a->rm), a->msz, true, fn);
+ return true;
+}
+
+/*
+ * Prefetches
+ */
+
+static bool trans_PRF(DisasContext *s, arg_PRF *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ /* Prefetch is a nop within QEMU. */
+ (void)sve_access_check(s);
+ return true;
+}
+
+static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
+{
+ if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ /* Prefetch is a nop within QEMU. */
+ (void)sve_access_check(s);
+ return true;
+}
+
+static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ /* Prefetch is a nop within QEMU. */
+ s->is_nonstreaming = true;
+ (void)sve_access_check(s);
+ return true;
+}
+
+/*
+ * Move Prefix
+ *
+ * TODO: The implementation so far could handle predicated merging movprfx.
+ * The helper functions as written take an extra source register to
+ * use in the operation, but the result is only written when predication
+ * succeeds. For unpredicated movprfx, we need to rearrange the helpers
+ * to allow the final write back to the destination to be unconditional.
+ * For predicated zeroing movprfx, we need to rearrange the helpers to
+ * allow the final write back to zero inactives.
+ *
+ * In the meantime, just emit the moves.
+ */
+
+TRANS_FEAT(MOVPRFX, aa64_sve, do_mov_z, a->rd, a->rn)
+TRANS_FEAT(MOVPRFX_m, aa64_sve, do_sel_z, a->rd, a->rn, a->rd, a->pg, a->esz)
+TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)
+
+/*
+ * SVE2 Integer Multiply - Unpredicated
+ */
+
+TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)
+
+static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
+ gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
+ gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
+};
+TRANS_FEAT(SMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+ smulh_zzz_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const umulh_zzz_fns[4] = {
+ gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
+ gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
+};
+TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+ umulh_zzz_fns[a->esz], a, 0)
+
+TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+ gen_helper_gvec_pmul_b, a, 0)
+
+static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
+ gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
+ gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
+};
+TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+ sqdmulh_zzz_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
+ gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
+ gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
+};
+TRANS_FEAT(SQRDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+ sqrdmulh_zzz_fns[a->esz], a, 0)
+
+/*
+ * SVE2 Integer - Predicated
+ */
+
+static gen_helper_gvec_4 * const sadlp_fns[4] = {
+ NULL, gen_helper_sve2_sadalp_zpzz_h,
+ gen_helper_sve2_sadalp_zpzz_s, gen_helper_sve2_sadalp_zpzz_d,
+};
+TRANS_FEAT(SADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
+ sadlp_fns[a->esz], a, 0)
+
+static gen_helper_gvec_4 * const uadlp_fns[4] = {
+ NULL, gen_helper_sve2_uadalp_zpzz_h,
+ gen_helper_sve2_uadalp_zpzz_s, gen_helper_sve2_uadalp_zpzz_d,
+};
+TRANS_FEAT(UADALP_zpzz, aa64_sve2, gen_gvec_ool_arg_zpzz,
+ uadlp_fns[a->esz], a, 0)
+
+/*
+ * SVE2 integer unary operations (predicated)
+ */
+
+TRANS_FEAT(URECPE, aa64_sve2, gen_gvec_ool_arg_zpz,
+ a->esz == 2 ? gen_helper_sve2_urecpe_s : NULL, a, 0)
+
+TRANS_FEAT(URSQRTE, aa64_sve2, gen_gvec_ool_arg_zpz,
+ a->esz == 2 ? gen_helper_sve2_ursqrte_s : NULL, a, 0)
+
+static gen_helper_gvec_3 * const sqabs_fns[4] = {
+ gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
+ gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
+};
+TRANS_FEAT(SQABS, aa64_sve2, gen_gvec_ool_arg_zpz, sqabs_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const sqneg_fns[4] = {
+ gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
+ gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
+};
+TRANS_FEAT(SQNEG, aa64_sve2, gen_gvec_ool_arg_zpz, sqneg_fns[a->esz], a, 0)
+
+DO_ZPZZ(SQSHL, aa64_sve2, sve2_sqshl)
+DO_ZPZZ(SQRSHL, aa64_sve2, sve2_sqrshl)
+DO_ZPZZ(SRSHL, aa64_sve2, sve2_srshl)
+
+DO_ZPZZ(UQSHL, aa64_sve2, sve2_uqshl)
+DO_ZPZZ(UQRSHL, aa64_sve2, sve2_uqrshl)
+DO_ZPZZ(URSHL, aa64_sve2, sve2_urshl)
+
+DO_ZPZZ(SHADD, aa64_sve2, sve2_shadd)
+DO_ZPZZ(SRHADD, aa64_sve2, sve2_srhadd)
+DO_ZPZZ(SHSUB, aa64_sve2, sve2_shsub)
+
+DO_ZPZZ(UHADD, aa64_sve2, sve2_uhadd)
+DO_ZPZZ(URHADD, aa64_sve2, sve2_urhadd)
+DO_ZPZZ(UHSUB, aa64_sve2, sve2_uhsub)
+
+DO_ZPZZ(ADDP, aa64_sve2, sve2_addp)
+DO_ZPZZ(SMAXP, aa64_sve2, sve2_smaxp)
+DO_ZPZZ(UMAXP, aa64_sve2, sve2_umaxp)
+DO_ZPZZ(SMINP, aa64_sve2, sve2_sminp)
+DO_ZPZZ(UMINP, aa64_sve2, sve2_uminp)
+
+DO_ZPZZ(SQADD_zpzz, aa64_sve2, sve2_sqadd)
+DO_ZPZZ(UQADD_zpzz, aa64_sve2, sve2_uqadd)
+DO_ZPZZ(SQSUB_zpzz, aa64_sve2, sve2_sqsub)
+DO_ZPZZ(UQSUB_zpzz, aa64_sve2, sve2_uqsub)
+DO_ZPZZ(SUQADD, aa64_sve2, sve2_suqadd)
+DO_ZPZZ(USQADD, aa64_sve2, sve2_usqadd)
+
+/*
+ * SVE2 Widening Integer Arithmetic
+ */
+
+static gen_helper_gvec_3 * const saddl_fns[4] = {
+ NULL, gen_helper_sve2_saddl_h,
+ gen_helper_sve2_saddl_s, gen_helper_sve2_saddl_d,
+};
+TRANS_FEAT(SADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
+ saddl_fns[a->esz], a, 0)
+TRANS_FEAT(SADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
+ saddl_fns[a->esz], a, 3)
+TRANS_FEAT(SADDLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
+ saddl_fns[a->esz], a, 2)
+
+static gen_helper_gvec_3 * const ssubl_fns[4] = {
+ NULL, gen_helper_sve2_ssubl_h,
+ gen_helper_sve2_ssubl_s, gen_helper_sve2_ssubl_d,
+};
+TRANS_FEAT(SSUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
+ ssubl_fns[a->esz], a, 0)
+TRANS_FEAT(SSUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
+ ssubl_fns[a->esz], a, 3)
+TRANS_FEAT(SSUBLBT, aa64_sve2, gen_gvec_ool_arg_zzz,
+ ssubl_fns[a->esz], a, 2)
+TRANS_FEAT(SSUBLTB, aa64_sve2, gen_gvec_ool_arg_zzz,
+ ssubl_fns[a->esz], a, 1)
+
+static gen_helper_gvec_3 * const sabdl_fns[4] = {
+ NULL, gen_helper_sve2_sabdl_h,
+ gen_helper_sve2_sabdl_s, gen_helper_sve2_sabdl_d,
+};
+TRANS_FEAT(SABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
+ sabdl_fns[a->esz], a, 0)
+TRANS_FEAT(SABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
+ sabdl_fns[a->esz], a, 3)
+
+static gen_helper_gvec_3 * const uaddl_fns[4] = {
+ NULL, gen_helper_sve2_uaddl_h,
+ gen_helper_sve2_uaddl_s, gen_helper_sve2_uaddl_d,
+};
+TRANS_FEAT(UADDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
+ uaddl_fns[a->esz], a, 0)
+TRANS_FEAT(UADDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
+ uaddl_fns[a->esz], a, 3)
+
+static gen_helper_gvec_3 * const usubl_fns[4] = {
+ NULL, gen_helper_sve2_usubl_h,
+ gen_helper_sve2_usubl_s, gen_helper_sve2_usubl_d,
+};
+TRANS_FEAT(USUBLB, aa64_sve2, gen_gvec_ool_arg_zzz,
+ usubl_fns[a->esz], a, 0)
+TRANS_FEAT(USUBLT, aa64_sve2, gen_gvec_ool_arg_zzz,
+ usubl_fns[a->esz], a, 3)
+
+static gen_helper_gvec_3 * const uabdl_fns[4] = {
+ NULL, gen_helper_sve2_uabdl_h,
+ gen_helper_sve2_uabdl_s, gen_helper_sve2_uabdl_d,
+};
+TRANS_FEAT(UABDLB, aa64_sve2, gen_gvec_ool_arg_zzz,
+ uabdl_fns[a->esz], a, 0)
+TRANS_FEAT(UABDLT, aa64_sve2, gen_gvec_ool_arg_zzz,
+ uabdl_fns[a->esz], a, 3)
+
+static gen_helper_gvec_3 * const sqdmull_fns[4] = {
+ NULL, gen_helper_sve2_sqdmull_zzz_h,
+ gen_helper_sve2_sqdmull_zzz_s, gen_helper_sve2_sqdmull_zzz_d,
+};
+TRANS_FEAT(SQDMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+ sqdmull_fns[a->esz], a, 0)
+TRANS_FEAT(SQDMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+ sqdmull_fns[a->esz], a, 3)
+
+static gen_helper_gvec_3 * const smull_fns[4] = {
+ NULL, gen_helper_sve2_smull_zzz_h,
+ gen_helper_sve2_smull_zzz_s, gen_helper_sve2_smull_zzz_d,
+};
+TRANS_FEAT(SMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+ smull_fns[a->esz], a, 0)
+TRANS_FEAT(SMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+ smull_fns[a->esz], a, 3)
+
+static gen_helper_gvec_3 * const umull_fns[4] = {
+ NULL, gen_helper_sve2_umull_zzz_h,
+ gen_helper_sve2_umull_zzz_s, gen_helper_sve2_umull_zzz_d,
+};
+TRANS_FEAT(UMULLB_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+ umull_fns[a->esz], a, 0)
+TRANS_FEAT(UMULLT_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
+ umull_fns[a->esz], a, 3)
+
+static gen_helper_gvec_3 * const eoril_fns[4] = {
+ gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
+ gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
+};
+TRANS_FEAT(EORBT, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 2)
+TRANS_FEAT(EORTB, aa64_sve2, gen_gvec_ool_arg_zzz, eoril_fns[a->esz], a, 1)
+
+static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
+{
+ static gen_helper_gvec_3 * const fns[4] = {
+ gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
+ NULL, gen_helper_sve2_pmull_d,
+ };
+
+ if (a->esz == 0) {
+ if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ } else if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
+}
+
+TRANS_FEAT(PMULLB, aa64_sve2, do_trans_pmull, a, false)
+TRANS_FEAT(PMULLT, aa64_sve2, do_trans_pmull, a, true)
+
+static gen_helper_gvec_3 * const saddw_fns[4] = {
+ NULL, gen_helper_sve2_saddw_h,
+ gen_helper_sve2_saddw_s, gen_helper_sve2_saddw_d,
+};
+TRANS_FEAT(SADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 0)
+TRANS_FEAT(SADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, saddw_fns[a->esz], a, 1)
+
+static gen_helper_gvec_3 * const ssubw_fns[4] = {
+ NULL, gen_helper_sve2_ssubw_h,
+ gen_helper_sve2_ssubw_s, gen_helper_sve2_ssubw_d,
+};
+TRANS_FEAT(SSUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 0)
+TRANS_FEAT(SSUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, ssubw_fns[a->esz], a, 1)
+
+static gen_helper_gvec_3 * const uaddw_fns[4] = {
+ NULL, gen_helper_sve2_uaddw_h,
+ gen_helper_sve2_uaddw_s, gen_helper_sve2_uaddw_d,
+};
+TRANS_FEAT(UADDWB, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 0)
+TRANS_FEAT(UADDWT, aa64_sve2, gen_gvec_ool_arg_zzz, uaddw_fns[a->esz], a, 1)
+
+static gen_helper_gvec_3 * const usubw_fns[4] = {
+ NULL, gen_helper_sve2_usubw_h,
+ gen_helper_sve2_usubw_s, gen_helper_sve2_usubw_d,
+};
+TRANS_FEAT(USUBWB, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 0)
+TRANS_FEAT(USUBWT, aa64_sve2, gen_gvec_ool_arg_zzz, usubw_fns[a->esz], a, 1)
+
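+/*
+ * Expander for SSHLLB/SSHLLT. The immediate is packed by the caller as
+ * (shl << 1) | top. Illustrative sketch of the bottom-half case: for
+ * vece == MO_16 (halfbits == 8), shifting left by 8 and then arithmetic-
+ * shifting right by (8 - shl) sign-extends the low byte of each 16-bit
+ * lane and applies the left shift by shl in a single pair of ops.
+ */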
+static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
+{
+ int top = imm & 1;
+ int shl = imm >> 1;
+ int halfbits = 4 << vece;
+
+ if (top) {
+ if (shl == halfbits) {
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
+ tcg_gen_and_vec(vece, d, n, t);
+ } else {
+ tcg_gen_sari_vec(vece, d, n, halfbits);
+ tcg_gen_shli_vec(vece, d, d, shl);
+ }
+ } else {
+ tcg_gen_shli_vec(vece, d, n, halfbits);
+ tcg_gen_sari_vec(vece, d, d, halfbits - shl);
+ }
+}
+
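+/*
+ * Worked example (illustrative): USHLLT #3 with vece == MO_16 packs
+ * imm == (3 << 1) | 1 == 7, so halfbits == 8, shift == 3 - 8 == -5 and
+ * mask == dup16(0xff << 3) == 0x07f8 in each 16-bit lane: the 64-bit
+ * chunk is shifted right by 5 and masked, leaving every lane equal to
+ * zero_extend(its top source byte) << 3.
+ */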
+static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
+{
+ int halfbits = 4 << vece;
+ int top = imm & 1;
+ int shl = (imm >> 1);
+ int shift;
+ uint64_t mask;
+
+ mask = MAKE_64BIT_MASK(0, halfbits);
+ mask <<= shl;
+ mask = dup_const(vece, mask);
+
+ shift = shl - top * halfbits;
+ if (shift < 0) {
+ tcg_gen_shri_i64(d, n, -shift);
+ } else {
+ tcg_gen_shli_i64(d, n, shift);
+ }
+ tcg_gen_andi_i64(d, d, mask);
+}
+
+static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
+{
+ gen_ushll_i64(MO_16, d, n, imm);
+}
+
+static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
+{
+ gen_ushll_i64(MO_32, d, n, imm);
+}
+
+static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
+{
+ gen_ushll_i64(MO_64, d, n, imm);
+}
+
+static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
+{
+ int halfbits = 4 << vece;
+ int top = imm & 1;
+ int shl = imm >> 1;
+
+ if (top) {
+ if (shl == halfbits) {
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
+ tcg_gen_and_vec(vece, d, n, t);
+ } else {
+ tcg_gen_shri_vec(vece, d, n, halfbits);
+ tcg_gen_shli_vec(vece, d, d, shl);
+ }
+ } else {
+ if (shl == 0) {
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
+ tcg_gen_and_vec(vece, d, n, t);
+ } else {
+ tcg_gen_shli_vec(vece, d, n, halfbits);
+ tcg_gen_shri_vec(vece, d, d, halfbits - shl);
+ }
+ }
+}
+
+static bool do_shll_tb(DisasContext *s, arg_rri_esz *a,
+ const GVecGen2i ops[3], bool sel)
+{
+ if (a->esz < 0 || a->esz > 2) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vsz, vsz, (a->imm << 1) | sel,
+ &ops[a->esz]);
+ }
+ return true;
+}
+
+static const TCGOpcode sshll_list[] = {
+ INDEX_op_shli_vec, INDEX_op_sari_vec, 0
+};
+static const GVecGen2i sshll_ops[3] = {
+ { .fniv = gen_sshll_vec,
+ .opt_opc = sshll_list,
+ .fno = gen_helper_sve2_sshll_h,
+ .vece = MO_16 },
+ { .fniv = gen_sshll_vec,
+ .opt_opc = sshll_list,
+ .fno = gen_helper_sve2_sshll_s,
+ .vece = MO_32 },
+ { .fniv = gen_sshll_vec,
+ .opt_opc = sshll_list,
+ .fno = gen_helper_sve2_sshll_d,
+ .vece = MO_64 }
+};
+TRANS_FEAT(SSHLLB, aa64_sve2, do_shll_tb, a, sshll_ops, false)
+TRANS_FEAT(SSHLLT, aa64_sve2, do_shll_tb, a, sshll_ops, true)
+
+static const TCGOpcode ushll_list[] = {
+ INDEX_op_shli_vec, INDEX_op_shri_vec, 0
+};
+static const GVecGen2i ushll_ops[3] = {
+ { .fni8 = gen_ushll16_i64,
+ .fniv = gen_ushll_vec,
+ .opt_opc = ushll_list,
+ .fno = gen_helper_sve2_ushll_h,
+ .vece = MO_16 },
+ { .fni8 = gen_ushll32_i64,
+ .fniv = gen_ushll_vec,
+ .opt_opc = ushll_list,
+ .fno = gen_helper_sve2_ushll_s,
+ .vece = MO_32 },
+ { .fni8 = gen_ushll64_i64,
+ .fniv = gen_ushll_vec,
+ .opt_opc = ushll_list,
+ .fno = gen_helper_sve2_ushll_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(USHLLB, aa64_sve2, do_shll_tb, a, ushll_ops, false)
+TRANS_FEAT(USHLLT, aa64_sve2, do_shll_tb, a, ushll_ops, true)
+
+static gen_helper_gvec_3 * const bext_fns[4] = {
+ gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
+ gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
+};
+TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
+ bext_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const bdep_fns[4] = {
+ gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
+ gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
+};
+TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
+ bdep_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const bgrp_fns[4] = {
+ gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
+ gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
+};
+TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
+ bgrp_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const cadd_fns[4] = {
+ gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
+ gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d,
+};
+TRANS_FEAT(CADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
+ cadd_fns[a->esz], a, 0)
+TRANS_FEAT(CADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
+ cadd_fns[a->esz], a, 1)
+
+static gen_helper_gvec_3 * const sqcadd_fns[4] = {
+ gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
+ gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d,
+};
+TRANS_FEAT(SQCADD_rot90, aa64_sve2, gen_gvec_ool_arg_zzz,
+ sqcadd_fns[a->esz], a, 0)
+TRANS_FEAT(SQCADD_rot270, aa64_sve2, gen_gvec_ool_arg_zzz,
+ sqcadd_fns[a->esz], a, 1)
+
+static gen_helper_gvec_4 * const sabal_fns[4] = {
+ NULL, gen_helper_sve2_sabal_h,
+ gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d,
+};
+TRANS_FEAT(SABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 0)
+TRANS_FEAT(SABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, sabal_fns[a->esz], a, 1)
+
+static gen_helper_gvec_4 * const uabal_fns[4] = {
+ NULL, gen_helper_sve2_uabal_h,
+ gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d,
+};
+TRANS_FEAT(UABALB, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 0)
+TRANS_FEAT(UABALT, aa64_sve2, gen_gvec_ool_arg_zzzz, uabal_fns[a->esz], a, 1)
+
+static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
+{
+ static gen_helper_gvec_4 * const fns[2] = {
+ gen_helper_sve2_adcl_s,
+ gen_helper_sve2_adcl_d,
+ };
+ /*
+ * Note that in this case the ESZ field encodes both the element size
+ * and whether the operation is an add or a subtract.
+ * Split out 'subtract' into bit 1 of the data field for the helper.
+ */
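+ /*
+ * Illustration (per the shared ADCLB/ADCLT decode, which also covers
+ * SBCLB/SBCLT): esz == 0/1 give the 32-/64-bit add-with-carry helpers,
+ * and esz == 2/3 give the same helpers with the subtract bit set.
+ */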
+ return gen_gvec_ool_arg_zzzz(s, fns[a->esz & 1], a, (a->esz & 2) | sel);
+}
+
+TRANS_FEAT(ADCLB, aa64_sve2, do_adcl, a, false)
+TRANS_FEAT(ADCLT, aa64_sve2, do_adcl, a, true)
+
+TRANS_FEAT(SSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ssra, a)
+TRANS_FEAT(USRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_usra, a)
+TRANS_FEAT(SRSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_srsra, a)
+TRANS_FEAT(URSRA, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_ursra, a)
+TRANS_FEAT(SRI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sri, a)
+TRANS_FEAT(SLI, aa64_sve2, gen_gvec_fn_arg_zzi, gen_gvec_sli, a)
+
+TRANS_FEAT(SABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_saba, a)
+TRANS_FEAT(UABA, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_uaba, a)
+
+static bool do_narrow_extract(DisasContext *s, arg_rri_esz *a,
+ const GVecGen2 ops[3])
+{
+ if (a->esz < 0 || a->esz > MO_32 || a->imm != 0) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vsz, vsz, &ops[a->esz]);
+ }
+ return true;
+}
+
+static const TCGOpcode sqxtn_list[] = {
+ INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+};
+
+static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+ int64_t mask = (1ull << halfbits) - 1;
+ int64_t min = -1ull << (halfbits - 1);
+ int64_t max = -min - 1;
+
+ tcg_gen_dupi_vec(vece, t, min);
+ tcg_gen_smax_vec(vece, d, n, t);
+ tcg_gen_dupi_vec(vece, t, max);
+ tcg_gen_smin_vec(vece, d, d, t);
+ tcg_gen_dupi_vec(vece, t, mask);
+ tcg_gen_and_vec(vece, d, d, t);
+}
+
+static const GVecGen2 sqxtnb_ops[3] = {
+ { .fniv = gen_sqxtnb_vec,
+ .opt_opc = sqxtn_list,
+ .fno = gen_helper_sve2_sqxtnb_h,
+ .vece = MO_16 },
+ { .fniv = gen_sqxtnb_vec,
+ .opt_opc = sqxtn_list,
+ .fno = gen_helper_sve2_sqxtnb_s,
+ .vece = MO_32 },
+ { .fniv = gen_sqxtnb_vec,
+ .opt_opc = sqxtn_list,
+ .fno = gen_helper_sve2_sqxtnb_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(SQXTNB, aa64_sve2, do_narrow_extract, a, sqxtnb_ops)
+
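+/*
+ * For the "T" (top) narrowing forms the saturated result is shifted into
+ * the high half of each lane and merged into the existing destination
+ * with bitsel: with t == mask of the low halfbits, bitsel(d, t, d, n)
+ * computes (d & t) | (n & ~t), i.e. it preserves the even (bottom)
+ * halves of d and writes the new value into the odd (top) halves.
+ */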
+static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+ int64_t mask = (1ull << halfbits) - 1;
+ int64_t min = -1ull << (halfbits - 1);
+ int64_t max = -min - 1;
+
+ tcg_gen_dupi_vec(vece, t, min);
+ tcg_gen_smax_vec(vece, n, n, t);
+ tcg_gen_dupi_vec(vece, t, max);
+ tcg_gen_smin_vec(vece, n, n, t);
+ tcg_gen_shli_vec(vece, n, n, halfbits);
+ tcg_gen_dupi_vec(vece, t, mask);
+ tcg_gen_bitsel_vec(vece, d, t, d, n);
+}
+
+static const GVecGen2 sqxtnt_ops[3] = {
+ { .fniv = gen_sqxtnt_vec,
+ .opt_opc = sqxtn_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqxtnt_h,
+ .vece = MO_16 },
+ { .fniv = gen_sqxtnt_vec,
+ .opt_opc = sqxtn_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqxtnt_s,
+ .vece = MO_32 },
+ { .fniv = gen_sqxtnt_vec,
+ .opt_opc = sqxtn_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqxtnt_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(SQXTNT, aa64_sve2, do_narrow_extract, a, sqxtnt_ops)
+
+static const TCGOpcode uqxtn_list[] = {
+ INDEX_op_shli_vec, INDEX_op_umin_vec, 0
+};
+
+static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+ int64_t max = (1ull << halfbits) - 1;
+
+ tcg_gen_dupi_vec(vece, t, max);
+ tcg_gen_umin_vec(vece, d, n, t);
+}
+
+static const GVecGen2 uqxtnb_ops[3] = {
+ { .fniv = gen_uqxtnb_vec,
+ .opt_opc = uqxtn_list,
+ .fno = gen_helper_sve2_uqxtnb_h,
+ .vece = MO_16 },
+ { .fniv = gen_uqxtnb_vec,
+ .opt_opc = uqxtn_list,
+ .fno = gen_helper_sve2_uqxtnb_s,
+ .vece = MO_32 },
+ { .fniv = gen_uqxtnb_vec,
+ .opt_opc = uqxtn_list,
+ .fno = gen_helper_sve2_uqxtnb_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(UQXTNB, aa64_sve2, do_narrow_extract, a, uqxtnb_ops)
+
+static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+ int64_t max = (1ull << halfbits) - 1;
+
+ tcg_gen_dupi_vec(vece, t, max);
+ tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_shli_vec(vece, n, n, halfbits);
+ tcg_gen_bitsel_vec(vece, d, t, d, n);
+}
+
+static const GVecGen2 uqxtnt_ops[3] = {
+ { .fniv = gen_uqxtnt_vec,
+ .opt_opc = uqxtn_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_uqxtnt_h,
+ .vece = MO_16 },
+ { .fniv = gen_uqxtnt_vec,
+ .opt_opc = uqxtn_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_uqxtnt_s,
+ .vece = MO_32 },
+ { .fniv = gen_uqxtnt_vec,
+ .opt_opc = uqxtn_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_uqxtnt_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(UQXTNT, aa64_sve2, do_narrow_extract, a, uqxtnt_ops)
+
+static const TCGOpcode sqxtun_list[] = {
+ INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
+};
+
+static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+ int64_t max = (1ull << halfbits) - 1;
+
+ tcg_gen_dupi_vec(vece, t, 0);
+ tcg_gen_smax_vec(vece, d, n, t);
+ tcg_gen_dupi_vec(vece, t, max);
+ tcg_gen_umin_vec(vece, d, d, t);
+}
+
+static const GVecGen2 sqxtunb_ops[3] = {
+ { .fniv = gen_sqxtunb_vec,
+ .opt_opc = sqxtun_list,
+ .fno = gen_helper_sve2_sqxtunb_h,
+ .vece = MO_16 },
+ { .fniv = gen_sqxtunb_vec,
+ .opt_opc = sqxtun_list,
+ .fno = gen_helper_sve2_sqxtunb_s,
+ .vece = MO_32 },
+ { .fniv = gen_sqxtunb_vec,
+ .opt_opc = sqxtun_list,
+ .fno = gen_helper_sve2_sqxtunb_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(SQXTUNB, aa64_sve2, do_narrow_extract, a, sqxtunb_ops)
+
+static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+ int64_t max = (1ull << halfbits) - 1;
+
+ tcg_gen_dupi_vec(vece, t, 0);
+ tcg_gen_smax_vec(vece, n, n, t);
+ tcg_gen_dupi_vec(vece, t, max);
+ tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_shli_vec(vece, n, n, halfbits);
+ tcg_gen_bitsel_vec(vece, d, t, d, n);
+}
+
+static const GVecGen2 sqxtunt_ops[3] = {
+ { .fniv = gen_sqxtunt_vec,
+ .opt_opc = sqxtun_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqxtunt_h,
+ .vece = MO_16 },
+ { .fniv = gen_sqxtunt_vec,
+ .opt_opc = sqxtun_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqxtunt_s,
+ .vece = MO_32 },
+ { .fniv = gen_sqxtunt_vec,
+ .opt_opc = sqxtun_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqxtunt_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(SQXTUNT, aa64_sve2, do_narrow_extract, a, sqxtunt_ops)
+
+static bool do_shr_narrow(DisasContext *s, arg_rri_esz *a,
+ const GVecGen2i ops[3])
+{
+ if (a->esz < 0 || a->esz > MO_32) {
+ return false;
+ }
+ assert(a->imm > 0 && a->imm <= (8 << a->esz));
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vsz, vsz, a->imm, &ops[a->esz]);
+ }
+ return true;
+}
+
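+/*
+ * Worked example (illustrative): SHRNB #4 with vece == MO_16 shifts the
+ * whole 64-bit chunk right by 4 and masks with dup16(0x00ff), so each
+ * 16-bit source element contributes bits [11:4] to the bottom byte of
+ * its lane, with the top byte cleared as the B (bottom) form requires.
+ */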
+static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
+{
+ int halfbits = 4 << vece;
+ uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
+
+ tcg_gen_shri_i64(d, n, shr);
+ tcg_gen_andi_i64(d, d, mask);
+}
+
+static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
+{
+ gen_shrnb_i64(MO_16, d, n, shr);
+}
+
+static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
+{
+ gen_shrnb_i64(MO_32, d, n, shr);
+}
+
+static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
+{
+ gen_shrnb_i64(MO_64, d, n, shr);
+}
+
+static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+ uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
+
+ tcg_gen_shri_vec(vece, n, n, shr);
+ tcg_gen_dupi_vec(vece, t, mask);
+ tcg_gen_and_vec(vece, d, n, t);
+}
+
+static const TCGOpcode shrnb_vec_list[] = { INDEX_op_shri_vec, 0 };
+static const GVecGen2i shrnb_ops[3] = {
+ { .fni8 = gen_shrnb16_i64,
+ .fniv = gen_shrnb_vec,
+ .opt_opc = shrnb_vec_list,
+ .fno = gen_helper_sve2_shrnb_h,
+ .vece = MO_16 },
+ { .fni8 = gen_shrnb32_i64,
+ .fniv = gen_shrnb_vec,
+ .opt_opc = shrnb_vec_list,
+ .fno = gen_helper_sve2_shrnb_s,
+ .vece = MO_32 },
+ { .fni8 = gen_shrnb64_i64,
+ .fniv = gen_shrnb_vec,
+ .opt_opc = shrnb_vec_list,
+ .fno = gen_helper_sve2_shrnb_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(SHRNB, aa64_sve2, do_shr_narrow, a, shrnb_ops)
+
+static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
+{
+ int halfbits = 4 << vece;
+ uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));
+
+ tcg_gen_shli_i64(n, n, halfbits - shr);
+ tcg_gen_andi_i64(n, n, ~mask);
+ tcg_gen_andi_i64(d, d, mask);
+ tcg_gen_or_i64(d, d, n);
+}
+
+static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
+{
+ gen_shrnt_i64(MO_16, d, n, shr);
+}
+
+static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
+{
+ gen_shrnt_i64(MO_32, d, n, shr);
+}
+
+static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
+{
+ tcg_gen_shri_i64(n, n, shr);
+ tcg_gen_deposit_i64(d, d, n, 32, 32);
+}
+
+static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+ uint64_t mask = MAKE_64BIT_MASK(0, halfbits);
+
+ tcg_gen_shli_vec(vece, n, n, halfbits - shr);
+ tcg_gen_dupi_vec(vece, t, mask);
+ tcg_gen_bitsel_vec(vece, d, t, d, n);
+}
+
+static const TCGOpcode shrnt_vec_list[] = { INDEX_op_shli_vec, 0 };
+static const GVecGen2i shrnt_ops[3] = {
+ { .fni8 = gen_shrnt16_i64,
+ .fniv = gen_shrnt_vec,
+ .opt_opc = shrnt_vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_shrnt_h,
+ .vece = MO_16 },
+ { .fni8 = gen_shrnt32_i64,
+ .fniv = gen_shrnt_vec,
+ .opt_opc = shrnt_vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_shrnt_s,
+ .vece = MO_32 },
+ { .fni8 = gen_shrnt64_i64,
+ .fniv = gen_shrnt_vec,
+ .opt_opc = shrnt_vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_shrnt_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(SHRNT, aa64_sve2, do_shr_narrow, a, shrnt_ops)
+
+static const GVecGen2i rshrnb_ops[3] = {
+ { .fno = gen_helper_sve2_rshrnb_h },
+ { .fno = gen_helper_sve2_rshrnb_s },
+ { .fno = gen_helper_sve2_rshrnb_d },
+};
+TRANS_FEAT(RSHRNB, aa64_sve2, do_shr_narrow, a, rshrnb_ops)
+
+static const GVecGen2i rshrnt_ops[3] = {
+ { .fno = gen_helper_sve2_rshrnt_h },
+ { .fno = gen_helper_sve2_rshrnt_s },
+ { .fno = gen_helper_sve2_rshrnt_d },
+};
+TRANS_FEAT(RSHRNT, aa64_sve2, do_shr_narrow, a, rshrnt_ops)
+
+static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec n, int64_t shr)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+
+ tcg_gen_sari_vec(vece, n, n, shr);
+ tcg_gen_dupi_vec(vece, t, 0);
+ tcg_gen_smax_vec(vece, n, n, t);
+ tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
+ tcg_gen_umin_vec(vece, d, n, t);
+}
+
+static const TCGOpcode sqshrunb_vec_list[] = {
+ INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
+};
+static const GVecGen2i sqshrunb_ops[3] = {
+ { .fniv = gen_sqshrunb_vec,
+ .opt_opc = sqshrunb_vec_list,
+ .fno = gen_helper_sve2_sqshrunb_h,
+ .vece = MO_16 },
+ { .fniv = gen_sqshrunb_vec,
+ .opt_opc = sqshrunb_vec_list,
+ .fno = gen_helper_sve2_sqshrunb_s,
+ .vece = MO_32 },
+ { .fniv = gen_sqshrunb_vec,
+ .opt_opc = sqshrunb_vec_list,
+ .fno = gen_helper_sve2_sqshrunb_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(SQSHRUNB, aa64_sve2, do_shr_narrow, a, sqshrunb_ops)
+
+static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec n, int64_t shr)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+
+ tcg_gen_sari_vec(vece, n, n, shr);
+ tcg_gen_dupi_vec(vece, t, 0);
+ tcg_gen_smax_vec(vece, n, n, t);
+ tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
+ tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_shli_vec(vece, n, n, halfbits);
+ tcg_gen_bitsel_vec(vece, d, t, d, n);
+}
+
+static const TCGOpcode sqshrunt_vec_list[] = {
+ INDEX_op_shli_vec, INDEX_op_sari_vec,
+ INDEX_op_smax_vec, INDEX_op_umin_vec, 0
+};
+static const GVecGen2i sqshrunt_ops[3] = {
+ { .fniv = gen_sqshrunt_vec,
+ .opt_opc = sqshrunt_vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqshrunt_h,
+ .vece = MO_16 },
+ { .fniv = gen_sqshrunt_vec,
+ .opt_opc = sqshrunt_vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqshrunt_s,
+ .vece = MO_32 },
+ { .fniv = gen_sqshrunt_vec,
+ .opt_opc = sqshrunt_vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqshrunt_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(SQSHRUNT, aa64_sve2, do_shr_narrow, a, sqshrunt_ops)
+
+static const GVecGen2i sqrshrunb_ops[3] = {
+ { .fno = gen_helper_sve2_sqrshrunb_h },
+ { .fno = gen_helper_sve2_sqrshrunb_s },
+ { .fno = gen_helper_sve2_sqrshrunb_d },
+};
+TRANS_FEAT(SQRSHRUNB, aa64_sve2, do_shr_narrow, a, sqrshrunb_ops)
+
+static const GVecGen2i sqrshrunt_ops[3] = {
+ { .fno = gen_helper_sve2_sqrshrunt_h },
+ { .fno = gen_helper_sve2_sqrshrunt_s },
+ { .fno = gen_helper_sve2_sqrshrunt_d },
+};
+TRANS_FEAT(SQRSHRUNT, aa64_sve2, do_shr_narrow, a, sqrshrunt_ops)
+
+static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec n, int64_t shr)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+ int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
+ int64_t min = -max - 1;
+
+ tcg_gen_sari_vec(vece, n, n, shr);
+ tcg_gen_dupi_vec(vece, t, min);
+ tcg_gen_smax_vec(vece, n, n, t);
+ tcg_gen_dupi_vec(vece, t, max);
+ tcg_gen_smin_vec(vece, n, n, t);
+ tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
+ tcg_gen_and_vec(vece, d, n, t);
+}
+
+static const TCGOpcode sqshrnb_vec_list[] = {
+ INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
+};
+static const GVecGen2i sqshrnb_ops[3] = {
+ { .fniv = gen_sqshrnb_vec,
+ .opt_opc = sqshrnb_vec_list,
+ .fno = gen_helper_sve2_sqshrnb_h,
+ .vece = MO_16 },
+ { .fniv = gen_sqshrnb_vec,
+ .opt_opc = sqshrnb_vec_list,
+ .fno = gen_helper_sve2_sqshrnb_s,
+ .vece = MO_32 },
+ { .fniv = gen_sqshrnb_vec,
+ .opt_opc = sqshrnb_vec_list,
+ .fno = gen_helper_sve2_sqshrnb_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(SQSHRNB, aa64_sve2, do_shr_narrow, a, sqshrnb_ops)
+
+static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec n, int64_t shr)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+ int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
+ int64_t min = -max - 1;
+
+ tcg_gen_sari_vec(vece, n, n, shr);
+ tcg_gen_dupi_vec(vece, t, min);
+ tcg_gen_smax_vec(vece, n, n, t);
+ tcg_gen_dupi_vec(vece, t, max);
+ tcg_gen_smin_vec(vece, n, n, t);
+ tcg_gen_shli_vec(vece, n, n, halfbits);
+ tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
+ tcg_gen_bitsel_vec(vece, d, t, d, n);
+}
+
+static const TCGOpcode sqshrnt_vec_list[] = {
+ INDEX_op_shli_vec, INDEX_op_sari_vec,
+ INDEX_op_smax_vec, INDEX_op_smin_vec, 0
+};
+static const GVecGen2i sqshrnt_ops[3] = {
+ { .fniv = gen_sqshrnt_vec,
+ .opt_opc = sqshrnt_vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqshrnt_h,
+ .vece = MO_16 },
+ { .fniv = gen_sqshrnt_vec,
+ .opt_opc = sqshrnt_vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqshrnt_s,
+ .vece = MO_32 },
+ { .fniv = gen_sqshrnt_vec,
+ .opt_opc = sqshrnt_vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqshrnt_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(SQSHRNT, aa64_sve2, do_shr_narrow, a, sqshrnt_ops)
+
+static const GVecGen2i sqrshrnb_ops[3] = {
+ { .fno = gen_helper_sve2_sqrshrnb_h },
+ { .fno = gen_helper_sve2_sqrshrnb_s },
+ { .fno = gen_helper_sve2_sqrshrnb_d },
+};
+TRANS_FEAT(SQRSHRNB, aa64_sve2, do_shr_narrow, a, sqrshrnb_ops)
+
+static const GVecGen2i sqrshrnt_ops[3] = {
+ { .fno = gen_helper_sve2_sqrshrnt_h },
+ { .fno = gen_helper_sve2_sqrshrnt_s },
+ { .fno = gen_helper_sve2_sqrshrnt_d },
+};
+TRANS_FEAT(SQRSHRNT, aa64_sve2, do_shr_narrow, a, sqrshrnt_ops)
+
+static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec n, int64_t shr)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+
+ tcg_gen_shri_vec(vece, n, n, shr);
+ tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
+ tcg_gen_umin_vec(vece, d, n, t);
+}
+
+static const TCGOpcode uqshrnb_vec_list[] = {
+ INDEX_op_shri_vec, INDEX_op_umin_vec, 0
+};
+static const GVecGen2i uqshrnb_ops[3] = {
+ { .fniv = gen_uqshrnb_vec,
+ .opt_opc = uqshrnb_vec_list,
+ .fno = gen_helper_sve2_uqshrnb_h,
+ .vece = MO_16 },
+ { .fniv = gen_uqshrnb_vec,
+ .opt_opc = uqshrnb_vec_list,
+ .fno = gen_helper_sve2_uqshrnb_s,
+ .vece = MO_32 },
+ { .fniv = gen_uqshrnb_vec,
+ .opt_opc = uqshrnb_vec_list,
+ .fno = gen_helper_sve2_uqshrnb_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(UQSHRNB, aa64_sve2, do_shr_narrow, a, uqshrnb_ops)
+
+static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec n, int64_t shr)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+
+ tcg_gen_shri_vec(vece, n, n, shr);
+ tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
+ tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_shli_vec(vece, n, n, halfbits);
+ tcg_gen_bitsel_vec(vece, d, t, d, n);
+}
+
+static const TCGOpcode uqshrnt_vec_list[] = {
+ INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
+};
+static const GVecGen2i uqshrnt_ops[3] = {
+ { .fniv = gen_uqshrnt_vec,
+ .opt_opc = uqshrnt_vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_uqshrnt_h,
+ .vece = MO_16 },
+ { .fniv = gen_uqshrnt_vec,
+ .opt_opc = uqshrnt_vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_uqshrnt_s,
+ .vece = MO_32 },
+ { .fniv = gen_uqshrnt_vec,
+ .opt_opc = uqshrnt_vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_uqshrnt_d,
+ .vece = MO_64 },
+};
+TRANS_FEAT(UQSHRNT, aa64_sve2, do_shr_narrow, a, uqshrnt_ops)
+
+static const GVecGen2i uqrshrnb_ops[3] = {
+ { .fno = gen_helper_sve2_uqrshrnb_h },
+ { .fno = gen_helper_sve2_uqrshrnb_s },
+ { .fno = gen_helper_sve2_uqrshrnb_d },
+};
+TRANS_FEAT(UQRSHRNB, aa64_sve2, do_shr_narrow, a, uqrshrnb_ops)
+
+static const GVecGen2i uqrshrnt_ops[3] = {
+ { .fno = gen_helper_sve2_uqrshrnt_h },
+ { .fno = gen_helper_sve2_uqrshrnt_s },
+ { .fno = gen_helper_sve2_uqrshrnt_d },
+};
+TRANS_FEAT(UQRSHRNT, aa64_sve2, do_shr_narrow, a, uqrshrnt_ops)
+
+#define DO_SVE2_ZZZ_NARROW(NAME, name) \
+ static gen_helper_gvec_3 * const name##_fns[4] = { \
+ NULL, gen_helper_sve2_##name##_h, \
+ gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
+ }; \
+ TRANS_FEAT(NAME, aa64_sve2, gen_gvec_ool_arg_zzz, \
+ name##_fns[a->esz], a, 0)
+
+DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
+DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
+DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
+DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)
+
+DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
+DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
+DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
+DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
+
+static gen_helper_gvec_flags_4 * const match_fns[4] = {
+ gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
+};
+TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
+
+static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
+ gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
+};
+TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
+
+static gen_helper_gvec_4 * const histcnt_fns[4] = {
+ NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
+};
+TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
+ histcnt_fns[a->esz], a, 0)
+
+TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
+ a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
+
+DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
+DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
+DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
+DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
+DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
+
+/*
+ * SVE Integer and Floating-Point Multiply-Add (unpredicated)
+ */
+
+TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
+ gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
+ 0, FPST_FPCR)
+TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
+ gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
+ 0, FPST_FPCR)
+
+static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
+ NULL, gen_helper_sve2_sqdmlal_zzzw_h,
+ gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
+};
+TRANS_FEAT(SQDMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ sqdmlal_zzzw_fns[a->esz], a, 0)
+TRANS_FEAT(SQDMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ sqdmlal_zzzw_fns[a->esz], a, 3)
+TRANS_FEAT(SQDMLALBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ sqdmlal_zzzw_fns[a->esz], a, 2)
+
+static gen_helper_gvec_4 * const sqdmlsl_zzzw_fns[] = {
+ NULL, gen_helper_sve2_sqdmlsl_zzzw_h,
+ gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
+};
+TRANS_FEAT(SQDMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ sqdmlsl_zzzw_fns[a->esz], a, 0)
+TRANS_FEAT(SQDMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ sqdmlsl_zzzw_fns[a->esz], a, 3)
+TRANS_FEAT(SQDMLSLBT, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ sqdmlsl_zzzw_fns[a->esz], a, 2)
+
+static gen_helper_gvec_4 * const sqrdmlah_fns[] = {
+ gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
+ gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
+};
+TRANS_FEAT(SQRDMLAH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ sqrdmlah_fns[a->esz], a, 0)
+
+static gen_helper_gvec_4 * const sqrdmlsh_fns[] = {
+ gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
+ gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
+};
+TRANS_FEAT(SQRDMLSH_zzzz, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ sqrdmlsh_fns[a->esz], a, 0)
+
+static gen_helper_gvec_4 * const smlal_zzzw_fns[] = {
+ NULL, gen_helper_sve2_smlal_zzzw_h,
+ gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
+};
+TRANS_FEAT(SMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ smlal_zzzw_fns[a->esz], a, 0)
+TRANS_FEAT(SMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ smlal_zzzw_fns[a->esz], a, 1)
+
+static gen_helper_gvec_4 * const umlal_zzzw_fns[] = {
+ NULL, gen_helper_sve2_umlal_zzzw_h,
+ gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
+};
+TRANS_FEAT(UMLALB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ umlal_zzzw_fns[a->esz], a, 0)
+TRANS_FEAT(UMLALT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ umlal_zzzw_fns[a->esz], a, 1)
+
+static gen_helper_gvec_4 * const smlsl_zzzw_fns[] = {
+ NULL, gen_helper_sve2_smlsl_zzzw_h,
+ gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
+};
+TRANS_FEAT(SMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ smlsl_zzzw_fns[a->esz], a, 0)
+TRANS_FEAT(SMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ smlsl_zzzw_fns[a->esz], a, 1)
+
+static gen_helper_gvec_4 * const umlsl_zzzw_fns[] = {
+ NULL, gen_helper_sve2_umlsl_zzzw_h,
+ gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
+};
+TRANS_FEAT(UMLSLB_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ umlsl_zzzw_fns[a->esz], a, 0)
+TRANS_FEAT(UMLSLT_zzzw, aa64_sve2, gen_gvec_ool_arg_zzzz,
+ umlsl_zzzw_fns[a->esz], a, 1)
+
+static gen_helper_gvec_4 * const cmla_fns[] = {
+ gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
+ gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
+};
+TRANS_FEAT(CMLA_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
+ cmla_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
+
+static gen_helper_gvec_4 * const cdot_fns[] = {
+ NULL, NULL, gen_helper_sve2_cdot_zzzz_s, gen_helper_sve2_cdot_zzzz_d
+};
+TRANS_FEAT(CDOT_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
+ cdot_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
+
+static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
+ gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
+ gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
+};
+TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
+ sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
+
+TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
+ a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
+
+TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
+ gen_helper_crypto_aesmc, a->rd, a->rd, 0)
+TRANS_FEAT_NONSTREAMING(AESIMC, aa64_sve2_aes, gen_gvec_ool_zz,
+ gen_helper_crypto_aesimc, a->rd, a->rd, 0)
+
+TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
+ gen_helper_crypto_aese, a, 0)
+TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
+ gen_helper_crypto_aesd, a, 0)
+
+TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
+ gen_helper_crypto_sm4e, a, 0)
+TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
+ gen_helper_crypto_sm4ekey, a, 0)
+
+TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
+ gen_gvec_rax1, a)
+
+TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
+TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR)
+
+TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve_bfcvtnt, a, 0, FPST_FPCR)
+
+TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR)
+TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
+ gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR)
+
+TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
+ FPROUNDING_ODD, gen_helper_sve_fcvt_ds)
+TRANS_FEAT(FCVTXNT_ds, aa64_sve2, do_frint_mode, a,
+ FPROUNDING_ODD, gen_helper_sve2_fcvtnt_ds)
+
+static gen_helper_gvec_3_ptr * const flogb_fns[] = {
+ NULL, gen_helper_flogb_h,
+ gen_helper_flogb_s, gen_helper_flogb_d
+};
+TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
+ a, 0, a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+
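+/*
+ * FMLAL[BT]/FMLSL[BT] (vectors) share a single helper: bit 0 of the
+ * data word selects subtraction (FMLSL) and bit 1 selects the top
+ * halves, matching the (sel << 1) | sub packing below.
+ */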
+static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
+{
+ return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzzw_s,
+ a->rd, a->rn, a->rm, a->ra,
+ (sel << 1) | sub, tcg_env);
+}
+
+TRANS_FEAT(FMLALB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, false)
+TRANS_FEAT(FMLALT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, false, true)
+TRANS_FEAT(FMLSLB_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, false)
+TRANS_FEAT(FMLSLT_zzzw, aa64_sve2, do_FMLAL_zzzw, a, true, true)
+
+static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
+{
+ return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
+ a->rd, a->rn, a->rm, a->ra,
+ (a->index << 2) | (sel << 1) | sub, tcg_env);
+}
+
+TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
+TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
+TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
+TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
+
+TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
+ gen_helper_gvec_smmla_b, a, 0)
+TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
+ gen_helper_gvec_usmmla_b, a, 0)
+TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
+ gen_helper_gvec_ummla_b, a, 0)
+
+TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
+ gen_helper_gvec_bfdot, a, 0)
+TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_bfdot_idx, a)
+
+TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
+ gen_helper_gvec_bfmmla, a, 0)
+
+static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
+{
+ return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
+ a->rd, a->rn, a->rm, a->ra, sel, FPST_FPCR);
+}
+
+TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
+TRANS_FEAT(BFMLALT_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, true)
+
+static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
+{
+ return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
+ a->rd, a->rn, a->rm, a->ra,
+ (a->index << 1) | sel, FPST_FPCR);
+}
+
+TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
+TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
+
+static bool trans_PSEL(DisasContext *s, arg_psel *a)
+{
+ int vl = vec_full_reg_size(s);
+ int pl = pred_gvec_reg_size(s);
+ int elements = vl >> a->esz;
+ TCGv_i64 tmp, didx, dbit;
+ TCGv_ptr ptr;
+
+ if (!dc_isar_feature(aa64_sme, s)) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i64();
+ dbit = tcg_temp_new_i64();
+ didx = tcg_temp_new_i64();
+ ptr = tcg_temp_new_ptr();
+
+ /* Compute the predicate element. */
+ tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
+ if (is_power_of_2(elements)) {
+ tcg_gen_andi_i64(tmp, tmp, elements - 1);
+ } else {
+ tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
+ }
+
+ /* Extract the predicate byte and bit indices. */
+ tcg_gen_shli_i64(tmp, tmp, a->esz);
+ tcg_gen_andi_i64(dbit, tmp, 7);
+ tcg_gen_shri_i64(didx, tmp, 3);
+ if (HOST_BIG_ENDIAN) {
+ tcg_gen_xori_i64(didx, didx, 7);
+ }
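+
+ /*
+ * Worked example (little-endian host): with vl == 32 bytes and
+ * esz == MO_32, elements == 8; if (Xv + imm) % 8 == 3, the governing
+ * predicate bit is bit 3 * 4 == 12, i.e. dbit == 4 in byte didx == 1.
+ */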
+
+ /* Load the predicate word. */
+ tcg_gen_trunc_i64_ptr(ptr, didx);
+ tcg_gen_add_ptr(ptr, ptr, tcg_env);
+ tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
+
+ /* Extract the predicate bit and replicate to MO_64. */
+ tcg_gen_shr_i64(tmp, tmp, dbit);
+ tcg_gen_andi_i64(tmp, tmp, 1);
+ tcg_gen_neg_i64(tmp, tmp);
+
+ /* Apply to either copy the source, or write zeros. */
+ tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
+ pred_full_reg_offset(s, a->pn), tmp, pl, pl);
+ return true;
+}
+
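+/*
+ * SCLAMP expanders: each element of the result is MIN(MAX(a, n), m),
+ * i.e. the 'a' operand clamped to the signed range [n, m]; the UCLAMP
+ * expanders below do the same with unsigned comparisons.
+ */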
+static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
+{
+ tcg_gen_smax_i32(d, a, n);
+ tcg_gen_smin_i32(d, d, m);
+}
+
+static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
+{
+ tcg_gen_smax_i64(d, a, n);
+ tcg_gen_smin_i64(d, d, m);
+}
+
+static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec a)
+{
+ tcg_gen_smax_vec(vece, d, a, n);
+ tcg_gen_smin_vec(vece, d, d, m);
+}
+
+static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop[] = {
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_sclamp_vec,
+ .fno = gen_helper_gvec_sclamp_b,
+ .opt_opc = vecop,
+ .vece = MO_8 },
+ { .fniv = gen_sclamp_vec,
+ .fno = gen_helper_gvec_sclamp_h,
+ .opt_opc = vecop,
+ .vece = MO_16 },
+ { .fni4 = gen_sclamp_i32,
+ .fniv = gen_sclamp_vec,
+ .fno = gen_helper_gvec_sclamp_s,
+ .opt_opc = vecop,
+ .vece = MO_32 },
+ { .fni8 = gen_sclamp_i64,
+ .fniv = gen_sclamp_vec,
+ .fno = gen_helper_gvec_sclamp_d,
+ .opt_opc = vecop,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
+}
+
+TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
+
+static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
+{
+ tcg_gen_umax_i32(d, a, n);
+ tcg_gen_umin_i32(d, d, m);
+}
+
+static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
+{
+ tcg_gen_umax_i64(d, a, n);
+ tcg_gen_umin_i64(d, d, m);
+}
+
+static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec a)
+{
+ tcg_gen_umax_vec(vece, d, a, n);
+ tcg_gen_umin_vec(vece, d, d, m);
+}
+
+static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop[] = {
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_uclamp_vec,
+ .fno = gen_helper_gvec_uclamp_b,
+ .opt_opc = vecop,
+ .vece = MO_8 },
+ { .fniv = gen_uclamp_vec,
+ .fno = gen_helper_gvec_uclamp_h,
+ .opt_opc = vecop,
+ .vece = MO_16 },
+ { .fni4 = gen_uclamp_i32,
+ .fniv = gen_uclamp_vec,
+ .fno = gen_helper_gvec_uclamp_s,
+ .opt_opc = vecop,
+ .vece = MO_32 },
+ { .fni8 = gen_uclamp_i64,
+ .fniv = gen_uclamp_vec,
+ .fno = gen_helper_gvec_uclamp_d,
+ .opt_opc = vecop,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
+}
+
+TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c
new file mode 100644
index 0000000000..b9af03b7c3
--- /dev/null
+++ b/target/arm/tcg/translate-vfp.c
@@ -0,0 +1,3415 @@
+/*
+ * ARM translation: AArch32 VFP instructions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2005-2007 CodeSourcery
+ * Copyright (c) 2007 OpenedHand, Ltd.
+ * Copyright (c) 2019 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "translate.h"
+#include "translate-a32.h"
+
+/* Include the generated VFP decoder */
+#include "decode-vfp.c.inc"
+#include "decode-vfp-uncond.c.inc"
+
+static inline void vfp_load_reg64(TCGv_i64 var, int reg)
+{
+ tcg_gen_ld_i64(var, tcg_env, vfp_reg_offset(true, reg));
+}
+
+static inline void vfp_store_reg64(TCGv_i64 var, int reg)
+{
+ tcg_gen_st_i64(var, tcg_env, vfp_reg_offset(true, reg));
+}
+
+static inline void vfp_load_reg32(TCGv_i32 var, int reg)
+{
+ tcg_gen_ld_i32(var, tcg_env, vfp_reg_offset(false, reg));
+}
+
+static inline void vfp_store_reg32(TCGv_i32 var, int reg)
+{
+ tcg_gen_st_i32(var, tcg_env, vfp_reg_offset(false, reg));
+}
+
+/*
+ * The imm8 encodes the sign bit, enough bits to represent an exponent in
+ * the range 01....1xx to 10....0xx, and the most significant 4 bits of
+ * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
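+ * For example, imm8 == 0x70 expands to 1.0 in each size:
+ * 0x3c00 (f16), 0x3f800000 (f32) and 0x3ff0000000000000 (f64).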
+ */
+uint64_t vfp_expand_imm(int size, uint8_t imm8)
+{
+ uint64_t imm;
+
+ switch (size) {
+ case MO_64:
+ imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
+ (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
+ extract32(imm8, 0, 6);
+ imm <<= 48;
+ break;
+ case MO_32:
+ imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
+ (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
+ (extract32(imm8, 0, 6) << 3);
+ imm <<= 16;
+ break;
+ case MO_16:
+ imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
+ (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
+ (extract32(imm8, 0, 6) << 6);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return imm;
+}
+
+/*
+ * Return the offset of a 16-bit half of the specified VFP single-precision
+ * register. If top is true, returns the top 16 bits; otherwise the bottom
+ * 16 bits.
+ */
+static inline long vfp_f16_offset(unsigned reg, bool top)
+{
+ long offs = vfp_reg_offset(false, reg);
+#if HOST_BIG_ENDIAN
+ if (!top) {
+ offs += 2;
+ }
+#else
+ if (top) {
+ offs += 2;
+ }
+#endif
+ return offs;
+}
+
+/*
+ * Generate code for M-profile lazy FP state preservation if needed;
+ * this corresponds to the pseudocode PreserveFPState() function.
+ */
+static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update)
+{
+ if (s->v7m_lspact) {
+ /*
+ * Lazy state saving affects external memory and also the NVIC,
+ * so we must mark it as an IO operation for icount (and cause
+ * this to be the last insn in the TB).
+ */
+ if (translator_io_start(&s->base)) {
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
+ }
+ gen_helper_v7m_preserve_fp_state(tcg_env);
+ /*
+ * If the preserve_fp_state helper doesn't throw an exception
+ * then it will clear LSPACT; we don't need to repeat this for
+ * any further FP insns in this TB.
+ */
+ s->v7m_lspact = false;
+ /*
+ * The helper might have zeroed VPR, so we do not know the
+ * correct value for the MVE_NO_PRED TB flag any more.
+ * If we're about to create a new fp context then that
+ * will precisely determine the MVE_NO_PRED value (see
+ * gen_update_fp_context()). Otherwise, we must:
+ * - set s->mve_no_pred to false, so this instruction
+ * is generated to use helper functions
+ * - end the TB now, without chaining to the next TB
+ */
+ if (skip_context_update || !s->v7m_new_fp_ctxt_needed) {
+ s->mve_no_pred = false;
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
+ }
+ }
+}
+
+/*
+ * Generate code for M-profile FP context handling: update the
+ * ownership of the FP context, and create a new context if
+ * necessary. This corresponds to the parts of the pseudocode
+ * ExecuteFPCheck() after the initial PreserveFPState() call.
+ */
+static void gen_update_fp_context(DisasContext *s)
+{
+ /* Update ownership of FP context: set FPCCR.S to match current state */
+ if (s->v8m_fpccr_s_wrong) {
+ TCGv_i32 tmp;
+
+ tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
+ if (s->v8m_secure) {
+ tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
+ } else {
+ tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
+ }
+ store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
+ /* Don't need to do this for any further FP insns in this TB */
+ s->v8m_fpccr_s_wrong = false;
+ }
+
+ if (s->v7m_new_fp_ctxt_needed) {
+ /*
+ * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA,
+ * the FPSCR, and VPR.
+ */
+ TCGv_i32 control, fpscr;
+ uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
+
+ fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
+ gen_helper_vfp_set_fpscr(tcg_env, fpscr);
+ if (dc_isar_feature(aa32_mve, s)) {
+ store_cpu_field(tcg_constant_i32(0), v7m.vpr);
+ }
+ /*
+ * We just updated the FPSCR and VPR. Some of this state is cached
+ * in the MVE_NO_PRED TB flag. We want to avoid having to end the
+ * TB here, which means we need the new value of the MVE_NO_PRED
+ * flag to be exactly known here and the same for all executions.
+ * Luckily FPDSCR.LTPSIZE is always constant 4 and the VPR is
+ * always set to 0, so the new MVE_NO_PRED flag is always 1
+ * if and only if we have MVE.
+ *
+ * (The other FPSCR state cached in TB flags is VECLEN and VECSTRIDE,
+ * but those do not exist for M-profile, so are not relevant here.)
+ */
+ s->mve_no_pred = dc_isar_feature(aa32_mve, s);
+
+ if (s->v8m_secure) {
+ bits |= R_V7M_CONTROL_SFPA_MASK;
+ }
+ control = load_cpu_field(v7m.control[M_REG_S]);
+ tcg_gen_ori_i32(control, control, bits);
+ store_cpu_field(control, v7m.control[M_REG_S]);
+ /* Don't need to do this for any further FP insns in this TB */
+ s->v7m_new_fp_ctxt_needed = false;
+ }
+}
+
+/*
+ * Check that VFP access is enabled, A-profile specific version.
+ *
+ * If VFP is enabled, return true. If not, emit code to generate an
+ * appropriate exception and return false.
+ * The ignore_vfp_enabled argument specifies that we should ignore
+ * whether VFP is enabled via FPEXC.EN: this should be true for FMXR/FMRX
+ * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
+ */
+static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
+{
+ if (s->fp_excp_el) {
+ /*
+ * The full syndrome is only used for HSR when HCPTR traps:
+ * For v8, when TA==0, coproc is RES0.
+ * For v7, any use of a Floating-point instruction or access
+ * to a Floating-point Extension register that is trapped to
+ * Hyp mode because of a trap configured in the HCPTR sets
+ * this field to 0xA.
+ */
+ int coproc = arm_dc_feature(s, ARM_FEATURE_V8) ? 0 : 0xa;
+ uint32_t syn = syn_fp_access_trap(1, 0xe, false, coproc);
+
+ gen_exception_insn_el(s, 0, EXCP_UDEF, syn, s->fp_excp_el);
+ return false;
+ }
+
+ /*
+ * Note that rebuild_hflags_a32 has already accounted for being in EL0
+ * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not
+ * appear to be any VFP insns that are allowed in streaming mode.
+ */
+ if (s->sme_trap_nonstreaming) {
+ gen_exception_insn(s, 0, EXCP_UDEF,
+ syn_smetrap(SME_ET_Streaming,
+ curr_insn_len(s) == 2));
+ return false;
+ }
+
+ if (!s->vfp_enabled && !ignore_vfp_enabled) {
+ assert(!arm_dc_feature(s, ARM_FEATURE_M));
+ unallocated_encoding(s);
+ return false;
+ }
+ return true;
+}
+
+/*
+ * Check that VFP access is enabled, M-profile specific version.
+ *
+ * If VFP is enabled, do the necessary M-profile lazy-FP handling and then
+ * return true. If not, emit code to generate an appropriate exception and
+ * return false.
+ * skip_context_update is true to skip the "update FP context" part of this.
+ */
+bool vfp_access_check_m(DisasContext *s, bool skip_context_update)
+{
+ if (s->fp_excp_el) {
+ /*
+ * M-profile mostly catches the "FPU disabled" case early, in
+ * disas_m_nocp(), but a few insns (e.g. LCTP, WLSTP, DLSTP)
+ * which do coprocessor-checks are outside the large ranges of
+ * the encoding space handled by the patterns in m-nocp.decode,
+ * and for them we may need to raise NOCP here.
+ */
+ gen_exception_insn_el(s, 0, EXCP_NOCP,
+ syn_uncategorized(), s->fp_excp_el);
+ return false;
+ }
+
+ /* Handle M-profile lazy FP state mechanics */
+
+ /* Trigger lazy-state preservation if necessary */
+ gen_preserve_fp_state(s, skip_context_update);
+
+ if (!skip_context_update) {
+ /* Update ownership of FP context and create new FP context if needed */
+ gen_update_fp_context(s);
+ }
+
+ return true;
+}
+
+/*
+ * The most common kind of VFP access check, for everything except
+ * FMXR/FMRX to the always-available special registers.
+ */
+bool vfp_access_check(DisasContext *s)
+{
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ return vfp_access_check_m(s, false);
+ } else {
+ return vfp_access_check_a(s, false);
+ }
+}
+
+static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
+{
+ uint32_t rd, rn, rm;
+ int sz = a->sz;
+
+ if (!dc_isar_feature(aa32_vsel, s)) {
+ return false;
+ }
+
+ if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vm | a->vn | a->vd) & 0x10)) {
+ return false;
+ }
+
+ rd = a->vd;
+ rn = a->vn;
+ rm = a->vm;
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (sz == 3) {
+ TCGv_i64 frn, frm, dest;
+ TCGv_i64 tmp, zero, zf, nf, vf;
+
+ zero = tcg_constant_i64(0);
+
+ frn = tcg_temp_new_i64();
+ frm = tcg_temp_new_i64();
+ dest = tcg_temp_new_i64();
+
+ zf = tcg_temp_new_i64();
+ nf = tcg_temp_new_i64();
+ vf = tcg_temp_new_i64();
+
+ tcg_gen_extu_i32_i64(zf, cpu_ZF);
+ tcg_gen_ext_i32_i64(nf, cpu_NF);
+ tcg_gen_ext_i32_i64(vf, cpu_VF);
+
+ vfp_load_reg64(frn, rn);
+ vfp_load_reg64(frm, rm);
+ switch (a->cc) {
+ case 0: /* eq: Z */
+ tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero, frn, frm);
+ break;
+ case 1: /* vs: V */
+ tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero, frn, frm);
+ break;
+ case 2: /* ge: N == V -> N ^ V == 0 */
+ tmp = tcg_temp_new_i64();
+ tcg_gen_xor_i64(tmp, vf, nf);
+ tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, frn, frm);
+ break;
+ case 3: /* gt: !Z && N == V */
+ tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero, frn, frm);
+ tmp = tcg_temp_new_i64();
+ tcg_gen_xor_i64(tmp, vf, nf);
+ tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, dest, frm);
+ break;
+ }
+ vfp_store_reg64(dest, rd);
+ } else {
+ TCGv_i32 frn, frm, dest;
+ TCGv_i32 tmp, zero;
+
+ zero = tcg_constant_i32(0);
+
+ frn = tcg_temp_new_i32();
+ frm = tcg_temp_new_i32();
+ dest = tcg_temp_new_i32();
+ vfp_load_reg32(frn, rn);
+ vfp_load_reg32(frm, rm);
+ switch (a->cc) {
+ case 0: /* eq: Z */
+ tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero, frn, frm);
+ break;
+ case 1: /* vs: V */
+ tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero, frn, frm);
+ break;
+ case 2: /* ge: N == V -> N ^ V == 0 */
+ tmp = tcg_temp_new_i32();
+ tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
+ tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, frn, frm);
+ break;
+ case 3: /* gt: !Z && N == V */
+ tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero, frn, frm);
+ tmp = tcg_temp_new_i32();
+ tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
+ tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, dest, frm);
+ break;
+ }
+ /* For fp16 the top half is always zero */
+ if (sz == 1) {
+ tcg_gen_andi_i32(dest, dest, 0xffff);
+ }
+ vfp_store_reg32(dest, rd);
+ }
+
+ return true;
+}
+
+/*
+ * Table for converting the most common AArch32 encoding of
+ * rounding mode to arm_fprounding order (which matches the
+ * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
+ */
+static const uint8_t fp_decode_rm[] = {
+ FPROUNDING_TIEAWAY,
+ FPROUNDING_TIEEVEN,
+ FPROUNDING_POSINF,
+ FPROUNDING_NEGINF,
+};
+
+static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
+{
+ uint32_t rd, rm;
+ int sz = a->sz;
+ TCGv_ptr fpst;
+ TCGv_i32 tcg_rmode;
+ int rounding = fp_decode_rm[a->rm];
+
+ if (!dc_isar_feature(aa32_vrint, s)) {
+ return false;
+ }
+
+ if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vm | a->vd) & 0x10)) {
+ return false;
+ }
+
+ rd = a->vd;
+ rm = a->vm;
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (sz == 1) {
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ } else {
+ fpst = fpstatus_ptr(FPST_FPCR);
+ }
+
+ tcg_rmode = gen_set_rmode(rounding, fpst);
+
+ if (sz == 3) {
+ TCGv_i64 tcg_op;
+ TCGv_i64 tcg_res;
+ tcg_op = tcg_temp_new_i64();
+ tcg_res = tcg_temp_new_i64();
+ vfp_load_reg64(tcg_op, rm);
+ gen_helper_rintd(tcg_res, tcg_op, fpst);
+ vfp_store_reg64(tcg_res, rd);
+ } else {
+ TCGv_i32 tcg_op;
+ TCGv_i32 tcg_res;
+ tcg_op = tcg_temp_new_i32();
+ tcg_res = tcg_temp_new_i32();
+ vfp_load_reg32(tcg_op, rm);
+ if (sz == 1) {
+ gen_helper_rinth(tcg_res, tcg_op, fpst);
+ } else {
+ gen_helper_rints(tcg_res, tcg_op, fpst);
+ }
+ vfp_store_reg32(tcg_res, rd);
+ }
+
+ gen_restore_rmode(tcg_rmode, fpst);
+ return true;
+}
+
+static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
+{
+ uint32_t rd, rm;
+ int sz = a->sz;
+ TCGv_ptr fpst;
+ TCGv_i32 tcg_rmode, tcg_shift;
+ int rounding = fp_decode_rm[a->rm];
+ bool is_signed = a->op;
+
+ if (!dc_isar_feature(aa32_vcvt_dr, s)) {
+ return false;
+ }
+
+ if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
+ return false;
+ }
+
+ rd = a->vd;
+ rm = a->vm;
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (sz == 1) {
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ } else {
+ fpst = fpstatus_ptr(FPST_FPCR);
+ }
+
+ tcg_shift = tcg_constant_i32(0);
+ tcg_rmode = gen_set_rmode(rounding, fpst);
+
+ if (sz == 3) {
+ TCGv_i64 tcg_double, tcg_res;
+ TCGv_i32 tcg_tmp;
+ tcg_double = tcg_temp_new_i64();
+ tcg_res = tcg_temp_new_i64();
+ tcg_tmp = tcg_temp_new_i32();
+ vfp_load_reg64(tcg_double, rm);
+ if (is_signed) {
+ gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
+ } else {
+ gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
+ }
+ tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
+ vfp_store_reg32(tcg_tmp, rd);
+ } else {
+ TCGv_i32 tcg_single, tcg_res;
+ tcg_single = tcg_temp_new_i32();
+ tcg_res = tcg_temp_new_i32();
+ vfp_load_reg32(tcg_single, rm);
+ if (sz == 1) {
+ if (is_signed) {
+ gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
+ } else {
+ gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
+ }
+ } else {
+ if (is_signed) {
+ gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
+ } else {
+ gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
+ }
+ }
+ vfp_store_reg32(tcg_res, rd);
+ }
+
+ gen_restore_rmode(tcg_rmode, fpst);
+ return true;
+}
+
+bool mve_skip_vmov(DisasContext *s, int vn, int index, int size)
+{
+ /*
+ * In a CPU with MVE, the VMOV (vector lane to general-purpose register)
+ * and VMOV (general-purpose register to vector lane) insns are not
+ * predicated, but they are subject to beatwise execution if they are
+ * not in an IT block.
+ *
+ * Since our implementation always executes all 4 beats in one tick,
+ * this means only that if PSR.ECI says we should not be executing
+ * the beat corresponding to the lane of the vector register being
+ * accessed then we should skip performing the move, and that we need
+ * to do the usual check for bad ECI state and advance of ECI state.
+ *
+ * Note that if PSR.ECI is non-zero then we cannot be in an IT block.
+ *
+ * Return true if this VMOV scalar <-> gpreg should be skipped because
+ * the MVE PSR.ECI state says we skip the beat where the store happens.
+ */
+
+ /* Calculate the byte offset into Qn which we're going to access */
+ int ofs = (index << size) + ((vn & 1) * 8);
+
+ if (!dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+
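+ /*
+ * Illustration: any access with ofs < 4 (beat 0) is skipped for every
+ * non-zero ECI value, while e.g. ofs == 4 (32-bit lane 1 in the low
+ * half of the Q register) is skipped for ECI_A0A1 and the longer
+ * states but not for ECI_A0.
+ */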
+ switch (s->eci) {
+ case ECI_NONE:
+ return false;
+ case ECI_A0:
+ return ofs < 4;
+ case ECI_A0A1:
+ return ofs < 8;
+ case ECI_A0A1A2:
+ case ECI_A0A1A2B0:
+ return ofs < 12;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
+{
+ /* VMOV scalar to general purpose register */
+ TCGv_i32 tmp;
+
+ /*
+ * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
+ * all sizes, whether the CPU has fp or not.
+ */
+ if (!dc_isar_feature(aa32_mve, s)) {
+ if (a->size == MO_32
+ ? !dc_isar_feature(aa32_fpsp_v2, s)
+ : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
+ return false;
+ }
+
+ if (dc_isar_feature(aa32_mve, s)) {
+ if (!mve_eci_check(s)) {
+ return true;
+ }
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
+ tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vn, a->index,
+ a->size | (a->u ? 0 : MO_SIGN));
+ store_reg(s, a->rt, tmp);
+ }
+
+ if (dc_isar_feature(aa32_mve, s)) {
+ mve_update_and_store_eci(s);
+ }
+ return true;
+}
+
+static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
+{
+ /* VMOV general purpose register to scalar */
+ TCGv_i32 tmp;
+
+ /*
+ * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
+ * all sizes, whether the CPU has fp or not.
+ */
+ if (!dc_isar_feature(aa32_mve, s)) {
+ if (a->size == MO_32
+ ? !dc_isar_feature(aa32_fpsp_v2, s)
+ : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
+ return false;
+ }
+
+ if (dc_isar_feature(aa32_mve, s)) {
+ if (!mve_eci_check(s)) {
+ return true;
+ }
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
+ tmp = load_reg(s, a->rt);
+ write_neon_element32(tmp, a->vn, a->index, a->size);
+ }
+
+ if (dc_isar_feature(aa32_mve, s)) {
+ mve_update_and_store_eci(s);
+ }
+ return true;
+}
+
+static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
+{
+ /* VDUP (general purpose register) */
+ TCGv_i32 tmp;
+ int size, vec_size;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
+ return false;
+ }
+
+ if (a->b && a->e) {
+ return false;
+ }
+
+ if (a->q && (a->vn & 1)) {
+ return false;
+ }
+
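+ /*
+ * The b and e bits encode the element size to duplicate: b set
+ * means 8-bit, e set means 16-bit, neither means 32-bit (both set
+ * is UNDEF, rejected above). q selects a D or Q destination.
+ */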
+ vec_size = a->q ? 16 : 8;
+ if (a->b) {
+ size = 0;
+ } else if (a->e) {
+ size = 1;
+ } else {
+ size = 2;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = load_reg(s, a->rt);
+ tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
+ vec_size, vec_size, tmp);
+ return true;
+}
+
+static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
+{
+ TCGv_i32 tmp;
+ bool ignore_vfp_enabled = false;
+
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ /* M profile version was already handled in m-nocp.decode */
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_fpsp_v2, s)) {
+ return false;
+ }
+
+ switch (a->reg) {
+ case ARM_VFP_FPSID:
+ /*
+ * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
+ * all ID registers to privileged access only.
+ */
+ if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
+ return false;
+ }
+ ignore_vfp_enabled = true;
+ break;
+ case ARM_VFP_MVFR0:
+ case ARM_VFP_MVFR1:
+ if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
+ return false;
+ }
+ ignore_vfp_enabled = true;
+ break;
+ case ARM_VFP_MVFR2:
+ if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
+ return false;
+ }
+ ignore_vfp_enabled = true;
+ break;
+ case ARM_VFP_FPSCR:
+ break;
+ case ARM_VFP_FPEXC:
+ if (IS_USER(s)) {
+ return false;
+ }
+ ignore_vfp_enabled = true;
+ break;
+ case ARM_VFP_FPINST:
+ case ARM_VFP_FPINST2:
+ /* Not present in VFPv3 */
+ if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
+ return false;
+ }
+ break;
+ default:
+ return false;
+ }
+
+ /*
+ * Call vfp_access_check_a() directly, because we need to tell
+ * it to ignore FPEXC.EN for some register accesses.
+ */
+ if (!vfp_access_check_a(s, ignore_vfp_enabled)) {
+ return true;
+ }
+
+ if (a->l) {
+ /* VMRS, move VFP special register to gp register */
+ switch (a->reg) {
+ case ARM_VFP_MVFR0:
+ case ARM_VFP_MVFR1:
+ case ARM_VFP_MVFR2:
+ case ARM_VFP_FPSID:
+ if (s->current_el == 1) {
+ gen_set_condexec(s);
+ gen_update_pc(s, 0);
+ gen_helper_check_hcr_el2_trap(tcg_env,
+ tcg_constant_i32(a->rt),
+ tcg_constant_i32(a->reg));
+ }
+ /* fall through */
+ case ARM_VFP_FPEXC:
+ case ARM_VFP_FPINST:
+ case ARM_VFP_FPINST2:
+ tmp = load_cpu_field(vfp.xregs[a->reg]);
+ break;
+ case ARM_VFP_FPSCR:
+ if (a->rt == 15) {
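+ /*
+ * Rt == 15 encodes "VMRS APSR_nzcv, FPSCR" (FMSTAT): only the
+ * FPSCR condition flags are read, and they are copied into
+ * CPSR.NZCV below.
+ */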
+ tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
+ tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
+ } else {
+ tmp = tcg_temp_new_i32();
+ gen_helper_vfp_get_fpscr(tmp, tcg_env);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (a->rt == 15) {
+ /* Set the 4 flag bits in the CPSR. */
+ gen_set_nzcv(tmp);
+ } else {
+ store_reg(s, a->rt, tmp);
+ }
+ } else {
+ /* VMSR, move gp register to VFP special register */
+ switch (a->reg) {
+ case ARM_VFP_FPSID:
+ case ARM_VFP_MVFR0:
+ case ARM_VFP_MVFR1:
+ case ARM_VFP_MVFR2:
+ /* Writes are ignored. */
+ break;
+ case ARM_VFP_FPSCR:
+ tmp = load_reg(s, a->rt);
+ gen_helper_vfp_set_fpscr(tcg_env, tmp);
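+ /*
+ * Writing FPSCR can change the cached vector length/stride
+ * state, so force a TB lookup below.
+ */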
+ gen_lookup_tb(s);
+ break;
+ case ARM_VFP_FPEXC:
+ /*
+ * TODO: VFP subarchitecture support.
+ * For now, keep the EN bit only
+ */
+ tmp = load_reg(s, a->rt);
+ tcg_gen_andi_i32(tmp, tmp, 1 << 30);
+ store_cpu_field(tmp, vfp.xregs[a->reg]);
+ gen_lookup_tb(s);
+ break;
+ case ARM_VFP_FPINST:
+ case ARM_VFP_FPINST2:
+ tmp = load_reg(s, a->rt);
+ store_cpu_field(tmp, vfp.xregs[a->reg]);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+
+ return true;
+}
+
+static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
+{
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (a->rt == 15) {
+ /* UNPREDICTABLE; we choose to UNDEF */
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (a->l) {
+ /* VFP to general purpose register */
+ tmp = tcg_temp_new_i32();
+ vfp_load_reg32(tmp, a->vn);
+ tcg_gen_andi_i32(tmp, tmp, 0xffff);
+ store_reg(s, a->rt, tmp);
+ } else {
+ /* general purpose register to VFP */
+ tmp = load_reg(s, a->rt);
+ tcg_gen_andi_i32(tmp, tmp, 0xffff);
+ vfp_store_reg32(tmp, a->vn);
+ }
+
+ return true;
+}
+
+static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
+{
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (a->l) {
+ /* VFP to general purpose register */
+ tmp = tcg_temp_new_i32();
+ vfp_load_reg32(tmp, a->vn);
+ if (a->rt == 15) {
+ /* Set the 4 flag bits in the CPSR. */
+ gen_set_nzcv(tmp);
+ } else {
+ store_reg(s, a->rt, tmp);
+ }
+ } else {
+ /* general purpose register to VFP */
+ tmp = load_reg(s, a->rt);
+ vfp_store_reg32(tmp, a->vn);
+ }
+
+ return true;
+}
+
+static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
+{
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+
+ /*
+ * VMOV between two general-purpose registers and two single precision
+ * floating point registers
+ */
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (a->op) {
+ /* fpreg to gpreg */
+ tmp = tcg_temp_new_i32();
+ vfp_load_reg32(tmp, a->vm);
+ store_reg(s, a->rt, tmp);
+ tmp = tcg_temp_new_i32();
+ vfp_load_reg32(tmp, a->vm + 1);
+ store_reg(s, a->rt2, tmp);
+ } else {
+ /* gpreg to fpreg */
+ tmp = load_reg(s, a->rt);
+ vfp_store_reg32(tmp, a->vm);
+ tmp = load_reg(s, a->rt2);
+ vfp_store_reg32(tmp, a->vm + 1);
+ }
+
+ return true;
+}
+
+static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
+{
+ TCGv_i32 tmp;
+
+ /*
+ * VMOV between two general-purpose registers and one double precision
+ * floating point register. Note that this does not require support
+ * for double precision arithmetic.
+ */
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (a->op) {
+ /* fpreg to gpreg */
+ tmp = tcg_temp_new_i32();
+ vfp_load_reg32(tmp, a->vm * 2);
+ store_reg(s, a->rt, tmp);
+ tmp = tcg_temp_new_i32();
+ vfp_load_reg32(tmp, a->vm * 2 + 1);
+ store_reg(s, a->rt2, tmp);
+ } else {
+ /* gpreg to fpreg */
+ tmp = load_reg(s, a->rt);
+ vfp_store_reg32(tmp, a->vm * 2);
+ tmp = load_reg(s, a->rt2);
+ vfp_store_reg32(tmp, a->vm * 2 + 1);
+ }
+
+ return true;
+}
+
+static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
+{
+ uint32_t offset;
+ TCGv_i32 addr, tmp;
+
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
+ offset = a->imm << 1;
+ if (!a->u) {
+ offset = -offset;
+ }
+
+ /* For thumb, use of PC is UNPREDICTABLE. */
+ addr = add_reg_for_lit(s, a->rn, offset);
+ tmp = tcg_temp_new_i32();
+ if (a->l) {
+ gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
+ vfp_store_reg32(tmp, a->vd);
+ } else {
+ vfp_load_reg32(tmp, a->vd);
+ gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
+ }
+ return true;
+}
+
+static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
+{
+ uint32_t offset;
+ TCGv_i32 addr, tmp;
+
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ offset = a->imm << 2;
+ if (!a->u) {
+ offset = -offset;
+ }
+
+ /* For thumb, use of PC is UNPREDICTABLE. */
+ addr = add_reg_for_lit(s, a->rn, offset);
+ tmp = tcg_temp_new_i32();
+ if (a->l) {
+ gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
+ vfp_store_reg32(tmp, a->vd);
+ } else {
+ vfp_load_reg32(tmp, a->vd);
+ gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
+ }
+ return true;
+}
+
+static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
+{
+ uint32_t offset;
+ TCGv_i32 addr;
+ TCGv_i64 tmp;
+
+ /* Note that this does not require support for double arithmetic. */
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ offset = a->imm << 2;
+ if (!a->u) {
+ offset = -offset;
+ }
+
+ /* For thumb, use of PC is UNPREDICTABLE. */
+ addr = add_reg_for_lit(s, a->rn, offset);
+ tmp = tcg_temp_new_i64();
+ if (a->l) {
+ gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
+ vfp_store_reg64(tmp, a->vd);
+ } else {
+ vfp_load_reg64(tmp, a->vd);
+ gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
+ }
+ return true;
+}
+
+static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
+{
+ uint32_t offset;
+ TCGv_i32 addr, tmp;
+ int i, n;
+
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+
+ n = a->imm;
+
+ if (n == 0 || (a->vd + n) > 32) {
+ /*
+ * UNPREDICTABLE cases for bad immediates: we choose to
+ * UNDEF to avoid generating huge numbers of TCG ops
+ */
+ return false;
+ }
+ if (a->rn == 15 && a->w) {
+ /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
+ return false;
+ }
+
+ s->eci_handled = true;
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /* For thumb, use of PC is UNPREDICTABLE. */
+ addr = add_reg_for_lit(s, a->rn, 0);
+ if (a->p) {
+ /* pre-decrement */
+ tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
+ }
+
+ if (s->v8m_stackcheck && a->rn == 13 && a->w) {
+ /*
+ * Here 'addr' is the lowest address we will store to,
+ * and is either the old SP (if post-increment) or
+ * the new SP (if pre-decrement). For post-increment
+ * where the old value is below the limit and the new
+ * value is above, it is UNKNOWN whether the limit check
+ * triggers; we choose to trigger.
+ */
+ gen_helper_v8m_stackcheck(tcg_env, addr);
+ }
+
+ offset = 4;
+ tmp = tcg_temp_new_i32();
+ for (i = 0; i < n; i++) {
+ if (a->l) {
+ /* load */
+ gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
+ vfp_store_reg32(tmp, a->vd + i);
+ } else {
+ /* store */
+ vfp_load_reg32(tmp, a->vd + i);
+ gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
+ }
+ tcg_gen_addi_i32(addr, addr, offset);
+ }
+ if (a->w) {
+ /* writeback */
+ if (a->p) {
+ offset = -offset * n;
+ tcg_gen_addi_i32(addr, addr, offset);
+ }
+ store_reg(s, a->rn, addr);
+ }
+
+ clear_eci_state(s);
+ return true;
+}
+
+static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
+{
+ uint32_t offset;
+ TCGv_i32 addr;
+ TCGv_i64 tmp;
+ int i, n;
+
+ /* Note that this does not require support for double arithmetic. */
+ if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+
+ n = a->imm >> 1;
+
+ if (n == 0 || (a->vd + n) > 32 || n > 16) {
+ /*
+ * UNPREDICTABLE cases for bad immediates: we choose to
+ * UNDEF to avoid generating huge numbers of TCG ops
+ */
+ return false;
+ }
+ if (a->rn == 15 && a->w) {
+ /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
+ return false;
+ }
+
+ s->eci_handled = true;
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /* For thumb, use of PC is UNPREDICTABLE. */
+ addr = add_reg_for_lit(s, a->rn, 0);
+ if (a->p) {
+ /* pre-decrement */
+ tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
+ }
+
+ if (s->v8m_stackcheck && a->rn == 13 && a->w) {
+ /*
+ * Here 'addr' is the lowest address we will store to,
+ * and is either the old SP (if post-increment) or
+ * the new SP (if pre-decrement). For post-increment
+ * where the old value is below the limit and the new
+ * value is above, it is UNKNOWN whether the limit check
+ * triggers; we choose to trigger.
+ */
+ gen_helper_v8m_stackcheck(tcg_env, addr);
+ }
+
+ offset = 8;
+ tmp = tcg_temp_new_i64();
+ for (i = 0; i < n; i++) {
+ if (a->l) {
+ /* load */
+ gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
+ vfp_store_reg64(tmp, a->vd + i);
+ } else {
+ /* store */
+ vfp_load_reg64(tmp, a->vd + i);
+ gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
+ }
+ tcg_gen_addi_i32(addr, addr, offset);
+ }
+ if (a->w) {
+ /* writeback */
+ if (a->p) {
+ offset = -offset * n;
+ } else if (a->imm & 1) {
+ offset = 4;
+ } else {
+ offset = 0;
+ }
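+ /*
+ * An odd imm is the FLDMX/FSTMX form, whose post-indexed
+ * writeback address is 4 bytes beyond the 8 * n already added
+ * to addr in the loop above; hence the extra offset of 4.
+ */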
+
+ if (offset != 0) {
+ tcg_gen_addi_i32(addr, addr, offset);
+ }
+ store_reg(s, a->rn, addr);
+ }
+
+ clear_eci_state(s);
+ return true;
+}
+
+/*
+ * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
+ * The callback should emit code to write a value to vd. If
+ * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
+ * will contain the old value of the relevant VFP register;
+ * otherwise it must be written to only.
+ */
+typedef void VFPGen3OpSPFn(TCGv_i32 vd,
+ TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
+typedef void VFPGen3OpDPFn(TCGv_i64 vd,
+ TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
+
+/*
+ * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
+ * The callback should emit code to write a value to vd (which
+ * should be written to only).
+ */
+typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
+typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
+
+/*
+ * Return true if the specified S reg is in a scalar bank
+ * (ie if it is s0..s7)
+ */
+static inline bool vfp_sreg_is_scalar(int reg)
+{
+ return (reg & 0x18) == 0;
+}
+
+/*
+ * Return true if the specified D reg is in a scalar bank
+ * (ie if it is d0..d3 or d16..d19)
+ */
+static inline bool vfp_dreg_is_scalar(int reg)
+{
+ return (reg & 0xc) == 0;
+}
+
+/*
+ * Advance the S reg number forwards by delta within its bank
+ * (ie increment the low 3 bits but leave the rest the same)
+ */
+static inline int vfp_advance_sreg(int reg, int delta)
+{
+ return ((reg + delta) & 0x7) | (reg & ~0x7);
+}
+
+/*
+ * Advance the D reg number forwards by delta within its bank
+ * (ie increment the low 2 bits but leave the rest the same)
+ */
+static inline int vfp_advance_dreg(int reg, int delta)
+{
+ return ((reg + delta) & 0x3) | (reg & ~0x3);
+}
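+/*
+ * For example, vfp_advance_sreg(15, 1) wraps to s8 (staying within
+ * the s8..s15 bank), and vfp_advance_dreg(7, 1) wraps to d4 within
+ * the d4..d7 bank.
+ */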
+
+/*
+ * Perform a 3-operand VFP data processing instruction. fn is the
+ * callback to do the actual operation; this function deals with the
+ * code to handle looping around for VFP vector processing.
+ */
+static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
+ int vd, int vn, int vm, bool reads_vd)
+{
+ uint32_t delta_m = 0;
+ uint32_t delta_d = 0;
+ int veclen = s->vec_len;
+ TCGv_i32 f0, f1, fd;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_fpsp_v2, s)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_fpshvec, s) &&
+ (veclen != 0 || s->vec_stride != 0)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (veclen > 0) {
+ /* Figure out what type of vector operation this is. */
+ if (vfp_sreg_is_scalar(vd)) {
+ /* scalar */
+ veclen = 0;
+ } else {
+ delta_d = s->vec_stride + 1;
+
+ if (vfp_sreg_is_scalar(vm)) {
+ /* mixed scalar/vector */
+ delta_m = 0;
+ } else {
+ /* vector */
+ delta_m = delta_d;
+ }
+ }
+ }
+
+ f0 = tcg_temp_new_i32();
+ f1 = tcg_temp_new_i32();
+ fd = tcg_temp_new_i32();
+ fpst = fpstatus_ptr(FPST_FPCR);
+
+ vfp_load_reg32(f0, vn);
+ vfp_load_reg32(f1, vm);
+
+ for (;;) {
+ if (reads_vd) {
+ vfp_load_reg32(fd, vd);
+ }
+ fn(fd, f0, f1, fpst);
+ vfp_store_reg32(fd, vd);
+
+ if (veclen == 0) {
+ break;
+ }
+
+ /* Set up the operands for the next iteration */
+ veclen--;
+ vd = vfp_advance_sreg(vd, delta_d);
+ vn = vfp_advance_sreg(vn, delta_d);
+ vfp_load_reg32(f0, vn);
+ if (delta_m) {
+ vm = vfp_advance_sreg(vm, delta_m);
+ vfp_load_reg32(f1, vm);
+ }
+ }
+ return true;
+}
+
+static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
+ int vd, int vn, int vm, bool reads_vd)
+{
+ /*
+ * Do a half-precision operation. Functionally this is
+ * the same as do_vfp_3op_sp(), except:
+ * - it uses the FPST_FPCR_F16 float_status
+ * - it doesn't need the VFP vector handling (fp16 is a
+ * v8 feature, and in v8 VFP vectors don't exist)
+ * - it does the aa32_fp16_arith feature test
+ */
+ TCGv_i32 f0, f1, fd;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ f0 = tcg_temp_new_i32();
+ f1 = tcg_temp_new_i32();
+ fd = tcg_temp_new_i32();
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+
+ vfp_load_reg32(f0, vn);
+ vfp_load_reg32(f1, vm);
+
+ if (reads_vd) {
+ vfp_load_reg32(fd, vd);
+ }
+ fn(fd, f0, f1, fpst);
+ vfp_store_reg32(fd, vd);
+ return true;
+}
+
+static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
+ int vd, int vn, int vm, bool reads_vd)
+{
+ uint32_t delta_m = 0;
+ uint32_t delta_d = 0;
+ int veclen = s->vec_len;
+ TCGv_i64 f0, f1, fd;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_fpshvec, s) &&
+ (veclen != 0 || s->vec_stride != 0)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (veclen > 0) {
+ /* Figure out what type of vector operation this is. */
+ if (vfp_dreg_is_scalar(vd)) {
+ /* scalar */
+ veclen = 0;
+ } else {
+ delta_d = (s->vec_stride >> 1) + 1;
+
+ if (vfp_dreg_is_scalar(vm)) {
+ /* mixed scalar/vector */
+ delta_m = 0;
+ } else {
+ /* vector */
+ delta_m = delta_d;
+ }
+ }
+ }
+
+ f0 = tcg_temp_new_i64();
+ f1 = tcg_temp_new_i64();
+ fd = tcg_temp_new_i64();
+ fpst = fpstatus_ptr(FPST_FPCR);
+
+ vfp_load_reg64(f0, vn);
+ vfp_load_reg64(f1, vm);
+
+ for (;;) {
+ if (reads_vd) {
+ vfp_load_reg64(fd, vd);
+ }
+ fn(fd, f0, f1, fpst);
+ vfp_store_reg64(fd, vd);
+
+ if (veclen == 0) {
+ break;
+ }
+ /* Set up the operands for the next iteration */
+ veclen--;
+ vd = vfp_advance_dreg(vd, delta_d);
+ vn = vfp_advance_dreg(vn, delta_d);
+ vfp_load_reg64(f0, vn);
+ if (delta_m) {
+ vm = vfp_advance_dreg(vm, delta_m);
+ vfp_load_reg64(f1, vm);
+ }
+ }
+ return true;
+}
+
+static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
+{
+ uint32_t delta_m = 0;
+ uint32_t delta_d = 0;
+ int veclen = s->vec_len;
+ TCGv_i32 f0, fd;
+
+ /* Note that the caller must check the aa32_fpsp_v2 feature. */
+
+ if (!dc_isar_feature(aa32_fpshvec, s) &&
+ (veclen != 0 || s->vec_stride != 0)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (veclen > 0) {
+ /* Figure out what type of vector operation this is. */
+ if (vfp_sreg_is_scalar(vd)) {
+ /* scalar */
+ veclen = 0;
+ } else {
+ delta_d = s->vec_stride + 1;
+
+ if (vfp_sreg_is_scalar(vm)) {
+ /* mixed scalar/vector */
+ delta_m = 0;
+ } else {
+ /* vector */
+ delta_m = delta_d;
+ }
+ }
+ }
+
+ f0 = tcg_temp_new_i32();
+ fd = tcg_temp_new_i32();
+
+ vfp_load_reg32(f0, vm);
+
+ for (;;) {
+ fn(fd, f0);
+ vfp_store_reg32(fd, vd);
+
+ if (veclen == 0) {
+ break;
+ }
+
+ if (delta_m == 0) {
+ /* single source one-many */
+ while (veclen--) {
+ vd = vfp_advance_sreg(vd, delta_d);
+ vfp_store_reg32(fd, vd);
+ }
+ break;
+ }
+
+ /* Set up the operands for the next iteration */
+ veclen--;
+ vd = vfp_advance_sreg(vd, delta_d);
+ vm = vfp_advance_sreg(vm, delta_m);
+ vfp_load_reg32(f0, vm);
+ }
+ return true;
+}
+
+static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
+{
+ /*
+ * Do a half-precision operation. Functionally this is
+ * the same as do_vfp_2op_sp(), except:
+ * - it doesn't need the VFP vector handling (fp16 is a
+ * v8 feature, and in v8 VFP vectors don't exist)
+ * - it does the aa32_fp16_arith feature test
+ */
+ TCGv_i32 f0;
+
+ /* Note that the caller must check the aa32_fp16_arith feature */
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ f0 = tcg_temp_new_i32();
+ vfp_load_reg32(f0, vm);
+ fn(f0, f0);
+ vfp_store_reg32(f0, vd);
+
+ return true;
+}
+
+static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
+{
+ uint32_t delta_m = 0;
+ uint32_t delta_d = 0;
+ int veclen = s->vec_len;
+ TCGv_i64 f0, fd;
+
+ /* Note that the caller must check the aa32_fpdp_v2 feature. */
+
+ /* UNDEF accesses to D16-D31 if they don't exist */
+ if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_fpshvec, s) &&
+ (veclen != 0 || s->vec_stride != 0)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (veclen > 0) {
+ /* Figure out what type of vector operation this is. */
+ if (vfp_dreg_is_scalar(vd)) {
+ /* scalar */
+ veclen = 0;
+ } else {
+ delta_d = (s->vec_stride >> 1) + 1;
+
+ if (vfp_dreg_is_scalar(vm)) {
+ /* mixed scalar/vector */
+ delta_m = 0;
+ } else {
+ /* vector */
+ delta_m = delta_d;
+ }
+ }
+ }
+
+ f0 = tcg_temp_new_i64();
+ fd = tcg_temp_new_i64();
+
+ vfp_load_reg64(f0, vm);
+
+ for (;;) {
+ fn(fd, f0);
+ vfp_store_reg64(fd, vd);
+
+ if (veclen == 0) {
+ break;
+ }
+
+ if (delta_m == 0) {
+ /* single source one-many */
+ while (veclen--) {
+ vd = vfp_advance_dreg(vd, delta_d);
+ vfp_store_reg64(fd, vd);
+ }
+ break;
+ }
+
+ /* Set up the operands for the next iteration */
+ veclen--;
+ vd = vfp_advance_dreg(vd, delta_d);
+ vm = vfp_advance_dreg(vm, delta_m);
+ vfp_load_reg64(f0, vm);
+ }
+ return true;
+}
+
+static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /* Note that order of inputs to the add matters for NaNs */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+ gen_helper_vfp_addh(vd, vd, tmp, fpst);
+}
+
+static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /* Note that order of inputs to the add matters for NaNs */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_muls(tmp, vn, vm, fpst);
+ gen_helper_vfp_adds(vd, vd, tmp, fpst);
+}
+
+static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
+{
+ return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
+{
+ /* Note that order of inputs to the add matters for NaNs */
+ TCGv_i64 tmp = tcg_temp_new_i64();
+
+ gen_helper_vfp_muld(tmp, vn, vm, fpst);
+ gen_helper_vfp_addd(vd, vd, tmp, fpst);
+}
+
+static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
+{
+ return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /*
+ * VMLS: vd = vd + -(vn * vm)
+ * Note that order of inputs to the add matters for NaNs.
+ */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+ gen_helper_vfp_negh(tmp, tmp);
+ gen_helper_vfp_addh(vd, vd, tmp, fpst);
+}
+
+static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /*
+ * VMLS: vd = vd + -(vn * vm)
+ * Note that order of inputs to the add matters for NaNs.
+ */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_muls(tmp, vn, vm, fpst);
+ gen_helper_vfp_negs(tmp, tmp);
+ gen_helper_vfp_adds(vd, vd, tmp, fpst);
+}
+
+static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
+{
+ return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
+{
+ /*
+ * VMLS: vd = vd + -(vn * vm)
+ * Note that order of inputs to the add matters for NaNs.
+ */
+ TCGv_i64 tmp = tcg_temp_new_i64();
+
+ gen_helper_vfp_muld(tmp, vn, vm, fpst);
+ gen_helper_vfp_negd(tmp, tmp);
+ gen_helper_vfp_addd(vd, vd, tmp, fpst);
+}
+
+static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
+{
+ return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /*
+ * VNMLS: -fd + (fn * fm)
+ * Note that it isn't valid to replace (-A + B) with (B - A) or similar
+ * plausible looking simplifications because this will give wrong results
+ * for NaNs.
+ */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+ gen_helper_vfp_negh(vd, vd);
+ gen_helper_vfp_addh(vd, vd, tmp, fpst);
+}
+
+static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /*
+ * VNMLS: -fd + (fn * fm)
+ * Note that it isn't valid to replace (-A + B) with (B - A) or similar
+ * plausible looking simplifications because this will give wrong results
+ * for NaNs.
+ */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_muls(tmp, vn, vm, fpst);
+ gen_helper_vfp_negs(vd, vd);
+ gen_helper_vfp_adds(vd, vd, tmp, fpst);
+}
+
+static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
+{
+ return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
+{
+ /*
+ * VNMLS: -fd + (fn * fm)
+ * Note that it isn't valid to replace (-A + B) with (B - A) or similar
+ * plausible looking simplifications because this will give wrong results
+ * for NaNs.
+ */
+ TCGv_i64 tmp = tcg_temp_new_i64();
+
+ gen_helper_vfp_muld(tmp, vn, vm, fpst);
+ gen_helper_vfp_negd(vd, vd);
+ gen_helper_vfp_addd(vd, vd, tmp, fpst);
+}
+
+static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
+{
+ return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /* VNMLA: -fd + -(fn * fm) */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+ gen_helper_vfp_negh(tmp, tmp);
+ gen_helper_vfp_negh(vd, vd);
+ gen_helper_vfp_addh(vd, vd, tmp, fpst);
+}
+
+static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /* VNMLA: -fd + -(fn * fm) */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_muls(tmp, vn, vm, fpst);
+ gen_helper_vfp_negs(tmp, tmp);
+ gen_helper_vfp_negs(vd, vd);
+ gen_helper_vfp_adds(vd, vd, tmp, fpst);
+}
+
+static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
+{
+ return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
+}
+
+static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
+{
+ /* VNMLA: -fd + -(fn * fm) */
+ TCGv_i64 tmp = tcg_temp_new_i64();
+
+ gen_helper_vfp_muld(tmp, vn, vm, fpst);
+ gen_helper_vfp_negd(tmp, tmp);
+ gen_helper_vfp_negd(vd, vd);
+ gen_helper_vfp_addd(vd, vd, tmp, fpst);
+}
+
+static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
+{
+ return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
+}
+
+static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
+{
+ return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
+{
+ return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
+}
+
+static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /* VNMUL: -(fn * fm) */
+ gen_helper_vfp_mulh(vd, vn, vm, fpst);
+ gen_helper_vfp_negh(vd, vd);
+}
+
+static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
+}
+
+static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /* VNMUL: -(fn * fm) */
+ gen_helper_vfp_muls(vd, vn, vm, fpst);
+ gen_helper_vfp_negs(vd, vd);
+}
+
+static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
+{
+ return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
+}
+
+static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
+{
+ /* VNMUL: -(fn * fm) */
+ gen_helper_vfp_muld(vd, vn, vm, fpst);
+ gen_helper_vfp_negd(vd, vd);
+}
+
+static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
+{
+ return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
+{
+ return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
+{
+ return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
+{
+ return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
+{
+ return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
+{
+ return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
+{
+ return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
+{
+ if (!dc_isar_feature(aa32_vminmaxnm, s)) {
+ return false;
+ }
+ return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
+ a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
+{
+ if (!dc_isar_feature(aa32_vminmaxnm, s)) {
+ return false;
+ }
+ return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
+ a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
+{
+ if (!dc_isar_feature(aa32_vminmaxnm, s)) {
+ return false;
+ }
+ return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
+ a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
+{
+ if (!dc_isar_feature(aa32_vminmaxnm, s)) {
+ return false;
+ }
+ return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
+ a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
+{
+ if (!dc_isar_feature(aa32_vminmaxnm, s)) {
+ return false;
+ }
+ return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
+ a->vd, a->vn, a->vm, false);
+}
+
+static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
+{
+ if (!dc_isar_feature(aa32_vminmaxnm, s)) {
+ return false;
+ }
+ return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
+ a->vd, a->vn, a->vm, false);
+}
+
+static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
+{
+ /*
+ * VFNMA : fd = muladd(-fd, fn, fm)
+ * VFNMS : fd = muladd(-fd, -fn, fm)
+ * VFMA : fd = muladd( fd, fn, fm)
+ * VFMS : fd = muladd( fd, -fn, fm)
+ *
+ * These are fused multiply-add, and must be done as one floating
+ * point operation with no rounding between the multiplication and
+ * addition steps. NB that doing the negations here as separate
+ * steps is correct: an input NaN should come out with its sign
+ * bit flipped if it is a negated input.
+ */
+ TCGv_ptr fpst;
+ TCGv_i32 vn, vm, vd;
+
+ /*
+ * Present in VFPv4 only, and only with the FP16 extension.
+ * Note that we can't rely on the SIMDFMAC check alone, because
+ * in a Neon-no-VFP core that ID register field will be non-zero.
+ */
+ if (!dc_isar_feature(aa32_fp16_arith, s) ||
+ !dc_isar_feature(aa32_simdfmac, s) ||
+ !dc_isar_feature(aa32_fpsp_v2, s)) {
+ return false;
+ }
+
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vn = tcg_temp_new_i32();
+ vm = tcg_temp_new_i32();
+ vd = tcg_temp_new_i32();
+
+ vfp_load_reg32(vn, a->vn);
+ vfp_load_reg32(vm, a->vm);
+ if (neg_n) {
+ /* VFNMS, VFMS */
+ gen_helper_vfp_negh(vn, vn);
+ }
+ vfp_load_reg32(vd, a->vd);
+ if (neg_d) {
+ /* VFNMA, VFNMS */
+ gen_helper_vfp_negh(vd, vd);
+ }
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
+ vfp_store_reg32(vd, a->vd);
+ return true;
+}
+
+static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
+{
+ /*
+ * VFNMA : fd = muladd(-fd, fn, fm)
+ * VFNMS : fd = muladd(-fd, -fn, fm)
+ * VFMA : fd = muladd( fd, fn, fm)
+ * VFMS : fd = muladd( fd, -fn, fm)
+ *
+ * These are fused multiply-add, and must be done as one floating
+ * point operation with no rounding between the multiplication and
+ * addition steps. NB that doing the negations here as separate
+ * steps is correct: an input NaN should come out with its sign
+ * bit flipped if it is a negated input.
+ */
+ TCGv_ptr fpst;
+ TCGv_i32 vn, vm, vd;
+
+ /*
+ * Present in VFPv4 only.
+ * Note that we can't rely on the SIMDFMAC check alone, because
+ * in a Neon-no-VFP core that ID register field will be non-zero.
+ */
+ if (!dc_isar_feature(aa32_simdfmac, s) ||
+ !dc_isar_feature(aa32_fpsp_v2, s)) {
+ return false;
+ }
+ /*
+ * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
+ * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
+ */
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vn = tcg_temp_new_i32();
+ vm = tcg_temp_new_i32();
+ vd = tcg_temp_new_i32();
+
+ vfp_load_reg32(vn, a->vn);
+ vfp_load_reg32(vm, a->vm);
+ if (neg_n) {
+ /* VFNMS, VFMS */
+ gen_helper_vfp_negs(vn, vn);
+ }
+ vfp_load_reg32(vd, a->vd);
+ if (neg_d) {
+ /* VFNMA, VFNMS */
+ gen_helper_vfp_negs(vd, vd);
+ }
+ fpst = fpstatus_ptr(FPST_FPCR);
+ gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
+ vfp_store_reg32(vd, a->vd);
+ return true;
+}
+
+static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
+{
+ /*
+ * VFNMA : fd = muladd(-fd, fn, fm)
+ * VFNMS : fd = muladd(-fd, -fn, fm)
+ * VFMA : fd = muladd( fd, fn, fm)
+ * VFMS : fd = muladd( fd, -fn, fm)
+ *
+ * These are fused multiply-add, and must be done as one floating
+ * point operation with no rounding between the multiplication and
+ * addition steps. NB that doing the negations here as separate
+ * steps is correct: an input NaN should come out with its sign
+ * bit flipped if it is a negated input.
+ */
+ TCGv_ptr fpst;
+ TCGv_i64 vn, vm, vd;
+
+ /*
+ * Present in VFPv4 only.
+ * Note that we can't rely on the SIMDFMAC check alone, because
+ * in a Neon-no-VFP core that ID register field will be non-zero.
+ */
+ if (!dc_isar_feature(aa32_simdfmac, s) ||
+ !dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+ /*
+ * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
+ * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
+ */
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vn = tcg_temp_new_i64();
+ vm = tcg_temp_new_i64();
+ vd = tcg_temp_new_i64();
+
+ vfp_load_reg64(vn, a->vn);
+ vfp_load_reg64(vm, a->vm);
+ if (neg_n) {
+ /* VFNMS, VFMS */
+ gen_helper_vfp_negd(vn, vn);
+ }
+ vfp_load_reg64(vd, a->vd);
+ if (neg_d) {
+ /* VFNMA, VFNMS */
+ gen_helper_vfp_negd(vd, vd);
+ }
+ fpst = fpstatus_ptr(FPST_FPCR);
+ gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
+ vfp_store_reg64(vd, a->vd);
+ return true;
+}
+
+#define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD) \
+ static bool trans_##INSN##_##PREC(DisasContext *s, \
+ arg_##INSN##_##PREC *a) \
+ { \
+ return do_vfm_##PREC(s, a, NEGN, NEGD); \
+ }
+
+#define MAKE_VFM_TRANS_FNS(PREC) \
+ MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
+ MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
+ MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
+ MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)
+
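+/*
+ * For example, MAKE_VFM_TRANS_FNS(sp) expands to trans_VFMA_sp(),
+ * trans_VFMS_sp(), trans_VFNMA_sp() and trans_VFNMS_sp(), each
+ * forwarding to do_vfm_sp() with the negate-Vn / negate-Vd flags
+ * from the table in the comment above.
+ */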
+MAKE_VFM_TRANS_FNS(hp)
+MAKE_VFM_TRANS_FNS(sp)
+MAKE_VFM_TRANS_FNS(dp)
+
+static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
+{
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vfp_store_reg32(tcg_constant_i32(vfp_expand_imm(MO_16, a->imm)), a->vd);
+ return true;
+}
+
+static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
+{
+ uint32_t delta_d = 0;
+ int veclen = s->vec_len;
+ TCGv_i32 fd;
+ uint32_t vd;
+
+ vd = a->vd;
+
+ if (!dc_isar_feature(aa32_fpsp_v3, s)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_fpshvec, s) &&
+ (veclen != 0 || s->vec_stride != 0)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (veclen > 0) {
+ /* Figure out what type of vector operation this is. */
+ if (vfp_sreg_is_scalar(vd)) {
+ /* scalar */
+ veclen = 0;
+ } else {
+ delta_d = s->vec_stride + 1;
+ }
+ }
+
+ fd = tcg_constant_i32(vfp_expand_imm(MO_32, a->imm));
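+ /* For example, imm8 0x70 expands to 1.0f (0x3f800000) and 0x00 to 2.0f. */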
+
+ for (;;) {
+ vfp_store_reg32(fd, vd);
+
+ if (veclen == 0) {
+ break;
+ }
+
+ /* Set up the operands for the next iteration */
+ veclen--;
+ vd = vfp_advance_sreg(vd, delta_d);
+ }
+
+ return true;
+}
+
+static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
+{
+ uint32_t delta_d = 0;
+ int veclen = s->vec_len;
+ TCGv_i64 fd;
+ uint32_t vd;
+
+ vd = a->vd;
+
+ if (!dc_isar_feature(aa32_fpdp_v3, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_fpshvec, s) &&
+ (veclen != 0 || s->vec_stride != 0)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (veclen > 0) {
+ /* Figure out what type of vector operation this is. */
+ if (vfp_dreg_is_scalar(vd)) {
+ /* scalar */
+ veclen = 0;
+ } else {
+ delta_d = (s->vec_stride >> 1) + 1;
+ }
+ }
+
+ fd = tcg_constant_i64(vfp_expand_imm(MO_64, a->imm));
+
+ for (;;) {
+ vfp_store_reg64(fd, vd);
+
+ if (veclen == 0) {
+ break;
+ }
+
+ /* Set up the operands for the next iteration */
+ veclen--;
+ vd = vfp_advance_dreg(vd, delta_d);
+ }
+
+ return true;
+}
+
+#define DO_VFP_2OP(INSN, PREC, FN, CHECK) \
+ static bool trans_##INSN##_##PREC(DisasContext *s, \
+ arg_##INSN##_##PREC *a) \
+ { \
+ if (!dc_isar_feature(CHECK, s)) { \
+ return false; \
+ } \
+ return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \
+ }
+
+#define DO_VFP_VMOV(INSN, PREC, FN) \
+ static bool trans_##INSN##_##PREC(DisasContext *s, \
+ arg_##INSN##_##PREC *a) \
+ { \
+ if (!dc_isar_feature(aa32_fp##PREC##_v2, s) && \
+ !dc_isar_feature(aa32_mve, s)) { \
+ return false; \
+ } \
+ return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \
+ }
+
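+/*
+ * For example, DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32) defines
+ * trans_VMOV_reg_sp(), which accepts the insn if either single
+ * precision VFP (fpsp_v2) or MVE is present and then hands off to
+ * do_vfp_2op_sp().
+ */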
+DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32)
+DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64)
+
+DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith)
+DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2)
+DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2)
+
+DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith)
+DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2)
+DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2)
+
+static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
+{
+ gen_helper_vfp_sqrth(vd, vm, tcg_env);
+}
+
+static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
+{
+ gen_helper_vfp_sqrts(vd, vm, tcg_env);
+}
+
+static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
+{
+ gen_helper_vfp_sqrtd(vd, vm, tcg_env);
+}
+
+DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
+DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2)
+DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2)
+
+static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
+{
+ TCGv_i32 vd, vm;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ /* Vm/M bits must be zero for the Z variant */
+ if (a->z && a->vm != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vd = tcg_temp_new_i32();
+ vm = tcg_temp_new_i32();
+
+ vfp_load_reg32(vd, a->vd);
+ if (a->z) {
+ tcg_gen_movi_i32(vm, 0);
+ } else {
+ vfp_load_reg32(vm, a->vm);
+ }
+
+ if (a->e) {
+ gen_helper_vfp_cmpeh(vd, vm, tcg_env);
+ } else {
+ gen_helper_vfp_cmph(vd, vm, tcg_env);
+ }
+ return true;
+}
+
+static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
+{
+ TCGv_i32 vd, vm;
+
+ if (!dc_isar_feature(aa32_fpsp_v2, s)) {
+ return false;
+ }
+
+ /* Vm/M bits must be zero for the Z variant */
+ if (a->z && a->vm != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vd = tcg_temp_new_i32();
+ vm = tcg_temp_new_i32();
+
+ vfp_load_reg32(vd, a->vd);
+ if (a->z) {
+ tcg_gen_movi_i32(vm, 0);
+ } else {
+ vfp_load_reg32(vm, a->vm);
+ }
+
+ if (a->e) {
+ gen_helper_vfp_cmpes(vd, vm, tcg_env);
+ } else {
+ gen_helper_vfp_cmps(vd, vm, tcg_env);
+ }
+ return true;
+}
+
+static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
+{
+ TCGv_i64 vd, vm;
+
+ if (!dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ /* Vm/M bits must be zero for the Z variant */
+ if (a->z && a->vm != 0) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vd = tcg_temp_new_i64();
+ vm = tcg_temp_new_i64();
+
+ vfp_load_reg64(vd, a->vd);
+ if (a->z) {
+ tcg_gen_movi_i64(vm, 0);
+ } else {
+ vfp_load_reg64(vm, a->vm);
+ }
+
+ if (a->e) {
+ gen_helper_vfp_cmped(vd, vm, tcg_env);
+ } else {
+ gen_helper_vfp_cmpd(vd, vm, tcg_env);
+ }
+ return true;
+}
+
+static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 ahp_mode;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fp16_spconv, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = fpstatus_ptr(FPST_FPCR);
+ ahp_mode = get_ahp_flag();
+ tmp = tcg_temp_new_i32();
+ /* The T bit tells us if we want the low or high 16 bits of Vm */
+ tcg_gen_ld16u_i32(tmp, tcg_env, vfp_f16_offset(a->vm, a->t));
+ gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
+ vfp_store_reg32(tmp, a->vd);
+ return true;
+}
+
+static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 ahp_mode;
+ TCGv_i32 tmp;
+ TCGv_i64 vd;
+
+ if (!dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = fpstatus_ptr(FPST_FPCR);
+ ahp_mode = get_ahp_flag();
+ tmp = tcg_temp_new_i32();
+ /* The T bit tells us if we want the low or high 16 bits of Vm */
+ tcg_gen_ld16u_i32(tmp, tcg_env, vfp_f16_offset(a->vm, a->t));
+ vd = tcg_temp_new_i64();
+ gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
+ vfp_store_reg64(vd, a->vd);
+ return true;
+}
+
+static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_bf16, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
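+ /*
+ * gen_helper_bfcvt() narrows the f32 in Vm to bfloat16 (same sign
+ * and 8-bit exponent layout as binary32, 7-bit mantissa); the
+ * result is stored to the T-selected half of Vd below.
+ */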
+ fpst = fpstatus_ptr(FPST_FPCR);
+ tmp = tcg_temp_new_i32();
+
+ vfp_load_reg32(tmp, a->vm);
+ gen_helper_bfcvt(tmp, tmp, fpst);
+ tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
+ return true;
+}
+
+static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 ahp_mode;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fp16_spconv, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = fpstatus_ptr(FPST_FPCR);
+ ahp_mode = get_ahp_flag();
+ tmp = tcg_temp_new_i32();
+
+ vfp_load_reg32(tmp, a->vm);
+ gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
+ tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
+ return true;
+}
+
+static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 ahp_mode;
+ TCGv_i32 tmp;
+ TCGv_i64 vm;
+
+ if (!dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = fpstatus_ptr(FPST_FPCR);
+ ahp_mode = get_ahp_flag();
+ tmp = tcg_temp_new_i32();
+ vm = tcg_temp_new_i64();
+
+ vfp_load_reg64(vm, a->vm);
+ gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
+ tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
+ return true;
+}
+
+static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ vfp_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ gen_helper_rinth(tmp, tmp, fpst);
+ vfp_store_reg32(tmp, a->vd);
+ return true;
+}
+
+static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_vrint, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ vfp_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR);
+ gen_helper_rints(tmp, tmp, fpst);
+ vfp_store_reg32(tmp, a->vd);
+ return true;
+}
+
+static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i64 tmp;
+
+ if (!dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_vrint, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i64();
+ vfp_load_reg64(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR);
+ gen_helper_rintd(tmp, tmp, fpst);
+ vfp_store_reg64(tmp, a->vd);
+ return true;
+}
+
+static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+ TCGv_i32 tcg_rmode;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ vfp_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
+ gen_helper_rinth(tmp, tmp, fpst);
+ gen_restore_rmode(tcg_rmode, fpst);
+ vfp_store_reg32(tmp, a->vd);
+ return true;
+}
+
+static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+ TCGv_i32 tcg_rmode;
+
+ if (!dc_isar_feature(aa32_vrint, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ vfp_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR);
+ tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
+ gen_helper_rints(tmp, tmp, fpst);
+ gen_restore_rmode(tcg_rmode, fpst);
+ vfp_store_reg32(tmp, a->vd);
+ return true;
+}
+
+static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i64 tmp;
+ TCGv_i32 tcg_rmode;
+
+ if (!dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_vrint, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i64();
+ vfp_load_reg64(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR);
+ tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
+ gen_helper_rintd(tmp, tmp, fpst);
+ gen_restore_rmode(tcg_rmode, fpst);
+ vfp_store_reg64(tmp, a->vd);
+ return true;
+}
+
+static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ vfp_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ gen_helper_rinth_exact(tmp, tmp, fpst);
+ vfp_store_reg32(tmp, a->vd);
+ return true;
+}
+
+static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_vrint, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i32();
+ vfp_load_reg32(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR);
+ gen_helper_rints_exact(tmp, tmp, fpst);
+ vfp_store_reg32(tmp, a->vd);
+ return true;
+}
+
+static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i64 tmp;
+
+ if (!dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_vrint, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i64();
+ vfp_load_reg64(tmp, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR);
+ gen_helper_rintd_exact(tmp, tmp, fpst);
+ vfp_store_reg64(tmp, a->vd);
+ return true;
+}
+
+static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
+{
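+ /* VCVT.F64.F32: widen the single-precision Vm to double-precision Vd */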
+ TCGv_i64 vd;
+ TCGv_i32 vm;
+
+ if (!dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vm = tcg_temp_new_i32();
+ vd = tcg_temp_new_i64();
+ vfp_load_reg32(vm, a->vm);
+ gen_helper_vfp_fcvtds(vd, vm, tcg_env);
+ vfp_store_reg64(vd, a->vd);
+ return true;
+}
+
+static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
+{
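+ /* VCVT.F32.F64: narrow the double-precision Vm to single-precision Vd */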
+ TCGv_i64 vm;
+ TCGv_i32 vd;
+
+ if (!dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vd = tcg_temp_new_i32();
+ vm = tcg_temp_new_i64();
+ vfp_load_reg64(vm, a->vm);
+ gen_helper_vfp_fcvtsd(vd, vm, tcg_env);
+ vfp_store_reg32(vd, a->vd);
+ return true;
+}
+
+static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
+{
+ TCGv_i32 vm;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vm = tcg_temp_new_i32();
+ vfp_load_reg32(vm, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ if (a->s) {
+ /* i32 -> f16 */
+ gen_helper_vfp_sitoh(vm, vm, fpst);
+ } else {
+ /* u32 -> f16 */
+ gen_helper_vfp_uitoh(vm, vm, fpst);
+ }
+ vfp_store_reg32(vm, a->vd);
+ return true;
+}
+
+static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
+{
+ TCGv_i32 vm;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_fpsp_v2, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vm = tcg_temp_new_i32();
+ vfp_load_reg32(vm, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR);
+ if (a->s) {
+ /* i32 -> f32 */
+ gen_helper_vfp_sitos(vm, vm, fpst);
+ } else {
+ /* u32 -> f32 */
+ gen_helper_vfp_uitos(vm, vm, fpst);
+ }
+ vfp_store_reg32(vm, a->vd);
+ return true;
+}
+
+static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
+{
+ TCGv_i32 vm;
+ TCGv_i64 vd;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vm = tcg_temp_new_i32();
+ vd = tcg_temp_new_i64();
+ vfp_load_reg32(vm, a->vm);
+ fpst = fpstatus_ptr(FPST_FPCR);
+ if (a->s) {
+ /* i32 -> f64 */
+ gen_helper_vfp_sitod(vd, vm, fpst);
+ } else {
+ /* u32 -> f64 */
+ gen_helper_vfp_uitod(vd, vm, fpst);
+ }
+ vfp_store_reg64(vd, a->vd);
+ return true;
+}
+
+static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
+{
+ TCGv_i32 vd;
+ TCGv_i64 vm;
+
+ if (!dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ if (!dc_isar_feature(aa32_jscvt, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vm = tcg_temp_new_i64();
+ vd = tcg_temp_new_i32();
+ vfp_load_reg64(vm, a->vm);
+ gen_helper_vjcvt(vd, vm, tcg_env);
+ vfp_store_reg32(vd, a->vd);
+ return true;
+}
+
+static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
+{
+ TCGv_i32 vd, shift;
+ TCGv_ptr fpst;
+ int frac_bits;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
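+ /*
+ * For example, opc bit 0 (sx) clear with imm = 12 is a 16-bit
+ * fixed-point operand with frac_bits = 16 - 12 = 4 fraction bits;
+ * cases 0..3 below convert fixed-point to FP and cases 4..7
+ * convert FP to fixed-point, as the helper names indicate.
+ */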
+
+ vd = tcg_temp_new_i32();
+ vfp_load_reg32(vd, a->vd);
+
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ shift = tcg_constant_i32(frac_bits);
+
+ /* Switch on op:U:sx bits */
+ switch (a->opc) {
+ case 0:
+ gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
+ break;
+ case 1:
+ gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
+ break;
+ case 2:
+ gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
+ break;
+ case 3:
+ gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
+ break;
+ case 4:
+ gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
+ break;
+ case 5:
+ gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
+ break;
+ case 6:
+ gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
+ break;
+ case 7:
+ gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ vfp_store_reg32(vd, a->vd);
+ return true;
+}
+
+static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
+{
+ TCGv_i32 vd, shift;
+ TCGv_ptr fpst;
+ int frac_bits;
+
+ if (!dc_isar_feature(aa32_fpsp_v3, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
+
+ vd = tcg_temp_new_i32();
+ vfp_load_reg32(vd, a->vd);
+
+ fpst = fpstatus_ptr(FPST_FPCR);
+ shift = tcg_constant_i32(frac_bits);
+
+ /* Switch on op:U:sx bits */
+ switch (a->opc) {
+ case 0:
+ gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
+ break;
+ case 1:
+ gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
+ break;
+ case 2:
+ gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
+ break;
+ case 3:
+ gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
+ break;
+ case 4:
+ gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
+ break;
+ case 5:
+ gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
+ break;
+ case 6:
+ gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
+ break;
+ case 7:
+ gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ vfp_store_reg32(vd, a->vd);
+ return true;
+}
+
+static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
+{
+ TCGv_i64 vd;
+ TCGv_i32 shift;
+ TCGv_ptr fpst;
+ int frac_bits;
+
+ if (!dc_isar_feature(aa32_fpdp_v3, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
+
+ vd = tcg_temp_new_i64();
+ vfp_load_reg64(vd, a->vd);
+
+ fpst = fpstatus_ptr(FPST_FPCR);
+ shift = tcg_constant_i32(frac_bits);
+
+ /* Switch on op:U:sx bits */
+ switch (a->opc) {
+ case 0:
+ gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
+ break;
+ case 1:
+ gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
+ break;
+ case 2:
+ gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
+ break;
+ case 3:
+ gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
+ break;
+ case 4:
+ gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
+ break;
+ case 5:
+ gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
+ break;
+ case 6:
+ gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
+ break;
+ case 7:
+ gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ vfp_store_reg64(vd, a->vd);
+ return true;
+}
+
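+/*
+ * VCVT from floating point to integer: a->rz set selects the plain VCVT
+ * form, which always rounds towards zero; with rz clear this is VCVTR,
+ * which rounds using the current FPSCR rounding mode.
+ */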
+static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
+{
+ TCGv_i32 vm;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = fpstatus_ptr(FPST_FPCR_F16);
+ vm = tcg_temp_new_i32();
+ vfp_load_reg32(vm, a->vm);
+
+ if (a->s) {
+ if (a->rz) {
+ gen_helper_vfp_tosizh(vm, vm, fpst);
+ } else {
+ gen_helper_vfp_tosih(vm, vm, fpst);
+ }
+ } else {
+ if (a->rz) {
+ gen_helper_vfp_touizh(vm, vm, fpst);
+ } else {
+ gen_helper_vfp_touih(vm, vm, fpst);
+ }
+ }
+ vfp_store_reg32(vm, a->vd);
+ return true;
+}
+
+static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
+{
+ TCGv_i32 vm;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_fpsp_v2, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = fpstatus_ptr(FPST_FPCR);
+ vm = tcg_temp_new_i32();
+ vfp_load_reg32(vm, a->vm);
+
+ if (a->s) {
+ if (a->rz) {
+ gen_helper_vfp_tosizs(vm, vm, fpst);
+ } else {
+ gen_helper_vfp_tosis(vm, vm, fpst);
+ }
+ } else {
+ if (a->rz) {
+ gen_helper_vfp_touizs(vm, vm, fpst);
+ } else {
+ gen_helper_vfp_touis(vm, vm, fpst);
+ }
+ }
+ vfp_store_reg32(vm, a->vd);
+ return true;
+}
+
+static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
+{
+ TCGv_i32 vd;
+ TCGv_i64 vm;
+ TCGv_ptr fpst;
+
+ if (!dc_isar_feature(aa32_fpdp_v2, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = fpstatus_ptr(FPST_FPCR);
+ vm = tcg_temp_new_i64();
+ vd = tcg_temp_new_i32();
+ vfp_load_reg64(vm, a->vm);
+
+ if (a->s) {
+ if (a->rz) {
+ gen_helper_vfp_tosizd(vd, vm, fpst);
+ } else {
+ gen_helper_vfp_tosid(vd, vm, fpst);
+ }
+ } else {
+ if (a->rz) {
+ gen_helper_vfp_touizd(vd, vm, fpst);
+ } else {
+ gen_helper_vfp_touid(vd, vm, fpst);
+ }
+ }
+ vfp_store_reg32(vd, a->vd);
+ return true;
+}
+
+static bool trans_VINS(DisasContext *s, arg_VINS *a)
+{
+ TCGv_i32 rd, rm;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /* Insert low half of Vm into high half of Vd */
+ rm = tcg_temp_new_i32();
+ rd = tcg_temp_new_i32();
+ vfp_load_reg32(rm, a->vm);
+ vfp_load_reg32(rd, a->vd);
+ tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
+ vfp_store_reg32(rd, a->vd);
+ return true;
+}
+
+static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
+{
+ TCGv_i32 rm;
+
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
+ return false;
+ }
+
+ if (s->vec_len != 0 || s->vec_stride != 0) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ /* Set Vd to high half of Vm */
+ rm = tcg_temp_new_i32();
+ vfp_load_reg32(rm, a->vm);
+ tcg_gen_shri_i32(rm, rm, 16);
+ vfp_store_reg32(rm, a->vd);
+ return true;
+}
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
new file mode 100644
index 0000000000..dc49a8d806
--- /dev/null
+++ b/target/arm/tcg/translate.c
@@ -0,0 +1,9711 @@
+/*
+ * ARM translation
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2005-2007 CodeSourcery
+ * Copyright (c) 2007 OpenedHand, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "qemu/osdep.h"
+
+#include "translate.h"
+#include "translate-a32.h"
+#include "qemu/log.h"
+#include "disas/disas.h"
+#include "arm_ldst.h"
+#include "semihosting/semihost.h"
+#include "cpregs.h"
+#include "exec/helper-proto.h"
+
+#define HELPER_H "helper.h"
+#include "exec/helper-info.c.inc"
+#undef HELPER_H
+
+#define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
+#define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
+/* currently all emulated v5 cores are also v5TE, so don't bother */
+#define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
+#define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
+#define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
+#define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
+#define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
+#define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
+#define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
+
+/* These are TCG temporaries used only by the legacy iwMMXt decoder */
+static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
+/* These are TCG globals which alias CPUARMState fields */
+static TCGv_i32 cpu_R[16];
+TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
+TCGv_i64 cpu_exclusive_addr;
+TCGv_i64 cpu_exclusive_val;
+
+static const char * const regnames[] =
+ { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
+
+
+/* initialize TCG globals. */
+void arm_translate_init(void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++) {
+ cpu_R[i] = tcg_global_mem_new_i32(tcg_env,
+ offsetof(CPUARMState, regs[i]),
+ regnames[i]);
+ }
+ cpu_CF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, CF), "CF");
+ cpu_NF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, NF), "NF");
+ cpu_VF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, VF), "VF");
+ cpu_ZF = tcg_global_mem_new_i32(tcg_env, offsetof(CPUARMState, ZF), "ZF");
+
+ cpu_exclusive_addr = tcg_global_mem_new_i64(tcg_env,
+ offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
+ cpu_exclusive_val = tcg_global_mem_new_i64(tcg_env,
+ offsetof(CPUARMState, exclusive_val), "exclusive_val");
+
+ a64_translate_init();
+}
+
+uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
+{
+ /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
+ switch (cmode) {
+ case 0: case 1:
+ /* no-op */
+ break;
+ case 2: case 3:
+ imm <<= 8;
+ break;
+ case 4: case 5:
+ imm <<= 16;
+ break;
+ case 6: case 7:
+ imm <<= 24;
+ break;
+ case 8: case 9:
+ imm |= imm << 16;
+ break;
+ case 10: case 11:
+ imm = (imm << 8) | (imm << 24);
+ break;
+ case 12:
+ imm = (imm << 8) | 0xff;
+ break;
+ case 13:
+ imm = (imm << 16) | 0xffff;
+ break;
+ case 14:
+ if (op) {
+ /*
+ * This and cmode == 15 op == 1 are the only cases where
+ * the top and bottom 32 bits of the encoded constant differ.
+ */
+ uint64_t imm64 = 0;
+ int n;
+
+ for (n = 0; n < 8; n++) {
+ if (imm & (1 << n)) {
+ imm64 |= (0xffULL << (n * 8));
+ }
+ }
+ return imm64;
+ }
+ imm |= (imm << 8) | (imm << 16) | (imm << 24);
+ break;
+ case 15:
+ if (op) {
+ /* Reserved encoding for AArch32; valid for AArch64 */
+ uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
+ if (imm & 0x80) {
+ imm64 |= 0x8000000000000000ULL;
+ }
+ if (imm & 0x40) {
+ imm64 |= 0x3fc0000000000000ULL;
+ } else {
+ imm64 |= 0x4000000000000000ULL;
+ }
+ return imm64;
+ }
+ imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
+ | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
+ break;
+ }
+ if (op) {
+ imm = ~imm;
+ }
+ return dup_const(MO_32, imm);
+}
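+
+/*
+ * Examples of the AdvSIMDExpandImm expansion above: cmode == 12 with
+ * op == 0 "ones-extends" the byte, so imm == 0xab becomes 0x0000abff
+ * replicated to both 32-bit halves; cmode == 14 with op == 1 turns each
+ * set bit n of imm into byte 0xff at byte position n, so imm == 0xa5
+ * becomes 0xff00ff0000ff00ff.
+ */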
+
+/* Generate a label used for skipping this instruction */
+void arm_gen_condlabel(DisasContext *s)
+{
+ if (!s->condjmp) {
+ s->condlabel = gen_disas_label(s);
+ s->condjmp = 1;
+ }
+}
+
+/* Flags for the disas_set_da_iss info argument:
+ * lower bits hold the Rt register number, higher bits are flags.
+ */
+typedef enum ISSInfo {
+ ISSNone = 0,
+ ISSRegMask = 0x1f,
+ ISSInvalid = (1 << 5),
+ ISSIsAcqRel = (1 << 6),
+ ISSIsWrite = (1 << 7),
+ ISSIs16Bit = (1 << 8),
+} ISSInfo;
+
+/*
+ * Store var into env + offset to a member with size bytes.
+ * Free var after use.
+ */
+void store_cpu_offset(TCGv_i32 var, int offset, int size)
+{
+ switch (size) {
+ case 1:
+ tcg_gen_st8_i32(var, tcg_env, offset);
+ break;
+ case 4:
+ tcg_gen_st_i32(var, tcg_env, offset);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/* Save the syndrome information for a Data Abort */
+static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
+{
+ uint32_t syn;
+ int sas = memop & MO_SIZE;
+ bool sse = memop & MO_SIGN;
+ bool is_acqrel = issinfo & ISSIsAcqRel;
+ bool is_write = issinfo & ISSIsWrite;
+ bool is_16bit = issinfo & ISSIs16Bit;
+ int srt = issinfo & ISSRegMask;
+
+ if (issinfo & ISSInvalid) {
+ /* Some callsites want to conditionally provide ISS info,
+ * eg "only if this was not a writeback"
+ */
+ return;
+ }
+
+ if (srt == 15) {
+ /* For AArch32, insns where the src/dest is R15 never generate
+ * ISS information. Catching that here saves checking at all
+ * the call sites.
+ */
+ return;
+ }
+
+ syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
+ 0, 0, 0, is_write, 0, is_16bit);
+ disas_set_insn_syndrome(s, syn);
+}
+
+static inline int get_a32_user_mem_index(DisasContext *s)
+{
+ /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
+ * insns:
+ * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
+ * otherwise, access as if at PL0.
+ */
+ switch (s->mmu_idx) {
+ case ARMMMUIdx_E3:
+ case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
+ case ARMMMUIdx_E10_0:
+ case ARMMMUIdx_E10_1:
+ case ARMMMUIdx_E10_1_PAN:
+ return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
+ case ARMMMUIdx_MUser:
+ case ARMMMUIdx_MPriv:
+ return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
+ case ARMMMUIdx_MUserNegPri:
+ case ARMMMUIdx_MPrivNegPri:
+ return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
+ case ARMMMUIdx_MSUser:
+ case ARMMMUIdx_MSPriv:
+ return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
+ case ARMMMUIdx_MSUserNegPri:
+ case ARMMMUIdx_MSPrivNegPri:
+ return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/* The pc_curr difference for an architectural jump. */
+static target_long jmp_diff(DisasContext *s, target_long diff)
+{
+ return diff + (s->thumb ? 4 : 8);
+}
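+
+/*
+ * The architectural PC as read by an instruction is the instruction's
+ * address plus 8 in ARM state and plus 4 in Thumb state, which is the
+ * pipeline offset added above.
+ */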
+
+static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
+{
+ assert(s->pc_save != -1);
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
+ tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
+ } else {
+ tcg_gen_movi_i32(var, s->pc_curr + diff);
+ }
+}
+
+/* Set a variable to the value of a CPU register. */
+void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
+{
+ if (reg == 15) {
+ gen_pc_plus_diff(s, var, jmp_diff(s, 0));
+ } else {
+ tcg_gen_mov_i32(var, cpu_R[reg]);
+ }
+}
+
+/*
+ * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
+ * This is used for load/store for which use of PC implies (literal),
+ * or ADD that implies ADR.
+ */
+TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ if (reg == 15) {
+ /*
+ * This address is computed from an aligned PC:
+ * subtract off the low bits.
+ */
+ gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
+ } else {
+ tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
+ }
+ return tmp;
+}
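+
+/*
+ * Worked example for the literal case above: a Thumb load at pc_curr
+ * 0x1002 with ofs 4 reads PC as 0x1006, aligns it to 0x1004 and adds the
+ * offset, giving 0x1008; the code reproduces this as
+ * pc_curr + jmp_diff(4 - 2) = 0x1002 + 6 = 0x1008.
+ */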
+
+/* Set a CPU register. The source must be a temporary and will be
+ marked as dead. */
+void store_reg(DisasContext *s, int reg, TCGv_i32 var)
+{
+ if (reg == 15) {
+ /* In Thumb mode, we must ignore bit 0.
+ * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
+ * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
+ * We choose to ignore [1:0] in ARM mode for all architecture versions.
+ */
+ tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
+ s->base.is_jmp = DISAS_JUMP;
+ s->pc_save = -1;
+ } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
+ /* For M-profile SP bits [1:0] are always zero */
+ tcg_gen_andi_i32(var, var, ~3);
+ }
+ tcg_gen_mov_i32(cpu_R[reg], var);
+}
+
+/*
+ * Variant of store_reg which applies v8M stack-limit checks before updating
+ * SP. If the check fails this will result in an exception being taken.
+ * We disable the stack checks for CONFIG_USER_ONLY because we have
+ * no idea what the stack limits should be in that case.
+ * If stack checking is not being done this just acts like store_reg().
+ */
+static void store_sp_checked(DisasContext *s, TCGv_i32 var)
+{
+#ifndef CONFIG_USER_ONLY
+ if (s->v8m_stackcheck) {
+ gen_helper_v8m_stackcheck(tcg_env, var);
+ }
+#endif
+ store_reg(s, 13, var);
+}
+
+/* Value extensions. */
+#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
+#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
+#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
+#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
+
+#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
+#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
+
+void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
+{
+ gen_helper_cpsr_write(tcg_env, var, tcg_constant_i32(mask));
+}
+
+static void gen_rebuild_hflags(DisasContext *s, bool new_el)
+{
+ bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
+
+ if (new_el) {
+ if (m_profile) {
+ gen_helper_rebuild_hflags_m32_newel(tcg_env);
+ } else {
+ gen_helper_rebuild_hflags_a32_newel(tcg_env);
+ }
+ } else {
+ TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
+ if (m_profile) {
+ gen_helper_rebuild_hflags_m32(tcg_env, tcg_el);
+ } else {
+ gen_helper_rebuild_hflags_a32(tcg_env, tcg_el);
+ }
+ }
+}
+
+static void gen_exception_internal(int excp)
+{
+ assert(excp_is_internal(excp));
+ gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
+}
+
+static void gen_singlestep_exception(DisasContext *s)
+{
+ /* We just completed a step of an insn. Move from Active-not-pending
+ * to Active-pending, and then also take the swstep exception.
+ * This corresponds to making the (IMPDEF) choice to prioritize
+ * swstep exceptions over asynchronous exceptions taken to an exception
+ * level where debug is disabled. This choice has the advantage that
+ * we do not need to maintain internal state corresponding to the
+ * ISV/EX syndrome bits between completion of the step and generation
+ * of the exception, and our syndrome information is always correct.
+ */
+ gen_ss_advance(s);
+ gen_swstep_exception(s, 1, s->is_ldex);
+ s->base.is_jmp = DISAS_NORETURN;
+}
+
+void clear_eci_state(DisasContext *s)
+{
+ /*
+ * Clear any ECI/ICI state: used when a load multiple/store
+ * multiple insn executes.
+ */
+ if (s->eci) {
+ store_cpu_field_constant(0, condexec_bits);
+ s->eci = 0;
+ }
+}
+
+static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 tmp1 = tcg_temp_new_i32();
+ TCGv_i32 tmp2 = tcg_temp_new_i32();
+ tcg_gen_ext16s_i32(tmp1, a);
+ tcg_gen_ext16s_i32(tmp2, b);
+ tcg_gen_mul_i32(tmp1, tmp1, tmp2);
+ tcg_gen_sari_i32(a, a, 16);
+ tcg_gen_sari_i32(b, b, 16);
+ tcg_gen_mul_i32(b, b, a);
+ tcg_gen_mov_i32(a, tmp1);
+}
+
+/* Byteswap each halfword. */
+void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
+ tcg_gen_shri_i32(tmp, var, 8);
+ tcg_gen_and_i32(tmp, tmp, mask);
+ tcg_gen_and_i32(var, var, mask);
+ tcg_gen_shli_i32(var, var, 8);
+ tcg_gen_or_i32(dest, var, tmp);
+}
+
+/* Byteswap low halfword and sign extend. */
+static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
+{
+ tcg_gen_bswap16_i32(var, var, TCG_BSWAP_OS);
+}
+
+/* Dual 16-bit add. The result is placed in dest; t0 and t1 are clobbered.
+ tmp = (t0 ^ t1) & 0x8000;
+ t0 &= ~0x8000;
+ t1 &= ~0x8000;
+ t0 = (t0 + t1) ^ tmp;
+ */
+
+static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_xor_i32(tmp, t0, t1);
+ tcg_gen_andi_i32(tmp, tmp, 0x8000);
+ tcg_gen_andi_i32(t0, t0, ~0x8000);
+ tcg_gen_andi_i32(t1, t1, ~0x8000);
+ tcg_gen_add_i32(t0, t0, t1);
+ tcg_gen_xor_i32(dest, t0, tmp);
+}
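+
+/*
+ * Masking bit 15 out of both operands above means the low-halfword sum
+ * cannot carry into the high halfword; XORing the saved
+ * (t0 ^ t1) & 0x8000 back in then restores the correct value of bit 15
+ * itself.
+ */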
+
+/* Set N and Z flags from var. */
+static inline void gen_logic_CC(TCGv_i32 var)
+{
+ tcg_gen_mov_i32(cpu_NF, var);
+ tcg_gen_mov_i32(cpu_ZF, var);
+}
+
+/* dest = T0 + T1 + CF. */
+static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
+{
+ tcg_gen_add_i32(dest, t0, t1);
+ tcg_gen_add_i32(dest, dest, cpu_CF);
+}
+
+/* dest = T0 - T1 + CF - 1. */
+static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
+{
+ tcg_gen_sub_i32(dest, t0, t1);
+ tcg_gen_add_i32(dest, dest, cpu_CF);
+ tcg_gen_subi_i32(dest, dest, 1);
+}
+
+/* dest = T0 + T1. Compute C, N, V and Z flags */
+static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp, 0);
+ tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
+ tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+ tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
+ tcg_gen_xor_i32(tmp, t0, t1);
+ tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
+ tcg_gen_mov_i32(dest, cpu_NF);
+}
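+
+/*
+ * In gen_add_CC the carry falls out of the high half of the double-word
+ * add2 (both high inputs are zero), and overflow is the usual "operands
+ * had the same sign but the result differs":
+ * VF = (result ^ t0) & ~(t0 ^ t1), with only bit 31 significant.
+ */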
+
+/* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
+static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ if (TCG_TARGET_HAS_add2_i32) {
+ tcg_gen_movi_i32(tmp, 0);
+ tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
+ tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
+ } else {
+ TCGv_i64 q0 = tcg_temp_new_i64();
+ TCGv_i64 q1 = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(q0, t0);
+ tcg_gen_extu_i32_i64(q1, t1);
+ tcg_gen_add_i64(q0, q0, q1);
+ tcg_gen_extu_i32_i64(q1, cpu_CF);
+ tcg_gen_add_i64(q0, q0, q1);
+ tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
+ }
+ tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+ tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
+ tcg_gen_xor_i32(tmp, t0, t1);
+ tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
+ tcg_gen_mov_i32(dest, cpu_NF);
+}
+
+/* dest = T0 - T1. Compute C, N, V and Z flags */
+static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
+{
+ TCGv_i32 tmp;
+ tcg_gen_sub_i32(cpu_NF, t0, t1);
+ tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+ tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
+ tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
+ tmp = tcg_temp_new_i32();
+ tcg_gen_xor_i32(tmp, t0, t1);
+ tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
+ tcg_gen_mov_i32(dest, cpu_NF);
+}
+
+/* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
+static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_not_i32(tmp, t1);
+ gen_adc_CC(dest, t0, tmp);
+}
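+
+/*
+ * SBC is implemented above as t0 + ~t1 + CF, which equals
+ * t0 - t1 - 1 + CF; routing it through gen_adc_CC gives the ARM
+ * convention that C set means "no borrow".
+ */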
+
+#define GEN_SHIFT(name) \
+static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
+{ \
+ TCGv_i32 tmpd = tcg_temp_new_i32(); \
+ TCGv_i32 tmp1 = tcg_temp_new_i32(); \
+ TCGv_i32 zero = tcg_constant_i32(0); \
+ tcg_gen_andi_i32(tmp1, t1, 0x1f); \
+ tcg_gen_##name##_i32(tmpd, t0, tmp1); \
+ tcg_gen_andi_i32(tmp1, t1, 0xe0); \
+ tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd); \
+}
+GEN_SHIFT(shl)
+GEN_SHIFT(shr)
+#undef GEN_SHIFT
+
+static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
+{
+ TCGv_i32 tmp1 = tcg_temp_new_i32();
+
+ tcg_gen_andi_i32(tmp1, t1, 0xff);
+ tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
+ tcg_gen_sar_i32(dest, t0, tmp1);
+}
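+
+/*
+ * For register-specified ASR a count of 32 or more must produce the
+ * sign-fill result; clamping the count to 31 with umin gives exactly
+ * that, since an arithmetic shift right by 31 already replicates the
+ * sign bit.
+ */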
+
+static void shifter_out_im(TCGv_i32 var, int shift)
+{
+ tcg_gen_extract_i32(cpu_CF, var, shift, 1);
+}
+
+/* Shift by immediate. Includes special handling for shift == 0. */
+static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
+ int shift, int flags)
+{
+ switch (shiftop) {
+ case 0: /* LSL */
+ if (shift != 0) {
+ if (flags) {
+ shifter_out_im(var, 32 - shift);
+ }
+ tcg_gen_shli_i32(var, var, shift);
+ }
+ break;
+ case 1: /* LSR */
+ if (shift == 0) {
+ if (flags) {
+ tcg_gen_shri_i32(cpu_CF, var, 31);
+ }
+ tcg_gen_movi_i32(var, 0);
+ } else {
+ if (flags) {
+ shifter_out_im(var, shift - 1);
+ }
+ tcg_gen_shri_i32(var, var, shift);
+ }
+ break;
+ case 2: /* ASR */
+ if (shift == 0) {
+ shift = 32;
+ }
+ if (flags) {
+ shifter_out_im(var, shift - 1);
+ }
+ if (shift == 32) {
+ shift = 31;
+ }
+ tcg_gen_sari_i32(var, var, shift);
+ break;
+ case 3: /* ROR/RRX */
+ if (shift != 0) {
+ if (flags) {
+ shifter_out_im(var, shift - 1);
+ }
+ tcg_gen_rotri_i32(var, var, shift);
+ } else {
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ tcg_gen_shli_i32(tmp, cpu_CF, 31);
+ if (flags) {
+ shifter_out_im(var, 0);
+ }
+ tcg_gen_shri_i32(var, var, 1);
+ tcg_gen_or_i32(var, var, tmp);
+ }
+ break;
+ }
+}
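+
+/*
+ * In the immediate shift encodings a shift amount of 0 is re-purposed:
+ * LSR #0 means LSR #32, ASR #0 means ASR #32 and ROR #0 means RRX
+ * (rotate right by one through carry), which is why the cases above
+ * special-case shift == 0. LSL #0 is a plain unshifted register operand.
+ */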
+
+static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
+ TCGv_i32 shift, int flags)
+{
+ if (flags) {
+ switch (shiftop) {
+ case 0: gen_helper_shl_cc(var, tcg_env, var, shift); break;
+ case 1: gen_helper_shr_cc(var, tcg_env, var, shift); break;
+ case 2: gen_helper_sar_cc(var, tcg_env, var, shift); break;
+ case 3: gen_helper_ror_cc(var, tcg_env, var, shift); break;
+ }
+ } else {
+ switch (shiftop) {
+ case 0:
+ gen_shl(var, var, shift);
+ break;
+ case 1:
+ gen_shr(var, var, shift);
+ break;
+ case 2:
+ gen_sar(var, var, shift);
+ break;
+ case 3:
+ tcg_gen_andi_i32(shift, shift, 0x1f);
+ tcg_gen_rotr_i32(var, var, shift);
+ break;
+ }
+ }
+}
+
+/*
+ * Generate a conditional based on ARM condition code cc.
+ * This is common between ARM and Aarch64 targets.
+ */
+void arm_test_cc(DisasCompare *cmp, int cc)
+{
+ TCGv_i32 value;
+ TCGCond cond;
+
+ switch (cc) {
+ case 0: /* eq: Z */
+ case 1: /* ne: !Z */
+ cond = TCG_COND_EQ;
+ value = cpu_ZF;
+ break;
+
+ case 2: /* cs: C */
+ case 3: /* cc: !C */
+ cond = TCG_COND_NE;
+ value = cpu_CF;
+ break;
+
+ case 4: /* mi: N */
+ case 5: /* pl: !N */
+ cond = TCG_COND_LT;
+ value = cpu_NF;
+ break;
+
+ case 6: /* vs: V */
+ case 7: /* vc: !V */
+ cond = TCG_COND_LT;
+ value = cpu_VF;
+ break;
+
+ case 8: /* hi: C && !Z */
+ case 9: /* ls: !C || Z -> !(C && !Z) */
+ cond = TCG_COND_NE;
+ value = tcg_temp_new_i32();
+ /* CF is 1 for C, so -CF is an all-bits-set mask for C;
+ ZF is non-zero for !Z; so AND the two subexpressions. */
+ tcg_gen_neg_i32(value, cpu_CF);
+ tcg_gen_and_i32(value, value, cpu_ZF);
+ break;
+
+ case 10: /* ge: N == V -> N ^ V == 0 */
+ case 11: /* lt: N != V -> N ^ V != 0 */
+ /* Since we're only interested in the sign bit, == 0 is >= 0. */
+ cond = TCG_COND_GE;
+ value = tcg_temp_new_i32();
+ tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
+ break;
+
+ case 12: /* gt: !Z && N == V */
+ case 13: /* le: Z || N != V */
+ cond = TCG_COND_NE;
+ value = tcg_temp_new_i32();
+ /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
+ * the sign bit then AND with ZF to yield the result. */
+ tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
+ tcg_gen_sari_i32(value, value, 31);
+ tcg_gen_andc_i32(value, cpu_ZF, value);
+ break;
+
+ case 14: /* always */
+ case 15: /* always */
+ /* Use the ALWAYS condition, which will fold early.
+ * It doesn't matter what we use for the value. */
+ cond = TCG_COND_ALWAYS;
+ value = cpu_ZF;
+ goto no_invert;
+
+ default:
+ fprintf(stderr, "Bad condition code 0x%x\n", cc);
+ abort();
+ }
+
+ if (cc & 1) {
+ cond = tcg_invert_cond(cond);
+ }
+
+ no_invert:
+ cmp->cond = cond;
+ cmp->value = value;
+}
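+
+/*
+ * Example: cc == 0xb (LT) selects cond GE on NF ^ VF; the low bit of cc
+ * then inverts it to LT, i.e. "sign bit of NF ^ VF set", which is N != V.
+ */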
+
+void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
+{
+ tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
+}
+
+void arm_gen_test_cc(int cc, TCGLabel *label)
+{
+ DisasCompare cmp;
+ arm_test_cc(&cmp, cc);
+ arm_jump_cc(&cmp, label);
+}
+
+void gen_set_condexec(DisasContext *s)
+{
+ if (s->condexec_mask) {
+ uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
+
+ store_cpu_field_constant(val, condexec_bits);
+ }
+}
+
+void gen_update_pc(DisasContext *s, target_long diff)
+{
+ gen_pc_plus_diff(s, cpu_R[15], diff);
+ s->pc_save = s->pc_curr + diff;
+}
+
+/* Set PC and Thumb state from var. var is marked as dead. */
+static inline void gen_bx(DisasContext *s, TCGv_i32 var)
+{
+ s->base.is_jmp = DISAS_JUMP;
+ tcg_gen_andi_i32(cpu_R[15], var, ~1);
+ tcg_gen_andi_i32(var, var, 1);
+ store_cpu_field(var, thumb);
+ s->pc_save = -1;
+}
+
+/*
+ * Set PC and Thumb state from var. var is marked as dead.
+ * For M-profile CPUs, include logic to detect exception-return
+ * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
+ * and BX reg, and no others, and happens only for code in Handler mode.
+ * The Security Extension also requires us to check for the FNC_RETURN
+ * which signals a function return from non-secure state; this can happen
+ * in both Handler and Thread mode.
+ * To avoid having to do multiple comparisons in inline generated code,
+ * we make the check we do here loose, so it will match for EXC_RETURN
+ * in Thread mode. For system emulation do_v7m_exception_exit() checks
+ * for these spurious cases and returns without doing anything (giving
+ * the same behaviour as for a branch to a non-magic address).
+ *
+ * In linux-user mode it is unclear what the right behaviour for an
+ * attempted FNC_RETURN should be, because in real hardware this will go
+ * directly to Secure code (ie not the Linux kernel) which will then treat
+ * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
+ * attempt behave the way it would on a CPU without the security extension,
+ * which is to say "like a normal branch". That means we can simply treat
+ * all branches as normal with no magic address behaviour.
+ */
+static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
+{
+ /* Generate the same code here as for a simple bx, but flag via
+ * s->base.is_jmp that we need to do the rest of the work later.
+ */
+ gen_bx(s, var);
+#ifndef CONFIG_USER_ONLY
+ if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
+ (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
+ s->base.is_jmp = DISAS_BX_EXCRET;
+ }
+#endif
+}
+
+static inline void gen_bx_excret_final_code(DisasContext *s)
+{
+ /* Generate the code to finish possible exception return and end the TB */
+ DisasLabel excret_label = gen_disas_label(s);
+ uint32_t min_magic;
+
+ if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
+ /* Covers FNC_RETURN and EXC_RETURN magic */
+ min_magic = FNC_RETURN_MIN_MAGIC;
+ } else {
+ /* EXC_RETURN magic only */
+ min_magic = EXC_RETURN_MIN_MAGIC;
+ }
+
+ /* Is the new PC value in the magic range indicating exception return? */
+ tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
+ /* No: end the TB as we would for a DISAS_JMP */
+ if (s->ss_active) {
+ gen_singlestep_exception(s);
+ } else {
+ tcg_gen_exit_tb(NULL, 0);
+ }
+ set_disas_label(s, excret_label);
+ /* Yes: this is an exception return.
+ * At this point in runtime env->regs[15] and env->thumb will hold
+ * the exception-return magic number, which do_v7m_exception_exit()
+ * will read. Nothing else will be able to see those values because
+ * the cpu-exec main loop guarantees that we will always go straight
+ * from raising the exception to the exception-handling code.
+ *
+ * gen_ss_advance(s) does nothing on M profile currently but
+ * calling it is conceptually the right thing as we have executed
+ * this instruction (compare SWI, HVC, SMC handling).
+ */
+ gen_ss_advance(s);
+ gen_exception_internal(EXCP_EXCEPTION_EXIT);
+}
+
+static inline void gen_bxns(DisasContext *s, int rm)
+{
+ TCGv_i32 var = load_reg(s, rm);
+
+ /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
+ * we need to sync state before calling it, but:
+ * - we don't need to do gen_update_pc() because the bxns helper will
+ * always set the PC itself
+ * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
+ * unless it's outside an IT block or the last insn in an IT block,
+ * so we know that condexec == 0 (already set at the top of the TB)
+ * is correct in the non-UNPREDICTABLE cases, and we can choose
+ * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
+ */
+ gen_helper_v7m_bxns(tcg_env, var);
+ s->base.is_jmp = DISAS_EXIT;
+}
+
+static inline void gen_blxns(DisasContext *s, int rm)
+{
+ TCGv_i32 var = load_reg(s, rm);
+
+ /* We don't need to sync condexec state, for the same reason as bxns.
+ * We do however need to set the PC, because the blxns helper reads it.
+ * The blxns helper may throw an exception.
+ */
+ gen_update_pc(s, curr_insn_len(s));
+ gen_helper_v7m_blxns(tcg_env, var);
+ s->base.is_jmp = DISAS_EXIT;
+}
+
+/* Variant of store_reg which uses branch&exchange logic when storing
+ to r15 in ARM architecture v7 and above. The source must be a temporary
+ and will be marked as dead. */
+static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
+{
+ if (reg == 15 && ENABLE_ARCH_7) {
+ gen_bx(s, var);
+ } else {
+ store_reg(s, reg, var);
+ }
+}
+
+/* Variant of store_reg which uses branch&exchange logic when storing
+ * to r15 in ARM architecture v5T and above. This is used for storing
+ * the results of a LDR/LDM/POP into r15, and corresponds to the cases
+ * in the ARM ARM which use the LoadWritePC() pseudocode function. */
+static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
+{
+ if (reg == 15 && ENABLE_ARCH_5) {
+ gen_bx_excret(s, var);
+ } else {
+ store_reg(s, reg, var);
+ }
+}
+
+#ifdef CONFIG_USER_ONLY
+#define IS_USER_ONLY 1
+#else
+#define IS_USER_ONLY 0
+#endif
+
+MemOp pow2_align(unsigned i)
+{
+ static const MemOp mop_align[] = {
+ 0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16, MO_ALIGN_32
+ };
+ g_assert(i < ARRAY_SIZE(mop_align));
+ return mop_align[i];
+}
+
+/*
+ * Abstractions of "generate code to do a guest load/store for
+ * AArch32", where a vaddr is always 32 bits (and is zero
+ * extended if we're a 64 bit core) and data is also
+ * 32 bits unless specifically doing a 64 bit access.
+ * These functions work like tcg_gen_qemu_{ld,st}* except
+ * that the address argument is TCGv_i32 rather than TCGv.
+ */
+
+static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
+{
+ TCGv addr = tcg_temp_new();
+ tcg_gen_extu_i32_tl(addr, a32);
+
+ /* Not needed for user-mode BE32, where we use MO_BE instead. */
+ if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
+ tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
+ }
+ return addr;
+}
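+
+/*
+ * The XOR above implements BE32 semantics for system-mode SCTLR.B:
+ * a byte access XORs the address with 3 and a halfword access with 2,
+ * so that sub-word data is fetched as if the containing word had been
+ * byte-swapped.
+ */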
+
+/*
+ * Internal routines are used for NEON cases where the endianness
+ * and/or alignment has already been taken into account and manipulated.
+ */
+void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
+ TCGv_i32 a32, int index, MemOp opc)
+{
+ TCGv addr = gen_aa32_addr(s, a32, opc);
+ tcg_gen_qemu_ld_i32(val, addr, index, opc);
+}
+
+void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
+ TCGv_i32 a32, int index, MemOp opc)
+{
+ TCGv addr = gen_aa32_addr(s, a32, opc);
+ tcg_gen_qemu_st_i32(val, addr, index, opc);
+}
+
+void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
+ TCGv_i32 a32, int index, MemOp opc)
+{
+ TCGv addr = gen_aa32_addr(s, a32, opc);
+
+ tcg_gen_qemu_ld_i64(val, addr, index, opc);
+
+ /* Not needed for user-mode BE32, where we use MO_BE instead. */
+ if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
+ tcg_gen_rotri_i64(val, val, 32);
+ }
+}
+
+void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
+ TCGv_i32 a32, int index, MemOp opc)
+{
+ TCGv addr = gen_aa32_addr(s, a32, opc);
+
+ /* Not needed for user-mode BE32, where we use MO_BE instead. */
+ if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ tcg_gen_rotri_i64(tmp, val, 32);
+ tcg_gen_qemu_st_i64(tmp, addr, index, opc);
+ } else {
+ tcg_gen_qemu_st_i64(val, addr, index, opc);
+ }
+}
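+
+/*
+ * In BE32 system mode a 64-bit access is really two word accesses;
+ * after a single 64-bit load the two words end up in the wrong halves,
+ * so the value is rotated by 32 bits (and likewise before a store).
+ */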
+
+void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
+ int index, MemOp opc)
+{
+ gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
+}
+
+void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
+ int index, MemOp opc)
+{
+ gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
+}
+
+void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
+ int index, MemOp opc)
+{
+ gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
+}
+
+void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
+ int index, MemOp opc)
+{
+ gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
+}
+
+#define DO_GEN_LD(SUFF, OPC) \
+ static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
+ TCGv_i32 a32, int index) \
+ { \
+ gen_aa32_ld_i32(s, val, a32, index, OPC); \
+ }
+
+#define DO_GEN_ST(SUFF, OPC) \
+ static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
+ TCGv_i32 a32, int index) \
+ { \
+ gen_aa32_st_i32(s, val, a32, index, OPC); \
+ }
+
+static inline void gen_hvc(DisasContext *s, int imm16)
+{
+ /* The pre HVC helper handles cases when HVC gets trapped
+ * as an undefined insn by runtime configuration (ie before
+ * the insn really executes).
+ */
+ gen_update_pc(s, 0);
+ gen_helper_pre_hvc(tcg_env);
+ /* Otherwise we will treat this as a real exception which
+ * happens after execution of the insn. (The distinction matters
+ * for the PC value reported to the exception handler and also
+ * for single stepping.)
+ */
+ s->svc_imm = imm16;
+ gen_update_pc(s, curr_insn_len(s));
+ s->base.is_jmp = DISAS_HVC;
+}
+
+static inline void gen_smc(DisasContext *s)
+{
+ /* As with HVC, we may take an exception either before or after
+ * the insn executes.
+ */
+ gen_update_pc(s, 0);
+ gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa32_smc()));
+ gen_update_pc(s, curr_insn_len(s));
+ s->base.is_jmp = DISAS_SMC;
+}
+
+static void gen_exception_internal_insn(DisasContext *s, int excp)
+{
+ gen_set_condexec(s);
+ gen_update_pc(s, 0);
+ gen_exception_internal(excp);
+ s->base.is_jmp = DISAS_NORETURN;
+}
+
+static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
+{
+ gen_helper_exception_with_syndrome_el(tcg_env, tcg_constant_i32(excp),
+ tcg_constant_i32(syndrome), tcg_el);
+}
+
+static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
+{
+ gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
+}
+
+static void gen_exception(int excp, uint32_t syndrome)
+{
+ gen_helper_exception_with_syndrome(tcg_env, tcg_constant_i32(excp),
+ tcg_constant_i32(syndrome));
+}
+
+static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
+ int excp, uint32_t syn, TCGv_i32 tcg_el)
+{
+ if (s->aarch64) {
+ gen_a64_update_pc(s, pc_diff);
+ } else {
+ gen_set_condexec(s);
+ gen_update_pc(s, pc_diff);
+ }
+ gen_exception_el_v(excp, syn, tcg_el);
+ s->base.is_jmp = DISAS_NORETURN;
+}
+
+void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
+ uint32_t syn, uint32_t target_el)
+{
+ gen_exception_insn_el_v(s, pc_diff, excp, syn,
+ tcg_constant_i32(target_el));
+}
+
+void gen_exception_insn(DisasContext *s, target_long pc_diff,
+ int excp, uint32_t syn)
+{
+ if (s->aarch64) {
+ gen_a64_update_pc(s, pc_diff);
+ } else {
+ gen_set_condexec(s);
+ gen_update_pc(s, pc_diff);
+ }
+ gen_exception(excp, syn);
+ s->base.is_jmp = DISAS_NORETURN;
+}
+
+static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
+{
+ gen_set_condexec(s);
+ gen_update_pc(s, 0);
+ gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syn));
+ s->base.is_jmp = DISAS_NORETURN;
+}
+
+void unallocated_encoding(DisasContext *s)
+{
+ /* Unallocated and reserved encodings are uncategorized */
+ gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
+}
+
+/* Force a TB lookup after an instruction that changes the CPU state. */
+void gen_lookup_tb(DisasContext *s)
+{
+ gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
+ s->base.is_jmp = DISAS_EXIT;
+}
+
+static inline void gen_hlt(DisasContext *s, int imm)
+{
+ /* HLT. This has two purposes.
+ * Architecturally, it is an external halting debug instruction.
+ * Since QEMU doesn't implement external debug, we treat it as
+ * required when halting debug is disabled: it will UNDEF.
+ * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
+ * and "HLT 0xF000" is an A32 semihosting syscall. These traps
+ * must trigger semihosting even for ARMv7 and earlier, where
+ * HLT was an undefined encoding.
+ * In system mode, we don't allow userspace access to
+ * semihosting, to provide some semblance of security
+ * (and for consistency with our 64-bit semihosting).
+ */
+ if (semihosting_enabled(s->current_el == 0) &&
+ (imm == (s->thumb ? 0x3c : 0xf000))) {
+ gen_exception_internal_insn(s, EXCP_SEMIHOST);
+ return;
+ }
+
+ unallocated_encoding(s);
+}
+
+/*
+ * Return the offset of a "full" NEON Dreg.
+ */
+long neon_full_reg_offset(unsigned reg)
+{
+ return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
+}
+
+/*
+ * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
+ * where 0 is the least significant end of the register.
+ */
+long neon_element_offset(int reg, int element, MemOp memop)
+{
+ int element_size = 1 << (memop & MO_SIZE);
+ int ofs = element * element_size;
+#if HOST_BIG_ENDIAN
+ /*
+ * Calculate the offset assuming fully little-endian,
+ * then XOR to account for the order of the 8-byte units.
+ */
+ if (element_size < 8) {
+ ofs ^= 8 - element_size;
+ }
+#endif
+ return neon_full_reg_offset(reg) + ofs;
+}
+
+/* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
+long vfp_reg_offset(bool dp, unsigned reg)
+{
+ if (dp) {
+ return neon_element_offset(reg, 0, MO_64);
+ } else {
+ return neon_element_offset(reg >> 1, reg & 1, MO_32);
+ }
+}
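+
+/*
+ * The VFP/Neon registers are views of the same storage: S(2n) and
+ * S(2n+1) are the low and high words of D(n), and D(2n)/D(2n+1) are the
+ * low and high doublewords of Q(n) (vfp.zregs[n] here);
+ * neon_element_offset applies the host-endian correction.
+ */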
+
+void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
+{
+ long off = neon_element_offset(reg, ele, memop);
+
+ switch (memop) {
+ case MO_SB:
+ tcg_gen_ld8s_i32(dest, tcg_env, off);
+ break;
+ case MO_UB:
+ tcg_gen_ld8u_i32(dest, tcg_env, off);
+ break;
+ case MO_SW:
+ tcg_gen_ld16s_i32(dest, tcg_env, off);
+ break;
+ case MO_UW:
+ tcg_gen_ld16u_i32(dest, tcg_env, off);
+ break;
+ case MO_UL:
+ case MO_SL:
+ tcg_gen_ld_i32(dest, tcg_env, off);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
+{
+ long off = neon_element_offset(reg, ele, memop);
+
+ switch (memop) {
+ case MO_SL:
+ tcg_gen_ld32s_i64(dest, tcg_env, off);
+ break;
+ case MO_UL:
+ tcg_gen_ld32u_i64(dest, tcg_env, off);
+ break;
+ case MO_UQ:
+ tcg_gen_ld_i64(dest, tcg_env, off);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
+{
+ long off = neon_element_offset(reg, ele, memop);
+
+ switch (memop) {
+ case MO_8:
+ tcg_gen_st8_i32(src, tcg_env, off);
+ break;
+ case MO_16:
+ tcg_gen_st16_i32(src, tcg_env, off);
+ break;
+ case MO_32:
+ tcg_gen_st_i32(src, tcg_env, off);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
+{
+ long off = neon_element_offset(reg, ele, memop);
+
+ switch (memop) {
+ case MO_32:
+ tcg_gen_st32_i64(src, tcg_env, off);
+ break;
+ case MO_64:
+ tcg_gen_st_i64(src, tcg_env, off);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+#define ARM_CP_RW_BIT (1 << 20)
+
+static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
+{
+ tcg_gen_ld_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
+}
+
+static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
+{
+ tcg_gen_st_i64(var, tcg_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
+}
+
+static inline TCGv_i32 iwmmxt_load_creg(int reg)
+{
+ TCGv_i32 var = tcg_temp_new_i32();
+ tcg_gen_ld_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
+ return var;
+}
+
+static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
+{
+ tcg_gen_st_i32(var, tcg_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
+}
+
+static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
+{
+ iwmmxt_store_reg(cpu_M0, rn);
+}
+
+static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
+{
+ iwmmxt_load_reg(cpu_M0, rn);
+}
+
+static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
+{
+ iwmmxt_load_reg(cpu_V1, rn);
+ tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
+}
+
+static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
+{
+ iwmmxt_load_reg(cpu_V1, rn);
+ tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
+}
+
+static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
+{
+ iwmmxt_load_reg(cpu_V1, rn);
+ tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
+}
+
+#define IWMMXT_OP(name) \
+static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
+{ \
+ iwmmxt_load_reg(cpu_V1, rn); \
+ gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
+}
+
+#define IWMMXT_OP_ENV(name) \
+static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
+{ \
+ iwmmxt_load_reg(cpu_V1, rn); \
+ gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0, cpu_V1); \
+}
+
+#define IWMMXT_OP_ENV_SIZE(name) \
+IWMMXT_OP_ENV(name##b) \
+IWMMXT_OP_ENV(name##w) \
+IWMMXT_OP_ENV(name##l)
+
+#define IWMMXT_OP_ENV1(name) \
+static inline void gen_op_iwmmxt_##name##_M0(void) \
+{ \
+ gen_helper_iwmmxt_##name(cpu_M0, tcg_env, cpu_M0); \
+}
+
+IWMMXT_OP(maddsq)
+IWMMXT_OP(madduq)
+IWMMXT_OP(sadb)
+IWMMXT_OP(sadw)
+IWMMXT_OP(mulslw)
+IWMMXT_OP(mulshw)
+IWMMXT_OP(mululw)
+IWMMXT_OP(muluhw)
+IWMMXT_OP(macsw)
+IWMMXT_OP(macuw)
+
+IWMMXT_OP_ENV_SIZE(unpackl)
+IWMMXT_OP_ENV_SIZE(unpackh)
+
+IWMMXT_OP_ENV1(unpacklub)
+IWMMXT_OP_ENV1(unpackluw)
+IWMMXT_OP_ENV1(unpacklul)
+IWMMXT_OP_ENV1(unpackhub)
+IWMMXT_OP_ENV1(unpackhuw)
+IWMMXT_OP_ENV1(unpackhul)
+IWMMXT_OP_ENV1(unpacklsb)
+IWMMXT_OP_ENV1(unpacklsw)
+IWMMXT_OP_ENV1(unpacklsl)
+IWMMXT_OP_ENV1(unpackhsb)
+IWMMXT_OP_ENV1(unpackhsw)
+IWMMXT_OP_ENV1(unpackhsl)
+
+IWMMXT_OP_ENV_SIZE(cmpeq)
+IWMMXT_OP_ENV_SIZE(cmpgtu)
+IWMMXT_OP_ENV_SIZE(cmpgts)
+
+IWMMXT_OP_ENV_SIZE(mins)
+IWMMXT_OP_ENV_SIZE(minu)
+IWMMXT_OP_ENV_SIZE(maxs)
+IWMMXT_OP_ENV_SIZE(maxu)
+
+IWMMXT_OP_ENV_SIZE(subn)
+IWMMXT_OP_ENV_SIZE(addn)
+IWMMXT_OP_ENV_SIZE(subu)
+IWMMXT_OP_ENV_SIZE(addu)
+IWMMXT_OP_ENV_SIZE(subs)
+IWMMXT_OP_ENV_SIZE(adds)
+
+IWMMXT_OP_ENV(avgb0)
+IWMMXT_OP_ENV(avgb1)
+IWMMXT_OP_ENV(avgw0)
+IWMMXT_OP_ENV(avgw1)
+
+IWMMXT_OP_ENV(packuw)
+IWMMXT_OP_ENV(packul)
+IWMMXT_OP_ENV(packuq)
+IWMMXT_OP_ENV(packsw)
+IWMMXT_OP_ENV(packsl)
+IWMMXT_OP_ENV(packsq)
+
+static void gen_op_iwmmxt_set_mup(void)
+{
+ TCGv_i32 tmp;
+ tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
+ tcg_gen_ori_i32(tmp, tmp, 2);
+ store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
+}
+
+static void gen_op_iwmmxt_set_cup(void)
+{
+ TCGv_i32 tmp;
+ tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
+ tcg_gen_ori_i32(tmp, tmp, 1);
+ store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
+}
+
+static void gen_op_iwmmxt_setpsr_nz(void)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
+ store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
+}
+
+static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
+{
+ iwmmxt_load_reg(cpu_V1, rn);
+ tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
+ tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
+}
+
+static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
+ TCGv_i32 dest)
+{
+ int rd;
+ uint32_t offset;
+ TCGv_i32 tmp;
+
+ rd = (insn >> 16) & 0xf;
+ tmp = load_reg(s, rd);
+
+ offset = (insn & 0xff) << ((insn >> 7) & 2);
+ if (insn & (1 << 24)) {
+ /* Pre indexed */
+ if (insn & (1 << 23))
+ tcg_gen_addi_i32(tmp, tmp, offset);
+ else
+ tcg_gen_addi_i32(tmp, tmp, -offset);
+ tcg_gen_mov_i32(dest, tmp);
+ if (insn & (1 << 21)) {
+ store_reg(s, rd, tmp);
+ }
+ } else if (insn & (1 << 21)) {
+ /* Post indexed */
+ tcg_gen_mov_i32(dest, tmp);
+ if (insn & (1 << 23))
+ tcg_gen_addi_i32(tmp, tmp, offset);
+ else
+ tcg_gen_addi_i32(tmp, tmp, -offset);
+ store_reg(s, rd, tmp);
+ } else if (!(insn & (1 << 23)))
+ return 1;
+ return 0;
+}
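+
+/*
+ * The offset above is the 8-bit immediate scaled by 4 when bit 8 of the
+ * insn is set (the word/doubleword forms); bits 24, 23 and 21 select
+ * pre/post-indexing, add/subtract and writeback as in the usual ARM
+ * addressing modes.
+ */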
+
+static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
+{
+ int rd = (insn >> 0) & 0xf;
+ TCGv_i32 tmp;
+
+ if (insn & (1 << 8)) {
+ if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
+ return 1;
+ } else {
+ tmp = iwmmxt_load_creg(rd);
+ }
+ } else {
+ tmp = tcg_temp_new_i32();
+ iwmmxt_load_reg(cpu_V0, rd);
+ tcg_gen_extrl_i64_i32(tmp, cpu_V0);
+ }
+ tcg_gen_andi_i32(tmp, tmp, mask);
+ tcg_gen_mov_i32(dest, tmp);
+ return 0;
+}
+
+/* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
+ (ie. an undefined instruction). */
+static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
+{
+ int rd, wrd;
+ int rdhi, rdlo, rd0, rd1, i;
+ TCGv_i32 addr;
+ TCGv_i32 tmp, tmp2, tmp3;
+
+ if ((insn & 0x0e000e00) == 0x0c000000) {
+ if ((insn & 0x0fe00ff0) == 0x0c400000) {
+ wrd = insn & 0xf;
+ rdlo = (insn >> 12) & 0xf;
+ rdhi = (insn >> 16) & 0xf;
+ if (insn & ARM_CP_RW_BIT) { /* TMRRC */
+ iwmmxt_load_reg(cpu_V0, wrd);
+ tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
+ tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
+ } else { /* TMCRR */
+ tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
+ iwmmxt_store_reg(cpu_V0, wrd);
+ gen_op_iwmmxt_set_mup();
+ }
+ return 0;
+ }
+
+ wrd = (insn >> 12) & 0xf;
+ addr = tcg_temp_new_i32();
+ if (gen_iwmmxt_address(s, insn, addr)) {
+ return 1;
+ }
+ if (insn & ARM_CP_RW_BIT) {
+ if ((insn >> 28) == 0xf) { /* WLDRW wCx */
+ tmp = tcg_temp_new_i32();
+ gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
+ iwmmxt_store_creg(wrd, tmp);
+ } else {
+ i = 1;
+ if (insn & (1 << 8)) {
+ if (insn & (1 << 22)) { /* WLDRD */
+ gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
+ i = 0;
+ } else { /* WLDRW wRd */
+ tmp = tcg_temp_new_i32();
+ gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
+ }
+ } else {
+ tmp = tcg_temp_new_i32();
+ if (insn & (1 << 22)) { /* WLDRH */
+ gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
+ } else { /* WLDRB */
+ gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
+ }
+ }
+ if (i) {
+ tcg_gen_extu_i32_i64(cpu_M0, tmp);
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ }
+ } else {
+ if ((insn >> 28) == 0xf) { /* WSTRW wCx */
+ tmp = iwmmxt_load_creg(wrd);
+ gen_aa32_st32(s, tmp, addr, get_mem_index(s));
+ } else {
+ gen_op_iwmmxt_movq_M0_wRn(wrd);
+ tmp = tcg_temp_new_i32();
+ if (insn & (1 << 8)) {
+ if (insn & (1 << 22)) { /* WSTRD */
+ gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
+ } else { /* WSTRW wRd */
+ tcg_gen_extrl_i64_i32(tmp, cpu_M0);
+ gen_aa32_st32(s, tmp, addr, get_mem_index(s));
+ }
+ } else {
+ if (insn & (1 << 22)) { /* WSTRH */
+ tcg_gen_extrl_i64_i32(tmp, cpu_M0);
+ gen_aa32_st16(s, tmp, addr, get_mem_index(s));
+ } else { /* WSTRB */
+ tcg_gen_extrl_i64_i32(tmp, cpu_M0);
+ gen_aa32_st8(s, tmp, addr, get_mem_index(s));
+ }
+ }
+ }
+ }
+ return 0;
+ }
+
+ if ((insn & 0x0f000000) != 0x0e000000)
+ return 1;
+
+ switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
+ case 0x000: /* WOR */
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 0) & 0xf;
+ rd1 = (insn >> 16) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ gen_op_iwmmxt_orq_M0_wRn(rd1);
+ gen_op_iwmmxt_setpsr_nz();
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x011: /* TMCR */
+ if (insn & 0xf)
+ return 1;
+ rd = (insn >> 12) & 0xf;
+ wrd = (insn >> 16) & 0xf;
+ switch (wrd) {
+ case ARM_IWMMXT_wCID:
+ case ARM_IWMMXT_wCASF:
+ break;
+ case ARM_IWMMXT_wCon:
+ gen_op_iwmmxt_set_cup();
+ /* Fall through. */
+ case ARM_IWMMXT_wCSSF:
+ tmp = iwmmxt_load_creg(wrd);
+ tmp2 = load_reg(s, rd);
+ tcg_gen_andc_i32(tmp, tmp, tmp2);
+ iwmmxt_store_creg(wrd, tmp);
+ break;
+ case ARM_IWMMXT_wCGR0:
+ case ARM_IWMMXT_wCGR1:
+ case ARM_IWMMXT_wCGR2:
+ case ARM_IWMMXT_wCGR3:
+ gen_op_iwmmxt_set_cup();
+ tmp = load_reg(s, rd);
+ iwmmxt_store_creg(wrd, tmp);
+ break;
+ default:
+ return 1;
+ }
+ break;
+ case 0x100: /* WXOR */
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 0) & 0xf;
+ rd1 = (insn >> 16) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ gen_op_iwmmxt_xorq_M0_wRn(rd1);
+ gen_op_iwmmxt_setpsr_nz();
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x111: /* TMRC */
+ if (insn & 0xf)
+ return 1;
+ rd = (insn >> 12) & 0xf;
+ wrd = (insn >> 16) & 0xf;
+ tmp = iwmmxt_load_creg(wrd);
+ store_reg(s, rd, tmp);
+ break;
+ case 0x300: /* WANDN */
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 0) & 0xf;
+ rd1 = (insn >> 16) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ tcg_gen_neg_i64(cpu_M0, cpu_M0);
+ gen_op_iwmmxt_andq_M0_wRn(rd1);
+ gen_op_iwmmxt_setpsr_nz();
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x200: /* WAND */
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 0) & 0xf;
+ rd1 = (insn >> 16) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ gen_op_iwmmxt_andq_M0_wRn(rd1);
+ gen_op_iwmmxt_setpsr_nz();
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x810: case 0xa10: /* WMADD */
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 0) & 0xf;
+ rd1 = (insn >> 16) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_maddsq_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_madduq_M0_wRn(rd1);
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ break;
+ case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ switch ((insn >> 22) & 3) {
+ case 0:
+ gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
+ break;
+ case 1:
+ gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
+ break;
+ case 2:
+ gen_op_iwmmxt_unpackll_M0_wRn(rd1);
+ break;
+ case 3:
+ return 1;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ switch ((insn >> 22) & 3) {
+ case 0:
+ gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
+ break;
+ case 1:
+ gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
+ break;
+ case 2:
+ gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
+ break;
+ case 3:
+ return 1;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ if (insn & (1 << 22))
+ gen_op_iwmmxt_sadw_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_sadb_M0_wRn(rd1);
+ if (!(insn & (1 << 20)))
+ gen_op_iwmmxt_addl_M0_wRn(wrd);
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ break;
+ case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ if (insn & (1 << 21)) {
+ if (insn & (1 << 20))
+ gen_op_iwmmxt_mulshw_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_mulslw_M0_wRn(rd1);
+ } else {
+ if (insn & (1 << 20))
+ gen_op_iwmmxt_muluhw_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_mululw_M0_wRn(rd1);
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ break;
+ case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_macsw_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_macuw_M0_wRn(rd1);
+ if (!(insn & (1 << 20))) {
+ iwmmxt_load_reg(cpu_V1, wrd);
+ tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ break;
+ case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ switch ((insn >> 22) & 3) {
+ case 0:
+ gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
+ break;
+ case 1:
+ gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
+ break;
+ case 2:
+ gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
+ break;
+ case 3:
+ return 1;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ if (insn & (1 << 22)) {
+ if (insn & (1 << 20))
+ gen_op_iwmmxt_avgw1_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_avgw0_M0_wRn(rd1);
+ } else {
+ if (insn & (1 << 20))
+ gen_op_iwmmxt_avgb1_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_avgb0_M0_wRn(rd1);
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
+ tcg_gen_andi_i32(tmp, tmp, 7);
+ iwmmxt_load_reg(cpu_V1, rd1);
+ gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ break;
+ case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
+ if (((insn >> 6) & 3) == 3)
+ return 1;
+ rd = (insn >> 12) & 0xf;
+ wrd = (insn >> 16) & 0xf;
+ tmp = load_reg(s, rd);
+ gen_op_iwmmxt_movq_M0_wRn(wrd);
+ switch ((insn >> 6) & 3) {
+ case 0:
+ tmp2 = tcg_constant_i32(0xff);
+ tmp3 = tcg_constant_i32((insn & 7) << 3);
+ break;
+ case 1:
+ tmp2 = tcg_constant_i32(0xffff);
+ tmp3 = tcg_constant_i32((insn & 3) << 4);
+ break;
+ case 2:
+ tmp2 = tcg_constant_i32(0xffffffff);
+ tmp3 = tcg_constant_i32((insn & 1) << 5);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ break;
+ case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
+ rd = (insn >> 12) & 0xf;
+ wrd = (insn >> 16) & 0xf;
+ if (rd == 15 || ((insn >> 22) & 3) == 3)
+ return 1;
+ gen_op_iwmmxt_movq_M0_wRn(wrd);
+ tmp = tcg_temp_new_i32();
+ switch ((insn >> 22) & 3) {
+ case 0:
+ tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
+ tcg_gen_extrl_i64_i32(tmp, cpu_M0);
+ if (insn & 8) {
+ tcg_gen_ext8s_i32(tmp, tmp);
+ } else {
+ tcg_gen_andi_i32(tmp, tmp, 0xff);
+ }
+ break;
+ case 1:
+ tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
+ tcg_gen_extrl_i64_i32(tmp, cpu_M0);
+ if (insn & 8) {
+ tcg_gen_ext16s_i32(tmp, tmp);
+ } else {
+ tcg_gen_andi_i32(tmp, tmp, 0xffff);
+ }
+ break;
+ case 2:
+ tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
+ tcg_gen_extrl_i64_i32(tmp, cpu_M0);
+ break;
+ }
+ store_reg(s, rd, tmp);
+ break;
+ case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
+ if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
+ return 1;
+ tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
+ switch ((insn >> 22) & 3) {
+ case 0:
+ tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
+ break;
+ case 1:
+ tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
+ break;
+ case 2:
+ tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
+ break;
+ }
+ tcg_gen_shli_i32(tmp, tmp, 28);
+ gen_set_nzcv(tmp);
+ break;
+ case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
+ if (((insn >> 6) & 3) == 3)
+ return 1;
+ rd = (insn >> 12) & 0xf;
+ wrd = (insn >> 16) & 0xf;
+ tmp = load_reg(s, rd);
+ switch ((insn >> 6) & 3) {
+ case 0:
+ gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
+ break;
+ case 1:
+ gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
+ break;
+ case 2:
+ gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
+ break;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ break;
+ case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
+ if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
+ return 1;
+ tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
+ tmp2 = tcg_temp_new_i32();
+ tcg_gen_mov_i32(tmp2, tmp);
+ switch ((insn >> 22) & 3) {
+ case 0:
+ for (i = 0; i < 7; i ++) {
+ tcg_gen_shli_i32(tmp2, tmp2, 4);
+ tcg_gen_and_i32(tmp, tmp, tmp2);
+ }
+ break;
+ case 1:
+ for (i = 0; i < 3; i ++) {
+ tcg_gen_shli_i32(tmp2, tmp2, 8);
+ tcg_gen_and_i32(tmp, tmp, tmp2);
+ }
+ break;
+ case 2:
+ tcg_gen_shli_i32(tmp2, tmp2, 16);
+ tcg_gen_and_i32(tmp, tmp, tmp2);
+ break;
+ }
+ gen_set_nzcv(tmp);
+ break;
+ case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ switch ((insn >> 22) & 3) {
+ case 0:
+ gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
+ break;
+ case 1:
+ gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
+ break;
+ case 2:
+ gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
+ break;
+ case 3:
+ return 1;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ break;
+ case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
+ if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
+ return 1;
+ tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
+ tmp2 = tcg_temp_new_i32();
+ tcg_gen_mov_i32(tmp2, tmp);
+ switch ((insn >> 22) & 3) {
+ case 0:
+ for (i = 0; i < 7; i ++) {
+ tcg_gen_shli_i32(tmp2, tmp2, 4);
+ tcg_gen_or_i32(tmp, tmp, tmp2);
+ }
+ break;
+ case 1:
+ for (i = 0; i < 3; i ++) {
+ tcg_gen_shli_i32(tmp2, tmp2, 8);
+ tcg_gen_or_i32(tmp, tmp, tmp2);
+ }
+ break;
+ case 2:
+ tcg_gen_shli_i32(tmp2, tmp2, 16);
+ tcg_gen_or_i32(tmp, tmp, tmp2);
+ break;
+ }
+ gen_set_nzcv(tmp);
+ break;
+ case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
+ rd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
+ return 1;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ tmp = tcg_temp_new_i32();
+ switch ((insn >> 22) & 3) {
+ case 0:
+ gen_helper_iwmmxt_msbb(tmp, cpu_M0);
+ break;
+ case 1:
+ gen_helper_iwmmxt_msbw(tmp, cpu_M0);
+ break;
+ case 2:
+ gen_helper_iwmmxt_msbl(tmp, cpu_M0);
+ break;
+ }
+ store_reg(s, rd, tmp);
+ break;
+ case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
+ case 0x906: case 0xb06: case 0xd06: case 0xf06:
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ switch ((insn >> 22) & 3) {
+ case 0:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
+ break;
+ case 1:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
+ break;
+ case 2:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
+ break;
+ case 3:
+ return 1;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
+ case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ switch ((insn >> 22) & 3) {
+ case 0:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_unpacklsb_M0();
+ else
+ gen_op_iwmmxt_unpacklub_M0();
+ break;
+ case 1:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_unpacklsw_M0();
+ else
+ gen_op_iwmmxt_unpackluw_M0();
+ break;
+ case 2:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_unpacklsl_M0();
+ else
+ gen_op_iwmmxt_unpacklul_M0();
+ break;
+ case 3:
+ return 1;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
+ case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ switch ((insn >> 22) & 3) {
+ case 0:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_unpackhsb_M0();
+ else
+ gen_op_iwmmxt_unpackhub_M0();
+ break;
+ case 1:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_unpackhsw_M0();
+ else
+ gen_op_iwmmxt_unpackhuw_M0();
+ break;
+ case 2:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_unpackhsl_M0();
+ else
+ gen_op_iwmmxt_unpackhul_M0();
+ break;
+ case 3:
+ return 1;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
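+ /*
+ * The shift ops below (WSRL, WSRA, WSLL, WROR) have halfword,
+ * word and doubleword forms only, so element size 0 (byte)
+ * is treated as UNDEF.
+ */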
+ case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
+ case 0x214: case 0x614: case 0xa14: case 0xe14:
+ if (((insn >> 22) & 3) == 0)
+ return 1;
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ tmp = tcg_temp_new_i32();
+ if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
+ return 1;
+ }
+ switch ((insn >> 22) & 3) {
+ case 1:
+ gen_helper_iwmmxt_srlw(cpu_M0, tcg_env, cpu_M0, tmp);
+ break;
+ case 2:
+ gen_helper_iwmmxt_srll(cpu_M0, tcg_env, cpu_M0, tmp);
+ break;
+ case 3:
+ gen_helper_iwmmxt_srlq(cpu_M0, tcg_env, cpu_M0, tmp);
+ break;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
+ case 0x014: case 0x414: case 0x814: case 0xc14:
+ if (((insn >> 22) & 3) == 0)
+ return 1;
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ tmp = tcg_temp_new_i32();
+ if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
+ return 1;
+ }
+ switch ((insn >> 22) & 3) {
+ case 1:
+ gen_helper_iwmmxt_sraw(cpu_M0, tcg_env, cpu_M0, tmp);
+ break;
+ case 2:
+ gen_helper_iwmmxt_sral(cpu_M0, tcg_env, cpu_M0, tmp);
+ break;
+ case 3:
+ gen_helper_iwmmxt_sraq(cpu_M0, tcg_env, cpu_M0, tmp);
+ break;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
+ case 0x114: case 0x514: case 0x914: case 0xd14:
+ if (((insn >> 22) & 3) == 0)
+ return 1;
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ tmp = tcg_temp_new_i32();
+ if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
+ return 1;
+ }
+ switch ((insn >> 22) & 3) {
+ case 1:
+ gen_helper_iwmmxt_sllw(cpu_M0, tcg_env, cpu_M0, tmp);
+ break;
+ case 2:
+ gen_helper_iwmmxt_slll(cpu_M0, tcg_env, cpu_M0, tmp);
+ break;
+ case 3:
+ gen_helper_iwmmxt_sllq(cpu_M0, tcg_env, cpu_M0, tmp);
+ break;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
+ case 0x314: case 0x714: case 0xb14: case 0xf14:
+ if (((insn >> 22) & 3) == 0)
+ return 1;
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ tmp = tcg_temp_new_i32();
+ switch ((insn >> 22) & 3) {
+ case 1:
+ if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
+ return 1;
+ }
+ gen_helper_iwmmxt_rorw(cpu_M0, tcg_env, cpu_M0, tmp);
+ break;
+ case 2:
+ if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
+ return 1;
+ }
+ gen_helper_iwmmxt_rorl(cpu_M0, tcg_env, cpu_M0, tmp);
+ break;
+ case 3:
+ if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
+ return 1;
+ }
+ gen_helper_iwmmxt_rorq(cpu_M0, tcg_env, cpu_M0, tmp);
+ break;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
+ case 0x916: case 0xb16: case 0xd16: case 0xf16:
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ switch ((insn >> 22) & 3) {
+ case 0:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_minsb_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_minub_M0_wRn(rd1);
+ break;
+ case 1:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_minsw_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_minuw_M0_wRn(rd1);
+ break;
+ case 2:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_minsl_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_minul_M0_wRn(rd1);
+ break;
+ case 3:
+ return 1;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ break;
+ case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
+ case 0x816: case 0xa16: case 0xc16: case 0xe16:
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ switch ((insn >> 22) & 3) {
+ case 0:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_maxsb_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_maxub_M0_wRn(rd1);
+ break;
+ case 1:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_maxsw_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_maxuw_M0_wRn(rd1);
+ break;
+ case 2:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_maxsl_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_maxul_M0_wRn(rd1);
+ break;
+ case 3:
+ return 1;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ break;
+ case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
+ case 0x402: case 0x502: case 0x602: case 0x702:
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ iwmmxt_load_reg(cpu_V1, rd1);
+ gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
+ tcg_constant_i32((insn >> 20) & 3));
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ break;
+ case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
+ case 0x41a: case 0x51a: case 0x61a: case 0x71a:
+ case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
+ case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ switch ((insn >> 20) & 0xf) {
+ case 0x0:
+ gen_op_iwmmxt_subnb_M0_wRn(rd1);
+ break;
+ case 0x1:
+ gen_op_iwmmxt_subub_M0_wRn(rd1);
+ break;
+ case 0x3:
+ gen_op_iwmmxt_subsb_M0_wRn(rd1);
+ break;
+ case 0x4:
+ gen_op_iwmmxt_subnw_M0_wRn(rd1);
+ break;
+ case 0x5:
+ gen_op_iwmmxt_subuw_M0_wRn(rd1);
+ break;
+ case 0x7:
+ gen_op_iwmmxt_subsw_M0_wRn(rd1);
+ break;
+ case 0x8:
+ gen_op_iwmmxt_subnl_M0_wRn(rd1);
+ break;
+ case 0x9:
+ gen_op_iwmmxt_subul_M0_wRn(rd1);
+ break;
+ case 0xb:
+ gen_op_iwmmxt_subsl_M0_wRn(rd1);
+ break;
+ default:
+ return 1;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
+ case 0x41e: case 0x51e: case 0x61e: case 0x71e:
+ case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
+ case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
+ gen_helper_iwmmxt_shufh(cpu_M0, tcg_env, cpu_M0, tmp);
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
+ case 0x418: case 0x518: case 0x618: case 0x718:
+ case 0x818: case 0x918: case 0xa18: case 0xb18:
+ case 0xc18: case 0xd18: case 0xe18: case 0xf18:
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ switch ((insn >> 20) & 0xf) {
+ case 0x0:
+ gen_op_iwmmxt_addnb_M0_wRn(rd1);
+ break;
+ case 0x1:
+ gen_op_iwmmxt_addub_M0_wRn(rd1);
+ break;
+ case 0x3:
+ gen_op_iwmmxt_addsb_M0_wRn(rd1);
+ break;
+ case 0x4:
+ gen_op_iwmmxt_addnw_M0_wRn(rd1);
+ break;
+ case 0x5:
+ gen_op_iwmmxt_adduw_M0_wRn(rd1);
+ break;
+ case 0x7:
+ gen_op_iwmmxt_addsw_M0_wRn(rd1);
+ break;
+ case 0x8:
+ gen_op_iwmmxt_addnl_M0_wRn(rd1);
+ break;
+ case 0x9:
+ gen_op_iwmmxt_addul_M0_wRn(rd1);
+ break;
+ case 0xb:
+ gen_op_iwmmxt_addsl_M0_wRn(rd1);
+ break;
+ default:
+ return 1;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
+ case 0x408: case 0x508: case 0x608: case 0x708:
+ case 0x808: case 0x908: case 0xa08: case 0xb08:
+ case 0xc08: case 0xd08: case 0xe08: case 0xf08:
+ if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
+ return 1;
+ wrd = (insn >> 12) & 0xf;
+ rd0 = (insn >> 16) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ gen_op_iwmmxt_movq_M0_wRn(rd0);
+ switch ((insn >> 22) & 3) {
+ case 1:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_packsw_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_packuw_M0_wRn(rd1);
+ break;
+ case 2:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_packsl_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_packul_M0_wRn(rd1);
+ break;
+ case 3:
+ if (insn & (1 << 21))
+ gen_op_iwmmxt_packsq_M0_wRn(rd1);
+ else
+ gen_op_iwmmxt_packuq_M0_wRn(rd1);
+ break;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ gen_op_iwmmxt_set_cup();
+ break;
+ case 0x201: case 0x203: case 0x205: case 0x207:
+ case 0x209: case 0x20b: case 0x20d: case 0x20f:
+ case 0x211: case 0x213: case 0x215: case 0x217:
+ case 0x219: case 0x21b: case 0x21d: case 0x21f:
+ wrd = (insn >> 5) & 0xf;
+ rd0 = (insn >> 12) & 0xf;
+ rd1 = (insn >> 0) & 0xf;
+ if (rd0 == 0xf || rd1 == 0xf)
+ return 1;
+ gen_op_iwmmxt_movq_M0_wRn(wrd);
+ tmp = load_reg(s, rd0);
+ tmp2 = load_reg(s, rd1);
+ switch ((insn >> 16) & 0xf) {
+ case 0x0: /* TMIA */
+ gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
+ break;
+ case 0x8: /* TMIAPH */
+ gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
+ break;
+ case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
+ if (insn & (1 << 16))
+ tcg_gen_shri_i32(tmp, tmp, 16);
+ if (insn & (1 << 17))
+ tcg_gen_shri_i32(tmp2, tmp2, 16);
+ gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
+ break;
+ default:
+ return 1;
+ }
+ gen_op_iwmmxt_movq_wRn_M0(wrd);
+ gen_op_iwmmxt_set_mup();
+ break;
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
+ (i.e. an undefined instruction). */
+static int disas_dsp_insn(DisasContext *s, uint32_t insn)
+{
+ int acc, rd0, rd1, rdhi, rdlo;
+ TCGv_i32 tmp, tmp2;
+
+ if ((insn & 0x0ff00f10) == 0x0e200010) {
+ /* Multiply with Internal Accumulate Format */
+ rd0 = (insn >> 12) & 0xf;
+ rd1 = insn & 0xf;
+ acc = (insn >> 5) & 7;
+
+ if (acc != 0)
+ return 1;
+
+ tmp = load_reg(s, rd0);
+ tmp2 = load_reg(s, rd1);
+ switch ((insn >> 16) & 0xf) {
+ case 0x0: /* MIA */
+ gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
+ break;
+ case 0x8: /* MIAPH */
+ gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
+ break;
+ case 0xc: /* MIABB */
+ case 0xd: /* MIABT */
+ case 0xe: /* MIATB */
+ case 0xf: /* MIATT */
+ if (insn & (1 << 16))
+ tcg_gen_shri_i32(tmp, tmp, 16);
+ if (insn & (1 << 17))
+ tcg_gen_shri_i32(tmp2, tmp2, 16);
+ gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
+ break;
+ default:
+ return 1;
+ }
+
+ gen_op_iwmmxt_movq_wRn_M0(acc);
+ return 0;
+ }
+
+ if ((insn & 0x0fe00ff8) == 0x0c400000) {
+ /* Internal Accumulator Access Format */
+ rdhi = (insn >> 16) & 0xf;
+ rdlo = (insn >> 12) & 0xf;
+ acc = insn & 7;
+
+ if (acc != 0)
+ return 1;
+
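+ /*
+ * The XScale accumulator acc0 is 40 bits wide, so MRA masks the
+ * high word down to bits [39:32].
+ */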
+ if (insn & ARM_CP_RW_BIT) { /* MRA */
+ iwmmxt_load_reg(cpu_V0, acc);
+ tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
+ tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
+ tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
+ } else { /* MAR */
+ tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
+ iwmmxt_store_reg(cpu_V0, acc);
+ }
+ return 0;
+ }
+
+ return 1;
+}
+
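+/*
+ * Chain to whatever TB matches the current CPU state, if any:
+ * tcg_gen_lookup_and_goto_ptr() emits a run-time TB lookup and
+ * jumps to the result, falling back to an exit to the main loop
+ * when no matching TB is found.
+ */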
+static void gen_goto_ptr(void)
+{
+ tcg_gen_lookup_and_goto_ptr();
+}
+
+/* This will end the TB but doesn't guarantee we'll return to
+ * cpu_loop_exec. Any live exit_requests will be processed as we
+ * enter the next TB.
+ */
+static void gen_goto_tb(DisasContext *s, int n, target_long diff)
+{
+ if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
+ /*
+ * For pcrel, the pc must always be up-to-date on entry to
+ * the linked TB, so that it can use simple additions for all
+ * further adjustments. For !pcrel, the linked TB is compiled
+ * to know its full virtual address, so we can delay the
+ * update to pc to the unlinked path. A long chain of links
+ * can thus avoid many updates to the PC.
+ */
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
+ gen_update_pc(s, diff);
+ tcg_gen_goto_tb(n);
+ } else {
+ tcg_gen_goto_tb(n);
+ gen_update_pc(s, diff);
+ }
+ tcg_gen_exit_tb(s->base.tb, n);
+ } else {
+ gen_update_pc(s, diff);
+ gen_goto_ptr();
+ }
+ s->base.is_jmp = DISAS_NORETURN;
+}
+
+/* Jump, specifying which TB number to use if we gen_goto_tb() */
+static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
+{
+ if (unlikely(s->ss_active)) {
+ /* An indirect jump so that we still trigger the debug exception. */
+ gen_update_pc(s, diff);
+ s->base.is_jmp = DISAS_JUMP;
+ return;
+ }
+ switch (s->base.is_jmp) {
+ case DISAS_NEXT:
+ case DISAS_TOO_MANY:
+ case DISAS_NORETURN:
+ /*
+ * The normal case: just go to the destination TB.
+ * NB: NORETURN happens if we generate code like
+ * gen_brcondi(l);
+ * gen_jmp();
+ * gen_set_label(l);
+ * gen_jmp();
+ * on the second call to gen_jmp().
+ */
+ gen_goto_tb(s, tbno, diff);
+ break;
+ case DISAS_UPDATE_NOCHAIN:
+ case DISAS_UPDATE_EXIT:
+ /*
+ * We already decided we're leaving the TB for some other reason.
+ * Avoid using goto_tb so we really do exit back to the main loop
+ * and don't chain to another TB.
+ */
+ gen_update_pc(s, diff);
+ gen_goto_ptr();
+ s->base.is_jmp = DISAS_NORETURN;
+ break;
+ default:
+ /*
+ * We shouldn't be emitting code for a jump and also have
+ * is_jmp set to one of the special cases like DISAS_SWI.
+ */
+ g_assert_not_reached();
+ }
+}
+
+static inline void gen_jmp(DisasContext *s, target_long diff)
+{
+ gen_jmp_tb(s, diff, 0);
+}
+
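+/*
+ * 16x16->32 signed multiply for the SMULxy/SMLAxy style operations:
+ * x and y select the top (1) or bottom (0) halfword of each operand,
+ * which is sign-extended before the multiply.
+ */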
+static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
+{
+ if (x)
+ tcg_gen_sari_i32(t0, t0, 16);
+ else
+ gen_sxth(t0);
+ if (y)
+ tcg_gen_sari_i32(t1, t1, 16);
+ else
+ gen_sxth(t1);
+ tcg_gen_mul_i32(t0, t0, t1);
+}
+
+/* Return the mask of PSR bits set by a MSR instruction. */
+static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
+{
+ uint32_t mask = 0;
+
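+ /*
+ * flags holds the MSR field mask: bit 0 = c (PSR[7:0]),
+ * bit 1 = x (PSR[15:8]), bit 2 = s (PSR[23:16]) and
+ * bit 3 = f (PSR[31:24]).
+ */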
+ if (flags & (1 << 0)) {
+ mask |= 0xff;
+ }
+ if (flags & (1 << 1)) {
+ mask |= 0xff00;
+ }
+ if (flags & (1 << 2)) {
+ mask |= 0xff0000;
+ }
+ if (flags & (1 << 3)) {
+ mask |= 0xff000000;
+ }
+
+ /* Mask out undefined and reserved bits. */
+ mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
+
+ /* Mask out execution state. */
+ if (!spsr) {
+ mask &= ~CPSR_EXEC;
+ }
+
+ /* Mask out privileged bits. */
+ if (IS_USER(s)) {
+ mask &= CPSR_USER;
+ }
+ return mask;
+}
+
+/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
+static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
+{
+ TCGv_i32 tmp;
+ if (spsr) {
+ /* ??? This is also undefined in system mode. */
+ if (IS_USER(s))
+ return 1;
+
+ tmp = load_cpu_field(spsr);
+ tcg_gen_andi_i32(tmp, tmp, ~mask);
+ tcg_gen_andi_i32(t0, t0, mask);
+ tcg_gen_or_i32(tmp, tmp, t0);
+ store_cpu_field(tmp, spsr);
+ } else {
+ gen_set_cpsr(t0, mask);
+ }
+ gen_lookup_tb(s);
+ return 0;
+}
+
+/* Returns nonzero if access to the PSR is not permitted. */
+static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
+{
+ TCGv_i32 tmp;
+ tmp = tcg_temp_new_i32();
+ tcg_gen_movi_i32(tmp, val);
+ return gen_set_psr(s, mask, spsr, tmp);
+}
+
+static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
+ int *tgtmode, int *regno)
+{
+ /* Decode the r and sysm fields of MSR/MRS banked accesses into
+ * the target mode and register number, and identify the various
+ * unpredictable cases.
+ * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
+ * + executed in user mode
+ * + using R15 as the src/dest register
+ * + accessing an unimplemented register
+ * + accessing a register that's inaccessible at the current PL/security state
+ * + accessing a register that you could access with a different insn
+ * We choose to UNDEF in all these cases.
+ * Since we don't know which of the various AArch32 modes we are in
+ * we have to defer some checks to runtime.
+ * Accesses to Monitor mode registers from Secure EL1 (which implies
+ * that EL3 is AArch64) must trap to EL3, or to Secure EL2 when that
+ * is enabled.
+ *
+ * If the access checks fail this function will emit code to take
+ * an exception and return false. Otherwise it will return true,
+ * and set *tgtmode and *regno appropriately.
+ */
+ /* These instructions are present only in ARMv8, or in ARMv7 with the
+ * Virtualization Extensions.
+ */
+ if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
+ !arm_dc_feature(s, ARM_FEATURE_EL2)) {
+ goto undef;
+ }
+
+ if (IS_USER(s) || rn == 15) {
+ goto undef;
+ }
+
+ /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
+ * of registers into (r, sysm).
+ */
+ if (r) {
+ /* SPSRs for other modes */
+ switch (sysm) {
+ case 0xe: /* SPSR_fiq */
+ *tgtmode = ARM_CPU_MODE_FIQ;
+ break;
+ case 0x10: /* SPSR_irq */
+ *tgtmode = ARM_CPU_MODE_IRQ;
+ break;
+ case 0x12: /* SPSR_svc */
+ *tgtmode = ARM_CPU_MODE_SVC;
+ break;
+ case 0x14: /* SPSR_abt */
+ *tgtmode = ARM_CPU_MODE_ABT;
+ break;
+ case 0x16: /* SPSR_und */
+ *tgtmode = ARM_CPU_MODE_UND;
+ break;
+ case 0x1c: /* SPSR_mon */
+ *tgtmode = ARM_CPU_MODE_MON;
+ break;
+ case 0x1e: /* SPSR_hyp */
+ *tgtmode = ARM_CPU_MODE_HYP;
+ break;
+ default: /* unallocated */
+ goto undef;
+ }
+ /* We arbitrarily assign SPSR a register number of 16. */
+ *regno = 16;
+ } else {
+ /* general purpose registers for other modes */
+ switch (sysm) {
+ case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
+ *tgtmode = ARM_CPU_MODE_USR;
+ *regno = sysm + 8;
+ break;
+ case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
+ *tgtmode = ARM_CPU_MODE_FIQ;
+ *regno = sysm;
+ break;
+ case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
+ *tgtmode = ARM_CPU_MODE_IRQ;
+ *regno = sysm & 1 ? 13 : 14;
+ break;
+ case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
+ *tgtmode = ARM_CPU_MODE_SVC;
+ *regno = sysm & 1 ? 13 : 14;
+ break;
+ case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
+ *tgtmode = ARM_CPU_MODE_ABT;
+ *regno = sysm & 1 ? 13 : 14;
+ break;
+ case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
+ *tgtmode = ARM_CPU_MODE_UND;
+ *regno = sysm & 1 ? 13 : 14;
+ break;
+ case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
+ *tgtmode = ARM_CPU_MODE_MON;
+ *regno = sysm & 1 ? 13 : 14;
+ break;
+ case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
+ *tgtmode = ARM_CPU_MODE_HYP;
+ /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
+ *regno = sysm & 1 ? 13 : 17;
+ break;
+ default: /* unallocated */
+ goto undef;
+ }
+ }
+
+ /* Catch the 'accessing inaccessible register' cases we can detect
+ * at translate time.
+ */
+ switch (*tgtmode) {
+ case ARM_CPU_MODE_MON:
+ if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
+ goto undef;
+ }
+ if (s->current_el == 1) {
+ /* If we're in Secure EL1 (which implies that EL3 is AArch64)
+ * then accesses to Mon registers trap to Secure EL2, if it exists,
+ * otherwise EL3.
+ */
+ TCGv_i32 tcg_el;
+
+ if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
+ dc_isar_feature(aa64_sel2, s)) {
+ /* Target EL is EL<3 minus SCR_EL3.EEL2> */
+ tcg_el = load_cpu_field_low32(cp15.scr_el3);
+ tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
+ tcg_gen_addi_i32(tcg_el, tcg_el, 3);
+ } else {
+ tcg_el = tcg_constant_i32(3);
+ }
+
+ gen_exception_insn_el_v(s, 0, EXCP_UDEF,
+ syn_uncategorized(), tcg_el);
+ return false;
+ }
+ break;
+ case ARM_CPU_MODE_HYP:
+ /*
+ * r13_hyp can only be accessed from Monitor mode, and so we
+ * can forbid accesses from EL2 or below.
+ * elr_hyp can be accessed also from Hyp mode, so forbid
+ * accesses from EL0 or EL1.
+ * SPSR_hyp is supposed to be in the same category as r13_hyp
+ * and UNPREDICTABLE if accessed from anything except Monitor
+ * mode. However there is some real-world code that will do
+ * it because at least some hardware happens to permit the
+ * access. (Notably a standard Cortex-R52 startup code fragment
+ * does this.) So we permit SPSR_hyp from Hyp mode also, to allow
+ * this (incorrect) guest code to run.
+ */
+ if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2
+ || (s->current_el < 3 && *regno != 16 && *regno != 17)) {
+ goto undef;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return true;
+
+undef:
+ /* If we get here then some access check did not pass */
+ gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
+ return false;
+}
+
+static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
+{
+ TCGv_i32 tcg_reg;
+ int tgtmode = 0, regno = 0;
+
+ if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
+ return;
+ }
+
+ /* Sync state because msr_banked() can raise exceptions */
+ gen_set_condexec(s);
+ gen_update_pc(s, 0);
+ tcg_reg = load_reg(s, rn);
+ gen_helper_msr_banked(tcg_env, tcg_reg,
+ tcg_constant_i32(tgtmode),
+ tcg_constant_i32(regno));
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
+}
+
+static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
+{
+ TCGv_i32 tcg_reg;
+ int tgtmode = 0, regno = 0;
+
+ if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
+ return;
+ }
+
+ /* Sync state because mrs_banked() can raise exceptions */
+ gen_set_condexec(s);
+ gen_update_pc(s, 0);
+ tcg_reg = tcg_temp_new_i32();
+ gen_helper_mrs_banked(tcg_reg, tcg_env,
+ tcg_constant_i32(tgtmode),
+ tcg_constant_i32(regno));
+ store_reg(s, rn, tcg_reg);
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
+}
+
+/* Store value to PC as for an exception return (i.e. don't
+ * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
+ * will do the masking based on the new value of the Thumb bit.
+ */
+static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
+{
+ tcg_gen_mov_i32(cpu_R[15], pc);
+}
+
+/* Generate a v6 exception return. Marks both values as dead. */
+static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
+{
+ store_pc_exc_ret(s, pc);
+ /* The cpsr_write_eret helper will mask the low bits of PC
+ * appropriately depending on the new Thumb bit, so it must
+ * be called after storing the new PC.
+ */
+ translator_io_start(&s->base);
+ gen_helper_cpsr_write_eret(tcg_env, cpsr);
+ /* Must exit loop to check un-masked IRQs */
+ s->base.is_jmp = DISAS_EXIT;
+}
+
+/* Generate an old-style exception return. Marks pc as dead. */
+static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
+{
+ gen_rfe(s, pc, load_cpu_field(spsr));
+}
+
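+/*
+ * Expand a three-operand gvec operation whose helper also needs the
+ * saturation flag: a pointer to vfp.qc in CPUARMState is passed as
+ * the helper's pointer argument.
+ */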
+static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz,
+ gen_helper_gvec_3_ptr *fn)
+{
+ TCGv_ptr qc_ptr = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc));
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
+ opr_sz, max_sz, 0, fn);
+}
+
+void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_3_ptr * const fns[2] = {
+ gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
+ };
+ tcg_debug_assert(vece >= 1 && vece <= 2);
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
+}
+
+void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_3_ptr * const fns[2] = {
+ gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
+ };
+ tcg_debug_assert(vece >= 1 && vece <= 2);
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
+}
+
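+/*
+ * Vector compares against zero: each element of the result is set to
+ * all ones when the comparison with zero holds, and to zero otherwise.
+ */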
+#define GEN_CMP0(NAME, COND) \
+ void NAME(unsigned vece, uint32_t d, uint32_t m, \
+ uint32_t opr_sz, uint32_t max_sz) \
+ { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
+
+GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
+GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
+GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
+GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
+GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
+
+#undef GEN_CMP0
+
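+/*
+ * Signed shift right and accumulate (SSRA): arithmetically shift each
+ * element of 'a' right and add the result into 'd'.
+ */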
+static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_sar8i_i64(a, a, shift);
+ tcg_gen_vec_add8_i64(d, d, a);
+}
+
+static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_sar16i_i64(a, a, shift);
+ tcg_gen_vec_add16_i64(d, d, a);
+}
+
+static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_sari_i32(a, a, shift);
+ tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_sari_i64(a, a, shift);
+ tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ tcg_gen_sari_vec(vece, a, a, sh);
+ tcg_gen_add_vec(vece, d, d, a);
+}
+
+void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_ssra8_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_ssra16_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_ssra32_i32,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_ssra64_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * For a signed shift the result is then all sign bits.
+ */
+ shift = MIN(shift, (8 << vece) - 1);
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+}
+
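+/*
+ * Unsigned shift right and accumulate (USRA): as above, but using a
+ * logical right shift of 'a' before adding into 'd'.
+ */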
+static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_shr8i_i64(a, a, shift);
+ tcg_gen_vec_add8_i64(d, d, a);
+}
+
+static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_shr16i_i64(a, a, shift);
+ tcg_gen_vec_add16_i64(d, d, a);
+}
+
+static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_shri_i32(a, a, shift);
+ tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_shri_i64(a, a, shift);
+ tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ tcg_gen_shri_vec(vece, a, a, sh);
+ tcg_gen_add_vec(vece, d, d, a);
+}
+
+void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_usra8_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8, },
+ { .fni8 = gen_usra16_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16, },
+ { .fni4 = gen_usra32_i32,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32, },
+ { .fni8 = gen_usra64_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64, },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * For an unsigned shift the result is all zeros, so accumulating it is a nop.
+ */
+ if (shift < (8 << vece)) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ } else {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ }
+}
+
+/*
+ * Shift one less than the requested amount, and the low bit is
+ * the rounding bit. For the 8 and 16-bit operations, because we
+ * mask the low bit, we can perform a normal integer shift instead
+ * of a vector shift.
+ */
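+/*
+ * For example, with 8-bit elements and sh == 3 each lane computes
+ * (a >> 3) + ((a >> 2) & 1), i.e. a rounding right shift.
+ */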
+static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
+ tcg_gen_vec_sar8i_i64(d, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
+}
+
+static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
+ tcg_gen_vec_sar16i_i64(d, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
+}
+
+static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t;
+
+ /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
+ if (sh == 32) {
+ tcg_gen_movi_i32(d, 0);
+ return;
+ }
+ t = tcg_temp_new_i32();
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
+ tcg_gen_sari_i32(d, a, sh);
+ tcg_gen_add_i32(d, d, t);
+}
+
+static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
+ tcg_gen_sari_i64(d, a, sh);
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
+ tcg_gen_dupi_vec(vece, ones, 1);
+ tcg_gen_and_vec(vece, t, t, ones);
+ tcg_gen_sari_vec(vece, d, a, sh);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_srshr8_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_srshr16_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_srshr32_i32,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_srshr64_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ if (shift == (8 << vece)) {
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * For a signed shift the result is all sign bits. With rounding, this produces
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
+ * I.e. always zero.
+ */
+ tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
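+/*
+ * Signed rounding shift right and accumulate (SRSRA): reuse the
+ * SRSHR expanders above and add the rounded result into 'd'.
+ */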
+static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_srshr8_i64(t, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
+}
+
+static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_srshr16_i64(t, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
+}
+
+static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ gen_srshr32_i32(t, a, sh);
+ tcg_gen_add_i32(d, d, t);
+}
+
+static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_srshr64_i64(t, a, sh);
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ gen_srshr_vec(vece, t, a, sh);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_srsra8_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni8 = gen_srsra16_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_srsra32_i32,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_srsra64_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * For a signed shift the result is all sign bits. With rounding, this produces
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
+ * I.e. always zero. With accumulation, this leaves D unchanged.
+ */
+ if (shift == (8 << vece)) {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
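+/*
+ * Unsigned rounding shift right (URSHR): the same rounding-bit trick
+ * as SRSHR above, but using logical rather than arithmetic shifts.
+ */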
+static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
+ tcg_gen_vec_shr8i_i64(d, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
+}
+
+static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
+ tcg_gen_vec_shr16i_i64(d, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
+}
+
+static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t;
+
+ /* Handle shift by the input size for the benefit of trans_URSHR_ri */
+ if (sh == 32) {
+ tcg_gen_extract_i32(d, a, sh - 1, 1);
+ return;
+ }
+ t = tcg_temp_new_i32();
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
+ tcg_gen_shri_i32(d, a, sh);
+ tcg_gen_add_i32(d, d, t);
+}
+
+static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
+ tcg_gen_shri_i64(d, a, sh);
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shri_vec(vece, t, a, shift - 1);
+ tcg_gen_dupi_vec(vece, ones, 1);
+ tcg_gen_and_vec(vece, t, t, ones);
+ tcg_gen_shri_vec(vece, d, a, shift);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_urshr8_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_urshr16_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_urshr32_i32,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_urshr64_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ if (shift == (8 << vece)) {
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * For an unsigned shift the result is zero. With rounding, this produces a
+ * copy of the most significant bit.
+ */
+ tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
+static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 8) {
+ tcg_gen_vec_shr8i_i64(t, a, 7);
+ } else {
+ gen_urshr8_i64(t, a, sh);
+ }
+ tcg_gen_vec_add8_i64(d, d, t);
+}
+
+static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 16) {
+ tcg_gen_vec_shr16i_i64(t, a, 15);
+ } else {
+ gen_urshr16_i64(t, a, sh);
+ }
+ tcg_gen_vec_add16_i64(d, d, t);
+}
+
+static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ if (sh == 32) {
+ tcg_gen_shri_i32(t, a, 31);
+ } else {
+ gen_urshr32_i32(t, a, sh);
+ }
+ tcg_gen_add_i32(d, d, t);
+}
+
+static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 64) {
+ tcg_gen_shri_i64(t, a, 63);
+ } else {
+ gen_urshr64_i64(t, a, sh);
+ }
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ if (sh == (8 << vece)) {
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
+ } else {
+ gen_urshr_vec(vece, t, a, sh);
+ }
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_ursra8_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni8 = gen_ursra16_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_ursra32_i32,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_ursra64_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+}
+
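+/*
+ * Shift right and insert (SRI): shift 'a' right and deposit the
+ * result into 'd', leaving the top 'shift' bits of each destination
+ * element unchanged.
+ */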
+static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_8, 0xff >> shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff >> shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_shri_i32(a, a, shift);
+ tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
+}
+
+static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_shri_i64(a, a, shift);
+ tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
+}
+
+static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
+ tcg_gen_shri_vec(vece, t, a, sh);
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
+}
+
+void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
+ const GVecGen2i ops[4] = {
+ { .fni8 = gen_shr8_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_shr16_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_shr32_ins_i32,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_shr64_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /* Shift of esize leaves destination unchanged. */
+ if (shift < (8 << vece)) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ } else {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ }
+}
+
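+/*
+ * Shift left and insert (SLI): shift 'a' left and deposit the result
+ * into 'd', leaving the low 'shift' bits of each destination element
+ * unchanged.
+ */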
+static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_8, 0xff << shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff << shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+}
+
+static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
+}
+
+static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
+}
+
+static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shli_vec(vece, t, a, sh);
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
+}
+
+void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
+ const GVecGen2i ops[4] = {
+ { .fni8 = gen_shl8_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_shl16_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_shl32_ins_i32,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_shl64_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [0..esize-1]. */
+ tcg_debug_assert(shift >= 0);
+ tcg_debug_assert(shift < (8 << vece));
+
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
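+/*
+ * Per-element expanders for VMLA/VMLS: multiply 'a' by 'b', then add
+ * the product to, or subtract it from, the destination 'd'.
+ */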
+static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u8(a, a, b);
+ gen_helper_neon_add_u8(d, d, a);
+}
+
+static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u8(a, a, b);
+ gen_helper_neon_sub_u8(d, d, a);
+}
+
+static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u16(a, a, b);
+ gen_helper_neon_add_u16(d, d, a);
+}
+
+static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u16(a, a, b);
+ gen_helper_neon_sub_u16(d, d, a);
+}
+
+static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_mul_i32(a, a, b);
+ tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_mul_i32(a, a, b);
+ tcg_gen_sub_i32(d, d, a);
+}
+
+static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_mul_i64(a, a, b);
+ tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_mul_i64(a, a, b);
+ tcg_gen_sub_i64(d, d, a);
+}
+
+static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_mul_vec(vece, a, a, b);
+ tcg_gen_add_vec(vece, d, d, a);
+}
+
+static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_mul_vec(vece, a, a, b);
+ tcg_gen_sub_vec(vece, d, d, a);
+}
+
+/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
+ * these tables are shared with AArch64, which does support them.
+ */
+void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_mla8_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_mla16_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_mla32_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_mla64_i64,
+ .fniv = gen_mla_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_mul_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_mls8_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_mls16_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_mls32_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_mls64_i64,
+ .fniv = gen_mls_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+/* CMTST: test is "if ((X & Y) != 0)". */
+static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_and_i32(d, a, b);
+ tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0));
+}
+
+void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_and_i64(d, a, b);
+ tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0));
+}
+
+static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_and_vec(vece, d, a, b);
+ tcg_gen_dupi_vec(vece, a, 0);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
+}
+
+void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_helper_neon_tst_u8,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_helper_neon_tst_u16,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_cmtst_i32,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_cmtst_i64,
+ .fniv = gen_cmtst_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
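+/*
+ * USHL/VSHL with a register shift count: the count is the signed low
+ * byte of 'shift'; positive counts shift left, negative counts shift
+ * right, and a magnitude of the element size or more yields zero.
+ */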
+void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
+{
+ TCGv_i32 lval = tcg_temp_new_i32();
+ TCGv_i32 rval = tcg_temp_new_i32();
+ TCGv_i32 lsh = tcg_temp_new_i32();
+ TCGv_i32 rsh = tcg_temp_new_i32();
+ TCGv_i32 zero = tcg_constant_i32(0);
+ TCGv_i32 max = tcg_constant_i32(32);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_ext8s_i32(lsh, shift);
+ tcg_gen_neg_i32(rsh, lsh);
+ tcg_gen_shl_i32(lval, src, lsh);
+ tcg_gen_shr_i32(rval, src, rsh);
+ tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
+ tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
+}
+
+void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
+{
+ TCGv_i64 lval = tcg_temp_new_i64();
+ TCGv_i64 rval = tcg_temp_new_i64();
+ TCGv_i64 lsh = tcg_temp_new_i64();
+ TCGv_i64 rsh = tcg_temp_new_i64();
+ TCGv_i64 zero = tcg_constant_i64(0);
+ TCGv_i64 max = tcg_constant_i64(64);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_ext8s_i64(lsh, shift);
+ tcg_gen_neg_i64(rsh, lsh);
+ tcg_gen_shl_i64(lval, src, lsh);
+ tcg_gen_shr_i64(rval, src, rsh);
+ tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
+ tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
+}
+
+static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
+ TCGv_vec src, TCGv_vec shift)
+{
+ TCGv_vec lval = tcg_temp_new_vec_matching(dst);
+ TCGv_vec rval = tcg_temp_new_vec_matching(dst);
+ TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
+ TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
+ TCGv_vec msk, max;
+
+ tcg_gen_neg_vec(vece, rsh, shift);
+ if (vece == MO_8) {
+ tcg_gen_mov_vec(lsh, shift);
+ } else {
+ msk = tcg_temp_new_vec_matching(dst);
+ tcg_gen_dupi_vec(vece, msk, 0xff);
+ tcg_gen_and_vec(vece, lsh, shift, msk);
+ tcg_gen_and_vec(vece, rsh, rsh, msk);
+ }
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_shlv_vec(vece, lval, src, lsh);
+ tcg_gen_shrv_vec(vece, rval, src, rsh);
+
+ max = tcg_temp_new_vec_matching(dst);
+ tcg_gen_dupi_vec(vece, max, 8 << vece);
+
+ /*
+ * The choice of LT (signed) and GEU (unsigned) is biased toward
+ * the instructions of the x86_64 host. For MO_8, the whole byte
+ * is significant so we must use an unsigned compare; otherwise we
+ * have already masked to a byte and so a signed compare works.
+ * Other tcg hosts have a full set of comparisons and do not care.
+ */
+ if (vece == MO_8) {
+ tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
+ tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
+ tcg_gen_andc_vec(vece, lval, lval, lsh);
+ tcg_gen_andc_vec(vece, rval, rval, rsh);
+ } else {
+ tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
+ tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
+ tcg_gen_and_vec(vece, lval, lval, lsh);
+ tcg_gen_and_vec(vece, rval, rval, rsh);
+ }
+ tcg_gen_or_vec(vece, dst, lval, rval);
+}
+
+void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_neg_vec, INDEX_op_shlv_vec,
+ INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_ushl_vec,
+ .fno = gen_helper_gvec_ushl_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_ushl_vec,
+ .fno = gen_helper_gvec_ushl_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_ushl_i32,
+ .fniv = gen_ushl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_ushl_i64,
+ .fniv = gen_ushl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
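+/*
+ * SSHL/VSHL (signed) with a register shift count: as for USHL, but a
+ * negative count is an arithmetic right shift, clamped to esize - 1
+ * so that over-large right shifts yield 0 or -1 according to sign.
+ */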
+void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
+{
+ TCGv_i32 lval = tcg_temp_new_i32();
+ TCGv_i32 rval = tcg_temp_new_i32();
+ TCGv_i32 lsh = tcg_temp_new_i32();
+ TCGv_i32 rsh = tcg_temp_new_i32();
+ TCGv_i32 zero = tcg_constant_i32(0);
+ TCGv_i32 max = tcg_constant_i32(31);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_ext8s_i32(lsh, shift);
+ tcg_gen_neg_i32(rsh, lsh);
+ tcg_gen_shl_i32(lval, src, lsh);
+ tcg_gen_umin_i32(rsh, rsh, max);
+ tcg_gen_sar_i32(rval, src, rsh);
+ tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
+ tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
+}
+
+void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
+{
+ TCGv_i64 lval = tcg_temp_new_i64();
+ TCGv_i64 rval = tcg_temp_new_i64();
+ TCGv_i64 lsh = tcg_temp_new_i64();
+ TCGv_i64 rsh = tcg_temp_new_i64();
+ TCGv_i64 zero = tcg_constant_i64(0);
+ TCGv_i64 max = tcg_constant_i64(63);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_ext8s_i64(lsh, shift);
+ tcg_gen_neg_i64(rsh, lsh);
+ tcg_gen_shl_i64(lval, src, lsh);
+ tcg_gen_umin_i64(rsh, rsh, max);
+ tcg_gen_sar_i64(rval, src, rsh);
+ tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
+ tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
+}
+
+static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
+ TCGv_vec src, TCGv_vec shift)
+{
+ TCGv_vec lval = tcg_temp_new_vec_matching(dst);
+ TCGv_vec rval = tcg_temp_new_vec_matching(dst);
+ TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
+ TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
+ TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
+
+ /*
+ * Rely on the TCG guarantee that out of range shifts produce
+ * unspecified results, not undefined behaviour (i.e. no trap).
+ * Discard out-of-range results after the fact.
+ */
+ tcg_gen_neg_vec(vece, rsh, shift);
+ if (vece == MO_8) {
+ tcg_gen_mov_vec(lsh, shift);
+ } else {
+ tcg_gen_dupi_vec(vece, tmp, 0xff);
+ tcg_gen_and_vec(vece, lsh, shift, tmp);
+ tcg_gen_and_vec(vece, rsh, rsh, tmp);
+ }
+
+ /* Bound rsh so that an out-of-range right shift yields all sign bits. */
+ tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
+ tcg_gen_umin_vec(vece, rsh, rsh, tmp);
+ tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
+
+ tcg_gen_shlv_vec(vece, lval, src, lsh);
+ tcg_gen_sarv_vec(vece, rval, src, rsh);
+
+ /* Select in-bound left shift. */
+ tcg_gen_andc_vec(vece, lval, lval, tmp);
+
+ /* Select between left and right shift. */
+ if (vece == MO_8) {
+ tcg_gen_dupi_vec(vece, tmp, 0);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
+ } else {
+ tcg_gen_dupi_vec(vece, tmp, 0x80);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
+ }
+}
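+
+/*
+ * Informal reference for the signed variant (a sketch, not the
+ * generated code): each shift element is again the signed low byte,
+ * but an over-large right shift must produce the sign, not zero:
+ *
+ *   int8_t sh = shift;
+ *   result = (sh >= esize) ? 0
+ *          : (sh >= 0)     ? src << sh
+ *          : src >> MIN(-sh, esize - 1);   // arithmetic right shift
+ *
+ * hence the umin against esize - 1 and the separate discarding of the
+ * out-of-range left shift above.
+ */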
+
+void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
+ INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_sshl_vec,
+ .fno = gen_helper_gvec_sshl_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_sshl_vec,
+ .fno = gen_helper_gvec_sshl_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_sshl_i32,
+ .fniv = gen_sshl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_sshl_i64,
+ .fniv = gen_sshl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_add_vec(vece, x, a, b);
+ tcg_gen_usadd_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+}
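+
+/*
+ * Worked example of the saturation test above (informal): for byte
+ * elements, 0xf0 + 0x20 wraps to 0x10 but saturates to 0xff; the two
+ * results differ, so the NE compare yields all-ones for that lane and
+ * the OR accumulates it into the sticky QC (saturation) flag operand.
+ * Lanes that do not saturate compare equal and contribute nothing.
+ * The subtracting variants below use the same pattern.
+ */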
+
+void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_b,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_h,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_s,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_d,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_add_vec(vece, x, a, b);
+ tcg_gen_ssadd_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+}
+
+void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_sub_vec(vece, x, a, b);
+ tcg_gen_ussub_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+}
+
+void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_sub_vec(vece, x, a, b);
+ tcg_gen_sssub_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+}
+
+void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_sub_i32(t, a, b);
+ tcg_gen_sub_i32(d, b, a);
+ tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
+}
+
+static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_sub_i64(t, a, b);
+ tcg_gen_sub_i64(d, b, a);
+ tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
+}
+
+static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_smin_vec(vece, t, a, b);
+ tcg_gen_smax_vec(vece, d, a, b);
+ tcg_gen_sub_vec(vece, d, d, t);
+}
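+
+/*
+ * A brief note on the expansion above (informal): max(a, b) - min(a, b)
+ * is |a - b| evaluated without a conditional negate, and the same
+ * subtraction works for both the signed and unsigned variants because
+ * only the choice of min/max differs. For example, with signed bytes
+ * a = -100, b = 20: max - min = 20 - (-100) = 120.
+ */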
+
+void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_sabd_i32,
+ .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_sabd_i64,
+ .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_sub_i32(t, a, b);
+ tcg_gen_sub_i32(d, b, a);
+ tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
+}
+
+static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_sub_i64(t, a, b);
+ tcg_gen_sub_i64(d, b, a);
+ tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
+}
+
+static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_umin_vec(vece, t, a, b);
+ tcg_gen_umax_vec(vece, d, a, b);
+ tcg_gen_sub_vec(vece, d, d, t);
+}
+
+void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_uabd_i32,
+ .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_uabd_i64,
+ .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+ gen_sabd_i32(t, a, b);
+ tcg_gen_add_i32(d, d, t);
+}
+
+static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ gen_sabd_i64(t, a, b);
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ gen_sabd_vec(vece, t, a, b);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_add_vec,
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_saba_i32,
+ .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_saba_i64,
+ .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+ gen_uabd_i32(t, a, b);
+ tcg_gen_add_i32(d, d, t);
+}
+
+static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ gen_uabd_i64(t, a, b);
+ tcg_gen_add_i64(d, d, t);
+}
+
+static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ gen_uabd_vec(vece, t, a, b);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_add_vec,
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_uaba_i32,
+ .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_uaba_i64,
+ .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static bool aa32_cpreg_encoding_in_impdef_space(uint8_t crn, uint8_t crm)
+{
+ static const uint16_t mask[3] = {
+ 0b0000000111100111, /* crn == 9, crm == {c0-c2, c5-c8} */
+ 0b0000000100010011, /* crn == 10, crm == {c0, c1, c4, c8} */
+ 0b1000000111111111, /* crn == 11, crm == {c0-c8, c15} */
+ };
+
+ if (crn >= 9 && crn <= 11) {
+ return (mask[crn - 9] >> crm) & 1;
+ }
+ return false;
+}
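+
+/*
+ * Example of the lookup above (informal): for crn == 9, crm == 5 we
+ * test bit 5 of mask[0] == 0b0000000111100111, which is 1, so the
+ * encoding is in the IMPDEF space; crn == 9, crm == 3 tests bit 3,
+ * which is 0, so it is not.
+ */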
+
+static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
+ int opc1, int crn, int crm, int opc2,
+ bool isread, int rt, int rt2)
+{
+ uint32_t key = ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2);
+ const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
+ TCGv_ptr tcg_ri = NULL;
+ bool need_exit_tb = false;
+ uint32_t syndrome;
+
+ /*
+ * Note that since we are an implementation which takes an
+ * exception on a trapped conditional instruction only if the
+ * instruction passes its condition code check, we can take
+ * advantage of the clause in the ARM ARM that allows us to set
+ * the COND field in the instruction to 0xE in all cases.
+ * We could fish the actual condition out of the insn (ARM)
+ * or the condexec bits (Thumb) but it isn't necessary.
+ */
+ switch (cpnum) {
+ case 14:
+ if (is64) {
+ syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
+ isread, false);
+ } else {
+ syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
+ rt, isread, false);
+ }
+ break;
+ case 15:
+ if (is64) {
+ syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
+ isread, false);
+ } else {
+ syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
+ rt, isread, false);
+ }
+ break;
+ default:
+ /*
+ * ARMv8 defines that only coprocessors 14 and 15 exist,
+ * so this can only happen if this is an ARMv7 or earlier CPU,
+ * in which case the syndrome information won't actually be
+ * guest visible.
+ */
+ assert(!arm_dc_feature(s, ARM_FEATURE_V8));
+ syndrome = syn_uncategorized();
+ break;
+ }
+
+ if (s->hstr_active && cpnum == 15 && s->current_el == 1) {
+ /*
+ * At EL1, check for an HSTR_EL2 trap, which must take precedence
+ * over the UNDEF for "no such register" or the UNDEF for "access
+ * permissions forbid this EL1 access". HSTR_EL2 traps from EL0
+ * only happen if the cpreg doesn't UNDEF at EL0, so we do those in
+ * access_check_cp_reg(), after the checks for whether the access
+ * is configurably trapped to EL1.
+ */
+ uint32_t maskbit = is64 ? crm : crn;
+
+ if (maskbit != 4 && maskbit != 14) {
+ /* T4 and T14 are RES0 so never cause traps */
+ TCGv_i32 t;
+ DisasLabel over = gen_disas_label(s);
+
+ t = load_cpu_offset(offsetoflow32(CPUARMState, cp15.hstr_el2));
+ tcg_gen_andi_i32(t, t, 1u << maskbit);
+ tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, over.label);
+
+ gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
+ /*
+ * gen_exception_insn() will set is_jmp to DISAS_NORETURN,
+ * but since we're conditionally branching over it, we want
+ * to assume continue-to-next-instruction.
+ */
+ s->base.is_jmp = DISAS_NEXT;
+ set_disas_label(s, over);
+ }
+ }
+
+ if (cpnum == 15 && aa32_cpreg_encoding_in_impdef_space(crn, crm)) {
+ /*
+ * Check for TIDCP trap, which must take precedence over the UNDEF
+ * for "no such register" etc. It shares precedence with HSTR,
+ * but raises the same exception, so order doesn't matter.
+ */
+ switch (s->current_el) {
+ case 0:
+ if (arm_dc_feature(s, ARM_FEATURE_AARCH64)
+ && dc_isar_feature(aa64_tidcp1, s)) {
+ gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
+ }
+ break;
+ case 1:
+ gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
+ break;
+ }
+ }
+
+ if (!ri) {
+ /*
+ * Unknown register; this might be a guest error or a QEMU
+ * unimplemented feature.
+ */
+ if (is64) {
+ qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
+ "64 bit system register cp:%d opc1: %d crm:%d "
+ "(%s)\n",
+ isread ? "read" : "write", cpnum, opc1, crm,
+ s->ns ? "non-secure" : "secure");
+ } else {
+ qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
+ "system register cp:%d opc1:%d crn:%d crm:%d "
+ "opc2:%d (%s)\n",
+ isread ? "read" : "write", cpnum, opc1, crn,
+ crm, opc2, s->ns ? "non-secure" : "secure");
+ }
+ unallocated_encoding(s);
+ return;
+ }
+
+ /* Check access permissions */
+ if (!cp_access_ok(s->current_el, ri, isread)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if ((s->hstr_active && s->current_el == 0) || ri->accessfn ||
+ (ri->fgt && s->fgt_active) ||
+ (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
+ /*
+ * Emit code to perform further access permissions checks at
+ * runtime; this may result in an exception.
+ * Note that on XScale all cp0..cp13 registers do an access check
+ * call in order to handle c15_cpar.
+ */
+ gen_set_condexec(s);
+ gen_update_pc(s, 0);
+ tcg_ri = tcg_temp_new_ptr();
+ gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
+ tcg_constant_i32(key),
+ tcg_constant_i32(syndrome),
+ tcg_constant_i32(isread));
+ } else if (ri->type & ARM_CP_RAISES_EXC) {
+ /*
+ * The readfn or writefn might raise an exception;
+ * synchronize the CPU state in case it does.
+ */
+ gen_set_condexec(s);
+ gen_update_pc(s, 0);
+ }
+
+ /* Handle special cases first */
+ switch (ri->type & ARM_CP_SPECIAL_MASK) {
+ case 0:
+ break;
+ case ARM_CP_NOP:
+ return;
+ case ARM_CP_WFI:
+ if (isread) {
+ unallocated_encoding(s);
+ } else {
+ gen_update_pc(s, curr_insn_len(s));
+ s->base.is_jmp = DISAS_WFI;
+ }
+ return;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (ri->type & ARM_CP_IO) {
+ /* I/O operations must end the TB here (whether read or write) */
+ need_exit_tb = translator_io_start(&s->base);
+ }
+
+ if (isread) {
+ /* Read */
+ if (is64) {
+ TCGv_i64 tmp64;
+ TCGv_i32 tmp;
+ if (ri->type & ARM_CP_CONST) {
+ tmp64 = tcg_constant_i64(ri->resetvalue);
+ } else if (ri->readfn) {
+ if (!tcg_ri) {
+ tcg_ri = gen_lookup_cp_reg(key);
+ }
+ tmp64 = tcg_temp_new_i64();
+ gen_helper_get_cp_reg64(tmp64, tcg_env, tcg_ri);
+ } else {
+ tmp64 = tcg_temp_new_i64();
+ tcg_gen_ld_i64(tmp64, tcg_env, ri->fieldoffset);
+ }
+ tmp = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(tmp, tmp64);
+ store_reg(s, rt, tmp);
+ tmp = tcg_temp_new_i32();
+ tcg_gen_extrh_i64_i32(tmp, tmp64);
+ store_reg(s, rt2, tmp);
+ } else {
+ TCGv_i32 tmp;
+ if (ri->type & ARM_CP_CONST) {
+ tmp = tcg_constant_i32(ri->resetvalue);
+ } else if (ri->readfn) {
+ if (!tcg_ri) {
+ tcg_ri = gen_lookup_cp_reg(key);
+ }
+ tmp = tcg_temp_new_i32();
+ gen_helper_get_cp_reg(tmp, tcg_env, tcg_ri);
+ } else {
+ tmp = load_cpu_offset(ri->fieldoffset);
+ }
+ if (rt == 15) {
+ /*
+ * A destination register of r15 for a 32-bit load sets
+ * the condition codes from the high 4 bits of the value.
+ */
+ gen_set_nzcv(tmp);
+ } else {
+ store_reg(s, rt, tmp);
+ }
+ }
+ } else {
+ /* Write */
+ if (ri->type & ARM_CP_CONST) {
+ /* If not forbidden by access permissions, treat as WI */
+ return;
+ }
+
+ if (is64) {
+ TCGv_i32 tmplo, tmphi;
+ TCGv_i64 tmp64 = tcg_temp_new_i64();
+ tmplo = load_reg(s, rt);
+ tmphi = load_reg(s, rt2);
+ tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
+ if (ri->writefn) {
+ if (!tcg_ri) {
+ tcg_ri = gen_lookup_cp_reg(key);
+ }
+ gen_helper_set_cp_reg64(tcg_env, tcg_ri, tmp64);
+ } else {
+ tcg_gen_st_i64(tmp64, tcg_env, ri->fieldoffset);
+ }
+ } else {
+ TCGv_i32 tmp = load_reg(s, rt);
+ if (ri->writefn) {
+ if (!tcg_ri) {
+ tcg_ri = gen_lookup_cp_reg(key);
+ }
+ gen_helper_set_cp_reg(tcg_env, tcg_ri, tmp);
+ } else {
+ store_cpu_offset(tmp, ri->fieldoffset, 4);
+ }
+ }
+ }
+
+ if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
+ /*
+ * A write to any coprocessor register that ends a TB
+ * must rebuild the hflags for the next TB.
+ */
+ gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
+ /*
+ * We default to ending the TB on a coprocessor register write,
+ * but allow this to be suppressed by the register definition
+ * (usually only necessary to work around guest bugs).
+ */
+ need_exit_tb = true;
+ }
+ if (need_exit_tb) {
+ gen_lookup_tb(s);
+ }
+}
+
+/* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
+static void disas_xscale_insn(DisasContext *s, uint32_t insn)
+{
+ int cpnum = (insn >> 8) & 0xf;
+
+ if (extract32(s->c15_cpar, cpnum, 1) == 0) {
+ unallocated_encoding(s);
+ } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
+ if (disas_iwmmxt_insn(s, insn)) {
+ unallocated_encoding(s);
+ }
+ } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
+ if (disas_dsp_insn(s, insn)) {
+ unallocated_encoding(s);
+ }
+ }
+}
+
+/* Store a 64-bit value to a register pair. Clobbers val. */
+static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
+{
+ TCGv_i32 tmp;
+ tmp = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(tmp, val);
+ store_reg(s, rlow, tmp);
+ tmp = tcg_temp_new_i32();
+ tcg_gen_extrh_i64_i32(tmp, val);
+ store_reg(s, rhigh, tmp);
+}
+
+/* Load and add a 64-bit value from a register pair. */
+static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
+{
+ TCGv_i64 tmp;
+ TCGv_i32 tmpl;
+ TCGv_i32 tmph;
+
+ /* Load 64-bit value rd:rn. */
+ tmpl = load_reg(s, rlow);
+ tmph = load_reg(s, rhigh);
+ tmp = tcg_temp_new_i64();
+ tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
+ tcg_gen_add_i64(val, val, tmp);
+}
+
+/* Set N and Z flags from hi|lo. */
+static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
+{
+ tcg_gen_mov_i32(cpu_NF, hi);
+ tcg_gen_or_i32(cpu_ZF, lo, hi);
+}
+
+/*
+ * Load/Store exclusive instructions are implemented by remembering
+ * the value/address loaded, and seeing if these are the same
+ * when the store is performed. This should be sufficient to implement
+ * the architecturally mandated semantics, and avoids having to monitor
+ * regular stores. The compare vs the remembered value is done during
+ * the cmpxchg operation, but we must compare the addresses manually.
+ */
+static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
+ TCGv_i32 addr, int size)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+ MemOp opc = size | MO_ALIGN | s->be_data;
+
+ s->is_ldex = true;
+
+ if (size == 3) {
+ TCGv_i32 tmp2 = tcg_temp_new_i32();
+ TCGv_i64 t64 = tcg_temp_new_i64();
+
+ /*
+ * For AArch32, architecturally the 32-bit word at the lowest
+ * address is always Rt and the one at addr+4 is Rt2, even if
+ * the CPU is big-endian. That means we don't want to do a
+ * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
+ * architecturally 64-bit access, but instead do a 64-bit access
+ * using MO_BE if appropriate and then split the two halves.
+ */
+ TCGv taddr = gen_aa32_addr(s, addr, opc);
+
+ tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
+ tcg_gen_mov_i64(cpu_exclusive_val, t64);
+ if (s->be_data == MO_BE) {
+ tcg_gen_extr_i64_i32(tmp2, tmp, t64);
+ } else {
+ tcg_gen_extr_i64_i32(tmp, tmp2, t64);
+ }
+ store_reg(s, rt2, tmp2);
+ } else {
+ gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
+ tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
+ }
+
+ store_reg(s, rt, tmp);
+ tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
+}
+
+static void gen_clrex(DisasContext *s)
+{
+ tcg_gen_movi_i64(cpu_exclusive_addr, -1);
+}
+
+static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
+ TCGv_i32 addr, int size)
+{
+ TCGv_i32 t0, t1, t2;
+ TCGv_i64 extaddr;
+ TCGv taddr;
+ TCGLabel *done_label;
+ TCGLabel *fail_label;
+ MemOp opc = size | MO_ALIGN | s->be_data;
+
+ /*
+ * if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
+ *     [addr] = {Rt};
+ *     {Rd} = 0;
+ * } else {
+ *     {Rd} = 1;
+ * }
+ */
+ fail_label = gen_new_label();
+ done_label = gen_new_label();
+ extaddr = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(extaddr, addr);
+ tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
+
+ taddr = gen_aa32_addr(s, addr, opc);
+ t0 = tcg_temp_new_i32();
+ t1 = load_reg(s, rt);
+ if (size == 3) {
+ TCGv_i64 o64 = tcg_temp_new_i64();
+ TCGv_i64 n64 = tcg_temp_new_i64();
+
+ t2 = load_reg(s, rt2);
+
+ /*
+ * For AArch32, architecturally the 32-bit word at the lowest
+ * address is always Rt and the one at addr+4 is Rt2, even if
+ * the CPU is big-endian. Since we're going to treat this as a
+ * single 64-bit BE store, we need to put the two halves in the
+ * opposite order for BE to LE, so that they end up in the right
+ * places. We don't want gen_aa32_st_i64, because that checks
+ * SCTLR_B as if for an architectural 64-bit access.
+ */
+ if (s->be_data == MO_BE) {
+ tcg_gen_concat_i32_i64(n64, t2, t1);
+ } else {
+ tcg_gen_concat_i32_i64(n64, t1, t2);
+ }
+
+ tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
+ get_mem_index(s), opc);
+
+ tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
+ tcg_gen_extrl_i64_i32(t0, o64);
+ } else {
+ t2 = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
+ tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
+ tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
+ }
+ tcg_gen_mov_i32(cpu_R[rd], t0);
+ tcg_gen_br(done_label);
+
+ gen_set_label(fail_label);
+ tcg_gen_movi_i32(cpu_R[rd], 1);
+ gen_set_label(done_label);
+ tcg_gen_movi_i64(cpu_exclusive_addr, -1);
+}
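+
+/*
+ * Usage sketch (informal, mirroring the code above): a guest
+ * LDREX/STREX pair becomes a gen_load_exclusive() that records the
+ * address and loaded value in cpu_exclusive_addr/cpu_exclusive_val,
+ * then a gen_store_exclusive() whose cmpxchg succeeds (writing 0 to
+ * Rd) only if memory still holds the remembered value at the
+ * remembered address; any mismatch takes the fail path and writes 1.
+ */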
+
+/*
+ * gen_srs:
+ * @s: DisasContext
+ * @mode: mode field from insn (which stack to store to)
+ * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
+ * @writeback: true if writeback bit set
+ *
+ * Generate code for the SRS (Store Return State) insn.
+ */
+static void gen_srs(DisasContext *s,
+ uint32_t mode, uint32_t amode, bool writeback)
+{
+ int32_t offset;
+ TCGv_i32 addr, tmp;
+ bool undef = false;
+
+ /*
+ * SRS is:
+ * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
+ * and specified mode is monitor mode
+ * - UNDEFINED in Hyp mode
+ * - UNPREDICTABLE in User or System mode
+ * - UNPREDICTABLE if the specified mode is:
+ * -- not implemented
+ * -- not a valid mode number
+ * -- a mode that's at a higher exception level
+ * -- Monitor, if we are Non-secure
+ * For the UNPREDICTABLE cases we choose to UNDEF.
+ */
+ if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
+ gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
+ return;
+ }
+
+ if (s->current_el == 0 || s->current_el == 2) {
+ undef = true;
+ }
+
+ switch (mode) {
+ case ARM_CPU_MODE_USR:
+ case ARM_CPU_MODE_FIQ:
+ case ARM_CPU_MODE_IRQ:
+ case ARM_CPU_MODE_SVC:
+ case ARM_CPU_MODE_ABT:
+ case ARM_CPU_MODE_UND:
+ case ARM_CPU_MODE_SYS:
+ break;
+ case ARM_CPU_MODE_HYP:
+ if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
+ undef = true;
+ }
+ break;
+ case ARM_CPU_MODE_MON:
+ /*
+ * No need to check specifically for "are we non-secure" because
+ * we've already made EL0 UNDEF and handled the trap for S-EL1;
+ * so if this isn't EL3 then we must be non-secure.
+ */
+ if (s->current_el != 3) {
+ undef = true;
+ }
+ break;
+ default:
+ undef = true;
+ }
+
+ if (undef) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ addr = tcg_temp_new_i32();
+ /* get_r13_banked() will raise an exception if called from System mode */
+ gen_set_condexec(s);
+ gen_update_pc(s, 0);
+ gen_helper_get_r13_banked(addr, tcg_env, tcg_constant_i32(mode));
+ switch (amode) {
+ case 0: /* DA */
+ offset = -4;
+ break;
+ case 1: /* IA */
+ offset = 0;
+ break;
+ case 2: /* DB */
+ offset = -8;
+ break;
+ case 3: /* IB */
+ offset = 4;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_gen_addi_i32(addr, addr, offset);
+ tmp = load_reg(s, 14);
+ gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
+ tmp = load_cpu_field(spsr);
+ tcg_gen_addi_i32(addr, addr, 4);
+ gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
+ if (writeback) {
+ switch (amode) {
+ case 0:
+ offset = -8;
+ break;
+ case 1:
+ offset = 4;
+ break;
+ case 2:
+ offset = -4;
+ break;
+ case 3:
+ offset = 0;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_gen_addi_i32(addr, addr, offset);
+ gen_helper_set_r13_banked(tcg_env, tcg_constant_i32(mode), addr);
+ }
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
+}
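+
+/*
+ * Offset sanity check for the tables above (informal): for the DB
+ * addressing mode the two words (LR, then SPSR) land at SP-8 and
+ * SP-4, so the initial offset is -8 and the extra -4 applied on
+ * writeback returns the final address to SP-8, i.e. the stack grows
+ * down by two words.
+ */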
+
+/* Skip this instruction if the ARM condition is false */
+static void arm_skip_unless(DisasContext *s, uint32_t cond)
+{
+ arm_gen_condlabel(s);
+ arm_gen_test_cc(cond ^ 1, s->condlabel.label);
+}
+
+
+/*
+ * Constant expanders used by T16/T32 decode
+ */
+
+/* Return only the rotation part of T32ExpandImm. */
+static int t32_expandimm_rot(DisasContext *s, int x)
+{
+ return x & 0xc00 ? extract32(x, 7, 5) : 0;
+}
+
+/* Return the unrotated immediate from T32ExpandImm. */
+static int t32_expandimm_imm(DisasContext *s, int x)
+{
+ int imm = extract32(x, 0, 8);
+
+ switch (extract32(x, 8, 4)) {
+ case 0: /* XY */
+ /* Nothing to do. */
+ break;
+ case 1: /* 00XY00XY */
+ imm *= 0x00010001;
+ break;
+ case 2: /* XY00XY00 */
+ imm *= 0x01000100;
+ break;
+ case 3: /* XYXYXYXY */
+ imm *= 0x01010101;
+ break;
+ default:
+ /* Rotated constant. */
+ imm |= 0x80;
+ break;
+ }
+ return imm;
+}
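+
+/*
+ * Worked example (informal): with x = 0x2ab, extract32(x, 8, 4) == 2,
+ * so the unrotated immediate returned is 0xab * 0x01000100 ==
+ * 0xab00ab00 ("XY00XY00"); for such encodings x & 0xc00 == 0, so the
+ * rotation part is 0 and this is also the final T32ExpandImm value.
+ */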
+
+static int t32_branch24(DisasContext *s, int x)
+{
+ /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
+ x ^= !(x < 0) * (3 << 21);
+ /* Append the final zero. */
+ return x << 1;
+}
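+
+/*
+ * In other words (informal): the decode sign-extends on S, so x < 0
+ * means S == 1. When S == 0 the two J bits at [22:21] are inverted
+ * (I = J ^ ~S); when S == 1 they pass through unchanged; and the
+ * final left shift appends the implicit zero bit of the
+ * halfword-aligned branch offset.
+ */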
+
+static int t16_setflags(DisasContext *s)
+{
+ return s->condexec_mask == 0;
+}
+
+static int t16_push_list(DisasContext *s, int x)
+{
+ return (x & 0xff) | (x & 0x100) << (14 - 8);
+}
+
+static int t16_pop_list(DisasContext *s, int x)
+{
+ return (x & 0xff) | (x & 0x100) << (15 - 8);
+}
+
+/*
+ * Include the generated decoders.
+ */
+
+#include "decode-a32.c.inc"
+#include "decode-a32-uncond.c.inc"
+#include "decode-t32.c.inc"
+#include "decode-t16.c.inc"
+
+static bool valid_cp(DisasContext *s, int cp)
+{
+ /*
+ * Return true if this coprocessor field indicates something
+ * that's really a possible coprocessor.
+ * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
+ * and of those only cp14 and cp15 were used for registers.
+ * cp10 and cp11 were used for VFP and Neon, whose decode is
+ * dealt with elsewhere. With the advent of fp16, cp9 is also
+ * now part of VFP.
+ * For v8A and later, the encoding has been tightened so that
+ * only cp14 and cp15 are valid, and other values aren't considered
+ * to be in the coprocessor-instruction space at all. v8M still
+ * permits coprocessors 0..7.
+ * For XScale, we must not decode the XScale cp0, cp1 space as
+ * a standard coprocessor insn, because we want to fall through to
+ * the legacy disas_xscale_insn() decoder after decodetree is done.
+ */
+ if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
+ return false;
+ }
+
+ if (arm_dc_feature(s, ARM_FEATURE_V8) &&
+ !arm_dc_feature(s, ARM_FEATURE_M)) {
+ return cp >= 14;
+ }
+ return cp < 8 || cp >= 14;
+}
+
+static bool trans_MCR(DisasContext *s, arg_MCR *a)
+{
+ if (!valid_cp(s, a->cp)) {
+ return false;
+ }
+ do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
+ false, a->rt, 0);
+ return true;
+}
+
+static bool trans_MRC(DisasContext *s, arg_MRC *a)
+{
+ if (!valid_cp(s, a->cp)) {
+ return false;
+ }
+ do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
+ true, a->rt, 0);
+ return true;
+}
+
+static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
+{
+ if (!valid_cp(s, a->cp)) {
+ return false;
+ }
+ do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
+ false, a->rt, a->rt2);
+ return true;
+}
+
+static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
+{
+ if (!valid_cp(s, a->cp)) {
+ return false;
+ }
+ do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
+ true, a->rt, a->rt2);
+ return true;
+}
+
+/* Helpers to swap operands for reverse-subtract. */
+static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_sub_i32(dst, b, a);
+}
+
+static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_sub_CC(dst, b, a);
+}
+
+static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_sub_carry(dest, b, a);
+}
+
+static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_sbc_CC(dest, b, a);
+}
+
+/*
+ * Helpers for the data processing routines.
+ *
+ * After the computation, store the result back. This may be a plain
+ * register store (STREG_NORMAL), suppressed altogether (STREG_NONE),
+ * a store requiring a runtime check against the stack limits
+ * (STREG_SP_CHECK), or an exception return (STREG_EXC_RET).
+ *
+ * Always return true, indicating success for a trans_* function.
+ */
+typedef enum {
+ STREG_NONE,
+ STREG_NORMAL,
+ STREG_SP_CHECK,
+ STREG_EXC_RET,
+} StoreRegKind;
+
+static bool store_reg_kind(DisasContext *s, int rd,
+ TCGv_i32 val, StoreRegKind kind)
+{
+ switch (kind) {
+ case STREG_NONE:
+ return true;
+ case STREG_NORMAL:
+ /* See ALUWritePC: Interworking only from a32 mode. */
+ if (s->thumb) {
+ store_reg(s, rd, val);
+ } else {
+ store_reg_bx(s, rd, val);
+ }
+ return true;
+ case STREG_SP_CHECK:
+ store_sp_checked(s, val);
+ return true;
+ case STREG_EXC_RET:
+ gen_exception_return(s, val);
+ return true;
+ }
+ g_assert_not_reached();
+}
+
+/*
+ * Data Processing (register)
+ *
+ * Operate, with set flags, one register source,
+ * one immediate shifted register source, and a destination.
+ */
+static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
+ void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
+ int logic_cc, StoreRegKind kind)
+{
+ TCGv_i32 tmp1, tmp2;
+
+ tmp2 = load_reg(s, a->rm);
+ gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
+ tmp1 = load_reg(s, a->rn);
+
+ gen(tmp1, tmp1, tmp2);
+
+ if (logic_cc) {
+ gen_logic_CC(tmp1);
+ }
+ return store_reg_kind(s, a->rd, tmp1, kind);
+}
+
+static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
+ void (*gen)(TCGv_i32, TCGv_i32),
+ int logic_cc, StoreRegKind kind)
+{
+ TCGv_i32 tmp;
+
+ tmp = load_reg(s, a->rm);
+ gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
+
+ gen(tmp, tmp);
+ if (logic_cc) {
+ gen_logic_CC(tmp);
+ }
+ return store_reg_kind(s, a->rd, tmp, kind);
+}
+
+/*
+ * Data-processing (register-shifted register)
+ *
+ * Operate, with set flags, one register source,
+ * one register shifted register source, and a destination.
+ */
+static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
+ void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
+ int logic_cc, StoreRegKind kind)
+{
+ TCGv_i32 tmp1, tmp2;
+
+ tmp1 = load_reg(s, a->rs);
+ tmp2 = load_reg(s, a->rm);
+ gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
+ tmp1 = load_reg(s, a->rn);
+
+ gen(tmp1, tmp1, tmp2);
+
+ if (logic_cc) {
+ gen_logic_CC(tmp1);
+ }
+ return store_reg_kind(s, a->rd, tmp1, kind);
+}
+
+static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
+ void (*gen)(TCGv_i32, TCGv_i32),
+ int logic_cc, StoreRegKind kind)
+{
+ TCGv_i32 tmp1, tmp2;
+
+ tmp1 = load_reg(s, a->rs);
+ tmp2 = load_reg(s, a->rm);
+ gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
+
+ gen(tmp2, tmp2);
+ if (logic_cc) {
+ gen_logic_CC(tmp2);
+ }
+ return store_reg_kind(s, a->rd, tmp2, kind);
+}
+
+/*
+ * Data-processing (immediate)
+ *
+ * Operate, with set flags, one register source,
+ * one rotated immediate, and a destination.
+ *
+ * Note that when logic_cc && a->rot, CF is set from the msb of the
+ * rotated immediate; this is why we must be passed the unrotated
+ * form of the immediate.
+ */
+static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
+ void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
+ int logic_cc, StoreRegKind kind)
+{
+ TCGv_i32 tmp1;
+ uint32_t imm;
+
+ imm = ror32(a->imm, a->rot);
+ if (logic_cc && a->rot) {
+ tcg_gen_movi_i32(cpu_CF, imm >> 31);
+ }
+ tmp1 = load_reg(s, a->rn);
+
+ gen(tmp1, tmp1, tcg_constant_i32(imm));
+
+ if (logic_cc) {
+ gen_logic_CC(tmp1);
+ }
+ return store_reg_kind(s, a->rd, tmp1, kind);
+}
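+
+/*
+ * Worked example of the CF note above (informal): an encoding with
+ * a->imm == 0x02 and a->rot == 2 gives ror32(0x02, 2) == 0x80000000,
+ * whose msb is 1, so CF is set even though the unrotated byte's own
+ * msb is 0.
+ */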
+
+static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
+ void (*gen)(TCGv_i32, TCGv_i32),
+ int logic_cc, StoreRegKind kind)
+{
+ TCGv_i32 tmp;
+ uint32_t imm;
+
+ imm = ror32(a->imm, a->rot);
+ if (logic_cc && a->rot) {
+ tcg_gen_movi_i32(cpu_CF, imm >> 31);
+ }
+
+ tmp = tcg_temp_new_i32();
+ gen(tmp, tcg_constant_i32(imm));
+
+ if (logic_cc) {
+ gen_logic_CC(tmp);
+ }
+ return store_reg_kind(s, a->rd, tmp, kind);
+}
+
+#define DO_ANY3(NAME, OP, L, K) \
+ static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
+ { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
+ static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
+ { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
+ static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
+ { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
+
+#define DO_ANY2(NAME, OP, L, K) \
+ static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
+ { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
+ static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
+ { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
+ static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
+ { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
+
+#define DO_CMP2(NAME, OP, L) \
+ static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
+ { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
+ static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
+ { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
+ static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
+ { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
+
+DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
+DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
+DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
+DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
+
+DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
+DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
+DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
+DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
+
+DO_CMP2(TST, tcg_gen_and_i32, true)
+DO_CMP2(TEQ, tcg_gen_xor_i32, true)
+DO_CMP2(CMN, gen_add_CC, false)
+DO_CMP2(CMP, gen_sub_CC, false)
+
+DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
+ a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
+
+/*
+ * Note that for the computation of StoreRegKind we may return out of
+ * the middle of the functions expanded by DO_ANY3, and that we modify
+ * a->s via that parameter before it is used by OP.
+ */
+DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
+ ({
+ StoreRegKind ret = STREG_NORMAL;
+ if (a->rd == 15 && a->s) {
+ /*
+ * See ALUExceptionReturn:
+ * In User mode, UNPREDICTABLE; we choose UNDEF.
+ * In Hyp mode, UNDEFINED.
+ */
+ if (IS_USER(s) || s->current_el == 2) {
+ unallocated_encoding(s);
+ return true;
+ }
+ /* There is no writeback of nzcv to PSTATE. */
+ a->s = 0;
+ ret = STREG_EXC_RET;
+ } else if (a->rd == 13 && a->rn == 13) {
+ ret = STREG_SP_CHECK;
+ }
+ ret;
+ }))
+
+DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
+ ({
+ StoreRegKind ret = STREG_NORMAL;
+ if (a->rd == 15 && a->s) {
+ /*
+ * See ALUExceptionReturn:
+ * In User mode, UNPREDICTABLE; we choose UNDEF.
+ * In Hyp mode, UNDEFINED.
+ */
+ if (IS_USER(s) || s->current_el == 2) {
+ unallocated_encoding(s);
+ return true;
+ }
+ /* There is no writeback of nzcv to PSTATE. */
+ a->s = 0;
+ ret = STREG_EXC_RET;
+ } else if (a->rd == 13) {
+ ret = STREG_SP_CHECK;
+ }
+ ret;
+ }))
+
+DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
+
+/*
+ * ORN is only available with T32, so there is no register-shifted-register
+ * form of the insn. Using the DO_ANY3 macro would create an unused function.
+ */
+static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
+{
+ return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
+}
+
+static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
+{
+ return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
+}
+
+#undef DO_ANY3
+#undef DO_ANY2
+#undef DO_CMP2
+
+static bool trans_ADR(DisasContext *s, arg_ri *a)
+{
+ store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
+ return true;
+}
+
+static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
+{
+ if (!ENABLE_ARCH_6T2) {
+ return false;
+ }
+
+ store_reg(s, a->rd, tcg_constant_i32(a->imm));
+ return true;
+}
+
+static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
+{
+ TCGv_i32 tmp;
+
+ if (!ENABLE_ARCH_6T2) {
+ return false;
+ }
+
+ tmp = load_reg(s, a->rd);
+ tcg_gen_ext16u_i32(tmp, tmp);
+ tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
+ store_reg(s, a->rd, tmp);
+ return true;
+}
+
+/*
+ * v8.1M MVE wide-shifts
+ */
+static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
+ WideShiftImmFn *fn)
+{
+ TCGv_i64 rda;
+ TCGv_i32 rdalo, rdahi;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
+ return false;
+ }
+ if (a->rdahi == 15) {
+ /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
+ return false;
+ }
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
+ a->rdahi == 13) {
+ /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
+ unallocated_encoding(s);
+ return true;
+ }
+
+ if (a->shim == 0) {
+ a->shim = 32;
+ }
+
+ rda = tcg_temp_new_i64();
+ rdalo = load_reg(s, a->rdalo);
+ rdahi = load_reg(s, a->rdahi);
+ tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
+
+ fn(rda, rda, a->shim);
+
+ tcg_gen_extrl_i64_i32(rdalo, rda);
+ tcg_gen_extrh_i64_i32(rdahi, rda);
+ store_reg(s, a->rdalo, rdalo);
+ store_reg(s, a->rdahi, rdahi);
+
+ return true;
+}
+
+static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
+{
+ return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
+}
+
+static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
+{
+ return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
+}
+
+static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
+{
+ return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
+}
+
+static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
+{
+ gen_helper_mve_sqshll(r, tcg_env, n, tcg_constant_i32(shift));
+}
+
+static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
+{
+ return do_mve_shl_ri(s, a, gen_mve_sqshll);
+}
+
+static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
+{
+ gen_helper_mve_uqshll(r, tcg_env, n, tcg_constant_i32(shift));
+}
+
+static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
+{
+ return do_mve_shl_ri(s, a, gen_mve_uqshll);
+}
+
+static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
+{
+ return do_mve_shl_ri(s, a, gen_srshr64_i64);
+}
+
+static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
+{
+ return do_mve_shl_ri(s, a, gen_urshr64_i64);
+}
+
+static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
+{
+ TCGv_i64 rda;
+ TCGv_i32 rdalo, rdahi;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
+ return false;
+ }
+ if (a->rdahi == 15) {
+ /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
+ return false;
+ }
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
+ a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
+ a->rm == a->rdahi || a->rm == a->rdalo) {
+ /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
+ unallocated_encoding(s);
+ return true;
+ }
+
+ rda = tcg_temp_new_i64();
+ rdalo = load_reg(s, a->rdalo);
+ rdahi = load_reg(s, a->rdahi);
+ tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
+
+ /* The helper takes care of the sign-extension of the low 8 bits of Rm */
+ fn(rda, tcg_env, rda, cpu_R[a->rm]);
+
+ tcg_gen_extrl_i64_i32(rdalo, rda);
+ tcg_gen_extrh_i64_i32(rdahi, rda);
+ store_reg(s, a->rdalo, rdalo);
+ store_reg(s, a->rdahi, rdahi);
+
+ return true;
+}
+
+static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
+}
+
+static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
+}
+
+static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
+}
+
+static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
+}
+
+static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
+}
+
+static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
+{
+ return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
+}
+
+static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
+ return false;
+ }
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
+ a->rda == 13 || a->rda == 15) {
+ /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
+ unallocated_encoding(s);
+ return true;
+ }
+
+ if (a->shim == 0) {
+ a->shim = 32;
+ }
+ fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
+
+ return true;
+}
+
+static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
+{
+ return do_mve_sh_ri(s, a, gen_urshr32_i32);
+}
+
+static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
+{
+ return do_mve_sh_ri(s, a, gen_srshr32_i32);
+}
+
+static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
+{
+ gen_helper_mve_sqshl(r, tcg_env, n, tcg_constant_i32(shift));
+}
+
+static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
+{
+ return do_mve_sh_ri(s, a, gen_mve_sqshl);
+}
+
+static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
+{
+ gen_helper_mve_uqshl(r, tcg_env, n, tcg_constant_i32(shift));
+}
+
+static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
+{
+ return do_mve_sh_ri(s, a, gen_mve_uqshl);
+}
+
+static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
+ return false;
+ }
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
+ a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
+ a->rm == a->rda) {
+ /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
+ unallocated_encoding(s);
+ return true;
+ }
+
+ /* The helper takes care of the sign-extension of the low 8 bits of Rm */
+ fn(cpu_R[a->rda], tcg_env, cpu_R[a->rda], cpu_R[a->rm]);
+ return true;
+}
+
+static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
+{
+ return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
+}
+
+static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
+{
+ return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
+}
+
+/*
+ * Multiply and multiply accumulate
+ */
+
+static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = load_reg(s, a->rn);
+ t2 = load_reg(s, a->rm);
+ tcg_gen_mul_i32(t1, t1, t2);
+ if (add) {
+ t2 = load_reg(s, a->ra);
+ tcg_gen_add_i32(t1, t1, t2);
+ }
+ if (a->s) {
+ gen_logic_CC(t1);
+ }
+ store_reg(s, a->rd, t1);
+ return true;
+}
+
+static bool trans_MUL(DisasContext *s, arg_MUL *a)
+{
+ return op_mla(s, a, false);
+}
+
+static bool trans_MLA(DisasContext *s, arg_MLA *a)
+{
+ return op_mla(s, a, true);
+}
+
+static bool trans_MLS(DisasContext *s, arg_MLS *a)
+{
+ TCGv_i32 t1, t2;
+
+ if (!ENABLE_ARCH_6T2) {
+ return false;
+ }
+ t1 = load_reg(s, a->rn);
+ t2 = load_reg(s, a->rm);
+ tcg_gen_mul_i32(t1, t1, t2);
+ t2 = load_reg(s, a->ra);
+ tcg_gen_sub_i32(t1, t2, t1);
+ store_reg(s, a->rd, t1);
+ return true;
+}
+
+static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
+{
+ TCGv_i32 t0, t1, t2, t3;
+
+ t0 = load_reg(s, a->rm);
+ t1 = load_reg(s, a->rn);
+ if (uns) {
+ tcg_gen_mulu2_i32(t0, t1, t0, t1);
+ } else {
+ tcg_gen_muls2_i32(t0, t1, t0, t1);
+ }
+ if (add) {
+ t2 = load_reg(s, a->ra);
+ t3 = load_reg(s, a->rd);
+ tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
+ }
+ if (a->s) {
+ gen_logicq_cc(t0, t1);
+ }
+ store_reg(s, a->ra, t0);
+ store_reg(s, a->rd, t1);
+ return true;
+}
+
+static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
+{
+ return op_mlal(s, a, true, false);
+}
+
+static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
+{
+ return op_mlal(s, a, false, false);
+}
+
+static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
+{
+ return op_mlal(s, a, true, true);
+}
+
+static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
+{
+ return op_mlal(s, a, false, true);
+}
+
+static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
+{
+ TCGv_i32 t0, t1, t2, zero;
+
+ if (s->thumb
+ ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
+ : !ENABLE_ARCH_6) {
+ return false;
+ }
+
+ t0 = load_reg(s, a->rm);
+ t1 = load_reg(s, a->rn);
+ tcg_gen_mulu2_i32(t0, t1, t0, t1);
+ zero = tcg_constant_i32(0);
+ t2 = load_reg(s, a->ra);
+ tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
+ t2 = load_reg(s, a->rd);
+ tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
+ store_reg(s, a->ra, t0);
+ store_reg(s, a->rd, t1);
+ return true;
+}
+
+/*
+ * Saturating addition and subtraction
+ */
+
+static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
+{
+ TCGv_i32 t0, t1;
+
+ if (s->thumb
+ ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
+ : !ENABLE_ARCH_5TE) {
+ return false;
+ }
+
+ t0 = load_reg(s, a->rm);
+ t1 = load_reg(s, a->rn);
+ if (doub) {
+ gen_helper_add_saturate(t1, tcg_env, t1, t1);
+ }
+ if (add) {
+ gen_helper_add_saturate(t0, tcg_env, t0, t1);
+ } else {
+ gen_helper_sub_saturate(t0, tcg_env, t0, t1);
+ }
+ store_reg(s, a->rd, t0);
+ return true;
+}
+
+#define DO_QADDSUB(NAME, ADD, DOUB) \
+static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
+{ \
+ return op_qaddsub(s, a, ADD, DOUB); \
+}
+
+DO_QADDSUB(QADD, true, false)
+DO_QADDSUB(QSUB, false, false)
+DO_QADDSUB(QDADD, true, true)
+DO_QADDSUB(QDSUB, false, true)
+
+#undef DO_QADDSUB
+
+/*
+ * Halfword multiply and multiply accumulate
+ */
+
+static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
+ int add_long, bool nt, bool mt)
+{
+ TCGv_i32 t0, t1, tl, th;
+
+ if (s->thumb
+ ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
+ : !ENABLE_ARCH_5TE) {
+ return false;
+ }
+
+ t0 = load_reg(s, a->rn);
+ t1 = load_reg(s, a->rm);
+ gen_mulxy(t0, t1, nt, mt);
+
+ switch (add_long) {
+ case 0:
+ store_reg(s, a->rd, t0);
+ break;
+ case 1:
+ t1 = load_reg(s, a->ra);
+ gen_helper_add_setq(t0, tcg_env, t0, t1);
+ store_reg(s, a->rd, t0);
+ break;
+ case 2:
+ tl = load_reg(s, a->ra);
+ th = load_reg(s, a->rd);
+ /* Sign-extend the 32-bit product to 64 bits. */
+ t1 = tcg_temp_new_i32();
+ tcg_gen_sari_i32(t1, t0, 31);
+ tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
+ store_reg(s, a->ra, tl);
+ store_reg(s, a->rd, th);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return true;
+}
+
+#define DO_SMLAX(NAME, add, nt, mt) \
+static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
+{ \
+ return op_smlaxxx(s, a, add, nt, mt); \
+}
+
+DO_SMLAX(SMULBB, 0, 0, 0)
+DO_SMLAX(SMULBT, 0, 0, 1)
+DO_SMLAX(SMULTB, 0, 1, 0)
+DO_SMLAX(SMULTT, 0, 1, 1)
+
+DO_SMLAX(SMLABB, 1, 0, 0)
+DO_SMLAX(SMLABT, 1, 0, 1)
+DO_SMLAX(SMLATB, 1, 1, 0)
+DO_SMLAX(SMLATT, 1, 1, 1)
+
+DO_SMLAX(SMLALBB, 2, 0, 0)
+DO_SMLAX(SMLALBT, 2, 0, 1)
+DO_SMLAX(SMLALTB, 2, 1, 0)
+DO_SMLAX(SMLALTT, 2, 1, 1)
+
+#undef DO_SMLAX
+
+static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
+{
+ TCGv_i32 t0, t1;
+
+ if (!ENABLE_ARCH_5TE) {
+ return false;
+ }
+
+ t0 = load_reg(s, a->rn);
+ t1 = load_reg(s, a->rm);
+ /*
+ * Since the nominal result is product<47:16>, shift the 16-bit
+ * input up by 16 bits, so that the result is at product<63:32>.
+ */
+ if (mt) {
+ tcg_gen_andi_i32(t1, t1, 0xffff0000);
+ } else {
+ tcg_gen_shli_i32(t1, t1, 16);
+ }
+ tcg_gen_muls2_i32(t0, t1, t0, t1);
+ if (add) {
+ t0 = load_reg(s, a->ra);
+ gen_helper_add_setq(t1, tcg_env, t1, t0);
+ }
+ store_reg(s, a->rd, t1);
+ return true;
+}
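+
+/*
+ * Arithmetic behind the comment above (informal): SMULW* wants
+ * (Rn * sxt16(Rm))<47:16>. Multiplying instead by (sxt16(Rm) << 16)
+ * gives the same product shifted up by 16, so the desired bits
+ * <47:16> become bits <63:32>, i.e. exactly the high word that
+ * tcg_gen_muls2_i32() leaves in t1.
+ */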
+
+#define DO_SMLAWX(NAME, add, mt) \
+static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
+{ \
+ return op_smlawx(s, a, add, mt); \
+}
+
+DO_SMLAWX(SMULWB, 0, 0)
+DO_SMLAWX(SMULWT, 0, 1)
+DO_SMLAWX(SMLAWB, 1, 0)
+DO_SMLAWX(SMLAWT, 1, 1)
+
+#undef DO_SMLAWX
+
+/*
+ * MSR (immediate) and hints
+ */
+
+static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
+{
+ /*
+ * When running single-threaded TCG code, use the helper to ensure that
+ * the next round-robin scheduled vCPU gets a crack. When running in
+ * MTTCG we don't generate jumps to the helper as it won't affect the
+ * scheduling of other vCPUs.
+ */
+ if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+ gen_update_pc(s, curr_insn_len(s));
+ s->base.is_jmp = DISAS_YIELD;
+ }
+ return true;
+}
+
+static bool trans_WFE(DisasContext *s, arg_WFE *a)
+{
+ /*
+ * When running single-threaded TCG code, use the helper to ensure that
+ * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
+ * just skip this instruction. Currently the SEV/SEVL instructions,
+ * which are *one* of many ways to wake the CPU from WFE, are not
+ * implemented so we can't sleep like WFI does.
+ */
+ if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
+ gen_update_pc(s, curr_insn_len(s));
+ s->base.is_jmp = DISAS_WFE;
+ }
+ return true;
+}
+
+static bool trans_WFI(DisasContext *s, arg_WFI *a)
+{
+ /* For WFI, halt the vCPU until an IRQ. */
+ gen_update_pc(s, curr_insn_len(s));
+ s->base.is_jmp = DISAS_WFI;
+ return true;
+}
+
+static bool trans_ESB(DisasContext *s, arg_ESB *a)
+{
+ /*
+ * For M-profile, minimal-RAS ESB can be a NOP.
+ * Without RAS, we must implement this as NOP.
+ */
+ if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
+ /*
+ * QEMU does not have a source of physical SErrors,
+ * so we are only concerned with virtual SErrors.
+ * The pseudocode in the ARM for this case is
+ * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
+ * AArch32.vESBOperation();
+ * Most of the condition can be evaluated at translation time.
+ * Test for EL2 present, and defer test for SEL2 to runtime.
+ */
+ if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
+ gen_helper_vesb(tcg_env);
+ }
+ }
+ return true;
+}
+
+static bool trans_NOP(DisasContext *s, arg_NOP *a)
+{
+ return true;
+}
+
+static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
+{
+ uint32_t val = ror32(a->imm, a->rot * 2);
+ uint32_t mask = msr_mask(s, a->mask, a->r);
+
+ if (gen_set_psr_im(s, mask, a->r, val)) {
+ unallocated_encoding(s);
+ }
+ return true;
+}
+
+/*
+ * Cyclic Redundancy Check
+ */
+
+static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
+{
+ TCGv_i32 t1, t2, t3;
+
+ if (!dc_isar_feature(aa32_crc32, s)) {
+ return false;
+ }
+
+ t1 = load_reg(s, a->rn);
+ t2 = load_reg(s, a->rm);
+ switch (sz) {
+ case MO_8:
+ gen_uxtb(t2);
+ break;
+ case MO_16:
+ gen_uxth(t2);
+ break;
+ case MO_32:
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ t3 = tcg_constant_i32(1 << sz);
+ if (c) {
+ gen_helper_crc32c(t1, t1, t2, t3);
+ } else {
+ gen_helper_crc32(t1, t1, t2, t3);
+ }
+ store_reg(s, a->rd, t1);
+ return true;
+}
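+
+/*
+ * Note on the helper arguments (informal): t3 is the number of input
+ * bytes, 1 << sz, so CRC32B/H/W pass 1, 2 and 4 respectively and the
+ * helper consumes only that many low-order bytes of t2, which we have
+ * already zero-extended above.
+ */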
+
+#define DO_CRC32(NAME, c, sz) \
+static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
+ { return op_crc32(s, a, c, sz); }
+
+DO_CRC32(CRC32B, false, MO_8)
+DO_CRC32(CRC32H, false, MO_16)
+DO_CRC32(CRC32W, false, MO_32)
+DO_CRC32(CRC32CB, true, MO_8)
+DO_CRC32(CRC32CH, true, MO_16)
+DO_CRC32(CRC32CW, true, MO_32)
+
+#undef DO_CRC32
+
+/*
+ * Miscellaneous instructions
+ */
+
+static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
+{
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ gen_mrs_banked(s, a->r, a->sysm, a->rd);
+ return true;
+}
+
+static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
+{
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ gen_msr_banked(s, a->r, a->sysm, a->rn);
+ return true;
+}
+
+static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
+{
+ TCGv_i32 tmp;
+
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ if (a->r) {
+ if (IS_USER(s)) {
+ unallocated_encoding(s);
+ return true;
+ }
+ tmp = load_cpu_field(spsr);
+ } else {
+ tmp = tcg_temp_new_i32();
+ gen_helper_cpsr_read(tmp, tcg_env);
+ }
+ store_reg(s, a->rd, tmp);
+ return true;
+}
+
+static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
+{
+ TCGv_i32 tmp;
+ uint32_t mask = msr_mask(s, a->mask, a->r);
+
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ tmp = load_reg(s, a->rn);
+ if (gen_set_psr(s, mask, a->r, tmp)) {
+ unallocated_encoding(s);
+ }
+ return true;
+}
+
+static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
+{
+ TCGv_i32 tmp;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ tmp = tcg_temp_new_i32();
+ gen_helper_v7m_mrs(tmp, tcg_env, tcg_constant_i32(a->sysm));
+ store_reg(s, a->rd, tmp);
+ return true;
+}
+
+static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
+{
+ TCGv_i32 addr, reg;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ addr = tcg_constant_i32((a->mask << 10) | a->sysm);
+ reg = load_reg(s, a->rn);
+ gen_helper_v7m_msr(tcg_env, addr, reg);
+ /* If we wrote to CONTROL, the EL might have changed */
+ gen_rebuild_hflags(s, true);
+ gen_lookup_tb(s);
+ return true;
+}
+
+static bool trans_BX(DisasContext *s, arg_BX *a)
+{
+ if (!ENABLE_ARCH_4T) {
+ return false;
+ }
+ gen_bx_excret(s, load_reg(s, a->rm));
+ return true;
+}
+
+static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
+{
+ if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ /*
+ * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
+ * TBFLAGS bit on a basically-never-happens case, so call a helper
+ * function to check for the trap and raise the exception if needed
+ * (passing it the register number for the syndrome value).
+ * v8A doesn't have this HSTR bit.
+ */
+ if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
+ arm_dc_feature(s, ARM_FEATURE_EL2) &&
+ s->current_el < 2 && s->ns) {
+ gen_helper_check_bxj_trap(tcg_env, tcg_constant_i32(a->rm));
+ }
+ /* Trivial implementation equivalent to bx. */
+ gen_bx(s, load_reg(s, a->rm));
+ return true;
+}
+
+static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
+{
+ TCGv_i32 tmp;
+
+ if (!ENABLE_ARCH_5) {
+ return false;
+ }
+ tmp = load_reg(s, a->rm);
+ gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
+ gen_bx(s, tmp);
+ return true;
+}
+
+/*
+ * BXNS/BLXNS: only exist for v8M with the security extensions,
+ * and always UNDEF if NonSecure. We don't implement these in
+ * the user-only mode either (in theory you can use them from
+ * Secure User mode but they are too tied in to system emulation).
+ */
+static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
+{
+ if (!s->v8m_secure || IS_USER_ONLY) {
+ unallocated_encoding(s);
+ } else {
+ gen_bxns(s, a->rm);
+ }
+ return true;
+}
+
+static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
+{
+ if (!s->v8m_secure || IS_USER_ONLY) {
+ unallocated_encoding(s);
+ } else {
+ gen_blxns(s, a->rm);
+ }
+ return true;
+}
+
+static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
+{
+ TCGv_i32 tmp;
+
+ if (!ENABLE_ARCH_5) {
+ return false;
+ }
+ tmp = load_reg(s, a->rm);
+ tcg_gen_clzi_i32(tmp, tmp, 32);
+ store_reg(s, a->rd, tmp);
+ return true;
+}
+
+static bool trans_ERET(DisasContext *s, arg_ERET *a)
+{
+ TCGv_i32 tmp;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
+ return false;
+ }
+ if (IS_USER(s)) {
+ unallocated_encoding(s);
+ return true;
+ }
+ if (s->current_el == 2) {
+ /* ERET from Hyp uses ELR_Hyp, not LR */
+ tmp = load_cpu_field_low32(elr_el[2]);
+ } else {
+ tmp = load_reg(s, 14);
+ }
+ gen_exception_return(s, tmp);
+ return true;
+}
+
+static bool trans_HLT(DisasContext *s, arg_HLT *a)
+{
+ gen_hlt(s, a->imm);
+ return true;
+}
+
+static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
+{
+ if (!ENABLE_ARCH_5) {
+ return false;
+ }
+ /* BKPT is OK with ECI set and leaves it untouched */
+ s->eci_handled = true;
+ if (arm_dc_feature(s, ARM_FEATURE_M) &&
+ semihosting_enabled(s->current_el == 0) &&
+ (a->imm == 0xab)) {
+ gen_exception_internal_insn(s, EXCP_SEMIHOST);
+ } else {
+ gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
+ }
+ return true;
+}
+
+static bool trans_HVC(DisasContext *s, arg_HVC *a)
+{
+ if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ if (IS_USER(s)) {
+ unallocated_encoding(s);
+ } else {
+ gen_hvc(s, a->imm);
+ }
+ return true;
+}
+
+static bool trans_SMC(DisasContext *s, arg_SMC *a)
+{
+ if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ if (IS_USER(s)) {
+ unallocated_encoding(s);
+ } else {
+ gen_smc(s);
+ }
+ return true;
+}
+
+static bool trans_SG(DisasContext *s, arg_SG *a)
+{
+ if (!arm_dc_feature(s, ARM_FEATURE_M) ||
+ !arm_dc_feature(s, ARM_FEATURE_V8)) {
+ return false;
+ }
+ /*
+ * SG (v8M only)
+ * The bulk of the behaviour for this instruction is implemented
+ * in v7m_handle_execute_nsc(), which deals with the insn when
+ * it is executed by a CPU in non-secure state from memory
+ * which is Secure & NonSecure-Callable.
+ * Here we only need to handle the remaining cases:
+ * * in NS memory (including the "security extension not
+ * implemented" case) : NOP
+ * * in S memory but CPU already secure (clear IT bits)
+ * We know that the attribute for the memory this insn is
+ * in must match the current CPU state, because otherwise
+ * get_phys_addr_pmsav8 would have generated an exception.
+ */
+ if (s->v8m_secure) {
+ /* Like the IT insn, we don't need to generate any code */
+ s->condexec_cond = 0;
+ s->condexec_mask = 0;
+ }
+ return true;
+}
+
+static bool trans_TT(DisasContext *s, arg_TT *a)
+{
+ TCGv_i32 addr, tmp;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_M) ||
+ !arm_dc_feature(s, ARM_FEATURE_V8)) {
+ return false;
+ }
+ if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
+ /* We UNDEF for these UNPREDICTABLE cases */
+ unallocated_encoding(s);
+ return true;
+ }
+ if (a->A && !s->v8m_secure) {
+ /* This case is UNDEFINED. */
+ unallocated_encoding(s);
+ return true;
+ }
+
+ addr = load_reg(s, a->rn);
+ tmp = tcg_temp_new_i32();
+ gen_helper_v7m_tt(tmp, tcg_env, addr, tcg_constant_i32((a->A << 1) | a->T));
+ store_reg(s, a->rd, tmp);
+ return true;
+}
+
+/*
+ * Load/store register index
+ */
+
+static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
+{
+ ISSInfo ret;
+
+ /* ISS not valid if writeback */
+ if (p && !w) {
+ ret = rd;
+ if (curr_insn_len(s) == 2) {
+ ret |= ISSIs16Bit;
+ }
+ } else {
+ ret = ISSInvalid;
+ }
+ return ret;
+}
+
+static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
+{
+ TCGv_i32 addr = load_reg(s, a->rn);
+
+ if (s->v8m_stackcheck && a->rn == 13 && a->w) {
+ gen_helper_v8m_stackcheck(tcg_env, addr);
+ }
+
+ if (a->p) {
+ TCGv_i32 ofs = load_reg(s, a->rm);
+ gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
+ if (a->u) {
+ tcg_gen_add_i32(addr, addr, ofs);
+ } else {
+ tcg_gen_sub_i32(addr, addr, ofs);
+ }
+ }
+ return addr;
+}
+
+static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
+ TCGv_i32 addr, int address_offset)
+{
+ if (!a->p) {
+ TCGv_i32 ofs = load_reg(s, a->rm);
+ gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
+ if (a->u) {
+ tcg_gen_add_i32(addr, addr, ofs);
+ } else {
+ tcg_gen_sub_i32(addr, addr, ofs);
+ }
+ } else if (!a->w) {
+ return;
+ }
+ tcg_gen_addi_i32(addr, addr, address_offset);
+ store_reg(s, a->rn, addr);
+}
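+
+/*
+ * For reference, the p/w/u bits decoded above map onto the usual A32
+ * register-offset assembly forms roughly as follows:
+ *     LDR r0, [r1, r2, LSL #2]     p=1 w=0   offset, no writeback
+ *     LDR r0, [r1, r2, LSL #2]!    p=1 w=1   pre-indexed, writeback
+ *     LDR r0, [r1], r2, LSL #2     p=0       post-indexed, always writes back
+ * with u selecting whether the shifted offset is added to (u=1) or
+ * subtracted from (u=0) the base.
+ */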
+
+static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
+ MemOp mop, int mem_idx)
+{
+ ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
+ TCGv_i32 addr, tmp;
+
+ addr = op_addr_rr_pre(s, a);
+
+ tmp = tcg_temp_new_i32();
+ gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
+ disas_set_da_iss(s, mop, issinfo);
+
+ /*
+ * Perform base writeback before the loaded value to
+ * ensure correct behavior with overlapping index registers.
+ */
+ op_addr_rr_post(s, a, addr, 0);
+ store_reg_from_load(s, a->rt, tmp);
+ return true;
+}
+
+static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
+ MemOp mop, int mem_idx)
+{
+ ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
+ TCGv_i32 addr, tmp;
+
+ /*
+ * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
+ * is either UNPREDICTABLE or has defined behaviour
+ */
+ if (s->thumb && a->rn == 15) {
+ return false;
+ }
+
+ addr = op_addr_rr_pre(s, a);
+
+ tmp = load_reg(s, a->rt);
+ gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
+ disas_set_da_iss(s, mop, issinfo);
+
+ op_addr_rr_post(s, a, addr, 0);
+ return true;
+}
+
+static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
+{
+ int mem_idx = get_mem_index(s);
+ TCGv_i32 addr, tmp;
+
+ if (!ENABLE_ARCH_5TE) {
+ return false;
+ }
+ if (a->rt & 1) {
+ unallocated_encoding(s);
+ return true;
+ }
+ addr = op_addr_rr_pre(s, a);
+
+ tmp = tcg_temp_new_i32();
+ gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+ store_reg(s, a->rt, tmp);
+
+ tcg_gen_addi_i32(addr, addr, 4);
+
+ tmp = tcg_temp_new_i32();
+ gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+ store_reg(s, a->rt + 1, tmp);
+
+ /* LDRD w/ base writeback is undefined if the registers overlap. */
+ op_addr_rr_post(s, a, addr, -4);
+ return true;
+}
+
+static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
+{
+ int mem_idx = get_mem_index(s);
+ TCGv_i32 addr, tmp;
+
+ if (!ENABLE_ARCH_5TE) {
+ return false;
+ }
+ if (a->rt & 1) {
+ unallocated_encoding(s);
+ return true;
+ }
+ addr = op_addr_rr_pre(s, a);
+
+ tmp = load_reg(s, a->rt);
+ gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+
+ tcg_gen_addi_i32(addr, addr, 4);
+
+ tmp = load_reg(s, a->rt + 1);
+ gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+
+ op_addr_rr_post(s, a, addr, -4);
+ return true;
+}
+
+/*
+ * Load/store immediate index
+ */
+
+static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
+{
+ int ofs = a->imm;
+
+ if (!a->u) {
+ ofs = -ofs;
+ }
+
+ if (s->v8m_stackcheck && a->rn == 13 && a->w) {
+ /*
+ * Stackcheck. Here we know 'addr' is the current SP;
+ * U is set if we're moving SP up, else down. It is
+ * UNKNOWN whether the limit check triggers when SP starts
+ * below the limit and ends up above it; we chose to do so.
+ */
+ if (!a->u) {
+ TCGv_i32 newsp = tcg_temp_new_i32();
+ tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
+ gen_helper_v8m_stackcheck(tcg_env, newsp);
+ } else {
+ gen_helper_v8m_stackcheck(tcg_env, cpu_R[13]);
+ }
+ }
+
+ return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
+}
+
+static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
+ TCGv_i32 addr, int address_offset)
+{
+ if (!a->p) {
+ if (a->u) {
+ address_offset += a->imm;
+ } else {
+ address_offset -= a->imm;
+ }
+ } else if (!a->w) {
+ return;
+ }
+ tcg_gen_addi_i32(addr, addr, address_offset);
+ store_reg(s, a->rn, addr);
+}
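+
+/*
+ * The immediate-offset forms follow the same pattern, e.g.:
+ *     LDR r0, [r1, #4]      p=1 w=0
+ *     LDR r0, [r1, #4]!     p=1 w=1
+ *     LDR r0, [r1], #4      p=0
+ * For p=1 the offset is folded in by op_addr_ri_pre() via
+ * add_reg_for_lit(); for p=0 it is only applied by the post hook,
+ * together with any extra address_offset the caller passes.
+ */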
+
+static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
+ MemOp mop, int mem_idx)
+{
+ ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
+ TCGv_i32 addr, tmp;
+
+ addr = op_addr_ri_pre(s, a);
+
+ tmp = tcg_temp_new_i32();
+ gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
+ disas_set_da_iss(s, mop, issinfo);
+
+ /*
+ * Perform base writeback before the loaded value to
+ * ensure correct behavior with overlapping index registers.
+ */
+ op_addr_ri_post(s, a, addr, 0);
+ store_reg_from_load(s, a->rt, tmp);
+ return true;
+}
+
+static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
+ MemOp mop, int mem_idx)
+{
+ ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
+ TCGv_i32 addr, tmp;
+
+ /*
+ * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
+ * is either UNPREDICTABLE or has defined behaviour
+ */
+ if (s->thumb && a->rn == 15) {
+ return false;
+ }
+
+ addr = op_addr_ri_pre(s, a);
+
+ tmp = load_reg(s, a->rt);
+ gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
+ disas_set_da_iss(s, mop, issinfo);
+
+ op_addr_ri_post(s, a, addr, 0);
+ return true;
+}
+
+static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
+{
+ int mem_idx = get_mem_index(s);
+ TCGv_i32 addr, tmp;
+
+ addr = op_addr_ri_pre(s, a);
+
+ tmp = tcg_temp_new_i32();
+ gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+ store_reg(s, a->rt, tmp);
+
+ tcg_gen_addi_i32(addr, addr, 4);
+
+ tmp = tcg_temp_new_i32();
+ gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+ store_reg(s, rt2, tmp);
+
+ /* LDRD w/ base writeback is undefined if the registers overlap. */
+ op_addr_ri_post(s, a, addr, -4);
+ return true;
+}
+
+static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
+{
+ if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
+ return false;
+ }
+ return op_ldrd_ri(s, a, a->rt + 1);
+}
+
+static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
+{
+ arg_ldst_ri b = {
+ .u = a->u, .w = a->w, .p = a->p,
+ .rn = a->rn, .rt = a->rt, .imm = a->imm
+ };
+ return op_ldrd_ri(s, &b, a->rt2);
+}
+
+static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
+{
+ int mem_idx = get_mem_index(s);
+ TCGv_i32 addr, tmp;
+
+ addr = op_addr_ri_pre(s, a);
+
+ tmp = load_reg(s, a->rt);
+ gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+
+ tcg_gen_addi_i32(addr, addr, 4);
+
+ tmp = load_reg(s, rt2);
+ gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+
+ op_addr_ri_post(s, a, addr, -4);
+ return true;
+}
+
+static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
+{
+ if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
+ return false;
+ }
+ return op_strd_ri(s, a, a->rt + 1);
+}
+
+static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
+{
+ arg_ldst_ri b = {
+ .u = a->u, .w = a->w, .p = a->p,
+ .rn = a->rn, .rt = a->rt, .imm = a->imm
+ };
+ return op_strd_ri(s, &b, a->rt2);
+}
+
+#define DO_LDST(NAME, WHICH, MEMOP) \
+static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
+{ \
+ return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
+} \
+static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
+{ \
+ return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
+} \
+static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
+{ \
+ return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
+} \
+static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
+{ \
+ return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
+}
+
+DO_LDST(LDR, load, MO_UL)
+DO_LDST(LDRB, load, MO_UB)
+DO_LDST(LDRH, load, MO_UW)
+DO_LDST(LDRSB, load, MO_SB)
+DO_LDST(LDRSH, load, MO_SW)
+
+DO_LDST(STR, store, MO_UL)
+DO_LDST(STRB, store, MO_UB)
+DO_LDST(STRH, store, MO_UW)
+
+#undef DO_LDST
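+
+/*
+ * For reference, DO_LDST(LDR, load, MO_UL) above generates the four
+ * trans functions trans_LDR_ri, trans_LDRT_ri, trans_LDR_rr and
+ * trans_LDRT_rr; the ...T variants (LDRT, STRT, etc.) differ only in
+ * using get_a32_user_mem_index(), so the access is performed with
+ * unprivileged permissions.
+ */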
+
+/*
+ * Synchronization primitives
+ */
+
+static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
+{
+ TCGv_i32 addr, tmp;
+ TCGv taddr;
+
+ opc |= s->be_data;
+ addr = load_reg(s, a->rn);
+ taddr = gen_aa32_addr(s, addr, opc);
+
+ tmp = load_reg(s, a->rt2);
+ tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
+
+ store_reg(s, a->rt, tmp);
+ return true;
+}
+
+static bool trans_SWP(DisasContext *s, arg_SWP *a)
+{
+ return op_swp(s, a, MO_UL | MO_ALIGN);
+}
+
+static bool trans_SWPB(DisasContext *s, arg_SWP *a)
+{
+ return op_swp(s, a, MO_UB);
+}
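+
+/*
+ * e.g. "SWP r0, r1, [r2]" loads the old word at [r2] into r0 and
+ * stores r1 there, as a single atomic exchange: the value loaded
+ * from rt2 is the store source, and rt receives the old memory value.
+ */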
+
+/*
+ * Load/Store Exclusive and Load-Acquire/Store-Release
+ */
+
+static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
+{
+ TCGv_i32 addr;
+ /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
+ bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
+
+ /* We UNDEF for these UNPREDICTABLE cases. */
+ if (a->rd == 15 || a->rn == 15 || a->rt == 15
+ || a->rd == a->rn || a->rd == a->rt
+ || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
+ || (mop == MO_64
+ && (a->rt2 == 15
+ || a->rd == a->rt2
+ || (!v8a && s->thumb && a->rt2 == 13)))) {
+ unallocated_encoding(s);
+ return true;
+ }
+
+ if (rel) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ }
+
+ addr = tcg_temp_new_i32();
+ load_reg_var(s, addr, a->rn);
+ tcg_gen_addi_i32(addr, addr, a->imm);
+
+ gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
+ return true;
+}
+
+static bool trans_STREX(DisasContext *s, arg_STREX *a)
+{
+ if (!ENABLE_ARCH_6) {
+ return false;
+ }
+ return op_strex(s, a, MO_32, false);
+}
+
+static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
+{
+ if (!ENABLE_ARCH_6K) {
+ return false;
+ }
+ /* We UNDEF for these UNPREDICTABLE cases. */
+ if (a->rt & 1) {
+ unallocated_encoding(s);
+ return true;
+ }
+ a->rt2 = a->rt + 1;
+ return op_strex(s, a, MO_64, false);
+}
+
+static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
+{
+ return op_strex(s, a, MO_64, false);
+}
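+
+/*
+ * Note the asymmetry above: the A32 STREXD encoding has no Rt2 field
+ * and implicitly pairs Rt with Rt+1 (hence the "a->rt & 1" UNDEF
+ * check and the a->rt2 = a->rt + 1 fixup), whereas the T32 encoding
+ * carries an explicit Rt2 and can be passed straight through.
+ */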
+
+static bool trans_STREXB(DisasContext *s, arg_STREX *a)
+{
+ if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
+ return false;
+ }
+ return op_strex(s, a, MO_8, false);
+}
+
+static bool trans_STREXH(DisasContext *s, arg_STREX *a)
+{
+ if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
+ return false;
+ }
+ return op_strex(s, a, MO_16, false);
+}
+
+static bool trans_STLEX(DisasContext *s, arg_STREX *a)
+{
+ if (!ENABLE_ARCH_8) {
+ return false;
+ }
+ return op_strex(s, a, MO_32, true);
+}
+
+static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
+{
+ if (!ENABLE_ARCH_8) {
+ return false;
+ }
+ /* We UNDEF for these UNPREDICTABLE cases. */
+ if (a->rt & 1) {
+ unallocated_encoding(s);
+ return true;
+ }
+ a->rt2 = a->rt + 1;
+ return op_strex(s, a, MO_64, true);
+}
+
+static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
+{
+ if (!ENABLE_ARCH_8) {
+ return false;
+ }
+ return op_strex(s, a, MO_64, true);
+}
+
+static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
+{
+ if (!ENABLE_ARCH_8) {
+ return false;
+ }
+ return op_strex(s, a, MO_8, true);
+}
+
+static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
+{
+ if (!ENABLE_ARCH_8) {
+ return false;
+ }
+ return op_strex(s, a, MO_16, true);
+}
+
+static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
+{
+ TCGv_i32 addr, tmp;
+
+ if (!ENABLE_ARCH_8) {
+ return false;
+ }
+ /* We UNDEF for these UNPREDICTABLE cases. */
+ if (a->rn == 15 || a->rt == 15) {
+ unallocated_encoding(s);
+ return true;
+ }
+
+ addr = load_reg(s, a->rn);
+ tmp = load_reg(s, a->rt);
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
+ disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
+
+ return true;
+}
+
+static bool trans_STL(DisasContext *s, arg_STL *a)
+{
+ return op_stl(s, a, MO_UL);
+}
+
+static bool trans_STLB(DisasContext *s, arg_STL *a)
+{
+ return op_stl(s, a, MO_UB);
+}
+
+static bool trans_STLH(DisasContext *s, arg_STL *a)
+{
+ return op_stl(s, a, MO_UW);
+}
+
+static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
+{
+ TCGv_i32 addr;
+ /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
+ bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
+
+ /* We UNDEF for these UNPREDICTABLE cases. */
+ if (a->rn == 15 || a->rt == 15
+ || (!v8a && s->thumb && a->rt == 13)
+ || (mop == MO_64
+ && (a->rt2 == 15 || a->rt == a->rt2
+ || (!v8a && s->thumb && a->rt2 == 13)))) {
+ unallocated_encoding(s);
+ return true;
+ }
+
+ addr = tcg_temp_new_i32();
+ load_reg_var(s, addr, a->rn);
+ tcg_gen_addi_i32(addr, addr, a->imm);
+
+ gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
+
+ if (acq) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ }
+ return true;
+}
+
+static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
+{
+ if (!ENABLE_ARCH_6) {
+ return false;
+ }
+ return op_ldrex(s, a, MO_32, false);
+}
+
+static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
+{
+ if (!ENABLE_ARCH_6K) {
+ return false;
+ }
+ /* We UNDEF for these UNPREDICTABLE cases. */
+ if (a->rt & 1) {
+ unallocated_encoding(s);
+ return true;
+ }
+ a->rt2 = a->rt + 1;
+ return op_ldrex(s, a, MO_64, false);
+}
+
+static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
+{
+ return op_ldrex(s, a, MO_64, false);
+}
+
+static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
+{
+ if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
+ return false;
+ }
+ return op_ldrex(s, a, MO_8, false);
+}
+
+static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
+{
+ if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
+ return false;
+ }
+ return op_ldrex(s, a, MO_16, false);
+}
+
+static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
+{
+ if (!ENABLE_ARCH_8) {
+ return false;
+ }
+ return op_ldrex(s, a, MO_32, true);
+}
+
+static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
+{
+ if (!ENABLE_ARCH_8) {
+ return false;
+ }
+ /* We UNDEF for these UNPREDICTABLE cases. */
+ if (a->rt & 1) {
+ unallocated_encoding(s);
+ return true;
+ }
+ a->rt2 = a->rt + 1;
+ return op_ldrex(s, a, MO_64, true);
+}
+
+static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
+{
+ if (!ENABLE_ARCH_8) {
+ return false;
+ }
+ return op_ldrex(s, a, MO_64, true);
+}
+
+static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
+{
+ if (!ENABLE_ARCH_8) {
+ return false;
+ }
+ return op_ldrex(s, a, MO_8, true);
+}
+
+static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
+{
+ if (!ENABLE_ARCH_8) {
+ return false;
+ }
+ return op_ldrex(s, a, MO_16, true);
+}
+
+static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
+{
+ TCGv_i32 addr, tmp;
+
+ if (!ENABLE_ARCH_8) {
+ return false;
+ }
+ /* We UNDEF for these UNPREDICTABLE cases. */
+ if (a->rn == 15 || a->rt == 15) {
+ unallocated_encoding(s);
+ return true;
+ }
+
+ addr = load_reg(s, a->rn);
+ tmp = tcg_temp_new_i32();
+ gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
+ disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
+
+ store_reg(s, a->rt, tmp);
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ return true;
+}
+
+static bool trans_LDA(DisasContext *s, arg_LDA *a)
+{
+ return op_lda(s, a, MO_UL);
+}
+
+static bool trans_LDAB(DisasContext *s, arg_LDA *a)
+{
+ return op_lda(s, a, MO_UB);
+}
+
+static bool trans_LDAH(DisasContext *s, arg_LDA *a)
+{
+ return op_lda(s, a, MO_UW);
+}
+
+/*
+ * Media instructions
+ */
+
+static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
+{
+ TCGv_i32 t1, t2;
+
+ if (!ENABLE_ARCH_6) {
+ return false;
+ }
+
+ t1 = load_reg(s, a->rn);
+ t2 = load_reg(s, a->rm);
+ gen_helper_usad8(t1, t1, t2);
+ if (a->ra != 15) {
+ t2 = load_reg(s, a->ra);
+ tcg_gen_add_i32(t1, t1, t2);
+ }
+ store_reg(s, a->rd, t1);
+ return true;
+}
+
+static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
+{
+ TCGv_i32 tmp;
+ int width = a->widthm1 + 1;
+ int shift = a->lsb;
+
+ if (!ENABLE_ARCH_6T2) {
+ return false;
+ }
+ if (shift + width > 32) {
+ /* UNPREDICTABLE; we choose to UNDEF */
+ unallocated_encoding(s);
+ return true;
+ }
+
+ tmp = load_reg(s, a->rn);
+ if (u) {
+ tcg_gen_extract_i32(tmp, tmp, shift, width);
+ } else {
+ tcg_gen_sextract_i32(tmp, tmp, shift, width);
+ }
+ store_reg(s, a->rd, tmp);
+ return true;
+}
+
+static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
+{
+ return op_bfx(s, a, false);
+}
+
+static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
+{
+ return op_bfx(s, a, true);
+}
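+
+/*
+ * Worked example: "UBFX r0, r1, #8, #4" arrives as lsb=8, widthm1=3,
+ * so width=4 and we emit tcg_gen_extract_i32(tmp, tmp, 8, 4), i.e.
+ * r0 = (r1 >> 8) & 0xf; SBFX sign-extends the same field instead.
+ */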
+
+static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
+{
+ int msb = a->msb, lsb = a->lsb;
+ TCGv_i32 t_in, t_rd;
+ int width;
+
+ if (!ENABLE_ARCH_6T2) {
+ return false;
+ }
+ if (msb < lsb) {
+ /* UNPREDICTABLE; we choose to UNDEF */
+ unallocated_encoding(s);
+ return true;
+ }
+
+ width = msb + 1 - lsb;
+ if (a->rn == 15) {
+ /* BFC */
+ t_in = tcg_constant_i32(0);
+ } else {
+ /* BFI */
+ t_in = load_reg(s, a->rn);
+ }
+ t_rd = load_reg(s, a->rd);
+ tcg_gen_deposit_i32(t_rd, t_rd, t_in, lsb, width);
+ store_reg(s, a->rd, t_rd);
+ return true;
+}
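+
+/*
+ * Worked example: "BFI r0, r1, #8, #4" arrives as lsb=8, msb=11, so
+ * width=4 and we deposit r1[3:0] into r0[11:8], leaving the other
+ * bits of r0 unchanged; "BFC r0, #8, #4" is the Rn=15 form, which
+ * deposits zeroes into the same field.
+ */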
+
+static bool trans_UDF(DisasContext *s, arg_UDF *a)
+{
+ unallocated_encoding(s);
+ return true;
+}
+
+/*
+ * Parallel addition and subtraction
+ */
+
+static bool op_par_addsub(DisasContext *s, arg_rrr *a,
+ void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 t0, t1;
+
+ if (s->thumb
+ ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
+ : !ENABLE_ARCH_6) {
+ return false;
+ }
+
+ t0 = load_reg(s, a->rn);
+ t1 = load_reg(s, a->rm);
+
+ gen(t0, t0, t1);
+
+ store_reg(s, a->rd, t0);
+ return true;
+}
+
+static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
+ void (*gen)(TCGv_i32, TCGv_i32,
+ TCGv_i32, TCGv_ptr))
+{
+ TCGv_i32 t0, t1;
+ TCGv_ptr ge;
+
+ if (s->thumb
+ ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
+ : !ENABLE_ARCH_6) {
+ return false;
+ }
+
+ t0 = load_reg(s, a->rn);
+ t1 = load_reg(s, a->rm);
+
+ ge = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(ge, tcg_env, offsetof(CPUARMState, GE));
+ gen(t0, t0, t1, ge);
+
+ store_reg(s, a->rd, t0);
+ return true;
+}
+
+#define DO_PAR_ADDSUB(NAME, helper) \
+static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
+{ \
+ return op_par_addsub(s, a, helper); \
+}
+
+#define DO_PAR_ADDSUB_GE(NAME, helper) \
+static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
+{ \
+ return op_par_addsub_ge(s, a, helper); \
+}
+
+DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
+DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
+DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
+DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
+DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
+DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
+
+DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
+DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
+DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
+DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
+DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
+DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
+
+DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
+DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
+DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
+DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
+DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
+DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
+
+DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
+DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
+DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
+DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
+DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
+DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
+
+DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
+DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
+DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
+DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
+DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
+DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
+
+DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
+DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
+DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
+DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
+DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
+DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
+
+#undef DO_PAR_ADDSUB
+#undef DO_PAR_ADDSUB_GE
+
+/*
+ * Packing, unpacking, saturation, and reversal
+ */
+
+static bool trans_PKH(DisasContext *s, arg_PKH *a)
+{
+ TCGv_i32 tn, tm;
+ int shift = a->imm;
+
+ if (s->thumb
+ ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
+ : !ENABLE_ARCH_6) {
+ return false;
+ }
+
+ tn = load_reg(s, a->rn);
+ tm = load_reg(s, a->rm);
+ if (a->tb) {
+ /* PKHTB */
+ if (shift == 0) {
+ shift = 31;
+ }
+ tcg_gen_sari_i32(tm, tm, shift);
+ tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
+ } else {
+ /* PKHBT */
+ tcg_gen_shli_i32(tm, tm, shift);
+ tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
+ }
+ store_reg(s, a->rd, tn);
+ return true;
+}
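+
+/*
+ * For PKHTB an imm5 of 0 encodes "ASR #32" in the architecture;
+ * shifting by 31 instead (as above) yields the same all-sign-bits
+ * value for the low halfword, and is presumably used because TCG
+ * shift counts must be less than 32.
+ */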
+
+static bool op_sat(DisasContext *s, arg_sat *a,
+ void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 tmp;
+ int shift = a->imm;
+
+ if (!ENABLE_ARCH_6) {
+ return false;
+ }
+
+ tmp = load_reg(s, a->rn);
+ if (a->sh) {
+ tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
+ } else {
+ tcg_gen_shli_i32(tmp, tmp, shift);
+ }
+
+ gen(tmp, tcg_env, tmp, tcg_constant_i32(a->satimm));
+
+ store_reg(s, a->rd, tmp);
+ return true;
+}
+
+static bool trans_SSAT(DisasContext *s, arg_sat *a)
+{
+ return op_sat(s, a, gen_helper_ssat);
+}
+
+static bool trans_USAT(DisasContext *s, arg_sat *a)
+{
+ return op_sat(s, a, gen_helper_usat);
+}
+
+static bool trans_SSAT16(DisasContext *s, arg_sat *a)
+{
+ if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
+ return false;
+ }
+ return op_sat(s, a, gen_helper_ssat16);
+}
+
+static bool trans_USAT16(DisasContext *s, arg_sat *a)
+{
+ if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
+ return false;
+ }
+ return op_sat(s, a, gen_helper_usat16);
+}
+
+static bool op_xta(DisasContext *s, arg_rrr_rot *a,
+ void (*gen_extract)(TCGv_i32, TCGv_i32),
+ void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 tmp;
+
+ if (!ENABLE_ARCH_6) {
+ return false;
+ }
+
+ tmp = load_reg(s, a->rm);
+ /*
+ * TODO: In many cases we could do a shift instead of a rotate.
+ * Combined with a simple extend, that becomes an extract.
+ */
+ tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
+ gen_extract(tmp, tmp);
+
+ if (a->rn != 15) {
+ TCGv_i32 tmp2 = load_reg(s, a->rn);
+ gen_add(tmp, tmp, tmp2);
+ }
+ store_reg(s, a->rd, tmp);
+ return true;
+}
+
+static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
+{
+ return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
+}
+
+static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
+{
+ return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
+}
+
+static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
+{
+ if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
+ return false;
+ }
+ return op_xta(s, a, gen_helper_sxtb16, gen_add16);
+}
+
+static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
+{
+ return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
+}
+
+static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
+{
+ return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
+}
+
+static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
+{
+ if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
+ return false;
+ }
+ return op_xta(s, a, gen_helper_uxtb16, gen_add16);
+}
+
+static bool trans_SEL(DisasContext *s, arg_rrr *a)
+{
+ TCGv_i32 t1, t2, t3;
+
+ if (s->thumb
+ ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
+ : !ENABLE_ARCH_6) {
+ return false;
+ }
+
+ t1 = load_reg(s, a->rn);
+ t2 = load_reg(s, a->rm);
+ t3 = tcg_temp_new_i32();
+ tcg_gen_ld_i32(t3, tcg_env, offsetof(CPUARMState, GE));
+ gen_helper_sel_flags(t1, t3, t1, t2);
+ store_reg(s, a->rd, t1);
+ return true;
+}
+
+static bool op_rr(DisasContext *s, arg_rr *a,
+ void (*gen)(TCGv_i32, TCGv_i32))
+{
+ TCGv_i32 tmp;
+
+ tmp = load_reg(s, a->rm);
+ gen(tmp, tmp);
+ store_reg(s, a->rd, tmp);
+ return true;
+}
+
+static bool trans_REV(DisasContext *s, arg_rr *a)
+{
+ if (!ENABLE_ARCH_6) {
+ return false;
+ }
+ return op_rr(s, a, tcg_gen_bswap32_i32);
+}
+
+static bool trans_REV16(DisasContext *s, arg_rr *a)
+{
+ if (!ENABLE_ARCH_6) {
+ return false;
+ }
+ return op_rr(s, a, gen_rev16);
+}
+
+static bool trans_REVSH(DisasContext *s, arg_rr *a)
+{
+ if (!ENABLE_ARCH_6) {
+ return false;
+ }
+ return op_rr(s, a, gen_revsh);
+}
+
+static bool trans_RBIT(DisasContext *s, arg_rr *a)
+{
+ if (!ENABLE_ARCH_6T2) {
+ return false;
+ }
+ return op_rr(s, a, gen_helper_rbit);
+}
+
+/*
+ * Signed multiply, signed and unsigned divide
+ */
+
+static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
+{
+ TCGv_i32 t1, t2;
+
+ if (!ENABLE_ARCH_6) {
+ return false;
+ }
+
+ t1 = load_reg(s, a->rn);
+ t2 = load_reg(s, a->rm);
+ if (m_swap) {
+ gen_swap_half(t2, t2);
+ }
+ gen_smul_dual(t1, t2);
+
+ if (sub) {
+ /*
+ * This subtraction cannot overflow, so we can do a simple
+ * 32-bit subtraction and then a possible 32-bit saturating
+ * addition of Ra.
+ */
+ tcg_gen_sub_i32(t1, t1, t2);
+
+ if (a->ra != 15) {
+ t2 = load_reg(s, a->ra);
+ gen_helper_add_setq(t1, tcg_env, t1, t2);
+ }
+ } else if (a->ra == 15) {
+ /* Single saturation-checking addition */
+ gen_helper_add_setq(t1, tcg_env, t1, t2);
+ } else {
+ /*
+ * We need to add the products and Ra together and then
+ * determine whether the final result overflowed. Doing
+ * this as two separate add-and-check-overflow steps incorrectly
+ * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
+ * Do all the arithmetic at 64-bits and then check for overflow.
+ */
+ TCGv_i64 p64, q64;
+ TCGv_i32 t3, qf, one;
+
+ p64 = tcg_temp_new_i64();
+ q64 = tcg_temp_new_i64();
+ tcg_gen_ext_i32_i64(p64, t1);
+ tcg_gen_ext_i32_i64(q64, t2);
+ tcg_gen_add_i64(p64, p64, q64);
+ load_reg_var(s, t2, a->ra);
+ tcg_gen_ext_i32_i64(q64, t2);
+ tcg_gen_add_i64(p64, p64, q64);
+
+ tcg_gen_extr_i64_i32(t1, t2, p64);
+ /*
+ * t1 is the low half of the result which goes into Rd.
+ * We have overflow and must set Q if the high half (t2)
+ * is different from the sign-extension of t1.
+ */
+ t3 = tcg_temp_new_i32();
+ tcg_gen_sari_i32(t3, t1, 31);
+ qf = load_cpu_field(QF);
+ one = tcg_constant_i32(1);
+ tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
+ store_cpu_field(qf, QF);
+ }
+ store_reg(s, a->rd, t1);
+ return true;
+}
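+
+/*
+ * Worked example of the overflow case above, with both products
+ * equal to (-32768 * -32768) = 0x40000000 and Ra = -1:
+ *     0x40000000 + 0x40000000 = 0x80000000  (overflows as signed 32-bit)
+ *     0x80000000 + (-1)       = 0x7fffffff  (representable again)
+ * A step-by-step 32-bit overflow check would set Q on the first
+ * addition, but the architectural result 0x7fffffff fits in 32 bits,
+ * so the 64-bit accumulation correctly leaves Q unchanged.
+ */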
+
+static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
+{
+ return op_smlad(s, a, false, false);
+}
+
+static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
+{
+ return op_smlad(s, a, true, false);
+}
+
+static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
+{
+ return op_smlad(s, a, false, true);
+}
+
+static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
+{
+ return op_smlad(s, a, true, true);
+}
+
+static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
+{
+ TCGv_i32 t1, t2;
+ TCGv_i64 l1, l2;
+
+ if (!ENABLE_ARCH_6) {
+ return false;
+ }
+
+ t1 = load_reg(s, a->rn);
+ t2 = load_reg(s, a->rm);
+ if (m_swap) {
+ gen_swap_half(t2, t2);
+ }
+ gen_smul_dual(t1, t2);
+
+ l1 = tcg_temp_new_i64();
+ l2 = tcg_temp_new_i64();
+ tcg_gen_ext_i32_i64(l1, t1);
+ tcg_gen_ext_i32_i64(l2, t2);
+
+ if (sub) {
+ tcg_gen_sub_i64(l1, l1, l2);
+ } else {
+ tcg_gen_add_i64(l1, l1, l2);
+ }
+
+ gen_addq(s, l1, a->ra, a->rd);
+ gen_storeq_reg(s, a->ra, a->rd, l1);
+ return true;
+}
+
+static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
+{
+ return op_smlald(s, a, false, false);
+}
+
+static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
+{
+ return op_smlald(s, a, true, false);
+}
+
+static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
+{
+ return op_smlald(s, a, false, true);
+}
+
+static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
+{
+ return op_smlald(s, a, true, true);
+}
+
+static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
+{
+ TCGv_i32 t1, t2;
+
+ if (s->thumb
+ ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
+ : !ENABLE_ARCH_6) {
+ return false;
+ }
+
+ t1 = load_reg(s, a->rn);
+ t2 = load_reg(s, a->rm);
+ tcg_gen_muls2_i32(t2, t1, t1, t2);
+
+ if (a->ra != 15) {
+ TCGv_i32 t3 = load_reg(s, a->ra);
+ if (sub) {
+ /*
+ * For SMMLS we need a full 64-bit subtract: a non-zero product
+ * lowpart generates a borrow into the high word, and we also need
+ * the correct result lowpart for the rounding step below.
+ */
+ tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
+ } else {
+ tcg_gen_add_i32(t1, t1, t3);
+ }
+ }
+ if (round) {
+ /*
+ * Adding 0x80000000 to the 64-bit quantity means that we have
+ * carry in to the high word when the low word has the msb set.
+ */
+ tcg_gen_shri_i32(t2, t2, 31);
+ tcg_gen_add_i32(t1, t1, t2);
+ }
+ store_reg(s, a->rd, t1);
+ return true;
+}
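+
+/*
+ * For the rounding variants, e.g. SMMLAR, a 64-bit intermediate of
+ * 0x180000000 rounds up to a high word of 2 rather than truncating
+ * to 1: bit 31 of the low word (t2 >> 31 above) carries into t1.
+ */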
+
+static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
+{
+ return op_smmla(s, a, false, false);
+}
+
+static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
+{
+ return op_smmla(s, a, true, false);
+}
+
+static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
+{
+ return op_smmla(s, a, false, true);
+}
+
+static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
+{
+ return op_smmla(s, a, true, true);
+}
+
+static bool op_div(DisasContext *s, arg_rrr *a, bool u)
+{
+ TCGv_i32 t1, t2;
+
+ if (s->thumb
+ ? !dc_isar_feature(aa32_thumb_div, s)
+ : !dc_isar_feature(aa32_arm_div, s)) {
+ return false;
+ }
+
+ t1 = load_reg(s, a->rn);
+ t2 = load_reg(s, a->rm);
+ if (u) {
+ gen_helper_udiv(t1, tcg_env, t1, t2);
+ } else {
+ gen_helper_sdiv(t1, tcg_env, t1, t2);
+ }
+ store_reg(s, a->rd, t1);
+ return true;
+}
+
+static bool trans_SDIV(DisasContext *s, arg_rrr *a)
+{
+ return op_div(s, a, false);
+}
+
+static bool trans_UDIV(DisasContext *s, arg_rrr *a)
+{
+ return op_div(s, a, true);
+}
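+
+/*
+ * The divide helpers take tcg_env because division by zero is not a
+ * plain arithmetic case: A-profile SDIV/UDIV return 0 for it, while
+ * M-profile can instead raise a UsageFault if CCR.DIV_0_TRP is set.
+ */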
+
+/*
+ * Block data transfer
+ */
+
+static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
+{
+ TCGv_i32 addr = load_reg(s, a->rn);
+
+ if (a->b) {
+ if (a->i) {
+ /* pre increment */
+ tcg_gen_addi_i32(addr, addr, 4);
+ } else {
+ /* pre decrement */
+ tcg_gen_addi_i32(addr, addr, -(n * 4));
+ }
+ } else if (!a->i && n != 1) {
+ /* post decrement */
+ tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
+ }
+
+ if (s->v8m_stackcheck && a->rn == 13 && a->w) {
+ /*
+ * If the writeback is incrementing SP rather than
+ * decrementing it, and the initial SP is below the
+ * stack limit but the final written-back SP would
+ * be above, then we must not perform any memory
+ * accesses, but it is IMPDEF whether we generate
+ * an exception. We choose to do so in this case.
+ * At this point 'addr' is the lowest address, so
+ * either the original SP (if incrementing) or our
+ * final SP (if decrementing), so that's what we check.
+ */
+ gen_helper_v8m_stackcheck(tcg_env, addr);
+ }
+
+ return addr;
+}
+
+static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
+ TCGv_i32 addr, int n)
+{
+ if (a->w) {
+ /* write back */
+ if (!a->b) {
+ if (a->i) {
+ /* post increment */
+ tcg_gen_addi_i32(addr, addr, 4);
+ } else {
+ /* post decrement */
+ tcg_gen_addi_i32(addr, addr, -(n * 4));
+ }
+ } else if (!a->i && n != 1) {
+ /* pre decrement */
+ tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
+ }
+ store_reg(s, a->rn, addr);
+ }
+}
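+
+/*
+ * The b ("before") and i ("increment") bits correspond to the usual
+ * LDM/STM suffixes: IA is i=1 b=0, IB is i=1 b=1, DA is i=0 b=0 and
+ * DB is i=0 b=1. op_addr_block_pre() rewrites all four so that the
+ * transfers always walk upwards from the lowest address involved,
+ * which is why the decrementing forms bias the base down by n or
+ * n-1 words first.
+ */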
+
+static bool op_stm(DisasContext *s, arg_ldst_block *a)
+{
+ int i, j, n, list, mem_idx;
+ bool user = a->u;
+ TCGv_i32 addr, tmp;
+
+ if (user) {
+ /* STM (user) */
+ if (IS_USER(s)) {
+ /* Only usable in supervisor mode. */
+ unallocated_encoding(s);
+ return true;
+ }
+ }
+
+ list = a->list;
+ n = ctpop16(list);
+ /*
+ * This is UNPREDICTABLE for n < 1 in all encodings, and we choose
+ * to UNDEF. In the T32 STM encoding n == 1 is also UNPREDICTABLE,
+ * but hardware treats it like the A32 version and implements the
+ * single-register-store, and some in-the-wild (buggy) software
+ * assumes that, so we don't UNDEF on that case.
+ */
+ if (n < 1 || a->rn == 15) {
+ unallocated_encoding(s);
+ return true;
+ }
+
+ s->eci_handled = true;
+
+ addr = op_addr_block_pre(s, a, n);
+ mem_idx = get_mem_index(s);
+
+ for (i = j = 0; i < 16; i++) {
+ if (!(list & (1 << i))) {
+ continue;
+ }
+
+ if (user && i != 15) {
+ tmp = tcg_temp_new_i32();
+ gen_helper_get_user_reg(tmp, tcg_env, tcg_constant_i32(i));
+ } else {
+ tmp = load_reg(s, i);
+ }
+ gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+
+ /* No need to add after the last transfer. */
+ if (++j != n) {
+ tcg_gen_addi_i32(addr, addr, 4);
+ }
+ }
+
+ op_addr_block_post(s, a, addr, n);
+ clear_eci_state(s);
+ return true;
+}
+
+static bool trans_STM(DisasContext *s, arg_ldst_block *a)
+{
+ return op_stm(s, a);
+}
+
+static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
+{
+ /* Writeback register in register list is UNPREDICTABLE for T32. */
+ if (a->w && (a->list & (1 << a->rn))) {
+ unallocated_encoding(s);
+ return true;
+ }
+ return op_stm(s, a);
+}
+
+static bool do_ldm(DisasContext *s, arg_ldst_block *a)
+{
+ int i, j, n, list, mem_idx;
+ bool loaded_base;
+ bool user = a->u;
+ bool exc_return = false;
+ TCGv_i32 addr, tmp, loaded_var;
+
+ if (user) {
+ /* LDM (user), LDM (exception return) */
+ if (IS_USER(s)) {
+ /* Only usable in supervisor mode. */
+ unallocated_encoding(s);
+ return true;
+ }
+ if (extract32(a->list, 15, 1)) {
+ exc_return = true;
+ user = false;
+ } else {
+ /* LDM (user) does not allow writeback. */
+ if (a->w) {
+ unallocated_encoding(s);
+ return true;
+ }
+ }
+ }
+
+ list = a->list;
+ n = ctpop16(list);
+ /*
+ * This is UNPREDICTABLE for n < 1 in all encodings, and we choose
+ * to UNDEF. In the T32 LDM encoding n == 1 is also UNPREDICTABLE,
+ * but hardware treats it like the A32 version and implements the
+ * single-register-load, and some in-the-wild (buggy) software
+ * assumes that, so we don't UNDEF on that case.
+ */
+ if (n < 1 || a->rn == 15) {
+ unallocated_encoding(s);
+ return true;
+ }
+
+ s->eci_handled = true;
+
+ addr = op_addr_block_pre(s, a, n);
+ mem_idx = get_mem_index(s);
+ loaded_base = false;
+ loaded_var = NULL;
+
+ for (i = j = 0; i < 16; i++) {
+ if (!(list & (1 << i))) {
+ continue;
+ }
+
+ tmp = tcg_temp_new_i32();
+ gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+ if (user) {
+ gen_helper_set_user_reg(tcg_env, tcg_constant_i32(i), tmp);
+ } else if (i == a->rn) {
+ loaded_var = tmp;
+ loaded_base = true;
+ } else if (i == 15 && exc_return) {
+ store_pc_exc_ret(s, tmp);
+ } else {
+ store_reg_from_load(s, i, tmp);
+ }
+
+ /* No need to add after the last transfer. */
+ if (++j != n) {
+ tcg_gen_addi_i32(addr, addr, 4);
+ }
+ }
+
+ op_addr_block_post(s, a, addr, n);
+
+ if (loaded_base) {
+ /* Note that we reject base == pc above. */
+ store_reg(s, a->rn, loaded_var);
+ }
+
+ if (exc_return) {
+ /* Restore CPSR from SPSR. */
+ tmp = load_cpu_field(spsr);
+ translator_io_start(&s->base);
+ gen_helper_cpsr_write_eret(tcg_env, tmp);
+ /* Must exit loop to check un-masked IRQs */
+ s->base.is_jmp = DISAS_EXIT;
+ }
+ clear_eci_state(s);
+ return true;
+}
+
+static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
+{
+ /*
+ * Writeback register in register list is UNPREDICTABLE
+ * for ArchVersion() >= 7. Prior to v7, A32 would write
+ * an UNKNOWN value to the base register.
+ */
+ if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
+ unallocated_encoding(s);
+ return true;
+ }
+ return do_ldm(s, a);
+}
+
+static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
+{
+ /* Writeback register in register list is UNPREDICTABLE for T32. */
+ if (a->w && (a->list & (1 << a->rn))) {
+ unallocated_encoding(s);
+ return true;
+ }
+ return do_ldm(s, a);
+}
+
+static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
+{
+ /* Writeback is conditional on the base register not being loaded. */
+ a->w = !(a->list & (1 << a->rn));
+ return do_ldm(s, a);
+}
+
+static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
+{
+ int i;
+ TCGv_i32 zero;
+
+ if (!dc_isar_feature(aa32_m_sec_state, s)) {
+ return false;
+ }
+
+ if (extract32(a->list, 13, 1)) {
+ return false;
+ }
+
+ if (!a->list) {
+ /* UNPREDICTABLE; we choose to UNDEF */
+ return false;
+ }
+
+ s->eci_handled = true;
+
+ zero = tcg_constant_i32(0);
+ for (i = 0; i < 15; i++) {
+ if (extract32(a->list, i, 1)) {
+ /* Clear R[i] */
+ tcg_gen_mov_i32(cpu_R[i], zero);
+ }
+ }
+ if (extract32(a->list, 15, 1)) {
+ /*
+ * Clear APSR (by calling the MSR helper with the same argument
+ * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
+ */
+ gen_helper_v7m_msr(tcg_env, tcg_constant_i32(0xc00), zero);
+ }
+ clear_eci_state(s);
+ return true;
+}
+
+/*
+ * Branch, branch with link
+ */
+
+static bool trans_B(DisasContext *s, arg_i *a)
+{
+ gen_jmp(s, jmp_diff(s, a->imm));
+ return true;
+}
+
+static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
+{
+ /* This has cond from encoding, required to be outside IT block. */
+ if (a->cond >= 0xe) {
+ return false;
+ }
+ if (s->condexec_mask) {
+ unallocated_encoding(s);
+ return true;
+ }
+ arm_skip_unless(s, a->cond);
+ gen_jmp(s, jmp_diff(s, a->imm));
+ return true;
+}
+
+static bool trans_BL(DisasContext *s, arg_i *a)
+{
+ gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
+ gen_jmp(s, jmp_diff(s, a->imm));
+ return true;
+}
+
+static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
+{
+ /*
+ * BLX <imm> would be useless on M-profile; the encoding space
+ * is used for other insns from v8.1M onward, and UNDEFs before that.
+ */
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+
+ /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
+ if (s->thumb && (a->imm & 2)) {
+ return false;
+ }
+ gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
+ store_cpu_field_constant(!s->thumb, thumb);
+ /* This jump is computed from an aligned PC: subtract off the low bits. */
+ gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
+ return true;
+}
+
+static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
+{
+ assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
+ gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
+ return true;
+}
+
+static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
+{
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
+ tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
+ gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
+ gen_bx(s, tmp);
+ return true;
+}
+
+static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
+{
+ TCGv_i32 tmp;
+
+ assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
+ if (!ENABLE_ARCH_5) {
+ return false;
+ }
+ tmp = tcg_temp_new_i32();
+ tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
+ tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
+ gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
+ gen_bx(s, tmp);
+ return true;
+}
+
+static bool trans_BF(DisasContext *s, arg_BF *a)
+{
+ /*
+ * M-profile branch future insns. The architecture permits an
+ * implementation to implement these as NOPs (equivalent to
+ * discarding the LO_BRANCH_INFO cache immediately), and we
+ * take that IMPDEF option because for QEMU a "real" implementation
+ * would be complicated and wouldn't execute any faster.
+ */
+ if (!dc_isar_feature(aa32_lob, s)) {
+ return false;
+ }
+ if (a->boff == 0) {
+ /* SEE "Related encodings" (loop insns) */
+ return false;
+ }
+ /* Handle as NOP */
+ return true;
+}
+
+static bool trans_DLS(DisasContext *s, arg_DLS *a)
+{
+ /* M-profile low-overhead loop start */
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_lob, s)) {
+ return false;
+ }
+ if (a->rn == 13 || a->rn == 15) {
+ /*
+ * For DLSTP rn == 15 is a related encoding (LCTP); the
+ * other cases caught by this condition are all
+ * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
+ */
+ return false;
+ }
+
+ if (a->size != 4) {
+ /* DLSTP */
+ if (!dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+ }
+
+ /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
+ tmp = load_reg(s, a->rn);
+ store_reg(s, 14, tmp);
+ if (a->size != 4) {
+ /* DLSTP: set FPSCR.LTPSIZE */
+ store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
+ }
+ return true;
+}
+
+static bool trans_WLS(DisasContext *s, arg_WLS *a)
+{
+ /* M-profile low-overhead while-loop start */
+ TCGv_i32 tmp;
+ DisasLabel nextlabel;
+
+ if (!dc_isar_feature(aa32_lob, s)) {
+ return false;
+ }
+ if (a->rn == 13 || a->rn == 15) {
+ /*
+ * For WLSTP rn == 15 is a related encoding (LE); the
+ * other cases caught by this condition are all
+ * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
+ */
+ return false;
+ }
+ if (s->condexec_mask) {
+ /*
+ * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
+ * we choose to UNDEF, because otherwise our use of
+ * gen_goto_tb(1) would clash with the use of TB exit 1
+ * in the dc->condjmp condition-failed codepath in
+ * arm_tr_tb_stop() and we'd get an assertion.
+ */
+ return false;
+ }
+ if (a->size != 4) {
+ /* WLSTP */
+ if (!dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+ /*
+ * We need to check that the FPU is enabled here, but mustn't
+ * call vfp_access_check() to do that because we don't want to
+ * do the lazy state preservation in the "loop count is zero" case.
+ * Do the check-and-raise-exception by hand.
+ */
+ if (s->fp_excp_el) {
+ gen_exception_insn_el(s, 0, EXCP_NOCP,
+ syn_uncategorized(), s->fp_excp_el);
+ return true;
+ }
+ }
+
+ nextlabel = gen_disas_label(s);
+ tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
+ tmp = load_reg(s, a->rn);
+ store_reg(s, 14, tmp);
+ if (a->size != 4) {
+ /*
+ * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
+ * lazy state preservation, new FP context creation, etc,
+ * that vfp_access_check() does. We know that the actual
+ * access check will succeed (ie it won't generate code that
+ * throws an exception) because we did that check by hand earlier.
+ */
+ bool ok = vfp_access_check(s);
+ assert(ok);
+ store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
+ /*
+ * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
+ * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
+ */
+ }
+ gen_jmp_tb(s, curr_insn_len(s), 1);
+
+ set_disas_label(s, nextlabel);
+ gen_jmp(s, jmp_diff(s, a->imm));
+ return true;
+}
+
+static bool trans_LE(DisasContext *s, arg_LE *a)
+{
+ /*
+ * M-profile low-overhead loop end. The architecture permits an
+ * implementation to discard the LO_BRANCH_INFO cache at any time,
+ * and we take the IMPDEF option to never set it in the first place
+ * (equivalent to always discarding it immediately), because for QEMU
+ * a "real" implementation would be complicated and wouldn't execute
+ * any faster.
+ */
+ TCGv_i32 tmp;
+ DisasLabel loopend;
+ bool fpu_active;
+
+ if (!dc_isar_feature(aa32_lob, s)) {
+ return false;
+ }
+ if (a->f && a->tp) {
+ return false;
+ }
+ if (s->condexec_mask) {
+ /*
+ * LE in an IT block is CONSTRAINED UNPREDICTABLE;
+ * we choose to UNDEF, because otherwise our use of
+ * gen_goto_tb(1) would clash with the use of TB exit 1
+ * in the dc->condjmp condition-failed codepath in
+ * arm_tr_tb_stop() and we'd get an assertion.
+ */
+ return false;
+ }
+ if (a->tp) {
+ /* LETP */
+ if (!dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+ if (!vfp_access_check(s)) {
+ s->eci_handled = true;
+ return true;
+ }
+ }
+
+ /* LE/LETP is OK with ECI set and leaves it untouched */
+ s->eci_handled = true;
+
+ /*
+ * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
+ * UsageFault exception for the LE insn in that case. Note that we
+ * are not directly checking FPSCR.LTPSIZE but instead check the
+ * pseudocode LTPSIZE() function, which returns 4 if the FPU is
+ * not currently active (ie ActiveFPState() returns false). We
+ * can identify not-active purely from our TB state flags, as the
+ * FPU is active only if:
+ * the FPU is enabled
+ * AND lazy state preservation is not active
+ * AND we do not need a new fp context (this is the ASPEN/FPCA check)
+ *
+ * Usually we don't need to care about this distinction between
+ * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
+ * will either take an exception or clear the conditions that make
+ * the FPU not active. But LE is an unusual case of a non-FP insn
+ * that looks at LTPSIZE.
+ */
+ fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
+
+ if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
+ /* Need to do a runtime check for LTPSIZE != 4 */
+ DisasLabel skipexc = gen_disas_label(s);
+ tmp = load_cpu_field(v7m.ltpsize);
+ tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
+ gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
+ set_disas_label(s, skipexc);
+ }
+
+ if (a->f) {
+ /* Loop-forever: just jump back to the loop start */
+ gen_jmp(s, jmp_diff(s, -a->imm));
+ return true;
+ }
+
+ /*
+ * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
+ * For LE, we know at this point that LTPSIZE must be 4 and the
+ * loop decrement value is 1. For LETP we need to calculate the decrement
+ * value from LTPSIZE.
+ */
+ loopend = gen_disas_label(s);
+ if (!a->tp) {
+ tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
+ tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
+ } else {
+ /*
+ * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
+ * so that decr stays live after the brcondi.
+ */
+ TCGv_i32 decr = tcg_temp_new_i32();
+ TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
+ tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
+ tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
+
+ tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
+
+ tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
+ }
+ /* Jump back to the loop start */
+ gen_jmp(s, jmp_diff(s, -a->imm));
+
+ set_disas_label(s, loopend);
+ if (a->tp) {
+ /* Exits from tail-pred loops must reset LTPSIZE to 4 */
+ store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
+ }
+ /* End TB, continuing to following insn */
+ gen_jmp_tb(s, curr_insn_len(s), 1);
+ return true;
+}
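+
+/*
+ * For LETP the per-iteration decrement computed above is the number
+ * of elements in a 16-byte vector: e.g. LTPSIZE == 2 (32-bit
+ * elements) gives 1 << (4 - 2) = 4, so LR counts down in elements
+ * rather than iterations.
+ */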
+
+static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
+{
+ /*
+ * M-profile Loop Clear with Tail Predication. Since our implementation
+ * doesn't cache branch information, all we need to do is reset
+ * FPSCR.LTPSIZE to 4.
+ */
+
+ if (!dc_isar_feature(aa32_lob, s) ||
+ !dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ store_cpu_field_constant(4, v7m.ltpsize);
+ return true;
+}
+
+static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
+{
+ /*
+ * M-profile Create Vector Tail Predicate. This insn is itself
+ * predicated and is subject to beatwise execution.
+ */
+ TCGv_i32 rn_shifted, masklen;
+
+ if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
+ return false;
+ }
+
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ /*
+ * We pre-calculate the mask length here to avoid needing multiple
+ * helpers specialized for element size.
+ * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
+ */
+ rn_shifted = tcg_temp_new_i32();
+ masklen = load_reg(s, a->rn);
+ tcg_gen_shli_i32(rn_shifted, masklen, a->size);
+ tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
+ masklen, tcg_constant_i32(1 << (4 - a->size)),
+ rn_shifted, tcg_constant_i32(16));
+ gen_helper_mve_vctp(tcg_env, masklen);
+ /* This insn updates predication bits */
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
+ mve_update_eci(s);
+ return true;
+}
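+
+/*
+ * Worked example: for VCTP.32 (size=2) the comparison bound is
+ * 1 << (4 - 2) = 4, so Rn = 3 passes masklen = 3 << 2 = 12 bytes to
+ * the helper, while any Rn above 4 is clamped to the full 16 bytes.
+ */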
+
+static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
+{
+ TCGv_i32 addr, tmp;
+
+ tmp = load_reg(s, a->rm);
+ if (half) {
+ tcg_gen_add_i32(tmp, tmp, tmp);
+ }
+ addr = load_reg(s, a->rn);
+ tcg_gen_add_i32(addr, addr, tmp);
+
+ gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
+
+ tcg_gen_add_i32(tmp, tmp, tmp);
+ gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
+ tcg_gen_add_i32(tmp, tmp, addr);
+ store_reg(s, 15, tmp);
+ return true;
+}
+
+static bool trans_TBB(DisasContext *s, arg_tbranch *a)
+{
+ return op_tbranch(s, a, false);
+}
+
+static bool trans_TBH(DisasContext *s, arg_tbranch *a)
+{
+ return op_tbranch(s, a, true);
+}
+
+static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
+{
+ TCGv_i32 tmp = load_reg(s, a->rn);
+
+ arm_gen_condlabel(s);
+ tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
+ tmp, 0, s->condlabel.label);
+ gen_jmp(s, jmp_diff(s, a->imm));
+ return true;
+}
+
+/*
+ * Supervisor call - both T32 & A32 come here so we need to check
+ * which mode we are in when checking for semihosting.
+ */
+
+static bool trans_SVC(DisasContext *s, arg_SVC *a)
+{
+ const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_M) &&
+ semihosting_enabled(s->current_el == 0) &&
+ (a->imm == semihost_imm)) {
+ gen_exception_internal_insn(s, EXCP_SEMIHOST);
+ } else {
+ if (s->fgt_svc) {
+ uint32_t syndrome = syn_aa32_svc(a->imm, s->thumb);
+ gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
+ } else {
+ gen_update_pc(s, curr_insn_len(s));
+ s->svc_imm = a->imm;
+ s->base.is_jmp = DISAS_SWI;
+ }
+ }
+ return true;
+}
+
+/*
+ * Unconditional system instructions
+ */
+
+static bool trans_RFE(DisasContext *s, arg_RFE *a)
+{
+ static const int8_t pre_offset[4] = {
+ /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
+ };
+ static const int8_t post_offset[4] = {
+ /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
+ };
+ TCGv_i32 addr, t1, t2;
+
+ if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ if (IS_USER(s)) {
+ unallocated_encoding(s);
+ return true;
+ }
+
+ addr = load_reg(s, a->rn);
+ tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
+
+ /* Load PC into tmp and CPSR into tmp2. */
+ t1 = tcg_temp_new_i32();
+ gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
+ tcg_gen_addi_i32(addr, addr, 4);
+ t2 = tcg_temp_new_i32();
+ gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
+
+ if (a->w) {
+ /* Base writeback. */
+ tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
+ store_reg(s, a->rn, addr);
+ }
+ gen_rfe(s, t1, t2);
+ return true;
+}
+
+static bool trans_SRS(DisasContext *s, arg_SRS *a)
+{
+ if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ gen_srs(s, a->mode, a->pu, a->w);
+ return true;
+}
+
+static bool trans_CPS(DisasContext *s, arg_CPS *a)
+{
+ uint32_t mask, val;
+
+ if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ if (IS_USER(s)) {
+ /* Implemented as NOP in user mode. */
+ return true;
+ }
+ /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
+
+ mask = val = 0;
+ if (a->imod & 2) {
+ if (a->A) {
+ mask |= CPSR_A;
+ }
+ if (a->I) {
+ mask |= CPSR_I;
+ }
+ if (a->F) {
+ mask |= CPSR_F;
+ }
+ if (a->imod & 1) {
+ val |= mask;
+ }
+ }
+ if (a->M) {
+ mask |= CPSR_M;
+ val |= a->mode;
+ }
+ if (mask) {
+ gen_set_psr_im(s, mask, 0, val);
+ }
+ return true;
+}
+
+static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
+{
+ TCGv_i32 tmp, addr;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ if (IS_USER(s)) {
+ /* Implemented as NOP in user mode. */
+ return true;
+ }
+
+ tmp = tcg_constant_i32(a->im);
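+ /* 19 and 16 are the v7M SYSm encodings of FAULTMASK and PRIMASK. */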
+ /* FAULTMASK */
+ if (a->F) {
+ addr = tcg_constant_i32(19);
+ gen_helper_v7m_msr(tcg_env, addr, tmp);
+ }
+ /* PRIMASK */
+ if (a->I) {
+ addr = tcg_constant_i32(16);
+ gen_helper_v7m_msr(tcg_env, addr, tmp);
+ }
+ gen_rebuild_hflags(s, false);
+ gen_lookup_tb(s);
+ return true;
+}
+
+/*
+ * Clear-Exclusive, Barriers
+ */
+
+static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
+{
+ if (s->thumb
+ ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
+ : !ENABLE_ARCH_6K) {
+ return false;
+ }
+ gen_clrex(s);
+ return true;
+}
+
+static bool trans_DSB(DisasContext *s, arg_DSB *a)
+{
+ if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+ return true;
+}
+
+static bool trans_DMB(DisasContext *s, arg_DMB *a)
+{
+ return trans_DSB(s, NULL);
+}
+
+static bool trans_ISB(DisasContext *s, arg_ISB *a)
+{
+ if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
+ return false;
+ }
+ /*
+ * We need to break the TB after this insn to execute
+ * self-modifying code correctly and also to take
+ * any pending interrupts immediately.
+ */
+ s->base.is_jmp = DISAS_TOO_MANY;
+ return true;
+}
+
+static bool trans_SB(DisasContext *s, arg_SB *a)
+{
+ if (!dc_isar_feature(aa32_sb, s)) {
+ return false;
+ }
+ /*
+ * TODO: There is no speculation barrier opcode
+ * for TCG; MB and end the TB instead.
+ */
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+ s->base.is_jmp = DISAS_TOO_MANY;
+ return true;
+}
+
+static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
+{
+ if (!ENABLE_ARCH_6) {
+ return false;
+ }
+ if (a->E != (s->be_data == MO_BE)) {
+ gen_helper_setend(tcg_env);
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
+ }
+ return true;
+}
+
+/*
+ * Preload instructions
+ * All are nops, contingent on the appropriate arch level.
+ */
+
+static bool trans_PLD(DisasContext *s, arg_PLD *a)
+{
+ return ENABLE_ARCH_5TE;
+}
+
+static bool trans_PLDW(DisasContext *s, arg_PLD *a)
+{
+ return arm_dc_feature(s, ARM_FEATURE_V7MP);
+}
+
+static bool trans_PLI(DisasContext *s, arg_PLD *a)
+{
+ return ENABLE_ARCH_7;
+}
+
+/*
+ * If-then
+ */
+
+static bool trans_IT(DisasContext *s, arg_IT *a)
+{
+ int cond_mask = a->cond_mask;
+
+ /*
+ * No actual code generated for this insn, just setup state.
+ *
+ * Combinations of firstcond and mask which set up an 0b1111
+ * condition are UNPREDICTABLE; we take the CONSTRAINED
+ * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
+ * i.e. both meaning "execute always".
+ */
+ s->condexec_cond = (cond_mask >> 4) & 0xe;
+ s->condexec_mask = cond_mask & 0x1f;
+ return true;
+}
+
+/* v8.1M CSEL/CSINC/CSNEG/CSINV */
+static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
+{
+ TCGv_i32 rn, rm;
+ DisasCompare c;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
+ return false;
+ }
+
+ if (a->rm == 13) {
+ /* SEE "Related encodings" (MVE shifts) */
+ return false;
+ }
+
+ if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
+ /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
+ return false;
+ }
+
+ /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
+ rn = tcg_temp_new_i32();
+ rm = tcg_temp_new_i32();
+ if (a->rn == 15) {
+ tcg_gen_movi_i32(rn, 0);
+ } else {
+ load_reg_var(s, rn, a->rn);
+ }
+ if (a->rm == 15) {
+ tcg_gen_movi_i32(rm, 0);
+ } else {
+ load_reg_var(s, rm, a->rm);
+ }
+
+ switch (a->op) {
+ case 0: /* CSEL */
+ break;
+ case 1: /* CSINC */
+ tcg_gen_addi_i32(rm, rm, 1);
+ break;
+ case 2: /* CSINV */
+ tcg_gen_not_i32(rm, rm);
+ break;
+ case 3: /* CSNEG */
+ tcg_gen_neg_i32(rm, rm);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
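+ /* rd gets rn if the condition passes, else the transformed rm. */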
+ arm_test_cc(&c, a->fcond);
+ tcg_gen_movcond_i32(c.cond, rn, c.value, tcg_constant_i32(0), rn, rm);
+
+ store_reg(s, a->rd, rn);
+ return true;
+}
+
+/*
+ * Legacy decoder.
+ */
+
+static void disas_arm_insn(DisasContext *s, unsigned int insn)
+{
+ unsigned int cond = insn >> 28;
+
+ /* M variants do not implement ARM mode; this must raise the INVSTATE
+ * UsageFault exception.
+ */
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
+ return;
+ }
+
+ if (s->pstate_il) {
+ /*
+ * Illegal execution state. This has priority over BTI
+ * exceptions, but comes after instruction abort exceptions.
+ */
+ gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
+ return;
+ }
+
+ if (cond == 0xf) {
+ /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
+ * choose to UNDEF. In ARMv5 and above the space is used
+ * for miscellaneous unconditional instructions.
+ */
+ if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ /* Unconditional instructions. */
+ /* TODO: Perhaps merge these into one decodetree output file. */
+ if (disas_a32_uncond(s, insn) ||
+ disas_vfp_uncond(s, insn) ||
+ disas_neon_dp(s, insn) ||
+ disas_neon_ls(s, insn) ||
+ disas_neon_shared(s, insn)) {
+ return;
+ }
+ /* fall back to legacy decoder */
+
+ if ((insn & 0x0e000f00) == 0x0c000100) {
+ if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
+ /* iWMMXt register transfer. */
+ if (extract32(s->c15_cpar, 1, 1)) {
+ if (!disas_iwmmxt_insn(s, insn)) {
+ return;
+ }
+ }
+ }
+ }
+ goto illegal_op;
+ }
+ if (cond != 0xe) {
+ /* If the condition is not "always execute", generate a conditional
+ jump to the next instruction. */
+ arm_skip_unless(s, cond);
+ }
+
+ /* TODO: Perhaps merge these into one decodetree output file. */
+ if (disas_a32(s, insn) ||
+ disas_vfp(s, insn)) {
+ return;
+ }
+ /* fall back to legacy decoder */
+ /* TODO: convert xscale/iwmmxt decoder to decodetree ?? */
+ if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
+ if (((insn & 0x0c000e00) == 0x0c000000)
+ && ((insn & 0x03000000) != 0x03000000)) {
+ /* Coprocessor insn, coprocessor 0 or 1 */
+ disas_xscale_insn(s, insn);
+ return;
+ }
+ }
+
+illegal_op:
+ unallocated_encoding(s);
+}
+
+static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
+{
+ /*
+ * Return true if this is a 16 bit instruction. We must be precise
+ * about this (matching the decode).
+ */
+ if ((insn >> 11) < 0x1d) {
+ /* Definitely a 16-bit instruction */
+ return true;
+ }
+
+ /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
+ * first half of a 32-bit Thumb insn. Thumb-1 cores might
+ * end up actually treating this as two 16-bit insns, though,
+ * if it's half of a bl/blx pair that might span a page boundary.
+ */
+ if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
+ arm_dc_feature(s, ARM_FEATURE_M)) {
+ /* Thumb2 cores (including all M profile ones) always treat
+ * 32-bit insns as 32-bit.
+ */
+ return false;
+ }
+
+ if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
+ /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
+ * is not on the next page; we merge this into a 32-bit
+ * insn.
+ */
+ return false;
+ }
+ /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
+ * 0b1111_1xxx_xxxx_xxxx : BL suffix;
+ * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
+ * -- handle as single 16 bit insn
+ */
+ return true;
+}
+
+/* Translate a 32-bit thumb instruction. */
+static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
+{
+ /*
+ * ARMv6-M supports a limited subset of Thumb2 instructions.
+ * Other Thumb1 architectures allow only 32-bit
+ * combined BL/BLX prefix and suffix.
+ */
+ if (arm_dc_feature(s, ARM_FEATURE_M) &&
+ !arm_dc_feature(s, ARM_FEATURE_V7)) {
+ int i;
+ bool found = false;
+ static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
+ 0xf3b08040 /* dsb */,
+ 0xf3b08050 /* dmb */,
+ 0xf3b08060 /* isb */,
+ 0xf3e08000 /* mrs */,
+ 0xf000d000 /* bl */};
+ static const uint32_t armv6m_mask[] = {0xffe0d000,
+ 0xfff0d0f0,
+ 0xfff0d0f0,
+ 0xfff0d0f0,
+ 0xffe0d000,
+ 0xf800d000};
+
+ for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
+ if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ goto illegal_op;
+ }
+ } else if ((insn & 0xf800e800) != 0xf000e800) {
+ if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
+ unallocated_encoding(s);
+ return;
+ }
+ }
+
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ /*
+ * NOCP takes precedence over any UNDEF for (almost) the
+ * entire wide range of coprocessor-space encodings, so check
+ * for it first before proceeding to actually decode eg VFP
+ * insns. This decode also handles the few insns which are
+ * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
+ */
+ if (disas_m_nocp(s, insn)) {
+ return;
+ }
+ }
+
+ if ((insn & 0xef000000) == 0xef000000) {
+ /*
+ * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
+ * transform into
+ * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
+ */
+ uint32_t a32_insn = (insn & 0xe2ffffff) |
+ ((insn & (1 << 28)) >> 4) | (1 << 28);
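+ /* e.g. T32 0xef... becomes A32 0xf2..., T32 0xff... becomes A32 0xf3... */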
+
+ if (disas_neon_dp(s, a32_insn)) {
+ return;
+ }
+ }
+
+ if ((insn & 0xff100000) == 0xf9000000) {
+ /*
+ * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
+ * transform into
+ * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
+ */
+ uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
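+ /* e.g. T32 0xf920... becomes A32 0xf420... */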
+
+ if (disas_neon_ls(s, a32_insn)) {
+ return;
+ }
+ }
+
+ /*
+ * TODO: Perhaps merge these into one decodetree output file.
+ * Note disas_vfp is written for a32 with cond field in the
+ * top nibble. The t32 encoding requires 0xe in the top nibble.
+ */
+ if (disas_t32(s, insn) ||
+ disas_vfp_uncond(s, insn) ||
+ disas_neon_shared(s, insn) ||
+ disas_mve(s, insn) ||
+ ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
+ return;
+ }
+
+illegal_op:
+ unallocated_encoding(s);
+}
+
+static void disas_thumb_insn(DisasContext *s, uint32_t insn)
+{
+ if (!disas_t16(s, insn)) {
+ unallocated_encoding(s);
+ }
+}
+
+static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
+{
+ /* Return true if the insn at dc->base.pc_next might cross a page boundary.
+ * (False positives are OK, false negatives are not.)
+ * We know this is a Thumb insn, and our caller ensures we are
+ * only called if dc->base.pc_next is less than 4 bytes from the page
+ * boundary, so we cross the page if the first 16 bits indicate
+ * that this is a 32 bit insn.
+ */
+ uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
+
+ return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
+}
+
+static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
+{
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
+ CPUARMState *env = cpu_env(cs);
+ ARMCPU *cpu = env_archcpu(env);
+ CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
+ uint32_t condexec, core_mmu_idx;
+
+ dc->isar = &cpu->isar;
+ dc->condjmp = 0;
+ dc->pc_save = dc->base.pc_first;
+ dc->aarch64 = false;
+ dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
+ dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
+ condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
+ /*
+ * the CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
+ * is always the IT bits. On M-profile, some of the reserved encodings
+ * of IT are used instead to indicate either ICI or ECI, which
+ * indicate partial progress of a restartable insn that was interrupted
+ * partway through by an exception:
+ * * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
+ * * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
+ * In all cases CONDEXEC == 0 means "not in IT block or restartable
+ * insn, behave normally".
+ */
+ dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
+ dc->eci_handled = false;
+ if (condexec & 0xf) {
+ dc->condexec_mask = (condexec & 0xf) << 1;
+ dc->condexec_cond = condexec >> 4;
+ } else {
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ dc->eci = condexec >> 4;
+ }
+ }
+
+ core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
+ dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
+ dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
+#if !defined(CONFIG_USER_ONLY)
+ dc->user = (dc->current_el == 0);
+#endif
+ dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
+ dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
+ dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
+ dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
+ dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
+
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ dc->vfp_enabled = 1;
+ dc->be_data = MO_TE;
+ dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
+ dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
+ dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
+ dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
+ dc->v7m_new_fp_ctxt_needed =
+ EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
+ dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
+ dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
+ } else {
+ dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
+ dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
+ dc->ns = EX_TBFLAG_A32(tb_flags, NS);
+ dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
+ if (arm_feature(env, ARM_FEATURE_XSCALE)) {
+ dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
+ } else {
+ dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
+ dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
+ }
+ dc->sme_trap_nonstreaming =
+ EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
+ }
+ dc->lse2 = false; /* applies only to aarch64 */
+ dc->cp_regs = cpu->cp_regs;
+ dc->features = env->features;
+
+ /* Single step state. The code-generation logic here is:
+ * SS_ACTIVE == 0:
+ * generate code with no special handling for single-stepping (except
+ * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
+ * this happens anyway because those changes are all system register or
+ * PSTATE writes).
+ * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
+ * emit code for one insn
+ * emit code to clear PSTATE.SS
+ * emit code to generate software step exception for completed step
+ * end TB (as usual for having generated an exception)
+ * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
+ * emit code to generate a software step exception
+ * end the TB
+ */
+ dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
+ dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
+ dc->is_ldex = false;
+
+ dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
+
+ /* If architectural single step active, limit to 1. */
+ if (dc->ss_active) {
+ dc->base.max_insns = 1;
+ }
+
+ /* ARM is a fixed-length ISA. Bound the number of insns to execute
+ to those left on the page. */
+ if (!dc->thumb) {
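+ /* -(pc | TARGET_PAGE_MASK) is the number of bytes left on this page. */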
+ int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
+ dc->base.max_insns = MIN(dc->base.max_insns, bound);
+ }
+
+ cpu_V0 = tcg_temp_new_i64();
+ cpu_V1 = tcg_temp_new_i64();
+ cpu_M0 = tcg_temp_new_i64();
+}
+
+static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
+{
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
+
+ /* A note on handling of the condexec (IT) bits:
+ *
+ * We want to avoid the overhead of having to write the updated condexec
+ * bits back to the CPUARMState for every instruction in an IT block. So:
+ * (1) if the condexec bits are not already zero then we write
+ * zero back into the CPUARMState now. This avoids complications trying
+ * to do it at the end of the block. (For example if we don't do this
+ * it's hard to identify whether we can safely skip writing condexec
+ * at the end of the TB, which we definitely want to do for the case
+ * where a TB doesn't do anything with the IT state at all.)
+ * (2) if we are going to leave the TB then we call gen_set_condexec()
+ * which will write the correct value into CPUARMState if zero is wrong.
+ * This is done both for leaving the TB at the end, and for leaving
+ * it because of an exception we know will happen, which is done in
+ * gen_exception_insn(). The latter is necessary because we need to
+ * leave the TB with the PC/IT state just prior to execution of the
+ * instruction which caused the exception.
+ * (3) if we leave the TB unexpectedly (eg a data abort on a load)
+ * then the CPUARMState will be wrong and we need to reset it.
+ * This is handled in the same way as restoration of the
+ * PC in these situations; we save the value of the condexec bits
+ * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
+ * then uses this to restore them after an exception.
+ *
+ * Note that there are no instructions which can read the condexec
+ * bits, and none which can write non-static values to them, so
+ * we don't need to care about whether CPUARMState is correct in the
+ * middle of a TB.
+ */
+
+ /* Reset the conditional execution bits immediately. This avoids
+ complications trying to do it at the end of the block. */
+ if (dc->condexec_mask || dc->condexec_cond) {
+ store_cpu_field_constant(0, condexec_bits);
+ }
+}
+
+static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
+{
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
+ /*
+ * The ECI/ICI bits share PSR bits with the IT bits, so we
+ * need to reconstitute the bits from the split-out DisasContext
+ * fields here.
+ */
+ uint32_t condexec_bits;
+ target_ulong pc_arg = dc->base.pc_next;
+
+ if (tb_cflags(dcbase->tb) & CF_PCREL) {
+ pc_arg &= ~TARGET_PAGE_MASK;
+ }
+ if (dc->eci) {
+ condexec_bits = dc->eci << 4;
+ } else {
+ condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
+ }
+ tcg_gen_insn_start(pc_arg, condexec_bits, 0);
+ dc->insn_start_updated = false;
+}
+
+static bool arm_check_kernelpage(DisasContext *dc)
+{
+#ifdef CONFIG_USER_ONLY
+ /* Intercept jump to the magic kernel page. */
+ if (dc->base.pc_next >= 0xffff0000) {
+ /* We always get here via a jump, so we know we are not in a
+ conditional execution block. */
+ gen_exception_internal(EXCP_KERNEL_TRAP);
+ dc->base.is_jmp = DISAS_NORETURN;
+ return true;
+ }
+#endif
+ return false;
+}
+
+static bool arm_check_ss_active(DisasContext *dc)
+{
+ if (dc->ss_active && !dc->pstate_ss) {
+ /* Singlestep state is Active-pending.
+ * If we're in this state at the start of a TB then either
+ * a) we just took an exception to an EL which is being debugged
+ * and this is the first insn in the exception handler
+ * b) debug exceptions were masked and we just unmasked them
+ * without changing EL (eg by clearing PSTATE.D)
+ * In either case we're going to take a swstep exception in the
+ * "did not step an insn" case, and so the syndrome ISV and EX
+ * bits should be zero.
+ */
+ assert(dc->base.num_insns == 1);
+ gen_swstep_exception(dc, 0, 0);
+ dc->base.is_jmp = DISAS_NORETURN;
+ return true;
+ }
+
+ return false;
+}
+
+static void arm_post_translate_insn(DisasContext *dc)
+{
+ if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
+ if (dc->pc_save != dc->condlabel.pc_save) {
+ gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
+ }
+ gen_set_label(dc->condlabel.label);
+ dc->condjmp = 0;
+ }
+}
+
+static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
+{
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
+ CPUARMState *env = cpu_env(cpu);
+ uint32_t pc = dc->base.pc_next;
+ unsigned int insn;
+
+ /* Singlestep exceptions have the highest priority. */
+ if (arm_check_ss_active(dc)) {
+ dc->base.pc_next = pc + 4;
+ return;
+ }
+
+ if (pc & 3) {
+ /*
+ * PC alignment fault. This has priority over the instruction abort
+ * that we would receive from a translation fault via arm_ldl_code
+ * (or the execution of the kernelpage entrypoint). This should only
+ * be possible after an indirect branch, at the start of the TB.
+ */
+ assert(dc->base.num_insns == 1);
+ gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
+ dc->base.is_jmp = DISAS_NORETURN;
+ dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
+ return;
+ }
+
+ if (arm_check_kernelpage(dc)) {
+ dc->base.pc_next = pc + 4;
+ return;
+ }
+
+ dc->pc_curr = pc;
+ insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
+ dc->insn = insn;
+ dc->base.pc_next = pc + 4;
+ disas_arm_insn(dc, insn);
+
+ arm_post_translate_insn(dc);
+
+ /* ARM is a fixed-length ISA. We performed the cross-page check
+ in init_disas_context by adjusting max_insns. */
+}
+
+static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
+{
+ /* Return true if this Thumb insn is always unconditional,
+ * even inside an IT block. This is true of only a very few
+ * instructions: BKPT, HLT, and SG.
+ *
+ * A larger class of instructions are UNPREDICTABLE if used
+ * inside an IT block; we do not need to detect those here, because
+ * what we do by default (perform the cc check and update the IT
+ * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
+ * choice for those situations.
+ *
+ * insn is either a 16-bit or a 32-bit instruction; the two are
+ * distinguishable because for the 16-bit case the top 16 bits
+ * are zeroes, and that isn't a valid 32-bit encoding.
+ */
+ if ((insn & 0xffffff00) == 0xbe00) {
+ /* BKPT */
+ return true;
+ }
+
+ if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
+ !arm_dc_feature(s, ARM_FEATURE_M)) {
+ /* HLT: v8A only. This is unconditional even when it is going to
+ * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
+ * For v7 cores this was a plain old undefined encoding and so
+ * honours its cc check. (We might be using the encoding as
+ * a semihosting trap, but we don't change the cc check behaviour
+ * on that account, because a debugger connected to a real v7A
+ * core and emulating semihosting traps by catching the UNDEF
+ * exception would also only see cases where the cc check passed.
+ * No guest code should be trying to do a HLT semihosting trap
+ * in an IT block anyway.)
+ */
+ return true;
+ }
+
+ if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
+ arm_dc_feature(s, ARM_FEATURE_M)) {
+ /* SG: v8M only */
+ return true;
+ }
+
+ return false;
+}
+
+static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
+{
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
+ CPUARMState *env = cpu_env(cpu);
+ uint32_t pc = dc->base.pc_next;
+ uint32_t insn;
+ bool is_16bit;
+ /* TCG op to rewind to if this turns out to be an invalid ECI state */
+ TCGOp *insn_eci_rewind = NULL;
+ target_ulong insn_eci_pc_save = -1;
+
+ /* Misaligned thumb PC is architecturally impossible. */
+ assert((dc->base.pc_next & 1) == 0);
+
+ if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
+ dc->base.pc_next = pc + 2;
+ return;
+ }
+
+ dc->pc_curr = pc;
+ insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
+ is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
+ pc += 2;
+ if (!is_16bit) {
+ uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
+ insn = insn << 16 | insn2;
+ pc += 2;
+ }
+ dc->base.pc_next = pc;
+ dc->insn = insn;
+
+ if (dc->pstate_il) {
+ /*
+ * Illegal execution state. This has priority over BTI
+ * exceptions, but comes after instruction abort exceptions.
+ */
+ gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
+ return;
+ }
+
+ if (dc->eci) {
+ /*
+ * For M-profile continuable instructions, ECI/ICI handling
+ * falls into these cases:
+ * - interrupt-continuable instructions
+ * These are the various load/store multiple insns (both
+ * integer and fp). The ICI bits indicate the register
+ * where the load/store can resume. We make the IMPDEF
+ * choice to always do "instruction restart", ie ignore
+ * the ICI value and always execute the ldm/stm from the
+ * start. So all we need to do is zero PSR.ICI if the
+ * insn executes.
+ * - MVE instructions subject to beat-wise execution
+ * Here the ECI bits indicate which beats have already been
+ * executed, and we must honour this. Each insn of this
+ * type will handle it correctly. We will update PSR.ECI
+ * in the helper function for the insn (some ECI values
+ * mean that the following insn also has been partially
+ * executed).
+ * - Special cases which don't advance ECI
+ * The insns LE, LETP and BKPT leave the ECI/ICI state
+ * bits untouched.
+ * - all other insns (the common case)
+ * Non-zero ECI/ICI means an INVSTATE UsageFault.
+ * We place a rewind-marker here. Insns in the previous
+ * three categories will set a flag in the DisasContext.
+ * If the flag isn't set after we call disas_thumb_insn()
+ * or disas_thumb2_insn() then we know we have a "some other
+ * insn" case. We will rewind to the marker (ie throwing away
+ * all the generated code) and instead emit "take exception".
+ */
+ insn_eci_rewind = tcg_last_op();
+ insn_eci_pc_save = dc->pc_save;
+ }
+
+ if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
+ uint32_t cond = dc->condexec_cond;
+
+ /*
+ * Conditionally skip the insn. Note that both 0xe and 0xf mean
+ * "always"; 0xf is not "never".
+ */
+ if (cond < 0x0e) {
+ arm_skip_unless(dc, cond);
+ }
+ }
+
+ if (is_16bit) {
+ disas_thumb_insn(dc, insn);
+ } else {
+ disas_thumb2_insn(dc, insn);
+ }
+
+ /* Advance the Thumb condexec condition. */
+ if (dc->condexec_mask) {
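+ /* The new low bit of the condition comes from the top bit of the mask. */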
+ dc->condexec_cond = ((dc->condexec_cond & 0xe) |
+ ((dc->condexec_mask >> 4) & 1));
+ dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
+ if (dc->condexec_mask == 0) {
+ dc->condexec_cond = 0;
+ }
+ }
+
+ if (dc->eci && !dc->eci_handled) {
+ /*
+ * Insn wasn't valid for ECI/ICI at all: undo what we
+ * just generated and instead emit an exception
+ */
+ tcg_remove_ops_after(insn_eci_rewind);
+ dc->pc_save = insn_eci_pc_save;
+ dc->condjmp = 0;
+ gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
+ }
+
+ arm_post_translate_insn(dc);
+
+ /* Thumb is a variable-length ISA. Stop translation when the next insn
+ * will touch a new page. This ensures that prefetch aborts occur at
+ * the right place.
+ *
+ * We want to stop the TB if the next insn starts in a new page,
+ * or if it spans between this page and the next. This means that
+ * if we're looking at the last halfword in the page we need to
+ * see if it's a 16-bit Thumb insn (which will fit in this TB)
+ * or a 32-bit Thumb insn (which won't).
+ * This is to avoid generating a silly TB with a single 16-bit insn
+ * in it at the end of this page (which would execute correctly
+ * but isn't very efficient).
+ */
+ if (dc->base.is_jmp == DISAS_NEXT
+ && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
+ || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
+ && insn_crosses_page(env, dc)))) {
+ dc->base.is_jmp = DISAS_TOO_MANY;
+ }
+}
+
+static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
+{
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
+
+ /* At this stage dc->condjmp will only be set when the skipped
+ instruction was a conditional branch or trap, and the PC has
+ already been written. */
+ gen_set_condexec(dc);
+ if (dc->base.is_jmp == DISAS_BX_EXCRET) {
+ /* Exception return branches need some special case code at the
+ * end of the TB, which is complex enough that it has to
+ * handle the single-step vs not and the condition-failed
+ * insn codepath itself.
+ */
+ gen_bx_excret_final_code(dc);
+ } else if (unlikely(dc->ss_active)) {
+ /* Unconditional and "condition passed" instruction codepath. */
+ switch (dc->base.is_jmp) {
+ case DISAS_SWI:
+ gen_ss_advance(dc);
+ gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
+ break;
+ case DISAS_HVC:
+ gen_ss_advance(dc);
+ gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
+ break;
+ case DISAS_SMC:
+ gen_ss_advance(dc);
+ gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
+ break;
+ case DISAS_NEXT:
+ case DISAS_TOO_MANY:
+ case DISAS_UPDATE_EXIT:
+ case DISAS_UPDATE_NOCHAIN:
+ gen_update_pc(dc, curr_insn_len(dc));
+ /* fall through */
+ default:
+ /* FIXME: Single stepping a WFI insn will not halt the CPU. */
+ gen_singlestep_exception(dc);
+ break;
+ case DISAS_NORETURN:
+ break;
+ }
+ } else {
+ /* While branches must always occur at the end of an IT block,
+ there are a few other things that can cause us to terminate
+ the TB in the middle of an IT block:
+ - Exception generating instructions (bkpt, swi, undefined).
+ - Page boundaries.
+ - Hardware watchpoints.
+ Hardware breakpoints have already been handled and skip this code.
+ */
+ switch (dc->base.is_jmp) {
+ case DISAS_NEXT:
+ case DISAS_TOO_MANY:
+ gen_goto_tb(dc, 1, curr_insn_len(dc));
+ break;
+ case DISAS_UPDATE_NOCHAIN:
+ gen_update_pc(dc, curr_insn_len(dc));
+ /* fall through */
+ case DISAS_JUMP:
+ gen_goto_ptr();
+ break;
+ case DISAS_UPDATE_EXIT:
+ gen_update_pc(dc, curr_insn_len(dc));
+ /* fall through */
+ default:
+ /* indicate that the hash table must be used to find the next TB */
+ tcg_gen_exit_tb(NULL, 0);
+ break;
+ case DISAS_NORETURN:
+ /* nothing more to generate */
+ break;
+ case DISAS_WFI:
+ gen_helper_wfi(tcg_env, tcg_constant_i32(curr_insn_len(dc)));
+ /*
+ * The helper doesn't necessarily throw an exception, but we
+ * must go back to the main loop to check for interrupts anyway.
+ */
+ tcg_gen_exit_tb(NULL, 0);
+ break;
+ case DISAS_WFE:
+ gen_helper_wfe(tcg_env);
+ break;
+ case DISAS_YIELD:
+ gen_helper_yield(tcg_env);
+ break;
+ case DISAS_SWI:
+ gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
+ break;
+ case DISAS_HVC:
+ gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
+ break;
+ case DISAS_SMC:
+ gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
+ break;
+ }
+ }
+
+ if (dc->condjmp) {
+ /* "Condition failed" instruction codepath for the branch/trap insn */
+ set_disas_label(dc, dc->condlabel);
+ gen_set_condexec(dc);
+ if (unlikely(dc->ss_active)) {
+ gen_update_pc(dc, curr_insn_len(dc));
+ gen_singlestep_exception(dc);
+ } else {
+ gen_goto_tb(dc, 1, curr_insn_len(dc));
+ }
+ }
+}
+
+static void arm_tr_disas_log(const DisasContextBase *dcbase,
+ CPUState *cpu, FILE *logfile)
+{
+ DisasContext *dc = container_of(dcbase, DisasContext, base);
+
+ fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
+ target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
+}
+
+static const TranslatorOps arm_translator_ops = {
+ .init_disas_context = arm_tr_init_disas_context,
+ .tb_start = arm_tr_tb_start,
+ .insn_start = arm_tr_insn_start,
+ .translate_insn = arm_tr_translate_insn,
+ .tb_stop = arm_tr_tb_stop,
+ .disas_log = arm_tr_disas_log,
+};
+
+static const TranslatorOps thumb_translator_ops = {
+ .init_disas_context = arm_tr_init_disas_context,
+ .tb_start = arm_tr_tb_start,
+ .insn_start = arm_tr_insn_start,
+ .translate_insn = thumb_tr_translate_insn,
+ .tb_stop = arm_tr_tb_stop,
+ .disas_log = arm_tr_disas_log,
+};
+
+/* generate intermediate code for basic block 'tb'. */
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
+ vaddr pc, void *host_pc)
+{
+ DisasContext dc = { };
+ const TranslatorOps *ops = &arm_translator_ops;
+ CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
+
+ if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
+ ops = &thumb_translator_ops;
+ }
+#ifdef TARGET_AARCH64
+ if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
+ ops = &aarch64_translator_ops;
+ }
+#endif
+
+ translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
+}
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
new file mode 100644
index 0000000000..dc66ff2190
--- /dev/null
+++ b/target/arm/tcg/translate.h
@@ -0,0 +1,738 @@
+#ifndef TARGET_ARM_TRANSLATE_H
+#define TARGET_ARM_TRANSLATE_H
+
+#include "cpu.h"
+#include "tcg/tcg-op.h"
+#include "tcg/tcg-op-gvec.h"
+#include "exec/exec-all.h"
+#include "exec/translator.h"
+#include "exec/helper-gen.h"
+#include "internals.h"
+#include "cpu-features.h"
+
+/* internal defines */
+
+/*
+ * Save pc_save across a branch, so that we may restore the value from
+ * before the branch at the point the label is emitted.
+ */
+typedef struct DisasLabel {
+ TCGLabel *label;
+ target_ulong pc_save;
+} DisasLabel;
+
+typedef struct DisasContext {
+ DisasContextBase base;
+ const ARMISARegisters *isar;
+
+ /* The address of the current instruction being translated. */
+ target_ulong pc_curr;
+ /*
+ * For CF_PCREL, the full value of cpu_pc is not known
+ * (although the page offset is known). For convenience, the
+ * translation loop uses the full virtual address that triggered
+ * the translation, from base.pc_first through pc_curr.
+ * For efficiency, we do not update cpu_pc for every instruction.
+ * Instead, pc_save has the value of pc_curr at the time of the
+ * last update to cpu_pc, which allows us to compute the addend
+ * needed to bring cpu_pc current: pc_curr - pc_save.
+ * If cpu_pc now contains the destination of an indirect branch,
+ * pc_save contains -1 to indicate that relative updates are no
+ * longer possible.
+ */
+ target_ulong pc_save;
+ target_ulong page_start;
+ uint32_t insn;
+ /* Nonzero if this instruction has been conditionally skipped. */
+ int condjmp;
+ /* The label that will be jumped to when the instruction is skipped. */
+ DisasLabel condlabel;
+ /* Thumb-2 conditional execution bits. */
+ int condexec_mask;
+ int condexec_cond;
+ /* M-profile ECI/ICI exception-continuable instruction state */
+ int eci;
+ /*
+ * trans_ functions for insns which are continuable should set this true
+ * after decode (ie after any UNDEF checks)
+ */
+ bool eci_handled;
+ int sctlr_b;
+ MemOp be_data;
+#if !defined(CONFIG_USER_ONLY)
+ int user;
+#endif
+ ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */
+ uint8_t tbii; /* TBI1|TBI0 for insns */
+ uint8_t tbid; /* TBI1|TBI0 for data */
+ uint8_t tcma; /* TCMA1|TCMA0 for MTE */
+ bool ns; /* Use non-secure CPREG bank on access */
+ int fp_excp_el; /* FP exception EL or 0 if enabled */
+ int sve_excp_el; /* SVE exception EL or 0 if enabled */
+ int sme_excp_el; /* SME exception EL or 0 if enabled */
+ int vl; /* current vector length in bytes */
+ int svl; /* current streaming vector length in bytes */
+ bool vfp_enabled; /* FP enabled via FPSCR.EN */
+ int vec_len;
+ int vec_stride;
+ bool v7m_handler_mode;
+ bool v8m_secure; /* true if v8M and we're in Secure mode */
+ bool v8m_stackcheck; /* true if we need to perform v8M stack limit checks */
+ bool v8m_fpccr_s_wrong; /* true if v8M FPCCR.S != v8m_secure */
+ bool v7m_new_fp_ctxt_needed; /* ASPEN set but no active FP context */
+ bool v7m_lspact; /* FPCCR.LSPACT set */
+ /* Immediate value in AArch32 SVC insn; must be set if is_jmp == DISAS_SWI
+ * so that the top-level loop can generate correct syndrome information.
+ */
+ uint32_t svc_imm;
+ int current_el;
+ GHashTable *cp_regs;
+ uint64_t features; /* CPU features bits */
+ bool aarch64;
+ bool thumb;
+ bool lse2;
+ /* Because unallocated encodings generate different exception syndrome
+ * information from traps due to FP being disabled, we can't do a single
+ * "is fp access disabled" check at a high level in the decode tree.
+ * To help in catching bugs where the access check was forgotten in some
+ * code path, we set this flag when the access check is done, and assert
+ * that it is set at the point where we actually touch the FP regs.
+ */
+ bool fp_access_checked;
+ bool sve_access_checked;
+ /* ARMv8 single-step state (this is distinct from the QEMU gdbstub
+ * single-step support).
+ */
+ bool ss_active;
+ bool pstate_ss;
+ /* True if the insn just emitted was a load-exclusive instruction
+ * (necessary for syndrome information for single step exceptions),
+ * ie A64 LDX*, LDAX*, A32/T32 LDREX*, LDAEX*.
+ */
+ bool is_ldex;
+ /* True if AccType_UNPRIV should be used for LDTR et al */
+ bool unpriv;
+ /* True if v8.3-PAuth is active. */
+ bool pauth_active;
+ /* True if v8.5-MTE access to tags is enabled; index with is_unpriv. */
+ bool ata[2];
+ /* True if v8.5-MTE tag checks affect the PE; index with is_unpriv. */
+ bool mte_active[2];
+ /* True with v8.5-BTI and SCTLR_ELx.BT* set. */
+ bool bt;
+ /* True if any CP15 access is trapped by HSTR_EL2 */
+ bool hstr_active;
+ /* True if memory operations require alignment */
+ bool align_mem;
+ /* True if PSTATE.IL is set */
+ bool pstate_il;
+ /* True if PSTATE.SM is set. */
+ bool pstate_sm;
+ /* True if PSTATE.ZA is set. */
+ bool pstate_za;
+ /* True if non-streaming insns should raise an SME Streaming exception. */
+ bool sme_trap_nonstreaming;
+ /* True if the current instruction is non-streaming. */
+ bool is_nonstreaming;
+ /* True if MVE insns are definitely not predicated by VPR or LTPSIZE */
+ bool mve_no_pred;
+ /* True if fine-grained traps are active */
+ bool fgt_active;
+ /* True if fine-grained trap on SVC is enabled */
+ bool fgt_svc;
+ /* True if a trap on ERET is enabled (FGT or NV) */
+ bool trap_eret;
+ /* True if FEAT_LSE2 SCTLR_ELx.nAA is set */
+ bool naa;
+ /* True if FEAT_NV HCR_EL2.NV is enabled */
+ bool nv;
+ /* True if NV enabled and HCR_EL2.NV1 is set */
+ bool nv1;
+ /* True if NV enabled and HCR_EL2.NV2 is set */
+ bool nv2;
+ /* True if NV2 enabled and NV2 RAM accesses use EL2&0 translation regime */
+ bool nv2_mem_e20;
+ /* True if NV2 enabled and NV2 RAM accesses are big-endian */
+ bool nv2_mem_be;
+ /*
+ * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
+ * < 0, set by the current instruction.
+ */
+ int8_t btype;
+ /* A copy of cpu->dcz_blocksize. */
+ uint8_t dcz_blocksize;
+ /* A copy of cpu->gm_blocksize. */
+ uint8_t gm_blocksize;
+ /* True if this page is guarded. */
+ bool guarded_page;
+ /* True if the current insn_start has been updated. */
+ bool insn_start_updated;
+ /* Bottom two bits of XScale c15_cpar coprocessor access control reg */
+ int c15_cpar;
+ /* Offset from VNCR_EL2 when FEAT_NV2 redirects this reg to memory */
+ uint32_t nv2_redirect_offset;
+} DisasContext;
+
+typedef struct DisasCompare {
+ TCGCond cond;
+ TCGv_i32 value;
+} DisasCompare;
+
+/* Share the TCG temporaries common between 32 and 64 bit modes. */
+extern TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
+extern TCGv_i64 cpu_exclusive_addr;
+extern TCGv_i64 cpu_exclusive_val;
+
+/*
+ * Constant expanders for the decoders.
+ */
+
+static inline int negate(DisasContext *s, int x)
+{
+ return -x;
+}
+
+static inline int plus_1(DisasContext *s, int x)
+{
+ return x + 1;
+}
+
+static inline int plus_2(DisasContext *s, int x)
+{
+ return x + 2;
+}
+
+static inline int plus_12(DisasContext *s, int x)
+{
+ return x + 12;
+}
+
+static inline int times_2(DisasContext *s, int x)
+{
+ return x * 2;
+}
+
+static inline int times_4(DisasContext *s, int x)
+{
+ return x * 4;
+}
+
+static inline int times_8(DisasContext *s, int x)
+{
+ return x * 8;
+}
+
+static inline int times_2_plus_1(DisasContext *s, int x)
+{
+ return x * 2 + 1;
+}
+
+static inline int rsub_64(DisasContext *s, int x)
+{
+ return 64 - x;
+}
+
+static inline int rsub_32(DisasContext *s, int x)
+{
+ return 32 - x;
+}
+
+static inline int rsub_16(DisasContext *s, int x)
+{
+ return 16 - x;
+}
+
+static inline int rsub_8(DisasContext *s, int x)
+{
+ return 8 - x;
+}
+
+static inline int shl_12(DisasContext *s, int x)
+{
+ return x << 12;
+}
+
+static inline int neon_3same_fp_size(DisasContext *s, int x)
+{
+ /* Convert 0==fp32, 1==fp16 into a MO_* value */
+ return MO_32 - x;
+}
+
+static inline int arm_dc_feature(DisasContext *dc, int feature)
+{
+ return (dc->features & (1ULL << feature)) != 0;
+}
+
+static inline int get_mem_index(DisasContext *s)
+{
+ return arm_to_core_mmu_idx(s->mmu_idx);
+}
+
+static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
+{
+ /* We don't need to save all of the syndrome so we mask and shift
+ * out unneeded bits to help the sleb128 encoder do a better job.
+ */
+ syn &= ARM_INSN_START_WORD2_MASK;
+ syn >>= ARM_INSN_START_WORD2_SHIFT;
+
+ /* Check for multiple updates. */
+ assert(!s->insn_start_updated);
+ s->insn_start_updated = true;
+ tcg_set_insn_start_param(s->base.insn_start, 2, syn);
+}
+
+static inline int curr_insn_len(DisasContext *s)
+{
+ return s->base.pc_next - s->pc_curr;
+}
+
+/* is_jmp field values */
+#define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */
+/* CPU state was modified dynamically; exit to main loop for interrupts. */
+#define DISAS_UPDATE_EXIT DISAS_TARGET_1
+/* These instructions trap after executing, so the A32/T32 decoder must
+ * defer them until after the conditional execution state has been updated.
+ * WFI also needs special handling when single-stepping.
+ */
+#define DISAS_WFI DISAS_TARGET_2
+#define DISAS_SWI DISAS_TARGET_3
+/* WFE */
+#define DISAS_WFE DISAS_TARGET_4
+#define DISAS_HVC DISAS_TARGET_5
+#define DISAS_SMC DISAS_TARGET_6
+#define DISAS_YIELD DISAS_TARGET_7
+/* M profile branch which might be an exception return (and so needs
+ * custom end-of-TB code)
+ */
+#define DISAS_BX_EXCRET DISAS_TARGET_8
+/*
+ * For instructions which want an immediate exit to the main loop, as opposed
+ * to attempting to use lookup_and_goto_ptr. Unlike DISAS_UPDATE_EXIT, this
+ * doesn't write the PC on exiting the translation loop so you need to ensure
+ * something (gen_a64_update_pc or runtime helper) has done so before we reach
+ * return from cpu_tb_exec.
+ */
+#define DISAS_EXIT DISAS_TARGET_9
+/* CPU state was modified dynamically; no need to exit, but do not chain. */
+#define DISAS_UPDATE_NOCHAIN DISAS_TARGET_10
+
+#ifdef TARGET_AARCH64
+void a64_translate_init(void);
+void gen_a64_update_pc(DisasContext *s, target_long diff);
+extern const TranslatorOps aarch64_translator_ops;
+#else
+static inline void a64_translate_init(void)
+{
+}
+
+static inline void gen_a64_update_pc(DisasContext *s, target_long diff)
+{
+}
+#endif
+
+void arm_test_cc(DisasCompare *cmp, int cc);
+void arm_jump_cc(DisasCompare *cmp, TCGLabel *label);
+void arm_gen_test_cc(int cc, TCGLabel *label);
+MemOp pow2_align(unsigned i);
+void unallocated_encoding(DisasContext *s);
+void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
+ uint32_t syn, uint32_t target_el);
+void gen_exception_insn(DisasContext *s, target_long pc_diff,
+ int excp, uint32_t syn);
+
+/* Return state of Alternate Half-precision flag, caller frees result */
+static inline TCGv_i32 get_ahp_flag(void)
+{
+ TCGv_i32 ret = tcg_temp_new_i32();
+
+ tcg_gen_ld_i32(ret, tcg_env,
+ offsetof(CPUARMState, vfp.xregs[ARM_VFP_FPSCR]));
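+ /* FPSCR.AHP is bit [26]. */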
+ tcg_gen_extract_i32(ret, ret, 26, 1);
+
+ return ret;
+}
+
+/* Set bits within PSTATE. */
+static inline void set_pstate_bits(uint32_t bits)
+{
+ TCGv_i32 p = tcg_temp_new_i32();
+
+ tcg_debug_assert(!(bits & CACHED_PSTATE_BITS));
+
+ tcg_gen_ld_i32(p, tcg_env, offsetof(CPUARMState, pstate));
+ tcg_gen_ori_i32(p, p, bits);
+ tcg_gen_st_i32(p, tcg_env, offsetof(CPUARMState, pstate));
+}
+
+/* Clear bits within PSTATE. */
+static inline void clear_pstate_bits(uint32_t bits)
+{
+ TCGv_i32 p = tcg_temp_new_i32();
+
+ tcg_debug_assert(!(bits & CACHED_PSTATE_BITS));
+
+ tcg_gen_ld_i32(p, tcg_env, offsetof(CPUARMState, pstate));
+ tcg_gen_andi_i32(p, p, ~bits);
+ tcg_gen_st_i32(p, tcg_env, offsetof(CPUARMState, pstate));
+}
+
+/* If the singlestep state is Active-not-pending, advance to Active-pending. */
+static inline void gen_ss_advance(DisasContext *s)
+{
+ if (s->ss_active) {
+ s->pstate_ss = 0;
+ clear_pstate_bits(PSTATE_SS);
+ }
+}
+
+/* Generate an architectural singlestep exception */
+static inline void gen_swstep_exception(DisasContext *s, int isv, int ex)
+{
+ /* Fill in the same_el field of the syndrome in the helper. */
+ uint32_t syn = syn_swstep(false, isv, ex);
+ gen_helper_exception_swstep(tcg_env, tcg_constant_i32(syn));
+}
+
+/*
+ * Given a VFP floating point constant encoded into an 8 bit immediate in an
+ * instruction, expand it to the actual constant value of the specified
+ * size, as per the VFPExpandImm() pseudocode in the Arm ARM.
+ */
+uint64_t vfp_expand_imm(int size, uint8_t imm8);
+
+/* Vector operations shared between ARM and AArch64. */
+void gen_gvec_ceq0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_clt0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_cgt0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_cle0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_cge0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+
+void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+
+void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
+
+/*
+ * Forward to the isar_feature_* tests given a DisasContext pointer.
+ */
+#define dc_isar_feature(name, ctx) \
+ ({ DisasContext *ctx_ = (ctx); isar_feature_##name(ctx_->isar); })
+
+/* Note that the gvec expanders operate on offsets + sizes. */
+typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
+typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
+ uint32_t, uint32_t);
+typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t, uint32_t);
+typedef void GVecGen4Fn(unsigned, uint32_t, uint32_t, uint32_t,
+ uint32_t, uint32_t, uint32_t);
+
+/* Function prototype for gen_ functions for calling Neon helpers */
+typedef void NeonGenOneOpFn(TCGv_i32, TCGv_i32);
+typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
+typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
+typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
+typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
+ TCGv_i32, TCGv_i32);
+typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
+typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
+typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
+typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
+typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
+typedef void NeonGenTwoOpWidenFn(TCGv_i64, TCGv_i32, TCGv_i32);
+typedef void NeonGenOneSingleOpFn(TCGv_i32, TCGv_i32, TCGv_ptr);
+typedef void NeonGenTwoSingleOpFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
+typedef void NeonGenTwoDoubleOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
+typedef void NeonGenOne64OpFn(TCGv_i64, TCGv_i64);
+typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
+typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
+typedef void WideShiftImmFn(TCGv_i64, TCGv_i64, int64_t shift);
+typedef void WideShiftFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i32);
+typedef void ShiftImmFn(TCGv_i32, TCGv_i32, int32_t shift);
+typedef void ShiftFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
+
+/**
+ * arm_tbflags_from_tb:
+ * @tb: the TranslationBlock
+ *
+ * Extract the flag values from @tb.
+ */
+static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb)
+{
+ return (CPUARMTBFlags){ tb->flags, tb->cs_base };
+}
+
+/*
+ * Enum for argument to fpstatus_ptr().
+ */
+typedef enum ARMFPStatusFlavour {
+ FPST_FPCR,
+ FPST_FPCR_F16,
+ FPST_STD,
+ FPST_STD_F16,
+} ARMFPStatusFlavour;
+
+/**
+ * fpstatus_ptr: return TCGv_ptr to the specified fp_status field
+ *
+ * We have multiple softfloat float_status fields in the Arm CPU state struct
+ * (see the comment in cpu.h for details). Return a TCGv_ptr which has
+ * been set up to point to the requested field in the CPU state struct.
+ * The options are:
+ *
+ * FPST_FPCR
+ * for non-FP16 operations controlled by the FPCR
+ * FPST_FPCR_F16
+ * for operations controlled by the FPCR where FPCR.FZ16 is to be used
+ * FPST_STD
+ * for A32/T32 Neon operations using the "standard FPSCR value"
+ * FPST_STD_F16
+ * as FPST_STD, but where FPCR.FZ16 is to be used
+ */
+static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
+{
+ TCGv_ptr statusptr = tcg_temp_new_ptr();
+ int offset;
+
+ switch (flavour) {
+ case FPST_FPCR:
+ offset = offsetof(CPUARMState, vfp.fp_status);
+ break;
+ case FPST_FPCR_F16:
+ offset = offsetof(CPUARMState, vfp.fp_status_f16);
+ break;
+ case FPST_STD:
+ offset = offsetof(CPUARMState, vfp.standard_fp_status);
+ break;
+ case FPST_STD_F16:
+ offset = offsetof(CPUARMState, vfp.standard_fp_status_f16);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_gen_addi_ptr(statusptr, tcg_env, offset);
+ return statusptr;
+}
+
+/**
+ * finalize_memop_atom:
+ * @s: DisasContext
+ * @opc: size+sign+align of the memory operation
+ * @atom: atomicity of the memory operation
+ *
+ * Build the complete MemOp for a memory operation, including alignment,
+ * endianness, and atomicity.
+ *
+ * If (op & MO_AMASK) then the operation already contains the required
+ * alignment, e.g. for AccType_ATOMIC. Otherwise, this is an optionally
+ * unaligned operation, e.g. for AccType_NORMAL.
+ *
+ * In the latter case, there are configuration bits that require alignment,
+ * and this is applied here. Note that there is no way to indicate that
+ * no alignment should ever be enforced; this must be handled manually.
+ */
+static inline MemOp finalize_memop_atom(DisasContext *s, MemOp opc, MemOp atom)
+{
+ if (s->align_mem && !(opc & MO_AMASK)) {
+ opc |= MO_ALIGN;
+ }
+ return opc | atom | s->be_data;
+}
+
+/**
+ * finalize_memop:
+ * @s: DisasContext
+ * @opc: size+sign+align of the memory operation
+ *
+ * Like finalize_memop_atom, but with default atomicity.
+ */
+static inline MemOp finalize_memop(DisasContext *s, MemOp opc)
+{
+ MemOp atom = s->lse2 ? MO_ATOM_WITHIN16 : MO_ATOM_IFALIGN;
+ return finalize_memop_atom(s, opc, atom);
+}
+
+/**
+ * finalize_memop_pair:
+ * @s: DisasContext
+ * @opc: size+sign+align of the memory operation
+ *
+ * Like finalize_memop_atom, but with atomicity for a pair.
+ * C.f. Pseudocode for Mem[], operand ispair.
+ */
+static inline MemOp finalize_memop_pair(DisasContext *s, MemOp opc)
+{
+ MemOp atom = s->lse2 ? MO_ATOM_WITHIN16_PAIR : MO_ATOM_IFALIGN_PAIR;
+ return finalize_memop_atom(s, opc, atom);
+}
+
+/**
+ * finalize_memop_asimd:
+ * @s: DisasContext
+ * @opc: size+sign+align of the memory operation
+ *
+ * Like finalize_memop_atom, but with atomicity of AccessType_ASIMD.
+ */
+static inline MemOp finalize_memop_asimd(DisasContext *s, MemOp opc)
+{
+ /*
+ * In the pseudocode for Mem[], with AccessType_ASIMD, size == 16,
+ * if IsAligned(8), the first case provides separate atomicity for
+ * the pair of 64-bit accesses. If !IsAligned(8), the middle cases
+ * do not apply, and we're left with the final case of no atomicity.
+ * Thus MO_ATOM_IFALIGN_PAIR.
+ *
+ * For other sizes, normal LSE2 rules apply.
+ */
+ if ((opc & MO_SIZE) == MO_128) {
+ return finalize_memop_atom(s, opc, MO_ATOM_IFALIGN_PAIR);
+ }
+ return finalize_memop(s, opc);
+}
+
+/**
+ * asimd_imm_const: Expand an encoded SIMD constant value
+ *
+ * Expand a SIMD constant value. This is essentially the pseudocode
+ * AdvSIMDExpandImm, except that we also perform the boolean NOT needed for
+ * VMVN and VBIC (when cmode < 14 && op == 1).
+ *
+ * The combination cmode == 15 op == 1 is a reserved encoding for AArch32;
+ * callers must catch this; we return the 64-bit constant value defined
+ * for AArch64.
+ *
+ * cmode = 2,3,4,5,6,7,10,11,12,13 imm=0 was UNPREDICTABLE in v7A but
+ * is either not unpredictable or merely CONSTRAINED UNPREDICTABLE in v8A;
+ * we produce an immediate constant value of 0 in these cases.
+ */
+uint64_t asimd_imm_const(uint32_t imm, int cmode, int op);
+
+/*
+ * gen_disas_label:
+ * Create a label and cache a copy of pc_save.
+ */
+static inline DisasLabel gen_disas_label(DisasContext *s)
+{
+ return (DisasLabel){
+ .label = gen_new_label(),
+ .pc_save = s->pc_save,
+ };
+}
+
+/*
+ * set_disas_label:
+ * Emit a label and restore the cached copy of pc_save.
+ */
+static inline void set_disas_label(DisasContext *s, DisasLabel l)
+{
+ gen_set_label(l.label);
+ s->pc_save = l.pc_save;
+}
+
+static inline TCGv_ptr gen_lookup_cp_reg(uint32_t key)
+{
+ TCGv_ptr ret = tcg_temp_new_ptr();
+ gen_helper_lookup_cp_reg(ret, tcg_env, tcg_constant_i32(key));
+ return ret;
+}
+
+/*
+ * Set and reset rounding mode around another operation.
+ */
+static inline TCGv_i32 gen_set_rmode(ARMFPRounding rmode, TCGv_ptr fpst)
+{
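+ /*
+ * arm_rmode_to_sf() maps the Arm rounding mode to softfloat's enum;
+ * the helper returns the previous mode for gen_restore_rmode().
+ */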
+ TCGv_i32 new = tcg_constant_i32(arm_rmode_to_sf(rmode));
+ TCGv_i32 old = tcg_temp_new_i32();
+
+ gen_helper_set_rmode(old, new, fpst);
+ return old;
+}
+
+static inline void gen_restore_rmode(TCGv_i32 old, TCGv_ptr fpst)
+{
+ gen_helper_set_rmode(old, old, fpst);
+}
+
+/*
+ * Helpers for implementing sets of trans_* functions.
+ * Defer the implementation of NAME to FUNC, with optional extra arguments.
+ */
+#define TRANS(NAME, FUNC, ...) \
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
+ { return FUNC(s, __VA_ARGS__); }
+#define TRANS_FEAT(NAME, FEAT, FUNC, ...) \
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
+ { return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); }
+
+#define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...) \
+ static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
+ { \
+ s->is_nonstreaming = true; \
+ return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); \
+ }
+
+#endif /* TARGET_ARM_TRANSLATE_H */
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
new file mode 100644
index 0000000000..1f93510b85
--- /dev/null
+++ b/target/arm/tcg/vec_helper.c
@@ -0,0 +1,2636 @@
+/*
+ * ARM AdvSIMD / SVE Vector Operations
+ *
+ * Copyright (c) 2018 Linaro
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/helper-proto.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "fpu/softfloat.h"
+#include "qemu/int128.h"
+#include "crypto/clmul.h"
+#include "vec_internal.h"
+
+/*
+ * Data for expanding active predicate bits to bytes, for byte elements.
+ *
+ * for (i = 0; i < 256; ++i) {
+ * unsigned long m = 0;
+ * for (j = 0; j < 8; j++) {
+ * if ((i >> j) & 1) {
+ * m |= 0xfful << (j << 3);
+ * }
+ * }
+ * printf("0x%016lx,\n", m);
+ * }
+ */
+const uint64_t expand_pred_b_data[256] = {
+ 0x0000000000000000, 0x00000000000000ff, 0x000000000000ff00,
+ 0x000000000000ffff, 0x0000000000ff0000, 0x0000000000ff00ff,
+ 0x0000000000ffff00, 0x0000000000ffffff, 0x00000000ff000000,
+ 0x00000000ff0000ff, 0x00000000ff00ff00, 0x00000000ff00ffff,
+ 0x00000000ffff0000, 0x00000000ffff00ff, 0x00000000ffffff00,
+ 0x00000000ffffffff, 0x000000ff00000000, 0x000000ff000000ff,
+ 0x000000ff0000ff00, 0x000000ff0000ffff, 0x000000ff00ff0000,
+ 0x000000ff00ff00ff, 0x000000ff00ffff00, 0x000000ff00ffffff,
+ 0x000000ffff000000, 0x000000ffff0000ff, 0x000000ffff00ff00,
+ 0x000000ffff00ffff, 0x000000ffffff0000, 0x000000ffffff00ff,
+ 0x000000ffffffff00, 0x000000ffffffffff, 0x0000ff0000000000,
+ 0x0000ff00000000ff, 0x0000ff000000ff00, 0x0000ff000000ffff,
+ 0x0000ff0000ff0000, 0x0000ff0000ff00ff, 0x0000ff0000ffff00,
+ 0x0000ff0000ffffff, 0x0000ff00ff000000, 0x0000ff00ff0000ff,
+ 0x0000ff00ff00ff00, 0x0000ff00ff00ffff, 0x0000ff00ffff0000,
+ 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0x0000ff00ffffffff,
+ 0x0000ffff00000000, 0x0000ffff000000ff, 0x0000ffff0000ff00,
+ 0x0000ffff0000ffff, 0x0000ffff00ff0000, 0x0000ffff00ff00ff,
+ 0x0000ffff00ffff00, 0x0000ffff00ffffff, 0x0000ffffff000000,
+ 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0x0000ffffff00ffff,
+ 0x0000ffffffff0000, 0x0000ffffffff00ff, 0x0000ffffffffff00,
+ 0x0000ffffffffffff, 0x00ff000000000000, 0x00ff0000000000ff,
+ 0x00ff00000000ff00, 0x00ff00000000ffff, 0x00ff000000ff0000,
+ 0x00ff000000ff00ff, 0x00ff000000ffff00, 0x00ff000000ffffff,
+ 0x00ff0000ff000000, 0x00ff0000ff0000ff, 0x00ff0000ff00ff00,
+ 0x00ff0000ff00ffff, 0x00ff0000ffff0000, 0x00ff0000ffff00ff,
+ 0x00ff0000ffffff00, 0x00ff0000ffffffff, 0x00ff00ff00000000,
+ 0x00ff00ff000000ff, 0x00ff00ff0000ff00, 0x00ff00ff0000ffff,
+ 0x00ff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00,
+ 0x00ff00ff00ffffff, 0x00ff00ffff000000, 0x00ff00ffff0000ff,
+ 0x00ff00ffff00ff00, 0x00ff00ffff00ffff, 0x00ff00ffffff0000,
+ 0x00ff00ffffff00ff, 0x00ff00ffffffff00, 0x00ff00ffffffffff,
+ 0x00ffff0000000000, 0x00ffff00000000ff, 0x00ffff000000ff00,
+ 0x00ffff000000ffff, 0x00ffff0000ff0000, 0x00ffff0000ff00ff,
+ 0x00ffff0000ffff00, 0x00ffff0000ffffff, 0x00ffff00ff000000,
+ 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0x00ffff00ff00ffff,
+ 0x00ffff00ffff0000, 0x00ffff00ffff00ff, 0x00ffff00ffffff00,
+ 0x00ffff00ffffffff, 0x00ffffff00000000, 0x00ffffff000000ff,
+ 0x00ffffff0000ff00, 0x00ffffff0000ffff, 0x00ffffff00ff0000,
+ 0x00ffffff00ff00ff, 0x00ffffff00ffff00, 0x00ffffff00ffffff,
+ 0x00ffffffff000000, 0x00ffffffff0000ff, 0x00ffffffff00ff00,
+ 0x00ffffffff00ffff, 0x00ffffffffff0000, 0x00ffffffffff00ff,
+ 0x00ffffffffffff00, 0x00ffffffffffffff, 0xff00000000000000,
+ 0xff000000000000ff, 0xff0000000000ff00, 0xff0000000000ffff,
+ 0xff00000000ff0000, 0xff00000000ff00ff, 0xff00000000ffff00,
+ 0xff00000000ffffff, 0xff000000ff000000, 0xff000000ff0000ff,
+ 0xff000000ff00ff00, 0xff000000ff00ffff, 0xff000000ffff0000,
+ 0xff000000ffff00ff, 0xff000000ffffff00, 0xff000000ffffffff,
+ 0xff0000ff00000000, 0xff0000ff000000ff, 0xff0000ff0000ff00,
+ 0xff0000ff0000ffff, 0xff0000ff00ff0000, 0xff0000ff00ff00ff,
+ 0xff0000ff00ffff00, 0xff0000ff00ffffff, 0xff0000ffff000000,
+ 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0xff0000ffff00ffff,
+ 0xff0000ffffff0000, 0xff0000ffffff00ff, 0xff0000ffffffff00,
+ 0xff0000ffffffffff, 0xff00ff0000000000, 0xff00ff00000000ff,
+ 0xff00ff000000ff00, 0xff00ff000000ffff, 0xff00ff0000ff0000,
+ 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0xff00ff0000ffffff,
+ 0xff00ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00,
+ 0xff00ff00ff00ffff, 0xff00ff00ffff0000, 0xff00ff00ffff00ff,
+ 0xff00ff00ffffff00, 0xff00ff00ffffffff, 0xff00ffff00000000,
+ 0xff00ffff000000ff, 0xff00ffff0000ff00, 0xff00ffff0000ffff,
+ 0xff00ffff00ff0000, 0xff00ffff00ff00ff, 0xff00ffff00ffff00,
+ 0xff00ffff00ffffff, 0xff00ffffff000000, 0xff00ffffff0000ff,
+ 0xff00ffffff00ff00, 0xff00ffffff00ffff, 0xff00ffffffff0000,
+ 0xff00ffffffff00ff, 0xff00ffffffffff00, 0xff00ffffffffffff,
+ 0xffff000000000000, 0xffff0000000000ff, 0xffff00000000ff00,
+ 0xffff00000000ffff, 0xffff000000ff0000, 0xffff000000ff00ff,
+ 0xffff000000ffff00, 0xffff000000ffffff, 0xffff0000ff000000,
+ 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0xffff0000ff00ffff,
+ 0xffff0000ffff0000, 0xffff0000ffff00ff, 0xffff0000ffffff00,
+ 0xffff0000ffffffff, 0xffff00ff00000000, 0xffff00ff000000ff,
+ 0xffff00ff0000ff00, 0xffff00ff0000ffff, 0xffff00ff00ff0000,
+ 0xffff00ff00ff00ff, 0xffff00ff00ffff00, 0xffff00ff00ffffff,
+ 0xffff00ffff000000, 0xffff00ffff0000ff, 0xffff00ffff00ff00,
+ 0xffff00ffff00ffff, 0xffff00ffffff0000, 0xffff00ffffff00ff,
+ 0xffff00ffffffff00, 0xffff00ffffffffff, 0xffffff0000000000,
+ 0xffffff00000000ff, 0xffffff000000ff00, 0xffffff000000ffff,
+ 0xffffff0000ff0000, 0xffffff0000ff00ff, 0xffffff0000ffff00,
+ 0xffffff0000ffffff, 0xffffff00ff000000, 0xffffff00ff0000ff,
+ 0xffffff00ff00ff00, 0xffffff00ff00ffff, 0xffffff00ffff0000,
+ 0xffffff00ffff00ff, 0xffffff00ffffff00, 0xffffff00ffffffff,
+ 0xffffffff00000000, 0xffffffff000000ff, 0xffffffff0000ff00,
+ 0xffffffff0000ffff, 0xffffffff00ff0000, 0xffffffff00ff00ff,
+ 0xffffffff00ffff00, 0xffffffff00ffffff, 0xffffffffff000000,
+ 0xffffffffff0000ff, 0xffffffffff00ff00, 0xffffffffff00ffff,
+ 0xffffffffffff0000, 0xffffffffffff00ff, 0xffffffffffffff00,
+ 0xffffffffffffffff,
+};
+
+/*
+ * Similarly for half-word elements.
+ * for (i = 0; i < 256; ++i) {
+ * unsigned long m = 0;
+ * if (i & 0xaa) {
+ * continue;
+ * }
+ * for (j = 0; j < 8; j += 2) {
+ * if ((i >> j) & 1) {
+ * m |= 0xfffful << (j << 3);
+ * }
+ * }
+ * printf("[0x%x] = 0x%016lx,\n", i, m);
+ * }
+ */
+const uint64_t expand_pred_h_data[0x55 + 1] = {
+ [0x01] = 0x000000000000ffff, [0x04] = 0x00000000ffff0000,
+ [0x05] = 0x00000000ffffffff, [0x10] = 0x0000ffff00000000,
+ [0x11] = 0x0000ffff0000ffff, [0x14] = 0x0000ffffffff0000,
+ [0x15] = 0x0000ffffffffffff, [0x40] = 0xffff000000000000,
+ [0x41] = 0xffff00000000ffff, [0x44] = 0xffff0000ffff0000,
+ [0x45] = 0xffff0000ffffffff, [0x50] = 0xffffffff00000000,
+ [0x51] = 0xffffffff0000ffff, [0x54] = 0xffffffffffff0000,
+ [0x55] = 0xffffffffffffffff,
+};
+
+/* Signed saturating rounding doubling multiply-accumulate high half, 8-bit */
+int8_t do_sqrdmlah_b(int8_t src1, int8_t src2, int8_t src3,
+ bool neg, bool round)
+{
+ /*
+ * Simplify (writing e1 = src1, e2 = src2, a3 = src3):
+ * = ((a3 << 8) + ((e1 * e2) << 1) + (round << 7)) >> 8
+ * = ((a3 << 7) + (e1 * e2) + (round << 6)) >> 7
+ */
+ int32_t ret = (int32_t)src1 * src2;
+ if (neg) {
+ ret = -ret;
+ }
+ ret += ((int32_t)src3 << 7) + (round << 6);
+ ret >>= 7;
+
+ if (ret != (int8_t)ret) {
+ ret = (ret < 0 ? INT8_MIN : INT8_MAX);
+ }
+ return ret;
+}
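+
+/*
+ * For example, with round = true and src3 = 0 (SQRDMULH semantics):
+ *   0x40 * 0x40: (64 * 64 + (1 << 6)) >> 7 = 32
+ *   0x80 * 0x80: (-128 * -128 + (1 << 6)) >> 7 = 128, which does not
+ *   fit in int8_t and so saturates to INT8_MAX.
+ */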
+
+void HELPER(sve2_sqrdmlah_b)(void *vd, void *vn, void *vm,
+ void *va, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int8_t *d = vd, *n = vn, *m = vm, *a = va;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = do_sqrdmlah_b(n[i], m[i], a[i], false, true);
+ }
+}
+
+void HELPER(sve2_sqrdmlsh_b)(void *vd, void *vn, void *vm,
+ void *va, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int8_t *d = vd, *n = vn, *m = vm, *a = va;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = do_sqrdmlah_b(n[i], m[i], a[i], true, true);
+ }
+}
+
+void HELPER(sve2_sqdmulh_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int8_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = do_sqrdmlah_b(n[i], m[i], 0, false, false);
+ }
+}
+
+void HELPER(sve2_sqrdmulh_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int8_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = do_sqrdmlah_b(n[i], m[i], 0, false, true);
+ }
+}
+
+/* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */
+int16_t do_sqrdmlah_h(int16_t src1, int16_t src2, int16_t src3,
+ bool neg, bool round, uint32_t *sat)
+{
+ /* Simplify similarly to do_sqrdmlah_b above. */
+ int32_t ret = (int32_t)src1 * src2;
+ if (neg) {
+ ret = -ret;
+ }
+ ret += ((int32_t)src3 << 15) + (round << 14);
+ ret >>= 15;
+
+ if (ret != (int16_t)ret) {
+ *sat = 1;
+ ret = (ret < 0 ? INT16_MIN : INT16_MAX);
+ }
+ return ret;
+}
+
+uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1,
+ uint32_t src2, uint32_t src3)
+{
+ uint32_t *sat = &env->vfp.qc[0];
+ uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, false, true, sat);
+ uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16,
+ false, true, sat);
+ return deposit32(e1, 16, 16, e2);
+}
+
+void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ uintptr_t opr_sz = simd_oprsz(desc);
+ int16_t *d = vd;
+ int16_t *n = vn;
+ int16_t *m = vm;
+ uintptr_t i;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = do_sqrdmlah_h(n[i], m[i], d[i], false, true, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1,
+ uint32_t src2, uint32_t src3)
+{
+ uint32_t *sat = &env->vfp.qc[0];
+ uint16_t e1 = do_sqrdmlah_h(src1, src2, src3, true, true, sat);
+ uint16_t e2 = do_sqrdmlah_h(src1 >> 16, src2 >> 16, src3 >> 16,
+ true, true, sat);
+ return deposit32(e1, 16, 16, e2);
+}
+
+void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ uintptr_t opr_sz = simd_oprsz(desc);
+ int16_t *d = vd;
+ int16_t *n = vn;
+ int16_t *m = vm;
+ uintptr_t i;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = do_sqrdmlah_h(n[i], m[i], d[i], true, true, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqdmulh_h)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, false, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqrdmulh_h)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, true, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(sve2_sqrdmlah_h)(void *vd, void *vn, void *vm,
+ void *va, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm, *a = va;
+ uint32_t discard;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = do_sqrdmlah_h(n[i], m[i], a[i], false, true, &discard);
+ }
+}
+
+void HELPER(sve2_sqrdmlsh_h)(void *vd, void *vn, void *vm,
+ void *va, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm, *a = va;
+ uint32_t discard;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = do_sqrdmlah_h(n[i], m[i], a[i], true, true, &discard);
+ }
+}
+
+void HELPER(sve2_sqdmulh_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm;
+ uint32_t discard;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, false, &discard);
+ }
+}
+
+void HELPER(sve2_sqrdmulh_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm;
+ uint32_t discard;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, true, &discard);
+ }
+}
+
+void HELPER(sve2_sqdmulh_idx_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ int idx = simd_data(desc);
+ int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx);
+ uint32_t discard;
+
+ for (i = 0; i < opr_sz / 2; i += 16 / 2) {
+ int16_t mm = m[i];
+ for (j = 0; j < 16 / 2; ++j) {
+ d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, &discard);
+ }
+ }
+}
+
+void HELPER(sve2_sqrdmulh_idx_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ int idx = simd_data(desc);
+ int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx);
+ uint32_t discard;
+
+ for (i = 0; i < opr_sz / 2; i += 16 / 2) {
+ int16_t mm = m[i];
+ for (j = 0; j < 16 / 2; ++j) {
+ d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, &discard);
+ }
+ }
+}
+
+/* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */
+int32_t do_sqrdmlah_s(int32_t src1, int32_t src2, int32_t src3,
+ bool neg, bool round, uint32_t *sat)
+{
+ /* Simplify similarly to do_sqrdmlah_b above. */
+ int64_t ret = (int64_t)src1 * src2;
+ if (neg) {
+ ret = -ret;
+ }
+ ret += ((int64_t)src3 << 31) + (round << 30);
+ ret >>= 31;
+
+ if (ret != (int32_t)ret) {
+ *sat = 1;
+ ret = (ret < 0 ? INT32_MIN : INT32_MAX);
+ }
+ return ret;
+}
+
+uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
+ int32_t src2, int32_t src3)
+{
+ uint32_t *sat = &env->vfp.qc[0];
+ return do_sqrdmlah_s(src1, src2, src3, false, true, sat);
+}
+
+void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ uintptr_t opr_sz = simd_oprsz(desc);
+ int32_t *d = vd;
+ int32_t *n = vn;
+ int32_t *m = vm;
+ uintptr_t i;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = do_sqrdmlah_s(n[i], m[i], d[i], false, true, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
+ int32_t src2, int32_t src3)
+{
+ uint32_t *sat = &env->vfp.qc[0];
+ return do_sqrdmlah_s(src1, src2, src3, true, true, sat);
+}
+
+void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ uintptr_t opr_sz = simd_oprsz(desc);
+ int32_t *d = vd;
+ int32_t *n = vn;
+ int32_t *m = vm;
+ uintptr_t i;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = do_sqrdmlah_s(n[i], m[i], d[i], true, true, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqdmulh_s)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int32_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, false, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqrdmulh_s)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int32_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, true, vq);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(sve2_sqrdmlah_s)(void *vd, void *vn, void *vm,
+ void *va, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int32_t *d = vd, *n = vn, *m = vm, *a = va;
+ uint32_t discard;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = do_sqrdmlah_s(n[i], m[i], a[i], false, true, &discard);
+ }
+}
+
+void HELPER(sve2_sqrdmlsh_s)(void *vd, void *vn, void *vm,
+ void *va, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int32_t *d = vd, *n = vn, *m = vm, *a = va;
+ uint32_t discard;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = do_sqrdmlah_s(n[i], m[i], a[i], true, true, &discard);
+ }
+}
+
+void HELPER(sve2_sqdmulh_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int32_t *d = vd, *n = vn, *m = vm;
+ uint32_t discard;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, false, &discard);
+ }
+}
+
+void HELPER(sve2_sqrdmulh_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int32_t *d = vd, *n = vn, *m = vm;
+ uint32_t discard;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, true, &discard);
+ }
+}
+
+void HELPER(sve2_sqdmulh_idx_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ int idx = simd_data(desc);
+ int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx);
+ uint32_t discard;
+
+ for (i = 0; i < opr_sz / 4; i += 16 / 4) {
+ int32_t mm = m[i];
+ for (j = 0; j < 16 / 4; ++j) {
+ d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, &discard);
+ }
+ }
+}
+
+void HELPER(sve2_sqrdmulh_idx_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ int idx = simd_data(desc);
+ int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx);
+ uint32_t discard;
+
+ for (i = 0; i < opr_sz / 4; i += 16 / 4) {
+ int32_t mm = m[i];
+ for (j = 0; j < 16 / 4; ++j) {
+ d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, &discard);
+ }
+ }
+}
+
+/* Signed saturating rounding doubling multiply-accumulate high half, 64-bit */
+static int64_t do_sat128_d(Int128 r)
+{
+ int64_t ls = int128_getlo(r);
+ int64_t hs = int128_gethi(r);
+
+ if (unlikely(hs != (ls >> 63))) {
+ return hs < 0 ? INT64_MIN : INT64_MAX;
+ }
+ return ls;
+}
+
+int64_t do_sqrdmlah_d(int64_t n, int64_t m, int64_t a, bool neg, bool round)
+{
+ uint64_t l, h;
+ Int128 r, t;
+
+ /* As in do_sqrdmlah_b, but with 128-bit arithmetic. */
+ muls64(&l, &h, m, n);
+ r = int128_make128(l, h);
+ if (neg) {
+ r = int128_neg(r);
+ }
+ if (a) {
+ t = int128_exts64(a);
+ t = int128_lshift(t, 63);
+ r = int128_add(r, t);
+ }
+ if (round) {
+ t = int128_exts64(1ll << 62);
+ r = int128_add(r, t);
+ }
+ r = int128_rshift(r, 63);
+
+ return do_sat128_d(r);
+}
+
+void HELPER(sve2_sqrdmlah_d)(void *vd, void *vn, void *vm,
+ void *va, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int64_t *d = vd, *n = vn, *m = vm, *a = va;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ d[i] = do_sqrdmlah_d(n[i], m[i], a[i], false, true);
+ }
+}
+
+void HELPER(sve2_sqrdmlsh_d)(void *vd, void *vn, void *vm,
+ void *va, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int64_t *d = vd, *n = vn, *m = vm, *a = va;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ d[i] = do_sqrdmlah_d(n[i], m[i], a[i], true, true);
+ }
+}
+
+void HELPER(sve2_sqdmulh_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ d[i] = do_sqrdmlah_d(n[i], m[i], 0, false, false);
+ }
+}
+
+void HELPER(sve2_sqrdmulh_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ d[i] = do_sqrdmlah_d(n[i], m[i], 0, false, true);
+ }
+}
+
+void HELPER(sve2_sqdmulh_idx_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ int idx = simd_data(desc);
+ int64_t *d = vd, *n = vn, *m = (int64_t *)vm + idx;
+
+ for (i = 0; i < opr_sz / 8; i += 16 / 8) {
+ int64_t mm = m[i];
+ for (j = 0; j < 16 / 8; ++j) {
+ d[i + j] = do_sqrdmlah_d(n[i + j], mm, 0, false, false);
+ }
+ }
+}
+
+void HELPER(sve2_sqrdmulh_idx_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ int idx = simd_data(desc);
+ int64_t *d = vd, *n = vn, *m = (int64_t *)vm + idx;
+
+ for (i = 0; i < opr_sz / 8; i += 16 / 8) {
+ int64_t mm = m[i];
+ for (j = 0; j < 16 / 8; ++j) {
+ d[i + j] = do_sqrdmlah_d(n[i + j], mm, 0, false, true);
+ }
+ }
+}
+
+/* Integer 8- and 16-bit dot-product.
+ *
+ * Note that for the loops herein, host endianness does not matter
+ * with respect to the ordering of data within the quad-width lanes.
+ * All elements are treated equally, no matter where they are.
+ */
+
+#define DO_DOT(NAME, TYPED, TYPEN, TYPEM) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ TYPED *d = vd, *a = va; \
+ TYPEN *n = vn; \
+ TYPEM *m = vm; \
+ for (i = 0; i < opr_sz / sizeof(TYPED); ++i) { \
+ d[i] = (a[i] + \
+ (TYPED)n[i * 4 + 0] * m[i * 4 + 0] + \
+ (TYPED)n[i * 4 + 1] * m[i * 4 + 1] + \
+ (TYPED)n[i * 4 + 2] * m[i * 4 + 2] + \
+ (TYPED)n[i * 4 + 3] * m[i * 4 + 3]); \
+ } \
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
+}
+
+DO_DOT(gvec_sdot_b, int32_t, int8_t, int8_t)
+DO_DOT(gvec_udot_b, uint32_t, uint8_t, uint8_t)
+DO_DOT(gvec_usdot_b, uint32_t, uint8_t, int8_t)
+DO_DOT(gvec_sdot_h, int64_t, int16_t, int16_t)
+DO_DOT(gvec_udot_h, uint64_t, uint16_t, uint16_t)
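+
+/*
+ * For example, in the byte variants each 32-bit lane of the result is
+ *   d = a + n0*m0 + n1*m1 + n2*m2 + n3*m3
+ * so with a = 0, n = {1,2,3,4} and m = {5,6,7,8} a lane yields 70.
+ */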
+
+#define DO_DOT_IDX(NAME, TYPED, TYPEN, TYPEM, HD) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t i = 0, opr_sz = simd_oprsz(desc); \
+ intptr_t opr_sz_n = opr_sz / sizeof(TYPED); \
+ intptr_t segend = MIN(16 / sizeof(TYPED), opr_sz_n); \
+ intptr_t index = simd_data(desc); \
+ TYPED *d = vd, *a = va; \
+ TYPEN *n = vn; \
+ TYPEM *m_indexed = (TYPEM *)vm + HD(index) * 4; \
+ do { \
+ TYPED m0 = m_indexed[i * 4 + 0]; \
+ TYPED m1 = m_indexed[i * 4 + 1]; \
+ TYPED m2 = m_indexed[i * 4 + 2]; \
+ TYPED m3 = m_indexed[i * 4 + 3]; \
+ do { \
+ d[i] = (a[i] + \
+ n[i * 4 + 0] * m0 + \
+ n[i * 4 + 1] * m1 + \
+ n[i * 4 + 2] * m2 + \
+ n[i * 4 + 3] * m3); \
+ } while (++i < segend); \
+ segend = i + 4; \
+ } while (i < opr_sz_n); \
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
+}
+
+DO_DOT_IDX(gvec_sdot_idx_b, int32_t, int8_t, int8_t, H4)
+DO_DOT_IDX(gvec_udot_idx_b, uint32_t, uint8_t, uint8_t, H4)
+DO_DOT_IDX(gvec_sudot_idx_b, int32_t, int8_t, uint8_t, H4)
+DO_DOT_IDX(gvec_usdot_idx_b, int32_t, uint8_t, int8_t, H4)
+DO_DOT_IDX(gvec_sdot_idx_h, int64_t, int16_t, int16_t, H8)
+DO_DOT_IDX(gvec_udot_idx_h, uint64_t, uint16_t, uint16_t, H8)
+
+void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm,
+ void *vfpst, uint32_t desc)
+{
+ uintptr_t opr_sz = simd_oprsz(desc);
+ float16 *d = vd;
+ float16 *n = vn;
+ float16 *m = vm;
+ float_status *fpst = vfpst;
+ uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t neg_imag = neg_real ^ 1;
+ uintptr_t i;
+
+ /* Shift boolean to the sign bit so we can xor to negate. */
+ neg_real <<= 15;
+ neg_imag <<= 15;
+
+ for (i = 0; i < opr_sz / 2; i += 2) {
+ float16 e0 = n[H2(i)];
+ float16 e1 = m[H2(i + 1)] ^ neg_imag;
+ float16 e2 = n[H2(i + 1)];
+ float16 e3 = m[H2(i)] ^ neg_real;
+
+ d[H2(i)] = float16_add(e0, e1, fpst);
+ d[H2(i + 1)] = float16_add(e2, e3, fpst);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm,
+ void *vfpst, uint32_t desc)
+{
+ uintptr_t opr_sz = simd_oprsz(desc);
+ float32 *d = vd;
+ float32 *n = vn;
+ float32 *m = vm;
+ float_status *fpst = vfpst;
+ uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t neg_imag = neg_real ^ 1;
+ uintptr_t i;
+
+ /* Shift boolean to the sign bit so we can xor to negate. */
+ neg_real <<= 31;
+ neg_imag <<= 31;
+
+ for (i = 0; i < opr_sz / 4; i += 2) {
+ float32 e0 = n[H4(i)];
+ float32 e1 = m[H4(i + 1)] ^ neg_imag;
+ float32 e2 = n[H4(i + 1)];
+ float32 e3 = m[H4(i)] ^ neg_real;
+
+ d[H4(i)] = float32_add(e0, e1, fpst);
+ d[H4(i + 1)] = float32_add(e2, e3, fpst);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm,
+ void *vfpst, uint32_t desc)
+{
+ uintptr_t opr_sz = simd_oprsz(desc);
+ float64 *d = vd;
+ float64 *n = vn;
+ float64 *m = vm;
+ float_status *fpst = vfpst;
+ uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1);
+ uint64_t neg_imag = neg_real ^ 1;
+ uintptr_t i;
+
+ /* Shift boolean to the sign bit so we can xor to negate. */
+ neg_real <<= 63;
+ neg_imag <<= 63;
+
+ for (i = 0; i < opr_sz / 8; i += 2) {
+ float64 e0 = n[i];
+ float64 e1 = m[i + 1] ^ neg_imag;
+ float64 e2 = n[i + 1];
+ float64 e3 = m[i] ^ neg_real;
+
+ d[i] = float64_add(e0, e1, fpst);
+ d[i + 1] = float64_add(e2, e3, fpst);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm, void *va,
+ void *vfpst, uint32_t desc)
+{
+ uintptr_t opr_sz = simd_oprsz(desc);
+ float16 *d = vd, *n = vn, *m = vm, *a = va;
+ float_status *fpst = vfpst;
+ intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t neg_real = flip ^ neg_imag;
+ uintptr_t i;
+
+ /* Shift boolean to the sign bit so we can xor to negate. */
+ neg_real <<= 15;
+ neg_imag <<= 15;
+
+ for (i = 0; i < opr_sz / 2; i += 2) {
+ float16 e2 = n[H2(i + flip)];
+ float16 e1 = m[H2(i + flip)] ^ neg_real;
+ float16 e4 = e2;
+ float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag;
+
+ d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], 0, fpst);
+ d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], 0, fpst);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va,
+ void *vfpst, uint32_t desc)
+{
+ uintptr_t opr_sz = simd_oprsz(desc);
+ float16 *d = vd, *n = vn, *m = vm, *a = va;
+ float_status *fpst = vfpst;
+ intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
+ uint32_t neg_real = flip ^ neg_imag;
+ intptr_t elements = opr_sz / sizeof(float16);
+ intptr_t eltspersegment = 16 / sizeof(float16);
+ intptr_t i, j;
+
+ /* Shift boolean to the sign bit so we can xor to negate. */
+ neg_real <<= 15;
+ neg_imag <<= 15;
+
+ for (i = 0; i < elements; i += eltspersegment) {
+ float16 mr = m[H2(i + 2 * index + 0)];
+ float16 mi = m[H2(i + 2 * index + 1)];
+ float16 e1 = neg_real ^ (flip ? mi : mr);
+ float16 e3 = neg_imag ^ (flip ? mr : mi);
+
+ for (j = i; j < i + eltspersegment; j += 2) {
+ float16 e2 = n[H2(j + flip)];
+ float16 e4 = e2;
+
+ d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst);
+ d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst);
+ }
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm, void *va,
+ void *vfpst, uint32_t desc)
+{
+ uintptr_t opr_sz = simd_oprsz(desc);
+ float32 *d = vd, *n = vn, *m = vm, *a = va;
+ float_status *fpst = vfpst;
+ intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t neg_real = flip ^ neg_imag;
+ uintptr_t i;
+
+ /* Shift boolean to the sign bit so we can xor to negate. */
+ neg_real <<= 31;
+ neg_imag <<= 31;
+
+ for (i = 0; i < opr_sz / 4; i += 2) {
+ float32 e2 = n[H4(i + flip)];
+ float32 e1 = m[H4(i + flip)] ^ neg_real;
+ float32 e4 = e2;
+ float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag;
+
+ d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], 0, fpst);
+ d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], 0, fpst);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va,
+ void *vfpst, uint32_t desc)
+{
+ uintptr_t opr_sz = simd_oprsz(desc);
+ float32 *d = vd, *n = vn, *m = vm, *a = va;
+ float_status *fpst = vfpst;
+ intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
+ uint32_t neg_real = flip ^ neg_imag;
+ intptr_t elements = opr_sz / sizeof(float32);
+ intptr_t eltspersegment = 16 / sizeof(float32);
+ intptr_t i, j;
+
+ /* Shift boolean to the sign bit so we can xor to negate. */
+ neg_real <<= 31;
+ neg_imag <<= 31;
+
+ for (i = 0; i < elements; i += eltspersegment) {
+ float32 mr = m[H4(i + 2 * index + 0)];
+ float32 mi = m[H4(i + 2 * index + 1)];
+ float32 e1 = neg_real ^ (flip ? mi : mr);
+ float32 e3 = neg_imag ^ (flip ? mr : mi);
+
+ for (j = i; j < i + eltspersegment; j += 2) {
+ float32 e2 = n[H4(j + flip)];
+ float32 e4 = e2;
+
+ d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst);
+ d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst);
+ }
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, void *va,
+ void *vfpst, uint32_t desc)
+{
+ uintptr_t opr_sz = simd_oprsz(desc);
+ float64 *d = vd, *n = vn, *m = vm, *a = va;
+ float_status *fpst = vfpst;
+ intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint64_t neg_real = flip ^ neg_imag;
+ uintptr_t i;
+
+ /* Shift boolean to the sign bit so we can xor to negate. */
+ neg_real <<= 63;
+ neg_imag <<= 63;
+
+ for (i = 0; i < opr_sz / 8; i += 2) {
+ float64 e2 = n[i + flip];
+ float64 e1 = m[i + flip] ^ neg_real;
+ float64 e4 = e2;
+ float64 e3 = m[i + 1 - flip] ^ neg_imag;
+
+ d[i] = float64_muladd(e2, e1, a[i], 0, fpst);
+ d[i + 1] = float64_muladd(e4, e3, a[i + 1], 0, fpst);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+/*
+ * Floating point comparisons producing an integer result (all 1s or all 0s).
+ * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
+ * Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires.
+ */
+static uint16_t float16_ceq(float16 op1, float16 op2, float_status *stat)
+{
+ return -float16_eq_quiet(op1, op2, stat);
+}
+
+static uint32_t float32_ceq(float32 op1, float32 op2, float_status *stat)
+{
+ return -float32_eq_quiet(op1, op2, stat);
+}
+
+static uint16_t float16_cge(float16 op1, float16 op2, float_status *stat)
+{
+ return -float16_le(op2, op1, stat);
+}
+
+static uint32_t float32_cge(float32 op1, float32 op2, float_status *stat)
+{
+ return -float32_le(op2, op1, stat);
+}
+
+static uint16_t float16_cgt(float16 op1, float16 op2, float_status *stat)
+{
+ return -float16_lt(op2, op1, stat);
+}
+
+static uint32_t float32_cgt(float32 op1, float32 op2, float_status *stat)
+{
+ return -float32_lt(op2, op1, stat);
+}
+
+static uint16_t float16_acge(float16 op1, float16 op2, float_status *stat)
+{
+ return -float16_le(float16_abs(op2), float16_abs(op1), stat);
+}
+
+static uint32_t float32_acge(float32 op1, float32 op2, float_status *stat)
+{
+ return -float32_le(float32_abs(op2), float32_abs(op1), stat);
+}
+
+static uint16_t float16_acgt(float16 op1, float16 op2, float_status *stat)
+{
+ return -float16_lt(float16_abs(op2), float16_abs(op1), stat);
+}
+
+static uint32_t float32_acgt(float32 op1, float32 op2, float_status *stat)
+{
+ return -float32_lt(float32_abs(op2), float32_abs(op1), stat);
+}
+
+static int16_t vfp_tosszh(float16 x, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ if (float16_is_any_nan(x)) {
+ float_raise(float_flag_invalid, fpst);
+ return 0;
+ }
+ return float16_to_int16_round_to_zero(x, fpst);
+}
+
+static uint16_t vfp_touszh(float16 x, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ if (float16_is_any_nan(x)) {
+ float_raise(float_flag_invalid, fpst);
+ return 0;
+ }
+ return float16_to_uint16_round_to_zero(x, fpst);
+}
+
+#define DO_2OP(NAME, FUNC, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ TYPE *d = vd, *n = vn; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = FUNC(n[i], stat); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16)
+DO_2OP(gvec_frecpe_s, helper_recpe_f32, float32)
+DO_2OP(gvec_frecpe_d, helper_recpe_f64, float64)
+
+DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16)
+DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32)
+DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64)
+
+DO_2OP(gvec_vrintx_h, float16_round_to_int, float16)
+DO_2OP(gvec_vrintx_s, float32_round_to_int, float32)
+
+DO_2OP(gvec_sitos, helper_vfp_sitos, int32_t)
+DO_2OP(gvec_uitos, helper_vfp_uitos, uint32_t)
+DO_2OP(gvec_tosizs, helper_vfp_tosizs, float32)
+DO_2OP(gvec_touizs, helper_vfp_touizs, float32)
+DO_2OP(gvec_sstoh, int16_to_float16, int16_t)
+DO_2OP(gvec_ustoh, uint16_to_float16, uint16_t)
+DO_2OP(gvec_tosszh, vfp_tosszh, float16)
+DO_2OP(gvec_touszh, vfp_touszh, float16)
+
+#define WRAP_CMP0_FWD(FN, CMPOP, TYPE) \
+ static TYPE TYPE##_##FN##0(TYPE op, float_status *stat) \
+ { \
+ return TYPE##_##CMPOP(op, TYPE##_zero, stat); \
+ }
+
+#define WRAP_CMP0_REV(FN, CMPOP, TYPE) \
+ static TYPE TYPE##_##FN##0(TYPE op, float_status *stat) \
+ { \
+ return TYPE##_##CMPOP(TYPE##_zero, op, stat); \
+ }
+
+#define DO_2OP_CMP0(FN, CMPOP, DIRN) \
+ WRAP_CMP0_##DIRN(FN, CMPOP, float16) \
+ WRAP_CMP0_##DIRN(FN, CMPOP, float32) \
+ DO_2OP(gvec_f##FN##0_h, float16_##FN##0, float16) \
+ DO_2OP(gvec_f##FN##0_s, float32_##FN##0, float32)
+
+DO_2OP_CMP0(cgt, cgt, FWD)
+DO_2OP_CMP0(cge, cge, FWD)
+DO_2OP_CMP0(ceq, ceq, FWD)
+DO_2OP_CMP0(clt, cgt, REV)
+DO_2OP_CMP0(cle, cge, REV)
+
+#undef DO_2OP
+#undef DO_2OP_CMP0
+
+/* Floating-point trigonometric starting value.
+ * See the ARM ARM pseudocode function FPTrigSMul.
+ */
+static float16 float16_ftsmul(float16 op1, uint16_t op2, float_status *stat)
+{
+ float16 result = float16_mul(op1, op1, stat);
+ if (!float16_is_any_nan(result)) {
+ result = float16_set_sign(result, op2 & 1);
+ }
+ return result;
+}
+
+static float32 float32_ftsmul(float32 op1, uint32_t op2, float_status *stat)
+{
+ float32 result = float32_mul(op1, op1, stat);
+ if (!float32_is_any_nan(result)) {
+ result = float32_set_sign(result, op2 & 1);
+ }
+ return result;
+}
+
+static float64 float64_ftsmul(float64 op1, uint64_t op2, float_status *stat)
+{
+ float64 result = float64_mul(op1, op1, stat);
+ if (!float64_is_any_nan(result)) {
+ result = float64_set_sign(result, op2 & 1);
+ }
+ return result;
+}
+
+static float16 float16_abd(float16 op1, float16 op2, float_status *stat)
+{
+ return float16_abs(float16_sub(op1, op2, stat));
+}
+
+static float32 float32_abd(float32 op1, float32 op2, float_status *stat)
+{
+ return float32_abs(float32_sub(op1, op2, stat));
+}
+
+/*
+ * Reciprocal step. These are the AArch32 versions, which use a
+ * non-fused multiply-and-subtract.
+ */
+static float16 float16_recps_nf(float16 op1, float16 op2, float_status *stat)
+{
+ op1 = float16_squash_input_denormal(op1, stat);
+ op2 = float16_squash_input_denormal(op2, stat);
+
+ if ((float16_is_infinity(op1) && float16_is_zero(op2)) ||
+ (float16_is_infinity(op2) && float16_is_zero(op1))) {
+ return float16_two;
+ }
+ return float16_sub(float16_two, float16_mul(op1, op2, stat), stat);
+}
+
+static float32 float32_recps_nf(float32 op1, float32 op2, float_status *stat)
+{
+ op1 = float32_squash_input_denormal(op1, stat);
+ op2 = float32_squash_input_denormal(op2, stat);
+
+ if ((float32_is_infinity(op1) && float32_is_zero(op2)) ||
+ (float32_is_infinity(op2) && float32_is_zero(op1))) {
+ return float32_two;
+ }
+ return float32_sub(float32_two, float32_mul(op1, op2, stat), stat);
+}
+
+/* Reciprocal square-root step. AArch32 non-fused semantics. */
+static float16 float16_rsqrts_nf(float16 op1, float16 op2, float_status *stat)
+{
+ op1 = float16_squash_input_denormal(op1, stat);
+ op2 = float16_squash_input_denormal(op2, stat);
+
+ if ((float16_is_infinity(op1) && float16_is_zero(op2)) ||
+ (float16_is_infinity(op2) && float16_is_zero(op1))) {
+ return float16_one_point_five;
+ }
+ op1 = float16_sub(float16_three, float16_mul(op1, op2, stat), stat);
+ return float16_div(op1, float16_two, stat);
+}
+
+static float32 float32_rsqrts_nf(float32 op1, float32 op2, float_status *stat)
+{
+ op1 = float32_squash_input_denormal(op1, stat);
+ op2 = float32_squash_input_denormal(op2, stat);
+
+ if ((float32_is_infinity(op1) && float32_is_zero(op2)) ||
+ (float32_is_infinity(op2) && float32_is_zero(op1))) {
+ return float32_one_point_five;
+ }
+ op1 = float32_sub(float32_three, float32_mul(op1, op2, stat), stat);
+ return float32_div(op1, float32_two, stat);
+}
+
+#define DO_3OP(NAME, FUNC, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = FUNC(n[i], m[i], stat); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_3OP(gvec_fadd_h, float16_add, float16)
+DO_3OP(gvec_fadd_s, float32_add, float32)
+DO_3OP(gvec_fadd_d, float64_add, float64)
+
+DO_3OP(gvec_fsub_h, float16_sub, float16)
+DO_3OP(gvec_fsub_s, float32_sub, float32)
+DO_3OP(gvec_fsub_d, float64_sub, float64)
+
+DO_3OP(gvec_fmul_h, float16_mul, float16)
+DO_3OP(gvec_fmul_s, float32_mul, float32)
+DO_3OP(gvec_fmul_d, float64_mul, float64)
+
+DO_3OP(gvec_ftsmul_h, float16_ftsmul, float16)
+DO_3OP(gvec_ftsmul_s, float32_ftsmul, float32)
+DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64)
+
+DO_3OP(gvec_fabd_h, float16_abd, float16)
+DO_3OP(gvec_fabd_s, float32_abd, float32)
+
+DO_3OP(gvec_fceq_h, float16_ceq, float16)
+DO_3OP(gvec_fceq_s, float32_ceq, float32)
+
+DO_3OP(gvec_fcge_h, float16_cge, float16)
+DO_3OP(gvec_fcge_s, float32_cge, float32)
+
+DO_3OP(gvec_fcgt_h, float16_cgt, float16)
+DO_3OP(gvec_fcgt_s, float32_cgt, float32)
+
+DO_3OP(gvec_facge_h, float16_acge, float16)
+DO_3OP(gvec_facge_s, float32_acge, float32)
+
+DO_3OP(gvec_facgt_h, float16_acgt, float16)
+DO_3OP(gvec_facgt_s, float32_acgt, float32)
+
+DO_3OP(gvec_fmax_h, float16_max, float16)
+DO_3OP(gvec_fmax_s, float32_max, float32)
+
+DO_3OP(gvec_fmin_h, float16_min, float16)
+DO_3OP(gvec_fmin_s, float32_min, float32)
+
+DO_3OP(gvec_fmaxnum_h, float16_maxnum, float16)
+DO_3OP(gvec_fmaxnum_s, float32_maxnum, float32)
+
+DO_3OP(gvec_fminnum_h, float16_minnum, float16)
+DO_3OP(gvec_fminnum_s, float32_minnum, float32)
+
+DO_3OP(gvec_recps_nf_h, float16_recps_nf, float16)
+DO_3OP(gvec_recps_nf_s, float32_recps_nf, float32)
+
+DO_3OP(gvec_rsqrts_nf_h, float16_rsqrts_nf, float16)
+DO_3OP(gvec_rsqrts_nf_s, float32_rsqrts_nf, float32)
+
+#ifdef TARGET_AARCH64
+
+DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)
+DO_3OP(gvec_recps_s, helper_recpsf_f32, float32)
+DO_3OP(gvec_recps_d, helper_recpsf_f64, float64)
+
+DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16)
+DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32)
+DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
+
+#endif
+#undef DO_3OP
+
+/* Non-fused multiply-add (unlike float16_muladd etc, which are fused) */
+static float16 float16_muladd_nf(float16 dest, float16 op1, float16 op2,
+ float_status *stat)
+{
+ return float16_add(dest, float16_mul(op1, op2, stat), stat);
+}
+
+static float32 float32_muladd_nf(float32 dest, float32 op1, float32 op2,
+ float_status *stat)
+{
+ return float32_add(dest, float32_mul(op1, op2, stat), stat);
+}
+
+static float16 float16_mulsub_nf(float16 dest, float16 op1, float16 op2,
+ float_status *stat)
+{
+ return float16_sub(dest, float16_mul(op1, op2, stat), stat);
+}
+
+static float32 float32_mulsub_nf(float32 dest, float32 op1, float32 op2,
+ float_status *stat)
+{
+ return float32_sub(dest, float32_mul(op1, op2, stat), stat);
+}
+
+/* Fused versions; these have the semantics Neon VFMA/VFMS want */
+static float16 float16_muladd_f(float16 dest, float16 op1, float16 op2,
+ float_status *stat)
+{
+ return float16_muladd(op1, op2, dest, 0, stat);
+}
+
+static float32 float32_muladd_f(float32 dest, float32 op1, float32 op2,
+ float_status *stat)
+{
+ return float32_muladd(op1, op2, dest, 0, stat);
+}
+
+static float16 float16_mulsub_f(float16 dest, float16 op1, float16 op2,
+ float_status *stat)
+{
+ return float16_muladd(float16_chs(op1), op2, dest, 0, stat);
+}
+
+static float32 float32_mulsub_f(float32 dest, float32 op1, float32 op2,
+ float_status *stat)
+{
+ return float32_muladd(float32_chs(op1), op2, dest, 0, stat);
+}
+
+#define DO_MULADD(NAME, FUNC, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = FUNC(d[i], n[i], m[i], stat); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_MULADD(gvec_fmla_h, float16_muladd_nf, float16)
+DO_MULADD(gvec_fmla_s, float32_muladd_nf, float32)
+
+DO_MULADD(gvec_fmls_h, float16_mulsub_nf, float16)
+DO_MULADD(gvec_fmls_s, float32_mulsub_nf, float32)
+
+DO_MULADD(gvec_vfma_h, float16_muladd_f, float16)
+DO_MULADD(gvec_vfma_s, float32_muladd_f, float32)
+
+DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16)
+DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32)
+
+/* For the indexed ops, SVE applies the index per 128-bit vector segment.
+ * For AdvSIMD, there is of course only one such vector segment.
+ */
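+
+/*
+ * For example, with 32-bit elements and idx == 1, each 4-lane segment of
+ * the first operand is multiplied by lane 1 of the corresponding segment
+ * of the indexed operand.
+ */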
+
+#define DO_MUL_IDX(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc); \
+ intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
+ intptr_t idx = simd_data(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[H(i + idx)]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = n[i + j] * mm; \
+ } \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_MUL_IDX(gvec_mul_idx_h, uint16_t, H2)
+DO_MUL_IDX(gvec_mul_idx_s, uint32_t, H4)
+DO_MUL_IDX(gvec_mul_idx_d, uint64_t, H8)
+
+#undef DO_MUL_IDX
+
+#define DO_MLA_IDX(NAME, TYPE, OP, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc); \
+ intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
+ intptr_t idx = simd_data(desc); \
+ TYPE *d = vd, *n = vn, *m = vm, *a = va; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[H(i + idx)]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = a[i + j] OP n[i + j] * mm; \
+ } \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_MLA_IDX(gvec_mla_idx_h, uint16_t, +, H2)
+DO_MLA_IDX(gvec_mla_idx_s, uint32_t, +, H4)
+DO_MLA_IDX(gvec_mla_idx_d, uint64_t, +, H8)
+
+DO_MLA_IDX(gvec_mls_idx_h, uint16_t, -, H2)
+DO_MLA_IDX(gvec_mls_idx_s, uint32_t, -, H4)
+DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, H8)
+
+#undef DO_MLA_IDX
+
+#define DO_FMUL_IDX(NAME, ADD, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc); \
+ intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
+ intptr_t idx = simd_data(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[H(i + idx)]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = TYPE##_##ADD(d[i + j], \
+ TYPE##_mul(n[i + j], mm, stat), stat); \
+ } \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+#define float16_nop(N, M, S) (M)
+#define float32_nop(N, M, S) (M)
+#define float64_nop(N, M, S) (M)
+
+DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16, H2)
+DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32, H4)
+DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64, H8)
+
+/*
+ * Non-fused multiply-accumulate operations, for Neon. NB that unlike
+ * the fused ops below, these accumulate both from and into Vd.
+ */
+DO_FMUL_IDX(gvec_fmla_nf_idx_h, add, float16, H2)
+DO_FMUL_IDX(gvec_fmla_nf_idx_s, add, float32, H4)
+DO_FMUL_IDX(gvec_fmls_nf_idx_h, sub, float16, H2)
+DO_FMUL_IDX(gvec_fmls_nf_idx_s, sub, float32, H4)
+
+#undef float16_nop
+#undef float32_nop
+#undef float64_nop
+#undef DO_FMUL_IDX
+
+#define DO_FMLA_IDX(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
+ void *stat, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc); \
+ intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
+ TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \
+ intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \
+ TYPE *d = vd, *n = vn, *m = vm, *a = va; \
+ op1_neg <<= (8 * sizeof(TYPE) - 1); \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[H(i + idx)]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \
+ mm, a[i + j], 0, stat); \
+ } \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
+DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
+DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8)
+
+#undef DO_FMLA_IDX
+
+#define DO_SAT(NAME, WTYPE, TYPEN, TYPEM, OP, MIN, MAX) \
+void HELPER(NAME)(void *vd, void *vq, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ TYPEN *d = vd, *n = vn; TYPEM *m = vm; \
+ bool q = false; \
+ for (i = 0; i < oprsz / sizeof(TYPEN); i++) { \
+ WTYPE dd = (WTYPE)n[i] OP m[i]; \
+ if (dd < MIN) { \
+ dd = MIN; \
+ q = true; \
+ } else if (dd > MAX) { \
+ dd = MAX; \
+ q = true; \
+ } \
+ d[i] = dd; \
+ } \
+ if (q) { \
+ uint32_t *qc = vq; \
+ qc[0] = 1; \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_SAT(gvec_uqadd_b, int, uint8_t, uint8_t, +, 0, UINT8_MAX)
+DO_SAT(gvec_uqadd_h, int, uint16_t, uint16_t, +, 0, UINT16_MAX)
+DO_SAT(gvec_uqadd_s, int64_t, uint32_t, uint32_t, +, 0, UINT32_MAX)
+
+DO_SAT(gvec_sqadd_b, int, int8_t, int8_t, +, INT8_MIN, INT8_MAX)
+DO_SAT(gvec_sqadd_h, int, int16_t, int16_t, +, INT16_MIN, INT16_MAX)
+DO_SAT(gvec_sqadd_s, int64_t, int32_t, int32_t, +, INT32_MIN, INT32_MAX)
+
+DO_SAT(gvec_uqsub_b, int, uint8_t, uint8_t, -, 0, UINT8_MAX)
+DO_SAT(gvec_uqsub_h, int, uint16_t, uint16_t, -, 0, UINT16_MAX)
+DO_SAT(gvec_uqsub_s, int64_t, uint32_t, uint32_t, -, 0, UINT32_MAX)
+
+DO_SAT(gvec_sqsub_b, int, int8_t, int8_t, -, INT8_MIN, INT8_MAX)
+DO_SAT(gvec_sqsub_h, int, int16_t, int16_t, -, INT16_MIN, INT16_MAX)
+DO_SAT(gvec_sqsub_s, int64_t, int32_t, int32_t, -, INT32_MIN, INT32_MAX)
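+
+/*
+ * For example, gvec_sqadd_b computes 100 + 100 in a widened int, clamps
+ * the out-of-range 200 to INT8_MAX, and records the saturation by
+ * setting the QC word pointed to by vq.
+ */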
+
+#undef DO_SAT
+
+void HELPER(gvec_uqadd_d)(void *vd, void *vq, void *vn,
+ void *vm, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ bool q = false;
+
+ for (i = 0; i < oprsz / 8; i++) {
+ uint64_t nn = n[i], mm = m[i], dd = nn + mm;
+ if (dd < nn) {
+ dd = UINT64_MAX;
+ q = true;
+ }
+ d[i] = dd;
+ }
+ if (q) {
+ uint32_t *qc = vq;
+ qc[0] = 1;
+ }
+ clear_tail(d, oprsz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_uqsub_d)(void *vd, void *vq, void *vn,
+ void *vm, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ bool q = false;
+
+ for (i = 0; i < oprsz / 8; i++) {
+ uint64_t nn = n[i], mm = m[i], dd = nn - mm;
+ if (nn < mm) {
+ dd = 0;
+ q = true;
+ }
+ d[i] = dd;
+ }
+ if (q) {
+ uint32_t *qc = vq;
+ qc[0] = 1;
+ }
+ clear_tail(d, oprsz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_sqadd_d)(void *vd, void *vq, void *vn,
+ void *vm, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+ int64_t *d = vd, *n = vn, *m = vm;
+ bool q = false;
+
+ for (i = 0; i < oprsz / 8; i++) {
+ int64_t nn = n[i], mm = m[i], dd = nn + mm;
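+ /*
+ * Signed overflow iff the operands have the same sign and the result's
+ * sign differs; the saturated value takes the sign of nn (INT64_MIN
+ * if nn is negative, INT64_MAX otherwise).
+ */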
+ if (((dd ^ nn) & ~(nn ^ mm)) & INT64_MIN) {
+ dd = (nn >> 63) ^ ~INT64_MIN;
+ q = true;
+ }
+ d[i] = dd;
+ }
+ if (q) {
+ uint32_t *qc = vq;
+ qc[0] = 1;
+ }
+ clear_tail(d, oprsz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn,
+ void *vm, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+ int64_t *d = vd, *n = vn, *m = vm;
+ bool q = false;
+
+ for (i = 0; i < oprsz / 8; i++) {
+ int64_t nn = n[i], mm = m[i], dd = nn - mm;
+ if (((dd ^ nn) & (nn ^ mm)) & INT64_MIN) {
+ dd = (nn >> 63) ^ ~INT64_MIN;
+ q = true;
+ }
+ d[i] = dd;
+ }
+ if (q) {
+ uint32_t *qc = vq;
+ qc[0] = 1;
+ }
+ clear_tail(d, oprsz, simd_maxsz(desc));
+}
+
+#define DO_SRA(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] += n[i] >> shift; \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_SRA(gvec_ssra_b, int8_t)
+DO_SRA(gvec_ssra_h, int16_t)
+DO_SRA(gvec_ssra_s, int32_t)
+DO_SRA(gvec_ssra_d, int64_t)
+
+DO_SRA(gvec_usra_b, uint8_t)
+DO_SRA(gvec_usra_h, uint16_t)
+DO_SRA(gvec_usra_s, uint32_t)
+DO_SRA(gvec_usra_d, uint64_t)
+
+#undef DO_SRA
+
+#define DO_RSHR(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ TYPE tmp = n[i] >> (shift - 1); \
+ d[i] = (tmp >> 1) + (tmp & 1); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_RSHR(gvec_srshr_b, int8_t)
+DO_RSHR(gvec_srshr_h, int16_t)
+DO_RSHR(gvec_srshr_s, int32_t)
+DO_RSHR(gvec_srshr_d, int64_t)
+
+DO_RSHR(gvec_urshr_b, uint8_t)
+DO_RSHR(gvec_urshr_h, uint16_t)
+DO_RSHR(gvec_urshr_s, uint32_t)
+DO_RSHR(gvec_urshr_d, uint64_t)
+
+#undef DO_RSHR
+
+#define DO_RSRA(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ TYPE tmp = n[i] >> (shift - 1); \
+ d[i] += (tmp >> 1) + (tmp & 1); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_RSRA(gvec_srsra_b, int8_t)
+DO_RSRA(gvec_srsra_h, int16_t)
+DO_RSRA(gvec_srsra_s, int32_t)
+DO_RSRA(gvec_srsra_d, int64_t)
+
+DO_RSRA(gvec_ursra_b, uint8_t)
+DO_RSRA(gvec_ursra_h, uint16_t)
+DO_RSRA(gvec_ursra_s, uint32_t)
+DO_RSRA(gvec_ursra_d, uint64_t)
+
+#undef DO_RSRA
+
+#define DO_SRI(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = deposit64(d[i], 0, sizeof(TYPE) * 8 - shift, n[i] >> shift); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_SRI(gvec_sri_b, uint8_t)
+DO_SRI(gvec_sri_h, uint16_t)
+DO_SRI(gvec_sri_s, uint32_t)
+DO_SRI(gvec_sri_d, uint64_t)
+
+#undef DO_SRI
+
+#define DO_SLI(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = deposit64(d[i], shift, sizeof(TYPE) * 8 - shift, n[i]); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+}
+
+DO_SLI(gvec_sli_b, uint8_t)
+DO_SLI(gvec_sli_h, uint16_t)
+DO_SLI(gvec_sli_s, uint32_t)
+DO_SLI(gvec_sli_d, uint64_t)
+
+#undef DO_SLI
+
+/*
+ * Convert float16 to float32, raising no exceptions and
+ * preserving exceptional values, including SNaN.
+ * This is effectively an unpack+repack operation.
+ */
+static float32 float16_to_float32_by_bits(uint32_t f16, bool fz16)
+{
+ const int f16_bias = 15;
+ const int f32_bias = 127;
+ uint32_t sign = extract32(f16, 15, 1);
+ uint32_t exp = extract32(f16, 10, 5);
+ uint32_t frac = extract32(f16, 0, 10);
+
+ if (exp == 0x1f) {
+ /* Inf or NaN */
+ exp = 0xff;
+ } else if (exp == 0) {
+ /* Zero or denormal. */
+ if (frac != 0) {
+ if (fz16) {
+ frac = 0;
+ } else {
+ /*
+ * Denormal; these are all normal float32.
+ * Shift the fraction so that the msb is at bit 11,
+ * then remove bit 11 as the implicit bit of the
+ * normalized float32. Note that we still go through
+ * the shift for normal numbers below, to put the
+ * float32 fraction at the right place.
+ */
+ int shift = clz32(frac) - 21;
+ frac = (frac << shift) & 0x3ff;
+ exp = f32_bias - f16_bias - shift + 1;
+ }
+ }
+ } else {
+ /* Normal number; adjust the bias. */
+ exp += f32_bias - f16_bias;
+ }
+ sign <<= 31;
+ exp <<= 23;
+ frac <<= 23 - 10;
+
+ return sign | exp | frac;
+}
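+
+/*
+ * For example, f16 1.0 (0x3c00: exp 15, frac 0) rebiases to exp 127 and
+ * becomes f32 0x3f800000; the smallest f16 denormal 0x0001 normalizes
+ * (shift 10, exp 103) to f32 0x33800000, i.e. 2^-24.
+ */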
+
+static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2)
+{
+ /*
+ * Branchless load of u32[0], u64[0], u32[1], or u64[1].
+ * Load the 2nd qword iff is_q & is_2.
+ * Shift to the 2nd dword iff !is_q & is_2.
+ * For !is_q & !is_2, the upper bits of the result are garbage.
+ */
+ return ptr[is_q & is_2] >> ((is_2 & ~is_q) << 5);
+}
+
+/*
+ * Note that FMLAL requires oprsz == 8 or oprsz == 16,
+ * as there are not yet SVE versions that might use blocking.
+ */
+
+static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
+ uint32_t desc, bool fz16)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+ int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
+ int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ int is_q = oprsz == 16;
+ uint64_t n_4, m_4;
+
+ /* Pre-load all of the f16 data, avoiding overlap issues. */
+ n_4 = load4_f16(vn, is_q, is_2);
+ m_4 = load4_f16(vm, is_q, is_2);
+
+ /* Negate all inputs for FMLSL at once. */
+ if (is_s) {
+ n_4 ^= 0x8000800080008000ull;
+ }
+
+ for (i = 0; i < oprsz / 4; i++) {
+ float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
+ float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16);
+ d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst);
+ }
+ clear_tail(d, oprsz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
+ void *venv, uint32_t desc)
+{
+ CPUARMState *env = venv;
+ do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc,
+ get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
+}
+
+void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
+ void *venv, uint32_t desc)
+{
+ CPUARMState *env = venv;
+ do_fmlal(vd, vn, vm, &env->vfp.fp_status, desc,
+ get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
+}
+
+void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
+ void *venv, uint32_t desc)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+ uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
+ intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
+ CPUARMState *env = venv;
+ float_status *status = &env->vfp.fp_status;
+ bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16);
+
+ for (i = 0; i < oprsz; i += sizeof(float32)) {
+ float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn;
+ float16 mm_16 = *(float16 *)(vm + H1_2(i + sel));
+ float32 nn = float16_to_float32_by_bits(nn_16, fz16);
+ float32 mm = float16_to_float32_by_bits(mm_16, fz16);
+ float32 aa = *(float32 *)(va + H1_4(i));
+
+ *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, 0, status);
+ }
+}
+
+static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
+ uint32_t desc, bool fz16)
+{
+ intptr_t i, oprsz = simd_oprsz(desc);
+ int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
+ int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3);
+ int is_q = oprsz == 16;
+ uint64_t n_4;
+ float32 m_1;
+
+ /* Pre-load all of the f16 data, avoiding overlap issues. */
+ n_4 = load4_f16(vn, is_q, is_2);
+
+ /* Negate all inputs for FMLSL at once. */
+ if (is_s) {
+ n_4 ^= 0x8000800080008000ull;
+ }
+
+ m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16);
+
+ for (i = 0; i < oprsz / 4; i++) {
+ float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
+ d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst);
+ }
+ clear_tail(d, oprsz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
+ void *venv, uint32_t desc)
+{
+ CPUARMState *env = venv;
+ do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc,
+ get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
+}
+
+void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
+ void *venv, uint32_t desc)
+{
+ CPUARMState *env = venv;
+ do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status, desc,
+ get_flush_inputs_to_zero(&env->vfp.fp_status_f16));
+}
+
+void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
+ void *venv, uint32_t desc)
+{
+ intptr_t i, j, oprsz = simd_oprsz(desc);
+ uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
+ intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
+ intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
+ CPUARMState *env = venv;
+ float_status *status = &env->vfp.fp_status;
+ bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16);
+
+ for (i = 0; i < oprsz; i += 16) {
+ float16 mm_16 = *(float16 *)(vm + i + idx);
+ float32 mm = float16_to_float32_by_bits(mm_16, fz16);
+
+ for (j = 0; j < 16; j += sizeof(float32)) {
+ float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negn;
+ float32 nn = float16_to_float32_by_bits(nn_16, fz16);
+ float32 aa = *(float32 *)(va + H1_4(i + j));
+
+ *(float32 *)(vd + H1_4(i + j)) =
+ float32_muladd(nn, mm, aa, 0, status);
+ }
+ }
+}
+
+void HELPER(gvec_sshl_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int8_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz; ++i) {
+ int8_t mm = m[i];
+ int8_t nn = n[i];
+ int8_t res = 0;
+ if (mm >= 0) {
+ if (mm < 8) {
+ res = nn << mm;
+ }
+ } else {
+ res = nn >> (mm > -8 ? -mm : 7);
+ }
+ d[i] = res;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_sshl_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ int8_t mm = m[i]; /* only 8 bits of shift are significant */
+ int16_t nn = n[i];
+ int16_t res = 0;
+ if (mm >= 0) {
+ if (mm < 16) {
+ res = nn << mm;
+ }
+ } else {
+ res = nn >> (mm > -16 ? -mm : 15);
+ }
+ d[i] = res;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_ushl_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint8_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz; ++i) {
+ int8_t mm = m[i];
+ uint8_t nn = n[i];
+ uint8_t res = 0;
+ if (mm >= 0) {
+ if (mm < 8) {
+ res = nn << mm;
+ }
+ } else {
+ if (mm > -8) {
+ res = nn >> -mm;
+ }
+ }
+ d[i] = res;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_ushl_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ int8_t mm = m[i]; /* only 8 bits of shift are significant */
+ uint16_t nn = n[i];
+ uint16_t res = 0;
+ if (mm >= 0) {
+ if (mm < 16) {
+ res = nn << mm;
+ }
+ } else {
+ if (mm > -16) {
+ res = nn >> -mm;
+ }
+ }
+ d[i] = res;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+/*
+ * 8x8->8 polynomial multiply.
+ *
+ * Polynomial multiplication is like integer multiplication except the
+ * partial products are XORed, not added.
+ *
+ * TODO: expose this as a generic vector operation, as it is a common
+ * crypto building block.
+ */
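+/*
+ * E.g. within one byte lane, 0x03 * 0x03 = 0x05 rather than 9: the two
+ * cross terms cancel under XOR, leaving x^2 + 1.
+ */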
+void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ d[i] = clmul_8x8_low(n[i], m[i]);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+/*
+ * 64x64->128 polynomial multiply.
+ * Because the lanes are not accessed in strict columns,
+ * this probably cannot be turned into a generic helper.
+ */
+void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ intptr_t hi = simd_data(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 8; i += 2) {
+ Int128 r = clmul_64(n[i + hi], m[i + hi]);
+ d[i] = int128_getlo(r);
+ d[i + 1] = int128_gethi(r);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ int hi = simd_data(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint64_t nn = n[hi], mm = m[hi];
+
+ d[0] = clmul_8x4_packed(nn, mm);
+ nn >>= 32;
+ mm >>= 32;
+ d[1] = clmul_8x4_packed(nn, mm);
+
+ clear_tail(d, 16, simd_maxsz(desc));
+}
+
+#ifdef TARGET_AARCH64
+void HELPER(sve2_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ int shift = simd_data(desc) * 8;
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ d[i] = clmul_8x4_even(n[i] >> shift, m[i] >> shift);
+ }
+}
+
+void HELPER(sve2_pmull_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t sel = H4(simd_data(desc));
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint32_t *n = vn, *m = vm;
+ uint64_t *d = vd;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ d[i] = clmul_32(n[2 * i + sel], m[2 * i + sel]);
+ }
+}
+#endif
+
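+/*
+ * Compare each element against zero: the C comparison yields 0 or 1,
+ * and negating that gives the all-zeros/all-ones element mask that
+ * the compare-against-zero instructions produce.
+ */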
+#define DO_CMP0(NAME, TYPE, OP) \
+void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
+ TYPE nn = *(TYPE *)(vn + i); \
+ *(TYPE *)(vd + i) = -(nn OP 0); \
+ } \
+ clear_tail(vd, opr_sz, simd_maxsz(desc)); \
+}
+
+DO_CMP0(gvec_ceq0_b, int8_t, ==)
+DO_CMP0(gvec_clt0_b, int8_t, <)
+DO_CMP0(gvec_cle0_b, int8_t, <=)
+DO_CMP0(gvec_cgt0_b, int8_t, >)
+DO_CMP0(gvec_cge0_b, int8_t, >=)
+
+DO_CMP0(gvec_ceq0_h, int16_t, ==)
+DO_CMP0(gvec_clt0_h, int16_t, <)
+DO_CMP0(gvec_cle0_h, int16_t, <=)
+DO_CMP0(gvec_cgt0_h, int16_t, >)
+DO_CMP0(gvec_cge0_h, int16_t, >=)
+
+#undef DO_CMP0
+
+#define DO_ABD(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ \
+ for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \
+ d[i] = n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \
+ } \
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
+}
+
+DO_ABD(gvec_sabd_b, int8_t)
+DO_ABD(gvec_sabd_h, int16_t)
+DO_ABD(gvec_sabd_s, int32_t)
+DO_ABD(gvec_sabd_d, int64_t)
+
+DO_ABD(gvec_uabd_b, uint8_t)
+DO_ABD(gvec_uabd_h, uint16_t)
+DO_ABD(gvec_uabd_s, uint32_t)
+DO_ABD(gvec_uabd_d, uint64_t)
+
+#undef DO_ABD
+
+#define DO_ABA(NAME, TYPE) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ \
+ for (i = 0; i < opr_sz / sizeof(TYPE); ++i) { \
+ d[i] += n[i] < m[i] ? m[i] - n[i] : n[i] - m[i]; \
+ } \
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
+}
+
+DO_ABA(gvec_saba_b, int8_t)
+DO_ABA(gvec_saba_h, int16_t)
+DO_ABA(gvec_saba_s, int32_t)
+DO_ABA(gvec_saba_d, int64_t)
+
+DO_ABA(gvec_uaba_b, uint8_t)
+DO_ABA(gvec_uaba_h, uint16_t)
+DO_ABA(gvec_uaba_s, uint32_t)
+DO_ABA(gvec_uaba_d, uint64_t)
+
+#undef DO_ABA
+
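+/*
+ * Neon floating-point pairwise ops: the low half of the result takes
+ * OP applied to adjacent pairs of Vn, the high half takes OP applied
+ * to adjacent pairs of Vm.
+ */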
+#define DO_NEON_PAIRWISE(NAME, OP) \
+ void HELPER(NAME##s)(void *vd, void *vn, void *vm, \
+ void *stat, uint32_t oprsz) \
+ { \
+ float_status *fpst = stat; \
+ float32 *d = vd; \
+ float32 *n = vn; \
+ float32 *m = vm; \
+ float32 r0, r1; \
+ \
+ /* Read all inputs before writing outputs in case vm == vd */ \
+ r0 = float32_##OP(n[H4(0)], n[H4(1)], fpst); \
+ r1 = float32_##OP(m[H4(0)], m[H4(1)], fpst); \
+ \
+ d[H4(0)] = r0; \
+ d[H4(1)] = r1; \
+ } \
+ \
+ void HELPER(NAME##h)(void *vd, void *vn, void *vm, \
+ void *stat, uint32_t oprsz) \
+ { \
+ float_status *fpst = stat; \
+ float16 *d = vd; \
+ float16 *n = vn; \
+ float16 *m = vm; \
+ float16 r0, r1, r2, r3; \
+ \
+ /* Read all inputs before writing outputs in case vm == vd */ \
+ r0 = float16_##OP(n[H2(0)], n[H2(1)], fpst); \
+ r1 = float16_##OP(n[H2(2)], n[H2(3)], fpst); \
+ r2 = float16_##OP(m[H2(0)], m[H2(1)], fpst); \
+ r3 = float16_##OP(m[H2(2)], m[H2(3)], fpst); \
+ \
+ d[H2(0)] = r0; \
+ d[H2(1)] = r1; \
+ d[H2(2)] = r2; \
+ d[H2(3)] = r3; \
+ }
+
+DO_NEON_PAIRWISE(neon_padd, add)
+DO_NEON_PAIRWISE(neon_pmax, max)
+DO_NEON_PAIRWISE(neon_pmin, min)
+
+#undef DO_NEON_PAIRWISE
+
+#define DO_VCVT_FIXED(NAME, FUNC, TYPE) \
+ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
+ { \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ int shift = simd_data(desc); \
+ TYPE *d = vd, *n = vn; \
+ float_status *fpst = stat; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = FUNC(n[i], shift, fpst); \
+ } \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+ }
+
+DO_VCVT_FIXED(gvec_vcvt_sf, helper_vfp_sltos, uint32_t)
+DO_VCVT_FIXED(gvec_vcvt_uf, helper_vfp_ultos, uint32_t)
+DO_VCVT_FIXED(gvec_vcvt_fs, helper_vfp_tosls_round_to_zero, uint32_t)
+DO_VCVT_FIXED(gvec_vcvt_fu, helper_vfp_touls_round_to_zero, uint32_t)
+DO_VCVT_FIXED(gvec_vcvt_sh, helper_vfp_shtoh, uint16_t)
+DO_VCVT_FIXED(gvec_vcvt_uh, helper_vfp_uhtoh, uint16_t)
+DO_VCVT_FIXED(gvec_vcvt_hs, helper_vfp_toshh_round_to_zero, uint16_t)
+DO_VCVT_FIXED(gvec_vcvt_hu, helper_vfp_touhh_round_to_zero, uint16_t)
+
+#undef DO_VCVT_FIXED
+
+#define DO_VCVT_RMODE(NAME, FUNC, TYPE) \
+ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
+ { \
+ float_status *fpst = stat; \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ uint32_t rmode = simd_data(desc); \
+ uint32_t prev_rmode = get_float_rounding_mode(fpst); \
+ TYPE *d = vd, *n = vn; \
+ set_float_rounding_mode(rmode, fpst); \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = FUNC(n[i], 0, fpst); \
+ } \
+ set_float_rounding_mode(prev_rmode, fpst); \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+ }
+
+DO_VCVT_RMODE(gvec_vcvt_rm_ss, helper_vfp_tosls, uint32_t)
+DO_VCVT_RMODE(gvec_vcvt_rm_us, helper_vfp_touls, uint32_t)
+DO_VCVT_RMODE(gvec_vcvt_rm_sh, helper_vfp_toshh, uint16_t)
+DO_VCVT_RMODE(gvec_vcvt_rm_uh, helper_vfp_touhh, uint16_t)
+
+#undef DO_VCVT_RMODE
+
+#define DO_VRINT_RMODE(NAME, FUNC, TYPE) \
+ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
+ { \
+ float_status *fpst = stat; \
+ intptr_t i, oprsz = simd_oprsz(desc); \
+ uint32_t rmode = simd_data(desc); \
+ uint32_t prev_rmode = get_float_rounding_mode(fpst); \
+ TYPE *d = vd, *n = vn; \
+ set_float_rounding_mode(rmode, fpst); \
+ for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
+ d[i] = FUNC(n[i], fpst); \
+ } \
+ set_float_rounding_mode(prev_rmode, fpst); \
+ clear_tail(d, oprsz, simd_maxsz(desc)); \
+ }
+
+DO_VRINT_RMODE(gvec_vrint_rm_h, helper_rinth, uint16_t)
+DO_VRINT_RMODE(gvec_vrint_rm_s, helper_rints, uint32_t)
+
+#undef DO_VRINT_RMODE
+
+#ifdef TARGET_AARCH64
+void HELPER(simd_tblx)(void *vd, void *vm, void *venv, uint32_t desc)
+{
+ const uint8_t *indices = vm;
+ CPUARMState *env = venv;
+ size_t oprsz = simd_oprsz(desc);
+ uint32_t rn = extract32(desc, SIMD_DATA_SHIFT, 5);
+ bool is_tbx = extract32(desc, SIMD_DATA_SHIFT + 5, 1);
+ uint32_t table_len = desc >> (SIMD_DATA_SHIFT + 6);
+ union {
+ uint8_t b[16];
+ uint64_t d[2];
+ } result;
+
+ /*
+ * We must construct the final result in a temp, lest the output
+ * overlaps the input table. For TBL, begin with zero; for TBX,
+ * begin with the original register contents. Note that we always
+ * copy 16 bytes here to avoid an extra branch; clearing the high
+ * bits of the register for oprsz == 8 is handled below.
+ */
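+    /*
+     * E.g. with a two-register table (table_len == 32), an index byte of
+     * 0x13 selects byte 3 of register (rn + 1) % 32.
+     */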
+ if (is_tbx) {
+ memcpy(&result, vd, 16);
+ } else {
+ memset(&result, 0, 16);
+ }
+
+ for (size_t i = 0; i < oprsz; ++i) {
+ uint32_t index = indices[H1(i)];
+
+ if (index < table_len) {
+ /*
+ * Convert index (a byte offset into the virtual table
+ * which is a series of 128-bit vectors concatenated)
+ * into the correct register element, bearing in mind
+ * that the table can wrap around from V31 to V0.
+ */
+ const uint8_t *table = (const uint8_t *)
+ aa64_vfp_qreg(env, (rn + (index >> 4)) % 32);
+ result.b[H1(i)] = table[H1(index % 16)];
+ }
+ }
+
+ memcpy(vd, &result, 16);
+ clear_tail(vd, oprsz, simd_maxsz(desc));
+}
+#endif
+
+/*
+ * NxN -> N highpart multiply
+ *
+ * TODO: expose this as a generic vector operation.
+ */
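+/*
+ * E.g. for byte elements, 0x40 * 0x40 = 0x1000, and the helpers below
+ * keep only the high byte of the double-width product, 0x10.
+ */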
+
+void HELPER(gvec_smulh_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int8_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = ((int32_t)n[i] * m[i]) >> 8;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_smulh_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = ((int32_t)n[i] * m[i]) >> 16;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_smulh_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ int32_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = ((int64_t)n[i] * m[i]) >> 32;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_smulh_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint64_t discard;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ muls64(&discard, &d[i], n[i], m[i]);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_b)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint8_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = ((uint32_t)n[i] * m[i]) >> 8;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_h)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint16_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 2; ++i) {
+ d[i] = ((uint32_t)n[i] * m[i]) >> 16;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_s)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint32_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = ((uint64_t)n[i] * m[i]) >> 32;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_umulh_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+ uint64_t discard;
+
+ for (i = 0; i < opr_sz / 8; ++i) {
+ mulu64(&discard, &d[i], n[i], m[i]);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
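+/* XAR: XOR the two operands, then rotate each 64-bit lane right by the immediate. */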
+void HELPER(gvec_xar_d)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ int shr = simd_data(desc);
+ uint64_t *d = vd, *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = ror64(n[i] ^ m[i], shr);
+ }
+ clear_tail(d, opr_sz * 8, simd_maxsz(desc));
+}
+
+/*
+ * Integer matrix-multiply accumulate
+ */
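+/*
+ * Within each 16-byte segment the accumulator holds a 2x2 matrix of
+ * 32-bit elements and the two source operands each hold two 8-byte
+ * rows of bytes; result element [i][j] is the accumulator plus the
+ * dot product of row i of the first source with row j of the second.
+ */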
+
+static uint32_t do_smmla_b(uint32_t sum, void *vn, void *vm)
+{
+ int8_t *n = vn, *m = vm;
+
+ for (intptr_t k = 0; k < 8; ++k) {
+ sum += n[H1(k)] * m[H1(k)];
+ }
+ return sum;
+}
+
+static uint32_t do_ummla_b(uint32_t sum, void *vn, void *vm)
+{
+ uint8_t *n = vn, *m = vm;
+
+ for (intptr_t k = 0; k < 8; ++k) {
+ sum += n[H1(k)] * m[H1(k)];
+ }
+ return sum;
+}
+
+static uint32_t do_usmmla_b(uint32_t sum, void *vn, void *vm)
+{
+ uint8_t *n = vn;
+ int8_t *m = vm;
+
+ for (intptr_t k = 0; k < 8; ++k) {
+ sum += n[H1(k)] * m[H1(k)];
+ }
+ return sum;
+}
+
+static void do_mmla_b(void *vd, void *vn, void *vm, void *va, uint32_t desc,
+ uint32_t (*inner_loop)(uint32_t, void *, void *))
+{
+ intptr_t seg, opr_sz = simd_oprsz(desc);
+
+ for (seg = 0; seg < opr_sz; seg += 16) {
+ uint32_t *d = vd + seg;
+ uint32_t *a = va + seg;
+ uint32_t sum0, sum1, sum2, sum3;
+
+ /*
+ * Process the entire segment at once, writing back the
+ * results only after we've consumed all of the inputs.
+ *
+ * Key to indices by column:
+ * i j i j
+ */
+ sum0 = a[H4(0 + 0)];
+ sum0 = inner_loop(sum0, vn + seg + 0, vm + seg + 0);
+ sum1 = a[H4(0 + 1)];
+ sum1 = inner_loop(sum1, vn + seg + 0, vm + seg + 8);
+ sum2 = a[H4(2 + 0)];
+ sum2 = inner_loop(sum2, vn + seg + 8, vm + seg + 0);
+ sum3 = a[H4(2 + 1)];
+ sum3 = inner_loop(sum3, vn + seg + 8, vm + seg + 8);
+
+ d[H4(0)] = sum0;
+ d[H4(1)] = sum1;
+ d[H4(2)] = sum2;
+ d[H4(3)] = sum3;
+ }
+ clear_tail(vd, opr_sz, simd_maxsz(desc));
+}
+
+#define DO_MMLA_B(NAME, INNER) \
+ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+ { do_mmla_b(vd, vn, vm, va, desc, INNER); }
+
+DO_MMLA_B(gvec_smmla_b, do_smmla_b)
+DO_MMLA_B(gvec_ummla_b, do_ummla_b)
+DO_MMLA_B(gvec_usmmla_b, do_usmmla_b)
+
+/*
+ * BFloat16 Dot Product
+ */
+
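+/*
+ * A bfloat16 value is the high half of an IEEE float32, so shifting an
+ * element left by 16 bits reinterprets it as a float32 whose low
+ * mantissa bits are zero; the helpers below rely on this.
+ */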
+float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2)
+{
+ /* FPCR is ignored for BFDOT and BFMMLA. */
+ float_status bf_status = {
+ .tininess_before_rounding = float_tininess_before_rounding,
+ .float_rounding_mode = float_round_to_odd_inf,
+ .flush_to_zero = true,
+ .flush_inputs_to_zero = true,
+ .default_nan_mode = true,
+ };
+ float32 t1, t2;
+
+ /*
+ * Extract each BFloat16 from the element pair, and shift
+ * them such that they become float32.
+ */
+ t1 = float32_mul(e1 << 16, e2 << 16, &bf_status);
+ t2 = float32_mul(e1 & 0xffff0000u, e2 & 0xffff0000u, &bf_status);
+ t1 = float32_add(t1, t2, &bf_status);
+ t1 = float32_add(sum, t1, &bf_status);
+
+ return t1;
+}
+
+void HELPER(gvec_bfdot)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ float32 *d = vd, *a = va;
+ uint32_t *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ d[i] = bfdotadd(a[i], n[i], m[i]);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_bfdot_idx)(void *vd, void *vn, void *vm,
+ void *va, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ intptr_t index = simd_data(desc);
+ intptr_t elements = opr_sz / 4;
+ intptr_t eltspersegment = MIN(16 / 4, elements);
+ float32 *d = vd, *a = va;
+ uint32_t *n = vn, *m = vm;
+
+ for (i = 0; i < elements; i += eltspersegment) {
+ uint32_t m_idx = m[i + H4(index)];
+
+ for (j = i; j < i + eltspersegment; j++) {
+ d[j] = bfdotadd(a[j], n[j], m_idx);
+ }
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_bfmmla)(void *vd, void *vn, void *vm, void *va, uint32_t desc)
+{
+ intptr_t s, opr_sz = simd_oprsz(desc);
+ float32 *d = vd, *a = va;
+ uint32_t *n = vn, *m = vm;
+
+ for (s = 0; s < opr_sz / 4; s += 4) {
+ float32 sum00, sum01, sum10, sum11;
+
+ /*
+ * Process the entire segment at once, writing back the
+ * results only after we've consumed all of the inputs.
+ *
+ * Key to indices by column:
+ * i j i k j k
+ */
+ sum00 = a[s + H4(0 + 0)];
+ sum00 = bfdotadd(sum00, n[s + H4(0 + 0)], m[s + H4(0 + 0)]);
+ sum00 = bfdotadd(sum00, n[s + H4(0 + 1)], m[s + H4(0 + 1)]);
+
+ sum01 = a[s + H4(0 + 1)];
+ sum01 = bfdotadd(sum01, n[s + H4(0 + 0)], m[s + H4(2 + 0)]);
+ sum01 = bfdotadd(sum01, n[s + H4(0 + 1)], m[s + H4(2 + 1)]);
+
+ sum10 = a[s + H4(2 + 0)];
+ sum10 = bfdotadd(sum10, n[s + H4(2 + 0)], m[s + H4(0 + 0)]);
+ sum10 = bfdotadd(sum10, n[s + H4(2 + 1)], m[s + H4(0 + 1)]);
+
+ sum11 = a[s + H4(2 + 1)];
+ sum11 = bfdotadd(sum11, n[s + H4(2 + 0)], m[s + H4(2 + 0)]);
+ sum11 = bfdotadd(sum11, n[s + H4(2 + 1)], m[s + H4(2 + 1)]);
+
+ d[s + H4(0 + 0)] = sum00;
+ d[s + H4(0 + 1)] = sum01;
+ d[s + H4(2 + 0)] = sum10;
+ d[s + H4(2 + 1)] = sum11;
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va,
+ void *stat, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc);
+ intptr_t sel = simd_data(desc);
+ float32 *d = vd, *a = va;
+ bfloat16 *n = vn, *m = vm;
+
+ for (i = 0; i < opr_sz / 4; ++i) {
+ float32 nn = n[H2(i * 2 + sel)] << 16;
+ float32 mm = m[H2(i * 2 + sel)] << 16;
+ d[H4(i)] = float32_muladd(nn, mm, a[H4(i)], 0, stat);
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm,
+ void *va, void *stat, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1);
+ intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 1, 3);
+ intptr_t elements = opr_sz / 4;
+ intptr_t eltspersegment = MIN(16 / 4, elements);
+ float32 *d = vd, *a = va;
+ bfloat16 *n = vn, *m = vm;
+
+ for (i = 0; i < elements; i += eltspersegment) {
+ float32 m_idx = m[H2(2 * i + index)] << 16;
+
+ for (j = i; j < i + eltspersegment; j++) {
+ float32 n_j = n[H2(2 * j + sel)] << 16;
+ d[H4(j)] = float32_muladd(n_j, m_idx, a[H4(j)], 0, stat);
+ }
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
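+/*
+ * CLAMP: d = MIN(MAX(a, n), m), i.e. constrain each element of operand
+ * a to the inclusive range [n, m], with n supplying the lower bound
+ * and m the upper bound.
+ */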
+#define DO_CLAMP(NAME, TYPE) \
+void HELPER(NAME)(void *d, void *n, void *m, void *a, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; i += sizeof(TYPE)) { \
+ TYPE aa = *(TYPE *)(a + i); \
+ TYPE nn = *(TYPE *)(n + i); \
+ TYPE mm = *(TYPE *)(m + i); \
+ TYPE dd = MIN(MAX(aa, nn), mm); \
+ *(TYPE *)(d + i) = dd; \
+ } \
+ clear_tail(d, opr_sz, simd_maxsz(desc)); \
+}
+
+DO_CLAMP(gvec_sclamp_b, int8_t)
+DO_CLAMP(gvec_sclamp_h, int16_t)
+DO_CLAMP(gvec_sclamp_s, int32_t)
+DO_CLAMP(gvec_sclamp_d, int64_t)
+
+DO_CLAMP(gvec_uclamp_b, uint8_t)
+DO_CLAMP(gvec_uclamp_h, uint16_t)
+DO_CLAMP(gvec_uclamp_s, uint32_t)
+DO_CLAMP(gvec_uclamp_d, uint64_t)
diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h
new file mode 100644
index 0000000000..3ca1b94ccf
--- /dev/null
+++ b/target/arm/tcg/vec_internal.h
@@ -0,0 +1,235 @@
+/*
+ * ARM AdvSIMD / SVE Vector Helpers
+ *
+ * Copyright (c) 2020 Linaro
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TARGET_ARM_VEC_INTERNAL_H
+#define TARGET_ARM_VEC_INTERNAL_H
+
+/*
+ * Note that vector data is stored in host-endian 64-bit chunks,
+ * so addressing units smaller than that needs a host-endian fixup.
+ *
+ * The H<N> macros are used when indexing an array of elements of size N.
+ *
+ * The H1_<N> macros are used when performing byte arithmetic and then
+ * casting the final pointer to a type of size N.
+ */
+#if HOST_BIG_ENDIAN
+#define H1(x) ((x) ^ 7)
+#define H1_2(x) ((x) ^ 6)
+#define H1_4(x) ((x) ^ 4)
+#define H2(x) ((x) ^ 3)
+#define H4(x) ((x) ^ 1)
+#else
+#define H1(x) (x)
+#define H1_2(x) (x)
+#define H1_4(x) (x)
+#define H2(x) (x)
+#define H4(x) (x)
+#endif
+/*
+ * Access to 64-bit elements isn't host-endian dependent; we provide H8
+ * and H1_8 so that when a function is being generated from a macro we
+ * can pass these rather than an empty macro argument, for clarity.
+ */
+#define H8(x) (x)
+#define H1_8(x) (x)
+
+/*
+ * Expand active predicate bits to bytes, for byte elements.
+ */
+extern const uint64_t expand_pred_b_data[256];
+static inline uint64_t expand_pred_b(uint8_t byte)
+{
+ return expand_pred_b_data[byte];
+}
+
+/* Similarly for half-word elements. */
+extern const uint64_t expand_pred_h_data[0x55 + 1];
+static inline uint64_t expand_pred_h(uint8_t byte)
+{
+ return expand_pred_h_data[byte & 0x55];
+}
+
+static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
+{
+ uint64_t *d = vd + opr_sz;
+ uintptr_t i;
+
+ for (i = opr_sz; i < max_sz; i += 8) {
+ *d++ = 0;
+ }
+}
+
+static inline int32_t do_sqrshl_bhs(int32_t src, int32_t shift, int bits,
+ bool round, uint32_t *sat)
+{
+ if (shift <= -bits) {
+ /* Rounding the sign bit always produces 0. */
+ if (round) {
+ return 0;
+ }
+ return src >> 31;
+ } else if (shift < 0) {
+ if (round) {
+ src >>= -shift - 1;
+ return (src >> 1) + (src & 1);
+ }
+ return src >> -shift;
+ } else if (shift < bits) {
+ int32_t val = src << shift;
+ if (bits == 32) {
+ if (!sat || val >> shift == src) {
+ return val;
+ }
+ } else {
+ int32_t extval = sextract32(val, 0, bits);
+ if (!sat || val == extval) {
+ return extval;
+ }
+ }
+ } else if (!sat || src == 0) {
+ return 0;
+ }
+
+ *sat = 1;
+ return (1u << (bits - 1)) - (src >= 0);
+}
+
+static inline uint32_t do_uqrshl_bhs(uint32_t src, int32_t shift, int bits,
+ bool round, uint32_t *sat)
+{
+ if (shift <= -(bits + round)) {
+ return 0;
+ } else if (shift < 0) {
+ if (round) {
+ src >>= -shift - 1;
+ return (src >> 1) + (src & 1);
+ }
+ return src >> -shift;
+ } else if (shift < bits) {
+ uint32_t val = src << shift;
+ if (bits == 32) {
+ if (!sat || val >> shift == src) {
+ return val;
+ }
+ } else {
+ uint32_t extval = extract32(val, 0, bits);
+ if (!sat || val == extval) {
+ return extval;
+ }
+ }
+ } else if (!sat || src == 0) {
+ return 0;
+ }
+
+ *sat = 1;
+ return MAKE_64BIT_MASK(0, bits);
+}
+
+static inline int32_t do_suqrshl_bhs(int32_t src, int32_t shift, int bits,
+ bool round, uint32_t *sat)
+{
+ if (sat && src < 0) {
+ *sat = 1;
+ return 0;
+ }
+ return do_uqrshl_bhs(src, shift, bits, round, sat);
+}
+
+static inline int64_t do_sqrshl_d(int64_t src, int64_t shift,
+ bool round, uint32_t *sat)
+{
+ if (shift <= -64) {
+ /* Rounding the sign bit always produces 0. */
+ if (round) {
+ return 0;
+ }
+ return src >> 63;
+ } else if (shift < 0) {
+ if (round) {
+ src >>= -shift - 1;
+ return (src >> 1) + (src & 1);
+ }
+ return src >> -shift;
+ } else if (shift < 64) {
+ int64_t val = src << shift;
+ if (!sat || val >> shift == src) {
+ return val;
+ }
+ } else if (!sat || src == 0) {
+ return 0;
+ }
+
+ *sat = 1;
+ return src < 0 ? INT64_MIN : INT64_MAX;
+}
+
+static inline uint64_t do_uqrshl_d(uint64_t src, int64_t shift,
+ bool round, uint32_t *sat)
+{
+ if (shift <= -(64 + round)) {
+ return 0;
+ } else if (shift < 0) {
+ if (round) {
+ src >>= -shift - 1;
+ return (src >> 1) + (src & 1);
+ }
+ return src >> -shift;
+ } else if (shift < 64) {
+ uint64_t val = src << shift;
+ if (!sat || val >> shift == src) {
+ return val;
+ }
+ } else if (!sat || src == 0) {
+ return 0;
+ }
+
+ *sat = 1;
+ return UINT64_MAX;
+}
+
+static inline int64_t do_suqrshl_d(int64_t src, int64_t shift,
+ bool round, uint32_t *sat)
+{
+ if (sat && src < 0) {
+ *sat = 1;
+ return 0;
+ }
+ return do_uqrshl_d(src, shift, round, sat);
+}
+
+int8_t do_sqrdmlah_b(int8_t, int8_t, int8_t, bool, bool);
+int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
+int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
+int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);
+
+/**
+ * bfdotadd:
+ * @sum: addend
+ * @e1, @e2: multiplicand vectors
+ *
+ * BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum.
+ * The @e1 and @e2 operands correspond to the 32-bit source vector
+ * slots and contain two Bfloat16 values each.
+ *
+ * Corresponds to the ARM pseudocode function BFDotAdd.
+ */
+float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2);
+
+#endif /* TARGET_ARM_VEC_INTERNAL_H */
diff --git a/target/arm/tcg/vfp-uncond.decode b/target/arm/tcg/vfp-uncond.decode
new file mode 100644
index 0000000000..5c50447a66
--- /dev/null
+++ b/target/arm/tcg/vfp-uncond.decode
@@ -0,0 +1,82 @@
+# AArch32 VFP instruction descriptions (unconditional insns)
+#
+# Copyright (c) 2019 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+# Encodings for the unconditional VFP instructions are here:
+# generally anything matching A32
+# 1111 1110 .... .... .... 101. ...0 ....
+# and T32
+# 1111 110. .... .... .... 101. .... ....
+# 1111 1110 .... .... .... 101. .... ....
+# (but those patterns might also cover some Neon instructions,
+# which do not live in this file.)
+
+# VFP registers have an odd encoding with a four-bit field
+# and a one-bit field which are assembled in different orders
+# depending on whether the register is double or single precision.
+# Each individual instruction function must do the checks for
+# "double register selected but CPU does not have double support"
+# and "double register number has bit 4 set but CPU does not
+# support D16-D31" (which should UNDEF).
+%vm_dp 5:1 0:4
+%vm_sp 0:4 5:1
+%vn_dp 7:1 16:4
+%vn_sp 16:4 7:1
+%vd_dp 22:1 12:4
+%vd_sp 12:4 22:1
+
+@vfp_dnm_s ................................ vm=%vm_sp vn=%vn_sp vd=%vd_sp
+@vfp_dnm_d ................................ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+VSEL 1111 1110 0. cc:2 .... .... 1001 .0.0 .... \
+ vm=%vm_sp vn=%vn_sp vd=%vd_sp sz=1
+VSEL 1111 1110 0. cc:2 .... .... 1010 .0.0 .... \
+ vm=%vm_sp vn=%vn_sp vd=%vd_sp sz=2
+VSEL 1111 1110 0. cc:2 .... .... 1011 .0.0 .... \
+ vm=%vm_dp vn=%vn_dp vd=%vd_dp sz=3
+
+VMAXNM_hp 1111 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s
+VMINNM_hp 1111 1110 1.00 .... .... 1001 .1.0 .... @vfp_dnm_s
+
+VMAXNM_sp 1111 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s
+VMINNM_sp 1111 1110 1.00 .... .... 1010 .1.0 .... @vfp_dnm_s
+
+VMAXNM_dp 1111 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d
+VMINNM_dp 1111 1110 1.00 .... .... 1011 .1.0 .... @vfp_dnm_d
+
+VRINT 1111 1110 1.11 10 rm:2 .... 1001 01.0 .... \
+ vm=%vm_sp vd=%vd_sp sz=1
+VRINT 1111 1110 1.11 10 rm:2 .... 1010 01.0 .... \
+ vm=%vm_sp vd=%vd_sp sz=2
+VRINT 1111 1110 1.11 10 rm:2 .... 1011 01.0 .... \
+ vm=%vm_dp vd=%vd_dp sz=3
+
+# VCVT float to int with specified rounding mode; Vd is always single-precision
+VCVT 1111 1110 1.11 11 rm:2 .... 1001 op:1 1.0 .... \
+ vm=%vm_sp vd=%vd_sp sz=1
+VCVT 1111 1110 1.11 11 rm:2 .... 1010 op:1 1.0 .... \
+ vm=%vm_sp vd=%vd_sp sz=2
+VCVT 1111 1110 1.11 11 rm:2 .... 1011 op:1 1.0 .... \
+ vm=%vm_dp vd=%vd_sp sz=3
+
+VMOVX 1111 1110 1.11 0000 .... 1010 01 . 0 .... \
+ vd=%vd_sp vm=%vm_sp
+
+VINS 1111 1110 1.11 0000 .... 1010 11 . 0 .... \
+ vd=%vd_sp vm=%vm_sp
diff --git a/target/arm/tcg/vfp.decode b/target/arm/tcg/vfp.decode
new file mode 100644
index 0000000000..5405e80197
--- /dev/null
+++ b/target/arm/tcg/vfp.decode
@@ -0,0 +1,247 @@
+# AArch32 VFP instruction descriptions (conditional insns)
+#
+# Copyright (c) 2019 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+# Encodings for the conditional VFP instructions are here:
+# generally anything matching A32
+# cccc 11.. .... .... .... 101. .... ....
+# and T32
+# 1110 110. .... .... .... 101. .... ....
+# 1110 1110 .... .... .... 101. .... ....
+# (but those patterns might also cover some Neon instructions,
+# which do not live in this file.)
+
+# VFP registers have an odd encoding with a four-bit field
+# and a one-bit field which are assembled in different orders
+# depending on whether the register is double or single precision.
+# Each individual instruction function must do the checks for
+# "double register selected but CPU does not have double support"
+# and "double register number has bit 4 set but CPU does not
+# support D16-D31" (which should UNDEF).
+%vm_dp 5:1 0:4
+%vm_sp 0:4 5:1
+%vn_dp 7:1 16:4
+%vn_sp 16:4 7:1
+%vd_dp 22:1 12:4
+%vd_sp 12:4 22:1
+
+%vmov_idx_b 21:1 5:2
+%vmov_idx_h 21:1 6:1
+
+%vmov_imm 16:4 0:4
+
+@vfp_dnm_s ................................ vm=%vm_sp vn=%vn_sp vd=%vd_sp
+@vfp_dnm_d ................................ vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
+@vfp_dm_ss ................................ vm=%vm_sp vd=%vd_sp
+@vfp_dm_dd ................................ vm=%vm_dp vd=%vd_dp
+@vfp_dm_ds ................................ vm=%vm_sp vd=%vd_dp
+@vfp_dm_sd ................................ vm=%vm_dp vd=%vd_sp
+
+# VMOV scalar to general-purpose register; note that this does
+# include some Neon cases.
+VMOV_to_gp ---- 1110 u:1 1. 1 .... rt:4 1011 ... 1 0000 \
+ vn=%vn_dp size=0 index=%vmov_idx_b
+VMOV_to_gp ---- 1110 u:1 0. 1 .... rt:4 1011 ..1 1 0000 \
+ vn=%vn_dp size=1 index=%vmov_idx_h
+VMOV_to_gp ---- 1110 0 0 index:1 1 .... rt:4 1011 .00 1 0000 \
+ vn=%vn_dp size=2 u=0
+
+VMOV_from_gp ---- 1110 0 1. 0 .... rt:4 1011 ... 1 0000 \
+ vn=%vn_dp size=0 index=%vmov_idx_b
+VMOV_from_gp ---- 1110 0 0. 0 .... rt:4 1011 ..1 1 0000 \
+ vn=%vn_dp size=1 index=%vmov_idx_h
+VMOV_from_gp ---- 1110 0 0 index:1 0 .... rt:4 1011 .00 1 0000 \
+ vn=%vn_dp size=2
+
+VDUP ---- 1110 1 b:1 q:1 0 .... rt:4 1011 . 0 e:1 1 0000 \
+ vn=%vn_dp
+
+VMSR_VMRS ---- 1110 111 l:1 reg:4 rt:4 1010 0001 0000
+VMOV_half ---- 1110 000 l:1 .... rt:4 1001 . 001 0000 vn=%vn_sp
+VMOV_single ---- 1110 000 l:1 .... rt:4 1010 . 001 0000 vn=%vn_sp
+
+VMOV_64_sp ---- 1100 010 op:1 rt2:4 rt:4 1010 00.1 .... vm=%vm_sp
+VMOV_64_dp ---- 1100 010 op:1 rt2:4 rt:4 1011 00.1 .... vm=%vm_dp
+
+VLDR_VSTR_hp ---- 1101 u:1 .0 l:1 rn:4 .... 1001 imm:8 vd=%vd_sp
+VLDR_VSTR_sp ---- 1101 u:1 .0 l:1 rn:4 .... 1010 imm:8 vd=%vd_sp
+VLDR_VSTR_dp ---- 1101 u:1 .0 l:1 rn:4 .... 1011 imm:8 vd=%vd_dp
+
+# We split the load/store multiple up into two patterns to avoid
+# overlap with other insns in the "Advanced SIMD load/store and 64-bit move"
+# grouping:
+# P=0 U=0 W=0 is 64-bit VMOV
+# P=1 W=0 is VLDR/VSTR
+# P=U W=1 is UNDEF
+# leaving P=0 U=1 W=x and P=1 U=0 W=1 for load/store multiple.
+# These include FSTM/FLDM.
+VLDM_VSTM_sp ---- 1100 1 . w:1 l:1 rn:4 .... 1010 imm:8 \
+ vd=%vd_sp p=0 u=1
+VLDM_VSTM_dp ---- 1100 1 . w:1 l:1 rn:4 .... 1011 imm:8 \
+ vd=%vd_dp p=0 u=1
+
+VLDM_VSTM_sp ---- 1101 0.1 l:1 rn:4 .... 1010 imm:8 \
+ vd=%vd_sp p=1 u=0 w=1
+VLDM_VSTM_dp ---- 1101 0.1 l:1 rn:4 .... 1011 imm:8 \
+ vd=%vd_dp p=1 u=0 w=1
+
+# 3-register VFP data-processing; bits [23,21:20,6] identify the operation.
+VMLA_hp ---- 1110 0.00 .... .... 1001 .0.0 .... @vfp_dnm_s
+VMLA_sp ---- 1110 0.00 .... .... 1010 .0.0 .... @vfp_dnm_s
+VMLA_dp ---- 1110 0.00 .... .... 1011 .0.0 .... @vfp_dnm_d
+
+VMLS_hp ---- 1110 0.00 .... .... 1001 .1.0 .... @vfp_dnm_s
+VMLS_sp ---- 1110 0.00 .... .... 1010 .1.0 .... @vfp_dnm_s
+VMLS_dp ---- 1110 0.00 .... .... 1011 .1.0 .... @vfp_dnm_d
+
+VNMLS_hp ---- 1110 0.01 .... .... 1001 .0.0 .... @vfp_dnm_s
+VNMLS_sp ---- 1110 0.01 .... .... 1010 .0.0 .... @vfp_dnm_s
+VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... @vfp_dnm_d
+
+VNMLA_hp ---- 1110 0.01 .... .... 1001 .1.0 .... @vfp_dnm_s
+VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... @vfp_dnm_s
+VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... @vfp_dnm_d
+
+VMUL_hp ---- 1110 0.10 .... .... 1001 .0.0 .... @vfp_dnm_s
+VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... @vfp_dnm_s
+VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... @vfp_dnm_d
+
+VNMUL_hp ---- 1110 0.10 .... .... 1001 .1.0 .... @vfp_dnm_s
+VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... @vfp_dnm_s
+VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... @vfp_dnm_d
+
+VADD_hp ---- 1110 0.11 .... .... 1001 .0.0 .... @vfp_dnm_s
+VADD_sp ---- 1110 0.11 .... .... 1010 .0.0 .... @vfp_dnm_s
+VADD_dp ---- 1110 0.11 .... .... 1011 .0.0 .... @vfp_dnm_d
+
+VSUB_hp ---- 1110 0.11 .... .... 1001 .1.0 .... @vfp_dnm_s
+VSUB_sp ---- 1110 0.11 .... .... 1010 .1.0 .... @vfp_dnm_s
+VSUB_dp ---- 1110 0.11 .... .... 1011 .1.0 .... @vfp_dnm_d
+
+VDIV_hp ---- 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s
+VDIV_sp ---- 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s
+VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d
+
+VFMA_hp ---- 1110 1.10 .... .... 1001 .0. 0 .... @vfp_dnm_s
+VFMS_hp ---- 1110 1.10 .... .... 1001 .1. 0 .... @vfp_dnm_s
+VFNMA_hp ---- 1110 1.01 .... .... 1001 .0. 0 .... @vfp_dnm_s
+VFNMS_hp ---- 1110 1.01 .... .... 1001 .1. 0 .... @vfp_dnm_s
+
+VFMA_sp ---- 1110 1.10 .... .... 1010 .0. 0 .... @vfp_dnm_s
+VFMS_sp ---- 1110 1.10 .... .... 1010 .1. 0 .... @vfp_dnm_s
+VFNMA_sp ---- 1110 1.01 .... .... 1010 .0. 0 .... @vfp_dnm_s
+VFNMS_sp ---- 1110 1.01 .... .... 1010 .1. 0 .... @vfp_dnm_s
+
+VFMA_dp ---- 1110 1.10 .... .... 1011 .0.0 .... @vfp_dnm_d
+VFMS_dp ---- 1110 1.10 .... .... 1011 .1.0 .... @vfp_dnm_d
+VFNMA_dp ---- 1110 1.01 .... .... 1011 .0.0 .... @vfp_dnm_d
+VFNMS_dp ---- 1110 1.01 .... .... 1011 .1.0 .... @vfp_dnm_d
+
+VMOV_imm_hp ---- 1110 1.11 .... .... 1001 0000 .... \
+ vd=%vd_sp imm=%vmov_imm
+VMOV_imm_sp ---- 1110 1.11 .... .... 1010 0000 .... \
+ vd=%vd_sp imm=%vmov_imm
+VMOV_imm_dp ---- 1110 1.11 .... .... 1011 0000 .... \
+ vd=%vd_dp imm=%vmov_imm
+
+VMOV_reg_sp ---- 1110 1.11 0000 .... 1010 01.0 .... @vfp_dm_ss
+VMOV_reg_dp ---- 1110 1.11 0000 .... 1011 01.0 .... @vfp_dm_dd
+
+VABS_hp ---- 1110 1.11 0000 .... 1001 11.0 .... @vfp_dm_ss
+VABS_sp ---- 1110 1.11 0000 .... 1010 11.0 .... @vfp_dm_ss
+VABS_dp ---- 1110 1.11 0000 .... 1011 11.0 .... @vfp_dm_dd
+
+VNEG_hp ---- 1110 1.11 0001 .... 1001 01.0 .... @vfp_dm_ss
+VNEG_sp ---- 1110 1.11 0001 .... 1010 01.0 .... @vfp_dm_ss
+VNEG_dp ---- 1110 1.11 0001 .... 1011 01.0 .... @vfp_dm_dd
+
+VSQRT_hp ---- 1110 1.11 0001 .... 1001 11.0 .... @vfp_dm_ss
+VSQRT_sp ---- 1110 1.11 0001 .... 1010 11.0 .... @vfp_dm_ss
+VSQRT_dp ---- 1110 1.11 0001 .... 1011 11.0 .... @vfp_dm_dd
+
+VCMP_hp ---- 1110 1.11 010 z:1 .... 1001 e:1 1.0 .... \
+ vd=%vd_sp vm=%vm_sp
+VCMP_sp ---- 1110 1.11 010 z:1 .... 1010 e:1 1.0 .... \
+ vd=%vd_sp vm=%vm_sp
+VCMP_dp ---- 1110 1.11 010 z:1 .... 1011 e:1 1.0 .... \
+ vd=%vd_dp vm=%vm_dp
+
+# VCVTT and VCVTB from f16: Vd format depends on size bit; Vm is always vm_sp
+VCVT_f32_f16 ---- 1110 1.11 0010 .... 1010 t:1 1.0 .... \
+ vd=%vd_sp vm=%vm_sp
+VCVT_f64_f16 ---- 1110 1.11 0010 .... 1011 t:1 1.0 .... \
+ vd=%vd_dp vm=%vm_sp
+
+# VCVTB and VCVTT to f16: Vd format is always vd_sp;
+# Vm format depends on size bit
+VCVT_b16_f32 ---- 1110 1.11 0011 .... 1001 t:1 1.0 .... \
+ vd=%vd_sp vm=%vm_sp
+VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \
+ vd=%vd_sp vm=%vm_sp
+VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
+ vd=%vd_sp vm=%vm_dp
+
+VRINTR_hp ---- 1110 1.11 0110 .... 1001 01.0 .... @vfp_dm_ss
+VRINTR_sp ---- 1110 1.11 0110 .... 1010 01.0 .... @vfp_dm_ss
+VRINTR_dp ---- 1110 1.11 0110 .... 1011 01.0 .... @vfp_dm_dd
+
+VRINTZ_hp ---- 1110 1.11 0110 .... 1001 11.0 .... @vfp_dm_ss
+VRINTZ_sp ---- 1110 1.11 0110 .... 1010 11.0 .... @vfp_dm_ss
+VRINTZ_dp ---- 1110 1.11 0110 .... 1011 11.0 .... @vfp_dm_dd
+
+VRINTX_hp ---- 1110 1.11 0111 .... 1001 01.0 .... @vfp_dm_ss
+VRINTX_sp ---- 1110 1.11 0111 .... 1010 01.0 .... @vfp_dm_ss
+VRINTX_dp ---- 1110 1.11 0111 .... 1011 01.0 .... @vfp_dm_dd
+
+# VCVT between single and double:
+# Vm precision depends on size; Vd is its reverse
+VCVT_sp ---- 1110 1.11 0111 .... 1010 11.0 .... @vfp_dm_ds
+VCVT_dp ---- 1110 1.11 0111 .... 1011 11.0 .... @vfp_dm_sd
+
+# VCVT from integer to floating point: Vm always single; Vd depends on size
+VCVT_int_hp ---- 1110 1.11 1000 .... 1001 s:1 1.0 .... \
+ vd=%vd_sp vm=%vm_sp
+VCVT_int_sp ---- 1110 1.11 1000 .... 1010 s:1 1.0 .... \
+ vd=%vd_sp vm=%vm_sp
+VCVT_int_dp ---- 1110 1.11 1000 .... 1011 s:1 1.0 .... \
+ vd=%vd_dp vm=%vm_sp
+
+# VJCVT is always dp to sp
+VJCVT ---- 1110 1.11 1001 .... 1011 11.0 .... @vfp_dm_sd
+
+# VCVT between floating-point and fixed-point. The immediate value
+# is in the same format as a Vm single-precision register number.
+# We assemble bits 18 (op), 16 (u) and 7 (sx) into a single opc field
+# for the convenience of the trans_VCVT_fix functions.
+%vcvt_fix_op 18:1 16:1 7:1
+VCVT_fix_hp ---- 1110 1.11 1.1. .... 1001 .1.0 .... \
+ vd=%vd_sp imm=%vm_sp opc=%vcvt_fix_op
+VCVT_fix_sp ---- 1110 1.11 1.1. .... 1010 .1.0 .... \
+ vd=%vd_sp imm=%vm_sp opc=%vcvt_fix_op
+VCVT_fix_dp ---- 1110 1.11 1.1. .... 1011 .1.0 .... \
+ vd=%vd_dp imm=%vm_sp opc=%vcvt_fix_op
+
+# VCVT float to integer (VCVT and VCVTR): Vd always single; Vm depends on size
+VCVT_hp_int ---- 1110 1.11 110 s:1 .... 1001 rz:1 1.0 .... \
+ vd=%vd_sp vm=%vm_sp
+VCVT_sp_int ---- 1110 1.11 110 s:1 .... 1010 rz:1 1.0 .... \
+ vd=%vd_sp vm=%vm_sp
+VCVT_dp_int ---- 1110 1.11 110 s:1 .... 1011 rz:1 1.0 .... \
+ vd=%vd_sp vm=%vm_dp
diff --git a/target/arm/trace-events b/target/arm/trace-events
index 6b759f9d4f..4438dce7be 100644
--- a/target/arm/trace-events
+++ b/target/arm/trace-events
@@ -1,13 +1,15 @@
-# See docs/devel/tracing.txt for syntax documentation.
+# See docs/devel/tracing.rst for syntax documentation.
-# target/arm/helper.c
-arm_gt_recalc(int timer, int irqstate, uint64_t nexttick) "gt recalc: timer %d irqstate %d next tick 0x%" PRIx64
-arm_gt_recalc_disabled(int timer) "gt recalc: timer %d irqstate 0 timer disabled"
+# helper.c
+arm_gt_recalc(int timer, uint64_t nexttick) "gt recalc: timer %d next tick 0x%" PRIx64
+arm_gt_recalc_disabled(int timer) "gt recalc: timer %d timer disabled"
arm_gt_cval_write(int timer, uint64_t value) "gt_cval_write: timer %d value 0x%" PRIx64
arm_gt_tval_write(int timer, uint64_t value) "gt_tval_write: timer %d value 0x%" PRIx64
arm_gt_ctl_write(int timer, uint64_t value) "gt_ctl_write: timer %d value 0x%" PRIx64
-arm_gt_imask_toggle(int timer, int irqstate) "gt_ctl_write: timer %d IMASK toggle, new irqstate %d"
+arm_gt_imask_toggle(int timer) "gt_ctl_write: timer %d IMASK toggle"
arm_gt_cntvoff_write(uint64_t value) "gt_cntvoff_write: value 0x%" PRIx64
+arm_gt_cntpoff_write(uint64_t value) "gt_cntpoff_write: value 0x%" PRIx64
+arm_gt_update_irq(int timer, int irqstate) "gt_update_irq: timer %d irqstate %d"
-# target/arm/kvm.c
+# kvm.c
kvm_arm_fixup_msi_route(uint64_t iova, uint64_t gpa) "MSI iova = 0x%"PRIx64" is translated into 0x%"PRIx64
diff --git a/target/arm/trace.h b/target/arm/trace.h
new file mode 100644
index 0000000000..60372d8e26
--- /dev/null
+++ b/target/arm/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-target_arm.h"
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
deleted file mode 100644
index fe7aebdc19..0000000000
--- a/target/arm/translate-sve.c
+++ /dev/null
@@ -1,5483 +0,0 @@
-/*
- * AArch64 SVE translation
- *
- * Copyright (c) 2018 Linaro, Ltd
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "exec/exec-all.h"
-#include "tcg-op.h"
-#include "tcg-op-gvec.h"
-#include "tcg-gvec-desc.h"
-#include "qemu/log.h"
-#include "arm_ldst.h"
-#include "translate.h"
-#include "internals.h"
-#include "exec/helper-proto.h"
-#include "exec/helper-gen.h"
-#include "exec/log.h"
-#include "trace-tcg.h"
-#include "translate-a64.h"
-#include "fpu/softfloat.h"
-
-
-typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
- TCGv_i64, uint32_t, uint32_t);
-
-typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
- TCGv_ptr, TCGv_i32);
-typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
- TCGv_ptr, TCGv_ptr, TCGv_i32);
-
-typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
-typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
- TCGv_ptr, TCGv_i64, TCGv_i32);
-
-/*
- * Helpers for extracting complex instruction fields.
- */
-
-/* See e.g. ASR (immediate, predicated).
- * Returns -1 for unallocated encoding; diagnose later.
- */
-static int tszimm_esz(int x)
-{
- x >>= 3; /* discard imm3 */
- return 31 - clz32(x);
-}
-
-static int tszimm_shr(int x)
-{
- return (16 << tszimm_esz(x)) - x;
-}
-
-/* See e.g. LSL (immediate, predicated). */
-static int tszimm_shl(int x)
-{
- return x - (8 << tszimm_esz(x));
-}
-
-static inline int plus1(int x)
-{
- return x + 1;
-}
-
-/* The SH bit is in bit 8. Extract the low 8 and shift. */
-static inline int expand_imm_sh8s(int x)
-{
- return (int8_t)x << (x & 0x100 ? 8 : 0);
-}
-
-static inline int expand_imm_sh8u(int x)
-{
- return (uint8_t)x << (x & 0x100 ? 8 : 0);
-}
-
-/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
- * with unsigned data. C.f. SVE Memory Contiguous Load Group.
- */
-static inline int msz_dtype(int msz)
-{
- static const uint8_t dtype[4] = { 0, 5, 10, 15 };
- return dtype[msz];
-}
-
-/*
- * Include the generated decoder.
- */
-
-#include "decode-sve.inc.c"
-
-/*
- * Implement all of the translator functions referenced by the decoder.
- */
-
-/* Return the offset into CPUARMState of the predicate vector register Pn.
- * Note for this purpose, FFR is P16.
- */
-static inline int pred_full_reg_offset(DisasContext *s, int regno)
-{
- return offsetof(CPUARMState, vfp.pregs[regno]);
-}
-
-/* Return the byte size of the whole predicate register, VL / 64. */
-static inline int pred_full_reg_size(DisasContext *s)
-{
- return s->sve_len >> 3;
-}
-
-/* Round up the size of a register to a size allowed by
- * the tcg vector infrastructure. Any operation which uses this
- * size may assume that the bits above pred_full_reg_size are zero,
- * and must leave them the same way.
- *
- * Note that this is not needed for the vector registers as they
- * are always properly sized for tcg vectors.
- */
-static int size_for_gvec(int size)
-{
- if (size <= 8) {
- return 8;
- } else {
- return QEMU_ALIGN_UP(size, 16);
- }
-}
-
-static int pred_gvec_reg_size(DisasContext *s)
-{
- return size_for_gvec(pred_full_reg_size(s));
-}
-
-/* Invoke a vector expander on two Zregs. */
-static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
- int esz, int rd, int rn)
-{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- gvec_fn(esz, vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn), vsz, vsz);
- }
- return true;
-}
-
-/* Invoke a vector expander on three Zregs. */
-static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
- int esz, int rd, int rn, int rm)
-{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- gvec_fn(esz, vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm), vsz, vsz);
- }
- return true;
-}
-
-/* Invoke a vector move on two Zregs. */
-static bool do_mov_z(DisasContext *s, int rd, int rn)
-{
- return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
-}
-
-/* Initialize a Zreg with replications of a 64-bit immediate. */
-static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
-{
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
-}
-
-/* Invoke a vector expander on two Pregs. */
-static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
- int esz, int rd, int rn)
-{
- if (sve_access_check(s)) {
- unsigned psz = pred_gvec_reg_size(s);
- gvec_fn(esz, pred_full_reg_offset(s, rd),
- pred_full_reg_offset(s, rn), psz, psz);
- }
- return true;
-}
-
-/* Invoke a vector expander on three Pregs. */
-static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
- int esz, int rd, int rn, int rm)
-{
- if (sve_access_check(s)) {
- unsigned psz = pred_gvec_reg_size(s);
- gvec_fn(esz, pred_full_reg_offset(s, rd),
- pred_full_reg_offset(s, rn),
- pred_full_reg_offset(s, rm), psz, psz);
- }
- return true;
-}
-
-/* Invoke a vector operation on four Pregs. */
-static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
- int rd, int rn, int rm, int rg)
-{
- if (sve_access_check(s)) {
- unsigned psz = pred_gvec_reg_size(s);
- tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
- pred_full_reg_offset(s, rn),
- pred_full_reg_offset(s, rm),
- pred_full_reg_offset(s, rg),
- psz, psz, gvec_op);
- }
- return true;
-}
-
-/* Invoke a vector move on two Pregs. */
-static bool do_mov_p(DisasContext *s, int rd, int rn)
-{
- return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
-}
-
-/* Set the cpu flags as per a return from an SVE helper. */
-static void do_pred_flags(TCGv_i32 t)
-{
- tcg_gen_mov_i32(cpu_NF, t);
- tcg_gen_andi_i32(cpu_ZF, t, 2);
- tcg_gen_andi_i32(cpu_CF, t, 1);
- tcg_gen_movi_i32(cpu_VF, 0);
-}
-
-/* Subroutines computing the ARM PredTest pseudofunction. */
-static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
-{
- TCGv_i32 t = tcg_temp_new_i32();
-
- gen_helper_sve_predtest1(t, d, g);
- do_pred_flags(t);
- tcg_temp_free_i32(t);
-}
-
-static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
-{
- TCGv_ptr dptr = tcg_temp_new_ptr();
- TCGv_ptr gptr = tcg_temp_new_ptr();
- TCGv_i32 t;
-
- tcg_gen_addi_ptr(dptr, cpu_env, dofs);
- tcg_gen_addi_ptr(gptr, cpu_env, gofs);
- t = tcg_const_i32(words);
-
- gen_helper_sve_predtest(t, dptr, gptr, t);
- tcg_temp_free_ptr(dptr);
- tcg_temp_free_ptr(gptr);
-
- do_pred_flags(t);
- tcg_temp_free_i32(t);
-}
-
-/* For each element size, the bits within a predicate word that are active. */
-const uint64_t pred_esz_masks[4] = {
- 0xffffffffffffffffull, 0x5555555555555555ull,
- 0x1111111111111111ull, 0x0101010101010101ull
-};
-
-/*
- *** SVE Logical - Unpredicated Group
- */
-
-static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
-}
-
-static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- if (a->rn == a->rm) { /* MOV */
- return do_mov_z(s, a->rd, a->rn);
- } else {
- return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
- }
-}
-
-static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
-}
-
-static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
-}
-
-/*
- *** SVE Integer Arithmetic - Unpredicated Group
- */
-
-static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
-}
-
-static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
-}
-
-static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
-}
-
-static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
-}
-
-static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
-}
-
-static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
-}
-
-/*
- *** SVE Integer Arithmetic - Binary Predicated Group
- */
-
-static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
-{
- unsigned vsz = vec_full_reg_size(s);
- if (fn == NULL) {
- return false;
- }
- if (sve_access_check(s)) {
- tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, 0, fn);
- }
- return true;
-}
-
-/* Select active elements from Zn and inactive elements from Zm,
- * storing the result in Zd.
- */
-static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
-{
- static gen_helper_gvec_4 * const fns[4] = {
- gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
- gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
- };
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- vec_full_reg_offset(s, rm),
- pred_full_reg_offset(s, pg),
- vsz, vsz, 0, fns[esz]);
-}
-
-#define DO_ZPZZ(NAME, name) \
-static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
- uint32_t insn) \
-{ \
- static gen_helper_gvec_4 * const fns[4] = { \
- gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
- gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
- }; \
- return do_zpzz_ool(s, a, fns[a->esz]); \
-}
-
-DO_ZPZZ(AND, and)
-DO_ZPZZ(EOR, eor)
-DO_ZPZZ(ORR, orr)
-DO_ZPZZ(BIC, bic)
-
-DO_ZPZZ(ADD, add)
-DO_ZPZZ(SUB, sub)
-
-DO_ZPZZ(SMAX, smax)
-DO_ZPZZ(UMAX, umax)
-DO_ZPZZ(SMIN, smin)
-DO_ZPZZ(UMIN, umin)
-DO_ZPZZ(SABD, sabd)
-DO_ZPZZ(UABD, uabd)
-
-DO_ZPZZ(MUL, mul)
-DO_ZPZZ(SMULH, smulh)
-DO_ZPZZ(UMULH, umulh)
-
-DO_ZPZZ(ASR, asr)
-DO_ZPZZ(LSR, lsr)
-DO_ZPZZ(LSL, lsl)
-
-static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_4 * const fns[4] = {
- NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
- };
- return do_zpzz_ool(s, a, fns[a->esz]);
-}
-
-static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_4 * const fns[4] = {
- NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
- };
- return do_zpzz_ool(s, a, fns[a->esz]);
-}
-
-static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
- }
- return true;
-}
-
-#undef DO_ZPZZ
-
-/*
- *** SVE Integer Arithmetic - Unary Predicated Group
- */
-
-static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
-{
- if (fn == NULL) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, 0, fn);
- }
- return true;
-}
-
-#define DO_ZPZ(NAME, name) \
-static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
-{ \
- static gen_helper_gvec_3 * const fns[4] = { \
- gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
- gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
- }; \
- return do_zpz_ool(s, a, fns[a->esz]); \
-}
-
-DO_ZPZ(CLS, cls)
-DO_ZPZ(CLZ, clz)
-DO_ZPZ(CNT_zpz, cnt_zpz)
-DO_ZPZ(CNOT, cnot)
-DO_ZPZ(NOT_zpz, not_zpz)
-DO_ZPZ(ABS, abs)
-DO_ZPZ(NEG, neg)
-
-static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- NULL,
- gen_helper_sve_fabs_h,
- gen_helper_sve_fabs_s,
- gen_helper_sve_fabs_d
- };
- return do_zpz_ool(s, a, fns[a->esz]);
-}
-
-static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- NULL,
- gen_helper_sve_fneg_h,
- gen_helper_sve_fneg_s,
- gen_helper_sve_fneg_d
- };
- return do_zpz_ool(s, a, fns[a->esz]);
-}
-
-static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- NULL,
- gen_helper_sve_sxtb_h,
- gen_helper_sve_sxtb_s,
- gen_helper_sve_sxtb_d
- };
- return do_zpz_ool(s, a, fns[a->esz]);
-}
-
-static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- NULL,
- gen_helper_sve_uxtb_h,
- gen_helper_sve_uxtb_s,
- gen_helper_sve_uxtb_d
- };
- return do_zpz_ool(s, a, fns[a->esz]);
-}
-
-static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- NULL, NULL,
- gen_helper_sve_sxth_s,
- gen_helper_sve_sxth_d
- };
- return do_zpz_ool(s, a, fns[a->esz]);
-}
-
-static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- NULL, NULL,
- gen_helper_sve_uxth_s,
- gen_helper_sve_uxth_d
- };
- return do_zpz_ool(s, a, fns[a->esz]);
-}
-
-static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
-}
-
-static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
-}
-
-#undef DO_ZPZ
-
-/*
- *** SVE Integer Reduction Group
- */
-
-typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
-static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
- gen_helper_gvec_reduc *fn)
-{
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr t_zn, t_pg;
- TCGv_i32 desc;
- TCGv_i64 temp;
-
- if (fn == NULL) {
- return false;
- }
- if (!sve_access_check(s)) {
- return true;
- }
-
- desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
- temp = tcg_temp_new_i64();
- t_zn = tcg_temp_new_ptr();
- t_pg = tcg_temp_new_ptr();
-
- tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
- tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
- fn(temp, t_zn, t_pg, desc);
- tcg_temp_free_ptr(t_zn);
- tcg_temp_free_ptr(t_pg);
- tcg_temp_free_i32(desc);
-
- write_fp_dreg(s, a->rd, temp);
- tcg_temp_free_i64(temp);
- return true;
-}
-
-#define DO_VPZ(NAME, name) \
-static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
-{ \
- static gen_helper_gvec_reduc * const fns[4] = { \
- gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
- gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
- }; \
- return do_vpz_ool(s, a, fns[a->esz]); \
-}
-
-DO_VPZ(ORV, orv)
-DO_VPZ(ANDV, andv)
-DO_VPZ(EORV, eorv)
-
-DO_VPZ(UADDV, uaddv)
-DO_VPZ(SMAXV, smaxv)
-DO_VPZ(UMAXV, umaxv)
-DO_VPZ(SMINV, sminv)
-DO_VPZ(UMINV, uminv)
-
-static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_reduc * const fns[4] = {
- gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
- gen_helper_sve_saddv_s, NULL
- };
- return do_vpz_ool(s, a, fns[a->esz]);
-}
-
-#undef DO_VPZ
-
-/*
- *** SVE Shift by Immediate - Predicated Group
- */
-
-/* Store zero into every active element of Zd. We will use this for two
- * and three-operand predicated instructions for which logic dictates a
- * zero result.
- */
-static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
-{
- static gen_helper_gvec_2 * const fns[4] = {
- gen_helper_sve_clr_b, gen_helper_sve_clr_h,
- gen_helper_sve_clr_s, gen_helper_sve_clr_d,
- };
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
- pred_full_reg_offset(s, pg),
- vsz, vsz, 0, fns[esz]);
- }
- return true;
-}
-
-/* Copy Zn into Zd, storing zeros into inactive elements. */
-static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- gen_helper_sve_movz_b, gen_helper_sve_movz_h,
- gen_helper_sve_movz_s, gen_helper_sve_movz_d,
- };
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- pred_full_reg_offset(s, pg),
- vsz, vsz, 0, fns[esz]);
-}
-
-static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
- gen_helper_gvec_3 *fn)
-{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, a->imm, fn);
- }
- return true;
-}
-
-static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
- gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
- };
- if (a->esz < 0) {
- /* Invalid tsz encoding -- see tszimm_esz. */
- return false;
- }
- /* Shift by element size is architecturally valid. For
- arithmetic right-shift, it's the same as by one less. */
- a->imm = MIN(a->imm, (8 << a->esz) - 1);
- return do_zpzi_ool(s, a, fns[a->esz]);
-}
-
-static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
- gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
- };
- if (a->esz < 0) {
- return false;
- }
- /* Shift by element size is architecturally valid.
- For logical shifts, it is a zeroing operation. */
- if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
- } else {
- return do_zpzi_ool(s, a, fns[a->esz]);
- }
-}
-
-static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
- gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
- };
- if (a->esz < 0) {
- return false;
- }
- /* Shift by element size is architecturally valid.
- For logical shifts, it is a zeroing operation. */
- if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
- } else {
- return do_zpzi_ool(s, a, fns[a->esz]);
- }
-}
-
-static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
- gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
- };
- if (a->esz < 0) {
- return false;
- }
- /* Shift by element size is architecturally valid. For arithmetic
- right shift for division, it is a zeroing operation. */
- if (a->imm >= (8 << a->esz)) {
- return do_clr_zp(s, a->rd, a->pg, a->esz);
- } else {
- return do_zpzi_ool(s, a, fns[a->esz]);
- }
-}
-
-/*
- *** SVE Bitwise Shift - Predicated Group
- */
-
-#define DO_ZPZW(NAME, name) \
-static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
- uint32_t insn) \
-{ \
- static gen_helper_gvec_4 * const fns[3] = { \
- gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
- gen_helper_sve_##name##_zpzw_s, \
- }; \
- if (a->esz < 0 || a->esz >= 3) { \
- return false; \
- } \
- return do_zpzz_ool(s, a, fns[a->esz]); \
-}
-
-DO_ZPZW(ASR, asr)
-DO_ZPZW(LSR, lsr)
-DO_ZPZW(LSL, lsl)
-
-#undef DO_ZPZW
-
-/*
- *** SVE Bitwise Shift - Unpredicated Group
- */
-
-static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
- void (*gvec_fn)(unsigned, uint32_t, uint32_t,
- int64_t, uint32_t, uint32_t))
-{
- if (a->esz < 0) {
- /* Invalid tsz encoding -- see tszimm_esz. */
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- /* Shift by element size is architecturally valid. For
- arithmetic right-shift, it's the same as by one less.
- Otherwise it is a zeroing operation. */
- if (a->imm >= 8 << a->esz) {
- if (asr) {
- a->imm = (8 << a->esz) - 1;
- } else {
- do_dupi_z(s, a->rd, 0);
- return true;
- }
- }
- gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
- }
- return true;
-}
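
/*
 * A plain-C sketch of the edge case handled above, for a single element
 * of width 1 << esz bytes (illustration only; assumes <stdint.h> and
 * <stdbool.h>, and an arithmetic right shift of signed values).  A shift
 * count equal to the element size is a valid encoding: it is clamped to
 * size - 1 for ASR and yields zero for LSR/LSL.
 */
static uint64_t shift_imm_sketch(uint64_t elem, int esz, unsigned shift,
                                 bool asr, bool left)
{
    unsigned bits = 8u << esz;
    uint64_t mask = bits == 64 ? ~0ull : (1ull << bits) - 1;
    int64_t selem = (int64_t)(elem << (64 - bits)) >> (64 - bits);

    if (shift >= bits) {
        if (!asr) {
            return 0;             /* LSR/LSL by the element size zeroes it */
        }
        shift = bits - 1;         /* ASR by the element size == by one less */
    }
    if (asr) {
        return (uint64_t)(selem >> shift) & mask;
    }
    return (left ? elem << shift : (elem & mask) >> shift) & mask;
}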
-
-static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
-{
- return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
-}
-
-static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
-{
- return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
-}
-
-static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
-{
- return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
-}
-
-static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
-{
- if (fn == NULL) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fn);
- }
- return true;
-}
-
-#define DO_ZZW(NAME, name) \
-static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
- uint32_t insn) \
-{ \
- static gen_helper_gvec_3 * const fns[4] = { \
- gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
- gen_helper_sve_##name##_zzw_s, NULL \
- }; \
- return do_zzw_ool(s, a, fns[a->esz]); \
-}
-
-DO_ZZW(ASR, asr)
-DO_ZZW(LSR, lsr)
-DO_ZZW(LSL, lsl)
-
-#undef DO_ZZW
-
-/*
- *** SVE Integer Multiply-Add Group
- */
-
-static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
- gen_helper_gvec_5 *fn)
-{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->ra),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, 0, fn);
- }
- return true;
-}
-
-#define DO_ZPZZZ(NAME, name) \
-static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
-{ \
- static gen_helper_gvec_5 * const fns[4] = { \
- gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
- gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
- }; \
- return do_zpzzz_ool(s, a, fns[a->esz]); \
-}
-
-DO_ZPZZZ(MLA, mla)
-DO_ZPZZZ(MLS, mls)
-
-#undef DO_ZPZZZ
-
-/*
- *** SVE Index Generation Group
- */
-
-static void do_index(DisasContext *s, int esz, int rd,
- TCGv_i64 start, TCGv_i64 incr)
-{
- unsigned vsz = vec_full_reg_size(s);
- TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
- TCGv_ptr t_zd = tcg_temp_new_ptr();
-
- tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
- if (esz == 3) {
- gen_helper_sve_index_d(t_zd, start, incr, desc);
- } else {
- typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
- static index_fn * const fns[3] = {
- gen_helper_sve_index_b,
- gen_helper_sve_index_h,
- gen_helper_sve_index_s,
- };
- TCGv_i32 s32 = tcg_temp_new_i32();
- TCGv_i32 i32 = tcg_temp_new_i32();
-
- tcg_gen_extrl_i64_i32(s32, start);
- tcg_gen_extrl_i64_i32(i32, incr);
- fns[esz](t_zd, s32, i32, desc);
-
- tcg_temp_free_i32(s32);
- tcg_temp_free_i32(i32);
- }
- tcg_temp_free_ptr(t_zd);
- tcg_temp_free_i32(desc);
-}
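
/*
 * The INDEX semantics restated as plain C (illustration only, not the
 * implementation above; assumes <stdint.h>): element i of Zd receives
 * start + i * incr, truncated to the element width of 1 << esz bytes.
 */
static void index_sketch(void *zd, unsigned vsz, int esz,
                         int64_t start, int64_t incr)
{
    unsigned n = vsz >> esz;

    for (unsigned i = 0; i < n; ++i) {
        int64_t v = start + (int64_t)i * incr;

        switch (esz) {
        case 0: ((uint8_t *)zd)[i] = v;  break;
        case 1: ((uint16_t *)zd)[i] = v; break;
        case 2: ((uint32_t *)zd)[i] = v; break;
        case 3: ((uint64_t *)zd)[i] = v; break;
        }
    }
}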
-
-static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- TCGv_i64 start = tcg_const_i64(a->imm1);
- TCGv_i64 incr = tcg_const_i64(a->imm2);
- do_index(s, a->esz, a->rd, start, incr);
- tcg_temp_free_i64(start);
- tcg_temp_free_i64(incr);
- }
- return true;
-}
-
-static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- TCGv_i64 start = tcg_const_i64(a->imm);
- TCGv_i64 incr = cpu_reg(s, a->rm);
- do_index(s, a->esz, a->rd, start, incr);
- tcg_temp_free_i64(start);
- }
- return true;
-}
-
-static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- TCGv_i64 start = cpu_reg(s, a->rn);
- TCGv_i64 incr = tcg_const_i64(a->imm);
- do_index(s, a->esz, a->rd, start, incr);
- tcg_temp_free_i64(incr);
- }
- return true;
-}
-
-static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- TCGv_i64 start = cpu_reg(s, a->rn);
- TCGv_i64 incr = cpu_reg(s, a->rm);
- do_index(s, a->esz, a->rd, start, incr);
- }
- return true;
-}
-
-/*
- *** SVE Stack Allocation Group
- */
-
-static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
-{
- TCGv_i64 rd = cpu_reg_sp(s, a->rd);
- TCGv_i64 rn = cpu_reg_sp(s, a->rn);
- tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
- return true;
-}
-
-static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
-{
- TCGv_i64 rd = cpu_reg_sp(s, a->rd);
- TCGv_i64 rn = cpu_reg_sp(s, a->rn);
- tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
- return true;
-}
-
-static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
-{
- TCGv_i64 reg = cpu_reg(s, a->rd);
- tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
- return true;
-}
-
-/*
- *** SVE Compute Vector Address Group
- */
-
-static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
-{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, a->imm, fn);
- }
- return true;
-}
-
-static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
-{
- return do_adr(s, a, gen_helper_sve_adr_p32);
-}
-
-static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
-{
- return do_adr(s, a, gen_helper_sve_adr_p64);
-}
-
-static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
-{
- return do_adr(s, a, gen_helper_sve_adr_s32);
-}
-
-static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
-{
- return do_adr(s, a, gen_helper_sve_adr_u32);
-}
-
-/*
- *** SVE Integer Misc - Unpredicated Group
- */
-
-static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_2 * const fns[4] = {
- NULL,
- gen_helper_sve_fexpa_h,
- gen_helper_sve_fexpa_s,
- gen_helper_sve_fexpa_d,
- };
- if (a->esz == 0) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vsz, vsz, 0, fns[a->esz]);
- }
- return true;
-}
-
-static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- NULL,
- gen_helper_sve_ftssel_h,
- gen_helper_sve_ftssel_s,
- gen_helper_sve_ftssel_d,
- };
- if (a->esz == 0) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fns[a->esz]);
- }
- return true;
-}
-
-/*
- *** SVE Predicate Logical Operations Group
- */
-
-static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
- const GVecGen4 *gvec_op)
-{
- if (!sve_access_check(s)) {
- return true;
- }
-
- unsigned psz = pred_gvec_reg_size(s);
- int dofs = pred_full_reg_offset(s, a->rd);
- int nofs = pred_full_reg_offset(s, a->rn);
- int mofs = pred_full_reg_offset(s, a->rm);
- int gofs = pred_full_reg_offset(s, a->pg);
-
- if (psz == 8) {
- /* Do the operation and the flags generation in temps. */
- TCGv_i64 pd = tcg_temp_new_i64();
- TCGv_i64 pn = tcg_temp_new_i64();
- TCGv_i64 pm = tcg_temp_new_i64();
- TCGv_i64 pg = tcg_temp_new_i64();
-
- tcg_gen_ld_i64(pn, cpu_env, nofs);
- tcg_gen_ld_i64(pm, cpu_env, mofs);
- tcg_gen_ld_i64(pg, cpu_env, gofs);
-
- gvec_op->fni8(pd, pn, pm, pg);
- tcg_gen_st_i64(pd, cpu_env, dofs);
-
- do_predtest1(pd, pg);
-
- tcg_temp_free_i64(pd);
- tcg_temp_free_i64(pn);
- tcg_temp_free_i64(pm);
- tcg_temp_free_i64(pg);
- } else {
- /* The operation and flags generation is large. The computation
- * of the flags depends on the original contents of the guarding
- * predicate. If the destination overwrites the guarding predicate,
- * then the easiest way to get this right is to save a copy.
- */
- int tofs = gofs;
- if (a->rd == a->pg) {
- tofs = offsetof(CPUARMState, vfp.preg_tmp);
- tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
- }
-
- tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
- do_predtest(s, dofs, tofs, psz / 8);
- }
- return true;
-}
-
-static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
-{
- tcg_gen_and_i64(pd, pn, pm);
- tcg_gen_and_i64(pd, pd, pg);
-}
-
-static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
- TCGv_vec pm, TCGv_vec pg)
-{
- tcg_gen_and_vec(vece, pd, pn, pm);
- tcg_gen_and_vec(vece, pd, pd, pg);
-}
-
-static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
-{
- static const GVecGen4 op = {
- .fni8 = gen_and_pg_i64,
- .fniv = gen_and_pg_vec,
- .fno = gen_helper_sve_and_pppp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else if (a->rn == a->rm) {
- if (a->pg == a->rn) {
- return do_mov_p(s, a->rd, a->rn);
- } else {
- return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
- }
- } else if (a->pg == a->rn || a->pg == a->rm) {
- return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
-}
-
-static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
-{
- tcg_gen_andc_i64(pd, pn, pm);
- tcg_gen_and_i64(pd, pd, pg);
-}
-
-static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
- TCGv_vec pm, TCGv_vec pg)
-{
- tcg_gen_andc_vec(vece, pd, pn, pm);
- tcg_gen_and_vec(vece, pd, pd, pg);
-}
-
-static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
-{
- static const GVecGen4 op = {
- .fni8 = gen_bic_pg_i64,
- .fniv = gen_bic_pg_vec,
- .fno = gen_helper_sve_bic_pppp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else if (a->pg == a->rn) {
- return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
-}
-
-static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
-{
- tcg_gen_xor_i64(pd, pn, pm);
- tcg_gen_and_i64(pd, pd, pg);
-}
-
-static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
- TCGv_vec pm, TCGv_vec pg)
-{
- tcg_gen_xor_vec(vece, pd, pn, pm);
- tcg_gen_and_vec(vece, pd, pd, pg);
-}
-
-static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
-{
- static const GVecGen4 op = {
- .fni8 = gen_eor_pg_i64,
- .fniv = gen_eor_pg_vec,
- .fno = gen_helper_sve_eor_pppp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
-}
-
-static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
-{
- tcg_gen_and_i64(pn, pn, pg);
- tcg_gen_andc_i64(pm, pm, pg);
- tcg_gen_or_i64(pd, pn, pm);
-}
-
-static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
- TCGv_vec pm, TCGv_vec pg)
-{
- tcg_gen_and_vec(vece, pn, pn, pg);
- tcg_gen_andc_vec(vece, pm, pm, pg);
- tcg_gen_or_vec(vece, pd, pn, pm);
-}
-
-static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
-{
- static const GVecGen4 op = {
- .fni8 = gen_sel_pg_i64,
- .fniv = gen_sel_pg_vec,
- .fno = gen_helper_sve_sel_pppp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
- if (a->s) {
- return false;
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
-}
-
-static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
-{
- tcg_gen_or_i64(pd, pn, pm);
- tcg_gen_and_i64(pd, pd, pg);
-}
-
-static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
- TCGv_vec pm, TCGv_vec pg)
-{
- tcg_gen_or_vec(vece, pd, pn, pm);
- tcg_gen_and_vec(vece, pd, pd, pg);
-}
-
-static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
-{
- static const GVecGen4 op = {
- .fni8 = gen_orr_pg_i64,
- .fniv = gen_orr_pg_vec,
- .fno = gen_helper_sve_orr_pppp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else if (a->pg == a->rn && a->rn == a->rm) {
- return do_mov_p(s, a->rd, a->rn);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
-}
-
-static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
-{
- tcg_gen_orc_i64(pd, pn, pm);
- tcg_gen_and_i64(pd, pd, pg);
-}
-
-static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
- TCGv_vec pm, TCGv_vec pg)
-{
- tcg_gen_orc_vec(vece, pd, pn, pm);
- tcg_gen_and_vec(vece, pd, pd, pg);
-}
-
-static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
-{
- static const GVecGen4 op = {
- .fni8 = gen_orn_pg_i64,
- .fniv = gen_orn_pg_vec,
- .fno = gen_helper_sve_orn_pppp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
-}
-
-static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
-{
- tcg_gen_or_i64(pd, pn, pm);
- tcg_gen_andc_i64(pd, pg, pd);
-}
-
-static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
- TCGv_vec pm, TCGv_vec pg)
-{
- tcg_gen_or_vec(vece, pd, pn, pm);
- tcg_gen_andc_vec(vece, pd, pg, pd);
-}
-
-static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
-{
- static const GVecGen4 op = {
- .fni8 = gen_nor_pg_i64,
- .fniv = gen_nor_pg_vec,
- .fno = gen_helper_sve_nor_pppp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
-}
-
-static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
-{
- tcg_gen_and_i64(pd, pn, pm);
- tcg_gen_andc_i64(pd, pg, pd);
-}
-
-static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
- TCGv_vec pm, TCGv_vec pg)
-{
- tcg_gen_and_vec(vece, pd, pn, pm);
- tcg_gen_andc_vec(vece, pd, pg, pd);
-}
-
-static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
-{
- static const GVecGen4 op = {
- .fni8 = gen_nand_pg_i64,
- .fniv = gen_nand_pg_vec,
- .fno = gen_helper_sve_nand_pppp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- };
- if (a->s) {
- return do_pppp_flags(s, a, &op);
- } else {
- return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
- }
-}
-
-/*
- *** SVE Predicate Misc Group
- */
-
-static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- int nofs = pred_full_reg_offset(s, a->rn);
- int gofs = pred_full_reg_offset(s, a->pg);
- int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
-
- if (words == 1) {
- TCGv_i64 pn = tcg_temp_new_i64();
- TCGv_i64 pg = tcg_temp_new_i64();
-
- tcg_gen_ld_i64(pn, cpu_env, nofs);
- tcg_gen_ld_i64(pg, cpu_env, gofs);
- do_predtest1(pn, pg);
-
- tcg_temp_free_i64(pn);
- tcg_temp_free_i64(pg);
- } else {
- do_predtest(s, nofs, gofs, words);
- }
- }
- return true;
-}
-
-/* See the ARM pseudocode DecodePredCount. */
-static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
-{
- unsigned elements = fullsz >> esz;
- unsigned bound;
-
- switch (pattern) {
- case 0x0: /* POW2 */
- return pow2floor(elements);
- case 0x1: /* VL1 */
- case 0x2: /* VL2 */
- case 0x3: /* VL3 */
- case 0x4: /* VL4 */
- case 0x5: /* VL5 */
- case 0x6: /* VL6 */
- case 0x7: /* VL7 */
- case 0x8: /* VL8 */
- bound = pattern;
- break;
- case 0x9: /* VL16 */
- case 0xa: /* VL32 */
- case 0xb: /* VL64 */
- case 0xc: /* VL128 */
- case 0xd: /* VL256 */
- bound = 16 << (pattern - 9);
- break;
- case 0x1d: /* MUL4 */
- return elements - elements % 4;
- case 0x1e: /* MUL3 */
- return elements - elements % 3;
- case 0x1f: /* ALL */
- return elements;
- default: /* #uimm5 */
- return 0;
- }
- return elements >= bound ? bound : 0;
-}
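
/*
 * Worked example for the patterns above (illustration only; assumes
 * <assert.h>): with a 256-bit vector, fullsz is 32 bytes, so esz = 2
 * gives 8 word elements.
 */
static void decode_pred_count_example(void)
{
    assert(decode_pred_count(32, 0x00, 2) == 8);  /* POW2: pow2floor(8)  */
    assert(decode_pred_count(32, 0x07, 2) == 7);  /* VL7:  8 >= 7        */
    assert(decode_pred_count(32, 0x09, 2) == 0);  /* VL16: 8 < 16        */
    assert(decode_pred_count(32, 0x1d, 2) == 8);  /* MUL4: 8 - 8 % 4     */
    assert(decode_pred_count(32, 0x1e, 2) == 6);  /* MUL3: 8 - 8 % 3     */
    assert(decode_pred_count(32, 0x1f, 2) == 8);  /* ALL                 */
}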
-
-/* This handles all of the predicate initialization instructions,
- * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
- * so that decode_pred_count returns 0. For SETFFR, we will have
- * set RD == 16 == FFR.
- */
-static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
-{
- if (!sve_access_check(s)) {
- return true;
- }
-
- unsigned fullsz = vec_full_reg_size(s);
- unsigned ofs = pred_full_reg_offset(s, rd);
- unsigned numelem, setsz, i;
- uint64_t word, lastword;
- TCGv_i64 t;
-
- numelem = decode_pred_count(fullsz, pat, esz);
-
- /* Determine what we must store into each bit, and how many. */
- if (numelem == 0) {
- lastword = word = 0;
- setsz = fullsz;
- } else {
- setsz = numelem << esz;
- lastword = word = pred_esz_masks[esz];
- if (setsz % 64) {
- lastword &= MAKE_64BIT_MASK(0, setsz % 64);
- }
- }
-
- t = tcg_temp_new_i64();
- if (fullsz <= 64) {
- tcg_gen_movi_i64(t, lastword);
- tcg_gen_st_i64(t, cpu_env, ofs);
- goto done;
- }
-
- if (word == lastword) {
- unsigned maxsz = size_for_gvec(fullsz / 8);
- unsigned oprsz = size_for_gvec(setsz / 8);
-
- if (oprsz * 8 == setsz) {
- tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
- goto done;
- }
- }
-
- setsz /= 8;
- fullsz /= 8;
-
- tcg_gen_movi_i64(t, word);
- for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
- tcg_gen_st_i64(t, cpu_env, ofs + i);
- }
- if (lastword != word) {
- tcg_gen_movi_i64(t, lastword);
- tcg_gen_st_i64(t, cpu_env, ofs + i);
- i += 8;
- }
- if (i < fullsz) {
- tcg_gen_movi_i64(t, 0);
- for (; i < fullsz; i += 8) {
- tcg_gen_st_i64(t, cpu_env, ofs + i);
- }
- }
-
- done:
- tcg_temp_free_i64(t);
-
- /* PTRUES */
- if (setflag) {
- tcg_gen_movi_i32(cpu_NF, -(word != 0));
- tcg_gen_movi_i32(cpu_CF, word == 0);
- tcg_gen_movi_i32(cpu_VF, 0);
- tcg_gen_mov_i32(cpu_ZF, cpu_NF);
- }
- return true;
-}
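
/*
 * Sketch of the word/lastword computation above (illustration only; the
 * pred_esz_masks table itself is defined elsewhere): a predicate holds
 * one bit per vector byte, so an all-true predicate for elements of
 * 1 << esz bytes sets every (1 << esz)-th bit, and the final, partial
 * word is truncated to the remaining setsz bits.
 */
static uint64_t predset_word_sketch(int esz, unsigned setsz_bits_in_word)
{
    static const uint64_t all_true[4] = {
        0xffffffffffffffffull, 0x5555555555555555ull,
        0x1111111111111111ull, 0x0101010101010101ull,
    };
    uint64_t word = all_true[esz];

    if (setsz_bits_in_word % 64) {
        word &= (1ull << (setsz_bits_in_word % 64)) - 1;
    }
    return word;
}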
-
-static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
-{
- return do_predset(s, a->esz, a->rd, a->pat, a->s);
-}
-
-static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
-{
- /* Note pat == 31 is #all, to set all elements. */
- return do_predset(s, 0, FFR_PRED_NUM, 31, false);
-}
-
-static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
-{
- /* Note pat == 32 is #unimp, to set no elements. */
- return do_predset(s, 0, a->rd, 32, false);
-}
-
-static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
-{
- /* The path through do_pppp_flags is complicated enough to want to avoid
- * duplication. Frob the arguments into the form of a predicated AND.
- */
- arg_rprr_s alt_a = {
- .rd = a->rd, .pg = a->pg, .s = a->s,
- .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
- };
- return trans_AND_pppp(s, &alt_a, insn);
-}
-
-static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
-{
- return do_mov_p(s, a->rd, FFR_PRED_NUM);
-}
-
-static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
-{
- return do_mov_p(s, FFR_PRED_NUM, a->rn);
-}
-
-static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
- void (*gen_fn)(TCGv_i32, TCGv_ptr,
- TCGv_ptr, TCGv_i32))
-{
- if (!sve_access_check(s)) {
- return true;
- }
-
- TCGv_ptr t_pd = tcg_temp_new_ptr();
- TCGv_ptr t_pg = tcg_temp_new_ptr();
- TCGv_i32 t;
- unsigned desc;
-
- desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
- desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
-
- tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
- tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
- t = tcg_const_i32(desc);
-
- gen_fn(t, t_pd, t_pg, t);
- tcg_temp_free_ptr(t_pd);
- tcg_temp_free_ptr(t_pg);
-
- do_pred_flags(t);
- tcg_temp_free_i32(t);
- return true;
-}
-
-static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
-{
- return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
-}
-
-static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
-{
- return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
-}
-
-/*
- *** SVE Element Count Group
- */
-
-/* Perform an inline saturating addition of a 32-bit value within
- * a 64-bit register. The second operand is known to be positive,
- * which halves the comparisons we must perform to bound the result.
- */
-static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
-{
- int64_t ibound;
- TCGv_i64 bound;
- TCGCond cond;
-
- /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
- if (u) {
- tcg_gen_ext32u_i64(reg, reg);
- } else {
- tcg_gen_ext32s_i64(reg, reg);
- }
- if (d) {
- tcg_gen_sub_i64(reg, reg, val);
- ibound = (u ? 0 : INT32_MIN);
- cond = TCG_COND_LT;
- } else {
- tcg_gen_add_i64(reg, reg, val);
- ibound = (u ? UINT32_MAX : INT32_MAX);
- cond = TCG_COND_GT;
- }
- bound = tcg_const_i64(ibound);
- tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
- tcg_temp_free_i64(bound);
-}
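
/*
 * The same bounding written as plain C (sketch only; assumes <stdint.h>
 * and <stdbool.h>): extend to 64 bits, do the arithmetic, then clamp
 * once against the single bound that can be crossed, exactly as the
 * movcond above does.  VAL is non-negative, as noted above.
 */
static int64_t sat_addsub_32_sketch(int64_t reg, int64_t val, bool u, bool d)
{
    int64_t r, bound;

    reg = u ? (int64_t)(uint32_t)reg : (int64_t)(int32_t)reg;
    if (d) {
        r = reg - val;
        bound = u ? 0 : INT32_MIN;
        return r < bound ? bound : r;
    }
    r = reg + val;
    bound = u ? (int64_t)UINT32_MAX : INT32_MAX;
    return r > bound ? bound : r;
}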
-
-/* Similarly with 64-bit values. */
-static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
-{
- TCGv_i64 t0 = tcg_temp_new_i64();
- TCGv_i64 t1 = tcg_temp_new_i64();
- TCGv_i64 t2;
-
- if (u) {
- if (d) {
- tcg_gen_sub_i64(t0, reg, val);
- tcg_gen_movi_i64(t1, 0);
- tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
- } else {
- tcg_gen_add_i64(t0, reg, val);
- tcg_gen_movi_i64(t1, -1);
- tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
- }
- } else {
- if (d) {
- /* Detect signed overflow for subtraction. */
- tcg_gen_xor_i64(t0, reg, val);
- tcg_gen_sub_i64(t1, reg, val);
- tcg_gen_xor_i64(reg, reg, t1);
- tcg_gen_and_i64(t0, t0, reg);
-
- /* Bound the result. */
- tcg_gen_movi_i64(reg, INT64_MIN);
- t2 = tcg_const_i64(0);
- tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
- } else {
- /* Detect signed overflow for addition. */
- tcg_gen_xor_i64(t0, reg, val);
- tcg_gen_add_i64(reg, reg, val);
- tcg_gen_xor_i64(t1, reg, val);
- tcg_gen_andc_i64(t0, t1, t0);
-
- /* Bound the result. */
- tcg_gen_movi_i64(t1, INT64_MAX);
- t2 = tcg_const_i64(0);
- tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
- }
- tcg_temp_free_i64(t2);
- }
- tcg_temp_free_i64(t0);
- tcg_temp_free_i64(t1);
-}
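
/*
 * The signed 64-bit paths above use the classic xor trick to detect
 * overflow without a wider type.  A plain-C sketch (illustration only;
 * VAL is non-negative, so an add can only overflow upwards and a
 * subtract only downwards):
 */
static int64_t sat_addsub_s64_sketch(int64_t reg, int64_t val, bool d)
{
    uint64_t ur = (uint64_t)reg, uv = (uint64_t)val;
    int64_t res = (int64_t)(d ? ur - uv : ur + uv);

    if (d) {
        /* Overflow iff the operands had different signs and the
           result's sign differs from reg's.  */
        if (((reg ^ val) & (reg ^ res)) < 0) {
            return INT64_MIN;
        }
    } else {
        /* Overflow iff the operands had the same sign and the
           result's sign differs from it.  */
        if ((~(reg ^ val) & (res ^ val)) < 0) {
            return INT64_MAX;
        }
    }
    return res;
}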
-
-/* Similarly with a vector and a scalar operand. */
-static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
- TCGv_i64 val, bool u, bool d)
-{
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr dptr, nptr;
- TCGv_i32 t32, desc;
- TCGv_i64 t64;
-
- dptr = tcg_temp_new_ptr();
- nptr = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
- tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
- desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
-
- switch (esz) {
- case MO_8:
- t32 = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(t32, val);
- if (d) {
- tcg_gen_neg_i32(t32, t32);
- }
- if (u) {
- gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
- } else {
- gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
- }
- tcg_temp_free_i32(t32);
- break;
-
- case MO_16:
- t32 = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(t32, val);
- if (d) {
- tcg_gen_neg_i32(t32, t32);
- }
- if (u) {
- gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
- } else {
- gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
- }
- tcg_temp_free_i32(t32);
- break;
-
- case MO_32:
- t64 = tcg_temp_new_i64();
- if (d) {
- tcg_gen_neg_i64(t64, val);
- } else {
- tcg_gen_mov_i64(t64, val);
- }
- if (u) {
- gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
- } else {
- gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
- }
- tcg_temp_free_i64(t64);
- break;
-
- case MO_64:
- if (u) {
- if (d) {
- gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
- } else {
- gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
- }
- } else if (d) {
- t64 = tcg_temp_new_i64();
- tcg_gen_neg_i64(t64, val);
- gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
- tcg_temp_free_i64(t64);
- } else {
- gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
- }
- break;
-
- default:
- g_assert_not_reached();
- }
-
- tcg_temp_free_ptr(dptr);
- tcg_temp_free_ptr(nptr);
- tcg_temp_free_i32(desc);
-}
-
-static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- unsigned fullsz = vec_full_reg_size(s);
- unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
- tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
- }
- return true;
-}
-
-static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- unsigned fullsz = vec_full_reg_size(s);
- unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
- int inc = numelem * a->imm * (a->d ? -1 : 1);
- TCGv_i64 reg = cpu_reg(s, a->rd);
-
- tcg_gen_addi_i64(reg, reg, inc);
- }
- return true;
-}
-
-static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
- uint32_t insn)
-{
- if (!sve_access_check(s)) {
- return true;
- }
-
- unsigned fullsz = vec_full_reg_size(s);
- unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
- int inc = numelem * a->imm;
- TCGv_i64 reg = cpu_reg(s, a->rd);
-
- /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
- if (inc == 0) {
- if (a->u) {
- tcg_gen_ext32u_i64(reg, reg);
- } else {
- tcg_gen_ext32s_i64(reg, reg);
- }
- } else {
- TCGv_i64 t = tcg_const_i64(inc);
- do_sat_addsub_32(reg, t, a->u, a->d);
- tcg_temp_free_i64(t);
- }
- return true;
-}
-
-static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
- uint32_t insn)
-{
- if (!sve_access_check(s)) {
- return true;
- }
-
- unsigned fullsz = vec_full_reg_size(s);
- unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
- int inc = numelem * a->imm;
- TCGv_i64 reg = cpu_reg(s, a->rd);
-
- if (inc != 0) {
- TCGv_i64 t = tcg_const_i64(inc);
- do_sat_addsub_64(reg, t, a->u, a->d);
- tcg_temp_free_i64(t);
- }
- return true;
-}
-
-static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
-{
- if (a->esz == 0) {
- return false;
- }
-
- unsigned fullsz = vec_full_reg_size(s);
- unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
- int inc = numelem * a->imm;
-
- if (inc != 0) {
- if (sve_access_check(s)) {
- TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
- tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- t, fullsz, fullsz);
- tcg_temp_free_i64(t);
- }
- } else {
- do_mov_z(s, a->rd, a->rn);
- }
- return true;
-}
-
-static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
- uint32_t insn)
-{
- if (a->esz == 0) {
- return false;
- }
-
- unsigned fullsz = vec_full_reg_size(s);
- unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
- int inc = numelem * a->imm;
-
- if (inc != 0) {
- if (sve_access_check(s)) {
- TCGv_i64 t = tcg_const_i64(inc);
- do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
- tcg_temp_free_i64(t);
- }
- } else {
- do_mov_z(s, a->rd, a->rn);
- }
- return true;
-}
-
-/*
- *** SVE Bitwise Immediate Group
- */
-
-static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
-{
- uint64_t imm;
- if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
- extract32(a->dbm, 0, 6),
- extract32(a->dbm, 6, 6))) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
- }
- return true;
-}
-
-static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
-{
- return do_zz_dbm(s, a, tcg_gen_gvec_andi);
-}
-
-static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
-{
- return do_zz_dbm(s, a, tcg_gen_gvec_ori);
-}
-
-static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
-{
- return do_zz_dbm(s, a, tcg_gen_gvec_xori);
-}
-
-static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
-{
- uint64_t imm;
- if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
- extract32(a->dbm, 0, 6),
- extract32(a->dbm, 6, 6))) {
- return false;
- }
- if (sve_access_check(s)) {
- do_dupi_z(s, a->rd, imm);
- }
- return true;
-}
-
-/*
- *** SVE Integer Wide Immediate - Predicated Group
- */
-
-/* Implement all merging copies. This is used for CPY (immediate),
- * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
- */
-static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
- TCGv_i64 val)
-{
- typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
- static gen_cpy * const fns[4] = {
- gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
- gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
- };
- unsigned vsz = vec_full_reg_size(s);
- TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
- TCGv_ptr t_zd = tcg_temp_new_ptr();
- TCGv_ptr t_zn = tcg_temp_new_ptr();
- TCGv_ptr t_pg = tcg_temp_new_ptr();
-
- tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
- tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
- tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
-
- fns[esz](t_zd, t_zn, t_pg, val, desc);
-
- tcg_temp_free_ptr(t_zd);
- tcg_temp_free_ptr(t_zn);
- tcg_temp_free_ptr(t_pg);
- tcg_temp_free_i32(desc);
-}
-
-static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
-{
- if (a->esz == 0) {
- return false;
- }
- if (sve_access_check(s)) {
- /* Decode the VFP immediate. */
- uint64_t imm = vfp_expand_imm(a->esz, a->imm);
- TCGv_i64 t_imm = tcg_const_i64(imm);
- do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
- tcg_temp_free_i64(t_imm);
- }
- return true;
-}
-
-static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
-{
- if (a->esz == 0 && extract32(insn, 13, 1)) {
- return false;
- }
- if (sve_access_check(s)) {
- TCGv_i64 t_imm = tcg_const_i64(a->imm);
- do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
- tcg_temp_free_i64(t_imm);
- }
- return true;
-}
-
-static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
-{
- static gen_helper_gvec_2i * const fns[4] = {
- gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
- gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
- };
-
- if (a->esz == 0 && extract32(insn, 13, 1)) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- TCGv_i64 t_imm = tcg_const_i64(a->imm);
- tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
- pred_full_reg_offset(s, a->pg),
- t_imm, vsz, vsz, 0, fns[a->esz]);
- tcg_temp_free_i64(t_imm);
- }
- return true;
-}
-
-/*
- *** SVE Permute Extract Group
- */
-
-static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
-{
- if (!sve_access_check(s)) {
- return true;
- }
-
- unsigned vsz = vec_full_reg_size(s);
- unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
- unsigned n_siz = vsz - n_ofs;
- unsigned d = vec_full_reg_offset(s, a->rd);
- unsigned n = vec_full_reg_offset(s, a->rn);
- unsigned m = vec_full_reg_offset(s, a->rm);
-
- /* Use host vector move insns if we have appropriate sizes
- * and no unfortunate overlap.
- */
- if (m != d
- && n_ofs == size_for_gvec(n_ofs)
- && n_siz == size_for_gvec(n_siz)
- && (d != n || n_siz <= n_ofs)) {
- tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
- if (n_ofs != 0) {
- tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
- }
- } else {
- tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
- }
- return true;
-}
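
/*
 * EXT restated as byte copies (sketch only; it ignores the register
 * overlap that the code above must honour): the result is vsz
 * consecutive bytes of the concatenation Zn:Zm starting at byte n_ofs,
 * with an out-of-range immediate wrapping to 0 as above.
 */
static void ext_sketch(uint8_t *zd, const uint8_t *zn, const uint8_t *zm,
                       unsigned vsz, unsigned imm)
{
    unsigned n_ofs = imm >= vsz ? 0 : imm;

    for (unsigned i = 0; i < vsz; ++i) {
        zd[i] = i + n_ofs < vsz ? zn[i + n_ofs] : zm[i + n_ofs - vsz];
    }
}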
-
-/*
- *** SVE Permute - Unpredicated Group
- */
-
-static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
- vsz, vsz, cpu_reg_sp(s, a->rn));
- }
- return true;
-}
-
-static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
-{
- if ((a->imm & 0x1f) == 0) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- unsigned dofs = vec_full_reg_offset(s, a->rd);
- unsigned esz, index;
-
- esz = ctz32(a->imm);
- index = a->imm >> (esz + 1);
-
- if ((index << esz) < vsz) {
- unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
- tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
- } else {
- tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
- }
- }
- return true;
-}
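
/*
 * Sketch of the immediate decode above (illustration only; uses the
 * GCC/Clang __builtin_ctz rather than QEMU's ctz32): the position of
 * the lowest set bit of imm selects the element size, and the bits
 * above it the index.  E.g. imm = 0b10110 gives esz = 1 (halfwords)
 * and index = 5.
 */
static void dup_x_decode_sketch(unsigned imm, unsigned *esz, unsigned *index)
{
    *esz = __builtin_ctz(imm);        /* imm != 0, checked above */
    *index = imm >> (*esz + 1);
}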
-
-static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
-{
- typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
- static gen_insr * const fns[4] = {
- gen_helper_sve_insr_b, gen_helper_sve_insr_h,
- gen_helper_sve_insr_s, gen_helper_sve_insr_d,
- };
- unsigned vsz = vec_full_reg_size(s);
- TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
- TCGv_ptr t_zd = tcg_temp_new_ptr();
- TCGv_ptr t_zn = tcg_temp_new_ptr();
-
- tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
- tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
-
- fns[a->esz](t_zd, t_zn, val, desc);
-
- tcg_temp_free_ptr(t_zd);
- tcg_temp_free_ptr(t_zn);
- tcg_temp_free_i32(desc);
-}
-
-static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- TCGv_i64 t = tcg_temp_new_i64();
- tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
- do_insr_i64(s, a, t);
- tcg_temp_free_i64(t);
- }
- return true;
-}
-
-static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- do_insr_i64(s, a, cpu_reg(s, a->rm));
- }
- return true;
-}
-
-static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_2 * const fns[4] = {
- gen_helper_sve_rev_b, gen_helper_sve_rev_h,
- gen_helper_sve_rev_s, gen_helper_sve_rev_d
- };
-
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vsz, vsz, 0, fns[a->esz]);
- }
- return true;
-}
-
-static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
- gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
- };
-
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fns[a->esz]);
- }
- return true;
-}
-
-static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
-{
- static gen_helper_gvec_2 * const fns[4][2] = {
- { NULL, NULL },
- { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
- { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
- { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
- };
-
- if (a->esz == 0) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn)
- + (a->h ? vsz / 2 : 0),
- vsz, vsz, 0, fns[a->esz][a->u]);
- }
- return true;
-}
-
-/*
- *** SVE Permute - Predicates Group
- */
-
-static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
- gen_helper_gvec_3 *fn)
-{
- if (!sve_access_check(s)) {
- return true;
- }
-
- unsigned vsz = pred_full_reg_size(s);
-
- /* Predicate sizes may be smaller and cannot use simd_desc.
- We cannot round up, as we do elsewhere, because we need
- the exact size for ZIP2 and REV. We retain the style for
- the other helpers for consistency. */
- TCGv_ptr t_d = tcg_temp_new_ptr();
- TCGv_ptr t_n = tcg_temp_new_ptr();
- TCGv_ptr t_m = tcg_temp_new_ptr();
- TCGv_i32 t_desc;
- int desc;
-
- desc = vsz - 2;
- desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
- desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
-
- tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
- tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
- tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
- t_desc = tcg_const_i32(desc);
-
- fn(t_d, t_n, t_m, t_desc);
-
- tcg_temp_free_ptr(t_d);
- tcg_temp_free_ptr(t_n);
- tcg_temp_free_ptr(t_m);
- tcg_temp_free_i32(t_desc);
- return true;
-}
-
-static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
- gen_helper_gvec_2 *fn)
-{
- if (!sve_access_check(s)) {
- return true;
- }
-
- unsigned vsz = pred_full_reg_size(s);
- TCGv_ptr t_d = tcg_temp_new_ptr();
- TCGv_ptr t_n = tcg_temp_new_ptr();
- TCGv_i32 t_desc;
- int desc;
-
- tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
- tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
-
- /* Predicate sizes may be smaller and cannot use simd_desc.
- We cannot round up, as we do elsewhere, because we need
- the exact size for ZIP2 and REV. We retain the style for
- the other helpers for consistency. */
-
- desc = vsz - 2;
- desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
- desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
- t_desc = tcg_const_i32(desc);
-
- fn(t_d, t_n, t_desc);
-
- tcg_temp_free_i32(t_desc);
- tcg_temp_free_ptr(t_d);
- tcg_temp_free_ptr(t_n);
- return true;
-}
-
-static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
-}
-
-static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
-}
-
-static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
-}
-
-static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
-}
-
-static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
-}
-
-static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
-}
-
-static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
-{
- return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
-}
-
-static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
-{
- return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
-}
-
-static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
-{
- return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
-}
-
-/*
- *** SVE Permute - Interleaving Group
- */
-
-static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- gen_helper_sve_zip_b, gen_helper_sve_zip_h,
- gen_helper_sve_zip_s, gen_helper_sve_zip_d,
- };
-
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- unsigned high_ofs = high ? vsz / 2 : 0;
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn) + high_ofs,
- vec_full_reg_offset(s, a->rm) + high_ofs,
- vsz, vsz, 0, fns[a->esz]);
- }
- return true;
-}
-
-static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
- gen_helper_gvec_3 *fn)
-{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, data, fn);
- }
- return true;
-}
-
-static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_zip(s, a, false);
-}
-
-static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_zip(s, a, true);
-}
-
-static gen_helper_gvec_3 * const uzp_fns[4] = {
- gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
- gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
-};
-
-static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
-}
-
-static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
-}
-
-static gen_helper_gvec_3 * const trn_fns[4] = {
- gen_helper_sve_trn_b, gen_helper_sve_trn_h,
- gen_helper_sve_trn_s, gen_helper_sve_trn_d,
-};
-
-static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
-}
-
-static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
-{
- return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
-}
-
-/*
- *** SVE Permute Vector - Predicated Group
- */
-
-static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
- };
- return do_zpz_ool(s, a, fns[a->esz]);
-}
-
-/* Call the helper that computes the ARM LastActiveElement pseudocode
- * function, scaled by the element size. This includes the not found
- * indication; e.g. not found for esz=3 is -8.
- */
-static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
-{
- /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
- * round up, as we do elsewhere, because we need the exact size.
- */
- TCGv_ptr t_p = tcg_temp_new_ptr();
- TCGv_i32 t_desc;
- unsigned vsz = pred_full_reg_size(s);
- unsigned desc;
-
- desc = vsz - 2;
- desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
-
- tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
- t_desc = tcg_const_i32(desc);
-
- gen_helper_sve_last_active_element(ret, t_p, t_desc);
-
- tcg_temp_free_i32(t_desc);
- tcg_temp_free_ptr(t_p);
-}
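
/*
 * Sketch of the helper's contract (illustration only): scan a predicate
 * stored as one bit per vector byte and return the byte offset of the
 * last active element, or -(1 << esz) when no element is active.
 */
static int last_active_element_sketch(const uint8_t *pg, unsigned vsz, int esz)
{
    for (int i = (int)(vsz >> esz) - 1; i >= 0; --i) {
        unsigned bit = i << esz;        /* governing bit of element i */

        if (pg[bit / 8] & (1u << (bit % 8))) {
            return i << esz;
        }
    }
    return -(1 << esz);
}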
-
-/* Increment LAST to the offset of the next element in the vector,
- * wrapping around to 0.
- */
-static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
-{
- unsigned vsz = vec_full_reg_size(s);
-
- tcg_gen_addi_i32(last, last, 1 << esz);
- if (is_power_of_2(vsz)) {
- tcg_gen_andi_i32(last, last, vsz - 1);
- } else {
- TCGv_i32 max = tcg_const_i32(vsz);
- TCGv_i32 zero = tcg_const_i32(0);
- tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
- tcg_temp_free_i32(max);
- tcg_temp_free_i32(zero);
- }
-}
-
-/* If LAST < 0, set LAST to the offset of the last element in the vector. */
-static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
-{
- unsigned vsz = vec_full_reg_size(s);
-
- if (is_power_of_2(vsz)) {
- tcg_gen_andi_i32(last, last, vsz - 1);
- } else {
- TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
- TCGv_i32 zero = tcg_const_i32(0);
- tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
- tcg_temp_free_i32(max);
- tcg_temp_free_i32(zero);
- }
-}
-
-/* Load an unsigned element of ESZ from BASE+OFS. */
-static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
-{
- TCGv_i64 r = tcg_temp_new_i64();
-
- switch (esz) {
- case 0:
- tcg_gen_ld8u_i64(r, base, ofs);
- break;
- case 1:
- tcg_gen_ld16u_i64(r, base, ofs);
- break;
- case 2:
- tcg_gen_ld32u_i64(r, base, ofs);
- break;
- case 3:
- tcg_gen_ld_i64(r, base, ofs);
- break;
- default:
- g_assert_not_reached();
- }
- return r;
-}
-
-/* Load an unsigned element of ESZ from RM[LAST]. */
-static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
- int rm, int esz)
-{
- TCGv_ptr p = tcg_temp_new_ptr();
- TCGv_i64 r;
-
- /* Convert offset into vector into offset into ENV.
- * The final adjustment for the vector register base
- * is added via constant offset to the load.
- */
-#ifdef HOST_WORDS_BIGENDIAN
- /* Adjust for element ordering. See vec_reg_offset. */
- if (esz < 3) {
- tcg_gen_xori_i32(last, last, 8 - (1 << esz));
- }
-#endif
- tcg_gen_ext_i32_ptr(p, last);
- tcg_gen_add_ptr(p, p, cpu_env);
-
- r = load_esz(p, vec_full_reg_offset(s, rm), esz);
- tcg_temp_free_ptr(p);
-
- return r;
-}
-
-/* Compute CLAST for a Zreg. */
-static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
-{
- TCGv_i32 last;
- TCGLabel *over;
- TCGv_i64 ele;
- unsigned vsz, esz = a->esz;
-
- if (!sve_access_check(s)) {
- return true;
- }
-
- last = tcg_temp_local_new_i32();
- over = gen_new_label();
-
- find_last_active(s, last, esz, a->pg);
-
- /* There is of course no movcond for a 2048-bit vector,
- * so we must branch over the actual store.
- */
- tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
-
- if (!before) {
- incr_last_active(s, last, esz);
- }
-
- ele = load_last_active(s, last, a->rm, esz);
- tcg_temp_free_i32(last);
-
- vsz = vec_full_reg_size(s);
- tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
- tcg_temp_free_i64(ele);
-
- /* If this insn used MOVPRFX, we may need a second move. */
- if (a->rd != a->rn) {
- TCGLabel *done = gen_new_label();
- tcg_gen_br(done);
-
- gen_set_label(over);
- do_mov_z(s, a->rd, a->rn);
-
- gen_set_label(done);
- } else {
- gen_set_label(over);
- }
- return true;
-}
-
-static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
-{
- return do_clast_vector(s, a, false);
-}
-
-static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
-{
- return do_clast_vector(s, a, true);
-}
-
-/* Compute CLAST for a scalar. */
-static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
- bool before, TCGv_i64 reg_val)
-{
- TCGv_i32 last = tcg_temp_new_i32();
- TCGv_i64 ele, cmp, zero;
-
- find_last_active(s, last, esz, pg);
-
- /* Extend the original value of last prior to incrementing. */
- cmp = tcg_temp_new_i64();
- tcg_gen_ext_i32_i64(cmp, last);
-
- if (!before) {
- incr_last_active(s, last, esz);
- }
-
- /* The conceit here is that while last < 0 indicates not found, after
- * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
- * from which we can load garbage. We then discard the garbage with
- * a conditional move.
- */
- ele = load_last_active(s, last, rm, esz);
- tcg_temp_free_i32(last);
-
- zero = tcg_const_i64(0);
- tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
-
- tcg_temp_free_i64(zero);
- tcg_temp_free_i64(cmp);
- tcg_temp_free_i64(ele);
-}
-
-/* Compute CLAST for a Vreg. */
-static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
-{
- if (sve_access_check(s)) {
- int esz = a->esz;
- int ofs = vec_reg_offset(s, a->rd, 0, esz);
- TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
-
- do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
- write_fp_dreg(s, a->rd, reg);
- tcg_temp_free_i64(reg);
- }
- return true;
-}
-
-static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_clast_fp(s, a, false);
-}
-
-static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_clast_fp(s, a, true);
-}
-
-/* Compute CLAST for a Xreg. */
-static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
-{
- TCGv_i64 reg;
-
- if (!sve_access_check(s)) {
- return true;
- }
-
- reg = cpu_reg(s, a->rd);
- switch (a->esz) {
- case 0:
- tcg_gen_ext8u_i64(reg, reg);
- break;
- case 1:
- tcg_gen_ext16u_i64(reg, reg);
- break;
- case 2:
- tcg_gen_ext32u_i64(reg, reg);
- break;
- case 3:
- break;
- default:
- g_assert_not_reached();
- }
-
- do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
- return true;
-}
-
-static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_clast_general(s, a, false);
-}
-
-static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_clast_general(s, a, true);
-}
-
-/* Compute LAST for a scalar. */
-static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
- int pg, int rm, bool before)
-{
- TCGv_i32 last = tcg_temp_new_i32();
- TCGv_i64 ret;
-
- find_last_active(s, last, esz, pg);
- if (before) {
- wrap_last_active(s, last, esz);
- } else {
- incr_last_active(s, last, esz);
- }
-
- ret = load_last_active(s, last, rm, esz);
- tcg_temp_free_i32(last);
- return ret;
-}
-
-/* Compute LAST for a Vreg. */
-static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
-{
- if (sve_access_check(s)) {
- TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
- write_fp_dreg(s, a->rd, val);
- tcg_temp_free_i64(val);
- }
- return true;
-}
-
-static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_last_fp(s, a, false);
-}
-
-static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_last_fp(s, a, true);
-}
-
-/* Compute LAST for a Xreg. */
-static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
-{
- if (sve_access_check(s)) {
- TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
- tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
- tcg_temp_free_i64(val);
- }
- return true;
-}
-
-static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_last_general(s, a, false);
-}
-
-static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_last_general(s, a, true);
-}
-
-static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
- }
- return true;
-}
-
-static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
- TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
- do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
- tcg_temp_free_i64(t);
- }
- return true;
-}
-
-static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- NULL,
- gen_helper_sve_revb_h,
- gen_helper_sve_revb_s,
- gen_helper_sve_revb_d,
- };
- return do_zpz_ool(s, a, fns[a->esz]);
-}
-
-static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- NULL,
- NULL,
- gen_helper_sve_revh_s,
- gen_helper_sve_revh_d,
- };
- return do_zpz_ool(s, a, fns[a->esz]);
-}
-
-static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
-}
-
-static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[4] = {
- gen_helper_sve_rbit_b,
- gen_helper_sve_rbit_h,
- gen_helper_sve_rbit_s,
- gen_helper_sve_rbit_d,
- };
- return do_zpz_ool(s, a, fns[a->esz]);
-}
-
-static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- pred_full_reg_offset(s, a->pg),
- vsz, vsz, a->esz, gen_helper_sve_splice);
- }
- return true;
-}
-
-/*
- *** SVE Integer Compare - Vectors Group
- */
-
-static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
- gen_helper_gvec_flags_4 *gen_fn)
-{
- TCGv_ptr pd, zn, zm, pg;
- unsigned vsz;
- TCGv_i32 t;
-
- if (gen_fn == NULL) {
- return false;
- }
- if (!sve_access_check(s)) {
- return true;
- }
-
- vsz = vec_full_reg_size(s);
- t = tcg_const_i32(simd_desc(vsz, vsz, 0));
- pd = tcg_temp_new_ptr();
- zn = tcg_temp_new_ptr();
- zm = tcg_temp_new_ptr();
- pg = tcg_temp_new_ptr();
-
- tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
- tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
- tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
- tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
-
- gen_fn(t, pd, zn, zm, pg, t);
-
- tcg_temp_free_ptr(pd);
- tcg_temp_free_ptr(zn);
- tcg_temp_free_ptr(zm);
- tcg_temp_free_ptr(pg);
-
- do_pred_flags(t);
-
- tcg_temp_free_i32(t);
- return true;
-}
-
-#define DO_PPZZ(NAME, name) \
-static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
- uint32_t insn) \
-{ \
- static gen_helper_gvec_flags_4 * const fns[4] = { \
- gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
- gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
- }; \
- return do_ppzz_flags(s, a, fns[a->esz]); \
-}
-
-DO_PPZZ(CMPEQ, cmpeq)
-DO_PPZZ(CMPNE, cmpne)
-DO_PPZZ(CMPGT, cmpgt)
-DO_PPZZ(CMPGE, cmpge)
-DO_PPZZ(CMPHI, cmphi)
-DO_PPZZ(CMPHS, cmphs)
-
-#undef DO_PPZZ
-
-#define DO_PPZW(NAME, name) \
-static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
- uint32_t insn) \
-{ \
- static gen_helper_gvec_flags_4 * const fns[4] = { \
- gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
- gen_helper_sve_##name##_ppzw_s, NULL \
- }; \
- return do_ppzz_flags(s, a, fns[a->esz]); \
-}
-
-DO_PPZW(CMPEQ, cmpeq)
-DO_PPZW(CMPNE, cmpne)
-DO_PPZW(CMPGT, cmpgt)
-DO_PPZW(CMPGE, cmpge)
-DO_PPZW(CMPHI, cmphi)
-DO_PPZW(CMPHS, cmphs)
-DO_PPZW(CMPLT, cmplt)
-DO_PPZW(CMPLE, cmple)
-DO_PPZW(CMPLO, cmplo)
-DO_PPZW(CMPLS, cmpls)
-
-#undef DO_PPZW
-
-/*
- *** SVE Integer Compare - Immediate Groups
- */
-
-static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
- gen_helper_gvec_flags_3 *gen_fn)
-{
- TCGv_ptr pd, zn, pg;
- unsigned vsz;
- TCGv_i32 t;
-
- if (gen_fn == NULL) {
- return false;
- }
- if (!sve_access_check(s)) {
- return true;
- }
-
- vsz = vec_full_reg_size(s);
- t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
- pd = tcg_temp_new_ptr();
- zn = tcg_temp_new_ptr();
- pg = tcg_temp_new_ptr();
-
- tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
- tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
- tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
-
- gen_fn(t, pd, zn, pg, t);
-
- tcg_temp_free_ptr(pd);
- tcg_temp_free_ptr(zn);
- tcg_temp_free_ptr(pg);
-
- do_pred_flags(t);
-
- tcg_temp_free_i32(t);
- return true;
-}
-
-#define DO_PPZI(NAME, name) \
-static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
- uint32_t insn) \
-{ \
- static gen_helper_gvec_flags_3 * const fns[4] = { \
- gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
- gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
- }; \
- return do_ppzi_flags(s, a, fns[a->esz]); \
-}
-
-DO_PPZI(CMPEQ, cmpeq)
-DO_PPZI(CMPNE, cmpne)
-DO_PPZI(CMPGT, cmpgt)
-DO_PPZI(CMPGE, cmpge)
-DO_PPZI(CMPHI, cmphi)
-DO_PPZI(CMPHS, cmphs)
-DO_PPZI(CMPLT, cmplt)
-DO_PPZI(CMPLE, cmple)
-DO_PPZI(CMPLO, cmplo)
-DO_PPZI(CMPLS, cmpls)
-
-#undef DO_PPZI
-
-/*
- *** SVE Partition Break Group
- */
-
-static bool do_brk3(DisasContext *s, arg_rprr_s *a,
- gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
-{
- if (!sve_access_check(s)) {
- return true;
- }
-
- unsigned vsz = pred_full_reg_size(s);
-
- /* Predicate sizes may be smaller and cannot use simd_desc. */
- TCGv_ptr d = tcg_temp_new_ptr();
- TCGv_ptr n = tcg_temp_new_ptr();
- TCGv_ptr m = tcg_temp_new_ptr();
- TCGv_ptr g = tcg_temp_new_ptr();
- TCGv_i32 t = tcg_const_i32(vsz - 2);
-
- tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
- tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
- tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
- tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
-
- if (a->s) {
- fn_s(t, d, n, m, g, t);
- do_pred_flags(t);
- } else {
- fn(d, n, m, g, t);
- }
- tcg_temp_free_ptr(d);
- tcg_temp_free_ptr(n);
- tcg_temp_free_ptr(m);
- tcg_temp_free_ptr(g);
- tcg_temp_free_i32(t);
- return true;
-}
-
-static bool do_brk2(DisasContext *s, arg_rpr_s *a,
- gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
-{
- if (!sve_access_check(s)) {
- return true;
- }
-
- unsigned vsz = pred_full_reg_size(s);
-
- /* Predicate sizes may be smaller and cannot use simd_desc. */
- TCGv_ptr d = tcg_temp_new_ptr();
- TCGv_ptr n = tcg_temp_new_ptr();
- TCGv_ptr g = tcg_temp_new_ptr();
- TCGv_i32 t = tcg_const_i32(vsz - 2);
-
- tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
- tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
- tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
-
- if (a->s) {
- fn_s(t, d, n, g, t);
- do_pred_flags(t);
- } else {
- fn(d, n, g, t);
- }
- tcg_temp_free_ptr(d);
- tcg_temp_free_ptr(n);
- tcg_temp_free_ptr(g);
- tcg_temp_free_i32(t);
- return true;
-}
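/*
 * Why (vsz - 2) rather than simd_desc() in do_brk2/do_brk3: simd_desc()
 * requires an operation size that is a non-zero multiple of 8 bytes, while a
 * predicate register is vector-length/8 bytes, i.e. anywhere from 2 to 32
 * bytes and not necessarily a multiple of 8.  The raw biased size is passed
 * instead, and the break helpers in sve_helper.c (not part of this hunk) are
 * assumed to undo the bias along the lines of
 *     oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
 */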
-
-static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
-{
- return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
-}
-
-static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
-{
- return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
-}
-
-static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
-{
- return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
-}
-
-static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
-{
- return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
-}
-
-static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
-{
- return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
-}
-
-static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
-{
- return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
-}
-
-static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
-{
- return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
-}
-
-/*
- *** SVE Predicate Count Group
- */
-
-static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
-{
- unsigned psz = pred_full_reg_size(s);
-
- if (psz <= 8) {
- uint64_t psz_mask;
-
- tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
- if (pn != pg) {
- TCGv_i64 g = tcg_temp_new_i64();
- tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
- tcg_gen_and_i64(val, val, g);
- tcg_temp_free_i64(g);
- }
-
- /* Reduce the pred_esz_masks value simply to reduce the
- * size of the code generated here.
- */
- psz_mask = MAKE_64BIT_MASK(0, psz * 8);
- tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
-
- tcg_gen_ctpop_i64(val, val);
- } else {
- TCGv_ptr t_pn = tcg_temp_new_ptr();
- TCGv_ptr t_pg = tcg_temp_new_ptr();
- unsigned desc;
- TCGv_i32 t_desc;
-
- desc = psz - 2;
- desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
-
- tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
- tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
- t_desc = tcg_const_i32(desc);
-
- gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
- tcg_temp_free_ptr(t_pn);
- tcg_temp_free_ptr(t_pg);
- tcg_temp_free_i32(t_desc);
- }
-}
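/*
 * A worked example of the psz <= 8 fast path above: with a 128-bit vector,
 * psz is 2, so the whole predicate fits in a single 64-bit load.  For
 * esz == MO_32 the pred_esz_masks entry keeps one bit per element (bit 0 of
 * each 4-bit group), and masking with MAKE_64BIT_MASK(0, 16) confines that
 * to the 16 valid predicate bits; the ctpop of the result is then exactly
 * the number of active 32-bit elements, with no helper call needed.
 * (Mask values assumed from the pred_esz_masks table defined earlier in
 * this file.)
 */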
-
-static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
- }
- return true;
-}
-
-static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
- uint32_t insn)
-{
- if (sve_access_check(s)) {
- TCGv_i64 reg = cpu_reg(s, a->rd);
- TCGv_i64 val = tcg_temp_new_i64();
-
- do_cntp(s, val, a->esz, a->pg, a->pg);
- if (a->d) {
- tcg_gen_sub_i64(reg, reg, val);
- } else {
- tcg_gen_add_i64(reg, reg, val);
- }
- tcg_temp_free_i64(val);
- }
- return true;
-}
-
-static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
- uint32_t insn)
-{
- if (a->esz == 0) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- TCGv_i64 val = tcg_temp_new_i64();
- GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
-
- do_cntp(s, val, a->esz, a->pg, a->pg);
- gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn), val, vsz, vsz);
- }
- return true;
-}
-
-static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
- uint32_t insn)
-{
- if (sve_access_check(s)) {
- TCGv_i64 reg = cpu_reg(s, a->rd);
- TCGv_i64 val = tcg_temp_new_i64();
-
- do_cntp(s, val, a->esz, a->pg, a->pg);
- do_sat_addsub_32(reg, val, a->u, a->d);
- }
- return true;
-}
-
-static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
- uint32_t insn)
-{
- if (sve_access_check(s)) {
- TCGv_i64 reg = cpu_reg(s, a->rd);
- TCGv_i64 val = tcg_temp_new_i64();
-
- do_cntp(s, val, a->esz, a->pg, a->pg);
- do_sat_addsub_64(reg, val, a->u, a->d);
- }
- return true;
-}
-
-static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
- uint32_t insn)
-{
- if (a->esz == 0) {
- return false;
- }
- if (sve_access_check(s)) {
- TCGv_i64 val = tcg_temp_new_i64();
- do_cntp(s, val, a->esz, a->pg, a->pg);
- do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
- }
- return true;
-}
-
-/*
- *** SVE Integer Compare Scalars Group
- */
-
-static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
-{
- if (!sve_access_check(s)) {
- return true;
- }
-
- TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
- TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
- TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
- TCGv_i64 cmp = tcg_temp_new_i64();
-
- tcg_gen_setcond_i64(cond, cmp, rn, rm);
- tcg_gen_extrl_i64_i32(cpu_NF, cmp);
- tcg_temp_free_i64(cmp);
-
- /* VF = !NF & !CF. */
- tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
- tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
-
- /* Both NF and VF actually look at bit 31. */
- tcg_gen_neg_i32(cpu_NF, cpu_NF);
- tcg_gen_neg_i32(cpu_VF, cpu_VF);
- return true;
-}
-
-static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
-{
- TCGv_i64 op0, op1, t0, t1, tmax;
- TCGv_i32 t2, t3;
- TCGv_ptr ptr;
- unsigned desc, vsz = vec_full_reg_size(s);
- TCGCond cond;
-
- if (!sve_access_check(s)) {
- return true;
- }
-
- op0 = read_cpu_reg(s, a->rn, 1);
- op1 = read_cpu_reg(s, a->rm, 1);
-
- if (!a->sf) {
- if (a->u) {
- tcg_gen_ext32u_i64(op0, op0);
- tcg_gen_ext32u_i64(op1, op1);
- } else {
- tcg_gen_ext32s_i64(op0, op0);
- tcg_gen_ext32s_i64(op1, op1);
- }
- }
-
- /* For the helper, compress the different conditions into a computation
- * of how many iterations for which the condition is true.
- */
- t0 = tcg_temp_new_i64();
- t1 = tcg_temp_new_i64();
- tcg_gen_sub_i64(t0, op1, op0);
-
- tmax = tcg_const_i64(vsz >> a->esz);
- if (a->eq) {
- /* Equality means one more iteration. */
- tcg_gen_addi_i64(t0, t0, 1);
-
- /* If op1 is max (un)signed integer (and the only time the addition
- * above could overflow), then we produce an all-true predicate by
- * setting the count to the vector length. This is because the
- * pseudocode is described as an increment + compare loop, and the
- * max integer would always compare true.
- */
- tcg_gen_movi_i64(t1, (a->sf
- ? (a->u ? UINT64_MAX : INT64_MAX)
- : (a->u ? UINT32_MAX : INT32_MAX)));
- tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
- }
-
- /* Bound to the maximum. */
- tcg_gen_umin_i64(t0, t0, tmax);
- tcg_temp_free_i64(tmax);
-
- /* Set the count to zero if the condition is false. */
- cond = (a->u
- ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
- : (a->eq ? TCG_COND_LE : TCG_COND_LT));
- tcg_gen_movi_i64(t1, 0);
- tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
- tcg_temp_free_i64(t1);
-
- /* Since we're bounded, pass as a 32-bit type. */
- t2 = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(t2, t0);
- tcg_temp_free_i64(t0);
-
- /* Scale elements to bits. */
- tcg_gen_shli_i32(t2, t2, a->esz);
-
- desc = (vsz / 8) - 2;
- desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
- t3 = tcg_const_i32(desc);
-
- ptr = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
-
- gen_helper_sve_while(t2, ptr, t2, t3);
- do_pred_flags(t2);
-
- tcg_temp_free_ptr(ptr);
- tcg_temp_free_i32(t2);
- tcg_temp_free_i32(t3);
- return true;
-}
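/*
 * Worked example of the iteration-count computation above (numbers chosen
 * for illustration): WHILELO with 32-bit elements (a->u = 1, a->eq = 0,
 * a->esz = 2) on a 256-bit vector gives tmax = vsz >> esz = 8.  With
 * op0 = 5 and op1 = 9, t0 = 9 - 5 = 4; that is already <= tmax and the
 * TCG_COND_LTU test holds, so the count stays 4.  Scaled to predicate bits
 * it becomes 4 << 2 = 16, and gen_helper_sve_while marks the first four
 * 32-bit elements of Pd active, with do_pred_flags() setting NZCV from the
 * helper's return value.
 */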
-
-/*
- *** SVE Integer Wide Immediate - Unpredicated Group
- */
-
-static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
-{
- if (a->esz == 0) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- int dofs = vec_full_reg_offset(s, a->rd);
- uint64_t imm;
-
- /* Decode the VFP immediate. */
- imm = vfp_expand_imm(a->esz, a->imm);
- imm = dup_const(a->esz, imm);
-
- tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
- }
- return true;
-}
-
-static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
-{
- if (a->esz == 0 && extract32(insn, 13, 1)) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- int dofs = vec_full_reg_offset(s, a->rd);
-
- tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
- }
- return true;
-}
-
-static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
-{
- if (a->esz == 0 && extract32(insn, 13, 1)) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
- }
- return true;
-}
-
-static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
-{
- a->imm = -a->imm;
- return trans_ADD_zzi(s, a, insn);
-}
-
-static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
-{
- static const GVecGen2s op[4] = {
- { .fni8 = tcg_gen_vec_sub8_i64,
- .fniv = tcg_gen_sub_vec,
- .fno = gen_helper_sve_subri_b,
- .opc = INDEX_op_sub_vec,
- .vece = MO_8,
- .scalar_first = true },
- { .fni8 = tcg_gen_vec_sub16_i64,
- .fniv = tcg_gen_sub_vec,
- .fno = gen_helper_sve_subri_h,
- .opc = INDEX_op_sub_vec,
- .vece = MO_16,
- .scalar_first = true },
- { .fni4 = tcg_gen_sub_i32,
- .fniv = tcg_gen_sub_vec,
- .fno = gen_helper_sve_subri_s,
- .opc = INDEX_op_sub_vec,
- .vece = MO_32,
- .scalar_first = true },
- { .fni8 = tcg_gen_sub_i64,
- .fniv = tcg_gen_sub_vec,
- .fno = gen_helper_sve_subri_d,
- .opc = INDEX_op_sub_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .vece = MO_64,
- .scalar_first = true }
- };
-
- if (a->esz == 0 && extract32(insn, 13, 1)) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- TCGv_i64 c = tcg_const_i64(a->imm);
- tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vsz, vsz, c, &op[a->esz]);
- tcg_temp_free_i64(c);
- }
- return true;
-}
-
-static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
- }
- return true;
-}
-
-static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
- bool u, bool d)
-{
- if (a->esz == 0 && extract32(insn, 13, 1)) {
- return false;
- }
- if (sve_access_check(s)) {
- TCGv_i64 val = tcg_const_i64(a->imm);
- do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
- tcg_temp_free_i64(val);
- }
- return true;
-}
-
-static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
-{
- return do_zzi_sat(s, a, insn, false, false);
-}
-
-static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
-{
- return do_zzi_sat(s, a, insn, true, false);
-}
-
-static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
-{
- return do_zzi_sat(s, a, insn, false, true);
-}
-
-static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
-{
- return do_zzi_sat(s, a, insn, true, true);
-}
-
-static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
-{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- TCGv_i64 c = tcg_const_i64(a->imm);
-
- tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- c, vsz, vsz, 0, fn);
- tcg_temp_free_i64(c);
- }
- return true;
-}
-
-#define DO_ZZI(NAME, name) \
-static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
- uint32_t insn) \
-{ \
- static gen_helper_gvec_2i * const fns[4] = { \
- gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
- gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
- }; \
- return do_zzi_ool(s, a, fns[a->esz]); \
-}
-
-DO_ZZI(SMAX, smax)
-DO_ZZI(UMAX, umax)
-DO_ZZI(SMIN, smin)
-DO_ZZI(UMIN, umin)
-
-#undef DO_ZZI
-
-static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[2][2] = {
- { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
- { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
- };
-
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, 0, fns[a->u][a->sz]);
- }
- return true;
-}
-
-static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a, uint32_t insn)
-{
- static gen_helper_gvec_3 * const fns[2][2] = {
- { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
- { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
- };
-
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vsz, vsz, a->index, fns[a->u][a->sz]);
- }
- return true;
-}
-
-
-/*
- *** SVE Floating Point Multiply-Add Indexed Group
- */
-
-static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
-{
- static gen_helper_gvec_4_ptr * const fns[3] = {
- gen_helper_gvec_fmla_idx_h,
- gen_helper_gvec_fmla_idx_s,
- gen_helper_gvec_fmla_idx_d,
- };
-
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
- tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- vec_full_reg_offset(s, a->ra),
- status, vsz, vsz, (a->index << 1) | a->sub,
- fns[a->esz - 1]);
- tcg_temp_free_ptr(status);
- }
- return true;
-}
-
-/*
- *** SVE Floating Point Multiply Indexed Group
- */
-
-static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
-{
- static gen_helper_gvec_3_ptr * const fns[3] = {
- gen_helper_gvec_fmul_idx_h,
- gen_helper_gvec_fmul_idx_s,
- gen_helper_gvec_fmul_idx_d,
- };
-
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
- tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- status, vsz, vsz, a->index, fns[a->esz - 1]);
- tcg_temp_free_ptr(status);
- }
- return true;
-}
-
-/*
- *** SVE Floating Point Fast Reduction Group
- */
-
-typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
- TCGv_ptr, TCGv_i32);
-
-static void do_reduce(DisasContext *s, arg_rpr_esz *a,
- gen_helper_fp_reduce *fn)
-{
- unsigned vsz = vec_full_reg_size(s);
- unsigned p2vsz = pow2ceil(vsz);
- TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
- TCGv_ptr t_zn, t_pg, status;
- TCGv_i64 temp;
-
- temp = tcg_temp_new_i64();
- t_zn = tcg_temp_new_ptr();
- t_pg = tcg_temp_new_ptr();
-
- tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
- tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
- status = get_fpstatus_ptr(a->esz == MO_16);
-
- fn(temp, t_zn, t_pg, status, t_desc);
- tcg_temp_free_ptr(t_zn);
- tcg_temp_free_ptr(t_pg);
- tcg_temp_free_ptr(status);
- tcg_temp_free_i32(t_desc);
-
- write_fp_dreg(s, a->rd, temp);
- tcg_temp_free_i64(temp);
-}
-
-#define DO_VPZ(NAME, name) \
-static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
-{ \
- static gen_helper_fp_reduce * const fns[3] = { \
- gen_helper_sve_##name##_h, \
- gen_helper_sve_##name##_s, \
- gen_helper_sve_##name##_d, \
- }; \
- if (a->esz == 0) { \
- return false; \
- } \
- if (sve_access_check(s)) { \
- do_reduce(s, a, fns[a->esz - 1]); \
- } \
- return true; \
-}
-
-DO_VPZ(FADDV, faddv)
-DO_VPZ(FMINNMV, fminnmv)
-DO_VPZ(FMAXNMV, fmaxnmv)
-DO_VPZ(FMINV, fminv)
-DO_VPZ(FMAXV, fmaxv)
-
-/*
- *** SVE Floating Point Unary Operations - Unpredicated Group
- */
-
-static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
-{
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
-
- tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- status, vsz, vsz, 0, fn);
- tcg_temp_free_ptr(status);
-}
-
-static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_2_ptr * const fns[3] = {
- gen_helper_gvec_frecpe_h,
- gen_helper_gvec_frecpe_s,
- gen_helper_gvec_frecpe_d,
- };
- if (a->esz == 0) {
- return false;
- }
- if (sve_access_check(s)) {
- do_zz_fp(s, a, fns[a->esz - 1]);
- }
- return true;
-}
-
-static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_2_ptr * const fns[3] = {
- gen_helper_gvec_frsqrte_h,
- gen_helper_gvec_frsqrte_s,
- gen_helper_gvec_frsqrte_d,
- };
- if (a->esz == 0) {
- return false;
- }
- if (sve_access_check(s)) {
- do_zz_fp(s, a, fns[a->esz - 1]);
- }
- return true;
-}
-
-/*
- *** SVE Floating Point Compare with Zero Group
- */
-
-static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
- gen_helper_gvec_3_ptr *fn)
-{
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
-
- tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- pred_full_reg_offset(s, a->pg),
- status, vsz, vsz, 0, fn);
- tcg_temp_free_ptr(status);
-}
-
-#define DO_PPZ(NAME, name) \
-static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
-{ \
- static gen_helper_gvec_3_ptr * const fns[3] = { \
- gen_helper_sve_##name##_h, \
- gen_helper_sve_##name##_s, \
- gen_helper_sve_##name##_d, \
- }; \
- if (a->esz == 0) { \
- return false; \
- } \
- if (sve_access_check(s)) { \
- do_ppz_fp(s, a, fns[a->esz - 1]); \
- } \
- return true; \
-}
-
-DO_PPZ(FCMGE_ppz0, fcmge0)
-DO_PPZ(FCMGT_ppz0, fcmgt0)
-DO_PPZ(FCMLE_ppz0, fcmle0)
-DO_PPZ(FCMLT_ppz0, fcmlt0)
-DO_PPZ(FCMEQ_ppz0, fcmeq0)
-DO_PPZ(FCMNE_ppz0, fcmne0)
-
-#undef DO_PPZ
-
-/*
- *** SVE floating-point trig multiply-add coefficient
- */
-
-static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a, uint32_t insn)
-{
- static gen_helper_gvec_3_ptr * const fns[3] = {
- gen_helper_sve_ftmad_h,
- gen_helper_sve_ftmad_s,
- gen_helper_sve_ftmad_d,
- };
-
- if (a->esz == 0) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
- tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- status, vsz, vsz, a->imm, fns[a->esz - 1]);
- tcg_temp_free_ptr(status);
- }
- return true;
-}
-
-/*
- *** SVE Floating Point Accumulating Reduction Group
- */
-
-static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
-{
- typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
- TCGv_ptr, TCGv_ptr, TCGv_i32);
- static fadda_fn * const fns[3] = {
- gen_helper_sve_fadda_h,
- gen_helper_sve_fadda_s,
- gen_helper_sve_fadda_d,
- };
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr t_rm, t_pg, t_fpst;
- TCGv_i64 t_val;
- TCGv_i32 t_desc;
-
- if (a->esz == 0) {
- return false;
- }
- if (!sve_access_check(s)) {
- return true;
- }
-
- t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
- t_rm = tcg_temp_new_ptr();
- t_pg = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
- tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
- t_fpst = get_fpstatus_ptr(a->esz == MO_16);
- t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
-
- fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
-
- tcg_temp_free_i32(t_desc);
- tcg_temp_free_ptr(t_fpst);
- tcg_temp_free_ptr(t_pg);
- tcg_temp_free_ptr(t_rm);
-
- write_fp_dreg(s, a->rd, t_val);
- tcg_temp_free_i64(t_val);
- return true;
-}
-
-/*
- *** SVE Floating Point Arithmetic - Unpredicated Group
- */
-
-static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
- gen_helper_gvec_3_ptr *fn)
-{
- if (fn == NULL) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
- tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- status, vsz, vsz, 0, fn);
- tcg_temp_free_ptr(status);
- }
- return true;
-}
-
-
-#define DO_FP3(NAME, name) \
-static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
-{ \
- static gen_helper_gvec_3_ptr * const fns[4] = { \
- NULL, gen_helper_gvec_##name##_h, \
- gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
- }; \
- return do_zzz_fp(s, a, fns[a->esz]); \
-}
-
-DO_FP3(FADD_zzz, fadd)
-DO_FP3(FSUB_zzz, fsub)
-DO_FP3(FMUL_zzz, fmul)
-DO_FP3(FTSMUL, ftsmul)
-DO_FP3(FRECPS, recps)
-DO_FP3(FRSQRTS, rsqrts)
-
-#undef DO_FP3
-
-/*
- *** SVE Floating Point Arithmetic - Predicated Group
- */
-
-static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
- gen_helper_gvec_4_ptr *fn)
-{
- if (fn == NULL) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
- tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- pred_full_reg_offset(s, a->pg),
- status, vsz, vsz, 0, fn);
- tcg_temp_free_ptr(status);
- }
- return true;
-}
-
-#define DO_FP3(NAME, name) \
-static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
-{ \
- static gen_helper_gvec_4_ptr * const fns[4] = { \
- NULL, gen_helper_sve_##name##_h, \
- gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
- }; \
- return do_zpzz_fp(s, a, fns[a->esz]); \
-}
-
-DO_FP3(FADD_zpzz, fadd)
-DO_FP3(FSUB_zpzz, fsub)
-DO_FP3(FMUL_zpzz, fmul)
-DO_FP3(FMIN_zpzz, fmin)
-DO_FP3(FMAX_zpzz, fmax)
-DO_FP3(FMINNM_zpzz, fminnum)
-DO_FP3(FMAXNM_zpzz, fmaxnum)
-DO_FP3(FABD, fabd)
-DO_FP3(FSCALE, fscalbn)
-DO_FP3(FDIV, fdiv)
-DO_FP3(FMULX, fmulx)
-
-#undef DO_FP3
-
-typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
- TCGv_i64, TCGv_ptr, TCGv_i32);
-
-static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
- TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
-{
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr t_zd, t_zn, t_pg, status;
- TCGv_i32 desc;
-
- t_zd = tcg_temp_new_ptr();
- t_zn = tcg_temp_new_ptr();
- t_pg = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
- tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
- tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
-
- status = get_fpstatus_ptr(is_fp16);
- desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
- fn(t_zd, t_zn, t_pg, scalar, status, desc);
-
- tcg_temp_free_i32(desc);
- tcg_temp_free_ptr(status);
- tcg_temp_free_ptr(t_pg);
- tcg_temp_free_ptr(t_zn);
- tcg_temp_free_ptr(t_zd);
-}
-
-static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
- gen_helper_sve_fp2scalar *fn)
-{
- TCGv_i64 temp = tcg_const_i64(imm);
- do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
- tcg_temp_free_i64(temp);
-}
-
-#define DO_FP_IMM(NAME, name, const0, const1) \
-static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a, \
- uint32_t insn) \
-{ \
- static gen_helper_sve_fp2scalar * const fns[3] = { \
- gen_helper_sve_##name##_h, \
- gen_helper_sve_##name##_s, \
- gen_helper_sve_##name##_d \
- }; \
- static uint64_t const val[3][2] = { \
- { float16_##const0, float16_##const1 }, \
- { float32_##const0, float32_##const1 }, \
- { float64_##const0, float64_##const1 }, \
- }; \
- if (a->esz == 0) { \
- return false; \
- } \
- if (sve_access_check(s)) { \
- do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
- } \
- return true; \
-}
-
-#define float16_two make_float16(0x4000)
-#define float32_two make_float32(0x40000000)
-#define float64_two make_float64(0x4000000000000000ULL)
-
-DO_FP_IMM(FADD, fadds, half, one)
-DO_FP_IMM(FSUB, fsubs, half, one)
-DO_FP_IMM(FMUL, fmuls, half, two)
-DO_FP_IMM(FSUBR, fsubrs, half, one)
-DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
-DO_FP_IMM(FMINNM, fminnms, zero, one)
-DO_FP_IMM(FMAX, fmaxs, zero, one)
-DO_FP_IMM(FMIN, fmins, zero, one)
-
-#undef DO_FP_IMM
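/*
 * The const0/const1 pair in DO_FP_IMM selects between two fixed operands via
 * the one-bit a->imm field.  For reference, the encodings involved are the
 * standard IEEE ones (zero/half/one presumably coming from softfloat.h, the
 * *_two values defined just above):
 *   float16: half 0x3800, one 0x3c00, two 0x4000
 *   float32: half 0x3f000000, one 0x3f800000, two 0x40000000
 *   float64: half 0x3fe0000000000000, one 0x3ff0000000000000
 * So, e.g., FADD_zpzi with esz == MO_16 and imm == 1 adds float16_one
 * (0x3c00) to each active element.
 */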
-
-static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
- gen_helper_gvec_4_ptr *fn)
-{
- if (fn == NULL) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
- tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- pred_full_reg_offset(s, a->pg),
- status, vsz, vsz, 0, fn);
- tcg_temp_free_ptr(status);
- }
- return true;
-}
-
-#define DO_FPCMP(NAME, name) \
-static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
- uint32_t insn) \
-{ \
- static gen_helper_gvec_4_ptr * const fns[4] = { \
- NULL, gen_helper_sve_##name##_h, \
- gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
- }; \
- return do_fp_cmp(s, a, fns[a->esz]); \
-}
-
-DO_FPCMP(FCMGE, fcmge)
-DO_FPCMP(FCMGT, fcmgt)
-DO_FPCMP(FCMEQ, fcmeq)
-DO_FPCMP(FCMNE, fcmne)
-DO_FPCMP(FCMUO, fcmuo)
-DO_FPCMP(FACGE, facge)
-DO_FPCMP(FACGT, facgt)
-
-#undef DO_FPCMP
-
-static bool trans_FCADD(DisasContext *s, arg_FCADD *a, uint32_t insn)
-{
- static gen_helper_gvec_4_ptr * const fns[3] = {
- gen_helper_sve_fcadd_h,
- gen_helper_sve_fcadd_s,
- gen_helper_sve_fcadd_d
- };
-
- if (a->esz == 0) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
- tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- pred_full_reg_offset(s, a->pg),
- status, vsz, vsz, a->rot, fns[a->esz - 1]);
- tcg_temp_free_ptr(status);
- }
- return true;
-}
-
-typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
-
-static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
-{
- if (fn == NULL) {
- return false;
- }
- if (!sve_access_check(s)) {
- return true;
- }
-
- unsigned vsz = vec_full_reg_size(s);
- unsigned desc;
- TCGv_i32 t_desc;
- TCGv_ptr pg = tcg_temp_new_ptr();
-
- /* We would need 7 operands to pass these arguments "properly".
- * So we encode all the register numbers into the descriptor.
- */
- desc = deposit32(a->rd, 5, 5, a->rn);
- desc = deposit32(desc, 10, 5, a->rm);
- desc = deposit32(desc, 15, 5, a->ra);
- desc = simd_desc(vsz, vsz, desc);
-
- t_desc = tcg_const_i32(desc);
- tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
- fn(cpu_env, pg, t_desc);
- tcg_temp_free_i32(t_desc);
- tcg_temp_free_ptr(pg);
- return true;
-}
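/*
 * Layout of the descriptor built by do_fmla above: rd in bits [4:0], rn in
 * [9:5], rm in [14:10], ra in [19:15], all wrapped into the simd_desc() data
 * field.  The consuming helpers live in sve_helper.c and are outside this
 * hunk; a sketch of the assumed decode (names illustrative only):
 *     unsigned data = simd_data(desc);
 *     unsigned rd = extract32(data, 0, 5);
 *     unsigned rn = extract32(data, 5, 5);
 *     unsigned rm = extract32(data, 10, 5);
 *     unsigned ra = extract32(data, 15, 5);
 */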
-
-#define DO_FMLA(NAME, name) \
-static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
-{ \
- static gen_helper_sve_fmla * const fns[4] = { \
- NULL, gen_helper_sve_##name##_h, \
- gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
- }; \
- return do_fmla(s, a, fns[a->esz]); \
-}
-
-DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
-DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
-DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
-DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
-
-#undef DO_FMLA
-
-static bool trans_FCMLA_zpzzz(DisasContext *s,
- arg_FCMLA_zpzzz *a, uint32_t insn)
-{
- static gen_helper_sve_fmla * const fns[3] = {
- gen_helper_sve_fcmla_zpzzz_h,
- gen_helper_sve_fcmla_zpzzz_s,
- gen_helper_sve_fcmla_zpzzz_d,
- };
-
- if (a->esz == 0) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- unsigned desc;
- TCGv_i32 t_desc;
- TCGv_ptr pg = tcg_temp_new_ptr();
-
- /* We would need 7 operands to pass these arguments "properly".
- * So we encode all the register numbers into the descriptor.
- */
- desc = deposit32(a->rd, 5, 5, a->rn);
- desc = deposit32(desc, 10, 5, a->rm);
- desc = deposit32(desc, 15, 5, a->ra);
- desc = deposit32(desc, 20, 2, a->rot);
- desc = sextract32(desc, 0, 22);
- desc = simd_desc(vsz, vsz, desc);
-
- t_desc = tcg_const_i32(desc);
- tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
- fns[a->esz - 1](cpu_env, pg, t_desc);
- tcg_temp_free_i32(t_desc);
- tcg_temp_free_ptr(pg);
- }
- return true;
-}
-
-static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a, uint32_t insn)
-{
- static gen_helper_gvec_3_ptr * const fns[2] = {
- gen_helper_gvec_fcmlah_idx,
- gen_helper_gvec_fcmlas_idx,
- };
-
- tcg_debug_assert(a->esz == 1 || a->esz == 2);
- tcg_debug_assert(a->rd == a->ra);
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
- tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- vec_full_reg_offset(s, a->rm),
- status, vsz, vsz,
- a->index * 4 + a->rot,
- fns[a->esz - 1]);
- tcg_temp_free_ptr(status);
- }
- return true;
-}
-
-/*
- *** SVE Floating Point Unary Operations Predicated Group
- */
-
-static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
- bool is_fp16, gen_helper_gvec_3_ptr *fn)
-{
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr status = get_fpstatus_ptr(is_fp16);
- tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
- vec_full_reg_offset(s, rn),
- pred_full_reg_offset(s, pg),
- status, vsz, vsz, 0, fn);
- tcg_temp_free_ptr(status);
- }
- return true;
-}
-
-static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
-}
-
-static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
-}
-
-static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
-}
-
-static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
-}
-
-static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
-}
-
-static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
-}
-
-static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
-}
-
-static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
-}
-
-static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
-}
-
-static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
-}
-
-static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
-}
-
-static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
-}
-
-static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
-}
-
-static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
-}
-
-static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
-}
-
-static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
-}
-
-static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
-}
-
-static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
-}
-
-static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
-}
-
-static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
-}
-
-static gen_helper_gvec_3_ptr * const frint_fns[3] = {
- gen_helper_sve_frint_h,
- gen_helper_sve_frint_s,
- gen_helper_sve_frint_d
-};
-
-static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- if (a->esz == 0) {
- return false;
- }
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
- frint_fns[a->esz - 1]);
-}
-
-static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3_ptr * const fns[3] = {
- gen_helper_sve_frintx_h,
- gen_helper_sve_frintx_s,
- gen_helper_sve_frintx_d
- };
- if (a->esz == 0) {
- return false;
- }
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
-}
-
-static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
-{
- if (a->esz == 0) {
- return false;
- }
- if (sve_access_check(s)) {
- unsigned vsz = vec_full_reg_size(s);
- TCGv_i32 tmode = tcg_const_i32(mode);
- TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
-
- gen_helper_set_rmode(tmode, tmode, status);
-
- tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
- vec_full_reg_offset(s, a->rn),
- pred_full_reg_offset(s, a->pg),
- status, vsz, vsz, 0, frint_fns[a->esz - 1]);
-
- gen_helper_set_rmode(tmode, tmode, status);
- tcg_temp_free_i32(tmode);
- tcg_temp_free_ptr(status);
- }
- return true;
-}
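/*
 * Note on the paired gen_helper_set_rmode() calls in do_frint_mode: this is
 * the usual A64 save/restore idiom -- the helper installs the requested
 * rounding mode in the given fp_status and returns the previous mode, so
 * calling it a second time with the same temporary restores the original
 * rounding mode once the rounding operation has been emitted.
 */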
-
-static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_frint_mode(s, a, float_round_nearest_even);
-}
-
-static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_frint_mode(s, a, float_round_up);
-}
-
-static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_frint_mode(s, a, float_round_down);
-}
-
-static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_frint_mode(s, a, float_round_to_zero);
-}
-
-static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_frint_mode(s, a, float_round_ties_away);
-}
-
-static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3_ptr * const fns[3] = {
- gen_helper_sve_frecpx_h,
- gen_helper_sve_frecpx_s,
- gen_helper_sve_frecpx_d
- };
- if (a->esz == 0) {
- return false;
- }
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
-}
-
-static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- static gen_helper_gvec_3_ptr * const fns[3] = {
- gen_helper_sve_fsqrt_h,
- gen_helper_sve_fsqrt_s,
- gen_helper_sve_fsqrt_d
- };
- if (a->esz == 0) {
- return false;
- }
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
-}
-
-static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
-}
-
-static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
-}
-
-static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
-}
-
-static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
-}
-
-static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
-}
-
-static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
-}
-
-static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
-}
-
-static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
-}
-
-static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
-}
-
-static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
-}
-
-static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
-}
-
-static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
-}
-
-static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
-}
-
-static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
-}
-
-/*
- *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
- */
-
-/* Subroutine loading a vector register at VOFS of LEN bytes.
- * The load should begin at the address Rn + IMM.
- */
-
-static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
-{
- int len_align = QEMU_ALIGN_DOWN(len, 8);
- int len_remain = len % 8;
- int nparts = len / 8 + ctpop8(len_remain);
- int midx = get_mem_index(s);
- TCGv_i64 addr, t0, t1;
-
- addr = tcg_temp_new_i64();
- t0 = tcg_temp_new_i64();
-
- /* Note that unpredicated load/store of vector/predicate registers
- * are defined as a stream of bytes, which equates to little-endian
- * operations on larger quantities. There is no nice way to force
- * a little-endian load for aarch64_be-linux-user out of line.
- *
- * Attempt to keep code expansion to a minimum by limiting the
- * amount of unrolling done.
- */
- if (nparts <= 4) {
- int i;
-
- for (i = 0; i < len_align; i += 8) {
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
- tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
- tcg_gen_st_i64(t0, cpu_env, vofs + i);
- }
- } else {
- TCGLabel *loop = gen_new_label();
- TCGv_ptr tp, i = tcg_const_local_ptr(0);
-
- gen_set_label(loop);
-
- /* Minimize the number of local temps that must be re-read from
- * the stack each iteration. Instead, re-compute values other
- * than the loop counter.
- */
- tp = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(tp, i, imm);
- tcg_gen_extu_ptr_i64(addr, tp);
- tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
-
- tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
-
- tcg_gen_add_ptr(tp, cpu_env, i);
- tcg_gen_addi_ptr(i, i, 8);
- tcg_gen_st_i64(t0, tp, vofs);
- tcg_temp_free_ptr(tp);
-
- tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
- tcg_temp_free_ptr(i);
- }
-
- /* Predicate register loads can be any multiple of 2.
- * Note that we still store the entire 64-bit unit into cpu_env.
- */
- if (len_remain) {
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
-
- switch (len_remain) {
- case 2:
- case 4:
- case 8:
- tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
- break;
-
- case 6:
- t1 = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
- tcg_gen_addi_i64(addr, addr, 4);
- tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
- tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
- tcg_temp_free_i64(t1);
- break;
-
- default:
- g_assert_not_reached();
- }
- tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
- }
- tcg_temp_free_i64(addr);
- tcg_temp_free_i64(t0);
-}
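/*
 * Worked example of the len_remain == 6 path in do_ldr: with a 384-bit
 * vector the predicate registers are 6 bytes, so LDR (predicate) arrives
 * here with len = 6, len_align = 0 and the whole register is handled by the
 * remainder code: a 32-bit load, an address bump of 4, a 16-bit load, and a
 * deposit of the latter into bits [63:32] of t0, after which the full
 * 64-bit unit is stored to cpu_env (the predicate backing storage is kept
 * in 8-byte units, per the comment above).
 */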
-
-/* Similarly for stores. */
-static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
-{
- int len_align = QEMU_ALIGN_DOWN(len, 8);
- int len_remain = len % 8;
- int nparts = len / 8 + ctpop8(len_remain);
- int midx = get_mem_index(s);
- TCGv_i64 addr, t0;
-
- addr = tcg_temp_new_i64();
- t0 = tcg_temp_new_i64();
-
- /* Note that unpredicated load/store of vector/predicate registers
- * are defined as a stream of bytes, which equates to little-endian
- * operations on larger quantities. There is no nice way to force
- * a little-endian store for aarch64_be-linux-user out of line.
- *
- * Attempt to keep code expansion to a minimum by limiting the
- * amount of unrolling done.
- */
- if (nparts <= 4) {
- int i;
-
- for (i = 0; i < len_align; i += 8) {
- tcg_gen_ld_i64(t0, cpu_env, vofs + i);
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
- tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
- }
- } else {
- TCGLabel *loop = gen_new_label();
- TCGv_ptr t2, i = tcg_const_local_ptr(0);
-
- gen_set_label(loop);
-
- t2 = tcg_temp_new_ptr();
- tcg_gen_add_ptr(t2, cpu_env, i);
- tcg_gen_ld_i64(t0, t2, vofs);
-
- /* Minimize the number of local temps that must be re-read from
- * the stack each iteration. Instead, re-compute values other
- * than the loop counter.
- */
- tcg_gen_addi_ptr(t2, i, imm);
- tcg_gen_extu_ptr_i64(addr, t2);
- tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
- tcg_temp_free_ptr(t2);
-
- tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
-
- tcg_gen_addi_ptr(i, i, 8);
-
- tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
- tcg_temp_free_ptr(i);
- }
-
- /* Predicate register stores can be any multiple of 2. */
- if (len_remain) {
- tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
-
- switch (len_remain) {
- case 2:
- case 4:
- case 8:
- tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
- break;
-
- case 6:
- tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
- tcg_gen_addi_i64(addr, addr, 4);
- tcg_gen_shri_i64(t0, t0, 32);
- tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
- break;
-
- default:
- g_assert_not_reached();
- }
- }
- tcg_temp_free_i64(addr);
- tcg_temp_free_i64(t0);
-}
-
-static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- int size = vec_full_reg_size(s);
- int off = vec_full_reg_offset(s, a->rd);
- do_ldr(s, off, size, a->rn, a->imm * size);
- }
- return true;
-}
-
-static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- int size = pred_full_reg_size(s);
- int off = pred_full_reg_offset(s, a->rd);
- do_ldr(s, off, size, a->rn, a->imm * size);
- }
- return true;
-}
-
-static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- int size = vec_full_reg_size(s);
- int off = vec_full_reg_offset(s, a->rd);
- do_str(s, off, size, a->rn, a->imm * size);
- }
- return true;
-}
-
-static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- int size = pred_full_reg_size(s);
- int off = pred_full_reg_offset(s, a->rd);
- do_str(s, off, size, a->rn, a->imm * size);
- }
- return true;
-}
-
-/*
- *** SVE Memory - Contiguous Load Group
- */
-
-/* The memory mode of the dtype. */
-static const TCGMemOp dtype_mop[16] = {
- MO_UB, MO_UB, MO_UB, MO_UB,
- MO_SL, MO_UW, MO_UW, MO_UW,
- MO_SW, MO_SW, MO_UL, MO_UL,
- MO_SB, MO_SB, MO_SB, MO_Q
-};
-
-#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
-
-/* The vector element size of dtype. */
-static const uint8_t dtype_esz[16] = {
- 0, 1, 2, 3,
- 3, 1, 2, 3,
- 3, 2, 2, 3,
- 3, 2, 1, 3
-};
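/*
 * Reading dtype_mop[] and dtype_esz[] together (instruction names inferred
 * from the helper tables below): dtype 0 is LD1B into byte elements (MO_UB,
 * esz 0); dtype 3 is LD1B zero-extended into 64-bit elements (MO_UB, esz 3,
 * cf. ld1bdu); dtype 4 is LD1SW sign-extended into 64-bit elements (MO_SL,
 * esz 3, cf. ld1sds); dtype 15 is LD1D (MO_Q, esz 3).
 */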
-
-static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
-{
- return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
-}
-
-static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
- int dtype, gen_helper_gvec_mem *fn)
-{
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr t_pg;
- TCGv_i32 t_desc;
- int desc;
-
- /* For e.g. LD4, there are not enough arguments to pass all 4
- * registers as pointers, so encode the regno into the data field.
- * For consistency, do this even for LD1.
- */
- desc = sve_memopidx(s, dtype);
- desc |= zt << MEMOPIDX_SHIFT;
- desc = simd_desc(vsz, vsz, desc);
- t_desc = tcg_const_i32(desc);
- t_pg = tcg_temp_new_ptr();
-
- tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
- fn(cpu_env, t_pg, addr, t_desc);
-
- tcg_temp_free_ptr(t_pg);
- tcg_temp_free_i32(t_desc);
-}
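/*
 * Sketch of the descriptor used by do_mem_zpa (helper-side code is in
 * sve_helper.c, outside this hunk, so the decode below is an assumption):
 * the low MEMOPIDX_SHIFT bits of the simd_desc() data field carry the
 * TCGMemOpIdx produced by sve_memopidx(), and the register number zt sits
 * above them, e.g.
 *     unsigned zt = simd_data(desc) >> MEMOPIDX_SHIFT;
 *     TCGMemOpIdx oi = extract32(simd_data(desc), 0, MEMOPIDX_SHIFT);
 */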
-
-static void do_ld_zpa(DisasContext *s, int zt, int pg,
- TCGv_i64 addr, int dtype, int nreg)
-{
- static gen_helper_gvec_mem * const fns[2][16][4] = {
- /* Little-endian */
- { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
- gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
- { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
- gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
- { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
- gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
- { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
- gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
-
- /* Big-endian */
- { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
- gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
- { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
- gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
- { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
- gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
- { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
- gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
- };
- gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];
-
- /* While there are holes in the table, they are not
- * accessible via the instruction encoding.
- */
- assert(fn != NULL);
- do_mem_zpa(s, zt, pg, addr, dtype, fn);
-}
-
-static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
-{
- if (a->rm == 31) {
- return false;
- }
- if (sve_access_check(s)) {
- TCGv_i64 addr = new_tmp_a64(s);
- tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
- tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
- do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
- }
- return true;
-}
-
-static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- int vsz = vec_full_reg_size(s);
- int elements = vsz >> dtype_esz[a->dtype];
- TCGv_i64 addr = new_tmp_a64(s);
-
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
- (a->imm * elements * (a->nreg + 1))
- << dtype_msz(a->dtype));
- do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
- }
- return true;
-}
-
-static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
-{
- static gen_helper_gvec_mem * const fns[2][16] = {
- /* Little-endian */
- { gen_helper_sve_ldff1bb_r,
- gen_helper_sve_ldff1bhu_r,
- gen_helper_sve_ldff1bsu_r,
- gen_helper_sve_ldff1bdu_r,
-
- gen_helper_sve_ldff1sds_le_r,
- gen_helper_sve_ldff1hh_le_r,
- gen_helper_sve_ldff1hsu_le_r,
- gen_helper_sve_ldff1hdu_le_r,
-
- gen_helper_sve_ldff1hds_le_r,
- gen_helper_sve_ldff1hss_le_r,
- gen_helper_sve_ldff1ss_le_r,
- gen_helper_sve_ldff1sdu_le_r,
-
- gen_helper_sve_ldff1bds_r,
- gen_helper_sve_ldff1bss_r,
- gen_helper_sve_ldff1bhs_r,
- gen_helper_sve_ldff1dd_le_r },
-
- /* Big-endian */
- { gen_helper_sve_ldff1bb_r,
- gen_helper_sve_ldff1bhu_r,
- gen_helper_sve_ldff1bsu_r,
- gen_helper_sve_ldff1bdu_r,
-
- gen_helper_sve_ldff1sds_be_r,
- gen_helper_sve_ldff1hh_be_r,
- gen_helper_sve_ldff1hsu_be_r,
- gen_helper_sve_ldff1hdu_be_r,
-
- gen_helper_sve_ldff1hds_be_r,
- gen_helper_sve_ldff1hss_be_r,
- gen_helper_sve_ldff1ss_be_r,
- gen_helper_sve_ldff1sdu_be_r,
-
- gen_helper_sve_ldff1bds_r,
- gen_helper_sve_ldff1bss_r,
- gen_helper_sve_ldff1bhs_r,
- gen_helper_sve_ldff1dd_be_r },
- };
-
- if (sve_access_check(s)) {
- TCGv_i64 addr = new_tmp_a64(s);
- tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
- tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
- do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
- fns[s->be_data == MO_BE][a->dtype]);
- }
- return true;
-}
-
-static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
-{
- static gen_helper_gvec_mem * const fns[2][16] = {
- /* Little-endian */
- { gen_helper_sve_ldnf1bb_r,
- gen_helper_sve_ldnf1bhu_r,
- gen_helper_sve_ldnf1bsu_r,
- gen_helper_sve_ldnf1bdu_r,
-
- gen_helper_sve_ldnf1sds_le_r,
- gen_helper_sve_ldnf1hh_le_r,
- gen_helper_sve_ldnf1hsu_le_r,
- gen_helper_sve_ldnf1hdu_le_r,
-
- gen_helper_sve_ldnf1hds_le_r,
- gen_helper_sve_ldnf1hss_le_r,
- gen_helper_sve_ldnf1ss_le_r,
- gen_helper_sve_ldnf1sdu_le_r,
-
- gen_helper_sve_ldnf1bds_r,
- gen_helper_sve_ldnf1bss_r,
- gen_helper_sve_ldnf1bhs_r,
- gen_helper_sve_ldnf1dd_le_r },
-
- /* Big-endian */
- { gen_helper_sve_ldnf1bb_r,
- gen_helper_sve_ldnf1bhu_r,
- gen_helper_sve_ldnf1bsu_r,
- gen_helper_sve_ldnf1bdu_r,
-
- gen_helper_sve_ldnf1sds_be_r,
- gen_helper_sve_ldnf1hh_be_r,
- gen_helper_sve_ldnf1hsu_be_r,
- gen_helper_sve_ldnf1hdu_be_r,
-
- gen_helper_sve_ldnf1hds_be_r,
- gen_helper_sve_ldnf1hss_be_r,
- gen_helper_sve_ldnf1ss_be_r,
- gen_helper_sve_ldnf1sdu_be_r,
-
- gen_helper_sve_ldnf1bds_r,
- gen_helper_sve_ldnf1bss_r,
- gen_helper_sve_ldnf1bhs_r,
- gen_helper_sve_ldnf1dd_be_r },
- };
-
- if (sve_access_check(s)) {
- int vsz = vec_full_reg_size(s);
- int elements = vsz >> dtype_esz[a->dtype];
- int off = (a->imm * elements) << dtype_msz(a->dtype);
- TCGv_i64 addr = new_tmp_a64(s);
-
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
- do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
- fns[s->be_data == MO_BE][a->dtype]);
- }
- return true;
-}
-
-static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
-{
- static gen_helper_gvec_mem * const fns[2][4] = {
- { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
- gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
- { gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
- gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
- };
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr t_pg;
- TCGv_i32 t_desc;
- int desc, poff;
-
- /* Load the first quadword using the normal predicated load helpers. */
- desc = sve_memopidx(s, msz_dtype(msz));
- desc |= zt << MEMOPIDX_SHIFT;
- desc = simd_desc(16, 16, desc);
- t_desc = tcg_const_i32(desc);
-
- poff = pred_full_reg_offset(s, pg);
- if (vsz > 16) {
- /*
- * Zero-extend the first 16 bits of the predicate into a temporary.
- * This avoids triggering an assert making sure we don't have bits
- * set within a predicate beyond VQ, but we have lowered VQ to 1
- * for this load operation.
- */
- TCGv_i64 tmp = tcg_temp_new_i64();
-#ifdef HOST_WORDS_BIGENDIAN
- poff += 6;
-#endif
- tcg_gen_ld16u_i64(tmp, cpu_env, poff);
-
- poff = offsetof(CPUARMState, vfp.preg_tmp);
- tcg_gen_st_i64(tmp, cpu_env, poff);
- tcg_temp_free_i64(tmp);
- }
-
- t_pg = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(t_pg, cpu_env, poff);
-
- fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
-
- tcg_temp_free_ptr(t_pg);
- tcg_temp_free_i32(t_desc);
-
- /* Replicate that first quadword. */
- if (vsz > 16) {
- unsigned dofs = vec_full_reg_offset(s, zt);
- tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
- }
-}
-
-static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
-{
- if (a->rm == 31) {
- return false;
- }
- if (sve_access_check(s)) {
- int msz = dtype_msz(a->dtype);
- TCGv_i64 addr = new_tmp_a64(s);
- tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
- tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
- do_ldrq(s, a->rd, a->pg, addr, msz);
- }
- return true;
-}
-
-static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- TCGv_i64 addr = new_tmp_a64(s);
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
- do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
- }
- return true;
-}
-
-/* Load and broadcast element. */
-static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
-{
- if (!sve_access_check(s)) {
- return true;
- }
-
- unsigned vsz = vec_full_reg_size(s);
- unsigned psz = pred_full_reg_size(s);
- unsigned esz = dtype_esz[a->dtype];
- unsigned msz = dtype_msz(a->dtype);
- TCGLabel *over = gen_new_label();
- TCGv_i64 temp;
-
- /* If the guarding predicate has no bits set, no load occurs. */
- if (psz <= 8) {
- /* Reduce the pred_esz_masks value simply to reduce the
- * size of the code generated here.
- */
- uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
- temp = tcg_temp_new_i64();
- tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
- tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
- tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
- tcg_temp_free_i64(temp);
- } else {
- TCGv_i32 t32 = tcg_temp_new_i32();
- find_last_active(s, t32, esz, a->pg);
- tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
- tcg_temp_free_i32(t32);
- }
-
- /* Load the data. */
- temp = tcg_temp_new_i64();
- tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
- tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
- s->be_data | dtype_mop[a->dtype]);
-
- /* Broadcast to *all* elements. */
- tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
- vsz, vsz, temp);
- tcg_temp_free_i64(temp);
-
- /* Zero the inactive elements. */
- gen_set_label(over);
- do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
- return true;
-}
-
-static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
- int msz, int esz, int nreg)
-{
- static gen_helper_gvec_mem * const fn_single[2][4][4] = {
- { { gen_helper_sve_st1bb_r,
- gen_helper_sve_st1bh_r,
- gen_helper_sve_st1bs_r,
- gen_helper_sve_st1bd_r },
- { NULL,
- gen_helper_sve_st1hh_le_r,
- gen_helper_sve_st1hs_le_r,
- gen_helper_sve_st1hd_le_r },
- { NULL, NULL,
- gen_helper_sve_st1ss_le_r,
- gen_helper_sve_st1sd_le_r },
- { NULL, NULL, NULL,
- gen_helper_sve_st1dd_le_r } },
- { { gen_helper_sve_st1bb_r,
- gen_helper_sve_st1bh_r,
- gen_helper_sve_st1bs_r,
- gen_helper_sve_st1bd_r },
- { NULL,
- gen_helper_sve_st1hh_be_r,
- gen_helper_sve_st1hs_be_r,
- gen_helper_sve_st1hd_be_r },
- { NULL, NULL,
- gen_helper_sve_st1ss_be_r,
- gen_helper_sve_st1sd_be_r },
- { NULL, NULL, NULL,
- gen_helper_sve_st1dd_be_r } },
- };
- static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
- { { gen_helper_sve_st2bb_r,
- gen_helper_sve_st2hh_le_r,
- gen_helper_sve_st2ss_le_r,
- gen_helper_sve_st2dd_le_r },
- { gen_helper_sve_st3bb_r,
- gen_helper_sve_st3hh_le_r,
- gen_helper_sve_st3ss_le_r,
- gen_helper_sve_st3dd_le_r },
- { gen_helper_sve_st4bb_r,
- gen_helper_sve_st4hh_le_r,
- gen_helper_sve_st4ss_le_r,
- gen_helper_sve_st4dd_le_r } },
- { { gen_helper_sve_st2bb_r,
- gen_helper_sve_st2hh_be_r,
- gen_helper_sve_st2ss_be_r,
- gen_helper_sve_st2dd_be_r },
- { gen_helper_sve_st3bb_r,
- gen_helper_sve_st3hh_be_r,
- gen_helper_sve_st3ss_be_r,
- gen_helper_sve_st3dd_be_r },
- { gen_helper_sve_st4bb_r,
- gen_helper_sve_st4hh_be_r,
- gen_helper_sve_st4ss_be_r,
- gen_helper_sve_st4dd_be_r } },
- };
- gen_helper_gvec_mem *fn;
- int be = s->be_data == MO_BE;
-
- if (nreg == 0) {
- /* ST1 */
- fn = fn_single[be][msz][esz];
- } else {
- /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
- assert(msz == esz);
- fn = fn_multiple[be][nreg - 1][msz];
- }
- assert(fn != NULL);
- do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn);
-}
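The tables above are indexed by endianness, memory element size (msz) and register element size (esz), with msz == esz enforced for the 2/3/4-register forms. As one concrete example of what a single entry does, here is a plain-C model of the st1bh case (store bytes from halfword elements); a hypothetical standalone function, not the actual helper:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static void st1bh_model(const uint16_t *zt, const bool *pg,
                        size_t nelem, uint8_t *mem)
{
    for (size_t i = 0; i < nelem; i++) {
        if (pg[i]) {
            mem[i] = (uint8_t)zt[i];   /* truncate halfword to byte */
        }
    }
}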
-
-static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
-{
- if (a->rm == 31 || a->msz > a->esz) {
- return false;
- }
- if (sve_access_check(s)) {
- TCGv_i64 addr = new_tmp_a64(s);
- tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
- tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
- do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
- }
- return true;
-}
-
-static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
-{
- if (a->msz > a->esz) {
- return false;
- }
- if (sve_access_check(s)) {
- int vsz = vec_full_reg_size(s);
- int elements = vsz >> a->esz;
- TCGv_i64 addr = new_tmp_a64(s);
-
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
- (a->imm * elements * (a->nreg + 1)) << a->msz);
- do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
- }
- return true;
-}
-
-/*
- *** SVE gather loads / scatter stores
- */
-
-static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
- int scale, TCGv_i64 scalar, int msz,
- gen_helper_gvec_mem_scatter *fn)
-{
- unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr t_zm = tcg_temp_new_ptr();
- TCGv_ptr t_pg = tcg_temp_new_ptr();
- TCGv_ptr t_zt = tcg_temp_new_ptr();
- TCGv_i32 t_desc;
- int desc;
-
- desc = sve_memopidx(s, msz_dtype(msz));
- desc |= scale << MEMOPIDX_SHIFT;
- desc = simd_desc(vsz, vsz, desc);
- t_desc = tcg_const_i32(desc);
-
- tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
- tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
- tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
- fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
-
- tcg_temp_free_ptr(t_zt);
- tcg_temp_free_ptr(t_zm);
- tcg_temp_free_ptr(t_pg);
- tcg_temp_free_i32(t_desc);
-}
-
-/* Indexed by [be][ff][xs][u][msz]. */
-static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
- /* Little-endian */
- { { { { gen_helper_sve_ldbss_zsu,
- gen_helper_sve_ldhss_le_zsu,
- NULL, },
- { gen_helper_sve_ldbsu_zsu,
- gen_helper_sve_ldhsu_le_zsu,
- gen_helper_sve_ldss_le_zsu, } },
- { { gen_helper_sve_ldbss_zss,
- gen_helper_sve_ldhss_le_zss,
- NULL, },
- { gen_helper_sve_ldbsu_zss,
- gen_helper_sve_ldhsu_le_zss,
- gen_helper_sve_ldss_le_zss, } } },
-
- /* First-fault */
- { { { gen_helper_sve_ldffbss_zsu,
- gen_helper_sve_ldffhss_le_zsu,
- NULL, },
- { gen_helper_sve_ldffbsu_zsu,
- gen_helper_sve_ldffhsu_le_zsu,
- gen_helper_sve_ldffss_le_zsu, } },
- { { gen_helper_sve_ldffbss_zss,
- gen_helper_sve_ldffhss_le_zss,
- NULL, },
- { gen_helper_sve_ldffbsu_zss,
- gen_helper_sve_ldffhsu_le_zss,
- gen_helper_sve_ldffss_le_zss, } } } },
-
- /* Big-endian */
- { { { { gen_helper_sve_ldbss_zsu,
- gen_helper_sve_ldhss_be_zsu,
- NULL, },
- { gen_helper_sve_ldbsu_zsu,
- gen_helper_sve_ldhsu_be_zsu,
- gen_helper_sve_ldss_be_zsu, } },
- { { gen_helper_sve_ldbss_zss,
- gen_helper_sve_ldhss_be_zss,
- NULL, },
- { gen_helper_sve_ldbsu_zss,
- gen_helper_sve_ldhsu_be_zss,
- gen_helper_sve_ldss_be_zss, } } },
-
- /* First-fault */
- { { { gen_helper_sve_ldffbss_zsu,
- gen_helper_sve_ldffhss_be_zsu,
- NULL, },
- { gen_helper_sve_ldffbsu_zsu,
- gen_helper_sve_ldffhsu_be_zsu,
- gen_helper_sve_ldffss_be_zsu, } },
- { { gen_helper_sve_ldffbss_zss,
- gen_helper_sve_ldffhss_be_zss,
- NULL, },
- { gen_helper_sve_ldffbsu_zss,
- gen_helper_sve_ldffhsu_be_zss,
- gen_helper_sve_ldffss_be_zss, } } } },
-};
-
-/* Note that we overload xs=2 to indicate 64-bit offset. */
-static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
- /* Little-endian */
- { { { { gen_helper_sve_ldbds_zsu,
- gen_helper_sve_ldhds_le_zsu,
- gen_helper_sve_ldsds_le_zsu,
- NULL, },
- { gen_helper_sve_ldbdu_zsu,
- gen_helper_sve_ldhdu_le_zsu,
- gen_helper_sve_ldsdu_le_zsu,
- gen_helper_sve_lddd_le_zsu, } },
- { { gen_helper_sve_ldbds_zss,
- gen_helper_sve_ldhds_le_zss,
- gen_helper_sve_ldsds_le_zss,
- NULL, },
- { gen_helper_sve_ldbdu_zss,
- gen_helper_sve_ldhdu_le_zss,
- gen_helper_sve_ldsdu_le_zss,
- gen_helper_sve_lddd_le_zss, } },
- { { gen_helper_sve_ldbds_zd,
- gen_helper_sve_ldhds_le_zd,
- gen_helper_sve_ldsds_le_zd,
- NULL, },
- { gen_helper_sve_ldbdu_zd,
- gen_helper_sve_ldhdu_le_zd,
- gen_helper_sve_ldsdu_le_zd,
- gen_helper_sve_lddd_le_zd, } } },
-
- /* First-fault */
- { { { gen_helper_sve_ldffbds_zsu,
- gen_helper_sve_ldffhds_le_zsu,
- gen_helper_sve_ldffsds_le_zsu,
- NULL, },
- { gen_helper_sve_ldffbdu_zsu,
- gen_helper_sve_ldffhdu_le_zsu,
- gen_helper_sve_ldffsdu_le_zsu,
- gen_helper_sve_ldffdd_le_zsu, } },
- { { gen_helper_sve_ldffbds_zss,
- gen_helper_sve_ldffhds_le_zss,
- gen_helper_sve_ldffsds_le_zss,
- NULL, },
- { gen_helper_sve_ldffbdu_zss,
- gen_helper_sve_ldffhdu_le_zss,
- gen_helper_sve_ldffsdu_le_zss,
- gen_helper_sve_ldffdd_le_zss, } },
- { { gen_helper_sve_ldffbds_zd,
- gen_helper_sve_ldffhds_le_zd,
- gen_helper_sve_ldffsds_le_zd,
- NULL, },
- { gen_helper_sve_ldffbdu_zd,
- gen_helper_sve_ldffhdu_le_zd,
- gen_helper_sve_ldffsdu_le_zd,
- gen_helper_sve_ldffdd_le_zd, } } } },
-
- /* Big-endian */
- { { { { gen_helper_sve_ldbds_zsu,
- gen_helper_sve_ldhds_be_zsu,
- gen_helper_sve_ldsds_be_zsu,
- NULL, },
- { gen_helper_sve_ldbdu_zsu,
- gen_helper_sve_ldhdu_be_zsu,
- gen_helper_sve_ldsdu_be_zsu,
- gen_helper_sve_lddd_be_zsu, } },
- { { gen_helper_sve_ldbds_zss,
- gen_helper_sve_ldhds_be_zss,
- gen_helper_sve_ldsds_be_zss,
- NULL, },
- { gen_helper_sve_ldbdu_zss,
- gen_helper_sve_ldhdu_be_zss,
- gen_helper_sve_ldsdu_be_zss,
- gen_helper_sve_lddd_be_zss, } },
- { { gen_helper_sve_ldbds_zd,
- gen_helper_sve_ldhds_be_zd,
- gen_helper_sve_ldsds_be_zd,
- NULL, },
- { gen_helper_sve_ldbdu_zd,
- gen_helper_sve_ldhdu_be_zd,
- gen_helper_sve_ldsdu_be_zd,
- gen_helper_sve_lddd_be_zd, } } },
-
- /* First-fault */
- { { { gen_helper_sve_ldffbds_zsu,
- gen_helper_sve_ldffhds_be_zsu,
- gen_helper_sve_ldffsds_be_zsu,
- NULL, },
- { gen_helper_sve_ldffbdu_zsu,
- gen_helper_sve_ldffhdu_be_zsu,
- gen_helper_sve_ldffsdu_be_zsu,
- gen_helper_sve_ldffdd_be_zsu, } },
- { { gen_helper_sve_ldffbds_zss,
- gen_helper_sve_ldffhds_be_zss,
- gen_helper_sve_ldffsds_be_zss,
- NULL, },
- { gen_helper_sve_ldffbdu_zss,
- gen_helper_sve_ldffhdu_be_zss,
- gen_helper_sve_ldffsdu_be_zss,
- gen_helper_sve_ldffdd_be_zss, } },
- { { gen_helper_sve_ldffbds_zd,
- gen_helper_sve_ldffhds_be_zd,
- gen_helper_sve_ldffsds_be_zd,
- NULL, },
- { gen_helper_sve_ldffbdu_zd,
- gen_helper_sve_ldffhdu_be_zd,
- gen_helper_sve_ldffsdu_be_zd,
- gen_helper_sve_ldffdd_be_zd, } } } },
-};
-
-static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
-{
- gen_helper_gvec_mem_scatter *fn = NULL;
- int be = s->be_data == MO_BE;
-
- if (!sve_access_check(s)) {
- return true;
- }
-
- switch (a->esz) {
- case MO_32:
- fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
- break;
- case MO_64:
- fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
- break;
- }
- assert(fn != NULL);
-
- do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
- cpu_reg_sp(s, a->rn), a->msz, fn);
- return true;
-}
-
-static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
-{
- gen_helper_gvec_mem_scatter *fn = NULL;
- int be = s->be_data == MO_BE;
- TCGv_i64 imm;
-
- if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
- return false;
- }
- if (!sve_access_check(s)) {
- return true;
- }
-
- switch (a->esz) {
- case MO_32:
- fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
- break;
- case MO_64:
- fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
- break;
- }
- assert(fn != NULL);
-
- /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
- * by loading the immediate into the scalar parameter.
- */
- imm = tcg_const_i64(a->imm << a->msz);
- do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
- tcg_temp_free_i64(imm);
- return true;
-}
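The vector-plus-immediate form is folded into the same code path as the scalar-plus-vector form by passing the immediate as the "scalar" argument, so both reduce to per-element addresses of base + (offset << shift). A scalar model of that gather path, assuming 32-bit elements and a flat byte array standing in for guest memory (illustration only):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void gather_ld32_model(uint32_t *zd, const bool *pg,
                              const uint64_t *offsets, size_t nelem,
                              uint64_t base, unsigned shift,
                              const uint8_t *mem)
{
    for (size_t i = 0; i < nelem; i++) {
        if (pg[i]) {
            uint64_t ea = base + (offsets[i] << shift);
            memcpy(&zd[i], mem + ea, sizeof(uint32_t));  /* active: load */
        } else {
            zd[i] = 0;                                   /* inactive: zero */
        }
    }
}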
-
-/* Indexed by [be][xs][msz]. */
-static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
- /* Little-endian */
- { { gen_helper_sve_stbs_zsu,
- gen_helper_sve_sths_le_zsu,
- gen_helper_sve_stss_le_zsu, },
- { gen_helper_sve_stbs_zss,
- gen_helper_sve_sths_le_zss,
- gen_helper_sve_stss_le_zss, } },
- /* Big-endian */
- { { gen_helper_sve_stbs_zsu,
- gen_helper_sve_sths_be_zsu,
- gen_helper_sve_stss_be_zsu, },
- { gen_helper_sve_stbs_zss,
- gen_helper_sve_sths_be_zss,
- gen_helper_sve_stss_be_zss, } },
-};
-
-/* Note that we overload xs=2 to indicate 64-bit offset. */
-static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
- /* Little-endian */
- { { gen_helper_sve_stbd_zsu,
- gen_helper_sve_sthd_le_zsu,
- gen_helper_sve_stsd_le_zsu,
- gen_helper_sve_stdd_le_zsu, },
- { gen_helper_sve_stbd_zss,
- gen_helper_sve_sthd_le_zss,
- gen_helper_sve_stsd_le_zss,
- gen_helper_sve_stdd_le_zss, },
- { gen_helper_sve_stbd_zd,
- gen_helper_sve_sthd_le_zd,
- gen_helper_sve_stsd_le_zd,
- gen_helper_sve_stdd_le_zd, } },
- /* Big-endian */
- { { gen_helper_sve_stbd_zsu,
- gen_helper_sve_sthd_be_zsu,
- gen_helper_sve_stsd_be_zsu,
- gen_helper_sve_stdd_be_zsu, },
- { gen_helper_sve_stbd_zss,
- gen_helper_sve_sthd_be_zss,
- gen_helper_sve_stsd_be_zss,
- gen_helper_sve_stdd_be_zss, },
- { gen_helper_sve_stbd_zd,
- gen_helper_sve_sthd_be_zd,
- gen_helper_sve_stsd_be_zd,
- gen_helper_sve_stdd_be_zd, } },
-};
-
-static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
-{
- gen_helper_gvec_mem_scatter *fn;
- int be = s->be_data == MO_BE;
-
- if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
- return false;
- }
- if (!sve_access_check(s)) {
- return true;
- }
- switch (a->esz) {
- case MO_32:
- fn = scatter_store_fn32[be][a->xs][a->msz];
- break;
- case MO_64:
- fn = scatter_store_fn64[be][a->xs][a->msz];
- break;
- default:
- g_assert_not_reached();
- }
- do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
- cpu_reg_sp(s, a->rn), a->msz, fn);
- return true;
-}
-
-static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
-{
- gen_helper_gvec_mem_scatter *fn = NULL;
- int be = s->be_data == MO_BE;
- TCGv_i64 imm;
-
- if (a->esz < a->msz) {
- return false;
- }
- if (!sve_access_check(s)) {
- return true;
- }
-
- switch (a->esz) {
- case MO_32:
- fn = scatter_store_fn32[be][0][a->msz];
- break;
- case MO_64:
- fn = scatter_store_fn64[be][2][a->msz];
- break;
- }
- assert(fn != NULL);
-
- /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
- * by loading the immediate into the scalar parameter.
- */
- imm = tcg_const_i64(a->imm << a->msz);
- do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
- tcg_temp_free_i64(imm);
- return true;
-}
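The scatter store mirrors the gather case: the immediate form again rides on the scalar argument, and only active elements touch memory. A companion plain-C sketch under the same assumptions as the gather model above:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void scatter_st32_model(const uint32_t *zt, const bool *pg,
                               const uint64_t *offsets, size_t nelem,
                               uint64_t base, unsigned shift, uint8_t *mem)
{
    for (size_t i = 0; i < nelem; i++) {
        if (pg[i]) {
            uint64_t ea = base + (offsets[i] << shift);
            memcpy(mem + ea, &zt[i], sizeof(uint32_t));  /* store active only */
        }
    }
}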
-
-/*
- * Prefetches
- */
-
-static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
-{
- /* Prefetch is a nop within QEMU. */
- (void)sve_access_check(s);
- return true;
-}
-
-static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
-{
- if (a->rm == 31) {
- return false;
- }
- /* Prefetch is a nop within QEMU. */
- (void)sve_access_check(s);
- return true;
-}
-
-/*
- * Move Prefix
- *
- * TODO: The implementation so far could handle predicated merging movprfx.
- * The helper functions as written take an extra source register to
- * use in the operation, but the result is only written when predication
- * succeeds. For unpredicated movprfx, we need to rearrange the helpers
- * to allow the final write back to the destination to be unconditional.
- * For predicated zeroing movprfx, we need to rearrange the helpers to
- * allow the final write back to zero inactives.
- *
- * In the meantime, just emit the moves.
- */
-
-static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a, uint32_t insn)
-{
- return do_mov_z(s, a->rd, a->rn);
-}
-
-static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
- }
- return true;
-}
-
-static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
-{
- if (sve_access_check(s)) {
- do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
- }
- return true;
-}
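As the TODO above notes, MOVPRFX is currently expanded as plain moves. The three trans functions amount to the following element-wise behaviour, sketched here in plain C with 64-bit elements (a summary of the semantics, not the gvec expansion):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static void movprfx_u(uint64_t *zd, const uint64_t *zn, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        zd[i] = zn[i];                     /* unpredicated: copy */
    }
}

static void movprfx_m(uint64_t *zd, const uint64_t *zn,
                      const bool *pg, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        zd[i] = pg[i] ? zn[i] : zd[i];     /* merging: keep old inactive */
    }
}

static void movprfx_z(uint64_t *zd, const uint64_t *zn,
                      const bool *pg, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        zd[i] = pg[i] ? zn[i] : 0;         /* zeroing: clear inactive */
    }
}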
diff --git a/target/arm/translate.c b/target/arm/translate.c
deleted file mode 100644
index 1b4bacb522..0000000000
--- a/target/arm/translate.c
+++ /dev/null
@@ -1,13199 +0,0 @@
-/*
- * ARM translation
- *
- * Copyright (c) 2003 Fabrice Bellard
- * Copyright (c) 2005-2007 CodeSourcery
- * Copyright (c) 2007 OpenedHand, Ltd.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-#include "qemu/osdep.h"
-
-#include "cpu.h"
-#include "internals.h"
-#include "disas/disas.h"
-#include "exec/exec-all.h"
-#include "tcg-op.h"
-#include "tcg-op-gvec.h"
-#include "qemu/log.h"
-#include "qemu/bitops.h"
-#include "arm_ldst.h"
-#include "exec/semihost.h"
-
-#include "exec/helper-proto.h"
-#include "exec/helper-gen.h"
-
-#include "trace-tcg.h"
-#include "exec/log.h"
-
-
-#define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
-#define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
-/* currently all emulated v5 cores are also v5TE, so don't bother */
-#define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
-#define ENABLE_ARCH_5J arm_dc_feature(s, ARM_FEATURE_JAZELLE)
-#define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
-#define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
-#define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
-#define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
-#define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
-
-#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
-
-#include "translate.h"
-
-#if defined(CONFIG_USER_ONLY)
-#define IS_USER(s) 1
-#else
-#define IS_USER(s) (s->user)
-#endif
-
-/* We reuse the same 64-bit temporaries for efficiency. */
-static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
-static TCGv_i32 cpu_R[16];
-TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
-TCGv_i64 cpu_exclusive_addr;
-TCGv_i64 cpu_exclusive_val;
-
-/* FIXME: These should be removed. */
-static TCGv_i32 cpu_F0s, cpu_F1s;
-static TCGv_i64 cpu_F0d, cpu_F1d;
-
-#include "exec/gen-icount.h"
-
-static const char *regnames[] =
- { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
- "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
-
-/* Function prototypes for gen_ functions calling Neon helpers. */
-typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
- TCGv_i32, TCGv_i32);
-
-/* initialize TCG globals. */
-void arm_translate_init(void)
-{
- int i;
-
- for (i = 0; i < 16; i++) {
- cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
- offsetof(CPUARMState, regs[i]),
- regnames[i]);
- }
- cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
- cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
- cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
- cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
-
- cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
- offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
- cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
- offsetof(CPUARMState, exclusive_val), "exclusive_val");
-
- a64_translate_init();
-}
-
-/* Flags for the disas_set_da_iss info argument:
- * lower bits hold the Rt register number, higher bits are flags.
- */
-typedef enum ISSInfo {
- ISSNone = 0,
- ISSRegMask = 0x1f,
- ISSInvalid = (1 << 5),
- ISSIsAcqRel = (1 << 6),
- ISSIsWrite = (1 << 7),
- ISSIs16Bit = (1 << 8),
-} ISSInfo;
-
-/* Save the syndrome information for a Data Abort */
-static void disas_set_da_iss(DisasContext *s, TCGMemOp memop, ISSInfo issinfo)
-{
- uint32_t syn;
- int sas = memop & MO_SIZE;
- bool sse = memop & MO_SIGN;
- bool is_acqrel = issinfo & ISSIsAcqRel;
- bool is_write = issinfo & ISSIsWrite;
- bool is_16bit = issinfo & ISSIs16Bit;
- int srt = issinfo & ISSRegMask;
-
- if (issinfo & ISSInvalid) {
- /* Some callsites want to conditionally provide ISS info,
- * eg "only if this was not a writeback"
- */
- return;
- }
-
- if (srt == 15) {
- /* For AArch32, insns where the src/dest is R15 never generate
- * ISS information. Catching that here saves checking at all
- * the call sites.
- */
- return;
- }
-
- syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
- 0, 0, 0, is_write, 0, is_16bit);
- disas_set_insn_syndrome(s, syn);
-}
-
-static inline int get_a32_user_mem_index(DisasContext *s)
-{
- /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
- * insns:
- * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
- * otherwise, access as if at PL0.
- */
- switch (s->mmu_idx) {
- case ARMMMUIdx_S1E2: /* this one is UNPREDICTABLE */
- case ARMMMUIdx_S12NSE0:
- case ARMMMUIdx_S12NSE1:
- return arm_to_core_mmu_idx(ARMMMUIdx_S12NSE0);
- case ARMMMUIdx_S1E3:
- case ARMMMUIdx_S1SE0:
- case ARMMMUIdx_S1SE1:
- return arm_to_core_mmu_idx(ARMMMUIdx_S1SE0);
- case ARMMMUIdx_MUser:
- case ARMMMUIdx_MPriv:
- return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
- case ARMMMUIdx_MUserNegPri:
- case ARMMMUIdx_MPrivNegPri:
- return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
- case ARMMMUIdx_MSUser:
- case ARMMMUIdx_MSPriv:
- return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
- case ARMMMUIdx_MSUserNegPri:
- case ARMMMUIdx_MSPrivNegPri:
- return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
- case ARMMMUIdx_S2NS:
- default:
- g_assert_not_reached();
- }
-}
-
-static inline TCGv_i32 load_cpu_offset(int offset)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_ld_i32(tmp, cpu_env, offset);
- return tmp;
-}
-
-#define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
-
-static inline void store_cpu_offset(TCGv_i32 var, int offset)
-{
- tcg_gen_st_i32(var, cpu_env, offset);
- tcg_temp_free_i32(var);
-}
-
-#define store_cpu_field(var, name) \
- store_cpu_offset(var, offsetof(CPUARMState, name))
-
-/* Set a variable to the value of a CPU register. */
-static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
-{
- if (reg == 15) {
- uint32_t addr;
- /* normally, since we updated PC, we need only to add one insn */
- if (s->thumb)
- addr = (long)s->pc + 2;
- else
- addr = (long)s->pc + 4;
- tcg_gen_movi_i32(var, addr);
- } else {
- tcg_gen_mov_i32(var, cpu_R[reg]);
- }
-}
-
-/* Create a new temporary and set it to the value of a CPU register. */
-static inline TCGv_i32 load_reg(DisasContext *s, int reg)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- load_reg_var(s, tmp, reg);
- return tmp;
-}
-
-/* Set a CPU register. The source must be a temporary and will be
- marked as dead. */
-static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
-{
- if (reg == 15) {
- /* In Thumb mode, we must ignore bit 0.
- * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
- * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
- * We choose to ignore [1:0] in ARM mode for all architecture versions.
- */
- tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
- s->base.is_jmp = DISAS_JUMP;
- }
- tcg_gen_mov_i32(cpu_R[reg], var);
- tcg_temp_free_i32(var);
-}
-
-/*
- * Variant of store_reg which applies v8M stack-limit checks before updating
- * SP. If the check fails this will result in an exception being taken.
- * We disable the stack checks for CONFIG_USER_ONLY because we have
- * no idea what the stack limits should be in that case.
- * If stack checking is not being done this just acts like store_reg().
- */
-static void store_sp_checked(DisasContext *s, TCGv_i32 var)
-{
-#ifndef CONFIG_USER_ONLY
- if (s->v8m_stackcheck) {
- gen_helper_v8m_stackcheck(cpu_env, var);
- }
-#endif
- store_reg(s, 13, var);
-}
-
-/* Value extensions. */
-#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
-#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
-#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
-#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
-
-#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
-#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
-
-
-static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
-{
- TCGv_i32 tmp_mask = tcg_const_i32(mask);
- gen_helper_cpsr_write(cpu_env, var, tmp_mask);
- tcg_temp_free_i32(tmp_mask);
-}
-/* Set NZCV flags from the high 4 bits of var. */
-#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
-
-static void gen_exception_internal(int excp)
-{
- TCGv_i32 tcg_excp = tcg_const_i32(excp);
-
- assert(excp_is_internal(excp));
- gen_helper_exception_internal(cpu_env, tcg_excp);
- tcg_temp_free_i32(tcg_excp);
-}
-
-static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
-{
- TCGv_i32 tcg_excp = tcg_const_i32(excp);
- TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
- TCGv_i32 tcg_el = tcg_const_i32(target_el);
-
- gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
- tcg_syn, tcg_el);
-
- tcg_temp_free_i32(tcg_el);
- tcg_temp_free_i32(tcg_syn);
- tcg_temp_free_i32(tcg_excp);
-}
-
-static void gen_ss_advance(DisasContext *s)
-{
- /* If the singlestep state is Active-not-pending, advance to
- * Active-pending.
- */
- if (s->ss_active) {
- s->pstate_ss = 0;
- gen_helper_clear_pstate_ss(cpu_env);
- }
-}
-
-static void gen_step_complete_exception(DisasContext *s)
-{
- /* We just completed step of an insn. Move from Active-not-pending
- * to Active-pending, and then also take the swstep exception.
- * This corresponds to making the (IMPDEF) choice to prioritize
- * swstep exceptions over asynchronous exceptions taken to an exception
- * level where debug is disabled. This choice has the advantage that
- * we do not need to maintain internal state corresponding to the
- * ISV/EX syndrome bits between completion of the step and generation
- * of the exception, and our syndrome information is always correct.
- */
- gen_ss_advance(s);
- gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
- default_exception_el(s));
- s->base.is_jmp = DISAS_NORETURN;
-}
-
-static void gen_singlestep_exception(DisasContext *s)
-{
- /* Generate the right kind of exception for singlestep, which is
- * either the architectural singlestep or EXCP_DEBUG for QEMU's
- * gdb singlestepping.
- */
- if (s->ss_active) {
- gen_step_complete_exception(s);
- } else {
- gen_exception_internal(EXCP_DEBUG);
- }
-}
-
-static inline bool is_singlestepping(DisasContext *s)
-{
- /* Return true if we are singlestepping either because of
- * architectural singlestep or QEMU gdbstub singlestep. This does
- * not include the command line '-singlestep' mode which is rather
- * misnamed as it only means "one instruction per TB" and doesn't
- * affect the code we generate.
- */
- return s->base.singlestep_enabled || s->ss_active;
-}
-
-static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 tmp1 = tcg_temp_new_i32();
- TCGv_i32 tmp2 = tcg_temp_new_i32();
- tcg_gen_ext16s_i32(tmp1, a);
- tcg_gen_ext16s_i32(tmp2, b);
- tcg_gen_mul_i32(tmp1, tmp1, tmp2);
- tcg_temp_free_i32(tmp2);
- tcg_gen_sari_i32(a, a, 16);
- tcg_gen_sari_i32(b, b, 16);
- tcg_gen_mul_i32(b, b, a);
- tcg_gen_mov_i32(a, tmp1);
- tcg_temp_free_i32(tmp1);
-}
-
-/* Byteswap each halfword. */
-static void gen_rev16(TCGv_i32 var)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
- tcg_gen_shri_i32(tmp, var, 8);
- tcg_gen_and_i32(tmp, tmp, mask);
- tcg_gen_and_i32(var, var, mask);
- tcg_gen_shli_i32(var, var, 8);
- tcg_gen_or_i32(var, var, tmp);
- tcg_temp_free_i32(mask);
- tcg_temp_free_i32(tmp);
-}
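The TCG sequence above is the usual one-shift/two-mask halfword byteswap. The same computation in plain C, for reference:

#include <stdint.h>

static uint32_t rev16(uint32_t x)
{
    uint32_t hi = (x >> 8) & 0x00ff00ffu;  /* odd bytes moved down */
    uint32_t lo = (x & 0x00ff00ffu) << 8;  /* even bytes moved up */

    return hi | lo;                        /* byteswap each halfword */
}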
-
-/* Byteswap low halfword and sign extend. */
-static void gen_revsh(TCGv_i32 var)
-{
- tcg_gen_ext16u_i32(var, var);
- tcg_gen_bswap16_i32(var, var);
- tcg_gen_ext16s_i32(var, var);
-}
-
-/* Return (b << 32) + a. Mark inputs as dead. */
-static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv_i32 b)
-{
- TCGv_i64 tmp64 = tcg_temp_new_i64();
-
- tcg_gen_extu_i32_i64(tmp64, b);
- tcg_temp_free_i32(b);
- tcg_gen_shli_i64(tmp64, tmp64, 32);
- tcg_gen_add_i64(a, tmp64, a);
-
- tcg_temp_free_i64(tmp64);
- return a;
-}
-
-/* Return (b << 32) - a. Mark inputs as dead. */
-static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv_i32 b)
-{
- TCGv_i64 tmp64 = tcg_temp_new_i64();
-
- tcg_gen_extu_i32_i64(tmp64, b);
- tcg_temp_free_i32(b);
- tcg_gen_shli_i64(tmp64, tmp64, 32);
- tcg_gen_sub_i64(a, tmp64, a);
-
- tcg_temp_free_i64(tmp64);
- return a;
-}
-
-/* 32x32->64 multiply. Marks inputs as dead. */
-static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 lo = tcg_temp_new_i32();
- TCGv_i32 hi = tcg_temp_new_i32();
- TCGv_i64 ret;
-
- tcg_gen_mulu2_i32(lo, hi, a, b);
- tcg_temp_free_i32(a);
- tcg_temp_free_i32(b);
-
- ret = tcg_temp_new_i64();
- tcg_gen_concat_i32_i64(ret, lo, hi);
- tcg_temp_free_i32(lo);
- tcg_temp_free_i32(hi);
-
- return ret;
-}
-
-static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 lo = tcg_temp_new_i32();
- TCGv_i32 hi = tcg_temp_new_i32();
- TCGv_i64 ret;
-
- tcg_gen_muls2_i32(lo, hi, a, b);
- tcg_temp_free_i32(a);
- tcg_temp_free_i32(b);
-
- ret = tcg_temp_new_i64();
- tcg_gen_concat_i32_i64(ret, lo, hi);
- tcg_temp_free_i32(lo);
- tcg_temp_free_i32(hi);
-
- return ret;
-}
-
-/* Swap low and high halfwords. */
-static void gen_swap_half(TCGv_i32 var)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_shri_i32(tmp, var, 16);
- tcg_gen_shli_i32(var, var, 16);
- tcg_gen_or_i32(var, var, tmp);
- tcg_temp_free_i32(tmp);
-}
-
-/* Dual 16-bit add. Result placed in t0 and t1 is marked as dead.
- tmp = (t0 ^ t1) & 0x8000;
- t0 &= ~0x8000;
- t1 &= ~0x8000;
- t0 = (t0 + t1) ^ tmp;
- */
-
-static void gen_add16(TCGv_i32 t0, TCGv_i32 t1)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_xor_i32(tmp, t0, t1);
- tcg_gen_andi_i32(tmp, tmp, 0x8000);
- tcg_gen_andi_i32(t0, t0, ~0x8000);
- tcg_gen_andi_i32(t1, t1, ~0x8000);
- tcg_gen_add_i32(t0, t0, t1);
- tcg_gen_xor_i32(t0, t0, tmp);
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(t1);
-}
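The pseudocode in the comment above is worth seeing as ordinary C: clearing bit 15 of both operands before the add stops any carry from the low halfword leaking into the high one, and the final XOR restores the correct bit 15 of the low half.

#include <stdint.h>

static uint32_t add16_dual(uint32_t t0, uint32_t t1)
{
    uint32_t tmp = (t0 ^ t1) & 0x8000u;    /* XOR of the two bit-15 values */

    t0 &= ~0x8000u;                        /* clear bit 15 so the add ... */
    t1 &= ~0x8000u;                        /* ... cannot carry into bit 16 */
    return (t0 + t1) ^ tmp;                /* XOR restores the low half's bit 15 */
}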
-
-/* Set CF to the top bit of var. */
-static void gen_set_CF_bit31(TCGv_i32 var)
-{
- tcg_gen_shri_i32(cpu_CF, var, 31);
-}
-
-/* Set N and Z flags from var. */
-static inline void gen_logic_CC(TCGv_i32 var)
-{
- tcg_gen_mov_i32(cpu_NF, var);
- tcg_gen_mov_i32(cpu_ZF, var);
-}
-
-/* T0 += T1 + CF. */
-static void gen_adc(TCGv_i32 t0, TCGv_i32 t1)
-{
- tcg_gen_add_i32(t0, t0, t1);
- tcg_gen_add_i32(t0, t0, cpu_CF);
-}
-
-/* dest = T0 + T1 + CF. */
-static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
-{
- tcg_gen_add_i32(dest, t0, t1);
- tcg_gen_add_i32(dest, dest, cpu_CF);
-}
-
-/* dest = T0 - T1 + CF - 1. */
-static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
-{
- tcg_gen_sub_i32(dest, t0, t1);
- tcg_gen_add_i32(dest, dest, cpu_CF);
- tcg_gen_subi_i32(dest, dest, 1);
-}
-
-/* dest = T0 + T1. Compute C, N, V and Z flags */
-static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
- tcg_gen_mov_i32(cpu_ZF, cpu_NF);
- tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
- tcg_gen_xor_i32(tmp, t0, t1);
- tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
- tcg_temp_free_i32(tmp);
- tcg_gen_mov_i32(dest, cpu_NF);
-}
-
-/* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
-static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- if (TCG_TARGET_HAS_add2_i32) {
- tcg_gen_movi_i32(tmp, 0);
- tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
- tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
- } else {
- TCGv_i64 q0 = tcg_temp_new_i64();
- TCGv_i64 q1 = tcg_temp_new_i64();
- tcg_gen_extu_i32_i64(q0, t0);
- tcg_gen_extu_i32_i64(q1, t1);
- tcg_gen_add_i64(q0, q0, q1);
- tcg_gen_extu_i32_i64(q1, cpu_CF);
- tcg_gen_add_i64(q0, q0, q1);
- tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
- tcg_temp_free_i64(q0);
- tcg_temp_free_i64(q1);
- }
- tcg_gen_mov_i32(cpu_ZF, cpu_NF);
- tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
- tcg_gen_xor_i32(tmp, t0, t1);
- tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
- tcg_temp_free_i32(tmp);
- tcg_gen_mov_i32(dest, cpu_NF);
-}
-
-/* dest = T0 - T1. Compute C, N, V and Z flags */
-static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
-{
- TCGv_i32 tmp;
- tcg_gen_sub_i32(cpu_NF, t0, t1);
- tcg_gen_mov_i32(cpu_ZF, cpu_NF);
- tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
- tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
- tmp = tcg_temp_new_i32();
- tcg_gen_xor_i32(tmp, t0, t1);
- tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
- tcg_temp_free_i32(tmp);
- tcg_gen_mov_i32(dest, cpu_NF);
-}
-
-/* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
-static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_not_i32(tmp, t1);
- gen_adc_CC(dest, t0, tmp);
- tcg_temp_free_i32(tmp);
-}
-
-#define GEN_SHIFT(name) \
-static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
-{ \
- TCGv_i32 tmp1, tmp2, tmp3; \
- tmp1 = tcg_temp_new_i32(); \
- tcg_gen_andi_i32(tmp1, t1, 0xff); \
- tmp2 = tcg_const_i32(0); \
- tmp3 = tcg_const_i32(0x1f); \
- tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
- tcg_temp_free_i32(tmp3); \
- tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
- tcg_gen_##name##_i32(dest, tmp2, tmp1); \
- tcg_temp_free_i32(tmp2); \
- tcg_temp_free_i32(tmp1); \
-}
-GEN_SHIFT(shl)
-GEN_SHIFT(shr)
-#undef GEN_SHIFT
-
-static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
-{
- TCGv_i32 tmp1, tmp2;
- tmp1 = tcg_temp_new_i32();
- tcg_gen_andi_i32(tmp1, t1, 0xff);
- tmp2 = tcg_const_i32(0x1f);
- tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
- tcg_temp_free_i32(tmp2);
- tcg_gen_sar_i32(dest, t0, tmp1);
- tcg_temp_free_i32(tmp1);
-}
-
-static void tcg_gen_abs_i32(TCGv_i32 dest, TCGv_i32 src)
-{
- TCGv_i32 c0 = tcg_const_i32(0);
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_neg_i32(tmp, src);
- tcg_gen_movcond_i32(TCG_COND_GT, dest, src, c0, src, tmp);
- tcg_temp_free_i32(c0);
- tcg_temp_free_i32(tmp);
-}
-
-static void shifter_out_im(TCGv_i32 var, int shift)
-{
- if (shift == 0) {
- tcg_gen_andi_i32(cpu_CF, var, 1);
- } else {
- tcg_gen_shri_i32(cpu_CF, var, shift);
- if (shift != 31) {
- tcg_gen_andi_i32(cpu_CF, cpu_CF, 1);
- }
- }
-}
-
-/* Shift by immediate. Includes special handling for shift == 0. */
-static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
- int shift, int flags)
-{
- switch (shiftop) {
- case 0: /* LSL */
- if (shift != 0) {
- if (flags)
- shifter_out_im(var, 32 - shift);
- tcg_gen_shli_i32(var, var, shift);
- }
- break;
- case 1: /* LSR */
- if (shift == 0) {
- if (flags) {
- tcg_gen_shri_i32(cpu_CF, var, 31);
- }
- tcg_gen_movi_i32(var, 0);
- } else {
- if (flags)
- shifter_out_im(var, shift - 1);
- tcg_gen_shri_i32(var, var, shift);
- }
- break;
- case 2: /* ASR */
- if (shift == 0)
- shift = 32;
- if (flags)
- shifter_out_im(var, shift - 1);
- if (shift == 32)
- shift = 31;
- tcg_gen_sari_i32(var, var, shift);
- break;
- case 3: /* ROR/RRX */
- if (shift != 0) {
- if (flags)
- shifter_out_im(var, shift - 1);
- tcg_gen_rotri_i32(var, var, shift); break;
- } else {
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_shli_i32(tmp, cpu_CF, 31);
- if (flags)
- shifter_out_im(var, 0);
- tcg_gen_shri_i32(var, var, 1);
- tcg_gen_or_i32(var, var, tmp);
- tcg_temp_free_i32(tmp);
- }
- }
-}
-
-static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
- TCGv_i32 shift, int flags)
-{
- if (flags) {
- switch (shiftop) {
- case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
- case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
- case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
- case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
- }
- } else {
- switch (shiftop) {
- case 0:
- gen_shl(var, var, shift);
- break;
- case 1:
- gen_shr(var, var, shift);
- break;
- case 2:
- gen_sar(var, var, shift);
- break;
- case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
- tcg_gen_rotr_i32(var, var, shift); break;
- }
- }
- tcg_temp_free_i32(shift);
-}
-
-#define PAS_OP(pfx) \
- switch (op2) { \
- case 0: gen_pas_helper(glue(pfx,add16)); break; \
- case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
- case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
- case 3: gen_pas_helper(glue(pfx,sub16)); break; \
- case 4: gen_pas_helper(glue(pfx,add8)); break; \
- case 7: gen_pas_helper(glue(pfx,sub8)); break; \
- }
-static void gen_arm_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_ptr tmp;
-
- switch (op1) {
-#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
- case 1:
- tmp = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
- PAS_OP(s)
- tcg_temp_free_ptr(tmp);
- break;
- case 5:
- tmp = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
- PAS_OP(u)
- tcg_temp_free_ptr(tmp);
- break;
-#undef gen_pas_helper
-#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
- case 2:
- PAS_OP(q);
- break;
- case 3:
- PAS_OP(sh);
- break;
- case 6:
- PAS_OP(uq);
- break;
- case 7:
- PAS_OP(uh);
- break;
-#undef gen_pas_helper
- }
-}
-#undef PAS_OP
-
-/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings. */
-#define PAS_OP(pfx) \
- switch (op1) { \
- case 0: gen_pas_helper(glue(pfx,add8)); break; \
- case 1: gen_pas_helper(glue(pfx,add16)); break; \
- case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
- case 4: gen_pas_helper(glue(pfx,sub8)); break; \
- case 5: gen_pas_helper(glue(pfx,sub16)); break; \
- case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
- }
-static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_ptr tmp;
-
- switch (op2) {
-#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
- case 0:
- tmp = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
- PAS_OP(s)
- tcg_temp_free_ptr(tmp);
- break;
- case 4:
- tmp = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
- PAS_OP(u)
- tcg_temp_free_ptr(tmp);
- break;
-#undef gen_pas_helper
-#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
- case 1:
- PAS_OP(q);
- break;
- case 2:
- PAS_OP(sh);
- break;
- case 5:
- PAS_OP(uq);
- break;
- case 6:
- PAS_OP(uh);
- break;
-#undef gen_pas_helper
- }
-}
-#undef PAS_OP
-
-/*
- * Generate a conditional based on ARM condition code cc.
- * This is common between ARM and Aarch64 targets.
- */
-void arm_test_cc(DisasCompare *cmp, int cc)
-{
- TCGv_i32 value;
- TCGCond cond;
- bool global = true;
-
- switch (cc) {
- case 0: /* eq: Z */
- case 1: /* ne: !Z */
- cond = TCG_COND_EQ;
- value = cpu_ZF;
- break;
-
- case 2: /* cs: C */
- case 3: /* cc: !C */
- cond = TCG_COND_NE;
- value = cpu_CF;
- break;
-
- case 4: /* mi: N */
- case 5: /* pl: !N */
- cond = TCG_COND_LT;
- value = cpu_NF;
- break;
-
- case 6: /* vs: V */
- case 7: /* vc: !V */
- cond = TCG_COND_LT;
- value = cpu_VF;
- break;
-
- case 8: /* hi: C && !Z */
- case 9: /* ls: !C || Z -> !(C && !Z) */
- cond = TCG_COND_NE;
- value = tcg_temp_new_i32();
- global = false;
- /* CF is 1 for C, so -CF is an all-bits-set mask for C;
- ZF is non-zero for !Z; so AND the two subexpressions. */
- tcg_gen_neg_i32(value, cpu_CF);
- tcg_gen_and_i32(value, value, cpu_ZF);
- break;
-
- case 10: /* ge: N == V -> N ^ V == 0 */
- case 11: /* lt: N != V -> N ^ V != 0 */
- /* Since we're only interested in the sign bit, == 0 is >= 0. */
- cond = TCG_COND_GE;
- value = tcg_temp_new_i32();
- global = false;
- tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
- break;
-
- case 12: /* gt: !Z && N == V */
- case 13: /* le: Z || N != V */
- cond = TCG_COND_NE;
- value = tcg_temp_new_i32();
- global = false;
- /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
- * the sign bit then AND with ZF to yield the result. */
- tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
- tcg_gen_sari_i32(value, value, 31);
- tcg_gen_andc_i32(value, cpu_ZF, value);
- break;
-
- case 14: /* always */
- case 15: /* always */
- /* Use the ALWAYS condition, which will fold early.
- * It doesn't matter what we use for the value. */
- cond = TCG_COND_ALWAYS;
- value = cpu_ZF;
- goto no_invert;
-
- default:
- fprintf(stderr, "Bad condition code 0x%x\n", cc);
- abort();
- }
-
- if (cc & 1) {
- cond = tcg_invert_cond(cond);
- }
-
- no_invert:
- cmp->cond = cond;
- cmp->value = value;
- cmp->value_global = global;
-}
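The "hi" and "gt" cases above lean on the flag representation the translator keeps: CF is 0 or 1, ZF is nonzero exactly when Z is clear, and NF/VF carry N and V in bit 31. In plain C those two tests look like this (a sketch; the remaining conditions fall out by inverting with cc & 1):

#include <stdbool.h>
#include <stdint.h>

static bool cond_hi(uint32_t CF, uint32_t ZF)
{
    /* -CF is an all-ones mask when C is set; AND with ZF gives C && !Z. */
    return ((0u - CF) & ZF) != 0;
}

static bool cond_gt(uint32_t ZF, uint32_t NF, uint32_t VF)
{
    /* (N == V) is the sign bit of ~(NF ^ VF); turn it into a mask, AND with ZF. */
    uint32_t mask = (~(NF ^ VF) & 0x80000000u) ? 0xffffffffu : 0u;

    return (ZF & mask) != 0;
}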
-
-void arm_free_cc(DisasCompare *cmp)
-{
- if (!cmp->value_global) {
- tcg_temp_free_i32(cmp->value);
- }
-}
-
-void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
-{
- tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
-}
-
-void arm_gen_test_cc(int cc, TCGLabel *label)
-{
- DisasCompare cmp;
- arm_test_cc(&cmp, cc);
- arm_jump_cc(&cmp, label);
- arm_free_cc(&cmp);
-}
-
-static const uint8_t table_logic_cc[16] = {
- 1, /* and */
- 1, /* xor */
- 0, /* sub */
- 0, /* rsb */
- 0, /* add */
- 0, /* adc */
- 0, /* sbc */
- 0, /* rsc */
- 1, /* andl */
- 1, /* xorl */
- 0, /* cmp */
- 0, /* cmn */
- 1, /* orr */
- 1, /* mov */
- 1, /* bic */
- 1, /* mvn */
-};
-
-static inline void gen_set_condexec(DisasContext *s)
-{
- if (s->condexec_mask) {
- uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, val);
- store_cpu_field(tmp, condexec_bits);
- }
-}
-
-static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
-{
- tcg_gen_movi_i32(cpu_R[15], val);
-}
-
-/* Set PC and Thumb state from an immediate address. */
-static inline void gen_bx_im(DisasContext *s, uint32_t addr)
-{
- TCGv_i32 tmp;
-
- s->base.is_jmp = DISAS_JUMP;
- if (s->thumb != (addr & 1)) {
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, addr & 1);
- tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUARMState, thumb));
- tcg_temp_free_i32(tmp);
- }
- tcg_gen_movi_i32(cpu_R[15], addr & ~1);
-}
-
-/* Set PC and Thumb state from var. var is marked as dead. */
-static inline void gen_bx(DisasContext *s, TCGv_i32 var)
-{
- s->base.is_jmp = DISAS_JUMP;
- tcg_gen_andi_i32(cpu_R[15], var, ~1);
- tcg_gen_andi_i32(var, var, 1);
- store_cpu_field(var, thumb);
-}
-
-/* Set PC and Thumb state from var. var is marked as dead.
- * For M-profile CPUs, include logic to detect exception-return
- * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
- * and BX reg, and no others, and happens only for code in Handler mode.
- */
-static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
-{
- /* Generate the same code here as for a simple bx, but flag via
- * s->base.is_jmp that we need to do the rest of the work later.
- */
- gen_bx(s, var);
- if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
- (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
- s->base.is_jmp = DISAS_BX_EXCRET;
- }
-}
-
-static inline void gen_bx_excret_final_code(DisasContext *s)
-{
- /* Generate the code to finish possible exception return and end the TB */
- TCGLabel *excret_label = gen_new_label();
- uint32_t min_magic;
-
- if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
- /* Covers FNC_RETURN and EXC_RETURN magic */
- min_magic = FNC_RETURN_MIN_MAGIC;
- } else {
- /* EXC_RETURN magic only */
- min_magic = EXC_RETURN_MIN_MAGIC;
- }
-
- /* Is the new PC value in the magic range indicating exception return? */
- tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
- /* No: end the TB as we would for a DISAS_JMP */
- if (is_singlestepping(s)) {
- gen_singlestep_exception(s);
- } else {
- tcg_gen_exit_tb(NULL, 0);
- }
- gen_set_label(excret_label);
- /* Yes: this is an exception return.
-     * At this point in runtime, env->regs[15] and env->thumb will hold
-     * the exception-return magic values, which do_v7m_exception_exit()
- * will read. Nothing else will be able to see those values because
- * the cpu-exec main loop guarantees that we will always go straight
- * from raising the exception to the exception-handling code.
- *
- * gen_ss_advance(s) does nothing on M profile currently but
- * calling it is conceptually the right thing as we have executed
- * this instruction (compare SWI, HVC, SMC handling).
- */
- gen_ss_advance(s);
- gen_exception_internal(EXCP_EXCEPTION_EXIT);
-}
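The decision at the end of the TB therefore reduces to a single unsigned comparison of the new PC against the smallest magic value in use. As a standalone predicate (the threshold is passed in rather than repeating the QEMU constants):

#include <stdbool.h>
#include <stdint.h>

static bool pc_is_magic_return(uint32_t new_pc, uint32_t min_magic)
{
    return new_pc >= min_magic;   /* matches the TCG_COND_GEU branch above */
}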
-
-static inline void gen_bxns(DisasContext *s, int rm)
-{
- TCGv_i32 var = load_reg(s, rm);
-
- /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
- * we need to sync state before calling it, but:
- * - we don't need to do gen_set_pc_im() because the bxns helper will
- * always set the PC itself
- * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
- * unless it's outside an IT block or the last insn in an IT block,
- * so we know that condexec == 0 (already set at the top of the TB)
- * is correct in the non-UNPREDICTABLE cases, and we can choose
- * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
- */
- gen_helper_v7m_bxns(cpu_env, var);
- tcg_temp_free_i32(var);
- s->base.is_jmp = DISAS_EXIT;
-}
-
-static inline void gen_blxns(DisasContext *s, int rm)
-{
- TCGv_i32 var = load_reg(s, rm);
-
- /* We don't need to sync condexec state, for the same reason as bxns.
- * We do however need to set the PC, because the blxns helper reads it.
- * The blxns helper may throw an exception.
- */
- gen_set_pc_im(s, s->pc);
- gen_helper_v7m_blxns(cpu_env, var);
- tcg_temp_free_i32(var);
- s->base.is_jmp = DISAS_EXIT;
-}
-
-/* Variant of store_reg which uses branch&exchange logic when storing
- to r15 in ARM architecture v7 and above. The source must be a temporary
- and will be marked as dead. */
-static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
-{
- if (reg == 15 && ENABLE_ARCH_7) {
- gen_bx(s, var);
- } else {
- store_reg(s, reg, var);
- }
-}
-
-/* Variant of store_reg which uses branch&exchange logic when storing
- * to r15 in ARM architecture v5T and above. This is used for storing
- * the results of a LDR/LDM/POP into r15, and corresponds to the cases
- * in the ARM ARM which use the LoadWritePC() pseudocode function. */
-static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
-{
- if (reg == 15 && ENABLE_ARCH_5) {
- gen_bx_excret(s, var);
- } else {
- store_reg(s, reg, var);
- }
-}
-
-#ifdef CONFIG_USER_ONLY
-#define IS_USER_ONLY 1
-#else
-#define IS_USER_ONLY 0
-#endif
-
-/* Abstractions of "generate code to do a guest load/store for
- * AArch32", where a vaddr is always 32 bits (and is zero
- * extended if we're a 64 bit core) and data is also
- * 32 bits unless specifically doing a 64 bit access.
- * These functions work like tcg_gen_qemu_{ld,st}* except
- * that the address argument is TCGv_i32 rather than TCGv.
- */
-
-static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, TCGMemOp op)
-{
- TCGv addr = tcg_temp_new();
- tcg_gen_extu_i32_tl(addr, a32);
-
- /* Not needed for user-mode BE32, where we use MO_BE instead. */
- if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
- tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
- }
- return addr;
-}
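For system-mode BE32 (SCTLR.B set), byte and halfword accesses are byte-lane swapped by XORing the low address bits rather than by flipping the memory op. The adjustment in plain C, with the access size given in bytes (sketch only):

#include <stdint.h>

static uint64_t aa32_be32_adjust(uint64_t addr, unsigned size_bytes)
{
    if (size_bytes < 4) {
        addr ^= 4 - size_bytes;   /* XOR 3 for bytes, 2 for halfwords */
    }
    return addr;
}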
-
-static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
- int index, TCGMemOp opc)
-{
- TCGv addr;
-
- if (arm_dc_feature(s, ARM_FEATURE_M) &&
- !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
- opc |= MO_ALIGN;
- }
-
- addr = gen_aa32_addr(s, a32, opc);
- tcg_gen_qemu_ld_i32(val, addr, index, opc);
- tcg_temp_free(addr);
-}
-
-static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
- int index, TCGMemOp opc)
-{
- TCGv addr;
-
- if (arm_dc_feature(s, ARM_FEATURE_M) &&
- !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
- opc |= MO_ALIGN;
- }
-
- addr = gen_aa32_addr(s, a32, opc);
- tcg_gen_qemu_st_i32(val, addr, index, opc);
- tcg_temp_free(addr);
-}
-
-#define DO_GEN_LD(SUFF, OPC) \
-static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
- TCGv_i32 a32, int index) \
-{ \
- gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
-} \
-static inline void gen_aa32_ld##SUFF##_iss(DisasContext *s, \
- TCGv_i32 val, \
- TCGv_i32 a32, int index, \
- ISSInfo issinfo) \
-{ \
- gen_aa32_ld##SUFF(s, val, a32, index); \
- disas_set_da_iss(s, OPC, issinfo); \
-}
-
-#define DO_GEN_ST(SUFF, OPC) \
-static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
- TCGv_i32 a32, int index) \
-{ \
- gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
-} \
-static inline void gen_aa32_st##SUFF##_iss(DisasContext *s, \
- TCGv_i32 val, \
- TCGv_i32 a32, int index, \
- ISSInfo issinfo) \
-{ \
- gen_aa32_st##SUFF(s, val, a32, index); \
- disas_set_da_iss(s, OPC, issinfo | ISSIsWrite); \
-}
-
-static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
-{
- /* Not needed for user-mode BE32, where we use MO_BE instead. */
- if (!IS_USER_ONLY && s->sctlr_b) {
- tcg_gen_rotri_i64(val, val, 32);
- }
-}
-
-static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
- int index, TCGMemOp opc)
-{
- TCGv addr = gen_aa32_addr(s, a32, opc);
- tcg_gen_qemu_ld_i64(val, addr, index, opc);
- gen_aa32_frob64(s, val);
- tcg_temp_free(addr);
-}
-
-static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
- TCGv_i32 a32, int index)
-{
- gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
-}
-
-static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
- int index, TCGMemOp opc)
-{
- TCGv addr = gen_aa32_addr(s, a32, opc);
-
- /* Not needed for user-mode BE32, where we use MO_BE instead. */
- if (!IS_USER_ONLY && s->sctlr_b) {
- TCGv_i64 tmp = tcg_temp_new_i64();
- tcg_gen_rotri_i64(tmp, val, 32);
- tcg_gen_qemu_st_i64(tmp, addr, index, opc);
- tcg_temp_free_i64(tmp);
- } else {
- tcg_gen_qemu_st_i64(val, addr, index, opc);
- }
- tcg_temp_free(addr);
-}
-
-static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
- TCGv_i32 a32, int index)
-{
- gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
-}
-
-DO_GEN_LD(8s, MO_SB)
-DO_GEN_LD(8u, MO_UB)
-DO_GEN_LD(16s, MO_SW)
-DO_GEN_LD(16u, MO_UW)
-DO_GEN_LD(32u, MO_UL)
-DO_GEN_ST(8, MO_UB)
-DO_GEN_ST(16, MO_UW)
-DO_GEN_ST(32, MO_UL)
-
-static inline void gen_hvc(DisasContext *s, int imm16)
-{
-    /* The pre-HVC helper handles the case where HVC gets trapped
-     * as an undefined insn by runtime configuration (i.e. before
-     * the insn really executes).
- */
- gen_set_pc_im(s, s->pc - 4);
- gen_helper_pre_hvc(cpu_env);
- /* Otherwise we will treat this as a real exception which
- * happens after execution of the insn. (The distinction matters
- * for the PC value reported to the exception handler and also
- * for single stepping.)
- */
- s->svc_imm = imm16;
- gen_set_pc_im(s, s->pc);
- s->base.is_jmp = DISAS_HVC;
-}
-
-static inline void gen_smc(DisasContext *s)
-{
- /* As with HVC, we may take an exception either before or after
- * the insn executes.
- */
- TCGv_i32 tmp;
-
- gen_set_pc_im(s, s->pc - 4);
- tmp = tcg_const_i32(syn_aa32_smc());
- gen_helper_pre_smc(cpu_env, tmp);
- tcg_temp_free_i32(tmp);
- gen_set_pc_im(s, s->pc);
- s->base.is_jmp = DISAS_SMC;
-}
-
-static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
-{
- gen_set_condexec(s);
- gen_set_pc_im(s, s->pc - offset);
- gen_exception_internal(excp);
- s->base.is_jmp = DISAS_NORETURN;
-}
-
-static void gen_exception_insn(DisasContext *s, int offset, int excp,
- int syn, uint32_t target_el)
-{
- gen_set_condexec(s);
- gen_set_pc_im(s, s->pc - offset);
- gen_exception(excp, syn, target_el);
- s->base.is_jmp = DISAS_NORETURN;
-}
-
-static void gen_exception_bkpt_insn(DisasContext *s, int offset, uint32_t syn)
-{
- TCGv_i32 tcg_syn;
-
- gen_set_condexec(s);
- gen_set_pc_im(s, s->pc - offset);
- tcg_syn = tcg_const_i32(syn);
- gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
- tcg_temp_free_i32(tcg_syn);
- s->base.is_jmp = DISAS_NORETURN;
-}
-
-/* Force a TB lookup after an instruction that changes the CPU state. */
-static inline void gen_lookup_tb(DisasContext *s)
-{
- tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
- s->base.is_jmp = DISAS_EXIT;
-}
-
-static inline void gen_hlt(DisasContext *s, int imm)
-{
- /* HLT. This has two purposes.
- * Architecturally, it is an external halting debug instruction.
-     * Since QEMU doesn't implement external debug, we treat this as
-     * required when halting debug is disabled: it will UNDEF.
- * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
- * and "HLT 0xF000" is an A32 semihosting syscall. These traps
- * must trigger semihosting even for ARMv7 and earlier, where
- * HLT was an undefined encoding.
- * In system mode, we don't allow userspace access to
- * semihosting, to provide some semblance of security
- * (and for consistency with our 32-bit semihosting).
- */
- if (semihosting_enabled() &&
-#ifndef CONFIG_USER_ONLY
- s->current_el != 0 &&
-#endif
- (imm == (s->thumb ? 0x3c : 0xf000))) {
- gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
- return;
- }
-
- gen_exception_insn(s, s->thumb ? 2 : 4, EXCP_UDEF, syn_uncategorized(),
- default_exception_el(s));
-}
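The semihosting check above keys on a single immediate per instruction set. As a standalone predicate (illustration only):

#include <stdbool.h>
#include <stdint.h>

static bool hlt_is_semihosting_trap(bool thumb, uint32_t imm)
{
    /* "HLT 0x3C" in T32 and "HLT 0xF000" in A32 trigger semihosting. */
    return imm == (thumb ? 0x3cu : 0xf000u);
}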
-
-static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
- TCGv_i32 var)
-{
- int val, rm, shift, shiftop;
- TCGv_i32 offset;
-
- if (!(insn & (1 << 25))) {
- /* immediate */
- val = insn & 0xfff;
- if (!(insn & (1 << 23)))
- val = -val;
- if (val != 0)
- tcg_gen_addi_i32(var, var, val);
- } else {
- /* shift/register */
- rm = (insn) & 0xf;
- shift = (insn >> 7) & 0x1f;
- shiftop = (insn >> 5) & 3;
- offset = load_reg(s, rm);
- gen_arm_shift_im(offset, shiftop, shift, 0);
- if (!(insn & (1 << 23)))
- tcg_gen_sub_i32(var, var, offset);
- else
- tcg_gen_add_i32(var, var, offset);
- tcg_temp_free_i32(offset);
- }
-}
-
-static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
- int extra, TCGv_i32 var)
-{
- int val, rm;
- TCGv_i32 offset;
-
- if (insn & (1 << 22)) {
- /* immediate */
- val = (insn & 0xf) | ((insn >> 4) & 0xf0);
- if (!(insn & (1 << 23)))
- val = -val;
- val += extra;
- if (val != 0)
- tcg_gen_addi_i32(var, var, val);
- } else {
- /* register */
- if (extra)
- tcg_gen_addi_i32(var, var, extra);
- rm = (insn) & 0xf;
- offset = load_reg(s, rm);
- if (!(insn & (1 << 23)))
- tcg_gen_sub_i32(var, var, offset);
- else
- tcg_gen_add_i32(var, var, offset);
- tcg_temp_free_i32(offset);
- }
-}
-
-static TCGv_ptr get_fpstatus_ptr(int neon)
-{
- TCGv_ptr statusptr = tcg_temp_new_ptr();
- int offset;
- if (neon) {
- offset = offsetof(CPUARMState, vfp.standard_fp_status);
- } else {
- offset = offsetof(CPUARMState, vfp.fp_status);
- }
- tcg_gen_addi_ptr(statusptr, cpu_env, offset);
- return statusptr;
-}
-
-#define VFP_OP2(name) \
-static inline void gen_vfp_##name(int dp) \
-{ \
- TCGv_ptr fpst = get_fpstatus_ptr(0); \
- if (dp) { \
- gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst); \
- } else { \
- gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst); \
- } \
- tcg_temp_free_ptr(fpst); \
-}
-
-VFP_OP2(add)
-VFP_OP2(sub)
-VFP_OP2(mul)
-VFP_OP2(div)
-
-#undef VFP_OP2
-
-static inline void gen_vfp_F1_mul(int dp)
-{
- /* Like gen_vfp_mul() but put result in F1 */
- TCGv_ptr fpst = get_fpstatus_ptr(0);
- if (dp) {
- gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst);
- } else {
- gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst);
- }
- tcg_temp_free_ptr(fpst);
-}
-
-static inline void gen_vfp_F1_neg(int dp)
-{
- /* Like gen_vfp_neg() but put result in F1 */
- if (dp) {
- gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
- } else {
- gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
- }
-}
-
-static inline void gen_vfp_abs(int dp)
-{
- if (dp)
- gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
- else
- gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
-}
-
-static inline void gen_vfp_neg(int dp)
-{
- if (dp)
- gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
- else
- gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
-}
-
-static inline void gen_vfp_sqrt(int dp)
-{
- if (dp)
- gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
- else
- gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
-}
-
-static inline void gen_vfp_cmp(int dp)
-{
- if (dp)
- gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
- else
- gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
-}
-
-static inline void gen_vfp_cmpe(int dp)
-{
- if (dp)
- gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
- else
- gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
-}
-
-static inline void gen_vfp_F1_ld0(int dp)
-{
- if (dp)
- tcg_gen_movi_i64(cpu_F1d, 0);
- else
- tcg_gen_movi_i32(cpu_F1s, 0);
-}
-
-#define VFP_GEN_ITOF(name) \
-static inline void gen_vfp_##name(int dp, int neon) \
-{ \
- TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
- if (dp) { \
- gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
- } else { \
- gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
- } \
- tcg_temp_free_ptr(statusptr); \
-}
-
-VFP_GEN_ITOF(uito)
-VFP_GEN_ITOF(sito)
-#undef VFP_GEN_ITOF
-
-#define VFP_GEN_FTOI(name) \
-static inline void gen_vfp_##name(int dp, int neon) \
-{ \
- TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
- if (dp) { \
- gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
- } else { \
- gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
- } \
- tcg_temp_free_ptr(statusptr); \
-}
-
-VFP_GEN_FTOI(toui)
-VFP_GEN_FTOI(touiz)
-VFP_GEN_FTOI(tosi)
-VFP_GEN_FTOI(tosiz)
-#undef VFP_GEN_FTOI
-
-#define VFP_GEN_FIX(name, round) \
-static inline void gen_vfp_##name(int dp, int shift, int neon) \
-{ \
- TCGv_i32 tmp_shift = tcg_const_i32(shift); \
- TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
- if (dp) { \
- gen_helper_vfp_##name##d##round(cpu_F0d, cpu_F0d, tmp_shift, \
- statusptr); \
- } else { \
- gen_helper_vfp_##name##s##round(cpu_F0s, cpu_F0s, tmp_shift, \
- statusptr); \
- } \
- tcg_temp_free_i32(tmp_shift); \
- tcg_temp_free_ptr(statusptr); \
-}
-VFP_GEN_FIX(tosh, _round_to_zero)
-VFP_GEN_FIX(tosl, _round_to_zero)
-VFP_GEN_FIX(touh, _round_to_zero)
-VFP_GEN_FIX(toul, _round_to_zero)
-VFP_GEN_FIX(shto, )
-VFP_GEN_FIX(slto, )
-VFP_GEN_FIX(uhto, )
-VFP_GEN_FIX(ulto, )
-#undef VFP_GEN_FIX
-
-static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv_i32 addr)
-{
- if (dp) {
- gen_aa32_ld64(s, cpu_F0d, addr, get_mem_index(s));
- } else {
- gen_aa32_ld32u(s, cpu_F0s, addr, get_mem_index(s));
- }
-}
-
-static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr)
-{
- if (dp) {
- gen_aa32_st64(s, cpu_F0d, addr, get_mem_index(s));
- } else {
- gen_aa32_st32(s, cpu_F0s, addr, get_mem_index(s));
- }
-}
-
-static inline long vfp_reg_offset(bool dp, unsigned reg)
-{
- if (dp) {
- return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
- } else {
- long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
- if (reg & 1) {
- ofs += offsetof(CPU_DoubleU, l.upper);
- } else {
- ofs += offsetof(CPU_DoubleU, l.lower);
- }
- return ofs;
- }
-}
-
-/* Return the offset of a 32-bit piece of a NEON register.
- zero is the least significant end of the register. */
-static inline long
-neon_reg_offset (int reg, int n)
-{
- int sreg;
- sreg = reg * 2 + n;
- return vfp_reg_offset(0, sreg);
-}
-
-static TCGv_i32 neon_load_reg(int reg, int pass)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
- return tmp;
-}
-
-static void neon_store_reg(int reg, int pass, TCGv_i32 var)
-{
- tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
- tcg_temp_free_i32(var);
-}
-
-static inline void neon_load_reg64(TCGv_i64 var, int reg)
-{
- tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
-}
-
-static inline void neon_store_reg64(TCGv_i64 var, int reg)
-{
- tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
-}
-
-static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
-{
- TCGv_ptr ret = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
- return ret;
-}
-
-#define tcg_gen_ld_f32 tcg_gen_ld_i32
-#define tcg_gen_ld_f64 tcg_gen_ld_i64
-#define tcg_gen_st_f32 tcg_gen_st_i32
-#define tcg_gen_st_f64 tcg_gen_st_i64
-
-static inline void gen_mov_F0_vreg(int dp, int reg)
-{
- if (dp)
- tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
- else
- tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
-}
-
-static inline void gen_mov_F1_vreg(int dp, int reg)
-{
- if (dp)
- tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
- else
- tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
-}
-
-static inline void gen_mov_vreg_F0(int dp, int reg)
-{
- if (dp)
- tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
- else
- tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
-}
-
-#define ARM_CP_RW_BIT (1 << 20)
-
-static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
-{
- tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
-}
-
-static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
-{
- tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
-}
-
-static inline TCGv_i32 iwmmxt_load_creg(int reg)
-{
- TCGv_i32 var = tcg_temp_new_i32();
- tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
- return var;
-}
-
-static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
-{
- tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
- tcg_temp_free_i32(var);
-}
-
-static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
-{
- iwmmxt_store_reg(cpu_M0, rn);
-}
-
-static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
-{
- iwmmxt_load_reg(cpu_M0, rn);
-}
-
-static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
-{
- iwmmxt_load_reg(cpu_V1, rn);
- tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
-}
-
-static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
-{
- iwmmxt_load_reg(cpu_V1, rn);
- tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
-}
-
-static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
-{
- iwmmxt_load_reg(cpu_V1, rn);
- tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
-}
-
-#define IWMMXT_OP(name) \
-static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
-{ \
- iwmmxt_load_reg(cpu_V1, rn); \
- gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
-}
-
-#define IWMMXT_OP_ENV(name) \
-static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
-{ \
- iwmmxt_load_reg(cpu_V1, rn); \
- gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
-}
-
-#define IWMMXT_OP_ENV_SIZE(name) \
-IWMMXT_OP_ENV(name##b) \
-IWMMXT_OP_ENV(name##w) \
-IWMMXT_OP_ENV(name##l)
-
-#define IWMMXT_OP_ENV1(name) \
-static inline void gen_op_iwmmxt_##name##_M0(void) \
-{ \
- gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
-}
-
-IWMMXT_OP(maddsq)
-IWMMXT_OP(madduq)
-IWMMXT_OP(sadb)
-IWMMXT_OP(sadw)
-IWMMXT_OP(mulslw)
-IWMMXT_OP(mulshw)
-IWMMXT_OP(mululw)
-IWMMXT_OP(muluhw)
-IWMMXT_OP(macsw)
-IWMMXT_OP(macuw)
-
-IWMMXT_OP_ENV_SIZE(unpackl)
-IWMMXT_OP_ENV_SIZE(unpackh)
-
-IWMMXT_OP_ENV1(unpacklub)
-IWMMXT_OP_ENV1(unpackluw)
-IWMMXT_OP_ENV1(unpacklul)
-IWMMXT_OP_ENV1(unpackhub)
-IWMMXT_OP_ENV1(unpackhuw)
-IWMMXT_OP_ENV1(unpackhul)
-IWMMXT_OP_ENV1(unpacklsb)
-IWMMXT_OP_ENV1(unpacklsw)
-IWMMXT_OP_ENV1(unpacklsl)
-IWMMXT_OP_ENV1(unpackhsb)
-IWMMXT_OP_ENV1(unpackhsw)
-IWMMXT_OP_ENV1(unpackhsl)
-
-IWMMXT_OP_ENV_SIZE(cmpeq)
-IWMMXT_OP_ENV_SIZE(cmpgtu)
-IWMMXT_OP_ENV_SIZE(cmpgts)
-
-IWMMXT_OP_ENV_SIZE(mins)
-IWMMXT_OP_ENV_SIZE(minu)
-IWMMXT_OP_ENV_SIZE(maxs)
-IWMMXT_OP_ENV_SIZE(maxu)
-
-IWMMXT_OP_ENV_SIZE(subn)
-IWMMXT_OP_ENV_SIZE(addn)
-IWMMXT_OP_ENV_SIZE(subu)
-IWMMXT_OP_ENV_SIZE(addu)
-IWMMXT_OP_ENV_SIZE(subs)
-IWMMXT_OP_ENV_SIZE(adds)
-
-IWMMXT_OP_ENV(avgb0)
-IWMMXT_OP_ENV(avgb1)
-IWMMXT_OP_ENV(avgw0)
-IWMMXT_OP_ENV(avgw1)
-
-IWMMXT_OP_ENV(packuw)
-IWMMXT_OP_ENV(packul)
-IWMMXT_OP_ENV(packuq)
-IWMMXT_OP_ENV(packsw)
-IWMMXT_OP_ENV(packsl)
-IWMMXT_OP_ENV(packsq)
-
-static void gen_op_iwmmxt_set_mup(void)
-{
- TCGv_i32 tmp;
- tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
- tcg_gen_ori_i32(tmp, tmp, 2);
- store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
-}
-
-static void gen_op_iwmmxt_set_cup(void)
-{
- TCGv_i32 tmp;
- tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
- tcg_gen_ori_i32(tmp, tmp, 1);
- store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
-}
-
-static void gen_op_iwmmxt_setpsr_nz(void)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
- store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
-}
-
-static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
-{
- iwmmxt_load_reg(cpu_V1, rn);
- tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
- tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
-}
-
-static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
- TCGv_i32 dest)
-{
- int rd;
- uint32_t offset;
- TCGv_i32 tmp;
-
- rd = (insn >> 16) & 0xf;
- tmp = load_reg(s, rd);
-
- offset = (insn & 0xff) << ((insn >> 7) & 2);
- if (insn & (1 << 24)) {
- /* Pre indexed */
- if (insn & (1 << 23))
- tcg_gen_addi_i32(tmp, tmp, offset);
- else
- tcg_gen_addi_i32(tmp, tmp, -offset);
- tcg_gen_mov_i32(dest, tmp);
- if (insn & (1 << 21))
- store_reg(s, rd, tmp);
- else
- tcg_temp_free_i32(tmp);
- } else if (insn & (1 << 21)) {
- /* Post indexed */
- tcg_gen_mov_i32(dest, tmp);
- if (insn & (1 << 23))
- tcg_gen_addi_i32(tmp, tmp, offset);
- else
- tcg_gen_addi_i32(tmp, tmp, -offset);
- store_reg(s, rd, tmp);
- } else if (!(insn & (1 << 23)))
- return 1;
- return 0;
-}
-
-static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
-{
- int rd = (insn >> 0) & 0xf;
- TCGv_i32 tmp;
-
- if (insn & (1 << 8)) {
- if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
- return 1;
- } else {
- tmp = iwmmxt_load_creg(rd);
- }
- } else {
- tmp = tcg_temp_new_i32();
- iwmmxt_load_reg(cpu_V0, rd);
- tcg_gen_extrl_i64_i32(tmp, cpu_V0);
- }
- tcg_gen_andi_i32(tmp, tmp, mask);
- tcg_gen_mov_i32(dest, tmp);
- tcg_temp_free_i32(tmp);
- return 0;
-}
-
-/* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
- (ie. an undefined instruction). */
-static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
-{
- int rd, wrd;
- int rdhi, rdlo, rd0, rd1, i;
- TCGv_i32 addr;
- TCGv_i32 tmp, tmp2, tmp3;
-
- if ((insn & 0x0e000e00) == 0x0c000000) {
- if ((insn & 0x0fe00ff0) == 0x0c400000) {
- wrd = insn & 0xf;
- rdlo = (insn >> 12) & 0xf;
- rdhi = (insn >> 16) & 0xf;
- if (insn & ARM_CP_RW_BIT) { /* TMRRC */
- iwmmxt_load_reg(cpu_V0, wrd);
- tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
- tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
- tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
- } else { /* TMCRR */
- tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
- iwmmxt_store_reg(cpu_V0, wrd);
- gen_op_iwmmxt_set_mup();
- }
- return 0;
- }
-
- wrd = (insn >> 12) & 0xf;
- addr = tcg_temp_new_i32();
- if (gen_iwmmxt_address(s, insn, addr)) {
- tcg_temp_free_i32(addr);
- return 1;
- }
- if (insn & ARM_CP_RW_BIT) {
- if ((insn >> 28) == 0xf) { /* WLDRW wCx */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- iwmmxt_store_creg(wrd, tmp);
- } else {
- i = 1;
- if (insn & (1 << 8)) {
- if (insn & (1 << 22)) { /* WLDRD */
- gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
- i = 0;
- } else { /* WLDRW wRd */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- }
- } else {
- tmp = tcg_temp_new_i32();
- if (insn & (1 << 22)) { /* WLDRH */
- gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
- } else { /* WLDRB */
- gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
- }
- }
- if (i) {
- tcg_gen_extu_i32_i64(cpu_M0, tmp);
- tcg_temp_free_i32(tmp);
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- }
- } else {
- if ((insn >> 28) == 0xf) { /* WSTRW wCx */
- tmp = iwmmxt_load_creg(wrd);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- } else {
- gen_op_iwmmxt_movq_M0_wRn(wrd);
- tmp = tcg_temp_new_i32();
- if (insn & (1 << 8)) {
- if (insn & (1 << 22)) { /* WSTRD */
- gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
- } else { /* WSTRW wRd */
- tcg_gen_extrl_i64_i32(tmp, cpu_M0);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- }
- } else {
- if (insn & (1 << 22)) { /* WSTRH */
- tcg_gen_extrl_i64_i32(tmp, cpu_M0);
- gen_aa32_st16(s, tmp, addr, get_mem_index(s));
- } else { /* WSTRB */
- tcg_gen_extrl_i64_i32(tmp, cpu_M0);
- gen_aa32_st8(s, tmp, addr, get_mem_index(s));
- }
- }
- }
- tcg_temp_free_i32(tmp);
- }
- tcg_temp_free_i32(addr);
- return 0;
- }
-
- if ((insn & 0x0f000000) != 0x0e000000)
- return 1;
-
- switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
- case 0x000: /* WOR */
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 0) & 0xf;
- rd1 = (insn >> 16) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- gen_op_iwmmxt_orq_M0_wRn(rd1);
- gen_op_iwmmxt_setpsr_nz();
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x011: /* TMCR */
- if (insn & 0xf)
- return 1;
- rd = (insn >> 12) & 0xf;
- wrd = (insn >> 16) & 0xf;
- switch (wrd) {
- case ARM_IWMMXT_wCID:
- case ARM_IWMMXT_wCASF:
- break;
- case ARM_IWMMXT_wCon:
- gen_op_iwmmxt_set_cup();
- /* Fall through. */
- case ARM_IWMMXT_wCSSF:
- tmp = iwmmxt_load_creg(wrd);
- tmp2 = load_reg(s, rd);
- tcg_gen_andc_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- iwmmxt_store_creg(wrd, tmp);
- break;
- case ARM_IWMMXT_wCGR0:
- case ARM_IWMMXT_wCGR1:
- case ARM_IWMMXT_wCGR2:
- case ARM_IWMMXT_wCGR3:
- gen_op_iwmmxt_set_cup();
- tmp = load_reg(s, rd);
- iwmmxt_store_creg(wrd, tmp);
- break;
- default:
- return 1;
- }
- break;
- case 0x100: /* WXOR */
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 0) & 0xf;
- rd1 = (insn >> 16) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- gen_op_iwmmxt_xorq_M0_wRn(rd1);
- gen_op_iwmmxt_setpsr_nz();
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x111: /* TMRC */
- if (insn & 0xf)
- return 1;
- rd = (insn >> 12) & 0xf;
- wrd = (insn >> 16) & 0xf;
- tmp = iwmmxt_load_creg(wrd);
- store_reg(s, rd, tmp);
- break;
- case 0x300: /* WANDN */
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 0) & 0xf;
- rd1 = (insn >> 16) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- tcg_gen_neg_i64(cpu_M0, cpu_M0);
- gen_op_iwmmxt_andq_M0_wRn(rd1);
- gen_op_iwmmxt_setpsr_nz();
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x200: /* WAND */
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 0) & 0xf;
- rd1 = (insn >> 16) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- gen_op_iwmmxt_andq_M0_wRn(rd1);
- gen_op_iwmmxt_setpsr_nz();
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x810: case 0xa10: /* WMADD */
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 0) & 0xf;
- rd1 = (insn >> 16) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- if (insn & (1 << 21))
- gen_op_iwmmxt_maddsq_M0_wRn(rd1);
- else
- gen_op_iwmmxt_madduq_M0_wRn(rd1);
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- break;
- case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- switch ((insn >> 22) & 3) {
- case 0:
- gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
- break;
- case 1:
- gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
- break;
- case 2:
- gen_op_iwmmxt_unpackll_M0_wRn(rd1);
- break;
- case 3:
- return 1;
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- switch ((insn >> 22) & 3) {
- case 0:
- gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
- break;
- case 1:
- gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
- break;
- case 2:
- gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
- break;
- case 3:
- return 1;
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- if (insn & (1 << 22))
- gen_op_iwmmxt_sadw_M0_wRn(rd1);
- else
- gen_op_iwmmxt_sadb_M0_wRn(rd1);
- if (!(insn & (1 << 20)))
- gen_op_iwmmxt_addl_M0_wRn(wrd);
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- break;
- case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- if (insn & (1 << 21)) {
- if (insn & (1 << 20))
- gen_op_iwmmxt_mulshw_M0_wRn(rd1);
- else
- gen_op_iwmmxt_mulslw_M0_wRn(rd1);
- } else {
- if (insn & (1 << 20))
- gen_op_iwmmxt_muluhw_M0_wRn(rd1);
- else
- gen_op_iwmmxt_mululw_M0_wRn(rd1);
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- break;
- case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- if (insn & (1 << 21))
- gen_op_iwmmxt_macsw_M0_wRn(rd1);
- else
- gen_op_iwmmxt_macuw_M0_wRn(rd1);
- if (!(insn & (1 << 20))) {
- iwmmxt_load_reg(cpu_V1, wrd);
- tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- break;
- case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- switch ((insn >> 22) & 3) {
- case 0:
- gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
- break;
- case 1:
- gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
- break;
- case 2:
- gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
- break;
- case 3:
- return 1;
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- if (insn & (1 << 22)) {
- if (insn & (1 << 20))
- gen_op_iwmmxt_avgw1_M0_wRn(rd1);
- else
- gen_op_iwmmxt_avgw0_M0_wRn(rd1);
- } else {
- if (insn & (1 << 20))
- gen_op_iwmmxt_avgb1_M0_wRn(rd1);
- else
- gen_op_iwmmxt_avgb0_M0_wRn(rd1);
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
- tcg_gen_andi_i32(tmp, tmp, 7);
- iwmmxt_load_reg(cpu_V1, rd1);
- gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
- tcg_temp_free_i32(tmp);
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- break;
- case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
- if (((insn >> 6) & 3) == 3)
- return 1;
- rd = (insn >> 12) & 0xf;
- wrd = (insn >> 16) & 0xf;
- tmp = load_reg(s, rd);
- gen_op_iwmmxt_movq_M0_wRn(wrd);
- switch ((insn >> 6) & 3) {
- case 0:
- tmp2 = tcg_const_i32(0xff);
- tmp3 = tcg_const_i32((insn & 7) << 3);
- break;
- case 1:
- tmp2 = tcg_const_i32(0xffff);
- tmp3 = tcg_const_i32((insn & 3) << 4);
- break;
- case 2:
- tmp2 = tcg_const_i32(0xffffffff);
- tmp3 = tcg_const_i32((insn & 1) << 5);
- break;
- default:
- tmp2 = NULL;
- tmp3 = NULL;
- }
- gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
- tcg_temp_free_i32(tmp3);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- break;
- case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
- rd = (insn >> 12) & 0xf;
- wrd = (insn >> 16) & 0xf;
- if (rd == 15 || ((insn >> 22) & 3) == 3)
- return 1;
- gen_op_iwmmxt_movq_M0_wRn(wrd);
- tmp = tcg_temp_new_i32();
- switch ((insn >> 22) & 3) {
- case 0:
- tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
- tcg_gen_extrl_i64_i32(tmp, cpu_M0);
- if (insn & 8) {
- tcg_gen_ext8s_i32(tmp, tmp);
- } else {
- tcg_gen_andi_i32(tmp, tmp, 0xff);
- }
- break;
- case 1:
- tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
- tcg_gen_extrl_i64_i32(tmp, cpu_M0);
- if (insn & 8) {
- tcg_gen_ext16s_i32(tmp, tmp);
- } else {
- tcg_gen_andi_i32(tmp, tmp, 0xffff);
- }
- break;
- case 2:
- tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
- tcg_gen_extrl_i64_i32(tmp, cpu_M0);
- break;
- }
- store_reg(s, rd, tmp);
- break;
- case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
- if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
- return 1;
- tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
- switch ((insn >> 22) & 3) {
- case 0:
- tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
- break;
- case 1:
- tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
- break;
- case 2:
- tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
- break;
- }
- tcg_gen_shli_i32(tmp, tmp, 28);
- gen_set_nzcv(tmp);
- tcg_temp_free_i32(tmp);
- break;
- case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
- if (((insn >> 6) & 3) == 3)
- return 1;
- rd = (insn >> 12) & 0xf;
- wrd = (insn >> 16) & 0xf;
- tmp = load_reg(s, rd);
- switch ((insn >> 6) & 3) {
- case 0:
- gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
- break;
- case 1:
- gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
- break;
- case 2:
- gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
- break;
- }
- tcg_temp_free_i32(tmp);
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- break;
- case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
- if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
- return 1;
- tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
- tmp2 = tcg_temp_new_i32();
- tcg_gen_mov_i32(tmp2, tmp);
- switch ((insn >> 22) & 3) {
- case 0:
- for (i = 0; i < 7; i ++) {
- tcg_gen_shli_i32(tmp2, tmp2, 4);
- tcg_gen_and_i32(tmp, tmp, tmp2);
- }
- break;
- case 1:
- for (i = 0; i < 3; i ++) {
- tcg_gen_shli_i32(tmp2, tmp2, 8);
- tcg_gen_and_i32(tmp, tmp, tmp2);
- }
- break;
- case 2:
- tcg_gen_shli_i32(tmp2, tmp2, 16);
- tcg_gen_and_i32(tmp, tmp, tmp2);
- break;
- }
- gen_set_nzcv(tmp);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
- break;
- case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- switch ((insn >> 22) & 3) {
- case 0:
- gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
- break;
- case 1:
- gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
- break;
- case 2:
- gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
- break;
- case 3:
- return 1;
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- break;
- case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
- if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
- return 1;
- tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
- tmp2 = tcg_temp_new_i32();
- tcg_gen_mov_i32(tmp2, tmp);
- switch ((insn >> 22) & 3) {
- case 0:
- for (i = 0; i < 7; i ++) {
- tcg_gen_shli_i32(tmp2, tmp2, 4);
- tcg_gen_or_i32(tmp, tmp, tmp2);
- }
- break;
- case 1:
- for (i = 0; i < 3; i ++) {
- tcg_gen_shli_i32(tmp2, tmp2, 8);
- tcg_gen_or_i32(tmp, tmp, tmp2);
- }
- break;
- case 2:
- tcg_gen_shli_i32(tmp2, tmp2, 16);
- tcg_gen_or_i32(tmp, tmp, tmp2);
- break;
- }
- gen_set_nzcv(tmp);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
- break;
- case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
- rd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
- return 1;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- tmp = tcg_temp_new_i32();
- switch ((insn >> 22) & 3) {
- case 0:
- gen_helper_iwmmxt_msbb(tmp, cpu_M0);
- break;
- case 1:
- gen_helper_iwmmxt_msbw(tmp, cpu_M0);
- break;
- case 2:
- gen_helper_iwmmxt_msbl(tmp, cpu_M0);
- break;
- }
- store_reg(s, rd, tmp);
- break;
- case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
- case 0x906: case 0xb06: case 0xd06: case 0xf06:
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- switch ((insn >> 22) & 3) {
- case 0:
- if (insn & (1 << 21))
- gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
- else
- gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
- break;
- case 1:
- if (insn & (1 << 21))
- gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
- else
- gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
- break;
- case 2:
- if (insn & (1 << 21))
- gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
- else
- gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
- break;
- case 3:
- return 1;
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
- case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- switch ((insn >> 22) & 3) {
- case 0:
- if (insn & (1 << 21))
- gen_op_iwmmxt_unpacklsb_M0();
- else
- gen_op_iwmmxt_unpacklub_M0();
- break;
- case 1:
- if (insn & (1 << 21))
- gen_op_iwmmxt_unpacklsw_M0();
- else
- gen_op_iwmmxt_unpackluw_M0();
- break;
- case 2:
- if (insn & (1 << 21))
- gen_op_iwmmxt_unpacklsl_M0();
- else
- gen_op_iwmmxt_unpacklul_M0();
- break;
- case 3:
- return 1;
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
- case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- switch ((insn >> 22) & 3) {
- case 0:
- if (insn & (1 << 21))
- gen_op_iwmmxt_unpackhsb_M0();
- else
- gen_op_iwmmxt_unpackhub_M0();
- break;
- case 1:
- if (insn & (1 << 21))
- gen_op_iwmmxt_unpackhsw_M0();
- else
- gen_op_iwmmxt_unpackhuw_M0();
- break;
- case 2:
- if (insn & (1 << 21))
- gen_op_iwmmxt_unpackhsl_M0();
- else
- gen_op_iwmmxt_unpackhul_M0();
- break;
- case 3:
- return 1;
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
- case 0x214: case 0x614: case 0xa14: case 0xe14:
- if (((insn >> 22) & 3) == 0)
- return 1;
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- tmp = tcg_temp_new_i32();
- if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
- tcg_temp_free_i32(tmp);
- return 1;
- }
- switch ((insn >> 22) & 3) {
- case 1:
- gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
- break;
- case 2:
- gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
- break;
- case 3:
- gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
- break;
- }
- tcg_temp_free_i32(tmp);
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
- case 0x014: case 0x414: case 0x814: case 0xc14:
- if (((insn >> 22) & 3) == 0)
- return 1;
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- tmp = tcg_temp_new_i32();
- if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
- tcg_temp_free_i32(tmp);
- return 1;
- }
- switch ((insn >> 22) & 3) {
- case 1:
- gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
- break;
- case 2:
- gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
- break;
- case 3:
- gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
- break;
- }
- tcg_temp_free_i32(tmp);
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
- case 0x114: case 0x514: case 0x914: case 0xd14:
- if (((insn >> 22) & 3) == 0)
- return 1;
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- tmp = tcg_temp_new_i32();
- if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
- tcg_temp_free_i32(tmp);
- return 1;
- }
- switch ((insn >> 22) & 3) {
- case 1:
- gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
- break;
- case 2:
- gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
- break;
- case 3:
- gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
- break;
- }
- tcg_temp_free_i32(tmp);
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
- case 0x314: case 0x714: case 0xb14: case 0xf14:
- if (((insn >> 22) & 3) == 0)
- return 1;
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- tmp = tcg_temp_new_i32();
- switch ((insn >> 22) & 3) {
- case 1:
- if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
- tcg_temp_free_i32(tmp);
- return 1;
- }
- gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
- break;
- case 2:
- if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
- tcg_temp_free_i32(tmp);
- return 1;
- }
- gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
- break;
- case 3:
- if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
- tcg_temp_free_i32(tmp);
- return 1;
- }
- gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
- break;
- }
- tcg_temp_free_i32(tmp);
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
- case 0x916: case 0xb16: case 0xd16: case 0xf16:
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- switch ((insn >> 22) & 3) {
- case 0:
- if (insn & (1 << 21))
- gen_op_iwmmxt_minsb_M0_wRn(rd1);
- else
- gen_op_iwmmxt_minub_M0_wRn(rd1);
- break;
- case 1:
- if (insn & (1 << 21))
- gen_op_iwmmxt_minsw_M0_wRn(rd1);
- else
- gen_op_iwmmxt_minuw_M0_wRn(rd1);
- break;
- case 2:
- if (insn & (1 << 21))
- gen_op_iwmmxt_minsl_M0_wRn(rd1);
- else
- gen_op_iwmmxt_minul_M0_wRn(rd1);
- break;
- case 3:
- return 1;
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- break;
- case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
- case 0x816: case 0xa16: case 0xc16: case 0xe16:
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- switch ((insn >> 22) & 3) {
- case 0:
- if (insn & (1 << 21))
- gen_op_iwmmxt_maxsb_M0_wRn(rd1);
- else
- gen_op_iwmmxt_maxub_M0_wRn(rd1);
- break;
- case 1:
- if (insn & (1 << 21))
- gen_op_iwmmxt_maxsw_M0_wRn(rd1);
- else
- gen_op_iwmmxt_maxuw_M0_wRn(rd1);
- break;
- case 2:
- if (insn & (1 << 21))
- gen_op_iwmmxt_maxsl_M0_wRn(rd1);
- else
- gen_op_iwmmxt_maxul_M0_wRn(rd1);
- break;
- case 3:
- return 1;
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- break;
- case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
- case 0x402: case 0x502: case 0x602: case 0x702:
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- tmp = tcg_const_i32((insn >> 20) & 3);
- iwmmxt_load_reg(cpu_V1, rd1);
- gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
- tcg_temp_free_i32(tmp);
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- break;
- case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
- case 0x41a: case 0x51a: case 0x61a: case 0x71a:
- case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
- case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- switch ((insn >> 20) & 0xf) {
- case 0x0:
- gen_op_iwmmxt_subnb_M0_wRn(rd1);
- break;
- case 0x1:
- gen_op_iwmmxt_subub_M0_wRn(rd1);
- break;
- case 0x3:
- gen_op_iwmmxt_subsb_M0_wRn(rd1);
- break;
- case 0x4:
- gen_op_iwmmxt_subnw_M0_wRn(rd1);
- break;
- case 0x5:
- gen_op_iwmmxt_subuw_M0_wRn(rd1);
- break;
- case 0x7:
- gen_op_iwmmxt_subsw_M0_wRn(rd1);
- break;
- case 0x8:
- gen_op_iwmmxt_subnl_M0_wRn(rd1);
- break;
- case 0x9:
- gen_op_iwmmxt_subul_M0_wRn(rd1);
- break;
- case 0xb:
- gen_op_iwmmxt_subsl_M0_wRn(rd1);
- break;
- default:
- return 1;
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
- case 0x41e: case 0x51e: case 0x61e: case 0x71e:
- case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
- case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
- gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
- tcg_temp_free_i32(tmp);
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
- case 0x418: case 0x518: case 0x618: case 0x718:
- case 0x818: case 0x918: case 0xa18: case 0xb18:
- case 0xc18: case 0xd18: case 0xe18: case 0xf18:
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- switch ((insn >> 20) & 0xf) {
- case 0x0:
- gen_op_iwmmxt_addnb_M0_wRn(rd1);
- break;
- case 0x1:
- gen_op_iwmmxt_addub_M0_wRn(rd1);
- break;
- case 0x3:
- gen_op_iwmmxt_addsb_M0_wRn(rd1);
- break;
- case 0x4:
- gen_op_iwmmxt_addnw_M0_wRn(rd1);
- break;
- case 0x5:
- gen_op_iwmmxt_adduw_M0_wRn(rd1);
- break;
- case 0x7:
- gen_op_iwmmxt_addsw_M0_wRn(rd1);
- break;
- case 0x8:
- gen_op_iwmmxt_addnl_M0_wRn(rd1);
- break;
- case 0x9:
- gen_op_iwmmxt_addul_M0_wRn(rd1);
- break;
- case 0xb:
- gen_op_iwmmxt_addsl_M0_wRn(rd1);
- break;
- default:
- return 1;
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
- case 0x408: case 0x508: case 0x608: case 0x708:
- case 0x808: case 0x908: case 0xa08: case 0xb08:
- case 0xc08: case 0xd08: case 0xe08: case 0xf08:
- if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
- return 1;
- wrd = (insn >> 12) & 0xf;
- rd0 = (insn >> 16) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- gen_op_iwmmxt_movq_M0_wRn(rd0);
- switch ((insn >> 22) & 3) {
- case 1:
- if (insn & (1 << 21))
- gen_op_iwmmxt_packsw_M0_wRn(rd1);
- else
- gen_op_iwmmxt_packuw_M0_wRn(rd1);
- break;
- case 2:
- if (insn & (1 << 21))
- gen_op_iwmmxt_packsl_M0_wRn(rd1);
- else
- gen_op_iwmmxt_packul_M0_wRn(rd1);
- break;
- case 3:
- if (insn & (1 << 21))
- gen_op_iwmmxt_packsq_M0_wRn(rd1);
- else
- gen_op_iwmmxt_packuq_M0_wRn(rd1);
- break;
- }
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- gen_op_iwmmxt_set_cup();
- break;
- case 0x201: case 0x203: case 0x205: case 0x207:
- case 0x209: case 0x20b: case 0x20d: case 0x20f:
- case 0x211: case 0x213: case 0x215: case 0x217:
- case 0x219: case 0x21b: case 0x21d: case 0x21f:
- wrd = (insn >> 5) & 0xf;
- rd0 = (insn >> 12) & 0xf;
- rd1 = (insn >> 0) & 0xf;
- if (rd0 == 0xf || rd1 == 0xf)
- return 1;
- gen_op_iwmmxt_movq_M0_wRn(wrd);
- tmp = load_reg(s, rd0);
- tmp2 = load_reg(s, rd1);
- switch ((insn >> 16) & 0xf) {
- case 0x0: /* TMIA */
- gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
- break;
- case 0x8: /* TMIAPH */
- gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
- break;
- case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
- if (insn & (1 << 16))
- tcg_gen_shri_i32(tmp, tmp, 16);
- if (insn & (1 << 17))
- tcg_gen_shri_i32(tmp2, tmp2, 16);
- gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
- break;
- default:
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
- return 1;
- }
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
- gen_op_iwmmxt_movq_wRn_M0(wrd);
- gen_op_iwmmxt_set_mup();
- break;
- default:
- return 1;
- }
-
- return 0;
-}
-
-/* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
- (ie. an undefined instruction). */
-static int disas_dsp_insn(DisasContext *s, uint32_t insn)
-{
- int acc, rd0, rd1, rdhi, rdlo;
- TCGv_i32 tmp, tmp2;
-
- if ((insn & 0x0ff00f10) == 0x0e200010) {
- /* Multiply with Internal Accumulate Format */
- rd0 = (insn >> 12) & 0xf;
- rd1 = insn & 0xf;
- acc = (insn >> 5) & 7;
-
- if (acc != 0)
- return 1;
-
- tmp = load_reg(s, rd0);
- tmp2 = load_reg(s, rd1);
- switch ((insn >> 16) & 0xf) {
- case 0x0: /* MIA */
- gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
- break;
- case 0x8: /* MIAPH */
- gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
- break;
- case 0xc: /* MIABB */
- case 0xd: /* MIABT */
- case 0xe: /* MIATB */
- case 0xf: /* MIATT */
- if (insn & (1 << 16))
- tcg_gen_shri_i32(tmp, tmp, 16);
- if (insn & (1 << 17))
- tcg_gen_shri_i32(tmp2, tmp2, 16);
- gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
- break;
- default:
- return 1;
- }
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
-
- gen_op_iwmmxt_movq_wRn_M0(acc);
- return 0;
- }
-
- if ((insn & 0x0fe00ff8) == 0x0c400000) {
- /* Internal Accumulator Access Format */
- rdhi = (insn >> 16) & 0xf;
- rdlo = (insn >> 12) & 0xf;
- acc = insn & 7;
-
- if (acc != 0)
- return 1;
-
- if (insn & ARM_CP_RW_BIT) { /* MRA */
- iwmmxt_load_reg(cpu_V0, acc);
- tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
- tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
- tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
- tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
- } else { /* MAR */
- tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
- iwmmxt_store_reg(cpu_V0, acc);
- }
- return 0;
- }
-
- return 1;
-}
-
-#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
-#define VFP_SREG(insn, bigbit, smallbit) \
- ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
-#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
- if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \
- reg = (((insn) >> (bigbit)) & 0x0f) \
- | (((insn) >> ((smallbit) - 4)) & 0x10); \
- } else { \
- if (insn & (1 << (smallbit))) \
- return 1; \
- reg = ((insn) >> (bigbit)) & 0x0f; \
- }} while (0)
-
-#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
-#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
-#define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7)
-#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
-#define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5)
-#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
-
-/* Move between integer and VFP cores. */
-static TCGv_i32 gen_vfp_mrs(void)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_mov_i32(tmp, cpu_F0s);
- return tmp;
-}
-
-static void gen_vfp_msr(TCGv_i32 tmp)
-{
- tcg_gen_mov_i32(cpu_F0s, tmp);
- tcg_temp_free_i32(tmp);
-}
-
-static void gen_neon_dup_u8(TCGv_i32 var, int shift)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- if (shift)
- tcg_gen_shri_i32(var, var, shift);
- tcg_gen_ext8u_i32(var, var);
- tcg_gen_shli_i32(tmp, var, 8);
- tcg_gen_or_i32(var, var, tmp);
- tcg_gen_shli_i32(tmp, var, 16);
- tcg_gen_or_i32(var, var, tmp);
- tcg_temp_free_i32(tmp);
-}
-
-static void gen_neon_dup_low16(TCGv_i32 var)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_ext16u_i32(var, var);
- tcg_gen_shli_i32(tmp, var, 16);
- tcg_gen_or_i32(var, var, tmp);
- tcg_temp_free_i32(tmp);
-}
-
-static void gen_neon_dup_high16(TCGv_i32 var)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_andi_i32(var, var, 0xffff0000);
- tcg_gen_shri_i32(tmp, var, 16);
- tcg_gen_or_i32(var, var, tmp);
- tcg_temp_free_i32(tmp);
-}
-
-static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size)
-{
- /* Load a single Neon element and replicate into a 32 bit TCG reg */
- TCGv_i32 tmp = tcg_temp_new_i32();
- switch (size) {
- case 0:
- gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
- gen_neon_dup_u8(tmp, 0);
- break;
- case 1:
- gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
- gen_neon_dup_low16(tmp);
- break;
- case 2:
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- break;
- default: /* Avoid compiler warnings. */
- abort();
- }
- return tmp;
-}
-
-static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
- uint32_t dp)
-{
- uint32_t cc = extract32(insn, 20, 2);
-
- if (dp) {
- TCGv_i64 frn, frm, dest;
- TCGv_i64 tmp, zero, zf, nf, vf;
-
- zero = tcg_const_i64(0);
-
- frn = tcg_temp_new_i64();
- frm = tcg_temp_new_i64();
- dest = tcg_temp_new_i64();
-
- zf = tcg_temp_new_i64();
- nf = tcg_temp_new_i64();
- vf = tcg_temp_new_i64();
-
- tcg_gen_extu_i32_i64(zf, cpu_ZF);
- tcg_gen_ext_i32_i64(nf, cpu_NF);
- tcg_gen_ext_i32_i64(vf, cpu_VF);
-
- tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
- tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
- switch (cc) {
- case 0: /* eq: Z */
- tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
- frn, frm);
- break;
- case 1: /* vs: V */
- tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
- frn, frm);
- break;
- case 2: /* ge: N == V -> N ^ V == 0 */
- tmp = tcg_temp_new_i64();
- tcg_gen_xor_i64(tmp, vf, nf);
- tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
- frn, frm);
- tcg_temp_free_i64(tmp);
- break;
- case 3: /* gt: !Z && N == V */
- tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
- frn, frm);
- tmp = tcg_temp_new_i64();
- tcg_gen_xor_i64(tmp, vf, nf);
- tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
- dest, frm);
- tcg_temp_free_i64(tmp);
- break;
- }
- tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
- tcg_temp_free_i64(frn);
- tcg_temp_free_i64(frm);
- tcg_temp_free_i64(dest);
-
- tcg_temp_free_i64(zf);
- tcg_temp_free_i64(nf);
- tcg_temp_free_i64(vf);
-
- tcg_temp_free_i64(zero);
- } else {
- TCGv_i32 frn, frm, dest;
- TCGv_i32 tmp, zero;
-
- zero = tcg_const_i32(0);
-
- frn = tcg_temp_new_i32();
- frm = tcg_temp_new_i32();
- dest = tcg_temp_new_i32();
- tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
- tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
- switch (cc) {
- case 0: /* eq: Z */
- tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
- frn, frm);
- break;
- case 1: /* vs: V */
- tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
- frn, frm);
- break;
- case 2: /* ge: N == V -> N ^ V == 0 */
- tmp = tcg_temp_new_i32();
- tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
- tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
- frn, frm);
- tcg_temp_free_i32(tmp);
- break;
- case 3: /* gt: !Z && N == V */
- tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
- frn, frm);
- tmp = tcg_temp_new_i32();
- tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
- tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
- dest, frm);
- tcg_temp_free_i32(tmp);
- break;
- }
- tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
- tcg_temp_free_i32(frn);
- tcg_temp_free_i32(frm);
- tcg_temp_free_i32(dest);
-
- tcg_temp_free_i32(zero);
- }
-
- return 0;
-}
-
-static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
- uint32_t rm, uint32_t dp)
-{
- uint32_t vmin = extract32(insn, 6, 1);
- TCGv_ptr fpst = get_fpstatus_ptr(0);
-
- if (dp) {
- TCGv_i64 frn, frm, dest;
-
- frn = tcg_temp_new_i64();
- frm = tcg_temp_new_i64();
- dest = tcg_temp_new_i64();
-
- tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
- tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
- if (vmin) {
- gen_helper_vfp_minnumd(dest, frn, frm, fpst);
- } else {
- gen_helper_vfp_maxnumd(dest, frn, frm, fpst);
- }
- tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
- tcg_temp_free_i64(frn);
- tcg_temp_free_i64(frm);
- tcg_temp_free_i64(dest);
- } else {
- TCGv_i32 frn, frm, dest;
-
- frn = tcg_temp_new_i32();
- frm = tcg_temp_new_i32();
- dest = tcg_temp_new_i32();
-
- tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
- tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
- if (vmin) {
- gen_helper_vfp_minnums(dest, frn, frm, fpst);
- } else {
- gen_helper_vfp_maxnums(dest, frn, frm, fpst);
- }
- tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
- tcg_temp_free_i32(frn);
- tcg_temp_free_i32(frm);
- tcg_temp_free_i32(dest);
- }
-
- tcg_temp_free_ptr(fpst);
- return 0;
-}
-
-static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
- int rounding)
-{
- TCGv_ptr fpst = get_fpstatus_ptr(0);
- TCGv_i32 tcg_rmode;
-
- tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
-
- if (dp) {
- TCGv_i64 tcg_op;
- TCGv_i64 tcg_res;
- tcg_op = tcg_temp_new_i64();
- tcg_res = tcg_temp_new_i64();
- tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
- gen_helper_rintd(tcg_res, tcg_op, fpst);
- tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
- tcg_temp_free_i64(tcg_op);
- tcg_temp_free_i64(tcg_res);
- } else {
- TCGv_i32 tcg_op;
- TCGv_i32 tcg_res;
- tcg_op = tcg_temp_new_i32();
- tcg_res = tcg_temp_new_i32();
- tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
- gen_helper_rints(tcg_res, tcg_op, fpst);
- tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
- tcg_temp_free_i32(tcg_op);
- tcg_temp_free_i32(tcg_res);
- }
-
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- tcg_temp_free_i32(tcg_rmode);
-
- tcg_temp_free_ptr(fpst);
- return 0;
-}
-
-static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
- int rounding)
-{
- bool is_signed = extract32(insn, 7, 1);
- TCGv_ptr fpst = get_fpstatus_ptr(0);
- TCGv_i32 tcg_rmode, tcg_shift;
-
- tcg_shift = tcg_const_i32(0);
-
- tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
-
- if (dp) {
- TCGv_i64 tcg_double, tcg_res;
- TCGv_i32 tcg_tmp;
- /* Rd is encoded as a single precision register even when the source
- * is double precision.
- */
- rd = ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1);
- tcg_double = tcg_temp_new_i64();
- tcg_res = tcg_temp_new_i64();
- tcg_tmp = tcg_temp_new_i32();
- tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm));
- if (is_signed) {
- gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
- } else {
- gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
- }
- tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
- tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd));
- tcg_temp_free_i32(tcg_tmp);
- tcg_temp_free_i64(tcg_res);
- tcg_temp_free_i64(tcg_double);
- } else {
- TCGv_i32 tcg_single, tcg_res;
- tcg_single = tcg_temp_new_i32();
- tcg_res = tcg_temp_new_i32();
- tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm));
- if (is_signed) {
- gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
- } else {
- gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
- }
- tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd));
- tcg_temp_free_i32(tcg_res);
- tcg_temp_free_i32(tcg_single);
- }
-
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- tcg_temp_free_i32(tcg_rmode);
-
- tcg_temp_free_i32(tcg_shift);
-
- tcg_temp_free_ptr(fpst);
-
- return 0;
-}
-
-/* Table for converting the most common AArch32 encoding of
- * rounding mode to arm_fprounding order (which matches the
- * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
- */
-static const uint8_t fp_decode_rm[] = {
- FPROUNDING_TIEAWAY,
- FPROUNDING_TIEEVEN,
- FPROUNDING_POSINF,
- FPROUNDING_NEGINF,
-};
-
-static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn)
-{
- uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
-
- if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
- return 1;
- }
-
- if (dp) {
- VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
- VFP_DREG_M(rm, insn);
- } else {
- rd = VFP_SREG_D(insn);
- rn = VFP_SREG_N(insn);
- rm = VFP_SREG_M(insn);
- }
-
- if ((insn & 0x0f800e50) == 0x0e000a00) {
- return handle_vsel(insn, rd, rn, rm, dp);
- } else if ((insn & 0x0fb00e10) == 0x0e800a00) {
- return handle_vminmaxnm(insn, rd, rn, rm, dp);
- } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) {
- /* VRINTA, VRINTN, VRINTP, VRINTM */
- int rounding = fp_decode_rm[extract32(insn, 16, 2)];
- return handle_vrint(insn, rd, rm, dp, rounding);
- } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) {
- /* VCVTA, VCVTN, VCVTP, VCVTM */
- int rounding = fp_decode_rm[extract32(insn, 16, 2)];
- return handle_vcvt(insn, rd, rm, dp, rounding);
- }
- return 1;
-}
-
-/* Disassemble a VFP instruction. Returns nonzero if an error occurred
- (ie. an undefined instruction). */
-static int disas_vfp_insn(DisasContext *s, uint32_t insn)
-{
- uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
- int dp, veclen;
- TCGv_i32 addr;
- TCGv_i32 tmp;
- TCGv_i32 tmp2;
-
- if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
- return 1;
- }
-
- /* FIXME: this access check should not take precedence over UNDEF
- * for invalid encodings; we will generate incorrect syndrome information
- * for attempts to execute invalid vfp/neon encodings with FP disabled.
- */
- if (s->fp_excp_el) {
- gen_exception_insn(s, 4, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
- return 0;
- }
-
- if (!s->vfp_enabled) {
- /* VFP disabled. Only allow fmxr/fmrx to/from some control regs. */
- if ((insn & 0x0fe00fff) != 0x0ee00a10)
- return 1;
- rn = (insn >> 16) & 0xf;
- if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC && rn != ARM_VFP_MVFR2
- && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0) {
- return 1;
- }
- }
-
- if (extract32(insn, 28, 4) == 0xf) {
- /* Encodings with T=1 (Thumb) or unconditional (ARM):
- * only used in v8 and above.
- */
- return disas_vfp_v8_insn(s, insn);
- }
-
- dp = ((insn & 0xf00) == 0xb00);
- switch ((insn >> 24) & 0xf) {
- case 0xe:
- if (insn & (1 << 4)) {
- /* single register transfer */
- rd = (insn >> 12) & 0xf;
- if (dp) {
- int size;
- int pass;
-
- VFP_DREG_N(rn, insn);
- if (insn & 0xf)
- return 1;
- if (insn & 0x00c00060
- && !arm_dc_feature(s, ARM_FEATURE_NEON)) {
- return 1;
- }
-
- pass = (insn >> 21) & 1;
- if (insn & (1 << 22)) {
- size = 0;
- offset = ((insn >> 5) & 3) * 8;
- } else if (insn & (1 << 5)) {
- size = 1;
- offset = (insn & (1 << 6)) ? 16 : 0;
- } else {
- size = 2;
- offset = 0;
- }
- if (insn & ARM_CP_RW_BIT) {
- /* vfp->arm */
- tmp = neon_load_reg(rn, pass);
- switch (size) {
- case 0:
- if (offset)
- tcg_gen_shri_i32(tmp, tmp, offset);
- if (insn & (1 << 23))
- gen_uxtb(tmp);
- else
- gen_sxtb(tmp);
- break;
- case 1:
- if (insn & (1 << 23)) {
- if (offset) {
- tcg_gen_shri_i32(tmp, tmp, 16);
- } else {
- gen_uxth(tmp);
- }
- } else {
- if (offset) {
- tcg_gen_sari_i32(tmp, tmp, 16);
- } else {
- gen_sxth(tmp);
- }
- }
- break;
- case 2:
- break;
- }
- store_reg(s, rd, tmp);
- } else {
- /* arm->vfp */
- tmp = load_reg(s, rd);
- if (insn & (1 << 23)) {
- /* VDUP */
- if (size == 0) {
- gen_neon_dup_u8(tmp, 0);
- } else if (size == 1) {
- gen_neon_dup_low16(tmp);
- }
- for (n = 0; n <= pass * 2; n++) {
- tmp2 = tcg_temp_new_i32();
- tcg_gen_mov_i32(tmp2, tmp);
- neon_store_reg(rn, n, tmp2);
- }
- neon_store_reg(rn, n, tmp);
- } else {
- /* VMOV */
- switch (size) {
- case 0:
- tmp2 = neon_load_reg(rn, pass);
- tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
- tcg_temp_free_i32(tmp2);
- break;
- case 1:
- tmp2 = neon_load_reg(rn, pass);
- tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
- tcg_temp_free_i32(tmp2);
- break;
- case 2:
- break;
- }
- neon_store_reg(rn, pass, tmp);
- }
- }
- } else { /* !dp */
- if ((insn & 0x6f) != 0x00)
- return 1;
- rn = VFP_SREG_N(insn);
- if (insn & ARM_CP_RW_BIT) {
- /* vfp->arm */
- if (insn & (1 << 21)) {
- /* system register */
- rn >>= 1;
-
- switch (rn) {
- case ARM_VFP_FPSID:
- /* VFP2 allows access to FSID from userspace.
- VFP3 restricts all id registers to privileged
- accesses. */
- if (IS_USER(s)
- && arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
- tmp = load_cpu_field(vfp.xregs[rn]);
- break;
- case ARM_VFP_FPEXC:
- if (IS_USER(s))
- return 1;
- tmp = load_cpu_field(vfp.xregs[rn]);
- break;
- case ARM_VFP_FPINST:
- case ARM_VFP_FPINST2:
- /* Not present in VFP3. */
- if (IS_USER(s)
- || arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
- tmp = load_cpu_field(vfp.xregs[rn]);
- break;
- case ARM_VFP_FPSCR:
- if (rd == 15) {
- tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
- tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
- } else {
- tmp = tcg_temp_new_i32();
- gen_helper_vfp_get_fpscr(tmp, cpu_env);
- }
- break;
- case ARM_VFP_MVFR2:
- if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
- return 1;
- }
- /* fall through */
- case ARM_VFP_MVFR0:
- case ARM_VFP_MVFR1:
- if (IS_USER(s)
- || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
- return 1;
- }
- tmp = load_cpu_field(vfp.xregs[rn]);
- break;
- default:
- return 1;
- }
- } else {
- gen_mov_F0_vreg(0, rn);
- tmp = gen_vfp_mrs();
- }
- if (rd == 15) {
- /* Set the 4 flag bits in the CPSR. */
- gen_set_nzcv(tmp);
- tcg_temp_free_i32(tmp);
- } else {
- store_reg(s, rd, tmp);
- }
- } else {
- /* arm->vfp */
- if (insn & (1 << 21)) {
- rn >>= 1;
- /* system register */
- switch (rn) {
- case ARM_VFP_FPSID:
- case ARM_VFP_MVFR0:
- case ARM_VFP_MVFR1:
- /* Writes are ignored. */
- break;
- case ARM_VFP_FPSCR:
- tmp = load_reg(s, rd);
- gen_helper_vfp_set_fpscr(cpu_env, tmp);
- tcg_temp_free_i32(tmp);
- gen_lookup_tb(s);
- break;
- case ARM_VFP_FPEXC:
- if (IS_USER(s))
- return 1;
- /* TODO: VFP subarchitecture support.
- * For now, keep the EN bit only */
- tmp = load_reg(s, rd);
- tcg_gen_andi_i32(tmp, tmp, 1 << 30);
- store_cpu_field(tmp, vfp.xregs[rn]);
- gen_lookup_tb(s);
- break;
- case ARM_VFP_FPINST:
- case ARM_VFP_FPINST2:
- if (IS_USER(s)) {
- return 1;
- }
- tmp = load_reg(s, rd);
- store_cpu_field(tmp, vfp.xregs[rn]);
- break;
- default:
- return 1;
- }
- } else {
- tmp = load_reg(s, rd);
- gen_vfp_msr(tmp);
- gen_mov_vreg_F0(0, rn);
- }
- }
- }
- } else {
- /* data processing */
- /* The opcode is in bits 23, 21, 20 and 6. */
- op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
- if (dp) {
- if (op == 15) {
- /* rn is opcode */
- rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
- } else {
- /* rn is register number */
- VFP_DREG_N(rn, insn);
- }
-
- if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
- ((rn & 0x1e) == 0x6))) {
- /* Integer or single/half precision destination. */
- rd = VFP_SREG_D(insn);
- } else {
- VFP_DREG_D(rd, insn);
- }
- if (op == 15 &&
- (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
- ((rn & 0x1e) == 0x4))) {
- /* VCVT from int or half precision is always from S reg
- * regardless of dp bit. VCVT with immediate frac_bits
- * has same format as SREG_M.
- */
- rm = VFP_SREG_M(insn);
- } else {
- VFP_DREG_M(rm, insn);
- }
- } else {
- rn = VFP_SREG_N(insn);
- if (op == 15 && rn == 15) {
- /* Double precision destination. */
- VFP_DREG_D(rd, insn);
- } else {
- rd = VFP_SREG_D(insn);
- }
- /* NB that we implicitly rely on the encoding for the frac_bits
- * in VCVT of fixed to float being the same as that of an SREG_M
- */
- rm = VFP_SREG_M(insn);
- }
-
- veclen = s->vec_len;
- if (op == 15 && rn > 3)
- veclen = 0;
-
- /* Shut up compiler warnings. */
- delta_m = 0;
- delta_d = 0;
- bank_mask = 0;
-
- if (veclen > 0) {
- if (dp)
- bank_mask = 0xc;
- else
- bank_mask = 0x18;
-
- /* Figure out what type of vector operation this is. */
- if ((rd & bank_mask) == 0) {
- /* scalar */
- veclen = 0;
- } else {
- if (dp)
- delta_d = (s->vec_stride >> 1) + 1;
- else
- delta_d = s->vec_stride + 1;
-
- if ((rm & bank_mask) == 0) {
- /* mixed scalar/vector */
- delta_m = 0;
- } else {
- /* vector */
- delta_m = delta_d;
- }
- }
- }
-
- /* Load the initial operands. */
- if (op == 15) {
- switch (rn) {
- case 16:
- case 17:
- /* Integer source */
- gen_mov_F0_vreg(0, rm);
- break;
- case 8:
- case 9:
- /* Compare */
- gen_mov_F0_vreg(dp, rd);
- gen_mov_F1_vreg(dp, rm);
- break;
- case 10:
- case 11:
- /* Compare with zero */
- gen_mov_F0_vreg(dp, rd);
- gen_vfp_F1_ld0(dp);
- break;
- case 20:
- case 21:
- case 22:
- case 23:
- case 28:
- case 29:
- case 30:
- case 31:
- /* Source and destination the same. */
- gen_mov_F0_vreg(dp, rd);
- break;
- case 4:
- case 5:
- case 6:
- case 7:
- /* VCVTB, VCVTT: only present with the halfprec extension
- * UNPREDICTABLE if bit 8 is set prior to ARMv8
- * (we choose to UNDEF)
- */
- if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
- !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
- return 1;
- }
- if (!extract32(rn, 1, 1)) {
- /* Half precision source. */
- gen_mov_F0_vreg(0, rm);
- break;
- }
- /* Otherwise fall through */
- default:
- /* One source operand. */
- gen_mov_F0_vreg(dp, rm);
- break;
- }
- } else {
- /* Two source operands. */
- gen_mov_F0_vreg(dp, rn);
- gen_mov_F1_vreg(dp, rm);
- }
-
- for (;;) {
- /* Perform the calculation. */
- switch (op) {
- case 0: /* VMLA: fd + (fn * fm) */
- /* Note that order of inputs to the add matters for NaNs */
- gen_vfp_F1_mul(dp);
- gen_mov_F0_vreg(dp, rd);
- gen_vfp_add(dp);
- break;
- case 1: /* VMLS: fd + -(fn * fm) */
- gen_vfp_mul(dp);
- gen_vfp_F1_neg(dp);
- gen_mov_F0_vreg(dp, rd);
- gen_vfp_add(dp);
- break;
- case 2: /* VNMLS: -fd + (fn * fm) */
- /* Note that it isn't valid to replace (-A + B) with (B - A)
- * or similar plausible looking simplifications
- * because this will give wrong results for NaNs.
- */
- gen_vfp_F1_mul(dp);
- gen_mov_F0_vreg(dp, rd);
- gen_vfp_neg(dp);
- gen_vfp_add(dp);
- break;
- case 3: /* VNMLA: -fd + -(fn * fm) */
- gen_vfp_mul(dp);
- gen_vfp_F1_neg(dp);
- gen_mov_F0_vreg(dp, rd);
- gen_vfp_neg(dp);
- gen_vfp_add(dp);
- break;
- case 4: /* mul: fn * fm */
- gen_vfp_mul(dp);
- break;
- case 5: /* nmul: -(fn * fm) */
- gen_vfp_mul(dp);
- gen_vfp_neg(dp);
- break;
- case 6: /* add: fn + fm */
- gen_vfp_add(dp);
- break;
- case 7: /* sub: fn - fm */
- gen_vfp_sub(dp);
- break;
- case 8: /* div: fn / fm */
- gen_vfp_div(dp);
- break;
- case 10: /* VFNMA : fd = muladd(-fd, fn, fm) */
- case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
- case 12: /* VFMA : fd = muladd( fd, fn, fm) */
- case 13: /* VFMS : fd = muladd( fd, -fn, fm) */
- /* These are fused multiply-add, and must be done as one
- * floating point operation with no rounding between the
- * multiplication and addition steps.
- * NB that doing the negations here as separate steps is
- * correct : an input NaN should come out with its sign bit
- * flipped if it is a negated-input.
- */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
- return 1;
- }
- if (dp) {
- TCGv_ptr fpst;
- TCGv_i64 frd;
- if (op & 1) {
- /* VFNMS, VFMS */
- gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
- }
- frd = tcg_temp_new_i64();
- tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
- if (op & 2) {
- /* VFNMA, VFNMS */
- gen_helper_vfp_negd(frd, frd);
- }
- fpst = get_fpstatus_ptr(0);
- gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
- cpu_F1d, frd, fpst);
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i64(frd);
- } else {
- TCGv_ptr fpst;
- TCGv_i32 frd;
- if (op & 1) {
- /* VFNMS, VFMS */
- gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
- }
- frd = tcg_temp_new_i32();
- tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
- if (op & 2) {
- gen_helper_vfp_negs(frd, frd);
- }
- fpst = get_fpstatus_ptr(0);
- gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
- cpu_F1s, frd, fpst);
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i32(frd);
- }
- break;
- case 14: /* fconst */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
-
- n = (insn << 12) & 0x80000000;
- i = ((insn >> 12) & 0x70) | (insn & 0xf);
- if (dp) {
- if (i & 0x40)
- i |= 0x3f80;
- else
- i |= 0x4000;
- n |= i << 16;
- tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
- } else {
- if (i & 0x40)
- i |= 0x780;
- else
- i |= 0x800;
- n |= i << 19;
- tcg_gen_movi_i32(cpu_F0s, n);
- }
- break;
- case 15: /* extension space */
- switch (rn) {
- case 0: /* cpy */
- /* no-op */
- break;
- case 1: /* abs */
- gen_vfp_abs(dp);
- break;
- case 2: /* neg */
- gen_vfp_neg(dp);
- break;
- case 3: /* sqrt */
- gen_vfp_sqrt(dp);
- break;
- case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
- {
- TCGv_ptr fpst = get_fpstatus_ptr(false);
- TCGv_i32 ahp_mode = get_ahp_flag();
- tmp = gen_vfp_mrs();
- tcg_gen_ext16u_i32(tmp, tmp);
- if (dp) {
- gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
- fpst, ahp_mode);
- } else {
- gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
- fpst, ahp_mode);
- }
- tcg_temp_free_i32(ahp_mode);
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i32(tmp);
- break;
- }
- case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */
- {
- TCGv_ptr fpst = get_fpstatus_ptr(false);
- TCGv_i32 ahp = get_ahp_flag();
- tmp = gen_vfp_mrs();
- tcg_gen_shri_i32(tmp, tmp, 16);
- if (dp) {
- gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
- fpst, ahp);
- } else {
- gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
- fpst, ahp);
- }
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(ahp);
- tcg_temp_free_ptr(fpst);
- break;
- }
- case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
- {
- TCGv_ptr fpst = get_fpstatus_ptr(false);
- TCGv_i32 ahp = get_ahp_flag();
- tmp = tcg_temp_new_i32();
-
- if (dp) {
- gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
- fpst, ahp);
- } else {
- gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
- fpst, ahp);
- }
- tcg_temp_free_i32(ahp);
- tcg_temp_free_ptr(fpst);
- gen_mov_F0_vreg(0, rd);
- tmp2 = gen_vfp_mrs();
- tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
- tcg_gen_or_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- gen_vfp_msr(tmp);
- break;
- }
- case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
- {
- TCGv_ptr fpst = get_fpstatus_ptr(false);
- TCGv_i32 ahp = get_ahp_flag();
- tmp = tcg_temp_new_i32();
- if (dp) {
- gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
- fpst, ahp);
- } else {
- gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
- fpst, ahp);
- }
- tcg_temp_free_i32(ahp);
- tcg_temp_free_ptr(fpst);
- tcg_gen_shli_i32(tmp, tmp, 16);
- gen_mov_F0_vreg(0, rd);
- tmp2 = gen_vfp_mrs();
- tcg_gen_ext16u_i32(tmp2, tmp2);
- tcg_gen_or_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- gen_vfp_msr(tmp);
- break;
- }
- case 8: /* cmp */
- gen_vfp_cmp(dp);
- break;
- case 9: /* cmpe */
- gen_vfp_cmpe(dp);
- break;
- case 10: /* cmpz */
- gen_vfp_cmp(dp);
- break;
- case 11: /* cmpez */
- gen_vfp_F1_ld0(dp);
- gen_vfp_cmpe(dp);
- break;
- case 12: /* vrintr */
- {
- TCGv_ptr fpst = get_fpstatus_ptr(0);
- if (dp) {
- gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
- } else {
- gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
- }
- tcg_temp_free_ptr(fpst);
- break;
- }
- case 13: /* vrintz */
- {
- TCGv_ptr fpst = get_fpstatus_ptr(0);
- TCGv_i32 tcg_rmode;
- tcg_rmode = tcg_const_i32(float_round_to_zero);
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- if (dp) {
- gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
- } else {
- gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
- }
- gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- tcg_temp_free_i32(tcg_rmode);
- tcg_temp_free_ptr(fpst);
- break;
- }
- case 14: /* vrintx */
- {
- TCGv_ptr fpst = get_fpstatus_ptr(0);
- if (dp) {
- gen_helper_rintd_exact(cpu_F0d, cpu_F0d, fpst);
- } else {
- gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpst);
- }
- tcg_temp_free_ptr(fpst);
- break;
- }
- case 15: /* single<->double conversion */
- if (dp)
- gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
- else
- gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
- break;
- case 16: /* fuito */
- gen_vfp_uito(dp, 0);
- break;
- case 17: /* fsito */
- gen_vfp_sito(dp, 0);
- break;
- case 20: /* fshto */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
- gen_vfp_shto(dp, 16 - rm, 0);
- break;
- case 21: /* fslto */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
- gen_vfp_slto(dp, 32 - rm, 0);
- break;
- case 22: /* fuhto */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
- gen_vfp_uhto(dp, 16 - rm, 0);
- break;
- case 23: /* fulto */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
- gen_vfp_ulto(dp, 32 - rm, 0);
- break;
- case 24: /* ftoui */
- gen_vfp_toui(dp, 0);
- break;
- case 25: /* ftouiz */
- gen_vfp_touiz(dp, 0);
- break;
- case 26: /* ftosi */
- gen_vfp_tosi(dp, 0);
- break;
- case 27: /* ftosiz */
- gen_vfp_tosiz(dp, 0);
- break;
- case 28: /* ftosh */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
- gen_vfp_tosh(dp, 16 - rm, 0);
- break;
- case 29: /* ftosl */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
- gen_vfp_tosl(dp, 32 - rm, 0);
- break;
- case 30: /* ftouh */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
- gen_vfp_touh(dp, 16 - rm, 0);
- break;
- case 31: /* ftoul */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
- gen_vfp_toul(dp, 32 - rm, 0);
- break;
- default: /* undefined */
- return 1;
- }
- break;
- default: /* undefined */
- return 1;
- }
-
- /* Write back the result. */
- if (op == 15 && (rn >= 8 && rn <= 11)) {
- /* Comparison, do nothing. */
- } else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
- (rn & 0x1e) == 0x6)) {
- /* VCVT double to int: always integer result.
- * VCVT double to half precision is always a single
- * precision result.
- */
- gen_mov_vreg_F0(0, rd);
- } else if (op == 15 && rn == 15) {
- /* conversion */
- gen_mov_vreg_F0(!dp, rd);
- } else {
- gen_mov_vreg_F0(dp, rd);
- }
-
- /* break out of the loop if we have finished */
- if (veclen == 0)
- break;
-
- if (op == 15 && delta_m == 0) {
- /* single source one-many */
- while (veclen--) {
- rd = ((rd + delta_d) & (bank_mask - 1))
- | (rd & bank_mask);
- gen_mov_vreg_F0(dp, rd);
- }
- break;
- }
- /* Setup the next operands. */
- veclen--;
- rd = ((rd + delta_d) & (bank_mask - 1))
- | (rd & bank_mask);
-
- if (op == 15) {
- /* One source operand. */
- rm = ((rm + delta_m) & (bank_mask - 1))
- | (rm & bank_mask);
- gen_mov_F0_vreg(dp, rm);
- } else {
- /* Two source operands. */
- rn = ((rn + delta_d) & (bank_mask - 1))
- | (rn & bank_mask);
- gen_mov_F0_vreg(dp, rn);
- if (delta_m) {
- rm = ((rm + delta_m) & (bank_mask - 1))
- | (rm & bank_mask);
- gen_mov_F1_vreg(dp, rm);
- }
- }
- }
- }
- break;
- case 0xc:
- case 0xd:
- if ((insn & 0x03e00000) == 0x00400000) {
- /* two-register transfer */
- rn = (insn >> 16) & 0xf;
- rd = (insn >> 12) & 0xf;
- if (dp) {
- VFP_DREG_M(rm, insn);
- } else {
- rm = VFP_SREG_M(insn);
- }
-
- if (insn & ARM_CP_RW_BIT) {
- /* vfp->arm */
- if (dp) {
- gen_mov_F0_vreg(0, rm * 2);
- tmp = gen_vfp_mrs();
- store_reg(s, rd, tmp);
- gen_mov_F0_vreg(0, rm * 2 + 1);
- tmp = gen_vfp_mrs();
- store_reg(s, rn, tmp);
- } else {
- gen_mov_F0_vreg(0, rm);
- tmp = gen_vfp_mrs();
- store_reg(s, rd, tmp);
- gen_mov_F0_vreg(0, rm + 1);
- tmp = gen_vfp_mrs();
- store_reg(s, rn, tmp);
- }
- } else {
- /* arm->vfp */
- if (dp) {
- tmp = load_reg(s, rd);
- gen_vfp_msr(tmp);
- gen_mov_vreg_F0(0, rm * 2);
- tmp = load_reg(s, rn);
- gen_vfp_msr(tmp);
- gen_mov_vreg_F0(0, rm * 2 + 1);
- } else {
- tmp = load_reg(s, rd);
- gen_vfp_msr(tmp);
- gen_mov_vreg_F0(0, rm);
- tmp = load_reg(s, rn);
- gen_vfp_msr(tmp);
- gen_mov_vreg_F0(0, rm + 1);
- }
- }
- } else {
- /* Load/store */
- rn = (insn >> 16) & 0xf;
- if (dp)
- VFP_DREG_D(rd, insn);
- else
- rd = VFP_SREG_D(insn);
- if ((insn & 0x01200000) == 0x01000000) {
- /* Single load/store */
- offset = (insn & 0xff) << 2;
- if ((insn & (1 << 23)) == 0)
- offset = -offset;
- if (s->thumb && rn == 15) {
- /* This is actually UNPREDICTABLE */
- addr = tcg_temp_new_i32();
- tcg_gen_movi_i32(addr, s->pc & ~2);
- } else {
- addr = load_reg(s, rn);
- }
- tcg_gen_addi_i32(addr, addr, offset);
- if (insn & (1 << 20)) {
- gen_vfp_ld(s, dp, addr);
- gen_mov_vreg_F0(dp, rd);
- } else {
- gen_mov_F0_vreg(dp, rd);
- gen_vfp_st(s, dp, addr);
- }
- tcg_temp_free_i32(addr);
- } else {
- /* load/store multiple */
- int w = insn & (1 << 21);
- if (dp)
- n = (insn >> 1) & 0x7f;
- else
- n = insn & 0xff;
-
- if (w && !(((insn >> 23) ^ (insn >> 24)) & 1)) {
- /* P == U , W == 1 => UNDEF */
- return 1;
- }
- if (n == 0 || (rd + n) > 32 || (dp && n > 16)) {
- /* UNPREDICTABLE cases for bad immediates: we choose to
- * UNDEF to avoid generating huge numbers of TCG ops
- */
- return 1;
- }
- if (rn == 15 && w) {
- /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
- return 1;
- }
-
- if (s->thumb && rn == 15) {
- /* This is actually UNPREDICTABLE */
- addr = tcg_temp_new_i32();
- tcg_gen_movi_i32(addr, s->pc & ~2);
- } else {
- addr = load_reg(s, rn);
- }
- if (insn & (1 << 24)) /* pre-decrement */
- tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
-
- if (s->v8m_stackcheck && rn == 13 && w) {
- /*
- * Here 'addr' is the lowest address we will store to,
- * and is either the old SP (if post-increment) or
- * the new SP (if pre-decrement). For post-increment
- * where the old value is below the limit and the new
- * value is above, it is UNKNOWN whether the limit check
- * triggers; we choose to trigger.
- */
- gen_helper_v8m_stackcheck(cpu_env, addr);
- }
-
- if (dp)
- offset = 8;
- else
- offset = 4;
- for (i = 0; i < n; i++) {
- if (insn & ARM_CP_RW_BIT) {
- /* load */
- gen_vfp_ld(s, dp, addr);
- gen_mov_vreg_F0(dp, rd + i);
- } else {
- /* store */
- gen_mov_F0_vreg(dp, rd + i);
- gen_vfp_st(s, dp, addr);
- }
- tcg_gen_addi_i32(addr, addr, offset);
- }
- if (w) {
- /* writeback */
- if (insn & (1 << 24))
- offset = -offset * n;
- else if (dp && (insn & 1))
- offset = 4;
- else
- offset = 0;
-
- if (offset != 0)
- tcg_gen_addi_i32(addr, addr, offset);
- store_reg(s, rn, addr);
- } else {
- tcg_temp_free_i32(addr);
- }
- }
- }
- break;
- default:
- /* Should never happen. */
- return 1;
- }
- return 0;
-}
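
The "fconst" case above (op == 14) rebuilds the VFP 8-bit immediate abcdefgh into a full float pattern: imm32 = a : NOT(b) : bbbbb : cdefgh : 19 zero bits. A standalone sketch of the single-precision expansion, mirroring the removed bit manipulation (the helper name and test program are illustrative only, not part of QEMU):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Expand a VFP 8-bit float immediate (abcdefgh) to a single-precision
     * bit pattern: sign = a, then NOT(b), bbbbb, cdefgh, 19 zero bits.
     */
    uint32_t vfp_expand_imm32(uint8_t imm8)
    {
        uint32_t i = imm8 & 0x7f;          /* everything below the sign bit */

        if (i & 0x40) {
            i |= 0x780;                    /* b set: exponent 0x7c..0x7f */
        } else {
            i |= 0x800;                    /* b clear: exponent 0x80..0x83 */
        }
        return ((uint32_t)(imm8 & 0x80) << 24) | (i << 19);
    }

    int main(void)
    {
        uint32_t bits = vfp_expand_imm32(0x70);   /* imm8 encoding of 1.0 */
        float f;

        memcpy(&f, &bits, sizeof(f));
        printf("0x%08x = %f\n", (unsigned)bits, f);   /* 0x3f800000 = 1.0 */
        return 0;
    }
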
-
-static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
-{
-#ifndef CONFIG_USER_ONLY
- return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
- ((s->pc - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
-#else
- return true;
-#endif
-}
-
-static void gen_goto_ptr(void)
-{
- tcg_gen_lookup_and_goto_ptr();
-}
-
-/* This will end the TB but doesn't guarantee we'll return to
- * cpu_loop_exec. Any live exit_requests will be processed as we
- * enter the next TB.
- */
-static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
-{
- if (use_goto_tb(s, dest)) {
- tcg_gen_goto_tb(n);
- gen_set_pc_im(s, dest);
- tcg_gen_exit_tb(s->base.tb, n);
- } else {
- gen_set_pc_im(s, dest);
- gen_goto_ptr();
- }
- s->base.is_jmp = DISAS_NORETURN;
-}
-
-static inline void gen_jmp (DisasContext *s, uint32_t dest)
-{
- if (unlikely(is_singlestepping(s))) {
- /* An indirect jump so that we still trigger the debug exception. */
- if (s->thumb)
- dest |= 1;
- gen_bx_im(s, dest);
- } else {
- gen_goto_tb(s, 0, dest);
- }
-}
-
-static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
-{
- if (x)
- tcg_gen_sari_i32(t0, t0, 16);
- else
- gen_sxth(t0);
- if (y)
- tcg_gen_sari_i32(t1, t1, 16);
- else
- gen_sxth(t1);
- tcg_gen_mul_i32(t0, t0, t1);
-}
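
gen_mulxy() above selects the top or bottom signed halfword of each operand before multiplying, which is the core of the SMUL<x><y>/SMLA<x><y> family. The same selection in plain C (illustrative helper, not QEMU code):

    #include <stdint.h>

    /* x/y pick the top (1) or bottom (0) signed 16-bit half of t0/t1,
     * e.g. x = y = 0 corresponds to SMULBB, x = y = 1 to SMULTT.
     */
    int32_t mulxy(uint32_t t0, uint32_t t1, int x, int y)
    {
        int32_t a = x ? (int16_t)(t0 >> 16) : (int16_t)t0;
        int32_t b = y ? (int16_t)(t1 >> 16) : (int16_t)t1;

        return a * b;
    }
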
-
-/* Return the mask of PSR bits set by a MSR instruction. */
-static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
-{
- uint32_t mask;
-
- mask = 0;
- if (flags & (1 << 0))
- mask |= 0xff;
- if (flags & (1 << 1))
- mask |= 0xff00;
- if (flags & (1 << 2))
- mask |= 0xff0000;
- if (flags & (1 << 3))
- mask |= 0xff000000;
-
- /* Mask out undefined bits. */
- mask &= ~CPSR_RESERVED;
- if (!arm_dc_feature(s, ARM_FEATURE_V4T)) {
- mask &= ~CPSR_T;
- }
- if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
-        mask &= ~CPSR_Q; /* V5TE in reality */
- }
- if (!arm_dc_feature(s, ARM_FEATURE_V6)) {
- mask &= ~(CPSR_E | CPSR_GE);
- }
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
- mask &= ~CPSR_IT;
- }
- /* Mask out execution state and reserved bits. */
- if (!spsr) {
- mask &= ~(CPSR_EXEC | CPSR_RESERVED);
- }
- /* Mask out privileged bits. */
- if (IS_USER(s))
- mask &= CPSR_USER;
- return mask;
-}
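
The flags argument of msr_mask() is the four-bit <fields> specifier from the MSR encoding; each bit selects one byte of the PSR, and the feature checks above then clear whatever this CPU does not implement. The first step in isolation (sketch, field names per the usual c/x/s/f convention):

    #include <stdint.h>

    /* MSR <fields>: bit 0 = c (PSR[7:0]),   bit 1 = x (PSR[15:8]),
     *               bit 2 = s (PSR[23:16]), bit 3 = f (PSR[31:24]).
     */
    uint32_t msr_byte_mask(int fields)
    {
        uint32_t mask = 0;

        if (fields & (1 << 0)) mask |= 0x000000ffu;
        if (fields & (1 << 1)) mask |= 0x0000ff00u;
        if (fields & (1 << 2)) mask |= 0x00ff0000u;
        if (fields & (1 << 3)) mask |= 0xff000000u;
        return mask;
    }
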
-
-/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
-static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
-{
- TCGv_i32 tmp;
- if (spsr) {
- /* ??? This is also undefined in system mode. */
- if (IS_USER(s))
- return 1;
-
- tmp = load_cpu_field(spsr);
- tcg_gen_andi_i32(tmp, tmp, ~mask);
- tcg_gen_andi_i32(t0, t0, mask);
- tcg_gen_or_i32(tmp, tmp, t0);
- store_cpu_field(tmp, spsr);
- } else {
- gen_set_cpsr(t0, mask);
- }
- tcg_temp_free_i32(t0);
- gen_lookup_tb(s);
- return 0;
-}
-
-/* Returns nonzero if access to the PSR is not permitted. */
-static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
-{
- TCGv_i32 tmp;
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, val);
- return gen_set_psr(s, mask, spsr, tmp);
-}
-
-static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
- int *tgtmode, int *regno)
-{
- /* Decode the r and sysm fields of MSR/MRS banked accesses into
- * the target mode and register number, and identify the various
- * unpredictable cases.
- * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
- * + executed in user mode
- * + using R15 as the src/dest register
- * + accessing an unimplemented register
- * + accessing a register that's inaccessible at current PL/security state*
- * + accessing a register that you could access with a different insn
- * We choose to UNDEF in all these cases.
- * Since we don't know which of the various AArch32 modes we are in
- * we have to defer some checks to runtime.
- * Accesses to Monitor mode registers from Secure EL1 (which implies
- * that EL3 is AArch64) must trap to EL3.
- *
- * If the access checks fail this function will emit code to take
- * an exception and return false. Otherwise it will return true,
- * and set *tgtmode and *regno appropriately.
- */
- int exc_target = default_exception_el(s);
-
- /* These instructions are present only in ARMv8, or in ARMv7 with the
- * Virtualization Extensions.
- */
- if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
- !arm_dc_feature(s, ARM_FEATURE_EL2)) {
- goto undef;
- }
-
- if (IS_USER(s) || rn == 15) {
- goto undef;
- }
-
- /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
- * of registers into (r, sysm).
- */
- if (r) {
- /* SPSRs for other modes */
- switch (sysm) {
- case 0xe: /* SPSR_fiq */
- *tgtmode = ARM_CPU_MODE_FIQ;
- break;
- case 0x10: /* SPSR_irq */
- *tgtmode = ARM_CPU_MODE_IRQ;
- break;
- case 0x12: /* SPSR_svc */
- *tgtmode = ARM_CPU_MODE_SVC;
- break;
- case 0x14: /* SPSR_abt */
- *tgtmode = ARM_CPU_MODE_ABT;
- break;
- case 0x16: /* SPSR_und */
- *tgtmode = ARM_CPU_MODE_UND;
- break;
- case 0x1c: /* SPSR_mon */
- *tgtmode = ARM_CPU_MODE_MON;
- break;
- case 0x1e: /* SPSR_hyp */
- *tgtmode = ARM_CPU_MODE_HYP;
- break;
- default: /* unallocated */
- goto undef;
- }
- /* We arbitrarily assign SPSR a register number of 16. */
- *regno = 16;
- } else {
- /* general purpose registers for other modes */
- switch (sysm) {
- case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
- *tgtmode = ARM_CPU_MODE_USR;
- *regno = sysm + 8;
- break;
- case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
- *tgtmode = ARM_CPU_MODE_FIQ;
- *regno = sysm;
- break;
- case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
- *tgtmode = ARM_CPU_MODE_IRQ;
- *regno = sysm & 1 ? 13 : 14;
- break;
- case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
- *tgtmode = ARM_CPU_MODE_SVC;
- *regno = sysm & 1 ? 13 : 14;
- break;
- case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
- *tgtmode = ARM_CPU_MODE_ABT;
- *regno = sysm & 1 ? 13 : 14;
- break;
- case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
- *tgtmode = ARM_CPU_MODE_UND;
- *regno = sysm & 1 ? 13 : 14;
- break;
- case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
- *tgtmode = ARM_CPU_MODE_MON;
- *regno = sysm & 1 ? 13 : 14;
- break;
- case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
- *tgtmode = ARM_CPU_MODE_HYP;
- /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
- *regno = sysm & 1 ? 13 : 17;
- break;
- default: /* unallocated */
- goto undef;
- }
- }
-
- /* Catch the 'accessing inaccessible register' cases we can detect
- * at translate time.
- */
- switch (*tgtmode) {
- case ARM_CPU_MODE_MON:
- if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
- goto undef;
- }
- if (s->current_el == 1) {
- /* If we're in Secure EL1 (which implies that EL3 is AArch64)
- * then accesses to Mon registers trap to EL3
- */
- exc_target = 3;
- goto undef;
- }
- break;
- case ARM_CPU_MODE_HYP:
- /*
- * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
- * (and so we can forbid accesses from EL2 or below). elr_hyp
- * can be accessed also from Hyp mode, so forbid accesses from
- * EL0 or EL1.
- */
- if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
- (s->current_el < 3 && *regno != 17)) {
- goto undef;
- }
- break;
- default:
- break;
- }
-
- return true;
-
-undef:
- /* If we get here then some access check did not pass */
- gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(), exc_target);
- return false;
-}
-
-static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
-{
- TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
- int tgtmode = 0, regno = 0;
-
- if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
- return;
- }
-
- /* Sync state because msr_banked() can raise exceptions */
- gen_set_condexec(s);
- gen_set_pc_im(s, s->pc - 4);
- tcg_reg = load_reg(s, rn);
- tcg_tgtmode = tcg_const_i32(tgtmode);
- tcg_regno = tcg_const_i32(regno);
- gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
- tcg_temp_free_i32(tcg_tgtmode);
- tcg_temp_free_i32(tcg_regno);
- tcg_temp_free_i32(tcg_reg);
- s->base.is_jmp = DISAS_UPDATE;
-}
-
-static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
-{
- TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
- int tgtmode = 0, regno = 0;
-
- if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
- return;
- }
-
- /* Sync state because mrs_banked() can raise exceptions */
- gen_set_condexec(s);
- gen_set_pc_im(s, s->pc - 4);
- tcg_reg = tcg_temp_new_i32();
- tcg_tgtmode = tcg_const_i32(tgtmode);
- tcg_regno = tcg_const_i32(regno);
- gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
- tcg_temp_free_i32(tcg_tgtmode);
- tcg_temp_free_i32(tcg_regno);
- store_reg(s, rn, tcg_reg);
- s->base.is_jmp = DISAS_UPDATE;
-}
-
-/* Store value to PC as for an exception return (ie don't
- * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
- * will do the masking based on the new value of the Thumb bit.
- */
-static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
-{
- tcg_gen_mov_i32(cpu_R[15], pc);
- tcg_temp_free_i32(pc);
-}
-
-/* Generate a v6 exception return. Marks both values as dead. */
-static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
-{
- store_pc_exc_ret(s, pc);
- /* The cpsr_write_eret helper will mask the low bits of PC
- * appropriately depending on the new Thumb bit, so it must
- * be called after storing the new PC.
- */
- if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
- gen_io_start();
- }
- gen_helper_cpsr_write_eret(cpu_env, cpsr);
- if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
- gen_io_end();
- }
- tcg_temp_free_i32(cpsr);
- /* Must exit loop to check un-masked IRQs */
- s->base.is_jmp = DISAS_EXIT;
-}
-
-/* Generate an old-style exception return. Marks pc as dead. */
-static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
-{
- gen_rfe(s, pc, load_cpu_field(spsr));
-}
-
-/*
- * For WFI we will halt the vCPU until an IRQ. For WFE and YIELD we
- * only call the helper when running single threaded TCG code to ensure
- * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
- * just skip this instruction. Currently the SEV/SEVL instructions
- * which are *one* of many ways to wake the CPU from WFE are not
- * implemented so we can't sleep like WFI does.
- */
-static void gen_nop_hint(DisasContext *s, int val)
-{
- switch (val) {
- /* When running in MTTCG we don't generate jumps to the yield and
- * WFE helpers as it won't affect the scheduling of other vCPUs.
- * If we wanted to more completely model WFE/SEV so we don't busy
- * spin unnecessarily we would need to do something more involved.
- */
- case 1: /* yield */
- if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
- gen_set_pc_im(s, s->pc);
- s->base.is_jmp = DISAS_YIELD;
- }
- break;
- case 3: /* wfi */
- gen_set_pc_im(s, s->pc);
- s->base.is_jmp = DISAS_WFI;
- break;
- case 2: /* wfe */
- if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
- gen_set_pc_im(s, s->pc);
- s->base.is_jmp = DISAS_WFE;
- }
- break;
- case 4: /* sev */
- case 5: /* sevl */
- /* TODO: Implement SEV, SEVL and WFE. May help SMP performance. */
- default: /* nop */
- break;
- }
-}
-
-#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
-
-static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
-{
- switch (size) {
- case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
- case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
- case 2: tcg_gen_add_i32(t0, t0, t1); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
-{
- switch (size) {
- case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
- case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
- case 2: tcg_gen_sub_i32(t0, t1, t0); break;
- default: return;
- }
-}
-
-/* 32-bit pairwise ops end up the same as the elementwise versions. */
-#define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32
-#define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32
-#define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32
-#define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32
-
-#define GEN_NEON_INTEGER_OP_ENV(name) do { \
- switch ((size << 1) | u) { \
- case 0: \
- gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 1: \
- gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 2: \
- gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 3: \
- gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 4: \
- gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 5: \
- gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
- break; \
- default: return 1; \
- }} while (0)
-
-#define GEN_NEON_INTEGER_OP(name) do { \
- switch ((size << 1) | u) { \
- case 0: \
- gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
- break; \
- case 1: \
- gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
- break; \
- case 2: \
- gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
- break; \
- case 3: \
- gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
- break; \
- case 4: \
- gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
- break; \
- case 5: \
- gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
- break; \
- default: return 1; \
- }} while (0)
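
Both macros above dispatch on ((size << 1) | u): even indices are the signed variants and odd indices the unsigned ones, walking up through 8-, 16- and 32-bit elements. Spelled out (illustrative table only, not part of the translator):

    /* Index ((size << 1) | u):
     *   0 -> _s8    1 -> _u8
     *   2 -> _s16   3 -> _u16
     *   4 -> _s32   5 -> _u32
     * Any other combination falls through to "return 1" (UNDEF).
     */
    const char *const neon_int_variant[6] = {
        "_s8", "_u8", "_s16", "_u16", "_s32", "_u32",
    };
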
-
-static TCGv_i32 neon_load_scratch(int scratch)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
- return tmp;
-}
-
-static void neon_store_scratch(int scratch, TCGv_i32 var)
-{
- tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
- tcg_temp_free_i32(var);
-}
-
-static inline TCGv_i32 neon_get_scalar(int size, int reg)
-{
- TCGv_i32 tmp;
- if (size == 1) {
- tmp = neon_load_reg(reg & 7, reg >> 4);
- if (reg & 8) {
- gen_neon_dup_high16(tmp);
- } else {
- gen_neon_dup_low16(tmp);
- }
- } else {
- tmp = neon_load_reg(reg & 15, reg >> 4);
- }
- return tmp;
-}
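
For 16-bit scalars, neon_get_scalar() loads the containing 32-bit word and relies on gen_neon_dup_high16()/gen_neon_dup_low16() (defined earlier in this file) to broadcast one halfword across the word. Roughly, in plain C (sketch of the expected behaviour, names invented):

    #include <stdint.h>

    /* Broadcast the low or high halfword of v across a 32-bit value. */
    uint32_t dup_low16(uint32_t v)
    {
        v &= 0xffff;
        return v | (v << 16);
    }

    uint32_t dup_high16(uint32_t v)
    {
        v &= 0xffff0000;
        return v | (v >> 16);
    }
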
-
-static int gen_neon_unzip(int rd, int rm, int size, int q)
-{
- TCGv_ptr pd, pm;
-
- if (!q && size == 2) {
- return 1;
- }
- pd = vfp_reg_ptr(true, rd);
- pm = vfp_reg_ptr(true, rm);
- if (q) {
- switch (size) {
- case 0:
- gen_helper_neon_qunzip8(pd, pm);
- break;
- case 1:
- gen_helper_neon_qunzip16(pd, pm);
- break;
- case 2:
- gen_helper_neon_qunzip32(pd, pm);
- break;
- default:
- abort();
- }
- } else {
- switch (size) {
- case 0:
- gen_helper_neon_unzip8(pd, pm);
- break;
- case 1:
- gen_helper_neon_unzip16(pd, pm);
- break;
- default:
- abort();
- }
- }
- tcg_temp_free_ptr(pd);
- tcg_temp_free_ptr(pm);
- return 0;
-}
-
-static int gen_neon_zip(int rd, int rm, int size, int q)
-{
- TCGv_ptr pd, pm;
-
- if (!q && size == 2) {
- return 1;
- }
- pd = vfp_reg_ptr(true, rd);
- pm = vfp_reg_ptr(true, rm);
- if (q) {
- switch (size) {
- case 0:
- gen_helper_neon_qzip8(pd, pm);
- break;
- case 1:
- gen_helper_neon_qzip16(pd, pm);
- break;
- case 2:
- gen_helper_neon_qzip32(pd, pm);
- break;
- default:
- abort();
- }
- } else {
- switch (size) {
- case 0:
- gen_helper_neon_zip8(pd, pm);
- break;
- case 1:
- gen_helper_neon_zip16(pd, pm);
- break;
- default:
- abort();
- }
- }
- tcg_temp_free_ptr(pd);
- tcg_temp_free_ptr(pm);
- return 0;
-}
-
-static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
-{
- TCGv_i32 rd, tmp;
-
- rd = tcg_temp_new_i32();
- tmp = tcg_temp_new_i32();
-
- tcg_gen_shli_i32(rd, t0, 8);
- tcg_gen_andi_i32(rd, rd, 0xff00ff00);
- tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
- tcg_gen_or_i32(rd, rd, tmp);
-
- tcg_gen_shri_i32(t1, t1, 8);
- tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
- tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
- tcg_gen_or_i32(t1, t1, tmp);
- tcg_gen_mov_i32(t0, rd);
-
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(rd);
-}
-
-static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
-{
- TCGv_i32 rd, tmp;
-
- rd = tcg_temp_new_i32();
- tmp = tcg_temp_new_i32();
-
- tcg_gen_shli_i32(rd, t0, 16);
- tcg_gen_andi_i32(tmp, t1, 0xffff);
- tcg_gen_or_i32(rd, rd, tmp);
- tcg_gen_shri_i32(t1, t1, 16);
- tcg_gen_andi_i32(tmp, t0, 0xffff0000);
- tcg_gen_or_i32(t1, t1, tmp);
- tcg_gen_mov_i32(t0, rd);
-
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(rd);
-}
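
gen_neon_trn_u8()/gen_neon_trn_u16() perform the 2x2 element transpose used by VTRN within each 32-bit chunk: even-indexed elements of the two inputs end up in one output, odd-indexed elements in the other. The byte version on plain integers (sketch; which argument maps to Dd or Dm is decided by the caller, not shown here):

    #include <stdint.h>
    #include <stdio.h>

    /* Same shuffle as the TCG version above: the result in *t0 holds the
     * even bytes of the two inputs, *t1 the odd bytes.
     */
    static void trn_u8(uint32_t *t0, uint32_t *t1)
    {
        uint32_t rd = ((*t0 << 8) & 0xff00ff00) | (*t1 & 0x00ff00ff);
        uint32_t rm = ((*t1 >> 8) & 0x00ff00ff) | (*t0 & 0xff00ff00);

        *t0 = rd;
        *t1 = rm;
    }

    int main(void)
    {
        uint32_t a = 0x33221100, b = 0x77665544;

        trn_u8(&a, &b);
        printf("%08x %08x\n", a, b);   /* prints 22660044 33775511 */
        return 0;
    }
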
-
-
-static struct {
- int nregs;
- int interleave;
- int spacing;
-} neon_ls_element_type[11] = {
- {4, 4, 1},
- {4, 4, 2},
- {4, 1, 1},
- {4, 2, 1},
- {3, 3, 1},
- {3, 3, 2},
- {3, 1, 1},
- {1, 1, 1},
- {2, 2, 1},
- {2, 2, 2},
- {2, 1, 1}
-};
-
-/* Translate a NEON load/store element instruction. Return nonzero if the
- instruction is invalid. */
-static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
-{
- int rd, rn, rm;
- int op;
- int nregs;
- int interleave;
- int spacing;
- int stride;
- int size;
- int reg;
- int pass;
- int load;
- int shift;
- int n;
- TCGv_i32 addr;
- TCGv_i32 tmp;
- TCGv_i32 tmp2;
- TCGv_i64 tmp64;
-
- /* FIXME: this access check should not take precedence over UNDEF
- * for invalid encodings; we will generate incorrect syndrome information
- * for attempts to execute invalid vfp/neon encodings with FP disabled.
- */
- if (s->fp_excp_el) {
- gen_exception_insn(s, 4, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
- return 0;
- }
-
- if (!s->vfp_enabled)
- return 1;
- VFP_DREG_D(rd, insn);
- rn = (insn >> 16) & 0xf;
- rm = insn & 0xf;
- load = (insn & (1 << 21)) != 0;
- if ((insn & (1 << 23)) == 0) {
- /* Load store all elements. */
- op = (insn >> 8) & 0xf;
- size = (insn >> 6) & 3;
- if (op > 10)
- return 1;
- /* Catch UNDEF cases for bad values of align field */
- switch (op & 0xc) {
- case 4:
- if (((insn >> 5) & 1) == 1) {
- return 1;
- }
- break;
- case 8:
- if (((insn >> 4) & 3) == 3) {
- return 1;
- }
- break;
- default:
- break;
- }
- nregs = neon_ls_element_type[op].nregs;
- interleave = neon_ls_element_type[op].interleave;
- spacing = neon_ls_element_type[op].spacing;
- if (size == 3 && (interleave | spacing) != 1)
- return 1;
- addr = tcg_temp_new_i32();
- load_reg_var(s, addr, rn);
- stride = (1 << size) * interleave;
- for (reg = 0; reg < nregs; reg++) {
- if (interleave > 2 || (interleave == 2 && nregs == 2)) {
- load_reg_var(s, addr, rn);
- tcg_gen_addi_i32(addr, addr, (1 << size) * reg);
- } else if (interleave == 2 && nregs == 4 && reg == 2) {
- load_reg_var(s, addr, rn);
- tcg_gen_addi_i32(addr, addr, 1 << size);
- }
- if (size == 3) {
- tmp64 = tcg_temp_new_i64();
- if (load) {
- gen_aa32_ld64(s, tmp64, addr, get_mem_index(s));
- neon_store_reg64(tmp64, rd);
- } else {
- neon_load_reg64(tmp64, rd);
- gen_aa32_st64(s, tmp64, addr, get_mem_index(s));
- }
- tcg_temp_free_i64(tmp64);
- tcg_gen_addi_i32(addr, addr, stride);
- } else {
- for (pass = 0; pass < 2; pass++) {
- if (size == 2) {
- if (load) {
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- neon_store_reg(rd, pass, tmp);
- } else {
- tmp = neon_load_reg(rd, pass);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- }
- tcg_gen_addi_i32(addr, addr, stride);
- } else if (size == 1) {
- if (load) {
- tmp = tcg_temp_new_i32();
- gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
- tcg_gen_addi_i32(addr, addr, stride);
- tmp2 = tcg_temp_new_i32();
- gen_aa32_ld16u(s, tmp2, addr, get_mem_index(s));
- tcg_gen_addi_i32(addr, addr, stride);
- tcg_gen_shli_i32(tmp2, tmp2, 16);
- tcg_gen_or_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- neon_store_reg(rd, pass, tmp);
- } else {
- tmp = neon_load_reg(rd, pass);
- tmp2 = tcg_temp_new_i32();
- tcg_gen_shri_i32(tmp2, tmp, 16);
- gen_aa32_st16(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- tcg_gen_addi_i32(addr, addr, stride);
- gen_aa32_st16(s, tmp2, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp2);
- tcg_gen_addi_i32(addr, addr, stride);
- }
- } else /* size == 0 */ {
- if (load) {
- tmp2 = NULL;
- for (n = 0; n < 4; n++) {
- tmp = tcg_temp_new_i32();
- gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
- tcg_gen_addi_i32(addr, addr, stride);
- if (n == 0) {
- tmp2 = tmp;
- } else {
- tcg_gen_shli_i32(tmp, tmp, n * 8);
- tcg_gen_or_i32(tmp2, tmp2, tmp);
- tcg_temp_free_i32(tmp);
- }
- }
- neon_store_reg(rd, pass, tmp2);
- } else {
- tmp2 = neon_load_reg(rd, pass);
- for (n = 0; n < 4; n++) {
- tmp = tcg_temp_new_i32();
- if (n == 0) {
- tcg_gen_mov_i32(tmp, tmp2);
- } else {
- tcg_gen_shri_i32(tmp, tmp2, n * 8);
- }
- gen_aa32_st8(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- tcg_gen_addi_i32(addr, addr, stride);
- }
- tcg_temp_free_i32(tmp2);
- }
- }
- }
- }
- rd += spacing;
- }
- tcg_temp_free_i32(addr);
- stride = nregs * 8;
- } else {
- size = (insn >> 10) & 3;
- if (size == 3) {
- /* Load single element to all lanes. */
- int a = (insn >> 4) & 1;
- if (!load) {
- return 1;
- }
- size = (insn >> 6) & 3;
- nregs = ((insn >> 8) & 3) + 1;
-
- if (size == 3) {
- if (nregs != 4 || a == 0) {
- return 1;
- }
- /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
- size = 2;
- }
- if (nregs == 1 && a == 1 && size == 0) {
- return 1;
- }
- if (nregs == 3 && a == 1) {
- return 1;
- }
- addr = tcg_temp_new_i32();
- load_reg_var(s, addr, rn);
- if (nregs == 1) {
- /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */
- tmp = gen_load_and_replicate(s, addr, size);
- tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
- tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
- if (insn & (1 << 5)) {
- tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0));
- tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1));
- }
- tcg_temp_free_i32(tmp);
- } else {
- /* VLD2/3/4 to all lanes: bit 5 indicates register stride */
- stride = (insn & (1 << 5)) ? 2 : 1;
- for (reg = 0; reg < nregs; reg++) {
- tmp = gen_load_and_replicate(s, addr, size);
- tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
- tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
- tcg_temp_free_i32(tmp);
- tcg_gen_addi_i32(addr, addr, 1 << size);
- rd += stride;
- }
- }
- tcg_temp_free_i32(addr);
- stride = (1 << size) * nregs;
- } else {
- /* Single element. */
- int idx = (insn >> 4) & 0xf;
- pass = (insn >> 7) & 1;
- switch (size) {
- case 0:
- shift = ((insn >> 5) & 3) * 8;
- stride = 1;
- break;
- case 1:
- shift = ((insn >> 6) & 1) * 16;
- stride = (insn & (1 << 5)) ? 2 : 1;
- break;
- case 2:
- shift = 0;
- stride = (insn & (1 << 6)) ? 2 : 1;
- break;
- default:
- abort();
- }
- nregs = ((insn >> 8) & 3) + 1;
- /* Catch the UNDEF cases. This is unavoidably a bit messy. */
- switch (nregs) {
- case 1:
- if (((idx & (1 << size)) != 0) ||
- (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
- return 1;
- }
- break;
- case 3:
- if ((idx & 1) != 0) {
- return 1;
- }
- /* fall through */
- case 2:
- if (size == 2 && (idx & 2) != 0) {
- return 1;
- }
- break;
- case 4:
- if ((size == 2) && ((idx & 3) == 3)) {
- return 1;
- }
- break;
- default:
- abort();
- }
- if ((rd + stride * (nregs - 1)) > 31) {
- /* Attempts to write off the end of the register file
- * are UNPREDICTABLE; we choose to UNDEF because otherwise
- * the neon_load_reg() would write off the end of the array.
- */
- return 1;
- }
- addr = tcg_temp_new_i32();
- load_reg_var(s, addr, rn);
- for (reg = 0; reg < nregs; reg++) {
- if (load) {
- tmp = tcg_temp_new_i32();
- switch (size) {
- case 0:
- gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
- break;
- case 1:
- gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
- break;
- case 2:
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- break;
- default: /* Avoid compiler warnings. */
- abort();
- }
- if (size != 2) {
- tmp2 = neon_load_reg(rd, pass);
- tcg_gen_deposit_i32(tmp, tmp2, tmp,
- shift, size ? 16 : 8);
- tcg_temp_free_i32(tmp2);
- }
- neon_store_reg(rd, pass, tmp);
- } else { /* Store */
- tmp = neon_load_reg(rd, pass);
- if (shift)
- tcg_gen_shri_i32(tmp, tmp, shift);
- switch (size) {
- case 0:
- gen_aa32_st8(s, tmp, addr, get_mem_index(s));
- break;
- case 1:
- gen_aa32_st16(s, tmp, addr, get_mem_index(s));
- break;
- case 2:
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- break;
- }
- tcg_temp_free_i32(tmp);
- }
- rd += stride;
- tcg_gen_addi_i32(addr, addr, 1 << size);
- }
- tcg_temp_free_i32(addr);
- stride = nregs * (1 << size);
- }
- }
- if (rm != 15) {
- TCGv_i32 base;
-
- base = load_reg(s, rn);
- if (rm == 13) {
- tcg_gen_addi_i32(base, base, stride);
- } else {
- TCGv_i32 index;
- index = load_reg(s, rm);
- tcg_gen_add_i32(base, base, index);
- tcg_temp_free_i32(index);
- }
- store_reg(s, rn, base);
- }
- return 0;
-}
-
-/* Bitwise select. dest = c ? t : f. Clobbers T and F. */
-static void gen_neon_bsl(TCGv_i32 dest, TCGv_i32 t, TCGv_i32 f, TCGv_i32 c)
-{
- tcg_gen_and_i32(t, t, c);
- tcg_gen_andc_i32(f, f, c);
- tcg_gen_or_i32(dest, t, f);
-}
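
gen_neon_bsl() is the shared kernel for VBSL, VBIT and VBIF: each result bit is taken from t where the corresponding bit of c is 1 and from f where it is 0; the three encodings differ only in which operands the caller passes in (see the VBSL/VBIT/VBIF cases further down). As a scalar sketch (name is illustrative):

    #include <stdint.h>

    /* Bitwise select: result bit i = c bit i ? t bit i : f bit i. */
    uint32_t bitwise_select(uint32_t c, uint32_t t, uint32_t f)
    {
        return (t & c) | (f & ~c);
    }
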
-
-static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
-{
- switch (size) {
- case 0: gen_helper_neon_narrow_u8(dest, src); break;
- case 1: gen_helper_neon_narrow_u16(dest, src); break;
- case 2: tcg_gen_extrl_i64_i32(dest, src); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
-{
- switch (size) {
- case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
- case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
- case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
-{
- switch (size) {
- case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
- case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
- case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
-{
- switch (size) {
- case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
- case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
- case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
- int q, int u)
-{
- if (q) {
- if (u) {
- switch (size) {
- case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
- case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
- default: abort();
- }
- } else {
- switch (size) {
- case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
- case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
- default: abort();
- }
- }
- } else {
- if (u) {
- switch (size) {
- case 1: gen_helper_neon_shl_u16(var, var, shift); break;
- case 2: gen_helper_neon_shl_u32(var, var, shift); break;
- default: abort();
- }
- } else {
- switch (size) {
- case 1: gen_helper_neon_shl_s16(var, var, shift); break;
- case 2: gen_helper_neon_shl_s32(var, var, shift); break;
- default: abort();
- }
- }
- }
-}
-
-static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
-{
- if (u) {
- switch (size) {
- case 0: gen_helper_neon_widen_u8(dest, src); break;
- case 1: gen_helper_neon_widen_u16(dest, src); break;
- case 2: tcg_gen_extu_i32_i64(dest, src); break;
- default: abort();
- }
- } else {
- switch (size) {
- case 0: gen_helper_neon_widen_s8(dest, src); break;
- case 1: gen_helper_neon_widen_s16(dest, src); break;
- case 2: tcg_gen_ext_i32_i64(dest, src); break;
- default: abort();
- }
- }
- tcg_temp_free_i32(src);
-}
-
-static inline void gen_neon_addl(int size)
-{
- switch (size) {
- case 0: gen_helper_neon_addl_u16(CPU_V001); break;
- case 1: gen_helper_neon_addl_u32(CPU_V001); break;
- case 2: tcg_gen_add_i64(CPU_V001); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_subl(int size)
-{
- switch (size) {
- case 0: gen_helper_neon_subl_u16(CPU_V001); break;
- case 1: gen_helper_neon_subl_u32(CPU_V001); break;
- case 2: tcg_gen_sub_i64(CPU_V001); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_negl(TCGv_i64 var, int size)
-{
- switch (size) {
- case 0: gen_helper_neon_negl_u16(var, var); break;
- case 1: gen_helper_neon_negl_u32(var, var); break;
- case 2:
- tcg_gen_neg_i64(var, var);
- break;
- default: abort();
- }
-}
-
-static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
-{
- switch (size) {
- case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
- case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
- default: abort();
- }
-}
-
-static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
- int size, int u)
-{
- TCGv_i64 tmp;
-
- switch ((size << 1) | u) {
- case 0: gen_helper_neon_mull_s8(dest, a, b); break;
- case 1: gen_helper_neon_mull_u8(dest, a, b); break;
- case 2: gen_helper_neon_mull_s16(dest, a, b); break;
- case 3: gen_helper_neon_mull_u16(dest, a, b); break;
- case 4:
- tmp = gen_muls_i64_i32(a, b);
- tcg_gen_mov_i64(dest, tmp);
- tcg_temp_free_i64(tmp);
- break;
- case 5:
- tmp = gen_mulu_i64_i32(a, b);
- tcg_gen_mov_i64(dest, tmp);
- tcg_temp_free_i64(tmp);
- break;
- default: abort();
- }
-
- /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
- Don't forget to clean them now. */
- if (size < 2) {
- tcg_temp_free_i32(a);
- tcg_temp_free_i32(b);
- }
-}
-
-static void gen_neon_narrow_op(int op, int u, int size,
- TCGv_i32 dest, TCGv_i64 src)
-{
- if (op) {
- if (u) {
- gen_neon_unarrow_sats(size, dest, src);
- } else {
- gen_neon_narrow(size, dest, src);
- }
- } else {
- if (u) {
- gen_neon_narrow_satu(size, dest, src);
- } else {
- gen_neon_narrow_sats(size, dest, src);
- }
- }
-}
-
-/* Symbolic constants for op fields for Neon 3-register same-length.
- * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
- * table A7-9.
- */
-#define NEON_3R_VHADD 0
-#define NEON_3R_VQADD 1
-#define NEON_3R_VRHADD 2
-#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
-#define NEON_3R_VHSUB 4
-#define NEON_3R_VQSUB 5
-#define NEON_3R_VCGT 6
-#define NEON_3R_VCGE 7
-#define NEON_3R_VSHL 8
-#define NEON_3R_VQSHL 9
-#define NEON_3R_VRSHL 10
-#define NEON_3R_VQRSHL 11
-#define NEON_3R_VMAX 12
-#define NEON_3R_VMIN 13
-#define NEON_3R_VABD 14
-#define NEON_3R_VABA 15
-#define NEON_3R_VADD_VSUB 16
-#define NEON_3R_VTST_VCEQ 17
-#define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */
-#define NEON_3R_VMUL 19
-#define NEON_3R_VPMAX 20
-#define NEON_3R_VPMIN 21
-#define NEON_3R_VQDMULH_VQRDMULH 22
-#define NEON_3R_VPADD_VQRDMLAH 23
-#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
-#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
-#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
-#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
-#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
-#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
-#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
-#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
-
-static const uint8_t neon_3r_sizes[] = {
- [NEON_3R_VHADD] = 0x7,
- [NEON_3R_VQADD] = 0xf,
- [NEON_3R_VRHADD] = 0x7,
- [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
- [NEON_3R_VHSUB] = 0x7,
- [NEON_3R_VQSUB] = 0xf,
- [NEON_3R_VCGT] = 0x7,
- [NEON_3R_VCGE] = 0x7,
- [NEON_3R_VSHL] = 0xf,
- [NEON_3R_VQSHL] = 0xf,
- [NEON_3R_VRSHL] = 0xf,
- [NEON_3R_VQRSHL] = 0xf,
- [NEON_3R_VMAX] = 0x7,
- [NEON_3R_VMIN] = 0x7,
- [NEON_3R_VABD] = 0x7,
- [NEON_3R_VABA] = 0x7,
- [NEON_3R_VADD_VSUB] = 0xf,
- [NEON_3R_VTST_VCEQ] = 0x7,
- [NEON_3R_VML] = 0x7,
- [NEON_3R_VMUL] = 0x7,
- [NEON_3R_VPMAX] = 0x7,
- [NEON_3R_VPMIN] = 0x7,
- [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
- [NEON_3R_VPADD_VQRDMLAH] = 0x7,
- [NEON_3R_SHA] = 0xf, /* size field encodes op type */
- [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
- [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
-};
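
As with neon_2rm_sizes[] below, each neon_3r_sizes[] entry is a bitmap of the element sizes the op accepts (bit n set means size value n is legal), so the decoder can reject a bad op/size pair with one mask test. A sketch of that test in isolation (illustrative only):

    #include <stdbool.h>
    #include <stdint.h>

    /* size: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit elements. */
    bool neon_size_ok(uint8_t allowed_sizes, int size)
    {
        return (allowed_sizes & (1 << size)) != 0;
    }

    /* e.g. neon_size_ok(0x7, 3) is false: no 64-bit variant, so UNDEF. */
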
-
-/* Symbolic constants for op fields for Neon 2-register miscellaneous.
- * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
- * table A7-13.
- */
-#define NEON_2RM_VREV64 0
-#define NEON_2RM_VREV32 1
-#define NEON_2RM_VREV16 2
-#define NEON_2RM_VPADDL 4
-#define NEON_2RM_VPADDL_U 5
-#define NEON_2RM_AESE 6 /* Includes AESD */
-#define NEON_2RM_AESMC 7 /* Includes AESIMC */
-#define NEON_2RM_VCLS 8
-#define NEON_2RM_VCLZ 9
-#define NEON_2RM_VCNT 10
-#define NEON_2RM_VMVN 11
-#define NEON_2RM_VPADAL 12
-#define NEON_2RM_VPADAL_U 13
-#define NEON_2RM_VQABS 14
-#define NEON_2RM_VQNEG 15
-#define NEON_2RM_VCGT0 16
-#define NEON_2RM_VCGE0 17
-#define NEON_2RM_VCEQ0 18
-#define NEON_2RM_VCLE0 19
-#define NEON_2RM_VCLT0 20
-#define NEON_2RM_SHA1H 21
-#define NEON_2RM_VABS 22
-#define NEON_2RM_VNEG 23
-#define NEON_2RM_VCGT0_F 24
-#define NEON_2RM_VCGE0_F 25
-#define NEON_2RM_VCEQ0_F 26
-#define NEON_2RM_VCLE0_F 27
-#define NEON_2RM_VCLT0_F 28
-#define NEON_2RM_VABS_F 30
-#define NEON_2RM_VNEG_F 31
-#define NEON_2RM_VSWP 32
-#define NEON_2RM_VTRN 33
-#define NEON_2RM_VUZP 34
-#define NEON_2RM_VZIP 35
-#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
-#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
-#define NEON_2RM_VSHLL 38
-#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
-#define NEON_2RM_VRINTN 40
-#define NEON_2RM_VRINTX 41
-#define NEON_2RM_VRINTA 42
-#define NEON_2RM_VRINTZ 43
-#define NEON_2RM_VCVT_F16_F32 44
-#define NEON_2RM_VRINTM 45
-#define NEON_2RM_VCVT_F32_F16 46
-#define NEON_2RM_VRINTP 47
-#define NEON_2RM_VCVTAU 48
-#define NEON_2RM_VCVTAS 49
-#define NEON_2RM_VCVTNU 50
-#define NEON_2RM_VCVTNS 51
-#define NEON_2RM_VCVTPU 52
-#define NEON_2RM_VCVTPS 53
-#define NEON_2RM_VCVTMU 54
-#define NEON_2RM_VCVTMS 55
-#define NEON_2RM_VRECPE 56
-#define NEON_2RM_VRSQRTE 57
-#define NEON_2RM_VRECPE_F 58
-#define NEON_2RM_VRSQRTE_F 59
-#define NEON_2RM_VCVT_FS 60
-#define NEON_2RM_VCVT_FU 61
-#define NEON_2RM_VCVT_SF 62
-#define NEON_2RM_VCVT_UF 63
-
-static int neon_2rm_is_float_op(int op)
-{
- /* Return true if this neon 2reg-misc op is float-to-float */
- return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
- (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) ||
- op == NEON_2RM_VRINTM ||
- (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) ||
- op >= NEON_2RM_VRECPE_F);
-}
-
-static bool neon_2rm_is_v8_op(int op)
-{
- /* Return true if this neon 2reg-misc op is ARMv8 and up */
- switch (op) {
- case NEON_2RM_VRINTN:
- case NEON_2RM_VRINTA:
- case NEON_2RM_VRINTM:
- case NEON_2RM_VRINTP:
- case NEON_2RM_VRINTZ:
- case NEON_2RM_VRINTX:
- case NEON_2RM_VCVTAU:
- case NEON_2RM_VCVTAS:
- case NEON_2RM_VCVTNU:
- case NEON_2RM_VCVTNS:
- case NEON_2RM_VCVTPU:
- case NEON_2RM_VCVTPS:
- case NEON_2RM_VCVTMU:
- case NEON_2RM_VCVTMS:
- return true;
- default:
- return false;
- }
-}
-
-/* Each entry in this array has bit n set if the insn allows
- * size value n (otherwise it will UNDEF). Since unallocated
- * op values will have no bits set they always UNDEF.
- */
-static const uint8_t neon_2rm_sizes[] = {
- [NEON_2RM_VREV64] = 0x7,
- [NEON_2RM_VREV32] = 0x3,
- [NEON_2RM_VREV16] = 0x1,
- [NEON_2RM_VPADDL] = 0x7,
- [NEON_2RM_VPADDL_U] = 0x7,
- [NEON_2RM_AESE] = 0x1,
- [NEON_2RM_AESMC] = 0x1,
- [NEON_2RM_VCLS] = 0x7,
- [NEON_2RM_VCLZ] = 0x7,
- [NEON_2RM_VCNT] = 0x1,
- [NEON_2RM_VMVN] = 0x1,
- [NEON_2RM_VPADAL] = 0x7,
- [NEON_2RM_VPADAL_U] = 0x7,
- [NEON_2RM_VQABS] = 0x7,
- [NEON_2RM_VQNEG] = 0x7,
- [NEON_2RM_VCGT0] = 0x7,
- [NEON_2RM_VCGE0] = 0x7,
- [NEON_2RM_VCEQ0] = 0x7,
- [NEON_2RM_VCLE0] = 0x7,
- [NEON_2RM_VCLT0] = 0x7,
- [NEON_2RM_SHA1H] = 0x4,
- [NEON_2RM_VABS] = 0x7,
- [NEON_2RM_VNEG] = 0x7,
- [NEON_2RM_VCGT0_F] = 0x4,
- [NEON_2RM_VCGE0_F] = 0x4,
- [NEON_2RM_VCEQ0_F] = 0x4,
- [NEON_2RM_VCLE0_F] = 0x4,
- [NEON_2RM_VCLT0_F] = 0x4,
- [NEON_2RM_VABS_F] = 0x4,
- [NEON_2RM_VNEG_F] = 0x4,
- [NEON_2RM_VSWP] = 0x1,
- [NEON_2RM_VTRN] = 0x7,
- [NEON_2RM_VUZP] = 0x7,
- [NEON_2RM_VZIP] = 0x7,
- [NEON_2RM_VMOVN] = 0x7,
- [NEON_2RM_VQMOVN] = 0x7,
- [NEON_2RM_VSHLL] = 0x7,
- [NEON_2RM_SHA1SU1] = 0x4,
- [NEON_2RM_VRINTN] = 0x4,
- [NEON_2RM_VRINTX] = 0x4,
- [NEON_2RM_VRINTA] = 0x4,
- [NEON_2RM_VRINTZ] = 0x4,
- [NEON_2RM_VCVT_F16_F32] = 0x2,
- [NEON_2RM_VRINTM] = 0x4,
- [NEON_2RM_VCVT_F32_F16] = 0x2,
- [NEON_2RM_VRINTP] = 0x4,
- [NEON_2RM_VCVTAU] = 0x4,
- [NEON_2RM_VCVTAS] = 0x4,
- [NEON_2RM_VCVTNU] = 0x4,
- [NEON_2RM_VCVTNS] = 0x4,
- [NEON_2RM_VCVTPU] = 0x4,
- [NEON_2RM_VCVTPS] = 0x4,
- [NEON_2RM_VCVTMU] = 0x4,
- [NEON_2RM_VCVTMS] = 0x4,
- [NEON_2RM_VRECPE] = 0x4,
- [NEON_2RM_VRSQRTE] = 0x4,
- [NEON_2RM_VRECPE_F] = 0x4,
- [NEON_2RM_VRSQRTE_F] = 0x4,
- [NEON_2RM_VCVT_FS] = 0x4,
- [NEON_2RM_VCVT_FU] = 0x4,
- [NEON_2RM_VCVT_SF] = 0x4,
- [NEON_2RM_VCVT_UF] = 0x4,
-};
-
-
-/* Expand v8.1 simd helper. */
-static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
- int q, int rd, int rn, int rm)
-{
- if (arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
- int opr_sz = (1 + q) * 8;
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), cpu_env,
- opr_sz, opr_sz, 0, fn);
- return 0;
- }
- return 1;
-}
-
-/* Translate a NEON data processing instruction. Return nonzero if the
- instruction is invalid.
- We process data in a mixture of 32-bit and 64-bit chunks.
- Mostly we use 32-bit chunks so we can use normal scalar instructions. */
-
-static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
-{
- int op;
- int q;
- int rd, rn, rm;
- int size;
- int shift;
- int pass;
- int count;
- int pairwise;
- int u;
- uint32_t imm, mask;
- TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
- TCGv_ptr ptr1, ptr2, ptr3;
- TCGv_i64 tmp64;
-
- /* FIXME: this access check should not take precedence over UNDEF
- * for invalid encodings; we will generate incorrect syndrome information
- * for attempts to execute invalid vfp/neon encodings with FP disabled.
- */
- if (s->fp_excp_el) {
- gen_exception_insn(s, 4, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
- return 0;
- }
-
- if (!s->vfp_enabled)
- return 1;
- q = (insn & (1 << 6)) != 0;
- u = (insn >> 24) & 1;
- VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
- VFP_DREG_M(rm, insn);
- size = (insn >> 20) & 3;
- if ((insn & (1 << 23)) == 0) {
- /* Three register same length. */
- op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
- /* Catch invalid op and bad size combinations: UNDEF */
- if ((neon_3r_sizes[op] & (1 << size)) == 0) {
- return 1;
- }
- /* All insns of this form UNDEF for either this condition or the
- * superset of cases "Q==1"; we catch the latter later.
- */
- if (q && ((rd | rn | rm) & 1)) {
- return 1;
- }
- switch (op) {
- case NEON_3R_SHA:
- /* The SHA-1/SHA-256 3-register instructions require special
- * treatment here, as their size field is overloaded as an
- * op type selector, and they all consume their input in a
- * single pass.
- */
- if (!q) {
- return 1;
- }
- if (!u) { /* SHA-1 */
- if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rn);
- ptr3 = vfp_reg_ptr(true, rm);
- tmp4 = tcg_const_i32(size);
- gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
- tcg_temp_free_i32(tmp4);
- } else { /* SHA-256 */
- if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256) || size == 3) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rn);
- ptr3 = vfp_reg_ptr(true, rm);
- switch (size) {
- case 0:
- gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
- break;
- case 1:
- gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
- break;
- case 2:
- gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
- break;
- }
- }
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- tcg_temp_free_ptr(ptr3);
- return 0;
-
- case NEON_3R_VPADD_VQRDMLAH:
- if (!u) {
- break; /* VPADD */
- }
- /* VQRDMLAH */
- switch (size) {
- case 1:
- return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
- q, rd, rn, rm);
- case 2:
- return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
- q, rd, rn, rm);
- }
- return 1;
-
- case NEON_3R_VFM_VQRDMLSH:
- if (!u) {
-                /* VFMA, VFMS */
- if (size == 1) {
- return 1;
- }
- break;
- }
- /* VQRDMLSH */
- switch (size) {
- case 1:
- return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
- q, rd, rn, rm);
- case 2:
- return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
- q, rd, rn, rm);
- }
- return 1;
- }
- if (size == 3 && op != NEON_3R_LOGIC) {
- /* 64-bit element instructions. */
- for (pass = 0; pass < (q ? 2 : 1); pass++) {
- neon_load_reg64(cpu_V0, rn + pass);
- neon_load_reg64(cpu_V1, rm + pass);
- switch (op) {
- case NEON_3R_VQADD:
- if (u) {
- gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- } else {
- gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- }
- break;
- case NEON_3R_VQSUB:
- if (u) {
- gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- } else {
- gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- }
- break;
- case NEON_3R_VSHL:
- if (u) {
- gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
- } else {
- gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
- }
- break;
- case NEON_3R_VQSHL:
- if (u) {
- gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- } else {
- gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- }
- break;
- case NEON_3R_VRSHL:
- if (u) {
- gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
- } else {
- gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
- }
- break;
- case NEON_3R_VQRSHL:
- if (u) {
- gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- } else {
- gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- }
- break;
- case NEON_3R_VADD_VSUB:
- if (u) {
- tcg_gen_sub_i64(CPU_V001);
- } else {
- tcg_gen_add_i64(CPU_V001);
- }
- break;
- default:
- abort();
- }
- neon_store_reg64(cpu_V0, rd + pass);
- }
- return 0;
- }
- pairwise = 0;
- switch (op) {
- case NEON_3R_VSHL:
- case NEON_3R_VQSHL:
- case NEON_3R_VRSHL:
- case NEON_3R_VQRSHL:
- {
- int rtmp;
- /* Shift instruction operands are reversed. */
- rtmp = rn;
- rn = rm;
- rm = rtmp;
- }
- break;
- case NEON_3R_VPADD_VQRDMLAH:
- case NEON_3R_VPMAX:
- case NEON_3R_VPMIN:
- pairwise = 1;
- break;
- case NEON_3R_FLOAT_ARITH:
- pairwise = (u && size < 2); /* if VPADD (float) */
- break;
- case NEON_3R_FLOAT_MINMAX:
- pairwise = u; /* if VPMIN/VPMAX (float) */
- break;
- case NEON_3R_FLOAT_CMP:
- if (!u && size) {
- /* no encoding for U=0 C=1x */
- return 1;
- }
- break;
- case NEON_3R_FLOAT_ACMP:
- if (!u) {
- return 1;
- }
- break;
- case NEON_3R_FLOAT_MISC:
- /* VMAXNM/VMINNM in ARMv8 */
- if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
- return 1;
- }
- break;
- case NEON_3R_VMUL:
- if (u && (size != 0)) {
- /* UNDEF on invalid size for polynomial subcase */
- return 1;
- }
- break;
- case NEON_3R_VFM_VQRDMLSH:
- if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
- return 1;
- }
- break;
- default:
- break;
- }
-
- if (pairwise && q) {
- /* All the pairwise insns UNDEF if Q is set */
- return 1;
- }
-
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
-
- if (pairwise) {
- /* Pairwise. */
- if (pass < 1) {
- tmp = neon_load_reg(rn, 0);
- tmp2 = neon_load_reg(rn, 1);
- } else {
- tmp = neon_load_reg(rm, 0);
- tmp2 = neon_load_reg(rm, 1);
- }
- } else {
- /* Elementwise. */
- tmp = neon_load_reg(rn, pass);
- tmp2 = neon_load_reg(rm, pass);
- }
- switch (op) {
- case NEON_3R_VHADD:
- GEN_NEON_INTEGER_OP(hadd);
- break;
- case NEON_3R_VQADD:
- GEN_NEON_INTEGER_OP_ENV(qadd);
- break;
- case NEON_3R_VRHADD:
- GEN_NEON_INTEGER_OP(rhadd);
- break;
- case NEON_3R_LOGIC: /* Logic ops. */
- switch ((u << 2) | size) {
- case 0: /* VAND */
- tcg_gen_and_i32(tmp, tmp, tmp2);
- break;
- case 1: /* BIC */
- tcg_gen_andc_i32(tmp, tmp, tmp2);
- break;
- case 2: /* VORR */
- tcg_gen_or_i32(tmp, tmp, tmp2);
- break;
- case 3: /* VORN */
- tcg_gen_orc_i32(tmp, tmp, tmp2);
- break;
- case 4: /* VEOR */
- tcg_gen_xor_i32(tmp, tmp, tmp2);
- break;
- case 5: /* VBSL */
- tmp3 = neon_load_reg(rd, pass);
- gen_neon_bsl(tmp, tmp, tmp2, tmp3);
- tcg_temp_free_i32(tmp3);
- break;
- case 6: /* VBIT */
- tmp3 = neon_load_reg(rd, pass);
- gen_neon_bsl(tmp, tmp, tmp3, tmp2);
- tcg_temp_free_i32(tmp3);
- break;
- case 7: /* VBIF */
- tmp3 = neon_load_reg(rd, pass);
- gen_neon_bsl(tmp, tmp3, tmp, tmp2);
- tcg_temp_free_i32(tmp3);
- break;
- }
- break;
- case NEON_3R_VHSUB:
- GEN_NEON_INTEGER_OP(hsub);
- break;
- case NEON_3R_VQSUB:
- GEN_NEON_INTEGER_OP_ENV(qsub);
- break;
- case NEON_3R_VCGT:
- GEN_NEON_INTEGER_OP(cgt);
- break;
- case NEON_3R_VCGE:
- GEN_NEON_INTEGER_OP(cge);
- break;
- case NEON_3R_VSHL:
- GEN_NEON_INTEGER_OP(shl);
- break;
- case NEON_3R_VQSHL:
- GEN_NEON_INTEGER_OP_ENV(qshl);
- break;
- case NEON_3R_VRSHL:
- GEN_NEON_INTEGER_OP(rshl);
- break;
- case NEON_3R_VQRSHL:
- GEN_NEON_INTEGER_OP_ENV(qrshl);
- break;
- case NEON_3R_VMAX:
- GEN_NEON_INTEGER_OP(max);
- break;
- case NEON_3R_VMIN:
- GEN_NEON_INTEGER_OP(min);
- break;
- case NEON_3R_VABD:
- GEN_NEON_INTEGER_OP(abd);
- break;
- case NEON_3R_VABA:
- GEN_NEON_INTEGER_OP(abd);
- tcg_temp_free_i32(tmp2);
- tmp2 = neon_load_reg(rd, pass);
- gen_neon_add(size, tmp, tmp2);
- break;
- case NEON_3R_VADD_VSUB:
- if (!u) { /* VADD */
- gen_neon_add(size, tmp, tmp2);
- } else { /* VSUB */
- switch (size) {
- case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
- case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
- default: abort();
- }
- }
- break;
- case NEON_3R_VTST_VCEQ:
- if (!u) { /* VTST */
- switch (size) {
- case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
- case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
- default: abort();
- }
- } else { /* VCEQ */
- switch (size) {
- case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
- case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
- default: abort();
- }
- }
- break;
-        case NEON_3R_VML: /* VMLA, VMLAL, VMLS, VMLSL */
- switch (size) {
- case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
- case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
- default: abort();
- }
- tcg_temp_free_i32(tmp2);
- tmp2 = neon_load_reg(rd, pass);
- if (u) { /* VMLS */
- gen_neon_rsb(size, tmp, tmp2);
- } else { /* VMLA */
- gen_neon_add(size, tmp, tmp2);
- }
- break;
- case NEON_3R_VMUL:
- if (u) { /* polynomial */
- gen_helper_neon_mul_p8(tmp, tmp, tmp2);
- } else { /* Integer */
- switch (size) {
- case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
- case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
- default: abort();
- }
- }
- break;
- case NEON_3R_VPMAX:
- GEN_NEON_INTEGER_OP(pmax);
- break;
- case NEON_3R_VPMIN:
- GEN_NEON_INTEGER_OP(pmin);
- break;
- case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
- if (!u) { /* VQDMULH */
- switch (size) {
- case 1:
- gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
- break;
- default: abort();
- }
- } else { /* VQRDMULH */
- switch (size) {
- case 1:
- gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
- break;
- default: abort();
- }
- }
- break;
- case NEON_3R_VPADD_VQRDMLAH:
- switch (size) {
- case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
- case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
- default: abort();
- }
- break;
- case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- switch ((u << 2) | size) {
- case 0: /* VADD */
- case 4: /* VPADD */
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
- break;
- case 2: /* VSUB */
- gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
- break;
- case 6: /* VABD */
- gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
- break;
- default:
- abort();
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_MULTIPLY:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
- if (!u) {
- tcg_temp_free_i32(tmp2);
- tmp2 = neon_load_reg(rd, pass);
- if (size == 0) {
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
- }
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_CMP:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (!u) {
- gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
- } else {
- if (size == 0) {
- gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
- }
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_ACMP:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (size == 0) {
- gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_MINMAX:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (size == 0) {
- gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_MISC:
- if (u) {
- /* VMAXNM/VMINNM */
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (size == 0) {
- gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
- }
- tcg_temp_free_ptr(fpstatus);
- } else {
- if (size == 0) {
- gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
- } else {
- gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
- }
- }
- break;
- case NEON_3R_VFM_VQRDMLSH:
- {
- /* VFMA, VFMS: fused multiply-add */
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- TCGv_i32 tmp3 = neon_load_reg(rd, pass);
- if (size) {
- /* VFMS */
- gen_helper_vfp_negs(tmp, tmp);
- }
- gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
- tcg_temp_free_i32(tmp3);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- default:
- abort();
- }
- tcg_temp_free_i32(tmp2);
-
- /* Save the result. For elementwise operations we can put it
- straight into the destination register. For pairwise operations
- we have to be careful to avoid clobbering the source operands. */
- if (pairwise && rd == rm) {
- neon_store_scratch(pass, tmp);
- } else {
- neon_store_reg(rd, pass, tmp);
- }
-
- } /* for pass */
- if (pairwise && rd == rm) {
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
- tmp = neon_load_scratch(pass);
- neon_store_reg(rd, pass, tmp);
- }
- }
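/*
 * A concrete case of the pairwise hazard noted above (illustrative only):
 * for VPADD d0, d1, d0 we have rd == rm, and a later pass still needs to
 * read d0 (== rm) after an earlier pass has produced a result destined
 * for d0.  Parking the per-pass results in the scratch area and copying
 * them into d0 only after the loop avoids reading a partially
 * overwritten source.
 */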
- /* End of 3 register same size operations. */
- } else if (insn & (1 << 4)) {
- if ((insn & 0x00380080) != 0) {
- /* Two registers and shift. */
- op = (insn >> 8) & 0xf;
- if (insn & (1 << 7)) {
- /* 64-bit shift. */
- if (op > 7) {
- return 1;
- }
- size = 3;
- } else {
- size = 2;
- while ((insn & (1 << (size + 19))) == 0)
- size--;
- }
- shift = (insn >> 16) & ((1 << (3 + size)) - 1);
- /* To avoid excessive duplication of ops we implement shift
- by immediate using the variable shift operations. */
- if (op < 8) {
- /* Shift by immediate:
- VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
- if (q && ((rd | rm) & 1)) {
- return 1;
- }
- if (!u && (op == 4 || op == 6)) {
- return 1;
- }
- /* Right shifts are encoded as N - shift, where N is the
- element size in bits. */
- if (op <= 4)
- shift = shift - (1 << (size + 3));
- if (size == 3) {
- count = q + 1;
- } else {
- count = q ? 4 : 2;
- }
- switch (size) {
- case 0:
- imm = (uint8_t) shift;
- imm |= imm << 8;
- imm |= imm << 16;
- break;
- case 1:
- imm = (uint16_t) shift;
- imm |= imm << 16;
- break;
- case 2:
- case 3:
- imm = shift;
- break;
- default:
- abort();
- }
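/*
 * Worked example of the encoding above, for illustration: a VSHR with
 * size == 0 (8-bit elements) and an encoded shift field of 7 becomes
 * shift = 7 - (1 << 3) = -1; imm is then (uint8_t)-1 = 0xff replicated
 * to 0xffffffff, so every lane of the shift-count operand holds -1 and
 * the variable-shift helpers below perform a right shift by one bit
 * per element.
 */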
-
- for (pass = 0; pass < count; pass++) {
- if (size == 3) {
- neon_load_reg64(cpu_V0, rm + pass);
- tcg_gen_movi_i64(cpu_V1, imm);
- switch (op) {
- case 0: /* VSHR */
- case 1: /* VSRA */
- if (u)
- gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
- else
- gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
- break;
- case 2: /* VRSHR */
- case 3: /* VRSRA */
- if (u)
- gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
- else
- gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
- break;
- case 4: /* VSRI */
- case 5: /* VSHL, VSLI */
- gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
- break;
- case 6: /* VQSHLU */
- gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- break;
- case 7: /* VQSHL */
- if (u) {
- gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- } else {
- gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- }
- break;
- }
- if (op == 1 || op == 3) {
- /* Accumulate. */
- neon_load_reg64(cpu_V1, rd + pass);
- tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
- } else if (op == 4 || (op == 5 && u)) {
- /* Insert */
- neon_load_reg64(cpu_V1, rd + pass);
- uint64_t mask;
- if (shift < -63 || shift > 63) {
- mask = 0;
- } else {
- if (op == 4) {
- mask = 0xffffffffffffffffull >> -shift;
- } else {
- mask = 0xffffffffffffffffull << shift;
- }
- }
- tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask);
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
- }
- neon_store_reg64(cpu_V0, rd + pass);
- } else { /* size < 3 */
- /* Operands in tmp and tmp2. */
- tmp = neon_load_reg(rm, pass);
- tmp2 = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp2, imm);
- switch (op) {
- case 0: /* VSHR */
- case 1: /* VSRA */
- GEN_NEON_INTEGER_OP(shl);
- break;
- case 2: /* VRSHR */
- case 3: /* VRSRA */
- GEN_NEON_INTEGER_OP(rshl);
- break;
- case 4: /* VSRI */
- case 5: /* VSHL, VSLI */
- switch (size) {
- case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
- case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
- default: abort();
- }
- break;
- case 6: /* VQSHLU */
- switch (size) {
- case 0:
- gen_helper_neon_qshlu_s8(tmp, cpu_env,
- tmp, tmp2);
- break;
- case 1:
- gen_helper_neon_qshlu_s16(tmp, cpu_env,
- tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_qshlu_s32(tmp, cpu_env,
- tmp, tmp2);
- break;
- default:
- abort();
- }
- break;
- case 7: /* VQSHL */
- GEN_NEON_INTEGER_OP_ENV(qshl);
- break;
- }
- tcg_temp_free_i32(tmp2);
-
- if (op == 1 || op == 3) {
- /* Accumulate. */
- tmp2 = neon_load_reg(rd, pass);
- gen_neon_add(size, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- } else if (op == 4 || (op == 5 && u)) {
- /* Insert */
- switch (size) {
- case 0:
- if (op == 4)
- mask = 0xff >> -shift;
- else
- mask = (uint8_t)(0xff << shift);
- mask |= mask << 8;
- mask |= mask << 16;
- break;
- case 1:
- if (op == 4)
- mask = 0xffff >> -shift;
- else
- mask = (uint16_t)(0xffff << shift);
- mask |= mask << 16;
- break;
- case 2:
- if (shift < -31 || shift > 31) {
- mask = 0;
- } else {
- if (op == 4)
- mask = 0xffffffffu >> -shift;
- else
- mask = 0xffffffffu << shift;
- }
- break;
- default:
- abort();
- }
- tmp2 = neon_load_reg(rd, pass);
- tcg_gen_andi_i32(tmp, tmp, mask);
- tcg_gen_andi_i32(tmp2, tmp2, ~mask);
- tcg_gen_or_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
- neon_store_reg(rd, pass, tmp);
- }
- } /* for pass */
- } else if (op < 10) {
- /* Shift by immediate and narrow:
- VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
- int input_unsigned = (op == 8) ? !u : u;
- if (rm & 1) {
- return 1;
- }
- shift = shift - (1 << (size + 3));
- size++;
- if (size == 3) {
- tmp64 = tcg_const_i64(shift);
- neon_load_reg64(cpu_V0, rm);
- neon_load_reg64(cpu_V1, rm + 1);
- for (pass = 0; pass < 2; pass++) {
- TCGv_i64 in;
- if (pass == 0) {
- in = cpu_V0;
- } else {
- in = cpu_V1;
- }
- if (q) {
- if (input_unsigned) {
- gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
- } else {
- gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
- }
- } else {
- if (input_unsigned) {
- gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
- } else {
- gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
- }
- }
- tmp = tcg_temp_new_i32();
- gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
- neon_store_reg(rd, pass, tmp);
- } /* for pass */
- tcg_temp_free_i64(tmp64);
- } else {
- if (size == 1) {
- imm = (uint16_t)shift;
- imm |= imm << 16;
- } else {
- /* size == 2 */
- imm = (uint32_t)shift;
- }
- tmp2 = tcg_const_i32(imm);
- tmp4 = neon_load_reg(rm + 1, 0);
- tmp5 = neon_load_reg(rm + 1, 1);
- for (pass = 0; pass < 2; pass++) {
- if (pass == 0) {
- tmp = neon_load_reg(rm, 0);
- } else {
- tmp = tmp4;
- }
- gen_neon_shift_narrow(size, tmp, tmp2, q,
- input_unsigned);
- if (pass == 0) {
- tmp3 = neon_load_reg(rm, 1);
- } else {
- tmp3 = tmp5;
- }
- gen_neon_shift_narrow(size, tmp3, tmp2, q,
- input_unsigned);
- tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(tmp3);
- tmp = tcg_temp_new_i32();
- gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
- neon_store_reg(rd, pass, tmp);
- } /* for pass */
- tcg_temp_free_i32(tmp2);
- }
- } else if (op == 10) {
- /* VSHLL, VMOVL */
- if (q || (rd & 1)) {
- return 1;
- }
- tmp = neon_load_reg(rm, 0);
- tmp2 = neon_load_reg(rm, 1);
- for (pass = 0; pass < 2; pass++) {
- if (pass == 1)
- tmp = tmp2;
-
- gen_neon_widen(cpu_V0, tmp, size, u);
-
- if (shift != 0) {
- /* The shift is less than the width of the source
- type, so we can just shift the whole register. */
- tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
- /* Widen the result of shift: we need to clear
- * the potential overflow bits resulting from
- * left bits of the narrow input appearing as
- * right bits of the left neighbour narrow
- * input. */
- if (size < 2 || !u) {
- uint64_t imm64;
- if (size == 0) {
- imm = (0xffu >> (8 - shift));
- imm |= imm << 16;
- } else if (size == 1) {
- imm = 0xffff >> (16 - shift);
- } else {
- /* size == 2 */
- imm = 0xffffffff >> (32 - shift);
- }
- if (size < 2) {
- imm64 = imm | (((uint64_t)imm) << 32);
- } else {
- imm64 = imm;
- }
- tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
- }
- }
- neon_store_reg64(cpu_V0, rd + pass);
- }
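/*
 * For illustration: a VSHLL.S8 by 3 sign-extends each byte into a 16-bit
 * lane of cpu_V0 and then shifts the whole 64-bit value left by 3, so the
 * top bits of each lane spill into the bottom three bits of the lane
 * above.  Here size == 0 and shift == 3 give imm = 0x07 and
 * imm64 = 0x0007000700070007, and the andi with ~imm64 clears exactly
 * those contaminated low bits.
 */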
- } else if (op >= 14) {
- /* VCVT fixed-point. */
- if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
- return 1;
- }
- /* We have already masked out the must-be-1 top bit of imm6,
- * hence this 32-shift where the ARM ARM has 64-imm6.
- */
- shift = 32 - shift;
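/*
 * Example (illustrative): imm6 = 0b110000 (48) arrives here with the
 * must-be-1 top bit already stripped, i.e. shift == 16, and the line
 * above recovers the architectural fracbits = 64 - 48 = 32 - 16 = 16.
 */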
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
- tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
- if (!(op & 1)) {
- if (u)
- gen_vfp_ulto(0, shift, 1);
- else
- gen_vfp_slto(0, shift, 1);
- } else {
- if (u)
- gen_vfp_toul(0, shift, 1);
- else
- gen_vfp_tosl(0, shift, 1);
- }
- tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
- }
- } else {
- return 1;
- }
- } else { /* (insn & 0x00380080) == 0 */
- int invert;
- if (q && (rd & 1)) {
- return 1;
- }
-
- op = (insn >> 8) & 0xf;
- /* One register and immediate. */
- imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
- invert = (insn & (1 << 5)) != 0;
- /* Note that op = 2,3,4,5,6,7,10,11,12,13 with imm = 0 is UNPREDICTABLE.
- * We choose not to special-case this and will behave as if a
- * valid constant encoding of 0 had been given.
- */
- switch (op) {
- case 0: case 1:
- /* no-op */
- break;
- case 2: case 3:
- imm <<= 8;
- break;
- case 4: case 5:
- imm <<= 16;
- break;
- case 6: case 7:
- imm <<= 24;
- break;
- case 8: case 9:
- imm |= imm << 16;
- break;
- case 10: case 11:
- imm = (imm << 8) | (imm << 24);
- break;
- case 12:
- imm = (imm << 8) | 0xff;
- break;
- case 13:
- imm = (imm << 16) | 0xffff;
- break;
- case 14:
- imm |= (imm << 8) | (imm << 16) | (imm << 24);
- if (invert)
- imm = ~imm;
- break;
- case 15:
- if (invert) {
- return 1;
- }
- imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
- | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
- break;
- }
- if (invert)
- imm = ~imm;
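/*
 * For illustration, the immediate expansion above: with op == 12 an 8-bit
 * immediate of 0x12 becomes the per-element constant 0x000012ff
 * (ones-extended shift), while op == 8 replicates 0x12 into both
 * halfwords of each 32-bit element, giving 0x00120012.
 */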
-
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
- if (op & 1 && op < 12) {
- tmp = neon_load_reg(rd, pass);
- if (invert) {
- /* The immediate value has already been inverted, so
- BIC becomes AND. */
- tcg_gen_andi_i32(tmp, tmp, imm);
- } else {
- tcg_gen_ori_i32(tmp, tmp, imm);
- }
- } else {
- /* VMOV, VMVN. */
- tmp = tcg_temp_new_i32();
- if (op == 14 && invert) {
- int n;
- uint32_t val;
- val = 0;
- for (n = 0; n < 4; n++) {
- if (imm & (1 << (n + (pass & 1) * 4)))
- val |= 0xff << (n * 8);
- }
- tcg_gen_movi_i32(tmp, val);
- } else {
- tcg_gen_movi_i32(tmp, imm);
- }
- }
- neon_store_reg(rd, pass, tmp);
- }
- }
- } else { /* (insn & 0x00800010) == 0x00800000 */
- if (size != 3) {
- op = (insn >> 8) & 0xf;
- if ((insn & (1 << 6)) == 0) {
- /* Three registers of different lengths. */
- int src1_wide;
- int src2_wide;
- int prewiden;
- /* undefreq: bit 0 : UNDEF if size == 0
- * bit 1 : UNDEF if size == 1
- * bit 2 : UNDEF if size == 2
- * bit 3 : UNDEF if U == 1
- * Note that [2:0] set implies 'always UNDEF'
- */
- int undefreq;
- /* prewiden, src1_wide, src2_wide, undefreq */
- static const int neon_3reg_wide[16][4] = {
- {1, 0, 0, 0}, /* VADDL */
- {1, 1, 0, 0}, /* VADDW */
- {1, 0, 0, 0}, /* VSUBL */
- {1, 1, 0, 0}, /* VSUBW */
- {0, 1, 1, 0}, /* VADDHN */
- {0, 0, 0, 0}, /* VABAL */
- {0, 1, 1, 0}, /* VSUBHN */
- {0, 0, 0, 0}, /* VABDL */
- {0, 0, 0, 0}, /* VMLAL */
- {0, 0, 0, 9}, /* VQDMLAL */
- {0, 0, 0, 0}, /* VMLSL */
- {0, 0, 0, 9}, /* VQDMLSL */
- {0, 0, 0, 0}, /* Integer VMULL */
- {0, 0, 0, 1}, /* VQDMULL */
- {0, 0, 0, 0xa}, /* Polynomial VMULL */
- {0, 0, 0, 7}, /* Reserved: always UNDEF */
- };
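/*
 * Reading the table, for illustration: VQDMLAL's undefreq of 9 is
 * bit 0 | bit 3, i.e. UNDEF when size == 0 or U == 1; the polynomial
 * VMULL entry 0xa is UNDEF for size == 1 or U == 1; and 7 sets all of
 * bits [2:0], which per the note above means always UNDEF.
 */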
-
- prewiden = neon_3reg_wide[op][0];
- src1_wide = neon_3reg_wide[op][1];
- src2_wide = neon_3reg_wide[op][2];
- undefreq = neon_3reg_wide[op][3];
-
- if ((undefreq & (1 << size)) ||
- ((undefreq & 8) && u)) {
- return 1;
- }
- if ((src1_wide && (rn & 1)) ||
- (src2_wide && (rm & 1)) ||
- (!src2_wide && (rd & 1))) {
- return 1;
- }
-
- /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
- * outside the loop below as it only performs a single pass.
- */
- if (op == 14 && size == 2) {
- TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
-
- if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
- return 1;
- }
- tcg_rn = tcg_temp_new_i64();
- tcg_rm = tcg_temp_new_i64();
- tcg_rd = tcg_temp_new_i64();
- neon_load_reg64(tcg_rn, rn);
- neon_load_reg64(tcg_rm, rm);
- gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
- neon_store_reg64(tcg_rd, rd);
- gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
- neon_store_reg64(tcg_rd, rd + 1);
- tcg_temp_free_i64(tcg_rn);
- tcg_temp_free_i64(tcg_rm);
- tcg_temp_free_i64(tcg_rd);
- return 0;
- }
-
- /* Avoid overlapping operands. Wide source operands are
- always aligned so will never overlap with wide
- destinations in problematic ways. */
- if (rd == rm && !src2_wide) {
- tmp = neon_load_reg(rm, 1);
- neon_store_scratch(2, tmp);
- } else if (rd == rn && !src1_wide) {
- tmp = neon_load_reg(rn, 1);
- neon_store_scratch(2, tmp);
- }
- tmp3 = NULL;
- for (pass = 0; pass < 2; pass++) {
- if (src1_wide) {
- neon_load_reg64(cpu_V0, rn + pass);
- tmp = NULL;
- } else {
- if (pass == 1 && rd == rn) {
- tmp = neon_load_scratch(2);
- } else {
- tmp = neon_load_reg(rn, pass);
- }
- if (prewiden) {
- gen_neon_widen(cpu_V0, tmp, size, u);
- }
- }
- if (src2_wide) {
- neon_load_reg64(cpu_V1, rm + pass);
- tmp2 = NULL;
- } else {
- if (pass == 1 && rd == rm) {
- tmp2 = neon_load_scratch(2);
- } else {
- tmp2 = neon_load_reg(rm, pass);
- }
- if (prewiden) {
- gen_neon_widen(cpu_V1, tmp2, size, u);
- }
- }
- switch (op) {
- case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
- gen_neon_addl(size);
- break;
- case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
- gen_neon_subl(size);
- break;
- case 5: case 7: /* VABAL, VABDL */
- switch ((size << 1) | u) {
- case 0:
- gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
- break;
- case 1:
- gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
- break;
- case 3:
- gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
- break;
- case 4:
- gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
- break;
- case 5:
- gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
- break;
- default: abort();
- }
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
- break;
- case 8: case 9: case 10: case 11: case 12: case 13:
- /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
- gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
- break;
- case 14: /* Polynomial VMULL */
- gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
- break;
- default: /* 15 is RESERVED: caught earlier */
- abort();
- }
- if (op == 13) {
- /* VQDMULL */
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- neon_store_reg64(cpu_V0, rd + pass);
- } else if (op == 5 || (op >= 8 && op <= 11)) {
- /* Accumulate. */
- neon_load_reg64(cpu_V1, rd + pass);
- switch (op) {
- case 10: /* VMLSL */
- gen_neon_negl(cpu_V0, size);
- /* Fall through */
- case 5: case 8: /* VABAL, VMLAL */
- gen_neon_addl(size);
- break;
- case 9: case 11: /* VQDMLAL, VQDMLSL */
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- if (op == 11) {
- gen_neon_negl(cpu_V0, size);
- }
- gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
- break;
- default:
- abort();
- }
- neon_store_reg64(cpu_V0, rd + pass);
- } else if (op == 4 || op == 6) {
- /* Narrowing operation. */
- tmp = tcg_temp_new_i32();
- if (!u) {
- switch (size) {
- case 0:
- gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
- break;
- case 1:
- gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
- break;
- case 2:
- tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
- tcg_gen_extrl_i64_i32(tmp, cpu_V0);
- break;
- default: abort();
- }
- } else {
- switch (size) {
- case 0:
- gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
- break;
- case 1:
- gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
- break;
- case 2:
- tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
- tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
- tcg_gen_extrl_i64_i32(tmp, cpu_V0);
- break;
- default: abort();
- }
- }
- if (pass == 0) {
- tmp3 = tmp;
- } else {
- neon_store_reg(rd, 0, tmp3);
- neon_store_reg(rd, 1, tmp);
- }
- } else {
- /* Write back the result. */
- neon_store_reg64(cpu_V0, rd + pass);
- }
- }
- } else {
- /* Two registers and a scalar. NB that for ops of this form
- * the ARM ARM labels bit 24 as Q, but it is in our variable
- * 'u', not 'q'.
- */
- if (size == 0) {
- return 1;
- }
- switch (op) {
- case 1: /* Float VMLA scalar */
- case 5: /* Floating point VMLS scalar */
- case 9: /* Floating point VMUL scalar */
- if (size == 1) {
- return 1;
- }
- /* fall through */
- case 0: /* Integer VMLA scalar */
- case 4: /* Integer VMLS scalar */
- case 8: /* Integer VMUL scalar */
- case 12: /* VQDMULH scalar */
- case 13: /* VQRDMULH scalar */
- if (u && ((rd | rn) & 1)) {
- return 1;
- }
- tmp = neon_get_scalar(size, rm);
- neon_store_scratch(0, tmp);
- for (pass = 0; pass < (u ? 4 : 2); pass++) {
- tmp = neon_load_scratch(0);
- tmp2 = neon_load_reg(rn, pass);
- if (op == 12) {
- if (size == 1) {
- gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
- } else {
- gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
- }
- } else if (op == 13) {
- if (size == 1) {
- gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
- } else {
- gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
- }
- } else if (op & 1) {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- } else {
- switch (size) {
- case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
- case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
- default: abort();
- }
- }
- tcg_temp_free_i32(tmp2);
- if (op < 8) {
- /* Accumulate. */
- tmp2 = neon_load_reg(rd, pass);
- switch (op) {
- case 0:
- gen_neon_add(size, tmp, tmp2);
- break;
- case 1:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case 4:
- gen_neon_rsb(size, tmp, tmp2);
- break;
- case 5:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- default:
- abort();
- }
- tcg_temp_free_i32(tmp2);
- }
- neon_store_reg(rd, pass, tmp);
- }
- break;
- case 3: /* VQDMLAL scalar */
- case 7: /* VQDMLSL scalar */
- case 11: /* VQDMULL scalar */
- if (u == 1) {
- return 1;
- }
- /* fall through */
- case 2: /* VMLAL scalar */
- case 6: /* VMLSL scalar */
- case 10: /* VMULL scalar */
- if (rd & 1) {
- return 1;
- }
- tmp2 = neon_get_scalar(size, rm);
- /* We need a copy of tmp2 because gen_neon_mull
- * deletes it during pass 0. */
- tmp4 = tcg_temp_new_i32();
- tcg_gen_mov_i32(tmp4, tmp2);
- tmp3 = neon_load_reg(rn, 1);
-
- for (pass = 0; pass < 2; pass++) {
- if (pass == 0) {
- tmp = neon_load_reg(rn, 0);
- } else {
- tmp = tmp3;
- tmp2 = tmp4;
- }
- gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
- if (op != 11) {
- neon_load_reg64(cpu_V1, rd + pass);
- }
- switch (op) {
- case 6:
- gen_neon_negl(cpu_V0, size);
- /* Fall through */
- case 2:
- gen_neon_addl(size);
- break;
- case 3: case 7:
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- if (op == 7) {
- gen_neon_negl(cpu_V0, size);
- }
- gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
- break;
- case 10:
- /* no-op */
- break;
- case 11:
- gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
- break;
- default:
- abort();
- }
- neon_store_reg64(cpu_V0, rd + pass);
- }
- break;
- case 14: /* VQRDMLAH scalar */
- case 15: /* VQRDMLSH scalar */
- {
- NeonGenThreeOpEnvFn *fn;
-
- if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
- return 1;
- }
- if (u && ((rd | rn) & 1)) {
- return 1;
- }
- if (op == 14) {
- if (size == 1) {
- fn = gen_helper_neon_qrdmlah_s16;
- } else {
- fn = gen_helper_neon_qrdmlah_s32;
- }
- } else {
- if (size == 1) {
- fn = gen_helper_neon_qrdmlsh_s16;
- } else {
- fn = gen_helper_neon_qrdmlsh_s32;
- }
- }
-
- tmp2 = neon_get_scalar(size, rm);
- for (pass = 0; pass < (u ? 4 : 2); pass++) {
- tmp = neon_load_reg(rn, pass);
- tmp3 = neon_load_reg(rd, pass);
- fn(tmp, cpu_env, tmp, tmp2, tmp3);
- tcg_temp_free_i32(tmp3);
- neon_store_reg(rd, pass, tmp);
- }
- tcg_temp_free_i32(tmp2);
- }
- break;
- default:
- g_assert_not_reached();
- }
- }
- } else { /* size == 3 */
- if (!u) {
- /* Extract. */
- imm = (insn >> 8) & 0xf;
-
- if (imm > 7 && !q)
- return 1;
-
- if (q && ((rd | rn | rm) & 1)) {
- return 1;
- }
-
- if (imm == 0) {
- neon_load_reg64(cpu_V0, rn);
- if (q) {
- neon_load_reg64(cpu_V1, rn + 1);
- }
- } else if (imm == 8) {
- neon_load_reg64(cpu_V0, rn + 1);
- if (q) {
- neon_load_reg64(cpu_V1, rm);
- }
- } else if (q) {
- tmp64 = tcg_temp_new_i64();
- if (imm < 8) {
- neon_load_reg64(cpu_V0, rn);
- neon_load_reg64(tmp64, rn + 1);
- } else {
- neon_load_reg64(cpu_V0, rn + 1);
- neon_load_reg64(tmp64, rm);
- }
- tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
- tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
- if (imm < 8) {
- neon_load_reg64(cpu_V1, rm);
- } else {
- neon_load_reg64(cpu_V1, rm + 1);
- imm -= 8;
- }
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
- tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
- tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
- tcg_temp_free_i64(tmp64);
- } else {
- /* BUGFIX */
- neon_load_reg64(cpu_V0, rn);
- tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
- neon_load_reg64(cpu_V1, rm);
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
- }
- neon_store_reg64(cpu_V0, rd);
- if (q) {
- neon_store_reg64(cpu_V1, rd + 1);
- }
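/*
 * Concrete VEXT case, for illustration, in the D-register form with
 * imm == 3: the non-Q path above computes
 *   rd = (rn >> 24) | (rm << 40)
 * i.e. bytes 3..7 of Vn followed by bytes 0..2 of Vm.
 */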
- } else if ((insn & (1 << 11)) == 0) {
- /* Two register misc. */
- op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
- size = (insn >> 18) & 3;
- /* UNDEF for unknown op values and bad op-size combinations */
- if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
- return 1;
- }
- if (neon_2rm_is_v8_op(op) &&
- !arm_dc_feature(s, ARM_FEATURE_V8)) {
- return 1;
- }
- if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
- q && ((rm | rd) & 1)) {
- return 1;
- }
- switch (op) {
- case NEON_2RM_VREV64:
- for (pass = 0; pass < (q ? 2 : 1); pass++) {
- tmp = neon_load_reg(rm, pass * 2);
- tmp2 = neon_load_reg(rm, pass * 2 + 1);
- switch (size) {
- case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
- case 1: gen_swap_half(tmp); break;
- case 2: /* no-op */ break;
- default: abort();
- }
- neon_store_reg(rd, pass * 2 + 1, tmp);
- if (size == 2) {
- neon_store_reg(rd, pass * 2, tmp2);
- } else {
- switch (size) {
- case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
- case 1: gen_swap_half(tmp2); break;
- default: abort();
- }
- neon_store_reg(rd, pass * 2, tmp2);
- }
- }
- break;
- case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
- case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
- for (pass = 0; pass < q + 1; pass++) {
- tmp = neon_load_reg(rm, pass * 2);
- gen_neon_widen(cpu_V0, tmp, size, op & 1);
- tmp = neon_load_reg(rm, pass * 2 + 1);
- gen_neon_widen(cpu_V1, tmp, size, op & 1);
- switch (size) {
- case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
- case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
- case 2: tcg_gen_add_i64(CPU_V001); break;
- default: abort();
- }
- if (op >= NEON_2RM_VPADAL) {
- /* Accumulate. */
- neon_load_reg64(cpu_V1, rd + pass);
- gen_neon_addl(size);
- }
- neon_store_reg64(cpu_V0, rd + pass);
- }
- break;
- case NEON_2RM_VTRN:
- if (size == 2) {
- int n;
- for (n = 0; n < (q ? 4 : 2); n += 2) {
- tmp = neon_load_reg(rm, n);
- tmp2 = neon_load_reg(rd, n + 1);
- neon_store_reg(rm, n, tmp2);
- neon_store_reg(rd, n + 1, tmp);
- }
- } else {
- goto elementwise;
- }
- break;
- case NEON_2RM_VUZP:
- if (gen_neon_unzip(rd, rm, size, q)) {
- return 1;
- }
- break;
- case NEON_2RM_VZIP:
- if (gen_neon_zip(rd, rm, size, q)) {
- return 1;
- }
- break;
- case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
- /* also VQMOVUN; op field and mnemonics don't line up */
- if (rm & 1) {
- return 1;
- }
- tmp2 = NULL;
- for (pass = 0; pass < 2; pass++) {
- neon_load_reg64(cpu_V0, rm + pass);
- tmp = tcg_temp_new_i32();
- gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
- tmp, cpu_V0);
- if (pass == 0) {
- tmp2 = tmp;
- } else {
- neon_store_reg(rd, 0, tmp2);
- neon_store_reg(rd, 1, tmp);
- }
- }
- break;
- case NEON_2RM_VSHLL:
- if (q || (rd & 1)) {
- return 1;
- }
- tmp = neon_load_reg(rm, 0);
- tmp2 = neon_load_reg(rm, 1);
- for (pass = 0; pass < 2; pass++) {
- if (pass == 1)
- tmp = tmp2;
- gen_neon_widen(cpu_V0, tmp, size, 1);
- tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
- neon_store_reg64(cpu_V0, rd + pass);
- }
- break;
- case NEON_2RM_VCVT_F16_F32:
- {
- TCGv_ptr fpst;
- TCGv_i32 ahp;
-
- if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
- q || (rm & 1)) {
- return 1;
- }
- tmp = tcg_temp_new_i32();
- tmp2 = tcg_temp_new_i32();
- fpst = get_fpstatus_ptr(true);
- ahp = get_ahp_flag();
- tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
- gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);
- tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
- gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);
- tcg_gen_shli_i32(tmp2, tmp2, 16);
- tcg_gen_or_i32(tmp2, tmp2, tmp);
- tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
- gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);
- tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
- neon_store_reg(rd, 0, tmp2);
- tmp2 = tcg_temp_new_i32();
- gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);
- tcg_gen_shli_i32(tmp2, tmp2, 16);
- tcg_gen_or_i32(tmp2, tmp2, tmp);
- neon_store_reg(rd, 1, tmp2);
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(ahp);
- tcg_temp_free_ptr(fpst);
- break;
- }
- case NEON_2RM_VCVT_F32_F16:
- {
- TCGv_ptr fpst;
- TCGv_i32 ahp;
- if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
- q || (rd & 1)) {
- return 1;
- }
- fpst = get_fpstatus_ptr(true);
- ahp = get_ahp_flag();
- tmp3 = tcg_temp_new_i32();
- tmp = neon_load_reg(rm, 0);
- tmp2 = neon_load_reg(rm, 1);
- tcg_gen_ext16u_i32(tmp3, tmp);
- gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
- tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
- tcg_gen_shri_i32(tmp3, tmp, 16);
- gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
- tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
- tcg_temp_free_i32(tmp);
- tcg_gen_ext16u_i32(tmp3, tmp2);
- gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
- tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
- tcg_gen_shri_i32(tmp3, tmp2, 16);
- gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
- tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp3);
- tcg_temp_free_i32(ahp);
- tcg_temp_free_ptr(fpst);
- break;
- }
- case NEON_2RM_AESE: case NEON_2RM_AESMC:
- if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
- || ((rm | rd) & 1)) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rm);
-
- /* Bit 6 is the lowest opcode bit; it distinguishes between
- * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
- */
- tmp3 = tcg_const_i32(extract32(insn, 6, 1));
-
- if (op == NEON_2RM_AESE) {
- gen_helper_crypto_aese(ptr1, ptr2, tmp3);
- } else {
- gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
- }
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- tcg_temp_free_i32(tmp3);
- break;
- case NEON_2RM_SHA1H:
- if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)
- || ((rm | rd) & 1)) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rm);
-
- gen_helper_crypto_sha1h(ptr1, ptr2);
-
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- break;
- case NEON_2RM_SHA1SU1:
- if ((rm | rd) & 1) {
- return 1;
- }
- /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
- if (q) {
- if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256)) {
- return 1;
- }
- } else if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rm);
- if (q) {
- gen_helper_crypto_sha256su0(ptr1, ptr2);
- } else {
- gen_helper_crypto_sha1su1(ptr1, ptr2);
- }
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- break;
- default:
- elementwise:
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
- if (neon_2rm_is_float_op(op)) {
- tcg_gen_ld_f32(cpu_F0s, cpu_env,
- neon_reg_offset(rm, pass));
- tmp = NULL;
- } else {
- tmp = neon_load_reg(rm, pass);
- }
- switch (op) {
- case NEON_2RM_VREV32:
- switch (size) {
- case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
- case 1: gen_swap_half(tmp); break;
- default: abort();
- }
- break;
- case NEON_2RM_VREV16:
- gen_rev16(tmp);
- break;
- case NEON_2RM_VCLS:
- switch (size) {
- case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
- case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
- case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
- default: abort();
- }
- break;
- case NEON_2RM_VCLZ:
- switch (size) {
- case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
- case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
- case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
- default: abort();
- }
- break;
- case NEON_2RM_VCNT:
- gen_helper_neon_cnt_u8(tmp, tmp);
- break;
- case NEON_2RM_VMVN:
- tcg_gen_not_i32(tmp, tmp);
- break;
- case NEON_2RM_VQABS:
- switch (size) {
- case 0:
- gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
- break;
- case 1:
- gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
- break;
- case 2:
- gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
- break;
- default: abort();
- }
- break;
- case NEON_2RM_VQNEG:
- switch (size) {
- case 0:
- gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
- break;
- case 1:
- gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
- break;
- case 2:
- gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
- break;
- default: abort();
- }
- break;
- case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
- tmp2 = tcg_const_i32(0);
- switch(size) {
- case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
- case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
- default: abort();
- }
- tcg_temp_free_i32(tmp2);
- if (op == NEON_2RM_VCLE0) {
- tcg_gen_not_i32(tmp, tmp);
- }
- break;
- case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
- tmp2 = tcg_const_i32(0);
- switch(size) {
- case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
- case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
- default: abort();
- }
- tcg_temp_free_i32(tmp2);
- if (op == NEON_2RM_VCLT0) {
- tcg_gen_not_i32(tmp, tmp);
- }
- break;
- case NEON_2RM_VCEQ0:
- tmp2 = tcg_const_i32(0);
- switch(size) {
- case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
- case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
- default: abort();
- }
- tcg_temp_free_i32(tmp2);
- break;
- case NEON_2RM_VABS:
- switch(size) {
- case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
- case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
- case 2: tcg_gen_abs_i32(tmp, tmp); break;
- default: abort();
- }
- break;
- case NEON_2RM_VNEG:
- tmp2 = tcg_const_i32(0);
- gen_neon_rsb(size, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- break;
- case NEON_2RM_VCGT0_F:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- tmp2 = tcg_const_i32(0);
- gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_2RM_VCGE0_F:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- tmp2 = tcg_const_i32(0);
- gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_2RM_VCEQ0_F:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- tmp2 = tcg_const_i32(0);
- gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_2RM_VCLE0_F:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- tmp2 = tcg_const_i32(0);
- gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_2RM_VCLT0_F:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- tmp2 = tcg_const_i32(0);
- gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_2RM_VABS_F:
- gen_vfp_abs(0);
- break;
- case NEON_2RM_VNEG_F:
- gen_vfp_neg(0);
- break;
- case NEON_2RM_VSWP:
- tmp2 = neon_load_reg(rd, pass);
- neon_store_reg(rm, pass, tmp2);
- break;
- case NEON_2RM_VTRN:
- tmp2 = neon_load_reg(rd, pass);
- switch (size) {
- case 0: gen_neon_trn_u8(tmp, tmp2); break;
- case 1: gen_neon_trn_u16(tmp, tmp2); break;
- default: abort();
- }
- neon_store_reg(rm, pass, tmp2);
- break;
- case NEON_2RM_VRINTN:
- case NEON_2RM_VRINTA:
- case NEON_2RM_VRINTM:
- case NEON_2RM_VRINTP:
- case NEON_2RM_VRINTZ:
- {
- TCGv_i32 tcg_rmode;
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- int rmode;
-
- if (op == NEON_2RM_VRINTZ) {
- rmode = FPROUNDING_ZERO;
- } else {
- rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
- }
-
- tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
- gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
- cpu_env);
- gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus);
- gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
- cpu_env);
- tcg_temp_free_ptr(fpstatus);
- tcg_temp_free_i32(tcg_rmode);
- break;
- }
- case NEON_2RM_VRINTX:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_2RM_VCVTAU:
- case NEON_2RM_VCVTAS:
- case NEON_2RM_VCVTNU:
- case NEON_2RM_VCVTNS:
- case NEON_2RM_VCVTPU:
- case NEON_2RM_VCVTPS:
- case NEON_2RM_VCVTMU:
- case NEON_2RM_VCVTMS:
- {
- bool is_signed = !extract32(insn, 7, 1);
- TCGv_ptr fpst = get_fpstatus_ptr(1);
- TCGv_i32 tcg_rmode, tcg_shift;
- int rmode = fp_decode_rm[extract32(insn, 8, 2)];
-
- tcg_shift = tcg_const_i32(0);
- tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
- gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
- cpu_env);
-
- if (is_signed) {
- gen_helper_vfp_tosls(cpu_F0s, cpu_F0s,
- tcg_shift, fpst);
- } else {
- gen_helper_vfp_touls(cpu_F0s, cpu_F0s,
- tcg_shift, fpst);
- }
-
- gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
- cpu_env);
- tcg_temp_free_i32(tcg_rmode);
- tcg_temp_free_i32(tcg_shift);
- tcg_temp_free_ptr(fpst);
- break;
- }
- case NEON_2RM_VRECPE:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_recpe_u32(tmp, tmp, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_2RM_VRSQRTE:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_2RM_VRECPE_F:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_2RM_VRSQRTE_F:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
- gen_vfp_sito(0, 1);
- break;
- case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
- gen_vfp_uito(0, 1);
- break;
- case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
- gen_vfp_tosiz(0, 1);
- break;
- case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
- gen_vfp_touiz(0, 1);
- break;
- default:
- /* Reserved op values were caught by the
- * neon_2rm_sizes[] check earlier.
- */
- abort();
- }
- if (neon_2rm_is_float_op(op)) {
- tcg_gen_st_f32(cpu_F0s, cpu_env,
- neon_reg_offset(rd, pass));
- } else {
- neon_store_reg(rd, pass, tmp);
- }
- }
- break;
- }
- } else if ((insn & (1 << 10)) == 0) {
- /* VTBL, VTBX. */
- int n = ((insn >> 8) & 3) + 1;
- if ((rn + n) > 32) {
- /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
- * helper function running off the end of the register file.
- */
- return 1;
- }
- n <<= 3;
- if (insn & (1 << 6)) {
- tmp = neon_load_reg(rd, 0);
- } else {
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- }
- tmp2 = neon_load_reg(rm, 0);
- ptr1 = vfp_reg_ptr(true, rn);
- tmp5 = tcg_const_i32(n);
- gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
- tcg_temp_free_i32(tmp);
- if (insn & (1 << 6)) {
- tmp = neon_load_reg(rd, 1);
- } else {
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- }
- tmp3 = neon_load_reg(rm, 1);
- gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
- tcg_temp_free_i32(tmp5);
- tcg_temp_free_ptr(ptr1);
- neon_store_reg(rd, 0, tmp2);
- neon_store_reg(rd, 1, tmp3);
- tcg_temp_free_i32(tmp);
- } else if ((insn & 0x380) == 0) {
- /* VDUP */
- if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
- return 1;
- }
- if (insn & (1 << 19)) {
- tmp = neon_load_reg(rm, 1);
- } else {
- tmp = neon_load_reg(rm, 0);
- }
- if (insn & (1 << 16)) {
- gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
- } else if (insn & (1 << 17)) {
- if ((insn >> 18) & 1)
- gen_neon_dup_high16(tmp);
- else
- gen_neon_dup_low16(tmp);
- }
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
- tmp2 = tcg_temp_new_i32();
- tcg_gen_mov_i32(tmp2, tmp);
- neon_store_reg(rd, pass, tmp2);
- }
- tcg_temp_free_i32(tmp);
- } else {
- return 1;
- }
- }
- }
- return 0;
-}
-
-/* Advanced SIMD three registers of the same length extension.
- * 31 25 23 22 20 16 12 11 10 9 8 3 0
- * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
- * | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
- * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
- */
-static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
-{
- gen_helper_gvec_3 *fn_gvec = NULL;
- gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
- int rd, rn, rm, opr_sz;
- int data = 0;
- bool q;
-
- q = extract32(insn, 6, 1);
- VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
- VFP_DREG_M(rm, insn);
- if ((rd | rn | rm) & q) {
- return 1;
- }
-
- if ((insn & 0xfe200f10) == 0xfc200800) {
- /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
- int size = extract32(insn, 20, 1);
- data = extract32(insn, 23, 2); /* rot */
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
- || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
- return 1;
- }
- fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
- } else if ((insn & 0xfea00f10) == 0xfc800800) {
- /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
- int size = extract32(insn, 20, 1);
- data = extract32(insn, 24, 1); /* rot */
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
- || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
- return 1;
- }
- fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
- } else if ((insn & 0xfeb00f00) == 0xfc200d00) {
- /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */
- bool u = extract32(insn, 4, 1);
- if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
- return 1;
- }
- fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
- } else {
- return 1;
- }
-
- if (s->fp_excp_el) {
- gen_exception_insn(s, 4, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
- return 0;
- }
- if (!s->vfp_enabled) {
- return 1;
- }
-
- opr_sz = (1 + q) * 8;
- if (fn_gvec_ptr) {
- TCGv_ptr fpst = get_fpstatus_ptr(1);
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), fpst,
- opr_sz, opr_sz, data, fn_gvec_ptr);
- tcg_temp_free_ptr(fpst);
- } else {
- tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm),
- opr_sz, opr_sz, data, fn_gvec);
- }
- return 0;
-}
-
-/* Advanced SIMD two registers and a scalar extension.
- * 31 24 23 22 20 16 12 11 10 9 8 3 0
- * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
- * | 1 1 1 1 1 1 1 0 | o1 | D | o2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
- * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
- *
- */
-
-static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
-{
- gen_helper_gvec_3 *fn_gvec = NULL;
- gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
- int rd, rn, rm, opr_sz, data;
- bool q;
-
- q = extract32(insn, 6, 1);
- VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
- if ((rd | rn) & q) {
- return 1;
- }
-
- if ((insn & 0xff000f10) == 0xfe000800) {
- /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */
- int rot = extract32(insn, 20, 2);
- int size = extract32(insn, 23, 1);
- int index;
-
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) {
- return 1;
- }
- if (size == 0) {
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
- return 1;
- }
- /* For fp16, rm is just Vm, and index is M. */
- rm = extract32(insn, 0, 4);
- index = extract32(insn, 5, 1);
- } else {
- /* For fp32, rm is the usual M:Vm, and index is 0. */
- VFP_DREG_M(rm, insn);
- index = 0;
- }
- data = (index << 2) | rot;
- fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx
- : gen_helper_gvec_fcmlah_idx);
- } else if ((insn & 0xffb00f00) == 0xfe200d00) {
- /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
- int u = extract32(insn, 4, 1);
- if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
- return 1;
- }
- fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
- /* rm is just Vm, and index is M. */
- data = extract32(insn, 5, 1); /* index */
- rm = extract32(insn, 0, 4);
- } else {
- return 1;
- }
-
- if (s->fp_excp_el) {
- gen_exception_insn(s, 4, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
- return 0;
- }
- if (!s->vfp_enabled) {
- return 1;
- }
-
- opr_sz = (1 + q) * 8;
- if (fn_gvec_ptr) {
- TCGv_ptr fpst = get_fpstatus_ptr(1);
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), fpst,
- opr_sz, opr_sz, data, fn_gvec_ptr);
- tcg_temp_free_ptr(fpst);
- } else {
- tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm),
- opr_sz, opr_sz, data, fn_gvec);
- }
- return 0;
-}
-
-static int disas_coproc_insn(DisasContext *s, uint32_t insn)
-{
- int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
- const ARMCPRegInfo *ri;
-
- cpnum = (insn >> 8) & 0xf;
-
- /* First check for coprocessor space used for XScale/iwMMXt insns */
- if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
- if (extract32(s->c15_cpar, cpnum, 1) == 0) {
- return 1;
- }
- if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
- return disas_iwmmxt_insn(s, insn);
- } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
- return disas_dsp_insn(s, insn);
- }
- return 1;
- }
-
- /* Otherwise treat as a generic register access */
- is64 = (insn & (1 << 25)) == 0;
- if (!is64 && ((insn & (1 << 4)) == 0)) {
- /* cdp */
- return 1;
- }
-
- crm = insn & 0xf;
- if (is64) {
- crn = 0;
- opc1 = (insn >> 4) & 0xf;
- opc2 = 0;
- rt2 = (insn >> 16) & 0xf;
- } else {
- crn = (insn >> 16) & 0xf;
- opc1 = (insn >> 21) & 7;
- opc2 = (insn >> 5) & 7;
- rt2 = 0;
- }
- isread = (insn >> 20) & 1;
- rt = (insn >> 12) & 0xf;
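/*
 * For illustration: MRC p15, 0, r0, c1, c0, 0 (the classic SCTLR read)
 * decodes here as cpnum = 15, is64 = 0, opc1 = 0, crn = 1, crm = 0,
 * opc2 = 0, rt = 0, isread = 1, and the resulting ENCODE_CP_REG key is
 * what get_arm_cp_reginfo() looks up below.
 */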
-
- ri = get_arm_cp_reginfo(s->cp_regs,
- ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
- if (ri) {
- /* Check access permissions */
- if (!cp_access_ok(s->current_el, ri, isread)) {
- return 1;
- }
-
- if (ri->accessfn ||
- (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
- /* Emit code to perform further access permissions checks at
- * runtime; this may result in an exception.
- * Note that on XScale all cp0..c13 registers do an access check
- * call in order to handle c15_cpar.
- */
- TCGv_ptr tmpptr;
- TCGv_i32 tcg_syn, tcg_isread;
- uint32_t syndrome;
-
- /* Note that since we are an implementation which takes an
- * exception on a trapped conditional instruction only if the
- * instruction passes its condition code check, we can take
- * advantage of the clause in the ARM ARM that allows us to set
- * the COND field in the instruction to 0xE in all cases.
- * We could fish the actual condition out of the insn (ARM)
- * or the condexec bits (Thumb) but it isn't necessary.
- */
- switch (cpnum) {
- case 14:
- if (is64) {
- syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
- isread, false);
- } else {
- syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
- rt, isread, false);
- }
- break;
- case 15:
- if (is64) {
- syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
- isread, false);
- } else {
- syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
- rt, isread, false);
- }
- break;
- default:
- /* ARMv8 defines that only coprocessors 14 and 15 exist,
- * so this can only happen if this is an ARMv7 or earlier CPU,
- * in which case the syndrome information won't actually be
- * guest visible.
- */
- assert(!arm_dc_feature(s, ARM_FEATURE_V8));
- syndrome = syn_uncategorized();
- break;
- }
-
- gen_set_condexec(s);
- gen_set_pc_im(s, s->pc - 4);
- tmpptr = tcg_const_ptr(ri);
- tcg_syn = tcg_const_i32(syndrome);
- tcg_isread = tcg_const_i32(isread);
- gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
- tcg_isread);
- tcg_temp_free_ptr(tmpptr);
- tcg_temp_free_i32(tcg_syn);
- tcg_temp_free_i32(tcg_isread);
- }
-
- /* Handle special cases first */
- switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
- case ARM_CP_NOP:
- return 0;
- case ARM_CP_WFI:
- if (isread) {
- return 1;
- }
- gen_set_pc_im(s, s->pc);
- s->base.is_jmp = DISAS_WFI;
- return 0;
- default:
- break;
- }
-
- if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
- gen_io_start();
- }
-
- if (isread) {
- /* Read */
- if (is64) {
- TCGv_i64 tmp64;
- TCGv_i32 tmp;
- if (ri->type & ARM_CP_CONST) {
- tmp64 = tcg_const_i64(ri->resetvalue);
- } else if (ri->readfn) {
- TCGv_ptr tmpptr;
- tmp64 = tcg_temp_new_i64();
- tmpptr = tcg_const_ptr(ri);
- gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
- tcg_temp_free_ptr(tmpptr);
- } else {
- tmp64 = tcg_temp_new_i64();
- tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
- }
- tmp = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(tmp, tmp64);
- store_reg(s, rt, tmp);
- tcg_gen_shri_i64(tmp64, tmp64, 32);
- tmp = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(tmp, tmp64);
- tcg_temp_free_i64(tmp64);
- store_reg(s, rt2, tmp);
- } else {
- TCGv_i32 tmp;
- if (ri->type & ARM_CP_CONST) {
- tmp = tcg_const_i32(ri->resetvalue);
- } else if (ri->readfn) {
- TCGv_ptr tmpptr;
- tmp = tcg_temp_new_i32();
- tmpptr = tcg_const_ptr(ri);
- gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
- tcg_temp_free_ptr(tmpptr);
- } else {
- tmp = load_cpu_offset(ri->fieldoffset);
- }
- if (rt == 15) {
- /* Destination register of r15 for 32 bit loads sets
- * the condition codes from the high 4 bits of the value
- */
- gen_set_nzcv(tmp);
- tcg_temp_free_i32(tmp);
- } else {
- store_reg(s, rt, tmp);
- }
- }
- } else {
- /* Write */
- if (ri->type & ARM_CP_CONST) {
- /* If not forbidden by access permissions, treat as WI */
- return 0;
- }
-
- if (is64) {
- TCGv_i32 tmplo, tmphi;
- TCGv_i64 tmp64 = tcg_temp_new_i64();
- tmplo = load_reg(s, rt);
- tmphi = load_reg(s, rt2);
- tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
- tcg_temp_free_i32(tmplo);
- tcg_temp_free_i32(tmphi);
- if (ri->writefn) {
- TCGv_ptr tmpptr = tcg_const_ptr(ri);
- gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
- tcg_temp_free_ptr(tmpptr);
- } else {
- tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
- }
- tcg_temp_free_i64(tmp64);
- } else {
- if (ri->writefn) {
- TCGv_i32 tmp;
- TCGv_ptr tmpptr;
- tmp = load_reg(s, rt);
- tmpptr = tcg_const_ptr(ri);
- gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
- tcg_temp_free_ptr(tmpptr);
- tcg_temp_free_i32(tmp);
- } else {
- TCGv_i32 tmp = load_reg(s, rt);
- store_cpu_offset(tmp, ri->fieldoffset);
- }
- }
- }
-
- if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
- /* I/O operations must end the TB here (whether read or write) */
- gen_io_end();
- gen_lookup_tb(s);
- } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
- /* We default to ending the TB on a coprocessor register write,
- * but allow this to be suppressed by the register definition
- * (usually only necessary to work around guest bugs).
- */
- gen_lookup_tb(s);
- }
-
- return 0;
- }
-
- /* Unknown register; this might be a guest error or a QEMU
- * unimplemented feature.
- */
- if (is64) {
- qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
- "64 bit system register cp:%d opc1: %d crm:%d "
- "(%s)\n",
- isread ? "read" : "write", cpnum, opc1, crm,
- s->ns ? "non-secure" : "secure");
- } else {
- qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
- "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
- "(%s)\n",
- isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
- s->ns ? "non-secure" : "secure");
- }
-
- return 1;
-}
-
-
-/* Store a 64-bit value to a register pair. Clobbers val. */
-static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
-{
- TCGv_i32 tmp;
- tmp = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(tmp, val);
- store_reg(s, rlow, tmp);
- tmp = tcg_temp_new_i32();
- tcg_gen_shri_i64(val, val, 32);
- tcg_gen_extrl_i64_i32(tmp, val);
- store_reg(s, rhigh, tmp);
-}
-
-/* load a 32-bit value from a register and perform a 64-bit accumulate. */
-static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
-{
- TCGv_i64 tmp;
- TCGv_i32 tmp2;
-
- /* Load value and extend to 64 bits. */
- tmp = tcg_temp_new_i64();
- tmp2 = load_reg(s, rlow);
- tcg_gen_extu_i32_i64(tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- tcg_gen_add_i64(val, val, tmp);
- tcg_temp_free_i64(tmp);
-}
-
-/* load and add a 64-bit value from a register pair. */
-static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
-{
- TCGv_i64 tmp;
- TCGv_i32 tmpl;
- TCGv_i32 tmph;
-
- /* Load 64-bit value rd:rn. */
- tmpl = load_reg(s, rlow);
- tmph = load_reg(s, rhigh);
- tmp = tcg_temp_new_i64();
- tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
- tcg_temp_free_i32(tmpl);
- tcg_temp_free_i32(tmph);
- tcg_gen_add_i64(val, val, tmp);
- tcg_temp_free_i64(tmp);
-}
-
-/* Set N and Z flags from hi|lo. */
-static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
-{
- tcg_gen_mov_i32(cpu_NF, hi);
- tcg_gen_or_i32(cpu_ZF, lo, hi);
-}
-
-/* Load/Store exclusive instructions are implemented by remembering
- the value/address loaded, and seeing if these are the same
- when the store is performed. This should be sufficient to implement
- the architecturally mandated semantics, and avoids having to monitor
- regular stores. The compare vs the remembered value is done during
- the cmpxchg operation, but we must compare the addresses manually. */
-static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
- TCGv_i32 addr, int size)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- TCGMemOp opc = size | MO_ALIGN | s->be_data;
-
- s->is_ldex = true;
-
- if (size == 3) {
- TCGv_i32 tmp2 = tcg_temp_new_i32();
- TCGv_i64 t64 = tcg_temp_new_i64();
-
- /* For AArch32, architecturally the 32-bit word at the lowest
- * address is always Rt and the one at addr+4 is Rt2, even if
- * the CPU is big-endian. That means we don't want to do a
- * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
- * for an architecturally 64-bit access, but instead do a
- * 64-bit access using MO_BE if appropriate and then split
- * the two halves.
- * This only makes a difference for BE32 user-mode, where
- * frob64() must not flip the two halves of the 64-bit data
- * but this code must treat BE32 user-mode like BE32 system.
- */
- TCGv taddr = gen_aa32_addr(s, addr, opc);
-
- tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
- tcg_temp_free(taddr);
- tcg_gen_mov_i64(cpu_exclusive_val, t64);
- if (s->be_data == MO_BE) {
- tcg_gen_extr_i64_i32(tmp2, tmp, t64);
- } else {
- tcg_gen_extr_i64_i32(tmp, tmp2, t64);
- }
- tcg_temp_free_i64(t64);
-
- store_reg(s, rt2, tmp2);
- } else {
- gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
- tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
- }
-
- store_reg(s, rt, tmp);
- tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
-}
-
-static void gen_clrex(DisasContext *s)
-{
- tcg_gen_movi_i64(cpu_exclusive_addr, -1);
-}
-
-static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
- TCGv_i32 addr, int size)
-{
- TCGv_i32 t0, t1, t2;
- TCGv_i64 extaddr;
- TCGv taddr;
- TCGLabel *done_label;
- TCGLabel *fail_label;
- TCGMemOp opc = size | MO_ALIGN | s->be_data;
-
- /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
- [addr] = {Rt};
- {Rd} = 0;
- } else {
- {Rd} = 1;
- } */
- fail_label = gen_new_label();
- done_label = gen_new_label();
- extaddr = tcg_temp_new_i64();
- tcg_gen_extu_i32_i64(extaddr, addr);
- tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
- tcg_temp_free_i64(extaddr);
-
- taddr = gen_aa32_addr(s, addr, opc);
- t0 = tcg_temp_new_i32();
- t1 = load_reg(s, rt);
- if (size == 3) {
- TCGv_i64 o64 = tcg_temp_new_i64();
- TCGv_i64 n64 = tcg_temp_new_i64();
-
- t2 = load_reg(s, rt2);
- /* For AArch32, architecturally the 32-bit word at the lowest
- * address is always Rt and the one at addr+4 is Rt2, even if
- * the CPU is big-endian. Since we're going to treat this as a
- * single 64-bit BE store, we need to put the two halves in the
- * opposite order for BE to LE, so that they end up in the right
- * places.
- * We don't want gen_aa32_frob64() because that does the wrong
- * thing for BE32 usermode.
- */
- if (s->be_data == MO_BE) {
- tcg_gen_concat_i32_i64(n64, t2, t1);
- } else {
- tcg_gen_concat_i32_i64(n64, t1, t2);
- }
- tcg_temp_free_i32(t2);
-
- tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
- get_mem_index(s), opc);
- tcg_temp_free_i64(n64);
-
- tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
- tcg_gen_extrl_i64_i32(t0, o64);
-
- tcg_temp_free_i64(o64);
- } else {
- t2 = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
- tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
- tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
- tcg_temp_free_i32(t2);
- }
- tcg_temp_free_i32(t1);
- tcg_temp_free(taddr);
- tcg_gen_mov_i32(cpu_R[rd], t0);
- tcg_temp_free_i32(t0);
- tcg_gen_br(done_label);
-
- gen_set_label(fail_label);
- tcg_gen_movi_i32(cpu_R[rd], 1);
- gen_set_label(done_label);
- tcg_gen_movi_i64(cpu_exclusive_addr, -1);
-}
-
-/* gen_srs:
- * @env: CPUARMState
- * @s: DisasContext
- * @mode: mode field from insn (which stack to store to)
- * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
- * @writeback: true if writeback bit set
- *
- * Generate code for the SRS (Store Return State) insn.
- */
-static void gen_srs(DisasContext *s,
- uint32_t mode, uint32_t amode, bool writeback)
-{
- int32_t offset;
- TCGv_i32 addr, tmp;
- bool undef = false;
-
- /* SRS is:
- * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
- * and specified mode is monitor mode
- * - UNDEFINED in Hyp mode
- * - UNPREDICTABLE in User or System mode
- * - UNPREDICTABLE if the specified mode is:
- * -- not implemented
- * -- not a valid mode number
- * -- a mode that's at a higher exception level
- * -- Monitor, if we are Non-secure
- * For the UNPREDICTABLE cases we choose to UNDEF.
- */
- if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
- gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(), 3);
- return;
- }
-
- if (s->current_el == 0 || s->current_el == 2) {
- undef = true;
- }
-
- switch (mode) {
- case ARM_CPU_MODE_USR:
- case ARM_CPU_MODE_FIQ:
- case ARM_CPU_MODE_IRQ:
- case ARM_CPU_MODE_SVC:
- case ARM_CPU_MODE_ABT:
- case ARM_CPU_MODE_UND:
- case ARM_CPU_MODE_SYS:
- break;
- case ARM_CPU_MODE_HYP:
- if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
- undef = true;
- }
- break;
- case ARM_CPU_MODE_MON:
- /* No need to check specifically for "are we non-secure" because
- * we've already made EL0 UNDEF and handled the trap for S-EL1;
- * so if this isn't EL3 then we must be non-secure.
- */
- if (s->current_el != 3) {
- undef = true;
- }
- break;
- default:
- undef = true;
- }
-
- if (undef) {
- gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
- default_exception_el(s));
- return;
- }
-
- addr = tcg_temp_new_i32();
- tmp = tcg_const_i32(mode);
- /* get_r13_banked() will raise an exception if called from System mode */
- gen_set_condexec(s);
- gen_set_pc_im(s, s->pc - 4);
- gen_helper_get_r13_banked(addr, cpu_env, tmp);
- tcg_temp_free_i32(tmp);
- switch (amode) {
- case 0: /* DA */
- offset = -4;
- break;
- case 1: /* IA */
- offset = 0;
- break;
- case 2: /* DB */
- offset = -8;
- break;
- case 3: /* IB */
- offset = 4;
- break;
- default:
- abort();
- }
- tcg_gen_addi_i32(addr, addr, offset);
- tmp = load_reg(s, 14);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- tmp = load_cpu_field(spsr);
- tcg_gen_addi_i32(addr, addr, 4);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- if (writeback) {
- switch (amode) {
- case 0:
- offset = -8;
- break;
- case 1:
- offset = 4;
- break;
- case 2:
- offset = -4;
- break;
- case 3:
- offset = 0;
- break;
- default:
- abort();
- }
- tcg_gen_addi_i32(addr, addr, offset);
- tmp = tcg_const_i32(mode);
- gen_helper_set_r13_banked(cpu_env, tmp, addr);
- tcg_temp_free_i32(tmp);
- }
- tcg_temp_free_i32(addr);
- s->base.is_jmp = DISAS_UPDATE;
-}
-
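For reference, the two offset tables in gen_srs() above implement the four SRS addressing modes (DA/IA/DB/IB, selected by the P and U bits). Below is a standalone sketch of the same arithmetic (illustrative only; srs_layout() and struct srs_addrs are invented names, not QEMU code) that prints where LR and the SPSR land and what the written-back banked SP becomes:

#include <stdint.h>
#include <stdio.h>

/* amode 0 = DA, 1 = IA, 2 = DB, 3 = IB, as in the insn's P,U bits. */
struct srs_addrs {
    uint32_t lr_addr;   /* where LR is stored */
    uint32_t spsr_addr; /* where the SPSR is stored (lr_addr + 4) */
    uint32_t new_sp;    /* banked SP after writeback */
};

static struct srs_addrs srs_layout(uint32_t sp, int amode)
{
    static const int32_t first[4] = { -4, 0, -8, 4 };  /* DA IA DB IB */
    static const int32_t wback[4] = { -8, 4, -4, 0 };
    struct srs_addrs r;

    r.lr_addr = sp + first[amode];
    r.spsr_addr = r.lr_addr + 4;
    /* Writeback is applied to the address of the second word. */
    r.new_sp = r.spsr_addr + wback[amode];
    return r;
}

int main(void)
{
    static const char *names[4] = { "DA", "IA", "DB", "IB" };

    for (int amode = 0; amode < 4; amode++) {
        struct srs_addrs r = srs_layout(0x1000, amode);
        printf("%s: LR @ %#x, SPSR @ %#x, SP -> %#x\n",
               names[amode], r.lr_addr, r.spsr_addr, r.new_sp);
    }
    return 0;
}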
-/* Generate a label used for skipping this instruction */
-static void arm_gen_condlabel(DisasContext *s)
-{
- if (!s->condjmp) {
- s->condlabel = gen_new_label();
- s->condjmp = 1;
- }
-}
-
-/* Skip this instruction if the ARM condition is false */
-static void arm_skip_unless(DisasContext *s, uint32_t cond)
-{
- arm_gen_condlabel(s);
- arm_gen_test_cc(cond ^ 1, s->condlabel);
-}
-
-static void disas_arm_insn(DisasContext *s, unsigned int insn)
-{
- unsigned int cond, val, op1, i, shift, rm, rs, rn, rd, sh;
- TCGv_i32 tmp;
- TCGv_i32 tmp2;
- TCGv_i32 tmp3;
- TCGv_i32 addr;
- TCGv_i64 tmp64;
-
- /* M variants do not implement ARM mode; this must raise the INVSTATE
- * UsageFault exception.
- */
- if (arm_dc_feature(s, ARM_FEATURE_M)) {
- gen_exception_insn(s, 4, EXCP_INVSTATE, syn_uncategorized(),
- default_exception_el(s));
- return;
- }
- cond = insn >> 28;
- if (cond == 0xf){
- /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
- * choose to UNDEF. In ARMv5 and above the space is used
- * for miscellaneous unconditional instructions.
- */
- ARCH(5);
-
- /* Unconditional instructions. */
- if (((insn >> 25) & 7) == 1) {
- /* NEON Data processing. */
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- goto illegal_op;
- }
-
- if (disas_neon_data_insn(s, insn)) {
- goto illegal_op;
- }
- return;
- }
- if ((insn & 0x0f100000) == 0x04000000) {
- /* NEON load/store. */
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- goto illegal_op;
- }
-
- if (disas_neon_ls_insn(s, insn)) {
- goto illegal_op;
- }
- return;
- }
- if ((insn & 0x0f000e10) == 0x0e000a00) {
- /* VFP. */
- if (disas_vfp_insn(s, insn)) {
- goto illegal_op;
- }
- return;
- }
- if (((insn & 0x0f30f000) == 0x0510f000) ||
- ((insn & 0x0f30f010) == 0x0710f000)) {
- if ((insn & (1 << 22)) == 0) {
- /* PLDW; v7MP */
- if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
- goto illegal_op;
- }
- }
- /* Otherwise PLD; v5TE+ */
- ARCH(5TE);
- return;
- }
- if (((insn & 0x0f70f000) == 0x0450f000) ||
- ((insn & 0x0f70f010) == 0x0650f000)) {
- ARCH(7);
- return; /* PLI; V7 */
- }
- if (((insn & 0x0f700000) == 0x04100000) ||
- ((insn & 0x0f700010) == 0x06100000)) {
- if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
- goto illegal_op;
- }
- return; /* v7MP: Unallocated memory hint: must NOP */
- }
-
- if ((insn & 0x0ffffdff) == 0x01010000) {
- ARCH(6);
- /* setend */
- if (((insn >> 9) & 1) != !!(s->be_data == MO_BE)) {
- gen_helper_setend(cpu_env);
- s->base.is_jmp = DISAS_UPDATE;
- }
- return;
- } else if ((insn & 0x0fffff00) == 0x057ff000) {
- switch ((insn >> 4) & 0xf) {
- case 1: /* clrex */
- ARCH(6K);
- gen_clrex(s);
- return;
- case 4: /* dsb */
- case 5: /* dmb */
- ARCH(7);
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
- return;
- case 6: /* isb */
- /* We need to break the TB after this insn to execute
- * self-modifying code correctly and also to take
- * any pending interrupts immediately.
- */
- gen_goto_tb(s, 0, s->pc & ~1);
- return;
- default:
- goto illegal_op;
- }
- } else if ((insn & 0x0e5fffe0) == 0x084d0500) {
- /* srs */
- ARCH(6);
- gen_srs(s, (insn & 0x1f), (insn >> 23) & 3, insn & (1 << 21));
- return;
- } else if ((insn & 0x0e50ffe0) == 0x08100a00) {
- /* rfe */
- int32_t offset;
- if (IS_USER(s))
- goto illegal_op;
- ARCH(6);
- rn = (insn >> 16) & 0xf;
- addr = load_reg(s, rn);
- i = (insn >> 23) & 3;
- switch (i) {
- case 0: offset = -4; break; /* DA */
- case 1: offset = 0; break; /* IA */
- case 2: offset = -8; break; /* DB */
- case 3: offset = 4; break; /* IB */
- default: abort();
- }
- if (offset)
- tcg_gen_addi_i32(addr, addr, offset);
- /* Load PC into tmp and CPSR into tmp2. */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- tcg_gen_addi_i32(addr, addr, 4);
- tmp2 = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp2, addr, get_mem_index(s));
- if (insn & (1 << 21)) {
- /* Base writeback. */
- switch (i) {
- case 0: offset = -8; break;
- case 1: offset = 4; break;
- case 2: offset = -4; break;
- case 3: offset = 0; break;
- default: abort();
- }
- if (offset)
- tcg_gen_addi_i32(addr, addr, offset);
- store_reg(s, rn, addr);
- } else {
- tcg_temp_free_i32(addr);
- }
- gen_rfe(s, tmp, tmp2);
- return;
- } else if ((insn & 0x0e000000) == 0x0a000000) {
- /* branch link and change to thumb (blx <offset>) */
- int32_t offset;
-
- val = (uint32_t)s->pc;
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, val);
- store_reg(s, 14, tmp);
- /* Sign-extend the 24-bit offset */
- offset = (((int32_t)insn) << 8) >> 8;
- /* offset * 4 + bit24 * 2 + (thumb bit) */
- val += (offset << 2) | ((insn >> 23) & 2) | 1;
- /* pipeline offset */
- val += 4;
- /* protected by ARCH(5); above, near the start of uncond block */
- gen_bx_im(s, val);
- return;
- } else if ((insn & 0x0e000f00) == 0x0c000100) {
- if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
- /* iWMMXt register transfer. */
- if (extract32(s->c15_cpar, 1, 1)) {
- if (!disas_iwmmxt_insn(s, insn)) {
- return;
- }
- }
- }
- } else if ((insn & 0x0e000a00) == 0x0c000800
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
- if (disas_neon_insn_3same_ext(s, insn)) {
- goto illegal_op;
- }
- return;
- } else if ((insn & 0x0f000a00) == 0x0e000800
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
- if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
- goto illegal_op;
- }
- return;
- } else if ((insn & 0x0fe00000) == 0x0c400000) {
- /* Coprocessor double register transfer. */
- ARCH(5TE);
- } else if ((insn & 0x0f000010) == 0x0e000010) {
- /* Additional coprocessor register transfer. */
- } else if ((insn & 0x0ff10020) == 0x01000000) {
- uint32_t mask;
- uint32_t val;
- /* cps (privileged) */
- if (IS_USER(s))
- return;
- mask = val = 0;
- if (insn & (1 << 19)) {
- if (insn & (1 << 8))
- mask |= CPSR_A;
- if (insn & (1 << 7))
- mask |= CPSR_I;
- if (insn & (1 << 6))
- mask |= CPSR_F;
- if (insn & (1 << 18))
- val |= mask;
- }
- if (insn & (1 << 17)) {
- mask |= CPSR_M;
- val |= (insn & 0x1f);
- }
- if (mask) {
- gen_set_psr_im(s, mask, 0, val);
- }
- return;
- }
- goto illegal_op;
- }
- if (cond != 0xe) {
-        /* If the condition is not "always", generate a conditional jump
-           to the next instruction. */
- arm_skip_unless(s, cond);
- }
- if ((insn & 0x0f900000) == 0x03000000) {
- if ((insn & (1 << 21)) == 0) {
- ARCH(6T2);
- rd = (insn >> 12) & 0xf;
- val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
- if ((insn & (1 << 22)) == 0) {
- /* MOVW */
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, val);
- } else {
- /* MOVT */
- tmp = load_reg(s, rd);
- tcg_gen_ext16u_i32(tmp, tmp);
- tcg_gen_ori_i32(tmp, tmp, val << 16);
- }
- store_reg(s, rd, tmp);
- } else {
- if (((insn >> 12) & 0xf) != 0xf)
- goto illegal_op;
- if (((insn >> 16) & 0xf) == 0) {
- gen_nop_hint(s, insn & 0xff);
- } else {
- /* CPSR = immediate */
- val = insn & 0xff;
- shift = ((insn >> 8) & 0xf) * 2;
- if (shift)
- val = (val >> shift) | (val << (32 - shift));
- i = ((insn & (1 << 22)) != 0);
- if (gen_set_psr_im(s, msr_mask(s, (insn >> 16) & 0xf, i),
- i, val)) {
- goto illegal_op;
- }
- }
- }
- } else if ((insn & 0x0f900000) == 0x01000000
- && (insn & 0x00000090) != 0x00000090) {
- /* miscellaneous instructions */
- op1 = (insn >> 21) & 3;
- sh = (insn >> 4) & 0xf;
- rm = insn & 0xf;
- switch (sh) {
- case 0x0: /* MSR, MRS */
- if (insn & (1 << 9)) {
- /* MSR (banked) and MRS (banked) */
- int sysm = extract32(insn, 16, 4) |
- (extract32(insn, 8, 1) << 4);
- int r = extract32(insn, 22, 1);
-
- if (op1 & 1) {
- /* MSR (banked) */
- gen_msr_banked(s, r, sysm, rm);
- } else {
- /* MRS (banked) */
- int rd = extract32(insn, 12, 4);
-
- gen_mrs_banked(s, r, sysm, rd);
- }
- break;
- }
-
- /* MSR, MRS (for PSRs) */
- if (op1 & 1) {
- /* PSR = reg */
- tmp = load_reg(s, rm);
- i = ((op1 & 2) != 0);
- if (gen_set_psr(s, msr_mask(s, (insn >> 16) & 0xf, i), i, tmp))
- goto illegal_op;
- } else {
- /* reg = PSR */
- rd = (insn >> 12) & 0xf;
- if (op1 & 2) {
- if (IS_USER(s))
- goto illegal_op;
- tmp = load_cpu_field(spsr);
- } else {
- tmp = tcg_temp_new_i32();
- gen_helper_cpsr_read(tmp, cpu_env);
- }
- store_reg(s, rd, tmp);
- }
- break;
- case 0x1:
- if (op1 == 1) {
- /* branch/exchange thumb (bx). */
- ARCH(4T);
- tmp = load_reg(s, rm);
- gen_bx(s, tmp);
- } else if (op1 == 3) {
- /* clz */
- ARCH(5);
- rd = (insn >> 12) & 0xf;
- tmp = load_reg(s, rm);
- tcg_gen_clzi_i32(tmp, tmp, 32);
- store_reg(s, rd, tmp);
- } else {
- goto illegal_op;
- }
- break;
- case 0x2:
- if (op1 == 1) {
- ARCH(5J); /* bxj */
- /* Trivial implementation equivalent to bx. */
- tmp = load_reg(s, rm);
- gen_bx(s, tmp);
- } else {
- goto illegal_op;
- }
- break;
- case 0x3:
- if (op1 != 1)
- goto illegal_op;
-
- ARCH(5);
- /* branch link/exchange thumb (blx) */
- tmp = load_reg(s, rm);
- tmp2 = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp2, s->pc);
- store_reg(s, 14, tmp2);
- gen_bx(s, tmp);
- break;
- case 0x4:
- {
- /* crc32/crc32c */
- uint32_t c = extract32(insn, 8, 4);
-
- /* Check this CPU supports ARMv8 CRC instructions.
- * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED.
- * Bits 8, 10 and 11 should be zero.
- */
- if (!arm_dc_feature(s, ARM_FEATURE_CRC) || op1 == 0x3 ||
- (c & 0xd) != 0) {
- goto illegal_op;
- }
-
- rn = extract32(insn, 16, 4);
- rd = extract32(insn, 12, 4);
-
- tmp = load_reg(s, rn);
- tmp2 = load_reg(s, rm);
- if (op1 == 0) {
- tcg_gen_andi_i32(tmp2, tmp2, 0xff);
- } else if (op1 == 1) {
- tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
- }
- tmp3 = tcg_const_i32(1 << op1);
- if (c & 0x2) {
- gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
- } else {
- gen_helper_crc32(tmp, tmp, tmp2, tmp3);
- }
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp3);
- store_reg(s, rd, tmp);
- break;
- }
- case 0x5: /* saturating add/subtract */
- ARCH(5TE);
- rd = (insn >> 12) & 0xf;
- rn = (insn >> 16) & 0xf;
- tmp = load_reg(s, rm);
- tmp2 = load_reg(s, rn);
- if (op1 & 2)
- gen_helper_double_saturate(tmp2, cpu_env, tmp2);
- if (op1 & 1)
- gen_helper_sub_saturate(tmp, cpu_env, tmp, tmp2);
- else
- gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- store_reg(s, rd, tmp);
- break;
- case 0x6: /* ERET */
- if (op1 != 3) {
- goto illegal_op;
- }
- if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
- goto illegal_op;
- }
- if ((insn & 0x000fff0f) != 0x0000000e) {
- /* UNPREDICTABLE; we choose to UNDEF */
- goto illegal_op;
- }
-
- if (s->current_el == 2) {
- tmp = load_cpu_field(elr_el[2]);
- } else {
- tmp = load_reg(s, 14);
- }
- gen_exception_return(s, tmp);
- break;
- case 7:
- {
- int imm16 = extract32(insn, 0, 4) | (extract32(insn, 8, 12) << 4);
- switch (op1) {
- case 0:
- /* HLT */
- gen_hlt(s, imm16);
- break;
- case 1:
- /* bkpt */
- ARCH(5);
- gen_exception_bkpt_insn(s, 4, syn_aa32_bkpt(imm16, false));
- break;
- case 2:
- /* Hypervisor call (v7) */
- ARCH(7);
- if (IS_USER(s)) {
- goto illegal_op;
- }
- gen_hvc(s, imm16);
- break;
- case 3:
- /* Secure monitor call (v6+) */
- ARCH(6K);
- if (IS_USER(s)) {
- goto illegal_op;
- }
- gen_smc(s);
- break;
- default:
- g_assert_not_reached();
- }
- break;
- }
- case 0x8: /* signed multiply */
- case 0xa:
- case 0xc:
- case 0xe:
- ARCH(5TE);
- rs = (insn >> 8) & 0xf;
- rn = (insn >> 12) & 0xf;
- rd = (insn >> 16) & 0xf;
- if (op1 == 1) {
- /* (32 * 16) >> 16 */
- tmp = load_reg(s, rm);
- tmp2 = load_reg(s, rs);
- if (sh & 4)
- tcg_gen_sari_i32(tmp2, tmp2, 16);
- else
- gen_sxth(tmp2);
- tmp64 = gen_muls_i64_i32(tmp, tmp2);
- tcg_gen_shri_i64(tmp64, tmp64, 16);
- tmp = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(tmp, tmp64);
- tcg_temp_free_i64(tmp64);
- if ((sh & 2) == 0) {
- tmp2 = load_reg(s, rn);
- gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
- store_reg(s, rd, tmp);
- } else {
- /* 16 * 16 */
- tmp = load_reg(s, rm);
- tmp2 = load_reg(s, rs);
- gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
- tcg_temp_free_i32(tmp2);
- if (op1 == 2) {
- tmp64 = tcg_temp_new_i64();
- tcg_gen_ext_i32_i64(tmp64, tmp);
- tcg_temp_free_i32(tmp);
- gen_addq(s, tmp64, rn, rd);
- gen_storeq_reg(s, rn, rd, tmp64);
- tcg_temp_free_i64(tmp64);
- } else {
- if (op1 == 0) {
- tmp2 = load_reg(s, rn);
- gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
- store_reg(s, rd, tmp);
- }
- }
- break;
- default:
- goto illegal_op;
- }
- } else if (((insn & 0x0e000000) == 0 &&
- (insn & 0x00000090) != 0x90) ||
- ((insn & 0x0e000000) == (1 << 25))) {
- int set_cc, logic_cc, shiftop;
-
- op1 = (insn >> 21) & 0xf;
- set_cc = (insn >> 20) & 1;
- logic_cc = table_logic_cc[op1] & set_cc;
-
- /* data processing instruction */
- if (insn & (1 << 25)) {
- /* immediate operand */
- val = insn & 0xff;
- shift = ((insn >> 8) & 0xf) * 2;
- if (shift) {
- val = (val >> shift) | (val << (32 - shift));
- }
- tmp2 = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp2, val);
- if (logic_cc && shift) {
- gen_set_CF_bit31(tmp2);
- }
- } else {
- /* register */
- rm = (insn) & 0xf;
- tmp2 = load_reg(s, rm);
- shiftop = (insn >> 5) & 3;
- if (!(insn & (1 << 4))) {
- shift = (insn >> 7) & 0x1f;
- gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
- } else {
- rs = (insn >> 8) & 0xf;
- tmp = load_reg(s, rs);
- gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
- }
- }
- if (op1 != 0x0f && op1 != 0x0d) {
- rn = (insn >> 16) & 0xf;
- tmp = load_reg(s, rn);
- } else {
- tmp = NULL;
- }
- rd = (insn >> 12) & 0xf;
- switch(op1) {
- case 0x00:
- tcg_gen_and_i32(tmp, tmp, tmp2);
- if (logic_cc) {
- gen_logic_CC(tmp);
- }
- store_reg_bx(s, rd, tmp);
- break;
- case 0x01:
- tcg_gen_xor_i32(tmp, tmp, tmp2);
- if (logic_cc) {
- gen_logic_CC(tmp);
- }
- store_reg_bx(s, rd, tmp);
- break;
- case 0x02:
- if (set_cc && rd == 15) {
- /* SUBS r15, ... is used for exception return. */
- if (IS_USER(s)) {
- goto illegal_op;
- }
- gen_sub_CC(tmp, tmp, tmp2);
- gen_exception_return(s, tmp);
- } else {
- if (set_cc) {
- gen_sub_CC(tmp, tmp, tmp2);
- } else {
- tcg_gen_sub_i32(tmp, tmp, tmp2);
- }
- store_reg_bx(s, rd, tmp);
- }
- break;
- case 0x03:
- if (set_cc) {
- gen_sub_CC(tmp, tmp2, tmp);
- } else {
- tcg_gen_sub_i32(tmp, tmp2, tmp);
- }
- store_reg_bx(s, rd, tmp);
- break;
- case 0x04:
- if (set_cc) {
- gen_add_CC(tmp, tmp, tmp2);
- } else {
- tcg_gen_add_i32(tmp, tmp, tmp2);
- }
- store_reg_bx(s, rd, tmp);
- break;
- case 0x05:
- if (set_cc) {
- gen_adc_CC(tmp, tmp, tmp2);
- } else {
- gen_add_carry(tmp, tmp, tmp2);
- }
- store_reg_bx(s, rd, tmp);
- break;
- case 0x06:
- if (set_cc) {
- gen_sbc_CC(tmp, tmp, tmp2);
- } else {
- gen_sub_carry(tmp, tmp, tmp2);
- }
- store_reg_bx(s, rd, tmp);
- break;
- case 0x07:
- if (set_cc) {
- gen_sbc_CC(tmp, tmp2, tmp);
- } else {
- gen_sub_carry(tmp, tmp2, tmp);
- }
- store_reg_bx(s, rd, tmp);
- break;
- case 0x08:
- if (set_cc) {
- tcg_gen_and_i32(tmp, tmp, tmp2);
- gen_logic_CC(tmp);
- }
- tcg_temp_free_i32(tmp);
- break;
- case 0x09:
- if (set_cc) {
- tcg_gen_xor_i32(tmp, tmp, tmp2);
- gen_logic_CC(tmp);
- }
- tcg_temp_free_i32(tmp);
- break;
- case 0x0a:
- if (set_cc) {
- gen_sub_CC(tmp, tmp, tmp2);
- }
- tcg_temp_free_i32(tmp);
- break;
- case 0x0b:
- if (set_cc) {
- gen_add_CC(tmp, tmp, tmp2);
- }
- tcg_temp_free_i32(tmp);
- break;
- case 0x0c:
- tcg_gen_or_i32(tmp, tmp, tmp2);
- if (logic_cc) {
- gen_logic_CC(tmp);
- }
- store_reg_bx(s, rd, tmp);
- break;
- case 0x0d:
- if (logic_cc && rd == 15) {
- /* MOVS r15, ... is used for exception return. */
- if (IS_USER(s)) {
- goto illegal_op;
- }
- gen_exception_return(s, tmp2);
- } else {
- if (logic_cc) {
- gen_logic_CC(tmp2);
- }
- store_reg_bx(s, rd, tmp2);
- }
- break;
- case 0x0e:
- tcg_gen_andc_i32(tmp, tmp, tmp2);
- if (logic_cc) {
- gen_logic_CC(tmp);
- }
- store_reg_bx(s, rd, tmp);
- break;
- default:
- case 0x0f:
- tcg_gen_not_i32(tmp2, tmp2);
- if (logic_cc) {
- gen_logic_CC(tmp2);
- }
- store_reg_bx(s, rd, tmp2);
- break;
- }
- if (op1 != 0x0f && op1 != 0x0d) {
- tcg_temp_free_i32(tmp2);
- }
- } else {
- /* other instructions */
- op1 = (insn >> 24) & 0xf;
- switch(op1) {
- case 0x0:
- case 0x1:
- /* multiplies, extra load/stores */
- sh = (insn >> 5) & 3;
- if (sh == 0) {
- if (op1 == 0x0) {
- rd = (insn >> 16) & 0xf;
- rn = (insn >> 12) & 0xf;
- rs = (insn >> 8) & 0xf;
- rm = (insn) & 0xf;
- op1 = (insn >> 20) & 0xf;
- switch (op1) {
- case 0: case 1: case 2: case 3: case 6:
- /* 32 bit mul */
- tmp = load_reg(s, rs);
- tmp2 = load_reg(s, rm);
- tcg_gen_mul_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- if (insn & (1 << 22)) {
- /* Subtract (mls) */
- ARCH(6T2);
- tmp2 = load_reg(s, rn);
- tcg_gen_sub_i32(tmp, tmp2, tmp);
- tcg_temp_free_i32(tmp2);
- } else if (insn & (1 << 21)) {
- /* Add */
- tmp2 = load_reg(s, rn);
- tcg_gen_add_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
- if (insn & (1 << 20))
- gen_logic_CC(tmp);
- store_reg(s, rd, tmp);
- break;
- case 4:
- /* 64 bit mul double accumulate (UMAAL) */
- ARCH(6);
- tmp = load_reg(s, rs);
- tmp2 = load_reg(s, rm);
- tmp64 = gen_mulu_i64_i32(tmp, tmp2);
- gen_addq_lo(s, tmp64, rn);
- gen_addq_lo(s, tmp64, rd);
- gen_storeq_reg(s, rn, rd, tmp64);
- tcg_temp_free_i64(tmp64);
- break;
- case 8: case 9: case 10: case 11:
- case 12: case 13: case 14: case 15:
- /* 64 bit mul: UMULL, UMLAL, SMULL, SMLAL. */
- tmp = load_reg(s, rs);
- tmp2 = load_reg(s, rm);
- if (insn & (1 << 22)) {
- tcg_gen_muls2_i32(tmp, tmp2, tmp, tmp2);
- } else {
- tcg_gen_mulu2_i32(tmp, tmp2, tmp, tmp2);
- }
- if (insn & (1 << 21)) { /* mult accumulate */
- TCGv_i32 al = load_reg(s, rn);
- TCGv_i32 ah = load_reg(s, rd);
- tcg_gen_add2_i32(tmp, tmp2, tmp, tmp2, al, ah);
- tcg_temp_free_i32(al);
- tcg_temp_free_i32(ah);
- }
- if (insn & (1 << 20)) {
- gen_logicq_cc(tmp, tmp2);
- }
- store_reg(s, rn, tmp);
- store_reg(s, rd, tmp2);
- break;
- default:
- goto illegal_op;
- }
- } else {
- rn = (insn >> 16) & 0xf;
- rd = (insn >> 12) & 0xf;
- if (insn & (1 << 23)) {
- /* load/store exclusive */
- int op2 = (insn >> 8) & 3;
- op1 = (insn >> 21) & 0x3;
-
- switch (op2) {
- case 0: /* lda/stl */
- if (op1 == 1) {
- goto illegal_op;
- }
- ARCH(8);
- break;
- case 1: /* reserved */
- goto illegal_op;
- case 2: /* ldaex/stlex */
- ARCH(8);
- break;
- case 3: /* ldrex/strex */
- if (op1) {
- ARCH(6K);
- } else {
- ARCH(6);
- }
- break;
- }
-
- addr = tcg_temp_local_new_i32();
- load_reg_var(s, addr, rn);
-
- /* Since the emulation does not have barriers,
- the acquire/release semantics need no special
- handling */
- if (op2 == 0) {
- if (insn & (1 << 20)) {
- tmp = tcg_temp_new_i32();
- switch (op1) {
- case 0: /* lda */
- gen_aa32_ld32u_iss(s, tmp, addr,
- get_mem_index(s),
- rd | ISSIsAcqRel);
- break;
- case 2: /* ldab */
- gen_aa32_ld8u_iss(s, tmp, addr,
- get_mem_index(s),
- rd | ISSIsAcqRel);
- break;
- case 3: /* ldah */
- gen_aa32_ld16u_iss(s, tmp, addr,
- get_mem_index(s),
- rd | ISSIsAcqRel);
- break;
- default:
- abort();
- }
- store_reg(s, rd, tmp);
- } else {
- rm = insn & 0xf;
- tmp = load_reg(s, rm);
- switch (op1) {
- case 0: /* stl */
- gen_aa32_st32_iss(s, tmp, addr,
- get_mem_index(s),
- rm | ISSIsAcqRel);
- break;
- case 2: /* stlb */
- gen_aa32_st8_iss(s, tmp, addr,
- get_mem_index(s),
- rm | ISSIsAcqRel);
- break;
- case 3: /* stlh */
- gen_aa32_st16_iss(s, tmp, addr,
- get_mem_index(s),
- rm | ISSIsAcqRel);
- break;
- default:
- abort();
- }
- tcg_temp_free_i32(tmp);
- }
- } else if (insn & (1 << 20)) {
- switch (op1) {
- case 0: /* ldrex */
- gen_load_exclusive(s, rd, 15, addr, 2);
- break;
- case 1: /* ldrexd */
- gen_load_exclusive(s, rd, rd + 1, addr, 3);
- break;
- case 2: /* ldrexb */
- gen_load_exclusive(s, rd, 15, addr, 0);
- break;
- case 3: /* ldrexh */
- gen_load_exclusive(s, rd, 15, addr, 1);
- break;
- default:
- abort();
- }
- } else {
- rm = insn & 0xf;
- switch (op1) {
- case 0: /* strex */
- gen_store_exclusive(s, rd, rm, 15, addr, 2);
- break;
- case 1: /* strexd */
- gen_store_exclusive(s, rd, rm, rm + 1, addr, 3);
- break;
- case 2: /* strexb */
- gen_store_exclusive(s, rd, rm, 15, addr, 0);
- break;
- case 3: /* strexh */
- gen_store_exclusive(s, rd, rm, 15, addr, 1);
- break;
- default:
- abort();
- }
- }
- tcg_temp_free_i32(addr);
- } else if ((insn & 0x00300f00) == 0) {
- /* 0bcccc_0001_0x00_xxxx_xxxx_0000_1001_xxxx
- * - SWP, SWPB
- */
-
- TCGv taddr;
- TCGMemOp opc = s->be_data;
-
- rm = (insn) & 0xf;
-
- if (insn & (1 << 22)) {
- opc |= MO_UB;
- } else {
- opc |= MO_UL | MO_ALIGN;
- }
-
- addr = load_reg(s, rn);
- taddr = gen_aa32_addr(s, addr, opc);
- tcg_temp_free_i32(addr);
-
- tmp = load_reg(s, rm);
- tcg_gen_atomic_xchg_i32(tmp, taddr, tmp,
- get_mem_index(s), opc);
- tcg_temp_free(taddr);
- store_reg(s, rd, tmp);
- } else {
- goto illegal_op;
- }
- }
- } else {
- int address_offset;
- bool load = insn & (1 << 20);
- bool wbit = insn & (1 << 21);
- bool pbit = insn & (1 << 24);
- bool doubleword = false;
- ISSInfo issinfo;
-
- /* Misc load/store */
- rn = (insn >> 16) & 0xf;
- rd = (insn >> 12) & 0xf;
-
- /* ISS not valid if writeback */
- issinfo = (pbit & !wbit) ? rd : ISSInvalid;
-
- if (!load && (sh & 2)) {
- /* doubleword */
- ARCH(5TE);
- if (rd & 1) {
- /* UNPREDICTABLE; we choose to UNDEF */
- goto illegal_op;
- }
- load = (sh & 1) == 0;
- doubleword = true;
- }
-
- addr = load_reg(s, rn);
- if (pbit) {
- gen_add_datah_offset(s, insn, 0, addr);
- }
- address_offset = 0;
-
- if (doubleword) {
- if (!load) {
- /* store */
- tmp = load_reg(s, rd);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- tcg_gen_addi_i32(addr, addr, 4);
- tmp = load_reg(s, rd + 1);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- } else {
- /* load */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- store_reg(s, rd, tmp);
- tcg_gen_addi_i32(addr, addr, 4);
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- rd++;
- }
- address_offset = -4;
- } else if (load) {
- /* load */
- tmp = tcg_temp_new_i32();
- switch (sh) {
- case 1:
- gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s),
- issinfo);
- break;
- case 2:
- gen_aa32_ld8s_iss(s, tmp, addr, get_mem_index(s),
- issinfo);
- break;
- default:
- case 3:
- gen_aa32_ld16s_iss(s, tmp, addr, get_mem_index(s),
- issinfo);
- break;
- }
- } else {
- /* store */
- tmp = load_reg(s, rd);
- gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s), issinfo);
- tcg_temp_free_i32(tmp);
- }
-                    /* Perform base writeback before completing the load, to
-                       ensure correct behavior with overlapping index registers.
- ldrd with base writeback is undefined if the
- destination and index registers overlap. */
- if (!pbit) {
- gen_add_datah_offset(s, insn, address_offset, addr);
- store_reg(s, rn, addr);
- } else if (wbit) {
- if (address_offset)
- tcg_gen_addi_i32(addr, addr, address_offset);
- store_reg(s, rn, addr);
- } else {
- tcg_temp_free_i32(addr);
- }
- if (load) {
- /* Complete the load. */
- store_reg(s, rd, tmp);
- }
- }
- break;
- case 0x4:
- case 0x5:
- goto do_ldst;
- case 0x6:
- case 0x7:
- if (insn & (1 << 4)) {
- ARCH(6);
- /* Armv6 Media instructions. */
- rm = insn & 0xf;
- rn = (insn >> 16) & 0xf;
- rd = (insn >> 12) & 0xf;
- rs = (insn >> 8) & 0xf;
- switch ((insn >> 23) & 3) {
- case 0: /* Parallel add/subtract. */
- op1 = (insn >> 20) & 7;
- tmp = load_reg(s, rn);
- tmp2 = load_reg(s, rm);
- sh = (insn >> 5) & 7;
- if ((op1 & 3) == 0 || sh == 5 || sh == 6)
- goto illegal_op;
- gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- store_reg(s, rd, tmp);
- break;
- case 1:
- if ((insn & 0x00700020) == 0) {
- /* Halfword pack. */
- tmp = load_reg(s, rn);
- tmp2 = load_reg(s, rm);
- shift = (insn >> 7) & 0x1f;
- if (insn & (1 << 6)) {
- /* pkhtb */
- if (shift == 0)
- shift = 31;
- tcg_gen_sari_i32(tmp2, tmp2, shift);
- tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
- tcg_gen_ext16u_i32(tmp2, tmp2);
- } else {
- /* pkhbt */
- if (shift)
- tcg_gen_shli_i32(tmp2, tmp2, shift);
- tcg_gen_ext16u_i32(tmp, tmp);
- tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
- }
- tcg_gen_or_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- store_reg(s, rd, tmp);
- } else if ((insn & 0x00200020) == 0x00200000) {
- /* [us]sat */
- tmp = load_reg(s, rm);
- shift = (insn >> 7) & 0x1f;
- if (insn & (1 << 6)) {
- if (shift == 0)
- shift = 31;
- tcg_gen_sari_i32(tmp, tmp, shift);
- } else {
- tcg_gen_shli_i32(tmp, tmp, shift);
- }
- sh = (insn >> 16) & 0x1f;
- tmp2 = tcg_const_i32(sh);
- if (insn & (1 << 22))
- gen_helper_usat(tmp, cpu_env, tmp, tmp2);
- else
- gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- store_reg(s, rd, tmp);
- } else if ((insn & 0x00300fe0) == 0x00200f20) {
- /* [us]sat16 */
- tmp = load_reg(s, rm);
- sh = (insn >> 16) & 0x1f;
- tmp2 = tcg_const_i32(sh);
- if (insn & (1 << 22))
- gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
- else
- gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- store_reg(s, rd, tmp);
- } else if ((insn & 0x00700fe0) == 0x00000fa0) {
- /* Select bytes. */
- tmp = load_reg(s, rn);
- tmp2 = load_reg(s, rm);
- tmp3 = tcg_temp_new_i32();
- tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
- gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
- tcg_temp_free_i32(tmp3);
- tcg_temp_free_i32(tmp2);
- store_reg(s, rd, tmp);
- } else if ((insn & 0x000003e0) == 0x00000060) {
- tmp = load_reg(s, rm);
- shift = (insn >> 10) & 3;
- /* ??? In many cases it's not necessary to do a
- rotate, a shift is sufficient. */
- if (shift != 0)
- tcg_gen_rotri_i32(tmp, tmp, shift * 8);
- op1 = (insn >> 20) & 7;
- switch (op1) {
- case 0: gen_sxtb16(tmp); break;
- case 2: gen_sxtb(tmp); break;
- case 3: gen_sxth(tmp); break;
- case 4: gen_uxtb16(tmp); break;
- case 6: gen_uxtb(tmp); break;
- case 7: gen_uxth(tmp); break;
- default: goto illegal_op;
- }
- if (rn != 15) {
- tmp2 = load_reg(s, rn);
- if ((op1 & 3) == 0) {
- gen_add16(tmp, tmp2);
- } else {
- tcg_gen_add_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
- }
- store_reg(s, rd, tmp);
- } else if ((insn & 0x003f0f60) == 0x003f0f20) {
- /* rev */
- tmp = load_reg(s, rm);
- if (insn & (1 << 22)) {
- if (insn & (1 << 7)) {
- gen_revsh(tmp);
- } else {
- ARCH(6T2);
- gen_helper_rbit(tmp, tmp);
- }
- } else {
- if (insn & (1 << 7))
- gen_rev16(tmp);
- else
- tcg_gen_bswap32_i32(tmp, tmp);
- }
- store_reg(s, rd, tmp);
- } else {
- goto illegal_op;
- }
- break;
- case 2: /* Multiplies (Type 3). */
- switch ((insn >> 20) & 0x7) {
- case 5:
- if (((insn >> 6) ^ (insn >> 7)) & 1) {
- /* op2 not 00x or 11x : UNDEF */
- goto illegal_op;
- }
- /* Signed multiply most significant [accumulate].
- (SMMUL, SMMLA, SMMLS) */
- tmp = load_reg(s, rm);
- tmp2 = load_reg(s, rs);
- tmp64 = gen_muls_i64_i32(tmp, tmp2);
-
- if (rd != 15) {
- tmp = load_reg(s, rd);
- if (insn & (1 << 6)) {
- tmp64 = gen_subq_msw(tmp64, tmp);
- } else {
- tmp64 = gen_addq_msw(tmp64, tmp);
- }
- }
- if (insn & (1 << 5)) {
- tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
- }
- tcg_gen_shri_i64(tmp64, tmp64, 32);
- tmp = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(tmp, tmp64);
- tcg_temp_free_i64(tmp64);
- store_reg(s, rn, tmp);
- break;
- case 0:
- case 4:
- /* SMLAD, SMUAD, SMLSD, SMUSD, SMLALD, SMLSLD */
- if (insn & (1 << 7)) {
- goto illegal_op;
- }
- tmp = load_reg(s, rm);
- tmp2 = load_reg(s, rs);
- if (insn & (1 << 5))
- gen_swap_half(tmp2);
- gen_smul_dual(tmp, tmp2);
- if (insn & (1 << 22)) {
- /* smlald, smlsld */
- TCGv_i64 tmp64_2;
-
- tmp64 = tcg_temp_new_i64();
- tmp64_2 = tcg_temp_new_i64();
- tcg_gen_ext_i32_i64(tmp64, tmp);
- tcg_gen_ext_i32_i64(tmp64_2, tmp2);
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(tmp2);
- if (insn & (1 << 6)) {
- tcg_gen_sub_i64(tmp64, tmp64, tmp64_2);
- } else {
- tcg_gen_add_i64(tmp64, tmp64, tmp64_2);
- }
- tcg_temp_free_i64(tmp64_2);
- gen_addq(s, tmp64, rd, rn);
- gen_storeq_reg(s, rd, rn, tmp64);
- tcg_temp_free_i64(tmp64);
- } else {
- /* smuad, smusd, smlad, smlsd */
- if (insn & (1 << 6)) {
- /* This subtraction cannot overflow. */
- tcg_gen_sub_i32(tmp, tmp, tmp2);
- } else {
- /* This addition cannot overflow 32 bits;
-                             * however it may overflow when considered as a
- * signed operation, in which case we must set
- * the Q flag.
- */
- gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
- }
- tcg_temp_free_i32(tmp2);
- if (rd != 15)
- {
- tmp2 = load_reg(s, rd);
- gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
- store_reg(s, rn, tmp);
- }
- break;
- case 1:
- case 3:
- /* SDIV, UDIV */
- if (!arm_dc_feature(s, ARM_FEATURE_ARM_DIV)) {
- goto illegal_op;
- }
- if (((insn >> 5) & 7) || (rd != 15)) {
- goto illegal_op;
- }
- tmp = load_reg(s, rm);
- tmp2 = load_reg(s, rs);
- if (insn & (1 << 21)) {
- gen_helper_udiv(tmp, tmp, tmp2);
- } else {
- gen_helper_sdiv(tmp, tmp, tmp2);
- }
- tcg_temp_free_i32(tmp2);
- store_reg(s, rn, tmp);
- break;
- default:
- goto illegal_op;
- }
- break;
- case 3:
- op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7);
- switch (op1) {
- case 0: /* Unsigned sum of absolute differences. */
- ARCH(6);
- tmp = load_reg(s, rm);
- tmp2 = load_reg(s, rs);
- gen_helper_usad8(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- if (rd != 15) {
- tmp2 = load_reg(s, rd);
- tcg_gen_add_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
- store_reg(s, rn, tmp);
- break;
- case 0x20: case 0x24: case 0x28: case 0x2c:
- /* Bitfield insert/clear. */
- ARCH(6T2);
- shift = (insn >> 7) & 0x1f;
- i = (insn >> 16) & 0x1f;
- if (i < shift) {
- /* UNPREDICTABLE; we choose to UNDEF */
- goto illegal_op;
- }
- i = i + 1 - shift;
- if (rm == 15) {
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- } else {
- tmp = load_reg(s, rm);
- }
- if (i != 32) {
- tmp2 = load_reg(s, rd);
- tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, i);
- tcg_temp_free_i32(tmp2);
- }
- store_reg(s, rd, tmp);
- break;
- case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
- case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
- ARCH(6T2);
- tmp = load_reg(s, rm);
- shift = (insn >> 7) & 0x1f;
- i = ((insn >> 16) & 0x1f) + 1;
- if (shift + i > 32)
- goto illegal_op;
- if (i < 32) {
- if (op1 & 0x20) {
- tcg_gen_extract_i32(tmp, tmp, shift, i);
- } else {
- tcg_gen_sextract_i32(tmp, tmp, shift, i);
- }
- }
- store_reg(s, rd, tmp);
- break;
- default:
- goto illegal_op;
- }
- break;
- }
- break;
- }
- do_ldst:
- /* Check for undefined extension instructions
-         * per the ARM Bible, i.e.:
- * xxxx 0111 1111 xxxx xxxx xxxx 1111 xxxx
- */
- sh = (0xf << 20) | (0xf << 4);
- if (op1 == 0x7 && ((insn & sh) == sh))
- {
- goto illegal_op;
- }
- /* load/store byte/word */
- rn = (insn >> 16) & 0xf;
- rd = (insn >> 12) & 0xf;
- tmp2 = load_reg(s, rn);
- if ((insn & 0x01200000) == 0x00200000) {
- /* ldrt/strt */
- i = get_a32_user_mem_index(s);
- } else {
- i = get_mem_index(s);
- }
- if (insn & (1 << 24))
- gen_add_data_offset(s, insn, tmp2);
- if (insn & (1 << 20)) {
- /* load */
- tmp = tcg_temp_new_i32();
- if (insn & (1 << 22)) {
- gen_aa32_ld8u_iss(s, tmp, tmp2, i, rd);
- } else {
- gen_aa32_ld32u_iss(s, tmp, tmp2, i, rd);
- }
- } else {
- /* store */
- tmp = load_reg(s, rd);
- if (insn & (1 << 22)) {
- gen_aa32_st8_iss(s, tmp, tmp2, i, rd);
- } else {
- gen_aa32_st32_iss(s, tmp, tmp2, i, rd);
- }
- tcg_temp_free_i32(tmp);
- }
- if (!(insn & (1 << 24))) {
- gen_add_data_offset(s, insn, tmp2);
- store_reg(s, rn, tmp2);
- } else if (insn & (1 << 21)) {
- store_reg(s, rn, tmp2);
- } else {
- tcg_temp_free_i32(tmp2);
- }
- if (insn & (1 << 20)) {
- /* Complete the load. */
- store_reg_from_load(s, rd, tmp);
- }
- break;
- case 0x08:
- case 0x09:
- {
- int j, n, loaded_base;
- bool exc_return = false;
- bool is_load = extract32(insn, 20, 1);
- bool user = false;
- TCGv_i32 loaded_var;
- /* load/store multiple words */
- /* XXX: store correct base if write back */
- if (insn & (1 << 22)) {
- /* LDM (user), LDM (exception return) and STM (user) */
- if (IS_USER(s))
- goto illegal_op; /* only usable in supervisor mode */
-
- if (is_load && extract32(insn, 15, 1)) {
- exc_return = true;
- } else {
- user = true;
- }
- }
- rn = (insn >> 16) & 0xf;
- addr = load_reg(s, rn);
-
- /* compute total size */
- loaded_base = 0;
- loaded_var = NULL;
- n = 0;
-            for (i = 0; i < 16; i++) {
- if (insn & (1 << i))
- n++;
- }
- /* XXX: test invalid n == 0 case ? */
- if (insn & (1 << 23)) {
- if (insn & (1 << 24)) {
- /* pre increment */
- tcg_gen_addi_i32(addr, addr, 4);
- } else {
- /* post increment */
- }
- } else {
- if (insn & (1 << 24)) {
- /* pre decrement */
- tcg_gen_addi_i32(addr, addr, -(n * 4));
- } else {
- /* post decrement */
- if (n != 1)
- tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
- }
- }
- j = 0;
-            for (i = 0; i < 16; i++) {
- if (insn & (1 << i)) {
- if (is_load) {
- /* load */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- if (user) {
- tmp2 = tcg_const_i32(i);
- gen_helper_set_user_reg(cpu_env, tmp2, tmp);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
- } else if (i == rn) {
- loaded_var = tmp;
- loaded_base = 1;
- } else if (rn == 15 && exc_return) {
- store_pc_exc_ret(s, tmp);
- } else {
- store_reg_from_load(s, i, tmp);
- }
- } else {
- /* store */
- if (i == 15) {
- /* special case: r15 = PC + 8 */
- val = (long)s->pc + 4;
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, val);
- } else if (user) {
- tmp = tcg_temp_new_i32();
- tmp2 = tcg_const_i32(i);
- gen_helper_get_user_reg(tmp, cpu_env, tmp2);
- tcg_temp_free_i32(tmp2);
- } else {
- tmp = load_reg(s, i);
- }
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- }
- j++;
- /* no need to add after the last transfer */
- if (j != n)
- tcg_gen_addi_i32(addr, addr, 4);
- }
- }
- if (insn & (1 << 21)) {
- /* write back */
- if (insn & (1 << 23)) {
- if (insn & (1 << 24)) {
- /* pre increment */
- } else {
- /* post increment */
- tcg_gen_addi_i32(addr, addr, 4);
- }
- } else {
- if (insn & (1 << 24)) {
- /* pre decrement */
- if (n != 1)
- tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
- } else {
- /* post decrement */
- tcg_gen_addi_i32(addr, addr, -(n * 4));
- }
- }
- store_reg(s, rn, addr);
- } else {
- tcg_temp_free_i32(addr);
- }
- if (loaded_base) {
- store_reg(s, rn, loaded_var);
- }
- if (exc_return) {
- /* Restore CPSR from SPSR. */
- tmp = load_cpu_field(spsr);
- if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
- gen_io_start();
- }
- gen_helper_cpsr_write_eret(cpu_env, tmp);
- if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
- gen_io_end();
- }
- tcg_temp_free_i32(tmp);
- /* Must exit loop to check un-masked IRQs */
- s->base.is_jmp = DISAS_EXIT;
- }
- }
- break;
- case 0xa:
- case 0xb:
- {
- int32_t offset;
-
- /* branch (and link) */
- val = (int32_t)s->pc;
- if (insn & (1 << 24)) {
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, val);
- store_reg(s, 14, tmp);
- }
- offset = sextract32(insn << 2, 0, 26);
- val += offset + 4;
- gen_jmp(s, val);
- }
- break;
- case 0xc:
- case 0xd:
- case 0xe:
- if (((insn >> 8) & 0xe) == 10) {
- /* VFP. */
- if (disas_vfp_insn(s, insn)) {
- goto illegal_op;
- }
- } else if (disas_coproc_insn(s, insn)) {
- /* Coprocessor. */
- goto illegal_op;
- }
- break;
- case 0xf:
- /* swi */
- gen_set_pc_im(s, s->pc);
- s->svc_imm = extract32(insn, 0, 24);
- s->base.is_jmp = DISAS_SWI;
- break;
- default:
- illegal_op:
- gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
- default_exception_el(s));
- break;
- }
- }
-}
-
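Both branch-immediate paths in disas_arm_insn() above (the unconditional "blx <offset>" case and the conditional "branch (and link)" case) compute target = instruction address + 8 + SignExtend(imm24) * 4; BLX(immediate) additionally adds the H bit (bit 24) times 2 and sets bit 0 to enter Thumb state. A small self-contained sketch of the same arithmetic (illustrative only, not QEMU code; the example encodings are ordinary hand-assembled instructions):

#include <stdint.h>
#include <stdio.h>

/* Sign-extend the low 24 bits of x. */
static int32_t sext24(uint32_t x)
{
    return (int32_t)(x << 8) >> 8;
}

/* Target of an A32 B/BL at address 'addr' with encoding 'insn'. */
static uint32_t a32_branch_target(uint32_t addr, uint32_t insn)
{
    return addr + 8 + ((uint32_t)sext24(insn & 0xffffff) << 2);
}

/* Target of an A32 BLX(immediate): also add the H bit (bit 24) * 2
 * and set bit 0 to mark the switch to Thumb state. */
static uint32_t a32_blx_imm_target(uint32_t addr, uint32_t insn)
{
    return a32_branch_target(addr, insn) + ((insn >> 23) & 2) + 1;
}

int main(void)
{
    /* 0xeafffffe is "b ." (branch to self). */
    printf("B  : %#x\n", a32_branch_target(0x8000, 0xeafffffe));
    /* 0xfa000000 is BLX to Thumb code at addr + 8 (bit 0 set in the result). */
    printf("BLX: %#x\n", a32_blx_imm_target(0x8000, 0xfa000000));
    return 0;
}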
-static bool thumb_insn_is_16bit(DisasContext *s, uint32_t insn)
-{
- /* Return true if this is a 16 bit instruction. We must be precise
- * about this (matching the decode). We assume that s->pc still
- * points to the first 16 bits of the insn.
- */
- if ((insn >> 11) < 0x1d) {
- /* Definitely a 16-bit instruction */
- return true;
- }
-
- /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
- * first half of a 32-bit Thumb insn. Thumb-1 cores might
- * end up actually treating this as two 16-bit insns, though,
- * if it's half of a bl/blx pair that might span a page boundary.
- */
- if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
- arm_dc_feature(s, ARM_FEATURE_M)) {
- /* Thumb2 cores (including all M profile ones) always treat
- * 32-bit insns as 32-bit.
- */
- return false;
- }
-
- if ((insn >> 11) == 0x1e && s->pc - s->page_start < TARGET_PAGE_SIZE - 3) {
- /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
- * is not on the next page; we merge this into a 32-bit
- * insn.
- */
- return false;
- }
- /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
- * 0b1111_1xxx_xxxx_xxxx : BL suffix;
- * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
- * -- handle as single 16 bit insn
- */
- return true;
-}
-
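The "(insn >> 11) < 0x1d" test above encodes the rule that a Thumb halfword whose top five bits are 0b11101, 0b11110 or 0b11111 is the first half of a 32-bit instruction, and anything else is a complete 16-bit instruction. A self-contained sketch of just that classification (illustrative only; the example encodings are well-known instructions, not taken from this patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* True if hw is the first halfword of a 32-bit Thumb instruction,
 * i.e. its top five bits are 0b11101, 0b11110 or 0b11111. */
static bool is_32bit_first_half(uint16_t hw)
{
    return (hw >> 11) >= 0x1d;
}

int main(void)
{
    static const uint16_t examples[] = {
        0x4770, /* BX LR          - 16-bit */
        0xb580, /* PUSH {r7, lr}  - 16-bit */
        0xf000, /* BL/BLX prefix  - first half of a 32-bit insn */
        0xe92d, /* STMDB (T2) hw1 - first half of a 32-bit insn */
    };

    for (unsigned i = 0; i < sizeof(examples) / sizeof(examples[0]); i++) {
        printf("0x%04x: %s\n", examples[i],
               is_32bit_first_half(examples[i]) ? "32-bit first half"
                                                : "16-bit");
    }
    return 0;
}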
-/* Return true if this is a Thumb-2 logical op. */
-static int
-thumb2_logic_op(int op)
-{
- return (op < 8);
-}
-
-/* Generate code for a Thumb-2 data processing operation. If CONDS is nonzero
- then set condition code flags based on the result of the operation.
- If SHIFTER_OUT is nonzero then set the carry flag for logical operations
- to the high bit of T1.
- Returns zero if the opcode is valid. */
-
-static int
-gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out,
- TCGv_i32 t0, TCGv_i32 t1)
-{
- int logic_cc;
-
- logic_cc = 0;
- switch (op) {
- case 0: /* and */
- tcg_gen_and_i32(t0, t0, t1);
- logic_cc = conds;
- break;
- case 1: /* bic */
- tcg_gen_andc_i32(t0, t0, t1);
- logic_cc = conds;
- break;
- case 2: /* orr */
- tcg_gen_or_i32(t0, t0, t1);
- logic_cc = conds;
- break;
- case 3: /* orn */
- tcg_gen_orc_i32(t0, t0, t1);
- logic_cc = conds;
- break;
- case 4: /* eor */
- tcg_gen_xor_i32(t0, t0, t1);
- logic_cc = conds;
- break;
- case 8: /* add */
- if (conds)
- gen_add_CC(t0, t0, t1);
- else
- tcg_gen_add_i32(t0, t0, t1);
- break;
- case 10: /* adc */
- if (conds)
- gen_adc_CC(t0, t0, t1);
- else
- gen_adc(t0, t1);
- break;
- case 11: /* sbc */
- if (conds) {
- gen_sbc_CC(t0, t0, t1);
- } else {
- gen_sub_carry(t0, t0, t1);
- }
- break;
- case 13: /* sub */
- if (conds)
- gen_sub_CC(t0, t0, t1);
- else
- tcg_gen_sub_i32(t0, t0, t1);
- break;
- case 14: /* rsb */
- if (conds)
- gen_sub_CC(t0, t1, t0);
- else
- tcg_gen_sub_i32(t0, t1, t0);
- break;
- default: /* 5, 6, 7, 9, 12, 15. */
- return 1;
- }
- if (logic_cc) {
- gen_logic_CC(t0);
- if (shifter_out)
- gen_set_CF_bit31(t1);
- }
- return 0;
-}
-
-/* Translate a 32-bit thumb instruction. */
-static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
-{
- uint32_t imm, shift, offset;
- uint32_t rd, rn, rm, rs;
- TCGv_i32 tmp;
- TCGv_i32 tmp2;
- TCGv_i32 tmp3;
- TCGv_i32 addr;
- TCGv_i64 tmp64;
- int op;
- int shiftop;
- int conds;
- int logic_cc;
-
- /*
- * ARMv6-M supports a limited subset of Thumb2 instructions.
- * Other Thumb1 architectures allow only 32-bit
- * combined BL/BLX prefix and suffix.
- */
- if (arm_dc_feature(s, ARM_FEATURE_M) &&
- !arm_dc_feature(s, ARM_FEATURE_V7)) {
- int i;
- bool found = false;
- static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
- 0xf3b08040 /* dsb */,
- 0xf3b08050 /* dmb */,
- 0xf3b08060 /* isb */,
- 0xf3e08000 /* mrs */,
- 0xf000d000 /* bl */};
- static const uint32_t armv6m_mask[] = {0xffe0d000,
- 0xfff0d0f0,
- 0xfff0d0f0,
- 0xfff0d0f0,
- 0xffe0d000,
- 0xf800d000};
-
- for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
- if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
- found = true;
- break;
- }
- }
- if (!found) {
- goto illegal_op;
- }
- } else if ((insn & 0xf800e800) != 0xf000e800) {
- ARCH(6T2);
- }
-
- rn = (insn >> 16) & 0xf;
- rs = (insn >> 12) & 0xf;
- rd = (insn >> 8) & 0xf;
- rm = insn & 0xf;
- switch ((insn >> 25) & 0xf) {
- case 0: case 1: case 2: case 3:
- /* 16-bit instructions. Should never happen. */
- abort();
- case 4:
- if (insn & (1 << 22)) {
- /* 0b1110_100x_x1xx_xxxx_xxxx_xxxx_xxxx_xxxx
- * - load/store doubleword, load/store exclusive, ldacq/strel,
- * table branch, TT.
- */
- if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_M) &&
- arm_dc_feature(s, ARM_FEATURE_V8)) {
-            /* 0b1110_1001_0111_1111_1110_1001_0111_1111
- * - SG (v8M only)
- * The bulk of the behaviour for this instruction is implemented
- * in v7m_handle_execute_nsc(), which deals with the insn when
- * it is executed by a CPU in non-secure state from memory
- * which is Secure & NonSecure-Callable.
- * Here we only need to handle the remaining cases:
- * * in NS memory (including the "security extension not
- * implemented" case) : NOP
- * * in S memory but CPU already secure (clear IT bits)
- * We know that the attribute for the memory this insn is
- * in must match the current CPU state, because otherwise
- * get_phys_addr_pmsav8 would have generated an exception.
- */
- if (s->v8m_secure) {
- /* Like the IT insn, we don't need to generate any code */
- s->condexec_cond = 0;
- s->condexec_mask = 0;
- }
- } else if (insn & 0x01200000) {
- /* 0b1110_1000_x11x_xxxx_xxxx_xxxx_xxxx_xxxx
- * - load/store dual (post-indexed)
- * 0b1111_1001_x10x_xxxx_xxxx_xxxx_xxxx_xxxx
- * - load/store dual (literal and immediate)
- * 0b1111_1001_x11x_xxxx_xxxx_xxxx_xxxx_xxxx
- * - load/store dual (pre-indexed)
- */
- bool wback = extract32(insn, 21, 1);
-
- if (rn == 15) {
- if (insn & (1 << 21)) {
- /* UNPREDICTABLE */
- goto illegal_op;
- }
- addr = tcg_temp_new_i32();
- tcg_gen_movi_i32(addr, s->pc & ~3);
- } else {
- addr = load_reg(s, rn);
- }
- offset = (insn & 0xff) * 4;
- if ((insn & (1 << 23)) == 0) {
- offset = -offset;
- }
-
- if (s->v8m_stackcheck && rn == 13 && wback) {
- /*
- * Here 'addr' is the current SP; if offset is +ve we're
- * moving SP up, else down. It is UNKNOWN whether the limit
- * check triggers when SP starts below the limit and ends
- * up above it; check whichever of the current and final
- * SP is lower, so QEMU will trigger in that situation.
- */
- if ((int32_t)offset < 0) {
- TCGv_i32 newsp = tcg_temp_new_i32();
-
- tcg_gen_addi_i32(newsp, addr, offset);
- gen_helper_v8m_stackcheck(cpu_env, newsp);
- tcg_temp_free_i32(newsp);
- } else {
- gen_helper_v8m_stackcheck(cpu_env, addr);
- }
- }
-
- if (insn & (1 << 24)) {
- tcg_gen_addi_i32(addr, addr, offset);
- offset = 0;
- }
- if (insn & (1 << 20)) {
- /* ldrd */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- store_reg(s, rs, tmp);
- tcg_gen_addi_i32(addr, addr, 4);
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- store_reg(s, rd, tmp);
- } else {
- /* strd */
- tmp = load_reg(s, rs);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- tcg_gen_addi_i32(addr, addr, 4);
- tmp = load_reg(s, rd);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- }
- if (wback) {
- /* Base writeback. */
- tcg_gen_addi_i32(addr, addr, offset - 4);
- store_reg(s, rn, addr);
- } else {
- tcg_temp_free_i32(addr);
- }
- } else if ((insn & (1 << 23)) == 0) {
- /* 0b1110_1000_010x_xxxx_xxxx_xxxx_xxxx_xxxx
- * - load/store exclusive word
- * - TT (v8M only)
- */
- if (rs == 15) {
- if (!(insn & (1 << 20)) &&
- arm_dc_feature(s, ARM_FEATURE_M) &&
- arm_dc_feature(s, ARM_FEATURE_V8)) {
- /* 0b1110_1000_0100_xxxx_1111_xxxx_xxxx_xxxx
- * - TT (v8M only)
- */
- bool alt = insn & (1 << 7);
- TCGv_i32 addr, op, ttresp;
-
- if ((insn & 0x3f) || rd == 13 || rd == 15 || rn == 15) {
- /* we UNDEF for these UNPREDICTABLE cases */
- goto illegal_op;
- }
-
- if (alt && !s->v8m_secure) {
- goto illegal_op;
- }
-
- addr = load_reg(s, rn);
- op = tcg_const_i32(extract32(insn, 6, 2));
- ttresp = tcg_temp_new_i32();
- gen_helper_v7m_tt(ttresp, cpu_env, addr, op);
- tcg_temp_free_i32(addr);
- tcg_temp_free_i32(op);
- store_reg(s, rd, ttresp);
- break;
- }
- goto illegal_op;
- }
- addr = tcg_temp_local_new_i32();
- load_reg_var(s, addr, rn);
- tcg_gen_addi_i32(addr, addr, (insn & 0xff) << 2);
- if (insn & (1 << 20)) {
- gen_load_exclusive(s, rs, 15, addr, 2);
- } else {
- gen_store_exclusive(s, rd, rs, 15, addr, 2);
- }
- tcg_temp_free_i32(addr);
- } else if ((insn & (7 << 5)) == 0) {
- /* Table Branch. */
- if (rn == 15) {
- addr = tcg_temp_new_i32();
- tcg_gen_movi_i32(addr, s->pc);
- } else {
- addr = load_reg(s, rn);
- }
- tmp = load_reg(s, rm);
- tcg_gen_add_i32(addr, addr, tmp);
- if (insn & (1 << 4)) {
- /* tbh */
- tcg_gen_add_i32(addr, addr, tmp);
- tcg_temp_free_i32(tmp);
- tmp = tcg_temp_new_i32();
- gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
- } else { /* tbb */
- tcg_temp_free_i32(tmp);
- tmp = tcg_temp_new_i32();
- gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
- }
- tcg_temp_free_i32(addr);
- tcg_gen_shli_i32(tmp, tmp, 1);
- tcg_gen_addi_i32(tmp, tmp, s->pc);
- store_reg(s, 15, tmp);
- } else {
- int op2 = (insn >> 6) & 0x3;
- op = (insn >> 4) & 0x3;
- switch (op2) {
- case 0:
- goto illegal_op;
- case 1:
- /* Load/store exclusive byte/halfword/doubleword */
- if (op == 2) {
- goto illegal_op;
- }
- ARCH(7);
- break;
- case 2:
- /* Load-acquire/store-release */
- if (op == 3) {
- goto illegal_op;
- }
- /* Fall through */
- case 3:
- /* Load-acquire/store-release exclusive */
- ARCH(8);
- break;
- }
- addr = tcg_temp_local_new_i32();
- load_reg_var(s, addr, rn);
- if (!(op2 & 1)) {
- if (insn & (1 << 20)) {
- tmp = tcg_temp_new_i32();
- switch (op) {
- case 0: /* ldab */
- gen_aa32_ld8u_iss(s, tmp, addr, get_mem_index(s),
- rs | ISSIsAcqRel);
- break;
- case 1: /* ldah */
- gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s),
- rs | ISSIsAcqRel);
- break;
- case 2: /* lda */
- gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s),
- rs | ISSIsAcqRel);
- break;
- default:
- abort();
- }
- store_reg(s, rs, tmp);
- } else {
- tmp = load_reg(s, rs);
- switch (op) {
- case 0: /* stlb */
- gen_aa32_st8_iss(s, tmp, addr, get_mem_index(s),
- rs | ISSIsAcqRel);
- break;
- case 1: /* stlh */
- gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s),
- rs | ISSIsAcqRel);
- break;
- case 2: /* stl */
- gen_aa32_st32_iss(s, tmp, addr, get_mem_index(s),
- rs | ISSIsAcqRel);
- break;
- default:
- abort();
- }
- tcg_temp_free_i32(tmp);
- }
- } else if (insn & (1 << 20)) {
- gen_load_exclusive(s, rs, rd, addr, op);
- } else {
- gen_store_exclusive(s, rm, rs, rd, addr, op);
- }
- tcg_temp_free_i32(addr);
- }
- } else {
- /* Load/store multiple, RFE, SRS. */
- if (((insn >> 23) & 1) == ((insn >> 24) & 1)) {
- /* RFE, SRS: not available in user mode or on M profile */
- if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
- goto illegal_op;
- }
- if (insn & (1 << 20)) {
- /* rfe */
- addr = load_reg(s, rn);
- if ((insn & (1 << 24)) == 0)
- tcg_gen_addi_i32(addr, addr, -8);
- /* Load PC into tmp and CPSR into tmp2. */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- tcg_gen_addi_i32(addr, addr, 4);
- tmp2 = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp2, addr, get_mem_index(s));
- if (insn & (1 << 21)) {
- /* Base writeback. */
- if (insn & (1 << 24)) {
- tcg_gen_addi_i32(addr, addr, 4);
- } else {
- tcg_gen_addi_i32(addr, addr, -4);
- }
- store_reg(s, rn, addr);
- } else {
- tcg_temp_free_i32(addr);
- }
- gen_rfe(s, tmp, tmp2);
- } else {
- /* srs */
- gen_srs(s, (insn & 0x1f), (insn & (1 << 24)) ? 1 : 2,
- insn & (1 << 21));
- }
- } else {
- int i, loaded_base = 0;
- TCGv_i32 loaded_var;
- bool wback = extract32(insn, 21, 1);
- /* Load/store multiple. */
- addr = load_reg(s, rn);
- offset = 0;
- for (i = 0; i < 16; i++) {
- if (insn & (1 << i))
- offset += 4;
- }
-
- if (insn & (1 << 24)) {
- tcg_gen_addi_i32(addr, addr, -offset);
- }
-
- if (s->v8m_stackcheck && rn == 13 && wback) {
- /*
- * If the writeback is incrementing SP rather than
- * decrementing it, and the initial SP is below the
- * stack limit but the final written-back SP would
-                 * be above, then we must not perform any memory
- * accesses, but it is IMPDEF whether we generate
- * an exception. We choose to do so in this case.
- * At this point 'addr' is the lowest address, so
- * either the original SP (if incrementing) or our
- * final SP (if decrementing), so that's what we check.
- */
- gen_helper_v8m_stackcheck(cpu_env, addr);
- }
-
- loaded_var = NULL;
- for (i = 0; i < 16; i++) {
- if ((insn & (1 << i)) == 0)
- continue;
- if (insn & (1 << 20)) {
- /* Load. */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- if (i == 15) {
- gen_bx_excret(s, tmp);
- } else if (i == rn) {
- loaded_var = tmp;
- loaded_base = 1;
- } else {
- store_reg(s, i, tmp);
- }
- } else {
- /* Store. */
- tmp = load_reg(s, i);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- }
- tcg_gen_addi_i32(addr, addr, 4);
- }
- if (loaded_base) {
- store_reg(s, rn, loaded_var);
- }
- if (wback) {
- /* Base register writeback. */
- if (insn & (1 << 24)) {
- tcg_gen_addi_i32(addr, addr, -offset);
- }
- /* Fault if writeback register is in register list. */
- if (insn & (1 << rn))
- goto illegal_op;
- store_reg(s, rn, addr);
- } else {
- tcg_temp_free_i32(addr);
- }
- }
- }
- break;
- case 5:
-
- op = (insn >> 21) & 0xf;
- if (op == 6) {
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
- goto illegal_op;
- }
- /* Halfword pack. */
- tmp = load_reg(s, rn);
- tmp2 = load_reg(s, rm);
- shift = ((insn >> 10) & 0x1c) | ((insn >> 6) & 0x3);
- if (insn & (1 << 5)) {
- /* pkhtb */
- if (shift == 0)
- shift = 31;
- tcg_gen_sari_i32(tmp2, tmp2, shift);
- tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
- tcg_gen_ext16u_i32(tmp2, tmp2);
- } else {
- /* pkhbt */
- if (shift)
- tcg_gen_shli_i32(tmp2, tmp2, shift);
- tcg_gen_ext16u_i32(tmp, tmp);
- tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
- }
- tcg_gen_or_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- store_reg(s, rd, tmp);
- } else {
- /* Data processing register constant shift. */
- if (rn == 15) {
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- } else {
- tmp = load_reg(s, rn);
- }
- tmp2 = load_reg(s, rm);
-
- shiftop = (insn >> 4) & 3;
- shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
- conds = (insn & (1 << 20)) != 0;
- logic_cc = (conds && thumb2_logic_op(op));
- gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
- if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
- goto illegal_op;
- tcg_temp_free_i32(tmp2);
- if (rd == 13 &&
- ((op == 2 && rn == 15) ||
- (op == 8 && rn == 13) ||
- (op == 13 && rn == 13))) {
- /* MOV SP, ... or ADD SP, SP, ... or SUB SP, SP, ... */
- store_sp_checked(s, tmp);
- } else if (rd != 15) {
- store_reg(s, rd, tmp);
- } else {
- tcg_temp_free_i32(tmp);
- }
- }
- break;
- case 13: /* Misc data processing. */
- op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
- if (op < 4 && (insn & 0xf000) != 0xf000)
- goto illegal_op;
- switch (op) {
- case 0: /* Register controlled shift. */
- tmp = load_reg(s, rn);
- tmp2 = load_reg(s, rm);
- if ((insn & 0x70) != 0)
- goto illegal_op;
- /*
- * 0b1111_1010_0xxx_xxxx_1111_xxxx_0000_xxxx:
- * - MOV, MOVS (register-shifted register), flagsetting
- */
- op = (insn >> 21) & 3;
- logic_cc = (insn & (1 << 20)) != 0;
- gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
- if (logic_cc)
- gen_logic_CC(tmp);
- store_reg(s, rd, tmp);
- break;
- case 1: /* Sign/zero extend. */
- op = (insn >> 20) & 7;
- switch (op) {
- case 0: /* SXTAH, SXTH */
- case 1: /* UXTAH, UXTH */
- case 4: /* SXTAB, SXTB */
- case 5: /* UXTAB, UXTB */
- break;
- case 2: /* SXTAB16, SXTB16 */
- case 3: /* UXTAB16, UXTB16 */
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
- goto illegal_op;
- }
- break;
- default:
- goto illegal_op;
- }
- if (rn != 15) {
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
- goto illegal_op;
- }
- }
- tmp = load_reg(s, rm);
- shift = (insn >> 4) & 3;
- /* ??? In many cases it's not necessary to do a
- rotate, a shift is sufficient. */
- if (shift != 0)
- tcg_gen_rotri_i32(tmp, tmp, shift * 8);
- op = (insn >> 20) & 7;
- switch (op) {
- case 0: gen_sxth(tmp); break;
- case 1: gen_uxth(tmp); break;
- case 2: gen_sxtb16(tmp); break;
- case 3: gen_uxtb16(tmp); break;
- case 4: gen_sxtb(tmp); break;
- case 5: gen_uxtb(tmp); break;
- default:
- g_assert_not_reached();
- }
- if (rn != 15) {
- tmp2 = load_reg(s, rn);
- if ((op >> 1) == 1) {
- gen_add16(tmp, tmp2);
- } else {
- tcg_gen_add_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
- }
- store_reg(s, rd, tmp);
- break;
- case 2: /* SIMD add/subtract. */
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
- goto illegal_op;
- }
- op = (insn >> 20) & 7;
- shift = (insn >> 4) & 7;
- if ((op & 3) == 3 || (shift & 3) == 3)
- goto illegal_op;
- tmp = load_reg(s, rn);
- tmp2 = load_reg(s, rm);
- gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- store_reg(s, rd, tmp);
- break;
- case 3: /* Other data processing. */
- op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
- if (op < 4) {
- /* Saturating add/subtract. */
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
- goto illegal_op;
- }
- tmp = load_reg(s, rn);
- tmp2 = load_reg(s, rm);
- if (op & 1)
- gen_helper_double_saturate(tmp, cpu_env, tmp);
- if (op & 2)
- gen_helper_sub_saturate(tmp, cpu_env, tmp2, tmp);
- else
- gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- } else {
- switch (op) {
- case 0x0a: /* rbit */
- case 0x08: /* rev */
- case 0x09: /* rev16 */
- case 0x0b: /* revsh */
- case 0x18: /* clz */
- break;
- case 0x10: /* sel */
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
- goto illegal_op;
- }
- break;
- case 0x20: /* crc32/crc32c */
- case 0x21:
- case 0x22:
- case 0x28:
- case 0x29:
- case 0x2a:
- if (!arm_dc_feature(s, ARM_FEATURE_CRC)) {
- goto illegal_op;
- }
- break;
- default:
- goto illegal_op;
- }
- tmp = load_reg(s, rn);
- switch (op) {
- case 0x0a: /* rbit */
- gen_helper_rbit(tmp, tmp);
- break;
- case 0x08: /* rev */
- tcg_gen_bswap32_i32(tmp, tmp);
- break;
- case 0x09: /* rev16 */
- gen_rev16(tmp);
- break;
- case 0x0b: /* revsh */
- gen_revsh(tmp);
- break;
- case 0x10: /* sel */
- tmp2 = load_reg(s, rm);
- tmp3 = tcg_temp_new_i32();
- tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
- gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
- tcg_temp_free_i32(tmp3);
- tcg_temp_free_i32(tmp2);
- break;
- case 0x18: /* clz */
- tcg_gen_clzi_i32(tmp, tmp, 32);
- break;
- case 0x20:
- case 0x21:
- case 0x22:
- case 0x28:
- case 0x29:
- case 0x2a:
- {
- /* crc32/crc32c */
- uint32_t sz = op & 0x3;
- uint32_t c = op & 0x8;
-
- tmp2 = load_reg(s, rm);
- if (sz == 0) {
- tcg_gen_andi_i32(tmp2, tmp2, 0xff);
- } else if (sz == 1) {
- tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
- }
- tmp3 = tcg_const_i32(1 << sz);
- if (c) {
- gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
- } else {
- gen_helper_crc32(tmp, tmp, tmp2, tmp3);
- }
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp3);
- break;
- }
- default:
- g_assert_not_reached();
- }
- }
- store_reg(s, rd, tmp);
- break;
- case 4: case 5: /* 32-bit multiply. Sum of absolute differences. */
- switch ((insn >> 20) & 7) {
- case 0: /* 32 x 32 -> 32 */
- case 7: /* Unsigned sum of absolute differences. */
- break;
- case 1: /* 16 x 16 -> 32 */
- case 2: /* Dual multiply add. */
- case 3: /* 32 * 16 -> 32msb */
- case 4: /* Dual multiply subtract. */
- case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
- goto illegal_op;
- }
- break;
- }
- op = (insn >> 4) & 0xf;
- tmp = load_reg(s, rn);
- tmp2 = load_reg(s, rm);
- switch ((insn >> 20) & 7) {
- case 0: /* 32 x 32 -> 32 */
- tcg_gen_mul_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- if (rs != 15) {
- tmp2 = load_reg(s, rs);
- if (op)
- tcg_gen_sub_i32(tmp, tmp2, tmp);
- else
- tcg_gen_add_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
- break;
- case 1: /* 16 x 16 -> 32 */
- gen_mulxy(tmp, tmp2, op & 2, op & 1);
- tcg_temp_free_i32(tmp2);
- if (rs != 15) {
- tmp2 = load_reg(s, rs);
- gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
- break;
- case 2: /* Dual multiply add. */
- case 4: /* Dual multiply subtract. */
- if (op)
- gen_swap_half(tmp2);
- gen_smul_dual(tmp, tmp2);
- if (insn & (1 << 22)) {
- /* This subtraction cannot overflow. */
- tcg_gen_sub_i32(tmp, tmp, tmp2);
- } else {
- /* This addition cannot overflow 32 bits;
-                     * however it may overflow when considered as a signed
- * operation, in which case we must set the Q flag.
- */
- gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
- }
- tcg_temp_free_i32(tmp2);
- if (rs != 15)
- {
- tmp2 = load_reg(s, rs);
- gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
- break;
- case 3: /* 32 * 16 -> 32msb */
- if (op)
- tcg_gen_sari_i32(tmp2, tmp2, 16);
- else
- gen_sxth(tmp2);
- tmp64 = gen_muls_i64_i32(tmp, tmp2);
- tcg_gen_shri_i64(tmp64, tmp64, 16);
- tmp = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(tmp, tmp64);
- tcg_temp_free_i64(tmp64);
- if (rs != 15)
- {
- tmp2 = load_reg(s, rs);
- gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
- break;
- case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
- tmp64 = gen_muls_i64_i32(tmp, tmp2);
- if (rs != 15) {
- tmp = load_reg(s, rs);
- if (insn & (1 << 20)) {
- tmp64 = gen_addq_msw(tmp64, tmp);
- } else {
- tmp64 = gen_subq_msw(tmp64, tmp);
- }
- }
- if (insn & (1 << 4)) {
- tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
- }
- tcg_gen_shri_i64(tmp64, tmp64, 32);
- tmp = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(tmp, tmp64);
- tcg_temp_free_i64(tmp64);
- break;
- case 7: /* Unsigned sum of absolute differences. */
- gen_helper_usad8(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- if (rs != 15) {
- tmp2 = load_reg(s, rs);
- tcg_gen_add_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
- break;
- }
- store_reg(s, rd, tmp);
- break;
- case 6: case 7: /* 64-bit multiply, Divide. */
- op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70);
- tmp = load_reg(s, rn);
- tmp2 = load_reg(s, rm);
- if ((op & 0x50) == 0x10) {
- /* sdiv, udiv */
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DIV)) {
- goto illegal_op;
- }
- if (op & 0x20)
- gen_helper_udiv(tmp, tmp, tmp2);
- else
- gen_helper_sdiv(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- store_reg(s, rd, tmp);
- } else if ((op & 0xe) == 0xc) {
- /* Dual multiply accumulate long. */
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(tmp2);
- goto illegal_op;
- }
- if (op & 1)
- gen_swap_half(tmp2);
- gen_smul_dual(tmp, tmp2);
- if (op & 0x10) {
- tcg_gen_sub_i32(tmp, tmp, tmp2);
- } else {
- tcg_gen_add_i32(tmp, tmp, tmp2);
- }
- tcg_temp_free_i32(tmp2);
- /* BUGFIX */
- tmp64 = tcg_temp_new_i64();
- tcg_gen_ext_i32_i64(tmp64, tmp);
- tcg_temp_free_i32(tmp);
- gen_addq(s, tmp64, rs, rd);
- gen_storeq_reg(s, rs, rd, tmp64);
- tcg_temp_free_i64(tmp64);
- } else {
- if (op & 0x20) {
- /* Unsigned 64-bit multiply */
- tmp64 = gen_mulu_i64_i32(tmp, tmp2);
- } else {
- if (op & 8) {
- /* smlalxy */
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
- goto illegal_op;
- }
- gen_mulxy(tmp, tmp2, op & 2, op & 1);
- tcg_temp_free_i32(tmp2);
- tmp64 = tcg_temp_new_i64();
- tcg_gen_ext_i32_i64(tmp64, tmp);
- tcg_temp_free_i32(tmp);
- } else {
- /* Signed 64-bit multiply */
- tmp64 = gen_muls_i64_i32(tmp, tmp2);
- }
- }
- if (op & 4) {
- /* umaal */
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
- tcg_temp_free_i64(tmp64);
- goto illegal_op;
- }
- gen_addq_lo(s, tmp64, rs);
- gen_addq_lo(s, tmp64, rd);
- } else if (op & 0x40) {
- /* 64-bit accumulate. */
- gen_addq(s, tmp64, rs, rd);
- }
- gen_storeq_reg(s, rs, rd, tmp64);
- tcg_temp_free_i64(tmp64);
- }
- break;
- }
- break;
- case 6: case 7: case 14: case 15:
- /* Coprocessor. */
- if (arm_dc_feature(s, ARM_FEATURE_M)) {
- /* We don't currently implement M profile FP support,
- * so this entire space should give a NOCP fault, with
- * the exception of the v8M VLLDM and VLSTM insns, which
- * must be NOPs in Secure state and UNDEF in Nonsecure state.
- */
- if (arm_dc_feature(s, ARM_FEATURE_V8) &&
- (insn & 0xffa00f00) == 0xec200a00) {
- /* 0b1110_1100_0x1x_xxxx_xxxx_1010_xxxx_xxxx
- * - VLLDM, VLSTM
- * We choose to UNDEF if the RAZ bits are non-zero.
- */
- if (!s->v8m_secure || (insn & 0x0040f0ff)) {
- goto illegal_op;
- }
- /* Just NOP since FP support is not implemented */
- break;
- }
- /* All other insns: NOCP */
- gen_exception_insn(s, 4, EXCP_NOCP, syn_uncategorized(),
- default_exception_el(s));
- break;
- }
- if ((insn & 0xfe000a00) == 0xfc000800
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
- /* The Thumb2 and ARM encodings are identical. */
- if (disas_neon_insn_3same_ext(s, insn)) {
- goto illegal_op;
- }
- } else if ((insn & 0xff000a00) == 0xfe000800
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
- /* The Thumb2 and ARM encodings are identical. */
- if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
- goto illegal_op;
- }
- } else if (((insn >> 24) & 3) == 3) {
- /* Translate into the equivalent ARM encoding. */
- insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
- if (disas_neon_data_insn(s, insn)) {
- goto illegal_op;
- }
- } else if (((insn >> 8) & 0xe) == 10) {
- if (disas_vfp_insn(s, insn)) {
- goto illegal_op;
- }
- } else {
- if (insn & (1 << 28))
- goto illegal_op;
- if (disas_coproc_insn(s, insn)) {
- goto illegal_op;
- }
- }
- break;
- case 8: case 9: case 10: case 11:
- if (insn & (1 << 15)) {
- /* Branches, misc control. */
- if (insn & 0x5000) {
- /* Unconditional branch. */
- /* signextend(hw1[10:0]) -> offset[:12]. */
- offset = ((int32_t)insn << 5) >> 9 & ~(int32_t)0xfff;
- /* hw1[10:0] -> offset[11:1]. */
- offset |= (insn & 0x7ff) << 1;
- /* (~hw2[13, 11] ^ offset[24]) -> offset[23,22]
- offset[24:22] already have the same value because of the
- sign extension above. */
- offset ^= ((~insn) & (1 << 13)) << 10;
- offset ^= ((~insn) & (1 << 11)) << 11;
-
- if (insn & (1 << 14)) {
- /* Branch and link. */
- tcg_gen_movi_i32(cpu_R[14], s->pc | 1);
- }
-
- offset += s->pc;
- if (insn & (1 << 12)) {
- /* b/bl */
- gen_jmp(s, offset);
- } else {
- /* blx */
- offset &= ~(uint32_t)2;
- /* thumb2 bx, no need to check */
- gen_bx_im(s, offset);
- }
- } else if (((insn >> 23) & 7) == 7) {
- /* Misc control */
- if (insn & (1 << 13))
- goto illegal_op;
-
- if (insn & (1 << 26)) {
- if (arm_dc_feature(s, ARM_FEATURE_M)) {
- goto illegal_op;
- }
- if (!(insn & (1 << 20))) {
- /* Hypervisor call (v7) */
- int imm16 = extract32(insn, 16, 4) << 12
- | extract32(insn, 0, 12);
- ARCH(7);
- if (IS_USER(s)) {
- goto illegal_op;
- }
- gen_hvc(s, imm16);
- } else {
- /* Secure monitor call (v6+) */
- ARCH(6K);
- if (IS_USER(s)) {
- goto illegal_op;
- }
- gen_smc(s);
- }
- } else {
- op = (insn >> 20) & 7;
- switch (op) {
- case 0: /* msr cpsr. */
- if (arm_dc_feature(s, ARM_FEATURE_M)) {
- tmp = load_reg(s, rn);
- /* the constant is the mask and SYSm fields */
- addr = tcg_const_i32(insn & 0xfff);
- gen_helper_v7m_msr(cpu_env, addr, tmp);
- tcg_temp_free_i32(addr);
- tcg_temp_free_i32(tmp);
- gen_lookup_tb(s);
- break;
- }
- /* fall through */
- case 1: /* msr spsr. */
- if (arm_dc_feature(s, ARM_FEATURE_M)) {
- goto illegal_op;
- }
-
- if (extract32(insn, 5, 1)) {
- /* MSR (banked) */
- int sysm = extract32(insn, 8, 4) |
- (extract32(insn, 4, 1) << 4);
- int r = op & 1;
-
- gen_msr_banked(s, r, sysm, rm);
- break;
- }
-
- /* MSR (for PSRs) */
- tmp = load_reg(s, rn);
- if (gen_set_psr(s,
- msr_mask(s, (insn >> 8) & 0xf, op == 1),
- op == 1, tmp))
- goto illegal_op;
- break;
- case 2: /* cps, nop-hint. */
- if (((insn >> 8) & 7) == 0) {
- gen_nop_hint(s, insn & 0xff);
- }
- /* Implemented as NOP in user mode. */
- if (IS_USER(s))
- break;
- offset = 0;
- imm = 0;
- if (insn & (1 << 10)) {
- if (insn & (1 << 7))
- offset |= CPSR_A;
- if (insn & (1 << 6))
- offset |= CPSR_I;
- if (insn & (1 << 5))
- offset |= CPSR_F;
- if (insn & (1 << 9))
- imm = CPSR_A | CPSR_I | CPSR_F;
- }
- if (insn & (1 << 8)) {
- offset |= 0x1f;
- imm |= (insn & 0x1f);
- }
- if (offset) {
- gen_set_psr_im(s, offset, 0, imm);
- }
- break;
- case 3: /* Special control operations. */
- if (!arm_dc_feature(s, ARM_FEATURE_V7) &&
- !arm_dc_feature(s, ARM_FEATURE_M)) {
- goto illegal_op;
- }
- op = (insn >> 4) & 0xf;
- switch (op) {
- case 2: /* clrex */
- gen_clrex(s);
- break;
- case 4: /* dsb */
- case 5: /* dmb */
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
- break;
- case 6: /* isb */
- /* We need to break the TB after this insn
- * to execute self-modifying code correctly
- * and also to take any pending interrupts
- * immediately.
- */
- gen_goto_tb(s, 0, s->pc & ~1);
- break;
- default:
- goto illegal_op;
- }
- break;
- case 4: /* bxj */
- /* Trivial implementation equivalent to bx.
- * This instruction doesn't exist at all for M-profile.
- */
- if (arm_dc_feature(s, ARM_FEATURE_M)) {
- goto illegal_op;
- }
- tmp = load_reg(s, rn);
- gen_bx(s, tmp);
- break;
- case 5: /* Exception return. */
- if (IS_USER(s)) {
- goto illegal_op;
- }
- if (rn != 14 || rd != 15) {
- goto illegal_op;
- }
- if (s->current_el == 2) {
- /* ERET from Hyp uses ELR_Hyp, not LR */
- if (insn & 0xff) {
- goto illegal_op;
- }
- tmp = load_cpu_field(elr_el[2]);
- } else {
- tmp = load_reg(s, rn);
- tcg_gen_subi_i32(tmp, tmp, insn & 0xff);
- }
- gen_exception_return(s, tmp);
- break;
- case 6: /* MRS */
- if (extract32(insn, 5, 1) &&
- !arm_dc_feature(s, ARM_FEATURE_M)) {
- /* MRS (banked) */
- int sysm = extract32(insn, 16, 4) |
- (extract32(insn, 4, 1) << 4);
-
- gen_mrs_banked(s, 0, sysm, rd);
- break;
- }
-
- if (extract32(insn, 16, 4) != 0xf) {
- goto illegal_op;
- }
- if (!arm_dc_feature(s, ARM_FEATURE_M) &&
- extract32(insn, 0, 8) != 0) {
- goto illegal_op;
- }
-
- /* mrs cpsr */
- tmp = tcg_temp_new_i32();
- if (arm_dc_feature(s, ARM_FEATURE_M)) {
- addr = tcg_const_i32(insn & 0xff);
- gen_helper_v7m_mrs(tmp, cpu_env, addr);
- tcg_temp_free_i32(addr);
- } else {
- gen_helper_cpsr_read(tmp, cpu_env);
- }
- store_reg(s, rd, tmp);
- break;
- case 7: /* MRS */
- if (extract32(insn, 5, 1) &&
- !arm_dc_feature(s, ARM_FEATURE_M)) {
- /* MRS (banked) */
- int sysm = extract32(insn, 16, 4) |
- (extract32(insn, 4, 1) << 4);
-
- gen_mrs_banked(s, 1, sysm, rd);
- break;
- }
-
- /* mrs spsr. */
- /* Not accessible in user mode. */
- if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
- goto illegal_op;
- }
-
- if (extract32(insn, 16, 4) != 0xf ||
- extract32(insn, 0, 8) != 0) {
- goto illegal_op;
- }
-
- tmp = load_cpu_field(spsr);
- store_reg(s, rd, tmp);
- break;
- }
- }
- } else {
- /* Conditional branch. */
- op = (insn >> 22) & 0xf;
- /* Generate a conditional jump to next instruction. */
- arm_skip_unless(s, op);
-
- /* offset[11:1] = insn[10:0] */
- offset = (insn & 0x7ff) << 1;
- /* offset[17:12] = insn[21:16]. */
- offset |= (insn & 0x003f0000) >> 4;
- /* offset[31:20] = insn[26]. */
- offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11;
- /* offset[18] = insn[13]. */
- offset |= (insn & (1 << 13)) << 5;
- /* offset[19] = insn[11]. */
- offset |= (insn & (1 << 11)) << 8;
-
- /* jump to the offset */
- gen_jmp(s, s->pc + offset);
- }
- } else {
- /*
- * 0b1111_0xxx_xxxx_0xxx_xxxx_xxxx
- * - Data-processing (modified immediate, plain binary immediate)
- */
- if (insn & (1 << 25)) {
- /*
- * 0b1111_0x1x_xxxx_0xxx_xxxx_xxxx
- * - Data-processing (plain binary immediate)
- */
- if (insn & (1 << 24)) {
- if (insn & (1 << 20))
- goto illegal_op;
- /* Bitfield/Saturate. */
- op = (insn >> 21) & 7;
- imm = insn & 0x1f;
- shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
- if (rn == 15) {
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- } else {
- tmp = load_reg(s, rn);
- }
- switch (op) {
- case 2: /* Signed bitfield extract. */
- imm++;
- if (shift + imm > 32)
- goto illegal_op;
- if (imm < 32) {
- tcg_gen_sextract_i32(tmp, tmp, shift, imm);
- }
- break;
- case 6: /* Unsigned bitfield extract. */
- imm++;
- if (shift + imm > 32)
- goto illegal_op;
- if (imm < 32) {
- tcg_gen_extract_i32(tmp, tmp, shift, imm);
- }
- break;
- case 3: /* Bitfield insert/clear. */
- if (imm < shift)
- goto illegal_op;
- imm = imm + 1 - shift;
- if (imm != 32) {
- tmp2 = load_reg(s, rd);
- tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, imm);
- tcg_temp_free_i32(tmp2);
- }
- break;
- case 7:
- goto illegal_op;
- default: /* Saturate. */
- if (shift) {
- if (op & 1)
- tcg_gen_sari_i32(tmp, tmp, shift);
- else
- tcg_gen_shli_i32(tmp, tmp, shift);
- }
- tmp2 = tcg_const_i32(imm);
- if (op & 4) {
- /* Unsigned. */
- if ((op & 1) && shift == 0) {
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(tmp2);
- goto illegal_op;
- }
- gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
- } else {
- gen_helper_usat(tmp, cpu_env, tmp, tmp2);
- }
- } else {
- /* Signed. */
- if ((op & 1) && shift == 0) {
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(tmp2);
- goto illegal_op;
- }
- gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
- } else {
- gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
- }
- }
- tcg_temp_free_i32(tmp2);
- break;
- }
- store_reg(s, rd, tmp);
- } else {
- imm = ((insn & 0x04000000) >> 15)
- | ((insn & 0x7000) >> 4) | (insn & 0xff);
- if (insn & (1 << 22)) {
- /* 16-bit immediate. */
- imm |= (insn >> 4) & 0xf000;
- if (insn & (1 << 23)) {
- /* movt */
- tmp = load_reg(s, rd);
- tcg_gen_ext16u_i32(tmp, tmp);
- tcg_gen_ori_i32(tmp, tmp, imm << 16);
- } else {
- /* movw */
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, imm);
- }
- store_reg(s, rd, tmp);
- } else {
- /* Add/sub 12-bit immediate. */
- if (rn == 15) {
- offset = s->pc & ~(uint32_t)3;
- if (insn & (1 << 23))
- offset -= imm;
- else
- offset += imm;
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, offset);
- store_reg(s, rd, tmp);
- } else {
- tmp = load_reg(s, rn);
- if (insn & (1 << 23))
- tcg_gen_subi_i32(tmp, tmp, imm);
- else
- tcg_gen_addi_i32(tmp, tmp, imm);
- if (rn == 13 && rd == 13) {
- /* ADD SP, SP, imm or SUB SP, SP, imm */
- store_sp_checked(s, tmp);
- } else {
- store_reg(s, rd, tmp);
- }
- }
- }
- }
- } else {
- /*
- * 0b1111_0x0x_xxxx_0xxx_xxxx_xxxx
- * - Data-processing (modified immediate)
- */
- int shifter_out = 0;
- /* modified 12-bit immediate. */
- shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
- imm = (insn & 0xff);
- switch (shift) {
- case 0: /* XY */
- /* Nothing to do. */
- break;
- case 1: /* 00XY00XY */
- imm |= imm << 16;
- break;
- case 2: /* XY00XY00 */
- imm |= imm << 16;
- imm <<= 8;
- break;
- case 3: /* XYXYXYXY */
- imm |= imm << 16;
- imm |= imm << 8;
- break;
- default: /* Rotated constant. */
- shift = (shift << 1) | (imm >> 7);
- imm |= 0x80;
- imm = imm << (32 - shift);
- shifter_out = 1;
- break;
- }
- tmp2 = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp2, imm);
- rn = (insn >> 16) & 0xf;
- if (rn == 15) {
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- } else {
- tmp = load_reg(s, rn);
- }
- op = (insn >> 21) & 0xf;
- if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
- shifter_out, tmp, tmp2))
- goto illegal_op;
- tcg_temp_free_i32(tmp2);
- rd = (insn >> 8) & 0xf;
- if (rd == 13 && rn == 13
- && (op == 8 || op == 13)) {
- /* ADD(S) SP, SP, imm or SUB(S) SP, SP, imm */
- store_sp_checked(s, tmp);
- } else if (rd != 15) {
- store_reg(s, rd, tmp);
- } else {
- tcg_temp_free_i32(tmp);
- }
- }
- }
- break;
- case 12: /* Load/store single data item. */
- {
- int postinc = 0;
- int writeback = 0;
- int memidx;
- ISSInfo issinfo;
-
- if ((insn & 0x01100000) == 0x01000000) {
- if (disas_neon_ls_insn(s, insn)) {
- goto illegal_op;
- }
- break;
- }
- op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
- if (rs == 15) {
- if (!(insn & (1 << 20))) {
- goto illegal_op;
- }
- if (op != 2) {
- /* Byte or halfword load space with dest == r15 : memory hints.
- * Catch them early so we don't emit pointless addressing code.
- * This space is a mix of:
- * PLD/PLDW/PLI, which we implement as NOPs (note that unlike
- * the ARM encodings, PLDW space doesn't UNDEF for non-v7MP
- * cores)
- * unallocated hints, which must be treated as NOPs
- * UNPREDICTABLE space, which we NOP or UNDEF depending on
- * which is easiest for the decoding logic
- * Some space which must UNDEF
- */
- int op1 = (insn >> 23) & 3;
- int op2 = (insn >> 6) & 0x3f;
- if (op & 2) {
- goto illegal_op;
- }
- if (rn == 15) {
- /* UNPREDICTABLE, unallocated hint or
- * PLD/PLDW/PLI (literal)
- */
- return;
- }
- if (op1 & 1) {
- return; /* PLD/PLDW/PLI or unallocated hint */
- }
- if ((op2 == 0) || ((op2 & 0x3c) == 0x30)) {
- return; /* PLD/PLDW/PLI or unallocated hint */
- }
- /* UNDEF space, or an UNPREDICTABLE */
- goto illegal_op;
- }
- }
- memidx = get_mem_index(s);
- if (rn == 15) {
- addr = tcg_temp_new_i32();
- /* PC relative. */
- /* s->pc has already been incremented by 4. */
- imm = s->pc & 0xfffffffc;
- if (insn & (1 << 23))
- imm += insn & 0xfff;
- else
- imm -= insn & 0xfff;
- tcg_gen_movi_i32(addr, imm);
- } else {
- addr = load_reg(s, rn);
- if (insn & (1 << 23)) {
- /* Positive offset. */
- imm = insn & 0xfff;
- tcg_gen_addi_i32(addr, addr, imm);
- } else {
- imm = insn & 0xff;
- switch ((insn >> 8) & 0xf) {
- case 0x0: /* Shifted Register. */
- shift = (insn >> 4) & 0xf;
- if (shift > 3) {
- tcg_temp_free_i32(addr);
- goto illegal_op;
- }
- tmp = load_reg(s, rm);
- if (shift)
- tcg_gen_shli_i32(tmp, tmp, shift);
- tcg_gen_add_i32(addr, addr, tmp);
- tcg_temp_free_i32(tmp);
- break;
- case 0xc: /* Negative offset. */
- tcg_gen_addi_i32(addr, addr, -imm);
- break;
- case 0xe: /* User privilege. */
- tcg_gen_addi_i32(addr, addr, imm);
- memidx = get_a32_user_mem_index(s);
- break;
- case 0x9: /* Post-decrement. */
- imm = -imm;
- /* Fall through. */
- case 0xb: /* Post-increment. */
- postinc = 1;
- writeback = 1;
- break;
- case 0xd: /* Pre-decrement. */
- imm = -imm;
- /* Fall through. */
- case 0xf: /* Pre-increment. */
- writeback = 1;
- break;
- default:
- tcg_temp_free_i32(addr);
- goto illegal_op;
- }
- }
- }
-
- issinfo = writeback ? ISSInvalid : rs;
-
- if (s->v8m_stackcheck && rn == 13 && writeback) {
- /*
- * Stackcheck. Here we know 'addr' is the current SP;
- * if imm is +ve we're moving SP up, else down. It is
- * UNKNOWN whether the limit check triggers when SP starts
- * below the limit and ends up above it; we chose to do so.
- */
- if ((int32_t)imm < 0) {
- TCGv_i32 newsp = tcg_temp_new_i32();
-
- tcg_gen_addi_i32(newsp, addr, imm);
- gen_helper_v8m_stackcheck(cpu_env, newsp);
- tcg_temp_free_i32(newsp);
- } else {
- gen_helper_v8m_stackcheck(cpu_env, addr);
- }
- }
-
- if (writeback && !postinc) {
- tcg_gen_addi_i32(addr, addr, imm);
- }
-
- if (insn & (1 << 20)) {
- /* Load. */
- tmp = tcg_temp_new_i32();
- switch (op) {
- case 0:
- gen_aa32_ld8u_iss(s, tmp, addr, memidx, issinfo);
- break;
- case 4:
- gen_aa32_ld8s_iss(s, tmp, addr, memidx, issinfo);
- break;
- case 1:
- gen_aa32_ld16u_iss(s, tmp, addr, memidx, issinfo);
- break;
- case 5:
- gen_aa32_ld16s_iss(s, tmp, addr, memidx, issinfo);
- break;
- case 2:
- gen_aa32_ld32u_iss(s, tmp, addr, memidx, issinfo);
- break;
- default:
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(addr);
- goto illegal_op;
- }
- if (rs == 15) {
- gen_bx_excret(s, tmp);
- } else {
- store_reg(s, rs, tmp);
- }
- } else {
- /* Store. */
- tmp = load_reg(s, rs);
- switch (op) {
- case 0:
- gen_aa32_st8_iss(s, tmp, addr, memidx, issinfo);
- break;
- case 1:
- gen_aa32_st16_iss(s, tmp, addr, memidx, issinfo);
- break;
- case 2:
- gen_aa32_st32_iss(s, tmp, addr, memidx, issinfo);
- break;
- default:
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(addr);
- goto illegal_op;
- }
- tcg_temp_free_i32(tmp);
- }
- if (postinc)
- tcg_gen_addi_i32(addr, addr, imm);
- if (writeback) {
- store_reg(s, rn, addr);
- } else {
- tcg_temp_free_i32(addr);
- }
- }
- break;
- default:
- goto illegal_op;
- }
- return;
-illegal_op:
- gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
- default_exception_el(s));
-}
-
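/*
 * Editor's note -- illustrative sketch, not part of the deleted file:
 * the "modified 12-bit immediate" decode in the data-processing
 * (modified immediate) path of disas_thumb2_insn() above can be read
 * as a stand-alone helper.  The name t32_expand_imm() and the
 * shifter_out out-parameter are hypothetical; the bit manipulation
 * mirrors the switch in the code above.
 */
#include <stdint.h>

static uint32_t t32_expand_imm(uint32_t insn, int *shifter_out)
{
    /* i:imm3 from insn[26] and insn[14:12], imm8 from insn[7:0] */
    uint32_t shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
    uint32_t imm = insn & 0xff;

    *shifter_out = 0;
    switch (shift) {
    case 0:                      /* 000000XY */
        break;
    case 1:                      /* 00XY00XY */
        imm |= imm << 16;
        break;
    case 2:                      /* XY00XY00 */
        imm |= imm << 16;
        imm <<= 8;
        break;
    case 3:                      /* XYXYXYXY */
        imm |= imm << 16;
        imm |= imm << 8;
        break;
    default:                     /* 1bcdefgh rotated right by i:imm3:a */
        shift = (shift << 1) | (imm >> 7);
        imm |= 0x80;
        imm <<= 32 - shift;      /* rotation is >= 8, so no wrap-around */
        *shifter_out = 1;        /* this form defines the shifter carry-out */
        break;
    }
    return imm;
}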
-static void disas_thumb_insn(DisasContext *s, uint32_t insn)
-{
- uint32_t val, op, rm, rn, rd, shift, cond;
- int32_t offset;
- int i;
- TCGv_i32 tmp;
- TCGv_i32 tmp2;
- TCGv_i32 addr;
-
- switch (insn >> 12) {
- case 0: case 1:
-
- rd = insn & 7;
- op = (insn >> 11) & 3;
- if (op == 3) {
- /*
- * 0b0001_1xxx_xxxx_xxxx
- * - Add, subtract (three low registers)
- * - Add, subtract (two low registers and immediate)
- */
- rn = (insn >> 3) & 7;
- tmp = load_reg(s, rn);
- if (insn & (1 << 10)) {
- /* immediate */
- tmp2 = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp2, (insn >> 6) & 7);
- } else {
- /* reg */
- rm = (insn >> 6) & 7;
- tmp2 = load_reg(s, rm);
- }
- if (insn & (1 << 9)) {
- if (s->condexec_mask)
- tcg_gen_sub_i32(tmp, tmp, tmp2);
- else
- gen_sub_CC(tmp, tmp, tmp2);
- } else {
- if (s->condexec_mask)
- tcg_gen_add_i32(tmp, tmp, tmp2);
- else
- gen_add_CC(tmp, tmp, tmp2);
- }
- tcg_temp_free_i32(tmp2);
- store_reg(s, rd, tmp);
- } else {
- /* shift immediate */
- rm = (insn >> 3) & 7;
- shift = (insn >> 6) & 0x1f;
- tmp = load_reg(s, rm);
- gen_arm_shift_im(tmp, op, shift, s->condexec_mask == 0);
- if (!s->condexec_mask)
- gen_logic_CC(tmp);
- store_reg(s, rd, tmp);
- }
- break;
- case 2: case 3:
- /*
- * 0b001x_xxxx_xxxx_xxxx
- * - Add, subtract, compare, move (one low register and immediate)
- */
- op = (insn >> 11) & 3;
- rd = (insn >> 8) & 0x7;
- if (op == 0) { /* mov */
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, insn & 0xff);
- if (!s->condexec_mask)
- gen_logic_CC(tmp);
- store_reg(s, rd, tmp);
- } else {
- tmp = load_reg(s, rd);
- tmp2 = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp2, insn & 0xff);
- switch (op) {
- case 1: /* cmp */
- gen_sub_CC(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(tmp2);
- break;
- case 2: /* add */
- if (s->condexec_mask)
- tcg_gen_add_i32(tmp, tmp, tmp2);
- else
- gen_add_CC(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- store_reg(s, rd, tmp);
- break;
- case 3: /* sub */
- if (s->condexec_mask)
- tcg_gen_sub_i32(tmp, tmp, tmp2);
- else
- gen_sub_CC(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- store_reg(s, rd, tmp);
- break;
- }
- }
- break;
- case 4:
- if (insn & (1 << 11)) {
- rd = (insn >> 8) & 7;
- /* load pc-relative. Bit 1 of PC is ignored. */
- val = s->pc + 2 + ((insn & 0xff) * 4);
- val &= ~(uint32_t)2;
- addr = tcg_temp_new_i32();
- tcg_gen_movi_i32(addr, val);
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s),
- rd | ISSIs16Bit);
- tcg_temp_free_i32(addr);
- store_reg(s, rd, tmp);
- break;
- }
- if (insn & (1 << 10)) {
- /* 0b0100_01xx_xxxx_xxxx
- * - data processing extended, branch and exchange
- */
- rd = (insn & 7) | ((insn >> 4) & 8);
- rm = (insn >> 3) & 0xf;
- op = (insn >> 8) & 3;
- switch (op) {
- case 0: /* add */
- tmp = load_reg(s, rd);
- tmp2 = load_reg(s, rm);
- tcg_gen_add_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- if (rd == 13) {
- /* ADD SP, SP, reg */
- store_sp_checked(s, tmp);
- } else {
- store_reg(s, rd, tmp);
- }
- break;
- case 1: /* cmp */
- tmp = load_reg(s, rd);
- tmp2 = load_reg(s, rm);
- gen_sub_CC(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i32(tmp);
- break;
- case 2: /* mov/cpy */
- tmp = load_reg(s, rm);
- if (rd == 13) {
- /* MOV SP, reg */
- store_sp_checked(s, tmp);
- } else {
- store_reg(s, rd, tmp);
- }
- break;
- case 3:
- {
- /* 0b0100_0111_xxxx_xxxx
- * - branch [and link] exchange thumb register
- */
- bool link = insn & (1 << 7);
-
- if (insn & 3) {
- goto undef;
- }
- if (link) {
- ARCH(5);
- }
- if ((insn & 4)) {
- /* BXNS/BLXNS: only exists for v8M with the
- * security extensions, and always UNDEF if NonSecure.
- * We don't implement these in the user-only mode
- * either (in theory you can use them from Secure User
- * mode but they are too tied in to system emulation.)
- */
- if (!s->v8m_secure || IS_USER_ONLY) {
- goto undef;
- }
- if (link) {
- gen_blxns(s, rm);
- } else {
- gen_bxns(s, rm);
- }
- break;
- }
- /* BLX/BX */
- tmp = load_reg(s, rm);
- if (link) {
- val = (uint32_t)s->pc | 1;
- tmp2 = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp2, val);
- store_reg(s, 14, tmp2);
- gen_bx(s, tmp);
- } else {
- /* Only BX works as exception-return, not BLX */
- gen_bx_excret(s, tmp);
- }
- break;
- }
- }
- break;
- }
-
- /*
- * 0b0100_00xx_xxxx_xxxx
- * - Data-processing (two low registers)
- */
- rd = insn & 7;
- rm = (insn >> 3) & 7;
- op = (insn >> 6) & 0xf;
- if (op == 2 || op == 3 || op == 4 || op == 7) {
- /* the shift/rotate ops want the operands backwards */
- val = rm;
- rm = rd;
- rd = val;
- val = 1;
- } else {
- val = 0;
- }
-
- if (op == 9) { /* neg */
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- } else if (op != 0xf) { /* mvn doesn't read its first operand */
- tmp = load_reg(s, rd);
- } else {
- tmp = NULL;
- }
-
- tmp2 = load_reg(s, rm);
- switch (op) {
- case 0x0: /* and */
- tcg_gen_and_i32(tmp, tmp, tmp2);
- if (!s->condexec_mask)
- gen_logic_CC(tmp);
- break;
- case 0x1: /* eor */
- tcg_gen_xor_i32(tmp, tmp, tmp2);
- if (!s->condexec_mask)
- gen_logic_CC(tmp);
- break;
- case 0x2: /* lsl */
- if (s->condexec_mask) {
- gen_shl(tmp2, tmp2, tmp);
- } else {
- gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
- gen_logic_CC(tmp2);
- }
- break;
- case 0x3: /* lsr */
- if (s->condexec_mask) {
- gen_shr(tmp2, tmp2, tmp);
- } else {
- gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
- gen_logic_CC(tmp2);
- }
- break;
- case 0x4: /* asr */
- if (s->condexec_mask) {
- gen_sar(tmp2, tmp2, tmp);
- } else {
- gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
- gen_logic_CC(tmp2);
- }
- break;
- case 0x5: /* adc */
- if (s->condexec_mask) {
- gen_adc(tmp, tmp2);
- } else {
- gen_adc_CC(tmp, tmp, tmp2);
- }
- break;
- case 0x6: /* sbc */
- if (s->condexec_mask) {
- gen_sub_carry(tmp, tmp, tmp2);
- } else {
- gen_sbc_CC(tmp, tmp, tmp2);
- }
- break;
- case 0x7: /* ror */
- if (s->condexec_mask) {
- tcg_gen_andi_i32(tmp, tmp, 0x1f);
- tcg_gen_rotr_i32(tmp2, tmp2, tmp);
- } else {
- gen_helper_ror_cc(tmp2, cpu_env, tmp2, tmp);
- gen_logic_CC(tmp2);
- }
- break;
- case 0x8: /* tst */
- tcg_gen_and_i32(tmp, tmp, tmp2);
- gen_logic_CC(tmp);
- rd = 16;
- break;
- case 0x9: /* neg */
- if (s->condexec_mask)
- tcg_gen_neg_i32(tmp, tmp2);
- else
- gen_sub_CC(tmp, tmp, tmp2);
- break;
- case 0xa: /* cmp */
- gen_sub_CC(tmp, tmp, tmp2);
- rd = 16;
- break;
- case 0xb: /* cmn */
- gen_add_CC(tmp, tmp, tmp2);
- rd = 16;
- break;
- case 0xc: /* orr */
- tcg_gen_or_i32(tmp, tmp, tmp2);
- if (!s->condexec_mask)
- gen_logic_CC(tmp);
- break;
- case 0xd: /* mul */
- tcg_gen_mul_i32(tmp, tmp, tmp2);
- if (!s->condexec_mask)
- gen_logic_CC(tmp);
- break;
- case 0xe: /* bic */
- tcg_gen_andc_i32(tmp, tmp, tmp2);
- if (!s->condexec_mask)
- gen_logic_CC(tmp);
- break;
- case 0xf: /* mvn */
- tcg_gen_not_i32(tmp2, tmp2);
- if (!s->condexec_mask)
- gen_logic_CC(tmp2);
- val = 1;
- rm = rd;
- break;
- }
- if (rd != 16) {
- if (val) {
- store_reg(s, rm, tmp2);
- if (op != 0xf)
- tcg_temp_free_i32(tmp);
- } else {
- store_reg(s, rd, tmp);
- tcg_temp_free_i32(tmp2);
- }
- } else {
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(tmp2);
- }
- break;
-
- case 5:
- /* load/store register offset. */
- rd = insn & 7;
- rn = (insn >> 3) & 7;
- rm = (insn >> 6) & 7;
- op = (insn >> 9) & 7;
- addr = load_reg(s, rn);
- tmp = load_reg(s, rm);
- tcg_gen_add_i32(addr, addr, tmp);
- tcg_temp_free_i32(tmp);
-
- if (op < 3) { /* store */
- tmp = load_reg(s, rd);
- } else {
- tmp = tcg_temp_new_i32();
- }
-
- switch (op) {
- case 0: /* str */
- gen_aa32_st32_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
- break;
- case 1: /* strh */
- gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
- break;
- case 2: /* strb */
- gen_aa32_st8_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
- break;
- case 3: /* ldrsb */
- gen_aa32_ld8s_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
- break;
- case 4: /* ldr */
- gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
- break;
- case 5: /* ldrh */
- gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
- break;
- case 6: /* ldrb */
- gen_aa32_ld8u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
- break;
- case 7: /* ldrsh */
- gen_aa32_ld16s_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
- break;
- }
- if (op >= 3) { /* load */
- store_reg(s, rd, tmp);
- } else {
- tcg_temp_free_i32(tmp);
- }
- tcg_temp_free_i32(addr);
- break;
-
- case 6:
- /* load/store word immediate offset */
- rd = insn & 7;
- rn = (insn >> 3) & 7;
- addr = load_reg(s, rn);
- val = (insn >> 4) & 0x7c;
- tcg_gen_addi_i32(addr, addr, val);
-
- if (insn & (1 << 11)) {
- /* load */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- store_reg(s, rd, tmp);
- } else {
- /* store */
- tmp = load_reg(s, rd);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- }
- tcg_temp_free_i32(addr);
- break;
-
- case 7:
- /* load/store byte immediate offset */
- rd = insn & 7;
- rn = (insn >> 3) & 7;
- addr = load_reg(s, rn);
- val = (insn >> 6) & 0x1f;
- tcg_gen_addi_i32(addr, addr, val);
-
- if (insn & (1 << 11)) {
- /* load */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld8u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
- store_reg(s, rd, tmp);
- } else {
- /* store */
- tmp = load_reg(s, rd);
- gen_aa32_st8_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
- tcg_temp_free_i32(tmp);
- }
- tcg_temp_free_i32(addr);
- break;
-
- case 8:
- /* load/store halfword immediate offset */
- rd = insn & 7;
- rn = (insn >> 3) & 7;
- addr = load_reg(s, rn);
- val = (insn >> 5) & 0x3e;
- tcg_gen_addi_i32(addr, addr, val);
-
- if (insn & (1 << 11)) {
- /* load */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
- store_reg(s, rd, tmp);
- } else {
- /* store */
- tmp = load_reg(s, rd);
- gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
- tcg_temp_free_i32(tmp);
- }
- tcg_temp_free_i32(addr);
- break;
-
- case 9:
- /* load/store from stack */
- rd = (insn >> 8) & 7;
- addr = load_reg(s, 13);
- val = (insn & 0xff) * 4;
- tcg_gen_addi_i32(addr, addr, val);
-
- if (insn & (1 << 11)) {
- /* load */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
- store_reg(s, rd, tmp);
- } else {
- /* store */
- tmp = load_reg(s, rd);
- gen_aa32_st32_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
- tcg_temp_free_i32(tmp);
- }
- tcg_temp_free_i32(addr);
- break;
-
- case 10:
- /*
- * 0b1010_xxxx_xxxx_xxxx
- * - Add PC/SP (immediate)
- */
- rd = (insn >> 8) & 7;
- if (insn & (1 << 11)) {
- /* SP */
- tmp = load_reg(s, 13);
- } else {
- /* PC. bit 1 is ignored. */
- tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
- }
- val = (insn & 0xff) * 4;
- tcg_gen_addi_i32(tmp, tmp, val);
- store_reg(s, rd, tmp);
- break;
-
- case 11:
- /* misc */
- op = (insn >> 8) & 0xf;
- switch (op) {
- case 0:
- /*
- * 0b1011_0000_xxxx_xxxx
- * - ADD (SP plus immediate)
- * - SUB (SP minus immediate)
- */
- tmp = load_reg(s, 13);
- val = (insn & 0x7f) * 4;
- if (insn & (1 << 7))
- val = -(int32_t)val;
- tcg_gen_addi_i32(tmp, tmp, val);
- store_sp_checked(s, tmp);
- break;
-
- case 2: /* sign/zero extend. */
- ARCH(6);
- rd = insn & 7;
- rm = (insn >> 3) & 7;
- tmp = load_reg(s, rm);
- switch ((insn >> 6) & 3) {
- case 0: gen_sxth(tmp); break;
- case 1: gen_sxtb(tmp); break;
- case 2: gen_uxth(tmp); break;
- case 3: gen_uxtb(tmp); break;
- }
- store_reg(s, rd, tmp);
- break;
- case 4: case 5: case 0xc: case 0xd:
- /*
- * 0b1011_x10x_xxxx_xxxx
- * - push/pop
- */
- addr = load_reg(s, 13);
- if (insn & (1 << 8))
- offset = 4;
- else
- offset = 0;
- for (i = 0; i < 8; i++) {
- if (insn & (1 << i))
- offset += 4;
- }
- if ((insn & (1 << 11)) == 0) {
- tcg_gen_addi_i32(addr, addr, -offset);
- }
-
- if (s->v8m_stackcheck) {
- /*
- * Here 'addr' is the lower of "old SP" and "new SP";
- * if this is a pop that starts below the limit and ends
- * above it, it is UNKNOWN whether the limit check triggers;
- * we choose to trigger.
- */
- gen_helper_v8m_stackcheck(cpu_env, addr);
- }
-
- for (i = 0; i < 8; i++) {
- if (insn & (1 << i)) {
- if (insn & (1 << 11)) {
- /* pop */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- store_reg(s, i, tmp);
- } else {
- /* push */
- tmp = load_reg(s, i);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- }
- /* advance to the next address. */
- tcg_gen_addi_i32(addr, addr, 4);
- }
- }
- tmp = NULL;
- if (insn & (1 << 8)) {
- if (insn & (1 << 11)) {
- /* pop pc */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- /* don't set the pc until the rest of the instruction
- has completed */
- } else {
- /* push lr */
- tmp = load_reg(s, 14);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- }
- tcg_gen_addi_i32(addr, addr, 4);
- }
- if ((insn & (1 << 11)) == 0) {
- tcg_gen_addi_i32(addr, addr, -offset);
- }
- /* write back the new stack pointer */
- store_reg(s, 13, addr);
- /* set the new PC value */
- if ((insn & 0x0900) == 0x0900) {
- store_reg_from_load(s, 15, tmp);
- }
- break;
-
- case 1: case 3: case 9: case 11: /* cbz/cbnz */
- rm = insn & 7;
- tmp = load_reg(s, rm);
- arm_gen_condlabel(s);
- if (insn & (1 << 11))
- tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
- else
- tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
- tcg_temp_free_i32(tmp);
- offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3;
- val = (uint32_t)s->pc + 2;
- val += offset;
- gen_jmp(s, val);
- break;
-
- case 15: /* IT, nop-hint. */
- if ((insn & 0xf) == 0) {
- gen_nop_hint(s, (insn >> 4) & 0xf);
- break;
- }
- /* If Then. */
- s->condexec_cond = (insn >> 4) & 0xe;
- s->condexec_mask = insn & 0x1f;
- /* No actual code generated for this insn, just setup state. */
- break;
-
- case 0xe: /* bkpt */
- {
- int imm8 = extract32(insn, 0, 8);
- ARCH(5);
- gen_exception_bkpt_insn(s, 2, syn_aa32_bkpt(imm8, true));
- break;
- }
-
- case 0xa: /* rev, and hlt */
- {
- int op1 = extract32(insn, 6, 2);
-
- if (op1 == 2) {
- /* HLT */
- int imm6 = extract32(insn, 0, 6);
-
- gen_hlt(s, imm6);
- break;
- }
-
- /* Otherwise this is rev */
- ARCH(6);
- rn = (insn >> 3) & 0x7;
- rd = insn & 0x7;
- tmp = load_reg(s, rn);
- switch (op1) {
- case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
- case 1: gen_rev16(tmp); break;
- case 3: gen_revsh(tmp); break;
- default:
- g_assert_not_reached();
- }
- store_reg(s, rd, tmp);
- break;
- }
-
- case 6:
- switch ((insn >> 5) & 7) {
- case 2:
- /* setend */
- ARCH(6);
- if (((insn >> 3) & 1) != !!(s->be_data == MO_BE)) {
- gen_helper_setend(cpu_env);
- s->base.is_jmp = DISAS_UPDATE;
- }
- break;
- case 3:
- /* cps */
- ARCH(6);
- if (IS_USER(s)) {
- break;
- }
- if (arm_dc_feature(s, ARM_FEATURE_M)) {
- tmp = tcg_const_i32((insn & (1 << 4)) != 0);
- /* FAULTMASK */
- if (insn & 1) {
- addr = tcg_const_i32(19);
- gen_helper_v7m_msr(cpu_env, addr, tmp);
- tcg_temp_free_i32(addr);
- }
- /* PRIMASK */
- if (insn & 2) {
- addr = tcg_const_i32(16);
- gen_helper_v7m_msr(cpu_env, addr, tmp);
- tcg_temp_free_i32(addr);
- }
- tcg_temp_free_i32(tmp);
- gen_lookup_tb(s);
- } else {
- if (insn & (1 << 4)) {
- shift = CPSR_A | CPSR_I | CPSR_F;
- } else {
- shift = 0;
- }
- gen_set_psr_im(s, ((insn & 7) << 6), 0, shift);
- }
- break;
- default:
- goto undef;
- }
- break;
-
- default:
- goto undef;
- }
- break;
-
- case 12:
- {
- /* load/store multiple */
- TCGv_i32 loaded_var = NULL;
- rn = (insn >> 8) & 0x7;
- addr = load_reg(s, rn);
- for (i = 0; i < 8; i++) {
- if (insn & (1 << i)) {
- if (insn & (1 << 11)) {
- /* load */
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- if (i == rn) {
- loaded_var = tmp;
- } else {
- store_reg(s, i, tmp);
- }
- } else {
- /* store */
- tmp = load_reg(s, i);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- }
- /* advance to the next address */
- tcg_gen_addi_i32(addr, addr, 4);
- }
- }
- if ((insn & (1 << rn)) == 0) {
- /* base reg not in list: base register writeback */
- store_reg(s, rn, addr);
- } else {
- /* base reg in list: if load, complete it now */
- if (insn & (1 << 11)) {
- store_reg(s, rn, loaded_var);
- }
- tcg_temp_free_i32(addr);
- }
- break;
- }
- case 13:
- /* conditional branch or swi */
- cond = (insn >> 8) & 0xf;
- if (cond == 0xe)
- goto undef;
-
- if (cond == 0xf) {
- /* swi */
- gen_set_pc_im(s, s->pc);
- s->svc_imm = extract32(insn, 0, 8);
- s->base.is_jmp = DISAS_SWI;
- break;
- }
- /* generate a conditional jump to next instruction */
- arm_skip_unless(s, cond);
-
- /* jump to the offset */
- val = (uint32_t)s->pc + 2;
- offset = ((int32_t)insn << 24) >> 24;
- val += offset << 1;
- gen_jmp(s, val);
- break;
-
- case 14:
- if (insn & (1 << 11)) {
- /* thumb_insn_is_16bit() ensures we can't get here for
- * a Thumb2 CPU, so this must be a thumb1 split BL/BLX:
- * 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF)
- */
- assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
- ARCH(5);
- offset = ((insn & 0x7ff) << 1);
- tmp = load_reg(s, 14);
- tcg_gen_addi_i32(tmp, tmp, offset);
- tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
-
- tmp2 = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp2, s->pc | 1);
- store_reg(s, 14, tmp2);
- gen_bx(s, tmp);
- break;
- }
- /* unconditional branch */
- val = (uint32_t)s->pc;
- offset = ((int32_t)insn << 21) >> 21;
- val += (offset << 1) + 2;
- gen_jmp(s, val);
- break;
-
- case 15:
- /* thumb_insn_is_16bit() ensures we can't get here for
- * a Thumb2 CPU, so this must be a thumb1 split BL/BLX.
- */
- assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
-
- if (insn & (1 << 11)) {
- /* 0b1111_1xxx_xxxx_xxxx : BL suffix */
- offset = ((insn & 0x7ff) << 1) | 1;
- tmp = load_reg(s, 14);
- tcg_gen_addi_i32(tmp, tmp, offset);
-
- tmp2 = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp2, s->pc | 1);
- store_reg(s, 14, tmp2);
- gen_bx(s, tmp);
- } else {
- /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix */
- uint32_t uoffset = ((int32_t)insn << 21) >> 9;
-
- tcg_gen_movi_i32(cpu_R[14], s->pc + 2 + uoffset);
- }
- break;
- }
- return;
-illegal_op:
-undef:
- gen_exception_insn(s, 2, EXCP_UDEF, syn_uncategorized(),
- default_exception_el(s));
-}
-
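/*
 * Editor's note -- illustrative sketch, not part of the deleted file:
 * cases 14 and 15 of disas_thumb_insn() above reassemble a pre-Thumb2
 * split BL/BLX from two halfwords.  The helper below (hypothetical
 * name thumb1_bl_target) computes the same target address given the
 * address of the 0b11110 prefix and the two raw halfwords.
 */
#include <stdbool.h>
#include <stdint.h>

static uint32_t thumb1_bl_target(uint32_t prefix_addr, uint16_t hw1,
                                 uint16_t hw2, bool *to_arm)
{
    /* Prefix: LR = prefix_addr + 4 + (SignExtend(hw1[10:0]) << 12) */
    int32_t hi = ((int32_t)((uint32_t)hw1 << 21)) >> 9;
    uint32_t target = prefix_addr + 4 + hi;

    /* Suffix adds hw2[10:0] << 1 to LR */
    target += (hw2 & 0x7ff) << 1;

    /* A 0b11101 suffix is BLX: the target is word-aligned and the CPU
     * switches to ARM state; a 0b11111 suffix (BL) stays in Thumb. */
    *to_arm = (hw2 >> 11) == 0x1d;
    if (*to_arm) {
        target &= ~(uint32_t)3;
    }
    return target;
}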
-static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
-{
- /* Return true if the insn at dc->pc might cross a page boundary.
- * (False positives are OK, false negatives are not.)
- * We know this is a Thumb insn, and our caller ensures we are
- * only called if dc->pc is less than 4 bytes from the page
- * boundary, so we cross the page if the first 16 bits indicate
- * that this is a 32 bit insn.
- */
- uint16_t insn = arm_lduw_code(env, s->pc, s->sctlr_b);
-
- return !thumb_insn_is_16bit(s, insn);
-}
-
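/*
 * Editor's note -- illustrative sketch, not part of the deleted file:
 * insn_crosses_page() above relies on the architectural rule that a
 * Thumb instruction is 32 bits long iff bits [15:11] of its first
 * halfword are 0b11101, 0b11110 or 0b11111.  (thumb_insn_is_16bit()
 * additionally treats the BL/BLX prefix as a separate 16-bit insn on
 * cores without Thumb-2.)  The helper name below is hypothetical.
 */
#include <stdbool.h>
#include <stdint.h>

static bool first_halfword_is_32bit(uint16_t hw)
{
    return (hw >> 11) >= 0x1d;   /* 0b11101, 0b11110, 0b11111 */
}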
-static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
-{
- DisasContext *dc = container_of(dcbase, DisasContext, base);
- CPUARMState *env = cs->env_ptr;
- ARMCPU *cpu = arm_env_get_cpu(env);
-
- dc->pc = dc->base.pc_first;
- dc->condjmp = 0;
-
- dc->aarch64 = 0;
- /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
- * there is no secure EL1, so we route exceptions to EL3.
- */
- dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
- !arm_el_is_aa64(env, 3);
- dc->thumb = ARM_TBFLAG_THUMB(dc->base.tb->flags);
- dc->sctlr_b = ARM_TBFLAG_SCTLR_B(dc->base.tb->flags);
- dc->be_data = ARM_TBFLAG_BE_DATA(dc->base.tb->flags) ? MO_BE : MO_LE;
- dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(dc->base.tb->flags) & 0xf) << 1;
- dc->condexec_cond = ARM_TBFLAG_CONDEXEC(dc->base.tb->flags) >> 4;
- dc->mmu_idx = core_to_arm_mmu_idx(env, ARM_TBFLAG_MMUIDX(dc->base.tb->flags));
- dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
-#if !defined(CONFIG_USER_ONLY)
- dc->user = (dc->current_el == 0);
-#endif
- dc->ns = ARM_TBFLAG_NS(dc->base.tb->flags);
- dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(dc->base.tb->flags);
- dc->vfp_enabled = ARM_TBFLAG_VFPEN(dc->base.tb->flags);
- dc->vec_len = ARM_TBFLAG_VECLEN(dc->base.tb->flags);
- dc->vec_stride = ARM_TBFLAG_VECSTRIDE(dc->base.tb->flags);
- dc->c15_cpar = ARM_TBFLAG_XSCALE_CPAR(dc->base.tb->flags);
- dc->v7m_handler_mode = ARM_TBFLAG_HANDLER(dc->base.tb->flags);
- dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
- regime_is_secure(env, dc->mmu_idx);
- dc->v8m_stackcheck = ARM_TBFLAG_STACKCHECK(dc->base.tb->flags);
- dc->cp_regs = cpu->cp_regs;
- dc->features = env->features;
-
- /* Single step state. The code-generation logic here is:
- * SS_ACTIVE == 0:
- * generate code with no special handling for single-stepping (except
- * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
- * this happens anyway because those changes are all system register or
- * PSTATE writes).
- * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
- * emit code for one insn
- * emit code to clear PSTATE.SS
- * emit code to generate software step exception for completed step
- * end TB (as usual for having generated an exception)
- * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
- * emit code to generate a software step exception
- * end the TB
- */
- dc->ss_active = ARM_TBFLAG_SS_ACTIVE(dc->base.tb->flags);
- dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(dc->base.tb->flags);
- dc->is_ldex = false;
- dc->ss_same_el = false; /* Can't be true since EL_d must be AArch64 */
-
- dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
-
- /* If architectural single step active, limit to 1. */
- if (is_singlestepping(dc)) {
- dc->base.max_insns = 1;
- }
-
- /* ARM is a fixed-length ISA. Bound the number of insns to execute
- to those left on the page. */
- if (!dc->thumb) {
- int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
- dc->base.max_insns = MIN(dc->base.max_insns, bound);
- }
-
- cpu_F0s = tcg_temp_new_i32();
- cpu_F1s = tcg_temp_new_i32();
- cpu_F0d = tcg_temp_new_i64();
- cpu_F1d = tcg_temp_new_i64();
- cpu_V0 = cpu_F0d;
- cpu_V1 = cpu_F1d;
- /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
- cpu_M0 = tcg_temp_new_i64();
-}
-
-static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
-{
- DisasContext *dc = container_of(dcbase, DisasContext, base);
-
- /* A note on handling of the condexec (IT) bits:
- *
- * We want to avoid the overhead of having to write the updated condexec
- * bits back to the CPUARMState for every instruction in an IT block. So:
- * (1) if the condexec bits are not already zero then we write
- * zero back into the CPUARMState now. This avoids complications trying
- * to do it at the end of the block. (For example if we don't do this
- * it's hard to identify whether we can safely skip writing condexec
- * at the end of the TB, which we definitely want to do for the case
- * where a TB doesn't do anything with the IT state at all.)
- * (2) if we are going to leave the TB then we call gen_set_condexec()
- * which will write the correct value into CPUARMState if zero is wrong.
- * This is done both for leaving the TB at the end, and for leaving
- * it because of an exception we know will happen, which is done in
- * gen_exception_insn(). The latter is necessary because we need to
- * leave the TB with the PC/IT state just prior to execution of the
- * instruction which caused the exception.
- * (3) if we leave the TB unexpectedly (eg a data abort on a load)
- * then the CPUARMState will be wrong and we need to reset it.
- * This is handled in the same way as restoration of the
- * PC in these situations; we save the value of the condexec bits
- * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
- * then uses this to restore them after an exception.
- *
- * Note that there are no instructions which can read the condexec
- * bits, and none which can write non-static values to them, so
- * we don't need to care about whether CPUARMState is correct in the
- * middle of a TB.
- */
-
- /* Reset the conditional execution bits immediately. This avoids
- complications trying to do it at the end of the block. */
- if (dc->condexec_mask || dc->condexec_cond) {
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp, 0);
- store_cpu_field(tmp, condexec_bits);
- }
- tcg_clear_temp_count();
-}
-
-static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
-{
- DisasContext *dc = container_of(dcbase, DisasContext, base);
-
- tcg_gen_insn_start(dc->pc,
- (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
- 0);
- dc->insn_start = tcg_last_op();
-}
-
-static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
- const CPUBreakpoint *bp)
-{
- DisasContext *dc = container_of(dcbase, DisasContext, base);
-
- if (bp->flags & BP_CPU) {
- gen_set_condexec(dc);
- gen_set_pc_im(dc, dc->pc);
- gen_helper_check_breakpoints(cpu_env);
- /* End the TB early; it's likely not going to be executed */
- dc->base.is_jmp = DISAS_TOO_MANY;
- } else {
- gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
- /* The address covered by the breakpoint must be
- included in [tb->pc, tb->pc + tb->size) in order
- for it to be properly cleared -- thus we
- increment the PC here so that the logic setting
- tb->size below does the right thing. */
- /* TODO: Advance PC by correct instruction length to
- * avoid disassembler error messages */
- dc->pc += 2;
- dc->base.is_jmp = DISAS_NORETURN;
- }
-
- return true;
-}
-
-static bool arm_pre_translate_insn(DisasContext *dc)
-{
-#ifdef CONFIG_USER_ONLY
- /* Intercept jump to the magic kernel page. */
- if (dc->pc >= 0xffff0000) {
- /* We always get here via a jump, so know we are not in a
- conditional execution block. */
- gen_exception_internal(EXCP_KERNEL_TRAP);
- dc->base.is_jmp = DISAS_NORETURN;
- return true;
- }
-#endif
-
- if (dc->ss_active && !dc->pstate_ss) {
- /* Singlestep state is Active-pending.
- * If we're in this state at the start of a TB then either
- * a) we just took an exception to an EL which is being debugged
- * and this is the first insn in the exception handler
- * b) debug exceptions were masked and we just unmasked them
- * without changing EL (eg by clearing PSTATE.D)
- * In either case we're going to take a swstep exception in the
- * "did not step an insn" case, and so the syndrome ISV and EX
- * bits should be zero.
- */
- assert(dc->base.num_insns == 1);
- gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
- default_exception_el(dc));
- dc->base.is_jmp = DISAS_NORETURN;
- return true;
- }
-
- return false;
-}
-
-static void arm_post_translate_insn(DisasContext *dc)
-{
- if (dc->condjmp && !dc->base.is_jmp) {
- gen_set_label(dc->condlabel);
- dc->condjmp = 0;
- }
- dc->base.pc_next = dc->pc;
- translator_loop_temp_check(&dc->base);
-}
-
-static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
-{
- DisasContext *dc = container_of(dcbase, DisasContext, base);
- CPUARMState *env = cpu->env_ptr;
- unsigned int insn;
-
- if (arm_pre_translate_insn(dc)) {
- return;
- }
-
- insn = arm_ldl_code(env, dc->pc, dc->sctlr_b);
- dc->insn = insn;
- dc->pc += 4;
- disas_arm_insn(dc, insn);
-
- arm_post_translate_insn(dc);
-
- /* ARM is a fixed-length ISA. We performed the cross-page check
- in init_disas_context by adjusting max_insns. */
-}
-
-static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
-{
- /* Return true if this Thumb insn is always unconditional,
- * even inside an IT block. This is true of only a very few
- * instructions: BKPT, HLT, and SG.
- *
- * A larger class of instructions are UNPREDICTABLE if used
- * inside an IT block; we do not need to detect those here, because
- * what we do by default (perform the cc check and update the IT
- * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
- * choice for those situations.
- *
- * insn is either a 16-bit or a 32-bit instruction; the two are
- * distinguishable because for the 16-bit case the top 16 bits
- * are zeroes, and that isn't a valid 32-bit encoding.
- */
- if ((insn & 0xffffff00) == 0xbe00) {
- /* BKPT */
- return true;
- }
-
- if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
- !arm_dc_feature(s, ARM_FEATURE_M)) {
- /* HLT: v8A only. This is unconditional even when it is going to
- * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
- * For v7 cores this was a plain old undefined encoding and so
- * honours its cc check. (We might be using the encoding as
- * a semihosting trap, but we don't change the cc check behaviour
- * on that account, because a debugger connected to a real v7A
- * core and emulating semihosting traps by catching the UNDEF
- * exception would also only see cases where the cc check passed.
- * No guest code should be trying to do a HLT semihosting trap
- * in an IT block anyway.
- */
- return true;
- }
-
- if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
- arm_dc_feature(s, ARM_FEATURE_M)) {
- /* SG: v8M only */
- return true;
- }
-
- return false;
-}
-
-static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
-{
- DisasContext *dc = container_of(dcbase, DisasContext, base);
- CPUARMState *env = cpu->env_ptr;
- uint32_t insn;
- bool is_16bit;
-
- if (arm_pre_translate_insn(dc)) {
- return;
- }
-
- insn = arm_lduw_code(env, dc->pc, dc->sctlr_b);
- is_16bit = thumb_insn_is_16bit(dc, insn);
- dc->pc += 2;
- if (!is_16bit) {
- uint32_t insn2 = arm_lduw_code(env, dc->pc, dc->sctlr_b);
-
- insn = insn << 16 | insn2;
- dc->pc += 2;
- }
- dc->insn = insn;
-
- if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
- uint32_t cond = dc->condexec_cond;
-
- if (cond != 0x0e) { /* Skip conditional when condition is AL. */
- arm_skip_unless(dc, cond);
- }
- }
-
- if (is_16bit) {
- disas_thumb_insn(dc, insn);
- } else {
- disas_thumb2_insn(dc, insn);
- }
-
- /* Advance the Thumb condexec condition. */
- if (dc->condexec_mask) {
- dc->condexec_cond = ((dc->condexec_cond & 0xe) |
- ((dc->condexec_mask >> 4) & 1));
- dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
- if (dc->condexec_mask == 0) {
- dc->condexec_cond = 0;
- }
- }
-
- arm_post_translate_insn(dc);
-
- /* Thumb is a variable-length ISA. Stop translation when the next insn
- * will touch a new page. This ensures that prefetch aborts occur at
- * the right place.
- *
- * We want to stop the TB if the next insn starts in a new page,
- * or if it spans between this page and the next. This means that
- * if we're looking at the last halfword in the page we need to
- * see if it's a 16-bit Thumb insn (which will fit in this TB)
- * or a 32-bit Thumb insn (which won't).
- * This is to avoid generating a silly TB with a single 16-bit insn
- * in it at the end of this page (which would execute correctly
- * but isn't very efficient).
- */
- if (dc->base.is_jmp == DISAS_NEXT
- && (dc->pc - dc->page_start >= TARGET_PAGE_SIZE
- || (dc->pc - dc->page_start >= TARGET_PAGE_SIZE - 3
- && insn_crosses_page(env, dc)))) {
- dc->base.is_jmp = DISAS_TOO_MANY;
- }
-}
-
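/*
 * Editor's note -- illustrative sketch, not part of the deleted file:
 * the ITSTATE advance at the end of thumb_tr_translate_insn() above,
 * pulled out as a stand-alone helper on the same split representation
 * the translator uses (cond holds IT[7:4], mask holds IT[3:0] << 1).
 * The struct and function names are hypothetical.
 */
struct it_state {
    unsigned cond;
    unsigned mask;
};

static void it_advance(struct it_state *it)
{
    /* Shift the next then/else bit of the mask into cond[0]. */
    it->cond = (it->cond & 0xe) | ((it->mask >> 4) & 1);
    it->mask = (it->mask << 1) & 0x1f;
    if (it->mask == 0) {
        /* Last insn of the IT block: execution is unconditional again. */
        it->cond = 0;
    }
}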
-static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
-{
- DisasContext *dc = container_of(dcbase, DisasContext, base);
-
- if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
- /* FIXME: This can theoretically happen with self-modifying code. */
- cpu_abort(cpu, "IO on conditional branch instruction");
- }
-
- /* At this stage dc->condjmp will only be set when the skipped
- instruction was a conditional branch or trap, and the PC has
- already been written. */
- gen_set_condexec(dc);
- if (dc->base.is_jmp == DISAS_BX_EXCRET) {
- /* Exception return branches need some special case code at the
- * end of the TB, which is complex enough that it has to
- * handle the single-step vs not and the condition-failed
- * insn codepath itself.
- */
- gen_bx_excret_final_code(dc);
- } else if (unlikely(is_singlestepping(dc))) {
- /* Unconditional and "condition passed" instruction codepath. */
- switch (dc->base.is_jmp) {
- case DISAS_SWI:
- gen_ss_advance(dc);
- gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
- default_exception_el(dc));
- break;
- case DISAS_HVC:
- gen_ss_advance(dc);
- gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
- break;
- case DISAS_SMC:
- gen_ss_advance(dc);
- gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
- break;
- case DISAS_NEXT:
- case DISAS_TOO_MANY:
- case DISAS_UPDATE:
- gen_set_pc_im(dc, dc->pc);
- /* fall through */
- default:
- /* FIXME: Single stepping a WFI insn will not halt the CPU. */
- gen_singlestep_exception(dc);
- break;
- case DISAS_NORETURN:
- break;
- }
- } else {
- /* While branches must always occur at the end of an IT block,
- there are a few other things that can cause us to terminate
- the TB in the middle of an IT block:
- - Exception generating instructions (bkpt, swi, undefined).
- - Page boundaries.
- - Hardware watchpoints.
- Hardware breakpoints have already been handled and skip this code.
- */
- switch (dc->base.is_jmp) {
- case DISAS_NEXT:
- case DISAS_TOO_MANY:
- gen_goto_tb(dc, 1, dc->pc);
- break;
- case DISAS_JUMP:
- gen_goto_ptr();
- break;
- case DISAS_UPDATE:
- gen_set_pc_im(dc, dc->pc);
- /* fall through */
- default:
- /* indicate that the hash table must be used to find the next TB */
- tcg_gen_exit_tb(NULL, 0);
- break;
- case DISAS_NORETURN:
- /* nothing more to generate */
- break;
- case DISAS_WFI:
- {
- TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
- !(dc->insn & (1U << 31))) ? 2 : 4);
-
- gen_helper_wfi(cpu_env, tmp);
- tcg_temp_free_i32(tmp);
- /* The helper doesn't necessarily throw an exception, but we
- * must go back to the main loop to check for interrupts anyway.
- */
- tcg_gen_exit_tb(NULL, 0);
- break;
- }
- case DISAS_WFE:
- gen_helper_wfe(cpu_env);
- break;
- case DISAS_YIELD:
- gen_helper_yield(cpu_env);
- break;
- case DISAS_SWI:
- gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
- default_exception_el(dc));
- break;
- case DISAS_HVC:
- gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
- break;
- case DISAS_SMC:
- gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
- break;
- }
- }
-
- if (dc->condjmp) {
- /* "Condition failed" instruction codepath for the branch/trap insn */
- gen_set_label(dc->condlabel);
- gen_set_condexec(dc);
- if (unlikely(is_singlestepping(dc))) {
- gen_set_pc_im(dc, dc->pc);
- gen_singlestep_exception(dc);
- } else {
- gen_goto_tb(dc, 1, dc->pc);
- }
- }
-
- /* Functions above can change dc->pc, so re-align db->pc_next */
- dc->base.pc_next = dc->pc;
-}
-
-static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
-{
- DisasContext *dc = container_of(dcbase, DisasContext, base);
-
- qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
- log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
-}
-
-static const TranslatorOps arm_translator_ops = {
- .init_disas_context = arm_tr_init_disas_context,
- .tb_start = arm_tr_tb_start,
- .insn_start = arm_tr_insn_start,
- .breakpoint_check = arm_tr_breakpoint_check,
- .translate_insn = arm_tr_translate_insn,
- .tb_stop = arm_tr_tb_stop,
- .disas_log = arm_tr_disas_log,
-};
-
-static const TranslatorOps thumb_translator_ops = {
- .init_disas_context = arm_tr_init_disas_context,
- .tb_start = arm_tr_tb_start,
- .insn_start = arm_tr_insn_start,
- .breakpoint_check = arm_tr_breakpoint_check,
- .translate_insn = thumb_tr_translate_insn,
- .tb_stop = arm_tr_tb_stop,
- .disas_log = arm_tr_disas_log,
-};
-
-/* generate intermediate code for basic block 'tb'. */
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb)
-{
- DisasContext dc;
- const TranslatorOps *ops = &arm_translator_ops;
-
- if (ARM_TBFLAG_THUMB(tb->flags)) {
- ops = &thumb_translator_ops;
- }
-#ifdef TARGET_AARCH64
- if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) {
- ops = &aarch64_translator_ops;
- }
-#endif
-
- translator_loop(ops, &dc.base, cpu, tb);
-}
-
-static const char *cpu_mode_names[16] = {
- "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt",
- "???", "???", "hyp", "und", "???", "???", "???", "sys"
-};
-
-void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
- int flags)
-{
- ARMCPU *cpu = ARM_CPU(cs);
- CPUARMState *env = &cpu->env;
- int i;
-
- if (is_a64(env)) {
- aarch64_cpu_dump_state(cs, f, cpu_fprintf, flags);
- return;
- }
-
- for (i = 0; i < 16; i++) {
- cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
- if ((i % 4) == 3)
- cpu_fprintf(f, "\n");
- else
- cpu_fprintf(f, " ");
- }
-
- if (arm_feature(env, ARM_FEATURE_M)) {
- uint32_t xpsr = xpsr_read(env);
- const char *mode;
- const char *ns_status = "";
-
- if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
- ns_status = env->v7m.secure ? "S " : "NS ";
- }
-
- if (xpsr & XPSR_EXCP) {
- mode = "handler";
- } else {
- if (env->v7m.control[env->v7m.secure] & R_V7M_CONTROL_NPRIV_MASK) {
- mode = "unpriv-thread";
- } else {
- mode = "priv-thread";
- }
- }
-
- cpu_fprintf(f, "XPSR=%08x %c%c%c%c %c %s%s\n",
- xpsr,
- xpsr & XPSR_N ? 'N' : '-',
- xpsr & XPSR_Z ? 'Z' : '-',
- xpsr & XPSR_C ? 'C' : '-',
- xpsr & XPSR_V ? 'V' : '-',
- xpsr & XPSR_T ? 'T' : 'A',
- ns_status,
- mode);
- } else {
- uint32_t psr = cpsr_read(env);
- const char *ns_status = "";
-
- if (arm_feature(env, ARM_FEATURE_EL3) &&
- (psr & CPSR_M) != ARM_CPU_MODE_MON) {
- ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
- }
-
- cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%s%d\n",
- psr,
- psr & CPSR_N ? 'N' : '-',
- psr & CPSR_Z ? 'Z' : '-',
- psr & CPSR_C ? 'C' : '-',
- psr & CPSR_V ? 'V' : '-',
- psr & CPSR_T ? 'T' : 'A',
- ns_status,
- cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26);
- }
-
- if (flags & CPU_DUMP_FPU) {
- int numvfpregs = 0;
- if (arm_feature(env, ARM_FEATURE_VFP)) {
- numvfpregs += 16;
- }
- if (arm_feature(env, ARM_FEATURE_VFP3)) {
- numvfpregs += 16;
- }
- for (i = 0; i < numvfpregs; i++) {
- uint64_t v = *aa32_vfp_dreg(env, i);
- cpu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
- i * 2, (uint32_t)v,
- i * 2 + 1, (uint32_t)(v >> 32),
- i, v);
- }
- cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
- }
-}
-
-void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
- target_ulong *data)
-{
- if (is_a64(env)) {
- env->pc = data[0];
- env->condexec_bits = 0;
- env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
- } else {
- env->regs[15] = data[0];
- env->condexec_bits = data[1];
- env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
- }
-}
diff --git a/target/arm/translate.h b/target/arm/translate.h
deleted file mode 100644
index c1b65f3efb..0000000000
--- a/target/arm/translate.h
+++ /dev/null
@@ -1,193 +0,0 @@
-#ifndef TARGET_ARM_TRANSLATE_H
-#define TARGET_ARM_TRANSLATE_H
-
-#include "exec/translator.h"
-
-
-/* internal defines */
-typedef struct DisasContext {
- DisasContextBase base;
-
- target_ulong pc;
- target_ulong page_start;
- uint32_t insn;
- /* Nonzero if this instruction has been conditionally skipped. */
- int condjmp;
- /* The label that will be jumped to when the instruction is skipped. */
- TCGLabel *condlabel;
- /* Thumb-2 conditional execution bits. */
- int condexec_mask;
- int condexec_cond;
- int thumb;
- int sctlr_b;
- TCGMemOp be_data;
-#if !defined(CONFIG_USER_ONLY)
- int user;
-#endif
- ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */
- bool tbi0; /* TBI0 for EL0/1 or TBI for EL2/3 */
- bool tbi1; /* TBI1 for EL0/1, not used for EL2/3 */
- bool ns; /* Use non-secure CPREG bank on access */
- int fp_excp_el; /* FP exception EL or 0 if enabled */
- int sve_excp_el; /* SVE exception EL or 0 if enabled */
- int sve_len; /* SVE vector length in bytes */
- /* Flag indicating that exceptions from secure mode are routed to EL3. */
- bool secure_routed_to_el3;
- bool vfp_enabled; /* FP enabled via FPSCR.EN */
- int vec_len;
- int vec_stride;
- bool v7m_handler_mode;
- bool v8m_secure; /* true if v8M and we're in Secure mode */
- bool v8m_stackcheck; /* true if we need to perform v8M stack limit checks */
- /* Immediate value in AArch32 SVC insn; must be set if is_jmp == DISAS_SWI
- * so that top level loop can generate correct syndrome information.
- */
- uint32_t svc_imm;
- int aarch64;
- int current_el;
- GHashTable *cp_regs;
- uint64_t features; /* CPU features bits */
- /* Because unallocated encodings generate different exception syndrome
- * information from traps due to FP being disabled, we can't do a single
- * "is fp access disabled" check at a high level in the decode tree.
- * To help in catching bugs where the access check was forgotten in some
- * code path, we set this flag when the access check is done, and assert
- * that it is set at the point where we actually touch the FP regs.
- */
- bool fp_access_checked;
- /* ARMv8 single-step state (this is distinct from the QEMU gdbstub
- * single-step support).
- */
- bool ss_active;
- bool pstate_ss;
- /* True if the insn just emitted was a load-exclusive instruction
- * (necessary for syndrome information for single step exceptions),
- * ie A64 LDX*, LDAX*, A32/T32 LDREX*, LDAEX*.
- */
- bool is_ldex;
- /* True if a single-step exception will be taken to the current EL */
- bool ss_same_el;
- /* Bottom two bits of XScale c15_cpar coprocessor access control reg */
- int c15_cpar;
- /* TCG op of the current insn_start. */
- TCGOp *insn_start;
-#define TMP_A64_MAX 16
- int tmp_a64_count;
- TCGv_i64 tmp_a64[TMP_A64_MAX];
-} DisasContext;
-
-typedef struct DisasCompare {
- TCGCond cond;
- TCGv_i32 value;
- bool value_global;
-} DisasCompare;
-
-/* Share the TCG temporaries common between 32 and 64 bit modes. */
-extern TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
-extern TCGv_i64 cpu_exclusive_addr;
-extern TCGv_i64 cpu_exclusive_val;
-
-static inline int arm_dc_feature(DisasContext *dc, int feature)
-{
- return (dc->features & (1ULL << feature)) != 0;
-}
-
-static inline int get_mem_index(DisasContext *s)
-{
- return arm_to_core_mmu_idx(s->mmu_idx);
-}
-
-/* Function used to determine the target exception EL when otherwise not known
- * or default.
- */
-static inline int default_exception_el(DisasContext *s)
-{
- /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
- * there is no secure EL1, so we route exceptions to EL3. Otherwise,
- * exceptions can only be routed to ELs above 1, so we target the higher of
- * 1 or the current EL.
- */
- return (s->mmu_idx == ARMMMUIdx_S1SE0 && s->secure_routed_to_el3)
- ? 3 : MAX(1, s->current_el);
-}
-
-static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
-{
- /* We don't need to save all of the syndrome so we mask and shift
- * out unneeded bits to help the sleb128 encoder do a better job.
- */
- syn &= ARM_INSN_START_WORD2_MASK;
- syn >>= ARM_INSN_START_WORD2_SHIFT;
-
- /* We check and clear insn_start_idx to catch multiple updates. */
- assert(s->insn_start != NULL);
- tcg_set_insn_start_param(s->insn_start, 2, syn);
- s->insn_start = NULL;
-}
-
-/* is_jmp field values */
-#define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */
-#define DISAS_UPDATE DISAS_TARGET_1 /* cpu state was modified dynamically */
-/* These instructions trap after executing, so the A32/T32 decoder must
- * defer them until after the conditional execution state has been updated.
- * WFI also needs special handling when single-stepping.
- */
-#define DISAS_WFI DISAS_TARGET_2
-#define DISAS_SWI DISAS_TARGET_3
-/* WFE */
-#define DISAS_WFE DISAS_TARGET_4
-#define DISAS_HVC DISAS_TARGET_5
-#define DISAS_SMC DISAS_TARGET_6
-#define DISAS_YIELD DISAS_TARGET_7
-/* M profile branch which might be an exception return (and so needs
- * custom end-of-TB code)
- */
-#define DISAS_BX_EXCRET DISAS_TARGET_8
-/* For instructions which want an immediate exit to the main loop,
- * as opposed to attempting to use lookup_and_goto_ptr. Unlike
- * DISAS_UPDATE this doesn't write the PC on exiting the translation
- * loop so you need to ensure something (gen_a64_set_pc_im or runtime
- * helper) has done so before we reach return from cpu_tb_exec.
- */
-#define DISAS_EXIT DISAS_TARGET_9
-
-#ifdef TARGET_AARCH64
-void a64_translate_init(void);
-void gen_a64_set_pc_im(uint64_t val);
-void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
- fprintf_function cpu_fprintf, int flags);
-extern const TranslatorOps aarch64_translator_ops;
-#else
-static inline void a64_translate_init(void)
-{
-}
-
-static inline void gen_a64_set_pc_im(uint64_t val)
-{
-}
-
-static inline void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
- fprintf_function cpu_fprintf,
- int flags)
-{
-}
-#endif
-
-void arm_test_cc(DisasCompare *cmp, int cc);
-void arm_free_cc(DisasCompare *cmp);
-void arm_jump_cc(DisasCompare *cmp, TCGLabel *label);
-void arm_gen_test_cc(int cc, TCGLabel *label);
-
-/* Return state of Alternate Half-precision flag, caller frees result */
-static inline TCGv_i32 get_ahp_flag(void)
-{
- TCGv_i32 ret = tcg_temp_new_i32();
-
- tcg_gen_ld_i32(ret, cpu_env,
- offsetof(CPUARMState, vfp.xregs[ARM_VFP_FPSCR]));
- tcg_gen_extract_i32(ret, ret, 26, 1);
-
- return ret;
-}
-
-#endif /* TARGET_ARM_TRANSLATE_H */
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
deleted file mode 100644
index 37f338732e..0000000000
--- a/target/arm/vec_helper.c
+++ /dev/null
@@ -1,768 +0,0 @@
-/*
- * ARM AdvSIMD / SVE Vector Operations
- *
- * Copyright (c) 2018 Linaro
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "exec/helper-proto.h"
-#include "tcg/tcg-gvec-desc.h"
-#include "fpu/softfloat.h"
-
-
-/* Note that vector data is stored in host-endian 64-bit chunks,
- so addressing units smaller than that needs a host-endian fixup. */
-#ifdef HOST_WORDS_BIGENDIAN
-#define H1(x) ((x) ^ 7)
-#define H2(x) ((x) ^ 3)
-#define H4(x) ((x) ^ 1)
-#else
-#define H1(x) (x)
-#define H2(x) (x)
-#define H4(x) (x)
-#endif
-
-#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q
-
-static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
-{
- uint64_t *d = vd + opr_sz;
- uintptr_t i;
-
- for (i = opr_sz; i < max_sz; i += 8) {
- *d++ = 0;
- }
-}
-
-/* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */
-static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1,
- int16_t src2, int16_t src3)
-{
- /* Simplify:
- * = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16
- * = ((a3 << 15) + (e1 * e2) + (1 << 14)) >> 15
- */
- int32_t ret = (int32_t)src1 * src2;
- ret = ((int32_t)src3 << 15) + ret + (1 << 14);
- ret >>= 15;
- if (ret != (int16_t)ret) {
- SET_QC();
- ret = (ret < 0 ? -0x8000 : 0x7fff);
- }
- return ret;
-}
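
The simplification worked through in the comment above is easy to check numerically. Below is a small standalone sketch (illustrative only, not part of the patch; all names are made up) comparing the architectural form ((a << 16) + 2*(n*m) + (1 << 15)) >> 16 with the shifted-down form used by inl_qrdmlah_s16(), saturation included.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Architectural formulation: widen, double the product, round, narrow. */
static int32_t qrdmlah16_arch(int16_t n, int16_t m, int16_t a)
{
    int64_t r = ((int64_t)a << 16) + 2 * ((int64_t)n * m) + (1 << 15);
    r >>= 16;
    if (r > INT16_MAX) {
        r = INT16_MAX;          /* the helper sets QC at this point */
    } else if (r < INT16_MIN) {
        r = INT16_MIN;
    }
    return (int32_t)r;
}

/* The simplified form used above: one less shift on every term. */
static int32_t qrdmlah16_simplified(int16_t n, int16_t m, int16_t a)
{
    int32_t r = ((int32_t)a << 15) + (int32_t)n * m + (1 << 14);
    r >>= 15;
    if (r != (int16_t)r) {
        r = r < 0 ? INT16_MIN : INT16_MAX;
    }
    return r;
}

int main(void)
{
    const int16_t tests[][3] = {
        { 0x4000, 0x4000, 0 },             /* 0.5 * 0.5          */
        { INT16_MIN, INT16_MIN, 0 },       /* -1 * -1, saturates */
        { 0x1234, (int16_t)0x9abc, 0x0100 },
    };

    for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
        int16_t n = tests[i][0], m = tests[i][1], a = tests[i][2];
        assert(qrdmlah16_arch(n, m, a) == qrdmlah16_simplified(n, m, a));
        printf("n=%6d m=%6d a=%4d -> %6d\n",
               n, m, a, (int)qrdmlah16_simplified(n, m, a));
    }
    return 0;
}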
-
-uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1,
- uint32_t src2, uint32_t src3)
-{
- uint16_t e1 = inl_qrdmlah_s16(env, src1, src2, src3);
- uint16_t e2 = inl_qrdmlah_s16(env, src1 >> 16, src2 >> 16, src3 >> 16);
- return deposit32(e1, 16, 16, e2);
-}
-
-void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm,
- void *ve, uint32_t desc)
-{
- uintptr_t opr_sz = simd_oprsz(desc);
- int16_t *d = vd;
- int16_t *n = vn;
- int16_t *m = vm;
- CPUARMState *env = ve;
- uintptr_t i;
-
- for (i = 0; i < opr_sz / 2; ++i) {
- d[i] = inl_qrdmlah_s16(env, n[i], m[i], d[i]);
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-/* Signed saturating rounding doubling multiply-subtract high half, 16-bit */
-static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1,
- int16_t src2, int16_t src3)
-{
- /* Similarly, using subtraction:
- * = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16
- * = ((a3 << 15) - (e1 * e2) + (1 << 14)) >> 15
- */
- int32_t ret = (int32_t)src1 * src2;
- ret = ((int32_t)src3 << 15) - ret + (1 << 14);
- ret >>= 15;
- if (ret != (int16_t)ret) {
- SET_QC();
- ret = (ret < 0 ? -0x8000 : 0x7fff);
- }
- return ret;
-}
-
-uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1,
- uint32_t src2, uint32_t src3)
-{
- uint16_t e1 = inl_qrdmlsh_s16(env, src1, src2, src3);
- uint16_t e2 = inl_qrdmlsh_s16(env, src1 >> 16, src2 >> 16, src3 >> 16);
- return deposit32(e1, 16, 16, e2);
-}
-
-void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm,
- void *ve, uint32_t desc)
-{
- uintptr_t opr_sz = simd_oprsz(desc);
- int16_t *d = vd;
- int16_t *n = vn;
- int16_t *m = vm;
- CPUARMState *env = ve;
- uintptr_t i;
-
- for (i = 0; i < opr_sz / 2; ++i) {
- d[i] = inl_qrdmlsh_s16(env, n[i], m[i], d[i]);
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-/* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */
-uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
- int32_t src2, int32_t src3)
-{
- /* Simplify similarly to int_qrdmlah_s16 above. */
- int64_t ret = (int64_t)src1 * src2;
- ret = ((int64_t)src3 << 31) + ret + (1 << 30);
- ret >>= 31;
- if (ret != (int32_t)ret) {
- SET_QC();
- ret = (ret < 0 ? INT32_MIN : INT32_MAX);
- }
- return ret;
-}
-
-void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
- void *ve, uint32_t desc)
-{
- uintptr_t opr_sz = simd_oprsz(desc);
- int32_t *d = vd;
- int32_t *n = vn;
- int32_t *m = vm;
- CPUARMState *env = ve;
- uintptr_t i;
-
- for (i = 0; i < opr_sz / 4; ++i) {
- d[i] = helper_neon_qrdmlah_s32(env, n[i], m[i], d[i]);
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-/* Signed saturating rounding doubling multiply-subtract high half, 32-bit */
-uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
- int32_t src2, int32_t src3)
-{
- /* Simplify similarly to int_qrdmlsh_s16 above. */
- int64_t ret = (int64_t)src1 * src2;
- ret = ((int64_t)src3 << 31) - ret + (1 << 30);
- ret >>= 31;
- if (ret != (int32_t)ret) {
- SET_QC();
- ret = (ret < 0 ? INT32_MIN : INT32_MAX);
- }
- return ret;
-}
-
-void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
- void *ve, uint32_t desc)
-{
- uintptr_t opr_sz = simd_oprsz(desc);
- int32_t *d = vd;
- int32_t *n = vn;
- int32_t *m = vm;
- CPUARMState *env = ve;
- uintptr_t i;
-
- for (i = 0; i < opr_sz / 4; ++i) {
- d[i] = helper_neon_qrdmlsh_s32(env, n[i], m[i], d[i]);
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-/* Integer 8 and 16-bit dot-product.
- *
- * Note that for the loops herein, host endianness does not matter
- * with respect to the ordering of data within the 64-bit lanes.
- * All elements are treated equally, no matter where they are.
- */
-
-void HELPER(gvec_sdot_b)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc);
- uint32_t *d = vd;
- int8_t *n = vn, *m = vm;
-
- for (i = 0; i < opr_sz / 4; ++i) {
- d[i] += n[i * 4 + 0] * m[i * 4 + 0]
- + n[i * 4 + 1] * m[i * 4 + 1]
- + n[i * 4 + 2] * m[i * 4 + 2]
- + n[i * 4 + 3] * m[i * 4 + 3];
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-void HELPER(gvec_udot_b)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc);
- uint32_t *d = vd;
- uint8_t *n = vn, *m = vm;
-
- for (i = 0; i < opr_sz / 4; ++i) {
- d[i] += n[i * 4 + 0] * m[i * 4 + 0]
- + n[i * 4 + 1] * m[i * 4 + 1]
- + n[i * 4 + 2] * m[i * 4 + 2]
- + n[i * 4 + 3] * m[i * 4 + 3];
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-void HELPER(gvec_sdot_h)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc);
- uint64_t *d = vd;
- int16_t *n = vn, *m = vm;
-
- for (i = 0; i < opr_sz / 8; ++i) {
- d[i] += (int64_t)n[i * 4 + 0] * m[i * 4 + 0]
- + (int64_t)n[i * 4 + 1] * m[i * 4 + 1]
- + (int64_t)n[i * 4 + 2] * m[i * 4 + 2]
- + (int64_t)n[i * 4 + 3] * m[i * 4 + 3];
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-void HELPER(gvec_udot_h)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc);
- uint64_t *d = vd;
- uint16_t *n = vn, *m = vm;
-
- for (i = 0; i < opr_sz / 8; ++i) {
- d[i] += (uint64_t)n[i * 4 + 0] * m[i * 4 + 0]
- + (uint64_t)n[i * 4 + 1] * m[i * 4 + 1]
- + (uint64_t)n[i * 4 + 2] * m[i * 4 + 2]
- + (uint64_t)n[i * 4 + 3] * m[i * 4 + 3];
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-void HELPER(gvec_sdot_idx_b)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, segend, opr_sz = simd_oprsz(desc), opr_sz_4 = opr_sz / 4;
- intptr_t index = simd_data(desc);
- uint32_t *d = vd;
- int8_t *n = vn;
- int8_t *m_indexed = (int8_t *)vm + index * 4;
-
- /* Notice the special case of opr_sz == 8, from aa64/aa32 advsimd.
- * Otherwise opr_sz is a multiple of 16.
- */
- segend = MIN(4, opr_sz_4);
- i = 0;
- do {
- int8_t m0 = m_indexed[i * 4 + 0];
- int8_t m1 = m_indexed[i * 4 + 1];
- int8_t m2 = m_indexed[i * 4 + 2];
- int8_t m3 = m_indexed[i * 4 + 3];
-
- do {
- d[i] += n[i * 4 + 0] * m0
- + n[i * 4 + 1] * m1
- + n[i * 4 + 2] * m2
- + n[i * 4 + 3] * m3;
- } while (++i < segend);
- segend = i + 4;
- } while (i < opr_sz_4);
-
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
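
The segend bookkeeping above exists so that the 64-bit AdvSIMD case (opr_sz == 8, i.e. two accumulators) can share code with the full-width case. A plain reference version (illustrative only, hypothetical names, no QEMU types) may make the per-128-bit-segment selection of m easier to follow:

#include <stdint.h>
#include <stdio.h>

/*
 * For each segment of four 32-bit accumulators, one group of four bytes of
 * m (chosen by "index") is dotted against every lane's four bytes of n.
 * A 64-bit operation is simply a short segment of two lanes.
 */
static void sdot_idx_ref(int32_t *d, const int8_t *n, const int8_t *m,
                         int lanes, int index)
{
    for (int seg = 0; seg < lanes; seg += 4) {
        const int8_t *msel = &m[(seg + index) * 4];
        int limit = (seg + 4 < lanes) ? seg + 4 : lanes;

        for (int lane = seg; lane < limit; lane++) {
            for (int b = 0; b < 4; b++) {
                d[lane] += n[lane * 4 + b] * msel[b];
            }
        }
    }
}

int main(void)
{
    int8_t n[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };    /* two 32-bit lanes */
    int8_t m[16] = { 0 };
    int32_t d[2] = { 0, 0 };

    m[4] = m[5] = m[6] = m[7] = 1;               /* the group at index 1 */
    sdot_idx_ref(d, n, m, 2, 1);
    printf("d[0]=%d d[1]=%d\n", (int)d[0], (int)d[1]);   /* 10 and 26 */
    return 0;
}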
-
-void HELPER(gvec_udot_idx_b)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, segend, opr_sz = simd_oprsz(desc), opr_sz_4 = opr_sz / 4;
- intptr_t index = simd_data(desc);
- uint32_t *d = vd;
- uint8_t *n = vn;
- uint8_t *m_indexed = (uint8_t *)vm + index * 4;
-
- /* Notice the special case of opr_sz == 8, from aa64/aa32 advsimd.
- * Otherwise opr_sz is a multiple of 16.
- */
- segend = MIN(4, opr_sz_4);
- i = 0;
- do {
- uint8_t m0 = m_indexed[i * 4 + 0];
- uint8_t m1 = m_indexed[i * 4 + 1];
- uint8_t m2 = m_indexed[i * 4 + 2];
- uint8_t m3 = m_indexed[i * 4 + 3];
-
- do {
- d[i] += n[i * 4 + 0] * m0
- + n[i * 4 + 1] * m1
- + n[i * 4 + 2] * m2
- + n[i * 4 + 3] * m3;
- } while (++i < segend);
- segend = i + 4;
- } while (i < opr_sz_4);
-
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-void HELPER(gvec_sdot_idx_h)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc), opr_sz_8 = opr_sz / 8;
- intptr_t index = simd_data(desc);
- uint64_t *d = vd;
- int16_t *n = vn;
- int16_t *m_indexed = (int16_t *)vm + index * 4;
-
- /* This is supported by SVE only, so opr_sz is always a multiple of 16.
- * Process the entire segment all at once, writing back the results
- * only after we've consumed all of the inputs.
- */
- for (i = 0; i < opr_sz_8 ; i += 2) {
- uint64_t d0, d1;
-
- d0 = n[i * 4 + 0] * (int64_t)m_indexed[i * 4 + 0];
- d0 += n[i * 4 + 1] * (int64_t)m_indexed[i * 4 + 1];
- d0 += n[i * 4 + 2] * (int64_t)m_indexed[i * 4 + 2];
- d0 += n[i * 4 + 3] * (int64_t)m_indexed[i * 4 + 3];
- d1 = n[i * 4 + 4] * (int64_t)m_indexed[i * 4 + 0];
- d1 += n[i * 4 + 5] * (int64_t)m_indexed[i * 4 + 1];
- d1 += n[i * 4 + 6] * (int64_t)m_indexed[i * 4 + 2];
- d1 += n[i * 4 + 7] * (int64_t)m_indexed[i * 4 + 3];
-
- d[i + 0] += d0;
- d[i + 1] += d1;
- }
-
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-void HELPER(gvec_udot_idx_h)(void *vd, void *vn, void *vm, uint32_t desc)
-{
- intptr_t i, opr_sz = simd_oprsz(desc), opr_sz_8 = opr_sz / 8;
- intptr_t index = simd_data(desc);
- uint64_t *d = vd;
- uint16_t *n = vn;
- uint16_t *m_indexed = (uint16_t *)vm + index * 4;
-
- /* This is supported by SVE only, so opr_sz is always a multiple of 16.
- * Process the entire segment all at once, writing back the results
- * only after we've consumed all of the inputs.
- */
- for (i = 0; i < opr_sz_8 ; i += 2) {
- uint64_t d0, d1;
-
- d0 = n[i * 4 + 0] * (uint64_t)m_indexed[i * 4 + 0];
- d0 += n[i * 4 + 1] * (uint64_t)m_indexed[i * 4 + 1];
- d0 += n[i * 4 + 2] * (uint64_t)m_indexed[i * 4 + 2];
- d0 += n[i * 4 + 3] * (uint64_t)m_indexed[i * 4 + 3];
- d1 = n[i * 4 + 4] * (uint64_t)m_indexed[i * 4 + 0];
- d1 += n[i * 4 + 5] * (uint64_t)m_indexed[i * 4 + 1];
- d1 += n[i * 4 + 6] * (uint64_t)m_indexed[i * 4 + 2];
- d1 += n[i * 4 + 7] * (uint64_t)m_indexed[i * 4 + 3];
-
- d[i + 0] += d0;
- d[i + 1] += d1;
- }
-
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm,
- void *vfpst, uint32_t desc)
-{
- uintptr_t opr_sz = simd_oprsz(desc);
- float16 *d = vd;
- float16 *n = vn;
- float16 *m = vm;
- float_status *fpst = vfpst;
- uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = neg_real ^ 1;
- uintptr_t i;
-
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 15;
- neg_imag <<= 15;
-
- for (i = 0; i < opr_sz / 2; i += 2) {
- float16 e0 = n[H2(i)];
- float16 e1 = m[H2(i + 1)] ^ neg_imag;
- float16 e2 = n[H2(i + 1)];
- float16 e3 = m[H2(i)] ^ neg_real;
-
- d[H2(i)] = float16_add(e0, e1, fpst);
- d[H2(i + 1)] = float16_add(e2, e3, fpst);
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm,
- void *vfpst, uint32_t desc)
-{
- uintptr_t opr_sz = simd_oprsz(desc);
- float32 *d = vd;
- float32 *n = vn;
- float32 *m = vm;
- float_status *fpst = vfpst;
- uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = neg_real ^ 1;
- uintptr_t i;
-
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 31;
- neg_imag <<= 31;
-
- for (i = 0; i < opr_sz / 4; i += 2) {
- float32 e0 = n[H4(i)];
- float32 e1 = m[H4(i + 1)] ^ neg_imag;
- float32 e2 = n[H4(i + 1)];
- float32 e3 = m[H4(i)] ^ neg_real;
-
- d[H4(i)] = float32_add(e0, e1, fpst);
- d[H4(i + 1)] = float32_add(e2, e3, fpst);
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm,
- void *vfpst, uint32_t desc)
-{
- uintptr_t opr_sz = simd_oprsz(desc);
- float64 *d = vd;
- float64 *n = vn;
- float64 *m = vm;
- float_status *fpst = vfpst;
- uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1);
- uint64_t neg_imag = neg_real ^ 1;
- uintptr_t i;
-
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 63;
- neg_imag <<= 63;
-
- for (i = 0; i < opr_sz / 8; i += 2) {
- float64 e0 = n[i];
- float64 e1 = m[i + 1] ^ neg_imag;
- float64 e2 = n[i + 1];
- float64 e3 = m[i] ^ neg_real;
-
- d[i] = float64_add(e0, e1, fpst);
- d[i + 1] = float64_add(e2, e3, fpst);
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
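
In all three widths above the rotation is achieved purely by XORing a sign mask into one operand before an ordinary add. The same computation for a single complex pair, written with plain host floats (a sketch, not part of the patch; names are made up):

#include <stdbool.h>
#include <stdio.h>

/* d, n, m are (real, imag) pairs.  neg_real plays the role of the desc bit
 * decoded above; the other sign is always its complement. */
static void fcadd_pair(float d[2], const float n[2], const float m[2],
                       bool neg_real)
{
    bool neg_imag = !neg_real;
    float e1 = neg_imag ? -m[1] : m[1];   /* +/- m.imag, added to n.real */
    float e3 = neg_real ? -m[0] : m[0];   /* +/- m.real, added to n.imag */

    d[0] = n[0] + e1;
    d[1] = n[1] + e3;
}

int main(void)
{
    float n[2] = { 1.0f, 2.0f }, m[2] = { 3.0f, 4.0f }, d[2];

    fcadd_pair(d, n, m, false);           /* n plus m rotated one way   */
    printf("%g %+gi\n", d[0], d[1]);      /* -3 +5i */
    fcadd_pair(d, n, m, true);            /* n plus m rotated the other */
    printf("%g %+gi\n", d[0], d[1]);      /* 5 -1i */
    return 0;
}

Which sign pattern corresponds to the instruction's #90 or #270 immediate is decided by the translator when it fills in desc.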
-
-void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm,
- void *vfpst, uint32_t desc)
-{
- uintptr_t opr_sz = simd_oprsz(desc);
- float16 *d = vd;
- float16 *n = vn;
- float16 *m = vm;
- float_status *fpst = vfpst;
- intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- uint32_t neg_real = flip ^ neg_imag;
- uintptr_t i;
-
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 15;
- neg_imag <<= 15;
-
- for (i = 0; i < opr_sz / 2; i += 2) {
- float16 e2 = n[H2(i + flip)];
- float16 e1 = m[H2(i + flip)] ^ neg_real;
- float16 e4 = e2;
- float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag;
-
- d[H2(i)] = float16_muladd(e2, e1, d[H2(i)], 0, fpst);
- d[H2(i + 1)] = float16_muladd(e4, e3, d[H2(i + 1)], 0, fpst);
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm,
- void *vfpst, uint32_t desc)
-{
- uintptr_t opr_sz = simd_oprsz(desc);
- float16 *d = vd;
- float16 *n = vn;
- float16 *m = vm;
- float_status *fpst = vfpst;
- intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
- uint32_t neg_real = flip ^ neg_imag;
- intptr_t elements = opr_sz / sizeof(float16);
- intptr_t eltspersegment = 16 / sizeof(float16);
- intptr_t i, j;
-
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 15;
- neg_imag <<= 15;
-
- for (i = 0; i < elements; i += eltspersegment) {
- float16 mr = m[H2(i + 2 * index + 0)];
- float16 mi = m[H2(i + 2 * index + 1)];
- float16 e1 = neg_real ^ (flip ? mi : mr);
- float16 e3 = neg_imag ^ (flip ? mr : mi);
-
- for (j = i; j < i + eltspersegment; j += 2) {
- float16 e2 = n[H2(j + flip)];
- float16 e4 = e2;
-
- d[H2(j)] = float16_muladd(e2, e1, d[H2(j)], 0, fpst);
- d[H2(j + 1)] = float16_muladd(e4, e3, d[H2(j + 1)], 0, fpst);
- }
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm,
- void *vfpst, uint32_t desc)
-{
- uintptr_t opr_sz = simd_oprsz(desc);
- float32 *d = vd;
- float32 *n = vn;
- float32 *m = vm;
- float_status *fpst = vfpst;
- intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- uint32_t neg_real = flip ^ neg_imag;
- uintptr_t i;
-
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 31;
- neg_imag <<= 31;
-
- for (i = 0; i < opr_sz / 4; i += 2) {
- float32 e2 = n[H4(i + flip)];
- float32 e1 = m[H4(i + flip)] ^ neg_real;
- float32 e4 = e2;
- float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag;
-
- d[H4(i)] = float32_muladd(e2, e1, d[H4(i)], 0, fpst);
- d[H4(i + 1)] = float32_muladd(e4, e3, d[H4(i + 1)], 0, fpst);
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm,
- void *vfpst, uint32_t desc)
-{
- uintptr_t opr_sz = simd_oprsz(desc);
- float32 *d = vd;
- float32 *n = vn;
- float32 *m = vm;
- float_status *fpst = vfpst;
- intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
- uint32_t neg_real = flip ^ neg_imag;
- intptr_t elements = opr_sz / sizeof(float32);
- intptr_t eltspersegment = 16 / sizeof(float32);
- intptr_t i, j;
-
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 31;
- neg_imag <<= 31;
-
- for (i = 0; i < elements; i += eltspersegment) {
- float32 mr = m[H4(i + 2 * index + 0)];
- float32 mi = m[H4(i + 2 * index + 1)];
- float32 e1 = neg_real ^ (flip ? mi : mr);
- float32 e3 = neg_imag ^ (flip ? mr : mi);
-
- for (j = i; j < i + eltspersegment; j += 2) {
- float32 e2 = n[H4(j + flip)];
- float32 e4 = e2;
-
- d[H4(j)] = float32_muladd(e2, e1, d[H4(j)], 0, fpst);
- d[H4(j + 1)] = float32_muladd(e4, e3, d[H4(j + 1)], 0, fpst);
- }
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm,
- void *vfpst, uint32_t desc)
-{
- uintptr_t opr_sz = simd_oprsz(desc);
- float64 *d = vd;
- float64 *n = vn;
- float64 *m = vm;
- float_status *fpst = vfpst;
- intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- uint64_t neg_real = flip ^ neg_imag;
- uintptr_t i;
-
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 63;
- neg_imag <<= 63;
-
- for (i = 0; i < opr_sz / 8; i += 2) {
- float64 e2 = n[i + flip];
- float64 e1 = m[i + flip] ^ neg_real;
- float64 e4 = e2;
- float64 e3 = m[i + 1 - flip] ^ neg_imag;
-
- d[i] = float64_muladd(e2, e1, d[i], 0, fpst);
- d[i + 1] = float64_muladd(e4, e3, d[i + 1], 0, fpst);
- }
- clear_tail(d, opr_sz, simd_maxsz(desc));
-}
-
-#define DO_2OP(NAME, FUNC, TYPE) \
-void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
-{ \
- intptr_t i, oprsz = simd_oprsz(desc); \
- TYPE *d = vd, *n = vn; \
- for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
- d[i] = FUNC(n[i], stat); \
- } \
-}
-
-DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16)
-DO_2OP(gvec_frecpe_s, helper_recpe_f32, float32)
-DO_2OP(gvec_frecpe_d, helper_recpe_f64, float64)
-
-DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16)
-DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32)
-DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64)
-
-#undef DO_2OP
-
-/* Floating-point trigonometric starting value.
- * See the ARM ARM pseudocode function FPTrigSMul.
- */
-static float16 float16_ftsmul(float16 op1, uint16_t op2, float_status *stat)
-{
- float16 result = float16_mul(op1, op1, stat);
- if (!float16_is_any_nan(result)) {
- result = float16_set_sign(result, op2 & 1);
- }
- return result;
-}
-
-static float32 float32_ftsmul(float32 op1, uint32_t op2, float_status *stat)
-{
- float32 result = float32_mul(op1, op1, stat);
- if (!float32_is_any_nan(result)) {
- result = float32_set_sign(result, op2 & 1);
- }
- return result;
-}
-
-static float64 float64_ftsmul(float64 op1, uint64_t op2, float_status *stat)
-{
- float64 result = float64_mul(op1, op1, stat);
- if (!float64_is_any_nan(result)) {
- result = float64_set_sign(result, op2 & 1);
- }
- return result;
-}
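
All three widths do the same thing: square the value, then take the sign of the result from bit 0 of the integer operand unless the square is a NaN. A plain-double sketch (illustrative only, not the code the patch touches):

#include <math.h>
#include <stdint.h>
#include <stdio.h>

static double ftsmul_ref(double op1, uint64_t op2)
{
    double r = op1 * op1;

    if (!isnan(r)) {
        /* Bit 0 of the integer operand supplies the sign of the square. */
        r = copysign(r, (op2 & 1) ? -1.0 : 1.0);
    }
    return r;
}

int main(void)
{
    printf("%g %g\n", ftsmul_ref(3.0, 0), ftsmul_ref(3.0, 1));   /* 9 -9 */
    return 0;
}

Roughly speaking, this squared-and-signed value is the starting point that the SVE trigonometric multiply-add sequence then consumes; see the FPTrigSMul pseudocode cited above.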
-
-#define DO_3OP(NAME, FUNC, TYPE) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
-{ \
- intptr_t i, oprsz = simd_oprsz(desc); \
- TYPE *d = vd, *n = vn, *m = vm; \
- for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
- d[i] = FUNC(n[i], m[i], stat); \
- } \
-}
-
-DO_3OP(gvec_fadd_h, float16_add, float16)
-DO_3OP(gvec_fadd_s, float32_add, float32)
-DO_3OP(gvec_fadd_d, float64_add, float64)
-
-DO_3OP(gvec_fsub_h, float16_sub, float16)
-DO_3OP(gvec_fsub_s, float32_sub, float32)
-DO_3OP(gvec_fsub_d, float64_sub, float64)
-
-DO_3OP(gvec_fmul_h, float16_mul, float16)
-DO_3OP(gvec_fmul_s, float32_mul, float32)
-DO_3OP(gvec_fmul_d, float64_mul, float64)
-
-DO_3OP(gvec_ftsmul_h, float16_ftsmul, float16)
-DO_3OP(gvec_ftsmul_s, float32_ftsmul, float32)
-DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64)
-
-#ifdef TARGET_AARCH64
-
-DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)
-DO_3OP(gvec_recps_s, helper_recpsf_f32, float32)
-DO_3OP(gvec_recps_d, helper_recpsf_f64, float64)
-
-DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16)
-DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32)
-DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
-
-#endif
-#undef DO_3OP
-
-/* For the indexed ops, SVE applies the index per 128-bit vector segment.
- * For AdvSIMD, there is of course only one such vector segment.
- */
-
-#define DO_MUL_IDX(NAME, TYPE, H) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
-{ \
- intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
- intptr_t idx = simd_data(desc); \
- TYPE *d = vd, *n = vn, *m = vm; \
- for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
- TYPE mm = m[H(i + idx)]; \
- for (j = 0; j < segment; j++) { \
- d[i + j] = TYPE##_mul(n[i + j], mm, stat); \
- } \
- } \
-}
-
-DO_MUL_IDX(gvec_fmul_idx_h, float16, H2)
-DO_MUL_IDX(gvec_fmul_idx_s, float32, H4)
-DO_MUL_IDX(gvec_fmul_idx_d, float64, )
-
-#undef DO_MUL_IDX
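
To make the segment comment above concrete: with eight float32 lanes and idx = 1, lanes 0-3 are multiplied by m[1] and lanes 4-7 by m[5]. A plain-float sketch of the same access pattern (hypothetical names, no QEMU types, not part of the patch):

#include <stdio.h>

/* Indexed multiply with the index applied per 128-bit (4 x float) segment,
 * as DO_MUL_IDX does for SVE; AdvSIMD is simply the single-segment case. */
static void fmul_idx_ref(float *d, const float *n, const float *m,
                         int elems, int idx)
{
    const int segment = 4;                  /* 16 bytes / sizeof(float) */

    for (int i = 0; i < elems; i += segment) {
        float mm = m[i + idx];              /* one element per segment */
        for (int j = 0; j < segment; j++) {
            d[i + j] = n[i + j] * mm;
        }
    }
}

int main(void)
{
    float n[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    float m[8] = { 10, 20, 30, 40, 50, 60, 70, 80 };
    float d[8];

    fmul_idx_ref(d, n, m, 8, 1);            /* lanes 0-3 use m[1], 4-7 use m[5] */
    for (int i = 0; i < 8; i++) {
        printf("%g ", d[i]);
    }
    printf("\n");
    return 0;
}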
-
-#define DO_FMLA_IDX(NAME, TYPE, H) \
-void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
- void *stat, uint32_t desc) \
-{ \
- intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
- TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \
- intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \
- TYPE *d = vd, *n = vn, *m = vm, *a = va; \
- op1_neg <<= (8 * sizeof(TYPE) - 1); \
- for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
- TYPE mm = m[H(i + idx)]; \
- for (j = 0; j < segment; j++) { \
- d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \
- mm, a[i + j], 0, stat); \
- } \
- } \
-}
-
-DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
-DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
-DO_FMLA_IDX(gvec_fmla_idx_d, float64, )
-
-#undef DO_FMLA_IDX
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
new file mode 100644
index 0000000000..3e5e37abbe
--- /dev/null
+++ b/target/arm/vfp_helper.c
@@ -0,0 +1,1283 @@
+/*
+ * ARM VFP floating-point operations
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/helper-proto.h"
+#include "internals.h"
+#include "cpu-features.h"
+#ifdef CONFIG_TCG
+#include "qemu/log.h"
+#include "fpu/softfloat.h"
+#endif
+
+/* VFP support. We follow the convention used for VFP instructions:
+ Single precision routines have a "s" suffix, double precision a
+   single precision routines have an "s" suffix, double precision a
+ "d" suffix. */
+
+#ifdef CONFIG_TCG
+
+/* Convert host exception flags to vfp form. */
+static inline int vfp_exceptbits_from_host(int host_bits)
+{
+ int target_bits = 0;
+
+ if (host_bits & float_flag_invalid) {
+ target_bits |= 1;
+ }
+ if (host_bits & float_flag_divbyzero) {
+ target_bits |= 2;
+ }
+ if (host_bits & float_flag_overflow) {
+ target_bits |= 4;
+ }
+ if (host_bits & (float_flag_underflow | float_flag_output_denormal)) {
+ target_bits |= 8;
+ }
+ if (host_bits & float_flag_inexact) {
+ target_bits |= 0x10;
+ }
+ if (host_bits & float_flag_input_denormal) {
+ target_bits |= 0x80;
+ }
+ return target_bits;
+}
+
+/* Convert vfp exception flags to host form. */
+static inline int vfp_exceptbits_to_host(int target_bits)
+{
+ int host_bits = 0;
+
+ if (target_bits & 1) {
+ host_bits |= float_flag_invalid;
+ }
+ if (target_bits & 2) {
+ host_bits |= float_flag_divbyzero;
+ }
+ if (target_bits & 4) {
+ host_bits |= float_flag_overflow;
+ }
+ if (target_bits & 8) {
+ host_bits |= float_flag_underflow;
+ }
+ if (target_bits & 0x10) {
+ host_bits |= float_flag_inexact;
+ }
+ if (target_bits & 0x80) {
+ host_bits |= float_flag_input_denormal;
+ }
+ return host_bits;
+}
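
For reference, the numeric masks used by the two mapping functions above are the AArch32 FPSCR cumulative exception flag bits; the softfloat float_flag_* values are a host-side detail, which is why the translation is spelled out rather than shared as constants. (Reference only, not part of the patch.)

/* FPSCR cumulative exception flag bits (Arm ARM, FPSCR register layout). */
enum {
    FPSCR_IOC = 1 << 0,   /* Invalid Operation */
    FPSCR_DZC = 1 << 1,   /* Division by Zero  */
    FPSCR_OFC = 1 << 2,   /* Overflow          */
    FPSCR_UFC = 1 << 3,   /* Underflow         */
    FPSCR_IXC = 1 << 4,   /* Inexact           */
    FPSCR_IDC = 1 << 7,   /* Input Denormal    */
};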
+
+static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
+{
+ uint32_t i;
+
+ i = get_float_exception_flags(&env->vfp.fp_status);
+ i |= get_float_exception_flags(&env->vfp.standard_fp_status);
+ /* FZ16 does not generate an input denormal exception. */
+ i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
+ & ~float_flag_input_denormal);
+ i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
+ & ~float_flag_input_denormal);
+ return vfp_exceptbits_from_host(i);
+}
+
+static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
+{
+ int i;
+ uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR];
+
+ changed ^= val;
+ if (changed & (3 << 22)) {
+ i = (val >> 22) & 3;
+ switch (i) {
+ case FPROUNDING_TIEEVEN:
+ i = float_round_nearest_even;
+ break;
+ case FPROUNDING_POSINF:
+ i = float_round_up;
+ break;
+ case FPROUNDING_NEGINF:
+ i = float_round_down;
+ break;
+ case FPROUNDING_ZERO:
+ i = float_round_to_zero;
+ break;
+ }
+ set_float_rounding_mode(i, &env->vfp.fp_status);
+ set_float_rounding_mode(i, &env->vfp.fp_status_f16);
+ }
+ if (changed & FPCR_FZ16) {
+ bool ftz_enabled = val & FPCR_FZ16;
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
+ set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
+ }
+ if (changed & FPCR_FZ) {
+ bool ftz_enabled = val & FPCR_FZ;
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status);
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status);
+ }
+ if (changed & FPCR_DN) {
+ bool dnan_enabled = val & FPCR_DN;
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
+ }
+
+ /*
+ * The exception flags are ORed together when we read fpscr so we
+ * only need to preserve the current state in one of our
+ * float_status values.
+ */
+ i = vfp_exceptbits_to_host(val);
+ set_float_exception_flags(i, &env->vfp.fp_status);
+ set_float_exception_flags(0, &env->vfp.fp_status_f16);
+ set_float_exception_flags(0, &env->vfp.standard_fp_status);
+ set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
+}
+
+#else
+
+static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
+{
+ return 0;
+}
+
+static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
+{
+}
+
+#endif
+
+uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
+{
+ uint32_t i, fpscr;
+
+ fpscr = env->vfp.xregs[ARM_VFP_FPSCR]
+ | (env->vfp.vec_len << 16)
+ | (env->vfp.vec_stride << 20);
+
+ /*
+ * M-profile LTPSIZE overlaps A-profile Stride; whichever of the
+ * two is not applicable to this CPU will always be zero.
+ */
+ fpscr |= env->v7m.ltpsize << 16;
+
+ fpscr |= vfp_get_fpscr_from_host(env);
+
+ i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
+ fpscr |= i ? FPCR_QC : 0;
+
+ return fpscr;
+}
+
+uint32_t vfp_get_fpscr(CPUARMState *env)
+{
+ return HELPER(vfp_get_fpscr)(env);
+}
+
+void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
+{
+ ARMCPU *cpu = env_archcpu(env);
+
+ /* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */
+ if (!cpu_isar_feature(any_fp16, cpu)) {
+ val &= ~FPCR_FZ16;
+ }
+
+ vfp_set_fpscr_to_host(env, val);
+
+ if (!arm_feature(env, ARM_FEATURE_M)) {
+ /*
+ * Short-vector length and stride; on M-profile these bits
+ * are used for different purposes.
+ * We can't make this conditional be "if MVFR0.FPShVec != 0",
+ * because in v7A no-short-vector-support cores still had to
+ * allow Stride/Len to be written with the only effect that
+ * some insns are required to UNDEF if the guest sets them.
+ */
+ env->vfp.vec_len = extract32(val, 16, 3);
+ env->vfp.vec_stride = extract32(val, 20, 2);
+ } else if (cpu_isar_feature(aa32_mve, cpu)) {
+ env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
+ FPCR_LTPSIZE_LENGTH);
+ }
+
+ if (arm_feature(env, ARM_FEATURE_NEON) ||
+ cpu_isar_feature(aa32_mve, cpu)) {
+ /*
+ * The bit we set within fpscr_q is arbitrary; the register as a
+ * whole being zero/non-zero is what counts.
+ * TODO: M-profile MVE also has a QC bit.
+ */
+ env->vfp.qc[0] = val & FPCR_QC;
+ env->vfp.qc[1] = 0;
+ env->vfp.qc[2] = 0;
+ env->vfp.qc[3] = 0;
+ }
+
+ /*
+ * We don't implement trapped exception handling, so the
+ * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
+ *
+ * The exception flags IOC|DZC|OFC|UFC|IXC|IDC are stored in
+ * fp_status; QC, Len and Stride are stored separately earlier.
+ * Clear out all of those and the RES0 bits: only NZCV, AHP, DN,
+ * FZ, RMode and FZ16 are kept in vfp.xregs[FPSCR].
+ */
+ env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000;
+}
+
+void vfp_set_fpscr(CPUARMState *env, uint32_t val)
+{
+ HELPER(vfp_set_fpscr)(env, val);
+}
+
+#ifdef CONFIG_TCG
+
+#define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
+
+#define VFP_BINOP(name) \
+dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, void *fpstp) \
+{ \
+ float_status *fpst = fpstp; \
+ return float16_ ## name(a, b, fpst); \
+} \
+float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
+{ \
+ float_status *fpst = fpstp; \
+ return float32_ ## name(a, b, fpst); \
+} \
+float64 VFP_HELPER(name, d)(float64 a, float64 b, void *fpstp) \
+{ \
+ float_status *fpst = fpstp; \
+ return float64_ ## name(a, b, fpst); \
+}
+VFP_BINOP(add)
+VFP_BINOP(sub)
+VFP_BINOP(mul)
+VFP_BINOP(div)
+VFP_BINOP(min)
+VFP_BINOP(max)
+VFP_BINOP(minnum)
+VFP_BINOP(maxnum)
+#undef VFP_BINOP
+
+dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a)
+{
+ return float16_chs(a);
+}
+
+float32 VFP_HELPER(neg, s)(float32 a)
+{
+ return float32_chs(a);
+}
+
+float64 VFP_HELPER(neg, d)(float64 a)
+{
+ return float64_chs(a);
+}
+
+dh_ctype_f16 VFP_HELPER(abs, h)(dh_ctype_f16 a)
+{
+ return float16_abs(a);
+}
+
+float32 VFP_HELPER(abs, s)(float32 a)
+{
+ return float32_abs(a);
+}
+
+float64 VFP_HELPER(abs, d)(float64 a)
+{
+ return float64_abs(a);
+}
+
+dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, CPUARMState *env)
+{
+ return float16_sqrt(a, &env->vfp.fp_status_f16);
+}
+
+float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env)
+{
+ return float32_sqrt(a, &env->vfp.fp_status);
+}
+
+float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
+{
+ return float64_sqrt(a, &env->vfp.fp_status);
+}
+
+static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
+{
+ uint32_t flags;
+ switch (cmp) {
+ case float_relation_equal:
+ flags = 0x6;
+ break;
+ case float_relation_less:
+ flags = 0x8;
+ break;
+ case float_relation_greater:
+ flags = 0x2;
+ break;
+ case float_relation_unordered:
+ flags = 0x3;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ env->vfp.xregs[ARM_VFP_FPSCR] =
+ deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags);
+}
+
+/* XXX: check quiet/signaling case */
+#define DO_VFP_cmp(P, FLOATTYPE, ARGTYPE, FPST) \
+void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
+{ \
+ softfloat_to_vfp_compare(env, \
+ FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \
+} \
+void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
+{ \
+ softfloat_to_vfp_compare(env, \
+ FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
+}
+DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16)
+DO_VFP_cmp(s, float32, float32, fp_status)
+DO_VFP_cmp(d, float64, float64, fp_status)
+#undef DO_VFP_cmp
+
+/* Integer to float and float to integer conversions */
+
+#define CONV_ITOF(name, ftype, fsz, sign) \
+ftype HELPER(name)(uint32_t x, void *fpstp) \
+{ \
+ float_status *fpst = fpstp; \
+ return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \
+}
+
+#define CONV_FTOI(name, ftype, fsz, sign, round) \
+sign##int32_t HELPER(name)(ftype x, void *fpstp) \
+{ \
+ float_status *fpst = fpstp; \
+ if (float##fsz##_is_any_nan(x)) { \
+ float_raise(float_flag_invalid, fpst); \
+ return 0; \
+ } \
+ return float##fsz##_to_##sign##int32##round(x, fpst); \
+}
+
+#define FLOAT_CONVS(name, p, ftype, fsz, sign) \
+ CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign) \
+ CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, ) \
+ CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero)
+
+FLOAT_CONVS(si, h, uint32_t, 16, )
+FLOAT_CONVS(si, s, float32, 32, )
+FLOAT_CONVS(si, d, float64, 64, )
+FLOAT_CONVS(ui, h, uint32_t, 16, u)
+FLOAT_CONVS(ui, s, float32, 32, u)
+FLOAT_CONVS(ui, d, float64, 64, u)
+
+#undef CONV_ITOF
+#undef CONV_FTOI
+#undef FLOAT_CONVS
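
For readability, this is what the preprocessor produces for FLOAT_CONVS(si, s, float32, 32, ) above once HELPER() has added its helper_ prefix; the other five invocations expand the same way. (Shown purely for illustration, not additional code.)

float32 helper_vfp_sitos(uint32_t x, void *fpstp)
{
    float_status *fpst = fpstp;
    return int32_to_float32((int32_t)x, fpst);
}

int32_t helper_vfp_tosis(float32 x, void *fpstp)
{
    float_status *fpst = fpstp;
    if (float32_is_any_nan(x)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float32_to_int32(x, fpst);
}

int32_t helper_vfp_tosizs(float32 x, void *fpstp)
{
    float_status *fpst = fpstp;
    if (float32_is_any_nan(x)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float32_to_int32_round_to_zero(x, fpst);
}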
+
+/* floating point conversion */
+float64 VFP_HELPER(fcvtd, s)(float32 x, CPUARMState *env)
+{
+ return float32_to_float64(x, &env->vfp.fp_status);
+}
+
+float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
+{
+ return float64_to_float32(x, &env->vfp.fp_status);
+}
+
+uint32_t HELPER(bfcvt)(float32 x, void *status)
+{
+ return float32_to_bfloat16(x, status);
+}
+
+uint32_t HELPER(bfcvt_pair)(uint64_t pair, void *status)
+{
+ bfloat16 lo = float32_to_bfloat16(extract64(pair, 0, 32), status);
+ bfloat16 hi = float32_to_bfloat16(extract64(pair, 32, 32), status);
+ return deposit32(lo, 16, 16, hi);
+}
+
+/*
+ * VFP3 fixed point conversion. The AArch32 versions of fix-to-float
+ * must always round-to-nearest; the AArch64 ones honour the FPSCR
+ * rounding mode. (For AArch32 Neon the standard-FPSCR is set to
+ * round-to-nearest so either helper will work.) AArch32 float-to-fix
+ * must round-to-zero.
+ */
+#define VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \
+ftype HELPER(vfp_##name##to##p)(uint##isz##_t x, uint32_t shift, \
+ void *fpstp) \
+{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); }
+
+#define VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype) \
+ ftype HELPER(vfp_##name##to##p##_round_to_nearest)(uint##isz##_t x, \
+ uint32_t shift, \
+ void *fpstp) \
+ { \
+ ftype ret; \
+ float_status *fpst = fpstp; \
+ FloatRoundMode oldmode = fpst->float_rounding_mode; \
+ fpst->float_rounding_mode = float_round_nearest_even; \
+ ret = itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); \
+ fpst->float_rounding_mode = oldmode; \
+ return ret; \
+ }
+
+#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, ROUND, suff) \
+uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift, \
+ void *fpst) \
+{ \
+ if (unlikely(float##fsz##_is_any_nan(x))) { \
+ float_raise(float_flag_invalid, fpst); \
+ return 0; \
+ } \
+ return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst); \
+}
+
+#define VFP_CONV_FIX(name, p, fsz, ftype, isz, itype) \
+VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \
+VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype) \
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \
+ float_round_to_zero, _round_to_zero) \
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \
+ get_float_rounding_mode(fpst), )
+
+#define VFP_CONV_FIX_A64(name, p, fsz, ftype, isz, itype) \
+VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype) \
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, \
+ get_float_rounding_mode(fpst), )
+
+VFP_CONV_FIX(sh, d, 64, float64, 64, int16)
+VFP_CONV_FIX(sl, d, 64, float64, 64, int32)
+VFP_CONV_FIX_A64(sq, d, 64, float64, 64, int64)
+VFP_CONV_FIX(uh, d, 64, float64, 64, uint16)
+VFP_CONV_FIX(ul, d, 64, float64, 64, uint32)
+VFP_CONV_FIX_A64(uq, d, 64, float64, 64, uint64)
+VFP_CONV_FIX(sh, s, 32, float32, 32, int16)
+VFP_CONV_FIX(sl, s, 32, float32, 32, int32)
+VFP_CONV_FIX_A64(sq, s, 32, float32, 64, int64)
+VFP_CONV_FIX(uh, s, 32, float32, 32, uint16)
+VFP_CONV_FIX(ul, s, 32, float32, 32, uint32)
+VFP_CONV_FIX_A64(uq, s, 32, float32, 64, uint64)
+VFP_CONV_FIX(sh, h, 16, dh_ctype_f16, 32, int16)
+VFP_CONV_FIX(sl, h, 16, dh_ctype_f16, 32, int32)
+VFP_CONV_FIX_A64(sq, h, 16, dh_ctype_f16, 64, int64)
+VFP_CONV_FIX(uh, h, 16, dh_ctype_f16, 32, uint16)
+VFP_CONV_FIX(ul, h, 16, dh_ctype_f16, 32, uint32)
+VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64)
+
+#undef VFP_CONV_FIX
+#undef VFP_CONV_FIX_FLOAT
+#undef VFP_CONV_FLOAT_FIX_ROUND
+#undef VFP_CONV_FIX_A64
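
The *_scalbn calls above amount to fixed-point arithmetic with "shift" fraction bits: integer to float scales by 2^-shift, float to integer scales by 2^shift before rounding. A plain-double analogue, without the saturation and exception flags of the real helpers (illustrative only):

#include <math.h>
#include <stdint.h>
#include <stdio.h>

static double fix_to_double(int64_t x, unsigned shift)
{
    return ldexp((double)x, -(int)shift);          /* x * 2^-shift */
}

static int64_t double_to_fix_rz(double x, unsigned shift)
{
    return (int64_t)trunc(ldexp(x, (int)shift));   /* round to zero */
}

int main(void)
{
    /* 0x28000 with 16 fraction bits is 2.5, and back again. */
    printf("%g\n", fix_to_double(0x28000, 16));
    printf("%lld\n", (long long)double_to_fix_rz(2.5, 16));
    return 0;
}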
+
+/* Set the current fp rounding mode and return the old one.
+ * The argument is a softfloat float_round_ value.
+ */
+uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp)
+{
+ float_status *fp_status = fpstp;
+
+ uint32_t prev_rmode = get_float_rounding_mode(fp_status);
+ set_float_rounding_mode(rmode, fp_status);
+
+ return prev_rmode;
+}
+
+/* Half precision conversions. */
+float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode)
+{
+ /* Squash FZ16 to 0 for the duration of conversion. In this case,
+ * it would affect flushing input denormals.
+ */
+ float_status *fpst = fpstp;
+ bool save = get_flush_inputs_to_zero(fpst);
+ set_flush_inputs_to_zero(false, fpst);
+ float32 r = float16_to_float32(a, !ahp_mode, fpst);
+ set_flush_inputs_to_zero(save, fpst);
+ return r;
+}
+
+uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
+{
+ /* Squash FZ16 to 0 for the duration of conversion. In this case,
+ * it would affect flushing output denormals.
+ */
+ float_status *fpst = fpstp;
+ bool save = get_flush_to_zero(fpst);
+ set_flush_to_zero(false, fpst);
+ float16 r = float32_to_float16(a, !ahp_mode, fpst);
+ set_flush_to_zero(save, fpst);
+ return r;
+}
+
+float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode)
+{
+ /* Squash FZ16 to 0 for the duration of conversion. In this case,
+ * it would affect flushing input denormals.
+ */
+ float_status *fpst = fpstp;
+ bool save = get_flush_inputs_to_zero(fpst);
+ set_flush_inputs_to_zero(false, fpst);
+ float64 r = float16_to_float64(a, !ahp_mode, fpst);
+ set_flush_inputs_to_zero(save, fpst);
+ return r;
+}
+
+uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
+{
+ /* Squash FZ16 to 0 for the duration of conversion. In this case,
+ * it would affect flushing output denormals.
+ */
+ float_status *fpst = fpstp;
+ bool save = get_flush_to_zero(fpst);
+ set_flush_to_zero(false, fpst);
+ float16 r = float64_to_float16(a, !ahp_mode, fpst);
+ set_flush_to_zero(save, fpst);
+ return r;
+}
+
+/* NEON helpers. */
+
+/* Constants 256 and 512 are used in some helpers; we avoid relying on
+ * int->float conversions at run-time. */
+#define float64_256 make_float64(0x4070000000000000LL)
+#define float64_512 make_float64(0x4080000000000000LL)
+#define float16_maxnorm make_float16(0x7bff)
+#define float32_maxnorm make_float32(0x7f7fffff)
+#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
+
+/* Reciprocal functions
+ *
+ * The algorithm that must be used to calculate the estimate
+ * is specified by the ARM ARM, see FPRecipEstimate()/RecipEstimate
+ */
+
+/* See RecipEstimate()
+ *
+ * input is a 9 bit fixed point number
+ * input range 256 .. 511 for a number from 0.5 <= x < 1.0.
+ * result range 256 .. 511 for a number from 1.0 to 511/256.
+ */
+
+static int recip_estimate(int input)
+{
+ int a, b, r;
+ assert(256 <= input && input < 512);
+ a = (input * 2) + 1;
+ b = (1 << 19) / a;
+ r = (b + 1) >> 1;
+ assert(256 <= r && r < 512);
+ return r;
+}
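
The estimate is small enough to explore directly. This standalone copy of the routine (illustrative, not part of the patch) prints a few points: input/512 is the mantissa in [0.5, 1.0) and the result divided by 256 approximates its reciprocal to roughly 8 bits.

#include <assert.h>
#include <stdio.h>

static int recip_estimate_copy(int input)
{
    assert(256 <= input && input < 512);
    int a = (input * 2) + 1;
    int b = (1 << 19) / a;
    return (b + 1) >> 1;
}

int main(void)
{
    for (int input = 256; input < 512; input += 85) {
        double x = input / 512.0;
        double est = recip_estimate_copy(input) / 256.0;
        printf("x=%.4f  estimate=%.4f  1/x=%.4f\n", x, est, 1.0 / x);
    }
    return 0;
}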
+
+/*
+ * Common wrapper to call recip_estimate
+ *
+ * The parameters are exponent and 64 bit fraction (without implicit
+ * bit) where the binary point is nominally at bit 52. Returns a
+ * float64 which can then be rounded to the appropriate size by the
+ * callee.
+ */
+
+static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
+{
+ uint32_t scaled, estimate;
+ uint64_t result_frac;
+ int result_exp;
+
+ /* Handle sub-normals */
+ if (*exp == 0) {
+ if (extract64(frac, 51, 1) == 0) {
+ *exp = -1;
+ frac <<= 2;
+ } else {
+ frac <<= 1;
+ }
+ }
+
+ /* scaled = UInt('1':fraction<51:44>) */
+ scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
+ estimate = recip_estimate(scaled);
+
+ result_exp = exp_off - *exp;
+ result_frac = deposit64(0, 44, 8, estimate);
+ if (result_exp == 0) {
+ result_frac = deposit64(result_frac >> 1, 51, 1, 1);
+ } else if (result_exp == -1) {
+ result_frac = deposit64(result_frac >> 2, 50, 2, 1);
+ result_exp = 0;
+ }
+
+ *exp = result_exp;
+
+ return result_frac;
+}
+
+static bool round_to_inf(float_status *fpst, bool sign_bit)
+{
+ switch (fpst->float_rounding_mode) {
+ case float_round_nearest_even: /* Round to Nearest */
+ return true;
+ case float_round_up: /* Round to +Inf */
+ return !sign_bit;
+ case float_round_down: /* Round to -Inf */
+ return sign_bit;
+ case float_round_to_zero: /* Round to Zero */
+ return false;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ float16 f16 = float16_squash_input_denormal(input, fpst);
+ uint32_t f16_val = float16_val(f16);
+ uint32_t f16_sign = float16_is_neg(f16);
+ int f16_exp = extract32(f16_val, 10, 5);
+ uint32_t f16_frac = extract32(f16_val, 0, 10);
+ uint64_t f64_frac;
+
+ if (float16_is_any_nan(f16)) {
+ float16 nan = f16;
+ if (float16_is_signaling_nan(f16, fpst)) {
+ float_raise(float_flag_invalid, fpst);
+ if (!fpst->default_nan_mode) {
+ nan = float16_silence_nan(f16, fpst);
+ }
+ }
+ if (fpst->default_nan_mode) {
+ nan = float16_default_nan(fpst);
+ }
+ return nan;
+ } else if (float16_is_infinity(f16)) {
+ return float16_set_sign(float16_zero, float16_is_neg(f16));
+ } else if (float16_is_zero(f16)) {
+ float_raise(float_flag_divbyzero, fpst);
+ return float16_set_sign(float16_infinity, float16_is_neg(f16));
+ } else if (float16_abs(f16) < (1 << 8)) {
+ /* Abs(value) < 2.0^-16 */
+ float_raise(float_flag_overflow | float_flag_inexact, fpst);
+ if (round_to_inf(fpst, f16_sign)) {
+ return float16_set_sign(float16_infinity, f16_sign);
+ } else {
+ return float16_set_sign(float16_maxnorm, f16_sign);
+ }
+ } else if (f16_exp >= 29 && fpst->flush_to_zero) {
+ float_raise(float_flag_underflow, fpst);
+ return float16_set_sign(float16_zero, float16_is_neg(f16));
+ }
+
+ f64_frac = call_recip_estimate(&f16_exp, 29,
+ ((uint64_t) f16_frac) << (52 - 10));
+
+ /* result = sign : result_exp<4:0> : fraction<51:42> */
+ f16_val = deposit32(0, 15, 1, f16_sign);
+ f16_val = deposit32(f16_val, 10, 5, f16_exp);
+ f16_val = deposit32(f16_val, 0, 10, extract64(f64_frac, 52 - 10, 10));
+ return make_float16(f16_val);
+}
+
+float32 HELPER(recpe_f32)(float32 input, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ float32 f32 = float32_squash_input_denormal(input, fpst);
+ uint32_t f32_val = float32_val(f32);
+ bool f32_sign = float32_is_neg(f32);
+ int f32_exp = extract32(f32_val, 23, 8);
+ uint32_t f32_frac = extract32(f32_val, 0, 23);
+ uint64_t f64_frac;
+
+ if (float32_is_any_nan(f32)) {
+ float32 nan = f32;
+ if (float32_is_signaling_nan(f32, fpst)) {
+ float_raise(float_flag_invalid, fpst);
+ if (!fpst->default_nan_mode) {
+ nan = float32_silence_nan(f32, fpst);
+ }
+ }
+ if (fpst->default_nan_mode) {
+ nan = float32_default_nan(fpst);
+ }
+ return nan;
+ } else if (float32_is_infinity(f32)) {
+ return float32_set_sign(float32_zero, float32_is_neg(f32));
+ } else if (float32_is_zero(f32)) {
+ float_raise(float_flag_divbyzero, fpst);
+ return float32_set_sign(float32_infinity, float32_is_neg(f32));
+ } else if (float32_abs(f32) < (1ULL << 21)) {
+ /* Abs(value) < 2.0^-128 */
+ float_raise(float_flag_overflow | float_flag_inexact, fpst);
+ if (round_to_inf(fpst, f32_sign)) {
+ return float32_set_sign(float32_infinity, f32_sign);
+ } else {
+ return float32_set_sign(float32_maxnorm, f32_sign);
+ }
+ } else if (f32_exp >= 253 && fpst->flush_to_zero) {
+ float_raise(float_flag_underflow, fpst);
+ return float32_set_sign(float32_zero, float32_is_neg(f32));
+ }
+
+ f64_frac = call_recip_estimate(&f32_exp, 253,
+ ((uint64_t) f32_frac) << (52 - 23));
+
+ /* result = sign : result_exp<7:0> : fraction<51:29> */
+ f32_val = deposit32(0, 31, 1, f32_sign);
+ f32_val = deposit32(f32_val, 23, 8, f32_exp);
+ f32_val = deposit32(f32_val, 0, 23, extract64(f64_frac, 52 - 23, 23));
+ return make_float32(f32_val);
+}
+
+float64 HELPER(recpe_f64)(float64 input, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ float64 f64 = float64_squash_input_denormal(input, fpst);
+ uint64_t f64_val = float64_val(f64);
+ bool f64_sign = float64_is_neg(f64);
+ int f64_exp = extract64(f64_val, 52, 11);
+ uint64_t f64_frac = extract64(f64_val, 0, 52);
+
+ /* Deal with any special cases */
+ if (float64_is_any_nan(f64)) {
+ float64 nan = f64;
+ if (float64_is_signaling_nan(f64, fpst)) {
+ float_raise(float_flag_invalid, fpst);
+ if (!fpst->default_nan_mode) {
+ nan = float64_silence_nan(f64, fpst);
+ }
+ }
+ if (fpst->default_nan_mode) {
+ nan = float64_default_nan(fpst);
+ }
+ return nan;
+ } else if (float64_is_infinity(f64)) {
+ return float64_set_sign(float64_zero, float64_is_neg(f64));
+ } else if (float64_is_zero(f64)) {
+ float_raise(float_flag_divbyzero, fpst);
+ return float64_set_sign(float64_infinity, float64_is_neg(f64));
+ } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
+ /* Abs(value) < 2.0^-1024 */
+ float_raise(float_flag_overflow | float_flag_inexact, fpst);
+ if (round_to_inf(fpst, f64_sign)) {
+ return float64_set_sign(float64_infinity, f64_sign);
+ } else {
+ return float64_set_sign(float64_maxnorm, f64_sign);
+ }
+ } else if (f64_exp >= 2045 && fpst->flush_to_zero) {
+ float_raise(float_flag_underflow, fpst);
+ return float64_set_sign(float64_zero, float64_is_neg(f64));
+ }
+
+ f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
+
+ /* result = sign : result_exp<10:0> : fraction<51:0>; */
+ f64_val = deposit64(0, 63, 1, f64_sign);
+ f64_val = deposit64(f64_val, 52, 11, f64_exp);
+ f64_val = deposit64(f64_val, 0, 52, f64_frac);
+ return make_float64(f64_val);
+}
+
+/* The algorithm that must be used to calculate the estimate
+ * is specified by the ARM ARM.
+ */
+
+static int do_recip_sqrt_estimate(int a)
+{
+ int b, estimate;
+
+ assert(128 <= a && a < 512);
+ if (a < 256) {
+ a = a * 2 + 1;
+ } else {
+ a = (a >> 1) << 1;
+ a = (a + 1) * 2;
+ }
+ b = 512;
+ while (a * (b + 1) * (b + 1) < (1 << 28)) {
+ b += 1;
+ }
+ estimate = (b + 1) / 2;
+ assert(256 <= estimate && estimate < 512);
+
+ return estimate;
+}
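
As with RecipEstimate, a few sample values make the search loop easier to follow. In this standalone copy (illustrative, not part of the patch), a/512 is the already-scaled input in [0.25, 1.0) and the result divided by 256 approximates 1/sqrt of it.

#include <assert.h>
#include <math.h>
#include <stdio.h>

static int rsqrt_estimate_copy(int a)
{
    assert(128 <= a && a < 512);
    if (a < 256) {
        a = a * 2 + 1;
    } else {
        a = (a >> 1) << 1;
        a = (a + 1) * 2;
    }

    int b = 512;
    while (a * (b + 1) * (b + 1) < (1 << 28)) {
        b += 1;
    }
    return (b + 1) / 2;
}

int main(void)
{
    const int inputs[] = { 128, 256, 511 };

    for (int i = 0; i < 3; i++) {
        double x = inputs[i] / 512.0;
        double est = rsqrt_estimate_copy(inputs[i]) / 256.0;
        printf("x=%.4f  estimate=%.4f  1/sqrt(x)=%.4f\n",
               x, est, 1.0 / sqrt(x));
    }
    return 0;
}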
+
+
+static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
+{
+ int estimate;
+ uint32_t scaled;
+
+ if (*exp == 0) {
+ while (extract64(frac, 51, 1) == 0) {
+ frac = frac << 1;
+ *exp -= 1;
+ }
+ frac = extract64(frac, 0, 51) << 1;
+ }
+
+ if (*exp & 1) {
+ /* scaled = UInt('01':fraction<51:45>) */
+ scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
+ } else {
+ /* scaled = UInt('1':fraction<51:44>) */
+ scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
+ }
+ estimate = do_recip_sqrt_estimate(scaled);
+
+ *exp = (exp_off - *exp) / 2;
+ return extract64(estimate, 0, 8) << 44;
+}
+
+uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
+{
+ float_status *s = fpstp;
+ float16 f16 = float16_squash_input_denormal(input, s);
+ uint16_t val = float16_val(f16);
+ bool f16_sign = float16_is_neg(f16);
+ int f16_exp = extract32(val, 10, 5);
+ uint16_t f16_frac = extract32(val, 0, 10);
+ uint64_t f64_frac;
+
+ if (float16_is_any_nan(f16)) {
+ float16 nan = f16;
+ if (float16_is_signaling_nan(f16, s)) {
+ float_raise(float_flag_invalid, s);
+ if (!s->default_nan_mode) {
+ nan = float16_silence_nan(f16, fpstp);
+ }
+ }
+ if (s->default_nan_mode) {
+ nan = float16_default_nan(s);
+ }
+ return nan;
+ } else if (float16_is_zero(f16)) {
+ float_raise(float_flag_divbyzero, s);
+ return float16_set_sign(float16_infinity, f16_sign);
+ } else if (f16_sign) {
+ float_raise(float_flag_invalid, s);
+ return float16_default_nan(s);
+ } else if (float16_is_infinity(f16)) {
+ return float16_zero;
+ }
+
+ /* Scale and normalize to a double-precision value between 0.25 and 1.0,
+ * preserving the parity of the exponent. */
+
+ f64_frac = ((uint64_t) f16_frac) << (52 - 10);
+
+ f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
+
+ /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
+ val = deposit32(0, 15, 1, f16_sign);
+ val = deposit32(val, 10, 5, f16_exp);
+ val = deposit32(val, 2, 8, extract64(f64_frac, 52 - 8, 8));
+ return make_float16(val);
+}
+
+float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
+{
+ float_status *s = fpstp;
+ float32 f32 = float32_squash_input_denormal(input, s);
+ uint32_t val = float32_val(f32);
+ uint32_t f32_sign = float32_is_neg(f32);
+ int f32_exp = extract32(val, 23, 8);
+ uint32_t f32_frac = extract32(val, 0, 23);
+ uint64_t f64_frac;
+
+ if (float32_is_any_nan(f32)) {
+ float32 nan = f32;
+ if (float32_is_signaling_nan(f32, s)) {
+ float_raise(float_flag_invalid, s);
+ if (!s->default_nan_mode) {
+ nan = float32_silence_nan(f32, fpstp);
+ }
+ }
+ if (s->default_nan_mode) {
+ nan = float32_default_nan(s);
+ }
+ return nan;
+ } else if (float32_is_zero(f32)) {
+ float_raise(float_flag_divbyzero, s);
+ return float32_set_sign(float32_infinity, float32_is_neg(f32));
+ } else if (float32_is_neg(f32)) {
+ float_raise(float_flag_invalid, s);
+ return float32_default_nan(s);
+ } else if (float32_is_infinity(f32)) {
+ return float32_zero;
+ }
+
+ /* Scale and normalize to a double-precision value between 0.25 and 1.0,
+ * preserving the parity of the exponent. */
+
+ f64_frac = ((uint64_t) f32_frac) << 29;
+
+ f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
+
+ /* result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15) */
+ val = deposit32(0, 31, 1, f32_sign);
+ val = deposit32(val, 23, 8, f32_exp);
+ val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
+ return make_float32(val);
+}
+
+float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
+{
+ float_status *s = fpstp;
+ float64 f64 = float64_squash_input_denormal(input, s);
+ uint64_t val = float64_val(f64);
+ bool f64_sign = float64_is_neg(f64);
+ int f64_exp = extract64(val, 52, 11);
+ uint64_t f64_frac = extract64(val, 0, 52);
+
+ if (float64_is_any_nan(f64)) {
+ float64 nan = f64;
+ if (float64_is_signaling_nan(f64, s)) {
+ float_raise(float_flag_invalid, s);
+ if (!s->default_nan_mode) {
+ nan = float64_silence_nan(f64, fpstp);
+ }
+ }
+ if (s->default_nan_mode) {
+ nan = float64_default_nan(s);
+ }
+ return nan;
+ } else if (float64_is_zero(f64)) {
+ float_raise(float_flag_divbyzero, s);
+ return float64_set_sign(float64_infinity, float64_is_neg(f64));
+ } else if (float64_is_neg(f64)) {
+ float_raise(float_flag_invalid, s);
+ return float64_default_nan(s);
+ } else if (float64_is_infinity(f64)) {
+ return float64_zero;
+ }
+
+ f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
+
+ /* result = sign : result_exp<10:0> : estimate<7:0> : Zeros(44) */
+ val = deposit64(0, 63, 1, f64_sign);
+ val = deposit64(val, 52, 11, f64_exp);
+ val = deposit64(val, 44, 8, extract64(f64_frac, 52 - 8, 8));
+ return make_float64(val);
+}
+
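+/*
+ * Integer estimates, as used by URECPE/URSQRTE (VRECPE.U32/VRSQRTE.U32):
+ * only the top 9 bits of the operand feed the estimate, and operands too
+ * small to have a representable estimate (top bit clear for the
+ * reciprocal, top two bits clear for the reciprocal square root)
+ * saturate to all-ones.
+ */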
+uint32_t HELPER(recpe_u32)(uint32_t a)
+{
+ int input, estimate;
+
+ if ((a & 0x80000000) == 0) {
+ return 0xffffffff;
+ }
+
+ input = extract32(a, 23, 9);
+ estimate = recip_estimate(input);
+
+ return deposit32(0, (32 - 9), 9, estimate);
+}
+
+uint32_t HELPER(rsqrte_u32)(uint32_t a)
+{
+ int estimate;
+
+ if ((a & 0xc0000000) == 0) {
+ return 0xffffffff;
+ }
+
+ estimate = do_recip_sqrt_estimate(extract32(a, 23, 9));
+
+ return deposit32(0, 23, 9, estimate);
+}
+
+/* VFPv4 fused multiply-accumulate */
+dh_ctype_f16 VFP_HELPER(muladd, h)(dh_ctype_f16 a, dh_ctype_f16 b,
+ dh_ctype_f16 c, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return float16_muladd(a, b, c, 0, fpst);
+}
+
+float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return float32_muladd(a, b, c, 0, fpst);
+}
+
+float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return float64_muladd(a, b, c, 0, fpst);
+}
+
+/* ARMv8 round to integral */
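+/*
+ * The *_exact helpers back the "round to integral exact" forms
+ * (FRINTX/VRINTX), which signal Inexact; the non-exact helpers below
+ * suppress Inexact, as the other rounding variants must not raise it.
+ */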
+dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, void *fp_status)
+{
+ return float16_round_to_int(x, fp_status);
+}
+
+float32 HELPER(rints_exact)(float32 x, void *fp_status)
+{
+ return float32_round_to_int(x, fp_status);
+}
+
+float64 HELPER(rintd_exact)(float64 x, void *fp_status)
+{
+ return float64_round_to_int(x, fp_status);
+}
+
+dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status)
+{
+ int old_flags = get_float_exception_flags(fp_status), new_flags;
+ float16 ret;
+
+ ret = float16_round_to_int(x, fp_status);
+
+ /* Suppress any inexact exceptions the conversion produced */
+ if (!(old_flags & float_flag_inexact)) {
+ new_flags = get_float_exception_flags(fp_status);
+ set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+ }
+
+ return ret;
+}
+
+float32 HELPER(rints)(float32 x, void *fp_status)
+{
+ int old_flags = get_float_exception_flags(fp_status), new_flags;
+ float32 ret;
+
+ ret = float32_round_to_int(x, fp_status);
+
+ /* Suppress any inexact exceptions the conversion produced */
+ if (!(old_flags & float_flag_inexact)) {
+ new_flags = get_float_exception_flags(fp_status);
+ set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+ }
+
+ return ret;
+}
+
+float64 HELPER(rintd)(float64 x, void *fp_status)
+{
+ int old_flags = get_float_exception_flags(fp_status), new_flags;
+ float64 ret;
+
+ ret = float64_round_to_int(x, fp_status);
+
+ /* Suppress any inexact exceptions the conversion produced */
+ if (!(old_flags & float_flag_inexact)) {
+ new_flags = get_float_exception_flags(fp_status);
+ set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+ }
+
+ return ret;
+}
+
+/* Convert ARM rounding mode to softfloat */
+const FloatRoundMode arm_rmode_to_sf_map[] = {
+ [FPROUNDING_TIEEVEN] = float_round_nearest_even,
+ [FPROUNDING_POSINF] = float_round_up,
+ [FPROUNDING_NEGINF] = float_round_down,
+ [FPROUNDING_ZERO] = float_round_to_zero,
+ [FPROUNDING_TIEAWAY] = float_round_ties_away,
+ [FPROUNDING_ODD] = float_round_to_odd,
+};
+
+/*
+ * Implement float64 to int32_t conversion without saturation;
+ * the result is supplied modulo 2^32.
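+ * The low 32 bits of the return value hold the converted result; the
+ * high 32 bits are nonzero when the conversion was not exact (the
+ * env->ZF convention: nonzero means Z clear), as consumed by vjcvt below.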
+ */
+uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus)
+{
+ float_status *status = vstatus;
+ uint32_t inexact, frac;
+ uint32_t e_old, e_new;
+
+ e_old = get_float_exception_flags(status);
+ set_float_exception_flags(0, status);
+ frac = float64_to_int32_modulo(value, float_round_to_zero, status);
+ e_new = get_float_exception_flags(status);
+ set_float_exception_flags(e_old | e_new, status);
+
+ if (value == float64_chs(float64_zero)) {
+ /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */
+ inexact = 1;
+ } else {
+ /* Normal inexact or overflow or NaN */
+ inexact = e_new & (float_flag_inexact | float_flag_invalid);
+ }
+
+ /* Pack the result and the env->ZF representation of Z together. */
+ return deposit64(frac, 32, 32, inexact);
+}
+
+uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
+{
+ uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status);
+ uint32_t result = pair;
+ uint32_t z = (pair >> 32) == 0;
+
+ /* Store Z, clear NCV, in FPSCR.NZCV. */
+ env->vfp.xregs[ARM_VFP_FPSCR]
+ = (env->vfp.xregs[ARM_VFP_FPSCR] & ~CPSR_NZCV) | (z * CPSR_Z);
+
+ return result;
+}
+
+/* Round a float32 to an integer that fits in int32_t or int64_t. */
+static float32 frint_s(float32 f, float_status *fpst, int intsize)
+{
+ int old_flags = get_float_exception_flags(fpst);
+ uint32_t exp = extract32(f, 23, 8);
+
+ if (unlikely(exp == 0xff)) {
+ /* NaN or Inf. */
+ goto overflow;
+ }
+
+ /* Round and re-extract the exponent. */
+ f = float32_round_to_int(f, fpst);
+ exp = extract32(f, 23, 8);
+
+ /* Validate the range of the result. */
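+ /*
+ * With the float32 bias of 127, a biased exponent below 126 + intsize
+ * means |F| < 2^(intsize - 1), so the rounded value fits in INT{N};
+ * the boundary exponent 126 + intsize is in range only for the single
+ * value INT{N}_MIN == -2^(intsize - 1).
+ */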
+ if (exp < 126 + intsize) {
+ /* abs(F) <= INT{N}_MAX */
+ return f;
+ }
+ if (exp == 126 + intsize) {
+ uint32_t sign = extract32(f, 31, 1);
+ uint32_t frac = extract32(f, 0, 23);
+ if (sign && frac == 0) {
+ /* F == INT{N}_MIN */
+ return f;
+ }
+ }
+
+ overflow:
+ /*
+ * Raise Invalid and return INT{N}_MIN as a float. Revert any
+ * inexact exception float32_round_to_int may have raised.
+ */
+ set_float_exception_flags(old_flags | float_flag_invalid, fpst);
+ return (0x100u + 126u + intsize) << 23;
+}
+
+float32 HELPER(frint32_s)(float32 f, void *fpst)
+{
+ return frint_s(f, fpst, 32);
+}
+
+float32 HELPER(frint64_s)(float32 f, void *fpst)
+{
+ return frint_s(f, fpst, 64);
+}
+
+/* Round a float64 to an integer that fits in int32_t or int64_t. */
+static float64 frint_d(float64 f, float_status *fpst, int intsize)
+{
+ int old_flags = get_float_exception_flags(fpst);
+ uint32_t exp = extract64(f, 52, 11);
+
+ if (unlikely(exp == 0x7ff)) {
+ /* NaN or Inf. */
+ goto overflow;
+ }
+
+ /* Round and re-extract the exponent. */
+ f = float64_round_to_int(f, fpst);
+ exp = extract64(f, 52, 11);
+
+ /* Validate the range of the result. */
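+ /* Same reasoning as in frint_s above, with the float64 bias of 1023. */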
+ if (exp < 1022 + intsize) {
+ /* abs(F) <= INT{N}_MAX */
+ return f;
+ }
+ if (exp == 1022 + intsize) {
+ uint64_t sign = extract64(f, 63, 1);
+ uint64_t frac = extract64(f, 0, 52);
+ if (sign && frac == 0) {
+ /* F == INT{N}_MIN */
+ return f;
+ }
+ }
+
+ overflow:
+ /*
+ * Raise Invalid and return INT{N}_MIN as a float. Revert any
+ * inexact exception float64_round_to_int may have raised.
+ */
+ set_float_exception_flags(old_flags | float_flag_invalid, fpst);
+ return (uint64_t)(0x800 + 1022 + intsize) << 52;
+}
+
+float64 HELPER(frint32_d)(float64 f, void *fpst)
+{
+ return frint_d(f, fpst, 32);
+}
+
+float64 HELPER(frint64_d)(float64 f, void *fpst)
+{
+ return frint_d(f, fpst, 64);
+}
+
+void HELPER(check_hcr_el2_trap)(CPUARMState *env, uint32_t rt, uint32_t reg)
+{
+ uint32_t syndrome;
+
+ switch (reg) {
+ case ARM_VFP_MVFR0:
+ case ARM_VFP_MVFR1:
+ case ARM_VFP_MVFR2:
+ if (!(arm_hcr_el2_eff(env) & HCR_TID3)) {
+ return;
+ }
+ break;
+ case ARM_VFP_FPSID:
+ if (!(arm_hcr_el2_eff(env) & HCR_TID0)) {
+ return;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
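+ /*
+ * Construct the ISS for a trapped VMRS: CV = 1, COND = 0b1110,
+ * Opc1 = 7, CRn = reg, Rt = rt, direction = 1 (read).
+ */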
+ syndrome = ((EC_FPIDTRAP << ARM_EL_EC_SHIFT)
+ | ARM_EL_IL
+ | (1 << 24) | (0xe << 20) | (7 << 14)
+ | (reg << 10) | (rt << 5) | 1);
+
+ raise_exception(env, EXCP_HYP_TRAP, syndrome, 2);
+}
+
+#endif