aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2021-06-07 17:31:12 +0100
committerPeter Maydell <peter.maydell@linaro.org>2021-06-07 17:31:39 +0100
commit4cbe50f46a29c9813096be81a6c9660235701e6b (patch)
tree71aa610ddcfae9b17c2e86f71903b71f4b75bc4c
parent32ec6be98b5a2b25eed3f20f3ab173ff8010ae70 (diff)
downloadqemu-arm-mve-drop-1.tar.gz
target/arm: Make VMOV scalar <-> gpreg beatwise for MVEmve-drop-1
In a CPU with MVE, the VMOV (vector lane to general-purpose register) and VMOV (general-purpose register to vector lane) insns are not predicated, but they are subject to beatwise execution if they are not in an IT block. Since our implementation always executes all 4 beats in one tick, this means only that we need to handle PSR.ECI: * we must do the usual check for bad ECI state * we must advance ECI state if the insn succeeds * if ECI says we should not be executing the beat corresponding to the lane of the vector register being accessed then we should skip performing the move Note that if PSR.ECI is non-zero then we cannot be in an IT block. Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--target/arm/translate-a32.h2
-rw-r--r--target/arm/translate-mve.c4
-rw-r--r--target/arm/translate-vfp.c85
3 files changed, 83 insertions, 8 deletions
diff --git a/target/arm/translate-a32.h b/target/arm/translate-a32.h
index 0a0053949f..6d384fc796 100644
--- a/target/arm/translate-a32.h
+++ b/target/arm/translate-a32.h
@@ -46,6 +46,8 @@ long neon_full_reg_offset(unsigned reg);
long neon_element_offset(int reg, int element, MemOp memop);
void gen_rev16(TCGv_i32 dest, TCGv_i32 var);
void clear_eci_state(DisasContext *s);
+bool mve_eci_check(DisasContext *s);
+void mve_update_eci(DisasContext *s);
static inline TCGv_i32 load_cpu_offset(int offset)
{
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index 1794c50d0e..b62e355a1a 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -49,7 +49,7 @@ static TCGv_ptr mve_qreg_ptr(unsigned reg)
return ret;
}
-static bool mve_eci_check(DisasContext *s)
+bool mve_eci_check(DisasContext *s)
{
/*
* This is a beatwise insn: check that ECI is valid (not a
@@ -72,7 +72,7 @@ static bool mve_eci_check(DisasContext *s)
}
}
-static void mve_update_eci(DisasContext *s)
+void mve_update_eci(DisasContext *s)
{
/*
* The helper function will always update the CPUState field,
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
index 6a572591ce..b5bb8230cd 100644
--- a/target/arm/translate-vfp.c
+++ b/target/arm/translate-vfp.c
@@ -553,6 +553,48 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
return true;
}
+static bool mve_skip_vmov(DisasContext *s, int vn, int index, int size)
+{
+ /*
+ * In a CPU with MVE, the VMOV (vector lane to general-purpose register)
+ * and VMOV (general-purpose register to vector lane) insns are not
+ * predicated, but they are subject to beatwise execution if they are
+ * not in an IT block.
+ *
+ * Since our implementation always executes all 4 beats in one tick,
+ * this means only that if PSR.ECI says we should not be executing
+ * the beat corresponding to the lane of the vector register being
+ * accessed then we should skip performing the move, and that we need
+ * to do the usual check for bad ECI state and advance of ECI state.
+ *
+ * Note that if PSR.ECI is non-zero then we cannot be in an IT block.
+ *
+ * Return true if this VMOV scalar <-> gpreg should be skipped because
+ * the MVE PSR.ECI state says we skip the beat where the store happens.
+ */
+
+ /* Calculate the byte offset into Qn which we're going to access */
+ int ofs = (index << size) + ((vn & 1) * 8);
+
+ if (!dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+
+ switch (s->eci) {
+ case ECI_NONE:
+ return false;
+ case ECI_A0:
+ return ofs < 4;
+ case ECI_A0A1:
+ return ofs < 8;
+ case ECI_A0A1A2:
+ case ECI_A0A1A2B0:
+ return ofs < 12;
+ default:
+ g_assert_not_reached();
+ }
+}
+
static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
{
/* VMOV scalar to general purpose register */
@@ -575,14 +617,30 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
return false;
}
+ if (dc_isar_feature(aa32_mve, s)) {
+ if (!mve_eci_check(s)) {
+ return true;
+ }
+ }
+
if (!vfp_access_check(s)) {
return true;
}
- tmp = tcg_temp_new_i32();
- read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
- store_reg(s, a->rt, tmp);
+ if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
+ tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, a->vn, a->index,
+ a->size | (a->u ? 0 : MO_SIGN));
+ store_reg(s, a->rt, tmp);
+ }
+
+ if (dc_isar_feature(aa32_mve, s)) {
+ TCGv_i32 eci;
+ mve_update_eci(s);
+ eci = tcg_const_i32(s->eci << 4);
+ store_cpu_field(eci, condexec_bits);
+ }
return true;
}
@@ -608,14 +666,29 @@ static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
return false;
}
+ if (dc_isar_feature(aa32_mve, s)) {
+ if (!mve_eci_check(s)) {
+ return true;
+ }
+ }
+
if (!vfp_access_check(s)) {
return true;
}
- tmp = load_reg(s, a->rt);
- write_neon_element32(tmp, a->vn, a->index, a->size);
- tcg_temp_free_i32(tmp);
+ if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
+ tmp = load_reg(s, a->rt);
+ write_neon_element32(tmp, a->vn, a->index, a->size);
+ tcg_temp_free_i32(tmp);
+ }
+ if (dc_isar_feature(aa32_mve, s)) {
+ TCGv_i32 eci;
+
+ mve_update_eci(s);
+ eci = tcg_const_i32(s->eci << 4);
+ store_cpu_field(eci, condexec_bits);
+ }
return true;
}