author     Peter Maydell <peter.maydell@linaro.org>  2019-06-11 16:39:49 +0100
committer  Peter Maydell <peter.maydell@linaro.org>  2019-06-13 15:14:05 +0100
commit     d4893b01d23060845ee3855bc96626e16aad9ab5 (patch)
tree       a92fe3bbd8616fdf0febd1ad375cb0321ad71534  /target/arm/translate-vfp.inc.c
parent     519ee7ae31e050eb0ff9ad35c213f0bd7ab1c03e (diff)
target/arm: Convert VFP fused multiply-add insns to decodetree
Convert the VFP fused multiply-add instructions (VFNMA, VFNMS, VFMA, VFMS)
to decodetree.

Note that in the old decode structure we were implementing these to honour
the VFP vector stride/length. These instructions were introduced in VFPv4,
and in the v7A architecture they are UNPREDICTABLE if the vector stride or
length are non-zero. In v8A they must UNDEF if stride or length are
non-zero, like all VFP instructions; we choose to UNDEF always.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
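For reference, the operand-negation mapping implemented by the new
trans_VFM_sp/trans_VFM_dp functions (selected by the o1 and o2 decode
fields) can be summarised by the standalone C sketch below. It is not part
of the patch: vfm_op and its parameter names are illustrative, and fmaf()
stands in for QEMU's softfloat muladd helpers, which do the fused operation
in software and negate inputs by flipping the sign bit (so a NaN input
comes out with its sign flipped, as the comments in the patch note).

    #include <math.h>
    #include <stdbool.h>

    /*
     * Illustration only: one fused multiply-add with a single rounding
     * step, with any input negations applied before the fused operation.
     *   o2     set: VFNMS, VFMS  -> negate the multiplicand fn
     *   o1 & 1 set: VFNMA, VFNMS -> negate the addend fd
     */
    static float vfm_op(float fd, float fn, float fm, bool o1_lsb, bool o2)
    {
        if (o2) {
            fn = -fn;
        }
        if (o1_lsb) {
            fd = -fd;
        }
        return fmaf(fn, fm, fd);    /* fd = fn * fm + fd, fused */
    }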
Diffstat (limited to 'target/arm/translate-vfp.inc.c')
-rw-r--r--  target/arm/translate-vfp.inc.c  121
1 file changed, 121 insertions(+), 0 deletions(-)
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index 6af99605d5..ba6506a378 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -1481,3 +1481,124 @@ static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_sp *a)
{
return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
}
+
+static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a)
+{
+ /*
+ * VFNMA : fd = muladd(-fd, fn, fm)
+ * VFNMS : fd = muladd(-fd, -fn, fm)
+ * VFMA : fd = muladd( fd, fn, fm)
+ * VFMS : fd = muladd( fd, -fn, fm)
+ *
+ * These are fused multiply-add, and must be done as one floating
+ * point operation with no rounding between the multiplication and
+ * addition steps. NB that doing the negations here as separate
+ * steps is correct : an input NaN should come out with its sign
+ * bit flipped if it is a negated-input.
+ */
+ TCGv_ptr fpst;
+ TCGv_i32 vn, vm, vd;
+
+ /*
+ * Present in VFPv4 only.
+ * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
+ * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
+ */
+ if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
+ (s->vec_len != 0 || s->vec_stride != 0)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vn = tcg_temp_new_i32();
+ vm = tcg_temp_new_i32();
+ vd = tcg_temp_new_i32();
+
+ neon_load_reg32(vn, a->vn);
+ neon_load_reg32(vm, a->vm);
+ if (a->o2) {
+ /* VFNMS, VFMS */
+ gen_helper_vfp_negs(vn, vn);
+ }
+ neon_load_reg32(vd, a->vd);
+ if (a->o1 & 1) {
+ /* VFNMA, VFNMS */
+ gen_helper_vfp_negs(vd, vd);
+ }
+ fpst = get_fpstatus_ptr(0);
+ gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
+ neon_store_reg32(vd, a->vd);
+
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(vn);
+ tcg_temp_free_i32(vm);
+ tcg_temp_free_i32(vd);
+
+ return true;
+}
+
+static bool trans_VFM_dp(DisasContext *s, arg_VFM_sp *a)
+{
+ /*
+ * VFNMA : fd = muladd(-fd, fn, fm)
+ * VFNMS : fd = muladd(-fd, -fn, fm)
+ * VFMA : fd = muladd( fd, fn, fm)
+ * VFMS : fd = muladd( fd, -fn, fm)
+ *
+ * These are fused multiply-add, and must be done as one floating
+ * point operation with no rounding between the multiplication and
+ * addition steps. NB that doing the negations here as separate
+ * steps is correct : an input NaN should come out with its sign
+ * bit flipped if it is a negated-input.
+ */
+ TCGv_ptr fpst;
+ TCGv_i64 vn, vm, vd;
+
+ /*
+ * Present in VFPv4 only.
+ * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
+ * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
+ */
+ if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
+ (s->vec_len != 0 || s->vec_stride != 0)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vn | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ vn = tcg_temp_new_i64();
+ vm = tcg_temp_new_i64();
+ vd = tcg_temp_new_i64();
+
+ neon_load_reg64(vn, a->vn);
+ neon_load_reg64(vm, a->vm);
+ if (a->o2) {
+ /* VFNMS, VFMS */
+ gen_helper_vfp_negd(vn, vn);
+ }
+ neon_load_reg64(vd, a->vd);
+ if (a->o1 & 1) {
+ /* VFNMA, VFNMS */
+ gen_helper_vfp_negd(vd, vd);
+ }
+ fpst = get_fpstatus_ptr(0);
+ gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
+ neon_store_reg64(vd, a->vd);
+
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i64(vn);
+ tcg_temp_free_i64(vm);
+ tcg_temp_free_i64(vd);
+
+ return true;
+}
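
As an aside on the "no rounding between the multiplication and addition
steps" requirement stressed in the comments above: the difference between
the fused form used here and a separate multiply-then-add is observable.
The small host-side C program below (purely illustrative, not QEMU code;
QEMU performs the operation with its softfloat library rather than the
host FPU) shows one single-precision case where the two disagree. Compile
with -ffp-contract=off so the compiler does not itself fuse the separate
expression.

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        float a = 1.0f + 0x1.0p-12f;      /* 1 + 2^-12    */
        float c = -(1.0f + 0x1.0p-11f);   /* -(1 + 2^-11) */

        /*
         * a*a = 1 + 2^-11 + 2^-24 exactly; rounded to float
         * (ties-to-even) it becomes 1 + 2^-11, losing the 2^-24.
         */
        float separate = a * a + c;       /* product rounded first: 0x0p+0  */
        float fused    = fmaf(a, a, c);   /* single final rounding: 0x1p-24 */

        printf("separate = %a\n", separate);
        printf("fused    = %a\n", fused);
        return 0;
    }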