aboutsummaryrefslogtreecommitdiff
path: root/target/arm
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2021-06-17 13:16:11 +0100
committerPeter Maydell <peter.maydell@linaro.org>2021-06-21 17:12:51 +0100
commitbe3c8299bba11d1b657905110c9dc479f5995cdd (patch)
treedf288ab5659e498184355e60dd4011b9921ad33b /target/arm
parente597afc3e97760c9fc079ece3a1e9ee68342dedd (diff)
target/arm: Implement MVE VQADD and VQSUB
Implement the MVE VQADD and VQSUB insns, which perform saturating addition of a scalar to each element. Note that individual bytes of each result element are used or discarded according to the predicate mask, but FPSCR.QC is only set if the predicate mask for the lowest byte of the element is set. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20210617121628.20116-28-peter.maydell@linaro.org
Diffstat (limited to 'target/arm')
-rw-r--r--target/arm/helper-mve.h16
-rw-r--r--target/arm/mve.decode5
-rw-r--r--target/arm/mve_helper.c62
-rw-r--r--target/arm/translate-mve.c4
4 files changed, 87 insertions, 0 deletions
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index 1b807e1cf5..092efdab47 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -173,6 +173,22 @@ DEF_HELPER_FLAGS_4(mve_vhsubu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vhsubu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vhsubu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqadds_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqadds_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqadds_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqaddu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqaddu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqaddu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqsubs_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqsubs_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqsubs_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(mve_vqsubu_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqsubu_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vqsubu_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(mve_vbrsrb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vbrsrh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vbrsrw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index e189e2de64..c85227c675 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -167,6 +167,11 @@ VHADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
VHADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 100 .... @2scalar
VHSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
VHSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 100 .... @2scalar
+
+VQADD_S_scalar 1110 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar
+VQADD_U_scalar 1111 1110 0 . .. ... 0 ... 0 1111 . 110 .... @2scalar
+VQSUB_S_scalar 1110 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
+VQSUB_U_scalar 1111 1110 0 . .. ... 0 ... 1 1111 . 110 .... @2scalar
VBRSR 1111 1110 0 . .. ... 1 ... 1 1110 . 110 .... @2scalar
# Predicate operations
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index 558c6f5aa3..1d97bd2ae8 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -490,6 +490,33 @@ DO_2OP_U(vhaddu, do_vhadd_u)
DO_2OP_S(vhsubs, do_vhsub_s)
DO_2OP_U(vhsubu, do_vhsub_u)
+static inline int32_t do_sat_bhw(int64_t val, int64_t min, int64_t max, bool *s)
+{
+ if (val > max) {
+ *s = true;
+ return max;
+ } else if (val < min) {
+ *s = true;
+ return min;
+ }
+ return val;
+}
+
+#define DO_SQADD_B(n, m, s) do_sat_bhw((int64_t)n + m, INT8_MIN, INT8_MAX, s)
+#define DO_SQADD_H(n, m, s) do_sat_bhw((int64_t)n + m, INT16_MIN, INT16_MAX, s)
+#define DO_SQADD_W(n, m, s) do_sat_bhw((int64_t)n + m, INT32_MIN, INT32_MAX, s)
+
+#define DO_UQADD_B(n, m, s) do_sat_bhw((int64_t)n + m, 0, UINT8_MAX, s)
+#define DO_UQADD_H(n, m, s) do_sat_bhw((int64_t)n + m, 0, UINT16_MAX, s)
+#define DO_UQADD_W(n, m, s) do_sat_bhw((int64_t)n + m, 0, UINT32_MAX, s)
+
+#define DO_SQSUB_B(n, m, s) do_sat_bhw((int64_t)n - m, INT8_MIN, INT8_MAX, s)
+#define DO_SQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, INT16_MIN, INT16_MAX, s)
+#define DO_SQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, INT32_MIN, INT32_MAX, s)
+
+#define DO_UQSUB_B(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT8_MAX, s)
+#define DO_UQSUB_H(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT16_MAX, s)
+#define DO_UQSUB_W(n, m, s) do_sat_bhw((int64_t)n - m, 0, UINT32_MAX, s)
#define DO_2OP_SCALAR(OP, ESIZE, TYPE, FN) \
void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
@@ -505,6 +532,27 @@ DO_2OP_U(vhsubu, do_vhsub_u)
mve_advance_vpt(env); \
}
+#define DO_2OP_SAT_SCALAR(OP, ESIZE, TYPE, FN) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
+ uint32_t rm) \
+ { \
+ TYPE *d = vd, *n = vn; \
+ TYPE m = rm; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ bool qc = false; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ bool sat = false; \
+ mergemask(&d[H##ESIZE(e)], FN(n[H##ESIZE(e)], m, &sat), \
+ mask); \
+ qc |= sat & mask & 1; \
+ } \
+ if (qc) { \
+ env->vfp.qc[0] = qc; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
/* provide unsigned 2-op scalar helpers for all sizes */
#define DO_2OP_SCALAR_U(OP, FN) \
DO_2OP_SCALAR(OP##b, 1, uint8_t, FN) \
@@ -523,6 +571,20 @@ DO_2OP_SCALAR_U(vhaddu_scalar, do_vhadd_u)
DO_2OP_SCALAR_S(vhsubs_scalar, do_vhsub_s)
DO_2OP_SCALAR_U(vhsubu_scalar, do_vhsub_u)
+DO_2OP_SAT_SCALAR(vqaddu_scalarb, 1, uint8_t, DO_UQADD_B)
+DO_2OP_SAT_SCALAR(vqaddu_scalarh, 2, uint16_t, DO_UQADD_H)
+DO_2OP_SAT_SCALAR(vqaddu_scalarw, 4, uint32_t, DO_UQADD_W)
+DO_2OP_SAT_SCALAR(vqadds_scalarb, 1, int8_t, DO_SQADD_B)
+DO_2OP_SAT_SCALAR(vqadds_scalarh, 2, int16_t, DO_SQADD_H)
+DO_2OP_SAT_SCALAR(vqadds_scalarw, 4, int32_t, DO_SQADD_W)
+
+DO_2OP_SAT_SCALAR(vqsubu_scalarb, 1, uint8_t, DO_UQSUB_B)
+DO_2OP_SAT_SCALAR(vqsubu_scalarh, 2, uint16_t, DO_UQSUB_H)
+DO_2OP_SAT_SCALAR(vqsubu_scalarw, 4, uint32_t, DO_UQSUB_W)
+DO_2OP_SAT_SCALAR(vqsubs_scalarb, 1, int8_t, DO_SQSUB_B)
+DO_2OP_SAT_SCALAR(vqsubs_scalarh, 2, int16_t, DO_SQSUB_H)
+DO_2OP_SAT_SCALAR(vqsubs_scalarw, 4, int32_t, DO_SQSUB_W)
+
static inline uint32_t do_vbrsrb(uint32_t n, uint32_t m)
{
m &= 0xff;
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index 7c4c06e434..27c69d9c7d 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -446,6 +446,10 @@ DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
+DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
+DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
+DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
+DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,