aboutsummaryrefslogtreecommitdiff
path: root/target-arm/neon_helper.c
diff options
context:
space:
mode:
Diffstat (limited to 'target-arm/neon_helper.c')
-rw-r--r-- target-arm/neon_helper.c 165
1 files changed, 165 insertions, 0 deletions
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index e23f2241c0..8d6f9a92f2 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -236,6 +236,171 @@ uint64_t HELPER(neon_qadd_s64)(CPUARMState *env, uint64_t src1, uint64_t src2)
return res;
}
+/* Unsigned saturating accumulate of signed value
+ *
+ * Op1/Rn is treated as signed
+ * Op2/Rd is treated as unsigned
+ *
+ * Each lane of a is pulled out with sextract32() and the matching lane
+ * of b with extract32(), so that the two operands get the intended
+ * signed/unsigned interpretation before they are added. The result is
+ * treated as an unsigned value and saturated as such: a sum below zero
+ * becomes 0, a sum above the lane maximum becomes that maximum, and
+ * SET_QC() is invoked whenever the sum had to be clamped.
+ *
+ * USATACC handles one lane of the 8/16 bit cases. It reads the enclosing
+ * function's a and b and deposits the lane into the enclosing function's
+ * unsigned 32 bit result value r. The interim sum is held in a
+ * macro-local int32_t, which is wide enough for both lane sizes.
+ */
+
+#define USATACC(bits, shift) \
+    do { \
+        int32_t acc = sextract32(a, shift, bits); \
+        acc += (int32_t)extract32(b, shift, bits); \
+        if (acc < 0) { \
+            SET_QC(); \
+            acc = 0; \
+        } else if (acc > UINT##bits##_MAX) { \
+            SET_QC(); \
+            acc = UINT##bits##_MAX; \
+        } \
+        r = deposit32(r, shift, bits, acc); \
+    } while (0)
+
+/* Four 8 bit lanes packed in a 32 bit value. */
+uint32_t HELPER(neon_uqadd_s8)(CPUARMState *env, uint32_t a, uint32_t b)
+{
+    uint32_t r = 0;
+
+    USATACC(8, 0);
+    USATACC(8, 8);
+    USATACC(8, 16);
+    USATACC(8, 24);
+    return r;
+}
+
+/* Two 16 bit lanes packed in a 32 bit value. */
+uint32_t HELPER(neon_uqadd_s16)(CPUARMState *env, uint32_t a, uint32_t b)
+{
+    /* 32 bits, matching both the return type and the 8 bit variant */
+    uint32_t r = 0;
+
+    USATACC(16, 0);
+    USATACC(16, 16);
+    return r;
+}
+
+#undef USATACC
+
+/* 32 bit unsigned saturating accumulate of a signed value: a is taken as
+ * signed, b as unsigned, and the sum is clamped to [0, UINT32_MAX].
+ * SET_QC() is invoked when clamping occurs.
+ */
+uint32_t HELPER(neon_uqadd_s32)(CPUARMState *env, uint32_t a, uint32_t b)
+{
+    /* Widen to 64 bits so the exact sum is representable. */
+    int64_t sum = (int64_t)(int32_t)a + (int64_t)b;
+
+    if (sum < 0) {
+        SET_QC();
+        return 0;
+    }
+    if (sum > UINT32_MAX) {
+        SET_QC();
+        return UINT32_MAX;
+    }
+    return sum;
+}
+
+/* 64 bit unsigned saturating accumulate of a signed value: a is taken as
+ * signed, b as unsigned. No wider type is used; saturation is detected
+ * from the bits of a, b and the wrapped sum selected by SIGNBIT64.
+ */
+uint64_t HELPER(neon_uqadd_s64)(CPUARMState *env, uint64_t a, uint64_t b)
+{
+    uint64_t sum = a + b;
+    /* Only the SIGNBIT64 position of these masks is examined below:
+     *   above_max:  a clear, b set, sum clear -> the add wrapped upwards
+     *   below_zero: a set, b clear, sum set   -> the true sum is negative
+     */
+    uint64_t above_max = ~a & b & ~sum;
+    uint64_t below_zero = a & ~b & sum;
+
+    if (above_max & SIGNBIT64) {
+        SET_QC();
+        return UINT64_MAX;
+    }
+    if (below_zero & SIGNBIT64) {
+        SET_QC();
+        return 0;
+    }
+    return sum;
+}
+
+/* Signed saturating accumulate of unsigned value
+ *
+ * Op1/Rn is treated as unsigned
+ * Op2/Rd is treated as signed
+ *
+ * The result is treated as a signed value and saturated as such. Because
+ * the lane taken from a with extract32() is never negative, the sum can
+ * never drop below the lane's signed minimum, so only the upper bound has
+ * to be checked. SET_QC() is invoked whenever the sum had to be clamped.
+ *
+ * SSATACC handles one lane of the 8/16 bit cases. It reads the enclosing
+ * function's a and b and deposits the lane into the enclosing function's
+ * unsigned 32 bit result value r. The interim sum is held in a
+ * macro-local int32_t, which is wide enough for both lane sizes.
+ */
+
+#define SSATACC(bits, shift) \
+    do { \
+        int32_t acc = (int32_t)extract32(a, shift, bits); \
+        acc += sextract32(b, shift, bits); \
+        if (acc > INT##bits##_MAX) { \
+            SET_QC(); \
+            acc = INT##bits##_MAX; \
+        } \
+        r = deposit32(r, shift, bits, acc); \
+    } while (0)
+
+/* Four 8 bit lanes packed in a 32 bit value. */
+uint32_t HELPER(neon_sqadd_u8)(CPUARMState *env, uint32_t a, uint32_t b)
+{
+    uint32_t r = 0;
+
+    SSATACC(8, 0);
+    SSATACC(8, 8);
+    SSATACC(8, 16);
+    SSATACC(8, 24);
+    return r;
+}
+
+/* Two 16 bit lanes packed in a 32 bit value. */
+uint32_t HELPER(neon_sqadd_u16)(CPUARMState *env, uint32_t a, uint32_t b)
+{
+    uint32_t r = 0;
+
+    SSATACC(16, 0);
+    SSATACC(16, 16);
+    return r;
+}
+
+#undef SSATACC
+
+/* 32 bit signed saturating accumulate of an unsigned value: a is taken as
+ * unsigned, b as signed, and the sum is clamped to INT32_MAX.
+ * SET_QC() is invoked when clamping occurs.
+ */
+uint32_t HELPER(neon_sqadd_u32)(CPUARMState *env, uint32_t a, uint32_t b)
+{
+    /* Widen to 64 bits so the exact sum is representable:
+     * a is zero-extended, b is sign-extended.
+     */
+    int64_t sum = (int64_t)a + (int32_t)b;
+
+    /* a >= 0 and b >= INT32_MIN, so the sum can never fall below
+     * INT32_MIN; only the upper bound can be exceeded.
+     */
+    if (sum > INT32_MAX) {
+        SET_QC();
+        sum = INT32_MAX;
+    }
+    return sum;
+}
+
+/* 64 bit signed saturating accumulate of an unsigned value: a is taken as
+ * unsigned, b as signed. No wider type is used; overflow is detected from
+ * the bits of a, b and the wrapped sum selected by SIGNBIT64, and the
+ * result is then clamped to INT64_MAX with SET_QC() invoked.
+ */
+uint64_t HELPER(neon_sqadd_u64)(CPUARMState *env, uint64_t a, uint64_t b)
+{
+    uint64_t sum = a + b;
+    /* Same predicate as (a & sum) | (~b & sum) | (a & ~b), with the two
+     * ~b terms factored together. Only the SIGNBIT64 position matters.
+     */
+    uint64_t overflow = (a & sum) | (~b & (a | sum));
+
+    if (overflow & SIGNBIT64) {
+        SET_QC();
+        return INT64_MAX;
+    }
+    return sum;
+}
+
+
#define NEON_USAT(dest, src1, src2, type) do { \
uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
if (tmp != (type)tmp) { \