aboutsummaryrefslogtreecommitdiff
path: root/target-alpha/int_helper.c
diff options
context:
space:
mode:
authorRichard Henderson <rth@twiddle.net>2014-08-18 10:19:06 -0700
committerRichard Henderson <rth@twiddle.net>2015-05-21 10:34:18 -0700
commit32ad48abd74a997220b841e4e913edeb267aa362 (patch)
treec286efc97ba68e75848b444c40a249832dfb9c0a /target-alpha/int_helper.c
parent8d8d324e3424bf891d41e9c7758dcc09cf3c38b9 (diff)
target-alpha: Add vector implementation for CMPBGE
While conditionalized on SSE2, it's a "portable" gcc generic vector implementation, which could be enabled on other hosts.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Diffstat (limited to 'target-alpha/int_helper.c')
-rw-r--r--target-alpha/int_helper.c37
1 files changed, 37 insertions, 0 deletions
diff --git a/target-alpha/int_helper.c b/target-alpha/int_helper.c
index 74f38cbe7b..29e927f53f 100644
--- a/target-alpha/int_helper.c
+++ b/target-alpha/int_helper.c
@@ -60,6 +60,42 @@ uint64_t helper_zap(uint64_t val, uint64_t mask)
uint64_t helper_cmpbge(uint64_t op1, uint64_t op2)
{
+#if defined(__SSE2__)
+ uint64_t r;
+
+ /* The cmpbge instruction is heavily used in the implementation of
+ every string function on Alpha. We can do much better than either
+ the default loop below, or even an unrolled version by using the
+ native vector support. */
+ {
+ typedef uint64_t Q __attribute__((vector_size(16)));
+ typedef uint8_t B __attribute__((vector_size(16)));
+
+ Q q1 = (Q){ op1, 0 };
+ Q q2 = (Q){ op2, 0 };
+
+ q1 = (Q)((B)q1 >= (B)q2);
+
+ r = q1[0];
+ }
+
+ /* Select only one bit from each byte. */
+ r &= 0x0101010101010101;
+
+ /* Collect the bits into the bottom byte. */
+ /* .......A.......B.......C.......D.......E.......F.......G.......H */
+ r |= r >> (8 - 1);
+
+ /* .......A......AB......BC......CD......DE......EF......FG......GH */
+ r |= r >> (16 - 2);
+
+ /* .......A......AB.....ABC....ABCD....BCDE....CDEF....DEFG....EFGH */
+ r |= r >> (32 - 4);
+
+ /* .......A......AB.....ABC....ABCD...ABCDE..ABCDEF.ABCDEFGABCDEFGH */
+ /* Return only the low 8 bits. */
+ return r & 0xff;
+#else
uint8_t opa, opb, res;
int i;
@@ -72,6 +108,7 @@ uint64_t helper_cmpbge(uint64_t op1, uint64_t op2)
}
}
return res;
+#endif
}
uint64_t helper_minub8(uint64_t op1, uint64_t op2)