aboutsummaryrefslogtreecommitdiff
path: root/target-i386/ops_sse.h
diff options
context:
space:
mode:
authorAndre Przywara <andre.przywara@amd.com>2009-09-19 00:30:48 +0200
committerAurelien Jarno <aurelien@aurel32.net>2009-10-04 14:09:41 +0200
commitd9f4bb27dbff2e40ec2e36eb8017c9dedce77f30 (patch)
tree60e4374cd0a5fd95cfadb397f0413468d11a1f14 /target-i386/ops_sse.h
parentccd59d09a9d0c75b86185b89d8246e40b5f01168 (diff)
target-i386: add SSE4a instruction support
This adds support for the AMD Phenom/Barcelona's SSE4a instructions. Those include insertq and extrq, which are doing shift and mask on XMM registers, in two versions (immediate shift/length values and stored in another XMM register). Additionally it implements movntss, movntsd, which are scalar non-temporal stores (avoiding cache trashing). These are implemented as normal stores, though. SSE4a is guarded by the SSE4A CPUID bit (Fn8000_0001:ECX[6]). Signed-off-by: Andre Przywara <andre.przywara@amd.com> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Diffstat (limited to 'target-i386/ops_sse.h')
-rw-r--r--target-i386/ops_sse.h44
1 files changed, 44 insertions, 0 deletions
diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h
index 709732a4a7..3232abd965 100644
--- a/target-i386/ops_sse.h
+++ b/target-i386/ops_sse.h
@@ -802,6 +802,50 @@ void helper_rcpss(XMMReg *d, XMMReg *s)
d->XMM_S(0) = approx_rcp(s->XMM_S(0));
}
+static inline uint64_t helper_extrq(uint64_t src, int shift, int len)
+{
+ uint64_t mask;
+
+ if (len == 0) {
+ mask = ~0LL;
+ } else {
+ mask = (1ULL << len) - 1;
+ }
+ return (src >> shift) & mask;
+}
+
+void helper_extrq_r(XMMReg *d, XMMReg *s)
+{
+ d->XMM_Q(0) = helper_extrq(d->XMM_Q(0), s->XMM_B(1), s->XMM_B(0));
+}
+
+void helper_extrq_i(XMMReg *d, int index, int length)
+{
+ d->XMM_Q(0) = helper_extrq(d->XMM_Q(0), index, length);
+}
+
+static inline uint64_t helper_insertq(uint64_t src, int shift, int len)
+{
+ uint64_t mask;
+
+ if (len == 0) {
+ mask = ~0ULL;
+ } else {
+ mask = (1ULL << len) - 1;
+ }
+ return (src & ~(mask << shift)) | ((src & mask) << shift);
+}
+
+void helper_insertq_r(XMMReg *d, XMMReg *s)
+{
+ d->XMM_Q(0) = helper_insertq(s->XMM_Q(0), s->XMM_B(9), s->XMM_B(8));
+}
+
+void helper_insertq_i(XMMReg *d, int index, int length)
+{
+ d->XMM_Q(0) = helper_insertq(d->XMM_Q(0), index, length);
+}
+
void helper_haddps(XMMReg *d, XMMReg *s)
{
XMMReg r;