Add encode_iso_array intrinsic

author: Edward Nevill <edward.nevill@linaro.org> 2014-08-04 18:03:53 +0100
committer: Edward Nevill <edward.nevill@linaro.org> 2014-08-04 18:03:53 +0100
commit: 3920a67639b349e65f2b55ceb5207280d0c37a49 (patch)
tree: 666e93f15ad2c69eb6988e1136e819d89af82dc7
parent: 5179fe4960c995b995e084ee173d46e06f4899fc (diff)
4 files changed, 164 insertions, 0 deletions
diff --git a/src/cpu/aarch64/vm/aarch64.ad b/src/cpu/aarch64/vm/aarch64.ad
index e1f5a44f4..448098561 100644
--- a/src/cpu/aarch64/vm/aarch64.ad
+++ b/src/cpu/aarch64/vm/aarch64.ad
@@ -381,6 +381,9 @@ reg_class int_r0_reg(R0);
 // Singleton class for R2 int register
 reg_class int_r2_reg(R2);
 
+// Singleton class for R3 int register (the allocation class named by the iRegI_R3 operand)
+reg_class int_r3_reg(R3);
+
 // Singleton class for R4 int register
 reg_class int_r4_reg(R4);
 
@@ -698,6 +701,26 @@ reg_class double_reg(
     V31, V31_H
 );
 
+// Class for 128 bit register v0 (single-register class, so an operand constrained to it is pinned to v0)
+reg_class v0_reg(
+    V0, V0_H // NOTE(review): same Vn/Vn_H pairing as the double_reg entries; confirm this matches the "128 bit" label
+);
+
+// Class for 128 bit register v1 (single-register class; see the review note on v0_reg about the "128 bit" label)
+reg_class v1_reg(
+    V1, V1_H
+);
+
+// Class for 128 bit register v2 (single-register class; see the review note on v0_reg about the "128 bit" label)
+reg_class v2_reg(
+    V2, V2_H
+);
+
+// Class for 128 bit register v3 (single-register class; see the review note on v0_reg about the "128 bit" label)
+reg_class v3_reg(
+    V3, V3_H
+);
+
 // Singleton class for condition codes
 reg_class int_flags(RFLAGS);
 
@@ -4116,6 +4139,18 @@ operand iRegI_R2()
   interface(REG_INTER);
 %}
 
+// Register R3 only: int operand used for the len argument of encode_iso_array
+operand iRegI_R3()
+%{
+  constraint(ALLOC_IN_RC(int_r3_reg)); // allocation restricted to the singleton R3 class
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+
 // Register R2 only
 operand iRegI_R4()
 %{
@@ -4185,6 +4220,42 @@ operand vRegD()
   interface(REG_INTER);
 %}
 
+operand vRegD_V0()
+%{
+  constraint(ALLOC_IN_RC(v0_reg)); // RegD operand pinned to v0_reg; used as Vtmp1 of encode_iso_array
+  match(RegD);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vRegD_V1()
+%{
+  constraint(ALLOC_IN_RC(v1_reg)); // RegD operand pinned to v1_reg; used as Vtmp2 of encode_iso_array
+  match(RegD);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vRegD_V2()
+%{
+  constraint(ALLOC_IN_RC(v2_reg)); // RegD operand pinned to v2_reg; used as Vtmp3 of encode_iso_array
+  match(RegD);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vRegD_V3()
+%{
+  constraint(ALLOC_IN_RC(v3_reg)); // RegD operand pinned to v3_reg; used as Vtmp4 of encode_iso_array
+  match(RegD);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Flags register, used as output of signed compare instructions
 
 // note that on AArch64 we also use this register as the output for
@@ -11691,6 +11762,25 @@ instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
   ins_pipe(pipe_class_memory);
 %}
 
+// encode char[] to byte[] in ISO_8859_1: matches an EncodeISOArray node and emits the encode_iso_array assembler sequence
+instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
+                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
+                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
+                          iRegI_R0 result, rFlagsReg cr)
+%{
+  match(Set result (EncodeISOArray src (Binary dst len)));
+  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
+         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr); // inputs, SIMD temps and flags are all declared clobbered
+  // len and the four temps use fixed-register operand types (iRegI_R3, vRegD_V0..vRegD_V3).
+  format %{ "Encode array $src,$dst,$len -> $result" %}
+  ins_encode %{
+    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
+         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
+         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister); // argument order follows MacroAssembler::encode_iso_array
+  %}
+  ins_pipe( pipe_class_memory );
+%}
+
 // ============================================================================
 // This name is KNOWN by the ADLC and cannot be changed.
 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
diff --git a/src/cpu/aarch64/vm/assembler_aarch64.hpp b/src/cpu/aarch64/vm/assembler_aarch64.hpp
index 0b003a7ce..816ff3bf3 100644
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp
@@ -2073,6 +2073,15 @@ public:
     pmull(Vd, Ta, Vn, Vm, Tb);
   }
 
+  void uqxtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) { // Emits uqxtn/uqxtn2: source Vn in arrangement Ta, destination Vd in arrangement Tb
+    starti;
+    int size_b = (int)Tb >> 1; // size specifier of the destination arrangement (low bit of Tb dropped)
+    int size_a = (int)Ta >> 1; // size specifier of the source arrangement
+    assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier"); // destination must be exactly one size step below the source
+    f(0, 31), f(Tb & 1, 30), f(0b101110, 29, 24), f(size_b, 23, 22); // bit 30 is the low bit of Tb; callers pass T16B to get the uqxtn2 (top half) form
+    f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0); // fixed opcode bits, then source register at bit 5 and destination at bit 0
+  }
+
   void rev32(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn)
   {
     starti;
diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
index a00726d37..6c2b02d99 100644
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
@@ -3613,3 +3613,64 @@ void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
   
   BLOCK_COMMENT("} char_arrays_equals");
 }
+
+// encode char[] to byte[] in ISO_8859_1; result receives the index of the first char not encoded (the initial len if every char fits)
+void MacroAssembler::encode_iso_array(Register src, Register dst,
+		      Register len, Register result,
+		      FloatRegister Vtmp1, FloatRegister Vtmp2,
+                      FloatRegister Vtmp3, FloatRegister Vtmp4)
+{
+    Label DONE, NEXT_32, LOOP_8, NEXT_8, LOOP_1, NEXT_1;
+    Register tmp1 = rscratch1; // scratch: holds the FPSR value in the SIMD loops, the loaded char in the scalar loop
+      // src, dst and len are modified in place; len counts down the chars still to encode.
+      mov(result, len);	// Save initial len
+      // Fewer than 32 chars: skip the 32-wide loop (len stays biased by -32 until LOOP_8 rebiases it).
+      subs(len, len, 32);
+      br(LT, LOOP_8);
+
+// The following code uses the SIMD 'uqxtn' and 'uqxtn2' instructions
+// to convert chars to bytes. These set the 'QC' bit in the FPSR if
+// any char could not fit in a byte, so clear the FPSR so we can test it.
+      clear_fpsr();
+      // Each pass narrows 32 chars: 64 bytes are read from src and 32 bytes are stored to dst.
+    BIND(NEXT_32);
+      ld1(Vtmp1, Vtmp2, Vtmp3, Vtmp4, T8H, src);
+      uqxtn(Vtmp1, T8B, Vtmp1, T8H);  // uqxtn  - write bottom half
+      uqxtn(Vtmp1, T16B, Vtmp2, T8H); // uqxtn2 - write top half
+      uqxtn(Vtmp2, T8B, Vtmp3, T8H);
+      uqxtn(Vtmp2, T16B, Vtmp4, T8H); // uqxtn2
+      get_fpsr(tmp1);
+      cbnzw(tmp1, LOOP_8); // some char did not fit: store nothing and retry this batch in the narrower loops
+      st1(Vtmp1, Vtmp2, T16B, post(dst, 32));
+      subs(len, len, 32);
+      add(src, src, 64); // src is advanced only after the batch has been stored
+      br(GE, NEXT_32);
+      // 8 chars per pass; also the fallback when the 32-wide loop bails out.
+    BIND(LOOP_8);
+      adds(len, len, 32-8); // rebias len from (remaining - 32) to (remaining - 8)
+      br(LT, LOOP_1);
+      clear_fpsr(); // QC may be set from loop above, clear again
+    BIND(NEXT_8);
+      ld1(Vtmp1, T8H, src);
+      uqxtn(Vtmp1, T8B, Vtmp1, T8H);
+      get_fpsr(tmp1);
+      cbnzw(tmp1, LOOP_1); // a char in this group of 8 did not fit: let the scalar loop find it
+      st1(Vtmp1, T8B, post(dst, 8));
+      subs(len, len, 8);
+      add(src, src, 16);
+      br(GE, NEXT_8);
+      // Scalar tail: one char per pass, stopping at the first char with a non-zero high byte.
+    BIND(LOOP_1);
+      adds(len, len, 8); // undo the -8 bias: len is the true remaining count again
+      br(LE, DONE);
+    BIND(NEXT_1);
+      ldrh(tmp1, Address(post(src, 2)));
+      tst(tmp1, 0xff00); // any of bits 8..15 set means the char does not fit in one byte
+      br(NE, DONE);
+      strb(tmp1, Address(post(dst, 1)));
+      subs(len, len, 1);
+      br(GT, NEXT_1);
+
+    BIND(DONE);
+      sub(result, result, len); // Return index where we stopped
+}
diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
index 77274d855..e707d9de0 100644
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
@@ -1087,6 +1087,10 @@ public:
 		     Register tmp1);
   void char_arrays_equals(Register ary1, Register ary2,
                           Register result, Register tmp1);
+  void encode_iso_array(Register src, Register dst,
+                        Register len, Register result,
+                        FloatRegister Vtmp1, FloatRegister Vtmp2,
+                        FloatRegister Vtmp3, FloatRegister Vtmp4); // encode char[] to byte[] in ISO_8859_1; result = index where encoding stopped
 };
 
 // Used by aarch64.ad to control code generation
author	Edward Nevill <edward.nevill@linaro.org>	2014-08-04 18:03:53 +0100
committer	Edward Nevill <edward.nevill@linaro.org>	2014-08-04 18:03:53 +0100
commit	3920a67639b349e65f2b55ceb5207280d0c37a49 (patch)
tree	666e93f15ad2c69eb6988e1136e819d89af82dc7
parent	5179fe4960c995b995e084ee173d46e06f4899fc (diff)