Jacob Bramley | d77a8e4 | 2019-02-12 16:52:24 +0000 | [diff] [blame] | 1 | // Copyright 2019, VIXL authors |
| 2 | // All rights reserved. |
| 3 | // |
| 4 | // Redistribution and use in source and binary forms, with or without |
| 5 | // modification, are permitted provided that the following conditions are met: |
| 6 | // |
| 7 | // * Redistributions of source code must retain the above copyright notice, |
| 8 | // this list of conditions and the following disclaimer. |
| 9 | // * Redistributions in binary form must reproduce the above copyright notice, |
| 10 | // this list of conditions and the following disclaimer in the documentation |
| 11 | // and/or other materials provided with the distribution. |
| 12 | // * Neither the name of ARM Limited nor the names of its contributors may be |
| 13 | // used to endorse or promote products derived from this software without |
| 14 | // specific prior written permission. |
| 15 | // |
| 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND |
| 17 | // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| 18 | // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 19 | // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE |
| 20 | // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 21 | // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| 22 | // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| 23 | // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 24 | // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 25 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 26 | |
| 27 | #include <sys/mman.h> |
| 28 | |
| 29 | #include <cfloat> |
| 30 | #include <cmath> |
| 31 | #include <cstdio> |
| 32 | #include <cstdlib> |
| 33 | #include <cstring> |
| 34 | |
| 35 | #include "test-runner.h" |
| 36 | #include "test-utils.h" |
| 37 | #include "aarch64/test-utils-aarch64.h" |
| 38 | |
| 39 | #include "aarch64/cpu-aarch64.h" |
| 40 | #include "aarch64/disasm-aarch64.h" |
| 41 | #include "aarch64/macro-assembler-aarch64.h" |
| 42 | #include "aarch64/simulator-aarch64.h" |
| 43 | #include "test-assembler-aarch64.h" |
| 44 | |
| 45 | namespace vixl { |
| 46 | namespace aarch64 { |
| 47 | |
Jacob Bramley | 03c0b51 | 2019-02-22 16:42:06 +0000 | [diff] [blame] | 48 | // Call masm->Insr repeatedly to allow test inputs to be set up concisely. This |
| 49 | // is optimised for call-site clarity, not generated code quality, so it doesn't |
| 50 | // exist in the MacroAssembler itself. |
| 51 | // |
| 52 | // Usage: |
| 53 | // |
| 54 | // int values[] = { 42, 43, 44 }; |
| 55 | // InsrHelper(&masm, z0.VnS(), values); // Sets z0.S = { ..., 42, 43, 44 } |
| 56 | // |
| 57 | // The rightmost (highest-indexed) array element maps to the lowest-numbered |
| 58 | // lane. |
| 59 | template <typename T, size_t N> |
| 60 | void InsrHelper(MacroAssembler* masm, |
| 61 | const ZRegister& zdn, |
| 62 | const T (&values)[N]) { |
| 63 | for (size_t i = 0; i < N; i++) { |
| 64 | masm->Insr(zdn, values[i]); |
| 65 | } |
| 66 | } |
| 67 | |
Jacob Bramley | 2eaecf1 | 2019-05-01 15:46:34 +0100 | [diff] [blame^] | 68 | // Conveniently initialise P registers. This is optimised for call-site clarity, |
| 69 | // not generated code quality. |
| 70 | // |
| 71 | // Usage: |
| 72 | // |
| 73 | // int values[] = { 0x0, 0x1, 0x2 }; |
| 74 | // Initialise(&masm, p0.VnS(), values); // Sets p0 = 0b'0000'0001'0010 |
| 75 | // |
| 76 | // The rightmost (highest-indexed) array element maps to the lowest-numbered |
| 77 | // lane. |
| 78 | // |
| 79 | // Each element of the `values` array is mapped onto a lane in `pd`. The |
| 80 | // architecture only respects the lower bit, and writes zero to the upper bits, but |
| 81 | // other (encodable) values can be specified if required by the test. |
| 82 | template <typename T, size_t N> |
| 83 | void Initialise(MacroAssembler* masm, |
| 84 | const PRegisterWithLaneSize& pd, |
| 85 | const T (&values)[N]) { |
| 86 | UseScratchRegisterScope temps(masm); |
| 87 | Register temp = temps.AcquireX(); |
| 88 | Label data; |
| 89 | Label done; |
| 90 | |
| 91 | // There is no 'insr' for P registers. The easiest way to initialise one with |
| 92 | // an arbitrary value is to load it from a literal pool. |
| 93 | |
| 94 | int p_bits_per_lane = pd.GetLaneSizeInBits() / kZRegBitsPerPRegBit; |
| 95 | VIXL_ASSERT((N * p_bits_per_lane) <= kPRegMaxSize); |
| 96 | uint64_t p_lane_mask = GetUintMask(p_bits_per_lane); |
| 97 | |
| 98 | // For most lane sizes, each value contributes less than a byte. We need to |
| 99 | // pack them into chunks which we can store directly. It's sensible for the |
| 100 | // chunk to be the same size as an instruction because we need to pad to an |
| 101 | // instruction boundary anyway. |
| 102 | typedef Instr Chunk; |
| 103 | const size_t kChunkSizeInBits = sizeof(Chunk) * kBitsPerByte; |
| 104 | VIXL_ASSERT((kPRegMaxSize % kChunkSizeInBits) == 0); |
| 105 | const size_t kPRegMaxSizeInChunks = kPRegMaxSize / kChunkSizeInBits; |
| 106 | |
| 107 | masm->Adr(temp, &data); |
| 108 | // TODO: Use `Ldr(pd, MemOperand(temp))` once available. |
| 109 | masm->Ldr(PRegister(pd.GetCode()), temp); |
| 110 | masm->B(&done); |
| 111 | { |
| 112 | ExactAssemblyScope total(masm, kPRegMaxSizeInBytes); |
| 113 | masm->bind(&data); |
| 114 | // Put the last-specified value at the lowest address. |
| 115 | int values_index = N - 1; |
| 116 | for (size_t c = 0; c < kPRegMaxSizeInChunks; c++) { |
| 117 | Chunk chunk = 0; |
| 118 | // Whilst we still have values left, use them to populate the chunk. |
| 119 | for (size_t chunk_bit = 0; |
| 120 | (chunk_bit < kChunkSizeInBits) && (values_index >= 0); |
| 121 | chunk_bit += p_bits_per_lane) { |
| 122 | Chunk value = values[values_index] & p_lane_mask; |
| 123 | VIXL_ASSERT(static_cast<T>(value) == values[values_index]); |
| 124 | chunk |= value << chunk_bit; |
| 125 | values_index--; |
| 126 | } |
| 127 | masm->dc(chunk); |
| 128 | } |
| 129 | } |
| 130 | masm->Bind(&done); |
| 131 | } |
| 132 | |
Jacob Bramley | d77a8e4 | 2019-02-12 16:52:24 +0000 | [diff] [blame] | 133 | // Ensure that basic test infrastructure works. |
Jacob Bramley | 2eaecf1 | 2019-05-01 15:46:34 +0100 | [diff] [blame^] | 134 | TEST(sve_test_infrastructure_z) { |
Jacob Bramley | d77a8e4 | 2019-02-12 16:52:24 +0000 | [diff] [blame] | 135 | SETUP_WITH_FEATURES(CPUFeatures::kSVE); |
| 136 | START(); |
| 137 | |
Jacob Bramley | 03c0b51 | 2019-02-22 16:42:06 +0000 | [diff] [blame] | 138 | __ Mov(x0, 0x0123456789abcdef); |
| 139 | |
| 140 | // Test basic `Insr` behaviour. |
| 141 | __ Insr(z0.VnB(), 1); |
| 142 | __ Insr(z0.VnB(), 2); |
| 143 | __ Insr(z0.VnB(), x0); |
| 144 | __ Insr(z0.VnB(), -42); |
| 145 | __ Insr(z0.VnB(), 0); |
| 146 | |
| 147 | // Test array inputs. |
| 148 | int z1_inputs[] = {3, 4, 5, -42, 0}; |
| 149 | InsrHelper(&masm, z1.VnH(), z1_inputs); |
| 150 | |
| 151 | // Test that sign-extension works as intended for various lane sizes. |
| 152 | __ Dup(z2.VnD(), 0); // Clear the register first. |
| 153 | __ Insr(z2.VnB(), -42); // 0xd6 |
| 154 | __ Insr(z2.VnB(), 0xfe); // 0xfe |
| 155 | __ Insr(z2.VnH(), -42); // 0xffd6 |
| 156 | __ Insr(z2.VnH(), 0xfedc); // 0xfedc |
| 157 | __ Insr(z2.VnS(), -42); // 0xffffffd6 |
| 158 | __ Insr(z2.VnS(), 0xfedcba98); // 0xfedcba98 |
| 159 | // Use another register for VnD(), so we can support 128-bit Z registers. |
| 160 | __ Insr(z3.VnD(), -42); // 0xffffffffffffffd6 |
| 161 | __ Insr(z3.VnD(), 0xfedcba9876543210); // 0xfedcba9876543210 |
| 162 | |
Jacob Bramley | d77a8e4 | 2019-02-12 16:52:24 +0000 | [diff] [blame] | 163 | END(); |
Jacob Bramley | d77a8e4 | 2019-02-12 16:52:24 +0000 | [diff] [blame] | 164 | |
Jacob Bramley | 119bd21 | 2019-04-16 10:13:09 +0100 | [diff] [blame] | 165 | if (CAN_RUN()) { |
Jacob Bramley | 03c0b51 | 2019-02-22 16:42:06 +0000 | [diff] [blame] | 166 | if (0) { |
| 167 | // TODO: Enable this once we have sufficient simulator support. |
| 168 | RUN(); |
| 169 | |
| 170 | // Test that array checks work properly on a register initialised |
| 171 | // lane-by-lane. |
| 172 | int z0_inputs_b[] = {0x01, 0x02, 0xef, 0xd6, 0x00}; |
| 173 | ASSERT_EQUAL_SVE(z0_inputs_b, z0.VnB()); |
| 174 | |
| 175 | // Test that lane-by-lane checks work properly on a register initialised |
Jacob Bramley | 2eaecf1 | 2019-05-01 15:46:34 +0100 | [diff] [blame^] | 176 | // by array. |
Jacob Bramley | 03c0b51 | 2019-02-22 16:42:06 +0000 | [diff] [blame] | 177 | for (size_t i = 0; i < ArrayLength(z1_inputs); i++) { |
| 178 | ASSERT_EQUAL_SVE_LANE(z1_inputs[i], z0.VnH(), i); |
| 179 | } |
| 180 | |
| 181 | uint64_t z2_inputs_d[] = {0x0000d6feffd6fedc, 0xffffffd6fedcba98}; |
| 182 | ASSERT_EQUAL_SVE(z2_inputs_d, z2.VnD()); |
| 183 | uint64_t z3_inputs_d[] = {0xffffffffffffffd6, 0xfedcba9876543210}; |
| 184 | ASSERT_EQUAL_SVE(z3_inputs_d, z3.VnD()); |
| 185 | } else { |
| 186 | // TODO: This normally happens in 'RUN()', so remove it once we enable the |
| 187 | // block above. |
| 188 | DISASSEMBLE(); |
| 189 | } |
Jacob Bramley | 119bd21 | 2019-04-16 10:13:09 +0100 | [diff] [blame] | 190 | } |
Jacob Bramley | d77a8e4 | 2019-02-12 16:52:24 +0000 | [diff] [blame] | 191 | |
| 192 | TEARDOWN(); |
| 193 | } |
| 194 | |
Jacob Bramley | 2eaecf1 | 2019-05-01 15:46:34 +0100 | [diff] [blame^] | 195 | // Ensure that basic test infrastructure works. |
| 196 | TEST(sve_test_infrastructure_p) { |
| 197 | SETUP_WITH_FEATURES(CPUFeatures::kSVE); |
| 198 | START(); |
| 199 | |
| 200 | // Simple cases: move boolean (0 or 1) values. |
| 201 | |
| 202 | int p0_inputs[] = {0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0}; |
| 203 | Initialise(&masm, p0.VnB(), p0_inputs); |
| 204 | |
| 205 | int p1_inputs[] = {1, 0, 1, 1, 0, 1, 1, 1}; |
| 206 | Initialise(&masm, p1.VnH(), p1_inputs); |
| 207 | |
| 208 | int p2_inputs[] = {1, 0, 0, 1}; |
| 209 | Initialise(&masm, p2.VnS(), p2_inputs); |
| 210 | |
| 211 | int p3_inputs[] = {0, 1}; |
| 212 | Initialise(&masm, p3.VnD(), p3_inputs); |
| 213 | |
| 214 | // Advanced cases: move numeric value into architecturally-ignored bits. |
| 215 | |
| 216 | // B-sized lanes get one bit in a P register, so there are no ignored bits. |
| 217 | |
| 218 | // H-sized lanes get two bits in a P register. |
| 219 | int p4_inputs[] = {0x3, 0x2, 0x1, 0x0, 0x1, 0x2, 0x3}; |
| 220 | Initialise(&masm, p4.VnH(), p4_inputs); |
| 221 | |
| 222 | // S-sized lanes get four bits in a P register. |
| 223 | int p5_inputs[] = {0xc, 0x7, 0x9, 0x6, 0xf}; |
| 224 | Initialise(&masm, p5.VnS(), p5_inputs); |
| 225 | |
| 226 | // D-sized lanes get eight bits in a P register. |
| 227 | int p6_inputs[] = {0x81, 0xcc, 0x55}; |
| 228 | Initialise(&masm, p6.VnD(), p6_inputs); |
| 229 | |
| 230 | // The largest possible P register has 32 bytes. |
| 231 | int p7_inputs[] = {0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, |
| 232 | 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, |
| 233 | 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, |
| 234 | 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f}; |
| 235 | Initialise(&masm, p7.VnD(), p7_inputs); |
| 236 | |
| 237 | END(); |
| 238 | |
| 239 | if (CAN_RUN()) { |
| 240 | if (0) { |
| 241 | // TODO: Enable this once we have sufficient simulator support. |
| 242 | RUN(); |
| 243 | |
| 244 | // Test that lane-by-lane checks work properly. |
| 245 | for (size_t i = 0; i < ArrayLength(p0_inputs); i++) { |
| 246 | ASSERT_EQUAL_SVE_LANE(p0_inputs[i], p0.VnB(), i); |
| 247 | } |
| 248 | for (size_t i = 0; i < ArrayLength(p1_inputs); i++) { |
| 249 | ASSERT_EQUAL_SVE_LANE(p1_inputs[i], p1.VnB(), i); |
| 250 | } |
| 251 | for (size_t i = 0; i < ArrayLength(p2_inputs); i++) { |
| 252 | ASSERT_EQUAL_SVE_LANE(p2_inputs[i], p2.VnB(), i); |
| 253 | } |
| 254 | for (size_t i = 0; i < ArrayLength(p3_inputs); i++) { |
| 255 | ASSERT_EQUAL_SVE_LANE(p3_inputs[i], p3.VnB(), i); |
| 256 | } |
| 257 | |
| 258 | // Test that array checks work properly on predicates initialised with a |
| 259 | // possibly-different lane size. |
| 260 | // 0b...11'10'01'00'01'10'11 |
| 261 | int p4_expected[] = {0x39, 0x1b}; |
| 262 | ASSERT_EQUAL_SVE(p4_expected, p4.VnD()); |
| 263 | |
| 264 | ASSERT_EQUAL_SVE(p5_inputs, p5.VnS()); |
| 265 | |
| 266 | // 0b...10000001'11001100'01010101 |
| 267 | int p6_expected[] = {2, 0, 0, 1, 3, 0, 3, 0, 1, 1, 1, 1}; |
| 268 | ASSERT_EQUAL_SVE(p6_expected, p6.VnH()); |
| 269 | |
| 270 | // 0b...10011100'10011101'10011110'10011111 |
| 271 | int p7_expected[] = {1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, |
| 272 | 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1}; |
| 273 | ASSERT_EQUAL_SVE(p7_expected, p7.VnB()); |
| 274 | |
| 275 | } else { |
| 276 | // TODO: This normally happens in 'RUN()', so remove it once we enable the |
| 277 | // block above. |
| 278 | DISASSEMBLE(); |
| 279 | } |
| 280 | } |
| 281 | } |
| 282 | |
Jacob Bramley | d77a8e4 | 2019-02-12 16:52:24 +0000 | [diff] [blame] | 283 | } // namespace aarch64 |
| 284 | } // namespace vixl |