blob: de77e58a98fff0842549c0b53659c60dac49a80c [file] [log] [blame]
Alexandre Ramesd3832962016-07-04 15:03:43 +01001// Copyright 2015, VIXL authors
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7// * Redistributions of source code must retain the above copyright notice,
8// this list of conditions and the following disclaimer.
9// * Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12// * Neither the name of ARM Limited nor the names of its contributors may be
13// used to endorse or promote products derived from this software without
14// specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
Pierre Langlois1e85b7f2016-08-05 14:20:36 +010027#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
Alexandre Ramesd3832962016-07-04 15:03:43 +010028
29#include <cmath>
30
Alexandre Ramesb49bdb72016-09-26 12:08:57 +010031#include "simulator-aarch64.h"
Alexandre Ramesd3832962016-07-04 15:03:43 +010032
33namespace vixl {
34namespace aarch64 {
35
Jacob Bramleyca789742018-09-13 14:25:46 +010036using vixl::internal::SimFloat16;
37
// Type predicate: reports whether T is exactly `double`.
// The primary template answers "no" for every type ...
template <typename T>
bool IsFloat64() {
  return false;
}

// ... and the explicit specialisation answers "yes" for `double` only.
template <>
bool IsFloat64<double>() {
  return true;
}
46
// Type predicate: reports whether T is exactly `float`.
// The primary template answers "no" for every type ...
template <typename T>
bool IsFloat32() {
  return false;
}

// ... and the explicit specialisation answers "yes" for `float` only.
template <>
bool IsFloat32<float>() {
  return true;
}
55
56template <typename T>
57bool IsFloat16() {
58 return false;
59}
60template <>
61bool IsFloat16<Float16>() {
62 return true;
63}
64template <>
65bool IsFloat16<SimFloat16>() {
66 return true;
67}
68
Alexandre Ramesd3832962016-07-04 15:03:43 +010069template <>
70double Simulator::FPDefaultNaN<double>() {
71 return kFP64DefaultNaN;
72}
73
74
75template <>
76float Simulator::FPDefaultNaN<float>() {
77 return kFP32DefaultNaN;
78}
79
Jacob Bramleyca789742018-09-13 14:25:46 +010080
81template <>
82SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83 return SimFloat16(kFP16DefaultNaN);
84}
85
86
Alexandre Ramesd3832962016-07-04 15:03:43 +010087double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88 if (src >= 0) {
89 return UFixedToDouble(src, fbits, round);
Martyn Capewell9e52d5b2016-11-01 17:33:36 +000090 } else if (src == INT64_MIN) {
91 return -UFixedToDouble(src, fbits, round);
Alexandre Ramesd3832962016-07-04 15:03:43 +010092 } else {
Alexandre Ramesd3832962016-07-04 15:03:43 +010093 return -UFixedToDouble(-src, fbits, round);
94 }
95}
96
97
98double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99 // An input of 0 is a special case because the result is effectively
100 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101 if (src == 0) {
102 return 0.0;
103 }
104
105 // Calculate the exponent. The highest significant bit will have the value
106 // 2^exponent.
107 const int highest_significant_bit = 63 - CountLeadingZeros(src);
108 const int64_t exponent = highest_significant_bit - fbits;
109
110 return FPRoundToDouble(0, exponent, src, round);
111}
112
113
114float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115 if (src >= 0) {
116 return UFixedToFloat(src, fbits, round);
Martyn Capewell9e52d5b2016-11-01 17:33:36 +0000117 } else if (src == INT64_MIN) {
118 return -UFixedToFloat(src, fbits, round);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100119 } else {
Alexandre Ramesd3832962016-07-04 15:03:43 +0100120 return -UFixedToFloat(-src, fbits, round);
121 }
122}
123
124
125float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126 // An input of 0 is a special case because the result is effectively
127 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128 if (src == 0) {
129 return 0.0f;
130 }
131
132 // Calculate the exponent. The highest significant bit will have the value
133 // 2^exponent.
134 const int highest_significant_bit = 63 - CountLeadingZeros(src);
135 const int32_t exponent = highest_significant_bit - fbits;
136
137 return FPRoundToFloat(0, exponent, src, round);
138}
139
140
Jacob Bramleyca789742018-09-13 14:25:46 +0100141SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
142 if (src >= 0) {
143 return UFixedToFloat16(src, fbits, round);
144 } else if (src == INT64_MIN) {
145 return -UFixedToFloat16(src, fbits, round);
146 } else {
147 return -UFixedToFloat16(-src, fbits, round);
148 }
149}
150
151
152SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
153 int fbits,
154 FPRounding round) {
155 // An input of 0 is a special case because the result is effectively
156 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
157 if (src == 0) {
158 return 0.0f;
159 }
160
161 // Calculate the exponent. The highest significant bit will have the value
162 // 2^exponent.
163 const int highest_significant_bit = 63 - CountLeadingZeros(src);
164 const int16_t exponent = highest_significant_bit - fbits;
165
166 return FPRoundToFloat16(0, exponent, src, round);
167}
168
169
Alexandre Ramesd3832962016-07-04 15:03:43 +0100170void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
171 dst.ClearForWrite(vform);
172 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000173 LoadLane(dst, vform, i, addr);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100174 addr += LaneSizeInBytesFromFormat(vform);
175 }
176}
177
178
179void Simulator::ld1(VectorFormat vform,
180 LogicVRegister dst,
181 int index,
182 uint64_t addr) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000183 LoadLane(dst, vform, index, addr);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100184}
185
186
TatWai Chong85e15102020-05-04 21:00:40 -0700187void Simulator::ld1r(VectorFormat vform,
188 VectorFormat unpack_vform,
189 LogicVRegister dst,
190 uint64_t addr,
191 bool is_signed) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000192 unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100193 dst.ClearForWrite(vform);
194 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
TatWai Chong85e15102020-05-04 21:00:40 -0700195 if (is_signed) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000196 LoadIntToLane(dst, vform, unpack_size, i, addr);
TatWai Chong85e15102020-05-04 21:00:40 -0700197 } else {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000198 LoadUintToLane(dst, vform, unpack_size, i, addr);
TatWai Chong85e15102020-05-04 21:00:40 -0700199 }
Alexandre Ramesd3832962016-07-04 15:03:43 +0100200 }
201}
202
203
TatWai Chong85e15102020-05-04 21:00:40 -0700204void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
205 ld1r(vform, vform, dst, addr);
206}
207
208
Alexandre Ramesd3832962016-07-04 15:03:43 +0100209void Simulator::ld2(VectorFormat vform,
210 LogicVRegister dst1,
211 LogicVRegister dst2,
212 uint64_t addr1) {
213 dst1.ClearForWrite(vform);
214 dst2.ClearForWrite(vform);
215 int esize = LaneSizeInBytesFromFormat(vform);
216 uint64_t addr2 = addr1 + esize;
217 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000218 LoadLane(dst1, vform, i, addr1);
219 LoadLane(dst2, vform, i, addr2);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100220 addr1 += 2 * esize;
221 addr2 += 2 * esize;
222 }
223}
224
225
226void Simulator::ld2(VectorFormat vform,
227 LogicVRegister dst1,
228 LogicVRegister dst2,
229 int index,
230 uint64_t addr1) {
231 dst1.ClearForWrite(vform);
232 dst2.ClearForWrite(vform);
233 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000234 LoadLane(dst1, vform, index, addr1);
235 LoadLane(dst2, vform, index, addr2);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100236}
237
238
239void Simulator::ld2r(VectorFormat vform,
240 LogicVRegister dst1,
241 LogicVRegister dst2,
242 uint64_t addr) {
243 dst1.ClearForWrite(vform);
244 dst2.ClearForWrite(vform);
245 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
246 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000247 LoadLane(dst1, vform, i, addr);
248 LoadLane(dst2, vform, i, addr2);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100249 }
250}
251
252
253void Simulator::ld3(VectorFormat vform,
254 LogicVRegister dst1,
255 LogicVRegister dst2,
256 LogicVRegister dst3,
257 uint64_t addr1) {
258 dst1.ClearForWrite(vform);
259 dst2.ClearForWrite(vform);
260 dst3.ClearForWrite(vform);
261 int esize = LaneSizeInBytesFromFormat(vform);
262 uint64_t addr2 = addr1 + esize;
263 uint64_t addr3 = addr2 + esize;
264 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000265 LoadLane(dst1, vform, i, addr1);
266 LoadLane(dst2, vform, i, addr2);
267 LoadLane(dst3, vform, i, addr3);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100268 addr1 += 3 * esize;
269 addr2 += 3 * esize;
270 addr3 += 3 * esize;
271 }
272}
273
274
275void Simulator::ld3(VectorFormat vform,
276 LogicVRegister dst1,
277 LogicVRegister dst2,
278 LogicVRegister dst3,
279 int index,
280 uint64_t addr1) {
281 dst1.ClearForWrite(vform);
282 dst2.ClearForWrite(vform);
283 dst3.ClearForWrite(vform);
284 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
285 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000286 LoadLane(dst1, vform, index, addr1);
287 LoadLane(dst2, vform, index, addr2);
288 LoadLane(dst3, vform, index, addr3);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100289}
290
291
292void Simulator::ld3r(VectorFormat vform,
293 LogicVRegister dst1,
294 LogicVRegister dst2,
295 LogicVRegister dst3,
296 uint64_t addr) {
297 dst1.ClearForWrite(vform);
298 dst2.ClearForWrite(vform);
299 dst3.ClearForWrite(vform);
300 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
301 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
302 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000303 LoadLane(dst1, vform, i, addr);
304 LoadLane(dst2, vform, i, addr2);
305 LoadLane(dst3, vform, i, addr3);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100306 }
307}
308
309
310void Simulator::ld4(VectorFormat vform,
311 LogicVRegister dst1,
312 LogicVRegister dst2,
313 LogicVRegister dst3,
314 LogicVRegister dst4,
315 uint64_t addr1) {
316 dst1.ClearForWrite(vform);
317 dst2.ClearForWrite(vform);
318 dst3.ClearForWrite(vform);
319 dst4.ClearForWrite(vform);
320 int esize = LaneSizeInBytesFromFormat(vform);
321 uint64_t addr2 = addr1 + esize;
322 uint64_t addr3 = addr2 + esize;
323 uint64_t addr4 = addr3 + esize;
324 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000325 LoadLane(dst1, vform, i, addr1);
326 LoadLane(dst2, vform, i, addr2);
327 LoadLane(dst3, vform, i, addr3);
328 LoadLane(dst4, vform, i, addr4);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100329 addr1 += 4 * esize;
330 addr2 += 4 * esize;
331 addr3 += 4 * esize;
332 addr4 += 4 * esize;
333 }
334}
335
336
337void Simulator::ld4(VectorFormat vform,
338 LogicVRegister dst1,
339 LogicVRegister dst2,
340 LogicVRegister dst3,
341 LogicVRegister dst4,
342 int index,
343 uint64_t addr1) {
344 dst1.ClearForWrite(vform);
345 dst2.ClearForWrite(vform);
346 dst3.ClearForWrite(vform);
347 dst4.ClearForWrite(vform);
348 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
349 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
350 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000351 LoadLane(dst1, vform, index, addr1);
352 LoadLane(dst2, vform, index, addr2);
353 LoadLane(dst3, vform, index, addr3);
354 LoadLane(dst4, vform, index, addr4);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100355}
356
357
358void Simulator::ld4r(VectorFormat vform,
359 LogicVRegister dst1,
360 LogicVRegister dst2,
361 LogicVRegister dst3,
362 LogicVRegister dst4,
363 uint64_t addr) {
364 dst1.ClearForWrite(vform);
365 dst2.ClearForWrite(vform);
366 dst3.ClearForWrite(vform);
367 dst4.ClearForWrite(vform);
368 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
369 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
370 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
371 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000372 LoadLane(dst1, vform, i, addr);
373 LoadLane(dst2, vform, i, addr2);
374 LoadLane(dst3, vform, i, addr3);
375 LoadLane(dst4, vform, i, addr4);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100376 }
377}
378
379
380void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
381 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000382 StoreLane(src, vform, i, addr);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100383 addr += LaneSizeInBytesFromFormat(vform);
384 }
385}
386
387
388void Simulator::st1(VectorFormat vform,
389 LogicVRegister src,
390 int index,
391 uint64_t addr) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000392 StoreLane(src, vform, index, addr);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100393}
394
395
396void Simulator::st2(VectorFormat vform,
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000397 LogicVRegister src,
398 LogicVRegister src2,
Alexandre Ramesd3832962016-07-04 15:03:43 +0100399 uint64_t addr) {
400 int esize = LaneSizeInBytesFromFormat(vform);
401 uint64_t addr2 = addr + esize;
402 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000403 StoreLane(src, vform, i, addr);
404 StoreLane(src2, vform, i, addr2);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100405 addr += 2 * esize;
406 addr2 += 2 * esize;
407 }
408}
409
410
411void Simulator::st2(VectorFormat vform,
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000412 LogicVRegister src,
413 LogicVRegister src2,
Alexandre Ramesd3832962016-07-04 15:03:43 +0100414 int index,
415 uint64_t addr) {
416 int esize = LaneSizeInBytesFromFormat(vform);
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000417 StoreLane(src, vform, index, addr);
418 StoreLane(src2, vform, index, addr + 1 * esize);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100419}
420
421
422void Simulator::st3(VectorFormat vform,
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000423 LogicVRegister src,
424 LogicVRegister src2,
425 LogicVRegister src3,
Alexandre Ramesd3832962016-07-04 15:03:43 +0100426 uint64_t addr) {
427 int esize = LaneSizeInBytesFromFormat(vform);
428 uint64_t addr2 = addr + esize;
429 uint64_t addr3 = addr2 + esize;
430 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000431 StoreLane(src, vform, i, addr);
432 StoreLane(src2, vform, i, addr2);
433 StoreLane(src3, vform, i, addr3);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100434 addr += 3 * esize;
435 addr2 += 3 * esize;
436 addr3 += 3 * esize;
437 }
438}
439
440
441void Simulator::st3(VectorFormat vform,
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000442 LogicVRegister src,
443 LogicVRegister src2,
444 LogicVRegister src3,
Alexandre Ramesd3832962016-07-04 15:03:43 +0100445 int index,
446 uint64_t addr) {
447 int esize = LaneSizeInBytesFromFormat(vform);
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000448 StoreLane(src, vform, index, addr);
449 StoreLane(src2, vform, index, addr + 1 * esize);
450 StoreLane(src3, vform, index, addr + 2 * esize);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100451}
452
453
454void Simulator::st4(VectorFormat vform,
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000455 LogicVRegister src,
456 LogicVRegister src2,
457 LogicVRegister src3,
458 LogicVRegister src4,
Alexandre Ramesd3832962016-07-04 15:03:43 +0100459 uint64_t addr) {
460 int esize = LaneSizeInBytesFromFormat(vform);
461 uint64_t addr2 = addr + esize;
462 uint64_t addr3 = addr2 + esize;
463 uint64_t addr4 = addr3 + esize;
464 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000465 StoreLane(src, vform, i, addr);
466 StoreLane(src2, vform, i, addr2);
467 StoreLane(src3, vform, i, addr3);
468 StoreLane(src4, vform, i, addr4);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100469 addr += 4 * esize;
470 addr2 += 4 * esize;
471 addr3 += 4 * esize;
472 addr4 += 4 * esize;
473 }
474}
475
476
477void Simulator::st4(VectorFormat vform,
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000478 LogicVRegister src,
479 LogicVRegister src2,
480 LogicVRegister src3,
481 LogicVRegister src4,
Alexandre Ramesd3832962016-07-04 15:03:43 +0100482 int index,
483 uint64_t addr) {
484 int esize = LaneSizeInBytesFromFormat(vform);
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +0000485 StoreLane(src, vform, index, addr);
486 StoreLane(src2, vform, index, addr + 1 * esize);
487 StoreLane(src3, vform, index, addr + 2 * esize);
488 StoreLane(src4, vform, index, addr + 3 * esize);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100489}
490
491
492LogicVRegister Simulator::cmp(VectorFormat vform,
493 LogicVRegister dst,
494 const LogicVRegister& src1,
495 const LogicVRegister& src2,
496 Condition cond) {
497 dst.ClearForWrite(vform);
498 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
499 int64_t sa = src1.Int(vform, i);
500 int64_t sb = src2.Int(vform, i);
501 uint64_t ua = src1.Uint(vform, i);
502 uint64_t ub = src2.Uint(vform, i);
503 bool result = false;
504 switch (cond) {
505 case eq:
506 result = (ua == ub);
507 break;
508 case ge:
509 result = (sa >= sb);
510 break;
511 case gt:
512 result = (sa > sb);
513 break;
514 case hi:
515 result = (ua > ub);
516 break;
517 case hs:
518 result = (ua >= ub);
519 break;
520 case lt:
521 result = (sa < sb);
522 break;
523 case le:
524 result = (sa <= sb);
525 break;
526 default:
527 VIXL_UNREACHABLE();
528 break;
529 }
530 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
531 }
532 return dst;
533}
534
535
536LogicVRegister Simulator::cmp(VectorFormat vform,
537 LogicVRegister dst,
538 const LogicVRegister& src1,
539 int imm,
540 Condition cond) {
541 SimVRegister temp;
542 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
543 return cmp(vform, dst, src1, imm_reg, cond);
544}
545
546
547LogicVRegister Simulator::cmptst(VectorFormat vform,
548 LogicVRegister dst,
549 const LogicVRegister& src1,
550 const LogicVRegister& src2) {
551 dst.ClearForWrite(vform);
552 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
553 uint64_t ua = src1.Uint(vform, i);
554 uint64_t ub = src2.Uint(vform, i);
555 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
556 }
557 return dst;
558}
559
560
561LogicVRegister Simulator::add(VectorFormat vform,
562 LogicVRegister dst,
563 const LogicVRegister& src1,
564 const LogicVRegister& src2) {
Martyn Capewell9e52d5b2016-11-01 17:33:36 +0000565 int lane_size = LaneSizeInBitsFromFormat(vform);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100566 dst.ClearForWrite(vform);
TatWai Chong13634762019-07-16 16:20:45 -0700567
Alexandre Ramesd3832962016-07-04 15:03:43 +0100568 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
569 // Test for unsigned saturation.
570 uint64_t ua = src1.UintLeftJustified(vform, i);
571 uint64_t ub = src2.UintLeftJustified(vform, i);
572 uint64_t ur = ua + ub;
573 if (ur < ua) {
574 dst.SetUnsignedSat(i, true);
575 }
576
577 // Test for signed saturation.
Martyn Capewell9e52d5b2016-11-01 17:33:36 +0000578 bool pos_a = (ua >> 63) == 0;
579 bool pos_b = (ub >> 63) == 0;
580 bool pos_r = (ur >> 63) == 0;
Alexandre Ramesd3832962016-07-04 15:03:43 +0100581 // If the signs of the operands are the same, but different from the result,
582 // there was an overflow.
Martyn Capewell9e52d5b2016-11-01 17:33:36 +0000583 if ((pos_a == pos_b) && (pos_a != pos_r)) {
584 dst.SetSignedSat(i, pos_a);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100585 }
Martyn Capewell9e52d5b2016-11-01 17:33:36 +0000586 dst.SetInt(vform, i, ur >> (64 - lane_size));
Alexandre Ramesd3832962016-07-04 15:03:43 +0100587 }
588 return dst;
589}
590
Jacob Bramleyb28f6172019-10-02 12:12:35 +0100591LogicVRegister Simulator::add_uint(VectorFormat vform,
592 LogicVRegister dst,
593 const LogicVRegister& src1,
594 uint64_t value) {
595 int lane_size = LaneSizeInBitsFromFormat(vform);
596 VIXL_ASSERT(IsUintN(lane_size, value));
597 dst.ClearForWrite(vform);
598 // Left-justify `value`.
599 uint64_t ub = value << (64 - lane_size);
600 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
601 // Test for unsigned saturation.
602 uint64_t ua = src1.UintLeftJustified(vform, i);
603 uint64_t ur = ua + ub;
604 if (ur < ua) {
605 dst.SetUnsignedSat(i, true);
606 }
Alexandre Ramesd3832962016-07-04 15:03:43 +0100607
Jacob Bramleyb28f6172019-10-02 12:12:35 +0100608 // Test for signed saturation.
609 // `value` is always positive, so we have an overflow if the (signed) result
610 // is smaller than the first operand.
611 if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {
612 dst.SetSignedSat(i, true);
613 }
614
615 dst.SetInt(vform, i, ur >> (64 - lane_size));
616 }
617 return dst;
Jacob Bramleyd1686cb2019-05-28 17:39:05 +0100618}
619
Alexandre Ramesd3832962016-07-04 15:03:43 +0100620LogicVRegister Simulator::addp(VectorFormat vform,
621 LogicVRegister dst,
622 const LogicVRegister& src1,
623 const LogicVRegister& src2) {
624 SimVRegister temp1, temp2;
625 uzp1(vform, temp1, src1, src2);
626 uzp2(vform, temp2, src1, src2);
627 add(vform, dst, temp1, temp2);
628 return dst;
629}
630
TatWai Chong13634762019-07-16 16:20:45 -0700631LogicVRegister Simulator::sdiv(VectorFormat vform,
632 LogicVRegister dst,
633 const LogicVRegister& src1,
634 const LogicVRegister& src2) {
635 VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
636
637 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
638 int64_t val1 = src1.Int(vform, i);
639 int64_t val2 = src2.Int(vform, i);
640 int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
641 int64_t quotient = 0;
642 if ((val1 == min_int) && (val2 == -1)) {
643 quotient = min_int;
644 } else if (val2 != 0) {
645 quotient = val1 / val2;
646 }
647 dst.SetInt(vform, i, quotient);
648 }
649
650 return dst;
651}
652
653LogicVRegister Simulator::udiv(VectorFormat vform,
654 LogicVRegister dst,
655 const LogicVRegister& src1,
656 const LogicVRegister& src2) {
657 VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
658
659 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
660 uint64_t val1 = src1.Uint(vform, i);
661 uint64_t val2 = src2.Uint(vform, i);
662 uint64_t quotient = 0;
663 if (val2 != 0) {
664 quotient = val1 / val2;
665 }
666 dst.SetUint(vform, i, quotient);
667 }
668
669 return dst;
670}
671
Alexandre Ramesd3832962016-07-04 15:03:43 +0100672
673LogicVRegister Simulator::mla(VectorFormat vform,
674 LogicVRegister dst,
Jacob Bramley22023df2019-05-14 17:55:43 +0100675 const LogicVRegister& srca,
Alexandre Ramesd3832962016-07-04 15:03:43 +0100676 const LogicVRegister& src1,
677 const LogicVRegister& src2) {
678 SimVRegister temp;
679 mul(vform, temp, src1, src2);
Jacob Bramley22023df2019-05-14 17:55:43 +0100680 add(vform, dst, srca, temp);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100681 return dst;
682}
683
684
685LogicVRegister Simulator::mls(VectorFormat vform,
686 LogicVRegister dst,
Jacob Bramley22023df2019-05-14 17:55:43 +0100687 const LogicVRegister& srca,
Alexandre Ramesd3832962016-07-04 15:03:43 +0100688 const LogicVRegister& src1,
689 const LogicVRegister& src2) {
690 SimVRegister temp;
691 mul(vform, temp, src1, src2);
Jacob Bramley22023df2019-05-14 17:55:43 +0100692 sub(vform, dst, srca, temp);
Alexandre Ramesd3832962016-07-04 15:03:43 +0100693 return dst;
694}
695
696
697LogicVRegister Simulator::mul(VectorFormat vform,
698 LogicVRegister dst,
699 const LogicVRegister& src1,
700 const LogicVRegister& src2) {
701 dst.ClearForWrite(vform);
TatWai Chong13634762019-07-16 16:20:45 -0700702
Alexandre Ramesd3832962016-07-04 15:03:43 +0100703 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
704 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
705 }
706 return dst;
707}
708
709
710LogicVRegister Simulator::mul(VectorFormat vform,
711 LogicVRegister dst,
712 const LogicVRegister& src1,
713 const LogicVRegister& src2,
714 int index) {
715 SimVRegister temp;
716 VectorFormat indexform = VectorFormatFillQ(vform);
717 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
718}
719
720
TatWai Chong13634762019-07-16 16:20:45 -0700721LogicVRegister Simulator::smulh(VectorFormat vform,
722 LogicVRegister dst,
723 const LogicVRegister& src1,
724 const LogicVRegister& src2) {
725 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
726 int64_t dst_val;
727 int64_t val1 = src1.Int(vform, i);
728 int64_t val2 = src2.Int(vform, i);
729 switch (LaneSizeInBitsFromFormat(vform)) {
730 case 8:
731 dst_val = internal::MultiplyHigh<8>(val1, val2);
732 break;
733 case 16:
734 dst_val = internal::MultiplyHigh<16>(val1, val2);
735 break;
736 case 32:
737 dst_val = internal::MultiplyHigh<32>(val1, val2);
738 break;
739 case 64:
740 dst_val = internal::MultiplyHigh<64>(val1, val2);
741 break;
742 default:
743 dst_val = 0xbadbeef;
744 VIXL_UNREACHABLE();
745 break;
746 }
747 dst.SetInt(vform, i, dst_val);
748 }
749 return dst;
750}
751
752
753LogicVRegister Simulator::umulh(VectorFormat vform,
754 LogicVRegister dst,
755 const LogicVRegister& src1,
756 const LogicVRegister& src2) {
757 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
758 uint64_t dst_val;
759 uint64_t val1 = src1.Uint(vform, i);
760 uint64_t val2 = src2.Uint(vform, i);
761 switch (LaneSizeInBitsFromFormat(vform)) {
762 case 8:
763 dst_val = internal::MultiplyHigh<8>(val1, val2);
764 break;
765 case 16:
766 dst_val = internal::MultiplyHigh<16>(val1, val2);
767 break;
768 case 32:
769 dst_val = internal::MultiplyHigh<32>(val1, val2);
770 break;
771 case 64:
772 dst_val = internal::MultiplyHigh<64>(val1, val2);
773 break;
774 default:
775 dst_val = 0xbadbeef;
776 VIXL_UNREACHABLE();
777 break;
778 }
779 dst.SetUint(vform, i, dst_val);
780 }
781 return dst;
782}
783
784
Alexandre Ramesd3832962016-07-04 15:03:43 +0100785LogicVRegister Simulator::mla(VectorFormat vform,
786 LogicVRegister dst,
787 const LogicVRegister& src1,
788 const LogicVRegister& src2,
789 int index) {
790 SimVRegister temp;
791 VectorFormat indexform = VectorFormatFillQ(vform);
Jacob Bramley22023df2019-05-14 17:55:43 +0100792 return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
Alexandre Ramesd3832962016-07-04 15:03:43 +0100793}
794
795
796LogicVRegister Simulator::mls(VectorFormat vform,
797 LogicVRegister dst,
798 const LogicVRegister& src1,
799 const LogicVRegister& src2,
800 int index) {
801 SimVRegister temp;
802 VectorFormat indexform = VectorFormatFillQ(vform);
Jacob Bramley22023df2019-05-14 17:55:43 +0100803 return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
Alexandre Ramesd3832962016-07-04 15:03:43 +0100804}
805
806
807LogicVRegister Simulator::smull(VectorFormat vform,
808 LogicVRegister dst,
809 const LogicVRegister& src1,
810 const LogicVRegister& src2,
811 int index) {
812 SimVRegister temp;
813 VectorFormat indexform =
814 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
815 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
816}
817
818
819LogicVRegister Simulator::smull2(VectorFormat vform,
820 LogicVRegister dst,
821 const LogicVRegister& src1,
822 const LogicVRegister& src2,
823 int index) {
824 SimVRegister temp;
825 VectorFormat indexform =
826 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
827 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
828}
829
830
831LogicVRegister Simulator::umull(VectorFormat vform,
832 LogicVRegister dst,
833 const LogicVRegister& src1,
834 const LogicVRegister& src2,
835 int index) {
836 SimVRegister temp;
837 VectorFormat indexform =
838 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
839 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
840}
841
842
843LogicVRegister Simulator::umull2(VectorFormat vform,
844 LogicVRegister dst,
845 const LogicVRegister& src1,
846 const LogicVRegister& src2,
847 int index) {
848 SimVRegister temp;
849 VectorFormat indexform =
850 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
851 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
852}
853
854
855LogicVRegister Simulator::smlal(VectorFormat vform,
856 LogicVRegister dst,
857 const LogicVRegister& src1,
858 const LogicVRegister& src2,
859 int index) {
860 SimVRegister temp;
861 VectorFormat indexform =
862 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
863 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
864}
865
866
867LogicVRegister Simulator::smlal2(VectorFormat vform,
868 LogicVRegister dst,
869 const LogicVRegister& src1,
870 const LogicVRegister& src2,
871 int index) {
872 SimVRegister temp;
873 VectorFormat indexform =
874 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
875 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
876}
877
878
879LogicVRegister Simulator::umlal(VectorFormat vform,
880 LogicVRegister dst,
881 const LogicVRegister& src1,
882 const LogicVRegister& src2,
883 int index) {
884 SimVRegister temp;
885 VectorFormat indexform =
886 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
887 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
888}
889
890
891LogicVRegister Simulator::umlal2(VectorFormat vform,
892 LogicVRegister dst,
893 const LogicVRegister& src1,
894 const LogicVRegister& src2,
895 int index) {
896 SimVRegister temp;
897 VectorFormat indexform =
898 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
899 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
900}
901
902
903LogicVRegister Simulator::smlsl(VectorFormat vform,
904 LogicVRegister dst,
905 const LogicVRegister& src1,
906 const LogicVRegister& src2,
907 int index) {
908 SimVRegister temp;
909 VectorFormat indexform =
910 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
911 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
912}
913
914
915LogicVRegister Simulator::smlsl2(VectorFormat vform,
916 LogicVRegister dst,
917 const LogicVRegister& src1,
918 const LogicVRegister& src2,
919 int index) {
920 SimVRegister temp;
921 VectorFormat indexform =
922 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
923 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
924}
925
926
927LogicVRegister Simulator::umlsl(VectorFormat vform,
928 LogicVRegister dst,
929 const LogicVRegister& src1,
930 const LogicVRegister& src2,
931 int index) {
932 SimVRegister temp;
933 VectorFormat indexform =
934 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
935 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
936}
937
938
939LogicVRegister Simulator::umlsl2(VectorFormat vform,
940 LogicVRegister dst,
941 const LogicVRegister& src1,
942 const LogicVRegister& src2,
943 int index) {
944 SimVRegister temp;
945 VectorFormat indexform =
946 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
947 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
948}
949
950
951LogicVRegister Simulator::sqdmull(VectorFormat vform,
952 LogicVRegister dst,
953 const LogicVRegister& src1,
954 const LogicVRegister& src2,
955 int index) {
956 SimVRegister temp;
957 VectorFormat indexform =
958 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
959 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
960}
961
962
963LogicVRegister Simulator::sqdmull2(VectorFormat vform,
964 LogicVRegister dst,
965 const LogicVRegister& src1,
966 const LogicVRegister& src2,
967 int index) {
968 SimVRegister temp;
969 VectorFormat indexform =
970 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
971 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
972}
973
974
975LogicVRegister Simulator::sqdmlal(VectorFormat vform,
976 LogicVRegister dst,
977 const LogicVRegister& src1,
978 const LogicVRegister& src2,
979 int index) {
980 SimVRegister temp;
981 VectorFormat indexform =
982 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
983 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
984}
985
986
987LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
988 LogicVRegister dst,
989 const LogicVRegister& src1,
990 const LogicVRegister& src2,
991 int index) {
992 SimVRegister temp;
993 VectorFormat indexform =
994 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
995 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
996}
997
998
999LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
1000 LogicVRegister dst,
1001 const LogicVRegister& src1,
1002 const LogicVRegister& src2,
1003 int index) {
1004 SimVRegister temp;
1005 VectorFormat indexform =
1006 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1007 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1008}
1009
1010
1011LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
1012 LogicVRegister dst,
1013 const LogicVRegister& src1,
1014 const LogicVRegister& src2,
1015 int index) {
1016 SimVRegister temp;
1017 VectorFormat indexform =
1018 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1019 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1020}
1021
1022
1023LogicVRegister Simulator::sqdmulh(VectorFormat vform,
1024 LogicVRegister dst,
1025 const LogicVRegister& src1,
1026 const LogicVRegister& src2,
1027 int index) {
1028 SimVRegister temp;
1029 VectorFormat indexform = VectorFormatFillQ(vform);
1030 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1031}
1032
1033
1034LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
1035 LogicVRegister dst,
1036 const LogicVRegister& src1,
1037 const LogicVRegister& src2,
1038 int index) {
1039 SimVRegister temp;
1040 VectorFormat indexform = VectorFormatFillQ(vform);
1041 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1042}
1043
1044
Alexander Gilday560332d2018-04-05 13:25:17 +01001045LogicVRegister Simulator::sdot(VectorFormat vform,
1046 LogicVRegister dst,
1047 const LogicVRegister& src1,
1048 const LogicVRegister& src2,
1049 int index) {
1050 SimVRegister temp;
TatWai Chongfa3f6bf2020-03-13 00:22:03 -07001051 // NEON indexed `dot` allows the index value exceed the register size.
1052 // Promote the format to Q-sized vector format before the duplication.
1053 dup_elements_to_segments(IsSVEFormat(vform) ? vform
1054 : VectorFormatFillQ(vform),
1055 temp,
1056 src2,
1057 index);
1058 return sdot(vform, dst, src1, temp);
Alexander Gilday560332d2018-04-05 13:25:17 +01001059}
1060
1061
Alexander Gilday43785642018-04-04 13:42:33 +01001062LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
1063 LogicVRegister dst,
1064 const LogicVRegister& src1,
1065 const LogicVRegister& src2,
1066 int index) {
1067 SimVRegister temp;
1068 VectorFormat indexform = VectorFormatFillQ(vform);
1069 return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
1070}
1071
1072
Alexander Gilday560332d2018-04-05 13:25:17 +01001073LogicVRegister Simulator::udot(VectorFormat vform,
1074 LogicVRegister dst,
1075 const LogicVRegister& src1,
1076 const LogicVRegister& src2,
1077 int index) {
1078 SimVRegister temp;
TatWai Chongfa3f6bf2020-03-13 00:22:03 -07001079 // NEON indexed `dot` allows the index value exceed the register size.
1080 // Promote the format to Q-sized vector format before the duplication.
1081 dup_elements_to_segments(IsSVEFormat(vform) ? vform
1082 : VectorFormatFillQ(vform),
1083 temp,
1084 src2,
1085 index);
1086 return udot(vform, dst, src1, temp);
Alexander Gilday560332d2018-04-05 13:25:17 +01001087}
1088
1089
Alexander Gilday43785642018-04-04 13:42:33 +01001090LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
1091 LogicVRegister dst,
1092 const LogicVRegister& src1,
1093 const LogicVRegister& src2,
1094 int index) {
1095 SimVRegister temp;
1096 VectorFormat indexform = VectorFormatFillQ(vform);
1097 return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1098}
1099
1100
Alexandre Rames868bfc42016-07-19 17:10:48 +01001101uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const {
Alexandre Ramesd3832962016-07-04 15:03:43 +01001102 uint16_t result = 0;
1103 uint16_t extended_op2 = op2;
1104 for (int i = 0; i < 8; ++i) {
1105 if ((op1 >> i) & 1) {
1106 result = result ^ (extended_op2 << i);
1107 }
1108 }
1109 return result;
1110}
1111
1112
1113LogicVRegister Simulator::pmul(VectorFormat vform,
1114 LogicVRegister dst,
1115 const LogicVRegister& src1,
1116 const LogicVRegister& src2) {
1117 dst.ClearForWrite(vform);
1118 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1119 dst.SetUint(vform,
1120 i,
1121 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
1122 }
1123 return dst;
1124}
1125
1126
1127LogicVRegister Simulator::pmull(VectorFormat vform,
1128 LogicVRegister dst,
1129 const LogicVRegister& src1,
1130 const LogicVRegister& src2) {
1131 VectorFormat vform_src = VectorFormatHalfWidth(vform);
1132 dst.ClearForWrite(vform);
1133 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1134 dst.SetUint(vform,
1135 i,
1136 PolynomialMult(src1.Uint(vform_src, i),
1137 src2.Uint(vform_src, i)));
1138 }
1139 return dst;
1140}
1141
1142
1143LogicVRegister Simulator::pmull2(VectorFormat vform,
1144 LogicVRegister dst,
1145 const LogicVRegister& src1,
1146 const LogicVRegister& src2) {
1147 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
1148 dst.ClearForWrite(vform);
1149 int lane_count = LaneCountFromFormat(vform);
1150 for (int i = 0; i < lane_count; i++) {
1151 dst.SetUint(vform,
1152 i,
1153 PolynomialMult(src1.Uint(vform_src, lane_count + i),
1154 src2.Uint(vform_src, lane_count + i)));
1155 }
1156 return dst;
1157}
1158
1159
1160LogicVRegister Simulator::sub(VectorFormat vform,
1161 LogicVRegister dst,
1162 const LogicVRegister& src1,
1163 const LogicVRegister& src2) {
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00001164 int lane_size = LaneSizeInBitsFromFormat(vform);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001165 dst.ClearForWrite(vform);
1166 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1167 // Test for unsigned saturation.
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00001168 uint64_t ua = src1.UintLeftJustified(vform, i);
1169 uint64_t ub = src2.UintLeftJustified(vform, i);
1170 uint64_t ur = ua - ub;
1171 if (ub > ua) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01001172 dst.SetUnsignedSat(i, false);
1173 }
1174
1175 // Test for signed saturation.
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00001176 bool pos_a = (ua >> 63) == 0;
1177 bool pos_b = (ub >> 63) == 0;
1178 bool pos_r = (ur >> 63) == 0;
Alexandre Ramesd3832962016-07-04 15:03:43 +01001179 // If the signs of the operands are different, and the sign of the first
1180 // operand doesn't match the result, there was an overflow.
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00001181 if ((pos_a != pos_b) && (pos_a != pos_r)) {
1182 dst.SetSignedSat(i, pos_a);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001183 }
1184
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00001185 dst.SetInt(vform, i, ur >> (64 - lane_size));
Alexandre Ramesd3832962016-07-04 15:03:43 +01001186 }
1187 return dst;
1188}
1189
Jacob Bramleyb28f6172019-10-02 12:12:35 +01001190LogicVRegister Simulator::sub_uint(VectorFormat vform,
1191 LogicVRegister dst,
1192 const LogicVRegister& src1,
1193 uint64_t value) {
1194 int lane_size = LaneSizeInBitsFromFormat(vform);
1195 VIXL_ASSERT(IsUintN(lane_size, value));
1196 dst.ClearForWrite(vform);
1197 // Left-justify `value`.
1198 uint64_t ub = value << (64 - lane_size);
1199 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1200 // Test for unsigned saturation.
1201 uint64_t ua = src1.UintLeftJustified(vform, i);
1202 uint64_t ur = ua - ub;
1203 if (ub > ua) {
1204 dst.SetUnsignedSat(i, false);
1205 }
Alexandre Ramesd3832962016-07-04 15:03:43 +01001206
Jacob Bramleyb28f6172019-10-02 12:12:35 +01001207 // Test for signed saturation.
1208 // `value` is always positive, so we have an overflow if the (signed) result
1209 // is greater than the first operand.
1210 if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {
1211 dst.SetSignedSat(i, false);
1212 }
1213
1214 dst.SetInt(vform, i, ur >> (64 - lane_size));
1215 }
1216 return dst;
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01001217}
1218
Alexandre Ramesd3832962016-07-04 15:03:43 +01001219LogicVRegister Simulator::and_(VectorFormat vform,
1220 LogicVRegister dst,
1221 const LogicVRegister& src1,
1222 const LogicVRegister& src2) {
1223 dst.ClearForWrite(vform);
1224 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1225 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1226 }
1227 return dst;
1228}
1229
1230
1231LogicVRegister Simulator::orr(VectorFormat vform,
1232 LogicVRegister dst,
1233 const LogicVRegister& src1,
1234 const LogicVRegister& src2) {
1235 dst.ClearForWrite(vform);
1236 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1237 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1238 }
1239 return dst;
1240}
1241
1242
1243LogicVRegister Simulator::orn(VectorFormat vform,
1244 LogicVRegister dst,
1245 const LogicVRegister& src1,
1246 const LogicVRegister& src2) {
1247 dst.ClearForWrite(vform);
1248 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1249 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1250 }
1251 return dst;
1252}
1253
1254
1255LogicVRegister Simulator::eor(VectorFormat vform,
1256 LogicVRegister dst,
1257 const LogicVRegister& src1,
1258 const LogicVRegister& src2) {
1259 dst.ClearForWrite(vform);
1260 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1261 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1262 }
1263 return dst;
1264}
1265
1266
1267LogicVRegister Simulator::bic(VectorFormat vform,
1268 LogicVRegister dst,
1269 const LogicVRegister& src1,
1270 const LogicVRegister& src2) {
1271 dst.ClearForWrite(vform);
1272 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1273 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1274 }
1275 return dst;
1276}
1277
1278
1279LogicVRegister Simulator::bic(VectorFormat vform,
1280 LogicVRegister dst,
1281 const LogicVRegister& src,
1282 uint64_t imm) {
1283 uint64_t result[16];
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00001284 int lane_count = LaneCountFromFormat(vform);
1285 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01001286 result[i] = src.Uint(vform, i) & ~imm;
1287 }
1288 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00001289 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01001290 dst.SetUint(vform, i, result[i]);
1291 }
1292 return dst;
1293}
1294
1295
1296LogicVRegister Simulator::bif(VectorFormat vform,
1297 LogicVRegister dst,
1298 const LogicVRegister& src1,
1299 const LogicVRegister& src2) {
1300 dst.ClearForWrite(vform);
1301 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1302 uint64_t operand1 = dst.Uint(vform, i);
1303 uint64_t operand2 = ~src2.Uint(vform, i);
1304 uint64_t operand3 = src1.Uint(vform, i);
1305 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1306 dst.SetUint(vform, i, result);
1307 }
1308 return dst;
1309}
1310
1311
1312LogicVRegister Simulator::bit(VectorFormat vform,
1313 LogicVRegister dst,
1314 const LogicVRegister& src1,
1315 const LogicVRegister& src2) {
1316 dst.ClearForWrite(vform);
1317 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1318 uint64_t operand1 = dst.Uint(vform, i);
1319 uint64_t operand2 = src2.Uint(vform, i);
1320 uint64_t operand3 = src1.Uint(vform, i);
1321 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1322 dst.SetUint(vform, i, result);
1323 }
1324 return dst;
1325}
1326
1327
1328LogicVRegister Simulator::bsl(VectorFormat vform,
1329 LogicVRegister dst,
1330 const LogicVRegister& src1,
1331 const LogicVRegister& src2) {
1332 dst.ClearForWrite(vform);
1333 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1334 uint64_t operand1 = src2.Uint(vform, i);
1335 uint64_t operand2 = dst.Uint(vform, i);
1336 uint64_t operand3 = src1.Uint(vform, i);
1337 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1338 dst.SetUint(vform, i, result);
1339 }
1340 return dst;
1341}
1342
1343
1344LogicVRegister Simulator::sminmax(VectorFormat vform,
1345 LogicVRegister dst,
1346 const LogicVRegister& src1,
1347 const LogicVRegister& src2,
1348 bool max) {
1349 dst.ClearForWrite(vform);
1350 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1351 int64_t src1_val = src1.Int(vform, i);
1352 int64_t src2_val = src2.Int(vform, i);
1353 int64_t dst_val;
Martyn Capewell491a5752016-10-18 13:45:55 +01001354 if (max) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01001355 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1356 } else {
1357 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1358 }
1359 dst.SetInt(vform, i, dst_val);
1360 }
1361 return dst;
1362}
1363
1364
1365LogicVRegister Simulator::smax(VectorFormat vform,
1366 LogicVRegister dst,
1367 const LogicVRegister& src1,
1368 const LogicVRegister& src2) {
1369 return sminmax(vform, dst, src1, src2, true);
1370}
1371
1372
1373LogicVRegister Simulator::smin(VectorFormat vform,
1374 LogicVRegister dst,
1375 const LogicVRegister& src1,
1376 const LogicVRegister& src2) {
1377 return sminmax(vform, dst, src1, src2, false);
1378}
1379
1380
1381LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1382 LogicVRegister dst,
Martyn Capewellb953ea82016-10-20 19:20:17 +01001383 const LogicVRegister& src1,
1384 const LogicVRegister& src2,
Alexandre Ramesd3832962016-07-04 15:03:43 +01001385 bool max) {
Martyn Capewellb953ea82016-10-20 19:20:17 +01001386 int lanes = LaneCountFromFormat(vform);
1387 int64_t result[kMaxLanesPerVector];
1388 const LogicVRegister* src = &src1;
1389 for (int j = 0; j < 2; j++) {
1390 for (int i = 0; i < lanes; i += 2) {
1391 int64_t first_val = src->Int(vform, i);
1392 int64_t second_val = src->Int(vform, i + 1);
1393 int64_t dst_val;
1394 if (max) {
1395 dst_val = (first_val > second_val) ? first_val : second_val;
1396 } else {
1397 dst_val = (first_val < second_val) ? first_val : second_val;
1398 }
1399 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1400 result[(i >> 1) + (j * lanes / 2)] = dst_val;
Alexandre Ramesd3832962016-07-04 15:03:43 +01001401 }
Martyn Capewellb953ea82016-10-20 19:20:17 +01001402 src = &src2;
Alexandre Ramesd3832962016-07-04 15:03:43 +01001403 }
Martyn Capewellb953ea82016-10-20 19:20:17 +01001404 dst.SetIntArray(vform, result);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001405 return dst;
1406}
1407
1408
1409LogicVRegister Simulator::smaxp(VectorFormat vform,
1410 LogicVRegister dst,
1411 const LogicVRegister& src1,
1412 const LogicVRegister& src2) {
Martyn Capewellb953ea82016-10-20 19:20:17 +01001413 return sminmaxp(vform, dst, src1, src2, true);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001414}
1415
1416
1417LogicVRegister Simulator::sminp(VectorFormat vform,
1418 LogicVRegister dst,
1419 const LogicVRegister& src1,
1420 const LogicVRegister& src2) {
Martyn Capewellb953ea82016-10-20 19:20:17 +01001421 return sminmaxp(vform, dst, src1, src2, false);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001422}
1423
1424
1425LogicVRegister Simulator::addp(VectorFormat vform,
1426 LogicVRegister dst,
1427 const LogicVRegister& src) {
1428 VIXL_ASSERT(vform == kFormatD);
1429
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00001430 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001431 dst.ClearForWrite(vform);
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00001432 dst.SetUint(vform, 0, dst_val);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001433 return dst;
1434}
1435
1436
1437LogicVRegister Simulator::addv(VectorFormat vform,
1438 LogicVRegister dst,
1439 const LogicVRegister& src) {
1440 VectorFormat vform_dst =
1441 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1442
1443
1444 int64_t dst_val = 0;
1445 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1446 dst_val += src.Int(vform, i);
1447 }
1448
1449 dst.ClearForWrite(vform_dst);
1450 dst.SetInt(vform_dst, 0, dst_val);
1451 return dst;
1452}
1453
1454
1455LogicVRegister Simulator::saddlv(VectorFormat vform,
1456 LogicVRegister dst,
1457 const LogicVRegister& src) {
1458 VectorFormat vform_dst =
1459 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1460
1461 int64_t dst_val = 0;
1462 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1463 dst_val += src.Int(vform, i);
1464 }
1465
1466 dst.ClearForWrite(vform_dst);
1467 dst.SetInt(vform_dst, 0, dst_val);
1468 return dst;
1469}
1470
1471
1472LogicVRegister Simulator::uaddlv(VectorFormat vform,
1473 LogicVRegister dst,
1474 const LogicVRegister& src) {
1475 VectorFormat vform_dst =
1476 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1477
1478 uint64_t dst_val = 0;
1479 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1480 dst_val += src.Uint(vform, i);
1481 }
1482
1483 dst.ClearForWrite(vform_dst);
1484 dst.SetUint(vform_dst, 0, dst_val);
1485 return dst;
1486}
1487
1488
1489LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1490 LogicVRegister dst,
TatWai Chongb2d8d1f2019-10-21 15:19:31 -07001491 const LogicPRegister& pg,
Alexandre Ramesd3832962016-07-04 15:03:43 +01001492 const LogicVRegister& src,
1493 bool max) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01001494 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1495 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
TatWai Chongb2d8d1f2019-10-21 15:19:31 -07001496 if (!pg.IsActive(vform, i)) continue;
1497
Alexandre Ramesd3832962016-07-04 15:03:43 +01001498 int64_t src_val = src.Int(vform, i);
Martyn Capewell491a5752016-10-18 13:45:55 +01001499 if (max) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01001500 dst_val = (src_val > dst_val) ? src_val : dst_val;
1501 } else {
1502 dst_val = (src_val < dst_val) ? src_val : dst_val;
1503 }
1504 }
Martyn Capewell491a5752016-10-18 13:45:55 +01001505 dst.ClearForWrite(ScalarFormatFromFormat(vform));
Alexandre Ramesd3832962016-07-04 15:03:43 +01001506 dst.SetInt(vform, 0, dst_val);
1507 return dst;
1508}
1509
1510
1511LogicVRegister Simulator::smaxv(VectorFormat vform,
1512 LogicVRegister dst,
1513 const LogicVRegister& src) {
TatWai Chongb2d8d1f2019-10-21 15:19:31 -07001514 sminmaxv(vform, dst, GetPTrue(), src, true);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001515 return dst;
1516}
1517
1518
1519LogicVRegister Simulator::sminv(VectorFormat vform,
1520 LogicVRegister dst,
1521 const LogicVRegister& src) {
TatWai Chongb2d8d1f2019-10-21 15:19:31 -07001522 sminmaxv(vform, dst, GetPTrue(), src, false);
1523 return dst;
1524}
1525
1526
1527LogicVRegister Simulator::smaxv(VectorFormat vform,
1528 LogicVRegister dst,
1529 const LogicPRegister& pg,
1530 const LogicVRegister& src) {
1531 VIXL_ASSERT(IsSVEFormat(vform));
1532 sminmaxv(vform, dst, pg, src, true);
1533 return dst;
1534}
1535
1536
1537LogicVRegister Simulator::sminv(VectorFormat vform,
1538 LogicVRegister dst,
1539 const LogicPRegister& pg,
1540 const LogicVRegister& src) {
1541 VIXL_ASSERT(IsSVEFormat(vform));
1542 sminmaxv(vform, dst, pg, src, false);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001543 return dst;
1544}
1545
1546
1547LogicVRegister Simulator::uminmax(VectorFormat vform,
1548 LogicVRegister dst,
1549 const LogicVRegister& src1,
1550 const LogicVRegister& src2,
1551 bool max) {
1552 dst.ClearForWrite(vform);
1553 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1554 uint64_t src1_val = src1.Uint(vform, i);
1555 uint64_t src2_val = src2.Uint(vform, i);
1556 uint64_t dst_val;
Martyn Capewell491a5752016-10-18 13:45:55 +01001557 if (max) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01001558 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1559 } else {
1560 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1561 }
1562 dst.SetUint(vform, i, dst_val);
1563 }
1564 return dst;
1565}
1566
1567
1568LogicVRegister Simulator::umax(VectorFormat vform,
1569 LogicVRegister dst,
1570 const LogicVRegister& src1,
1571 const LogicVRegister& src2) {
1572 return uminmax(vform, dst, src1, src2, true);
1573}
1574
1575
1576LogicVRegister Simulator::umin(VectorFormat vform,
1577 LogicVRegister dst,
1578 const LogicVRegister& src1,
1579 const LogicVRegister& src2) {
1580 return uminmax(vform, dst, src1, src2, false);
1581}
1582
1583
1584LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1585 LogicVRegister dst,
Martyn Capewellb953ea82016-10-20 19:20:17 +01001586 const LogicVRegister& src1,
1587 const LogicVRegister& src2,
Alexandre Ramesd3832962016-07-04 15:03:43 +01001588 bool max) {
Martyn Capewellb953ea82016-10-20 19:20:17 +01001589 int lanes = LaneCountFromFormat(vform);
1590 uint64_t result[kMaxLanesPerVector];
1591 const LogicVRegister* src = &src1;
1592 for (int j = 0; j < 2; j++) {
1593 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1594 uint64_t first_val = src->Uint(vform, i);
1595 uint64_t second_val = src->Uint(vform, i + 1);
1596 uint64_t dst_val;
1597 if (max) {
1598 dst_val = (first_val > second_val) ? first_val : second_val;
1599 } else {
1600 dst_val = (first_val < second_val) ? first_val : second_val;
1601 }
1602 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1603 result[(i >> 1) + (j * lanes / 2)] = dst_val;
Alexandre Ramesd3832962016-07-04 15:03:43 +01001604 }
Martyn Capewellb953ea82016-10-20 19:20:17 +01001605 src = &src2;
Alexandre Ramesd3832962016-07-04 15:03:43 +01001606 }
Martyn Capewellb953ea82016-10-20 19:20:17 +01001607 dst.SetUintArray(vform, result);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001608 return dst;
1609}
1610
1611
1612LogicVRegister Simulator::umaxp(VectorFormat vform,
1613 LogicVRegister dst,
1614 const LogicVRegister& src1,
1615 const LogicVRegister& src2) {
Martyn Capewellb953ea82016-10-20 19:20:17 +01001616 return uminmaxp(vform, dst, src1, src2, true);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001617}
1618
1619
1620LogicVRegister Simulator::uminp(VectorFormat vform,
1621 LogicVRegister dst,
1622 const LogicVRegister& src1,
1623 const LogicVRegister& src2) {
Martyn Capewellb953ea82016-10-20 19:20:17 +01001624 return uminmaxp(vform, dst, src1, src2, false);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001625}
1626
1627
1628LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1629 LogicVRegister dst,
TatWai Chongb2d8d1f2019-10-21 15:19:31 -07001630 const LogicPRegister& pg,
Alexandre Ramesd3832962016-07-04 15:03:43 +01001631 const LogicVRegister& src,
1632 bool max) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01001633 uint64_t dst_val = max ? 0 : UINT64_MAX;
1634 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
TatWai Chongb2d8d1f2019-10-21 15:19:31 -07001635 if (!pg.IsActive(vform, i)) continue;
1636
Alexandre Ramesd3832962016-07-04 15:03:43 +01001637 uint64_t src_val = src.Uint(vform, i);
Martyn Capewell491a5752016-10-18 13:45:55 +01001638 if (max) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01001639 dst_val = (src_val > dst_val) ? src_val : dst_val;
1640 } else {
1641 dst_val = (src_val < dst_val) ? src_val : dst_val;
1642 }
1643 }
Martyn Capewell491a5752016-10-18 13:45:55 +01001644 dst.ClearForWrite(ScalarFormatFromFormat(vform));
Alexandre Ramesd3832962016-07-04 15:03:43 +01001645 dst.SetUint(vform, 0, dst_val);
1646 return dst;
1647}
1648
1649
1650LogicVRegister Simulator::umaxv(VectorFormat vform,
1651 LogicVRegister dst,
1652 const LogicVRegister& src) {
TatWai Chongb2d8d1f2019-10-21 15:19:31 -07001653 uminmaxv(vform, dst, GetPTrue(), src, true);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001654 return dst;
1655}
1656
1657
1658LogicVRegister Simulator::uminv(VectorFormat vform,
1659 LogicVRegister dst,
1660 const LogicVRegister& src) {
TatWai Chongb2d8d1f2019-10-21 15:19:31 -07001661 uminmaxv(vform, dst, GetPTrue(), src, false);
1662 return dst;
1663}
1664
1665
1666LogicVRegister Simulator::umaxv(VectorFormat vform,
1667 LogicVRegister dst,
1668 const LogicPRegister& pg,
1669 const LogicVRegister& src) {
1670 VIXL_ASSERT(IsSVEFormat(vform));
1671 uminmaxv(vform, dst, pg, src, true);
1672 return dst;
1673}
1674
1675
1676LogicVRegister Simulator::uminv(VectorFormat vform,
1677 LogicVRegister dst,
1678 const LogicPRegister& pg,
1679 const LogicVRegister& src) {
1680 VIXL_ASSERT(IsSVEFormat(vform));
1681 uminmaxv(vform, dst, pg, src, false);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001682 return dst;
1683}
1684
1685
1686LogicVRegister Simulator::shl(VectorFormat vform,
1687 LogicVRegister dst,
1688 const LogicVRegister& src,
1689 int shift) {
1690 VIXL_ASSERT(shift >= 0);
1691 SimVRegister temp;
1692 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1693 return ushl(vform, dst, src, shiftreg);
1694}
1695
1696
1697LogicVRegister Simulator::sshll(VectorFormat vform,
1698 LogicVRegister dst,
1699 const LogicVRegister& src,
1700 int shift) {
1701 VIXL_ASSERT(shift >= 0);
1702 SimVRegister temp1, temp2;
1703 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1704 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1705 return sshl(vform, dst, extendedreg, shiftreg);
1706}
1707
1708
1709LogicVRegister Simulator::sshll2(VectorFormat vform,
1710 LogicVRegister dst,
1711 const LogicVRegister& src,
1712 int shift) {
1713 VIXL_ASSERT(shift >= 0);
1714 SimVRegister temp1, temp2;
1715 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1716 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1717 return sshl(vform, dst, extendedreg, shiftreg);
1718}
1719
1720
1721LogicVRegister Simulator::shll(VectorFormat vform,
1722 LogicVRegister dst,
1723 const LogicVRegister& src) {
1724 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1725 return sshll(vform, dst, src, shift);
1726}
1727
1728
1729LogicVRegister Simulator::shll2(VectorFormat vform,
1730 LogicVRegister dst,
1731 const LogicVRegister& src) {
1732 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1733 return sshll2(vform, dst, src, shift);
1734}
1735
1736
1737LogicVRegister Simulator::ushll(VectorFormat vform,
1738 LogicVRegister dst,
1739 const LogicVRegister& src,
1740 int shift) {
1741 VIXL_ASSERT(shift >= 0);
1742 SimVRegister temp1, temp2;
1743 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1744 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1745 return ushl(vform, dst, extendedreg, shiftreg);
1746}
1747
1748
1749LogicVRegister Simulator::ushll2(VectorFormat vform,
1750 LogicVRegister dst,
1751 const LogicVRegister& src,
1752 int shift) {
1753 VIXL_ASSERT(shift >= 0);
1754 SimVRegister temp1, temp2;
1755 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1756 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1757 return ushl(vform, dst, extendedreg, shiftreg);
1758}
1759
Martyn Capewellf804b602020-02-24 18:57:18 +00001760std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
1761 const LogicPRegister& pg,
1762 const LogicVRegister& src,
1763 int offset_from_last_active) {
1764 // Untested for any other values.
1765 VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
1766
1767 int last_active = GetLastActive(vform, pg);
1768 int lane_count = LaneCountFromFormat(vform);
1769 int index =
1770 ((last_active + offset_from_last_active) + lane_count) % lane_count;
1771 return std::make_pair(last_active >= 0, src.Uint(vform, index));
1772}
1773
1774LogicVRegister Simulator::compact(VectorFormat vform,
1775 LogicVRegister dst,
1776 const LogicPRegister& pg,
1777 const LogicVRegister& src) {
1778 int j = 0;
1779 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1780 if (pg.IsActive(vform, i)) {
1781 dst.SetUint(vform, j++, src.Uint(vform, i));
1782 }
1783 }
1784 for (; j < LaneCountFromFormat(vform); j++) {
1785 dst.SetUint(vform, j, 0);
1786 }
1787 return dst;
1788}
1789
1790LogicVRegister Simulator::splice(VectorFormat vform,
1791 LogicVRegister dst,
1792 const LogicPRegister& pg,
1793 const LogicVRegister& src1,
1794 const LogicVRegister& src2) {
1795 int lane_count = LaneCountFromFormat(vform);
1796 int first_active = GetFirstActive(vform, pg);
1797 int last_active = GetLastActive(vform, pg);
1798 int dst_idx = 0;
1799 uint64_t result[kZRegMaxSizeInBytes];
1800
1801 if (first_active >= 0) {
1802 VIXL_ASSERT(last_active >= first_active);
1803 VIXL_ASSERT(last_active < lane_count);
1804 for (int i = first_active; i <= last_active; i++) {
1805 result[dst_idx++] = src1.Uint(vform, i);
1806 }
1807 }
1808
1809 VIXL_ASSERT(dst_idx <= lane_count);
1810 for (int i = dst_idx; i < lane_count; i++) {
1811 result[i] = src2.Uint(vform, i - dst_idx);
1812 }
1813
1814 for (int i = 0; i < lane_count; i++) {
1815 dst.SetUint(vform, i, result[i]);
1816 }
1817 return dst;
1818}
Alexandre Ramesd3832962016-07-04 15:03:43 +01001819
Jacob Bramley22023df2019-05-14 17:55:43 +01001820LogicVRegister Simulator::sel(VectorFormat vform,
1821 LogicVRegister dst,
1822 const SimPRegister& pg,
1823 const LogicVRegister& src1,
1824 const LogicVRegister& src2) {
1825 int p_reg_bits_per_lane =
1826 LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
1827 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
1828 uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane)
1829 ? src1.Uint(vform, lane)
1830 : src2.Uint(vform, lane);
1831 dst.SetUint(vform, lane, lane_value);
1832 }
1833 return dst;
1834}
1835
1836
TatWai Chonga3e8b172019-11-22 21:48:56 -08001837LogicPRegister Simulator::sel(LogicPRegister dst,
1838 const LogicPRegister& pg,
1839 const LogicPRegister& src1,
1840 const LogicPRegister& src2) {
1841 for (int i = 0; i < dst.GetChunkCount(); i++) {
1842 LogicPRegister::ChunkType mask = pg.GetChunk(i);
1843 LogicPRegister::ChunkType result =
1844 (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i));
1845 dst.SetChunk(i, result);
1846 }
1847 return dst;
1848}
1849
1850
Alexandre Ramesd3832962016-07-04 15:03:43 +01001851LogicVRegister Simulator::sli(VectorFormat vform,
1852 LogicVRegister dst,
1853 const LogicVRegister& src,
1854 int shift) {
1855 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00001856 int lane_count = LaneCountFromFormat(vform);
1857 for (int i = 0; i < lane_count; i++) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01001858 uint64_t src_lane = src.Uint(vform, i);
1859 uint64_t dst_lane = dst.Uint(vform, i);
1860 uint64_t shifted = src_lane << shift;
1861 uint64_t mask = MaxUintFromFormat(vform) << shift;
1862 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1863 }
1864 return dst;
1865}
1866
1867
1868LogicVRegister Simulator::sqshl(VectorFormat vform,
1869 LogicVRegister dst,
1870 const LogicVRegister& src,
1871 int shift) {
1872 VIXL_ASSERT(shift >= 0);
1873 SimVRegister temp;
1874 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1875 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1876}
1877
1878
1879LogicVRegister Simulator::uqshl(VectorFormat vform,
1880 LogicVRegister dst,
1881 const LogicVRegister& src,
1882 int shift) {
1883 VIXL_ASSERT(shift >= 0);
1884 SimVRegister temp;
1885 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1886 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1887}
1888
1889
1890LogicVRegister Simulator::sqshlu(VectorFormat vform,
1891 LogicVRegister dst,
1892 const LogicVRegister& src,
1893 int shift) {
1894 VIXL_ASSERT(shift >= 0);
1895 SimVRegister temp;
1896 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1897 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1898}
1899
1900
1901LogicVRegister Simulator::sri(VectorFormat vform,
1902 LogicVRegister dst,
1903 const LogicVRegister& src,
1904 int shift) {
1905 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00001906 int lane_count = LaneCountFromFormat(vform);
Alexandre Ramesd3832962016-07-04 15:03:43 +01001907 VIXL_ASSERT((shift > 0) &&
1908 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00001909 for (int i = 0; i < lane_count; i++) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01001910 uint64_t src_lane = src.Uint(vform, i);
1911 uint64_t dst_lane = dst.Uint(vform, i);
1912 uint64_t shifted;
1913 uint64_t mask;
1914 if (shift == 64) {
1915 shifted = 0;
1916 mask = 0;
1917 } else {
1918 shifted = src_lane >> shift;
1919 mask = MaxUintFromFormat(vform) >> shift;
1920 }
1921 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1922 }
1923 return dst;
1924}
1925
1926
1927LogicVRegister Simulator::ushr(VectorFormat vform,
1928 LogicVRegister dst,
1929 const LogicVRegister& src,
1930 int shift) {
1931 VIXL_ASSERT(shift >= 0);
1932 SimVRegister temp;
1933 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1934 return ushl(vform, dst, src, shiftreg);
1935}
1936
1937
1938LogicVRegister Simulator::sshr(VectorFormat vform,
1939 LogicVRegister dst,
1940 const LogicVRegister& src,
1941 int shift) {
1942 VIXL_ASSERT(shift >= 0);
1943 SimVRegister temp;
1944 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1945 return sshl(vform, dst, src, shiftreg);
1946}
1947
1948
1949LogicVRegister Simulator::ssra(VectorFormat vform,
1950 LogicVRegister dst,
1951 const LogicVRegister& src,
1952 int shift) {
1953 SimVRegister temp;
1954 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1955 return add(vform, dst, dst, shifted_reg);
1956}
1957
1958
1959LogicVRegister Simulator::usra(VectorFormat vform,
1960 LogicVRegister dst,
1961 const LogicVRegister& src,
1962 int shift) {
1963 SimVRegister temp;
1964 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1965 return add(vform, dst, dst, shifted_reg);
1966}
1967
1968
1969LogicVRegister Simulator::srsra(VectorFormat vform,
1970 LogicVRegister dst,
1971 const LogicVRegister& src,
1972 int shift) {
1973 SimVRegister temp;
1974 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1975 return add(vform, dst, dst, shifted_reg);
1976}
1977
1978
1979LogicVRegister Simulator::ursra(VectorFormat vform,
1980 LogicVRegister dst,
1981 const LogicVRegister& src,
1982 int shift) {
1983 SimVRegister temp;
1984 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1985 return add(vform, dst, dst, shifted_reg);
1986}
1987
1988
1989LogicVRegister Simulator::cls(VectorFormat vform,
1990 LogicVRegister dst,
1991 const LogicVRegister& src) {
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01001992 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1993 int lane_count = LaneCountFromFormat(vform);
1994
1995 // Ensure that we can store one result per lane.
1996 int result[kZRegMaxSizeInBytes];
1997
1998 for (int i = 0; i < lane_count; i++) {
1999 result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002000 }
2001
2002 dst.ClearForWrite(vform);
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01002003 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002004 dst.SetUint(vform, i, result[i]);
2005 }
2006 return dst;
2007}
2008
2009
2010LogicVRegister Simulator::clz(VectorFormat vform,
2011 LogicVRegister dst,
2012 const LogicVRegister& src) {
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01002013 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2014 int lane_count = LaneCountFromFormat(vform);
2015
2016 // Ensure that we can store one result per lane.
2017 int result[kZRegMaxSizeInBytes];
2018
2019 for (int i = 0; i < lane_count; i++) {
2020 result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002021 }
2022
2023 dst.ClearForWrite(vform);
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01002024 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002025 dst.SetUint(vform, i, result[i]);
2026 }
2027 return dst;
2028}
2029
2030
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01002031LogicVRegister Simulator::cnot(VectorFormat vform,
2032 LogicVRegister dst,
2033 const LogicVRegister& src) {
2034 dst.ClearForWrite(vform);
2035 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2036 uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0;
2037 dst.SetUint(vform, i, value);
2038 }
2039 return dst;
2040}
2041
2042
Alexandre Ramesd3832962016-07-04 15:03:43 +01002043LogicVRegister Simulator::cnt(VectorFormat vform,
2044 LogicVRegister dst,
2045 const LogicVRegister& src) {
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01002046 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2047 int lane_count = LaneCountFromFormat(vform);
2048
2049 // Ensure that we can store one result per lane.
2050 int result[kZRegMaxSizeInBytes];
2051
2052 for (int i = 0; i < lane_count; i++) {
2053 result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002054 }
2055
2056 dst.ClearForWrite(vform);
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01002057 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002058 dst.SetUint(vform, i, result[i]);
2059 }
2060 return dst;
2061}
2062
2063
2064LogicVRegister Simulator::sshl(VectorFormat vform,
2065 LogicVRegister dst,
2066 const LogicVRegister& src1,
2067 const LogicVRegister& src2) {
2068 dst.ClearForWrite(vform);
2069 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2070 int8_t shift_val = src2.Int(vform, i);
2071 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
2072
2073 // Set signed saturation state.
2074 if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
2075 dst.SetSignedSat(i, lj_src_val >= 0);
2076 }
2077
2078 // Set unsigned saturation state.
2079 if (lj_src_val < 0) {
2080 dst.SetUnsignedSat(i, false);
2081 } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
2082 (lj_src_val != 0)) {
2083 dst.SetUnsignedSat(i, true);
2084 }
2085
2086 int64_t src_val = src1.Int(vform, i);
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002087 bool src_is_negative = src_val < 0;
Alexandre Ramesd3832962016-07-04 15:03:43 +01002088 if (shift_val > 63) {
2089 dst.SetInt(vform, i, 0);
2090 } else if (shift_val < -63) {
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002091 dst.SetRounding(i, src_is_negative);
2092 dst.SetInt(vform, i, src_is_negative ? -1 : 0);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002093 } else {
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002094 // Use unsigned types for shifts, as behaviour is undefined for signed
2095 // lhs.
2096 uint64_t usrc_val = static_cast<uint64_t>(src_val);
2097
Alexandre Ramesd3832962016-07-04 15:03:43 +01002098 if (shift_val < 0) {
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002099 // Convert to right shift.
2100 shift_val = -shift_val;
2101
2102 // Set rounding state by testing most-significant bit shifted out.
2103 // Rounding only needed on right shifts.
2104 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002105 dst.SetRounding(i, true);
2106 }
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002107
2108 usrc_val >>= shift_val;
2109
2110 if (src_is_negative) {
2111 // Simulate sign-extension.
2112 usrc_val |= (~UINT64_C(0) << (64 - shift_val));
2113 }
Alexandre Ramesd3832962016-07-04 15:03:43 +01002114 } else {
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002115 usrc_val <<= shift_val;
Alexandre Ramesd3832962016-07-04 15:03:43 +01002116 }
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002117 dst.SetUint(vform, i, usrc_val);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002118 }
2119 }
2120 return dst;
2121}
2122
2123
2124LogicVRegister Simulator::ushl(VectorFormat vform,
2125 LogicVRegister dst,
2126 const LogicVRegister& src1,
2127 const LogicVRegister& src2) {
2128 dst.ClearForWrite(vform);
2129 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2130 int8_t shift_val = src2.Int(vform, i);
2131 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
2132
2133 // Set saturation state.
2134 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
2135 dst.SetUnsignedSat(i, true);
2136 }
2137
2138 uint64_t src_val = src1.Uint(vform, i);
2139 if ((shift_val > 63) || (shift_val < -64)) {
2140 dst.SetUint(vform, i, 0);
2141 } else {
2142 if (shift_val < 0) {
2143 // Set rounding state. Rounding only needed on right shifts.
2144 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
2145 dst.SetRounding(i, true);
2146 }
2147
2148 if (shift_val == -64) {
2149 src_val = 0;
2150 } else {
2151 src_val >>= -shift_val;
2152 }
2153 } else {
2154 src_val <<= shift_val;
2155 }
2156 dst.SetUint(vform, i, src_val);
2157 }
2158 }
2159 return dst;
2160}
2161
2162
2163LogicVRegister Simulator::neg(VectorFormat vform,
2164 LogicVRegister dst,
2165 const LogicVRegister& src) {
2166 dst.ClearForWrite(vform);
2167 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2168 // Test for signed saturation.
2169 int64_t sa = src.Int(vform, i);
2170 if (sa == MinIntFromFormat(vform)) {
2171 dst.SetSignedSat(i, true);
2172 }
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002173 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002174 }
2175 return dst;
2176}
2177
2178
2179LogicVRegister Simulator::suqadd(VectorFormat vform,
2180 LogicVRegister dst,
2181 const LogicVRegister& src) {
2182 dst.ClearForWrite(vform);
2183 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2184 int64_t sa = dst.IntLeftJustified(vform, i);
2185 uint64_t ub = src.UintLeftJustified(vform, i);
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002186 uint64_t ur = sa + ub;
Alexandre Ramesd3832962016-07-04 15:03:43 +01002187
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002188 int64_t sr;
2189 memcpy(&sr, &ur, sizeof(sr));
Alexandre Ramesd3832962016-07-04 15:03:43 +01002190 if (sr < sa) { // Test for signed positive saturation.
2191 dst.SetInt(vform, i, MaxIntFromFormat(vform));
2192 } else {
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002193 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
Alexandre Ramesd3832962016-07-04 15:03:43 +01002194 }
2195 }
2196 return dst;
2197}
2198
2199
2200LogicVRegister Simulator::usqadd(VectorFormat vform,
2201 LogicVRegister dst,
2202 const LogicVRegister& src) {
2203 dst.ClearForWrite(vform);
2204 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2205 uint64_t ua = dst.UintLeftJustified(vform, i);
2206 int64_t sb = src.IntLeftJustified(vform, i);
2207 uint64_t ur = ua + sb;
2208
2209 if ((sb > 0) && (ur <= ua)) {
2210 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
2211 } else if ((sb < 0) && (ur >= ua)) {
2212 dst.SetUint(vform, i, 0); // Negative saturation.
2213 } else {
2214 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
2215 }
2216 }
2217 return dst;
2218}
2219
2220
2221LogicVRegister Simulator::abs(VectorFormat vform,
2222 LogicVRegister dst,
2223 const LogicVRegister& src) {
2224 dst.ClearForWrite(vform);
2225 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2226 // Test for signed saturation.
2227 int64_t sa = src.Int(vform, i);
2228 if (sa == MinIntFromFormat(vform)) {
2229 dst.SetSignedSat(i, true);
2230 }
2231 if (sa < 0) {
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002232 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002233 } else {
2234 dst.SetInt(vform, i, sa);
2235 }
2236 }
2237 return dst;
2238}
2239
2240
TatWai Chong6f111bc2019-10-07 09:20:37 +01002241LogicVRegister Simulator::andv(VectorFormat vform,
2242 LogicVRegister dst,
2243 const LogicPRegister& pg,
2244 const LogicVRegister& src) {
2245 VIXL_ASSERT(IsSVEFormat(vform));
2246 uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform));
2247 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2248 if (!pg.IsActive(vform, i)) continue;
2249
2250 result &= src.Uint(vform, i);
2251 }
2252 VectorFormat vform_dst =
2253 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2254 dst.ClearForWrite(vform_dst);
2255 dst.SetUint(vform_dst, 0, result);
2256 return dst;
2257}
2258
2259
2260LogicVRegister Simulator::eorv(VectorFormat vform,
2261 LogicVRegister dst,
2262 const LogicPRegister& pg,
2263 const LogicVRegister& src) {
2264 VIXL_ASSERT(IsSVEFormat(vform));
2265 uint64_t result = 0;
2266 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2267 if (!pg.IsActive(vform, i)) continue;
2268
2269 result ^= src.Uint(vform, i);
2270 }
2271 VectorFormat vform_dst =
2272 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2273 dst.ClearForWrite(vform_dst);
2274 dst.SetUint(vform_dst, 0, result);
2275 return dst;
2276}
2277
2278
2279LogicVRegister Simulator::orv(VectorFormat vform,
2280 LogicVRegister dst,
2281 const LogicPRegister& pg,
2282 const LogicVRegister& src) {
2283 VIXL_ASSERT(IsSVEFormat(vform));
2284 uint64_t result = 0;
2285 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2286 if (!pg.IsActive(vform, i)) continue;
2287
2288 result |= src.Uint(vform, i);
2289 }
2290 VectorFormat vform_dst =
2291 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2292 dst.ClearForWrite(vform_dst);
2293 dst.SetUint(vform_dst, 0, result);
2294 return dst;
2295}
2296
2297
TatWai Chongb2d8d1f2019-10-21 15:19:31 -07002298LogicVRegister Simulator::saddv(VectorFormat vform,
2299 LogicVRegister dst,
2300 const LogicPRegister& pg,
2301 const LogicVRegister& src) {
2302 VIXL_ASSERT(IsSVEFormat(vform));
2303 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);
2304 int64_t result = 0;
2305 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2306 if (!pg.IsActive(vform, i)) continue;
2307
2308 // The destination register always has D-lane sizes and the source register
2309 // always has S-lanes or smaller, so signed integer overflow -- undefined
2310 // behaviour -- can't occur.
2311 result += src.Int(vform, i);
2312 }
2313
2314 dst.ClearForWrite(kFormatD);
2315 dst.SetInt(kFormatD, 0, result);
2316 return dst;
2317}
2318
2319
2320LogicVRegister Simulator::uaddv(VectorFormat vform,
2321 LogicVRegister dst,
2322 const LogicPRegister& pg,
2323 const LogicVRegister& src) {
2324 VIXL_ASSERT(IsSVEFormat(vform));
2325 uint64_t result = 0;
2326 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2327 if (!pg.IsActive(vform, i)) continue;
2328
2329 result += src.Uint(vform, i);
2330 }
2331
2332 dst.ClearForWrite(kFormatD);
2333 dst.SetUint(kFormatD, 0, result);
2334 return dst;
2335}
2336
2337
Alexandre Ramesd3832962016-07-04 15:03:43 +01002338LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2339 LogicVRegister dst,
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002340 bool dst_is_signed,
Alexandre Ramesd3832962016-07-04 15:03:43 +01002341 const LogicVRegister& src,
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002342 bool src_is_signed) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002343 bool upperhalf = false;
2344 VectorFormat srcform = kFormatUndefined;
2345 int64_t ssrc[8];
2346 uint64_t usrc[8];
2347
2348 switch (dstform) {
2349 case kFormat8B:
2350 upperhalf = false;
2351 srcform = kFormat8H;
2352 break;
2353 case kFormat16B:
2354 upperhalf = true;
2355 srcform = kFormat8H;
2356 break;
2357 case kFormat4H:
2358 upperhalf = false;
2359 srcform = kFormat4S;
2360 break;
2361 case kFormat8H:
2362 upperhalf = true;
2363 srcform = kFormat4S;
2364 break;
2365 case kFormat2S:
2366 upperhalf = false;
2367 srcform = kFormat2D;
2368 break;
2369 case kFormat4S:
2370 upperhalf = true;
2371 srcform = kFormat2D;
2372 break;
2373 case kFormatB:
2374 upperhalf = false;
2375 srcform = kFormatH;
2376 break;
2377 case kFormatH:
2378 upperhalf = false;
2379 srcform = kFormatS;
2380 break;
2381 case kFormatS:
2382 upperhalf = false;
2383 srcform = kFormatD;
2384 break;
2385 default:
2386 VIXL_UNIMPLEMENTED();
2387 }
2388
2389 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2390 ssrc[i] = src.Int(srcform, i);
2391 usrc[i] = src.Uint(srcform, i);
2392 }
2393
2394 int offset;
2395 if (upperhalf) {
2396 offset = LaneCountFromFormat(dstform) / 2;
2397 } else {
2398 offset = 0;
2399 dst.ClearForWrite(dstform);
2400 }
2401
2402 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2403 // Test for signed saturation
2404 if (ssrc[i] > MaxIntFromFormat(dstform)) {
2405 dst.SetSignedSat(offset + i, true);
2406 } else if (ssrc[i] < MinIntFromFormat(dstform)) {
2407 dst.SetSignedSat(offset + i, false);
2408 }
2409
2410 // Test for unsigned saturation
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002411 if (src_is_signed) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002412 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2413 dst.SetUnsignedSat(offset + i, true);
2414 } else if (ssrc[i] < 0) {
2415 dst.SetUnsignedSat(offset + i, false);
2416 }
2417 } else {
2418 if (usrc[i] > MaxUintFromFormat(dstform)) {
2419 dst.SetUnsignedSat(offset + i, true);
2420 }
2421 }
2422
2423 int64_t result;
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002424 if (src_is_signed) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002425 result = ssrc[i] & MaxUintFromFormat(dstform);
2426 } else {
2427 result = usrc[i] & MaxUintFromFormat(dstform);
2428 }
2429
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002430 if (dst_is_signed) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002431 dst.SetInt(dstform, offset + i, result);
2432 } else {
2433 dst.SetUint(dstform, offset + i, result);
2434 }
2435 }
2436 return dst;
2437}
2438
2439
2440LogicVRegister Simulator::xtn(VectorFormat vform,
2441 LogicVRegister dst,
2442 const LogicVRegister& src) {
2443 return extractnarrow(vform, dst, true, src, true);
2444}
2445
2446
2447LogicVRegister Simulator::sqxtn(VectorFormat vform,
2448 LogicVRegister dst,
2449 const LogicVRegister& src) {
2450 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2451}
2452
2453
2454LogicVRegister Simulator::sqxtun(VectorFormat vform,
2455 LogicVRegister dst,
2456 const LogicVRegister& src) {
2457 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2458}
2459
2460
2461LogicVRegister Simulator::uqxtn(VectorFormat vform,
2462 LogicVRegister dst,
2463 const LogicVRegister& src) {
2464 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2465}
2466
2467
2468LogicVRegister Simulator::absdiff(VectorFormat vform,
2469 LogicVRegister dst,
2470 const LogicVRegister& src1,
2471 const LogicVRegister& src2,
TatWai Chong13634762019-07-16 16:20:45 -07002472 bool is_signed) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002473 dst.ClearForWrite(vform);
2474 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
TatWai Chong13634762019-07-16 16:20:45 -07002475 bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
2476 : (src1.Uint(vform, i) > src2.Uint(vform, i));
2477 // Always calculate the answer using unsigned arithmetic, to avoid
2478 // implemenation-defined signed overflow.
2479 if (src1_gt_src2) {
2480 dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
Alexandre Ramesd3832962016-07-04 15:03:43 +01002481 } else {
TatWai Chong13634762019-07-16 16:20:45 -07002482 dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i));
Alexandre Ramesd3832962016-07-04 15:03:43 +01002483 }
2484 }
2485 return dst;
2486}
2487
2488
2489LogicVRegister Simulator::saba(VectorFormat vform,
2490 LogicVRegister dst,
2491 const LogicVRegister& src1,
2492 const LogicVRegister& src2) {
2493 SimVRegister temp;
2494 dst.ClearForWrite(vform);
2495 absdiff(vform, temp, src1, src2, true);
2496 add(vform, dst, dst, temp);
2497 return dst;
2498}
2499
2500
2501LogicVRegister Simulator::uaba(VectorFormat vform,
2502 LogicVRegister dst,
2503 const LogicVRegister& src1,
2504 const LogicVRegister& src2) {
2505 SimVRegister temp;
2506 dst.ClearForWrite(vform);
2507 absdiff(vform, temp, src1, src2, false);
2508 add(vform, dst, dst, temp);
2509 return dst;
2510}
2511
2512
2513LogicVRegister Simulator::not_(VectorFormat vform,
2514 LogicVRegister dst,
2515 const LogicVRegister& src) {
2516 dst.ClearForWrite(vform);
2517 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2518 dst.SetUint(vform, i, ~src.Uint(vform, i));
2519 }
2520 return dst;
2521}
2522
2523
2524LogicVRegister Simulator::rbit(VectorFormat vform,
2525 LogicVRegister dst,
2526 const LogicVRegister& src) {
Martyn Capewell77b6d982019-12-02 18:34:59 +00002527 uint64_t result[kZRegMaxSizeInBytes];
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002528 int lane_count = LaneCountFromFormat(vform);
2529 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002530 uint64_t reversed_value;
2531 uint64_t value;
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002532 for (int i = 0; i < lane_count; i++) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002533 value = src.Uint(vform, i);
2534 reversed_value = 0;
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002535 for (int j = 0; j < lane_size_in_bits; j++) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002536 reversed_value = (reversed_value << 1) | (value & 1);
2537 value >>= 1;
2538 }
2539 result[i] = reversed_value;
2540 }
2541
2542 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002543 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002544 dst.SetUint(vform, i, result[i]);
2545 }
2546 return dst;
2547}
2548
2549
2550LogicVRegister Simulator::rev(VectorFormat vform,
2551 LogicVRegister dst,
TatWai Chong4f28df72019-08-14 17:50:30 -07002552 const LogicVRegister& src) {
2553 VIXL_ASSERT(IsSVEFormat(vform));
2554 int lane_count = LaneCountFromFormat(vform);
Martyn Capewell7fd6fd52019-12-06 14:50:15 +00002555 for (int i = 0; i < lane_count / 2; i++) {
2556 uint64_t t = src.Uint(vform, i);
TatWai Chong4f28df72019-08-14 17:50:30 -07002557 dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1));
Martyn Capewell7fd6fd52019-12-06 14:50:15 +00002558 dst.SetUint(vform, lane_count - i - 1, t);
TatWai Chong4f28df72019-08-14 17:50:30 -07002559 }
2560 return dst;
2561}
2562
2563
2564LogicVRegister Simulator::rev_byte(VectorFormat vform,
2565 LogicVRegister dst,
2566 const LogicVRegister& src,
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002567 int rev_size) {
Martyn Capewell77b6d982019-12-02 18:34:59 +00002568 uint64_t result[kZRegMaxSizeInBytes];
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002569 int lane_count = LaneCountFromFormat(vform);
2570 int lane_size = LaneSizeInBytesFromFormat(vform);
2571 int lanes_per_loop = rev_size / lane_size;
2572 for (int i = 0; i < lane_count; i += lanes_per_loop) {
2573 for (int j = 0; j < lanes_per_loop; j++) {
2574 result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002575 }
2576 }
2577 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002578 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002579 dst.SetUint(vform, i, result[i]);
2580 }
2581 return dst;
2582}
2583
2584
2585LogicVRegister Simulator::rev16(VectorFormat vform,
2586 LogicVRegister dst,
2587 const LogicVRegister& src) {
TatWai Chong4f28df72019-08-14 17:50:30 -07002588 return rev_byte(vform, dst, src, 2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002589}
2590
2591
2592LogicVRegister Simulator::rev32(VectorFormat vform,
2593 LogicVRegister dst,
2594 const LogicVRegister& src) {
TatWai Chong4f28df72019-08-14 17:50:30 -07002595 return rev_byte(vform, dst, src, 4);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002596}
2597
2598
2599LogicVRegister Simulator::rev64(VectorFormat vform,
2600 LogicVRegister dst,
2601 const LogicVRegister& src) {
TatWai Chong4f28df72019-08-14 17:50:30 -07002602 return rev_byte(vform, dst, src, 8);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002603}
2604
2605
2606LogicVRegister Simulator::addlp(VectorFormat vform,
2607 LogicVRegister dst,
2608 const LogicVRegister& src,
2609 bool is_signed,
2610 bool do_accumulate) {
2611 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002612 VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= 32);
2613 VIXL_ASSERT(LaneCountFromFormat(vform) <= 8);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002614
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002615 uint64_t result[8];
2616 int lane_count = LaneCountFromFormat(vform);
2617 for (int i = 0; i < lane_count; i++) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002618 if (is_signed) {
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002619 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2620 src.Int(vformsrc, 2 * i + 1));
Alexandre Ramesd3832962016-07-04 15:03:43 +01002621 } else {
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002622 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002623 }
2624 }
2625
2626 dst.ClearForWrite(vform);
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002627 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002628 if (do_accumulate) {
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002629 result[i] += dst.Uint(vform, i);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002630 }
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00002631 dst.SetUint(vform, i, result[i]);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002632 }
2633
2634 return dst;
2635}
2636
2637
2638LogicVRegister Simulator::saddlp(VectorFormat vform,
2639 LogicVRegister dst,
2640 const LogicVRegister& src) {
2641 return addlp(vform, dst, src, true, false);
2642}
2643
2644
2645LogicVRegister Simulator::uaddlp(VectorFormat vform,
2646 LogicVRegister dst,
2647 const LogicVRegister& src) {
2648 return addlp(vform, dst, src, false, false);
2649}
2650
2651
2652LogicVRegister Simulator::sadalp(VectorFormat vform,
2653 LogicVRegister dst,
2654 const LogicVRegister& src) {
2655 return addlp(vform, dst, src, true, true);
2656}
2657
2658
2659LogicVRegister Simulator::uadalp(VectorFormat vform,
2660 LogicVRegister dst,
2661 const LogicVRegister& src) {
2662 return addlp(vform, dst, src, false, true);
2663}
2664
2665
2666LogicVRegister Simulator::ext(VectorFormat vform,
2667 LogicVRegister dst,
2668 const LogicVRegister& src1,
2669 const LogicVRegister& src2,
2670 int index) {
Martyn Capewellac07af12019-12-02 14:55:05 +00002671 uint8_t result[kZRegMaxSizeInBytes];
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002672 int lane_count = LaneCountFromFormat(vform);
2673 for (int i = 0; i < lane_count - index; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002674 result[i] = src1.Uint(vform, i + index);
2675 }
2676 for (int i = 0; i < index; ++i) {
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002677 result[lane_count - index + i] = src2.Uint(vform, i);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002678 }
2679 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002680 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002681 dst.SetUint(vform, i, result[i]);
2682 }
2683 return dst;
2684}
2685
Carey Williams2809e6c2018-03-13 12:24:16 +00002686template <typename T>
Martyn Capewell4a9829f2020-01-30 17:41:01 +00002687LogicVRegister Simulator::fadda(VectorFormat vform,
2688 LogicVRegister acc,
2689 const LogicPRegister& pg,
2690 const LogicVRegister& src) {
2691 T result = acc.Float<T>(0);
2692 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2693 if (!pg.IsActive(vform, i)) continue;
2694
2695 result = FPAdd(result, src.Float<T>(i));
2696 }
2697 VectorFormat vform_dst =
2698 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2699 acc.ClearForWrite(vform_dst);
2700 acc.SetFloat(0, result);
2701 return acc;
2702}
2703
2704LogicVRegister Simulator::fadda(VectorFormat vform,
2705 LogicVRegister acc,
2706 const LogicPRegister& pg,
2707 const LogicVRegister& src) {
2708 switch (LaneSizeInBitsFromFormat(vform)) {
2709 case kHRegSize:
2710 fadda<SimFloat16>(vform, acc, pg, src);
2711 break;
2712 case kSRegSize:
2713 fadda<float>(vform, acc, pg, src);
2714 break;
2715 case kDRegSize:
2716 fadda<double>(vform, acc, pg, src);
2717 break;
2718 default:
2719 VIXL_UNREACHABLE();
2720 }
2721 return acc;
2722}
2723
2724template <typename T>
Carey Williams2809e6c2018-03-13 12:24:16 +00002725LogicVRegister Simulator::fcadd(VectorFormat vform,
2726 LogicVRegister dst, // d
2727 const LogicVRegister& src1, // n
2728 const LogicVRegister& src2, // m
2729 int rot) {
2730 int elements = LaneCountFromFormat(vform);
2731
2732 T element1, element3;
2733 rot = (rot == 1) ? 270 : 90;
2734
2735 // Loop example:
2736 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2737 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2738
2739 for (int e = 0; e <= (elements / 2) - 1; e++) {
2740 switch (rot) {
2741 case 90:
2742 element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2743 element3 = src2.Float<T>(e * 2);
2744 break;
2745 case 270:
2746 element1 = src2.Float<T>(e * 2 + 1);
2747 element3 = FPNeg(src2.Float<T>(e * 2));
2748 break;
2749 default:
2750 VIXL_UNREACHABLE();
2751 return dst; // prevents "element(n) may be unintialized" errors
2752 }
2753 dst.ClearForWrite(vform);
2754 dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2755 dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2756 }
2757 return dst;
2758}
2759
2760
2761LogicVRegister Simulator::fcadd(VectorFormat vform,
2762 LogicVRegister dst, // d
2763 const LogicVRegister& src1, // n
2764 const LogicVRegister& src2, // m
2765 int rot) {
2766 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
Martyn Capewell0b1afa82020-03-04 11:31:42 +00002767 fcadd<SimFloat16>(vform, dst, src1, src2, rot);
Carey Williams2809e6c2018-03-13 12:24:16 +00002768 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2769 fcadd<float>(vform, dst, src1, src2, rot);
2770 } else {
2771 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
2772 fcadd<double>(vform, dst, src1, src2, rot);
2773 }
2774 return dst;
2775}
2776
Carey Williams2809e6c2018-03-13 12:24:16 +00002777template <typename T>
2778LogicVRegister Simulator::fcmla(VectorFormat vform,
Martyn Capewell75f1c432020-03-30 09:23:27 +01002779 LogicVRegister dst,
2780 const LogicVRegister& src1,
2781 const LogicVRegister& src2,
2782 const LogicVRegister& acc,
Carey Williams2809e6c2018-03-13 12:24:16 +00002783 int index,
2784 int rot) {
2785 int elements = LaneCountFromFormat(vform);
2786
2787 T element1, element2, element3, element4;
2788 rot *= 90;
2789
2790 // Loop example:
2791 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2792 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2793
2794 for (int e = 0; e <= (elements / 2) - 1; e++) {
Martyn Capewell75f1c432020-03-30 09:23:27 +01002795 // Index == -1 indicates a vector/vector rather than vector/indexed-element
2796 // operation.
2797 int f = (index < 0) ? e : index;
2798
Carey Williams2809e6c2018-03-13 12:24:16 +00002799 switch (rot) {
2800 case 0:
Martyn Capewell75f1c432020-03-30 09:23:27 +01002801 element1 = src2.Float<T>(f * 2);
Carey Williams2809e6c2018-03-13 12:24:16 +00002802 element2 = src1.Float<T>(e * 2);
Martyn Capewell75f1c432020-03-30 09:23:27 +01002803 element3 = src2.Float<T>(f * 2 + 1);
Carey Williams2809e6c2018-03-13 12:24:16 +00002804 element4 = src1.Float<T>(e * 2);
2805 break;
2806 case 90:
Martyn Capewell75f1c432020-03-30 09:23:27 +01002807 element1 = FPNeg(src2.Float<T>(f * 2 + 1));
Carey Williams2809e6c2018-03-13 12:24:16 +00002808 element2 = src1.Float<T>(e * 2 + 1);
Martyn Capewell75f1c432020-03-30 09:23:27 +01002809 element3 = src2.Float<T>(f * 2);
Carey Williams2809e6c2018-03-13 12:24:16 +00002810 element4 = src1.Float<T>(e * 2 + 1);
2811 break;
2812 case 180:
Martyn Capewell75f1c432020-03-30 09:23:27 +01002813 element1 = FPNeg(src2.Float<T>(f * 2));
Carey Williams2809e6c2018-03-13 12:24:16 +00002814 element2 = src1.Float<T>(e * 2);
Martyn Capewell75f1c432020-03-30 09:23:27 +01002815 element3 = FPNeg(src2.Float<T>(f * 2 + 1));
Carey Williams2809e6c2018-03-13 12:24:16 +00002816 element4 = src1.Float<T>(e * 2);
2817 break;
2818 case 270:
Martyn Capewell75f1c432020-03-30 09:23:27 +01002819 element1 = src2.Float<T>(f * 2 + 1);
Carey Williams2809e6c2018-03-13 12:24:16 +00002820 element2 = src1.Float<T>(e * 2 + 1);
Martyn Capewell75f1c432020-03-30 09:23:27 +01002821 element3 = FPNeg(src2.Float<T>(f * 2));
Carey Williams2809e6c2018-03-13 12:24:16 +00002822 element4 = src1.Float<T>(e * 2 + 1);
2823 break;
2824 default:
2825 VIXL_UNREACHABLE();
2826 return dst; // prevents "element(n) may be unintialized" errors
2827 }
2828 dst.ClearForWrite(vform);
Martyn Capewelle4886e52020-03-30 09:28:52 +01002829 dst.SetFloat<T>(vform,
2830 e * 2,
2831 FPMulAdd(acc.Float<T>(e * 2), element2, element1));
2832 dst.SetFloat<T>(vform,
2833 e * 2 + 1,
Martyn Capewell75f1c432020-03-30 09:23:27 +01002834 FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
Carey Williams2809e6c2018-03-13 12:24:16 +00002835 }
2836 return dst;
2837}
2838
Carey Williams2809e6c2018-03-13 12:24:16 +00002839LogicVRegister Simulator::fcmla(VectorFormat vform,
Martyn Capewell75f1c432020-03-30 09:23:27 +01002840 LogicVRegister dst,
2841 const LogicVRegister& src1,
2842 const LogicVRegister& src2,
2843 const LogicVRegister& acc,
Carey Williams2809e6c2018-03-13 12:24:16 +00002844 int rot) {
2845 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
Martyn Capewell75f1c432020-03-30 09:23:27 +01002846 fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot);
Carey Williams2809e6c2018-03-13 12:24:16 +00002847 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Martyn Capewell75f1c432020-03-30 09:23:27 +01002848 fcmla<float>(vform, dst, src1, src2, acc, -1, rot);
Carey Williams2809e6c2018-03-13 12:24:16 +00002849 } else {
Martyn Capewell75f1c432020-03-30 09:23:27 +01002850 fcmla<double>(vform, dst, src1, src2, acc, -1, rot);
Carey Williams2809e6c2018-03-13 12:24:16 +00002851 }
2852 return dst;
2853}
2854
2855
2856LogicVRegister Simulator::fcmla(VectorFormat vform,
2857 LogicVRegister dst, // d
2858 const LogicVRegister& src1, // n
2859 const LogicVRegister& src2, // m
2860 int index,
2861 int rot) {
2862 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2863 VIXL_UNIMPLEMENTED();
2864 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Martyn Capewell75f1c432020-03-30 09:23:27 +01002865 fcmla<float>(vform, dst, src1, src2, dst, index, rot);
Carey Williams2809e6c2018-03-13 12:24:16 +00002866 } else {
Martyn Capewell75f1c432020-03-30 09:23:27 +01002867 fcmla<double>(vform, dst, src1, src2, dst, index, rot);
Carey Williams2809e6c2018-03-13 12:24:16 +00002868 }
2869 return dst;
2870}
2871
Alexandre Ramesd3832962016-07-04 15:03:43 +01002872
2873LogicVRegister Simulator::dup_element(VectorFormat vform,
2874 LogicVRegister dst,
2875 const LogicVRegister& src,
2876 int src_index) {
TatWai Chong4f28df72019-08-14 17:50:30 -07002877 if (vform == kFormatVnQ) {
2878 // When duplicating a 128-bit value, split it into two 64-bit parts, and
2879 // then copy the two to their slots on destination register.
2880 uint64_t low = src.Uint(kFormatVnD, src_index * 2);
2881 uint64_t high = src.Uint(kFormatVnD, (src_index * 2) + 1);
2882 dst.ClearForWrite(vform);
2883 for (int d_lane = 0; d_lane < LaneCountFromFormat(kFormatVnD);
2884 d_lane += 2) {
2885 dst.SetUint(kFormatVnD, d_lane, low);
2886 dst.SetUint(kFormatVnD, d_lane + 1, high);
2887 }
2888 } else {
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002889 int lane_count = LaneCountFromFormat(vform);
TatWai Chong4f28df72019-08-14 17:50:30 -07002890 uint64_t value = src.Uint(vform, src_index);
2891 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00002892 for (int i = 0; i < lane_count; ++i) {
TatWai Chong4f28df72019-08-14 17:50:30 -07002893 dst.SetUint(vform, i, value);
2894 }
Alexandre Ramesd3832962016-07-04 15:03:43 +01002895 }
2896 return dst;
2897}
2898
Martyn Capewelle4886e52020-03-30 09:28:52 +01002899LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
2900 LogicVRegister dst,
2901 const LogicVRegister& src,
2902 int src_index) {
TatWai Chongfa3f6bf2020-03-13 00:22:03 -07002903 // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
2904 // whereas in NEON, the size of segment is equal to the size of register
2905 // itself.
2906 int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
2907 VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
2908 int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
Martyn Capewellc7501512020-03-16 10:35:33 +00002909
2910 VIXL_ASSERT(src_index >= 0);
2911 VIXL_ASSERT(src_index < lanes_per_segment);
2912
TatWai Chongfa3f6bf2020-03-13 00:22:03 -07002913 dst.ClearForWrite(vform);
Martyn Capewelle4886e52020-03-30 09:28:52 +01002914 for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
2915 uint64_t value = src.Uint(vform, j + src_index);
2916 for (int i = 0; i < lanes_per_segment; i++) {
2917 dst.SetUint(vform, j + i, value);
2918 }
2919 }
2920 return dst;
2921}
Alexandre Ramesd3832962016-07-04 15:03:43 +01002922
2923LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2924 LogicVRegister dst,
2925 uint64_t imm) {
TatWai Chong4f28df72019-08-14 17:50:30 -07002926 int lane_count = LaneCountFromFormat(vform);
Alexandre Ramesd3832962016-07-04 15:03:43 +01002927 uint64_t value = imm & MaxUintFromFormat(vform);
2928 dst.ClearForWrite(vform);
TatWai Chong4f28df72019-08-14 17:50:30 -07002929 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01002930 dst.SetUint(vform, i, value);
2931 }
2932 return dst;
2933}
2934
2935
2936LogicVRegister Simulator::ins_element(VectorFormat vform,
2937 LogicVRegister dst,
2938 int dst_index,
2939 const LogicVRegister& src,
2940 int src_index) {
2941 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2942 return dst;
2943}
2944
2945
2946LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2947 LogicVRegister dst,
2948 int dst_index,
2949 uint64_t imm) {
2950 uint64_t value = imm & MaxUintFromFormat(vform);
2951 dst.SetUint(vform, dst_index, value);
2952 return dst;
2953}
2954
2955
Jacob Bramleycd8148c2019-07-11 18:43:20 +01002956LogicVRegister Simulator::index(VectorFormat vform,
2957 LogicVRegister dst,
2958 uint64_t start,
2959 uint64_t step) {
2960 VIXL_ASSERT(IsSVEFormat(vform));
2961 uint64_t value = start;
2962 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2963 dst.SetUint(vform, i, value);
2964 value += step;
2965 }
2966 return dst;
2967}
2968
2969
Jacob Bramley9d06c4d2019-05-13 18:15:06 +01002970LogicVRegister Simulator::insr(VectorFormat vform,
2971 LogicVRegister dst,
2972 uint64_t imm) {
2973 VIXL_ASSERT(IsSVEFormat(vform));
2974 for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
2975 dst.SetUint(vform, i, dst.Uint(vform, i - 1));
2976 }
2977 dst.SetUint(vform, 0, imm);
2978 return dst;
2979}
2980
2981
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +01002982LogicVRegister Simulator::mov(VectorFormat vform,
2983 LogicVRegister dst,
2984 const LogicVRegister& src) {
2985 dst.ClearForWrite(vform);
2986 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
2987 dst.SetUint(vform, lane, src.Uint(vform, lane));
2988 }
2989 return dst;
2990}
2991
2992
Jacob Bramley0ce75842019-07-17 18:12:50 +01002993LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
2994 // Avoid a copy if the registers already alias.
2995 if (dst.Aliases(src)) return dst;
2996
2997 for (int i = 0; i < dst.GetChunkCount(); i++) {
2998 dst.SetChunk(i, src.GetChunk(i));
2999 }
3000 return dst;
3001}
3002
3003
Jacob Bramley22023df2019-05-14 17:55:43 +01003004LogicVRegister Simulator::mov_merging(VectorFormat vform,
3005 LogicVRegister dst,
3006 const SimPRegister& pg,
3007 const LogicVRegister& src) {
3008 return sel(vform, dst, pg, src, dst);
3009}
3010
3011
Jacob Bramleyae2fc3b2019-05-21 19:24:36 +01003012LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
3013 LogicVRegister dst,
3014 const SimPRegister& pg,
3015 const LogicVRegister& src) {
3016 SimVRegister zero;
3017 dup_immediate(vform, zero, 0);
3018 return sel(vform, dst, pg, src, zero);
3019}
3020
3021
TatWai Chonga3e8b172019-11-22 21:48:56 -08003022LogicPRegister Simulator::mov_merging(LogicPRegister dst,
3023 const LogicPRegister& pg,
3024 const LogicPRegister& src) {
3025 return sel(dst, pg, src, dst);
3026}
3027
3028
3029LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
3030 const LogicPRegister& pg,
3031 const LogicPRegister& src) {
3032 SimPRegister all_false;
3033 return sel(dst, pg, src, pfalse(all_false));
3034}
3035
3036
Alexandre Ramesd3832962016-07-04 15:03:43 +01003037LogicVRegister Simulator::movi(VectorFormat vform,
3038 LogicVRegister dst,
3039 uint64_t imm) {
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00003040 int lane_count = LaneCountFromFormat(vform);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003041 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00003042 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01003043 dst.SetUint(vform, i, imm);
3044 }
3045 return dst;
3046}
3047
3048
3049LogicVRegister Simulator::mvni(VectorFormat vform,
3050 LogicVRegister dst,
3051 uint64_t imm) {
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00003052 int lane_count = LaneCountFromFormat(vform);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003053 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00003054 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01003055 dst.SetUint(vform, i, ~imm);
3056 }
3057 return dst;
3058}
3059
3060
3061LogicVRegister Simulator::orr(VectorFormat vform,
3062 LogicVRegister dst,
3063 const LogicVRegister& src,
3064 uint64_t imm) {
3065 uint64_t result[16];
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00003066 int lane_count = LaneCountFromFormat(vform);
3067 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01003068 result[i] = src.Uint(vform, i) | imm;
3069 }
3070 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00003071 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01003072 dst.SetUint(vform, i, result[i]);
3073 }
3074 return dst;
3075}
3076
3077
3078LogicVRegister Simulator::uxtl(VectorFormat vform,
3079 LogicVRegister dst,
3080 const LogicVRegister& src) {
3081 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3082
3083 dst.ClearForWrite(vform);
3084 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3085 dst.SetUint(vform, i, src.Uint(vform_half, i));
3086 }
3087 return dst;
3088}
3089
3090
3091LogicVRegister Simulator::sxtl(VectorFormat vform,
3092 LogicVRegister dst,
3093 const LogicVRegister& src) {
3094 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3095
3096 dst.ClearForWrite(vform);
3097 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3098 dst.SetInt(vform, i, src.Int(vform_half, i));
3099 }
3100 return dst;
3101}
3102
3103
3104LogicVRegister Simulator::uxtl2(VectorFormat vform,
3105 LogicVRegister dst,
3106 const LogicVRegister& src) {
3107 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3108 int lane_count = LaneCountFromFormat(vform);
3109
3110 dst.ClearForWrite(vform);
3111 for (int i = 0; i < lane_count; i++) {
3112 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
3113 }
3114 return dst;
3115}
3116
3117
3118LogicVRegister Simulator::sxtl2(VectorFormat vform,
3119 LogicVRegister dst,
3120 const LogicVRegister& src) {
3121 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3122 int lane_count = LaneCountFromFormat(vform);
3123
3124 dst.ClearForWrite(vform);
3125 for (int i = 0; i < lane_count; i++) {
3126 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
3127 }
3128 return dst;
3129}
3130
3131
Jacob Bramleybc21a0d2019-09-20 18:49:15 +01003132LogicVRegister Simulator::uxt(VectorFormat vform,
3133 LogicVRegister dst,
3134 const LogicVRegister& src,
3135 unsigned from_size_in_bits) {
3136 int lane_count = LaneCountFromFormat(vform);
3137 uint64_t mask = GetUintMask(from_size_in_bits);
3138
3139 dst.ClearForWrite(vform);
3140 for (int i = 0; i < lane_count; i++) {
3141 dst.SetInt(vform, i, src.Uint(vform, i) & mask);
3142 }
3143 return dst;
3144}
3145
3146
3147LogicVRegister Simulator::sxt(VectorFormat vform,
3148 LogicVRegister dst,
3149 const LogicVRegister& src,
3150 unsigned from_size_in_bits) {
3151 int lane_count = LaneCountFromFormat(vform);
3152
3153 dst.ClearForWrite(vform);
3154 for (int i = 0; i < lane_count; i++) {
3155 uint64_t value =
3156 ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i));
3157 dst.SetInt(vform, i, value);
3158 }
3159 return dst;
3160}
3161
3162
Alexandre Ramesd3832962016-07-04 15:03:43 +01003163LogicVRegister Simulator::shrn(VectorFormat vform,
3164 LogicVRegister dst,
3165 const LogicVRegister& src,
3166 int shift) {
3167 SimVRegister temp;
3168 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
3169 VectorFormat vform_dst = vform;
3170 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
3171 return extractnarrow(vform_dst, dst, false, shifted_src, false);
3172}
3173
3174
3175LogicVRegister Simulator::shrn2(VectorFormat vform,
3176 LogicVRegister dst,
3177 const LogicVRegister& src,
3178 int shift) {
3179 SimVRegister temp;
3180 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3181 VectorFormat vformdst = vform;
3182 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
3183 return extractnarrow(vformdst, dst, false, shifted_src, false);
3184}
3185
3186
3187LogicVRegister Simulator::rshrn(VectorFormat vform,
3188 LogicVRegister dst,
3189 const LogicVRegister& src,
3190 int shift) {
3191 SimVRegister temp;
3192 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3193 VectorFormat vformdst = vform;
3194 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3195 return extractnarrow(vformdst, dst, false, shifted_src, false);
3196}
3197
3198
3199LogicVRegister Simulator::rshrn2(VectorFormat vform,
3200 LogicVRegister dst,
3201 const LogicVRegister& src,
3202 int shift) {
3203 SimVRegister temp;
3204 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3205 VectorFormat vformdst = vform;
3206 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3207 return extractnarrow(vformdst, dst, false, shifted_src, false);
3208}
3209
TatWai Chong4f28df72019-08-14 17:50:30 -07003210LogicVRegister Simulator::Table(VectorFormat vform,
3211 LogicVRegister dst,
3212 const LogicVRegister& tab,
3213 const LogicVRegister& ind) {
3214 VIXL_ASSERT(IsSVEFormat(vform));
3215 int lane_count = LaneCountFromFormat(vform);
3216 for (int i = 0; i < lane_count; i++) {
3217 uint64_t index = ind.Uint(vform, i);
3218 uint64_t value = (index >= static_cast<uint64_t>(lane_count))
3219 ? 0
3220 : tab.Uint(vform, static_cast<int>(index));
3221 dst.SetUint(vform, i, value);
3222 }
3223 return dst;
3224}
3225
Alexandre Ramesd3832962016-07-04 15:03:43 +01003226
Martyn Capewellb953ea82016-10-20 19:20:17 +01003227LogicVRegister Simulator::Table(VectorFormat vform,
3228 LogicVRegister dst,
3229 const LogicVRegister& ind,
3230 bool zero_out_of_bounds,
3231 const LogicVRegister* tab1,
3232 const LogicVRegister* tab2,
3233 const LogicVRegister* tab3,
3234 const LogicVRegister* tab4) {
3235 VIXL_ASSERT(tab1 != NULL);
3236 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
3237 uint64_t result[kMaxLanesPerVector];
3238 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3239 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
3240 }
3241 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3242 uint64_t j = ind.Uint(vform, i);
3243 int tab_idx = static_cast<int>(j >> 4);
3244 int j_idx = static_cast<int>(j & 15);
3245 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
3246 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
3247 }
3248 }
3249 dst.SetUintArray(vform, result);
3250 return dst;
3251}
3252
3253
Alexandre Ramesd3832962016-07-04 15:03:43 +01003254LogicVRegister Simulator::tbl(VectorFormat vform,
3255 LogicVRegister dst,
3256 const LogicVRegister& tab,
3257 const LogicVRegister& ind) {
Martyn Capewellb953ea82016-10-20 19:20:17 +01003258 return Table(vform, dst, ind, true, &tab);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003259}
3260
3261
3262LogicVRegister Simulator::tbl(VectorFormat vform,
3263 LogicVRegister dst,
3264 const LogicVRegister& tab,
3265 const LogicVRegister& tab2,
3266 const LogicVRegister& ind) {
Martyn Capewellb953ea82016-10-20 19:20:17 +01003267 return Table(vform, dst, ind, true, &tab, &tab2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003268}
3269
3270
3271LogicVRegister Simulator::tbl(VectorFormat vform,
3272 LogicVRegister dst,
3273 const LogicVRegister& tab,
3274 const LogicVRegister& tab2,
3275 const LogicVRegister& tab3,
3276 const LogicVRegister& ind) {
Martyn Capewellb953ea82016-10-20 19:20:17 +01003277 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003278}
3279
3280
3281LogicVRegister Simulator::tbl(VectorFormat vform,
3282 LogicVRegister dst,
3283 const LogicVRegister& tab,
3284 const LogicVRegister& tab2,
3285 const LogicVRegister& tab3,
3286 const LogicVRegister& tab4,
3287 const LogicVRegister& ind) {
Martyn Capewellb953ea82016-10-20 19:20:17 +01003288 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003289}
3290
3291
3292LogicVRegister Simulator::tbx(VectorFormat vform,
3293 LogicVRegister dst,
3294 const LogicVRegister& tab,
3295 const LogicVRegister& ind) {
Martyn Capewellb953ea82016-10-20 19:20:17 +01003296 return Table(vform, dst, ind, false, &tab);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003297}
3298
3299
3300LogicVRegister Simulator::tbx(VectorFormat vform,
3301 LogicVRegister dst,
3302 const LogicVRegister& tab,
3303 const LogicVRegister& tab2,
3304 const LogicVRegister& ind) {
Martyn Capewellb953ea82016-10-20 19:20:17 +01003305 return Table(vform, dst, ind, false, &tab, &tab2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003306}
3307
3308
3309LogicVRegister Simulator::tbx(VectorFormat vform,
3310 LogicVRegister dst,
3311 const LogicVRegister& tab,
3312 const LogicVRegister& tab2,
3313 const LogicVRegister& tab3,
3314 const LogicVRegister& ind) {
Martyn Capewellb953ea82016-10-20 19:20:17 +01003315 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003316}
3317
3318
3319LogicVRegister Simulator::tbx(VectorFormat vform,
3320 LogicVRegister dst,
3321 const LogicVRegister& tab,
3322 const LogicVRegister& tab2,
3323 const LogicVRegister& tab3,
3324 const LogicVRegister& tab4,
3325 const LogicVRegister& ind) {
Martyn Capewellb953ea82016-10-20 19:20:17 +01003326 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003327}
3328
3329
3330LogicVRegister Simulator::uqshrn(VectorFormat vform,
3331 LogicVRegister dst,
3332 const LogicVRegister& src,
3333 int shift) {
3334 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
3335}
3336
3337
3338LogicVRegister Simulator::uqshrn2(VectorFormat vform,
3339 LogicVRegister dst,
3340 const LogicVRegister& src,
3341 int shift) {
3342 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3343}
3344
3345
3346LogicVRegister Simulator::uqrshrn(VectorFormat vform,
3347 LogicVRegister dst,
3348 const LogicVRegister& src,
3349 int shift) {
3350 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
3351}
3352
3353
3354LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
3355 LogicVRegister dst,
3356 const LogicVRegister& src,
3357 int shift) {
3358 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3359}
3360
3361
3362LogicVRegister Simulator::sqshrn(VectorFormat vform,
3363 LogicVRegister dst,
3364 const LogicVRegister& src,
3365 int shift) {
3366 SimVRegister temp;
3367 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3368 VectorFormat vformdst = vform;
3369 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3370 return sqxtn(vformdst, dst, shifted_src);
3371}
3372
3373
3374LogicVRegister Simulator::sqshrn2(VectorFormat vform,
3375 LogicVRegister dst,
3376 const LogicVRegister& src,
3377 int shift) {
3378 SimVRegister temp;
3379 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3380 VectorFormat vformdst = vform;
3381 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3382 return sqxtn(vformdst, dst, shifted_src);
3383}
3384
3385
3386LogicVRegister Simulator::sqrshrn(VectorFormat vform,
3387 LogicVRegister dst,
3388 const LogicVRegister& src,
3389 int shift) {
3390 SimVRegister temp;
3391 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3392 VectorFormat vformdst = vform;
3393 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3394 return sqxtn(vformdst, dst, shifted_src);
3395}
3396
3397
3398LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
3399 LogicVRegister dst,
3400 const LogicVRegister& src,
3401 int shift) {
3402 SimVRegister temp;
3403 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3404 VectorFormat vformdst = vform;
3405 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3406 return sqxtn(vformdst, dst, shifted_src);
3407}
3408
3409
3410LogicVRegister Simulator::sqshrun(VectorFormat vform,
3411 LogicVRegister dst,
3412 const LogicVRegister& src,
3413 int shift) {
3414 SimVRegister temp;
3415 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3416 VectorFormat vformdst = vform;
3417 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3418 return sqxtun(vformdst, dst, shifted_src);
3419}
3420
3421
3422LogicVRegister Simulator::sqshrun2(VectorFormat vform,
3423 LogicVRegister dst,
3424 const LogicVRegister& src,
3425 int shift) {
3426 SimVRegister temp;
3427 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3428 VectorFormat vformdst = vform;
3429 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3430 return sqxtun(vformdst, dst, shifted_src);
3431}
3432
3433
3434LogicVRegister Simulator::sqrshrun(VectorFormat vform,
3435 LogicVRegister dst,
3436 const LogicVRegister& src,
3437 int shift) {
3438 SimVRegister temp;
3439 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3440 VectorFormat vformdst = vform;
3441 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3442 return sqxtun(vformdst, dst, shifted_src);
3443}
3444
3445
3446LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
3447 LogicVRegister dst,
3448 const LogicVRegister& src,
3449 int shift) {
3450 SimVRegister temp;
3451 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3452 VectorFormat vformdst = vform;
3453 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3454 return sqxtun(vformdst, dst, shifted_src);
3455}
3456
3457
3458LogicVRegister Simulator::uaddl(VectorFormat vform,
3459 LogicVRegister dst,
3460 const LogicVRegister& src1,
3461 const LogicVRegister& src2) {
3462 SimVRegister temp1, temp2;
3463 uxtl(vform, temp1, src1);
3464 uxtl(vform, temp2, src2);
3465 add(vform, dst, temp1, temp2);
3466 return dst;
3467}
3468
3469
3470LogicVRegister Simulator::uaddl2(VectorFormat vform,
3471 LogicVRegister dst,
3472 const LogicVRegister& src1,
3473 const LogicVRegister& src2) {
3474 SimVRegister temp1, temp2;
3475 uxtl2(vform, temp1, src1);
3476 uxtl2(vform, temp2, src2);
3477 add(vform, dst, temp1, temp2);
3478 return dst;
3479}
3480
3481
3482LogicVRegister Simulator::uaddw(VectorFormat vform,
3483 LogicVRegister dst,
3484 const LogicVRegister& src1,
3485 const LogicVRegister& src2) {
3486 SimVRegister temp;
3487 uxtl(vform, temp, src2);
3488 add(vform, dst, src1, temp);
3489 return dst;
3490}
3491
3492
3493LogicVRegister Simulator::uaddw2(VectorFormat vform,
3494 LogicVRegister dst,
3495 const LogicVRegister& src1,
3496 const LogicVRegister& src2) {
3497 SimVRegister temp;
3498 uxtl2(vform, temp, src2);
3499 add(vform, dst, src1, temp);
3500 return dst;
3501}
3502
3503
3504LogicVRegister Simulator::saddl(VectorFormat vform,
3505 LogicVRegister dst,
3506 const LogicVRegister& src1,
3507 const LogicVRegister& src2) {
3508 SimVRegister temp1, temp2;
3509 sxtl(vform, temp1, src1);
3510 sxtl(vform, temp2, src2);
3511 add(vform, dst, temp1, temp2);
3512 return dst;
3513}
3514
3515
3516LogicVRegister Simulator::saddl2(VectorFormat vform,
3517 LogicVRegister dst,
3518 const LogicVRegister& src1,
3519 const LogicVRegister& src2) {
3520 SimVRegister temp1, temp2;
3521 sxtl2(vform, temp1, src1);
3522 sxtl2(vform, temp2, src2);
3523 add(vform, dst, temp1, temp2);
3524 return dst;
3525}
3526
3527
3528LogicVRegister Simulator::saddw(VectorFormat vform,
3529 LogicVRegister dst,
3530 const LogicVRegister& src1,
3531 const LogicVRegister& src2) {
3532 SimVRegister temp;
3533 sxtl(vform, temp, src2);
3534 add(vform, dst, src1, temp);
3535 return dst;
3536}
3537
3538
3539LogicVRegister Simulator::saddw2(VectorFormat vform,
3540 LogicVRegister dst,
3541 const LogicVRegister& src1,
3542 const LogicVRegister& src2) {
3543 SimVRegister temp;
3544 sxtl2(vform, temp, src2);
3545 add(vform, dst, src1, temp);
3546 return dst;
3547}
3548
3549
3550LogicVRegister Simulator::usubl(VectorFormat vform,
3551 LogicVRegister dst,
3552 const LogicVRegister& src1,
3553 const LogicVRegister& src2) {
3554 SimVRegister temp1, temp2;
3555 uxtl(vform, temp1, src1);
3556 uxtl(vform, temp2, src2);
3557 sub(vform, dst, temp1, temp2);
3558 return dst;
3559}
3560
3561
3562LogicVRegister Simulator::usubl2(VectorFormat vform,
3563 LogicVRegister dst,
3564 const LogicVRegister& src1,
3565 const LogicVRegister& src2) {
3566 SimVRegister temp1, temp2;
3567 uxtl2(vform, temp1, src1);
3568 uxtl2(vform, temp2, src2);
3569 sub(vform, dst, temp1, temp2);
3570 return dst;
3571}
3572
3573
3574LogicVRegister Simulator::usubw(VectorFormat vform,
3575 LogicVRegister dst,
3576 const LogicVRegister& src1,
3577 const LogicVRegister& src2) {
3578 SimVRegister temp;
3579 uxtl(vform, temp, src2);
3580 sub(vform, dst, src1, temp);
3581 return dst;
3582}
3583
3584
3585LogicVRegister Simulator::usubw2(VectorFormat vform,
3586 LogicVRegister dst,
3587 const LogicVRegister& src1,
3588 const LogicVRegister& src2) {
3589 SimVRegister temp;
3590 uxtl2(vform, temp, src2);
3591 sub(vform, dst, src1, temp);
3592 return dst;
3593}
3594
3595
3596LogicVRegister Simulator::ssubl(VectorFormat vform,
3597 LogicVRegister dst,
3598 const LogicVRegister& src1,
3599 const LogicVRegister& src2) {
3600 SimVRegister temp1, temp2;
3601 sxtl(vform, temp1, src1);
3602 sxtl(vform, temp2, src2);
3603 sub(vform, dst, temp1, temp2);
3604 return dst;
3605}
3606
3607
3608LogicVRegister Simulator::ssubl2(VectorFormat vform,
3609 LogicVRegister dst,
3610 const LogicVRegister& src1,
3611 const LogicVRegister& src2) {
3612 SimVRegister temp1, temp2;
3613 sxtl2(vform, temp1, src1);
3614 sxtl2(vform, temp2, src2);
3615 sub(vform, dst, temp1, temp2);
3616 return dst;
3617}
3618
3619
3620LogicVRegister Simulator::ssubw(VectorFormat vform,
3621 LogicVRegister dst,
3622 const LogicVRegister& src1,
3623 const LogicVRegister& src2) {
3624 SimVRegister temp;
3625 sxtl(vform, temp, src2);
3626 sub(vform, dst, src1, temp);
3627 return dst;
3628}
3629
3630
3631LogicVRegister Simulator::ssubw2(VectorFormat vform,
3632 LogicVRegister dst,
3633 const LogicVRegister& src1,
3634 const LogicVRegister& src2) {
3635 SimVRegister temp;
3636 sxtl2(vform, temp, src2);
3637 sub(vform, dst, src1, temp);
3638 return dst;
3639}
3640
3641
3642LogicVRegister Simulator::uabal(VectorFormat vform,
3643 LogicVRegister dst,
3644 const LogicVRegister& src1,
3645 const LogicVRegister& src2) {
3646 SimVRegister temp1, temp2;
3647 uxtl(vform, temp1, src1);
3648 uxtl(vform, temp2, src2);
3649 uaba(vform, dst, temp1, temp2);
3650 return dst;
3651}
3652
3653
3654LogicVRegister Simulator::uabal2(VectorFormat vform,
3655 LogicVRegister dst,
3656 const LogicVRegister& src1,
3657 const LogicVRegister& src2) {
3658 SimVRegister temp1, temp2;
3659 uxtl2(vform, temp1, src1);
3660 uxtl2(vform, temp2, src2);
3661 uaba(vform, dst, temp1, temp2);
3662 return dst;
3663}
3664
3665
3666LogicVRegister Simulator::sabal(VectorFormat vform,
3667 LogicVRegister dst,
3668 const LogicVRegister& src1,
3669 const LogicVRegister& src2) {
3670 SimVRegister temp1, temp2;
3671 sxtl(vform, temp1, src1);
3672 sxtl(vform, temp2, src2);
3673 saba(vform, dst, temp1, temp2);
3674 return dst;
3675}
3676
3677
3678LogicVRegister Simulator::sabal2(VectorFormat vform,
3679 LogicVRegister dst,
3680 const LogicVRegister& src1,
3681 const LogicVRegister& src2) {
3682 SimVRegister temp1, temp2;
3683 sxtl2(vform, temp1, src1);
3684 sxtl2(vform, temp2, src2);
3685 saba(vform, dst, temp1, temp2);
3686 return dst;
3687}
3688
3689
3690LogicVRegister Simulator::uabdl(VectorFormat vform,
3691 LogicVRegister dst,
3692 const LogicVRegister& src1,
3693 const LogicVRegister& src2) {
3694 SimVRegister temp1, temp2;
3695 uxtl(vform, temp1, src1);
3696 uxtl(vform, temp2, src2);
3697 absdiff(vform, dst, temp1, temp2, false);
3698 return dst;
3699}
3700
3701
3702LogicVRegister Simulator::uabdl2(VectorFormat vform,
3703 LogicVRegister dst,
3704 const LogicVRegister& src1,
3705 const LogicVRegister& src2) {
3706 SimVRegister temp1, temp2;
3707 uxtl2(vform, temp1, src1);
3708 uxtl2(vform, temp2, src2);
3709 absdiff(vform, dst, temp1, temp2, false);
3710 return dst;
3711}
3712
3713
3714LogicVRegister Simulator::sabdl(VectorFormat vform,
3715 LogicVRegister dst,
3716 const LogicVRegister& src1,
3717 const LogicVRegister& src2) {
3718 SimVRegister temp1, temp2;
3719 sxtl(vform, temp1, src1);
3720 sxtl(vform, temp2, src2);
3721 absdiff(vform, dst, temp1, temp2, true);
3722 return dst;
3723}
3724
3725
3726LogicVRegister Simulator::sabdl2(VectorFormat vform,
3727 LogicVRegister dst,
3728 const LogicVRegister& src1,
3729 const LogicVRegister& src2) {
3730 SimVRegister temp1, temp2;
3731 sxtl2(vform, temp1, src1);
3732 sxtl2(vform, temp2, src2);
3733 absdiff(vform, dst, temp1, temp2, true);
3734 return dst;
3735}
3736
3737
3738LogicVRegister Simulator::umull(VectorFormat vform,
3739 LogicVRegister dst,
3740 const LogicVRegister& src1,
3741 const LogicVRegister& src2) {
3742 SimVRegister temp1, temp2;
3743 uxtl(vform, temp1, src1);
3744 uxtl(vform, temp2, src2);
3745 mul(vform, dst, temp1, temp2);
3746 return dst;
3747}
3748
3749
3750LogicVRegister Simulator::umull2(VectorFormat vform,
3751 LogicVRegister dst,
3752 const LogicVRegister& src1,
3753 const LogicVRegister& src2) {
3754 SimVRegister temp1, temp2;
3755 uxtl2(vform, temp1, src1);
3756 uxtl2(vform, temp2, src2);
3757 mul(vform, dst, temp1, temp2);
3758 return dst;
3759}
3760
3761
3762LogicVRegister Simulator::smull(VectorFormat vform,
3763 LogicVRegister dst,
3764 const LogicVRegister& src1,
3765 const LogicVRegister& src2) {
3766 SimVRegister temp1, temp2;
3767 sxtl(vform, temp1, src1);
3768 sxtl(vform, temp2, src2);
3769 mul(vform, dst, temp1, temp2);
3770 return dst;
3771}
3772
3773
3774LogicVRegister Simulator::smull2(VectorFormat vform,
3775 LogicVRegister dst,
3776 const LogicVRegister& src1,
3777 const LogicVRegister& src2) {
3778 SimVRegister temp1, temp2;
3779 sxtl2(vform, temp1, src1);
3780 sxtl2(vform, temp2, src2);
3781 mul(vform, dst, temp1, temp2);
3782 return dst;
3783}
3784
3785
3786LogicVRegister Simulator::umlsl(VectorFormat vform,
3787 LogicVRegister dst,
3788 const LogicVRegister& src1,
3789 const LogicVRegister& src2) {
3790 SimVRegister temp1, temp2;
3791 uxtl(vform, temp1, src1);
3792 uxtl(vform, temp2, src2);
Jacob Bramley22023df2019-05-14 17:55:43 +01003793 mls(vform, dst, dst, temp1, temp2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003794 return dst;
3795}
3796
3797
3798LogicVRegister Simulator::umlsl2(VectorFormat vform,
3799 LogicVRegister dst,
3800 const LogicVRegister& src1,
3801 const LogicVRegister& src2) {
3802 SimVRegister temp1, temp2;
3803 uxtl2(vform, temp1, src1);
3804 uxtl2(vform, temp2, src2);
Jacob Bramley22023df2019-05-14 17:55:43 +01003805 mls(vform, dst, dst, temp1, temp2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003806 return dst;
3807}
3808
3809
3810LogicVRegister Simulator::smlsl(VectorFormat vform,
3811 LogicVRegister dst,
3812 const LogicVRegister& src1,
3813 const LogicVRegister& src2) {
3814 SimVRegister temp1, temp2;
3815 sxtl(vform, temp1, src1);
3816 sxtl(vform, temp2, src2);
Jacob Bramley22023df2019-05-14 17:55:43 +01003817 mls(vform, dst, dst, temp1, temp2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003818 return dst;
3819}
3820
3821
3822LogicVRegister Simulator::smlsl2(VectorFormat vform,
3823 LogicVRegister dst,
3824 const LogicVRegister& src1,
3825 const LogicVRegister& src2) {
3826 SimVRegister temp1, temp2;
3827 sxtl2(vform, temp1, src1);
3828 sxtl2(vform, temp2, src2);
Jacob Bramley22023df2019-05-14 17:55:43 +01003829 mls(vform, dst, dst, temp1, temp2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003830 return dst;
3831}
3832
3833
3834LogicVRegister Simulator::umlal(VectorFormat vform,
3835 LogicVRegister dst,
3836 const LogicVRegister& src1,
3837 const LogicVRegister& src2) {
3838 SimVRegister temp1, temp2;
3839 uxtl(vform, temp1, src1);
3840 uxtl(vform, temp2, src2);
Jacob Bramley22023df2019-05-14 17:55:43 +01003841 mla(vform, dst, dst, temp1, temp2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003842 return dst;
3843}
3844
3845
3846LogicVRegister Simulator::umlal2(VectorFormat vform,
3847 LogicVRegister dst,
3848 const LogicVRegister& src1,
3849 const LogicVRegister& src2) {
3850 SimVRegister temp1, temp2;
3851 uxtl2(vform, temp1, src1);
3852 uxtl2(vform, temp2, src2);
Jacob Bramley22023df2019-05-14 17:55:43 +01003853 mla(vform, dst, dst, temp1, temp2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003854 return dst;
3855}
3856
3857
3858LogicVRegister Simulator::smlal(VectorFormat vform,
3859 LogicVRegister dst,
3860 const LogicVRegister& src1,
3861 const LogicVRegister& src2) {
3862 SimVRegister temp1, temp2;
3863 sxtl(vform, temp1, src1);
3864 sxtl(vform, temp2, src2);
Jacob Bramley22023df2019-05-14 17:55:43 +01003865 mla(vform, dst, dst, temp1, temp2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003866 return dst;
3867}
3868
3869
3870LogicVRegister Simulator::smlal2(VectorFormat vform,
3871 LogicVRegister dst,
3872 const LogicVRegister& src1,
3873 const LogicVRegister& src2) {
3874 SimVRegister temp1, temp2;
3875 sxtl2(vform, temp1, src1);
3876 sxtl2(vform, temp2, src2);
Jacob Bramley22023df2019-05-14 17:55:43 +01003877 mla(vform, dst, dst, temp1, temp2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01003878 return dst;
3879}
3880
3881
3882LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3883 LogicVRegister dst,
3884 const LogicVRegister& src1,
3885 const LogicVRegister& src2) {
3886 SimVRegister temp;
3887 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3888 return add(vform, dst, dst, product).SignedSaturate(vform);
3889}
3890
3891
3892LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3893 LogicVRegister dst,
3894 const LogicVRegister& src1,
3895 const LogicVRegister& src2) {
3896 SimVRegister temp;
3897 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3898 return add(vform, dst, dst, product).SignedSaturate(vform);
3899}
3900
3901
3902LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3903 LogicVRegister dst,
3904 const LogicVRegister& src1,
3905 const LogicVRegister& src2) {
3906 SimVRegister temp;
3907 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3908 return sub(vform, dst, dst, product).SignedSaturate(vform);
3909}
3910
3911
3912LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3913 LogicVRegister dst,
3914 const LogicVRegister& src1,
3915 const LogicVRegister& src2) {
3916 SimVRegister temp;
3917 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3918 return sub(vform, dst, dst, product).SignedSaturate(vform);
3919}
3920
3921
3922LogicVRegister Simulator::sqdmull(VectorFormat vform,
3923 LogicVRegister dst,
3924 const LogicVRegister& src1,
3925 const LogicVRegister& src2) {
3926 SimVRegister temp;
3927 LogicVRegister product = smull(vform, temp, src1, src2);
3928 return add(vform, dst, product, product).SignedSaturate(vform);
3929}
3930
3931
3932LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3933 LogicVRegister dst,
3934 const LogicVRegister& src1,
3935 const LogicVRegister& src2) {
3936 SimVRegister temp;
3937 LogicVRegister product = smull2(vform, temp, src1, src2);
3938 return add(vform, dst, product, product).SignedSaturate(vform);
3939}
3940
3941
3942LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3943 LogicVRegister dst,
3944 const LogicVRegister& src1,
3945 const LogicVRegister& src2,
3946 bool round) {
3947 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
3948 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
3949 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
3950
3951 int esize = LaneSizeInBitsFromFormat(vform);
3952 int round_const = round ? (1 << (esize - 2)) : 0;
3953 int64_t product;
3954
3955 dst.ClearForWrite(vform);
3956 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3957 product = src1.Int(vform, i) * src2.Int(vform, i);
3958 product += round_const;
3959 product = product >> (esize - 1);
3960
3961 if (product > MaxIntFromFormat(vform)) {
3962 product = MaxIntFromFormat(vform);
3963 } else if (product < MinIntFromFormat(vform)) {
3964 product = MinIntFromFormat(vform);
3965 }
3966 dst.SetInt(vform, i, product);
3967 }
3968 return dst;
3969}
3970
3971
Alexander Gilday560332d2018-04-05 13:25:17 +01003972LogicVRegister Simulator::dot(VectorFormat vform,
3973 LogicVRegister dst,
3974 const LogicVRegister& src1,
3975 const LogicVRegister& src2,
3976 bool is_signed) {
3977 VectorFormat quarter_vform =
3978 VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
3979
3980 dst.ClearForWrite(vform);
3981 for (int e = 0; e < LaneCountFromFormat(vform); e++) {
TatWai Chong4d2a4e92019-10-23 16:19:32 -07003982 uint64_t result = 0;
Alexander Gilday560332d2018-04-05 13:25:17 +01003983 int64_t element1, element2;
3984 for (int i = 0; i < 4; i++) {
3985 int index = 4 * e + i;
3986 if (is_signed) {
3987 element1 = src1.Int(quarter_vform, index);
3988 element2 = src2.Int(quarter_vform, index);
3989 } else {
3990 element1 = src1.Uint(quarter_vform, index);
3991 element2 = src2.Uint(quarter_vform, index);
3992 }
3993 result += element1 * element2;
3994 }
TatWai Chong4d2a4e92019-10-23 16:19:32 -07003995 dst.SetUint(vform, e, result + dst.Uint(vform, e));
Alexander Gilday560332d2018-04-05 13:25:17 +01003996 }
3997 return dst;
3998}
3999
4000
4001LogicVRegister Simulator::sdot(VectorFormat vform,
4002 LogicVRegister dst,
4003 const LogicVRegister& src1,
4004 const LogicVRegister& src2) {
4005 return dot(vform, dst, src1, src2, true);
4006}
4007
4008
4009LogicVRegister Simulator::udot(VectorFormat vform,
4010 LogicVRegister dst,
4011 const LogicVRegister& src1,
4012 const LogicVRegister& src2) {
4013 return dot(vform, dst, src1, src2, false);
4014}
4015
4016
Alexander Gilday43785642018-04-04 13:42:33 +01004017LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
4018 LogicVRegister dst,
4019 const LogicVRegister& src1,
4020 const LogicVRegister& src2,
4021 bool round,
4022 bool sub_op) {
4023 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
4024 // To avoid this, we use:
4025 // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4026 // which is same as:
4027 // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4028
4029 int esize = LaneSizeInBitsFromFormat(vform);
4030 int round_const = round ? (1 << (esize - 2)) : 0;
4031 int64_t accum;
4032
4033 dst.ClearForWrite(vform);
4034 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4035 accum = dst.Int(vform, i) << (esize - 1);
4036 if (sub_op) {
4037 accum -= src1.Int(vform, i) * src2.Int(vform, i);
4038 } else {
4039 accum += src1.Int(vform, i) * src2.Int(vform, i);
4040 }
4041 accum += round_const;
4042 accum = accum >> (esize - 1);
4043
4044 if (accum > MaxIntFromFormat(vform)) {
4045 accum = MaxIntFromFormat(vform);
4046 } else if (accum < MinIntFromFormat(vform)) {
4047 accum = MinIntFromFormat(vform);
4048 }
4049 dst.SetInt(vform, i, accum);
4050 }
4051 return dst;
4052}
4053
4054
4055LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
4056 LogicVRegister dst,
4057 const LogicVRegister& src1,
4058 const LogicVRegister& src2,
4059 bool round) {
4060 return sqrdmlash(vform, dst, src1, src2, round, false);
4061}
4062
4063
4064LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
4065 LogicVRegister dst,
4066 const LogicVRegister& src1,
4067 const LogicVRegister& src2,
4068 bool round) {
4069 return sqrdmlash(vform, dst, src1, src2, round, true);
4070}
4071
4072
Alexandre Ramesd3832962016-07-04 15:03:43 +01004073LogicVRegister Simulator::sqdmulh(VectorFormat vform,
4074 LogicVRegister dst,
4075 const LogicVRegister& src1,
4076 const LogicVRegister& src2) {
4077 return sqrdmulh(vform, dst, src1, src2, false);
4078}
4079
4080
4081LogicVRegister Simulator::addhn(VectorFormat vform,
4082 LogicVRegister dst,
4083 const LogicVRegister& src1,
4084 const LogicVRegister& src2) {
4085 SimVRegister temp;
4086 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4087 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4088 return dst;
4089}
4090
4091
4092LogicVRegister Simulator::addhn2(VectorFormat vform,
4093 LogicVRegister dst,
4094 const LogicVRegister& src1,
4095 const LogicVRegister& src2) {
4096 SimVRegister temp;
4097 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4098 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4099 return dst;
4100}
4101
4102
4103LogicVRegister Simulator::raddhn(VectorFormat vform,
4104 LogicVRegister dst,
4105 const LogicVRegister& src1,
4106 const LogicVRegister& src2) {
4107 SimVRegister temp;
4108 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4109 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4110 return dst;
4111}
4112
4113
4114LogicVRegister Simulator::raddhn2(VectorFormat vform,
4115 LogicVRegister dst,
4116 const LogicVRegister& src1,
4117 const LogicVRegister& src2) {
4118 SimVRegister temp;
4119 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4120 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4121 return dst;
4122}
4123
4124
4125LogicVRegister Simulator::subhn(VectorFormat vform,
4126 LogicVRegister dst,
4127 const LogicVRegister& src1,
4128 const LogicVRegister& src2) {
4129 SimVRegister temp;
4130 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4131 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4132 return dst;
4133}
4134
4135
4136LogicVRegister Simulator::subhn2(VectorFormat vform,
4137 LogicVRegister dst,
4138 const LogicVRegister& src1,
4139 const LogicVRegister& src2) {
4140 SimVRegister temp;
4141 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4142 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4143 return dst;
4144}
4145
4146
4147LogicVRegister Simulator::rsubhn(VectorFormat vform,
4148 LogicVRegister dst,
4149 const LogicVRegister& src1,
4150 const LogicVRegister& src2) {
4151 SimVRegister temp;
4152 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4153 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4154 return dst;
4155}
4156
4157
4158LogicVRegister Simulator::rsubhn2(VectorFormat vform,
4159 LogicVRegister dst,
4160 const LogicVRegister& src1,
4161 const LogicVRegister& src2) {
4162 SimVRegister temp;
4163 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4164 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4165 return dst;
4166}
4167
4168
4169LogicVRegister Simulator::trn1(VectorFormat vform,
4170 LogicVRegister dst,
4171 const LogicVRegister& src1,
4172 const LogicVRegister& src2) {
Martyn Capewell7fd6fd52019-12-06 14:50:15 +00004173 uint64_t result[kZRegMaxSizeInBytes];
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00004174 int lane_count = LaneCountFromFormat(vform);
4175 int pairs = lane_count / 2;
Alexandre Ramesd3832962016-07-04 15:03:43 +01004176 for (int i = 0; i < pairs; ++i) {
4177 result[2 * i] = src1.Uint(vform, 2 * i);
4178 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
4179 }
4180
4181 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00004182 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004183 dst.SetUint(vform, i, result[i]);
4184 }
4185 return dst;
4186}
4187
4188
4189LogicVRegister Simulator::trn2(VectorFormat vform,
4190 LogicVRegister dst,
4191 const LogicVRegister& src1,
4192 const LogicVRegister& src2) {
Martyn Capewell7fd6fd52019-12-06 14:50:15 +00004193 uint64_t result[kZRegMaxSizeInBytes];
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00004194 int lane_count = LaneCountFromFormat(vform);
4195 int pairs = lane_count / 2;
Alexandre Ramesd3832962016-07-04 15:03:43 +01004196 for (int i = 0; i < pairs; ++i) {
4197 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
4198 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
4199 }
4200
4201 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00004202 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004203 dst.SetUint(vform, i, result[i]);
4204 }
4205 return dst;
4206}
4207
4208
4209LogicVRegister Simulator::zip1(VectorFormat vform,
4210 LogicVRegister dst,
4211 const LogicVRegister& src1,
4212 const LogicVRegister& src2) {
Martyn Capewell7fd6fd52019-12-06 14:50:15 +00004213 uint64_t result[kZRegMaxSizeInBytes];
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00004214 int lane_count = LaneCountFromFormat(vform);
4215 int pairs = lane_count / 2;
Alexandre Ramesd3832962016-07-04 15:03:43 +01004216 for (int i = 0; i < pairs; ++i) {
4217 result[2 * i] = src1.Uint(vform, i);
4218 result[(2 * i) + 1] = src2.Uint(vform, i);
4219 }
4220
4221 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00004222 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004223 dst.SetUint(vform, i, result[i]);
4224 }
4225 return dst;
4226}
4227
4228
4229LogicVRegister Simulator::zip2(VectorFormat vform,
4230 LogicVRegister dst,
4231 const LogicVRegister& src1,
4232 const LogicVRegister& src2) {
Martyn Capewell7fd6fd52019-12-06 14:50:15 +00004233 uint64_t result[kZRegMaxSizeInBytes];
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00004234 int lane_count = LaneCountFromFormat(vform);
4235 int pairs = lane_count / 2;
Alexandre Ramesd3832962016-07-04 15:03:43 +01004236 for (int i = 0; i < pairs; ++i) {
4237 result[2 * i] = src1.Uint(vform, pairs + i);
4238 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
4239 }
4240
4241 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00004242 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004243 dst.SetUint(vform, i, result[i]);
4244 }
4245 return dst;
4246}
4247
4248
4249LogicVRegister Simulator::uzp1(VectorFormat vform,
4250 LogicVRegister dst,
4251 const LogicVRegister& src1,
4252 const LogicVRegister& src2) {
Martyn Capewell7fd6fd52019-12-06 14:50:15 +00004253 uint64_t result[kZRegMaxSizeInBytes * 2];
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00004254 int lane_count = LaneCountFromFormat(vform);
4255 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004256 result[i] = src1.Uint(vform, i);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00004257 result[lane_count + i] = src2.Uint(vform, i);
Alexandre Ramesd3832962016-07-04 15:03:43 +01004258 }
4259
4260 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00004261 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004262 dst.SetUint(vform, i, result[2 * i]);
4263 }
4264 return dst;
4265}
4266
4267
4268LogicVRegister Simulator::uzp2(VectorFormat vform,
4269 LogicVRegister dst,
4270 const LogicVRegister& src1,
4271 const LogicVRegister& src2) {
Martyn Capewell7fd6fd52019-12-06 14:50:15 +00004272 uint64_t result[kZRegMaxSizeInBytes * 2];
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00004273 int lane_count = LaneCountFromFormat(vform);
4274 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004275 result[i] = src1.Uint(vform, i);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00004276 result[lane_count + i] = src2.Uint(vform, i);
Alexandre Ramesd3832962016-07-04 15:03:43 +01004277 }
4278
4279 dst.ClearForWrite(vform);
Jacob Bramleyacd32aa2019-12-12 18:08:20 +00004280 for (int i = 0; i < lane_count; ++i) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004281 dst.SetUint(vform, i, result[(2 * i) + 1]);
4282 }
4283 return dst;
4284}
4285
4286
4287template <typename T>
Carey Williams2809e6c2018-03-13 12:24:16 +00004288T Simulator::FPNeg(T op) {
4289 return -op;
4290}
4291
Carey Williams2809e6c2018-03-13 12:24:16 +00004292template <typename T>
Alexandre Ramesd3832962016-07-04 15:03:43 +01004293T Simulator::FPAdd(T op1, T op2) {
4294 T result = FPProcessNaNs(op1, op2);
Jacob Bramleyca789742018-09-13 14:25:46 +01004295 if (IsNaN(result)) {
4296 return result;
4297 }
Alexandre Ramesd3832962016-07-04 15:03:43 +01004298
Jacob Bramleyca789742018-09-13 14:25:46 +01004299 if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004300 // inf + -inf returns the default NaN.
4301 FPProcessException();
4302 return FPDefaultNaN<T>();
4303 } else {
4304 // Other cases should be handled by standard arithmetic.
4305 return op1 + op2;
4306 }
4307}
4308
4309
4310template <typename T>
4311T Simulator::FPSub(T op1, T op2) {
4312 // NaNs should be handled elsewhere.
Jacob Bramleyca789742018-09-13 14:25:46 +01004313 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
Alexandre Ramesd3832962016-07-04 15:03:43 +01004314
Jacob Bramleyca789742018-09-13 14:25:46 +01004315 if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004316 // inf - inf returns the default NaN.
4317 FPProcessException();
4318 return FPDefaultNaN<T>();
4319 } else {
4320 // Other cases should be handled by standard arithmetic.
4321 return op1 - op2;
4322 }
4323}
4324
4325
4326template <typename T>
4327T Simulator::FPMul(T op1, T op2) {
4328 // NaNs should be handled elsewhere.
Jacob Bramleyca789742018-09-13 14:25:46 +01004329 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
Alexandre Ramesd3832962016-07-04 15:03:43 +01004330
Jacob Bramleyca789742018-09-13 14:25:46 +01004331 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004332 // inf * 0.0 returns the default NaN.
4333 FPProcessException();
4334 return FPDefaultNaN<T>();
4335 } else {
4336 // Other cases should be handled by standard arithmetic.
4337 return op1 * op2;
4338 }
4339}
4340
4341
4342template <typename T>
4343T Simulator::FPMulx(T op1, T op2) {
Jacob Bramleyca789742018-09-13 14:25:46 +01004344 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004345 // inf * 0.0 returns +/-2.0.
4346 T two = 2.0;
4347 return copysign(1.0, op1) * copysign(1.0, op2) * two;
4348 }
4349 return FPMul(op1, op2);
4350}
4351
4352
4353template <typename T>
4354T Simulator::FPMulAdd(T a, T op1, T op2) {
4355 T result = FPProcessNaNs3(a, op1, op2);
4356
4357 T sign_a = copysign(1.0, a);
4358 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
Jacob Bramleyca789742018-09-13 14:25:46 +01004359 bool isinf_prod = IsInf(op1) || IsInf(op2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01004360 bool operation_generates_nan =
Jacob Bramleyca789742018-09-13 14:25:46 +01004361 (IsInf(op1) && (op2 == 0.0)) || // inf * 0.0
4362 (IsInf(op2) && (op1 == 0.0)) || // 0.0 * inf
4363 (IsInf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
Alexandre Ramesd3832962016-07-04 15:03:43 +01004364
Jacob Bramleyca789742018-09-13 14:25:46 +01004365 if (IsNaN(result)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004366 // Generated NaNs override quiet NaNs propagated from a.
4367 if (operation_generates_nan && IsQuietNaN(a)) {
4368 FPProcessException();
4369 return FPDefaultNaN<T>();
4370 } else {
4371 return result;
4372 }
4373 }
4374
4375 // If the operation would produce a NaN, return the default NaN.
4376 if (operation_generates_nan) {
4377 FPProcessException();
4378 return FPDefaultNaN<T>();
4379 }
4380
4381 // Work around broken fma implementations for exact zero results: The sign of
4382 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
4383 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
Jacob Bramleyca789742018-09-13 14:25:46 +01004384 return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
Alexandre Ramesd3832962016-07-04 15:03:43 +01004385 }
4386
4387 result = FusedMultiplyAdd(op1, op2, a);
Jacob Bramleyca789742018-09-13 14:25:46 +01004388 VIXL_ASSERT(!IsNaN(result));
Alexandre Ramesd3832962016-07-04 15:03:43 +01004389
4390 // Work around broken fma implementations for rounded zero results: If a is
4391 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
4392 if ((a == 0.0) && (result == 0.0)) {
4393 return copysign(0.0, sign_prod);
4394 }
4395
4396 return result;
4397}
4398
4399
4400template <typename T>
4401T Simulator::FPDiv(T op1, T op2) {
4402 // NaNs should be handled elsewhere.
Jacob Bramleyca789742018-09-13 14:25:46 +01004403 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
Alexandre Ramesd3832962016-07-04 15:03:43 +01004404
Jacob Bramleyca789742018-09-13 14:25:46 +01004405 if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004406 // inf / inf and 0.0 / 0.0 return the default NaN.
4407 FPProcessException();
4408 return FPDefaultNaN<T>();
4409 } else {
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00004410 if (op2 == 0.0) {
4411 FPProcessException();
Jacob Bramleyca789742018-09-13 14:25:46 +01004412 if (!IsNaN(op1)) {
Martyn Capewell9e52d5b2016-11-01 17:33:36 +00004413 double op1_sign = copysign(1.0, op1);
4414 double op2_sign = copysign(1.0, op2);
4415 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
4416 }
4417 }
Alexandre Ramesd3832962016-07-04 15:03:43 +01004418
4419 // Other cases should be handled by standard arithmetic.
4420 return op1 / op2;
4421 }
4422}
4423
4424
4425template <typename T>
4426T Simulator::FPSqrt(T op) {
Jacob Bramleyca789742018-09-13 14:25:46 +01004427 if (IsNaN(op)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004428 return FPProcessNaN(op);
Jacob Bramleyca789742018-09-13 14:25:46 +01004429 } else if (op < T(0.0)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004430 FPProcessException();
4431 return FPDefaultNaN<T>();
4432 } else {
4433 return sqrt(op);
4434 }
4435}
4436
4437
4438template <typename T>
4439T Simulator::FPMax(T a, T b) {
4440 T result = FPProcessNaNs(a, b);
Jacob Bramleyca789742018-09-13 14:25:46 +01004441 if (IsNaN(result)) return result;
Alexandre Ramesd3832962016-07-04 15:03:43 +01004442
4443 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4444 // a and b are zero, and the sign differs: return +0.0.
4445 return 0.0;
4446 } else {
4447 return (a > b) ? a : b;
4448 }
4449}
4450
4451
4452template <typename T>
4453T Simulator::FPMaxNM(T a, T b) {
4454 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4455 a = kFP64NegativeInfinity;
4456 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4457 b = kFP64NegativeInfinity;
4458 }
4459
4460 T result = FPProcessNaNs(a, b);
Jacob Bramleyca789742018-09-13 14:25:46 +01004461 return IsNaN(result) ? result : FPMax(a, b);
Alexandre Ramesd3832962016-07-04 15:03:43 +01004462}
4463
4464
4465template <typename T>
4466T Simulator::FPMin(T a, T b) {
4467 T result = FPProcessNaNs(a, b);
Jacob Bramleyca789742018-09-13 14:25:46 +01004468 if (IsNaN(result)) return result;
Alexandre Ramesd3832962016-07-04 15:03:43 +01004469
4470 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4471 // a and b are zero, and the sign differs: return -0.0.
4472 return -0.0;
4473 } else {
4474 return (a < b) ? a : b;
4475 }
4476}
4477
4478
4479template <typename T>
4480T Simulator::FPMinNM(T a, T b) {
4481 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4482 a = kFP64PositiveInfinity;
4483 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4484 b = kFP64PositiveInfinity;
4485 }
4486
4487 T result = FPProcessNaNs(a, b);
Jacob Bramleyca789742018-09-13 14:25:46 +01004488 return IsNaN(result) ? result : FPMin(a, b);
Alexandre Ramesd3832962016-07-04 15:03:43 +01004489}
4490
4491
4492template <typename T>
4493T Simulator::FPRecipStepFused(T op1, T op2) {
4494 const T two = 2.0;
Jacob Bramleyca789742018-09-13 14:25:46 +01004495 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004496 return two;
Jacob Bramleyca789742018-09-13 14:25:46 +01004497 } else if (IsInf(op1) || IsInf(op2)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004498 // Return +inf if signs match, otherwise -inf.
4499 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4500 : kFP64NegativeInfinity;
4501 } else {
4502 return FusedMultiplyAdd(op1, op2, two);
4503 }
4504}
4505
// Generic case: defers to std::isnormal for the host floating-point types.
template <typename T>
bool IsNormal(T value) {
  const bool is_normal = std::isnormal(value);
  return is_normal;
}
4510
4511template <>
4512bool IsNormal(SimFloat16 value) {
4513 uint16_t rawbits = Float16ToRawbits(value);
4514 uint16_t exp_mask = 0x7c00;
4515 // Check that the exponent is neither all zeroes or all ones.
4516 return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
4517}
4518
Alexandre Ramesd3832962016-07-04 15:03:43 +01004519
4520template <typename T>
4521T Simulator::FPRSqrtStepFused(T op1, T op2) {
4522 const T one_point_five = 1.5;
4523 const T two = 2.0;
4524
Jacob Bramleyca789742018-09-13 14:25:46 +01004525 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004526 return one_point_five;
Jacob Bramleyca789742018-09-13 14:25:46 +01004527 } else if (IsInf(op1) || IsInf(op2)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004528 // Return +inf if signs match, otherwise -inf.
4529 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4530 : kFP64NegativeInfinity;
4531 } else {
4532 // The multiply-add-halve operation must be fully fused, so avoid interim
4533 // rounding by checking which operand can be losslessly divided by two
4534 // before doing the multiply-add.
Jacob Bramleyca789742018-09-13 14:25:46 +01004535 if (IsNormal(op1 / two)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004536 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
Jacob Bramleyca789742018-09-13 14:25:46 +01004537 } else if (IsNormal(op2 / two)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004538 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
4539 } else {
4540 // Neither operand is normal after halving: the result is dominated by
4541 // the addition term, so just return that.
4542 return one_point_five;
4543 }
4544 }
4545}
4546
Jacob Bramleyca789742018-09-13 14:25:46 +01004547int32_t Simulator::FPToFixedJS(double value) {
4548 // The Z-flag is set when the conversion from double precision floating-point
4549 // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
4550 // outside the bounds of a 32-bit integer, or isn't an exact integer then the
4551 // Z-flag is unset.
4552 int Z = 1;
4553 int32_t result;
4554
4555 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4556 (value == kFP64NegativeInfinity)) {
4557 // +/- zero and infinity all return zero, however -0 and +/- Infinity also
4558 // unset the Z-flag.
4559 result = 0.0;
4560 if ((value != 0.0) || std::signbit(value)) {
4561 Z = 0;
4562 }
4563 } else if (std::isnan(value)) {
4564 // NaN values unset the Z-flag and set the result to 0.
4565 FPProcessNaN(value);
4566 result = 0;
4567 Z = 0;
4568 } else {
4569 // All other values are converted to an integer representation, rounded
4570 // toward zero.
4571 double int_result = std::floor(value);
4572 double error = value - int_result;
4573
4574 if ((error != 0.0) && (int_result < 0.0)) {
4575 int_result++;
4576 }
4577
4578 // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
4579 // write a one-liner with std::round, but the behaviour on ties is incorrect
4580 // for our purposes.
4581 double mod_const = static_cast<double>(UINT64_C(1) << 32);
4582 double mod_error =
4583 (int_result / mod_const) - std::floor(int_result / mod_const);
4584 double constrained;
4585 if (mod_error == 0.5) {
4586 constrained = INT32_MIN;
4587 } else {
4588 constrained = int_result - mod_const * round(int_result / mod_const);
4589 }
4590
4591 VIXL_ASSERT(std::floor(constrained) == constrained);
4592 VIXL_ASSERT(constrained >= INT32_MIN);
4593 VIXL_ASSERT(constrained <= INT32_MAX);
4594
4595 // Take the bottom 32 bits of the result as a 32-bit integer.
4596 result = static_cast<int32_t>(constrained);
4597
4598 if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
4599 (error != 0.0)) {
4600 // If the integer result is out of range or the conversion isn't exact,
4601 // take exception and unset the Z-flag.
4602 FPProcessException();
4603 Z = 0;
4604 }
4605 }
4606
4607 ReadNzcv().SetN(0);
4608 ReadNzcv().SetZ(Z);
4609 ReadNzcv().SetC(0);
4610 ReadNzcv().SetV(0);
4611
4612 return result;
4613}
4614
TatWai Chong04471812019-03-19 14:29:00 -07004615double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {
4616 VIXL_ASSERT((value != kFP64PositiveInfinity) &&
4617 (value != kFP64NegativeInfinity));
4618 VIXL_ASSERT(!IsNaN(value));
Alexandre Ramesd3832962016-07-04 15:03:43 +01004619
4620 double int_result = std::floor(value);
4621 double error = value - int_result;
4622 switch (round_mode) {
4623 case FPTieAway: {
4624 // Take care of correctly handling the range ]-0.5, -0.0], which must
4625 // yield -0.0.
4626 if ((-0.5 < value) && (value < 0.0)) {
4627 int_result = -0.0;
4628
4629 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
4630 // If the error is greater than 0.5, or is equal to 0.5 and the integer
4631 // result is positive, round up.
4632 int_result++;
4633 }
4634 break;
4635 }
4636 case FPTieEven: {
4637 // Take care of correctly handling the range [-0.5, -0.0], which must
4638 // yield -0.0.
4639 if ((-0.5 <= value) && (value < 0.0)) {
4640 int_result = -0.0;
4641
4642 // If the error is greater than 0.5, or is equal to 0.5 and the integer
4643 // result is odd, round up.
4644 } else if ((error > 0.5) ||
4645 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
4646 int_result++;
4647 }
4648 break;
4649 }
4650 case FPZero: {
4651 // If value>0 then we take floor(value)
4652 // otherwise, ceil(value).
4653 if (value < 0) {
4654 int_result = ceil(value);
4655 }
4656 break;
4657 }
4658 case FPNegativeInfinity: {
4659 // We always use floor(value).
4660 break;
4661 }
4662 case FPPositiveInfinity: {
4663 // Take care of correctly handling the range ]-1.0, -0.0], which must
4664 // yield -0.0.
4665 if ((-1.0 < value) && (value < 0.0)) {
4666 int_result = -0.0;
4667
4668 // If the error is non-zero, round up.
4669 } else if (error > 0.0) {
4670 int_result++;
4671 }
4672 break;
4673 }
4674 default:
4675 VIXL_UNIMPLEMENTED();
4676 }
4677 return int_result;
4678}
4679
TatWai Chong04471812019-03-19 14:29:00 -07004680double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4681 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4682 (value == kFP64NegativeInfinity)) {
4683 return value;
4684 } else if (IsNaN(value)) {
4685 return FPProcessNaN(value);
4686 }
4687 return FPRoundIntCommon(value, round_mode);
4688}
4689
4690double Simulator::FPRoundInt(double value,
4691 FPRounding round_mode,
4692 FrintMode frint_mode) {
4693 if (frint_mode == kFrintToInteger) {
4694 return FPRoundInt(value, round_mode);
4695 }
4696
4697 VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));
4698
4699 if (value == 0.0) {
4700 return value;
4701 }
4702
4703 if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||
4704 IsNaN(value)) {
4705 if (frint_mode == kFrintToInt32) {
4706 return INT32_MIN;
4707 } else {
4708 return INT64_MIN;
4709 }
4710 }
4711
4712 double result = FPRoundIntCommon(value, round_mode);
4713
Jacob Bramleyf73036b2020-11-04 09:06:03 +00004714 // We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly
4715 // representable as a double, and is rounded to (INT64_MAX + 1) when
4716 // converted. To avoid this, we compare `result >= int64_max_plus_one`
4717 // instead; this is safe because `result` is known to be integral, and
4718 // `int64_max_plus_one` is exactly representable as a double.
4719 constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;
4720 VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(
4721 int64_max_plus_one)) == int64_max_plus_one);
4722
TatWai Chong04471812019-03-19 14:29:00 -07004723 if (frint_mode == kFrintToInt32) {
4724 if ((result > INT32_MAX) || (result < INT32_MIN)) {
4725 return INT32_MIN;
4726 }
Jacob Bramleyf73036b2020-11-04 09:06:03 +00004727 } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {
TatWai Chong04471812019-03-19 14:29:00 -07004728 return INT64_MIN;
4729 }
4730
4731 return result;
4732}
Alexandre Ramesd3832962016-07-04 15:03:43 +01004733
Jacob Bramleyca789742018-09-13 14:25:46 +01004734int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4735 value = FPRoundInt(value, rmode);
4736 if (value >= kHMaxInt) {
4737 return kHMaxInt;
4738 } else if (value < kHMinInt) {
4739 return kHMinInt;
4740 }
4741 return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4742}
4743
4744
Alexandre Ramesd3832962016-07-04 15:03:43 +01004745int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4746 value = FPRoundInt(value, rmode);
4747 if (value >= kWMaxInt) {
4748 return kWMaxInt;
4749 } else if (value < kWMinInt) {
4750 return kWMinInt;
4751 }
Jacob Bramleyca789742018-09-13 14:25:46 +01004752 return IsNaN(value) ? 0 : static_cast<int32_t>(value);
Alexandre Ramesd3832962016-07-04 15:03:43 +01004753}
4754
4755
4756int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4757 value = FPRoundInt(value, rmode);
Peter Collingbourne852d12c2021-01-25 13:10:37 -08004758 // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues
4759 // as a result of kMaxInt not being representable as a double.
4760 if (value >= 9223372036854775808.) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004761 return kXMaxInt;
4762 } else if (value < kXMinInt) {
4763 return kXMinInt;
4764 }
Jacob Bramleyca789742018-09-13 14:25:46 +01004765 return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4766}
4767
4768
4769uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4770 value = FPRoundInt(value, rmode);
4771 if (value >= kHMaxUInt) {
4772 return kHMaxUInt;
4773 } else if (value < 0.0) {
4774 return 0;
4775 }
4776 return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
Alexandre Ramesd3832962016-07-04 15:03:43 +01004777}
4778
4779
4780uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
4781 value = FPRoundInt(value, rmode);
4782 if (value >= kWMaxUInt) {
4783 return kWMaxUInt;
4784 } else if (value < 0.0) {
4785 return 0;
4786 }
Jacob Bramleyca789742018-09-13 14:25:46 +01004787 return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
Alexandre Ramesd3832962016-07-04 15:03:43 +01004788}
4789
4790
4791uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
4792 value = FPRoundInt(value, rmode);
Peter Collingbourne852d12c2021-01-25 13:10:37 -08004793 // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues
4794 // as a result of kMaxUInt not being representable as a double.
4795 if (value >= 18446744073709551616.) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004796 return kXMaxUInt;
4797 } else if (value < 0.0) {
4798 return 0;
4799 }
Jacob Bramleyca789742018-09-13 14:25:46 +01004800 return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
Alexandre Ramesd3832962016-07-04 15:03:43 +01004801}
4802
4803
4804#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
4805 template <typename T> \
4806 LogicVRegister Simulator::FN(VectorFormat vform, \
4807 LogicVRegister dst, \
4808 const LogicVRegister& src1, \
4809 const LogicVRegister& src2) { \
4810 dst.ClearForWrite(vform); \
4811 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \
4812 T op1 = src1.Float<T>(i); \
4813 T op2 = src2.Float<T>(i); \
4814 T result; \
4815 if (PROCNAN) { \
4816 result = FPProcessNaNs(op1, op2); \
Jacob Bramleyca789742018-09-13 14:25:46 +01004817 if (!IsNaN(result)) { \
Alexandre Ramesd3832962016-07-04 15:03:43 +01004818 result = OP(op1, op2); \
4819 } \
4820 } else { \
4821 result = OP(op1, op2); \
4822 } \
Martyn Capewell0b1afa82020-03-04 11:31:42 +00004823 dst.SetFloat(vform, i, result); \
Alexandre Ramesd3832962016-07-04 15:03:43 +01004824 } \
4825 return dst; \
4826 } \
4827 \
4828 LogicVRegister Simulator::FN(VectorFormat vform, \
4829 LogicVRegister dst, \
4830 const LogicVRegister& src1, \
4831 const LogicVRegister& src2) { \
Jacob Bramleyca789742018-09-13 14:25:46 +01004832 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { \
4833 FN<SimFloat16>(vform, dst, src1, src2); \
4834 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \
Alexandre Ramesd3832962016-07-04 15:03:43 +01004835 FN<float>(vform, dst, src1, src2); \
4836 } else { \
4837 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
4838 FN<double>(vform, dst, src1, src2); \
4839 } \
4840 return dst; \
4841 }
4842NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
4843#undef DEFINE_NEON_FP_VECTOR_OP
4844
4845
4846LogicVRegister Simulator::fnmul(VectorFormat vform,
4847 LogicVRegister dst,
4848 const LogicVRegister& src1,
4849 const LogicVRegister& src2) {
4850 SimVRegister temp;
4851 LogicVRegister product = fmul(vform, temp, src1, src2);
4852 return fneg(vform, dst, product);
4853}
4854
4855
4856template <typename T>
4857LogicVRegister Simulator::frecps(VectorFormat vform,
4858 LogicVRegister dst,
4859 const LogicVRegister& src1,
4860 const LogicVRegister& src2) {
4861 dst.ClearForWrite(vform);
4862 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4863 T op1 = -src1.Float<T>(i);
4864 T op2 = src2.Float<T>(i);
4865 T result = FPProcessNaNs(op1, op2);
Martyn Capewellefd9dc72020-02-13 10:46:29 +00004866 dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
Alexandre Ramesd3832962016-07-04 15:03:43 +01004867 }
4868 return dst;
4869}
4870
4871
4872LogicVRegister Simulator::frecps(VectorFormat vform,
4873 LogicVRegister dst,
4874 const LogicVRegister& src1,
4875 const LogicVRegister& src2) {
Jacob Bramleyca789742018-09-13 14:25:46 +01004876 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4877 frecps<SimFloat16>(vform, dst, src1, src2);
4878 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004879 frecps<float>(vform, dst, src1, src2);
4880 } else {
4881 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4882 frecps<double>(vform, dst, src1, src2);
4883 }
4884 return dst;
4885}
4886
4887
4888template <typename T>
4889LogicVRegister Simulator::frsqrts(VectorFormat vform,
4890 LogicVRegister dst,
4891 const LogicVRegister& src1,
4892 const LogicVRegister& src2) {
4893 dst.ClearForWrite(vform);
4894 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4895 T op1 = -src1.Float<T>(i);
4896 T op2 = src2.Float<T>(i);
4897 T result = FPProcessNaNs(op1, op2);
Martyn Capewellefd9dc72020-02-13 10:46:29 +00004898 dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
Alexandre Ramesd3832962016-07-04 15:03:43 +01004899 }
4900 return dst;
4901}
4902
4903
4904LogicVRegister Simulator::frsqrts(VectorFormat vform,
4905 LogicVRegister dst,
4906 const LogicVRegister& src1,
4907 const LogicVRegister& src2) {
Jacob Bramleyca789742018-09-13 14:25:46 +01004908 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4909 frsqrts<SimFloat16>(vform, dst, src1, src2);
4910 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004911 frsqrts<float>(vform, dst, src1, src2);
4912 } else {
4913 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4914 frsqrts<double>(vform, dst, src1, src2);
4915 }
4916 return dst;
4917}
4918
4919
4920template <typename T>
4921LogicVRegister Simulator::fcmp(VectorFormat vform,
4922 LogicVRegister dst,
4923 const LogicVRegister& src1,
4924 const LogicVRegister& src2,
4925 Condition cond) {
4926 dst.ClearForWrite(vform);
4927 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4928 bool result = false;
4929 T op1 = src1.Float<T>(i);
4930 T op2 = src2.Float<T>(i);
Jacob Bramley4606adc2020-07-02 14:23:08 +01004931 bool unordered = IsNaN(FPProcessNaNs(op1, op2));
TatWai Chong47c26842020-02-10 01:51:32 -08004932
Jacob Bramley4606adc2020-07-02 14:23:08 +01004933 switch (cond) {
4934 case eq:
4935 result = (op1 == op2);
4936 break;
4937 case ge:
4938 result = (op1 >= op2);
4939 break;
4940 case gt:
4941 result = (op1 > op2);
4942 break;
4943 case le:
4944 result = (op1 <= op2);
4945 break;
4946 case lt:
4947 result = (op1 < op2);
4948 break;
4949 case ne:
4950 result = (op1 != op2);
4951 break;
4952 case uo:
4953 result = unordered;
4954 break;
4955 default:
4956 // Other conditions are defined in terms of those above.
4957 VIXL_UNREACHABLE();
4958 break;
TatWai Chong47c26842020-02-10 01:51:32 -08004959 }
4960
Jacob Bramley4606adc2020-07-02 14:23:08 +01004961 if (result && unordered) {
4962 // Only `uo` and `ne` can be true for unordered comparisons.
4963 VIXL_ASSERT((cond == uo) || (cond == ne));
Alexandre Ramesd3832962016-07-04 15:03:43 +01004964 }
Jacob Bramley4606adc2020-07-02 14:23:08 +01004965
Alexandre Ramesd3832962016-07-04 15:03:43 +01004966 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
4967 }
4968 return dst;
4969}
4970
4971
4972LogicVRegister Simulator::fcmp(VectorFormat vform,
4973 LogicVRegister dst,
4974 const LogicVRegister& src1,
4975 const LogicVRegister& src2,
4976 Condition cond) {
Jacob Bramleyca789742018-09-13 14:25:46 +01004977 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4978 fcmp<SimFloat16>(vform, dst, src1, src2, cond);
4979 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004980 fcmp<float>(vform, dst, src1, src2, cond);
4981 } else {
4982 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4983 fcmp<double>(vform, dst, src1, src2, cond);
4984 }
4985 return dst;
4986}
4987
4988
4989LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
4990 LogicVRegister dst,
4991 const LogicVRegister& src,
4992 Condition cond) {
4993 SimVRegister temp;
Jacob Bramleyca789742018-09-13 14:25:46 +01004994 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4995 LogicVRegister zero_reg =
4996 dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
4997 fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
4998 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01004999 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
5000 fcmp<float>(vform, dst, src, zero_reg, cond);
5001 } else {
5002 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5003 LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
5004 fcmp<double>(vform, dst, src, zero_reg, cond);
5005 }
5006 return dst;
5007}
5008
5009
5010LogicVRegister Simulator::fabscmp(VectorFormat vform,
5011 LogicVRegister dst,
5012 const LogicVRegister& src1,
5013 const LogicVRegister& src2,
5014 Condition cond) {
5015 SimVRegister temp1, temp2;
Jacob Bramleyca789742018-09-13 14:25:46 +01005016 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5017 LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
5018 LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
5019 fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
5020 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005021 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
5022 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
5023 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
5024 } else {
5025 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5026 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
5027 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
5028 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
5029 }
5030 return dst;
5031}
5032
5033
5034template <typename T>
5035LogicVRegister Simulator::fmla(VectorFormat vform,
5036 LogicVRegister dst,
TatWai Chongf8d29f12020-02-16 22:53:18 -08005037 const LogicVRegister& srca,
Alexandre Ramesd3832962016-07-04 15:03:43 +01005038 const LogicVRegister& src1,
5039 const LogicVRegister& src2) {
5040 dst.ClearForWrite(vform);
5041 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5042 T op1 = src1.Float<T>(i);
5043 T op2 = src2.Float<T>(i);
TatWai Chongf8d29f12020-02-16 22:53:18 -08005044 T acc = srca.Float<T>(i);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005045 T result = FPMulAdd(acc, op1, op2);
Martyn Capewellc7501512020-03-16 10:35:33 +00005046 dst.SetFloat(vform, i, result);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005047 }
5048 return dst;
5049}
5050
5051
5052LogicVRegister Simulator::fmla(VectorFormat vform,
5053 LogicVRegister dst,
TatWai Chongf8d29f12020-02-16 22:53:18 -08005054 const LogicVRegister& srca,
Alexandre Ramesd3832962016-07-04 15:03:43 +01005055 const LogicVRegister& src1,
5056 const LogicVRegister& src2) {
Jacob Bramleyca789742018-09-13 14:25:46 +01005057 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
TatWai Chongf8d29f12020-02-16 22:53:18 -08005058 fmla<SimFloat16>(vform, dst, srca, src1, src2);
Jacob Bramleyca789742018-09-13 14:25:46 +01005059 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
TatWai Chongf8d29f12020-02-16 22:53:18 -08005060 fmla<float>(vform, dst, srca, src1, src2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005061 } else {
5062 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
TatWai Chongf8d29f12020-02-16 22:53:18 -08005063 fmla<double>(vform, dst, srca, src1, src2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005064 }
5065 return dst;
5066}
5067
5068
5069template <typename T>
5070LogicVRegister Simulator::fmls(VectorFormat vform,
5071 LogicVRegister dst,
TatWai Chongf8d29f12020-02-16 22:53:18 -08005072 const LogicVRegister& srca,
Alexandre Ramesd3832962016-07-04 15:03:43 +01005073 const LogicVRegister& src1,
5074 const LogicVRegister& src2) {
5075 dst.ClearForWrite(vform);
5076 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5077 T op1 = -src1.Float<T>(i);
5078 T op2 = src2.Float<T>(i);
TatWai Chongf8d29f12020-02-16 22:53:18 -08005079 T acc = srca.Float<T>(i);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005080 T result = FPMulAdd(acc, op1, op2);
5081 dst.SetFloat(i, result);
5082 }
5083 return dst;
5084}
5085
5086
5087LogicVRegister Simulator::fmls(VectorFormat vform,
5088 LogicVRegister dst,
TatWai Chongf8d29f12020-02-16 22:53:18 -08005089 const LogicVRegister& srca,
Alexandre Ramesd3832962016-07-04 15:03:43 +01005090 const LogicVRegister& src1,
5091 const LogicVRegister& src2) {
Jacob Bramleyca789742018-09-13 14:25:46 +01005092 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
TatWai Chongf8d29f12020-02-16 22:53:18 -08005093 fmls<SimFloat16>(vform, dst, srca, src1, src2);
Jacob Bramleyca789742018-09-13 14:25:46 +01005094 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
TatWai Chongf8d29f12020-02-16 22:53:18 -08005095 fmls<float>(vform, dst, srca, src1, src2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005096 } else {
5097 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
TatWai Chongf8d29f12020-02-16 22:53:18 -08005098 fmls<double>(vform, dst, srca, src1, src2);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005099 }
5100 return dst;
5101}
5102
5103
Jacob Bramley8f36e7f2018-08-23 17:45:37 +01005104LogicVRegister Simulator::fmlal(VectorFormat vform,
5105 LogicVRegister dst,
5106 const LogicVRegister& src1,
5107 const LogicVRegister& src2) {
5108 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5109 dst.ClearForWrite(vform);
5110 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5111 float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5112 float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5113 float acc = dst.Float<float>(i);
5114 float result = FPMulAdd(acc, op1, op2);
5115 dst.SetFloat(i, result);
5116 }
5117 return dst;
5118}
5119
5120
5121LogicVRegister Simulator::fmlal2(VectorFormat vform,
5122 LogicVRegister dst,
5123 const LogicVRegister& src1,
5124 const LogicVRegister& src2) {
5125 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5126 dst.ClearForWrite(vform);
5127 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5128 int src = i + LaneCountFromFormat(vform);
5129 float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5130 float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5131 float acc = dst.Float<float>(i);
5132 float result = FPMulAdd(acc, op1, op2);
5133 dst.SetFloat(i, result);
5134 }
5135 return dst;
5136}
5137
5138
5139LogicVRegister Simulator::fmlsl(VectorFormat vform,
5140 LogicVRegister dst,
5141 const LogicVRegister& src1,
5142 const LogicVRegister& src2) {
5143 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5144 dst.ClearForWrite(vform);
5145 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5146 float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5147 float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5148 float acc = dst.Float<float>(i);
5149 float result = FPMulAdd(acc, op1, op2);
5150 dst.SetFloat(i, result);
5151 }
5152 return dst;
5153}
5154
5155
5156LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5157 LogicVRegister dst,
5158 const LogicVRegister& src1,
5159 const LogicVRegister& src2) {
5160 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5161 dst.ClearForWrite(vform);
5162 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5163 int src = i + LaneCountFromFormat(vform);
5164 float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5165 float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5166 float acc = dst.Float<float>(i);
5167 float result = FPMulAdd(acc, op1, op2);
5168 dst.SetFloat(i, result);
5169 }
5170 return dst;
5171}
5172
5173
5174LogicVRegister Simulator::fmlal(VectorFormat vform,
5175 LogicVRegister dst,
5176 const LogicVRegister& src1,
5177 const LogicVRegister& src2,
5178 int index) {
5179 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5180 dst.ClearForWrite(vform);
5181 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5182 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5183 float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5184 float acc = dst.Float<float>(i);
5185 float result = FPMulAdd(acc, op1, op2);
5186 dst.SetFloat(i, result);
5187 }
5188 return dst;
5189}
5190
5191
5192LogicVRegister Simulator::fmlal2(VectorFormat vform,
5193 LogicVRegister dst,
5194 const LogicVRegister& src1,
5195 const LogicVRegister& src2,
5196 int index) {
5197 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5198 dst.ClearForWrite(vform);
5199 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5200 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5201 int src = i + LaneCountFromFormat(vform);
5202 float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5203 float acc = dst.Float<float>(i);
5204 float result = FPMulAdd(acc, op1, op2);
5205 dst.SetFloat(i, result);
5206 }
5207 return dst;
5208}
5209
5210
5211LogicVRegister Simulator::fmlsl(VectorFormat vform,
5212 LogicVRegister dst,
5213 const LogicVRegister& src1,
5214 const LogicVRegister& src2,
5215 int index) {
5216 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5217 dst.ClearForWrite(vform);
5218 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5219 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5220 float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5221 float acc = dst.Float<float>(i);
5222 float result = FPMulAdd(acc, op1, op2);
5223 dst.SetFloat(i, result);
5224 }
5225 return dst;
5226}
5227
5228
5229LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5230 LogicVRegister dst,
5231 const LogicVRegister& src1,
5232 const LogicVRegister& src2,
5233 int index) {
5234 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5235 dst.ClearForWrite(vform);
5236 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5237 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5238 int src = i + LaneCountFromFormat(vform);
5239 float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5240 float acc = dst.Float<float>(i);
5241 float result = FPMulAdd(acc, op1, op2);
5242 dst.SetFloat(i, result);
5243 }
5244 return dst;
5245}
5246
5247
Alexandre Ramesd3832962016-07-04 15:03:43 +01005248template <typename T>
5249LogicVRegister Simulator::fneg(VectorFormat vform,
5250 LogicVRegister dst,
5251 const LogicVRegister& src) {
5252 dst.ClearForWrite(vform);
5253 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5254 T op = src.Float<T>(i);
5255 op = -op;
5256 dst.SetFloat(i, op);
5257 }
5258 return dst;
5259}
5260
5261
5262LogicVRegister Simulator::fneg(VectorFormat vform,
5263 LogicVRegister dst,
5264 const LogicVRegister& src) {
Jacob Bramleyca789742018-09-13 14:25:46 +01005265 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5266 fneg<SimFloat16>(vform, dst, src);
5267 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005268 fneg<float>(vform, dst, src);
5269 } else {
5270 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5271 fneg<double>(vform, dst, src);
5272 }
5273 return dst;
5274}
5275
5276
5277template <typename T>
5278LogicVRegister Simulator::fabs_(VectorFormat vform,
5279 LogicVRegister dst,
5280 const LogicVRegister& src) {
5281 dst.ClearForWrite(vform);
5282 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5283 T op = src.Float<T>(i);
5284 if (copysign(1.0, op) < 0.0) {
5285 op = -op;
5286 }
5287 dst.SetFloat(i, op);
5288 }
5289 return dst;
5290}
5291
5292
5293LogicVRegister Simulator::fabs_(VectorFormat vform,
5294 LogicVRegister dst,
5295 const LogicVRegister& src) {
Jacob Bramleyca789742018-09-13 14:25:46 +01005296 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5297 fabs_<SimFloat16>(vform, dst, src);
5298 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005299 fabs_<float>(vform, dst, src);
5300 } else {
5301 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5302 fabs_<double>(vform, dst, src);
5303 }
5304 return dst;
5305}
5306
5307
5308LogicVRegister Simulator::fabd(VectorFormat vform,
5309 LogicVRegister dst,
5310 const LogicVRegister& src1,
5311 const LogicVRegister& src2) {
5312 SimVRegister temp;
5313 fsub(vform, temp, src1, src2);
5314 fabs_(vform, dst, temp);
5315 return dst;
5316}
5317
5318
5319LogicVRegister Simulator::fsqrt(VectorFormat vform,
5320 LogicVRegister dst,
5321 const LogicVRegister& src) {
5322 dst.ClearForWrite(vform);
Jacob Bramleyca789742018-09-13 14:25:46 +01005323 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5324 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5325 SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
5326 dst.SetFloat(i, result);
5327 }
5328 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005329 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5330 float result = FPSqrt(src.Float<float>(i));
5331 dst.SetFloat(i, result);
5332 }
5333 } else {
5334 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5335 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5336 double result = FPSqrt(src.Float<double>(i));
5337 dst.SetFloat(i, result);
5338 }
5339 }
5340 return dst;
5341}
5342
5343
Jacob Bramleyca789742018-09-13 14:25:46 +01005344#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
5345 LogicVRegister Simulator::FNP(VectorFormat vform, \
5346 LogicVRegister dst, \
5347 const LogicVRegister& src1, \
5348 const LogicVRegister& src2) { \
5349 SimVRegister temp1, temp2; \
5350 uzp1(vform, temp1, src1, src2); \
5351 uzp2(vform, temp2, src1, src2); \
5352 FN(vform, dst, temp1, temp2); \
5353 return dst; \
5354 } \
5355 \
5356 LogicVRegister Simulator::FNP(VectorFormat vform, \
5357 LogicVRegister dst, \
5358 const LogicVRegister& src) { \
5359 if (vform == kFormatH) { \
5360 SimFloat16 result(OP(SimFloat16(RawbitsToFloat16(src.Uint(vform, 0))), \
5361 SimFloat16(RawbitsToFloat16(src.Uint(vform, 1))))); \
5362 dst.SetUint(vform, 0, Float16ToRawbits(result)); \
5363 } else if (vform == kFormatS) { \
5364 float result = OP(src.Float<float>(0), src.Float<float>(1)); \
5365 dst.SetFloat(0, result); \
5366 } else { \
5367 VIXL_ASSERT(vform == kFormatD); \
5368 double result = OP(src.Float<double>(0), src.Float<double>(1)); \
5369 dst.SetFloat(0, result); \
5370 } \
5371 dst.ClearForWrite(vform); \
5372 return dst; \
Alexandre Ramesd3832962016-07-04 15:03:43 +01005373 }
5374NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
5375#undef DEFINE_NEON_FP_PAIR_OP
5376
Jacob Bramleyca789742018-09-13 14:25:46 +01005377template <typename T>
Martyn Capewell894962f2020-02-05 15:46:44 +00005378LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
5379 LogicVRegister dst,
5380 const LogicVRegister& src,
5381 typename TFPPairOp<T>::type fn,
5382 uint64_t inactive_value) {
5383 int lane_count = LaneCountFromFormat(vform);
5384 T result[kZRegMaxSizeInBytes / sizeof(T)];
5385 // Copy the source vector into a working array. Initialise the unused elements
5386 // at the end of the array to the same value that a false predicate would set.
5387 for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
5388 result[i] = (i < lane_count)
5389 ? src.Float<T>(i)
5390 : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
Jacob Bramleyca789742018-09-13 14:25:46 +01005391 }
Martyn Capewell894962f2020-02-05 15:46:44 +00005392
5393 // Pairwise reduce the elements to a single value, using the pair op function
5394 // argument.
5395 for (int step = 1; step < lane_count; step *= 2) {
5396 for (int i = 0; i < lane_count; i += step * 2) {
5397 result[i] = (this->*fn)(result[i], result[i + step]);
5398 }
5399 }
Jacob Bramleyca789742018-09-13 14:25:46 +01005400 dst.ClearForWrite(ScalarFormatFromFormat(vform));
Martyn Capewell894962f2020-02-05 15:46:44 +00005401 dst.SetFloat<T>(0, result[0]);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005402 return dst;
5403}
5404
Martyn Capewell894962f2020-02-05 15:46:44 +00005405LogicVRegister Simulator::FPPairedAcrossHelper(
5406 VectorFormat vform,
5407 LogicVRegister dst,
5408 const LogicVRegister& src,
5409 typename TFPPairOp<SimFloat16>::type fn16,
5410 typename TFPPairOp<float>::type fn32,
5411 typename TFPPairOp<double>::type fn64,
5412 uint64_t inactive_value) {
5413 switch (LaneSizeInBitsFromFormat(vform)) {
5414 case kHRegSize:
5415 return FPPairedAcrossHelper<SimFloat16>(vform,
5416 dst,
5417 src,
5418 fn16,
5419 inactive_value);
5420 case kSRegSize:
5421 return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
5422 default:
5423 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5424 return FPPairedAcrossHelper<double>(vform,
5425 dst,
5426 src,
5427 fn64,
5428 inactive_value);
5429 }
5430}
5431
5432LogicVRegister Simulator::faddv(VectorFormat vform,
5433 LogicVRegister dst,
5434 const LogicVRegister& src) {
5435 return FPPairedAcrossHelper(vform,
5436 dst,
5437 src,
5438 &Simulator::FPAdd<SimFloat16>,
5439 &Simulator::FPAdd<float>,
5440 &Simulator::FPAdd<double>,
5441 0);
5442}
Alexandre Ramesd3832962016-07-04 15:03:43 +01005443
5444LogicVRegister Simulator::fmaxv(VectorFormat vform,
5445 LogicVRegister dst,
5446 const LogicVRegister& src) {
Martyn Capewell894962f2020-02-05 15:46:44 +00005447 int lane_size = LaneSizeInBitsFromFormat(vform);
5448 uint64_t inactive_value =
5449 FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
5450 return FPPairedAcrossHelper(vform,
5451 dst,
5452 src,
5453 &Simulator::FPMax<SimFloat16>,
5454 &Simulator::FPMax<float>,
5455 &Simulator::FPMax<double>,
5456 inactive_value);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005457}
5458
5459
5460LogicVRegister Simulator::fminv(VectorFormat vform,
5461 LogicVRegister dst,
5462 const LogicVRegister& src) {
Martyn Capewell894962f2020-02-05 15:46:44 +00005463 int lane_size = LaneSizeInBitsFromFormat(vform);
5464 uint64_t inactive_value =
5465 FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
5466 return FPPairedAcrossHelper(vform,
5467 dst,
5468 src,
5469 &Simulator::FPMin<SimFloat16>,
5470 &Simulator::FPMin<float>,
5471 &Simulator::FPMin<double>,
5472 inactive_value);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005473}
5474
5475
5476LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
5477 LogicVRegister dst,
5478 const LogicVRegister& src) {
Martyn Capewell894962f2020-02-05 15:46:44 +00005479 int lane_size = LaneSizeInBitsFromFormat(vform);
5480 uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5481 return FPPairedAcrossHelper(vform,
5482 dst,
5483 src,
5484 &Simulator::FPMaxNM<SimFloat16>,
5485 &Simulator::FPMaxNM<float>,
5486 &Simulator::FPMaxNM<double>,
5487 inactive_value);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005488}
5489
5490
5491LogicVRegister Simulator::fminnmv(VectorFormat vform,
5492 LogicVRegister dst,
5493 const LogicVRegister& src) {
Martyn Capewell894962f2020-02-05 15:46:44 +00005494 int lane_size = LaneSizeInBitsFromFormat(vform);
5495 uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5496 return FPPairedAcrossHelper(vform,
5497 dst,
5498 src,
5499 &Simulator::FPMinNM<SimFloat16>,
5500 &Simulator::FPMinNM<float>,
5501 &Simulator::FPMinNM<double>,
5502 inactive_value);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005503}
5504
5505
5506LogicVRegister Simulator::fmul(VectorFormat vform,
5507 LogicVRegister dst,
5508 const LogicVRegister& src1,
5509 const LogicVRegister& src2,
5510 int index) {
5511 dst.ClearForWrite(vform);
5512 SimVRegister temp;
Jacob Bramleyca789742018-09-13 14:25:46 +01005513 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5514 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5515 fmul<SimFloat16>(vform, dst, src1, index_reg);
5516 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005517 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5518 fmul<float>(vform, dst, src1, index_reg);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005519 } else {
5520 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5521 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5522 fmul<double>(vform, dst, src1, index_reg);
5523 }
5524 return dst;
5525}
5526
5527
5528LogicVRegister Simulator::fmla(VectorFormat vform,
5529 LogicVRegister dst,
5530 const LogicVRegister& src1,
5531 const LogicVRegister& src2,
5532 int index) {
5533 dst.ClearForWrite(vform);
5534 SimVRegister temp;
Jacob Bramleyca789742018-09-13 14:25:46 +01005535 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5536 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
TatWai Chongf8d29f12020-02-16 22:53:18 -08005537 fmla<SimFloat16>(vform, dst, dst, src1, index_reg);
Jacob Bramleyca789742018-09-13 14:25:46 +01005538 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005539 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
TatWai Chongf8d29f12020-02-16 22:53:18 -08005540 fmla<float>(vform, dst, dst, src1, index_reg);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005541 } else {
5542 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5543 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
TatWai Chongf8d29f12020-02-16 22:53:18 -08005544 fmla<double>(vform, dst, dst, src1, index_reg);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005545 }
5546 return dst;
5547}
5548
5549
5550LogicVRegister Simulator::fmls(VectorFormat vform,
5551 LogicVRegister dst,
5552 const LogicVRegister& src1,
5553 const LogicVRegister& src2,
5554 int index) {
5555 dst.ClearForWrite(vform);
5556 SimVRegister temp;
Jacob Bramleyca789742018-09-13 14:25:46 +01005557 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5558 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
TatWai Chongf8d29f12020-02-16 22:53:18 -08005559 fmls<SimFloat16>(vform, dst, dst, src1, index_reg);
Jacob Bramleyca789742018-09-13 14:25:46 +01005560 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005561 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
TatWai Chongf8d29f12020-02-16 22:53:18 -08005562 fmls<float>(vform, dst, dst, src1, index_reg);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005563 } else {
5564 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5565 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
TatWai Chongf8d29f12020-02-16 22:53:18 -08005566 fmls<double>(vform, dst, dst, src1, index_reg);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005567 }
5568 return dst;
5569}
5570
5571
5572LogicVRegister Simulator::fmulx(VectorFormat vform,
5573 LogicVRegister dst,
5574 const LogicVRegister& src1,
5575 const LogicVRegister& src2,
5576 int index) {
5577 dst.ClearForWrite(vform);
5578 SimVRegister temp;
Jacob Bramleyca789742018-09-13 14:25:46 +01005579 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5580 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5581 fmulx<SimFloat16>(vform, dst, src1, index_reg);
5582 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005583 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5584 fmulx<float>(vform, dst, src1, index_reg);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005585 } else {
5586 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5587 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5588 fmulx<double>(vform, dst, src1, index_reg);
5589 }
5590 return dst;
5591}
5592
5593
5594LogicVRegister Simulator::frint(VectorFormat vform,
5595 LogicVRegister dst,
5596 const LogicVRegister& src,
5597 FPRounding rounding_mode,
TatWai Chong04471812019-03-19 14:29:00 -07005598 bool inexact_exception,
5599 FrintMode frint_mode) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005600 dst.ClearForWrite(vform);
Jacob Bramleyca789742018-09-13 14:25:46 +01005601 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
TatWai Chong04471812019-03-19 14:29:00 -07005602 VIXL_ASSERT(frint_mode == kFrintToInteger);
Jacob Bramleyca789742018-09-13 14:25:46 +01005603 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5604 SimFloat16 input = src.Float<SimFloat16>(i);
5605 SimFloat16 rounded = FPRoundInt(input, rounding_mode);
5606 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5607 FPProcessException();
5608 }
5609 dst.SetFloat<SimFloat16>(i, rounded);
5610 }
5611 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005612 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5613 float input = src.Float<float>(i);
TatWai Chong04471812019-03-19 14:29:00 -07005614 float rounded = FPRoundInt(input, rounding_mode, frint_mode);
5615
Jacob Bramleyca789742018-09-13 14:25:46 +01005616 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005617 FPProcessException();
5618 }
5619 dst.SetFloat<float>(i, rounded);
5620 }
5621 } else {
5622 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5623 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5624 double input = src.Float<double>(i);
TatWai Chong04471812019-03-19 14:29:00 -07005625 double rounded = FPRoundInt(input, rounding_mode, frint_mode);
Jacob Bramleyca789742018-09-13 14:25:46 +01005626 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005627 FPProcessException();
5628 }
5629 dst.SetFloat<double>(i, rounded);
5630 }
5631 }
5632 return dst;
5633}
5634
TatWai Chong2cb1b612020-03-04 23:51:21 -08005635LogicVRegister Simulator::fcvt(VectorFormat vform,
5636 unsigned dst_data_size_in_bits,
5637 unsigned src_data_size_in_bits,
5638 LogicVRegister dst,
5639 const LogicPRegister& pg,
5640 const LogicVRegister& src) {
5641 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5642 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5643
5644 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5645 if (!pg.IsActive(vform, i)) continue;
5646
5647 uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5648 0,
5649 src.Uint(vform, i));
5650 double dst_value =
5651 RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits);
5652
5653 uint64_t dst_raw_bits =
5654 FPToRawbitsWithSize(dst_data_size_in_bits, dst_value);
5655
5656 dst.SetUint(vform, i, dst_raw_bits);
5657 }
5658
5659 return dst;
5660}
5661
TatWai Chongdb7437c2020-01-09 17:44:10 -08005662LogicVRegister Simulator::fcvts(VectorFormat vform,
5663 unsigned dst_data_size_in_bits,
5664 unsigned src_data_size_in_bits,
5665 LogicVRegister dst,
5666 const LogicPRegister& pg,
5667 const LogicVRegister& src,
5668 FPRounding round,
5669 int fbits) {
5670 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5671 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5672
5673 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5674 if (!pg.IsActive(vform, i)) continue;
5675
5676 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5677 0,
5678 src.Uint(vform, i));
5679 double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5680 std::pow(2.0, fbits);
5681
5682 switch (dst_data_size_in_bits) {
5683 case kHRegSize:
5684 dst.SetInt(vform, i, FPToInt16(result, round));
5685 break;
5686 case kSRegSize:
5687 dst.SetInt(vform, i, FPToInt32(result, round));
5688 break;
5689 case kDRegSize:
5690 dst.SetInt(vform, i, FPToInt64(result, round));
5691 break;
5692 default:
5693 VIXL_UNIMPLEMENTED();
5694 break;
5695 }
5696 }
5697
5698 return dst;
5699}
Alexandre Ramesd3832962016-07-04 15:03:43 +01005700
5701LogicVRegister Simulator::fcvts(VectorFormat vform,
5702 LogicVRegister dst,
5703 const LogicVRegister& src,
TatWai Chongdb7437c2020-01-09 17:44:10 -08005704 FPRounding round,
Alexandre Ramesd3832962016-07-04 15:03:43 +01005705 int fbits) {
5706 dst.ClearForWrite(vform);
TatWai Chongdb7437c2020-01-09 17:44:10 -08005707 return fcvts(vform,
5708 LaneSizeInBitsFromFormat(vform),
5709 LaneSizeInBitsFromFormat(vform),
5710 dst,
5711 GetPTrue(),
5712 src,
5713 round,
5714 fbits);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005715}
5716
TatWai Chongdb7437c2020-01-09 17:44:10 -08005717LogicVRegister Simulator::fcvtu(VectorFormat vform,
5718 unsigned dst_data_size_in_bits,
5719 unsigned src_data_size_in_bits,
5720 LogicVRegister dst,
5721 const LogicPRegister& pg,
5722 const LogicVRegister& src,
5723 FPRounding round,
5724 int fbits) {
5725 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5726 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5727
5728 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5729 if (!pg.IsActive(vform, i)) continue;
5730
5731 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5732 0,
5733 src.Uint(vform, i));
5734 double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5735 std::pow(2.0, fbits);
5736
5737 switch (dst_data_size_in_bits) {
5738 case kHRegSize:
5739 dst.SetUint(vform, i, FPToUInt16(result, round));
5740 break;
5741 case kSRegSize:
5742 dst.SetUint(vform, i, FPToUInt32(result, round));
5743 break;
5744 case kDRegSize:
5745 dst.SetUint(vform, i, FPToUInt64(result, round));
5746 break;
5747 default:
5748 VIXL_UNIMPLEMENTED();
5749 break;
5750 }
5751 }
5752
5753 return dst;
5754}
Alexandre Ramesd3832962016-07-04 15:03:43 +01005755
5756LogicVRegister Simulator::fcvtu(VectorFormat vform,
5757 LogicVRegister dst,
5758 const LogicVRegister& src,
TatWai Chongdb7437c2020-01-09 17:44:10 -08005759 FPRounding round,
Alexandre Ramesd3832962016-07-04 15:03:43 +01005760 int fbits) {
5761 dst.ClearForWrite(vform);
TatWai Chongdb7437c2020-01-09 17:44:10 -08005762 return fcvtu(vform,
5763 LaneSizeInBitsFromFormat(vform),
5764 LaneSizeInBitsFromFormat(vform),
5765 dst,
5766 GetPTrue(),
5767 src,
5768 round,
5769 fbits);
Alexandre Ramesd3832962016-07-04 15:03:43 +01005770}
5771
Alexandre Ramesd3832962016-07-04 15:03:43 +01005772LogicVRegister Simulator::fcvtl(VectorFormat vform,
5773 LogicVRegister dst,
5774 const LogicVRegister& src) {
5775 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5776 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
Jacob Bramleyca789742018-09-13 14:25:46 +01005777 // TODO: Full support for SimFloat16 in SimRegister(s).
5778 dst.SetFloat(i,
5779 FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
5780 ReadDN()));
Alexandre Ramesd3832962016-07-04 15:03:43 +01005781 }
5782 } else {
5783 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5784 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
Carey Williamsb57e3622018-04-10 11:42:03 +01005785 dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
Alexandre Ramesd3832962016-07-04 15:03:43 +01005786 }
5787 }
5788 return dst;
5789}
5790
5791
5792LogicVRegister Simulator::fcvtl2(VectorFormat vform,
5793 LogicVRegister dst,
5794 const LogicVRegister& src) {
5795 int lane_count = LaneCountFromFormat(vform);
5796 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5797 for (int i = 0; i < lane_count; i++) {
Jacob Bramleyca789742018-09-13 14:25:46 +01005798 // TODO: Full support for SimFloat16 in SimRegister(s).
5799 dst.SetFloat(i,
5800 FPToFloat(RawbitsToFloat16(
5801 src.Float<uint16_t>(i + lane_count)),
5802 ReadDN()));
Alexandre Ramesd3832962016-07-04 15:03:43 +01005803 }
5804 } else {
5805 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5806 for (int i = 0; i < lane_count; i++) {
Carey Williamsb57e3622018-04-10 11:42:03 +01005807 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
Alexandre Ramesd3832962016-07-04 15:03:43 +01005808 }
5809 }
5810 return dst;
5811}
5812
5813
5814LogicVRegister Simulator::fcvtn(VectorFormat vform,
5815 LogicVRegister dst,
5816 const LogicVRegister& src) {
5817 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5818 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Jacob Bramleyca789742018-09-13 14:25:46 +01005819 dst.SetFloat(i,
5820 Float16ToRawbits(
5821 FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
Alexandre Ramesd3832962016-07-04 15:03:43 +01005822 }
5823 } else {
5824 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5825 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Carey Williamsb57e3622018-04-10 11:42:03 +01005826 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
Alexandre Ramesd3832962016-07-04 15:03:43 +01005827 }
5828 }
5829 return dst;
5830}
5831
5832
5833LogicVRegister Simulator::fcvtn2(VectorFormat vform,
5834 LogicVRegister dst,
5835 const LogicVRegister& src) {
5836 int lane_count = LaneCountFromFormat(vform) / 2;
5837 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5838 for (int i = lane_count - 1; i >= 0; i--) {
Carey Williamsb57e3622018-04-10 11:42:03 +01005839 dst.SetFloat(i + lane_count,
Jacob Bramleyca789742018-09-13 14:25:46 +01005840 Float16ToRawbits(
5841 FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
Alexandre Ramesd3832962016-07-04 15:03:43 +01005842 }
5843 } else {
5844 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5845 for (int i = lane_count - 1; i >= 0; i--) {
Carey Williamsb57e3622018-04-10 11:42:03 +01005846 dst.SetFloat(i + lane_count,
5847 FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
Alexandre Ramesd3832962016-07-04 15:03:43 +01005848 }
5849 }
5850 return dst;
5851}
5852
5853
5854LogicVRegister Simulator::fcvtxn(VectorFormat vform,
5855 LogicVRegister dst,
5856 const LogicVRegister& src) {
5857 dst.ClearForWrite(vform);
5858 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5859 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Carey Williamsb57e3622018-04-10 11:42:03 +01005860 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
Alexandre Ramesd3832962016-07-04 15:03:43 +01005861 }
5862 return dst;
5863}
5864
5865
5866LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
5867 LogicVRegister dst,
5868 const LogicVRegister& src) {
5869 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5870 int lane_count = LaneCountFromFormat(vform) / 2;
5871 for (int i = lane_count - 1; i >= 0; i--) {
Carey Williamsb57e3622018-04-10 11:42:03 +01005872 dst.SetFloat(i + lane_count,
5873 FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
Alexandre Ramesd3832962016-07-04 15:03:43 +01005874 }
5875 return dst;
5876}
5877
5878
5879// Based on reference C function recip_sqrt_estimate from ARM ARM.
5880double Simulator::recip_sqrt_estimate(double a) {
5881 int q0, q1, s;
5882 double r;
5883 if (a < 0.5) {
5884 q0 = static_cast<int>(a * 512.0);
5885 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
5886 } else {
5887 q1 = static_cast<int>(a * 256.0);
5888 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
5889 }
5890 s = static_cast<int>(256.0 * r + 0.5);
5891 return static_cast<double>(s) / 256.0;
5892}
5893
5894
5895static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
5896 return ExtractUnsignedBitfield64(start_bit, end_bit, val);
5897}
5898
5899
5900template <typename T>
5901T Simulator::FPRecipSqrtEstimate(T op) {
Jacob Bramleyca789742018-09-13 14:25:46 +01005902 if (IsNaN(op)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005903 return FPProcessNaN(op);
5904 } else if (op == 0.0) {
5905 if (copysign(1.0, op) < 0.0) {
5906 return kFP64NegativeInfinity;
5907 } else {
5908 return kFP64PositiveInfinity;
5909 }
5910 } else if (copysign(1.0, op) < 0.0) {
5911 FPProcessException();
5912 return FPDefaultNaN<T>();
Jacob Bramleyca789742018-09-13 14:25:46 +01005913 } else if (IsInf(op)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005914 return 0.0;
5915 } else {
5916 uint64_t fraction;
5917 int exp, result_exp;
5918
Jacob Bramleyca789742018-09-13 14:25:46 +01005919 if (IsFloat16<T>()) {
5920 exp = Float16Exp(op);
5921 fraction = Float16Mantissa(op);
5922 fraction <<= 42;
5923 } else if (IsFloat32<T>()) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005924 exp = FloatExp(op);
5925 fraction = FloatMantissa(op);
5926 fraction <<= 29;
5927 } else {
Jacob Bramleyca789742018-09-13 14:25:46 +01005928 VIXL_ASSERT(IsFloat64<T>());
Alexandre Ramesd3832962016-07-04 15:03:43 +01005929 exp = DoubleExp(op);
5930 fraction = DoubleMantissa(op);
5931 }
5932
5933 if (exp == 0) {
5934 while (Bits(fraction, 51, 51) == 0) {
5935 fraction = Bits(fraction, 50, 0) << 1;
5936 exp -= 1;
5937 }
5938 fraction = Bits(fraction, 50, 0) << 1;
5939 }
5940
5941 double scaled;
5942 if (Bits(exp, 0, 0) == 0) {
5943 scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
5944 } else {
5945 scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
5946 }
5947
Jacob Bramleyca789742018-09-13 14:25:46 +01005948 if (IsFloat16<T>()) {
5949 result_exp = (44 - exp) / 2;
5950 } else if (IsFloat32<T>()) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005951 result_exp = (380 - exp) / 2;
5952 } else {
Jacob Bramleyca789742018-09-13 14:25:46 +01005953 VIXL_ASSERT(IsFloat64<T>());
Alexandre Ramesd3832962016-07-04 15:03:43 +01005954 result_exp = (3068 - exp) / 2;
5955 }
5956
5957 uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
5958
Jacob Bramleyca789742018-09-13 14:25:46 +01005959 if (IsFloat16<T>()) {
5960 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
5961 uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
5962 return Float16Pack(0, exp_bits, est_bits);
5963 } else if (IsFloat32<T>()) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005964 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
5965 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
5966 return FloatPack(0, exp_bits, est_bits);
5967 } else {
Jacob Bramleyca789742018-09-13 14:25:46 +01005968 VIXL_ASSERT(IsFloat64<T>());
Alexandre Ramesd3832962016-07-04 15:03:43 +01005969 return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
5970 }
5971 }
5972}
5973
5974
5975LogicVRegister Simulator::frsqrte(VectorFormat vform,
5976 LogicVRegister dst,
5977 const LogicVRegister& src) {
5978 dst.ClearForWrite(vform);
Jacob Bramleyca789742018-09-13 14:25:46 +01005979 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5980 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5981 SimFloat16 input = src.Float<SimFloat16>(i);
Martyn Capewell13050ca2020-02-11 16:43:40 +00005982 dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input));
Jacob Bramleyca789742018-09-13 14:25:46 +01005983 }
5984 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01005985 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5986 float input = src.Float<float>(i);
Martyn Capewell13050ca2020-02-11 16:43:40 +00005987 dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input));
Alexandre Ramesd3832962016-07-04 15:03:43 +01005988 }
5989 } else {
5990 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5991 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5992 double input = src.Float<double>(i);
Martyn Capewell13050ca2020-02-11 16:43:40 +00005993 dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input));
Alexandre Ramesd3832962016-07-04 15:03:43 +01005994 }
5995 }
5996 return dst;
5997}
5998
5999template <typename T>
6000T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
6001 uint32_t sign;
6002
Jacob Bramleyca789742018-09-13 14:25:46 +01006003 if (IsFloat16<T>()) {
6004 sign = Float16Sign(op);
6005 } else if (IsFloat32<T>()) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01006006 sign = FloatSign(op);
6007 } else {
Jacob Bramleyca789742018-09-13 14:25:46 +01006008 VIXL_ASSERT(IsFloat64<T>());
Alexandre Ramesd3832962016-07-04 15:03:43 +01006009 sign = DoubleSign(op);
6010 }
6011
Jacob Bramleyca789742018-09-13 14:25:46 +01006012 if (IsNaN(op)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01006013 return FPProcessNaN(op);
Jacob Bramleyca789742018-09-13 14:25:46 +01006014 } else if (IsInf(op)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01006015 return (sign == 1) ? -0.0 : 0.0;
6016 } else if (op == 0.0) {
6017 FPProcessException(); // FPExc_DivideByZero exception.
6018 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
Jacob Bramleyca789742018-09-13 14:25:46 +01006019 } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
6020 (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
6021 (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01006022 bool overflow_to_inf = false;
6023 switch (rounding) {
6024 case FPTieEven:
6025 overflow_to_inf = true;
6026 break;
6027 case FPPositiveInfinity:
6028 overflow_to_inf = (sign == 0);
6029 break;
6030 case FPNegativeInfinity:
6031 overflow_to_inf = (sign == 1);
6032 break;
6033 case FPZero:
6034 overflow_to_inf = false;
6035 break;
6036 default:
6037 break;
6038 }
6039 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
6040 if (overflow_to_inf) {
6041 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6042 } else {
6043 // Return FPMaxNormal(sign).
Jacob Bramleyca789742018-09-13 14:25:46 +01006044 if (IsFloat16<T>()) {
6045 return Float16Pack(sign, 0x1f, 0x3ff);
6046 } else if (IsFloat32<T>()) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01006047 return FloatPack(sign, 0xfe, 0x07fffff);
6048 } else {
Jacob Bramleyca789742018-09-13 14:25:46 +01006049 VIXL_ASSERT(IsFloat64<T>());
Alexandre Ramesd3832962016-07-04 15:03:43 +01006050 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
6051 }
6052 }
6053 } else {
6054 uint64_t fraction;
6055 int exp, result_exp;
6056 uint32_t sign;
6057
Jacob Bramleyca789742018-09-13 14:25:46 +01006058 if (IsFloat16<T>()) {
6059 sign = Float16Sign(op);
6060 exp = Float16Exp(op);
6061 fraction = Float16Mantissa(op);
6062 fraction <<= 42;
6063 } else if (IsFloat32<T>()) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01006064 sign = FloatSign(op);
6065 exp = FloatExp(op);
6066 fraction = FloatMantissa(op);
6067 fraction <<= 29;
6068 } else {
Jacob Bramleyca789742018-09-13 14:25:46 +01006069 VIXL_ASSERT(IsFloat64<T>());
Alexandre Ramesd3832962016-07-04 15:03:43 +01006070 sign = DoubleSign(op);
6071 exp = DoubleExp(op);
6072 fraction = DoubleMantissa(op);
6073 }
6074
6075 if (exp == 0) {
6076 if (Bits(fraction, 51, 51) == 0) {
6077 exp -= 1;
6078 fraction = Bits(fraction, 49, 0) << 2;
6079 } else {
6080 fraction = Bits(fraction, 50, 0) << 1;
6081 }
6082 }
6083
6084 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6085
Jacob Bramleyca789742018-09-13 14:25:46 +01006086 if (IsFloat16<T>()) {
6087 result_exp = (29 - exp); // In range 29-30 = -1 to 29+1 = 30.
6088 } else if (IsFloat32<T>()) {
6089 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
Alexandre Ramesd3832962016-07-04 15:03:43 +01006090 } else {
Jacob Bramleyca789742018-09-13 14:25:46 +01006091 VIXL_ASSERT(IsFloat64<T>());
Alexandre Ramesd3832962016-07-04 15:03:43 +01006092 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
6093 }
6094
6095 double estimate = recip_estimate(scaled);
6096
6097 fraction = DoubleMantissa(estimate);
6098 if (result_exp == 0) {
6099 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
6100 } else if (result_exp == -1) {
6101 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
6102 result_exp = 0;
6103 }
Jacob Bramleyca789742018-09-13 14:25:46 +01006104 if (IsFloat16<T>()) {
6105 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6106 uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
6107 return Float16Pack(sign, exp_bits, frac_bits);
6108 } else if (IsFloat32<T>()) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01006109 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6110 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
6111 return FloatPack(sign, exp_bits, frac_bits);
6112 } else {
Jacob Bramleyca789742018-09-13 14:25:46 +01006113 VIXL_ASSERT(IsFloat64<T>());
Alexandre Ramesd3832962016-07-04 15:03:43 +01006114 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
6115 }
6116 }
6117}
6118
6119
6120LogicVRegister Simulator::frecpe(VectorFormat vform,
6121 LogicVRegister dst,
6122 const LogicVRegister& src,
6123 FPRounding round) {
6124 dst.ClearForWrite(vform);
Jacob Bramleyca789742018-09-13 14:25:46 +01006125 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6126 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6127 SimFloat16 input = src.Float<SimFloat16>(i);
Martyn Capewell13050ca2020-02-11 16:43:40 +00006128 dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round));
Jacob Bramleyca789742018-09-13 14:25:46 +01006129 }
6130 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01006131 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6132 float input = src.Float<float>(i);
Martyn Capewell13050ca2020-02-11 16:43:40 +00006133 dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round));
Alexandre Ramesd3832962016-07-04 15:03:43 +01006134 }
6135 } else {
6136 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6137 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6138 double input = src.Float<double>(i);
Martyn Capewell13050ca2020-02-11 16:43:40 +00006139 dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round));
Alexandre Ramesd3832962016-07-04 15:03:43 +01006140 }
6141 }
6142 return dst;
6143}
6144
6145
6146LogicVRegister Simulator::ursqrte(VectorFormat vform,
6147 LogicVRegister dst,
6148 const LogicVRegister& src) {
6149 dst.ClearForWrite(vform);
6150 uint64_t operand;
6151 uint32_t result;
6152 double dp_operand, dp_result;
6153 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6154 operand = src.Uint(vform, i);
6155 if (operand <= 0x3FFFFFFF) {
6156 result = 0xFFFFFFFF;
6157 } else {
6158 dp_operand = operand * std::pow(2.0, -32);
6159 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
6160 result = static_cast<uint32_t>(dp_result);
6161 }
6162 dst.SetUint(vform, i, result);
6163 }
6164 return dst;
6165}
6166
6167
6168// Based on reference C function recip_estimate from ARM ARM.
6169double Simulator::recip_estimate(double a) {
6170 int q, s;
6171 double r;
6172 q = static_cast<int>(a * 512.0);
6173 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
6174 s = static_cast<int>(256.0 * r + 0.5);
6175 return static_cast<double>(s) / 256.0;
6176}
6177
6178
6179LogicVRegister Simulator::urecpe(VectorFormat vform,
6180 LogicVRegister dst,
6181 const LogicVRegister& src) {
6182 dst.ClearForWrite(vform);
6183 uint64_t operand;
6184 uint32_t result;
6185 double dp_operand, dp_result;
6186 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6187 operand = src.Uint(vform, i);
6188 if (operand <= 0x7FFFFFFF) {
6189 result = 0xFFFFFFFF;
6190 } else {
6191 dp_operand = operand * std::pow(2.0, -32);
6192 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
6193 result = static_cast<uint32_t>(dp_result);
6194 }
6195 dst.SetUint(vform, i, result);
6196 }
6197 return dst;
6198}
6199
Jacob Bramley0ce75842019-07-17 18:12:50 +01006200LogicPRegister Simulator::pfalse(LogicPRegister dst) {
6201 dst.Clear();
6202 return dst;
6203}
6204
6205LogicPRegister Simulator::pfirst(LogicPRegister dst,
6206 const LogicPRegister& pg,
6207 const LogicPRegister& src) {
6208 int first_pg = GetFirstActive(kFormatVnB, pg);
6209 VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB));
6210 mov(dst, src);
6211 if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true);
6212 return dst;
6213}
6214
6215LogicPRegister Simulator::ptrue(VectorFormat vform,
6216 LogicPRegister dst,
6217 int pattern) {
6218 int count = GetPredicateConstraintLaneCount(vform, pattern);
6219 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6220 dst.SetActive(vform, i, i < count);
6221 }
6222 return dst;
6223}
6224
6225LogicPRegister Simulator::pnext(VectorFormat vform,
6226 LogicPRegister dst,
6227 const LogicPRegister& pg,
6228 const LogicPRegister& src) {
6229 int next = GetLastActive(vform, src) + 1;
6230 while (next < LaneCountFromFormat(vform)) {
6231 if (pg.IsActive(vform, next)) break;
6232 next++;
6233 }
6234
6235 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6236 dst.SetActive(vform, i, (i == next));
6237 }
6238 return dst;
6239}
6240
Alexandre Ramesd3832962016-07-04 15:03:43 +01006241template <typename T>
6242LogicVRegister Simulator::frecpx(VectorFormat vform,
6243 LogicVRegister dst,
6244 const LogicVRegister& src) {
6245 dst.ClearForWrite(vform);
6246 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6247 T op = src.Float<T>(i);
6248 T result;
Jacob Bramleyca789742018-09-13 14:25:46 +01006249 if (IsNaN(op)) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01006250 result = FPProcessNaN(op);
6251 } else {
6252 int exp;
6253 uint32_t sign;
Jacob Bramleyca789742018-09-13 14:25:46 +01006254 if (IsFloat16<T>()) {
6255 sign = Float16Sign(op);
6256 exp = Float16Exp(op);
6257 exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
6258 result = Float16Pack(sign, exp, 0);
6259 } else if (IsFloat32<T>()) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01006260 sign = FloatSign(op);
6261 exp = FloatExp(op);
6262 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
6263 result = FloatPack(sign, exp, 0);
6264 } else {
Jacob Bramleyca789742018-09-13 14:25:46 +01006265 VIXL_ASSERT(IsFloat64<T>());
Alexandre Ramesd3832962016-07-04 15:03:43 +01006266 sign = DoubleSign(op);
6267 exp = DoubleExp(op);
6268 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
6269 result = DoublePack(sign, exp, 0);
6270 }
6271 }
6272 dst.SetFloat(i, result);
6273 }
6274 return dst;
6275}
6276
6277
6278LogicVRegister Simulator::frecpx(VectorFormat vform,
6279 LogicVRegister dst,
6280 const LogicVRegister& src) {
Jacob Bramleyca789742018-09-13 14:25:46 +01006281 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6282 frecpx<SimFloat16>(vform, dst, src);
6283 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
Alexandre Ramesd3832962016-07-04 15:03:43 +01006284 frecpx<float>(vform, dst, src);
6285 } else {
6286 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6287 frecpx<double>(vform, dst, src);
6288 }
6289 return dst;
6290}
6291
Martyn Capewellefd9dc72020-02-13 10:46:29 +00006292LogicVRegister Simulator::ftsmul(VectorFormat vform,
6293 LogicVRegister dst,
6294 const LogicVRegister& src1,
6295 const LogicVRegister& src2) {
Jacob Bramleydfb93b52020-07-02 12:06:45 +01006296 SimVRegister maybe_neg_src1;
Martyn Capewellefd9dc72020-02-13 10:46:29 +00006297
Jacob Bramleydfb93b52020-07-02 12:06:45 +01006298 // The bottom bit of src2 controls the sign of the result. Use it to
6299 // conditionally invert the sign of one `fmul` operand.
6300 shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1);
6301 eor(vform, maybe_neg_src1, maybe_neg_src1, src1);
Martyn Capewellefd9dc72020-02-13 10:46:29 +00006302
6303 // Multiply src1 by the modified neg_src1, which is potentially its negation.
6304 // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1,
6305 // rather than neg_src1, must be the first source argument.
Jacob Bramleydfb93b52020-07-02 12:06:45 +01006306 fmul(vform, dst, src1, maybe_neg_src1);
Martyn Capewellefd9dc72020-02-13 10:46:29 +00006307
6308 return dst;
6309}
6310
Martyn Capewell43782632019-12-12 13:22:10 +00006311LogicVRegister Simulator::ftssel(VectorFormat vform,
6312 LogicVRegister dst,
6313 const LogicVRegister& src1,
6314 const LogicVRegister& src2) {
6315 unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
6316 uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);
6317 uint64_t one;
6318
6319 if (lane_bits == kHRegSize) {
6320 one = Float16ToRawbits(Float16(1.0));
6321 } else if (lane_bits == kSRegSize) {
6322 one = FloatToRawbits(1.0);
6323 } else {
6324 VIXL_ASSERT(lane_bits == kDRegSize);
6325 one = DoubleToRawbits(1.0);
6326 }
6327
6328 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6329 // Use integer accessors for this operation, as this is a data manipulation
6330 // task requiring no calculation.
6331 uint64_t op = src1.Uint(vform, i);
6332
6333 // Only the bottom two bits of the src2 register are significant, indicating
6334 // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1
6335 // determines the sign of the value written to dst.
6336 uint64_t q = src2.Uint(vform, i);
6337 if ((q & 1) == 1) op = one;
6338 if ((q & 2) == 2) op ^= sign_bit;
6339
6340 dst.SetUint(vform, i, op);
6341 }
6342
6343 return dst;
6344}
6345
Martyn Capewell5fb2ad62020-01-10 14:08:27 +00006346template <typename T>
6347LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
6348 LogicVRegister dst,
6349 const LogicVRegister& src1,
6350 const LogicVRegister& src2,
6351 uint64_t coeff_pos,
6352 uint64_t coeff_neg) {
6353 SimVRegister zero;
6354 dup_immediate(kFormatVnB, zero, 0);
6355
6356 SimVRegister cf;
6357 SimVRegister cfn;
6358 dup_immediate(vform, cf, coeff_pos);
6359 dup_immediate(vform, cfn, coeff_neg);
6360
6361 // The specification requires testing the top bit of the raw value, rather
6362 // than the sign of the floating point number, so use an integer comparison
6363 // here.
6364 SimPRegister is_neg;
6365 SVEIntCompareVectorsHelper(lt,
6366 vform,
6367 is_neg,
6368 GetPTrue(),
6369 src2,
6370 zero,
6371 false,
6372 LeaveFlags);
6373 mov_merging(vform, cf, is_neg, cfn);
6374
6375 SimVRegister temp;
6376 fabs_<T>(vform, temp, src2);
TatWai Chongf8d29f12020-02-16 22:53:18 -08006377 fmla<T>(vform, cf, cf, src1, temp);
Martyn Capewell5fb2ad62020-01-10 14:08:27 +00006378 mov(vform, dst, cf);
6379 return dst;
6380}
6381
6382
6383LogicVRegister Simulator::ftmad(VectorFormat vform,
6384 LogicVRegister dst,
6385 const LogicVRegister& src1,
6386 const LogicVRegister& src2,
6387 unsigned index) {
6388 static const uint64_t ftmad_coeff16[] = {0x3c00,
6389 0xb155,
6390 0x2030,
6391 0x0000,
6392 0x0000,
6393 0x0000,
6394 0x0000,
6395 0x0000,
6396 0x3c00,
6397 0xb800,
6398 0x293a,
6399 0x0000,
6400 0x0000,
6401 0x0000,
6402 0x0000,
6403 0x0000};
6404
6405 static const uint64_t ftmad_coeff32[] = {0x3f800000,
6406 0xbe2aaaab,
6407 0x3c088886,
6408 0xb95008b9,
6409 0x36369d6d,
6410 0x00000000,
6411 0x00000000,
6412 0x00000000,
6413 0x3f800000,
6414 0xbf000000,
6415 0x3d2aaaa6,
6416 0xbab60705,
6417 0x37cd37cc,
6418 0x00000000,
6419 0x00000000,
6420 0x00000000};
6421
6422 static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000,
6423 0xbfc5555555555543,
6424 0x3f8111111110f30c,
6425 0xbf2a01a019b92fc6,
6426 0x3ec71de351f3d22b,
6427 0xbe5ae5e2b60f7b91,
6428 0x3de5d8408868552f,
6429 0x0000000000000000,
6430 0x3ff0000000000000,
6431 0xbfe0000000000000,
6432 0x3fa5555555555536,
6433 0xbf56c16c16c13a0b,
6434 0x3efa01a019b1e8d8,
6435 0xbe927e4f7282f468,
6436 0x3e21ee96d2641b13,
6437 0xbda8f76380fbb401};
6438 VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64));
6439 VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));
6440 VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));
6441
6442 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6443 FTMaddHelper<SimFloat16>(vform,
6444 dst,
6445 src1,
6446 src2,
6447 ftmad_coeff16[index],
6448 ftmad_coeff16[index + 8]);
6449 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6450 FTMaddHelper<float>(vform,
6451 dst,
6452 src1,
6453 src2,
6454 ftmad_coeff32[index],
6455 ftmad_coeff32[index + 8]);
6456 } else {
6457 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6458 FTMaddHelper<double>(vform,
6459 dst,
6460 src1,
6461 src2,
6462 ftmad_coeff64[index],
6463 ftmad_coeff64[index + 8]);
6464 }
6465 return dst;
6466}
6467
Martyn Capewell43782632019-12-12 13:22:10 +00006468LogicVRegister Simulator::fexpa(VectorFormat vform,
6469 LogicVRegister dst,
6470 const LogicVRegister& src) {
6471 static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,
6472 0x005d, 0x0075, 0x008e, 0x00a8,
6473 0x00c2, 0x00dc, 0x00f8, 0x0114,
6474 0x0130, 0x014d, 0x016b, 0x0189,
6475 0x01a8, 0x01c8, 0x01e8, 0x0209,
6476 0x022b, 0x024e, 0x0271, 0x0295,
6477 0x02ba, 0x02e0, 0x0306, 0x032e,
6478 0x0356, 0x037f, 0x03a9, 0x03d4};
6479
6480 static const uint64_t fexpa_coeff32[] =
6481 {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,
6482 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,
6483 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,
6484 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
6485 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf,
6486 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
6487 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,
6488 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
6489 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,
6490 0x7d3e0c};
6491
6492 static const uint64_t fexpa_coeff64[] =
6493 {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
6494 0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
6495 0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
6496 0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
6497 0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
6498 0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
6499 0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
6500 0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
6501 0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
6502 0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
6503 0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
6504 0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
6505 0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
6506 0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
6507 0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
6508 0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};
6509
6510 unsigned lane_size = LaneSizeInBitsFromFormat(vform);
6511 int index_highbit = 5;
6512 int op_highbit, op_shift;
6513 const uint64_t* fexpa_coeff;
6514
6515 if (lane_size == kHRegSize) {
6516 index_highbit = 4;
6517 VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1)));
6518 fexpa_coeff = fexpa_coeff16;
6519 op_highbit = 9;
6520 op_shift = 10;
6521 } else if (lane_size == kSRegSize) {
6522 VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1)));
6523 fexpa_coeff = fexpa_coeff32;
6524 op_highbit = 13;
6525 op_shift = 23;
6526 } else {
6527 VIXL_ASSERT(lane_size == kDRegSize);
6528 VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1)));
6529 fexpa_coeff = fexpa_coeff64;
6530 op_highbit = 16;
6531 op_shift = 52;
6532 }
6533
6534 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6535 uint64_t op = src.Uint(vform, i);
6536 uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)];
6537 result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift);
6538 dst.SetUint(vform, i, result);
6539 }
6540 return dst;
6541}
6542
Martyn Capewell37f28182020-01-14 10:15:10 +00006543template <typename T>
6544LogicVRegister Simulator::fscale(VectorFormat vform,
6545 LogicVRegister dst,
6546 const LogicVRegister& src1,
6547 const LogicVRegister& src2) {
6548 T two = T(2.0);
6549 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6550 T s1 = src1.Float<T>(i);
6551 if (!IsNaN(s1)) {
6552 int64_t scale = src2.Int(vform, i);
6553 // TODO: this is a low-performance implementation, but it's simple and
6554 // less likely to be buggy. Consider replacing it with something faster.
6555
6556 // Scales outside of these bounds become infinity or zero, so there's no
6557 // point iterating further.
6558 scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
6559
6560 // Compute s1 * 2 ^ scale. If scale is positive, multiply by two and
6561 // decrement scale until it's zero.
6562 while (scale-- > 0) {
6563 s1 = FPMul(s1, two);
6564 }
6565
6566 // If scale is negative, divide by two and increment scale until it's
6567 // zero. Initially, scale is (src2 - 1), so we pre-increment.
6568 while (++scale < 0) {
6569 s1 = FPDiv(s1, two);
6570 }
6571 }
6572 dst.SetFloat<T>(i, s1);
6573 }
6574 return dst;
6575}
6576
6577LogicVRegister Simulator::fscale(VectorFormat vform,
6578 LogicVRegister dst,
6579 const LogicVRegister& src1,
6580 const LogicVRegister& src2) {
6581 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6582 fscale<SimFloat16>(vform, dst, src1, src2);
6583 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6584 fscale<float>(vform, dst, src1, src2);
6585 } else {
6586 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6587 fscale<double>(vform, dst, src1, src2);
6588 }
6589 return dst;
6590}
6591
Alexandre Ramesd3832962016-07-04 15:03:43 +01006592LogicVRegister Simulator::scvtf(VectorFormat vform,
TatWai Chong31cd6a02020-01-10 13:03:26 -08006593 unsigned dst_data_size_in_bits,
6594 unsigned src_data_size_in_bits,
6595 LogicVRegister dst,
6596 const LogicPRegister& pg,
6597 const LogicVRegister& src,
6598 FPRounding round,
6599 int fbits) {
6600 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6601 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6602
6603 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6604 if (!pg.IsActive(vform, i)) continue;
6605
6606 int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1,
6607 0,
6608 src.Uint(vform, i));
6609
6610 switch (dst_data_size_in_bits) {
6611 case kHRegSize: {
6612 SimFloat16 result = FixedToFloat16(value, fbits, round);
6613 dst.SetUint(vform, i, Float16ToRawbits(result));
6614 break;
6615 }
6616 case kSRegSize: {
6617 float result = FixedToFloat(value, fbits, round);
6618 dst.SetUint(vform, i, FloatToRawbits(result));
6619 break;
6620 }
6621 case kDRegSize: {
6622 double result = FixedToDouble(value, fbits, round);
6623 dst.SetUint(vform, i, DoubleToRawbits(result));
6624 break;
6625 }
6626 default:
6627 VIXL_UNIMPLEMENTED();
6628 break;
6629 }
6630 }
6631
6632 return dst;
6633}
6634
6635LogicVRegister Simulator::scvtf(VectorFormat vform,
Alexandre Ramesd3832962016-07-04 15:03:43 +01006636 LogicVRegister dst,
6637 const LogicVRegister& src,
6638 int fbits,
6639 FPRounding round) {
TatWai Chong31cd6a02020-01-10 13:03:26 -08006640 return scvtf(vform,
6641 LaneSizeInBitsFromFormat(vform),
6642 LaneSizeInBitsFromFormat(vform),
6643 dst,
6644 GetPTrue(),
6645 src,
6646 round,
6647 fbits);
6648}
6649
6650LogicVRegister Simulator::ucvtf(VectorFormat vform,
6651 unsigned dst_data_size_in_bits,
6652 unsigned src_data_size_in_bits,
6653 LogicVRegister dst,
6654 const LogicPRegister& pg,
6655 const LogicVRegister& src,
6656 FPRounding round,
6657 int fbits) {
6658 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6659 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6660
Alexandre Ramesd3832962016-07-04 15:03:43 +01006661 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
TatWai Chong31cd6a02020-01-10 13:03:26 -08006662 if (!pg.IsActive(vform, i)) continue;
6663
6664 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
6665 0,
6666 src.Uint(vform, i));
6667
6668 switch (dst_data_size_in_bits) {
6669 case kHRegSize: {
6670 SimFloat16 result = UFixedToFloat16(value, fbits, round);
6671 dst.SetUint(vform, i, Float16ToRawbits(result));
6672 break;
6673 }
6674 case kSRegSize: {
6675 float result = UFixedToFloat(value, fbits, round);
6676 dst.SetUint(vform, i, FloatToRawbits(result));
6677 break;
6678 }
6679 case kDRegSize: {
6680 double result = UFixedToDouble(value, fbits, round);
6681 dst.SetUint(vform, i, DoubleToRawbits(result));
6682 break;
6683 }
6684 default:
6685 VIXL_UNIMPLEMENTED();
6686 break;
Alexandre Ramesd3832962016-07-04 15:03:43 +01006687 }
6688 }
TatWai Chong31cd6a02020-01-10 13:03:26 -08006689
Alexandre Ramesd3832962016-07-04 15:03:43 +01006690 return dst;
6691}
6692
Alexandre Ramesd3832962016-07-04 15:03:43 +01006693LogicVRegister Simulator::ucvtf(VectorFormat vform,
6694 LogicVRegister dst,
6695 const LogicVRegister& src,
6696 int fbits,
6697 FPRounding round) {
TatWai Chong31cd6a02020-01-10 13:03:26 -08006698 return ucvtf(vform,
6699 LaneSizeInBitsFromFormat(vform),
6700 LaneSizeInBitsFromFormat(vform),
6701 dst,
6702 GetPTrue(),
6703 src,
6704 round,
6705 fbits);
Alexandre Ramesd3832962016-07-04 15:03:43 +01006706}
6707
TatWai Chong4f28df72019-08-14 17:50:30 -07006708LogicVRegister Simulator::unpk(VectorFormat vform,
6709 LogicVRegister dst,
6710 const LogicVRegister& src,
6711 UnpackType unpack_type,
6712 ExtendType extend_type) {
6713 VectorFormat vform_half = VectorFormatHalfWidth(vform);
6714 const int lane_count = LaneCountFromFormat(vform);
6715 const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count;
6716
6717 switch (extend_type) {
Martyn Capewell2e954292020-01-14 14:56:42 +00006718 case kSignedExtend: {
6719 int64_t result[kZRegMaxSizeInBytes];
TatWai Chong4f28df72019-08-14 17:50:30 -07006720 for (int i = 0; i < lane_count; ++i) {
Martyn Capewell2e954292020-01-14 14:56:42 +00006721 result[i] = src.Int(vform_half, i + src_start_lane);
6722 }
6723 for (int i = 0; i < lane_count; ++i) {
6724 dst.SetInt(vform, i, result[i]);
TatWai Chong4f28df72019-08-14 17:50:30 -07006725 }
6726 break;
Martyn Capewell2e954292020-01-14 14:56:42 +00006727 }
6728 case kUnsignedExtend: {
6729 uint64_t result[kZRegMaxSizeInBytes];
TatWai Chong4f28df72019-08-14 17:50:30 -07006730 for (int i = 0; i < lane_count; ++i) {
Martyn Capewell2e954292020-01-14 14:56:42 +00006731 result[i] = src.Uint(vform_half, i + src_start_lane);
6732 }
6733 for (int i = 0; i < lane_count; ++i) {
6734 dst.SetUint(vform, i, result[i]);
TatWai Chong4f28df72019-08-14 17:50:30 -07006735 }
6736 break;
Martyn Capewell2e954292020-01-14 14:56:42 +00006737 }
TatWai Chong4f28df72019-08-14 17:50:30 -07006738 default:
6739 VIXL_UNREACHABLE();
6740 }
6741 return dst;
6742}
6743
TatWai Chong302729c2019-06-14 16:18:51 -07006744LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
TatWai Chong96713fe2019-06-04 16:39:37 -07006745 VectorFormat vform,
6746 LogicPRegister dst,
6747 const LogicPRegister& mask,
6748 const LogicVRegister& src1,
6749 const LogicVRegister& src2,
Martyn Capewell7fd6fd52019-12-06 14:50:15 +00006750 bool is_wide_elements,
6751 FlagsUpdate flags) {
TatWai Chong96713fe2019-06-04 16:39:37 -07006752 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
TatWai Chong302729c2019-06-14 16:18:51 -07006753 bool result = false;
TatWai Chong96713fe2019-06-04 16:39:37 -07006754 if (mask.IsActive(vform, lane)) {
6755 int64_t op1 = 0xbadbeef;
6756 int64_t op2 = 0xbadbeef;
6757 int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize;
TatWai Chong302729c2019-06-14 16:18:51 -07006758 switch (cond) {
TatWai Chong96713fe2019-06-04 16:39:37 -07006759 case eq:
6760 case ge:
6761 case gt:
6762 case lt:
6763 case le:
6764 case ne:
6765 op1 = src1.Int(vform, lane);
6766 op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane)
6767 : src2.Int(vform, lane);
6768 break;
6769 case hi:
6770 case hs:
6771 case ls:
6772 case lo:
6773 op1 = src1.Uint(vform, lane);
6774 op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane)
6775 : src2.Uint(vform, lane);
6776 break;
6777 default:
6778 VIXL_UNREACHABLE();
6779 }
6780
TatWai Chong302729c2019-06-14 16:18:51 -07006781 switch (cond) {
TatWai Chong96713fe2019-06-04 16:39:37 -07006782 case eq:
TatWai Chong302729c2019-06-14 16:18:51 -07006783 result = (op1 == op2);
TatWai Chong96713fe2019-06-04 16:39:37 -07006784 break;
6785 case ne:
TatWai Chong302729c2019-06-14 16:18:51 -07006786 result = (op1 != op2);
TatWai Chong96713fe2019-06-04 16:39:37 -07006787 break;
6788 case ge:
TatWai Chong302729c2019-06-14 16:18:51 -07006789 result = (op1 >= op2);
TatWai Chong96713fe2019-06-04 16:39:37 -07006790 break;
6791 case gt:
TatWai Chong302729c2019-06-14 16:18:51 -07006792 result = (op1 > op2);
TatWai Chong96713fe2019-06-04 16:39:37 -07006793 break;
6794 case le:
TatWai Chong302729c2019-06-14 16:18:51 -07006795 result = (op1 <= op2);
TatWai Chong96713fe2019-06-04 16:39:37 -07006796 break;
6797 case lt:
TatWai Chong302729c2019-06-14 16:18:51 -07006798 result = (op1 < op2);
TatWai Chong96713fe2019-06-04 16:39:37 -07006799 break;
6800 case hs:
TatWai Chong302729c2019-06-14 16:18:51 -07006801 result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2));
TatWai Chong96713fe2019-06-04 16:39:37 -07006802 break;
6803 case hi:
TatWai Chong302729c2019-06-14 16:18:51 -07006804 result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2));
TatWai Chong96713fe2019-06-04 16:39:37 -07006805 break;
6806 case ls:
TatWai Chong302729c2019-06-14 16:18:51 -07006807 result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2));
TatWai Chong96713fe2019-06-04 16:39:37 -07006808 break;
6809 case lo:
TatWai Chong302729c2019-06-14 16:18:51 -07006810 result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2));
TatWai Chong96713fe2019-06-04 16:39:37 -07006811 break;
6812 default:
6813 VIXL_UNREACHABLE();
6814 }
6815 }
TatWai Chong302729c2019-06-14 16:18:51 -07006816 dst.SetActive(vform, lane, result);
TatWai Chong96713fe2019-06-04 16:39:37 -07006817 }
6818
Martyn Capewell7fd6fd52019-12-06 14:50:15 +00006819 if (flags == SetFlags) PredTest(vform, mask, dst);
TatWai Chong96713fe2019-06-04 16:39:37 -07006820
6821 return dst;
6822}
6823
TatWai Chong29a0c432019-11-06 22:20:44 -08006824LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
6825 VectorFormat vform,
6826 LogicVRegister dst,
6827 const LogicVRegister& src1,
6828 const LogicVRegister& src2,
6829 bool is_wide_elements) {
6830 unsigned lane_size = LaneSizeInBitsFromFormat(vform);
Martyn Capewell3bf2d162020-02-17 15:04:36 +00006831 VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform;
TatWai Chong29a0c432019-11-06 22:20:44 -08006832
Martyn Capewell3bf2d162020-02-17 15:04:36 +00006833 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
6834 int shift_src_lane = lane;
6835 if (is_wide_elements) {
6836 // If the shift amount comes from wide elements, select the D-sized lane
6837 // which occupies the corresponding lanes of the value to be shifted.
6838 shift_src_lane = (lane * lane_size) / kDRegSize;
6839 }
6840 uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane);
6841
6842 // Saturate shift_amount to the size of the lane that will be shifted.
6843 if (shift_amount > lane_size) shift_amount = lane_size;
6844
6845 uint64_t value = src1.Uint(vform, lane);
6846 int64_t result = ShiftOperand(lane_size,
6847 value,
6848 shift_op,
6849 static_cast<unsigned>(shift_amount));
TatWai Chong29a0c432019-11-06 22:20:44 -08006850 dst.SetUint(vform, lane, result);
6851 }
6852
6853 return dst;
6854}
6855
Martyn Capewell83e86612020-02-19 15:46:15 +00006856LogicVRegister Simulator::asrd(VectorFormat vform,
6857 LogicVRegister dst,
6858 const LogicVRegister& src1,
6859 int shift) {
6860 VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
6861 LaneSizeInBitsFromFormat(vform)));
6862
6863 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6864 int64_t value = src1.Int(vform, i);
6865 if (shift <= 63) {
6866 if (value < 0) {
6867 // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely
6868 // cast to int64_t, and cannot cause signed overflow in the result.
6869 value = value + GetUintMask(shift);
6870 }
6871 value = ShiftOperand(kDRegSize, value, ASR, shift);
6872 } else {
6873 value = 0;
6874 }
6875 dst.SetInt(vform, i, value);
6876 }
6877 return dst;
6878}
6879
TatWai Chongcfb94212019-05-16 13:30:09 -07006880LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
TatWai Chong13634762019-07-16 16:20:45 -07006881 LogicalOp logical_op,
6882 VectorFormat vform,
TatWai Chongcfb94212019-05-16 13:30:09 -07006883 LogicVRegister zd,
6884 const LogicVRegister& zn,
6885 const LogicVRegister& zm) {
TatWai Chong13634762019-07-16 16:20:45 -07006886 VIXL_ASSERT(IsSVEFormat(vform));
TatWai Chongcfb94212019-05-16 13:30:09 -07006887 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6888 uint64_t op1 = zn.Uint(vform, i);
6889 uint64_t op2 = zm.Uint(vform, i);
6890 uint64_t result;
TatWai Chong13634762019-07-16 16:20:45 -07006891 switch (logical_op) {
6892 case AND:
TatWai Chongcfb94212019-05-16 13:30:09 -07006893 result = op1 & op2;
6894 break;
TatWai Chong13634762019-07-16 16:20:45 -07006895 case BIC:
TatWai Chongcfb94212019-05-16 13:30:09 -07006896 result = op1 & ~op2;
6897 break;
TatWai Chong13634762019-07-16 16:20:45 -07006898 case EOR:
TatWai Chongcfb94212019-05-16 13:30:09 -07006899 result = op1 ^ op2;
6900 break;
TatWai Chong13634762019-07-16 16:20:45 -07006901 case ORR:
TatWai Chongcfb94212019-05-16 13:30:09 -07006902 result = op1 | op2;
6903 break;
6904 default:
6905 result = 0;
6906 VIXL_UNIMPLEMENTED();
6907 }
6908 zd.SetUint(vform, i, result);
6909 }
6910
6911 return zd;
6912}
6913
TatWai Chongf4fa8222019-06-17 12:08:14 -07006914LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
6915 LogicPRegister pd,
TatWai Chongf4fa8222019-06-17 12:08:14 -07006916 const LogicPRegister& pn,
TatWai Chonga3e8b172019-11-22 21:48:56 -08006917 const LogicPRegister& pm) {
6918 for (int i = 0; i < pn.GetChunkCount(); i++) {
TatWai Chongf4fa8222019-06-17 12:08:14 -07006919 LogicPRegister::ChunkType op1 = pn.GetChunk(i);
6920 LogicPRegister::ChunkType op2 = pm.GetChunk(i);
TatWai Chongf4fa8222019-06-17 12:08:14 -07006921 LogicPRegister::ChunkType result;
6922 switch (op) {
6923 case ANDS_p_p_pp_z:
6924 case AND_p_p_pp_z:
TatWai Chonga3e8b172019-11-22 21:48:56 -08006925 result = op1 & op2;
TatWai Chongf4fa8222019-06-17 12:08:14 -07006926 break;
6927 case BICS_p_p_pp_z:
6928 case BIC_p_p_pp_z:
TatWai Chonga3e8b172019-11-22 21:48:56 -08006929 result = op1 & ~op2;
TatWai Chongf4fa8222019-06-17 12:08:14 -07006930 break;
6931 case EORS_p_p_pp_z:
6932 case EOR_p_p_pp_z:
TatWai Chonga3e8b172019-11-22 21:48:56 -08006933 result = op1 ^ op2;
TatWai Chongf4fa8222019-06-17 12:08:14 -07006934 break;
6935 case NANDS_p_p_pp_z:
6936 case NAND_p_p_pp_z:
TatWai Chonga3e8b172019-11-22 21:48:56 -08006937 result = ~(op1 & op2);
TatWai Chongf4fa8222019-06-17 12:08:14 -07006938 break;
6939 case NORS_p_p_pp_z:
6940 case NOR_p_p_pp_z:
TatWai Chonga3e8b172019-11-22 21:48:56 -08006941 result = ~(op1 | op2);
TatWai Chongf4fa8222019-06-17 12:08:14 -07006942 break;
6943 case ORNS_p_p_pp_z:
6944 case ORN_p_p_pp_z:
TatWai Chonga3e8b172019-11-22 21:48:56 -08006945 result = op1 | ~op2;
TatWai Chongf4fa8222019-06-17 12:08:14 -07006946 break;
6947 case ORRS_p_p_pp_z:
6948 case ORR_p_p_pp_z:
TatWai Chonga3e8b172019-11-22 21:48:56 -08006949 result = op1 | op2;
TatWai Chongf4fa8222019-06-17 12:08:14 -07006950 break;
6951 default:
6952 result = 0;
6953 VIXL_UNIMPLEMENTED();
6954 }
6955 pd.SetChunk(i, result);
6956 }
TatWai Chongf4fa8222019-06-17 12:08:14 -07006957 return pd;
6958}
Alexandre Ramesd3832962016-07-04 15:03:43 +01006959
Martyn Capewelld255bdb2019-08-13 16:27:30 +01006960LogicVRegister Simulator::SVEBitwiseImmHelper(
6961 SVEBitwiseLogicalWithImm_UnpredicatedOp op,
6962 VectorFormat vform,
6963 LogicVRegister zd,
6964 uint64_t imm) {
TatWai Chonga1885a52019-04-15 17:19:14 -07006965 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6966 uint64_t op1 = zd.Uint(vform, i);
6967 uint64_t result;
6968 switch (op) {
6969 case AND_z_zi:
6970 result = op1 & imm;
6971 break;
TatWai Chonga1885a52019-04-15 17:19:14 -07006972 case EOR_z_zi:
6973 result = op1 ^ imm;
6974 break;
6975 case ORR_z_zi:
6976 result = op1 | imm;
6977 break;
6978 default:
6979 result = 0;
6980 VIXL_UNIMPLEMENTED();
6981 }
6982 zd.SetUint(vform, i, result);
6983 }
6984
6985 return zd;
6986}
6987
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006988void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
Jacob Bramleye668b202019-08-14 17:57:34 +01006989 const LogicPRegister& pg,
6990 unsigned zt_code,
Jacob Bramleye668b202019-08-14 17:57:34 +01006991 const LogicSVEAddressVector& addr) {
6992 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
6993
6994 int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00006995 int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
6996 int msize_in_bytes = addr.GetMsizeInBytes();
6997 int reg_count = addr.GetRegCount();
Jacob Bramleye668b202019-08-14 17:57:34 +01006998
6999 VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7000 VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7001
7002 unsigned zt_codes[4] = {zt_code,
7003 (zt_code + 1) % kNumberOfZRegisters,
7004 (zt_code + 2) % kNumberOfZRegisters,
7005 (zt_code + 3) % kNumberOfZRegisters};
7006
7007 LogicVRegister zt[4] = {
7008 ReadVRegister(zt_codes[0]),
7009 ReadVRegister(zt_codes[1]),
7010 ReadVRegister(zt_codes[2]),
7011 ReadVRegister(zt_codes[3]),
7012 };
7013
7014 // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes
7015 // are ignored, so read the source register using the VectorFormat that
7016 // corresponds with the storage format, and multiply the index accordingly.
7017 VectorFormat unpack_vform =
7018 SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
7019 int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
7020
7021 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7022 if (!pg.IsActive(vform, i)) continue;
7023
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007024 for (int r = 0; r < reg_count; r++) {
7025 uint64_t element_address = addr.GetElementAddress(i, r);
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +00007026 StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address);
Jacob Bramleye668b202019-08-14 17:57:34 +01007027 }
7028 }
7029
Jacob Bramley7eb3e212019-11-22 17:28:05 +00007030 if (ShouldTraceWrites()) {
7031 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7032 if (esize_in_bytes_log2 == msize_in_bytes_log2) {
7033 // Use an FP format where it's likely that we're accessing FP data.
7034 format = GetPrintRegisterFormatTryFP(format);
7035 }
7036 // Stores don't represent a change to the source register's value, so only
7037 // print the relevant part of the value.
7038 format = GetPrintRegPartial(format);
7039
7040 PrintZStructAccess(zt_code,
7041 reg_count,
7042 pg,
7043 format,
7044 msize_in_bytes,
7045 "->",
7046 addr);
Jacob Bramleye668b202019-08-14 17:57:34 +01007047 }
7048}
7049
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007050void Simulator::SVEStructuredLoadHelper(VectorFormat vform,
TatWai Chong6205eb42019-09-24 10:07:20 +01007051 const LogicPRegister& pg,
7052 unsigned zt_code,
TatWai Chong6205eb42019-09-24 10:07:20 +01007053 const LogicSVEAddressVector& addr,
7054 bool is_signed) {
Jacob Bramley7eb3e212019-11-22 17:28:05 +00007055 int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007056 int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7057 int msize_in_bytes = addr.GetMsizeInBytes();
7058 int reg_count = addr.GetRegCount();
7059
TatWai Chong6205eb42019-09-24 10:07:20 +01007060 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
Jacob Bramley7eb3e212019-11-22 17:28:05 +00007061 VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
TatWai Chong6205eb42019-09-24 10:07:20 +01007062 VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7063
TatWai Chong6205eb42019-09-24 10:07:20 +01007064 unsigned zt_codes[4] = {zt_code,
7065 (zt_code + 1) % kNumberOfZRegisters,
7066 (zt_code + 2) % kNumberOfZRegisters,
7067 (zt_code + 3) % kNumberOfZRegisters};
TatWai Chong6205eb42019-09-24 10:07:20 +01007068 LogicVRegister zt[4] = {
7069 ReadVRegister(zt_codes[0]),
7070 ReadVRegister(zt_codes[1]),
7071 ReadVRegister(zt_codes[2]),
7072 ReadVRegister(zt_codes[3]),
7073 };
7074
TatWai Chong6205eb42019-09-24 10:07:20 +01007075 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Jacob Bramleybc4a54f2019-11-04 16:44:01 +00007076 for (int r = 0; r < reg_count; r++) {
7077 uint64_t element_address = addr.GetElementAddress(i, r);
TatWai Chong6205eb42019-09-24 10:07:20 +01007078
7079 if (!pg.IsActive(vform, i)) {
7080 zt[r].SetUint(vform, i, 0);
7081 continue;
7082 }
7083
7084 if (is_signed) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +00007085 LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address);
TatWai Chong6205eb42019-09-24 10:07:20 +01007086 } else {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +00007087 LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address);
TatWai Chong6205eb42019-09-24 10:07:20 +01007088 }
7089 }
7090 }
7091
Jacob Bramley7eb3e212019-11-22 17:28:05 +00007092 if (ShouldTraceVRegs()) {
7093 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7094 if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
7095 // Use an FP format where it's likely that we're accessing FP data.
7096 format = GetPrintRegisterFormatTryFP(format);
7097 }
7098 PrintZStructAccess(zt_code,
7099 reg_count,
7100 pg,
7101 format,
7102 msize_in_bytes,
7103 "<-",
7104 addr);
TatWai Chong6205eb42019-09-24 10:07:20 +01007105 }
7106}
7107
TatWai Chong5d872292020-01-02 15:39:51 -08007108LogicPRegister Simulator::brka(LogicPRegister pd,
7109 const LogicPRegister& pg,
7110 const LogicPRegister& pn) {
7111 bool break_ = false;
7112 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7113 if (pg.IsActive(kFormatVnB, i)) {
7114 pd.SetActive(kFormatVnB, i, !break_);
7115 break_ |= pn.IsActive(kFormatVnB, i);
7116 }
7117 }
7118
7119 return pd;
7120}
7121
7122LogicPRegister Simulator::brkb(LogicPRegister pd,
7123 const LogicPRegister& pg,
7124 const LogicPRegister& pn) {
7125 bool break_ = false;
7126 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7127 if (pg.IsActive(kFormatVnB, i)) {
7128 break_ |= pn.IsActive(kFormatVnB, i);
7129 pd.SetActive(kFormatVnB, i, !break_);
7130 }
7131 }
7132
7133 return pd;
7134}
7135
7136LogicPRegister Simulator::brkn(LogicPRegister pdm,
7137 const LogicPRegister& pg,
7138 const LogicPRegister& pn) {
7139 if (!IsLastActive(kFormatVnB, pg, pn)) {
7140 pfalse(pdm);
7141 }
7142 return pdm;
7143}
7144
TatWai Chong38303d92019-12-02 15:49:29 -08007145LogicPRegister Simulator::brkpa(LogicPRegister pd,
7146 const LogicPRegister& pg,
7147 const LogicPRegister& pn,
7148 const LogicPRegister& pm) {
7149 bool last_active = IsLastActive(kFormatVnB, pg, pn);
7150
7151 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7152 bool active = false;
7153 if (pg.IsActive(kFormatVnB, i)) {
7154 active = last_active;
7155 last_active = last_active && !pm.IsActive(kFormatVnB, i);
7156 }
7157 pd.SetActive(kFormatVnB, i, active);
7158 }
7159
7160 return pd;
7161}
7162
7163LogicPRegister Simulator::brkpb(LogicPRegister pd,
7164 const LogicPRegister& pg,
7165 const LogicPRegister& pn,
7166 const LogicPRegister& pm) {
7167 bool last_active = IsLastActive(kFormatVnB, pg, pn);
7168
7169 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7170 bool active = false;
7171 if (pg.IsActive(kFormatVnB, i)) {
7172 last_active = last_active && !pm.IsActive(kFormatVnB, i);
7173 active = last_active;
7174 }
7175 pd.SetActive(kFormatVnB, i, active);
7176 }
7177
7178 return pd;
7179}
7180
Jacob Bramley85a9c102019-12-09 17:48:29 +00007181void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
7182 const LogicPRegister& pg,
7183 unsigned zt_code,
7184 const LogicSVEAddressVector& addr,
7185 SVEFaultTolerantLoadType type,
7186 bool is_signed) {
7187 int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
7188 int msize_in_bits = addr.GetMsizeInBits();
7189 int msize_in_bytes = addr.GetMsizeInBytes();
7190
7191 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7192 VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
7193 VIXL_ASSERT(addr.GetRegCount() == 1);
7194
7195 LogicVRegister zt = ReadVRegister(zt_code);
7196 LogicPRegister ffr = ReadFFR();
7197
7198 // Non-faulting loads are allowed to fail arbitrarily. To stress user
7199 // code, fail a random element in roughly one in eight full-vector loads.
7200 uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));
7201 int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
7202
7203 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7204 uint64_t value = 0;
7205
7206 if (pg.IsActive(vform, i)) {
7207 uint64_t element_address = addr.GetElementAddress(i, 0);
7208
7209 if (type == kSVEFirstFaultLoad) {
7210 // First-faulting loads always load the first active element, regardless
7211 // of FFR. The result will be discarded if its FFR lane is inactive, but
7212 // it could still generate a fault.
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +00007213 value = MemReadUint(msize_in_bytes, element_address);
Jacob Bramley85a9c102019-12-09 17:48:29 +00007214 // All subsequent elements have non-fault semantics.
7215 type = kSVENonFaultLoad;
7216
7217 } else if (ffr.IsActive(vform, i)) {
7218 // Simulation of fault-tolerant loads relies on system calls, and is
7219 // likely to be relatively slow, so we only actually perform the load if
7220 // its FFR lane is active.
7221
7222 bool can_read = (i < fake_fault_at_lane) &&
7223 CanReadMemory(element_address, msize_in_bytes);
7224 if (can_read) {
Jacob Bramleyc4ef66e2020-10-30 18:25:43 +00007225 value = MemReadUint(msize_in_bytes, element_address);
Jacob Bramley85a9c102019-12-09 17:48:29 +00007226 } else {
7227 // Propagate the fault to the end of FFR.
7228 for (int j = i; j < LaneCountFromFormat(vform); j++) {
7229 ffr.SetActive(vform, j, false);
7230 }
7231 }
7232 }
7233 }
7234
7235 // The architecture permits a few possible results for inactive FFR lanes
7236 // (including those caused by a fault in this instruction). We choose to
7237 // leave the register value unchanged (like merging predication) because
7238 // no other input to this instruction can have the same behaviour.
7239 //
7240 // Note that this behaviour takes precedence over pg's zeroing predication.
7241
7242 if (ffr.IsActive(vform, i)) {
7243 int msb = msize_in_bits - 1;
7244 if (is_signed) {
7245 zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
7246 } else {
7247 zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
7248 }
7249 }
7250 }
7251
7252 if (ShouldTraceVRegs()) {
7253 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7254 if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
7255 // Use an FP format where it's likely that we're accessing FP data.
7256 format = GetPrintRegisterFormatTryFP(format);
7257 }
7258 // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
7259 // expects a single mask, so combine the two predicates.
7260 SimPRegister mask;
7261 SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
7262 PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
7263 }
7264}
7265
TatWai Chong113d9192020-05-19 01:02:36 -07007266void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
TatWai Chongcd3f6c52020-06-14 00:42:39 -07007267 VectorFormat vform,
7268 SVEOffsetModifier mod) {
TatWai Chong113d9192020-05-19 01:02:36 -07007269 bool is_signed = instr->ExtractBit(14) == 0;
7270 bool is_ff = instr->ExtractBit(13) == 1;
7271 // Note that these instructions don't use the Dtype encoding.
7272 int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
7273 int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
7274 uint64_t base = ReadXRegister(instr->GetRn());
TatWai Chong113d9192020-05-19 01:02:36 -07007275 LogicSVEAddressVector addr(base,
7276 &ReadVRegister(instr->GetRm()),
7277 vform,
7278 mod,
7279 scale);
7280 addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
7281 if (is_ff) {
7282 SVEFaultTolerantLoadHelper(vform,
7283 ReadPRegister(instr->GetPgLow8()),
7284 instr->GetRt(),
7285 addr,
7286 kSVEFirstFaultLoad,
7287 is_signed);
7288 } else {
7289 SVEStructuredLoadHelper(vform,
7290 ReadPRegister(instr->GetPgLow8()),
7291 instr->GetRt(),
7292 addr,
7293 is_signed);
7294 }
7295}
7296
Jacob Bramley0ce75842019-07-17 18:12:50 +01007297int Simulator::GetFirstActive(VectorFormat vform,
7298 const LogicPRegister& pg) const {
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01007299 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
Jacob Bramley0ce75842019-07-17 18:12:50 +01007300 if (pg.IsActive(vform, i)) return i;
7301 }
7302 return -1;
7303}
7304
7305int Simulator::GetLastActive(VectorFormat vform,
7306 const LogicPRegister& pg) const {
7307 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
7308 if (pg.IsActive(vform, i)) return i;
7309 }
7310 return -1;
7311}
7312
7313int Simulator::CountActiveLanes(VectorFormat vform,
7314 const LogicPRegister& pg) const {
7315 int count = 0;
7316 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7317 count += pg.IsActive(vform, i) ? 1 : 0;
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01007318 }
7319 return count;
7320}
7321
Jacob Bramleyd961a0c2019-07-17 10:53:45 +01007322int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
7323 const LogicPRegister& pg,
7324 const LogicPRegister& pn) const {
7325 int count = 0;
7326 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7327 count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
7328 }
7329 return count;
7330}
7331
Jacob Bramley0ce75842019-07-17 18:12:50 +01007332int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
7333 int pattern) const {
7334 VIXL_ASSERT(IsSVEFormat(vform));
7335 int all = LaneCountFromFormat(vform);
7336 VIXL_ASSERT(all > 0);
7337
7338 switch (pattern) {
7339 case SVE_VL1:
7340 case SVE_VL2:
7341 case SVE_VL3:
7342 case SVE_VL4:
7343 case SVE_VL5:
7344 case SVE_VL6:
7345 case SVE_VL7:
7346 case SVE_VL8:
7347 // VL1-VL8 are encoded directly.
7348 VIXL_STATIC_ASSERT(SVE_VL1 == 1);
7349 VIXL_STATIC_ASSERT(SVE_VL8 == 8);
7350 return (pattern <= all) ? pattern : 0;
7351 case SVE_VL16:
7352 case SVE_VL32:
7353 case SVE_VL64:
7354 case SVE_VL128:
7355 case SVE_VL256: {
7356 // VL16-VL256 are encoded as log2(N) + c.
7357 int min = 16 << (pattern - SVE_VL16);
7358 return (min <= all) ? min : 0;
7359 }
7360 // Special cases.
7361 case SVE_POW2:
7362 return 1 << HighestSetBitPosition(all);
7363 case SVE_MUL4:
7364 return all - (all % 4);
7365 case SVE_MUL3:
7366 return all - (all % 3);
7367 case SVE_ALL:
7368 return all;
7369 }
7370 // Unnamed cases archicturally return 0.
7371 return 0;
7372}
7373
Jacob Bramleydcdbd752020-01-20 11:47:36 +00007374uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
7375 if (IsContiguous()) {
7376 return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
7377 }
7378
7379 VIXL_ASSERT(IsScatterGather());
7380 VIXL_ASSERT(vector_ != NULL);
7381
7382 // For scatter-gather accesses, we need to extract the offset from vector_,
7383 // and apply modifiers.
7384
7385 uint64_t offset = 0;
7386 switch (vector_form_) {
7387 case kFormatVnS:
7388 offset = vector_->GetLane<uint32_t>(lane);
7389 break;
7390 case kFormatVnD:
7391 offset = vector_->GetLane<uint64_t>(lane);
7392 break;
7393 default:
7394 VIXL_UNIMPLEMENTED();
7395 break;
7396 }
7397
7398 switch (vector_mod_) {
7399 case SVE_MUL_VL:
7400 VIXL_UNIMPLEMENTED();
7401 break;
7402 case SVE_LSL:
7403 // We apply the shift below. There's nothing to do here.
7404 break;
7405 case NO_SVE_OFFSET_MODIFIER:
7406 VIXL_ASSERT(vector_shift_ == 0);
7407 break;
7408 case SVE_UXTW:
7409 offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
7410 break;
7411 case SVE_SXTW:
7412 offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
7413 break;
7414 }
7415
7416 return base_ + (offset << vector_shift_);
7417}
7418
Jacob Bramleyd1686cb2019-05-28 17:39:05 +01007419
Alexandre Ramesd3832962016-07-04 15:03:43 +01007420} // namespace aarch64
7421} // namespace vixl
7422
Pierre Langlois1e85b7f2016-08-05 14:20:36 +01007423#endif // VIXL_INCLUDE_SIMULATOR_AARCH64