author     Richard Henderson <richard.henderson@linaro.org>  2022-06-09 22:08:27 -0700
committer  Richard Henderson <richard.henderson@linaro.org>  2022-06-09 22:08:27 -0700
commit     b3cd3b5a66f0dddfe3d5ba2bef13cd4f5b89cde9 (patch)
tree       f1f508cc436cab122b0d832653b4a27245114597
parent     9cc1bf1ebca550f8d90f967ccd2b6d2e00e81387 (diff)
parent     07314158f6aa4d2589520c194a7531b9364a8d54 (diff)
Merge tag 'pull-riscv-to-apply-20220610' of github.com:alistair23/qemu into staging
Fourth RISC-V PR for QEMU 7.1

 * Update MAINTAINERS
 * Add support for Zmmul extension
 * Fixup FDT errors when supplying device tree from the command line for virt machine
 * Avoid overflowing the addr_config buffer in the SiFive PLIC
 * Support -device loader addresses above 2GB
 * Correctly wake from WFI on VS-level external interrupts
 * Fixes for RV128 support
 * Support Vector extension tail agnostic setting elements' bits to all 1s
 * Don't expose the CPU properties on named CPUs
 * Fix vector extension assert for RV32

# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCAAdFiEE9sSsRtSTSGjTuM6PIeENKd+XcFQFAmKix74ACgkQIeENKd+X
# cFTKdgf8DP85Mf91+m1Dd1zji6d4JiFa+i7wer5T6la7qQAiIbyyq6kax0K31YYF
# QuX3x7i9erF8Z/kox3MlYjjytPS0iJK9+Fica1ttslBJLv/o2K7SAaLmUwS65AB5
# rHjRCWDdeA3zPv7tcHEIpYZNFb163N2ZYqhmTTmL6Q0KTaa73OWKuJIbJzB8iT85
# LH1cUTfCEWNzsG0PLAD4Xh4ug4Hq6sW54NXXMDZiDSVak/FdNSEzuUMUsNW12XA1
# ib1uhfygHGYfSXFUgYmCiHK7iEey7A9IZtGKdNIXObx1/QVOrvyW+E90XRQqEHHC
# XeOkdTUB2YfPsC0Qs4VVqsVEQVjUCw==
# =gz3H
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 09 Jun 2022 09:25:34 PM PDT
# gpg: using RSA key F6C4AC46D4934868D3B8CE8F21E10D29DF977054
# gpg: Good signature from "Alistair Francis <alistair@alistair23.me>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: F6C4 AC46 D493 4868 D3B8 CE8F 21E1 0D29 DF97 7054

* tag 'pull-riscv-to-apply-20220610' of github.com:alistair23/qemu: (25 commits)
  target/riscv: trans_rvv: Avoid assert for RV32 and e64
  target/riscv: Don't expose the CPU properties on named CPUs
  target/riscv: rvv: Add option 'rvv_ta_all_1s' to enable optional tail agnostic behavior
  target/riscv: rvv: Add tail agnostic for vector permutation instructions
  target/riscv: rvv: Add tail agnostic for vector mask instructions
  target/riscv: rvv: Add tail agnostic for vector reduction instructions
  target/riscv: rvv: Add tail agnostic for vector floating-point instructions
  target/riscv: rvv: Add tail agnostic for vector fix-point arithmetic instructions
  target/riscv: rvv: Add tail agnostic for vector integer merge and move instructions
  target/riscv: rvv: Add tail agnostic for vector integer comparison instructions
  target/riscv: rvv: Add tail agnostic for vector integer shift instructions
  target/riscv: rvv: Add tail agnostic for vx, vvm, vxm instructions
  target/riscv: rvv: Add tail agnostic for vector load / store instructions
  target/riscv: rvv: Add tail agnostic for vv instructions
  target/riscv: rvv: Early exit when vstart >= vl
  target/riscv: rvv: Rename ambiguous esz
  target/riscv: rvv: Prune redundant access_type parameter passed
  target/riscv: rvv: Prune redundant ESZ, DSZ parameter passed
  target/riscv/debug.c: keep experimental rv128 support working
  target/riscv: Wake on VS-level external interrupts
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r--  MAINTAINERS                                  1
-rw-r--r--  hw/arm/armv7m.c                              2
-rw-r--r--  hw/arm/boot.c                                8
-rw-r--r--  hw/core/generic-loader.c                     2
-rw-r--r--  hw/core/loader.c                            81
-rw-r--r--  hw/i386/x86.c                                2
-rw-r--r--  hw/intc/sifive_plic.c                       19
-rw-r--r--  hw/riscv/boot.c                              5
-rw-r--r--  hw/riscv/virt.c                             28
-rw-r--r--  include/hw/loader.h                         55
-rw-r--r--  target/riscv/cpu.c                          68
-rw-r--r--  target/riscv/cpu.h                           4
-rw-r--r--  target/riscv/cpu_helper.c                    4
-rw-r--r--  target/riscv/debug.c                         2
-rw-r--r--  target/riscv/insn_trans/trans_rvm.c.inc     18
-rw-r--r--  target/riscv/insn_trans/trans_rvv.c.inc    106
-rw-r--r--  target/riscv/internals.h                     6
-rw-r--r--  target/riscv/translate.c                     4
-rw-r--r--  target/riscv/vector_helper.c              1588
19 files changed, 1244 insertions, 759 deletions
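
The "tail agnostic" items in the cover letter above change how vector instructions treat destination elements past vl. As a rough standalone illustration of the policy this series implements when the new rvv_ta_all_1s property is enabled -- the sketch below is not QEMU code and every name in it is illustrative -- the idea is to overwrite the tail elements of a destination register with all 1s once the body elements have been written, much like the vext_set_elems_1s() helper added to vector_helper.c in the patch:

/*
 * Minimal sketch of the RVV tail-agnostic "all 1s" policy.
 * Assumptions: a flat byte buffer stands in for a vector register,
 * vl and vlmax are element counts, esz is the element size in bytes.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void set_tail_all_1s(void *vreg, size_t vl, size_t vlmax, size_t esz)
{
    if (vl >= vlmax) {
        return;                                   /* no tail elements */
    }
    /* memset with -1 sets every tail byte to 0xff, i.e. every bit to 1 */
    memset((uint8_t *)vreg + vl * esz, -1, (vlmax - vl) * esz);
}

int main(void)
{
    uint32_t v[8] = {1, 2, 3, 4, 5, 6, 7, 8};     /* pretend VLMAX == 8 */

    set_tail_all_1s(v, 5, 8, sizeof(v[0]));       /* body is elements 0..4 */

    for (size_t i = 0; i < 8; i++) {
        /* elements 5..7 print as 0xffffffff */
        printf("v[%zu] = 0x%08" PRIx32 "\n", i, v[i]);
    }
    return 0;
}

With the series applied, this behaviour is opted into per CPU through the rvv_ta_all_1s property defined later in this diff; a command line along the lines of -cpu rv64,v=true,rvv_ta_all_1s=true would enable it (the property name comes from the patch, the rest of that command line is an assumption).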
diff --git a/MAINTAINERS b/MAINTAINERS
index 5580a36b68..b3af081c51 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2198,6 +2198,7 @@ Generic Loader
M: Alistair Francis <alistair@alistair23.me>
S: Maintained
F: hw/core/generic-loader.c
+F: hw/core/uboot_image.h
F: include/hw/core/generic-loader.h
F: docs/system/generic-loader.rst
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index 32349ec94b..990861ee5e 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -570,7 +570,7 @@ static void armv7m_reset(void *opaque)
void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size)
{
- int image_size;
+ ssize_t image_size;
uint64_t entry;
int big_endian;
AddressSpace *as;
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index a8de33fd64..ada2717f76 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -881,7 +881,7 @@ static int do_arm_linux_init(Object *obj, void *opaque)
return 0;
}
-static int64_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
+static ssize_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
uint64_t *lowaddr, uint64_t *highaddr,
int elf_machine, AddressSpace *as)
{
@@ -892,7 +892,7 @@ static int64_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
} elf_header;
int data_swab = 0;
bool big_endian;
- int64_t ret = -1;
+ ssize_t ret = -1;
Error *err = NULL;
@@ -1014,7 +1014,7 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
/* Set up for a direct boot of a kernel image file. */
CPUState *cs;
AddressSpace *as = arm_boot_address_space(cpu, info);
- int kernel_size;
+ ssize_t kernel_size;
int initrd_size;
int is_linux = 0;
uint64_t elf_entry;
@@ -1093,7 +1093,7 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
if (kernel_size > info->ram_size) {
error_report("kernel '%s' is too large to fit in RAM "
- "(kernel size %d, RAM size %" PRId64 ")",
+ "(kernel size %zd, RAM size %" PRId64 ")",
info->kernel_filename, kernel_size, info->ram_size);
exit(1);
}
diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c
index c666545aa0..4f4d77908d 100644
--- a/hw/core/generic-loader.c
+++ b/hw/core/generic-loader.c
@@ -67,7 +67,7 @@ static void generic_loader_realize(DeviceState *dev, Error **errp)
GenericLoaderState *s = GENERIC_LOADER(dev);
hwaddr entry;
int big_endian;
- int size = 0;
+ ssize_t size = 0;
s->set_pc = false;
diff --git a/hw/core/loader.c b/hw/core/loader.c
index edde657ac3..0548830733 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -114,17 +114,17 @@ ssize_t read_targphys(const char *name,
return did;
}
-int load_image_targphys(const char *filename,
- hwaddr addr, uint64_t max_sz)
+ssize_t load_image_targphys(const char *filename,
+ hwaddr addr, uint64_t max_sz)
{
return load_image_targphys_as(filename, addr, max_sz, NULL);
}
/* return the size or -1 if error */
-int load_image_targphys_as(const char *filename,
- hwaddr addr, uint64_t max_sz, AddressSpace *as)
+ssize_t load_image_targphys_as(const char *filename,
+ hwaddr addr, uint64_t max_sz, AddressSpace *as)
{
- int size;
+ ssize_t size;
size = get_image_size(filename);
if (size < 0 || size > max_sz) {
@@ -138,9 +138,9 @@ int load_image_targphys_as(const char *filename,
return size;
}
-int load_image_mr(const char *filename, MemoryRegion *mr)
+ssize_t load_image_mr(const char *filename, MemoryRegion *mr)
{
- int size;
+ ssize_t size;
if (!memory_access_is_direct(mr, false)) {
/* Can only load an image into RAM or ROM */
@@ -222,8 +222,8 @@ static void bswap_ahdr(struct exec *e)
: (_N_SEGMENT_ROUND (_N_TXTENDADDR(x, target_page_size), target_page_size)))
-int load_aout(const char *filename, hwaddr addr, int max_sz,
- int bswap_needed, hwaddr target_page_size)
+ssize_t load_aout(const char *filename, hwaddr addr, int max_sz,
+ int bswap_needed, hwaddr target_page_size)
{
int fd;
ssize_t size, ret;
@@ -617,13 +617,14 @@ toosmall:
}
/* Load a U-Boot image. */
-static int load_uboot_image(const char *filename, hwaddr *ep, hwaddr *loadaddr,
- int *is_linux, uint8_t image_type,
- uint64_t (*translate_fn)(void *, uint64_t),
- void *translate_opaque, AddressSpace *as)
+static ssize_t load_uboot_image(const char *filename, hwaddr *ep,
+ hwaddr *loadaddr, int *is_linux,
+ uint8_t image_type,
+ uint64_t (*translate_fn)(void *, uint64_t),
+ void *translate_opaque, AddressSpace *as)
{
int fd;
- int size;
+ ssize_t size;
hwaddr address;
uboot_image_header_t h;
uboot_image_header_t *hdr = &h;
@@ -760,40 +761,40 @@ out:
return ret;
}
-int load_uimage(const char *filename, hwaddr *ep, hwaddr *loadaddr,
- int *is_linux,
- uint64_t (*translate_fn)(void *, uint64_t),
- void *translate_opaque)
+ssize_t load_uimage(const char *filename, hwaddr *ep, hwaddr *loadaddr,
+ int *is_linux,
+ uint64_t (*translate_fn)(void *, uint64_t),
+ void *translate_opaque)
{
return load_uboot_image(filename, ep, loadaddr, is_linux, IH_TYPE_KERNEL,
translate_fn, translate_opaque, NULL);
}
-int load_uimage_as(const char *filename, hwaddr *ep, hwaddr *loadaddr,
- int *is_linux,
- uint64_t (*translate_fn)(void *, uint64_t),
- void *translate_opaque, AddressSpace *as)
+ssize_t load_uimage_as(const char *filename, hwaddr *ep, hwaddr *loadaddr,
+ int *is_linux,
+ uint64_t (*translate_fn)(void *, uint64_t),
+ void *translate_opaque, AddressSpace *as)
{
return load_uboot_image(filename, ep, loadaddr, is_linux, IH_TYPE_KERNEL,
translate_fn, translate_opaque, as);
}
/* Load a ramdisk. */
-int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz)
+ssize_t load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz)
{
return load_ramdisk_as(filename, addr, max_sz, NULL);
}
-int load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz,
- AddressSpace *as)
+ssize_t load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz,
+ AddressSpace *as)
{
return load_uboot_image(filename, NULL, &addr, NULL, IH_TYPE_RAMDISK,
NULL, NULL, as);
}
/* Load a gzip-compressed kernel to a dynamically allocated buffer. */
-int load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
- uint8_t **buffer)
+ssize_t load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
+ uint8_t **buffer)
{
uint8_t *compressed_data = NULL;
uint8_t *data = NULL;
@@ -838,9 +839,9 @@ int load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
}
/* Load a gzip-compressed kernel. */
-int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz)
+ssize_t load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz)
{
- int bytes;
+ ssize_t bytes;
uint8_t *data;
bytes = load_image_gzipped_buffer(filename, max_sz, &data);
@@ -970,14 +971,15 @@ static void *rom_set_mr(Rom *rom, Object *owner, const char *name, bool ro)
return data;
}
-int rom_add_file(const char *file, const char *fw_dir,
- hwaddr addr, int32_t bootindex,
- bool option_rom, MemoryRegion *mr,
- AddressSpace *as)
+ssize_t rom_add_file(const char *file, const char *fw_dir,
+ hwaddr addr, int32_t bootindex,
+ bool option_rom, MemoryRegion *mr,
+ AddressSpace *as)
{
MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
Rom *rom;
- int rc, fd = -1;
+ ssize_t rc;
+ int fd = -1;
char devpath[100];
if (as && mr) {
@@ -1019,7 +1021,7 @@ int rom_add_file(const char *file, const char *fw_dir,
lseek(fd, 0, SEEK_SET);
rc = read(fd, rom->data, rom->datasize);
if (rc != rom->datasize) {
- fprintf(stderr, "rom: file %-20s: read error: rc=%d (expected %zd)\n",
+ fprintf(stderr, "rom: file %-20s: read error: rc=%zd (expected %zd)\n",
rom->name, rc, rom->datasize);
goto err;
}
@@ -1138,12 +1140,12 @@ int rom_add_elf_program(const char *name, GMappedFile *mapped_file, void *data,
return 0;
}
-int rom_add_vga(const char *file)
+ssize_t rom_add_vga(const char *file)
{
return rom_add_file(file, "vgaroms", 0, -1, true, NULL, NULL);
}
-int rom_add_option(const char *file, int32_t bootindex)
+ssize_t rom_add_option(const char *file, int32_t bootindex)
{
return rom_add_file(file, "genroms", 0, bootindex, true, NULL, NULL);
}
@@ -1846,11 +1848,12 @@ out:
}
/* return size or -1 if error */
-int load_targphys_hex_as(const char *filename, hwaddr *entry, AddressSpace *as)
+ssize_t load_targphys_hex_as(const char *filename, hwaddr *entry,
+ AddressSpace *as)
{
gsize hex_blob_size;
gchar *hex_blob;
- int total_size = 0;
+ ssize_t total_size = 0;
if (!g_file_get_contents(filename, &hex_blob, &hex_blob_size, NULL)) {
return -1;
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 78b05ab7a2..6003b4b2df 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1115,7 +1115,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
char *filename;
MemoryRegion *bios, *isa_bios;
int bios_size, isa_bios_size;
- int ret;
+ ssize_t ret;
/* BIOS load */
bios_name = ms->firmware ?: default_firmware;
diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c
index eebbcf33d4..56d60e9ac9 100644
--- a/hw/intc/sifive_plic.c
+++ b/hw/intc/sifive_plic.c
@@ -431,7 +431,7 @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config,
uint32_t context_stride, uint32_t aperture_size)
{
DeviceState *dev = qdev_new(TYPE_SIFIVE_PLIC);
- int i, j = 0;
+ int i;
SiFivePLICState *plic;
assert(enable_stride == (enable_stride & -enable_stride));
@@ -451,18 +451,17 @@ DeviceState *sifive_plic_create(hwaddr addr, char *hart_config,
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
plic = SIFIVE_PLIC(dev);
- for (i = 0; i < num_harts; i++) {
- CPUState *cpu = qemu_get_cpu(hartid_base + i);
- if (plic->addr_config[j].mode == PLICMode_M) {
- j++;
- qdev_connect_gpio_out(dev, num_harts + i,
+ for (i = 0; i < plic->num_addrs; i++) {
+ int cpu_num = plic->addr_config[i].hartid;
+ CPUState *cpu = qemu_get_cpu(hartid_base + cpu_num);
+
+ if (plic->addr_config[i].mode == PLICMode_M) {
+ qdev_connect_gpio_out(dev, num_harts + cpu_num,
qdev_get_gpio_in(DEVICE(cpu), IRQ_M_EXT));
}
-
- if (plic->addr_config[j].mode == PLICMode_S) {
- j++;
- qdev_connect_gpio_out(dev, i,
+ if (plic->addr_config[i].mode == PLICMode_S) {
+ qdev_connect_gpio_out(dev, cpu_num,
qdev_get_gpio_in(DEVICE(cpu), IRQ_S_EXT));
}
}
diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index 57a41df8e9..2d80f40b31 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -129,7 +129,8 @@ target_ulong riscv_load_firmware(const char *firmware_filename,
hwaddr firmware_load_addr,
symbol_fn_t sym_cb)
{
- uint64_t firmware_entry, firmware_size, firmware_end;
+ uint64_t firmware_entry, firmware_end;
+ ssize_t firmware_size;
if (load_elf_ram_sym(firmware_filename, NULL, NULL, NULL,
&firmware_entry, NULL, &firmware_end, NULL,
@@ -185,7 +186,7 @@ target_ulong riscv_load_kernel(const char *kernel_filename,
hwaddr riscv_load_initrd(const char *filename, uint64_t mem_size,
uint64_t kernel_entry, hwaddr *start)
{
- int size;
+ ssize_t size;
/*
* We want to put the initrd far enough into RAM that when the
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 293e9c95b7..bc424dd2f5 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -975,6 +975,23 @@ static void create_fdt_flash(RISCVVirtState *s, const MemMapEntry *memmap)
g_free(name);
}
+static void create_fdt_fw_cfg(RISCVVirtState *s, const MemMapEntry *memmap)
+{
+ char *nodename;
+ MachineState *mc = MACHINE(s);
+ hwaddr base = memmap[VIRT_FW_CFG].base;
+ hwaddr size = memmap[VIRT_FW_CFG].size;
+
+ nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
+ qemu_fdt_add_subnode(mc->fdt, nodename);
+ qemu_fdt_setprop_string(mc->fdt, nodename,
+ "compatible", "qemu,fw-cfg-mmio");
+ qemu_fdt_setprop_sized_cells(mc->fdt, nodename, "reg",
+ 2, base, 2, size);
+ qemu_fdt_setprop(mc->fdt, nodename, "dma-coherent", NULL, 0);
+ g_free(nodename);
+}
+
static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
uint64_t mem_size, const char *cmdline, bool is_32_bit)
{
@@ -1023,6 +1040,7 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap,
create_fdt_rtc(s, memmap, irq_mmio_phandle);
create_fdt_flash(s, memmap);
+ create_fdt_fw_cfg(s, memmap);
update_bootargs:
if (cmdline && *cmdline) {
@@ -1082,22 +1100,12 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
static FWCfgState *create_fw_cfg(const MachineState *mc)
{
hwaddr base = virt_memmap[VIRT_FW_CFG].base;
- hwaddr size = virt_memmap[VIRT_FW_CFG].size;
FWCfgState *fw_cfg;
- char *nodename;
fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16,
&address_space_memory);
fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)mc->smp.cpus);
- nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
- qemu_fdt_add_subnode(mc->fdt, nodename);
- qemu_fdt_setprop_string(mc->fdt, nodename,
- "compatible", "qemu,fw-cfg-mmio");
- qemu_fdt_setprop_sized_cells(mc->fdt, nodename, "reg",
- 2, base, 2, size);
- qemu_fdt_setprop(mc->fdt, nodename, "dma-coherent", NULL, 0);
- g_free(nodename);
return fw_cfg;
}
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 5572108ba5..70248e0da7 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -40,8 +40,8 @@ ssize_t load_image_size(const char *filename, void *addr, size_t size);
*
* Returns the size of the loaded image on success, -1 otherwise.
*/
-int load_image_targphys_as(const char *filename,
- hwaddr addr, uint64_t max_sz, AddressSpace *as);
+ssize_t load_image_targphys_as(const char *filename,
+ hwaddr addr, uint64_t max_sz, AddressSpace *as);
/**load_targphys_hex_as:
* @filename: Path to the .hex file
@@ -53,14 +53,15 @@ int load_image_targphys_as(const char *filename,
*
* Returns the size of the loaded .hex file on success, -1 otherwise.
*/
-int load_targphys_hex_as(const char *filename, hwaddr *entry, AddressSpace *as);
+ssize_t load_targphys_hex_as(const char *filename, hwaddr *entry,
+ AddressSpace *as);
/** load_image_targphys:
* Same as load_image_targphys_as(), but doesn't allow the caller to specify
* an AddressSpace.
*/
-int load_image_targphys(const char *filename, hwaddr,
- uint64_t max_sz);
+ssize_t load_image_targphys(const char *filename, hwaddr,
+ uint64_t max_sz);
/**
* load_image_mr: load an image into a memory region
@@ -73,7 +74,7 @@ int load_image_targphys(const char *filename, hwaddr,
* If the file is larger than the memory region's size the call will fail.
* Returns -1 on failure, or the size of the file.
*/
-int load_image_mr(const char *filename, MemoryRegion *mr);
+ssize_t load_image_mr(const char *filename, MemoryRegion *mr);
/* This is the limit on the maximum uncompressed image size that
* load_image_gzipped_buffer() and load_image_gzipped() will read. It prevents
@@ -81,9 +82,9 @@ int load_image_mr(const char *filename, MemoryRegion *mr);
*/
#define LOAD_IMAGE_MAX_GUNZIP_BYTES (256 << 20)
-int load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
- uint8_t **buffer);
-int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz);
+ssize_t load_image_gzipped_buffer(const char *filename, uint64_t max_sz,
+ uint8_t **buffer);
+ssize_t load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz);
#define ELF_LOAD_FAILED -1
#define ELF_LOAD_NOT_ELF -2
@@ -183,8 +184,8 @@ ssize_t load_elf(const char *filename,
*/
void load_elf_hdr(const char *filename, void *hdr, bool *is64, Error **errp);
-int load_aout(const char *filename, hwaddr addr, int max_sz,
- int bswap_needed, hwaddr target_page_size);
+ssize_t load_aout(const char *filename, hwaddr addr, int max_sz,
+ int bswap_needed, hwaddr target_page_size);
#define LOAD_UIMAGE_LOADADDR_INVALID (-1)
@@ -205,19 +206,19 @@ int load_aout(const char *filename, hwaddr addr, int max_sz,
*
* Returns the size of the loaded image on success, -1 otherwise.
*/
-int load_uimage_as(const char *filename, hwaddr *ep,
- hwaddr *loadaddr, int *is_linux,
- uint64_t (*translate_fn)(void *, uint64_t),
- void *translate_opaque, AddressSpace *as);
+ssize_t load_uimage_as(const char *filename, hwaddr *ep,
+ hwaddr *loadaddr, int *is_linux,
+ uint64_t (*translate_fn)(void *, uint64_t),
+ void *translate_opaque, AddressSpace *as);
/** load_uimage:
* Same as load_uimage_as(), but doesn't allow the caller to specify an
* AddressSpace.
*/
-int load_uimage(const char *filename, hwaddr *ep,
- hwaddr *loadaddr, int *is_linux,
- uint64_t (*translate_fn)(void *, uint64_t),
- void *translate_opaque);
+ssize_t load_uimage(const char *filename, hwaddr *ep,
+ hwaddr *loadaddr, int *is_linux,
+ uint64_t (*translate_fn)(void *, uint64_t),
+ void *translate_opaque);
/**
* load_ramdisk_as:
@@ -232,15 +233,15 @@ int load_uimage(const char *filename, hwaddr *ep,
*
* Returns the size of the loaded image on success, -1 otherwise.
*/
-int load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz,
- AddressSpace *as);
+ssize_t load_ramdisk_as(const char *filename, hwaddr addr, uint64_t max_sz,
+ AddressSpace *as);
/**
* load_ramdisk:
* Same as load_ramdisk_as(), but doesn't allow the caller to specify
* an AddressSpace.
*/
-int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz);
+ssize_t load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz);
ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen);
@@ -253,9 +254,9 @@ void pstrcpy_targphys(const char *name,
extern bool option_rom_has_mr;
extern bool rom_file_has_mr;
-int rom_add_file(const char *file, const char *fw_dir,
- hwaddr addr, int32_t bootindex,
- bool option_rom, MemoryRegion *mr, AddressSpace *as);
+ssize_t rom_add_file(const char *file, const char *fw_dir,
+ hwaddr addr, int32_t bootindex,
+ bool option_rom, MemoryRegion *mr, AddressSpace *as);
MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len,
size_t max_len, hwaddr addr,
const char *fw_file_name,
@@ -336,8 +337,8 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict);
#define rom_add_blob_fixed_as(_f, _b, _l, _a, _as) \
rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, _as, true)
-int rom_add_vga(const char *file);
-int rom_add_option(const char *file, int32_t bootindex);
+ssize_t rom_add_vga(const char *file);
+ssize_t rom_add_option(const char *file, int32_t bootindex);
/* This is the usual maximum in uboot, so if a uImage overflows this, it would
* overflow on real hardware too. */
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index a91253d4bd..05e6521351 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -118,6 +118,8 @@ static const char * const riscv_intr_names[] = {
"reserved"
};
+static void register_cpu_props(DeviceState *dev);
+
const char *riscv_cpu_get_trap_name(target_ulong cause, bool async)
{
if (async) {
@@ -161,6 +163,7 @@ static void riscv_any_cpu_init(Object *obj)
set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVU);
#endif
set_priv_version(env, PRIV_VERSION_1_12_0);
+ register_cpu_props(DEVICE(obj));
}
#if defined(TARGET_RISCV64)
@@ -169,6 +172,7 @@ static void rv64_base_cpu_init(Object *obj)
CPURISCVState *env = &RISCV_CPU(obj)->env;
/* We set this in the realise function */
set_misa(env, MXL_RV64, 0);
+ register_cpu_props(DEVICE(obj));
}
static void rv64_sifive_u_cpu_init(Object *obj)
@@ -181,9 +185,11 @@ static void rv64_sifive_u_cpu_init(Object *obj)
static void rv64_sifive_e_cpu_init(Object *obj)
{
CPURISCVState *env = &RISCV_CPU(obj)->env;
+ RISCVCPU *cpu = RISCV_CPU(obj);
+
set_misa(env, MXL_RV64, RVI | RVM | RVA | RVC | RVU);
set_priv_version(env, PRIV_VERSION_1_10_0);
- qdev_prop_set_bit(DEVICE(obj), "mmu", false);
+ cpu->cfg.mmu = false;
}
static void rv128_base_cpu_init(Object *obj)
@@ -197,6 +203,7 @@ static void rv128_base_cpu_init(Object *obj)
CPURISCVState *env = &RISCV_CPU(obj)->env;
/* We set this in the realise function */
set_misa(env, MXL_RV128, 0);
+ register_cpu_props(DEVICE(obj));
}
#else
static void rv32_base_cpu_init(Object *obj)
@@ -204,6 +211,7 @@ static void rv32_base_cpu_init(Object *obj)
CPURISCVState *env = &RISCV_CPU(obj)->env;
/* We set this in the realise function */
set_misa(env, MXL_RV32, 0);
+ register_cpu_props(DEVICE(obj));
}
static void rv32_sifive_u_cpu_init(Object *obj)
@@ -216,27 +224,33 @@ static void rv32_sifive_u_cpu_init(Object *obj)
static void rv32_sifive_e_cpu_init(Object *obj)
{
CPURISCVState *env = &RISCV_CPU(obj)->env;
+ RISCVCPU *cpu = RISCV_CPU(obj);
+
set_misa(env, MXL_RV32, RVI | RVM | RVA | RVC | RVU);
set_priv_version(env, PRIV_VERSION_1_10_0);
- qdev_prop_set_bit(DEVICE(obj), "mmu", false);
+ cpu->cfg.mmu = false;
}
static void rv32_ibex_cpu_init(Object *obj)
{
CPURISCVState *env = &RISCV_CPU(obj)->env;
+ RISCVCPU *cpu = RISCV_CPU(obj);
+
set_misa(env, MXL_RV32, RVI | RVM | RVC | RVU);
set_priv_version(env, PRIV_VERSION_1_10_0);
- qdev_prop_set_bit(DEVICE(obj), "mmu", false);
- qdev_prop_set_bit(DEVICE(obj), "x-epmp", true);
+ cpu->cfg.mmu = false;
+ cpu->cfg.epmp = true;
}
static void rv32_imafcu_nommu_cpu_init(Object *obj)
{
CPURISCVState *env = &RISCV_CPU(obj)->env;
+ RISCVCPU *cpu = RISCV_CPU(obj);
+
set_misa(env, MXL_RV32, RVI | RVM | RVA | RVF | RVC | RVU);
set_priv_version(env, PRIV_VERSION_1_10_0);
set_resetvec(env, DEFAULT_RSTVEC);
- qdev_prop_set_bit(DEVICE(obj), "mmu", false);
+ cpu->cfg.mmu = false;
}
#endif
@@ -249,6 +263,7 @@ static void riscv_host_cpu_init(Object *obj)
#elif defined(TARGET_RISCV64)
set_misa(env, MXL_RV64, 0);
#endif
+ register_cpu_props(DEVICE(obj));
}
#endif
@@ -391,7 +406,7 @@ static bool riscv_cpu_has_work(CPUState *cs)
* Definition of the WFI instruction requires it to ignore the privilege
* mode and delegation registers, but respect individual enables
*/
- return (env->mip & env->mie) != 0;
+ return riscv_cpu_all_pending(env) != 0;
#else
return true;
#endif
@@ -600,6 +615,11 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp)
cpu->cfg.ext_ifencei = true;
}
+ if (cpu->cfg.ext_m && cpu->cfg.ext_zmmul) {
+ warn_report("Zmmul will override M");
+ cpu->cfg.ext_m = false;
+ }
+
if (cpu->cfg.ext_i && cpu->cfg.ext_e) {
error_setg(errp,
"I and E extensions are incompatible");
@@ -831,6 +851,12 @@ static void riscv_cpu_init(Object *obj)
{
RISCVCPU *cpu = RISCV_CPU(obj);
+ cpu->cfg.ext_counters = true;
+ cpu->cfg.ext_ifencei = true;
+ cpu->cfg.ext_icsr = true;
+ cpu->cfg.mmu = true;
+ cpu->cfg.pmp = true;
+
cpu_set_cpustate_pointers(cpu);
#ifndef CONFIG_USER_ONLY
@@ -839,7 +865,7 @@ static void riscv_cpu_init(Object *obj)
#endif /* CONFIG_USER_ONLY */
}
-static Property riscv_cpu_properties[] = {
+static Property riscv_cpu_extensions[] = {
/* Defaults for standard extensions */
DEFINE_PROP_BOOL("i", RISCVCPU, cfg.ext_i, true),
DEFINE_PROP_BOOL("e", RISCVCPU, cfg.ext_e, false),
@@ -862,17 +888,12 @@ static Property riscv_cpu_properties[] = {
DEFINE_PROP_BOOL("Zve64f", RISCVCPU, cfg.ext_zve64f, false),
DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, true),
DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true),
- DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true),
DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec),
DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec),
DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128),
DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64),
- DEFINE_PROP_UINT32("mvendorid", RISCVCPU, cfg.mvendorid, 0),
- DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID),
- DEFINE_PROP_UINT64("mimpid", RISCVCPU, cfg.mimpid, RISCV_CPU_MIMPID),
-
DEFINE_PROP_BOOL("svinval", RISCVCPU, cfg.ext_svinval, false),
DEFINE_PROP_BOOL("svnapot", RISCVCPU, cfg.ext_svnapot, false),
DEFINE_PROP_BOOL("svpbmt", RISCVCPU, cfg.ext_svpbmt, false),
@@ -905,13 +926,35 @@ static Property riscv_cpu_properties[] = {
/* These are experimental so mark with 'x-' */
DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false),
+ DEFINE_PROP_BOOL("x-zmmul", RISCVCPU, cfg.ext_zmmul, false),
/* ePMP 0.9.3 */
DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void register_cpu_props(DeviceState *dev)
+{
+ Property *prop;
+
+ for (prop = riscv_cpu_extensions; prop && prop->name; prop++) {
+ qdev_property_add_static(dev, prop);
+ }
+}
+
+static Property riscv_cpu_properties[] = {
+ DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true),
+
+ DEFINE_PROP_UINT32("mvendorid", RISCVCPU, cfg.mvendorid, 0),
+ DEFINE_PROP_UINT64("marchid", RISCVCPU, cfg.marchid, RISCV_CPU_MARCHID),
+ DEFINE_PROP_UINT64("mimpid", RISCVCPU, cfg.mimpid, RISCV_CPU_MIMPID),
+
DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC),
DEFINE_PROP_BOOL("short-isa-string", RISCVCPU, cfg.short_isa_string, false),
+
+ DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false),
DEFINE_PROP_END_OF_LIST(),
};
@@ -1031,6 +1074,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, int max_str_len)
struct isa_ext_data isa_edata_arr[] = {
ISA_EDATA_ENTRY(zicsr, ext_icsr),
ISA_EDATA_ENTRY(zifencei, ext_ifencei),
+ ISA_EDATA_ENTRY(zmmul, ext_zmmul),
ISA_EDATA_ENTRY(zfh, ext_zfh),
ISA_EDATA_ENTRY(zfhmin, ext_zfhmin),
ISA_EDATA_ENTRY(zfinx, ext_zfinx),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index f08c3e8813..7d6397acdf 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -411,6 +411,8 @@ struct RISCVCPUConfig {
bool ext_zhinxmin;
bool ext_zve32f;
bool ext_zve64f;
+ bool ext_zmmul;
+ bool rvv_ta_all_1s;
uint32_t mvendorid;
uint64_t marchid;
@@ -488,6 +490,7 @@ int riscv_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
int riscv_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
int riscv_cpu_hviprio_index2irq(int index, int *out_irq, int *out_rdzero);
uint8_t riscv_cpu_default_priority(int irq);
+uint64_t riscv_cpu_all_pending(CPURISCVState *env);
int riscv_cpu_mirq_pending(CPURISCVState *env);
int riscv_cpu_sirq_pending(CPURISCVState *env);
int riscv_cpu_vsirq_pending(CPURISCVState *env);
@@ -565,6 +568,7 @@ FIELD(TB_FLAGS, XL, 20, 2)
/* If PointerMasking should be applied */
FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1)
FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1)
+FIELD(TB_FLAGS, VTA, 24, 1)
#ifdef TARGET_RISCV32
#define riscv_cpu_mxl(env) ((void)(env), MXL_RV32)
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index d99fac9d2d..4a6700c890 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -65,6 +65,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong *pc,
flags = FIELD_DP32(flags, TB_FLAGS, LMUL,
FIELD_EX64(env->vtype, VTYPE, VLMUL));
flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax);
+ flags = FIELD_DP32(flags, TB_FLAGS, VTA,
+ FIELD_EX64(env->vtype, VTYPE, VTA));
} else {
flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
}
@@ -340,7 +342,7 @@ static int riscv_cpu_pending_to_irq(CPURISCVState *env,
return best_irq;
}
-static uint64_t riscv_cpu_all_pending(CPURISCVState *env)
+uint64_t riscv_cpu_all_pending(CPURISCVState *env)
{
uint32_t gein = get_field(env->hstatus, HSTATUS_VGEIN);
uint64_t vsgein = (env->hgeip & (1ULL << gein)) ? MIP_VSEIP : 0;
diff --git a/target/riscv/debug.c b/target/riscv/debug.c
index 2f2a51c732..fc6e13222f 100644
--- a/target/riscv/debug.c
+++ b/target/riscv/debug.c
@@ -77,6 +77,7 @@ static inline target_ulong trigger_type(CPURISCVState *env,
tdata1 = RV32_TYPE(type);
break;
case MXL_RV64:
+ case MXL_RV128:
tdata1 = RV64_TYPE(type);
break;
default:
@@ -123,6 +124,7 @@ static target_ulong tdata1_validate(CPURISCVState *env, target_ulong val,
tdata1 = RV32_TYPE(t);
break;
case MXL_RV64:
+ case MXL_RV128:
type = extract64(val, 60, 4);
dmode = extract64(val, 59, 1);
tdata1 = RV64_TYPE(t);
diff --git a/target/riscv/insn_trans/trans_rvm.c.inc b/target/riscv/insn_trans/trans_rvm.c.inc
index 16b029edf0..ec7f705aab 100644
--- a/target/riscv/insn_trans/trans_rvm.c.inc
+++ b/target/riscv/insn_trans/trans_rvm.c.inc
@@ -18,6 +18,12 @@
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#define REQUIRE_M_OR_ZMMUL(ctx) do { \
+ if (!ctx->cfg_ptr->ext_zmmul && !has_ext(ctx, RVM)) { \
+ return false; \
+ } \
+} while (0)
+
static void gen_mulhu_i128(TCGv r2, TCGv r3, TCGv al, TCGv ah, TCGv bl, TCGv bh)
{
TCGv tmpl = tcg_temp_new();
@@ -65,7 +71,7 @@ static void gen_mul_i128(TCGv rl, TCGv rh,
static bool trans_mul(DisasContext *ctx, arg_mul *a)
{
- REQUIRE_EXT(ctx, RVM);
+ REQUIRE_M_OR_ZMMUL(ctx);
return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, gen_mul_i128);
}
@@ -109,7 +115,7 @@ static void gen_mulh_w(TCGv ret, TCGv s1, TCGv s2)
static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
{
- REQUIRE_EXT(ctx, RVM);
+ REQUIRE_M_OR_ZMMUL(ctx);
return gen_arith_per_ol(ctx, a, EXT_SIGN, gen_mulh, gen_mulh_w,
gen_mulh_i128);
}
@@ -161,7 +167,7 @@ static void gen_mulhsu_w(TCGv ret, TCGv arg1, TCGv arg2)
static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
{
- REQUIRE_EXT(ctx, RVM);
+ REQUIRE_M_OR_ZMMUL(ctx);
return gen_arith_per_ol(ctx, a, EXT_NONE, gen_mulhsu, gen_mulhsu_w,
gen_mulhsu_i128);
}
@@ -176,7 +182,7 @@ static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)
static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
{
- REQUIRE_EXT(ctx, RVM);
+ REQUIRE_M_OR_ZMMUL(ctx);
/* gen_mulh_w works for either sign as input. */
return gen_arith_per_ol(ctx, a, EXT_ZERO, gen_mulhu, gen_mulh_w,
gen_mulhu_i128);
@@ -349,7 +355,7 @@ static bool trans_remu(DisasContext *ctx, arg_remu *a)
static bool trans_mulw(DisasContext *ctx, arg_mulw *a)
{
REQUIRE_64_OR_128BIT(ctx);
- REQUIRE_EXT(ctx, RVM);
+ REQUIRE_M_OR_ZMMUL(ctx);
ctx->ol = MXL_RV32;
return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL);
}
@@ -389,7 +395,7 @@ static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
static bool trans_muld(DisasContext *ctx, arg_muld *a)
{
REQUIRE_128BIT(ctx);
- REQUIRE_EXT(ctx, RVM);
+ REQUIRE_M_OR_ZMMUL(ctx);
ctx->ol = MXL_RV64;
return gen_arith(ctx, a, EXT_SIGN, tcg_gen_mul_tl, NULL);
}
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index 391c61fe93..6c091824b6 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -652,6 +652,7 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
dest = tcg_temp_new_ptr();
mask = tcg_temp_new_ptr();
@@ -710,6 +711,7 @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, emul);
data = FIELD_DP32(data, VDATA, NF, a->nf);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
}
@@ -773,6 +775,8 @@ static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, uint8_t eew)
/* EMUL = 1, NFIELDS = 1 */
data = FIELD_DP32(data, VDATA, LMUL, 0);
data = FIELD_DP32(data, VDATA, NF, 1);
+ /* Mask destination registers are always tail-agnostic */
+ data = FIELD_DP32(data, VDATA, VTA, s->cfg_vta_all_1s);
return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
}
@@ -818,6 +822,7 @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
dest = tcg_temp_new_ptr();
mask = tcg_temp_new_ptr();
@@ -860,6 +865,7 @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, emul);
data = FIELD_DP32(data, VDATA, NF, a->nf);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
}
@@ -925,6 +931,7 @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
dest = tcg_temp_new_ptr();
mask = tcg_temp_new_ptr();
@@ -988,6 +995,7 @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, emul);
data = FIELD_DP32(data, VDATA, NF, a->nf);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
}
@@ -1067,6 +1075,7 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data,
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
dest = tcg_temp_new_ptr();
mask = tcg_temp_new_ptr();
@@ -1104,6 +1113,7 @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, emul);
data = FIELD_DP32(data, VDATA, NF, a->nf);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
return ldff_trans(a->rd, a->rs1, data, fn, s);
}
@@ -1225,8 +1235,9 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
}
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
- if (a->vm && s->vl_eq_vlmax) {
+ if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
gvec_fn(s->sew, vreg_ofs(s, a->rd),
vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
MAXSZ(s), MAXSZ(s));
@@ -1235,6 +1246,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
cpu_env, s->cfg_ptr->vlen / 8,
@@ -1272,6 +1284,7 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
dest = tcg_temp_new_ptr();
mask = tcg_temp_new_ptr();
@@ -1280,6 +1293,8 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
data = FIELD_DP32(data, VDATA, VM, vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8,
s->cfg_ptr->vlen / 8, data));
@@ -1315,7 +1330,7 @@ do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
return false;
}
- if (a->vm && s->vl_eq_vlmax) {
+ if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
TCGv_i64 src1 = tcg_temp_new_i64();
tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN));
@@ -1436,6 +1451,7 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
dest = tcg_temp_new_ptr();
mask = tcg_temp_new_ptr();
@@ -1444,6 +1460,8 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
data = FIELD_DP32(data, VDATA, VM, vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8,
s->cfg_ptr->vlen / 8, data));
@@ -1472,7 +1490,7 @@ do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn,
return false;
}
- if (a->vm && s->vl_eq_vlmax) {
+ if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s));
mark_vs_dirty(s);
@@ -1522,9 +1540,11 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
uint32_t data = 0;
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs1),
vreg_ofs(s, a->rs2),
@@ -1602,9 +1622,11 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
uint32_t data = 0;
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs1),
vreg_ofs(s, a->rs2),
@@ -1679,9 +1701,13 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
}; \
TCGLabel *over = gen_new_label(); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
+ data = \
+ FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1), \
vreg_ofs(s, a->rs2), cpu_env, \
@@ -1805,7 +1831,7 @@ do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn,
return false;
}
- if (a->vm && s->vl_eq_vlmax) {
+ if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
TCGv_i32 src1 = tcg_temp_new_i32();
tcg_gen_trunc_tl_i32(src1, get_gpr(s, a->rs1, EXT_NONE));
@@ -1860,9 +1886,11 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
}; \
TCGLabel *over = gen_new_label(); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1), \
vreg_ofs(s, a->rs2), cpu_env, \
@@ -2058,18 +2086,20 @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
vext_check_isa_ill(s) &&
/* vmv.v.v has rs2 = 0 and vm = 1 */
vext_check_sss(s, a->rd, a->rs1, 0, 1)) {
- if (s->vl_eq_vlmax) {
+ if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd),
vreg_ofs(s, a->rs1),
MAXSZ(s), MAXSZ(s));
} else {
uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
static gen_helper_gvec_2_ptr * const fns[4] = {
gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h,
gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d,
};
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1),
cpu_env, s->cfg_ptr->vlen / 8,
@@ -2093,17 +2123,27 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
TCGv s1;
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
s1 = get_gpr(s, a->rs1, EXT_SIGN);
- if (s->vl_eq_vlmax) {
- tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
- MAXSZ(s), MAXSZ(s), s1);
+ if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
+ if (get_xl(s) == MXL_RV32 && s->sew == MO_64) {
+ TCGv_i64 s1_i64 = tcg_temp_new_i64();
+ tcg_gen_ext_tl_i64(s1_i64, s1);
+ tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
+ MAXSZ(s), MAXSZ(s), s1_i64);
+ tcg_temp_free_i64(s1_i64);
+ } else {
+ tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
+ MAXSZ(s), MAXSZ(s), s1);
+ }
} else {
TCGv_i32 desc;
TCGv_i64 s1_i64 = tcg_temp_new_i64();
TCGv_ptr dest = tcg_temp_new_ptr();
uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
static gen_helper_vmv_vx * const fns[4] = {
gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
@@ -2133,7 +2173,7 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
/* vmv.v.i has rs2 = 0 and vm = 1 */
vext_check_ss(s, a->rd, 0, 1)) {
int64_t simm = sextract64(a->rs1, 0, 5);
- if (s->vl_eq_vlmax) {
+ if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd),
MAXSZ(s), MAXSZ(s), simm);
mark_vs_dirty(s);
@@ -2142,12 +2182,14 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
TCGv_i64 s1;
TCGv_ptr dest;
uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
static gen_helper_vmv_vx * const fns[4] = {
gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
};
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
s1 = tcg_constant_i64(simm);
dest = tcg_temp_new_ptr();
@@ -2300,9 +2342,13 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
TCGLabel *over = gen_new_label(); \
gen_set_rm(s, RISCV_FRM_DYN); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
+ data = \
+ FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1), \
vreg_ofs(s, a->rs2), cpu_env, \
@@ -2330,6 +2376,7 @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
dest = tcg_temp_new_ptr();
mask = tcg_temp_new_ptr();
@@ -2384,6 +2431,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
gen_set_rm(s, RISCV_FRM_DYN); \
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, \
+ s->cfg_vta_all_1s); \
return opfvf_trans(a->rd, a->rs1, a->rs2, data, \
fns[s->sew - 1], s); \
} \
@@ -2418,9 +2468,11 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
TCGLabel *over = gen_new_label(); \
gen_set_rm(s, RISCV_FRM_DYN); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\
\
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1), \
vreg_ofs(s, a->rs2), cpu_env, \
@@ -2460,6 +2512,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
gen_set_rm(s, RISCV_FRM_DYN); \
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
return opfvf_trans(a->rd, a->rs1, a->rs2, data, \
fns[s->sew - 1], s); \
} \
@@ -2492,9 +2545,11 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
TCGLabel *over = gen_new_label(); \
gen_set_rm(s, RISCV_FRM_DYN); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1), \
vreg_ofs(s, a->rs2), cpu_env, \
@@ -2534,6 +2589,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
gen_set_rm(s, RISCV_FRM_DYN); \
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
return opfvf_trans(a->rd, a->rs1, a->rs2, data, \
fns[s->sew - 1], s); \
} \
@@ -2613,9 +2669,11 @@ static bool do_opfv(DisasContext *s, arg_rmr *a,
TCGLabel *over = gen_new_label();
gen_set_rm(s, rm);
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs2), cpu_env,
s->cfg_ptr->vlen / 8,
@@ -2707,7 +2765,7 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
TCGv_i64 t1;
- if (s->vl_eq_vlmax) {
+ if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
t1 = tcg_temp_new_i64();
/* NaN-box f[rs1] */
do_nanbox(s, t1, cpu_fpr[a->rs1]);
@@ -2719,6 +2777,7 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
TCGv_ptr dest;
TCGv_i32 desc;
uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
static gen_helper_vmv_vx * const fns[3] = {
gen_helper_vmv_v_x_h,
gen_helper_vmv_v_x_w,
@@ -2726,6 +2785,7 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
};
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
t1 = tcg_temp_new_i64();
/* NaN-box f[rs1] */
@@ -2814,9 +2874,11 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
TCGLabel *over = gen_new_label(); \
gen_set_rm(s, FRM); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs2), cpu_env, \
s->cfg_ptr->vlen / 8, \
@@ -2865,8 +2927,11 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
TCGLabel *over = gen_new_label(); \
gen_set_rm(s, RISCV_FRM_DYN); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
data = FIELD_DP32(data, VDATA, VM, a->vm); \
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs2), cpu_env, \
s->cfg_ptr->vlen / 8, \
@@ -2930,9 +2995,11 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
TCGLabel *over = gen_new_label(); \
gen_set_rm(s, FRM); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs2), cpu_env, \
s->cfg_ptr->vlen / 8, \
@@ -2983,8 +3050,11 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
TCGLabel *over = gen_new_label(); \
gen_set_rm(s, FRM); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
data = FIELD_DP32(data, VDATA, VM, a->vm); \
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs2), cpu_env, \
s->cfg_ptr->vlen / 8, \
@@ -3070,8 +3140,11 @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \
gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \
TCGLabel *over = gen_new_label(); \
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = \
+ FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1), \
vreg_ofs(s, a->rs2), cpu_env, \
@@ -3176,6 +3249,8 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \
\
data = FIELD_DP32(data, VDATA, VM, a->vm); \
data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = \
+ FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \
vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \
cpu_env, s->cfg_ptr->vlen / 8, \
@@ -3213,6 +3288,7 @@ static bool trans_viota_m(DisasContext *s, arg_viota_m *a)
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
static gen_helper_gvec_3_ptr * const fns[4] = {
gen_helper_viota_m_b, gen_helper_viota_m_h,
gen_helper_viota_m_w, gen_helper_viota_m_d,
@@ -3238,9 +3314,11 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
uint32_t data = 0;
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
data = FIELD_DP32(data, VDATA, VM, a->vm);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
static gen_helper_gvec_2_ptr * const fns[4] = {
gen_helper_vid_v_b, gen_helper_vid_v_h,
gen_helper_vid_v_w, gen_helper_vid_v_d,
@@ -3599,7 +3677,7 @@ static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a)
return false;
}
- if (a->vm && s->vl_eq_vlmax) {
+ if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
int scale = s->lmul - (s->sew + 3);
int vlmax = s->cfg_ptr->vlen >> -scale;
TCGv_i64 dest = tcg_temp_new_i64();
@@ -3631,7 +3709,7 @@ static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a)
return false;
}
- if (a->vm && s->vl_eq_vlmax) {
+ if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
int scale = s->lmul - (s->sew + 3);
int vlmax = s->cfg_ptr->vlen >> -scale;
if (a->rs1 >= vlmax) {
@@ -3683,6 +3761,7 @@ static bool trans_vcompress_vm(DisasContext *s, arg_r *a)
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
cpu_env, s->cfg_ptr->vlen / 8,
@@ -3748,6 +3827,7 @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
gen_helper_gvec_3_ptr *fn;
TCGLabel *over = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
static gen_helper_gvec_3_ptr * const fns[6][4] = {
{
@@ -3782,6 +3862,8 @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
}
data = FIELD_DP32(data, VDATA, VM, a->vm);
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+ data = FIELD_DP32(data, VDATA, VTA, s->vta);
tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs2), cpu_env,
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index dbb322bfa7..193ce57a6d 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -24,8 +24,10 @@
/* share data between vector helpers and decode code */
FIELD(VDATA, VM, 0, 1)
FIELD(VDATA, LMUL, 1, 3)
-FIELD(VDATA, NF, 4, 4)
-FIELD(VDATA, WD, 4, 1)
+FIELD(VDATA, VTA, 4, 1)
+FIELD(VDATA, VTA_ALL_1S, 5, 1)
+FIELD(VDATA, NF, 6, 4)
+FIELD(VDATA, WD, 6, 1)
/* float point classify helpers */
target_ulong fclass_h(uint64_t frs1);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 55a4713af2..b151c20674 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -94,6 +94,8 @@ typedef struct DisasContext {
*/
int8_t lmul;
uint8_t sew;
+ uint8_t vta;
+ bool cfg_vta_all_1s;
target_ulong vstart;
bool vl_eq_vlmax;
uint8_t ntemp;
@@ -1099,6 +1101,8 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL);
ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3);
+ ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s;
+ ctx->cfg_vta_all_1s = cpu->cfg.rvv_ta_all_1s;
ctx->vstart = env->vstart;
ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
ctx->misa_mxl_max = env->misa_mxl_max;
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 576b14e5a3..a96fc49c71 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -122,12 +122,22 @@ static inline int32_t vext_lmul(uint32_t desc)
return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}
+static inline uint32_t vext_vta(uint32_t desc)
+{
+ return FIELD_EX32(simd_data(desc), VDATA, VTA);
+}
+
+static inline uint32_t vext_vta_all_1s(uint32_t desc)
+{
+ return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
+}
+
/*
* Get the maximum number of elements can be operated.
*
- * esz: log2 of element size in bytes.
+ * log2_esz: log2 of element size in bytes.
*/
-static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
+static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
/*
* As simd_desc support at most 2048 bytes, the max vlen is 1024 bits.
@@ -136,10 +146,25 @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
uint32_t vlenb = simd_maxsz(desc);
/* Return VLMAX */
- int scale = vext_lmul(desc) - esz;
+ int scale = vext_lmul(desc) - log2_esz;
return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
+/*
+ * Get number of total elements, including prestart, body and tail elements.
+ * Note that when LMUL < 1, the tail includes the elements past VLMAX that
+ * are held in the same vector register.
+ */
+static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
+ uint32_t esz)
+{
+ uint32_t vlenb = simd_maxsz(desc);
+ uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
+ int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
+ ctzl(esz) - ctzl(sew) + vext_lmul(desc);
+ return (vlenb << emul) / esz;
+}
+
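/*
 * A worked example with illustrative numbers: for VLEN = 128 (vlenb = 16),
 * SEW = 32 (sew = 4 bytes) and a single-width op (esz == sew):
 *   LMUL = 2   (lmul =  1): emul = 1, total = (16 << 1) / 4 = 8 elements
 *   LMUL = 1/2 (lmul = -1): emul clamps to 0, total = 16 / 4 = 4 elements,
 *                           so the tail runs past VLMAX = 2 to the end of
 *                           the register, as the comment above describes.
 */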
static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
return (addr & env->cur_pmmask) | env->cur_pmbase;
@@ -172,6 +197,20 @@ static void probe_pages(CPURISCVState *env, target_ulong addr,
}
}
+/* set agnostic elements to 1s */
+static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
+ uint32_t tot)
+{
+ if (is_agnostic == 0) {
+ /* policy undisturbed */
+ return;
+ }
+ if (tot - cnt == 0) {
+        return;
+ }
+ memset(base + cnt, -1, tot - cnt);
+}
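/*
 * A small usage sketch with illustrative numbers: a vv helper with vl = 3,
 * esz = 4 and total_elems = 8 calls
 *     vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
 * which fills bytes [12, 32) of vd with 0xff when the tail-agnostic policy
 * is active (vta != 0) and does nothing under the undisturbed policy.
 */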
+
static inline void vext_set_elem_mask(void *v0, int index,
uint8_t value)
{
@@ -231,11 +270,14 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
target_ulong stride, CPURISCVState *env,
uint32_t desc, uint32_t vm,
vext_ldst_elem_fn *ldst_elem,
- uint32_t esz, uintptr_t ra, MMUAccessType access_type)
+ uint32_t log2_esz, uintptr_t ra)
{
uint32_t i, k;
uint32_t nf = vext_nf(desc);
- uint32_t max_elems = vext_max_elems(desc, esz);
+ uint32_t max_elems = vext_max_elems(desc, log2_esz);
+ uint32_t esz = 1 << log2_esz;
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+ uint32_t vta = vext_vta(desc);
for (i = env->vstart; i < env->vl; i++, env->vstart++) {
if (!vm && !vext_elem_mask(v0, i)) {
@@ -244,12 +286,24 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
k = 0;
while (k < nf) {
- target_ulong addr = base + stride * i + (k << esz);
+ target_ulong addr = base + stride * i + (k << log2_esz);
ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
k++;
}
}
env->vstart = 0;
+ /* set tail elements to 1s */
+ for (k = 0; k < nf; ++k) {
+ vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
+ (k * max_elems + max_elems) * esz);
+ }
+ if (nf * max_elems % total_elems != 0) {
+ uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
+ uint32_t registers_used =
+ ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
+ vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
+ registers_used * vlenb);
+ }
}
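/*
 * A worked example of the tail handling above, with illustrative numbers:
 * a three-field segment load (nf = 3) with esz = 4, LMUL = 1/2 and
 * VLEN = 128 gives max_elems = 2, total_elems = 4 and vlenb = 16.  Since
 * nf * max_elems = 6 is not a multiple of total_elems, registers_used =
 * (24 + 15) / 16 = 2 and bytes [24, 32), the unused remainder of the last
 * register written, are also filled with 1s.
 */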
#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
@@ -259,7 +313,7 @@ void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
{ \
uint32_t vm = vext_vm(desc); \
vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
- ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
+ ctzl(sizeof(ETYPE)), GETPC()); \
}
GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
@@ -274,7 +328,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
{ \
uint32_t vm = vext_vm(desc); \
vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
- ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
+ ctzl(sizeof(ETYPE)), GETPC()); \
}
GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
@@ -289,23 +343,38 @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
- vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
- uintptr_t ra, MMUAccessType access_type)
+ vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
+ uintptr_t ra)
{
uint32_t i, k;
uint32_t nf = vext_nf(desc);
- uint32_t max_elems = vext_max_elems(desc, esz);
+ uint32_t max_elems = vext_max_elems(desc, log2_esz);
+ uint32_t esz = 1 << log2_esz;
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+ uint32_t vta = vext_vta(desc);
/* load bytes from guest memory */
for (i = env->vstart; i < evl; i++, env->vstart++) {
k = 0;
while (k < nf) {
- target_ulong addr = base + ((i * nf + k) << esz);
+ target_ulong addr = base + ((i * nf + k) << log2_esz);
ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
k++;
}
}
env->vstart = 0;
+ /* set tail elements to 1s */
+ for (k = 0; k < nf; ++k) {
+ vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz,
+ (k * max_elems + max_elems) * esz);
+ }
+ if (nf * max_elems % total_elems != 0) {
+ uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
+ uint32_t registers_used =
+ ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
+ vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
+ registers_used * vlenb);
+ }
}
/*
@@ -319,14 +388,14 @@ void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
{ \
uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
- ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
+ ctzl(sizeof(ETYPE)), GETPC()); \
} \
\
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
CPURISCVState *env, uint32_t desc) \
{ \
vext_ldst_us(vd, base, env, desc, LOAD_FN, \
- ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
+ ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
}
GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
@@ -340,14 +409,14 @@ void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
{ \
uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
- ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
+ ctzl(sizeof(ETYPE)), GETPC()); \
} \
\
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
CPURISCVState *env, uint32_t desc) \
{ \
vext_ldst_us(vd, base, env, desc, STORE_FN, \
- ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
+ ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
}
GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
@@ -364,7 +433,7 @@ void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
/* evl = ceil(vl/8) */
uint8_t evl = (env->vl + 7) >> 3;
vext_ldst_us(vd, base, env, desc, lde_b,
- 0, evl, GETPC(), MMU_DATA_LOAD);
+ 0, evl, GETPC());
}
void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
@@ -373,7 +442,7 @@ void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
/* evl = ceil(vl/8) */
uint8_t evl = (env->vl + 7) >> 3;
vext_ldst_us(vd, base, env, desc, ste_b,
- 0, evl, GETPC(), MMU_DATA_STORE);
+ 0, evl, GETPC());
}
/*
@@ -399,12 +468,15 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
void *vs2, CPURISCVState *env, uint32_t desc,
vext_get_index_addr get_index_addr,
vext_ldst_elem_fn *ldst_elem,
- uint32_t esz, uintptr_t ra, MMUAccessType access_type)
+ uint32_t log2_esz, uintptr_t ra)
{
uint32_t i, k;
uint32_t nf = vext_nf(desc);
uint32_t vm = vext_vm(desc);
- uint32_t max_elems = vext_max_elems(desc, esz);
+ uint32_t max_elems = vext_max_elems(desc, log2_esz);
+ uint32_t esz = 1 << log2_esz;
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+ uint32_t vta = vext_vta(desc);
/* load bytes from guest memory */
for (i = env->vstart; i < env->vl; i++, env->vstart++) {
@@ -414,12 +486,24 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
k = 0;
while (k < nf) {
- abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
+ abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
k++;
}
}
env->vstart = 0;
+ /* set tail elements to 1s */
+ for (k = 0; k < nf; ++k) {
+ vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
+ (k * max_elems + max_elems) * esz);
+ }
+ if (nf * max_elems % total_elems != 0) {
+ uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
+ uint32_t registers_used =
+ ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
+ vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
+ registers_used * vlenb);
+ }
}
#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
@@ -427,7 +511,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
- LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
+ LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
}
GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
@@ -453,7 +537,7 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
{ \
vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
STORE_FN, ctzl(sizeof(ETYPE)), \
- GETPC(), MMU_DATA_STORE); \
+ GETPC()); \
}
GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
@@ -480,13 +564,16 @@ static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
CPURISCVState *env, uint32_t desc,
vext_ldst_elem_fn *ldst_elem,
- uint32_t esz, uintptr_t ra)
+ uint32_t log2_esz, uintptr_t ra)
{
void *host;
uint32_t i, k, vl = 0;
uint32_t nf = vext_nf(desc);
uint32_t vm = vext_vm(desc);
- uint32_t max_elems = vext_max_elems(desc, esz);
+ uint32_t max_elems = vext_max_elems(desc, log2_esz);
+ uint32_t esz = 1 << log2_esz;
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+ uint32_t vta = vext_vta(desc);
target_ulong addr, offset, remain;
/* probe every access */
@@ -494,12 +581,12 @@ vext_ldff(void *vd, void *v0, target_ulong base,
if (!vm && !vext_elem_mask(v0, i)) {
continue;
}
- addr = adjust_addr(env, base + i * (nf << esz));
+ addr = adjust_addr(env, base + i * (nf << log2_esz));
if (i == 0) {
- probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
+ probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
} else {
/* if it triggers an exception, no need to check watchpoint */
- remain = nf << esz;
+ remain = nf << log2_esz;
while (remain > 0) {
offset = -(addr | TARGET_PAGE_MASK);
host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
@@ -536,12 +623,24 @@ ProbeSuccess:
continue;
}
while (k < nf) {
- target_ulong addr = base + ((i * nf + k) << esz);
+ target_ulong addr = base + ((i * nf + k) << log2_esz);
ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
k++;
}
}
env->vstart = 0;
+ /* set tail elements to 1s */
+ for (k = 0; k < nf; ++k) {
+ vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
+ (k * max_elems + max_elems) * esz);
+ }
+ if (nf * max_elems % total_elems != 0) {
+ uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
+ uint32_t registers_used =
+ ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
+ vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
+ registers_used * vlenb);
+ }
}
#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
@@ -576,13 +675,12 @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
*/
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
- vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
- MMUAccessType access_type)
+ vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
{
uint32_t i, k, off, pos;
uint32_t nf = vext_nf(desc);
uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
- uint32_t max_elems = vlenb >> esz;
+ uint32_t max_elems = vlenb >> log2_esz;
k = env->vstart / max_elems;
off = env->vstart % max_elems;
@@ -590,7 +688,7 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
if (off) {
/* load/store rest of elements of current segment pointed by vstart */
for (pos = off; pos < max_elems; pos++, env->vstart++) {
- target_ulong addr = base + ((pos + k * max_elems) << esz);
+ target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
}
k++;
@@ -599,7 +697,7 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
/* load/store elements for rest of segments */
for (; k < nf; k++) {
for (i = 0; i < max_elems; i++, env->vstart++) {
- target_ulong addr = base + ((i + k * max_elems) << esz);
+ target_ulong addr = base + ((i + k * max_elems) << log2_esz);
ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
}
}
@@ -612,8 +710,7 @@ void HELPER(NAME)(void *vd, target_ulong base, \
CPURISCVState *env, uint32_t desc) \
{ \
vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
- ctzl(sizeof(ETYPE)), GETPC(), \
- MMU_DATA_LOAD); \
+ ctzl(sizeof(ETYPE)), GETPC()); \
}
GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
@@ -638,8 +735,7 @@ void HELPER(NAME)(void *vd, target_ulong base, \
CPURISCVState *env, uint32_t desc) \
{ \
vext_ldst_whole(vd, base, env, desc, STORE_FN, \
- ctzl(sizeof(ETYPE)), GETPC(), \
- MMU_DATA_STORE); \
+ ctzl(sizeof(ETYPE)), GETPC()); \
}
GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
@@ -710,11 +806,12 @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
CPURISCVState *env, uint32_t desc,
- uint32_t esz, uint32_t dsz,
- opivv2_fn *fn)
+ opivv2_fn *fn, uint32_t esz)
{
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+ uint32_t vta = vext_vta(desc);
uint32_t i;
for (i = env->vstart; i < vl; i++) {
@@ -724,26 +821,28 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
fn(vd, vs1, vs2, i);
}
env->vstart = 0;
+ /* set tail elements to 1s */
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}
/* generate the helpers for OPIVV */
-#define GEN_VEXT_VV(NAME, ESZ, DSZ) \
+#define GEN_VEXT_VV(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
void *vs2, CPURISCVState *env, \
uint32_t desc) \
{ \
- do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
- do_##NAME); \
+ do_vext_vv(vd, v0, vs1, vs2, env, desc, \
+ do_##NAME, ESZ); \
}
-GEN_VEXT_VV(vadd_vv_b, 1, 1)
-GEN_VEXT_VV(vadd_vv_h, 2, 2)
-GEN_VEXT_VV(vadd_vv_w, 4, 4)
-GEN_VEXT_VV(vadd_vv_d, 8, 8)
-GEN_VEXT_VV(vsub_vv_b, 1, 1)
-GEN_VEXT_VV(vsub_vv_h, 2, 2)
-GEN_VEXT_VV(vsub_vv_w, 4, 4)
-GEN_VEXT_VV(vsub_vv_d, 8, 8)
+GEN_VEXT_VV(vadd_vv_b, 1)
+GEN_VEXT_VV(vadd_vv_h, 2)
+GEN_VEXT_VV(vadd_vv_w, 4)
+GEN_VEXT_VV(vadd_vv_d, 8)
+GEN_VEXT_VV(vsub_vv_b, 1)
+GEN_VEXT_VV(vsub_vv_h, 2)
+GEN_VEXT_VV(vsub_vv_w, 4)
+GEN_VEXT_VV(vsub_vv_d, 8)
typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
@@ -773,11 +872,12 @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
CPURISCVState *env, uint32_t desc,
- uint32_t esz, uint32_t dsz,
- opivx2_fn fn)
+ opivx2_fn fn, uint32_t esz)
{
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+ uint32_t vta = vext_vta(desc);
uint32_t i;
for (i = env->vstart; i < vl; i++) {
@@ -787,30 +887,32 @@ static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
fn(vd, s1, vs2, i);
}
env->vstart = 0;
+ /* set tail elements to 1s */
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}
/* generate the helpers for OPIVX */
-#define GEN_VEXT_VX(NAME, ESZ, DSZ) \
+#define GEN_VEXT_VX(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
void *vs2, CPURISCVState *env, \
uint32_t desc) \
{ \
- do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
- do_##NAME); \
-}
-
-GEN_VEXT_VX(vadd_vx_b, 1, 1)
-GEN_VEXT_VX(vadd_vx_h, 2, 2)
-GEN_VEXT_VX(vadd_vx_w, 4, 4)
-GEN_VEXT_VX(vadd_vx_d, 8, 8)
-GEN_VEXT_VX(vsub_vx_b, 1, 1)
-GEN_VEXT_VX(vsub_vx_h, 2, 2)
-GEN_VEXT_VX(vsub_vx_w, 4, 4)
-GEN_VEXT_VX(vsub_vx_d, 8, 8)
-GEN_VEXT_VX(vrsub_vx_b, 1, 1)
-GEN_VEXT_VX(vrsub_vx_h, 2, 2)
-GEN_VEXT_VX(vrsub_vx_w, 4, 4)
-GEN_VEXT_VX(vrsub_vx_d, 8, 8)
+ do_vext_vx(vd, v0, s1, vs2, env, desc, \
+ do_##NAME, ESZ); \
+}
+
+GEN_VEXT_VX(vadd_vx_b, 1)
+GEN_VEXT_VX(vadd_vx_h, 2)
+GEN_VEXT_VX(vadd_vx_w, 4)
+GEN_VEXT_VX(vadd_vx_d, 8)
+GEN_VEXT_VX(vsub_vx_b, 1)
+GEN_VEXT_VX(vsub_vx_h, 2)
+GEN_VEXT_VX(vsub_vx_w, 4)
+GEN_VEXT_VX(vsub_vx_d, 8)
+GEN_VEXT_VX(vrsub_vx_b, 1)
+GEN_VEXT_VX(vrsub_vx_h, 2)
+GEN_VEXT_VX(vrsub_vx_w, 4)
+GEN_VEXT_VX(vrsub_vx_d, 8)
void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
@@ -889,30 +991,30 @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
-GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
-GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
-GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
-GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
-GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
-GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
-GEN_VEXT_VV(vwadd_vv_b, 1, 2)
-GEN_VEXT_VV(vwadd_vv_h, 2, 4)
-GEN_VEXT_VV(vwadd_vv_w, 4, 8)
-GEN_VEXT_VV(vwsub_vv_b, 1, 2)
-GEN_VEXT_VV(vwsub_vv_h, 2, 4)
-GEN_VEXT_VV(vwsub_vv_w, 4, 8)
-GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
-GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
-GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
-GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
-GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
-GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
-GEN_VEXT_VV(vwadd_wv_b, 1, 2)
-GEN_VEXT_VV(vwadd_wv_h, 2, 4)
-GEN_VEXT_VV(vwadd_wv_w, 4, 8)
-GEN_VEXT_VV(vwsub_wv_b, 1, 2)
-GEN_VEXT_VV(vwsub_wv_h, 2, 4)
-GEN_VEXT_VV(vwsub_wv_w, 4, 8)
+GEN_VEXT_VV(vwaddu_vv_b, 2)
+GEN_VEXT_VV(vwaddu_vv_h, 4)
+GEN_VEXT_VV(vwaddu_vv_w, 8)
+GEN_VEXT_VV(vwsubu_vv_b, 2)
+GEN_VEXT_VV(vwsubu_vv_h, 4)
+GEN_VEXT_VV(vwsubu_vv_w, 8)
+GEN_VEXT_VV(vwadd_vv_b, 2)
+GEN_VEXT_VV(vwadd_vv_h, 4)
+GEN_VEXT_VV(vwadd_vv_w, 8)
+GEN_VEXT_VV(vwsub_vv_b, 2)
+GEN_VEXT_VV(vwsub_vv_h, 4)
+GEN_VEXT_VV(vwsub_vv_w, 8)
+GEN_VEXT_VV(vwaddu_wv_b, 2)
+GEN_VEXT_VV(vwaddu_wv_h, 4)
+GEN_VEXT_VV(vwaddu_wv_w, 8)
+GEN_VEXT_VV(vwsubu_wv_b, 2)
+GEN_VEXT_VV(vwsubu_wv_h, 4)
+GEN_VEXT_VV(vwsubu_wv_w, 8)
+GEN_VEXT_VV(vwadd_wv_b, 2)
+GEN_VEXT_VV(vwadd_wv_h, 4)
+GEN_VEXT_VV(vwadd_wv_w, 8)
+GEN_VEXT_VV(vwsub_wv_b, 2)
+GEN_VEXT_VV(vwsub_wv_h, 4)
+GEN_VEXT_VV(vwsub_wv_w, 8)
RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
@@ -938,30 +1040,30 @@ RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
-GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
-GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
-GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
-GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
-GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
-GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
-GEN_VEXT_VX(vwadd_vx_b, 1, 2)
-GEN_VEXT_VX(vwadd_vx_h, 2, 4)
-GEN_VEXT_VX(vwadd_vx_w, 4, 8)
-GEN_VEXT_VX(vwsub_vx_b, 1, 2)
-GEN_VEXT_VX(vwsub_vx_h, 2, 4)
-GEN_VEXT_VX(vwsub_vx_w, 4, 8)
-GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
-GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
-GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
-GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
-GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
-GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
-GEN_VEXT_VX(vwadd_wx_b, 1, 2)
-GEN_VEXT_VX(vwadd_wx_h, 2, 4)
-GEN_VEXT_VX(vwadd_wx_w, 4, 8)
-GEN_VEXT_VX(vwsub_wx_b, 1, 2)
-GEN_VEXT_VX(vwsub_wx_h, 2, 4)
-GEN_VEXT_VX(vwsub_wx_w, 4, 8)
+GEN_VEXT_VX(vwaddu_vx_b, 2)
+GEN_VEXT_VX(vwaddu_vx_h, 4)
+GEN_VEXT_VX(vwaddu_vx_w, 8)
+GEN_VEXT_VX(vwsubu_vx_b, 2)
+GEN_VEXT_VX(vwsubu_vx_h, 4)
+GEN_VEXT_VX(vwsubu_vx_w, 8)
+GEN_VEXT_VX(vwadd_vx_b, 2)
+GEN_VEXT_VX(vwadd_vx_h, 4)
+GEN_VEXT_VX(vwadd_vx_w, 8)
+GEN_VEXT_VX(vwsub_vx_b, 2)
+GEN_VEXT_VX(vwsub_vx_h, 4)
+GEN_VEXT_VX(vwsub_vx_w, 8)
+GEN_VEXT_VX(vwaddu_wx_b, 2)
+GEN_VEXT_VX(vwaddu_wx_h, 4)
+GEN_VEXT_VX(vwaddu_wx_w, 8)
+GEN_VEXT_VX(vwsubu_wx_b, 2)
+GEN_VEXT_VX(vwsubu_wx_h, 4)
+GEN_VEXT_VX(vwsubu_wx_w, 8)
+GEN_VEXT_VX(vwadd_wx_b, 2)
+GEN_VEXT_VX(vwadd_wx_h, 4)
+GEN_VEXT_VX(vwadd_wx_w, 8)
+GEN_VEXT_VX(vwsub_wx_b, 2)
+GEN_VEXT_VX(vwsub_wx_h, 4)
+GEN_VEXT_VX(vwsub_wx_w, 8)
/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
@@ -972,6 +1074,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = \
+ vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -982,6 +1088,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
*((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
@@ -999,6 +1107,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -1008,6 +1119,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
*((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
@@ -1030,6 +1143,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
{ \
uint32_t vl = env->vl; \
uint32_t vm = vext_vm(desc); \
+ uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
+ uint32_t vta_all_1s = vext_vta_all_1s(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -1039,6 +1154,13 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
} \
env->vstart = 0; \
+    /* mask destination registers are always tail-agnostic */ \
+ /* set tail elements to 1s */ \
+ if (vta_all_1s) { \
+ for (; i < total_elems; i++) { \
+ vext_set_elem_mask(vd, i, 1); \
+ } \
+ } \
}
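/*
 * For mask-producing instructions the tail is measured in bits rather than
 * bytes: with cfg.vlen = 128 and vl = 10, the loop above sets mask bits
 * 10..127 to 1.  Mask destinations are always treated as tail-agnostic by
 * the specification, so the behavior here depends only on the
 * rvv_ta_all_1s option, not on vtype.vta.
 */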
GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
@@ -1057,6 +1179,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
{ \
uint32_t vl = env->vl; \
uint32_t vm = vext_vm(desc); \
+ uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
+ uint32_t vta_all_1s = vext_vta_all_1s(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -1066,6 +1190,13 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
} \
env->vstart = 0; \
+    /* mask destination registers are always tail-agnostic */ \
+ /* set tail elements to 1s */ \
+ if (vta_all_1s) { \
+ for (; i < total_elems; i++) { \
+ vext_set_elem_mask(vd, i, 1); \
+ } \
+ } \
}
GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
@@ -1091,18 +1222,18 @@ RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
-GEN_VEXT_VV(vand_vv_b, 1, 1)
-GEN_VEXT_VV(vand_vv_h, 2, 2)
-GEN_VEXT_VV(vand_vv_w, 4, 4)
-GEN_VEXT_VV(vand_vv_d, 8, 8)
-GEN_VEXT_VV(vor_vv_b, 1, 1)
-GEN_VEXT_VV(vor_vv_h, 2, 2)
-GEN_VEXT_VV(vor_vv_w, 4, 4)
-GEN_VEXT_VV(vor_vv_d, 8, 8)
-GEN_VEXT_VV(vxor_vv_b, 1, 1)
-GEN_VEXT_VV(vxor_vv_h, 2, 2)
-GEN_VEXT_VV(vxor_vv_w, 4, 4)
-GEN_VEXT_VV(vxor_vv_d, 8, 8)
+GEN_VEXT_VV(vand_vv_b, 1)
+GEN_VEXT_VV(vand_vv_h, 2)
+GEN_VEXT_VV(vand_vv_w, 4)
+GEN_VEXT_VV(vand_vv_d, 8)
+GEN_VEXT_VV(vor_vv_b, 1)
+GEN_VEXT_VV(vor_vv_h, 2)
+GEN_VEXT_VV(vor_vv_w, 4)
+GEN_VEXT_VV(vor_vv_d, 8)
+GEN_VEXT_VV(vxor_vv_b, 1)
+GEN_VEXT_VV(vxor_vv_h, 2)
+GEN_VEXT_VV(vxor_vv_w, 4)
+GEN_VEXT_VV(vxor_vv_d, 8)
RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
@@ -1116,18 +1247,18 @@ RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
-GEN_VEXT_VX(vand_vx_b, 1, 1)
-GEN_VEXT_VX(vand_vx_h, 2, 2)
-GEN_VEXT_VX(vand_vx_w, 4, 4)
-GEN_VEXT_VX(vand_vx_d, 8, 8)
-GEN_VEXT_VX(vor_vx_b, 1, 1)
-GEN_VEXT_VX(vor_vx_h, 2, 2)
-GEN_VEXT_VX(vor_vx_w, 4, 4)
-GEN_VEXT_VX(vor_vx_d, 8, 8)
-GEN_VEXT_VX(vxor_vx_b, 1, 1)
-GEN_VEXT_VX(vxor_vx_h, 2, 2)
-GEN_VEXT_VX(vxor_vx_w, 4, 4)
-GEN_VEXT_VX(vxor_vx_d, 8, 8)
+GEN_VEXT_VX(vand_vx_b, 1)
+GEN_VEXT_VX(vand_vx_h, 2)
+GEN_VEXT_VX(vand_vx_w, 4)
+GEN_VEXT_VX(vand_vx_d, 8)
+GEN_VEXT_VX(vor_vx_b, 1)
+GEN_VEXT_VX(vor_vx_h, 2)
+GEN_VEXT_VX(vor_vx_w, 4)
+GEN_VEXT_VX(vor_vx_d, 8)
+GEN_VEXT_VX(vxor_vx_b, 1)
+GEN_VEXT_VX(vxor_vx_h, 2)
+GEN_VEXT_VX(vxor_vx_w, 4)
+GEN_VEXT_VX(vxor_vx_d, 8)
/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M) (N << (M))
@@ -1140,6 +1271,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(TS1); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -1151,6 +1285,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
*((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
@@ -1175,6 +1311,10 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(TD); \
+ uint32_t total_elems = \
+ vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -1185,6 +1325,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
*((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
}
GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
@@ -1229,6 +1371,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
+ uint32_t vta_all_1s = vext_vta_all_1s(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -1240,6 +1384,13 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
} \
env->vstart = 0; \
+    /* mask destination registers are always tail-agnostic */ \
+ /* set tail elements to 1s */ \
+ if (vta_all_1s) { \
+ for (; i < total_elems; i++) { \
+ vext_set_elem_mask(vd, i, 1); \
+ } \
+ } \
}
GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
@@ -1278,6 +1429,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
+ uint32_t vta_all_1s = vext_vta_all_1s(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -1289,6 +1442,13 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
DO_OP(s2, (ETYPE)(target_long)s1)); \
} \
env->vstart = 0; \
+    /* mask destination registers are always tail-agnostic */ \
+ /* set tail elements to 1s */ \
+ if (vta_all_1s) { \
+ for (; i < total_elems; i++) { \
+ vext_set_elem_mask(vd, i, 1); \
+ } \
+ } \
}
GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
@@ -1348,22 +1508,22 @@ RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
-GEN_VEXT_VV(vminu_vv_b, 1, 1)
-GEN_VEXT_VV(vminu_vv_h, 2, 2)
-GEN_VEXT_VV(vminu_vv_w, 4, 4)
-GEN_VEXT_VV(vminu_vv_d, 8, 8)
-GEN_VEXT_VV(vmin_vv_b, 1, 1)
-GEN_VEXT_VV(vmin_vv_h, 2, 2)
-GEN_VEXT_VV(vmin_vv_w, 4, 4)
-GEN_VEXT_VV(vmin_vv_d, 8, 8)
-GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
-GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
-GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
-GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
-GEN_VEXT_VV(vmax_vv_b, 1, 1)
-GEN_VEXT_VV(vmax_vv_h, 2, 2)
-GEN_VEXT_VV(vmax_vv_w, 4, 4)
-GEN_VEXT_VV(vmax_vv_d, 8, 8)
+GEN_VEXT_VV(vminu_vv_b, 1)
+GEN_VEXT_VV(vminu_vv_h, 2)
+GEN_VEXT_VV(vminu_vv_w, 4)
+GEN_VEXT_VV(vminu_vv_d, 8)
+GEN_VEXT_VV(vmin_vv_b, 1)
+GEN_VEXT_VV(vmin_vv_h, 2)
+GEN_VEXT_VV(vmin_vv_w, 4)
+GEN_VEXT_VV(vmin_vv_d, 8)
+GEN_VEXT_VV(vmaxu_vv_b, 1)
+GEN_VEXT_VV(vmaxu_vv_h, 2)
+GEN_VEXT_VV(vmaxu_vv_w, 4)
+GEN_VEXT_VV(vmaxu_vv_d, 8)
+GEN_VEXT_VV(vmax_vv_b, 1)
+GEN_VEXT_VV(vmax_vv_h, 2)
+GEN_VEXT_VV(vmax_vv_w, 4)
+GEN_VEXT_VV(vmax_vv_d, 8)
RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
@@ -1381,22 +1541,22 @@ RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
-GEN_VEXT_VX(vminu_vx_b, 1, 1)
-GEN_VEXT_VX(vminu_vx_h, 2, 2)
-GEN_VEXT_VX(vminu_vx_w, 4, 4)
-GEN_VEXT_VX(vminu_vx_d, 8, 8)
-GEN_VEXT_VX(vmin_vx_b, 1, 1)
-GEN_VEXT_VX(vmin_vx_h, 2, 2)
-GEN_VEXT_VX(vmin_vx_w, 4, 4)
-GEN_VEXT_VX(vmin_vx_d, 8, 8)
-GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
-GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
-GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
-GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
-GEN_VEXT_VX(vmax_vx_b, 1, 1)
-GEN_VEXT_VX(vmax_vx_h, 2, 2)
-GEN_VEXT_VX(vmax_vx_w, 4, 4)
-GEN_VEXT_VX(vmax_vx_d, 8, 8)
+GEN_VEXT_VX(vminu_vx_b, 1)
+GEN_VEXT_VX(vminu_vx_h, 2)
+GEN_VEXT_VX(vminu_vx_w, 4)
+GEN_VEXT_VX(vminu_vx_d, 8)
+GEN_VEXT_VX(vmin_vx_b, 1)
+GEN_VEXT_VX(vmin_vx_h, 2)
+GEN_VEXT_VX(vmin_vx_w, 4)
+GEN_VEXT_VX(vmin_vx_d, 8)
+GEN_VEXT_VX(vmaxu_vx_b, 1)
+GEN_VEXT_VX(vmaxu_vx_h, 2)
+GEN_VEXT_VX(vmaxu_vx_w, 4)
+GEN_VEXT_VX(vmaxu_vx_d, 8)
+GEN_VEXT_VX(vmax_vx_b, 1)
+GEN_VEXT_VX(vmax_vx_h, 2)
+GEN_VEXT_VX(vmax_vx_w, 4)
+GEN_VEXT_VX(vmax_vx_d, 8)
/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
@@ -1404,10 +1564,10 @@ RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
-GEN_VEXT_VV(vmul_vv_b, 1, 1)
-GEN_VEXT_VV(vmul_vv_h, 2, 2)
-GEN_VEXT_VV(vmul_vv_w, 4, 4)
-GEN_VEXT_VV(vmul_vv_d, 8, 8)
+GEN_VEXT_VV(vmul_vv_b, 1)
+GEN_VEXT_VV(vmul_vv_h, 2)
+GEN_VEXT_VV(vmul_vv_w, 4)
+GEN_VEXT_VV(vmul_vv_d, 8)
static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
@@ -1511,18 +1671,18 @@ RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
-GEN_VEXT_VV(vmulh_vv_b, 1, 1)
-GEN_VEXT_VV(vmulh_vv_h, 2, 2)
-GEN_VEXT_VV(vmulh_vv_w, 4, 4)
-GEN_VEXT_VV(vmulh_vv_d, 8, 8)
-GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
-GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
-GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
-GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
-GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
-GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
-GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
-GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
+GEN_VEXT_VV(vmulh_vv_b, 1)
+GEN_VEXT_VV(vmulh_vv_h, 2)
+GEN_VEXT_VV(vmulh_vv_w, 4)
+GEN_VEXT_VV(vmulh_vv_d, 8)
+GEN_VEXT_VV(vmulhu_vv_b, 1)
+GEN_VEXT_VV(vmulhu_vv_h, 2)
+GEN_VEXT_VV(vmulhu_vv_w, 4)
+GEN_VEXT_VV(vmulhu_vv_d, 8)
+GEN_VEXT_VV(vmulhsu_vv_b, 1)
+GEN_VEXT_VV(vmulhsu_vv_h, 2)
+GEN_VEXT_VV(vmulhsu_vv_w, 4)
+GEN_VEXT_VV(vmulhsu_vv_d, 8)
RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
@@ -1540,22 +1700,22 @@ RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
-GEN_VEXT_VX(vmul_vx_b, 1, 1)
-GEN_VEXT_VX(vmul_vx_h, 2, 2)
-GEN_VEXT_VX(vmul_vx_w, 4, 4)
-GEN_VEXT_VX(vmul_vx_d, 8, 8)
-GEN_VEXT_VX(vmulh_vx_b, 1, 1)
-GEN_VEXT_VX(vmulh_vx_h, 2, 2)
-GEN_VEXT_VX(vmulh_vx_w, 4, 4)
-GEN_VEXT_VX(vmulh_vx_d, 8, 8)
-GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
-GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
-GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
-GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
-GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
-GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
-GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
-GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
+GEN_VEXT_VX(vmul_vx_b, 1)
+GEN_VEXT_VX(vmul_vx_h, 2)
+GEN_VEXT_VX(vmul_vx_w, 4)
+GEN_VEXT_VX(vmul_vx_d, 8)
+GEN_VEXT_VX(vmulh_vx_b, 1)
+GEN_VEXT_VX(vmulh_vx_h, 2)
+GEN_VEXT_VX(vmulh_vx_w, 4)
+GEN_VEXT_VX(vmulh_vx_d, 8)
+GEN_VEXT_VX(vmulhu_vx_b, 1)
+GEN_VEXT_VX(vmulhu_vx_h, 2)
+GEN_VEXT_VX(vmulhu_vx_w, 4)
+GEN_VEXT_VX(vmulhu_vx_d, 8)
+GEN_VEXT_VX(vmulhsu_vx_b, 1)
+GEN_VEXT_VX(vmulhsu_vx_h, 2)
+GEN_VEXT_VX(vmulhsu_vx_w, 4)
+GEN_VEXT_VX(vmulhsu_vx_d, 8)
/* Vector Integer Divide Instructions */
#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
@@ -1581,22 +1741,22 @@ RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
-GEN_VEXT_VV(vdivu_vv_b, 1, 1)
-GEN_VEXT_VV(vdivu_vv_h, 2, 2)
-GEN_VEXT_VV(vdivu_vv_w, 4, 4)
-GEN_VEXT_VV(vdivu_vv_d, 8, 8)
-GEN_VEXT_VV(vdiv_vv_b, 1, 1)
-GEN_VEXT_VV(vdiv_vv_h, 2, 2)
-GEN_VEXT_VV(vdiv_vv_w, 4, 4)
-GEN_VEXT_VV(vdiv_vv_d, 8, 8)
-GEN_VEXT_VV(vremu_vv_b, 1, 1)
-GEN_VEXT_VV(vremu_vv_h, 2, 2)
-GEN_VEXT_VV(vremu_vv_w, 4, 4)
-GEN_VEXT_VV(vremu_vv_d, 8, 8)
-GEN_VEXT_VV(vrem_vv_b, 1, 1)
-GEN_VEXT_VV(vrem_vv_h, 2, 2)
-GEN_VEXT_VV(vrem_vv_w, 4, 4)
-GEN_VEXT_VV(vrem_vv_d, 8, 8)
+GEN_VEXT_VV(vdivu_vv_b, 1)
+GEN_VEXT_VV(vdivu_vv_h, 2)
+GEN_VEXT_VV(vdivu_vv_w, 4)
+GEN_VEXT_VV(vdivu_vv_d, 8)
+GEN_VEXT_VV(vdiv_vv_b, 1)
+GEN_VEXT_VV(vdiv_vv_h, 2)
+GEN_VEXT_VV(vdiv_vv_w, 4)
+GEN_VEXT_VV(vdiv_vv_d, 8)
+GEN_VEXT_VV(vremu_vv_b, 1)
+GEN_VEXT_VV(vremu_vv_h, 2)
+GEN_VEXT_VV(vremu_vv_w, 4)
+GEN_VEXT_VV(vremu_vv_d, 8)
+GEN_VEXT_VV(vrem_vv_b, 1)
+GEN_VEXT_VV(vrem_vv_h, 2)
+GEN_VEXT_VV(vrem_vv_w, 4)
+GEN_VEXT_VV(vrem_vv_d, 8)
RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
@@ -1614,22 +1774,22 @@ RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
-GEN_VEXT_VX(vdivu_vx_b, 1, 1)
-GEN_VEXT_VX(vdivu_vx_h, 2, 2)
-GEN_VEXT_VX(vdivu_vx_w, 4, 4)
-GEN_VEXT_VX(vdivu_vx_d, 8, 8)
-GEN_VEXT_VX(vdiv_vx_b, 1, 1)
-GEN_VEXT_VX(vdiv_vx_h, 2, 2)
-GEN_VEXT_VX(vdiv_vx_w, 4, 4)
-GEN_VEXT_VX(vdiv_vx_d, 8, 8)
-GEN_VEXT_VX(vremu_vx_b, 1, 1)
-GEN_VEXT_VX(vremu_vx_h, 2, 2)
-GEN_VEXT_VX(vremu_vx_w, 4, 4)
-GEN_VEXT_VX(vremu_vx_d, 8, 8)
-GEN_VEXT_VX(vrem_vx_b, 1, 1)
-GEN_VEXT_VX(vrem_vx_h, 2, 2)
-GEN_VEXT_VX(vrem_vx_w, 4, 4)
-GEN_VEXT_VX(vrem_vx_d, 8, 8)
+GEN_VEXT_VX(vdivu_vx_b, 1)
+GEN_VEXT_VX(vdivu_vx_h, 2)
+GEN_VEXT_VX(vdivu_vx_w, 4)
+GEN_VEXT_VX(vdivu_vx_d, 8)
+GEN_VEXT_VX(vdiv_vx_b, 1)
+GEN_VEXT_VX(vdiv_vx_h, 2)
+GEN_VEXT_VX(vdiv_vx_w, 4)
+GEN_VEXT_VX(vdiv_vx_d, 8)
+GEN_VEXT_VX(vremu_vx_b, 1)
+GEN_VEXT_VX(vremu_vx_h, 2)
+GEN_VEXT_VX(vremu_vx_w, 4)
+GEN_VEXT_VX(vremu_vx_d, 8)
+GEN_VEXT_VX(vrem_vx_b, 1)
+GEN_VEXT_VX(vrem_vx_h, 2)
+GEN_VEXT_VX(vrem_vx_w, 4)
+GEN_VEXT_VX(vrem_vx_d, 8)
/* Vector Widening Integer Multiply Instructions */
RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
@@ -1641,15 +1801,15 @@ RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
-GEN_VEXT_VV(vwmul_vv_b, 1, 2)
-GEN_VEXT_VV(vwmul_vv_h, 2, 4)
-GEN_VEXT_VV(vwmul_vv_w, 4, 8)
-GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
-GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
-GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
-GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
-GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
-GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
+GEN_VEXT_VV(vwmul_vv_b, 2)
+GEN_VEXT_VV(vwmul_vv_h, 4)
+GEN_VEXT_VV(vwmul_vv_w, 8)
+GEN_VEXT_VV(vwmulu_vv_b, 2)
+GEN_VEXT_VV(vwmulu_vv_h, 4)
+GEN_VEXT_VV(vwmulu_vv_w, 8)
+GEN_VEXT_VV(vwmulsu_vv_b, 2)
+GEN_VEXT_VV(vwmulsu_vv_h, 4)
+GEN_VEXT_VV(vwmulsu_vv_w, 8)
RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
@@ -1660,15 +1820,15 @@ RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
-GEN_VEXT_VX(vwmul_vx_b, 1, 2)
-GEN_VEXT_VX(vwmul_vx_h, 2, 4)
-GEN_VEXT_VX(vwmul_vx_w, 4, 8)
-GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
-GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
-GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
-GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
-GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
-GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
+GEN_VEXT_VX(vwmul_vx_b, 2)
+GEN_VEXT_VX(vwmul_vx_h, 4)
+GEN_VEXT_VX(vwmul_vx_w, 8)
+GEN_VEXT_VX(vwmulu_vx_b, 2)
+GEN_VEXT_VX(vwmulu_vx_h, 4)
+GEN_VEXT_VX(vwmulu_vx_w, 8)
+GEN_VEXT_VX(vwmulsu_vx_b, 2)
+GEN_VEXT_VX(vwmulsu_vx_h, 4)
+GEN_VEXT_VX(vwmulsu_vx_w, 8)
/* Vector Single-Width Integer Multiply-Add Instructions */
#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
@@ -1700,22 +1860,22 @@ RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
-GEN_VEXT_VV(vmacc_vv_b, 1, 1)
-GEN_VEXT_VV(vmacc_vv_h, 2, 2)
-GEN_VEXT_VV(vmacc_vv_w, 4, 4)
-GEN_VEXT_VV(vmacc_vv_d, 8, 8)
-GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
-GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
-GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
-GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
-GEN_VEXT_VV(vmadd_vv_b, 1, 1)
-GEN_VEXT_VV(vmadd_vv_h, 2, 2)
-GEN_VEXT_VV(vmadd_vv_w, 4, 4)
-GEN_VEXT_VV(vmadd_vv_d, 8, 8)
-GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
-GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
-GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
-GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
+GEN_VEXT_VV(vmacc_vv_b, 1)
+GEN_VEXT_VV(vmacc_vv_h, 2)
+GEN_VEXT_VV(vmacc_vv_w, 4)
+GEN_VEXT_VV(vmacc_vv_d, 8)
+GEN_VEXT_VV(vnmsac_vv_b, 1)
+GEN_VEXT_VV(vnmsac_vv_h, 2)
+GEN_VEXT_VV(vnmsac_vv_w, 4)
+GEN_VEXT_VV(vnmsac_vv_d, 8)
+GEN_VEXT_VV(vmadd_vv_b, 1)
+GEN_VEXT_VV(vmadd_vv_h, 2)
+GEN_VEXT_VV(vmadd_vv_w, 4)
+GEN_VEXT_VV(vmadd_vv_d, 8)
+GEN_VEXT_VV(vnmsub_vv_b, 1)
+GEN_VEXT_VV(vnmsub_vv_h, 2)
+GEN_VEXT_VV(vnmsub_vv_w, 4)
+GEN_VEXT_VV(vnmsub_vv_d, 8)
#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
@@ -1741,22 +1901,22 @@ RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
-GEN_VEXT_VX(vmacc_vx_b, 1, 1)
-GEN_VEXT_VX(vmacc_vx_h, 2, 2)
-GEN_VEXT_VX(vmacc_vx_w, 4, 4)
-GEN_VEXT_VX(vmacc_vx_d, 8, 8)
-GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
-GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
-GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
-GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
-GEN_VEXT_VX(vmadd_vx_b, 1, 1)
-GEN_VEXT_VX(vmadd_vx_h, 2, 2)
-GEN_VEXT_VX(vmadd_vx_w, 4, 4)
-GEN_VEXT_VX(vmadd_vx_d, 8, 8)
-GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
-GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
-GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
-GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
+GEN_VEXT_VX(vmacc_vx_b, 1)
+GEN_VEXT_VX(vmacc_vx_h, 2)
+GEN_VEXT_VX(vmacc_vx_w, 4)
+GEN_VEXT_VX(vmacc_vx_d, 8)
+GEN_VEXT_VX(vnmsac_vx_b, 1)
+GEN_VEXT_VX(vnmsac_vx_h, 2)
+GEN_VEXT_VX(vnmsac_vx_w, 4)
+GEN_VEXT_VX(vnmsac_vx_d, 8)
+GEN_VEXT_VX(vmadd_vx_b, 1)
+GEN_VEXT_VX(vmadd_vx_h, 2)
+GEN_VEXT_VX(vmadd_vx_w, 4)
+GEN_VEXT_VX(vmadd_vx_d, 8)
+GEN_VEXT_VX(vnmsub_vx_b, 1)
+GEN_VEXT_VX(vnmsub_vx_h, 2)
+GEN_VEXT_VX(vnmsub_vx_w, 4)
+GEN_VEXT_VX(vnmsub_vx_d, 8)
/* Vector Widening Integer Multiply-Add Instructions */
RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
@@ -1768,15 +1928,15 @@ RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
-GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
-GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
-GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
-GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
-GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
-GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
-GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
-GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
-GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
+GEN_VEXT_VV(vwmaccu_vv_b, 2)
+GEN_VEXT_VV(vwmaccu_vv_h, 4)
+GEN_VEXT_VV(vwmaccu_vv_w, 8)
+GEN_VEXT_VV(vwmacc_vv_b, 2)
+GEN_VEXT_VV(vwmacc_vv_h, 4)
+GEN_VEXT_VV(vwmacc_vv_w, 8)
+GEN_VEXT_VV(vwmaccsu_vv_b, 2)
+GEN_VEXT_VV(vwmaccsu_vv_h, 4)
+GEN_VEXT_VV(vwmaccsu_vv_w, 8)
RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
@@ -1790,18 +1950,18 @@ RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
-GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
-GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
-GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
-GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
-GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
-GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
-GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
-GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
-GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
-GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
-GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
-GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
+GEN_VEXT_VX(vwmaccu_vx_b, 2)
+GEN_VEXT_VX(vwmaccu_vx_h, 4)
+GEN_VEXT_VX(vwmaccu_vx_w, 8)
+GEN_VEXT_VX(vwmacc_vx_b, 2)
+GEN_VEXT_VX(vwmacc_vx_h, 4)
+GEN_VEXT_VX(vwmacc_vx_w, 8)
+GEN_VEXT_VX(vwmaccsu_vx_b, 2)
+GEN_VEXT_VX(vwmaccsu_vx_h, 4)
+GEN_VEXT_VX(vwmaccsu_vx_w, 8)
+GEN_VEXT_VX(vwmaccus_vx_b, 2)
+GEN_VEXT_VX(vwmaccus_vx_h, 4)
+GEN_VEXT_VX(vwmaccus_vx_w, 8)
/* Vector Integer Merge and Move Instructions */
#define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
@@ -1809,6 +1969,9 @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
uint32_t desc) \
{ \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -1816,6 +1979,8 @@ void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
*((ETYPE *)vd + H(i)) = s1; \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
@@ -1828,12 +1993,17 @@ void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
uint32_t desc) \
{ \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
*((ETYPE *)vd + H(i)) = (ETYPE)s1; \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
@@ -1846,6 +2016,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -1853,6 +2026,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
*((ETYPE *)vd + H(i)) = *(vt + H(i)); \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
@@ -1865,6 +2040,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -1874,6 +2052,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
*((ETYPE *)vd + H(i)) = d; \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
@@ -1922,11 +2102,13 @@ vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
static inline void
vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
CPURISCVState *env,
- uint32_t desc, uint32_t esz, uint32_t dsz,
- opivv2_rm_fn *fn)
+ uint32_t desc,
+ opivv2_rm_fn *fn, uint32_t esz)
{
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+ uint32_t vta = vext_vta(desc);
switch (env->vxrm) {
case 0: /* rnu */
@@ -1946,15 +2128,17 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
env, vl, vm, 3, fn);
break;
}
+ /* set tail elements to 1s */
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}
/* generate helpers for fixed point instructions with OPIVV format */
-#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \
+#define GEN_VEXT_VV_RM(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
- do_##NAME); \
+ vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
+ do_##NAME, ESZ); \
}
static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
@@ -2004,10 +2188,10 @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
-GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
+GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
+GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
+GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
CPURISCVState *env, int vxrm);
@@ -2039,11 +2223,13 @@ vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
static inline void
vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
CPURISCVState *env,
- uint32_t desc, uint32_t esz, uint32_t dsz,
- opivx2_rm_fn *fn)
+ uint32_t desc,
+ opivx2_rm_fn *fn, uint32_t esz)
{
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+ uint32_t vta = vext_vta(desc);
switch (env->vxrm) {
case 0: /* rnu */
@@ -2063,25 +2249,27 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
env, vl, vm, 3, fn);
break;
}
+ /* set tail elements to 1s */
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}
/* generate helpers for fixed point instructions with OPIVX format */
-#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \
+#define GEN_VEXT_VX_RM(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
- vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
- do_##NAME); \
+ vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
+ do_##NAME, ESZ); \
}
RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
-GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
+GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
+GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
+GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
@@ -2127,19 +2315,19 @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
-GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vsadd_vv_b, 1)
+GEN_VEXT_VV_RM(vsadd_vv_h, 2)
+GEN_VEXT_VV_RM(vsadd_vv_w, 4)
+GEN_VEXT_VV_RM(vsadd_vv_d, 8)
RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
-GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vsadd_vx_b, 1)
+GEN_VEXT_VX_RM(vsadd_vx_h, 2)
+GEN_VEXT_VX_RM(vsadd_vx_w, 4)
+GEN_VEXT_VX_RM(vsadd_vx_d, 8)
static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
{
@@ -2188,19 +2376,19 @@ RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
-GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vssubu_vv_b, 1)
+GEN_VEXT_VV_RM(vssubu_vv_h, 2)
+GEN_VEXT_VV_RM(vssubu_vv_w, 4)
+GEN_VEXT_VV_RM(vssubu_vv_d, 8)
RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
-GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vssubu_vx_b, 1)
+GEN_VEXT_VX_RM(vssubu_vx_h, 2)
+GEN_VEXT_VX_RM(vssubu_vx_w, 4)
+GEN_VEXT_VX_RM(vssubu_vx_d, 8)
static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
@@ -2246,19 +2434,19 @@ RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
-GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vssub_vv_b, 1)
+GEN_VEXT_VV_RM(vssub_vv_h, 2)
+GEN_VEXT_VV_RM(vssub_vv_w, 4)
+GEN_VEXT_VV_RM(vssub_vv_d, 8)
RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
-GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vssub_vx_b, 1)
+GEN_VEXT_VX_RM(vssub_vx_h, 2)
+GEN_VEXT_VX_RM(vssub_vx_w, 4)
+GEN_VEXT_VX_RM(vssub_vx_d, 8)
/* Vector Single-Width Averaging Add and Subtract */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
@@ -2310,19 +2498,19 @@ RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
-GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vaadd_vv_b, 1)
+GEN_VEXT_VV_RM(vaadd_vv_h, 2)
+GEN_VEXT_VV_RM(vaadd_vv_w, 4)
+GEN_VEXT_VV_RM(vaadd_vv_d, 8)
RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
-GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vaadd_vx_b, 1)
+GEN_VEXT_VX_RM(vaadd_vx_h, 2)
+GEN_VEXT_VX_RM(vaadd_vx_w, 4)
+GEN_VEXT_VX_RM(vaadd_vx_d, 8)
static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
uint32_t a, uint32_t b)
@@ -2347,19 +2535,19 @@ RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
-GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
+GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
+GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
+GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
-GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
+GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
+GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
+GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
@@ -2383,19 +2571,19 @@ RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
-GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vasub_vv_b, 1)
+GEN_VEXT_VV_RM(vasub_vv_h, 2)
+GEN_VEXT_VV_RM(vasub_vv_w, 4)
+GEN_VEXT_VV_RM(vasub_vv_d, 8)
RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
-GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vasub_vx_b, 1)
+GEN_VEXT_VX_RM(vasub_vx_h, 2)
+GEN_VEXT_VX_RM(vasub_vx_w, 4)
+GEN_VEXT_VX_RM(vasub_vx_d, 8)
static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
uint32_t a, uint32_t b)
@@ -2420,19 +2608,19 @@ RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
-GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vasubu_vv_b, 1)
+GEN_VEXT_VV_RM(vasubu_vv_h, 2)
+GEN_VEXT_VV_RM(vasubu_vv_w, 4)
+GEN_VEXT_VV_RM(vasubu_vv_d, 8)
RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
-GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vasubu_vx_b, 1)
+GEN_VEXT_VX_RM(vasubu_vx_h, 2)
+GEN_VEXT_VX_RM(vasubu_vx_w, 4)
+GEN_VEXT_VX_RM(vasubu_vx_d, 8)
/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
@@ -2527,19 +2715,19 @@ RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
-GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vsmul_vv_b, 1)
+GEN_VEXT_VV_RM(vsmul_vv_h, 2)
+GEN_VEXT_VV_RM(vsmul_vv_w, 4)
+GEN_VEXT_VV_RM(vsmul_vv_d, 8)
RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
-GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vsmul_vx_b, 1)
+GEN_VEXT_VX_RM(vsmul_vx_h, 2)
+GEN_VEXT_VX_RM(vsmul_vx_w, 4)
+GEN_VEXT_VX_RM(vsmul_vx_d, 8)
/* Vector Single-Width Scaling Shift Instructions */
static inline uint8_t
@@ -2586,19 +2774,19 @@ RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
-GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vssrl_vv_b, 1)
+GEN_VEXT_VV_RM(vssrl_vv_h, 2)
+GEN_VEXT_VV_RM(vssrl_vv_w, 4)
+GEN_VEXT_VV_RM(vssrl_vv_d, 8)
RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
-GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vssrl_vx_b, 1)
+GEN_VEXT_VX_RM(vssrl_vx_h, 2)
+GEN_VEXT_VX_RM(vssrl_vx_w, 4)
+GEN_VEXT_VX_RM(vssrl_vx_d, 8)
static inline int8_t
vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
@@ -2645,19 +2833,19 @@ RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
-GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
-GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
-GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
-GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
+GEN_VEXT_VV_RM(vssra_vv_b, 1)
+GEN_VEXT_VV_RM(vssra_vv_h, 2)
+GEN_VEXT_VV_RM(vssra_vv_w, 4)
+GEN_VEXT_VV_RM(vssra_vv_d, 8)
RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
-GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
-GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
-GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
-GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
+GEN_VEXT_VX_RM(vssra_vx_b, 1)
+GEN_VEXT_VX_RM(vssra_vx_h, 2)
+GEN_VEXT_VX_RM(vssra_vx_w, 4)
+GEN_VEXT_VX_RM(vssra_vx_d, 8)
/* Vector Narrowing Fixed-Point Clip Instructions */
static inline int8_t
@@ -2720,16 +2908,16 @@ vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
-GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
-GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
-GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)
+GEN_VEXT_VV_RM(vnclip_wv_b, 1)
+GEN_VEXT_VV_RM(vnclip_wv_h, 2)
+GEN_VEXT_VV_RM(vnclip_wv_w, 4)
RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
-GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
-GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
-GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
+GEN_VEXT_VX_RM(vnclip_wx_b, 1)
+GEN_VEXT_VX_RM(vnclip_wx_h, 2)
+GEN_VEXT_VX_RM(vnclip_wx_w, 4)
static inline uint8_t
vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
@@ -2782,16 +2970,16 @@ vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
-GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
-GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
-GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)
+GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
+GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
+GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
-GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
-GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
-GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
+GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
+GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
+GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
/*
*** Vector Float Point Arithmetic Instructions
@@ -2806,13 +2994,16 @@ static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
*((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
}
-#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \
+#define GEN_VEXT_VV_ENV(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
void *vs2, CPURISCVState *env, \
uint32_t desc) \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t total_elems = \
+ vext_get_total_elems(env, desc, ESZ); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -2822,14 +3013,17 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
do_##NAME(vd, vs1, vs2, i, env); \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * ESZ, \
+ total_elems * ESZ); \
}
RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
-GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
+GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
+GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
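Note on the pattern above: with the second size argument gone, the helper derives the total number of destination elements itself and, when the tail-agnostic bit is set, writes 1s into everything past the last body element. A minimal standalone sketch of that fill, assuming vext_set_elems_1s() behaves like a guarded memset and does nothing when vta is clear; set_tail_1s() and its exact semantics are illustrative stand-ins, not the QEMU helper:

    #include <stdint.h>
    #include <string.h>

    /*
     * Hypothetical stand-in for vext_set_elems_1s(): when the tail-agnostic
     * bit is set, overwrite the bytes from the end of the body (cnt) up to
     * the end of the destination register group (tot) with all 1s; when vta
     * is clear the tail is left undisturbed.
     */
    static void set_tail_1s(void *vd, uint32_t vta, uint32_t cnt, uint32_t tot)
    {
        if (vta == 0 || tot <= cnt) {
            return;                     /* undisturbed policy, or no tail */
        }
        memset((uint8_t *)vd + cnt, 0xff, tot - cnt);
    }

    /* e.g. a 4-byte-element helper would call
     * set_tail_1s(vd, vta, vl * 4, total_elems * 4) after the body loop. */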
#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
@@ -2839,13 +3033,16 @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
*((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}
-#define GEN_VEXT_VF(NAME, ESZ, DSZ) \
+#define GEN_VEXT_VF(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
void *vs2, CPURISCVState *env, \
uint32_t desc) \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t total_elems = \
+ vext_get_total_elems(env, desc, ESZ); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -2855,27 +3052,30 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
do_##NAME(vd, s1, vs2, i, env); \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * ESZ, \
+ total_elems * ESZ); \
}
RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
-GEN_VEXT_VF(vfadd_vf_h, 2, 2)
-GEN_VEXT_VF(vfadd_vf_w, 4, 4)
-GEN_VEXT_VF(vfadd_vf_d, 8, 8)
+GEN_VEXT_VF(vfadd_vf_h, 2)
+GEN_VEXT_VF(vfadd_vf_w, 4)
+GEN_VEXT_VF(vfadd_vf_d, 8)
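The dropped argument is recovered at run time: the descriptor carries the vector register size in bytes, and the element count follows from it and the effective LMUL for the destination element size. For widening and narrowing forms the single remaining macro argument is therefore the destination element size, which is all the tail fill needs. A rough sketch of the derivation, assuming total = (vlenb << emul) / esz with emul clamped so a fractional register group still occupies one whole register; total_elems() is an assumed simplification, not the actual vext_get_total_elems():

    #include <stdint.h>

    /*
     * Assumed sketch: vlenb is one vector register in bytes, lmul_log2 the
     * register-group multiplier (log2, may be negative for fractional LMUL),
     * sew the selected element width in bytes, esz the destination element
     * size in bytes; sew and esz are powers of two.
     */
    static uint32_t total_elems(uint32_t vlenb, int lmul_log2,
                                uint32_t sew, uint32_t esz)
    {
        int emul = __builtin_ctz(esz) - __builtin_ctz(sew) + lmul_log2;
        if (emul < 0) {
            emul = 0;   /* a fractional group still owns a whole register */
        }
        return (vlenb << emul) / esz;
    }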
RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
-GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
+GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
+GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
-GEN_VEXT_VF(vfsub_vf_h, 2, 2)
-GEN_VEXT_VF(vfsub_vf_w, 4, 4)
-GEN_VEXT_VF(vfsub_vf_d, 8, 8)
+GEN_VEXT_VF(vfsub_vf_h, 2)
+GEN_VEXT_VF(vfsub_vf_w, 4)
+GEN_VEXT_VF(vfsub_vf_d, 8)
static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
{
@@ -2895,9 +3095,9 @@ static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
-GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
-GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
-GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
+GEN_VEXT_VF(vfrsub_vf_h, 2)
+GEN_VEXT_VF(vfrsub_vf_w, 4)
+GEN_VEXT_VF(vfrsub_vf_d, 8)
/* Vector Widening Floating-Point Add/Subtract Instructions */
static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
@@ -2915,12 +3115,12 @@ static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
-GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
+GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
-GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
-GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
+GEN_VEXT_VF(vfwadd_vf_h, 4)
+GEN_VEXT_VF(vfwadd_vf_w, 8)
static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
{
@@ -2937,12 +3137,12 @@ static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
-GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
+GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
-GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
-GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
+GEN_VEXT_VF(vfwsub_vf_h, 4)
+GEN_VEXT_VF(vfwsub_vf_w, 8)
static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
{
@@ -2956,12 +3156,12 @@ static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
-GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
+GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
-GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
-GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
+GEN_VEXT_VF(vfwadd_wf_h, 4)
+GEN_VEXT_VF(vfwadd_wf_w, 8)
static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
{
@@ -2975,39 +3175,39 @@ static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
-GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
+GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
-GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
-GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
+GEN_VEXT_VF(vfwsub_wf_h, 4)
+GEN_VEXT_VF(vfwsub_wf_w, 8)
/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
-GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
+GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
+GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
-GEN_VEXT_VF(vfmul_vf_h, 2, 2)
-GEN_VEXT_VF(vfmul_vf_w, 4, 4)
-GEN_VEXT_VF(vfmul_vf_d, 8, 8)
+GEN_VEXT_VF(vfmul_vf_h, 2)
+GEN_VEXT_VF(vfmul_vf_w, 4)
+GEN_VEXT_VF(vfmul_vf_d, 8)
RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
-GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
+GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
+GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
-GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
-GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
-GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
+GEN_VEXT_VF(vfdiv_vf_h, 2)
+GEN_VEXT_VF(vfdiv_vf_w, 4)
+GEN_VEXT_VF(vfdiv_vf_d, 8)
static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
{
@@ -3027,9 +3227,9 @@ static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
-GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
-GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
-GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
+GEN_VEXT_VF(vfrdiv_vf_h, 2)
+GEN_VEXT_VF(vfrdiv_vf_w, 4)
+GEN_VEXT_VF(vfrdiv_vf_d, 8)
/* Vector Widening Floating-Point Multiply */
static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
@@ -3046,12 +3246,12 @@ static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
}
RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
-GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
+GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
-GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
-GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
+GEN_VEXT_VF(vfwmul_vf_h, 4)
+GEN_VEXT_VF(vfwmul_vf_w, 8)
/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
@@ -3082,9 +3282,9 @@ static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
-GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
+GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
+GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
@@ -3098,9 +3298,9 @@ static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
-GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
-GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
-GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
+GEN_VEXT_VF(vfmacc_vf_h, 2)
+GEN_VEXT_VF(vfmacc_vf_w, 4)
+GEN_VEXT_VF(vfmacc_vf_d, 8)
static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -3123,15 +3323,15 @@ static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
-GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
+GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
+GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
-GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
-GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
-GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
+GEN_VEXT_VF(vfnmacc_vf_h, 2)
+GEN_VEXT_VF(vfnmacc_vf_w, 4)
+GEN_VEXT_VF(vfnmacc_vf_d, 8)
static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -3151,15 +3351,15 @@ static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
-GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
+GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
+GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
-GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
-GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
-GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
+GEN_VEXT_VF(vfmsac_vf_h, 2)
+GEN_VEXT_VF(vfmsac_vf_w, 4)
+GEN_VEXT_VF(vfmsac_vf_d, 8)
static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -3179,15 +3379,15 @@ static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
-GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
+GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
+GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
-GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
-GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
-GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
+GEN_VEXT_VF(vfnmsac_vf_h, 2)
+GEN_VEXT_VF(vfnmsac_vf_w, 4)
+GEN_VEXT_VF(vfnmsac_vf_d, 8)
static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -3207,15 +3407,15 @@ static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
-GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
+GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
+GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
-GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
-GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
-GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
+GEN_VEXT_VF(vfmadd_vf_h, 2)
+GEN_VEXT_VF(vfmadd_vf_w, 4)
+GEN_VEXT_VF(vfmadd_vf_d, 8)
static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -3238,15 +3438,15 @@ static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
-GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
+GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
+GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
-GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
-GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
-GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
+GEN_VEXT_VF(vfnmadd_vf_h, 2)
+GEN_VEXT_VF(vfnmadd_vf_w, 4)
+GEN_VEXT_VF(vfnmadd_vf_d, 8)
static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -3266,15 +3466,15 @@ static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
-GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
+GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
+GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
-GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
-GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
-GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
+GEN_VEXT_VF(vfmsub_vf_h, 2)
+GEN_VEXT_VF(vfmsub_vf_w, 4)
+GEN_VEXT_VF(vfmsub_vf_d, 8)
static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
@@ -3294,15 +3494,15 @@ static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
-GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
+GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
+GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
-GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
-GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
-GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
+GEN_VEXT_VF(vfnmsub_vf_h, 2)
+GEN_VEXT_VF(vfnmsub_vf_w, 4)
+GEN_VEXT_VF(vfnmsub_vf_d, 8)
/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
@@ -3319,12 +3519,12 @@ static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
-GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
+GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
-GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
-GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
+GEN_VEXT_VF(vfwmacc_vf_h, 4)
+GEN_VEXT_VF(vfwmacc_vf_w, 8)
static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
@@ -3342,12 +3542,12 @@ static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
-GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
+GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
-GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
-GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
+GEN_VEXT_VF(vfwnmacc_vf_h, 4)
+GEN_VEXT_VF(vfwnmacc_vf_w, 8)
static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
@@ -3365,12 +3565,12 @@ static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
-GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
+GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
-GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
-GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
+GEN_VEXT_VF(vfwmsac_vf_h, 4)
+GEN_VEXT_VF(vfwmsac_vf_w, 8)
static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
@@ -3388,12 +3588,12 @@ static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
-GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
-GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
+GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
+GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
-GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
-GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
+GEN_VEXT_VF(vfwnmsac_vf_h, 4)
+GEN_VEXT_VF(vfwnmsac_vf_w, 8)
/* Vector Floating-Point Square-Root Instruction */
/* (TD, T2, TX2) */
@@ -3409,12 +3609,15 @@ static void do_##NAME(void *vd, void *vs2, int i, \
*((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
}
-#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \
+#define GEN_VEXT_V_ENV(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t total_elems = \
+ vext_get_total_elems(env, desc, ESZ); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
if (vl == 0) { \
@@ -3427,14 +3630,16 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \
do_##NAME(vd, vs2, i, env); \
} \
env->vstart = 0; \
+ vext_set_elems_1s(vd, vta, vl * ESZ, \
+ total_elems * ESZ); \
}
RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
-GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
-GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
-GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
+GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
+GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
+GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
/*
* Vector Floating-Point Reciprocal Square-Root Estimate Instruction
@@ -3614,9 +3819,9 @@ static float64 frsqrt7_d(float64 f, float_status *s)
RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
-GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2)
-GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4)
-GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8)
+GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
+GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
+GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
/*
* Vector Floating-Point Reciprocal Estimate Instruction
@@ -3805,36 +4010,36 @@ static float64 frec7_d(float64 f, float_status *s)
RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
-GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2)
-GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4)
-GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8)
+GEN_VEXT_V_ENV(vfrec7_v_h, 2)
+GEN_VEXT_V_ENV(vfrec7_v_w, 4)
+GEN_VEXT_V_ENV(vfrec7_v_d, 8)
/* Vector Floating-Point MIN/MAX Instructions */
RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
-GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
+GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
+GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
-GEN_VEXT_VF(vfmin_vf_h, 2, 2)
-GEN_VEXT_VF(vfmin_vf_w, 4, 4)
-GEN_VEXT_VF(vfmin_vf_d, 8, 8)
+GEN_VEXT_VF(vfmin_vf_h, 2)
+GEN_VEXT_VF(vfmin_vf_w, 4)
+GEN_VEXT_VF(vfmin_vf_d, 8)
RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
-GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
+GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
+GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
-GEN_VEXT_VF(vfmax_vf_h, 2, 2)
-GEN_VEXT_VF(vfmax_vf_w, 4, 4)
-GEN_VEXT_VF(vfmax_vf_d, 8, 8)
+GEN_VEXT_VF(vfmax_vf_h, 2)
+GEN_VEXT_VF(vfmax_vf_w, 4)
+GEN_VEXT_VF(vfmax_vf_d, 8)
/* Vector Floating-Point Sign-Injection Instructions */
static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
@@ -3855,15 +4060,15 @@ static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
-GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
+GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
+GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
-GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
-GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
-GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
+GEN_VEXT_VF(vfsgnj_vf_h, 2)
+GEN_VEXT_VF(vfsgnj_vf_w, 4)
+GEN_VEXT_VF(vfsgnj_vf_d, 8)
static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
{
@@ -3883,15 +4088,15 @@ static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
-GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
+GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
+GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
-GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
-GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
-GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
+GEN_VEXT_VF(vfsgnjn_vf_h, 2)
+GEN_VEXT_VF(vfsgnjn_vf_w, 4)
+GEN_VEXT_VF(vfsgnjn_vf_d, 8)
static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
{
@@ -3911,15 +4116,15 @@ static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
-GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
-GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
-GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
+GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
+GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
+GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
-GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
-GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
-GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
+GEN_VEXT_VF(vfsgnjx_vf_h, 2)
+GEN_VEXT_VF(vfsgnjx_vf_w, 4)
+GEN_VEXT_VF(vfsgnjx_vf_d, 8)
/* Vector Floating-Point Compare Instructions */
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
@@ -3928,6 +4133,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
+ uint32_t vta_all_1s = vext_vta_all_1s(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -3940,6 +4147,13 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
DO_OP(s2, s1, &env->fp_status)); \
} \
env->vstart = 0; \
+    /* mask destination registers are always tail-agnostic */           \
+ /* set tail elements to 1s */ \
+ if (vta_all_1s) { \
+ for (; i < total_elems; i++) { \
+ vext_set_elem_mask(vd, i, 1); \
+ } \
+ } \
}
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
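Compare instructions produce one mask bit per element, so their tail handling differs from the element-wise helpers above: a mask destination is always treated as tail-agnostic, and when the optional all-1s behaviour is enabled the bits from vl up to the register size in bits are set one by one. A toy model, treating the mask register as a flat bit array; set_mask_bit() and fill_mask_tail() are stand-ins for the real helpers:

    #include <stdint.h>

    static void set_mask_bit(uint8_t *vd, uint32_t i, int val)
    {
        if (val) {
            vd[i >> 3] |= (uint8_t)(1u << (i & 7));
        } else {
            vd[i >> 3] &= (uint8_t)~(1u << (i & 7));
        }
    }

    /* Mask destinations are tail-agnostic; the 1s fill only happens when
     * the optional all-1s behaviour is turned on. */
    static void fill_mask_tail(uint8_t *vd, uint32_t vl, uint32_t vlen_bits,
                               int vta_all_1s)
    {
        if (!vta_all_1s) {
            return;                     /* leave the tail undisturbed */
        }
        for (uint32_t i = vl; i < vlen_bits; i++) {
            set_mask_bit(vd, i, 1);
        }
    }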
@@ -3952,6 +4166,8 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
+ uint32_t vta_all_1s = vext_vta_all_1s(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -3963,6 +4179,13 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
} \
env->vstart = 0; \
+    /* mask destination registers are always tail-agnostic */           \
+ /* set tail elements to 1s */ \
+ if (vta_all_1s) { \
+ for (; i < total_elems; i++) { \
+ vext_set_elem_mask(vd, i, 1); \
+ } \
+ } \
}
GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
@@ -4063,12 +4286,15 @@ static void do_##NAME(void *vd, void *vs2, int i) \
*((TD *)vd + HD(i)) = OP(s2); \
}
-#define GEN_VEXT_V(NAME, ESZ, DSZ) \
+#define GEN_VEXT_V(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t total_elems = \
+ vext_get_total_elems(env, desc, ESZ); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -4078,6 +4304,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \
do_##NAME(vd, vs2, i); \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * ESZ, \
+ total_elems * ESZ); \
}
target_ulong fclass_h(uint64_t frs1)
@@ -4140,17 +4369,22 @@ target_ulong fclass_d(uint64_t frs1)
RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
-GEN_VEXT_V(vfclass_v_h, 2, 2)
-GEN_VEXT_V(vfclass_v_w, 4, 4)
-GEN_VEXT_V(vfclass_v_d, 8, 8)
+GEN_VEXT_V(vfclass_v_h, 2)
+GEN_VEXT_V(vfclass_v_w, 4)
+GEN_VEXT_V(vfclass_v_d, 8)
/* Vector Floating-Point Merge Instruction */
+
#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = \
+ vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -4159,6 +4393,8 @@ void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
= (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
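Where the macro is already parameterised by the element C type, as in the merge helper above, the element size needs no new argument at all; sizeof(ETYPE) is enough. A toy version of that pattern (DEF_FILL_TAIL and the generated names are made up for illustration):

    #include <stdint.h>
    #include <string.h>

    #define DEF_FILL_TAIL(NAME, ETYPE)                                        \
        static void NAME(void *vd, uint32_t vta, uint32_t vl, uint32_t total) \
        {                                                                     \
            uint32_t esz = sizeof(ETYPE);                                     \
            if (vta && total > vl) {                                          \
                memset((uint8_t *)vd + vl * esz, 0xff, (total - vl) * esz);   \
            }                                                                 \
        }

    DEF_FILL_TAIL(fill_tail_h, int16_t)
    DEF_FILL_TAIL(fill_tail_w, int32_t)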
@@ -4170,33 +4406,33 @@ GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
-GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
-GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
-GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
+GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
-GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
-GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
-GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)
+GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
+GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
+GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
-GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
-GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
-GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
+GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
-GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
-GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
-GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
+GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
+GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
+GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
/* Widening Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
@@ -4206,30 +4442,30 @@ GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
-GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
-GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)
+GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
+GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
-GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
-GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)
+GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
+GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
-GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
-GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
-GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)
+GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
+GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
+GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
-GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
-GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
-GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
+GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
+GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
+GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
/*
* vfwcvt.f.f.v vd, vs2, vm
@@ -4242,8 +4478,8 @@ static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
-GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
-GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
+GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
+GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
@@ -4254,29 +4490,29 @@ GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
-GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
-GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
-GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)
+GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
+GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
+GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
-GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
-GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
-GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)
+GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
+GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
+GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
-GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
-GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)
+GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
+GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
-GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
-GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)
+GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
+GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
static uint16_t vfncvtffv16(uint32_t a, float_status *s)
@@ -4286,8 +4522,8 @@ static uint16_t vfncvtffv16(uint32_t a, float_status *s)
RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
-GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
-GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)
+GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
+GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
/*
*** Vector Reduction Operations
@@ -4299,6 +4535,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(TD); \
+ uint32_t vlenb = simd_maxsz(desc); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
TD s1 = *((TD *)vs1 + HD(0)); \
\
@@ -4311,6 +4550,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
} \
*((TD *)vd + HD(0)) = s1; \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, esz, vlenb); \
}
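A reduction writes only element 0 of vd, so its agnostic tail begins right after the first element and runs to the end of a single vector register (vlenb bytes from simd_maxsz), independent of LMUL. A minimal sketch of that case, with reduction_tail_1s() standing in for the call added above:

    #include <stdint.h>
    #include <string.h>

    static void reduction_tail_1s(void *vd, uint32_t vta,
                                  uint32_t esz, uint32_t vlenb)
    {
        if (vta && vlenb > esz) {
            memset((uint8_t *)vd + esz, 0xff, vlenb - esz);
        }
    }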
/* vd[0] = sum(vs1[0], vs2[*]) */
@@ -4380,6 +4621,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(TD); \
+ uint32_t vlenb = simd_maxsz(desc); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
TD s1 = *((TD *)vs1 + HD(0)); \
\
@@ -4392,6 +4636,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
} \
*((TD *)vd + HD(0)) = s1; \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, esz, vlenb); \
}
/* Unordered sum */
@@ -4416,6 +4662,9 @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
{
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
+ uint32_t esz = sizeof(uint32_t);
+ uint32_t vlenb = simd_maxsz(desc);
+ uint32_t vta = vext_vta(desc);
uint32_t i;
uint32_t s1 = *((uint32_t *)vs1 + H4(0));
@@ -4429,6 +4678,8 @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
}
*((uint32_t *)vd + H4(0)) = s1;
env->vstart = 0;
+ /* set tail elements to 1s */
+ vext_set_elems_1s(vd, vta, esz, vlenb);
}
void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
@@ -4436,6 +4687,9 @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
{
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
+ uint32_t esz = sizeof(uint64_t);
+ uint32_t vlenb = simd_maxsz(desc);
+ uint32_t vta = vext_vta(desc);
uint32_t i;
uint64_t s1 = *((uint64_t *)vs1);
@@ -4449,6 +4703,8 @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
}
*((uint64_t *)vd) = s1;
env->vstart = 0;
+ /* set tail elements to 1s */
+ vext_set_elems_1s(vd, vta, esz, vlenb);
}
/*
@@ -4461,6 +4717,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
uint32_t desc) \
{ \
uint32_t vl = env->vl; \
+ uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
+ uint32_t vta_all_1s = vext_vta_all_1s(desc); \
uint32_t i; \
int a, b; \
\
@@ -4470,6 +4728,15 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, \
vext_set_elem_mask(vd, i, OP(b, a)); \
} \
env->vstart = 0; \
+    /* mask destination registers are always                  \
+     * tail-agnostic                                           \
+     */                                                        \
+ /* set tail elements to 1s */ \
+ if (vta_all_1s) { \
+ for (; i < total_elems; i++) { \
+ vext_set_elem_mask(vd, i, 1); \
+ } \
+ } \
}
#define DO_NAND(N, M) (!(N & M))
@@ -4537,6 +4804,8 @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
{
uint32_t vm = vext_vm(desc);
uint32_t vl = env->vl;
+ uint32_t total_elems = env_archcpu(env)->cfg.vlen;
+ uint32_t vta_all_1s = vext_vta_all_1s(desc);
int i;
bool first_mask_bit = false;
@@ -4565,6 +4834,13 @@ static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
}
}
env->vstart = 0;
+    /* mask destination registers are always tail-agnostic */
+ /* set tail elements to 1s */
+ if (vta_all_1s) {
+ for (; i < total_elems; i++) {
+ vext_set_elem_mask(vd, i, 1);
+ }
+ }
}
void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
@@ -4592,6 +4868,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint32_t sum = 0; \
int i; \
\
@@ -4605,6 +4884,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
} \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
@@ -4618,6 +4899,9 @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
int i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -4627,6 +4911,8 @@ void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
*((ETYPE *)vd + H(i)) = i; \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
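viota.m and vid.v follow the same element-wise pattern, which makes the three element classes visible: active body elements are computed, masked-off body elements are skipped and left as they were, and only the tail past vl is filled. A toy model of that split with a vid-style body; the names and the flat layout are assumptions for illustration:

    #include <stdint.h>
    #include <string.h>

    static void body_and_tail(uint32_t *vd, const uint8_t *mask, int vm,
                              uint32_t vl, uint32_t total, uint32_t vta)
    {
        for (uint32_t i = 0; i < vl; i++) {
            if (!vm && !((mask[i >> 3] >> (i & 7)) & 1)) {
                continue;               /* inactive body element: undisturbed */
            }
            vd[i] = i;                  /* active body element (vid-style) */
        }
        if (vta) {                      /* agnostic tail: set to all 1s */
            memset(&vd[vl], 0xff, (total - vl) * sizeof(uint32_t));
        }
    }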
@@ -4645,6 +4931,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
{ \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
target_ulong offset = s1, i_min, i; \
\
i_min = MAX(env->vstart, offset); \
@@ -4654,6 +4943,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
} \
*((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
} \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
@@ -4669,6 +4960,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
target_ulong i_max, i; \
\
i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \
@@ -4685,6 +4979,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
} \
\
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
@@ -4693,13 +4989,16 @@ GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
-#define GEN_VEXT_VSLIE1UP(ESZ, H) \
-static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
- CPURISCVState *env, uint32_t desc) \
+#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
+static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \
+ void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
- typedef uint##ESZ##_t ETYPE; \
+ typedef uint##BITWIDTH##_t ETYPE; \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -4713,6 +5012,8 @@ static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
} \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
GEN_VEXT_VSLIE1UP(8, H1)
@@ -4720,11 +5021,11 @@ GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)
-#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \
+#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
+ vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}
/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
@@ -4733,13 +5034,16 @@ GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
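The rename from ESZ to BITWIDTH in these slide1up/slide1down generators avoids overloading ESZ, which elsewhere in this file now means the element size in bytes; here the parameter is the element width in bits, because it is token-pasted into a uint##N##_t type name. A toy example of the token-pasting pattern (DEF_FIRST_ELEM and first_elem_* are invented for illustration):

    #include <stdint.h>
    #include <stdio.h>

    #define DEF_FIRST_ELEM(BITWIDTH)                                    \
        static uint##BITWIDTH##_t first_elem_##BITWIDTH(const void *v)  \
        {                                                               \
            return *(const uint##BITWIDTH##_t *)v;                      \
        }

    DEF_FIRST_ELEM(16)
    DEF_FIRST_ELEM(32)

    int main(void)
    {
        uint32_t x = 0x11223344u;
        printf("%x %x\n", first_elem_16(&x), first_elem_32(&x));
        return 0;
    }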
-#define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \
-static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
- CPURISCVState *env, uint32_t desc) \
+#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
+static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \
+ void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
- typedef uint##ESZ##_t ETYPE; \
+ typedef uint##BITWIDTH##_t ETYPE; \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -4753,6 +5057,8 @@ static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
} \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
GEN_VEXT_VSLIDE1DOWN(8, H1)
@@ -4760,11 +5066,11 @@ GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)
-#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \
+#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
+ vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}
/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
@@ -4774,11 +5080,11 @@ GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
/* Vector Floating-Point Slide Instructions */
-#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \
+#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \
+ vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}
/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
@@ -4786,11 +5092,11 @@ GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
-#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \
+#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
- vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \
+ vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}
/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
@@ -4806,6 +5112,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(TS2); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint64_t index; \
uint32_t i; \
\
@@ -4821,6 +5130,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
} \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
@@ -4841,6 +5152,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
uint32_t vm = vext_vm(desc); \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint64_t index = s1; \
uint32_t i; \
\
@@ -4855,6 +5169,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
} \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
@@ -4869,6 +5185,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
CPURISCVState *env, uint32_t desc) \
{ \
uint32_t vl = env->vl; \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint32_t num = 0, i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -4879,6 +5198,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
num++; \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
/* Compress into vd elements of vs2 where vs1 is enabled */
@@ -4910,6 +5231,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \
{ \
uint32_t vl = env->vl; \
uint32_t vm = vext_vm(desc); \
+ uint32_t esz = sizeof(ETYPE); \
+ uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
+ uint32_t vta = vext_vta(desc); \
uint32_t i; \
\
for (i = env->vstart; i < vl; i++) { \
@@ -4919,6 +5243,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs2, \
*((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
} \
env->vstart = 0; \
+ /* set tail elements to 1s */ \
+ vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}
GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)