aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2019-06-03 18:08:59 +0100
committerPeter Maydell <peter.maydell@linaro.org>2019-06-04 14:58:30 +0100
commita3bfbe6695cbdf0c971133bfb3db120cf90868d0 (patch)
tree39a98561edb7df8d45f785a447185b81f7af2c3f
parent0fa35ff0ac9e6e65566dd6da9907fe98ec9a1a0e (diff)
target/arm: Convert the VCVT-to-f16 insns to decodetree
Convert the VCVTT and VCVTB instructions which convert from f32 and f64 to f16 to decodetree. Since we're no longer constrained to the old decoder's style using cpu_F0s and cpu_F0d we can perform a direct 16 bit store of the right half of the input single-precision register rather than doing a load/modify/store sequence on the full 32 bits. Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--target/arm/translate-vfp.inc.c62
-rw-r--r--target/arm/translate.c79
-rw-r--r--target/arm/vfp.decode6
3 files changed, 69 insertions, 78 deletions
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index b24578df8d..def69bc765 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -2105,3 +2105,65 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
tcg_temp_free_i64(vd);
return true;
}
+
+static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 ahp_mode;
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_fp16_spconv, s)) {
+ return 1;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = get_fpstatus_ptr(false);
+ ahp_mode = get_ahp_flag();
+ tmp = tcg_temp_new_i32();
+
+ tcg_gen_ld_f32(tmp, cpu_env, vfp_reg_offset(false, a->vm));
+ gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
+ tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
+ tcg_temp_free_i32(ahp_mode);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
+
+static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 ahp_mode;
+ TCGv_i32 tmp;
+ TCGv_i64 vm;
+
+ if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
+ return 1;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = get_fpstatus_ptr(false);
+ ahp_mode = get_ahp_flag();
+ tmp = tcg_temp_new_i32();
+ vm = tcg_temp_new_i64();
+
+ tcg_gen_ld_f64(vm, cpu_env, vfp_reg_offset(false, a->vm));
+ gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
+ tcg_temp_free_i64(vm);
+ tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
+ tcg_temp_free_i32(ahp_mode);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tmp);
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index a6efc86167..b643f67a16 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -2953,20 +2953,6 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
#define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
-/* Move between integer and VFP cores. */
-static TCGv_i32 gen_vfp_mrs(void)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- tcg_gen_mov_i32(tmp, cpu_F0s);
- return tmp;
-}
-
-static void gen_vfp_msr(TCGv_i32 tmp)
-{
- tcg_gen_mov_i32(cpu_F0s, tmp);
- tcg_temp_free_i32(tmp);
-}
-
static void gen_neon_dup_low16(TCGv_i32 var)
{
TCGv_i32 tmp = tcg_temp_new_i32();
@@ -2993,8 +2979,6 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
{
uint32_t rd, rn, rm, op, delta_d, delta_m, bank_mask;
int dp, veclen;
- TCGv_i32 tmp;
- TCGv_i32 tmp2;
if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
return 1;
@@ -3056,8 +3040,7 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
return 1;
case 15:
switch (rn) {
- case 0 ... 5:
- case 8 ... 11:
+ case 0 ... 11:
/* Already handled by decodetree */
return 1;
default:
@@ -3070,20 +3053,6 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
if (op == 15) {
/* rn is opcode, encoded as per VFP_SREG_N. */
switch (rn) {
- case 0x06: /* vcvtb.f16.f32, vcvtb.f16.f64 */
- case 0x07: /* vcvtt.f16.f32, vcvtt.f16.f64 */
- if (dp) {
- if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
- return 1;
- }
- } else {
- if (!dc_isar_feature(aa32_fp16_spconv, s)) {
- return 1;
- }
- }
- rd_is_dp = false;
- break;
-
case 0x0c: /* vrintr */
case 0x0d: /* vrintz */
case 0x0e: /* vrintx */
@@ -3211,52 +3180,6 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
switch (op) {
case 15: /* extension space */
switch (rn) {
- case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
- {
- TCGv_ptr fpst = get_fpstatus_ptr(false);
- TCGv_i32 ahp = get_ahp_flag();
- tmp = tcg_temp_new_i32();
-
- if (dp) {
- gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
- fpst, ahp);
- } else {
- gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
- fpst, ahp);
- }
- tcg_temp_free_i32(ahp);
- tcg_temp_free_ptr(fpst);
- gen_mov_F0_vreg(0, rd);
- tmp2 = gen_vfp_mrs();
- tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
- tcg_gen_or_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- gen_vfp_msr(tmp);
- break;
- }
- case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
- {
- TCGv_ptr fpst = get_fpstatus_ptr(false);
- TCGv_i32 ahp = get_ahp_flag();
- tmp = tcg_temp_new_i32();
- if (dp) {
- gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
- fpst, ahp);
- } else {
- gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
- fpst, ahp);
- }
- tcg_temp_free_i32(ahp);
- tcg_temp_free_ptr(fpst);
- tcg_gen_shli_i32(tmp, tmp, 16);
- gen_mov_F0_vreg(0, rd);
- tmp2 = gen_vfp_mrs();
- tcg_gen_ext16u_i32(tmp2, tmp2);
- tcg_gen_or_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- gen_vfp_msr(tmp);
- break;
- }
case 12: /* vrintr */
{
TCGv_ptr fpst = get_fpstatus_ptr(0);
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index 4c181cdf89..5b43a06c35 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -175,3 +175,9 @@ VCVT_f32_f16 ---- 1110 1.11 0010 .... 1010 t:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_f64_f16 ---- 1110 1.11 0010 .... 1011 t:1 1.0 .... \
vd=%vd_dp vm=%vm_sp
+
+# VCVTB and VCVTT to f16: Vd format is always vd_sp; Vm format depends on size bit
+VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \
+ vd=%vd_sp vm=%vm_sp
+VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
+ vd=%vd_sp vm=%vm_dp