diff --git a/qemu/target/arm/translate-neon.inc.c b/qemu/target/arm/translate-neon.inc.c
index 358aa98c..f5686d04 100644
--- a/qemu/target/arm/translate-neon.inc.c
+++ b/qemu/target/arm/translate-neon.inc.c
@@ -974,18 +974,24 @@ static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
      * early. Since Q is 0 there are always just two passes, so instead
      * of a complicated loop over each pass we just unroll.
      */
-    tmp = neon_load_reg(s, a->vn, 0);
-    tmp2 = neon_load_reg(s, a->vn, 1);
+    tmp = tcg_temp_new_i32(tcg_ctx);
+    tmp2 = tcg_temp_new_i32(tcg_ctx);
+    tmp3 = tcg_temp_new_i32(tcg_ctx);
+
+    read_neon_element32(s, tmp, a->vn, 0, MO_32);
+    read_neon_element32(s, tmp2, a->vn, 1, MO_32);
     fn(tcg_ctx, tmp, tmp, tmp2);
-    tcg_temp_free_i32(tcg_ctx, tmp2);
 
-    tmp3 = neon_load_reg(s, a->vm, 0);
-    tmp2 = neon_load_reg(s, a->vm, 1);
+    read_neon_element32(s, tmp3, a->vm, 0, MO_32);
+    read_neon_element32(s, tmp2, a->vm, 1, MO_32);
     fn(tcg_ctx, tmp3, tmp3, tmp2);
-    tcg_temp_free_i32(tcg_ctx, tmp2);
 
-    neon_store_reg(s, a->vd, 0, tmp);
-    neon_store_reg(s, a->vd, 1, tmp3);
+    write_neon_element32(s, tmp, a->vd, 0, MO_32);
+    write_neon_element32(s, tmp3, a->vd, 1, MO_32);
+
+    tcg_temp_free_i32(tcg_ctx, tmp);
+    tcg_temp_free_i32(tcg_ctx, tmp2);
+    tcg_temp_free_i32(tcg_ctx, tmp3);
     return true;
 }
 
@@ -1298,7 +1304,7 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
      * helper needs to be passed cpu_env.
      */
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
-    TCGv_i32 constimm;
+    TCGv_i32 constimm, tmp;
     int pass;
 
     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@@ -1324,12 +1330,14 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
      * by immediate using the variable shift operations.
      */
     constimm = tcg_const_i32(tcg_ctx, dup_const(a->size, a->shift));
+    tmp = tcg_temp_new_i32(tcg_ctx);
 
     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
-        TCGv_i32 tmp = neon_load_reg(s, a->vm, pass);
+        read_neon_element32(s, tmp, a->vm, pass, MO_32);
         fn(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, constimm);
-        neon_store_reg(s, a->vd, pass, tmp);
+        write_neon_element32(s, tmp, a->vd, pass, MO_32);
     }
+    tcg_temp_free_i32(tcg_ctx, tmp);
     tcg_temp_free_i32(tcg_ctx, constimm);
     return true;
 }
@@ -1388,21 +1396,21 @@ static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
     constimm = tcg_const_i64(tcg_ctx, -a->shift);
     rm1 = tcg_temp_new_i64(tcg_ctx);
     rm2 = tcg_temp_new_i64(tcg_ctx);
+    rd = tcg_temp_new_i32(tcg_ctx);
 
     /* Load both inputs first to avoid potential overwrite if rm == rd */
     neon_load_reg64(s, rm1, a->vm);
     neon_load_reg64(s, rm2, a->vm + 1);
 
     shiftfn(tcg_ctx, rm1, rm1, constimm);
-    rd = tcg_temp_new_i32(tcg_ctx);
     narrowfn(tcg_ctx, rd, tcg_ctx->cpu_env, rm1);
-    neon_store_reg(s, a->vd, 0, rd);
+    write_neon_element32(s, rd, a->vd, 0, MO_32);
 
     shiftfn(tcg_ctx, rm2, rm2, constimm);
-    rd = tcg_temp_new_i32(tcg_ctx);
     narrowfn(tcg_ctx, rd, tcg_ctx->cpu_env, rm2);
-    neon_store_reg(s, a->vd, 1, rd);
+    write_neon_element32(s, rd, a->vd, 1, MO_32);
+    tcg_temp_free_i32(tcg_ctx, rd);
 
     tcg_temp_free_i64(tcg_ctx, rm1);
     tcg_temp_free_i64(tcg_ctx, rm2);
     tcg_temp_free_i64(tcg_ctx, constimm);
@@ -1453,10 +1461,14 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
     constimm = tcg_const_i32(tcg_ctx, imm);
 
     /* Load all inputs first to avoid potential overwrite */
-    rm1 = neon_load_reg(s, a->vm, 0);
-    rm2 = neon_load_reg(s, a->vm, 1);
-    rm3 = neon_load_reg(s, a->vm + 1, 0);
-    rm4 = neon_load_reg(s, a->vm + 1, 1);
+    rm1 = tcg_temp_new_i32(tcg_ctx);
+    rm2 = tcg_temp_new_i32(tcg_ctx);
+    rm3 = tcg_temp_new_i32(tcg_ctx);
+    rm4 = tcg_temp_new_i32(tcg_ctx);
+    read_neon_element32(s, rm1, a->vm, 0, MO_32);
+    read_neon_element32(s, rm2, a->vm, 1, MO_32);
+    read_neon_element32(s, rm3, a->vm, 2, MO_32);
+    read_neon_element32(s, rm4, a->vm, 3, MO_32);
     rtmp = tcg_temp_new_i64(tcg_ctx);
 
     shiftfn(tcg_ctx, rm1, rm1, constimm);
@@ -1466,7 +1478,8 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
     tcg_temp_free_i32(tcg_ctx, rm2);
 
     narrowfn(tcg_ctx, rm1, tcg_ctx->cpu_env, rtmp);
-    neon_store_reg(s, a->vd, 0, rm1);
+    write_neon_element32(s, rm1, a->vd, 0, MO_32);
+    tcg_temp_free_i32(tcg_ctx, rm1);
 
     shiftfn(tcg_ctx, rm3, rm3, constimm);
     shiftfn(tcg_ctx, rm4, rm4, constimm);
@@ -1477,7 +1490,8 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
 
     narrowfn(tcg_ctx, rm3, tcg_ctx->cpu_env, rtmp);
     tcg_temp_free_i64(tcg_ctx, rtmp);
-    neon_store_reg(s, a->vd, 1, rm3);
+    write_neon_element32(s, rm3, a->vd, 1, MO_32);
+    tcg_temp_free_i32(tcg_ctx, rm3);
     return true;
 }
 
@@ -1579,8 +1593,10 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
         widen_mask = dup_const(a->size + 1, widen_mask);
     }
 
-    rm0 = neon_load_reg(s, a->vm, 0);
-    rm1 = neon_load_reg(s, a->vm, 1);
+    rm0 = tcg_temp_new_i32(tcg_ctx);
+    rm1 = tcg_temp_new_i32(tcg_ctx);
+    read_neon_element32(s, rm0, a->vm, 0, MO_32);
+    read_neon_element32(s, rm1, a->vm, 1, MO_32);
 
     tmp = tcg_temp_new_i64(tcg_ctx);
     widenfn(tcg_ctx, tmp, rm0);
@@ -1837,11 +1853,13 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
     if (src1_wide) {
         neon_load_reg64(s, rn0_64, a->vn);
     } else {
-        TCGv_i32 tmp = neon_load_reg(s, a->vn, 0);
+        TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx);
+        read_neon_element32(s, tmp, a->vn, 0, MO_32);
         widenfn(tcg_ctx, rn0_64, tmp);
         tcg_temp_free_i32(tcg_ctx, tmp);
     }
-    rm = neon_load_reg(s, a->vm, 0);
+    rm = tcg_temp_new_i32(tcg_ctx);
+    read_neon_element32(s, rm, a->vm, 0, MO_32);
     widenfn(tcg_ctx, rm_64, rm);
     tcg_temp_free_i32(tcg_ctx, rm);
 
@@ -1854,11 +1872,13 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
     if (src1_wide) {
         neon_load_reg64(s, rn1_64, a->vn + 1);
     } else {
-        TCGv_i32 tmp = neon_load_reg(s, a->vn, 1);
+        TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx);
+        read_neon_element32(s, tmp, a->vn, 1, MO_32);
         widenfn(tcg_ctx, rn1_64, tmp);
         tcg_temp_free_i32(tcg_ctx, tmp);
     }
-    rm = neon_load_reg(s, a->vm, 1);
+    rm = tcg_temp_new_i32(tcg_ctx);
+    read_neon_element32(s, rm, a->vm, 1, MO_32);
 
     neon_store_reg64(s, rn0_64, a->vd);
 
@@ -1952,8 +1972,11 @@ static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
 
     narrowfn(tcg_ctx, rd1, rn_64);
 
-    neon_store_reg(s, a->vd, 0, rd0);
-    neon_store_reg(s, a->vd, 1, rd1);
+    write_neon_element32(s, rd0, a->vd, 0, MO_32);
+    write_neon_element32(s, rd1, a->vd, 1, MO_32);
+
+    tcg_temp_free_i32(tcg_ctx, rd0);
+    tcg_temp_free_i32(tcg_ctx, rd1);
 
     tcg_temp_free_i64(tcg_ctx, rn_64);
     tcg_temp_free_i64(tcg_ctx, rm_64);
@@ -2030,14 +2053,14 @@ static bool do_long_3d(DisasContext *s, arg_3diff *a,
     rd0 = tcg_temp_new_i64(tcg_ctx);
     rd1 = tcg_temp_new_i64(tcg_ctx);
 
-    rn = neon_load_reg(s, a->vn, 0);
-    rm = neon_load_reg(s, a->vm, 0);
+    rn = tcg_temp_new_i32(tcg_ctx);
+    rm = tcg_temp_new_i32(tcg_ctx);
+    read_neon_element32(s, rn, a->vn, 0, MO_32);
+    read_neon_element32(s, rm, a->vm, 0, MO_32);
     opfn(tcg_ctx, rd0, rn, rm);
-    tcg_temp_free_i32(tcg_ctx, rn);
-    tcg_temp_free_i32(tcg_ctx, rm);
 
-    rn = neon_load_reg(s, a->vn, 1);
-    rm = neon_load_reg(s, a->vm, 1);
+    read_neon_element32(s, rn, a->vn, 1, MO_32);
+    read_neon_element32(s, rm, a->vm, 1, MO_32);
     opfn(tcg_ctx, rd1, rn, rm);
     tcg_temp_free_i32(tcg_ctx, rn);
     tcg_temp_free_i32(tcg_ctx, rm);
@@ -2341,16 +2364,16 @@ static void gen_neon_dup_high16(TCGContext *s, TCGv_i32 var)
 static inline TCGv_i32 neon_get_scalar(DisasContext *s, int size, int reg)
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
-    TCGv_i32 tmp;
-    if (size == 1) {
-        tmp = neon_load_reg(s, reg & 7, reg >> 4);
+    TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx);
+    if (size == MO_16) {
+        read_neon_element32(s, tmp, reg & 7, reg >> 4, MO_32);
         if (reg & 8) {
             gen_neon_dup_high16(tcg_ctx, tmp);
         } else {
             gen_neon_dup_low16(tcg_ctx, tmp);
         }
     } else {
-        tmp = neon_load_reg(s, reg & 15, reg >> 4);
+        read_neon_element32(s, tmp, reg & 15, reg >> 4, MO_32);
     }
     return tmp;
 }
@@ -2365,7 +2388,7 @@ static bool do_2scalar(DisasContext *s, arg_2scalar *a,
      * destination.
      */
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
-    TCGv_i32 scalar;
+    TCGv_i32 scalar, tmp;
     int pass;
 
     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@@ -2392,17 +2415,20 @@ static bool do_2scalar(DisasContext *s, arg_2scalar *a,
     }
 
     scalar = neon_get_scalar(s, a->size, a->vm);
+    tmp = tcg_temp_new_i32(tcg_ctx);
 
     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
-        TCGv_i32 tmp = neon_load_reg(s, a->vn, pass);
+        read_neon_element32(s, tmp, a->vn, pass, MO_32);
         opfn(tcg_ctx, tmp, tmp, scalar);
         if (accfn) {
-            TCGv_i32 rd = neon_load_reg(s, a->vd, pass);
+            TCGv_i32 rd = tcg_temp_new_i32(tcg_ctx);
+            read_neon_element32(s, rd, a->vd, pass, MO_32);
             accfn(tcg_ctx, tmp, rd, tmp);
             tcg_temp_free_i32(tcg_ctx, rd);
         }
-        neon_store_reg(s, a->vd, pass, tmp);
+        write_neon_element32(s, tmp, a->vd, pass, MO_32);
     }
+    tcg_temp_free_i32(tcg_ctx, tmp);
     tcg_temp_free_i32(tcg_ctx, scalar);
     return true;
 }
@@ -2559,7 +2585,7 @@ static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
      * function that takes all of rd, rn and the scalar at once.
      */
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
-    TCGv_i32 scalar;
+    TCGv_i32 scalar, rn, rd;
     int pass;
 
     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@@ -2590,14 +2616,17 @@ static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
     }
 
     scalar = neon_get_scalar(s, a->size, a->vm);
+    rn = tcg_temp_new_i32(tcg_ctx);
+    rd = tcg_temp_new_i32(tcg_ctx);
 
     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
-        TCGv_i32 rn = neon_load_reg(s, a->vn, pass);
-        TCGv_i32 rd = neon_load_reg(s, a->vd, pass);
+        read_neon_element32(s, rn, a->vn, pass, MO_32);
+        read_neon_element32(s, rd, a->vd, pass, MO_32);
         opfn(tcg_ctx, rd, tcg_ctx->cpu_env, rn, scalar, rd);
-        tcg_temp_free_i32(tcg_ctx, rn);
-        neon_store_reg(s, a->vd, pass, rd);
+        write_neon_element32(s, rd, a->vd, pass, MO_32);
     }
+    tcg_temp_free_i32(tcg_ctx, rn);
+    tcg_temp_free_i32(tcg_ctx, rd);
     tcg_temp_free_i32(tcg_ctx, scalar);
 
     return true;
@@ -2666,12 +2695,12 @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
     scalar = neon_get_scalar(s, a->size, a->vm);
 
     /* Load all inputs before writing any outputs, in case of overlap */
-    rn = neon_load_reg(s, a->vn, 0);
+    rn = tcg_temp_new_i32(tcg_ctx);
+    read_neon_element32(s, rn, a->vn, 0, MO_32);
     rn0_64 = tcg_temp_new_i64(tcg_ctx);
     opfn(tcg_ctx, rn0_64, rn, scalar);
-    tcg_temp_free_i32(tcg_ctx, rn);
 
-    rn = neon_load_reg(s, a->vn, 1);
+    read_neon_element32(s, rn, a->vn, 1, MO_32);
     rn1_64 = tcg_temp_new_i64(tcg_ctx);
     opfn(tcg_ctx, rn1_64, rn, scalar);
     tcg_temp_free_i32(tcg_ctx, rn);
@@ -2898,30 +2927,34 @@ static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
         return false;
     }
     n <<= 3;
+    tmp = tcg_temp_new_i32(tcg_ctx);
     if (a->op) {
-        tmp = neon_load_reg(s, a->vd, 0);
+        read_neon_element32(s, tmp, a->vd, 0, MO_32);
     } else {
-        tmp = tcg_temp_new_i32(tcg_ctx);
         tcg_gen_movi_i32(tcg_ctx, tmp, 0);
     }
-    tmp2 = neon_load_reg(s, a->vm, 0);
+    tmp2 = tcg_temp_new_i32(tcg_ctx);
+    read_neon_element32(s, tmp2, a->vm, 0, MO_32);
     ptr1 = vfp_reg_ptr(s, true, a->vn);
     tmp4 = tcg_const_i32(tcg_ctx, n);
     gen_helper_neon_tbl(tcg_ctx, tmp2, tmp2, tmp, ptr1, tmp4);
-    tcg_temp_free_i32(tcg_ctx, tmp);
+
     if (a->op) {
-        tmp = neon_load_reg(s, a->vd, 1);
+        read_neon_element32(s, tmp, a->vd, 1, MO_32);
     } else {
-        tmp = tcg_temp_new_i32(tcg_ctx);
         tcg_gen_movi_i32(tcg_ctx, tmp, 0);
     }
-    tmp3 = neon_load_reg(s, a->vm, 1);
+    tmp3 = tcg_temp_new_i32(tcg_ctx);
+    read_neon_element32(s, tmp3, a->vm, 1, MO_32);
     gen_helper_neon_tbl(tcg_ctx, tmp3, tmp3, tmp, ptr1, tmp4);
+    tcg_temp_free_i32(tcg_ctx, tmp);
     tcg_temp_free_i32(tcg_ctx, tmp4);
     tcg_temp_free_ptr(tcg_ctx, ptr1);
-    neon_store_reg(s, a->vd, 0, tmp2);
-    neon_store_reg(s, a->vd, 1, tmp3);
-    tcg_temp_free_i32(tcg_ctx, tmp);
+
+    write_neon_element32(s, tmp2, a->vd, 0, MO_32);
+    write_neon_element32(s, tmp3, a->vd, 1, MO_32);
+    tcg_temp_free_i32(tcg_ctx, tmp2);
+    tcg_temp_free_i32(tcg_ctx, tmp3);
     return true;
 }
 
@@ -2954,8 +2987,9 @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
 static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
 {
-    int pass, half;
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
+    int pass, half;
+    TCGv_i32 tmp[2];
 
     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
         return false;
     }
@@ -2979,11 +3013,12 @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
         return true;
     }
 
-    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
-        TCGv_i32 tmp[2];
+    tmp[0] = tcg_temp_new_i32(tcg_ctx);
+    tmp[1] = tcg_temp_new_i32(tcg_ctx);
 
+    for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
         for (half = 0; half < 2; half++) {
-            tmp[half] = neon_load_reg(s, a->vm, pass * 2 + half);
+            read_neon_element32(s, tmp[half], a->vm, pass * 2 + half, MO_32);
             switch (a->size) {
             case 0:
                 tcg_gen_bswap32_i32(tcg_ctx, tmp[half], tmp[half]);
@@ -2997,9 +3032,12 @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
                 g_assert_not_reached();
             }
         }
-        neon_store_reg(s, a->vd, pass * 2, tmp[1]);
-        neon_store_reg(s, a->vd, pass * 2 + 1, tmp[0]);
+        write_neon_element32(s, tmp[1], a->vd, pass * 2, MO_32);
+        write_neon_element32(s, tmp[0], a->vd, pass * 2 + 1, MO_32);
     }
+
+    tcg_temp_free_i32(tcg_ctx, tmp[0]);
+    tcg_temp_free_i32(tcg_ctx, tmp[1]);
     return true;
 }
 
@@ -3045,12 +3083,14 @@ static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
         rm0_64 = tcg_temp_new_i64(tcg_ctx);
         rm1_64 = tcg_temp_new_i64(tcg_ctx);
         rd_64 = tcg_temp_new_i64(tcg_ctx);
-        tmp = neon_load_reg(s, a->vm, pass * 2);
+
+        tmp = tcg_temp_new_i32(tcg_ctx);
+        read_neon_element32(s, tmp, a->vm, pass * 2, MO_32);
         widenfn(tcg_ctx, rm0_64, tmp);
-        tcg_temp_free_i32(tcg_ctx, tmp);
-        tmp = neon_load_reg(s, a->vm, pass * 2 + 1);
+        read_neon_element32(s, tmp, a->vm, pass * 2 + 1, MO_32);
         widenfn(tcg_ctx, rm1_64, tmp);
         tcg_temp_free_i32(tcg_ctx, tmp);
+
         opfn(tcg_ctx, rd_64, rm0_64, rm1_64);
         tcg_temp_free_i64(tcg_ctx, rm0_64);
         tcg_temp_free_i64(tcg_ctx, rm1_64);
@@ -3265,8 +3305,10 @@ static bool do_vmovn(DisasContext *s, arg_2misc *a,
     narrowfn(tcg_ctx, rd0, tcg_ctx->cpu_env, rm);
     neon_load_reg64(s, rm, a->vm + 1);
     narrowfn(tcg_ctx, rd1, tcg_ctx->cpu_env, rm);
-    neon_store_reg(s, a->vd, 0, rd0);
-    neon_store_reg(s, a->vd, 1, rd1);
+    write_neon_element32(s, rd0, a->vd, 0, MO_32);
+    write_neon_element32(s, rd1, a->vd, 1, MO_32);
+    tcg_temp_free_i32(tcg_ctx, rd0);
+    tcg_temp_free_i32(tcg_ctx, rd1);
     tcg_temp_free_i64(tcg_ctx, rm);
     return true;
 }
@@ -3324,9 +3366,11 @@ static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
     }
 
     rd = tcg_temp_new_i64(tcg_ctx);
+    rm0 = tcg_temp_new_i32(tcg_ctx);
+    rm1 = tcg_temp_new_i32(tcg_ctx);
 
-    rm0 = neon_load_reg(s, a->vm, 0);
-    rm1 = neon_load_reg(s, a->vm, 1);
+    read_neon_element32(s, rm0, a->vm, 0, MO_32);
+    read_neon_element32(s, rm1, a->vm, 1, MO_32);
 
     widenfn(tcg_ctx, rd, rm0);
     tcg_gen_shli_i64(tcg_ctx, rd, rd, 8 << a->size);
@@ -3368,21 +3412,25 @@ static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
 
     fpst = fpstatus_ptr(tcg_ctx, FPST_STD);
     ahp = get_ahp_flag(s);
-    tmp = neon_load_reg(s, a->vm, 0);
+    tmp = tcg_temp_new_i32(tcg_ctx);
+    read_neon_element32(s, tmp, a->vm, 0, MO_32);
     gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp);
-    tmp2 = neon_load_reg(s, a->vm, 1);
+    tmp2 = tcg_temp_new_i32(tcg_ctx);
+    read_neon_element32(s, tmp2, a->vm, 1, MO_32);
    gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp2, tmp2, fpst, ahp);
     tcg_gen_shli_i32(tcg_ctx, tmp2, tmp2, 16);
     tcg_gen_or_i32(tcg_ctx, tmp2, tmp2, tmp);
-    tcg_temp_free_i32(tcg_ctx, tmp);
-    tmp = neon_load_reg(s, a->vm, 2);
+    read_neon_element32(s, tmp, a->vm, 2, MO_32);
     gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp);
-    tmp3 = neon_load_reg(s, a->vm, 3);
-    neon_store_reg(s, a->vd, 0, tmp2);
+    tmp3 = tcg_temp_new_i32(tcg_ctx);
+    read_neon_element32(s, tmp3, a->vm, 3, MO_32);
+    write_neon_element32(s, tmp2, a->vd, 0, MO_32);
+    tcg_temp_free_i32(tcg_ctx, tmp2);
     gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp3, tmp3, fpst, ahp);
     tcg_gen_shli_i32(tcg_ctx, tmp3, tmp3, 16);
     tcg_gen_or_i32(tcg_ctx, tmp3, tmp3, tmp);
-    neon_store_reg(s, a->vd, 1, tmp3);
+    write_neon_element32(s, tmp3, a->vd, 1, MO_32);
+    tcg_temp_free_i32(tcg_ctx, tmp3);
     tcg_temp_free_i32(tcg_ctx, tmp);
     tcg_temp_free_i32(tcg_ctx, ahp);
     tcg_temp_free_ptr(tcg_ctx, fpst);
@@ -3418,21 +3466,25 @@ static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
     fpst = fpstatus_ptr(tcg_ctx, FPST_STD);
     ahp = get_ahp_flag(s);
     tmp3 = tcg_temp_new_i32(tcg_ctx);
-    tmp = neon_load_reg(s, a->vm, 0);
-    tmp2 = neon_load_reg(s, a->vm, 1);
+    tmp2 = tcg_temp_new_i32(tcg_ctx);
+    tmp = tcg_temp_new_i32(tcg_ctx);
+    read_neon_element32(s, tmp, a->vm, 0, MO_32);
+    read_neon_element32(s, tmp2, a->vm, 1, MO_32);
     tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp);
     gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp);
-    neon_store_reg(s, a->vd, 0, tmp3);
+    write_neon_element32(s, tmp3, a->vd, 0, MO_32);
     tcg_gen_shri_i32(tcg_ctx, tmp, tmp, 16);
     gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp, tmp, fpst, ahp);
-    neon_store_reg(s, a->vd, 1, tmp);
-    tmp3 = tcg_temp_new_i32(tcg_ctx);
+    write_neon_element32(s, tmp, a->vd, 1, MO_32);
+    tcg_temp_free_i32(tcg_ctx, tmp);
     tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp2);
     gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp);
-    neon_store_reg(s, a->vd, 2, tmp3);
+    write_neon_element32(s, tmp3, a->vd, 2, MO_32);
+    tcg_temp_free_i32(tcg_ctx, tmp3);
     tcg_gen_shri_i32(tcg_ctx, tmp2, tmp2, 16);
     gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp2, tmp2, fpst, ahp);
-    neon_store_reg(s, a->vd, 3, tmp2);
+    write_neon_element32(s, tmp2, a->vd, 3, MO_32);
+    tcg_temp_free_i32(tcg_ctx, tmp2);
 
     tcg_temp_free_i32(tcg_ctx, ahp);
     tcg_temp_free_ptr(tcg_ctx, fpst);
@@ -3539,8 +3591,9 @@ DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
 static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
 {
-    int pass;
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
+    TCGv_i32 tmp;
+    int pass;
 
     /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
         return false;
@@ -3565,11 +3618,13 @@ static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
         return true;
     }
 
+    tmp = tcg_temp_new_i32(tcg_ctx);
     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
-        TCGv_i32 tmp = neon_load_reg(s, a->vm, pass);
+        read_neon_element32(s, tmp, a->vm, pass, MO_32);
         fn(tcg_ctx, tmp, tmp);
-        neon_store_reg(s, a->vd, pass, tmp);
+        write_neon_element32(s, tmp, a->vd, pass, MO_32);
     }
+    tcg_temp_free_i32(tcg_ctx, tmp);
     return true;
 }
 
@@ -3924,25 +3979,29 @@ static bool trans_VTRN(DisasContext *s, arg_2misc *a)
         return true;
     }
 
-    if (a->size == 2) {
+    tmp = tcg_temp_new_i32(tcg_ctx);
+    tmp2 = tcg_temp_new_i32(tcg_ctx);
+    if (a->size == MO_32) {
         for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
-            tmp = neon_load_reg(s, a->vm, pass);
-            tmp2 = neon_load_reg(s, a->vd, pass + 1);
-            neon_store_reg(s, a->vm, pass, tmp2);
-            neon_store_reg(s, a->vd, pass + 1, tmp);
+            read_neon_element32(s, tmp, a->vm, pass, MO_32);
+            read_neon_element32(s, tmp2, a->vd, pass + 1, MO_32);
+            write_neon_element32(s, tmp2, a->vm, pass, MO_32);
+            write_neon_element32(s, tmp, a->vd, pass + 1, MO_32);
        }
     } else {
         for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
-            tmp = neon_load_reg(s, a->vm, pass);
-            tmp2 = neon_load_reg(s, a->vd, pass);
-            if (a->size == 0) {
+            read_neon_element32(s, tmp, a->vm, pass, MO_32);
+            read_neon_element32(s, tmp2, a->vd, pass, MO_32);
+            if (a->size == MO_8) {
                 gen_neon_trn_u8(tcg_ctx, tmp, tmp2);
             } else {
                 gen_neon_trn_u16(tcg_ctx, tmp, tmp2);
             }
-            neon_store_reg(s, a->vm, pass, tmp2);
-            neon_store_reg(s, a->vd, pass, tmp);
+            write_neon_element32(s, tmp2, a->vm, pass, MO_32);
+            write_neon_element32(s, tmp, a->vd, pass, MO_32);
         }
     }
+    tcg_temp_free_i32(tcg_ctx, tmp);
+    tcg_temp_free_i32(tcg_ctx, tmp2);
     return true;
 }
diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c
index a122e7e6..87bfa6b9 100644
--- a/qemu/target/arm/translate.c
+++ b/qemu/target/arm/translate.c
@@ -1216,6 +1216,34 @@ static inline void neon_store_reg32(DisasContext *s, TCGv_i32 var, int reg)
     tcg_gen_st_i32(tcg_ctx, var, tcg_ctx->cpu_env, vfp_reg_offset(false, reg));
 }
 
+static void read_neon_element32(DisasContext *s, TCGv_i32 dest, int reg, int ele, MemOp size)
+{
+    TCGContext *tcg_ctx = s->uc->tcg_ctx;
+    long off = neon_element_offset(reg, ele, size);
+
+    switch (size) {
+    case MO_32:
+        tcg_gen_ld_i32(tcg_ctx, dest, tcg_ctx->cpu_env, off);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static void write_neon_element32(DisasContext *s, TCGv_i32 src, int reg, int ele, MemOp size)
+{
+    TCGContext *tcg_ctx = s->uc->tcg_ctx;
+    long off = neon_element_offset(reg, ele, size);
+
+    switch (size) {
+    case MO_32:
+        tcg_gen_st_i32(tcg_ctx, src, tcg_ctx->cpu_env, off);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 static TCGv_ptr vfp_reg_ptr(DisasContext *s, bool dp, int reg)
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
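
Note on element addressing: the new accessors call neon_element_offset(), which the patch uses but does not show. The sketch below is a minimal standalone model (not the QEMU/Unicorn function itself) of the addressing they are assumed to perform: element `ele` of width 1 << size bytes sits at ele * (1 << size) bytes from the register's base offset, with an XOR swizzle on big-endian hosts so sub-word elements land on the same bytes as on a little-endian host. This is why, for example, do_2shift_narrow_32 above can read elements 2 and 3 of a->vm instead of elements 0 and 1 of a->vm + 1. The enum values and element_offset() name here are illustrative, not part of the patch.

    /* Standalone model of Neon element addressing (illustrative only). */
    #include <stdio.h>

    enum { MO_8 = 0, MO_16 = 1, MO_32 = 2, MO_64 = 3 };

    /* Byte offset of element `ele` of width 2^size within a vector register. */
    static long element_offset(int ele, int size, int host_big_endian)
    {
        int element_size = 1 << size;
        long ofs = (long)ele * element_size;

        if (host_big_endian && element_size < 8) {
            /* Reverse the order of sub-word elements within each 8-byte unit. */
            ofs ^= 8 - element_size;
        }
        return ofs;
    }

    int main(void)
    {
        /* The four MO_32 elements spanning a Q register (two D registers). */
        for (int ele = 0; ele < 4; ele++) {
            printf("MO_32 element %d: LE offset %ld, BE offset %ld\n",
                   ele,
                   element_offset(ele, MO_32, 0),
                   element_offset(ele, MO_32, 1));
        }
        return 0;
    }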