diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c index 3c12d9f9..98da2728 100644 --- a/qemu/target/arm/translate-a64.c +++ b/qemu/target/arm/translate-a64.c @@ -12827,15 +12827,18 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) case 0x13: /* FCMLA #90 */ case 0x15: /* FCMLA #180 */ case 0x17: /* FCMLA #270 */ - tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_reg_offset(s, rm, index, size), fpst, - is_q ? 16 : 8, vec_full_reg_size(s), - extract32(insn, 13, 2), /* rot */ - size == MO_64 - ? gen_helper_gvec_fcmlas_idx - : gen_helper_gvec_fcmlah_idx); - tcg_temp_free_ptr(tcg_ctx, fpst); + { + int rot = extract32(insn, 13, 2); + int data = (index << 2) | rot; + tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd), + vec_full_reg_offset(s, rn), + vec_full_reg_offset(s, rm), fpst, + is_q ? 16 : 8, vec_full_reg_size(s), data, + size == MO_64 + ? gen_helper_gvec_fcmlas_idx + : gen_helper_gvec_fcmlah_idx); + tcg_temp_free_ptr(tcg_ctx, fpst); + } return; } diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c index 3fb1c973..4526a294 100644 --- a/qemu/target/arm/translate.c +++ b/qemu/target/arm/translate.c @@ -7979,26 +7979,42 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn) static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) { TCGContext *tcg_ctx = s->uc->tcg_ctx; - int rd, rn, rm, rot, size, opr_sz; + gen_helper_gvec_3_ptr *fn_gvec_ptr; + int rd, rn, rm, opr_sz, data; TCGv_ptr fpst; bool q; q = extract32(insn, 6, 1); VFP_DREG_D(rd, insn); VFP_DREG_N(rn, insn); - VFP_DREG_M(rm, insn); if ((rd | rn) & q) { return 1; } if ((insn & 0xff000f10) == 0xfe000800) { /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */ - rot = extract32(insn, 20, 2); - size = extract32(insn, 23, 1); - if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA) - || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) { + int rot = extract32(insn, 20, 2); + int size = extract32(insn, 23, 1); + int index; + + if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) { return 1; } + if (size == 0) { + if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + return 1; + } + /* For fp16, rm is just Vm, and index is M. */ + rm = extract32(insn, 0, 4); + index = extract32(insn, 5, 1); + } else { + /* For fp32, rm is the usual M:Vm, and index is 0. */ + VFP_DREG_M(rm, insn); + index = 0; + } + data = (index << 2) | rot; + fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx + : gen_helper_gvec_fcmlah_idx); } else { return 1; } @@ -8017,9 +8033,7 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn) tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, rd), vfp_reg_offset(1, rn), vfp_reg_offset(1, rm), fpst, - opr_sz, opr_sz, rot, - size ? gen_helper_gvec_fcmlas_idx - : gen_helper_gvec_fcmlah_idx); + opr_sz, opr_sz, data, fn_gvec_ptr); tcg_temp_free_ptr(tcg_ctx, fpst); return 0; } diff --git a/qemu/target/arm/vec_helper.c b/qemu/target/arm/vec_helper.c index a84dc149..50770e90 100644 --- a/qemu/target/arm/vec_helper.c +++ b/qemu/target/arm/vec_helper.c @@ -318,10 +318,11 @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, float_status *fpst = vfpst; intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); uint32_t neg_real = flip ^ neg_imag; uintptr_t i; - float16 e1 = m[H2(flip)]; - float16 e3 = m[H2(1 - flip)]; + float16 e1 = m[H2(2 * index + flip)]; + float16 e3 = m[H2(2 * index + 1 - flip)]; /* Shift boolean to the sign bit so we can xor to negate. */ neg_real <<= 15; @@ -378,10 +379,11 @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, float_status *fpst = vfpst; intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1); uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1); + intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2); uint32_t neg_real = flip ^ neg_imag; uintptr_t i; - float32 e1 = m[H4(flip)]; - float32 e3 = m[H4(1 - flip)]; + float32 e1 = m[H4(2 * index + flip)]; + float32 e3 = m[H4(2 * index + 1 - flip)]; /* Shift boolean to the sign bit so we can xor to negate. */ neg_real <<= 31;