target/arm: Use gvec for VSHR, VSHL

Backports commit 1dc8425e551be1371d657e94367f37130cd7aede from qemu
2025-07-06 21:10:43 +00:00 · 2018-11-10 10:14:25 -05:00 · 2018-11-10 10:14:25 -05:00 · b5877f1dfb
parent 7790ca1ccb
commit b5877f1dfb
1 changed file with 49 additions and 23 deletions
--- a/qemu/target/arm/translate.c
+++ b/qemu/target/arm/translate.c
@@ -6355,7 +6355,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
            break;
        case NEON_3R_VMUL:
            /* VMUL.P8; other cases already eliminated.  */
-            gen_helper_neon_mul_p8(s, tmp, tmp, tmp2);
+            gen_helper_neon_mul_p8(tcg_ctx, tmp, tmp, tmp2);
            break;
        case NEON_3R_VPMAX:
            GEN_NEON_INTEGER_OP(pmax);
@@ -6537,8 +6537,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                    size--;
            }
            shift = (insn >> 16) & ((1 << (3 + size)) - 1);
            /* To avoid excessive duplication of ops we implement shift
               by immediate using the variable shift operations.  */
            if (op < 8) {
                /* Shift by immediate:
                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
@@ -6550,37 +6548,62 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                }
                /* Right shifts are encoded as N - shift, where N is the
                   element size in bits.  */
-                if (op <= 4)
+                if (op <= 4) {
                    shift = shift - (1 << (size + 3));
                }
                switch (op) {
                case 0:  /* VSHR */
                    /* Right shift comes here negative.  */
                    shift = -shift;
                    /* Shifts larger than the element size are architecturally
                     * valid.  Unsigned results in all zeros; signed results
                     * in all sign bits.
                     */
                    if (!u) {
                        tcg_gen_gvec_sari(tcg_ctx, size, rd_ofs, rm_ofs,
                                          MIN(shift, (8 << size) - 1),
                                          vec_size, vec_size);
                    } else if (shift >= 8 << size) {
                        tcg_gen_gvec_dup8i(tcg_ctx, rd_ofs, vec_size, vec_size, 0);
                    } else {
                        tcg_gen_gvec_shri(tcg_ctx, size, rd_ofs, rm_ofs, shift,
                                          vec_size, vec_size);
                    }
                    return 0;
                case 5: /* VSHL, VSLI */
                    if (!u) { /* VSHL */
                        /* Shifts larger than the element size are
                         * architecturally valid and results in zero.
                         */
                        if (shift >= 8 << size) {
                            tcg_gen_gvec_dup8i(tcg_ctx, rd_ofs, vec_size, vec_size, 0);
                        } else {
                            tcg_gen_gvec_shli(tcg_ctx, size, rd_ofs, rm_ofs, shift,
                                              vec_size, vec_size);
                        }
                        return 0;
                    }
                    break;
                }
                if (size == 3) {
                    count = q + 1;
                } else {
                    count = q ? 4: 2;
                }
-                switch (size) {
-                case 0:
-                    imm = (uint8_t) shift;
-                    imm |= imm << 8;
-                    imm |= imm << 16;
-                    break;
-                case 1:
-                    imm = (uint16_t) shift;
-                    imm |= imm << 16;
-                    break;
-                case 2:
-                case 3:
-                    imm = shift;
-                    break;
-                default:
-                    abort();
-                }
+
+                /* To avoid excessive duplication of ops we implement shift
+                 * by immediate using the variable shift operations.
+                  */
+                imm = dup_const(size, shift);
                for (pass = 0; pass < count; pass++) {
                    if (size == 3) {
                        neon_load_reg64(s, s->V0, rm + pass);
                        tcg_gen_movi_i64(tcg_ctx, s->V1, imm);
                        switch (op) {
                        case 0:  /* VSHR */
                        case 1:  /* VSRA */
                            if (u)
                                gen_helper_neon_shl_u64(tcg_ctx, s->V0, s->V0, s->V1);
@@ -6611,6 +6634,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                                                         s->V0, s->V1);
                            }
                            break;
                        default:
                            g_assert_not_reached();
                        }
                        if (op == 1 || op == 3) {
                            /* Accumulate.  */
@@ -6639,7 +6664,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                        tmp2 = tcg_temp_new_i32(tcg_ctx);
                        tcg_gen_movi_i32(tcg_ctx, tmp2, imm);
                        switch (op) {
                        case 0:  /* VSHR */
                        case 1:  /* VSRA */
                            GEN_NEON_INTEGER_OP(shl);
                            break;
@@ -6677,6 +6701,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                        case 7: /* VQSHL */
                            GEN_NEON_INTEGER_OP_ENV(qshl);
                            break;
                        default:
                            g_assert_not_reached();
                        }
                        tcg_temp_free_i32(tcg_ctx, tmp2);