target/arm: Convert Neon narrowing shifts with op==9 to decodetree

Convert the remaining Neon narrowing shifts to decodetree:
 * VQSHRN
 * VQRSHRN

Backports commit b4a3a77bb7a0dff1cc5673fe3be467d9e3635d44 from qemu
2025-12-15 09:01:34 +00:00 · 2020-06-15 12:31:32 -04:00 · 2020-06-15 12:31:32 -04:00 · 6fc8fdaa2b
parent ef29b91a43
commit 6fc8fdaa2b
3 changed files with 37 additions and 109 deletions
--- a/qemu/target/arm/neon-dp.decode
+++ b/qemu/target/arm/neon-dp.decode
@@ -328,3 +328,23 @@ VQSHRUN_16_2sh   1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_h
 VQRSHRUN_64_2sh  1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_d
 VQRSHRUN_32_2sh  1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_s
 VQRSHRUN_16_2sh  1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_h
+
+# VQSHRN with signed input
+VQSHRN_S64_2sh   1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_d
+VQSHRN_S32_2sh   1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_s
+VQSHRN_S16_2sh   1111 001 0 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_h
+
+# VQRSHRN with signed input
+VQRSHRN_S64_2sh  1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_d
+VQRSHRN_S32_2sh  1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_s
+VQRSHRN_S16_2sh  1111 001 0 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_h
+
+# VQSHRN with unsigned input
+VQSHRN_U64_2sh   1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_d
+VQSHRN_U32_2sh   1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_s
+VQSHRN_U16_2sh   1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_h
+
+# VQRSHRN with unsigned input
+VQRSHRN_U64_2sh  1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_d
+VQRSHRN_U32_2sh  1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_s
+VQRSHRN_U16_2sh  1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_h
--- a/qemu/target/arm/translate-neon.inc.c
+++ b/qemu/target/arm/translate-neon.inc.c
@@ -1584,3 +1584,18 @@ DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)
 DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
 DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
 DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
+DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
+DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
+DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)
+
+DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
+DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
+DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)
+
+DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
+DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
+DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)
+
+DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
+DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
+DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
--- a/qemu/target/arm/translate.c
+++ b/qemu/target/arm/translate.c
@@ -3321,41 +3321,6 @@ static inline void gen_neon_unarrow_sats(DisasContext *s, int size, TCGv_i32 des
    }
 }

-static inline void gen_neon_shift_narrow(DisasContext *s, int size, TCGv_i32 var, TCGv_i32 shift,
-                                         int q, int u)
-{
-    TCGContext *tcg_ctx = s->uc->tcg_ctx;
-    if (q) {
-        if (u) {
-            switch (size) {
-            case 1: gen_helper_neon_rshl_u16(tcg_ctx, var, var, shift); break;
-            case 2: gen_helper_neon_rshl_u32(tcg_ctx, var, var, shift); break;
-            default: abort();
-            }
-        } else {
-            switch (size) {
-            case 1: gen_helper_neon_rshl_s16(tcg_ctx, var, var, shift); break;
-            case 2: gen_helper_neon_rshl_s32(tcg_ctx, var, var, shift); break;
-            default: abort();
-            }
-        }
-    } else {
-        if (u) {
-            switch (size) {
-            case 1: gen_helper_neon_shl_u16(tcg_ctx, var, var, shift); break;
-            case 2: gen_ushl_i32(tcg_ctx, var, var, shift); break;
-            default: abort();
-            }
-        } else {
-            switch (size) {
-            case 1: gen_helper_neon_shl_s16(tcg_ctx, var, var, shift); break;
-            case 2: gen_sshl_i32(tcg_ctx, var, var, shift); break;
-            default: abort();
-            }
-        }
-    }
-}
-
 static inline void gen_neon_widen(DisasContext *s, TCGv_i64 dest, TCGv_i32 src, int size, int u)
 {
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
@@ -5407,6 +5372,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
            case 6: /* VQSHLU */
            case 7: /* VQSHL */
            case 8: /* VSHRN, VRSHRN, VQSHRUN, VQRSHRUN */
+            case 9: /* VQSHRN, VQRSHRN */
                return 1; /* handled by decodetree */
            default:
                break;
@@ -5424,80 +5390,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                    size--;
            }
            shift = (insn >> 16) & ((1 << (3 + size)) - 1);
-            if (op < 10) {
-                /* Shift by immediate and narrow:
-                   VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
-                int input_unsigned = (op == 8) ? !u : u;
-                if (rm & 1) {
-                    return 1;
-                }
-                shift = shift - (1 << (size + 3));
-                size++;
-                if (size == 3) {
-                    tmp64 = tcg_const_i64(tcg_ctx, shift);
-                    neon_load_reg64(s, s->V0, rm);
-                    neon_load_reg64(s, s->V1, rm + 1);
-                    for (pass = 0; pass < 2; pass++) {
-                        TCGv_i64 in;
-                        if (pass == 0) {
-                            in = s->V0;
-                        } else {
-                            in = s->V1;
-                        }
-                        if (q) {
-                            if (input_unsigned) {
-                                gen_helper_neon_rshl_u64(tcg_ctx, s->V0, in, tmp64);
-                            } else {
-                                gen_helper_neon_rshl_s64(tcg_ctx, s->V0, in, tmp64);
-                            }
-                        } else {
-                            if (input_unsigned) {
-                                gen_ushl_i64(tcg_ctx, s->V0, in, tmp64);
-                            } else {
-                                gen_sshl_i64(tcg_ctx, s->V0, in, tmp64);
-                            }
-                        }
-                        tmp = tcg_temp_new_i32(tcg_ctx);
-                        gen_neon_narrow_op(s, op == 8, u, size - 1, tmp, s->V0);
-                        neon_store_reg(s, rd, pass, tmp);
-                    } /* for pass */
-                    tcg_temp_free_i64(tcg_ctx, tmp64);
-                } else {
-                    if (size == 1) {
-                        imm = (uint16_t)shift;
-                        imm |= imm << 16;
-                    } else {
-                        /* size == 2 */
-                        imm = (uint32_t)shift;
-                    }
-                    tmp2 = tcg_const_i32(tcg_ctx, imm);
-                    tmp4 = neon_load_reg(s, rm + 1, 0);
-                    tmp5 = neon_load_reg(s, rm + 1, 1);
-                    for (pass = 0; pass < 2; pass++) {
-                        if (pass == 0) {
-                            tmp = neon_load_reg(s, rm, 0);
-                        } else {
-                            tmp = tmp4;
-                        }
-                        gen_neon_shift_narrow(s, size, tmp, tmp2, q,
-                                              input_unsigned);
-                        if (pass == 0) {
-                            tmp3 = neon_load_reg(s, rm, 1);
-                        } else {
-                            tmp3 = tmp5;
-                        }
-                        gen_neon_shift_narrow(s, size, tmp3, tmp2, q,
-                                              input_unsigned);
-                        tcg_gen_concat_i32_i64(tcg_ctx, s->V0, tmp, tmp3);
-                        tcg_temp_free_i32(tcg_ctx, tmp);
-                        tcg_temp_free_i32(tcg_ctx, tmp3);
-                        tmp = tcg_temp_new_i32(tcg_ctx);
-                        gen_neon_narrow_op(s, op == 8, u, size - 1, tmp, s->V0);
-                        neon_store_reg(s, rd, pass, tmp);
-                    } /* for pass */
-                    tcg_temp_free_i32(tcg_ctx, tmp2);
-                }
-            } else if (op == 10) {
+            if (op == 10) {
                /* VSHLL, VMOVL */
                if (q || (rd & 1)) {
                    return 1;