target/arm: Implement SVE Permute - Extract Group

Backports commit b94f8f60bd841c5b737185cd38263e26822f77ab from qemu
2025-12-13 12:41:25 +00:00 · 2018-05-20 05:26:52 -04:00 · 2018-05-20 05:26:52 -04:00 · 6835b2dd13
parent 9917f0d536
commit 6835b2dd13
7 changed files with 128 additions and 0 deletions
--- a/qemu/aarch64.h
+++ b/qemu/aarch64.h
@ -3355,6 +3355,7 @@
 #define helper_sve_eorv_d helper_sve_eorv_d_aarch64
 #define helper_sve_eorv_h helper_sve_eorv_h_aarch64
 #define helper_sve_eorv_s helper_sve_eorv_s_aarch64
+#define helper_sve_ext helper_sve_ext_aarch64
 #define helper_sve_fabs_d helper_sve_fabs_d_aarch64
 #define helper_sve_fabs_h helper_sve_fabs_h_aarch64
 #define helper_sve_fabs_s helper_sve_fabs_s_aarch64
--- a/qemu/aarch64eb.h
+++ b/qemu/aarch64eb.h
@ -3355,6 +3355,7 @@
 #define helper_sve_eorv_d helper_sve_eorv_d_aarch64eb
 #define helper_sve_eorv_h helper_sve_eorv_h_aarch64eb
 #define helper_sve_eorv_s helper_sve_eorv_s_aarch64eb
+#define helper_sve_ext helper_sve_ext_aarch64eb
 #define helper_sve_fabs_d helper_sve_fabs_d_aarch64eb
 #define helper_sve_fabs_h helper_sve_fabs_h_aarch64eb
 #define helper_sve_fabs_s helper_sve_fabs_s_aarch64eb
--- a/qemu/header_gen.py
+++ b/qemu/header_gen.py
@ -3376,6 +3376,7 @@ aarch64_symbols = (
    'helper_sve_eorv_d',
    'helper_sve_eorv_h',
    'helper_sve_eorv_s',
+    'helper_sve_ext',
    'helper_sve_fabs_d',
    'helper_sve_fabs_h',
    'helper_sve_fabs_s',
--- a/qemu/target/arm/helper-sve.h
+++ b/qemu/target/arm/helper-sve.h
@ -414,6 +414,8 @@ DEF_HELPER_FLAGS_4(sve_cpy_z_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(sve_cpy_z_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(sve_cpy_z_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

+DEF_HELPER_FLAGS_4(sve_ext, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_5(sve_and_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_bic_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_eor_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
--- a/qemu/target/arm/sve.decode
+++ b/qemu/target/arm/sve.decode
@ -24,6 +24,7 @@

 %imm4_16_p1     16:4 !function=plus1
 %imm6_22_5      22:1 5:5
+%imm8_16_10     16:5 10:3
 %imm9_16_10     16:s6 10:3

 # A combination of tsz:imm3 -- extract esize.
@ -362,6 +363,12 @@ FCPY            00000101 .. 01 .... 110 imm:8 .....             @rdn_pg4
 CPY_m_i         00000101 .. 01 .... 01 . ........ .....   @rdn_pg4 imm=%sh8_i8s
 CPY_z_i         00000101 .. 01 .... 00 . ........ .....   @rdn_pg4 imm=%sh8_i8s

+### SVE Permute - Extract Group
+
+# SVE extract vector (immediate offset)
+EXT             00000101 001 ..... 000 ... rm:5 rd:5 \
+                &rrri rn=%reg_movprfx imm=%imm8_16_10
+
 ### SVE Predicate Logical Operations Group

 # SVE predicate logical operations
--- a/qemu/target/arm/sve_helper.c
+++ b/qemu/target/arm/sve_helper.c
@ -1478,3 +1478,84 @@ void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc)
        d[i] = (pg[H1(i)] & 1 ? val : 0);
    }
 }
+
+/* Big-endian hosts need to frob the byte indices.  If the copy
+ * happens to be 8-byte aligned, then no frobbing necessary.
+ *
+ * Copy n bytes from vs to vd.  The two regions may overlap: every
+ * non-memmove path below picks an ascending or descending copy order
+ * depending on where vd lies relative to vs.
+ *
+ * H1, H1_2 and H1_4 are defined elsewhere in this file; presumably they
+ * adjust an address/index for 1-, 2- and 4-byte accesses inside an
+ * 8-byte unit on big-endian hosts -- confirm against their definitions.
+ */
+static void swap_memmove(void *vd, void *vs, size_t n)
+{
+    uintptr_t d = (uintptr_t)vd;
+    uintptr_t s = (uintptr_t)vs;
+    /* Low three bits of (dest | src | length): zero only when all three
+     * are multiples of 8; otherwise the lowest set bit bounds the widest
+     * access size that divides all of them.
+     */
+    uintptr_t o = (d | s | n) & 7;
+    size_t i;
+
+#ifndef HOST_WORDS_BIGENDIAN
+    /* Without HOST_WORDS_BIGENDIAN always take the plain memmove path. */
+    o = 0;
+#endif
+    switch (o) {
+    case 0:
+        memmove(vd, vs, n);
+        break;
+
+    case 4:
+        /* Everything is a multiple of 4: copy in 32-bit units. */
+        if (d < s || d >= s + n) {
+            /* Dest starts below src, or past its end: ascending is safe. */
+            for (i = 0; i < n; i += 4) {
+                *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i);
+            }
+        } else {
+            /* Dest starts inside the source range: copy descending. */
+            for (i = n; i > 0; ) {
+                i -= 4;
+                *(uint32_t *)H1_4(d + i) = *(uint32_t *)H1_4(s + i);
+            }
+        }
+        break;
+
+    case 2:
+    case 6:
+        /* Everything is a multiple of 2 but not of 4: 16-bit units. */
+        if (d < s || d >= s + n) {
+            for (i = 0; i < n; i += 2) {
+                *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i);
+            }
+        } else {
+            for (i = n; i > 0; ) {
+                i -= 2;
+                *(uint16_t *)H1_2(d + i) = *(uint16_t *)H1_2(s + i);
+            }
+        }
+        break;
+
+    default:
+        /* Some value is odd: fall back to byte-at-a-time copying. */
+        if (d < s || d >= s + n) {
+            for (i = 0; i < n; i++) {
+                *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i);
+            }
+        } else {
+            for (i = n; i > 0; ) {
+                i -= 1;
+                *(uint8_t *)H1(d + i) = *(uint8_t *)H1(s + i);
+            }
+        }
+        break;
+    }
+}
+
+/* SVE EXT helper: build vd from the tail of vn followed by the head of vm.
+ *
+ * With opr_sz taken from simd_oprsz(desc) and n_ofs from simd_data(desc):
+ *   vd[0 .. n_siz)       = vn[n_ofs .. opr_sz)
+ *   vd[n_siz .. opr_sz)  = vm[0 .. n_ofs)
+ * where n_siz = opr_sz - n_ofs.  All copies go through swap_memmove so
+ * that byte positions are handled consistently on big-endian hosts.
+ *
+ * The three branches differ only in the order of the copies, chosen so
+ * that no source bytes are overwritten before they are read when vd
+ * is the same register as vn and/or vm.
+ */
+void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc)
+{
+    intptr_t opr_sz = simd_oprsz(desc);
+    size_t n_ofs = simd_data(desc);
+    size_t n_siz = opr_sz - n_ofs;
+
+    if (vd != vm) {
+        /* vm is untouched by the first copy; vd may equal vn, which
+         * swap_memmove copes with as an overlapping move.
+         */
+        swap_memmove(vd, vn + n_ofs, n_siz);
+        swap_memmove(vd + n_siz, vm, n_ofs);
+    } else if (vd != vn) {
+        /* vd == vm: relocate the head of vm to the top of vd before
+         * the low part of vd is overwritten from vn.
+         */
+        swap_memmove(vd + n_siz, vd, n_ofs);
+        swap_memmove(vd, vn + n_ofs, n_siz);
+    } else {
+        /* vd == vn == vm.  Need temp space.  */
+        ARMVectorReg tmp;
+        swap_memmove(&tmp, vm, n_ofs);
+        swap_memmove(vd, vd + n_ofs, n_siz);
+        /* tmp was filled via swap_memmove, so it must be read back the
+         * same way: a raw memcpy would pick the wrong host bytes on
+         * big-endian hosts whenever n_ofs is not a multiple of 8.
+         */
+        swap_memmove(vd + n_siz, &tmp, n_ofs);
+    }
+}
--- a/qemu/target/arm/translate-sve.c
+++ b/qemu/target/arm/translate-sve.c
@ -1996,6 +1996,41 @@ static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
    return true;
 }

+/*
+ *** SVE Permute Extract Group
+ */
+
+/* Translate EXT: emit code that fills register rd with the bytes of rn
+ * from offset n_ofs to the end of the vector, followed by the first
+ * n_ofs bytes of rm.
+ *
+ * Always returns true, including when sve_access_check() fails (in
+ * which case nothing further is emitted here).
+ */
+static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
+{
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    TCGContext *tcg_ctx = s->uc->tcg_ctx;
+    unsigned vsz = vec_full_reg_size(s);
+    /* An immediate at or beyond the vector size is treated as offset 0. */
+    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
+    unsigned n_siz = vsz - n_ofs;
+    unsigned d = vec_full_reg_offset(s, a->rd);
+    unsigned n = vec_full_reg_offset(s, a->rn);
+    unsigned m = vec_full_reg_offset(s, a->rm);
+
+    /* Use host vector move insns if we have appropriate sizes
+     * and no unfortunate overlap.
+     *
+     * m != d: the first move writes d before the second move reads m.
+     * d != n || n_siz <= n_ofs: when d and n coincide, the first move
+     * reads [n_ofs, vsz) and writes [0, n_siz); these are disjoint only
+     * if n_siz <= n_ofs.
+     */
+    if (m != d
+        && n_ofs == size_for_gvec(n_ofs)
+        && n_siz == size_for_gvec(n_siz)
+        && (d != n || n_siz <= n_ofs)) {
+        tcg_gen_gvec_mov(tcg_ctx, 0, d, n + n_ofs, n_siz, n_siz);
+        if (n_ofs != 0) {
+            tcg_gen_gvec_mov(tcg_ctx, 0, d + n_siz, m, n_ofs, n_ofs);
+        }
+    } else {
+        /* Out-of-line helper; n_ofs is passed as the descriptor data. */
+        tcg_gen_gvec_3_ool(tcg_ctx, d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
+    }
+    return true;
+}
+
 /*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */