From 2f6d5554739d3c2d2c139792ec1a996bc5f35ade Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 3 Jul 2018 04:35:23 -0400 Subject: [PATCH] target/arm: Implement SVE dot product (vectors) Backports commit d730ecaae77ac696515207a5ef99509240fc792b from qemu --- qemu/aarch64.h | 4 ++ qemu/aarch64eb.h | 4 ++ qemu/arm.h | 4 ++ qemu/armeb.h | 4 ++ qemu/header_gen.py | 4 ++ qemu/m68k.h | 4 ++ qemu/mips.h | 4 ++ qemu/mips64.h | 4 ++ qemu/mips64el.h | 4 ++ qemu/mipsel.h | 4 ++ qemu/powerpc.h | 4 ++ qemu/sparc.h | 4 ++ qemu/sparc64.h | 4 ++ qemu/target/arm/helper.h | 5 +++ qemu/target/arm/sve.decode | 3 ++ qemu/target/arm/translate-sve.c | 18 +++++++++ qemu/target/arm/vec_helper.c | 67 +++++++++++++++++++++++++++++++++ qemu/x86_64.h | 4 ++ 18 files changed, 149 insertions(+) diff --git a/qemu/aarch64.h b/qemu/aarch64.h index 507bf0bb..a62e3f3a 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -1189,6 +1189,8 @@ #define helper_gvec_sar16i helper_gvec_sar16i_aarch64 #define helper_gvec_sar32i helper_gvec_sar32i_aarch64 #define helper_gvec_sar64i helper_gvec_sar64i_aarch64 +#define helper_gvec_sdot_b helper_gvec_sdot_b_aarch64 +#define helper_gvec_sdot_h helper_gvec_sdot_h_aarch64 #define helper_gvec_shl8i helper_gvec_shl8i_aarch64 #define helper_gvec_shl16i helper_gvec_shl16i_aarch64 #define helper_gvec_shl32i helper_gvec_shl32i_aarch64 @@ -1213,6 +1215,8 @@ #define helper_gvec_sssub16 helper_gvec_sssub16_aarch64 #define helper_gvec_sssub32 helper_gvec_sssub32_aarch64 #define helper_gvec_sssub64 helper_gvec_sssub64_aarch64 +#define helper_gvec_udot_b helper_gvec_udot_b_aarch64 +#define helper_gvec_udot_h helper_gvec_udot_h_aarch64 #define helper_gvec_usadd8 helper_gvec_usadd8_aarch64 #define helper_gvec_usadd16 helper_gvec_usadd16_aarch64 #define helper_gvec_usadd32 helper_gvec_usadd32_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index 5abf8ef7..2de5a7f8 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -1189,6 +1189,8 @@ #define helper_gvec_sar16i helper_gvec_sar16i_aarch64eb #define helper_gvec_sar32i helper_gvec_sar32i_aarch64eb #define helper_gvec_sar64i helper_gvec_sar64i_aarch64eb +#define helper_gvec_sdot_b helper_gvec_sdot_b_aarch64eb +#define helper_gvec_sdot_h helper_gvec_sdot_h_aarch64eb #define helper_gvec_shl8i helper_gvec_shl8i_aarch64eb #define helper_gvec_shl16i helper_gvec_shl16i_aarch64eb #define helper_gvec_shl32i helper_gvec_shl32i_aarch64eb @@ -1213,6 +1215,8 @@ #define helper_gvec_sssub16 helper_gvec_sssub16_aarch64eb #define helper_gvec_sssub32 helper_gvec_sssub32_aarch64eb #define helper_gvec_sssub64 helper_gvec_sssub64_aarch64eb +#define helper_gvec_udot_b helper_gvec_udot_b_aarch64eb +#define helper_gvec_udot_h helper_gvec_udot_h_aarch64eb #define helper_gvec_usadd8 helper_gvec_usadd8_aarch64eb #define helper_gvec_usadd16 helper_gvec_usadd16_aarch64eb #define helper_gvec_usadd32 helper_gvec_usadd32_aarch64eb diff --git a/qemu/arm.h b/qemu/arm.h index f559899c..61ee263b 100644 --- a/qemu/arm.h +++ b/qemu/arm.h @@ -1189,6 +1189,8 @@ #define helper_gvec_sar16i helper_gvec_sar16i_arm #define helper_gvec_sar32i helper_gvec_sar32i_arm #define helper_gvec_sar64i helper_gvec_sar64i_arm +#define helper_gvec_sdot_b helper_gvec_sdot_b_arm +#define helper_gvec_sdot_h helper_gvec_sdot_h_arm #define helper_gvec_shl8i helper_gvec_shl8i_arm #define helper_gvec_shl16i helper_gvec_shl16i_arm #define helper_gvec_shl32i helper_gvec_shl32i_arm @@ -1213,6 +1215,8 @@ #define helper_gvec_sssub16 helper_gvec_sssub16_arm #define helper_gvec_sssub32 helper_gvec_sssub32_arm #define helper_gvec_sssub64 helper_gvec_sssub64_arm +#define helper_gvec_udot_b helper_gvec_udot_b_arm +#define helper_gvec_udot_h helper_gvec_udot_h_arm #define helper_gvec_usadd8 helper_gvec_usadd8_arm #define helper_gvec_usadd16 helper_gvec_usadd16_arm #define helper_gvec_usadd32 helper_gvec_usadd32_arm diff --git a/qemu/armeb.h b/qemu/armeb.h index 4c39118b..59f2b322 100644 --- a/qemu/armeb.h +++ b/qemu/armeb.h @@ -1189,6 +1189,8 @@ #define helper_gvec_sar16i helper_gvec_sar16i_armeb #define helper_gvec_sar32i helper_gvec_sar32i_armeb #define helper_gvec_sar64i helper_gvec_sar64i_armeb +#define helper_gvec_sdot_b helper_gvec_sdot_b_armeb +#define helper_gvec_sdot_h helper_gvec_sdot_h_armeb #define helper_gvec_shl8i helper_gvec_shl8i_armeb #define helper_gvec_shl16i helper_gvec_shl16i_armeb #define helper_gvec_shl32i helper_gvec_shl32i_armeb @@ -1213,6 +1215,8 @@ #define helper_gvec_sssub16 helper_gvec_sssub16_armeb #define helper_gvec_sssub32 helper_gvec_sssub32_armeb #define helper_gvec_sssub64 helper_gvec_sssub64_armeb +#define helper_gvec_udot_b helper_gvec_udot_b_armeb +#define helper_gvec_udot_h helper_gvec_udot_h_armeb #define helper_gvec_usadd8 helper_gvec_usadd8_armeb #define helper_gvec_usadd16 helper_gvec_usadd16_armeb #define helper_gvec_usadd32 helper_gvec_usadd32_armeb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index 82ca02ac..df3745c7 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -1195,6 +1195,8 @@ symbols = ( 'helper_gvec_sar16i', 'helper_gvec_sar32i', 'helper_gvec_sar64i', + 'helper_gvec_sdot_b', + 'helper_gvec_sdot_h', 'helper_gvec_shl8i', 'helper_gvec_shl16i', 'helper_gvec_shl32i', @@ -1219,6 +1221,8 @@ symbols = ( 'helper_gvec_sssub16', 'helper_gvec_sssub32', 'helper_gvec_sssub64', + 'helper_gvec_udot_b', + 'helper_gvec_udot_h', 'helper_gvec_usadd8', 'helper_gvec_usadd16', 'helper_gvec_usadd32', diff --git a/qemu/m68k.h b/qemu/m68k.h index 86619375..06f2008e 100644 --- a/qemu/m68k.h +++ b/qemu/m68k.h @@ -1189,6 +1189,8 @@ #define helper_gvec_sar16i helper_gvec_sar16i_m68k #define helper_gvec_sar32i helper_gvec_sar32i_m68k #define helper_gvec_sar64i helper_gvec_sar64i_m68k +#define helper_gvec_sdot_b helper_gvec_sdot_b_m68k +#define helper_gvec_sdot_h helper_gvec_sdot_h_m68k #define helper_gvec_shl8i helper_gvec_shl8i_m68k #define helper_gvec_shl16i helper_gvec_shl16i_m68k #define helper_gvec_shl32i helper_gvec_shl32i_m68k @@ -1213,6 +1215,8 @@ #define helper_gvec_sssub16 helper_gvec_sssub16_m68k #define helper_gvec_sssub32 helper_gvec_sssub32_m68k #define helper_gvec_sssub64 helper_gvec_sssub64_m68k +#define helper_gvec_udot_b helper_gvec_udot_b_m68k +#define helper_gvec_udot_h helper_gvec_udot_h_m68k #define helper_gvec_usadd8 helper_gvec_usadd8_m68k #define helper_gvec_usadd16 helper_gvec_usadd16_m68k #define helper_gvec_usadd32 helper_gvec_usadd32_m68k diff --git a/qemu/mips.h b/qemu/mips.h index 51eba449..1f48b68d 100644 --- a/qemu/mips.h +++ b/qemu/mips.h @@ -1189,6 +1189,8 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mips #define helper_gvec_sar32i helper_gvec_sar32i_mips #define helper_gvec_sar64i helper_gvec_sar64i_mips +#define helper_gvec_sdot_b helper_gvec_sdot_b_mips +#define helper_gvec_sdot_h helper_gvec_sdot_h_mips #define helper_gvec_shl8i helper_gvec_shl8i_mips #define helper_gvec_shl16i helper_gvec_shl16i_mips #define helper_gvec_shl32i helper_gvec_shl32i_mips @@ -1213,6 +1215,8 @@ #define helper_gvec_sssub16 helper_gvec_sssub16_mips #define helper_gvec_sssub32 helper_gvec_sssub32_mips #define helper_gvec_sssub64 helper_gvec_sssub64_mips +#define helper_gvec_udot_b helper_gvec_udot_b_mips +#define helper_gvec_udot_h helper_gvec_udot_h_mips #define helper_gvec_usadd8 helper_gvec_usadd8_mips #define helper_gvec_usadd16 helper_gvec_usadd16_mips #define helper_gvec_usadd32 helper_gvec_usadd32_mips diff --git a/qemu/mips64.h b/qemu/mips64.h index 4cbf93e1..8476e51c 100644 --- a/qemu/mips64.h +++ b/qemu/mips64.h @@ -1189,6 +1189,8 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mips64 #define helper_gvec_sar32i helper_gvec_sar32i_mips64 #define helper_gvec_sar64i helper_gvec_sar64i_mips64 +#define helper_gvec_sdot_b helper_gvec_sdot_b_mips64 +#define helper_gvec_sdot_h helper_gvec_sdot_h_mips64 #define helper_gvec_shl8i helper_gvec_shl8i_mips64 #define helper_gvec_shl16i helper_gvec_shl16i_mips64 #define helper_gvec_shl32i helper_gvec_shl32i_mips64 @@ -1213,6 +1215,8 @@ #define helper_gvec_sssub16 helper_gvec_sssub16_mips64 #define helper_gvec_sssub32 helper_gvec_sssub32_mips64 #define helper_gvec_sssub64 helper_gvec_sssub64_mips64 +#define helper_gvec_udot_b helper_gvec_udot_b_mips64 +#define helper_gvec_udot_h helper_gvec_udot_h_mips64 #define helper_gvec_usadd8 helper_gvec_usadd8_mips64 #define helper_gvec_usadd16 helper_gvec_usadd16_mips64 #define helper_gvec_usadd32 helper_gvec_usadd32_mips64 diff --git a/qemu/mips64el.h b/qemu/mips64el.h index c5c4bf78..fb87de92 100644 --- a/qemu/mips64el.h +++ b/qemu/mips64el.h @@ -1189,6 +1189,8 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mips64el #define helper_gvec_sar32i helper_gvec_sar32i_mips64el #define helper_gvec_sar64i helper_gvec_sar64i_mips64el +#define helper_gvec_sdot_b helper_gvec_sdot_b_mips64el +#define helper_gvec_sdot_h helper_gvec_sdot_h_mips64el #define helper_gvec_shl8i helper_gvec_shl8i_mips64el #define helper_gvec_shl16i helper_gvec_shl16i_mips64el #define helper_gvec_shl32i helper_gvec_shl32i_mips64el @@ -1213,6 +1215,8 @@ #define helper_gvec_sssub16 helper_gvec_sssub16_mips64el #define helper_gvec_sssub32 helper_gvec_sssub32_mips64el #define helper_gvec_sssub64 helper_gvec_sssub64_mips64el +#define helper_gvec_udot_b helper_gvec_udot_b_mips64el +#define helper_gvec_udot_h helper_gvec_udot_h_mips64el #define helper_gvec_usadd8 helper_gvec_usadd8_mips64el #define helper_gvec_usadd16 helper_gvec_usadd16_mips64el #define helper_gvec_usadd32 helper_gvec_usadd32_mips64el diff --git a/qemu/mipsel.h b/qemu/mipsel.h index 62348d11..654f8031 100644 --- a/qemu/mipsel.h +++ b/qemu/mipsel.h @@ -1189,6 +1189,8 @@ #define helper_gvec_sar16i helper_gvec_sar16i_mipsel #define helper_gvec_sar32i helper_gvec_sar32i_mipsel #define helper_gvec_sar64i helper_gvec_sar64i_mipsel +#define helper_gvec_sdot_b helper_gvec_sdot_b_mipsel +#define helper_gvec_sdot_h helper_gvec_sdot_h_mipsel #define helper_gvec_shl8i helper_gvec_shl8i_mipsel #define helper_gvec_shl16i helper_gvec_shl16i_mipsel #define helper_gvec_shl32i helper_gvec_shl32i_mipsel @@ -1213,6 +1215,8 @@ #define helper_gvec_sssub16 helper_gvec_sssub16_mipsel #define helper_gvec_sssub32 helper_gvec_sssub32_mipsel #define helper_gvec_sssub64 helper_gvec_sssub64_mipsel +#define helper_gvec_udot_b helper_gvec_udot_b_mipsel +#define helper_gvec_udot_h helper_gvec_udot_h_mipsel #define helper_gvec_usadd8 helper_gvec_usadd8_mipsel #define helper_gvec_usadd16 helper_gvec_usadd16_mipsel #define helper_gvec_usadd32 helper_gvec_usadd32_mipsel diff --git a/qemu/powerpc.h b/qemu/powerpc.h index 20b2101d..3c8b3c92 100644 --- a/qemu/powerpc.h +++ b/qemu/powerpc.h @@ -1189,6 +1189,8 @@ #define helper_gvec_sar16i helper_gvec_sar16i_powerpc #define helper_gvec_sar32i helper_gvec_sar32i_powerpc #define helper_gvec_sar64i helper_gvec_sar64i_powerpc +#define helper_gvec_sdot_b helper_gvec_sdot_b_powerpc +#define helper_gvec_sdot_h helper_gvec_sdot_h_powerpc #define helper_gvec_shl8i helper_gvec_shl8i_powerpc #define helper_gvec_shl16i helper_gvec_shl16i_powerpc #define helper_gvec_shl32i helper_gvec_shl32i_powerpc @@ -1213,6 +1215,8 @@ #define helper_gvec_sssub16 helper_gvec_sssub16_powerpc #define helper_gvec_sssub32 helper_gvec_sssub32_powerpc #define helper_gvec_sssub64 helper_gvec_sssub64_powerpc +#define helper_gvec_udot_b helper_gvec_udot_b_powerpc +#define helper_gvec_udot_h helper_gvec_udot_h_powerpc #define helper_gvec_usadd8 helper_gvec_usadd8_powerpc #define helper_gvec_usadd16 helper_gvec_usadd16_powerpc #define helper_gvec_usadd32 helper_gvec_usadd32_powerpc diff --git a/qemu/sparc.h b/qemu/sparc.h index da0e643a..ce9f5855 100644 --- a/qemu/sparc.h +++ b/qemu/sparc.h @@ -1189,6 +1189,8 @@ #define helper_gvec_sar16i helper_gvec_sar16i_sparc #define helper_gvec_sar32i helper_gvec_sar32i_sparc #define helper_gvec_sar64i helper_gvec_sar64i_sparc +#define helper_gvec_sdot_b helper_gvec_sdot_b_sparc +#define helper_gvec_sdot_h helper_gvec_sdot_h_sparc #define helper_gvec_shl8i helper_gvec_shl8i_sparc #define helper_gvec_shl16i helper_gvec_shl16i_sparc #define helper_gvec_shl32i helper_gvec_shl32i_sparc @@ -1213,6 +1215,8 @@ #define helper_gvec_sssub16 helper_gvec_sssub16_sparc #define helper_gvec_sssub32 helper_gvec_sssub32_sparc #define helper_gvec_sssub64 helper_gvec_sssub64_sparc +#define helper_gvec_udot_b helper_gvec_udot_b_sparc +#define helper_gvec_udot_h helper_gvec_udot_h_sparc #define helper_gvec_usadd8 helper_gvec_usadd8_sparc #define helper_gvec_usadd16 helper_gvec_usadd16_sparc #define helper_gvec_usadd32 helper_gvec_usadd32_sparc diff --git a/qemu/sparc64.h b/qemu/sparc64.h index afdb2b58..9cc0f56d 100644 --- a/qemu/sparc64.h +++ b/qemu/sparc64.h @@ -1189,6 +1189,8 @@ #define helper_gvec_sar16i helper_gvec_sar16i_sparc64 #define helper_gvec_sar32i helper_gvec_sar32i_sparc64 #define helper_gvec_sar64i helper_gvec_sar64i_sparc64 +#define helper_gvec_sdot_b helper_gvec_sdot_b_sparc64 +#define helper_gvec_sdot_h helper_gvec_sdot_h_sparc64 #define helper_gvec_shl8i helper_gvec_shl8i_sparc64 #define helper_gvec_shl16i helper_gvec_shl16i_sparc64 #define helper_gvec_shl32i helper_gvec_shl32i_sparc64 @@ -1213,6 +1215,8 @@ #define helper_gvec_sssub16 helper_gvec_sssub16_sparc64 #define helper_gvec_sssub32 helper_gvec_sssub32_sparc64 #define helper_gvec_sssub64 helper_gvec_sssub64_sparc64 +#define helper_gvec_udot_b helper_gvec_udot_b_sparc64 +#define helper_gvec_udot_h helper_gvec_udot_h_sparc64 #define helper_gvec_usadd8 helper_gvec_usadd8_sparc64 #define helper_gvec_usadd16 helper_gvec_usadd16_sparc64 #define helper_gvec_usadd32 helper_gvec_usadd32_sparc64 diff --git a/qemu/target/arm/helper.h b/qemu/target/arm/helper.h index 2eac50cb..4455646d 100644 --- a/qemu/target/arm/helper.h +++ b/qemu/target/arm/helper.h @@ -585,6 +585,11 @@ DEF_HELPER_FLAGS_5(gvec_qrdmlah_s32, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sdot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_udot_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sdot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_udot_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_5(gvec_fcaddh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fcadds, TCG_CALL_NO_RWG, diff --git a/qemu/target/arm/sve.decode b/qemu/target/arm/sve.decode index 80c6c5ff..074a658c 100644 --- a/qemu/target/arm/sve.decode +++ b/qemu/target/arm/sve.decode @@ -735,6 +735,9 @@ UMIN_zzi 00100101 .. 101 011 110 ........ ..... @rdn_i8u # SVE integer multiply immediate (unpredicated) MUL_zzi 00100101 .. 110 000 110 ........ ..... @rdn_i8s +# SVE integer dot product (unpredicated) +DOT_zzz 01000100 1 sz:1 0 rm:5 00000 u:1 rn:5 rd:5 ra=%reg_movprfx + # SVE floating-point complex add (predicated) FCADD 01100100 esz:2 00000 rot:1 100 pg:3 rm:5 rd:5 \ rn=%reg_movprfx diff --git a/qemu/target/arm/translate-sve.c b/qemu/target/arm/translate-sve.c index 0128cd54..6bc55a7d 100644 --- a/qemu/target/arm/translate-sve.c +++ b/qemu/target/arm/translate-sve.c @@ -846,6 +846,24 @@ DO_ZZW(LSL, lsl) #undef DO_ZZW +static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a, uint32_t insn) +{ + static gen_helper_gvec_3 * const fns[2][2] = { + { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h }, + { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h } + }; + + if (sve_access_check(s)) { + TCGContext *tcg_ctx = s->uc->tcg_ctx; + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_3_ool(tcg_ctx, vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vsz, vsz, 0, fns[a->u][a->sz]); + } + return true; +} + /* *** SVE Integer Multiply-Add Group */ diff --git a/qemu/target/arm/vec_helper.c b/qemu/target/arm/vec_helper.c index 16e57bf3..29f483bb 100644 --- a/qemu/target/arm/vec_helper.c +++ b/qemu/target/arm/vec_helper.c @@ -195,6 +195,73 @@ void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm, clear_tail(d, opr_sz, simd_maxsz(desc)); } +/* Integer 8 and 16-bit dot-product. + * + * Note that for the loops herein, host endianness does not matter + * with respect to the ordering of data within the 64-bit lanes. + * All elements are treated equally, no matter where they are. + */ + +void HELPER(gvec_sdot_b)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint32_t *d = vd; + int8_t *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] += n[i * 4 + 0] * m[i * 4 + 0] + + n[i * 4 + 1] * m[i * 4 + 1] + + n[i * 4 + 2] * m[i * 4 + 2] + + n[i * 4 + 3] * m[i * 4 + 3]; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(gvec_udot_b)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint32_t *d = vd; + uint8_t *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 4; ++i) { + d[i] += n[i * 4 + 0] * m[i * 4 + 0] + + n[i * 4 + 1] * m[i * 4 + 1] + + n[i * 4 + 2] * m[i * 4 + 2] + + n[i * 4 + 3] * m[i * 4 + 3]; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(gvec_sdot_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint64_t *d = vd; + int16_t *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; ++i) { + d[i] += (int64_t)n[i * 4 + 0] * m[i * 4 + 0] + + (int64_t)n[i * 4 + 1] * m[i * 4 + 1] + + (int64_t)n[i * 4 + 2] * m[i * 4 + 2] + + (int64_t)n[i * 4 + 3] * m[i * 4 + 3]; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(gvec_udot_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint64_t *d = vd; + uint16_t *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; ++i) { + d[i] += (uint64_t)n[i * 4 + 0] * m[i * 4 + 0] + + (uint64_t)n[i * 4 + 1] * m[i * 4 + 1] + + (uint64_t)n[i * 4 + 2] * m[i * 4 + 2] + + (uint64_t)n[i * 4 + 3] * m[i * 4 + 3]; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm, void *vfpst, uint32_t desc) { diff --git a/qemu/x86_64.h b/qemu/x86_64.h index 4c6e8e0f..8ab54ae0 100644 --- a/qemu/x86_64.h +++ b/qemu/x86_64.h @@ -1189,6 +1189,8 @@ #define helper_gvec_sar16i helper_gvec_sar16i_x86_64 #define helper_gvec_sar32i helper_gvec_sar32i_x86_64 #define helper_gvec_sar64i helper_gvec_sar64i_x86_64 +#define helper_gvec_sdot_b helper_gvec_sdot_b_x86_64 +#define helper_gvec_sdot_h helper_gvec_sdot_h_x86_64 #define helper_gvec_shl8i helper_gvec_shl8i_x86_64 #define helper_gvec_shl16i helper_gvec_shl16i_x86_64 #define helper_gvec_shl32i helper_gvec_shl32i_x86_64 @@ -1213,6 +1215,8 @@ #define helper_gvec_sssub16 helper_gvec_sssub16_x86_64 #define helper_gvec_sssub32 helper_gvec_sssub32_x86_64 #define helper_gvec_sssub64 helper_gvec_sssub64_x86_64 +#define helper_gvec_udot_b helper_gvec_udot_b_x86_64 +#define helper_gvec_udot_h helper_gvec_udot_h_x86_64 #define helper_gvec_usadd8 helper_gvec_usadd8_x86_64 #define helper_gvec_usadd16 helper_gvec_usadd16_x86_64 #define helper_gvec_usadd32 helper_gvec_usadd32_x86_64