diff --git a/qemu/aarch64.h b/qemu/aarch64.h index 4273c459..2c6ac67a 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_aarch64 #define helper_neon_mul_u16 helper_neon_mul_u16_aarch64 #define helper_neon_mul_u8 helper_neon_mul_u8_aarch64 -#define helper_neon_mull_p8 helper_neon_mull_p8_aarch64 #define helper_neon_mull_s16 helper_neon_mull_s16_aarch64 #define helper_neon_mull_s8 helper_neon_mull_s8_aarch64 #define helper_neon_mull_u16 helper_neon_mull_u16_aarch64 @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_aarch64 #define helper_neon_pmin_u16 helper_neon_pmin_u16_aarch64 #define helper_neon_pmin_u8 helper_neon_pmin_u8_aarch64 +#define helper_neon_pmull_h helper_neon_pmull_h_aarch64 #define helper_neon_qabs_s16 helper_neon_qabs_s16_aarch64 #define helper_neon_qabs_s32 helper_neon_qabs_s32_aarch64 #define helper_neon_qabs_s64 helper_neon_qabs_s64_aarch64 @@ -4401,6 +4401,7 @@ #define helper_sve_zip_h helper_sve_zip_h_aarch64 #define helper_sve_zip_s helper_sve_zip_s_aarch64 #define helper_sve_zip_p helper_sve_zip_p_aarch64 +#define helper_sve2_pmull_h helper_sve2_pmull_h_aarch64 #define helper_udiv64 helper_udiv64_aarch64 #define helper_vfp_cmpd_a64 helper_vfp_cmpd_a64_aarch64 #define helper_vfp_cmped_a64 helper_vfp_cmped_a64_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index 18057d81..076ff7e3 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_aarch64eb #define helper_neon_mul_u16 helper_neon_mul_u16_aarch64eb #define helper_neon_mul_u8 helper_neon_mul_u8_aarch64eb -#define helper_neon_mull_p8 helper_neon_mull_p8_aarch64eb #define helper_neon_mull_s16 helper_neon_mull_s16_aarch64eb #define helper_neon_mull_s8 helper_neon_mull_s8_aarch64eb #define helper_neon_mull_u16 helper_neon_mull_u16_aarch64eb @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_aarch64eb #define helper_neon_pmin_u16 helper_neon_pmin_u16_aarch64eb #define helper_neon_pmin_u8 helper_neon_pmin_u8_aarch64eb +#define helper_neon_pmull_h helper_neon_pmull_h_aarch64eb #define helper_neon_qabs_s16 helper_neon_qabs_s16_aarch64eb #define helper_neon_qabs_s32 helper_neon_qabs_s32_aarch64eb #define helper_neon_qabs_s64 helper_neon_qabs_s64_aarch64eb @@ -4401,6 +4401,7 @@ #define helper_sve_zip_h helper_sve_zip_h_aarch64eb #define helper_sve_zip_s helper_sve_zip_s_aarch64eb #define helper_sve_zip_p helper_sve_zip_p_aarch64eb +#define helper_sve2_pmull_h helper_sve2_pmull_h_aarch64eb #define helper_udiv64 helper_udiv64_aarch64eb #define helper_vfp_cmpd_a64 helper_vfp_cmpd_a64_aarch64eb #define helper_vfp_cmped_a64 helper_vfp_cmped_a64_aarch64eb diff --git a/qemu/arm.h b/qemu/arm.h index f6150cec..bfc64fb3 100644 --- a/qemu/arm.h +++ b/qemu/arm.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_arm #define helper_neon_mul_u16 helper_neon_mul_u16_arm #define helper_neon_mul_u8 helper_neon_mul_u8_arm -#define helper_neon_mull_p8 helper_neon_mull_p8_arm #define helper_neon_mull_s16 helper_neon_mull_s16_arm #define helper_neon_mull_s8 helper_neon_mull_s8_arm #define helper_neon_mull_u16 helper_neon_mull_u16_arm @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_arm #define helper_neon_pmin_u16 helper_neon_pmin_u16_arm #define helper_neon_pmin_u8 helper_neon_pmin_u8_arm +#define helper_neon_pmull_h helper_neon_pmull_h_arm #define helper_neon_qabs_s16 helper_neon_qabs_s16_arm #define helper_neon_qabs_s32 helper_neon_qabs_s32_arm #define helper_neon_qabs_s64 helper_neon_qabs_s64_arm diff --git a/qemu/armeb.h b/qemu/armeb.h index dd09f2e3..de1363ff 100644 --- a/qemu/armeb.h +++ b/qemu/armeb.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_armeb #define helper_neon_mul_u16 helper_neon_mul_u16_armeb #define helper_neon_mul_u8 helper_neon_mul_u8_armeb -#define helper_neon_mull_p8 helper_neon_mull_p8_armeb #define helper_neon_mull_s16 helper_neon_mull_s16_armeb #define helper_neon_mull_s8 helper_neon_mull_s8_armeb #define helper_neon_mull_u16 helper_neon_mull_u16_armeb @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_armeb #define helper_neon_pmin_u16 helper_neon_pmin_u16_armeb #define helper_neon_pmin_u8 helper_neon_pmin_u8_armeb +#define helper_neon_pmull_h helper_neon_pmull_h_armeb #define helper_neon_qabs_s16 helper_neon_qabs_s16_armeb #define helper_neon_qabs_s32 helper_neon_qabs_s32_armeb #define helper_neon_qabs_s64 helper_neon_qabs_s64_armeb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index fd588eea..837fe4a6 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -1535,7 +1535,6 @@ symbols = ( 'helper_neon_min_u8', 'helper_neon_mul_u16', 'helper_neon_mul_u8', - 'helper_neon_mull_p8', 'helper_neon_mull_s16', 'helper_neon_mull_s8', 'helper_neon_mull_u16', @@ -1566,6 +1565,7 @@ symbols = ( 'helper_neon_pmin_s8', 'helper_neon_pmin_u16', 'helper_neon_pmin_u8', + 'helper_neon_pmull_h', 'helper_neon_qabs_s16', 'helper_neon_qabs_s32', 'helper_neon_qabs_s64', @@ -4463,6 +4463,7 @@ aarch64_symbols = ( 'helper_sve_zip_h', 'helper_sve_zip_s', 'helper_sve_zip_p', + 'helper_sve2_pmull_h', 'helper_udiv64', 'helper_vfp_cmpd_a64', 'helper_vfp_cmped_a64', diff --git a/qemu/m68k.h b/qemu/m68k.h index 5e5d870d..643d05f7 100644 --- a/qemu/m68k.h +++ b/qemu/m68k.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_m68k #define helper_neon_mul_u16 helper_neon_mul_u16_m68k #define helper_neon_mul_u8 helper_neon_mul_u8_m68k -#define helper_neon_mull_p8 helper_neon_mull_p8_m68k #define helper_neon_mull_s16 helper_neon_mull_s16_m68k #define helper_neon_mull_s8 helper_neon_mull_s8_m68k #define helper_neon_mull_u16 helper_neon_mull_u16_m68k @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_m68k #define helper_neon_pmin_u16 helper_neon_pmin_u16_m68k #define helper_neon_pmin_u8 helper_neon_pmin_u8_m68k +#define helper_neon_pmull_h helper_neon_pmull_h_m68k #define helper_neon_qabs_s16 helper_neon_qabs_s16_m68k #define helper_neon_qabs_s32 helper_neon_qabs_s32_m68k #define helper_neon_qabs_s64 helper_neon_qabs_s64_m68k diff --git a/qemu/mips.h b/qemu/mips.h index 578d7dd1..acc4146e 100644 --- a/qemu/mips.h +++ b/qemu/mips.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_mips #define helper_neon_mul_u16 helper_neon_mul_u16_mips #define helper_neon_mul_u8 helper_neon_mul_u8_mips -#define helper_neon_mull_p8 helper_neon_mull_p8_mips #define helper_neon_mull_s16 helper_neon_mull_s16_mips #define helper_neon_mull_s8 helper_neon_mull_s8_mips #define helper_neon_mull_u16 helper_neon_mull_u16_mips @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_mips #define helper_neon_pmin_u16 helper_neon_pmin_u16_mips #define helper_neon_pmin_u8 helper_neon_pmin_u8_mips +#define helper_neon_pmull_h helper_neon_pmull_h_mips #define helper_neon_qabs_s16 helper_neon_qabs_s16_mips #define helper_neon_qabs_s32 helper_neon_qabs_s32_mips #define helper_neon_qabs_s64 helper_neon_qabs_s64_mips diff --git a/qemu/mips64.h b/qemu/mips64.h index 8652d37e..4058f353 100644 --- a/qemu/mips64.h +++ b/qemu/mips64.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_mips64 #define helper_neon_mul_u16 helper_neon_mul_u16_mips64 #define helper_neon_mul_u8 helper_neon_mul_u8_mips64 -#define helper_neon_mull_p8 helper_neon_mull_p8_mips64 #define helper_neon_mull_s16 helper_neon_mull_s16_mips64 #define helper_neon_mull_s8 helper_neon_mull_s8_mips64 #define helper_neon_mull_u16 helper_neon_mull_u16_mips64 @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_mips64 #define helper_neon_pmin_u16 helper_neon_pmin_u16_mips64 #define helper_neon_pmin_u8 helper_neon_pmin_u8_mips64 +#define helper_neon_pmull_h helper_neon_pmull_h_mips64 #define helper_neon_qabs_s16 helper_neon_qabs_s16_mips64 #define helper_neon_qabs_s32 helper_neon_qabs_s32_mips64 #define helper_neon_qabs_s64 helper_neon_qabs_s64_mips64 diff --git a/qemu/mips64el.h b/qemu/mips64el.h index 051bb0af..581e41ef 100644 --- a/qemu/mips64el.h +++ b/qemu/mips64el.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_mips64el #define helper_neon_mul_u16 helper_neon_mul_u16_mips64el #define helper_neon_mul_u8 helper_neon_mul_u8_mips64el -#define helper_neon_mull_p8 helper_neon_mull_p8_mips64el #define helper_neon_mull_s16 helper_neon_mull_s16_mips64el #define helper_neon_mull_s8 helper_neon_mull_s8_mips64el #define helper_neon_mull_u16 helper_neon_mull_u16_mips64el @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_mips64el #define helper_neon_pmin_u16 helper_neon_pmin_u16_mips64el #define helper_neon_pmin_u8 helper_neon_pmin_u8_mips64el +#define helper_neon_pmull_h helper_neon_pmull_h_mips64el #define helper_neon_qabs_s16 helper_neon_qabs_s16_mips64el #define helper_neon_qabs_s32 helper_neon_qabs_s32_mips64el #define helper_neon_qabs_s64 helper_neon_qabs_s64_mips64el diff --git a/qemu/mipsel.h b/qemu/mipsel.h index f303f30a..e4ae3f39 100644 --- a/qemu/mipsel.h +++ b/qemu/mipsel.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_mipsel #define helper_neon_mul_u16 helper_neon_mul_u16_mipsel #define helper_neon_mul_u8 helper_neon_mul_u8_mipsel -#define helper_neon_mull_p8 helper_neon_mull_p8_mipsel #define helper_neon_mull_s16 helper_neon_mull_s16_mipsel #define helper_neon_mull_s8 helper_neon_mull_s8_mipsel #define helper_neon_mull_u16 helper_neon_mull_u16_mipsel @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_mipsel #define helper_neon_pmin_u16 helper_neon_pmin_u16_mipsel #define helper_neon_pmin_u8 helper_neon_pmin_u8_mipsel +#define helper_neon_pmull_h helper_neon_pmull_h_mipsel #define helper_neon_qabs_s16 helper_neon_qabs_s16_mipsel #define helper_neon_qabs_s32 helper_neon_qabs_s32_mipsel #define helper_neon_qabs_s64 helper_neon_qabs_s64_mipsel diff --git a/qemu/powerpc.h b/qemu/powerpc.h index 663ad749..5b712ba5 100644 --- a/qemu/powerpc.h +++ b/qemu/powerpc.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_powerpc #define helper_neon_mul_u16 helper_neon_mul_u16_powerpc #define helper_neon_mul_u8 helper_neon_mul_u8_powerpc -#define helper_neon_mull_p8 helper_neon_mull_p8_powerpc #define helper_neon_mull_s16 helper_neon_mull_s16_powerpc #define helper_neon_mull_s8 helper_neon_mull_s8_powerpc #define helper_neon_mull_u16 helper_neon_mull_u16_powerpc @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_powerpc #define helper_neon_pmin_u16 helper_neon_pmin_u16_powerpc #define helper_neon_pmin_u8 helper_neon_pmin_u8_powerpc +#define helper_neon_pmull_h helper_neon_pmull_h_powerpc #define helper_neon_qabs_s16 helper_neon_qabs_s16_powerpc #define helper_neon_qabs_s32 helper_neon_qabs_s32_powerpc #define helper_neon_qabs_s64 helper_neon_qabs_s64_powerpc diff --git a/qemu/riscv32.h b/qemu/riscv32.h index 5e66f042..56dd4025 100644 --- a/qemu/riscv32.h +++ b/qemu/riscv32.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_riscv32 #define helper_neon_mul_u16 helper_neon_mul_u16_riscv32 #define helper_neon_mul_u8 helper_neon_mul_u8_riscv32 -#define helper_neon_mull_p8 helper_neon_mull_p8_riscv32 #define helper_neon_mull_s16 helper_neon_mull_s16_riscv32 #define helper_neon_mull_s8 helper_neon_mull_s8_riscv32 #define helper_neon_mull_u16 helper_neon_mull_u16_riscv32 @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_riscv32 #define helper_neon_pmin_u16 helper_neon_pmin_u16_riscv32 #define helper_neon_pmin_u8 helper_neon_pmin_u8_riscv32 +#define helper_neon_pmull_h helper_neon_pmull_h_riscv32 #define helper_neon_qabs_s16 helper_neon_qabs_s16_riscv32 #define helper_neon_qabs_s32 helper_neon_qabs_s32_riscv32 #define helper_neon_qabs_s64 helper_neon_qabs_s64_riscv32 diff --git a/qemu/riscv64.h b/qemu/riscv64.h index ee9c4d34..070e5824 100644 --- a/qemu/riscv64.h +++ b/qemu/riscv64.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_riscv64 #define helper_neon_mul_u16 helper_neon_mul_u16_riscv64 #define helper_neon_mul_u8 helper_neon_mul_u8_riscv64 -#define helper_neon_mull_p8 helper_neon_mull_p8_riscv64 #define helper_neon_mull_s16 helper_neon_mull_s16_riscv64 #define helper_neon_mull_s8 helper_neon_mull_s8_riscv64 #define helper_neon_mull_u16 helper_neon_mull_u16_riscv64 @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_riscv64 #define helper_neon_pmin_u16 helper_neon_pmin_u16_riscv64 #define helper_neon_pmin_u8 helper_neon_pmin_u8_riscv64 +#define helper_neon_pmull_h helper_neon_pmull_h_riscv64 #define helper_neon_qabs_s16 helper_neon_qabs_s16_riscv64 #define helper_neon_qabs_s32 helper_neon_qabs_s32_riscv64 #define helper_neon_qabs_s64 helper_neon_qabs_s64_riscv64 diff --git a/qemu/sparc.h b/qemu/sparc.h index d0d6eb0f..ffb965be 100644 --- a/qemu/sparc.h +++ b/qemu/sparc.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_sparc #define helper_neon_mul_u16 helper_neon_mul_u16_sparc #define helper_neon_mul_u8 helper_neon_mul_u8_sparc -#define helper_neon_mull_p8 helper_neon_mull_p8_sparc #define helper_neon_mull_s16 helper_neon_mull_s16_sparc #define helper_neon_mull_s8 helper_neon_mull_s8_sparc #define helper_neon_mull_u16 helper_neon_mull_u16_sparc @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_sparc #define helper_neon_pmin_u16 helper_neon_pmin_u16_sparc #define helper_neon_pmin_u8 helper_neon_pmin_u8_sparc +#define helper_neon_pmull_h helper_neon_pmull_h_sparc #define helper_neon_qabs_s16 helper_neon_qabs_s16_sparc #define helper_neon_qabs_s32 helper_neon_qabs_s32_sparc #define helper_neon_qabs_s64 helper_neon_qabs_s64_sparc diff --git a/qemu/sparc64.h b/qemu/sparc64.h index 3454c034..b823f0c7 100644 --- a/qemu/sparc64.h +++ b/qemu/sparc64.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_sparc64 #define helper_neon_mul_u16 helper_neon_mul_u16_sparc64 #define helper_neon_mul_u8 helper_neon_mul_u8_sparc64 -#define helper_neon_mull_p8 helper_neon_mull_p8_sparc64 #define helper_neon_mull_s16 helper_neon_mull_s16_sparc64 #define helper_neon_mull_s8 helper_neon_mull_s8_sparc64 #define helper_neon_mull_u16 helper_neon_mull_u16_sparc64 @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_sparc64 #define helper_neon_pmin_u16 helper_neon_pmin_u16_sparc64 #define helper_neon_pmin_u8 helper_neon_pmin_u8_sparc64 +#define helper_neon_pmull_h helper_neon_pmull_h_sparc64 #define helper_neon_qabs_s16 helper_neon_qabs_s16_sparc64 #define helper_neon_qabs_s32 helper_neon_qabs_s32_sparc64 #define helper_neon_qabs_s64 helper_neon_qabs_s64_sparc64 diff --git a/qemu/target/arm/helper-sve.h b/qemu/target/arm/helper-sve.h index 9e79182a..2f472791 100644 --- a/qemu/target/arm/helper-sve.h +++ b/qemu/target/arm/helper-sve.h @@ -1574,3 +1574,5 @@ DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) + +DEF_HELPER_FLAGS_4(sve2_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/qemu/target/arm/helper.h b/qemu/target/arm/helper.h index 0271d075..c7f89b0f 100644 --- a/qemu/target/arm/helper.h +++ b/qemu/target/arm/helper.h @@ -339,7 +339,6 @@ DEF_HELPER_2(neon_sub_u8, i32, i32, i32) DEF_HELPER_2(neon_sub_u16, i32, i32, i32) DEF_HELPER_2(neon_mul_u8, i32, i32, i32) DEF_HELPER_2(neon_mul_u16, i32, i32, i32) -DEF_HELPER_2(neon_mull_p8, i64, i32, i32) DEF_HELPER_2(neon_tst_u8, i32, i32, i32) DEF_HELPER_2(neon_tst_u16, i32, i32, i32) @@ -692,6 +691,8 @@ DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + #ifdef TARGET_ARM #define helper_clz helper_clz_arm #define gen_helper_clz gen_helper_clz_arm diff --git a/qemu/target/arm/neon_helper.c b/qemu/target/arm/neon_helper.c index 540cefa3..8cf09dd1 100644 --- a/qemu/target/arm/neon_helper.c +++ b/qemu/target/arm/neon_helper.c @@ -1141,39 +1141,6 @@ NEON_VOP(mul_u8, neon_u8, 4) NEON_VOP(mul_u16, neon_u16, 2) #undef NEON_FN -/* Polynomial multiplication is like integer multiplication except the - partial products are XORed, not added. */ - -uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2) -{ - uint64_t result = 0; - uint64_t mask; - uint64_t op2ex = op2; - op2ex = (op2ex & 0xff) | - ((op2ex & 0xff00) << 8) | - ((op2ex & 0xff0000) << 16) | - ((op2ex & 0xff000000) << 24); - while (op1) { - mask = 0; - if (op1 & 1) { - mask |= 0xffff; - } - if (op1 & (1 << 8)) { - mask |= (0xffffU << 16); - } - if (op1 & (1 << 16)) { - mask |= (0xffffULL << 32); - } - if (op1 & (1 << 24)) { - mask |= (0xffffULL << 48); - } - result ^= op2ex & mask; - op1 = (op1 >> 1) & 0x7f7f7f7f; - op2ex <<= 1; - } - return result; -} - #define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0 NEON_VOP(tst_u8, neon_u8, 4) NEON_VOP(tst_u16, neon_u16, 2) diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c index 72fc409d..6121c1ed 100644 --- a/qemu/target/arm/translate-a64.c +++ b/qemu/target/arm/translate-a64.c @@ -10830,10 +10830,6 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, gen_helper_neon_addl_saturate_s32(tcg_ctx, tcg_passres, tcg_ctx->cpu_env, tcg_passres, tcg_passres); break; - case 14: /* PMULL */ - assert(size == 0); - gen_helper_neon_mull_p8(tcg_ctx, tcg_passres, tcg_op1, tcg_op2); - break; default: g_assert_not_reached(); } @@ -10999,11 +10995,21 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm); break; case 14: /* PMULL, PMULL2 */ - if (is_u || size == 1 || size == 2) { + if (is_u) { unallocated_encoding(s); return; } - if (size == 3) { + switch (size) { + case 0: /* PMULL.P8 */ + if (!fp_access_check(s)) { + return; + } + /* The Q field specifies lo/hi half input for this insn. */ + gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, + gen_helper_neon_pmull_h); + break; + + case 3: /* PMULL.P64 */ if (!dc_isar_feature(aa64_pmull, s)) { unallocated_encoding(s); return; @@ -11014,9 +11020,13 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) /* The Q field specifies lo/hi half input for this insn. */ gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, gen_helper_gvec_pmull_q); - return; + break; + + default: + unallocated_encoding(s); + break; } - goto is_widening; + return; case 9: /* SQDMLAL, SQDMLAL2 */ case 11: /* SQDMLSL, SQDMLSL2 */ case 13: /* SQDMULL, SQDMULL2 */ @@ -11037,7 +11047,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - is_widening: if (!fp_access_check(s)) { return; } diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c index 4135f3d6..a5d53ad5 100644 --- a/qemu/target/arm/translate.c +++ b/qemu/target/arm/translate.c @@ -5999,15 +5999,20 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } - /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply) - * outside the loop below as it only performs a single pass. - */ - if (op == 14 && size == 2) { - if (!dc_isar_feature(aa32_pmull, s)) { - return 1; + /* Handle polynomial VMULL in a single pass. */ + if (op == 14) { + if (size == 0) { + /* VMULL.P8 */ + tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, 16, 16, + 0, gen_helper_neon_pmull_h); + } else { + /* VMULL.P64 */ + if (!dc_isar_feature(aa32_pmull, s)) { + return 1; + } + tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, 16, 16, + 0, gen_helper_gvec_pmull_q); } - tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, 16, 16, - 0, gen_helper_gvec_pmull_q); return 0; } @@ -6085,11 +6090,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */ gen_neon_mull(s, s->V0, tmp, tmp2, size, u); break; - case 14: /* Polynomial VMULL */ - gen_helper_neon_mull_p8(tcg_ctx, s->V0, tmp, tmp2); - tcg_temp_free_i32(tcg_ctx, tmp2); - tcg_temp_free_i32(tcg_ctx, tmp); - break; default: /* 15 is RESERVED: caught earlier */ abort(); } diff --git a/qemu/target/arm/vec_helper.c b/qemu/target/arm/vec_helper.c index 6bf1ec09..e2ca4a42 100644 --- a/qemu/target/arm/vec_helper.c +++ b/qemu/target/arm/vec_helper.c @@ -1198,3 +1198,63 @@ void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc) } clear_tail(d, opr_sz, simd_maxsz(desc)); } + +/* + * 8x8->16 polynomial multiply. + * + * The byte inputs are expanded to (or extracted from) half-words. + * Note that neon and sve2 get the inputs from different positions. + * This allows 4 bytes to be processed in parallel with uint64_t. + */ + +static uint64_t expand_byte_to_half(uint64_t x) +{ + return (x & 0x000000ff) + | ((x & 0x0000ff00) << 8) + | ((x & 0x00ff0000) << 16) + | ((x & 0xff000000) << 24); +} + +static uint64_t pmull_h(uint64_t op1, uint64_t op2) +{ + uint64_t result = 0; + int i; + + for (i = 0; i < 8; ++i) { + uint64_t mask = (op1 & 0x0001000100010001ull) * 0xffff; + result ^= op2 & mask; + op1 >>= 1; + op2 <<= 1; + } + return result; +} + +void HELPER(neon_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + int hi = simd_data(desc); + uint64_t *d = vd, *n = vn, *m = vm; + uint64_t nn = n[hi], mm = m[hi]; + + d[0] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm)); + nn >>= 32; + mm >>= 32; + d[1] = pmull_h(expand_byte_to_half(nn), expand_byte_to_half(mm)); + + clear_tail(d, 16, simd_maxsz(desc)); +} + +#ifdef TARGET_AARCH64 +void HELPER(sve2_pmull_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + int shift = simd_data(desc) * 8; + intptr_t i, opr_sz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 8; ++i) { + uint64_t nn = (n[i] >> shift) & 0x00ff00ff00ff00ffull; + uint64_t mm = (m[i] >> shift) & 0x00ff00ff00ff00ffull; + + d[i] = pmull_h(nn, mm); + } +} +#endif diff --git a/qemu/x86_64.h b/qemu/x86_64.h index 5a154d5e..93c0b9f7 100644 --- a/qemu/x86_64.h +++ b/qemu/x86_64.h @@ -1529,7 +1529,6 @@ #define helper_neon_min_u8 helper_neon_min_u8_x86_64 #define helper_neon_mul_u16 helper_neon_mul_u16_x86_64 #define helper_neon_mul_u8 helper_neon_mul_u8_x86_64 -#define helper_neon_mull_p8 helper_neon_mull_p8_x86_64 #define helper_neon_mull_s16 helper_neon_mull_s16_x86_64 #define helper_neon_mull_s8 helper_neon_mull_s8_x86_64 #define helper_neon_mull_u16 helper_neon_mull_u16_x86_64 @@ -1560,6 +1559,7 @@ #define helper_neon_pmin_s8 helper_neon_pmin_s8_x86_64 #define helper_neon_pmin_u16 helper_neon_pmin_u16_x86_64 #define helper_neon_pmin_u8 helper_neon_pmin_u8_x86_64 +#define helper_neon_pmull_h helper_neon_pmull_h_x86_64 #define helper_neon_qabs_s16 helper_neon_qabs_s16_x86_64 #define helper_neon_qabs_s32 helper_neon_qabs_s32_x86_64 #define helper_neon_qabs_s64 helper_neon_qabs_s64_x86_64