From d3139f2f0a34168c2a5f12600b589c96b9aad1fd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 21 Mar 2020 18:55:12 -0400 Subject: [PATCH] target/arm: Vectorize USHL and SSHL These instructions shift left or right depending on the sign of the input, and 7 bits are significant to the shift. This requires several masks and selects in addition to the actual shifts to form the complete answer. That said, the operation is still a small improvement even for two 64-bit elements -- 13 vector operations instead of 2 * 7 integer operations. Backports commit 87b74e8b6edd287ea2160caa0ebea725fa8f1ca1 from qemu --- qemu/aarch64.h | 16 +- qemu/aarch64eb.h | 16 +- qemu/arm.h | 12 +- qemu/armeb.h | 12 +- qemu/header_gen.py | 18 +- qemu/m68k.h | 10 +- qemu/mips.h | 10 +- qemu/mips64.h | 10 +- qemu/mips64el.h | 10 +- qemu/mipsel.h | 10 +- qemu/powerpc.h | 10 +- qemu/riscv32.h | 10 +- qemu/riscv64.h | 10 +- qemu/sparc.h | 10 +- qemu/sparc64.h | 10 +- qemu/target/arm/helper.h | 11 +- qemu/target/arm/neon_helper.c | 33 ---- qemu/target/arm/translate-a64.c | 18 +- qemu/target/arm/translate.c | 299 ++++++++++++++++++++++++++++++-- qemu/target/arm/translate.h | 8 +- qemu/target/arm/vec_helper.c | 88 ++++++++++ qemu/x86_64.h | 10 +- 22 files changed, 478 insertions(+), 163 deletions(-) diff --git a/qemu/aarch64.h b/qemu/aarch64.h index fa79ae91..9a6cd271 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_aarch64 #define helper_gvec_sqsub_h helper_gvec_sqsub_h_aarch64 #define helper_gvec_sqsub_s helper_gvec_sqsub_s_aarch64 +#define helper_gvec_sshl_b helper_gvec_sshl_b_aarch64 +#define helper_gvec_sshl_h helper_gvec_sshl_h_aarch64 #define helper_gvec_sub8 helper_gvec_sub8_aarch64 #define helper_gvec_sub16 helper_gvec_sub16_aarch64 #define helper_gvec_sub32 helper_gvec_sub32_aarch64 @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_aarch64 #define helper_gvec_usadd32 helper_gvec_usadd32_aarch64 #define helper_gvec_usadd64 helper_gvec_usadd64_aarch64 +#define helper_gvec_ushl_b helper_gvec_ushl_b_aarch64 +#define helper_gvec_ushl_h helper_gvec_ushl_h_aarch64 #define helper_gvec_ussub8 helper_gvec_ussub8_aarch64 #define helper_gvec_ussub16 helper_gvec_ussub16_aarch64 #define helper_gvec_ussub32 helper_gvec_ussub32_aarch64 @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_aarch64 #define helper_neon_rshl_u8 helper_neon_rshl_u8_aarch64 #define helper_neon_shl_s16 helper_neon_shl_s16_aarch64 -#define helper_neon_shl_s32 helper_neon_shl_s32_aarch64 -#define helper_neon_shl_s64 helper_neon_shl_s64_aarch64 -#define helper_neon_shl_s8 helper_neon_shl_s8_aarch64 #define helper_neon_shl_u16 helper_neon_shl_u16_aarch64 -#define helper_neon_shl_u32 helper_neon_shl_u32_aarch64 -#define helper_neon_shl_u64 helper_neon_shl_u64_aarch64 -#define helper_neon_shl_u8 helper_neon_shl_u8_aarch64 #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_aarch64 #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_aarch64 #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_aarch64 @@ -3428,6 +3426,10 @@ #define gen_a64_set_pc_im gen_a64_set_pc_im_aarch64 #define gen_cmtst_i64 gen_cmtst_i64_aarch64 #define get_phys_addr get_phys_addr_aarch64 +#define gen_sshl_i32 gen_sshl_i32_aarch64 +#define gen_sshl_i64 gen_sshl_i64_aarch64 +#define gen_ushl_i32 gen_ushl_i32_aarch64 +#define gen_ushl_i64 gen_ushl_i64_aarch64 #define pmu_init pmu_init_aarch64 #define helper_advsimd_acge_f16 helper_advsimd_acge_f16_aarch64 #define helper_advsimd_acgt_f16 helper_advsimd_acgt_f16_aarch64 @@ -4428,6 +4430,7 @@ #define sli_op sli_op_aarch64 #define sqadd_op sqadd_op_aarch64 #define sqsub_op sqsub_op_aarch64 +#define sshl_op sshl_op_aarch64 #define ssra_op ssra_op_aarch64 #define sri_op sri_op_aarch64 #define sve_access_check sve_access_check_aarch64 @@ -4436,6 +4439,7 @@ #define unallocated_encoding unallocated_encoding_aarch64 #define uqadd_op uqadd_op_aarch64 #define uqsub_op uqsub_op_aarch64 +#define ushl_op ushl_op_aarch64 #define usra_op usra_op_aarch64 #define v8m_security_lookup v8m_security_lookup_aarch64 #define vfp_expand_imm vfp_expand_imm_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index 383eb560..61ef9044 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_aarch64eb #define helper_gvec_sqsub_h helper_gvec_sqsub_h_aarch64eb #define helper_gvec_sqsub_s helper_gvec_sqsub_s_aarch64eb +#define helper_gvec_sshl_b helper_gvec_sshl_b_aarch64eb +#define helper_gvec_sshl_h helper_gvec_sshl_h_aarch64eb #define helper_gvec_sub8 helper_gvec_sub8_aarch64eb #define helper_gvec_sub16 helper_gvec_sub16_aarch64eb #define helper_gvec_sub32 helper_gvec_sub32_aarch64eb @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_aarch64eb #define helper_gvec_usadd32 helper_gvec_usadd32_aarch64eb #define helper_gvec_usadd64 helper_gvec_usadd64_aarch64eb +#define helper_gvec_ushl_b helper_gvec_ushl_b_aarch64eb +#define helper_gvec_ushl_h helper_gvec_ushl_h_aarch64eb #define helper_gvec_ussub8 helper_gvec_ussub8_aarch64eb #define helper_gvec_ussub16 helper_gvec_ussub16_aarch64eb #define helper_gvec_ussub32 helper_gvec_ussub32_aarch64eb @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_aarch64eb #define helper_neon_rshl_u8 helper_neon_rshl_u8_aarch64eb #define helper_neon_shl_s16 helper_neon_shl_s16_aarch64eb -#define helper_neon_shl_s32 helper_neon_shl_s32_aarch64eb -#define helper_neon_shl_s64 helper_neon_shl_s64_aarch64eb -#define helper_neon_shl_s8 helper_neon_shl_s8_aarch64eb #define helper_neon_shl_u16 helper_neon_shl_u16_aarch64eb -#define helper_neon_shl_u32 helper_neon_shl_u32_aarch64eb -#define helper_neon_shl_u64 helper_neon_shl_u64_aarch64eb -#define helper_neon_shl_u8 helper_neon_shl_u8_aarch64eb #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_aarch64eb #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_aarch64eb #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_aarch64eb @@ -3428,6 +3426,10 @@ #define gen_a64_set_pc_im gen_a64_set_pc_im_aarch64eb #define gen_cmtst_i64 gen_cmtst_i64_aarch64eb #define get_phys_addr get_phys_addr_aarch64eb +#define gen_sshl_i32 gen_sshl_i32_aarch64eb +#define gen_sshl_i64 gen_sshl_i64_aarch64eb +#define gen_ushl_i32 gen_ushl_i32_aarch64eb +#define gen_ushl_i64 gen_ushl_i64_aarch64eb #define pmu_init pmu_init_aarch64eb #define helper_advsimd_acge_f16 helper_advsimd_acge_f16_aarch64eb #define helper_advsimd_acgt_f16 helper_advsimd_acgt_f16_aarch64eb @@ -4428,6 +4430,7 @@ #define sli_op sli_op_aarch64eb #define sqadd_op sqadd_op_aarch64eb #define sqsub_op sqsub_op_aarch64eb +#define sshl_op sshl_op_aarch64eb #define ssra_op ssra_op_aarch64eb #define sri_op sri_op_aarch64eb #define sve_access_check sve_access_check_aarch64eb @@ -4436,6 +4439,7 @@ #define unallocated_encoding unallocated_encoding_aarch64eb #define uqadd_op uqadd_op_aarch64eb #define uqsub_op uqsub_op_aarch64eb +#define ushl_op ushl_op_aarch64eb #define usra_op usra_op_aarch64eb #define v8m_security_lookup v8m_security_lookup_aarch64eb #define vfp_expand_imm vfp_expand_imm_aarch64eb diff --git a/qemu/arm.h b/qemu/arm.h index cfd2667b..cfc9fd40 100644 --- a/qemu/arm.h +++ b/qemu/arm.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_arm #define helper_gvec_sqsub_h helper_gvec_sqsub_h_arm #define helper_gvec_sqsub_s helper_gvec_sqsub_s_arm +#define helper_gvec_sshl_b helper_gvec_sshl_b_arm +#define helper_gvec_sshl_h helper_gvec_sshl_h_arm #define helper_gvec_sub8 helper_gvec_sub8_arm #define helper_gvec_sub16 helper_gvec_sub16_arm #define helper_gvec_sub32 helper_gvec_sub32_arm @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_arm #define helper_gvec_usadd32 helper_gvec_usadd32_arm #define helper_gvec_usadd64 helper_gvec_usadd64_arm +#define helper_gvec_ushl_b helper_gvec_ushl_b_arm +#define helper_gvec_ushl_h helper_gvec_ushl_h_arm #define helper_gvec_ussub8 helper_gvec_ussub8_arm #define helper_gvec_ussub16 helper_gvec_ussub16_arm #define helper_gvec_ussub32 helper_gvec_ussub32_arm @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_arm #define helper_neon_rshl_u8 helper_neon_rshl_u8_arm #define helper_neon_shl_s16 helper_neon_shl_s16_arm -#define helper_neon_shl_s32 helper_neon_shl_s32_arm -#define helper_neon_shl_s64 helper_neon_shl_s64_arm -#define helper_neon_shl_s8 helper_neon_shl_s8_arm #define helper_neon_shl_u16 helper_neon_shl_u16_arm -#define helper_neon_shl_u32 helper_neon_shl_u32_arm -#define helper_neon_shl_u64 helper_neon_shl_u64_arm -#define helper_neon_shl_u8 helper_neon_shl_u8_arm #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_arm #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_arm #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_arm @@ -3428,12 +3426,14 @@ #define sli_op sli_op_arm #define sqadd_op sqadd_op_arm #define sqsub_op sqsub_op_arm +#define sshl_op sshl_op_arm #define ssra_op ssra_op_arm #define sri_op sri_op_arm #define sve_exception_el sve_exception_el_arm #define sve_zcr_len_for_el sve_zcr_len_for_el_arm #define uqadd_op uqadd_op_arm #define uqsub_op uqsub_op_arm +#define ushl_op ushl_op_arm #define usra_op usra_op_arm #define v8m_security_lookup v8m_security_lookup_arm #define vfp_expand_imm vfp_expand_imm_arm diff --git a/qemu/armeb.h b/qemu/armeb.h index 0d1dd856..7c99cb41 100644 --- a/qemu/armeb.h +++ b/qemu/armeb.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_armeb #define helper_gvec_sqsub_h helper_gvec_sqsub_h_armeb #define helper_gvec_sqsub_s helper_gvec_sqsub_s_armeb +#define helper_gvec_sshl_b helper_gvec_sshl_b_armeb +#define helper_gvec_sshl_h helper_gvec_sshl_h_armeb #define helper_gvec_sub8 helper_gvec_sub8_armeb #define helper_gvec_sub16 helper_gvec_sub16_armeb #define helper_gvec_sub32 helper_gvec_sub32_armeb @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_armeb #define helper_gvec_usadd32 helper_gvec_usadd32_armeb #define helper_gvec_usadd64 helper_gvec_usadd64_armeb +#define helper_gvec_ushl_b helper_gvec_ushl_b_armeb +#define helper_gvec_ushl_h helper_gvec_ushl_h_armeb #define helper_gvec_ussub8 helper_gvec_ussub8_armeb #define helper_gvec_ussub16 helper_gvec_ussub16_armeb #define helper_gvec_ussub32 helper_gvec_ussub32_armeb @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_armeb #define helper_neon_rshl_u8 helper_neon_rshl_u8_armeb #define helper_neon_shl_s16 helper_neon_shl_s16_armeb -#define helper_neon_shl_s32 helper_neon_shl_s32_armeb -#define helper_neon_shl_s64 helper_neon_shl_s64_armeb -#define helper_neon_shl_s8 helper_neon_shl_s8_armeb #define helper_neon_shl_u16 helper_neon_shl_u16_armeb -#define helper_neon_shl_u32 helper_neon_shl_u32_armeb -#define helper_neon_shl_u64 helper_neon_shl_u64_armeb -#define helper_neon_shl_u8 helper_neon_shl_u8_armeb #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_armeb #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_armeb #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_armeb @@ -3428,12 +3426,14 @@ #define sli_op sli_op_armeb #define sqadd_op sqadd_op_armeb #define sqsub_op sqsub_op_armeb +#define sshl_op sshl_op_armeb #define ssra_op ssra_op_armeb #define sri_op sri_op_armeb #define sve_exception_el sve_exception_el_armeb #define sve_zcr_len_for_el sve_zcr_len_for_el_armeb #define uqadd_op uqadd_op_armeb #define uqsub_op uqsub_op_armeb +#define ushl_op ushl_op_armeb #define usra_op usra_op_armeb #define v8m_security_lookup v8m_security_lookup_armeb #define vfp_expand_imm vfp_expand_imm_armeb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index 58f9de8f..116e84f8 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -1269,6 +1269,8 @@ symbols = ( 'helper_gvec_sqsub_d', 'helper_gvec_sqsub_h', 'helper_gvec_sqsub_s', + 'helper_gvec_sshl_b', + 'helper_gvec_sshl_h', 'helper_gvec_sub8', 'helper_gvec_sub16', 'helper_gvec_sub32', @@ -1309,6 +1311,8 @@ symbols = ( 'helper_gvec_usadd16', 'helper_gvec_usadd32', 'helper_gvec_usadd64', + 'helper_gvec_ushl_b', + 'helper_gvec_ushl_h', 'helper_gvec_ussub8', 'helper_gvec_ussub16', 'helper_gvec_ussub32', @@ -1637,13 +1641,7 @@ symbols = ( 'helper_neon_rshl_u64', 'helper_neon_rshl_u8', 'helper_neon_shl_s16', - 'helper_neon_shl_s32', - 'helper_neon_shl_s64', - 'helper_neon_shl_s8', 'helper_neon_shl_u16', - 'helper_neon_shl_u32', - 'helper_neon_shl_u64', - 'helper_neon_shl_u8', 'helper_neon_sqadd_u16', 'helper_neon_sqadd_u32', 'helper_neon_sqadd_u64', @@ -3437,12 +3435,14 @@ arm_symbols = ( 'sli_op', 'sqadd_op', 'sqsub_op', + 'sshl_op', 'ssra_op', 'sri_op', 'sve_exception_el', 'sve_zcr_len_for_el', 'uqadd_op', 'uqsub_op', + 'ushl_op', 'usra_op', 'v8m_security_lookup', 'vfp_expand_imm', @@ -3488,6 +3488,10 @@ aarch64_symbols = ( 'gen_a64_set_pc_im', 'gen_cmtst_i64', 'get_phys_addr', + 'gen_sshl_i32', + 'gen_sshl_i64', + 'gen_ushl_i32', + 'gen_ushl_i64', 'pmu_init', 'helper_advsimd_acge_f16', 'helper_advsimd_acgt_f16', @@ -4488,6 +4492,7 @@ aarch64_symbols = ( 'sli_op', 'sqadd_op', 'sqsub_op', + 'sshl_op', 'ssra_op', 'sri_op', 'sve_access_check', @@ -4496,6 +4501,7 @@ aarch64_symbols = ( 'unallocated_encoding', 'uqadd_op', 'uqsub_op', + 'ushl_op', 'usra_op', 'v8m_security_lookup', 'vfp_expand_imm', diff --git a/qemu/m68k.h b/qemu/m68k.h index c2b91f6e..86b3dc99 100644 --- a/qemu/m68k.h +++ b/qemu/m68k.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_m68k #define helper_gvec_sqsub_h helper_gvec_sqsub_h_m68k #define helper_gvec_sqsub_s helper_gvec_sqsub_s_m68k +#define helper_gvec_sshl_b helper_gvec_sshl_b_m68k +#define helper_gvec_sshl_h helper_gvec_sshl_h_m68k #define helper_gvec_sub8 helper_gvec_sub8_m68k #define helper_gvec_sub16 helper_gvec_sub16_m68k #define helper_gvec_sub32 helper_gvec_sub32_m68k @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_m68k #define helper_gvec_usadd32 helper_gvec_usadd32_m68k #define helper_gvec_usadd64 helper_gvec_usadd64_m68k +#define helper_gvec_ushl_b helper_gvec_ushl_b_m68k +#define helper_gvec_ushl_h helper_gvec_ushl_h_m68k #define helper_gvec_ussub8 helper_gvec_ussub8_m68k #define helper_gvec_ussub16 helper_gvec_ussub16_m68k #define helper_gvec_ussub32 helper_gvec_ussub32_m68k @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_m68k #define helper_neon_rshl_u8 helper_neon_rshl_u8_m68k #define helper_neon_shl_s16 helper_neon_shl_s16_m68k -#define helper_neon_shl_s32 helper_neon_shl_s32_m68k -#define helper_neon_shl_s64 helper_neon_shl_s64_m68k -#define helper_neon_shl_s8 helper_neon_shl_s8_m68k #define helper_neon_shl_u16 helper_neon_shl_u16_m68k -#define helper_neon_shl_u32 helper_neon_shl_u32_m68k -#define helper_neon_shl_u64 helper_neon_shl_u64_m68k -#define helper_neon_shl_u8 helper_neon_shl_u8_m68k #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_m68k #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_m68k #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_m68k diff --git a/qemu/mips.h b/qemu/mips.h index c340fdde..a271625c 100644 --- a/qemu/mips.h +++ b/qemu/mips.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_mips #define helper_gvec_sqsub_h helper_gvec_sqsub_h_mips #define helper_gvec_sqsub_s helper_gvec_sqsub_s_mips +#define helper_gvec_sshl_b helper_gvec_sshl_b_mips +#define helper_gvec_sshl_h helper_gvec_sshl_h_mips #define helper_gvec_sub8 helper_gvec_sub8_mips #define helper_gvec_sub16 helper_gvec_sub16_mips #define helper_gvec_sub32 helper_gvec_sub32_mips @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_mips #define helper_gvec_usadd32 helper_gvec_usadd32_mips #define helper_gvec_usadd64 helper_gvec_usadd64_mips +#define helper_gvec_ushl_b helper_gvec_ushl_b_mips +#define helper_gvec_ushl_h helper_gvec_ushl_h_mips #define helper_gvec_ussub8 helper_gvec_ussub8_mips #define helper_gvec_ussub16 helper_gvec_ussub16_mips #define helper_gvec_ussub32 helper_gvec_ussub32_mips @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_mips #define helper_neon_rshl_u8 helper_neon_rshl_u8_mips #define helper_neon_shl_s16 helper_neon_shl_s16_mips -#define helper_neon_shl_s32 helper_neon_shl_s32_mips -#define helper_neon_shl_s64 helper_neon_shl_s64_mips -#define helper_neon_shl_s8 helper_neon_shl_s8_mips #define helper_neon_shl_u16 helper_neon_shl_u16_mips -#define helper_neon_shl_u32 helper_neon_shl_u32_mips -#define helper_neon_shl_u64 helper_neon_shl_u64_mips -#define helper_neon_shl_u8 helper_neon_shl_u8_mips #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_mips #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_mips #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_mips diff --git a/qemu/mips64.h b/qemu/mips64.h index 66a10f38..3d3995fb 100644 --- a/qemu/mips64.h +++ b/qemu/mips64.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_mips64 #define helper_gvec_sqsub_h helper_gvec_sqsub_h_mips64 #define helper_gvec_sqsub_s helper_gvec_sqsub_s_mips64 +#define helper_gvec_sshl_b helper_gvec_sshl_b_mips64 +#define helper_gvec_sshl_h helper_gvec_sshl_h_mips64 #define helper_gvec_sub8 helper_gvec_sub8_mips64 #define helper_gvec_sub16 helper_gvec_sub16_mips64 #define helper_gvec_sub32 helper_gvec_sub32_mips64 @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_mips64 #define helper_gvec_usadd32 helper_gvec_usadd32_mips64 #define helper_gvec_usadd64 helper_gvec_usadd64_mips64 +#define helper_gvec_ushl_b helper_gvec_ushl_b_mips64 +#define helper_gvec_ushl_h helper_gvec_ushl_h_mips64 #define helper_gvec_ussub8 helper_gvec_ussub8_mips64 #define helper_gvec_ussub16 helper_gvec_ussub16_mips64 #define helper_gvec_ussub32 helper_gvec_ussub32_mips64 @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_mips64 #define helper_neon_rshl_u8 helper_neon_rshl_u8_mips64 #define helper_neon_shl_s16 helper_neon_shl_s16_mips64 -#define helper_neon_shl_s32 helper_neon_shl_s32_mips64 -#define helper_neon_shl_s64 helper_neon_shl_s64_mips64 -#define helper_neon_shl_s8 helper_neon_shl_s8_mips64 #define helper_neon_shl_u16 helper_neon_shl_u16_mips64 -#define helper_neon_shl_u32 helper_neon_shl_u32_mips64 -#define helper_neon_shl_u64 helper_neon_shl_u64_mips64 -#define helper_neon_shl_u8 helper_neon_shl_u8_mips64 #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_mips64 #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_mips64 #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_mips64 diff --git a/qemu/mips64el.h b/qemu/mips64el.h index 38e21946..84b7ea83 100644 --- a/qemu/mips64el.h +++ b/qemu/mips64el.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_mips64el #define helper_gvec_sqsub_h helper_gvec_sqsub_h_mips64el #define helper_gvec_sqsub_s helper_gvec_sqsub_s_mips64el +#define helper_gvec_sshl_b helper_gvec_sshl_b_mips64el +#define helper_gvec_sshl_h helper_gvec_sshl_h_mips64el #define helper_gvec_sub8 helper_gvec_sub8_mips64el #define helper_gvec_sub16 helper_gvec_sub16_mips64el #define helper_gvec_sub32 helper_gvec_sub32_mips64el @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_mips64el #define helper_gvec_usadd32 helper_gvec_usadd32_mips64el #define helper_gvec_usadd64 helper_gvec_usadd64_mips64el +#define helper_gvec_ushl_b helper_gvec_ushl_b_mips64el +#define helper_gvec_ushl_h helper_gvec_ushl_h_mips64el #define helper_gvec_ussub8 helper_gvec_ussub8_mips64el #define helper_gvec_ussub16 helper_gvec_ussub16_mips64el #define helper_gvec_ussub32 helper_gvec_ussub32_mips64el @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_mips64el #define helper_neon_rshl_u8 helper_neon_rshl_u8_mips64el #define helper_neon_shl_s16 helper_neon_shl_s16_mips64el -#define helper_neon_shl_s32 helper_neon_shl_s32_mips64el -#define helper_neon_shl_s64 helper_neon_shl_s64_mips64el -#define helper_neon_shl_s8 helper_neon_shl_s8_mips64el #define helper_neon_shl_u16 helper_neon_shl_u16_mips64el -#define helper_neon_shl_u32 helper_neon_shl_u32_mips64el -#define helper_neon_shl_u64 helper_neon_shl_u64_mips64el -#define helper_neon_shl_u8 helper_neon_shl_u8_mips64el #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_mips64el #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_mips64el #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_mips64el diff --git a/qemu/mipsel.h b/qemu/mipsel.h index b4022b1a..7e0586e2 100644 --- a/qemu/mipsel.h +++ b/qemu/mipsel.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_mipsel #define helper_gvec_sqsub_h helper_gvec_sqsub_h_mipsel #define helper_gvec_sqsub_s helper_gvec_sqsub_s_mipsel +#define helper_gvec_sshl_b helper_gvec_sshl_b_mipsel +#define helper_gvec_sshl_h helper_gvec_sshl_h_mipsel #define helper_gvec_sub8 helper_gvec_sub8_mipsel #define helper_gvec_sub16 helper_gvec_sub16_mipsel #define helper_gvec_sub32 helper_gvec_sub32_mipsel @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_mipsel #define helper_gvec_usadd32 helper_gvec_usadd32_mipsel #define helper_gvec_usadd64 helper_gvec_usadd64_mipsel +#define helper_gvec_ushl_b helper_gvec_ushl_b_mipsel +#define helper_gvec_ushl_h helper_gvec_ushl_h_mipsel #define helper_gvec_ussub8 helper_gvec_ussub8_mipsel #define helper_gvec_ussub16 helper_gvec_ussub16_mipsel #define helper_gvec_ussub32 helper_gvec_ussub32_mipsel @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_mipsel #define helper_neon_rshl_u8 helper_neon_rshl_u8_mipsel #define helper_neon_shl_s16 helper_neon_shl_s16_mipsel -#define helper_neon_shl_s32 helper_neon_shl_s32_mipsel -#define helper_neon_shl_s64 helper_neon_shl_s64_mipsel -#define helper_neon_shl_s8 helper_neon_shl_s8_mipsel #define helper_neon_shl_u16 helper_neon_shl_u16_mipsel -#define helper_neon_shl_u32 helper_neon_shl_u32_mipsel -#define helper_neon_shl_u64 helper_neon_shl_u64_mipsel -#define helper_neon_shl_u8 helper_neon_shl_u8_mipsel #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_mipsel #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_mipsel #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_mipsel diff --git a/qemu/powerpc.h b/qemu/powerpc.h index f6c75aea..0882bbf1 100644 --- a/qemu/powerpc.h +++ b/qemu/powerpc.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_powerpc #define helper_gvec_sqsub_h helper_gvec_sqsub_h_powerpc #define helper_gvec_sqsub_s helper_gvec_sqsub_s_powerpc +#define helper_gvec_sshl_b helper_gvec_sshl_b_powerpc +#define helper_gvec_sshl_h helper_gvec_sshl_h_powerpc #define helper_gvec_sub8 helper_gvec_sub8_powerpc #define helper_gvec_sub16 helper_gvec_sub16_powerpc #define helper_gvec_sub32 helper_gvec_sub32_powerpc @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_powerpc #define helper_gvec_usadd32 helper_gvec_usadd32_powerpc #define helper_gvec_usadd64 helper_gvec_usadd64_powerpc +#define helper_gvec_ushl_b helper_gvec_ushl_b_powerpc +#define helper_gvec_ushl_h helper_gvec_ushl_h_powerpc #define helper_gvec_ussub8 helper_gvec_ussub8_powerpc #define helper_gvec_ussub16 helper_gvec_ussub16_powerpc #define helper_gvec_ussub32 helper_gvec_ussub32_powerpc @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_powerpc #define helper_neon_rshl_u8 helper_neon_rshl_u8_powerpc #define helper_neon_shl_s16 helper_neon_shl_s16_powerpc -#define helper_neon_shl_s32 helper_neon_shl_s32_powerpc -#define helper_neon_shl_s64 helper_neon_shl_s64_powerpc -#define helper_neon_shl_s8 helper_neon_shl_s8_powerpc #define helper_neon_shl_u16 helper_neon_shl_u16_powerpc -#define helper_neon_shl_u32 helper_neon_shl_u32_powerpc -#define helper_neon_shl_u64 helper_neon_shl_u64_powerpc -#define helper_neon_shl_u8 helper_neon_shl_u8_powerpc #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_powerpc #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_powerpc #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_powerpc diff --git a/qemu/riscv32.h b/qemu/riscv32.h index ac1406bf..37cc611c 100644 --- a/qemu/riscv32.h +++ b/qemu/riscv32.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_riscv32 #define helper_gvec_sqsub_h helper_gvec_sqsub_h_riscv32 #define helper_gvec_sqsub_s helper_gvec_sqsub_s_riscv32 +#define helper_gvec_sshl_b helper_gvec_sshl_b_riscv32 +#define helper_gvec_sshl_h helper_gvec_sshl_h_riscv32 #define helper_gvec_sub8 helper_gvec_sub8_riscv32 #define helper_gvec_sub16 helper_gvec_sub16_riscv32 #define helper_gvec_sub32 helper_gvec_sub32_riscv32 @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_riscv32 #define helper_gvec_usadd32 helper_gvec_usadd32_riscv32 #define helper_gvec_usadd64 helper_gvec_usadd64_riscv32 +#define helper_gvec_ushl_b helper_gvec_ushl_b_riscv32 +#define helper_gvec_ushl_h helper_gvec_ushl_h_riscv32 #define helper_gvec_ussub8 helper_gvec_ussub8_riscv32 #define helper_gvec_ussub16 helper_gvec_ussub16_riscv32 #define helper_gvec_ussub32 helper_gvec_ussub32_riscv32 @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_riscv32 #define helper_neon_rshl_u8 helper_neon_rshl_u8_riscv32 #define helper_neon_shl_s16 helper_neon_shl_s16_riscv32 -#define helper_neon_shl_s32 helper_neon_shl_s32_riscv32 -#define helper_neon_shl_s64 helper_neon_shl_s64_riscv32 -#define helper_neon_shl_s8 helper_neon_shl_s8_riscv32 #define helper_neon_shl_u16 helper_neon_shl_u16_riscv32 -#define helper_neon_shl_u32 helper_neon_shl_u32_riscv32 -#define helper_neon_shl_u64 helper_neon_shl_u64_riscv32 -#define helper_neon_shl_u8 helper_neon_shl_u8_riscv32 #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_riscv32 #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_riscv32 #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_riscv32 diff --git a/qemu/riscv64.h b/qemu/riscv64.h index 3a27e1ec..0a7a1242 100644 --- a/qemu/riscv64.h +++ b/qemu/riscv64.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_riscv64 #define helper_gvec_sqsub_h helper_gvec_sqsub_h_riscv64 #define helper_gvec_sqsub_s helper_gvec_sqsub_s_riscv64 +#define helper_gvec_sshl_b helper_gvec_sshl_b_riscv64 +#define helper_gvec_sshl_h helper_gvec_sshl_h_riscv64 #define helper_gvec_sub8 helper_gvec_sub8_riscv64 #define helper_gvec_sub16 helper_gvec_sub16_riscv64 #define helper_gvec_sub32 helper_gvec_sub32_riscv64 @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_riscv64 #define helper_gvec_usadd32 helper_gvec_usadd32_riscv64 #define helper_gvec_usadd64 helper_gvec_usadd64_riscv64 +#define helper_gvec_ushl_b helper_gvec_ushl_b_riscv64 +#define helper_gvec_ushl_h helper_gvec_ushl_h_riscv64 #define helper_gvec_ussub8 helper_gvec_ussub8_riscv64 #define helper_gvec_ussub16 helper_gvec_ussub16_riscv64 #define helper_gvec_ussub32 helper_gvec_ussub32_riscv64 @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_riscv64 #define helper_neon_rshl_u8 helper_neon_rshl_u8_riscv64 #define helper_neon_shl_s16 helper_neon_shl_s16_riscv64 -#define helper_neon_shl_s32 helper_neon_shl_s32_riscv64 -#define helper_neon_shl_s64 helper_neon_shl_s64_riscv64 -#define helper_neon_shl_s8 helper_neon_shl_s8_riscv64 #define helper_neon_shl_u16 helper_neon_shl_u16_riscv64 -#define helper_neon_shl_u32 helper_neon_shl_u32_riscv64 -#define helper_neon_shl_u64 helper_neon_shl_u64_riscv64 -#define helper_neon_shl_u8 helper_neon_shl_u8_riscv64 #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_riscv64 #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_riscv64 #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_riscv64 diff --git a/qemu/sparc.h b/qemu/sparc.h index 5dac30be..e51bc1cf 100644 --- a/qemu/sparc.h +++ b/qemu/sparc.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_sparc #define helper_gvec_sqsub_h helper_gvec_sqsub_h_sparc #define helper_gvec_sqsub_s helper_gvec_sqsub_s_sparc +#define helper_gvec_sshl_b helper_gvec_sshl_b_sparc +#define helper_gvec_sshl_h helper_gvec_sshl_h_sparc #define helper_gvec_sub8 helper_gvec_sub8_sparc #define helper_gvec_sub16 helper_gvec_sub16_sparc #define helper_gvec_sub32 helper_gvec_sub32_sparc @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_sparc #define helper_gvec_usadd32 helper_gvec_usadd32_sparc #define helper_gvec_usadd64 helper_gvec_usadd64_sparc +#define helper_gvec_ushl_b helper_gvec_ushl_b_sparc +#define helper_gvec_ushl_h helper_gvec_ushl_h_sparc #define helper_gvec_ussub8 helper_gvec_ussub8_sparc #define helper_gvec_ussub16 helper_gvec_ussub16_sparc #define helper_gvec_ussub32 helper_gvec_ussub32_sparc @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_sparc #define helper_neon_rshl_u8 helper_neon_rshl_u8_sparc #define helper_neon_shl_s16 helper_neon_shl_s16_sparc -#define helper_neon_shl_s32 helper_neon_shl_s32_sparc -#define helper_neon_shl_s64 helper_neon_shl_s64_sparc -#define helper_neon_shl_s8 helper_neon_shl_s8_sparc #define helper_neon_shl_u16 helper_neon_shl_u16_sparc -#define helper_neon_shl_u32 helper_neon_shl_u32_sparc -#define helper_neon_shl_u64 helper_neon_shl_u64_sparc -#define helper_neon_shl_u8 helper_neon_shl_u8_sparc #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_sparc #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_sparc #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_sparc diff --git a/qemu/sparc64.h b/qemu/sparc64.h index 2a24ec55..f12f29d8 100644 --- a/qemu/sparc64.h +++ b/qemu/sparc64.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_sparc64 #define helper_gvec_sqsub_h helper_gvec_sqsub_h_sparc64 #define helper_gvec_sqsub_s helper_gvec_sqsub_s_sparc64 +#define helper_gvec_sshl_b helper_gvec_sshl_b_sparc64 +#define helper_gvec_sshl_h helper_gvec_sshl_h_sparc64 #define helper_gvec_sub8 helper_gvec_sub8_sparc64 #define helper_gvec_sub16 helper_gvec_sub16_sparc64 #define helper_gvec_sub32 helper_gvec_sub32_sparc64 @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_sparc64 #define helper_gvec_usadd32 helper_gvec_usadd32_sparc64 #define helper_gvec_usadd64 helper_gvec_usadd64_sparc64 +#define helper_gvec_ushl_b helper_gvec_ushl_b_sparc64 +#define helper_gvec_ushl_h helper_gvec_ushl_h_sparc64 #define helper_gvec_ussub8 helper_gvec_ussub8_sparc64 #define helper_gvec_ussub16 helper_gvec_ussub16_sparc64 #define helper_gvec_ussub32 helper_gvec_ussub32_sparc64 @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_sparc64 #define helper_neon_rshl_u8 helper_neon_rshl_u8_sparc64 #define helper_neon_shl_s16 helper_neon_shl_s16_sparc64 -#define helper_neon_shl_s32 helper_neon_shl_s32_sparc64 -#define helper_neon_shl_s64 helper_neon_shl_s64_sparc64 -#define helper_neon_shl_s8 helper_neon_shl_s8_sparc64 #define helper_neon_shl_u16 helper_neon_shl_u16_sparc64 -#define helper_neon_shl_u32 helper_neon_shl_u32_sparc64 -#define helper_neon_shl_u64 helper_neon_shl_u64_sparc64 -#define helper_neon_shl_u8 helper_neon_shl_u8_sparc64 #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_sparc64 #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_sparc64 #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_sparc64 diff --git a/qemu/target/arm/helper.h b/qemu/target/arm/helper.h index 7100051e..47a6ae9f 100644 --- a/qemu/target/arm/helper.h +++ b/qemu/target/arm/helper.h @@ -300,14 +300,8 @@ DEF_HELPER_2(neon_abd_s16, i32, i32, i32) DEF_HELPER_2(neon_abd_u32, i32, i32, i32) DEF_HELPER_2(neon_abd_s32, i32, i32, i32) -DEF_HELPER_2(neon_shl_u8, i32, i32, i32) -DEF_HELPER_2(neon_shl_s8, i32, i32, i32) DEF_HELPER_2(neon_shl_u16, i32, i32, i32) DEF_HELPER_2(neon_shl_s16, i32, i32, i32) -DEF_HELPER_2(neon_shl_u32, i32, i32, i32) -DEF_HELPER_2(neon_shl_s32, i32, i32, i32) -DEF_HELPER_2(neon_shl_u64, i64, i64, i64) -DEF_HELPER_2(neon_shl_s64, i64, i64, i64) DEF_HELPER_2(neon_rshl_u8, i32, i32, i32) DEF_HELPER_2(neon_rshl_s8, i32, i32, i32) DEF_HELPER_2(neon_rshl_u16, i32, i32, i32) @@ -694,6 +688,11 @@ DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, ptr) +DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + #ifdef TARGET_ARM #define helper_clz helper_clz_arm #define gen_helper_clz gen_helper_clz_arm diff --git a/qemu/target/arm/neon_helper.c b/qemu/target/arm/neon_helper.c index 69304616..995b57a6 100644 --- a/qemu/target/arm/neon_helper.c +++ b/qemu/target/arm/neon_helper.c @@ -616,24 +616,9 @@ NEON_VOP(abd_u32, neon_u32, 1) } else { \ dest = src1 << tmp; \ }} while (0) -NEON_VOP(shl_u8, neon_u8, 4) NEON_VOP(shl_u16, neon_u16, 2) -NEON_VOP(shl_u32, neon_u32, 1) #undef NEON_FN -uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop) -{ - int8_t shift = (int8_t)shiftop; - if (shift >= 64 || shift <= -64) { - val = 0; - } else if (shift < 0) { - val >>= -shift; - } else { - val <<= shift; - } - return val; -} - #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ @@ -646,27 +631,9 @@ uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop) } else { \ dest = src1 << tmp; \ }} while (0) -NEON_VOP(shl_s8, neon_s8, 4) NEON_VOP(shl_s16, neon_s16, 2) -NEON_VOP(shl_s32, neon_s32, 1) #undef NEON_FN -uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) -{ - int8_t shift = (int8_t)shiftop; - int64_t val = valop; - if (shift >= 64) { - val = 0; - } else if (shift <= -64) { - val >>= 63; - } else if (shift < 0) { - val >>= -shift; - } else { - val <<= shift; - } - return val; -} - #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c index ce10fd37..d830cb62 100644 --- a/qemu/target/arm/translate-a64.c +++ b/qemu/target/arm/translate-a64.c @@ -9018,9 +9018,9 @@ static void handle_3same_64(DisasContext *s, int opcode, bool u, break; case 0x8: /* SSHL, USHL */ if (u) { - gen_helper_neon_shl_u64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm); + gen_ushl_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm); } else { - gen_helper_neon_shl_s64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm); + gen_sshl_i64(tcg_ctx, tcg_rd, tcg_rn, tcg_rm); } break; case 0x9: /* SQSHL, UQSHL */ @@ -11436,6 +11436,10 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) is_q ? 16 : 8, vec_full_reg_size(s), (u ? uqsub_op : sqsub_op) + size); return; + case 0x08: /* SSHL, USHL */ + gen_gvec_op3(s, is_q, rd, rn, rm, + u ? &ushl_op[size] : &sshl_op[size]); + return; case 0x0c: /* SMAX, UMAX */ if (u) { gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size); @@ -11551,16 +11555,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genfn = fns[size][u]; break; } - case 0x8: /* SSHL, USHL */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 }, - { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 }, - { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 }, - }; - genfn = fns[size][u]; - break; - } case 0x9: /* SQSHL, UQSHL */ { static NeonGenTwoOpEnvFn * const fns[3][2] = { diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c index cf240295..0ca20c46 100644 --- a/qemu/target/arm/translate.c +++ b/qemu/target/arm/translate.c @@ -3699,13 +3699,13 @@ static inline void gen_neon_shift_narrow(DisasContext *s, int size, TCGv_i32 var if (u) { switch (size) { case 1: gen_helper_neon_shl_u16(tcg_ctx, var, var, shift); break; - case 2: gen_helper_neon_shl_u32(tcg_ctx, var, var, shift); break; + case 2: gen_ushl_i32(tcg_ctx, var, var, shift); break; default: abort(); } } else { switch (size) { case 1: gen_helper_neon_shl_s16(tcg_ctx, var, var, shift); break; - case 2: gen_helper_neon_shl_s32(tcg_ctx, var, var, shift); break; + case 2: gen_sshl_i32(tcg_ctx, var, var, shift); break; default: abort(); } } @@ -4516,6 +4516,280 @@ const GVecGen3 cmtst_op[4] = { .vece = MO_64 }, }; +void gen_ushl_i32(TCGContext *tcg_ctx, TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) +{ + TCGv_i32 lval = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 rval = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 lsh = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 rsh = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 zero = tcg_const_i32(tcg_ctx, 0); + TCGv_i32 max = tcg_const_i32(tcg_ctx, 32); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_ext8s_i32(tcg_ctx, lsh, shift); + tcg_gen_neg_i32(tcg_ctx, rsh, lsh); + tcg_gen_shl_i32(tcg_ctx, lval, src, lsh); + tcg_gen_shr_i32(tcg_ctx, rval, src, rsh); + tcg_gen_movcond_i32(tcg_ctx, TCG_COND_LTU, dst, lsh, max, lval, zero); + tcg_gen_movcond_i32(tcg_ctx, TCG_COND_LTU, dst, rsh, max, rval, dst); + + tcg_temp_free_i32(tcg_ctx, lval); + tcg_temp_free_i32(tcg_ctx, rval); + tcg_temp_free_i32(tcg_ctx, lsh); + tcg_temp_free_i32(tcg_ctx, rsh); + tcg_temp_free_i32(tcg_ctx, zero); + tcg_temp_free_i32(tcg_ctx, max); +} + +void gen_ushl_i64(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) +{ + TCGv_i64 lval = tcg_temp_new_i64(tcg_ctx); + TCGv_i64 rval = tcg_temp_new_i64(tcg_ctx); + TCGv_i64 lsh = tcg_temp_new_i64(tcg_ctx); + TCGv_i64 rsh = tcg_temp_new_i64(tcg_ctx); + TCGv_i64 zero = tcg_const_i64(tcg_ctx, 0); + TCGv_i64 max = tcg_const_i64(tcg_ctx, 64); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_ext8s_i64(tcg_ctx, lsh, shift); + tcg_gen_neg_i64(tcg_ctx, rsh, lsh); + tcg_gen_shl_i64(tcg_ctx, lval, src, lsh); + tcg_gen_shr_i64(tcg_ctx, rval, src, rsh); + tcg_gen_movcond_i64(tcg_ctx, TCG_COND_LTU, dst, lsh, max, lval, zero); + tcg_gen_movcond_i64(tcg_ctx, TCG_COND_LTU, dst, rsh, max, rval, dst); + + tcg_temp_free_i64(tcg_ctx, lval); + tcg_temp_free_i64(tcg_ctx, rval); + tcg_temp_free_i64(tcg_ctx, lsh); + tcg_temp_free_i64(tcg_ctx, rsh); + tcg_temp_free_i64(tcg_ctx, zero); + tcg_temp_free_i64(tcg_ctx, max); +} + +static void gen_ushl_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec dst, + TCGv_vec src, TCGv_vec shift) +{ + TCGv_vec lval = tcg_temp_new_vec_matching(tcg_ctx, dst); + TCGv_vec rval = tcg_temp_new_vec_matching(tcg_ctx, dst); + TCGv_vec lsh = tcg_temp_new_vec_matching(tcg_ctx, dst); + TCGv_vec rsh = tcg_temp_new_vec_matching(tcg_ctx, dst); + TCGv_vec msk, max; + + tcg_gen_neg_vec(tcg_ctx, vece, rsh, shift); + if (vece == MO_8) { + tcg_gen_mov_vec(tcg_ctx, lsh, shift); + } else { + msk = tcg_temp_new_vec_matching(tcg_ctx, dst); + tcg_gen_dupi_vec(tcg_ctx, vece, msk, 0xff); + tcg_gen_and_vec(tcg_ctx, vece, lsh, shift, msk); + tcg_gen_and_vec(tcg_ctx, vece, rsh, rsh, msk); + tcg_temp_free_vec(tcg_ctx, msk); + } + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_shlv_vec(tcg_ctx, vece, lval, src, lsh); + tcg_gen_shrv_vec(tcg_ctx, vece, rval, src, rsh); + + max = tcg_temp_new_vec_matching(tcg_ctx, dst); + tcg_gen_dupi_vec(tcg_ctx, vece, max, 8 << vece); + + /* + * The choice of LT (signed) and GEU (unsigned) are biased toward + * the instructions of the x86_64 host. For MO_8, the whole byte + * is significant so we must use an unsigned compare; otherwise we + * have already masked to a byte and so a signed compare works. + * Other tcg hosts have a full set of comparisons and do not care. + */ + if (vece == MO_8) { + tcg_gen_cmp_vec(tcg_ctx, TCG_COND_GEU, vece, lsh, lsh, max); + tcg_gen_cmp_vec(tcg_ctx, TCG_COND_GEU, vece, rsh, rsh, max); + tcg_gen_andc_vec(tcg_ctx, vece, lval, lval, lsh); + tcg_gen_andc_vec(tcg_ctx, vece, rval, rval, rsh); + } else { + tcg_gen_cmp_vec(tcg_ctx, TCG_COND_LT, vece, lsh, lsh, max); + tcg_gen_cmp_vec(tcg_ctx, TCG_COND_LT, vece, rsh, rsh, max); + tcg_gen_and_vec(tcg_ctx, vece, lval, lval, lsh); + tcg_gen_and_vec(tcg_ctx, vece, rval, rval, rsh); + } + tcg_gen_or_vec(tcg_ctx, vece, dst, lval, rval); + + tcg_temp_free_vec(tcg_ctx, max); + tcg_temp_free_vec(tcg_ctx, lval); + tcg_temp_free_vec(tcg_ctx, rval); + tcg_temp_free_vec(tcg_ctx, lsh); + tcg_temp_free_vec(tcg_ctx, rsh); +} + +static const TCGOpcode ushl_list[] = { + INDEX_op_neg_vec, INDEX_op_shlv_vec, + INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 +}; + +const GVecGen3 ushl_op[4] = { + { .fniv = gen_ushl_vec, + .fno = gen_helper_gvec_ushl_b, + .opt_opc = ushl_list, + .vece = MO_8 }, + { .fniv = gen_ushl_vec, + .fno = gen_helper_gvec_ushl_h, + .opt_opc = ushl_list, + .vece = MO_16 }, + { .fni4 = gen_ushl_i32, + .fniv = gen_ushl_vec, + .opt_opc = ushl_list, + .vece = MO_32 }, + { .fni8 = gen_ushl_i64, + .fniv = gen_ushl_vec, + .opt_opc = ushl_list, + .vece = MO_64 }, +}; + +void gen_sshl_i32(TCGContext *tcg_ctx, TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) +{ + TCGv_i32 lval = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 rval = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 lsh = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 rsh = tcg_temp_new_i32(tcg_ctx); + TCGv_i32 zero = tcg_const_i32(tcg_ctx, 0); + TCGv_i32 max = tcg_const_i32(tcg_ctx, 31); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_ext8s_i32(tcg_ctx, lsh, shift); + tcg_gen_neg_i32(tcg_ctx, rsh, lsh); + tcg_gen_shl_i32(tcg_ctx, lval, src, lsh); + tcg_gen_umin_i32(tcg_ctx, rsh, rsh, max); + tcg_gen_sar_i32(tcg_ctx, rval, src, rsh); + tcg_gen_movcond_i32(tcg_ctx, TCG_COND_LEU, lval, lsh, max, lval, zero); + tcg_gen_movcond_i32(tcg_ctx, TCG_COND_LT, dst, lsh, zero, rval, lval); + + tcg_temp_free_i32(tcg_ctx, lval); + tcg_temp_free_i32(tcg_ctx, rval); + tcg_temp_free_i32(tcg_ctx, lsh); + tcg_temp_free_i32(tcg_ctx, rsh); + tcg_temp_free_i32(tcg_ctx, zero); + tcg_temp_free_i32(tcg_ctx, max); +} + +void gen_sshl_i64(TCGContext *tcg_ctx, TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) +{ + TCGv_i64 lval = tcg_temp_new_i64(tcg_ctx); + TCGv_i64 rval = tcg_temp_new_i64(tcg_ctx); + TCGv_i64 lsh = tcg_temp_new_i64(tcg_ctx); + TCGv_i64 rsh = tcg_temp_new_i64(tcg_ctx); + TCGv_i64 zero = tcg_const_i64(tcg_ctx, 0); + TCGv_i64 max = tcg_const_i64(tcg_ctx, 63); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_ext8s_i64(tcg_ctx, lsh, shift); + tcg_gen_neg_i64(tcg_ctx, rsh, lsh); + tcg_gen_shl_i64(tcg_ctx, lval, src, lsh); + tcg_gen_umin_i64(tcg_ctx, rsh, rsh, max); + tcg_gen_sar_i64(tcg_ctx, rval, src, rsh); + tcg_gen_movcond_i64(tcg_ctx, TCG_COND_LEU, lval, lsh, max, lval, zero); + tcg_gen_movcond_i64(tcg_ctx, TCG_COND_LT, dst, lsh, zero, rval, lval); + + tcg_temp_free_i64(tcg_ctx, lval); + tcg_temp_free_i64(tcg_ctx, rval); + tcg_temp_free_i64(tcg_ctx, lsh); + tcg_temp_free_i64(tcg_ctx, rsh); + tcg_temp_free_i64(tcg_ctx, zero); + tcg_temp_free_i64(tcg_ctx, max); +} + +static void gen_sshl_vec(TCGContext *tcg_ctx, unsigned vece, TCGv_vec dst, + TCGv_vec src, TCGv_vec shift) +{ + TCGv_vec lval = tcg_temp_new_vec_matching(tcg_ctx, dst); + TCGv_vec rval = tcg_temp_new_vec_matching(tcg_ctx, dst); + TCGv_vec lsh = tcg_temp_new_vec_matching(tcg_ctx, dst); + TCGv_vec rsh = tcg_temp_new_vec_matching(tcg_ctx, dst); + TCGv_vec tmp = tcg_temp_new_vec_matching(tcg_ctx, dst); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_neg_vec(tcg_ctx, vece, rsh, shift); + if (vece == MO_8) { + tcg_gen_mov_vec(tcg_ctx, lsh, shift); + } else { + tcg_gen_dupi_vec(tcg_ctx, vece, tmp, 0xff); + tcg_gen_and_vec(tcg_ctx, vece, lsh, shift, tmp); + tcg_gen_and_vec(tcg_ctx, vece, rsh, rsh, tmp); + } + + /* Bound rsh so out of bound right shift gets -1. */ + tcg_gen_dupi_vec(tcg_ctx, vece, tmp, (8 << vece) - 1); + tcg_gen_umin_vec(tcg_ctx, vece, rsh, rsh, tmp); + tcg_gen_cmp_vec(tcg_ctx, TCG_COND_GT, vece, tmp, lsh, tmp); + + tcg_gen_shlv_vec(tcg_ctx, vece, lval, src, lsh); + tcg_gen_sarv_vec(tcg_ctx, vece, rval, src, rsh); + + /* Select in-bound left shift. */ + tcg_gen_andc_vec(tcg_ctx, vece, lval, lval, tmp); + + /* Select between left and right shift. */ + if (vece == MO_8) { + tcg_gen_dupi_vec(tcg_ctx, vece, tmp, 0); + tcg_gen_cmpsel_vec(tcg_ctx, TCG_COND_LT, vece, dst, lsh, tmp, rval, lval); + } else { + tcg_gen_dupi_vec(tcg_ctx, vece, tmp, 0x80); + tcg_gen_cmpsel_vec(tcg_ctx, TCG_COND_LT, vece, dst, lsh, tmp, lval, rval); + } + + tcg_temp_free_vec(tcg_ctx, lval); + tcg_temp_free_vec(tcg_ctx, rval); + tcg_temp_free_vec(tcg_ctx, lsh); + tcg_temp_free_vec(tcg_ctx, rsh); + tcg_temp_free_vec(tcg_ctx, tmp); +} + +static const TCGOpcode sshl_list[] = { + INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, + INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 +}; + +const GVecGen3 sshl_op[4] = { + { .fniv = gen_sshl_vec, + .fno = gen_helper_gvec_sshl_b, + .opt_opc = sshl_list, + .vece = MO_8 }, + { .fniv = gen_sshl_vec, + .fno = gen_helper_gvec_sshl_h, + .opt_opc = sshl_list, + .vece = MO_16 }, + { .fni4 = gen_sshl_i32, + .fniv = gen_sshl_vec, + .opt_opc = sshl_list, + .vece = MO_32 }, + { .fni8 = gen_sshl_i64, + .fniv = gen_sshl_vec, + .opt_opc = sshl_list, + .vece = MO_64 }, +}; + static void gen_uqadd_vec(TCGContext *s, unsigned vece, TCGv_vec t, TCGv_vec sat, TCGv_vec a, TCGv_vec b) { @@ -4920,6 +5194,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) vec_size, vec_size); } return 0; + + case NEON_3R_VSHL: + /* Note the operation is vshl vd,vm,vn */ + tcg_gen_gvec_3(tcg_ctx, rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size, + u ? &ushl_op[size] : &sshl_op[size]); + return 0; } if (size == 3) { @@ -4928,13 +5208,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) neon_load_reg64(s, s->V0, rn + pass); neon_load_reg64(s, s->V1, rm + pass); switch (op) { - case NEON_3R_VSHL: - if (u) { - gen_helper_neon_shl_u64(tcg_ctx, s->V0, s->V1, s->V0); - } else { - gen_helper_neon_shl_s64(tcg_ctx, s->V0, s->V1, s->V0); - } - break; case NEON_3R_VQSHL: if (u) { gen_helper_neon_qshl_u64(tcg_ctx, s->V0, tcg_ctx->cpu_env, @@ -4969,7 +5242,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } pairwise = 0; switch (op) { - case NEON_3R_VSHL: case NEON_3R_VQSHL: case NEON_3R_VRSHL: case NEON_3R_VQRSHL: @@ -5049,9 +5321,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_3R_VHSUB: GEN_NEON_INTEGER_OP(hsub); break; - case NEON_3R_VSHL: - GEN_NEON_INTEGER_OP(shl); - break; case NEON_3R_VQSHL: GEN_NEON_INTEGER_OP_ENV(qshl); break; @@ -5460,9 +5729,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) } } else { if (input_unsigned) { - gen_helper_neon_shl_u64(tcg_ctx, s->V0, in, tmp64); + gen_ushl_i64(tcg_ctx, s->V0, in, tmp64); } else { - gen_helper_neon_shl_s64(tcg_ctx, s->V0, in, tmp64); + gen_sshl_i64(tcg_ctx, s->V0, in, tmp64); } } tmp = tcg_temp_new_i32(tcg_ctx); diff --git a/qemu/target/arm/translate.h b/qemu/target/arm/translate.h index 422e4b19..a69e6623 100644 --- a/qemu/target/arm/translate.h +++ b/qemu/target/arm/translate.h @@ -282,9 +282,11 @@ static inline void gen_swstep_exception(DisasContext *s, int isv, int ex) uint64_t vfp_expand_imm(int size, uint8_t imm8); /* Vector operations shared between ARM and AArch64. */ -extern const GVecGen3 cmtst_op[4]; extern const GVecGen3 mla_op[4]; extern const GVecGen3 mls_op[4]; +extern const GVecGen3 cmtst_op[4]; +extern const GVecGen3 sshl_op[4]; +extern const GVecGen3 ushl_op[4]; extern const GVecGen2i ssra_op[4]; extern const GVecGen2i usra_op[4]; extern const GVecGen2i sri_op[4]; @@ -294,6 +296,10 @@ extern const GVecGen4 sqadd_op[4]; extern const GVecGen4 uqsub_op[4]; extern const GVecGen4 sqsub_op[4]; void gen_cmtst_i64(TCGContext* tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void gen_ushl_i32(TCGContext* tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); +void gen_sshl_i32(TCGContext* tcg_ctx, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); +void gen_ushl_i64(TCGContext* tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void gen_sshl_i64(TCGContext* tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); /* * Forward to the isar_feature_* tests given a DisasContext pointer. diff --git a/qemu/target/arm/vec_helper.c b/qemu/target/arm/vec_helper.c index bec39188..b8c676ea 100644 --- a/qemu/target/arm/vec_helper.c +++ b/qemu/target/arm/vec_helper.c @@ -1047,3 +1047,91 @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm, do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status, desc, get_flush_inputs_to_zero(&env->vfp.fp_status_f16)); } + +void HELPER(gvec_sshl_b)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int8_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz; ++i) { + int8_t mm = m[i]; + int8_t nn = n[i]; + int8_t res = 0; + if (mm >= 0) { + if (mm < 8) { + res = nn << mm; + } + } else { + res = nn >> (mm > -8 ? -mm : 7); + } + d[i] = res; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(gvec_sshl_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + int16_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 2; ++i) { + int8_t mm = m[i]; /* only 8 bits of shift are significant */ + int16_t nn = n[i]; + int16_t res = 0; + if (mm >= 0) { + if (mm < 16) { + res = nn << mm; + } + } else { + res = nn >> (mm > -16 ? -mm : 15); + } + d[i] = res; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(gvec_ushl_b)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint8_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz; ++i) { + int8_t mm = m[i]; + uint8_t nn = n[i]; + uint8_t res = 0; + if (mm >= 0) { + if (mm < 8) { + res = nn << mm; + } + } else { + if (mm > -8) { + res = nn >> -mm; + } + } + d[i] = res; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(gvec_ushl_h)(void *vd, void *vn, void *vm, uint32_t desc) +{ + intptr_t i, opr_sz = simd_oprsz(desc); + uint16_t *d = vd, *n = vn, *m = vm; + + for (i = 0; i < opr_sz / 2; ++i) { + int8_t mm = m[i]; /* only 8 bits of shift are significant */ + uint16_t nn = n[i]; + uint16_t res = 0; + if (mm >= 0) { + if (mm < 16) { + res = nn << mm; + } + } else { + if (mm > -16) { + res = nn >> -mm; + } + } + d[i] = res; + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} diff --git a/qemu/x86_64.h b/qemu/x86_64.h index 0bfc71ca..c81f61be 100644 --- a/qemu/x86_64.h +++ b/qemu/x86_64.h @@ -1263,6 +1263,8 @@ #define helper_gvec_sqsub_d helper_gvec_sqsub_d_x86_64 #define helper_gvec_sqsub_h helper_gvec_sqsub_h_x86_64 #define helper_gvec_sqsub_s helper_gvec_sqsub_s_x86_64 +#define helper_gvec_sshl_b helper_gvec_sshl_b_x86_64 +#define helper_gvec_sshl_h helper_gvec_sshl_h_x86_64 #define helper_gvec_sub8 helper_gvec_sub8_x86_64 #define helper_gvec_sub16 helper_gvec_sub16_x86_64 #define helper_gvec_sub32 helper_gvec_sub32_x86_64 @@ -1303,6 +1305,8 @@ #define helper_gvec_usadd16 helper_gvec_usadd16_x86_64 #define helper_gvec_usadd32 helper_gvec_usadd32_x86_64 #define helper_gvec_usadd64 helper_gvec_usadd64_x86_64 +#define helper_gvec_ushl_b helper_gvec_ushl_b_x86_64 +#define helper_gvec_ushl_h helper_gvec_ushl_h_x86_64 #define helper_gvec_ussub8 helper_gvec_ussub8_x86_64 #define helper_gvec_ussub16 helper_gvec_ussub16_x86_64 #define helper_gvec_ussub32 helper_gvec_ussub32_x86_64 @@ -1631,13 +1635,7 @@ #define helper_neon_rshl_u64 helper_neon_rshl_u64_x86_64 #define helper_neon_rshl_u8 helper_neon_rshl_u8_x86_64 #define helper_neon_shl_s16 helper_neon_shl_s16_x86_64 -#define helper_neon_shl_s32 helper_neon_shl_s32_x86_64 -#define helper_neon_shl_s64 helper_neon_shl_s64_x86_64 -#define helper_neon_shl_s8 helper_neon_shl_s8_x86_64 #define helper_neon_shl_u16 helper_neon_shl_u16_x86_64 -#define helper_neon_shl_u32 helper_neon_shl_u32_x86_64 -#define helper_neon_shl_u64 helper_neon_shl_u64_x86_64 -#define helper_neon_shl_u8 helper_neon_shl_u8_x86_64 #define helper_neon_sqadd_u16 helper_neon_sqadd_u16_x86_64 #define helper_neon_sqadd_u32 helper_neon_sqadd_u32_x86_64 #define helper_neon_sqadd_u64 helper_neon_sqadd_u64_x86_64