target/arm: Convert sq{, r}dmulh to gvec for aa64 advsimd

This commit is contained in:
Lioncash 2021-02-26 14:56:28 -05:00
parent aa97b6b755
commit f5a21abc0b
19 changed files with 185 additions and 10 deletions

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_aarch64
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_aarch64
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_aarch64
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_aarch64
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_aarch64
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_aarch64
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_aarch64
#define helper_neon_sub_u16 helper_neon_sub_u16_aarch64
#define helper_neon_sub_u8 helper_neon_sub_u8_aarch64
#define helper_neon_subl_u16 helper_neon_subl_u16_aarch64
@ -3661,6 +3665,10 @@
#define helper_neon_ceq_f64 helper_neon_ceq_f64_aarch64
#define helper_neon_cge_f64 helper_neon_cge_f64_aarch64
#define helper_neon_cgt_f64 helper_neon_cgt_f64_aarch64
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_aarch64
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_aarch64
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_aarch64
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_aarch64
#define helper_pacda helper_pacda_aarch64
#define helper_pacdb helper_pacdb_aarch64
#define helper_pacga helper_pacga_aarch64

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_aarch64eb
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_aarch64eb
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_aarch64eb
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_aarch64eb
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_aarch64eb
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_aarch64eb
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_aarch64eb
#define helper_neon_sub_u16 helper_neon_sub_u16_aarch64eb
#define helper_neon_sub_u8 helper_neon_sub_u8_aarch64eb
#define helper_neon_subl_u16 helper_neon_subl_u16_aarch64eb
@ -3661,6 +3665,10 @@
#define helper_neon_ceq_f64 helper_neon_ceq_f64_aarch64eb
#define helper_neon_cge_f64 helper_neon_cge_f64_aarch64eb
#define helper_neon_cgt_f64 helper_neon_cgt_f64_aarch64eb
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_aarch64eb
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_aarch64eb
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_aarch64eb
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_aarch64eb
#define helper_pacda helper_pacda_aarch64eb
#define helper_pacdb helper_pacdb_aarch64eb
#define helper_pacga helper_pacga_aarch64eb

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_arm
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_arm
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_arm
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_arm
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_arm
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_arm
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_arm
#define helper_neon_sub_u16 helper_neon_sub_u16_arm
#define helper_neon_sub_u8 helper_neon_sub_u8_arm
#define helper_neon_subl_u16 helper_neon_subl_u16_arm
@ -3523,6 +3527,15 @@
#define gen_ushl_i32 gen_ushl_i32_arm
#define gen_ushl_i64 gen_ushl_i64_arm
#define helper_fjcvtzs helper_fjcvtzs_arm
#define helper_gvec_mla_idx_d helper_gvec_mla_idx_d_arm
#define helper_gvec_mla_idx_h helper_gvec_mla_idx_h_arm
#define helper_gvec_mla_idx_s helper_gvec_mla_idx_s_arm
#define helper_gvec_mls_idx_d helper_gvec_mls_idx_d_arm
#define helper_gvec_mls_idx_h helper_gvec_mls_idx_h_arm
#define helper_gvec_mls_idx_s helper_gvec_mls_idx_s_arm
#define helper_gvec_mul_idx_d helper_gvec_mul_idx_d_arm
#define helper_gvec_mul_idx_h helper_gvec_mul_idx_h_arm
#define helper_gvec_mul_idx_s helper_gvec_mul_idx_s_arm
#define helper_gvec_saba_b helper_gvec_saba_b_arm
#define helper_gvec_saba_d helper_gvec_saba_d_arm
#define helper_gvec_saba_h helper_gvec_saba_h_arm

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_armeb
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_armeb
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_armeb
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_armeb
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_armeb
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_armeb
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_armeb
#define helper_neon_sub_u16 helper_neon_sub_u16_armeb
#define helper_neon_sub_u8 helper_neon_sub_u8_armeb
#define helper_neon_subl_u16 helper_neon_subl_u16_armeb
@ -3523,6 +3527,15 @@
#define gen_ushl_i32 gen_ushl_i32_armeb
#define gen_ushl_i64 gen_ushl_i64_armeb
#define helper_fjcvtzs helper_fjcvtzs_armeb
#define helper_gvec_mla_idx_d helper_gvec_mla_idx_d_armeb
#define helper_gvec_mla_idx_h helper_gvec_mla_idx_h_armeb
#define helper_gvec_mla_idx_s helper_gvec_mla_idx_s_armeb
#define helper_gvec_mls_idx_d helper_gvec_mls_idx_d_armeb
#define helper_gvec_mls_idx_h helper_gvec_mls_idx_h_armeb
#define helper_gvec_mls_idx_s helper_gvec_mls_idx_s_armeb
#define helper_gvec_mul_idx_d helper_gvec_mul_idx_d_armeb
#define helper_gvec_mul_idx_h helper_gvec_mul_idx_h_armeb
#define helper_gvec_mul_idx_s helper_gvec_mul_idx_s_armeb
#define helper_gvec_saba_b helper_gvec_saba_b_armeb
#define helper_gvec_saba_d helper_gvec_saba_d_armeb
#define helper_gvec_saba_h helper_gvec_saba_h_armeb

View file

@ -1713,6 +1713,10 @@ symbols = (
'helper_neon_sqadd_u32',
'helper_neon_sqadd_u64',
'helper_neon_sqadd_u8',
'helper_neon_sqdmulh_h',
'helper_neon_sqdmulh_s',
'helper_neon_sqrdmulh_h',
'helper_neon_sqrdmulh_s',
'helper_neon_sub_u16',
'helper_neon_sub_u8',
'helper_neon_subl_u16',
@ -3532,6 +3536,15 @@ arm_symbols = (
'gen_ushl_i32',
'gen_ushl_i64',
'helper_fjcvtzs',
'helper_gvec_mla_idx_d',
'helper_gvec_mla_idx_h',
'helper_gvec_mla_idx_s',
'helper_gvec_mls_idx_d',
'helper_gvec_mls_idx_h',
'helper_gvec_mls_idx_s',
'helper_gvec_mul_idx_d',
'helper_gvec_mul_idx_h',
'helper_gvec_mul_idx_s',
'helper_gvec_saba_b',
'helper_gvec_saba_d',
'helper_gvec_saba_h',
@ -3801,6 +3814,10 @@ aarch64_symbols = (
'helper_neon_ceq_f64',
'helper_neon_cge_f64',
'helper_neon_cgt_f64',
'helper_neon_sqdmulh_h',
'helper_neon_sqdmulh_s',
'helper_neon_sqrdmulh_h',
'helper_neon_sqrdmulh_s',
'helper_pacda',
'helper_pacdb',
'helper_pacga',

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_m68k
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_m68k
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_m68k
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_m68k
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_m68k
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_m68k
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_m68k
#define helper_neon_sub_u16 helper_neon_sub_u16_m68k
#define helper_neon_sub_u8 helper_neon_sub_u8_m68k
#define helper_neon_subl_u16 helper_neon_subl_u16_m68k

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_mips
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_mips
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_mips
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_mips
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_mips
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_mips
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_mips
#define helper_neon_sub_u16 helper_neon_sub_u16_mips
#define helper_neon_sub_u8 helper_neon_sub_u8_mips
#define helper_neon_subl_u16 helper_neon_subl_u16_mips

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_mips64
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_mips64
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_mips64
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_mips64
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_mips64
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_mips64
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_mips64
#define helper_neon_sub_u16 helper_neon_sub_u16_mips64
#define helper_neon_sub_u8 helper_neon_sub_u8_mips64
#define helper_neon_subl_u16 helper_neon_subl_u16_mips64

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_mips64el
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_mips64el
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_mips64el
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_mips64el
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_mips64el
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_mips64el
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_mips64el
#define helper_neon_sub_u16 helper_neon_sub_u16_mips64el
#define helper_neon_sub_u8 helper_neon_sub_u8_mips64el
#define helper_neon_subl_u16 helper_neon_subl_u16_mips64el

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_mipsel
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_mipsel
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_mipsel
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_mipsel
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_mipsel
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_mipsel
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_mipsel
#define helper_neon_sub_u16 helper_neon_sub_u16_mipsel
#define helper_neon_sub_u8 helper_neon_sub_u8_mipsel
#define helper_neon_subl_u16 helper_neon_subl_u16_mipsel

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_powerpc
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_powerpc
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_powerpc
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_powerpc
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_powerpc
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_powerpc
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_powerpc
#define helper_neon_sub_u16 helper_neon_sub_u16_powerpc
#define helper_neon_sub_u8 helper_neon_sub_u8_powerpc
#define helper_neon_subl_u16 helper_neon_subl_u16_powerpc

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_riscv32
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_riscv32
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_riscv32
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_riscv32
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_riscv32
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_riscv32
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_riscv32
#define helper_neon_sub_u16 helper_neon_sub_u16_riscv32
#define helper_neon_sub_u8 helper_neon_sub_u8_riscv32
#define helper_neon_subl_u16 helper_neon_subl_u16_riscv32

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_riscv64
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_riscv64
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_riscv64
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_riscv64
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_riscv64
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_riscv64
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_riscv64
#define helper_neon_sub_u16 helper_neon_sub_u16_riscv64
#define helper_neon_sub_u8 helper_neon_sub_u8_riscv64
#define helper_neon_subl_u16 helper_neon_subl_u16_riscv64

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_sparc
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_sparc
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_sparc
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_sparc
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_sparc
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_sparc
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_sparc
#define helper_neon_sub_u16 helper_neon_sub_u16_sparc
#define helper_neon_sub_u8 helper_neon_sub_u8_sparc
#define helper_neon_subl_u16 helper_neon_subl_u16_sparc

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_sparc64
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_sparc64
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_sparc64
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_sparc64
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_sparc64
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_sparc64
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_sparc64
#define helper_neon_sub_u16 helper_neon_sub_u16_sparc64
#define helper_neon_sub_u8 helper_neon_sub_u8_sparc64
#define helper_neon_subl_u16 helper_neon_subl_u16_sparc64

View file

@ -774,6 +774,16 @@ DEF_HELPER_FLAGS_5(gvec_mls_idx_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(gvec_mls_idx_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
/*
 * Out-of-line vector helpers for SQDMULH / SQRDMULH on 16-bit (_h) and
 * 32-bit (_s) elements.  Each takes four pointers followed by a 32-bit
 * descriptor; the matching definitions name these vd, vn, vm, vq and desc.
 */
DEF_HELPER_FLAGS_5(neon_sqdmulh_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(neon_sqdmulh_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
#ifdef TARGET_ARM
#define helper_clz helper_clz_arm
#define gen_helper_clz gen_helper_clz_arm

View file

@ -851,6 +851,21 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
tcg_temp_free_ptr(tcg_ctx, fpst);
}
/*
 * Expand a 3-operand operation that also needs access to QC, using an
 * out-of-line helper.
 *
 * Besides the full-register offsets of rd, rn and rm, the helper is handed
 * a pointer computed as cpu_env + offsetof(CPUARMState, vfp.qc).  The
 * operation size is 16 bytes when is_q is set and 8 bytes otherwise, the
 * maximum size is vec_full_reg_size(s), and the extra data word is 0.
 */
static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
                            int rm, gen_helper_gvec_3_ptr *fn)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_ptr qc = tcg_temp_new_ptr(tcg_ctx);
    int opr_bytes;

    if (is_q) {
        opr_bytes = 16;
    } else {
        opr_bytes = 8;
    }

    /* Point the temporary at the vfp.qc field inside the CPU state. */
    tcg_gen_addi_ptr(tcg_ctx, qc, tcg_ctx->cpu_env,
                     offsetof(CPUARMState, vfp.qc));

    tcg_gen_gvec_3_ptr(tcg_ctx,
                       vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       qc,
                       opr_bytes, vec_full_reg_size(s), 0, fn);

    tcg_temp_free_ptr(tcg_ctx, qc);
}
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
* than the 32 bit equivalent.
*/
@ -12033,6 +12048,15 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
}
return;
case 0x16: /* SQDMULH, SQRDMULH */
{
static gen_helper_gvec_3_ptr * const fns[2][2] = {
{ gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
{ gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
};
gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
}
return;
case 0x11:
if (!u) { /* CMTST */
gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
@ -12144,16 +12168,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genenvfn = fns[size][u];
break;
}
case 0x16: /* SQDMULH, SQRDMULH */
{
static NeonGenTwoOpEnvFn * const fns[2][2] = {
{ gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
{ gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
};
assert(size == 1 || size == 2);
genenvfn = fns[size - 1][u];
break;
}
default:
g_assert_not_reached();
}

View file

@ -110,6 +110,30 @@ void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm,
clear_tail(d, opr_sz, simd_maxsz(desc));
}
void HELPER(neon_sqdmulh_h)(void *vd, void *vn, void *vm,
void *vq, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc);
int16_t *d = vd, *n = vn, *m = vm;
for (i = 0; i < opr_sz / 2; ++i) {
d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, false, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
void HELPER(neon_sqrdmulh_h)(void *vd, void *vn, void *vm,
void *vq, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc);
int16_t *d = vd, *n = vn, *m = vm;
for (i = 0; i < opr_sz / 2; ++i) {
d[i] = do_sqrdmlah_h(n[i], m[i], 0, false, true, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
/* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */
static int32_t do_sqrdmlah_s(int32_t src1, int32_t src2, int32_t src3,
bool neg, bool round, uint32_t *sat)
@ -173,6 +197,30 @@ void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
clear_tail(d, opr_sz, simd_maxsz(desc));
}
void HELPER(neon_sqdmulh_s)(void *vd, void *vn, void *vm,
void *vq, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc);
int32_t *d = vd, *n = vn, *m = vm;
for (i = 0; i < opr_sz / 4; ++i) {
d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, false, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
void HELPER(neon_sqrdmulh_s)(void *vd, void *vn, void *vm,
void *vq, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc);
int32_t *d = vd, *n = vn, *m = vm;
for (i = 0; i < opr_sz / 4; ++i) {
d[i] = do_sqrdmlah_s(n[i], m[i], 0, false, true, vq);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
/* Integer 8 and 16-bit dot-product.
*
* Note that for the loops herein, host endianness does not matter

View file

@ -1707,6 +1707,10 @@
#define helper_neon_sqadd_u32 helper_neon_sqadd_u32_x86_64
#define helper_neon_sqadd_u64 helper_neon_sqadd_u64_x86_64
#define helper_neon_sqadd_u8 helper_neon_sqadd_u8_x86_64
#define helper_neon_sqdmulh_h helper_neon_sqdmulh_h_x86_64
#define helper_neon_sqdmulh_s helper_neon_sqdmulh_s_x86_64
#define helper_neon_sqrdmulh_h helper_neon_sqrdmulh_h_x86_64
#define helper_neon_sqrdmulh_s helper_neon_sqrdmulh_s_x86_64
#define helper_neon_sub_u16 helper_neon_sub_u16_x86_64
#define helper_neon_sub_u8 helper_neon_sub_u8_x86_64
#define helper_neon_subl_u16 helper_neon_subl_u16_x86_64