target/arm: Convert sha1 and sha256 to gvec helpers

Do not yet convert the helpers to loop over opr_sz, but the
descriptor allows the vector tail to be cleared, which fixes
an existing bug vs SVE.

Backports commit effa992f153f5e7ab97ab843b565690748c5b402 from qemu
This commit is contained in:
Richard Henderson 2020-06-14 23:11:25 -04:00 committed by Lioncash
parent 2c6c4da80c
commit 41c4efdb22
6 changed files with 67 additions and 166 deletions

View file

@ -310,7 +310,7 @@ void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op)
rd[1] = d.l[1]; rd[1] = d.l[1];
} }
void HELPER(crypto_sha1h)(void *vd, void *vm) void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
{ {
uint64_t *rd = vd; uint64_t *rd = vd;
uint64_t *rm = vm; uint64_t *rm = vm;
@ -323,9 +323,11 @@ void HELPER(crypto_sha1h)(void *vd, void *vm)
rd[0] = m.l[0]; rd[0] = m.l[0];
rd[1] = m.l[1]; rd[1] = m.l[1];
clear_tail_16(vd, desc);
} }
void HELPER(crypto_sha1su1)(void *vd, void *vm) void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
{ {
uint64_t *rd = vd; uint64_t *rd = vd;
uint64_t *rm = vm; uint64_t *rm = vm;
@ -343,6 +345,8 @@ void HELPER(crypto_sha1su1)(void *vd, void *vm)
rd[0] = d.l[0]; rd[0] = d.l[0];
rd[1] = d.l[1]; rd[1] = d.l[1];
clear_tail_16(vd, desc);
} }
/* /*
@ -370,7 +374,7 @@ static uint32_t s1(uint32_t x)
return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
} }
void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm) void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
{ {
uint64_t *rd = vd; uint64_t *rd = vd;
uint64_t *rn = vn; uint64_t *rn = vn;
@ -407,9 +411,11 @@ void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm)
rd[0] = d.l[0]; rd[0] = d.l[0];
rd[1] = d.l[1]; rd[1] = d.l[1];
clear_tail_16(vd, desc);
} }
void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm) void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
{ {
uint64_t *rd = vd; uint64_t *rd = vd;
uint64_t *rn = vn; uint64_t *rn = vn;
@ -439,9 +445,11 @@ void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm)
rd[0] = d.l[0]; rd[0] = d.l[0];
rd[1] = d.l[1]; rd[1] = d.l[1];
clear_tail_16(vd, desc);
} }
void HELPER(crypto_sha256su0)(void *vd, void *vm) void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
{ {
uint64_t *rd = vd; uint64_t *rd = vd;
uint64_t *rm = vm; uint64_t *rm = vm;
@ -459,9 +467,11 @@ void HELPER(crypto_sha256su0)(void *vd, void *vm)
rd[0] = d.l[0]; rd[0] = d.l[0];
rd[1] = d.l[1]; rd[1] = d.l[1];
clear_tail_16(vd, desc);
} }
void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm) void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
{ {
uint64_t *rd = vd; uint64_t *rd = vd;
uint64_t *rn = vn; uint64_t *rn = vn;
@ -483,6 +493,8 @@ void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm)
rd[0] = d.l[0]; rd[0] = d.l[0];
rd[1] = d.l[1]; rd[1] = d.l[1];
clear_tail_16(vd, desc);
} }
/* /*

View file

@ -510,13 +510,13 @@ DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_2(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_3(crypto_sha1h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_2(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_3(crypto_sha1su1, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) DEF_HELPER_FLAGS_4(crypto_sha256h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) DEF_HELPER_FLAGS_4(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_2(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_3(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) DEF_HELPER_FLAGS_4(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

View file

@ -165,14 +165,14 @@ VPADD_3s 1111 001 0 0 . .. .... .... 1011 . . . 1 .... @3same_q0
VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same VQRDMLAH_3s 1111 001 1 0 . .. .... .... 1011 ... 1 .... @3same
@3same_crypto .... .... .... .... .... .... .... .... \
&3same vm=%vm_dp vn=%vn_dp vd=%vd_dp size=0 q=1
SHA1_3s 1111 001 0 0 . optype:2 .... .... 1100 . 1 . 0 .... \ SHA1_3s 1111 001 0 0 . optype:2 .... .... 1100 . 1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp vm=%vm_dp vn=%vn_dp vd=%vd_dp
SHA256H_3s 1111 001 1 0 . 00 .... .... 1100 . 1 . 0 .... \ SHA256H_3s 1111 001 1 0 . 00 .... .... 1100 . 1 . 0 .... @3same_crypto
vm=%vm_dp vn=%vn_dp vd=%vd_dp SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... @3same_crypto
SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... \ SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... @3same_crypto
vm=%vm_dp vn=%vn_dp vd=%vd_dp
SHA256SU1_3s 1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \
vm=%vm_dp vn=%vn_dp vd=%vd_dp
VFMA_fp_3s 1111 001 0 0 . 0 . .... .... 1100 ... 1 .... @3same_fp VFMA_fp_3s 1111 001 0 0 . 0 . .... .... 1100 ... 1 .... @3same_fp
VFMS_fp_3s 1111 001 0 0 . 1 . .... .... 1100 ... 1 .... @3same_fp VFMS_fp_3s 1111 001 0 0 . 1 . .... .... 1100 ... 1 .... @3same_fp

View file

@ -13769,8 +13769,7 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
int rm = extract32(insn, 16, 5); int rm = extract32(insn, 16, 5);
int rn = extract32(insn, 5, 5); int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5); int rd = extract32(insn, 0, 5);
CryptoThreeOpFn *genfn; gen_helper_gvec_3 *genfn;
TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
bool feature; bool feature;
if (size != 0) { if (size != 0) {
@ -13812,23 +13811,22 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
return; return;
} }
tcg_rd_ptr = vec_full_reg_ptr(s, rd);
tcg_rn_ptr = vec_full_reg_ptr(s, rn);
tcg_rm_ptr = vec_full_reg_ptr(s, rm);
if (genfn) { if (genfn) {
genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
} else { } else {
TCGv_i32 tcg_opcode = tcg_const_i32(tcg_ctx, opcode); TCGv_i32 tcg_opcode = tcg_const_i32(tcg_ctx, opcode);
TCGv_ptr tcg_rd_ptr = vec_full_reg_ptr(s, rd);
TCGv_ptr tcg_rn_ptr = vec_full_reg_ptr(s, rn);
TCGv_ptr tcg_rm_ptr = vec_full_reg_ptr(s, rm);
gen_helper_crypto_sha1_3reg(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, gen_helper_crypto_sha1_3reg(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr,
tcg_rm_ptr, tcg_opcode); tcg_rm_ptr, tcg_opcode);
tcg_temp_free_i32(tcg_ctx, tcg_opcode);
}
tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); tcg_temp_free_i32(tcg_ctx, tcg_opcode);
tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr);
tcg_temp_free_ptr(tcg_ctx, tcg_rm_ptr); tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr);
tcg_temp_free_ptr(tcg_ctx, tcg_rm_ptr);
}
} }
/* Crypto two-reg SHA /* Crypto two-reg SHA
@ -13839,14 +13837,12 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
*/ */
static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
{ {
TCGContext *tcg_ctx = s->uc->tcg_ctx;
int size = extract32(insn, 22, 2); int size = extract32(insn, 22, 2);
int opcode = extract32(insn, 12, 5); int opcode = extract32(insn, 12, 5);
int rn = extract32(insn, 5, 5); int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5); int rd = extract32(insn, 0, 5);
CryptoTwoOpFn *genfn; gen_helper_gvec_2 *genfn;
bool feature; bool feature;
TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
if (size != 0) { if (size != 0) {
unallocated_encoding(s); unallocated_encoding(s);
@ -13880,13 +13876,7 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
return; return;
} }
tcg_rd_ptr = vec_full_reg_ptr(s, rd); gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
tcg_rn_ptr = vec_full_reg_ptr(s, rn);
genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr);
tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr);
tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr);
} }
static void gen_rax1_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) static void gen_rax1_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)

View file

@ -674,12 +674,14 @@ DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU) DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
DO_3SAME_CMP(VCEQ, TCG_COND_EQ) DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
static void gen_VMUL_p_3s(TCGContext *s, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, #define WRAP_OOL_FN(WRAPNAME, FUNC) \
uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) static void WRAPNAME(TCGContext *tcg_ctx, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, \
{ uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) \
tcg_gen_gvec_3_ool(s, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, { \
0, gen_helper_gvec_pmul_b); tcg_gen_gvec_3_ool(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
} }
WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)
static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a) static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
{ {
@ -742,110 +744,19 @@ static bool trans_SHA1_3s(DisasContext *s, arg_SHA1_3s *a)
return true; return true;
} }
static bool trans_SHA256H_3s(DisasContext *s, arg_SHA256H_3s *a) #define DO_SHA2(NAME, FUNC) \
{ WRAP_OOL_FN(gen_##NAME##_3s, FUNC) \
TCGv_ptr ptr1, ptr2, ptr3; static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a) \
TCGContext *tcg_ctx = s->uc->tcg_ctx; { \
if (!dc_isar_feature(aa32_sha2, s)) { \
if (!arm_dc_feature(s, ARM_FEATURE_NEON) || return false; \
!dc_isar_feature(aa32_sha2, s)) { } \
return false; return do_3same(s, a, gen_##NAME##_3s); \
} }
/* UNDEF accesses to D16-D31 if they don't exist. */ DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
if (!dc_isar_feature(aa32_simd_r32, s) && DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
((a->vd | a->vn | a->vm) & 0x10)) { DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)
return false;
}
if ((a->vn | a->vm | a->vd) & 1) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
ptr1 = vfp_reg_ptr(s, true, a->vd);
ptr2 = vfp_reg_ptr(s, true, a->vn);
ptr3 = vfp_reg_ptr(s, true, a->vm);
gen_helper_crypto_sha256h(tcg_ctx, ptr1, ptr2, ptr3);
tcg_temp_free_ptr(tcg_ctx, ptr1);
tcg_temp_free_ptr(tcg_ctx, ptr2);
tcg_temp_free_ptr(tcg_ctx, ptr3);
return true;
}
static bool trans_SHA256H2_3s(DisasContext *s, arg_SHA256H2_3s *a)
{
TCGv_ptr ptr1, ptr2, ptr3;
TCGContext *tcg_ctx = s->uc->tcg_ctx;
if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
!dc_isar_feature(aa32_sha2, s)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vn | a->vm) & 0x10)) {
return false;
}
if ((a->vn | a->vm | a->vd) & 1) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
ptr1 = vfp_reg_ptr(s, true, a->vd);
ptr2 = vfp_reg_ptr(s, true, a->vn);
ptr3 = vfp_reg_ptr(s, true, a->vm);
gen_helper_crypto_sha256h2(tcg_ctx, ptr1, ptr2, ptr3);
tcg_temp_free_ptr(tcg_ctx, ptr1);
tcg_temp_free_ptr(tcg_ctx, ptr2);
tcg_temp_free_ptr(tcg_ctx, ptr3);
return true;
}
static bool trans_SHA256SU1_3s(DisasContext *s, arg_SHA256SU1_3s *a)
{
TCGv_ptr ptr1, ptr2, ptr3;
TCGContext *tcg_ctx = s->uc->tcg_ctx;
if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
!dc_isar_feature(aa32_sha2, s)) {
return false;
}
/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vn | a->vm) & 0x10)) {
return false;
}
if ((a->vn | a->vm | a->vd) & 1) {
return false;
}
if (!vfp_access_check(s)) {
return true;
}
ptr1 = vfp_reg_ptr(s, true, a->vd);
ptr2 = vfp_reg_ptr(s, true, a->vn);
ptr3 = vfp_reg_ptr(s, true, a->vm);
gen_helper_crypto_sha256su1(tcg_ctx, ptr1, ptr2, ptr3);
tcg_temp_free_ptr(tcg_ctx, ptr1);
tcg_temp_free_ptr(tcg_ctx, ptr2);
tcg_temp_free_ptr(tcg_ctx, ptr3);
return true;
}
#define DO_3SAME_64(INSN, FUNC) \ #define DO_3SAME_64(INSN, FUNC) \
static void gen_##INSN##_3s(TCGContext *s, unsigned vece, uint32_t rd_ofs, \ static void gen_##INSN##_3s(TCGContext *s, unsigned vece, uint32_t rd_ofs, \

View file

@ -5406,7 +5406,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
int vec_size; int vec_size;
uint32_t imm; uint32_t imm;
TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5; TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
TCGv_ptr ptr1, ptr2; TCGv_ptr ptr1;
TCGv_i64 tmp64; TCGv_i64 tmp64;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
@ -6521,13 +6521,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) { if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
return 1; return 1;
} }
ptr1 = vfp_reg_ptr(s, true, rd); tcg_gen_gvec_2_ool(tcg_ctx, rd_ofs, rm_ofs, 16, 16, 0,
ptr2 = vfp_reg_ptr(s, true, rm); gen_helper_crypto_sha1h);
gen_helper_crypto_sha1h(tcg_ctx, ptr1, ptr2);
tcg_temp_free_ptr(tcg_ctx, ptr1);
tcg_temp_free_ptr(tcg_ctx, ptr2);
break; break;
case NEON_2RM_SHA1SU1: case NEON_2RM_SHA1SU1:
if ((rm | rd) & 1) { if ((rm | rd) & 1) {
@ -6541,17 +6536,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
} else if (!dc_isar_feature(aa32_sha1, s)) { } else if (!dc_isar_feature(aa32_sha1, s)) {
return 1; return 1;
} }
ptr1 = vfp_reg_ptr(s, true, rd); tcg_gen_gvec_2_ool(tcg_ctx, rd_ofs, rm_ofs, 16, 16, 0,
ptr2 = vfp_reg_ptr(s, true, rm); q ? gen_helper_crypto_sha256su0
if (q) { : gen_helper_crypto_sha1su1);
gen_helper_crypto_sha256su0(tcg_ctx, ptr1, ptr2);
} else {
gen_helper_crypto_sha1su1(tcg_ctx, ptr1, ptr2);
}
tcg_temp_free_ptr(tcg_ctx, ptr1);
tcg_temp_free_ptr(tcg_ctx, ptr2);
break; break;
case NEON_2RM_VMVN: case NEON_2RM_VMVN:
tcg_gen_gvec_not(tcg_ctx, 0, rd_ofs, rm_ofs, vec_size, vec_size); tcg_gen_gvec_not(tcg_ctx, 0, rd_ofs, rm_ofs, vec_size, vec_size);
break; break;