From 2c6c4da80cdb7325d740dbb72eb4a485afcd8ddc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 14 Jun 2020 23:00:24 -0400 Subject: [PATCH] target/arm: Convert sha512 and sm3 to gvec helpers Do not yet convert the helpers to loop over opr_sz, but the descriptor allows the vector tail to be cleared, which fixes an existing bug vs SVE. Backports commit aaffebd6d3135b8aed7e61932af53b004d261579 from qemu --- qemu/target/arm/crypto_helper.c | 37 +++++++++++++++++++---- qemu/target/arm/helper.h | 15 ++++++---- qemu/target/arm/translate-a64.c | 52 ++++++++++----------------------- 3 files changed, 55 insertions(+), 49 deletions(-) diff --git a/qemu/target/arm/crypto_helper.c b/qemu/target/arm/crypto_helper.c index 0aa5bdad..6eec5a83 100644 --- a/qemu/target/arm/crypto_helper.c +++ b/qemu/target/arm/crypto_helper.c @@ -32,6 +32,19 @@ union CRYPTO_STATE { #define CR_ST_WORD(state, i) (state.words[i]) #endif +/* + * The caller has not been converted to full gvec, and so only + * modifies the low 16 bytes of the vector register. 
+ */ +static void clear_tail_16(void *vd, uint32_t desc) +{ + int opr_sz = simd_oprsz(desc); + int max_sz = simd_maxsz(desc); + + assert(opr_sz == 16); + clear_tail(vd, opr_sz, max_sz); +} + static void do_crypto_aese(uint64_t *rd, uint64_t *rn, uint64_t *rm, bool decrypt) { @@ -506,7 +519,7 @@ static uint64_t s1_512(uint64_t x) return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); } -void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -519,9 +532,11 @@ void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm) rd[0] = d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -534,9 +549,11 @@ void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm) rd[0] = d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512su0)(void *vd, void *vn) +void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -548,9 +565,11 @@ void HELPER(crypto_sha512su0)(void *vd, void *vn) rd[0] = d0; rd[1] = d1; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -558,9 +577,11 @@ void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm) rd[0] += s1_512(rn[0]) + rm[0]; rd[1] += s1_512(rn[1]) + rm[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm) +void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -593,9 +614,11 @@ void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } -void HELPER(crypto_sm3partw2)(void *vd, void 
*vn, void *vm) +void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc) { uint64_t *rd = vd; uint64_t *rn = vn; @@ -624,6 +647,8 @@ void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm) rd[0] = d.l[0]; rd[1] = d.l[1]; + + clear_tail_16(vd, desc); } void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2, diff --git a/qemu/target/arm/helper.h b/qemu/target/arm/helper.h index cec39e71..6892a1cb 100644 --- a/qemu/target/arm/helper.h +++ b/qemu/target/arm/helper.h @@ -518,14 +518,17 @@ DEF_HELPER_FLAGS_3(crypto_sha256h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) DEF_HELPER_FLAGS_2(crypto_sha256su0, TCG_CALL_NO_RWG, void, ptr, ptr) DEF_HELPER_FLAGS_3(crypto_sha256su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_2(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sha512su1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_4(crypto_sha512h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512h2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(crypto_sha512su0, TCG_CALL_NO_RWG, void, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sha512su1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(crypto_sm3tt, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32) -DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) -DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) +DEF_HELPER_FLAGS_4(crypto_sm3partw1, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(crypto_sm3partw2, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c index 
467d4f4f..9bc08ea1 100644 --- a/qemu/target/arm/translate-a64.c +++ b/qemu/target/arm/translate-a64.c @@ -13923,14 +13923,12 @@ void gen_gvec_rax1(TCGContext *s, unsigned vece, uint32_t rd_ofs, uint32_t rn_of */ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int opcode = extract32(insn, 10, 2); int o = extract32(insn, 14, 1); int rm = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); bool feature; - CryptoThreeOpFn *genfn = NULL; gen_helper_gvec_3 *oolfn = NULL; GVecGen3Fn *gvecfn = NULL; @@ -13938,15 +13936,15 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SHA512H */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512h; + oolfn = gen_helper_crypto_sha512h; break; case 1: /* SHA512H2 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512h2; + oolfn = gen_helper_crypto_sha512h2; break; case 2: /* SHA512SU1 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512su1; + oolfn = gen_helper_crypto_sha512su1; break; case 3: /* RAX1 */ feature = dc_isar_feature(aa64_sha3, s); @@ -13959,11 +13957,11 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) switch (opcode) { case 0: /* SM3PARTW1 */ feature = dc_isar_feature(aa64_sm3, s); - genfn = gen_helper_crypto_sm3partw1; + oolfn = gen_helper_crypto_sm3partw1; break; case 1: /* SM3PARTW2 */ feature = dc_isar_feature(aa64_sm3, s); - genfn = gen_helper_crypto_sm3partw2; + oolfn = gen_helper_crypto_sm3partw2; break; case 2: /* SM4EKEY */ feature = dc_isar_feature(aa64_sm4, s); @@ -13986,20 +13984,8 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) if (oolfn) { gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); - } else if (gvecfn) { - gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); } else { - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; - - 
tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - tcg_rm_ptr = vec_full_reg_ptr(s, rm); - - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rm_ptr); + gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); } } @@ -14011,23 +13997,17 @@ static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) */ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; int opcode = extract32(insn, 10, 2); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; bool feature; - CryptoTwoOpFn *genfn; - gen_helper_gvec_3 *oolfn = NULL; switch (opcode) { case 0: /* SHA512SU0 */ feature = dc_isar_feature(aa64_sha512, s); - genfn = gen_helper_crypto_sha512su0; break; case 1: /* SM4E */ feature = dc_isar_feature(aa64_sm4, s); - oolfn = gen_helper_crypto_sm4e; break; default: unallocated_encoding(s); @@ -14043,18 +14023,16 @@ static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) return; } - if (oolfn) { - gen_gvec_op3_ool(s, true, rd, rd, rn, 0, oolfn); - return; + switch (opcode) { + case 0: /* SHA512SU0 */ + gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0); + break; + case 1: /* SM4E */ + gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e); + break; + default: + g_assert_not_reached(); } - - tcg_rd_ptr = vec_full_reg_ptr(s, rd); - tcg_rn_ptr = vec_full_reg_ptr(s, rn); - - genfn(tcg_ctx, tcg_rd_ptr, tcg_rn_ptr); - - tcg_temp_free_ptr(tcg_ctx, tcg_rd_ptr); - tcg_temp_free_ptr(tcg_ctx, tcg_rn_ptr); } /* Crypto four-register