From db1d39ab4ae6efc36e394fd35843944736e45e5d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 3 Jul 2018 03:14:20 -0400 Subject: [PATCH] target/arm: Implement SVE floating-point compare vectors Backports commit abfdefd5bd444b629d16dcefc2b60ac8da37e87d from qemu --- qemu/aarch64.h | 21 +++++++++++ qemu/aarch64eb.h | 21 +++++++++++ qemu/header_gen.py | 21 +++++++++++ qemu/target/arm/helper-sve.h | 49 ++++++++++++++++++++++++++ qemu/target/arm/sve.decode | 11 ++++++ qemu/target/arm/sve_helper.c | 62 +++++++++++++++++++++++++++++++++ qemu/target/arm/translate-sve.c | 41 ++++++++++++++++++++++ 7 files changed, 226 insertions(+) diff --git a/qemu/aarch64.h b/qemu/aarch64.h index 2b4f1932..7d4d0fc5 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -3507,6 +3507,27 @@ #define helper_sve_fadda_d helper_sve_fadda_d_aarch64 #define helper_sve_fadda_h helper_sve_fadda_h_aarch64 #define helper_sve_fadda_s helper_sve_fadda_s_aarch64 +#define helper_sve_facge_d helper_sve_facge_d_aarch64 +#define helper_sve_facge_h helper_sve_facge_h_aarch64 +#define helper_sve_facge_s helper_sve_facge_s_aarch64 +#define helper_sve_facgt_d helper_sve_facgt_d_aarch64 +#define helper_sve_facgt_h helper_sve_facgt_h_aarch64 +#define helper_sve_facgt_s helper_sve_facgt_s_aarch64 +#define helper_sve_fcmeq_d helper_sve_fcmeq_d_aarch64 +#define helper_sve_fcmeq_h helper_sve_fcmeq_h_aarch64 +#define helper_sve_fcmeq_s helper_sve_fcmeq_s_aarch64 +#define helper_sve_fcmge_d helper_sve_fcmge_d_aarch64 +#define helper_sve_fcmge_h helper_sve_fcmge_h_aarch64 +#define helper_sve_fcmge_s helper_sve_fcmge_s_aarch64 +#define helper_sve_fcmgt_d helper_sve_fcmgt_d_aarch64 +#define helper_sve_fcmgt_h helper_sve_fcmgt_h_aarch64 +#define helper_sve_fcmgt_s helper_sve_fcmgt_s_aarch64 +#define helper_sve_fcmne_d helper_sve_fcmne_d_aarch64 +#define helper_sve_fcmne_h helper_sve_fcmne_h_aarch64 +#define helper_sve_fcmne_s helper_sve_fcmne_s_aarch64 +#define helper_sve_fcmuo_d helper_sve_fcmuo_d_aarch64 +#define helper_sve_fcmuo_h helper_sve_fcmuo_h_aarch64 +#define helper_sve_fcmuo_s helper_sve_fcmuo_s_aarch64 #define helper_sve_fdiv_d helper_sve_fdiv_d_aarch64 #define helper_sve_fdiv_h helper_sve_fdiv_h_aarch64 #define helper_sve_fdiv_s helper_sve_fdiv_s_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index 2e5dae02..acac3d83 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -3507,6 +3507,27 @@ #define helper_sve_fadda_d helper_sve_fadda_d_aarch64eb #define helper_sve_fadda_h helper_sve_fadda_h_aarch64eb #define helper_sve_fadda_s helper_sve_fadda_s_aarch64eb +#define helper_sve_facge_d helper_sve_facge_d_aarch64eb +#define helper_sve_facge_h helper_sve_facge_h_aarch64eb +#define helper_sve_facge_s helper_sve_facge_s_aarch64eb +#define helper_sve_facgt_d helper_sve_facgt_d_aarch64eb +#define helper_sve_facgt_h helper_sve_facgt_h_aarch64eb +#define helper_sve_facgt_s helper_sve_facgt_s_aarch64eb +#define helper_sve_fcmeq_d helper_sve_fcmeq_d_aarch64eb +#define helper_sve_fcmeq_h helper_sve_fcmeq_h_aarch64eb +#define helper_sve_fcmeq_s helper_sve_fcmeq_s_aarch64eb +#define helper_sve_fcmge_d helper_sve_fcmge_d_aarch64eb +#define helper_sve_fcmge_h helper_sve_fcmge_h_aarch64eb +#define helper_sve_fcmge_s helper_sve_fcmge_s_aarch64eb +#define helper_sve_fcmgt_d helper_sve_fcmgt_d_aarch64eb +#define helper_sve_fcmgt_h helper_sve_fcmgt_h_aarch64eb +#define helper_sve_fcmgt_s helper_sve_fcmgt_s_aarch64eb +#define helper_sve_fcmne_d helper_sve_fcmne_d_aarch64eb +#define helper_sve_fcmne_h helper_sve_fcmne_h_aarch64eb +#define helper_sve_fcmne_s helper_sve_fcmne_s_aarch64eb +#define helper_sve_fcmuo_d helper_sve_fcmuo_d_aarch64eb +#define helper_sve_fcmuo_h helper_sve_fcmuo_h_aarch64eb +#define helper_sve_fcmuo_s helper_sve_fcmuo_s_aarch64eb #define helper_sve_fdiv_d helper_sve_fdiv_d_aarch64eb #define helper_sve_fdiv_h helper_sve_fdiv_h_aarch64eb #define helper_sve_fdiv_s helper_sve_fdiv_s_aarch64eb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index e530834c..d0ca7a81 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -3528,6 +3528,27 @@ aarch64_symbols = ( 'helper_sve_fadda_d', 'helper_sve_fadda_h', 'helper_sve_fadda_s', + 'helper_sve_facge_d', + 'helper_sve_facge_h', + 'helper_sve_facge_s', + 'helper_sve_facgt_d', + 'helper_sve_facgt_h', + 'helper_sve_facgt_s', + 'helper_sve_fcmeq_d', + 'helper_sve_fcmeq_h', + 'helper_sve_fcmeq_s', + 'helper_sve_fcmge_d', + 'helper_sve_fcmge_h', + 'helper_sve_fcmge_s', + 'helper_sve_fcmgt_d', + 'helper_sve_fcmgt_h', + 'helper_sve_fcmgt_s', + 'helper_sve_fcmne_d', + 'helper_sve_fcmne_h', + 'helper_sve_fcmne_s', + 'helper_sve_fcmuo_d', + 'helper_sve_fcmuo_h', + 'helper_sve_fcmuo_s', 'helper_sve_fdiv_d', 'helper_sve_fdiv_h', 'helper_sve_fdiv_s', diff --git a/qemu/target/arm/helper-sve.h b/qemu/target/arm/helper-sve.h index 55e8a908..6089b3a5 100644 --- a/qemu/target/arm/helper-sve.h +++ b/qemu/target/arm/helper-sve.h @@ -839,6 +839,55 @@ DEF_HELPER_FLAGS_5(sve_ucvt_ds, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(sve_ucvt_dd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_fcmge_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_fcmge_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_fcmge_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_6(sve_fcmgt_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_fcmgt_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_fcmgt_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_6(sve_fcmeq_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_fcmeq_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_fcmeq_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_6(sve_fcmne_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_fcmne_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_fcmne_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_6(sve_fcmuo_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_fcmuo_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_fcmuo_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_6(sve_facge_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_facge_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_facge_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_6(sve_facgt_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_facgt_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_6(sve_facgt_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG, void, env, ptr, i32) DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG, void, env, ptr, i32) DEF_HELPER_FLAGS_3(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG, void, env, ptr, i32) diff --git a/qemu/target/arm/sve.decode b/qemu/target/arm/sve.decode index eb91ecdb..5ad20bea 100644 --- a/qemu/target/arm/sve.decode +++ b/qemu/target/arm/sve.decode @@ -324,6 +324,17 @@ UXTH 00000100 .. 010 011 101 ... ..... ..... @rd_pg_rn SXTW 00000100 .. 010 100 101 ... ..... ..... @rd_pg_rn UXTW 00000100 .. 010 101 101 ... ..... ..... @rd_pg_rn +### SVE Floating Point Compare - Vectors Group + +# SVE floating-point compare vectors +FCMGE_ppzz 01100101 .. 0 ..... 010 ... ..... 0 .... @pd_pg_rn_rm +FCMGT_ppzz 01100101 .. 0 ..... 010 ... ..... 1 .... @pd_pg_rn_rm +FCMEQ_ppzz 01100101 .. 0 ..... 011 ... ..... 0 .... @pd_pg_rn_rm +FCMNE_ppzz 01100101 .. 0 ..... 011 ... ..... 1 .... @pd_pg_rn_rm +FCMUO_ppzz 01100101 .. 0 ..... 110 ... ..... 0 .... @pd_pg_rn_rm +FACGE_ppzz 01100101 .. 0 ..... 110 ... ..... 1 .... @pd_pg_rn_rm +FACGT_ppzz 01100101 .. 0 ..... 111 ... ..... 1 .... @pd_pg_rn_rm + ### SVE Integer Multiply-Add Group # SVE integer multiply-add writing addend (predicated) diff --git a/qemu/target/arm/sve_helper.c b/qemu/target/arm/sve_helper.c index 5c19d1a8..36e82516 100644 --- a/qemu/target/arm/sve_helper.c +++ b/qemu/target/arm/sve_helper.c @@ -3009,6 +3009,68 @@ void HELPER(sve_fnmls_zpzzz_d)(CPUARMState *env, void *vg, uint32_t desc) do_fmla_zpzzz_d(env, vg, desc, 0, INT64_MIN); } +/* Two operand floating-point comparison controlled by a predicate. + * Unlike the integer version, we are not allowed to optimistically + * compare operands, since the comparison may have side effects wrt + * the FPSR. + */ +#define DO_FPCMP_PPZZ(NAME, TYPE, H, OP) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, \ + void *status, uint32_t desc) \ +{ \ + intptr_t i = simd_oprsz(desc), j = (i - 1) >> 6; \ + uint64_t *d = vd, *g = vg; \ + do { \ + uint64_t out = 0, pg = g[j]; \ + do { \ + i -= sizeof(TYPE), out <<= sizeof(TYPE); \ + if (likely((pg >> (i & 63)) & 1)) { \ + TYPE nn = *(TYPE *)(vn + H(i)); \ + TYPE mm = *(TYPE *)(vm + H(i)); \ + out |= OP(TYPE, nn, mm, status); \ + } \ + } while (i & 63); \ + d[j--] = out; \ + } while (i > 0); \ +} + +#define DO_FPCMP_PPZZ_H(NAME, OP) \ + DO_FPCMP_PPZZ(NAME##_h, float16, H1_2, OP) +#define DO_FPCMP_PPZZ_S(NAME, OP) \ + DO_FPCMP_PPZZ(NAME##_s, float32, H1_4, OP) +#define DO_FPCMP_PPZZ_D(NAME, OP) \ + DO_FPCMP_PPZZ(NAME##_d, float64, , OP) + +#define DO_FPCMP_PPZZ_ALL(NAME, OP) \ + DO_FPCMP_PPZZ_H(NAME, OP) \ + DO_FPCMP_PPZZ_S(NAME, OP) \ + DO_FPCMP_PPZZ_D(NAME, OP) + +#define DO_FCMGE(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) <= 0 +#define DO_FCMGT(TYPE, X, Y, ST) TYPE##_compare(Y, X, ST) < 0 +#define DO_FCMEQ(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) == 0 +#define DO_FCMNE(TYPE, X, Y, ST) TYPE##_compare_quiet(X, Y, ST) != 0 +#define DO_FCMUO(TYPE, X, Y, ST) \ + TYPE##_compare_quiet(X, Y, ST) == float_relation_unordered +#define DO_FACGE(TYPE, X, Y, ST) \ + TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) <= 0 +#define DO_FACGT(TYPE, X, Y, ST) \ + TYPE##_compare(TYPE##_abs(Y), TYPE##_abs(X), ST) < 0 + +DO_FPCMP_PPZZ_ALL(sve_fcmge, DO_FCMGE) +DO_FPCMP_PPZZ_ALL(sve_fcmgt, DO_FCMGT) +DO_FPCMP_PPZZ_ALL(sve_fcmeq, DO_FCMEQ) +DO_FPCMP_PPZZ_ALL(sve_fcmne, DO_FCMNE) +DO_FPCMP_PPZZ_ALL(sve_fcmuo, DO_FCMUO) +DO_FPCMP_PPZZ_ALL(sve_facge, DO_FACGE) +DO_FPCMP_PPZZ_ALL(sve_facgt, DO_FACGT) + +#undef DO_FPCMP_PPZZ_ALL +#undef DO_FPCMP_PPZZ_D +#undef DO_FPCMP_PPZZ_S +#undef DO_FPCMP_PPZZ_H +#undef DO_FPCMP_PPZZ + /* * Load contiguous data, protected by a governing predicate. */ diff --git a/qemu/target/arm/translate-sve.c b/qemu/target/arm/translate-sve.c index ebbcaf32..c0d5f662 100644 --- a/qemu/target/arm/translate-sve.c +++ b/qemu/target/arm/translate-sve.c @@ -3679,6 +3679,47 @@ DO_FP3(FMULX, fmulx) #undef DO_FP3 +static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a, + gen_helper_gvec_4_ptr *fn) +{ + if (fn == NULL) { + return false; + } + if (sve_access_check(s)) { + TCGContext *tcg_ctx = s->uc->tcg_ctx; + unsigned vsz = vec_full_reg_size(s); + TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16); + tcg_gen_gvec_4_ptr(tcg_ctx, pred_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + pred_full_reg_offset(s, a->pg), + status, vsz, vsz, 0, fn); + tcg_temp_free_ptr(tcg_ctx, status); + } + return true; +} + +#define DO_FPCMP(NAME, name) \ +static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \ + uint32_t insn) \ +{ \ + static gen_helper_gvec_4_ptr * const fns[4] = { \ + NULL, gen_helper_sve_##name##_h, \ + gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \ + }; \ + return do_fp_cmp(s, a, fns[a->esz]); \ +} + +DO_FPCMP(FCMGE, fcmge) +DO_FPCMP(FCMGT, fcmgt) +DO_FPCMP(FCMEQ, fcmeq) +DO_FPCMP(FCMNE, fcmne) +DO_FPCMP(FCMUO, fcmuo) +DO_FPCMP(FACGE, facge) +DO_FPCMP(FACGT, facgt) + +#undef DO_FPCMP + typedef void gen_helper_sve_fmla(TCGContext *, TCGv_env, TCGv_ptr, TCGv_i32); static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)