mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2025-03-24 22:15:07 +00:00
target/arm: Implement ARMv8.2-DotProd
We've already added the helpers with an SVE patch; all that remains is to wire up the aa64 and aa32 translators. Enable the feature within -cpu max for CONFIG_USER_ONLY. Backports commit 26c470a7bb4233454137de1062341ad48947f252 from qemu.
This commit is contained in:
parent
d4a8f1bfcb
commit
a325de6685
|
@ -1551,6 +1551,7 @@ static void arm_max_initfn(struct uc_struct *uc, Object *obj, void *opaque)
|
||||||
set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
|
set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
|
||||||
set_feature(&cpu->env, ARM_FEATURE_CRC);
|
set_feature(&cpu->env, ARM_FEATURE_CRC);
|
||||||
set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
|
set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
|
||||||
|
set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD);
|
||||||
set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
|
set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1420,6 +1420,7 @@ enum arm_features {
|
||||||
ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
|
ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
|
||||||
ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */
|
ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */
|
||||||
ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */
|
ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */
|
||||||
|
ARM_FEATURE_V8_DOTPROD, /* implements v8.2 simd dot product */
|
||||||
ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */
|
ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */
|
||||||
ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions. */
|
ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions. */
|
||||||
ARM_FEATURE_M_MAIN, /* M profile Main Extension */
|
ARM_FEATURE_M_MAIN, /* M profile Main Extension */
|
||||||
|
|
|
@ -223,6 +223,7 @@ static void aarch64_max_initfn(struct uc_struct *uc, Object *obj, void *opaque)
|
||||||
set_feature(&cpu->env, ARM_FEATURE_CRC);
|
set_feature(&cpu->env, ARM_FEATURE_CRC);
|
||||||
set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS);
|
set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS);
|
||||||
set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
|
set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
|
||||||
|
set_feature(&cpu->env, ARM_FEATURE_V8_DOTPROD);
|
||||||
set_feature(&cpu->env, ARM_FEATURE_V8_FP16);
|
set_feature(&cpu->env, ARM_FEATURE_V8_FP16);
|
||||||
set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
|
set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
|
||||||
set_feature(&cpu->env, ARM_FEATURE_SVE);
|
set_feature(&cpu->env, ARM_FEATURE_SVE);
|
||||||
|
|
|
@ -684,6 +684,18 @@ static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
|
||||||
vec_full_reg_size(s), gvec_op);
|
vec_full_reg_size(s), gvec_op);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Expand a 3-operand operation using an out-of-line helper.
 *
 * The values returned by vec_full_reg_offset() for rd, rn and rm are
 * handed to tcg_gen_gvec_3_ool(), followed by a size of 16 when is_q is
 * set (8 otherwise) and by vec_full_reg_size(s).  data and fn are
 * forwarded to tcg_gen_gvec_3_ool() unchanged.
 *
 * NOTE(review): rd is presumably the destination register and rn/rm the
 * sources, and the two sizes presumably the operation size and the full
 * register size in bytes -- confirm against the tcg_gen_gvec_3_ool()
 * prototype.
 */
|
||||||
|
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
|
||||||
|
int rn, int rm, int data, gen_helper_gvec_3 *fn)
|
||||||
|
{
|
||||||
|
TCGContext *tcg_ctx = s->uc->tcg_ctx;
|
||||||
|
|
||||||
|
tcg_gen_gvec_3_ool(tcg_ctx, vec_full_reg_offset(s, rd),
|
||||||
|
vec_full_reg_offset(s, rn),
|
||||||
|
vec_full_reg_offset(s, rm),
|
||||||
|
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
|
||||||
|
}
|
||||||
|
|
||||||
/* Expand a 3-operand + env pointer operation using
|
/* Expand a 3-operand + env pointer operation using
|
||||||
* an out-of-line helper.
|
* an out-of-line helper.
|
||||||
*/
|
*/
|
||||||
|
@ -11487,6 +11499,14 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
|
||||||
}
|
}
|
||||||
feature = ARM_FEATURE_V8_RDM;
|
feature = ARM_FEATURE_V8_RDM;
|
||||||
break;
|
break;
|
||||||
|
case 0x02: /* SDOT (vector) */
|
||||||
|
case 0x12: /* UDOT (vector) */
|
||||||
|
if (size != MO_32) {
|
||||||
|
unallocated_encoding(s);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
feature = ARM_FEATURE_V8_DOTPROD;
|
||||||
|
break;
|
||||||
case 0x8: /* FCMLA, #0 */
|
case 0x8: /* FCMLA, #0 */
|
||||||
case 0x9: /* FCMLA, #90 */
|
case 0x9: /* FCMLA, #90 */
|
||||||
case 0xa: /* FCMLA, #180 */
|
case 0xa: /* FCMLA, #180 */
|
||||||
|
@ -11540,6 +11560,11 @@ static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
case 0x2: /* SDOT / UDOT */
|
||||||
|
gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0,
|
||||||
|
u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
|
||||||
|
return;
|
||||||
|
|
||||||
case 0x8: /* FCMLA, #0 */
|
case 0x8: /* FCMLA, #0 */
|
||||||
case 0x9: /* FCMLA, #90 */
|
case 0x9: /* FCMLA, #90 */
|
||||||
case 0xa: /* FCMLA, #180 */
|
case 0xa: /* FCMLA, #180 */
|
||||||
|
@ -12726,6 +12751,13 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 0x0e: /* SDOT */
|
||||||
|
case 0x1e: /* UDOT */
|
||||||
|
if (size != MO_32 || !arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
|
||||||
|
unallocated_encoding(s);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
break;
|
||||||
case 0x11: /* FCMLA #0 */
|
case 0x11: /* FCMLA #0 */
|
||||||
case 0x13: /* FCMLA #90 */
|
case 0x13: /* FCMLA #90 */
|
||||||
case 0x15: /* FCMLA #180 */
|
case 0x15: /* FCMLA #180 */
|
||||||
|
@ -12823,6 +12855,12 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (16 * u + opcode) {
|
switch (16 * u + opcode) {
|
||||||
|
case 0x0e: /* SDOT */
|
||||||
|
case 0x1e: /* UDOT */
|
||||||
|
gen_gvec_op3_ool(s, is_q, rd, rn, rm, index,
|
||||||
|
u ? gen_helper_gvec_udot_idx_b
|
||||||
|
: gen_helper_gvec_sdot_idx_b);
|
||||||
|
return;
|
||||||
case 0x11: /* FCMLA #0 */
|
case 0x11: /* FCMLA #0 */
|
||||||
case 0x13: /* FCMLA #90 */
|
case 0x13: /* FCMLA #90 */
|
||||||
case 0x15: /* FCMLA #180 */
|
case 0x15: /* FCMLA #180 */
|
||||||
|
|
|
@ -7914,9 +7914,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||||
static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
|
static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
|
||||||
{
|
{
|
||||||
TCGContext *tcg_ctx = s->uc->tcg_ctx;
|
TCGContext *tcg_ctx = s->uc->tcg_ctx;
|
||||||
gen_helper_gvec_3_ptr *fn_gvec_ptr;
|
gen_helper_gvec_3 *fn_gvec = NULL;
|
||||||
int rd, rn, rm, rot, size, opr_sz;
|
gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
|
||||||
TCGv_ptr fpst;
|
int rd, rn, rm, opr_sz;
|
||||||
|
int data = 0;
|
||||||
bool q;
|
bool q;
|
||||||
|
|
||||||
q = extract32(insn, 6, 1);
|
q = extract32(insn, 6, 1);
|
||||||
|
@ -7929,8 +7930,8 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
|
||||||
|
|
||||||
if ((insn & 0xfe200f10) == 0xfc200800) {
|
if ((insn & 0xfe200f10) == 0xfc200800) {
|
||||||
/* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
|
/* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
|
||||||
size = extract32(insn, 20, 1);
|
int size = extract32(insn, 20, 1);
|
||||||
rot = extract32(insn, 23, 2);
|
data = extract32(insn, 23, 2); /* rot */
|
||||||
if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
|
if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
|
||||||
|| (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
|
|| (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -7938,13 +7939,20 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
|
||||||
fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
|
fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
|
||||||
} else if ((insn & 0xfea00f10) == 0xfc800800) {
|
} else if ((insn & 0xfea00f10) == 0xfc800800) {
|
||||||
/* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
|
/* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
|
||||||
size = extract32(insn, 20, 1);
|
int size = extract32(insn, 20, 1);
|
||||||
rot = extract32(insn, 24, 1);
|
data = extract32(insn, 24, 1); /* rot */
|
||||||
if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
|
if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
|
||||||
|| (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
|
|| (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
|
fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
|
||||||
|
} else if ((insn & 0xfeb00f00) == 0xfc200d00) {
|
||||||
|
/* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */
|
||||||
|
bool u = extract32(insn, 4, 1);
|
||||||
|
if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
|
||||||
} else {
|
} else {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -7959,12 +7967,19 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
|
||||||
}
|
}
|
||||||
|
|
||||||
opr_sz = (1 + q) * 8;
|
opr_sz = (1 + q) * 8;
|
||||||
fpst = get_fpstatus_ptr(s, 1);
|
if (fn_gvec_ptr) {
|
||||||
tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, rd),
|
TCGv_ptr fpst = get_fpstatus_ptr(s, 1);
|
||||||
vfp_reg_offset(1, rn),
|
tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, rd),
|
||||||
vfp_reg_offset(1, rm), fpst,
|
vfp_reg_offset(1, rn),
|
||||||
opr_sz, opr_sz, rot, fn_gvec_ptr);
|
vfp_reg_offset(1, rm), fpst,
|
||||||
tcg_temp_free_ptr(tcg_ctx, fpst);
|
opr_sz, opr_sz, data, fn_gvec_ptr);
|
||||||
|
tcg_temp_free_ptr(tcg_ctx, fpst);
|
||||||
|
} else {
|
||||||
|
tcg_gen_gvec_3_ool(tcg_ctx, vfp_reg_offset(1, rd),
|
||||||
|
vfp_reg_offset(1, rn),
|
||||||
|
vfp_reg_offset(1, rm),
|
||||||
|
opr_sz, opr_sz, data, fn_gvec);
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7979,9 +7994,9 @@ static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
|
||||||
static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
|
static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
|
||||||
{
|
{
|
||||||
TCGContext *tcg_ctx = s->uc->tcg_ctx;
|
TCGContext *tcg_ctx = s->uc->tcg_ctx;
|
||||||
gen_helper_gvec_3_ptr *fn_gvec_ptr;
|
gen_helper_gvec_3 *fn_gvec = NULL;
|
||||||
|
gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
|
||||||
int rd, rn, rm, opr_sz, data;
|
int rd, rn, rm, opr_sz, data;
|
||||||
TCGv_ptr fpst;
|
|
||||||
bool q;
|
bool q;
|
||||||
|
|
||||||
q = extract32(insn, 6, 1);
|
q = extract32(insn, 6, 1);
|
||||||
|
@ -8015,6 +8030,16 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
|
||||||
data = (index << 2) | rot;
|
data = (index << 2) | rot;
|
||||||
fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx
|
fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx
|
||||||
: gen_helper_gvec_fcmlah_idx);
|
: gen_helper_gvec_fcmlah_idx);
|
||||||
|
} else if ((insn & 0xffb00f00) == 0xfe200d00) {
|
||||||
|
/* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
|
||||||
|
int u = extract32(insn, 4, 1);
|
||||||
|
if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
|
||||||
|
/* rm is just Vm, and index is M. */
|
||||||
|
data = extract32(insn, 5, 1); /* index */
|
||||||
|
rm = extract32(insn, 0, 4);
|
||||||
} else {
|
} else {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -8029,12 +8054,19 @@ static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
|
||||||
}
|
}
|
||||||
|
|
||||||
opr_sz = (1 + q) * 8;
|
opr_sz = (1 + q) * 8;
|
||||||
fpst = get_fpstatus_ptr(s, 1);
|
if (fn_gvec_ptr) {
|
||||||
tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, rd),
|
TCGv_ptr fpst = get_fpstatus_ptr(s, 1);
|
||||||
vfp_reg_offset(1, rn),
|
tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, rd),
|
||||||
vfp_reg_offset(1, rm), fpst,
|
vfp_reg_offset(1, rn),
|
||||||
opr_sz, opr_sz, data, fn_gvec_ptr);
|
vfp_reg_offset(1, rm), fpst,
|
||||||
tcg_temp_free_ptr(tcg_ctx, fpst);
|
opr_sz, opr_sz, data, fn_gvec_ptr);
|
||||||
|
tcg_temp_free_ptr(tcg_ctx, fpst);
|
||||||
|
} else {
|
||||||
|
tcg_gen_gvec_3_ool(tcg_ctx, vfp_reg_offset(1, rd),
|
||||||
|
vfp_reg_offset(1, rn),
|
||||||
|
vfp_reg_offset(1, rm),
|
||||||
|
opr_sz, opr_sz, data, fn_gvec);
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue