target/arm: Use mte_checkN for sve unpredicated stores

Backports commit bba87d0a0f480805223a6428a7942a51733c488a from qemu
This commit is contained in:
Richard Henderson 2021-02-25 17:40:39 -05:00 committed by Lioncash
parent 3ce14ebc78
commit 49bd9a5c68
10 changed files with 77 additions and 55 deletions

View file

@ -3474,6 +3474,8 @@
#define gen_gvec_urshr gen_gvec_urshr_aarch64
#define gen_gvec_ushl gen_gvec_ushl_aarch64
#define gen_gvec_usra gen_gvec_usra_aarch64
#define gen_mte_check1 gen_mte_check1_aarch64
#define gen_mte_checkN gen_mte_checkN_aarch64
#define get_phys_addr get_phys_addr_aarch64
#define gen_sshl_i32 gen_sshl_i32_aarch64
#define gen_sshl_i64 gen_sshl_i64_aarch64
@ -4527,8 +4529,8 @@
#define helper_xpacd helper_xpacd_aarch64
#define helper_xpaci helper_xpaci_aarch64
#define logic_imm_decode_wmask logic_imm_decode_wmask_aarch64
#define mte_check1 mte_check1_aarch64
#define mte_checkN mte_checkN_aarch64
#define mte_check1_ mte_check1__aarch64
#define mte_checkN_ mte_checkN__aarch64
#define mte_probe1 mte_probe1_aarch64
#define new_tmp_a64 new_tmp_a64_aarch64
#define new_tmp_a64_zero new_tmp_a64_zero_aarch64

View file

@ -3474,6 +3474,8 @@
#define gen_gvec_urshr gen_gvec_urshr_aarch64eb
#define gen_gvec_ushl gen_gvec_ushl_aarch64eb
#define gen_gvec_usra gen_gvec_usra_aarch64eb
#define gen_mte_check1 gen_mte_check1_aarch64eb
#define gen_mte_checkN gen_mte_checkN_aarch64eb
#define get_phys_addr get_phys_addr_aarch64eb
#define gen_sshl_i32 gen_sshl_i32_aarch64eb
#define gen_sshl_i64 gen_sshl_i64_aarch64eb
@ -4527,8 +4529,8 @@
#define helper_xpacd helper_xpacd_aarch64eb
#define helper_xpaci helper_xpaci_aarch64eb
#define logic_imm_decode_wmask logic_imm_decode_wmask_aarch64eb
#define mte_check1 mte_check1_aarch64eb
#define mte_checkN mte_checkN_aarch64eb
#define mte_check1_ mte_check1__aarch64eb
#define mte_checkN_ mte_checkN__aarch64eb
#define mte_probe1 mte_probe1_aarch64eb
#define new_tmp_a64 new_tmp_a64_aarch64eb
#define new_tmp_a64_zero new_tmp_a64_zero_aarch64eb

View file

@ -3459,6 +3459,8 @@
#define gen_gvec_urshr gen_gvec_urshr_arm
#define gen_gvec_ushl gen_gvec_ushl_arm
#define gen_gvec_usra gen_gvec_usra_arm
#define gen_mte_check1 gen_mte_check1_arm
#define gen_mte_checkN gen_mte_checkN_arm
#define get_phys_addr get_phys_addr_arm
#define gen_sshl_i32 gen_sshl_i32_arm
#define gen_sshl_i64 gen_sshl_i64_arm
@ -3515,8 +3517,8 @@
#define helper_gvec_usra_s helper_gvec_usra_s_arm
#define helper_probe_access_armfn helper_probe_access_armfn_arm
#define helper_vjcvt helper_vjcvt_arm
#define mte_check1 mte_check1_arm
#define mte_checkN mte_checkN_arm
#define mte_check1_ mte_check1__arm
#define mte_checkN_ mte_checkN__arm
#define mte_probe1 mte_probe1_arm
#define pmu_init pmu_init_arm
#define pmsav8_mpu_lookup pmsav8_mpu_lookup_arm

View file

@ -3459,6 +3459,8 @@
#define gen_gvec_urshr gen_gvec_urshr_armeb
#define gen_gvec_ushl gen_gvec_ushl_armeb
#define gen_gvec_usra gen_gvec_usra_armeb
#define gen_mte_check1 gen_mte_check1_armeb
#define gen_mte_checkN gen_mte_checkN_armeb
#define get_phys_addr get_phys_addr_armeb
#define gen_sshl_i32 gen_sshl_i32_armeb
#define gen_sshl_i64 gen_sshl_i64_armeb
@ -3515,8 +3517,8 @@
#define helper_gvec_usra_s helper_gvec_usra_s_armeb
#define helper_probe_access_armfn helper_probe_access_armfn_armeb
#define helper_vjcvt helper_vjcvt_armeb
#define mte_check1 mte_check1_armeb
#define mte_checkN mte_checkN_armeb
#define mte_check1_ mte_check1__armeb
#define mte_checkN_ mte_checkN__armeb
#define mte_probe1 mte_probe1_armeb
#define pmu_init pmu_init_armeb
#define pmsav8_mpu_lookup pmsav8_mpu_lookup_armeb

View file

@ -3468,6 +3468,8 @@ arm_symbols = (
'gen_gvec_urshr',
'gen_gvec_ushl',
'gen_gvec_usra',
'gen_mte_check1',
'gen_mte_checkN',
'get_phys_addr',
'gen_sshl_i32',
'gen_sshl_i64',
@ -3524,8 +3526,8 @@ arm_symbols = (
'helper_gvec_usra_s',
'helper_probe_access_armfn',
'helper_vjcvt',
'mte_check1',
'mte_checkN',
'mte_check1_',
'mte_checkN_',
'mte_probe1',
'pmu_init',
'pmsav8_mpu_lookup',
@ -3612,6 +3614,8 @@ aarch64_symbols = (
'gen_gvec_urshr',
'gen_gvec_ushl',
'gen_gvec_usra',
'gen_mte_check1',
'gen_mte_checkN',
'get_phys_addr',
'gen_sshl_i32',
'gen_sshl_i64',
@ -4665,8 +4669,8 @@ aarch64_symbols = (
'helper_xpacd',
'helper_xpaci',
'logic_imm_decode_wmask',
'mte_check1',
'mte_checkN',
'mte_check1_',
'mte_checkN_',
'mte_probe1',
'new_tmp_a64',
'new_tmp_a64_zero',

View file

@ -1324,10 +1324,10 @@ FIELD(MTEDESC, ESIZE, 9, 5)
FIELD(MTEDESC, TSIZE, 14, 10) /* mte_checkN only */
bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr);
uint64_t mte_check1(CPUARMState *env, uint32_t desc,
uint64_t ptr, uintptr_t ra);
uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
uint64_t ptr, uintptr_t ra);
uint64_t mte_check1_(CPUARMState *env, uint32_t desc,
uint64_t ptr, uintptr_t ra);
uint64_t mte_checkN_(CPUARMState *env, uint32_t desc,
uint64_t ptr, uintptr_t ra);
static inline int allocation_tag_from_addr(uint64_t ptr)

View file

@ -474,8 +474,8 @@ bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr)
return mte_probe1_int(env, desc, ptr, 0, bit55);
}
uint64_t mte_check1(CPUARMState *env, uint32_t desc,
uint64_t ptr, uintptr_t ra)
uint64_t mte_check1_(CPUARMState *env, uint32_t desc,
uint64_t ptr, uintptr_t ra)
{
int bit55 = extract64(ptr, 55, 1);
@ -494,7 +494,7 @@ uint64_t mte_check1(CPUARMState *env, uint32_t desc,
uint64_t HELPER(mte_check_1)(CPUARMState *env, uint32_t desc, uint64_t ptr)
{
return mte_check1(env, desc, ptr, GETPC());
return mte_check1_(env, desc, ptr, GETPC());
}
/**
@ -559,8 +559,8 @@ static int checkN(uint8_t *mem, int odd, int cmp, int count)
return n;
}
uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
uint64_t ptr, uintptr_t ra)
uint64_t mte_checkN_(CPUARMState *env, uint32_t desc,
uint64_t ptr, uintptr_t ra)
{
int mmu_idx, ptr_tag, bit55;
uint64_t ptr_last, ptr_end, prev_page, next_page;
@ -661,7 +661,7 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
uint64_t HELPER(mte_check_N)(CPUARMState *env, uint32_t desc, uint64_t ptr)
{
return mte_checkN(env, desc, ptr, GETPC());
return mte_checkN_(env, desc, ptr, GETPC());
}
/*

View file

@ -415,8 +415,8 @@ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
return clean_data_tbi(s, addr);
}
static TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
bool tag_checked, int log2_size)
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
bool tag_checked, int log2_size)
{
return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
false, get_mem_index(s));
@ -425,8 +425,8 @@ static TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
/*
* For MTE, check multiple logical sequential accesses.
*/
static TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
bool tag_checked, int log2_esize, int total_size)
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
bool tag_checked, int log2_esize, int total_size)
{
if (tag_checked && s->mte_active[0] && total_size != (1 << log2_esize)) {
TCGv_i32 tcg_desc;

View file

@ -119,4 +119,9 @@ bool disas_sve(DisasContext *, uint32_t);
void gen_gvec_rax1(TCGContext *s, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
bool tag_checked, int log2_size);
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
bool tag_checked, int log2_esize, int total_size);
#endif /* TARGET_ARM_TRANSLATE_A64_H */

View file

@ -4577,15 +4577,17 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
/* Similarly for stores. */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
TCGContext *tcg_ctx = s->uc->tcg_ctx;
int len_align = QEMU_ALIGN_DOWN(len, 8);
int len_remain = len % 8;
int nparts = len / 8 + ctpop8(len_remain);
int midx = get_mem_index(s);
TCGv_i64 addr, t0;
TCGv_i64 dirty_addr, clean_addr, t0;
TCGContext *tcg_ctx = s->uc->tcg_ctx;
addr = tcg_temp_new_i64(tcg_ctx);
t0 = tcg_temp_new_i64(tcg_ctx);
dirty_addr = tcg_temp_new_i64(tcg_ctx);
tcg_gen_addi_i64(tcg_ctx, dirty_addr, cpu_reg_sp(s, rn), imm);
clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
tcg_temp_free_i64(tcg_ctx, dirty_addr);
/* Note that unpredicated load/store of vector/predicate registers
* are defined as a stream of bytes, which equates to little-endian
@ -4598,33 +4600,35 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
if (nparts <= 4) {
int i;
t0 = tcg_temp_new_i64(tcg_ctx);
for (i = 0; i < len_align; i += 8) {
tcg_gen_ld_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + i);
tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + i);
tcg_gen_qemu_st_i64(s->uc, t0, addr, midx, MO_LEQ);
tcg_gen_qemu_st_i64(s->uc, t0, clean_addr, midx, MO_LEQ);
tcg_gen_addi_i64(tcg_ctx, clean_addr, cpu_reg_sp(s, rn), 8);
}
tcg_temp_free_i64(tcg_ctx, t0);
} else {
TCGLabel *loop = gen_new_label(tcg_ctx);
TCGv_ptr t2, i = tcg_const_local_ptr(tcg_ctx, 0);
TCGv_ptr tp, i = tcg_const_local_ptr(tcg_ctx, 0);
/* Copy the clean address into a local temp, live across the loop. */
t0 = clean_addr;
clean_addr = tcg_temp_local_new_i64(tcg_ctx);
tcg_gen_mov_i64(tcg_ctx, clean_addr, t0);
tcg_temp_free_i64(tcg_ctx, t0);
gen_set_label(tcg_ctx, loop);
t2 = tcg_temp_new_ptr(tcg_ctx);
tcg_gen_add_ptr(tcg_ctx, t2, tcg_ctx->cpu_env, i);
tcg_gen_ld_i64(tcg_ctx, t0, t2, vofs);
/* Minimize the number of local temps that must be re-read from
* the stack each iteration. Instead, re-compute values other
* than the loop counter.
*/
tcg_gen_addi_ptr(tcg_ctx, t2, i, imm);
tcg_gen_extu_ptr_i64(tcg_ctx, addr, t2);
tcg_gen_add_i64(tcg_ctx, addr, addr, cpu_reg_sp(s, rn));
tcg_temp_free_ptr(tcg_ctx, t2);
tcg_gen_qemu_st_i64(s->uc, t0, addr, midx, MO_LEQ);
t0 = tcg_temp_new_i64(tcg_ctx);
tp = tcg_temp_new_ptr(tcg_ctx);
tcg_gen_add_ptr(tcg_ctx, tp, tcg_ctx->cpu_env, i);
tcg_gen_ld_i64(tcg_ctx, t0, tp, vofs);
tcg_gen_addi_ptr(tcg_ctx, i, i, 8);
tcg_temp_free_ptr(tcg_ctx, tp);
tcg_gen_qemu_st_i64(s->uc, t0, clean_addr, midx, MO_LEQ);
tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 8);
tcg_temp_free_i64(tcg_ctx, t0);
tcg_gen_brcondi_ptr(tcg_ctx, TCG_COND_LTU, i, len_align, loop);
tcg_temp_free_ptr(tcg_ctx, i);
@ -4632,29 +4636,30 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
/* Predicate register stores can be any multiple of 2. */
if (len_remain) {
t0 = tcg_temp_new_i64(tcg_ctx);
tcg_gen_ld_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + len_align);
tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + len_align);
switch (len_remain) {
case 2:
case 4:
case 8:
tcg_gen_qemu_st_i64(s->uc, t0, addr, midx, MO_LE | ctz32(len_remain));
tcg_gen_qemu_st_i64(s->uc, t0, clean_addr, midx,
MO_LE | ctz32(len_remain));
break;
case 6:
tcg_gen_qemu_st_i64(s->uc, t0, addr, midx, MO_LEUL);
tcg_gen_addi_i64(tcg_ctx, addr, addr, 4);
tcg_gen_qemu_st_i64(s->uc, t0, clean_addr, midx, MO_LEUL);
tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 4);
tcg_gen_shri_i64(tcg_ctx, t0, t0, 32);
tcg_gen_qemu_st_i64(s->uc, t0, addr, midx, MO_LEUW);
tcg_gen_qemu_st_i64(s->uc, t0, clean_addr, midx, MO_LEUW);
break;
default:
g_assert_not_reached();
}
tcg_temp_free_i64(tcg_ctx, t0);
}
tcg_temp_free_i64(tcg_ctx, addr);
tcg_temp_free_i64(tcg_ctx, t0);
tcg_temp_free_i64(tcg_ctx, clean_addr);
}
static bool trans_LDR_zri(DisasContext *s, arg_rri *a)