target/arm: Handle SVE vector length changes in system mode

SVE vector length can change when changing EL, or when writing
to one of the ZCR_ELn registers.

For correctness, our implementation requires that inaccessible
predicate bits are never set, which means noticing length changes
and zeroing the appropriate register bits.

Backports commit 0ab5953b00b3165877d00cf75de628c51670b550 from qemu
This commit is contained in:
Richard Henderson 2018-10-08 11:22:24 -04:00 committed by Lioncash
parent 2fb9c4c41d
commit 87c20b52c0
No known key found for this signature in database
GPG key ID: 4E3C3CC1031BA9C7
6 changed files with 135 additions and 13 deletions

View file

@ -3268,6 +3268,8 @@
#define ARM64_REGS_STORAGE_SIZE ARM64_REGS_STORAGE_SIZE_aarch64 #define ARM64_REGS_STORAGE_SIZE ARM64_REGS_STORAGE_SIZE_aarch64
#define aarch64_cpu_do_interrupt aarch64_cpu_do_interrupt_aarch64 #define aarch64_cpu_do_interrupt aarch64_cpu_do_interrupt_aarch64
#define aarch64_cpu_register_types aarch64_cpu_register_types_aarch64 #define aarch64_cpu_register_types aarch64_cpu_register_types_aarch64
#define aarch64_sve_change_el aarch64_sve_change_el_aarch64
#define aarch64_sve_narrow_vq aarch64_sve_narrow_vq_aarch64
#define aarch64_translator_ops aarch64_translator_ops_aarch64 #define aarch64_translator_ops aarch64_translator_ops_aarch64
#define arm64_reg_read arm64_reg_read_aarch64 #define arm64_reg_read arm64_reg_read_aarch64
#define arm64_reg_reset arm64_reg_reset_aarch64 #define arm64_reg_reset arm64_reg_reset_aarch64

View file

@ -3268,6 +3268,8 @@
#define ARM64_REGS_STORAGE_SIZE ARM64_REGS_STORAGE_SIZE_aarch64eb #define ARM64_REGS_STORAGE_SIZE ARM64_REGS_STORAGE_SIZE_aarch64eb
#define aarch64_cpu_do_interrupt aarch64_cpu_do_interrupt_aarch64eb #define aarch64_cpu_do_interrupt aarch64_cpu_do_interrupt_aarch64eb
#define aarch64_cpu_register_types aarch64_cpu_register_types_aarch64eb #define aarch64_cpu_register_types aarch64_cpu_register_types_aarch64eb
#define aarch64_sve_change_el aarch64_sve_change_el_aarch64eb
#define aarch64_sve_narrow_vq aarch64_sve_narrow_vq_aarch64eb
#define aarch64_translator_ops aarch64_translator_ops_aarch64eb #define aarch64_translator_ops aarch64_translator_ops_aarch64eb
#define arm64_reg_read arm64_reg_read_aarch64eb #define arm64_reg_read arm64_reg_read_aarch64eb
#define arm64_reg_reset arm64_reg_reset_aarch64eb #define arm64_reg_reset arm64_reg_reset_aarch64eb

View file

@ -3289,6 +3289,8 @@ aarch64_symbols = (
'ARM64_REGS_STORAGE_SIZE', 'ARM64_REGS_STORAGE_SIZE',
'aarch64_cpu_do_interrupt', 'aarch64_cpu_do_interrupt',
'aarch64_cpu_register_types', 'aarch64_cpu_register_types',
'aarch64_sve_change_el',
'aarch64_sve_narrow_vq',
'aarch64_translator_ops', 'aarch64_translator_ops',
'arm64_reg_read', 'arm64_reg_read',
'arm64_reg_reset', 'arm64_reg_reset',

View file

@ -852,6 +852,14 @@ bool arm_cpu_exec_interrupt(CPUState *cpu, int int_req);
hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr, hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr,
MemTxAttrs *attrs); MemTxAttrs *attrs);
/*
 * SVE vector-length maintenance helpers.
 *
 * aarch64_sve_narrow_vq: clear the SVE register state lying beyond
 *   a vector length of VQ.
 * aarch64_sve_change_el: compare the vector length in effect at OLD_EL
 *   and NEW_EL and narrow the register state if the length shrinks.
 *
 * Out-of-line implementations are declared only when TARGET_AARCH64 is
 * defined; otherwise both names resolve to empty inline stubs, which
 * lets shared code call them unconditionally.
 */
#ifdef TARGET_AARCH64
void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
void aarch64_sve_change_el(CPUARMState *env, int old_el, int new_el);
#else
/* No SVE state to maintain on this target: both helpers are no-ops. */
static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
static inline void aarch64_sve_change_el(CPUARMState *env, int o, int n) { }
#endif
target_ulong do_arm_semihosting(CPUARMState *env); target_ulong do_arm_semihosting(CPUARMState *env);
void aarch64_sync_32_to_64(CPUARMState *env); void aarch64_sync_32_to_64(CPUARMState *env);
void aarch64_sync_64_to_32(CPUARMState *env); void aarch64_sync_64_to_32(CPUARMState *env);

View file

@ -3886,11 +3886,44 @@ static int sve_exception_el(CPUARMState *env, int el)
return 0; return 0;
} }
/*
 * Compute the effective ZCR LEN value for exception level EL, on the
 * assumption that SVE is enabled there.
 *
 * The result starts at the CPU's maximum (sve_max_vq - 1) and is then
 * clamped by the low four bits of every ZCR_ELx that applies:
 *   - ZCR_EL1 when EL is 0 or 1;
 *   - ZCR_EL2 when EL is below 2 and EL2 is implemented;
 *   - ZCR_EL3 when EL is below 3 and EL3 is implemented.
 */
static uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
{
    uint32_t len = arm_env_get_cpu(env)->sve_max_vq - 1;
    uint32_t limit;

    if (el <= 1) {
        limit = (uint32_t)env->vfp.zcr_el[1] & 0xf;
        if (limit < len) {
            len = limit;
        }
    }

    if (el < 2 && arm_feature(env, ARM_FEATURE_EL2)) {
        limit = (uint32_t)env->vfp.zcr_el[2] & 0xf;
        if (limit < len) {
            len = limit;
        }
    }

    if (el < 3 && arm_feature(env, ARM_FEATURE_EL3)) {
        limit = (uint32_t)env->vfp.zcr_el[3] & 0xf;
        if (limit < len) {
            len = limit;
        }
    }

    return len;
}
/*
 * Write handler for the ZCR_ELn registers.
 *
 * Stores the low four bits of VALUE into the register described by RI,
 * then compares the effective vector length at the current EL before
 * and after the store.  If the length shrank, the SVE register state
 * beyond the new length is cleared via aarch64_sve_narrow_vq().
 */
static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri, static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value) uint64_t value)
{ {
int cur_el = arm_current_el(env);
/* Sample the effective length before the register is modified. */
int old_len = sve_zcr_len_for_el(env, cur_el);
int new_len;
/* Bits other than [3:0] are RAZ/WI. */ /* Bits other than [3:0] are RAZ/WI. */
raw_write(env, ri, value & 0xf);
/*
* Because we arrived here, we know both FP and SVE are enabled;
* otherwise we would have trapped access to the ZCR_ELn register.
*/
new_len = sve_zcr_len_for_el(env, cur_el);
/* Only a reduction needs action; the helper takes VQ, i.e. LEN + 1. */
if (new_len < old_len) {
aarch64_sve_narrow_vq(env, new_len + 1);
}
} }
static const ARMCPRegInfo zcr_el1_reginfo = { static const ARMCPRegInfo zcr_el1_reginfo = {
@ -7502,8 +7535,11 @@ static void arm_cpu_do_interrupt_aarch64_(CPUState *cs)
unsigned int new_el = env->exception.target_el; unsigned int new_el = env->exception.target_el;
target_ulong addr = env->cp15.vbar_el[new_el]; target_ulong addr = env->cp15.vbar_el[new_el];
unsigned int new_mode = aarch64_pstate_mode(new_el, true); unsigned int new_mode = aarch64_pstate_mode(new_el, true);
unsigned int cur_el = arm_current_el(env);
if (arm_current_el(env) < new_el) { aarch64_sve_change_el(env, cur_el, new_el);
if (cur_el < new_el) {
/* Entry vector offset depends on whether the implemented EL /* Entry vector offset depends on whether the implemented EL
* immediately lower than the target level is using AArch32 or AArch64 * immediately lower than the target level is using AArch32 or AArch64
*/ */
@ -11827,18 +11863,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
if (sve_el != 0 && fp_el == 0) { if (sve_el != 0 && fp_el == 0) {
zcr_len = 0; zcr_len = 0;
} else { } else {
ARMCPU *cpu = arm_env_get_cpu(env); zcr_len = sve_zcr_len_for_el(env, current_el);
zcr_len = cpu->sve_max_vq - 1;
if (current_el <= 1) {
zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
}
if (current_el < 2 && arm_feature(env, ARM_FEATURE_EL2)) {
zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
}
if (current_el < 3 && arm_feature(env, ARM_FEATURE_EL3)) {
zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
}
} }
flags |= sve_el << ARM_TBFLAG_SVEEXC_EL_SHIFT; flags |= sve_el << ARM_TBFLAG_SVEEXC_EL_SHIFT;
flags |= zcr_len << ARM_TBFLAG_ZCR_LEN_SHIFT; flags |= zcr_len << ARM_TBFLAG_ZCR_LEN_SHIFT;
@ -11894,3 +11919,85 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
*pflags = flags; *pflags = flags;
*cs_base = 0; *cs_base = 0;
} }
#ifdef TARGET_AARCH64
/*
 * Clear all SVE register state that lies beyond a vector length of VQ.
 *
 * The manual allows an implementation to zero the previously
 * inaccessible part of the registers when SVE is enabled and VQ is
 * widened.  By the same reasoning we may zero the part that becomes
 * inaccessible when VQ is narrowed.
 *
 * Doing so maintains the invariant that no predicate bit beyond VQ is
 * ever set.  Predicate operations may therefore work on whole uint64_t
 * words, or be unrolled across the maximum uint64_t[4], instead of
 * using conditionals to confine themselves to the live portion of a
 * uint16_t[16].
 *
 * VQ must be in the range [1, ARM_MAX_VQ] and must not exceed the
 * CPU's sve_max_vq; both conditions are asserted.
 */
void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
{
    int reg, word;
    uint64_t keep;

    assert(vq >= 1 && vq <= ARM_MAX_VQ);
    assert(vq <= arm_env_get_cpu(env)->sve_max_vq);

    /*
     * Vector registers: each VQ unit is 16 bytes, i.e. two elements of
     * the d[] array, so everything from d[2 * vq] upwards is cleared.
     */
    for (reg = 0; reg < 32; reg++) {
        memset(&env->vfp.zregs[reg].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq));
    }

    /*
     * Predicate registers and FFR: each VQ unit is 16 predicate bits,
     * so four units share one 64-bit word.  The first word visited may
     * be partly live and keeps its low 16 * (vq & 3) bits; when vq is a
     * multiple of 4 that word is wholly dead.  Every later word is
     * cleared outright.
     */
    keep = (vq & 3) ? ~(-1ULL << (16 * (vq & 3))) : 0;
    for (word = vq / 4; word < ARM_MAX_VQ / 4; word++) {
        for (reg = 0; reg < 17; reg++) {
            env->vfp.pregs[reg].p[word] &= keep;
        }
        keep = 0;
    }
}
/*
 * React to a possible change of SVE vector length when the exception
 * level moves from OLD_EL to NEW_EL.
 *
 * If the effective length at NEW_EL is smaller than at OLD_EL, the
 * register state beyond the new length is cleared.  Nothing is done
 * when the CPU lacks SVE or when FP is disabled at either level.
 *
 * NOTE(review): arm_el_is_aa64() is invoked with whatever levels the
 * caller supplies, which may include EL0 -- confirm that helper
 * accepts EL0.
 */
void aarch64_sve_change_el(CPUARMState *env, int old_el, int new_el)
{
    int old_len = 0;
    int new_len = 0;

    /* Without SVE there is no state to maintain. */
    if (!arm_feature(env, ARM_FEATURE_SVE)) {
        return;
    }

    /* FP disabled at either end of the transition: leave state alone. */
    if (fp_exception_el(env, old_el)) {
        return;
    }
    if (fp_exception_el(env, new_el)) {
        return;
    }

    /*
     * DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
     * at ELx, or not available because the EL is in AArch32 state, then
     * for all purposes other than a direct read, the ZCR_ELx.LEN field
     * has an effective value of 0".
     *
     * Hence a level that is AArch32, or at which SVE would trap, is
     * treated as length 0.  Example: EL2 (aa64, vq=4) -> EL0 (aa32) ->
     * EL1 (aa64, vq=0).  Were the aa32 step ignored, the vq4->vq0
     * narrowing implied by EL2->EL1 would never be observed.  Narrowing
     * on entry to aa32 means the registers already hold the right
     * contents when the later EL0->EL1 step appears as vq0->vq0.
     */
    if (arm_el_is_aa64(env, old_el) && !sve_exception_el(env, old_el)) {
        old_len = sve_zcr_len_for_el(env, old_el);
    }
    if (arm_el_is_aa64(env, new_el) && !sve_exception_el(env, new_el)) {
        new_len = sve_zcr_len_for_el(env, new_el);
    }

    /* A shorter vector length makes state inaccessible: clear it. */
    if (old_len > new_len) {
        aarch64_sve_narrow_vq(env, new_len + 1);
    }
}
#endif

View file

@ -1060,6 +1060,7 @@ void HELPER(exception_return)(CPUARMState *env)
"AArch64 EL%d PC 0x%" PRIx64 "\n", "AArch64 EL%d PC 0x%" PRIx64 "\n",
cur_el, new_el, env->pc); cur_el, new_el, env->pc);
} }
aarch64_sve_change_el(env, cur_el, new_el);
arm_call_el_change_hook(arm_env_get_cpu(env)); arm_call_el_change_hook(arm_env_get_cpu(env));