target/arm/vec_helper: Handle oprsz less than 16 bytes in indexed operations

In the gvec helper functions for indexed operations, the oprsz (total
size of the vector) can be less than 16 bytes for AArch32 Neon if the
operation is on a D register. Since the inner loop in these helpers
always iterates from 0 to segment, we must clamp segment based on
oprsz to avoid processing a full 16-byte segment when asked to
handle an 8-byte-wide vector.

Backports commit d7ce81e553e6789bf27657105b32575668d60b1c
This commit is contained in:
Peter Maydell 2021-03-01 17:48:40 -05:00 committed by Lioncash
parent 681218b4ab
commit 3cc3099e36

View file

@@ -1041,7 +1041,8 @@ DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32)
#define DO_MUL_IDX(NAME, TYPE, H) \ #define DO_MUL_IDX(NAME, TYPE, H) \
void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
{ \ { \
intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \ intptr_t i, j, oprsz = simd_oprsz(desc); \
intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
intptr_t idx = simd_data(desc); \ intptr_t idx = simd_data(desc); \
TYPE *d = vd, *n = vn, *m = vm; \ TYPE *d = vd, *n = vn, *m = vm; \
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
@@ -1062,7 +1063,8 @@ DO_MUL_IDX(gvec_mul_idx_d, uint64_t, )
#define DO_MLA_IDX(NAME, TYPE, OP, H) \ #define DO_MLA_IDX(NAME, TYPE, OP, H) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
{ \ { \
intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \ intptr_t i, j, oprsz = simd_oprsz(desc); \
intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
intptr_t idx = simd_data(desc); \ intptr_t idx = simd_data(desc); \
TYPE *d = vd, *n = vn, *m = vm, *a = va; \ TYPE *d = vd, *n = vn, *m = vm, *a = va; \
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
@@ -1087,7 +1089,8 @@ DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, )
#define DO_FMUL_IDX(NAME, TYPE, H) \ #define DO_FMUL_IDX(NAME, TYPE, H) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
{ \ { \
intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \ intptr_t i, j, oprsz = simd_oprsz(desc); \
intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
intptr_t idx = simd_data(desc); \ intptr_t idx = simd_data(desc); \
TYPE *d = vd, *n = vn, *m = vm; \ TYPE *d = vd, *n = vn, *m = vm; \
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
@@ -1109,7 +1112,8 @@ DO_FMUL_IDX(gvec_fmul_idx_d, float64, )
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
void *stat, uint32_t desc) \ void *stat, uint32_t desc) \
{ \ { \
intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \ intptr_t i, j, oprsz = simd_oprsz(desc); \
intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \ TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \
intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \ intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \
TYPE *d = vd, *n = vn, *m = vm, *a = va; \ TYPE *d = vd, *n = vn, *m = vm, *a = va; \