unicorn/qemu/target/arm/translate-neon.inc.c

/*
 *  ARM translation: AArch32 Neon instructions
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *  Copyright (c) 2020 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * This file is intended to be included from translate.c; it uses
 * some macros and definitions provided by that file.
 * It might be possible to convert it to a standalone .c file eventually.
 */

/*
 * Return x + 1.  The DisasContext argument is accepted but not used.
 * NOTE(review): presumably called as a field-transformation hook by the
 * generated decoders included below -- confirm against the .decode files.
 */
static inline int plus1(DisasContext *s, int x)
{
    const int incremented = 1 + x;

    return incremented;
}

/* Include the generated Neon decoder */
#include "decode-neon-dp.inc.c"
#include "decode-neon-ls.inc.c"
#include "decode-neon-shared.inc.c"

/*
 * VCMLA (vector): validate the encoding, then emit a tcg_gen_gvec_3_ptr()
 * call over vd/vn/vm with a->rot as the helper data word.
 *
 * Returns false when the encoding is rejected (missing feature or invalid
 * register numbers).  Returns true in every other case, including when
 * vfp_access_check() fails and no vector operation is emitted here.
 */
static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    const int regs = a->vd | a->vn | a->vm;
    gen_helper_gvec_3_ptr *helper;
    TCGv_ptr status;
    int nbytes;

    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    /* size == 0 additionally requires the fp16 arithmetic feature. */
    if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if ((regs & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /* Reject register numbers sharing a set bit with q (odd regs if q == 1). */
    if (regs & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    nbytes = 8 * (1 + a->q);
    status = get_fpstatus_ptr(s, 1);
    if (a->size) {
        helper = gen_helper_gvec_fcmlas;
    } else {
        helper = gen_helper_gvec_fcmlah;
    }
    tcg_gen_gvec_3_ptr(tcg_ctx,
                       vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       status, nbytes, nbytes, a->rot,
                       helper);
    tcg_temp_free_ptr(tcg_ctx, status);
    return true;
}

/*
 * VCADD (vector): validate the encoding, then emit a tcg_gen_gvec_3_ptr()
 * call over vd/vn/vm with a->rot as the helper data word.
 *
 * Returns false when the encoding is rejected; returns true otherwise,
 * including when vfp_access_check() fails.
 */
static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    const int reg_bits = a->vn | a->vm | a->vd;
    gen_helper_gvec_3_ptr *helper;
    TCGv_ptr status;
    int vec_len;

    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    /* The size == 0 form also needs the fp16 arithmetic feature. */
    if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if ((reg_bits & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /* Reject register numbers sharing a set bit with q (odd regs if q == 1). */
    if (reg_bits & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vec_len = 8 * (a->q + 1);
    status = get_fpstatus_ptr(s, 1);
    helper = gen_helper_gvec_fcaddh;
    if (a->size) {
        helper = gen_helper_gvec_fcadds;
    }
    tcg_gen_gvec_3_ptr(tcg_ctx,
                       vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       status, vec_len, vec_len, a->rot,
                       helper);
    tcg_temp_free_ptr(tcg_ctx, status);
    return true;
}

/*
 * V[US]DOT (vector): validate the encoding, then emit a
 * tcg_gen_gvec_3_ool() call over vd/vn/vm.  a->u picks the udot helper,
 * otherwise the sdot helper is used.
 *
 * Returns false when the encoding is rejected; returns true otherwise,
 * including when vfp_access_check() fails.
 */
static bool trans_VDOT(DisasContext *s, arg_VDOT *a)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    const int regs = a->vd | a->vn | a->vm;
    gen_helper_gvec_3 *helper;
    int nbytes;

    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if ((regs & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /* Reject register numbers sharing a set bit with q (odd regs if q == 1). */
    if (regs & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    nbytes = 8 * (1 + a->q);
    if (a->u) {
        helper = gen_helper_gvec_udot_b;
    } else {
        helper = gen_helper_gvec_sdot_b;
    }
    tcg_gen_gvec_3_ool(tcg_ctx,
                       vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       nbytes, nbytes, 0, helper);
    return true;
}

/*
 * VFM[AS]L (vector): validate the encoding, then emit a
 * tcg_gen_gvec_3_ptr() call to gen_helper_gvec_fmlal_a32 with cpu_env as
 * the pointer argument and a->s as the helper data word.
 *
 * Only vd is range-checked here; vn and vm are addressed with a->q as the
 * first vfp_reg_offset() argument, while vd always uses 1.
 *
 * Returns false when the encoding is rejected; returns true otherwise,
 * including when vfp_access_check() fails.
 */
static bool trans_VFML(DisasContext *s, arg_VFML *a)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int nbytes;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if ((a->vd & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /* Reject a vd sharing a set bit with q (odd vd if q == 1). */
    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    nbytes = 8 * (1 + a->q);
    tcg_gen_gvec_3_ptr(tcg_ctx,
                       vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->vm),
                       tcg_ctx->cpu_env, nbytes, nbytes,
                       a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_a32);
    return true;
}

/*
 * VCMLA (scalar): validate the encoding, then emit a tcg_gen_gvec_3_ptr()
 * call to the indexed fcmla helper.  The helper data word packs the
 * element index above the rotation: (index << 2) | rot.
 *
 * Returns false when the encoding is rejected; returns true otherwise,
 * including when vfp_access_check() fails.
 */
static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    gen_helper_gvec_3_ptr *helper;
    TCGv_ptr status;
    int nbytes;
    int data;

    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    /* size == 0 additionally requires the fp16 arithmetic feature. */
    if (!a->size && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((a->vd | a->vn | a->vm) & 0x10) &&
        !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /* Only vd and vn are checked against q; vm is not. */
    if ((a->vd | a->vn) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->size) {
        helper = gen_helper_gvec_fcmlas_idx;
    } else {
        helper = gen_helper_gvec_fcmlah_idx;
    }
    nbytes = 8 * (1 + a->q);
    data = (a->index << 2) | a->rot;
    status = get_fpstatus_ptr(s, 1);
    tcg_gen_gvec_3_ptr(tcg_ctx,
                       vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       status, nbytes, nbytes,
                       data, helper);
    tcg_temp_free_ptr(tcg_ctx, status);
    return true;
}

/*
 * V[US]DOT (scalar): validate the encoding, then emit a
 * tcg_gen_gvec_3_ool() call over vd/vn/rm with a->index as the helper
 * data word.  a->u picks the unsigned helper, otherwise the signed one.
 *
 * No fp status pointer is created: tcg_gen_gvec_3_ool() does not take
 * one, so fetching and freeing it would only emit a dead temporary.
 *
 * Returns false when the encoding is rejected; returns true otherwise,
 * including when vfp_access_check() fails.
 */
static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a)
{
    gen_helper_gvec_3 *fn_gvec;
    int opr_sz;
    TCGContext *tcg_ctx = s->uc->tcg_ctx;

    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn) & 0x10)) {
        return false;
    }

    /* Reject vd/vn sharing a set bit with q (odd regs if q == 1). */
    if ((a->vd | a->vn) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ool(tcg_ctx, vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->rm),
                       opr_sz, opr_sz, a->index, fn_gvec);
    return true;
}

/*
 * VFM[AS]L (scalar): validate the encoding, then emit a
 * tcg_gen_gvec_3_ptr() call to gen_helper_gvec_fmlal_idx_a32 with cpu_env
 * as the pointer argument.  The helper data word is (index << 2) | s.
 *
 * Returns false when the encoding is rejected; returns true otherwise,
 * including when vfp_access_check() fails.
 */
static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    int nbytes;
    int data;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s)) {
        if (a->vd & 0x10) {
            return false;
        }
        /* vn is only range-checked when q is set. */
        if (a->q && (a->vn & 0x10)) {
            return false;
        }
    }

    /* Reject a vd sharing a set bit with q (odd vd if q == 1). */
    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    nbytes = 8 * (1 + a->q);
    data = (a->index << 2) | a->s; /* is_2 == 0 */
    tcg_gen_gvec_3_ptr(tcg_ctx,
                       vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->rm),
                       tcg_ctx->cpu_env, nbytes, nbytes,
                       data,
                       gen_helper_gvec_fmlal_idx_a32);
    return true;
}

/*
 * Per-itype parameters for the Neon load/store multiple structures insns.
 * trans_VLDST_multiple() indexes this table with a->itype (0..10) to get
 * the outer register count, the interleave factor and the spacing between
 * the interleaved registers.
 */
static const struct {
    int nregs;
    int interleave;
    int spacing;
} neon_ls_element_type[11] = {
    [0]  = { .nregs = 1, .interleave = 4, .spacing = 1 },
    [1]  = { .nregs = 1, .interleave = 4, .spacing = 2 },
    [2]  = { .nregs = 4, .interleave = 1, .spacing = 1 },
    [3]  = { .nregs = 2, .interleave = 2, .spacing = 2 },
    [4]  = { .nregs = 1, .interleave = 3, .spacing = 1 },
    [5]  = { .nregs = 1, .interleave = 3, .spacing = 2 },
    [6]  = { .nregs = 3, .interleave = 1, .spacing = 1 },
    [7]  = { .nregs = 1, .interleave = 1, .spacing = 1 },
    [8]  = { .nregs = 1, .interleave = 2, .spacing = 1 },
    [9]  = { .nregs = 1, .interleave = 2, .spacing = 2 },
    [10] = { .nregs = 2, .interleave = 1, .spacing = 1 },
};

/*
 * Emit the base-register update that follows a Neon load/store.
 *
 *   rm == 15: nothing is emitted and rn is left untouched.
 *   rm == 13: rn is advanced by the immediate 'stride'.
 *   otherwise: rn is advanced by the value of register rm.
 */
static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
                                      int stride)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i32 base;

    if (rm == 15) {
        return;
    }

    base = load_reg(s, rn);
    if (rm != 13) {
        TCGv_i32 offset = load_reg(s, rm);

        tcg_gen_add_i32(tcg_ctx, base, base, offset);
        tcg_temp_free_i32(tcg_ctx, offset);
    } else {
        tcg_gen_addi_i32(tcg_ctx, base, base, stride);
    }
    store_reg(s, rn, base);
}

/*
 * Translate the Neon "load/store multiple structures" insns.
 *
 * a->l selects the direction: non-zero loads from memory into the
 * registers, zero stores the registers to memory.  a->itype indexes
 * neon_ls_element_type[] to obtain the register count, interleave factor
 * and register spacing.
 *
 * Returns false when the encoding is rejected; returns true otherwise,
 * including when vfp_access_check() fails and no access is emitted.
 */
static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
{
    /* Neon load/store multiple structures */
    int nregs, interleave, spacing, reg, n;
    MemOp endian = s->be_data;
    int mmu_idx = get_mem_index(s);
    int size = a->size;
    TCGv_i64 tmp64;
    TCGv_i32 addr, tmp;
    TCGContext *tcg_ctx = s->uc->tcg_ctx;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }
    /* neon_ls_element_type[] has 11 entries; larger itype values are rejected */
    if (a->itype > 10) {
        return false;
    }
    /* Catch UNDEF cases for bad values of align field */
    switch (a->itype & 0xc) {
    case 4:
        /* itype 4..7 */
        if (a->align >= 2) {
            return false;
        }
        break;
    case 8:
        /* itype 8..10 (11 was rejected above) */
        if (a->align == 3) {
            return false;
        }
        break;
    default:
        break;
    }
    nregs = neon_ls_element_type[a->itype].nregs;
    interleave = neon_ls_element_type[a->itype].interleave;
    spacing = neon_ls_element_type[a->itype].spacing;
    /* size == 3 is only accepted when interleave and spacing are both 1 */
    if (size == 3 && (interleave | spacing) != 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For our purposes, bytes are always little-endian.  */
    if (size == 0) {
        endian = MO_LE;
    }
    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
    if (interleave == 1 && endian == MO_LE) {
        /* With size == 3 the inner 'n' loop below runs once per register */
        size = 3;
    }
    tmp64 = tcg_temp_new_i64(tcg_ctx);
    addr = tcg_temp_new_i32(tcg_ctx);
    /* tmp holds the per-access address increment, in bytes */
    tmp = tcg_const_i32(tcg_ctx, 1 << size);
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        for (n = 0; n < 8 >> size; n++) {
            int xs;
            for (xs = 0; xs < interleave; xs++) {
                /*
                 * NOTE(review): tt is not checked against the number of
                 * D registers here, unlike the explicit "> 31" test in
                 * trans_VLDST_single() -- confirm this cannot overrun.
                 */
                int tt = a->vd + reg + spacing * xs;

                if (a->l) {
                    gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
                    neon_store_element64(s, tt, n, size, tmp64);
                } else {
                    neon_load_element64(s, tmp64, tt, n, size);
                    gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
                }
                tcg_gen_add_i32(tcg_ctx, addr, addr, tmp);
            }
        }
    }
    tcg_temp_free_i32(tcg_ctx, addr);
    tcg_temp_free_i32(tcg_ctx, tmp);
    tcg_temp_free_i64(tcg_ctx, tmp64);

    /*
     * The immediate stride equals the bytes transferred by the loops
     * above: nregs * interleave registers of 8 bytes each.
     */
    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
    return true;
}

/*
 * Translate the Neon "load single structure to all lanes" insns.
 *
 * For each of the a->n + 1 registers, one element is loaded from memory
 * and duplicated across the destination with tcg_gen_gvec_dup_i32().
 *
 * Returns false when the encoding is rejected; returns true otherwise,
 * including when vfp_access_check() fails and no access is emitted.
 */
static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
{
    /* Neon load single structure to all lanes */
    int reg, stride, vec_size;
    int vd = a->vd;
    int size = a->size;
    int nregs = a->n + 1;
    TCGv_i32 addr, tmp;
    TCGContext *tcg_ctx = s->uc->tcg_ctx;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (size == 3) {
        if (nregs != 4 || a->a == 0) {
            return false;
        }
        /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
        size = 2;
    }
    /* Remaining rejected combinations of register count and the 'a' bit */
    if (nregs == 1 && a->a == 1 && size == 0) {
        return false;
    }
    if (nregs == 3 && a->a == 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * VLD1 to all lanes: T bit indicates how many Dregs to write.
     * VLD2/3/4 to all lanes: T bit indicates register stride.
     */
    stride = a->t ? 2 : 1;
    /* vec_size is the byte count written per iteration: 16 only for VLD1 with T set */
    vec_size = nregs == 1 ? stride * 8 : 8;

    tmp = tcg_temp_new_i32(tcg_ctx);
    addr = tcg_temp_new_i32(tcg_ctx);
    load_reg_var(s, addr, a->rn);
    /*
     * NOTE(review): neither vd + 1 nor vd + stride * (nregs - 1) is
     * checked against the number of D registers here, unlike the explicit
     * "> 31" test in trans_VLDST_single() -- confirm this cannot overrun.
     */
    for (reg = 0; reg < nregs; reg++) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
                        s->be_data | size);
        if ((vd & 1) && vec_size == 16) {
            /*
             * We cannot write 16 bytes at once because the
             * destination is unaligned.
             */
            tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(vd, 0),
                                 8, 8, tmp);
            /* Copy the 8 bytes just written in vd into vd + 1 */
            tcg_gen_gvec_mov(tcg_ctx, 0, neon_reg_offset(vd + 1, 0),
                             neon_reg_offset(vd, 0), 8, 8);
        } else {
            tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(vd, 0),
                                 vec_size, vec_size, tmp);
        }
        /* Advance to the next element in memory and the next destination */
        tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << size);
        vd += stride;
    }
    tcg_temp_free_i32(tcg_ctx, tmp);
    tcg_temp_free_i32(tcg_ctx, addr);

    /* The immediate stride is the total number of bytes loaded above */
    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);

    return true;
}

/*
 * Translate the Neon "load/store single structure to one lane" insns.
 *
 * a->l selects the direction: non-zero loads one element from memory
 * into lane a->reg_idx of each register, zero stores that lane to memory.
 * a->n + 1 registers are processed, a->stride apart, starting at a->vd.
 *
 * Returns false when the encoding is rejected; returns true otherwise,
 * including when vfp_access_check() fails and no access is emitted.
 */
static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
{
    /* Neon load/store single structure to one lane */
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    const int nregs = a->n + 1;
    const int align_lo = a->align & 3;
    TCGv_i32 addr, tmp;
    int dreg = a->vd;
    int elt_bytes;
    int i;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if ((a->vd & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /* Catch the UNDEF cases. This is unavoidably a bit messy. */
    switch (nregs) {
    case 1:
        if (a->align & (1 << a->size)) {
            return false;
        }
        if (a->size == 2 && (align_lo == 1 || align_lo == 2)) {
            return false;
        }
        break;
    case 2:
    case 3:
        /* Three registers carry one extra restriction on align bit 0 */
        if (nregs == 3 && (a->align & 1)) {
            return false;
        }
        if (a->size == 2 && (a->align & 2)) {
            return false;
        }
        break;
    case 4:
        if (a->size == 2 && align_lo == 3) {
            return false;
        }
        break;
    default:
        abort();
    }

    /*
     * Attempts to write off the end of the register file are
     * UNPREDICTABLE; we choose to UNDEF because otherwise we would
     * access off the end of the array that holds the register data.
     */
    if (dreg + a->stride * (nregs - 1) > 31) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    elt_bytes = 1 << a->size;
    tmp = tcg_temp_new_i32(tcg_ctx);
    addr = tcg_temp_new_i32(tcg_ctx);
    load_reg_var(s, addr, a->rn);
    /*
     * TODO: if we implemented alignment exceptions, we should check
     * addr against the alignment encoded in a->align here.
     */
    for (i = 0; i < nregs; i++, dreg += a->stride) {
        if (!a->l) {
            /* Store: lane -> memory */
            neon_load_element(s, tmp, dreg, a->reg_idx, a->size);
            gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
                            s->be_data | a->size);
        } else {
            /* Load: memory -> lane */
            gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
                            s->be_data | a->size);
            neon_store_element(s, dreg, a->reg_idx, a->size, tmp);
        }
        tcg_gen_addi_i32(tcg_ctx, addr, addr, elt_bytes);
    }
    tcg_temp_free_i32(tcg_ctx, addr);
    tcg_temp_free_i32(tcg_ctx, tmp);

    /* The immediate stride is the total number of bytes transferred */
    gen_neon_ldst_base_update(s, a->rm, a->rn, elt_bytes * nregs);

    return true;
}
target/arm: Add stubs for AArch32 Neon decodetree Add the infrastructure for building and invoking a decodetree decoder for the AArch32 Neon encodings. At the moment the new decoder covers nothing, so we always fall back to the existing hand-written decode. We follow the same pattern we did for the VFP decodetree conversion (commit 78e138bc1f672c145ef6ace74617d and following): code that deals with Neon will be moving gradually out to translate-neon.vfp.inc, which we #include into translate.c. In order to share the decode files between A32 and T32, we split Neon into 3 parts: * data-processing * load-store * 'shared' encodings The first two groups of instructions have similar but not identical A32 and T32 encodings, so we need to manually transform the T32 encoding into the A32 one before calling the decoder; the third group covers the Neon instructions which are identical in A32 and T32. Backports commit 625e3dd44a15dfbe9532daa6454df3f86cf04d3e from qemu 2020-05-07 12:59:38 +00:00			`/*`
			`* ARM translation: AArch32 Neon instructions`
			`*`
			`* Copyright (c) 2003 Fabrice Bellard`
			`* Copyright (c) 2005-2007 CodeSourcery`
			`* Copyright (c) 2007 OpenedHand, Ltd.`
			`* Copyright (c) 2020 Linaro, Ltd.`
			`*`
			`* This library is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2 of the License, or (at your option) any later version.`
			`*`
			`* This library is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
			`* License along with this library; if not, see <http://www.gnu.org/licenses/>.`
			`*/`

			`/*`
			`* This file is intended to be included from translate.c; it uses`
			`* some macros and definitions provided by that file.`
			`* It might be possible to convert it to a standalone .c file eventually.`
			`*/`

target/arm: Convert Neon 'load/store single structure' to decodetree Convert the Neon "load/store single structure to one lane" insns to decodetree. As this is the last set of insns in the neon load/store group, we can remove the whole disas_neon_ls_insn() function. Backports commit 123ce4e3daba26b760b472687e1fb1ad82cf1993 from qemu 2020-05-07 13:32:03 +00:00			`static inline int plus1(DisasContext *s, int x)`
			`{`
			`return x + 1;`
			`}`

target/arm: Add stubs for AArch32 Neon decodetree Add the infrastructure for building and invoking a decodetree decoder for the AArch32 Neon encodings. At the moment the new decoder covers nothing, so we always fall back to the existing hand-written decode. We follow the same pattern we did for the VFP decodetree conversion (commit 78e138bc1f672c145ef6ace74617d and following): code that deals with Neon will be moving gradually out to translate-neon.vfp.inc, which we #include into translate.c. In order to share the decode files between A32 and T32, we split Neon into 3 parts: * data-processing * load-store * 'shared' encodings The first two groups of instructions have similar but not identical A32 and T32 encodings, so we need to manually transform the T32 encoding into the A32 one before calling the decoder; the third group covers the Neon instructions which are identical in A32 and T32. Backports commit 625e3dd44a15dfbe9532daa6454df3f86cf04d3e from qemu 2020-05-07 12:59:38 +00:00			`/* Include the generated Neon decoder */`
			`#include "decode-neon-dp.inc.c"`
			`#include "decode-neon-ls.inc.c"`
			`#include "decode-neon-shared.inc.c"`
target/arm: Convert VCMLA (vector) to decodetree Convert the VCMLA (vector) insns in the 3same extension group to decodetree. Backports commit afff8de0d4d55b4ce7c36eb9cdfafe477a35dd75 from qemu 2020-05-07 13:02:49 +00:00
			`static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)`
			`{`
			`int opr_sz;`
			`TCGv_ptr fpst;`
			`gen_helper_gvec_3_ptr *fn_gvec_ptr;`
			`TCGContext *tcg_ctx = s->uc->tcg_ctx;`

			`if (!dc_isar_feature(aa32_vcma, s)`
			`\|\| (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {`
			`return false;`
			`}`

			`/* UNDEF accesses to D16-D31 if they don't exist. */`
			`if (!dc_isar_feature(aa32_simd_r32, s) &&`
			`((a->vd \| a->vn \| a->vm) & 0x10)) {`
			`return false;`
			`}`

			`if ((a->vn \| a->vm \| a->vd) & a->q) {`
			`return false;`
			`}`

			`if (!vfp_access_check(s)) {`
			`return true;`
			`}`

			`opr_sz = (1 + a->q) * 8;`
			`fpst = get_fpstatus_ptr(s, 1);`
			`fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;`
			`tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, a->vd),`
			`vfp_reg_offset(1, a->vn),`
			`vfp_reg_offset(1, a->vm),`
			`fpst, opr_sz, opr_sz, a->rot,`
			`fn_gvec_ptr);`
			`tcg_temp_free_ptr(tcg_ctx, fpst);`
			`return true;`
			`}`
target/arm: Convert VCADD (vector) to decodetree Convert the VCADD (vector) insns to decodetree. Backports commit 94d5eb7b3f72fbbdee55d7908e9cb6de95949f4b from qemu 2020-05-07 13:05:53 +00:00
			`static bool trans_VCADD(DisasContext *s, arg_VCADD *a)`
			`{`
			`int opr_sz;`
			`TCGv_ptr fpst;`
			`gen_helper_gvec_3_ptr *fn_gvec_ptr;`
			`TCGContext *tcg_ctx = s->uc->tcg_ctx;`

			`if (!dc_isar_feature(aa32_vcma, s)`
			`\|\| (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) {`
			`return false;`
			`}`

			`/* UNDEF accesses to D16-D31 if they don't exist. */`
			`if (!dc_isar_feature(aa32_simd_r32, s) &&`
			`((a->vd \| a->vn \| a->vm) & 0x10)) {`
			`return false;`
			`}`

			`if ((a->vn \| a->vm \| a->vd) & a->q) {`
			`return false;`
			`}`

			`if (!vfp_access_check(s)) {`
			`return true;`
			`}`

			`opr_sz = (1 + a->q) * 8;`
			`fpst = get_fpstatus_ptr(s, 1);`
			`fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;`
			`tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, a->vd),`
			`vfp_reg_offset(1, a->vn),`
			`vfp_reg_offset(1, a->vm),`
			`fpst, opr_sz, opr_sz, a->rot,`
			`fn_gvec_ptr);`
			`tcg_temp_free_ptr(tcg_ctx, fpst);`
			`return true;`
			`}`
target/arm: Convert V[US]DOT (vector) to decodetree Convert the V[US]DOT (vector) insns to decodetree. Backports commit 32da0e330d3e5218b669079826496751fb52c1ca from qemu 2020-05-07 13:09:22 +00:00
			`static bool trans_VDOT(DisasContext *s, arg_VDOT *a)`
			`{`
			`int opr_sz;`
			`gen_helper_gvec_3 *fn_gvec;`
			`TCGContext *tcg_ctx = s->uc->tcg_ctx;`

			`if (!dc_isar_feature(aa32_dp, s)) {`
			`return false;`
			`}`

			`/* UNDEF accesses to D16-D31 if they don't exist. */`
			`if (!dc_isar_feature(aa32_simd_r32, s) &&`
			`((a->vd \| a->vn \| a->vm) & 0x10)) {`
			`return false;`
			`}`

			`if ((a->vn \| a->vm \| a->vd) & a->q) {`
			`return false;`
			`}`

			`if (!vfp_access_check(s)) {`
			`return true;`
			`}`

			`opr_sz = (1 + a->q) * 8;`
			`fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;`
			`tcg_gen_gvec_3_ool(tcg_ctx, vfp_reg_offset(1, a->vd),`
			`vfp_reg_offset(1, a->vn),`
			`vfp_reg_offset(1, a->vm),`
			`opr_sz, opr_sz, 0, fn_gvec);`
			`return true;`
			`}`
target/arm: Convert VFM[AS]L (vector) to decodetree Convert the VFM[AS]L (vector) insns to decodetree. This is the last insn in the legacy decoder for the 3same_ext group, so we can delete the legacy decoder function for the group entirely. Note that in disas_thumb2_insn() the parts of this encoding space where the decodetree decoder returns false will correctly be directed to illegal_op by the "(insn & (1 << 28))" check so they won't fall into disas_coproc_insn() by mistake. Backports commit 9a107e7b8a3c87ab63ec830d3d60f319fc577ff7 from qemu 2020-05-07 13:12:13 +00:00
			`static bool trans_VFML(DisasContext *s, arg_VFML *a)`
			`{`
			`int opr_sz;`
			`TCGContext *tcg_ctx = s->uc->tcg_ctx;`

			`if (!dc_isar_feature(aa32_fhm, s)) {`
			`return false;`
			`}`

			`/* UNDEF accesses to D16-D31 if they don't exist. */`
			`if (!dc_isar_feature(aa32_simd_r32, s) &&`
			`(a->vd & 0x10)) {`
			`return false;`
			`}`

			`if (a->vd & a->q) {`
			`return false;`
			`}`

			`if (!vfp_access_check(s)) {`
			`return true;`
			`}`

			`opr_sz = (1 + a->q) * 8;`
			`tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, a->vd),`
			`vfp_reg_offset(a->q, a->vn),`
			`vfp_reg_offset(a->q, a->vm),`
			`tcg_ctx->cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */`
			`gen_helper_gvec_fmlal_a32);`
			`return true;`
			`}`

target/arm: Convert VCMLA (scalar) to decodetree Convert VCMLA (scalar) in the 2reg-scalar-ext group to decodetree. Backports commit 7e1b5d615361bb0038cda0e08af41e350e42d081 from qemu 2020-05-07 13:15:27 +00:00			`static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)`
			`{`
			`gen_helper_gvec_3_ptr *fn_gvec_ptr;`
			`int opr_sz;`
			`TCGv_ptr fpst;`
			`TCGContext *tcg_ctx = s->uc->tcg_ctx;`

			`if (!dc_isar_feature(aa32_vcma, s)) {`
			`return false;`
			`}`
			`if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) {`
			`return false;`
			`}`

			`/* UNDEF accesses to D16-D31 if they don't exist. */`
			`if (!dc_isar_feature(aa32_simd_r32, s) &&`
			`((a->vd \| a->vn \| a->vm) & 0x10)) {`
			`return false;`
			`}`

			`if ((a->vd \| a->vn) & a->q) {`
			`return false;`
			`}`

			`if (!vfp_access_check(s)) {`
			`return true;`
			`}`

			`fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx`
			`: gen_helper_gvec_fcmlah_idx);`
			`opr_sz = (1 + a->q) * 8;`
			`fpst = get_fpstatus_ptr(s, 1);`
			`tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, a->vd),`
			`vfp_reg_offset(1, a->vn),`
			`vfp_reg_offset(1, a->vm),`
			`fpst, opr_sz, opr_sz,`
			`(a->index << 2) \| a->rot, fn_gvec_ptr);`
			`tcg_temp_free_ptr(tcg_ctx, fpst);`
			`return true;`
			`}`
target/arm: Convert V[US]DOT (scalar) to decodetree Convert the V[US]DOT (scalar) insns in the 2reg-scalar-ext group to decodetree. Backports commit 35f5d4d1747558c6af2d914bcd848dcc30c3b531 from qemu 2020-05-07 13:17:30 +00:00
			`static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a)`
			`{`
			`gen_helper_gvec_3 *fn_gvec;`
			`int opr_sz;`
			`TCGv_ptr fpst;`
			`TCGContext *tcg_ctx = s->uc->tcg_ctx;`

			`if (!dc_isar_feature(aa32_dp, s)) {`
			`return false;`
			`}`

			`/* UNDEF accesses to D16-D31 if they don't exist. */`
			`if (!dc_isar_feature(aa32_simd_r32, s) &&`
			`((a->vd \| a->vn) & 0x10)) {`
			`return false;`
			`}`

			`if ((a->vd \| a->vn) & a->q) {`
			`return false;`
			`}`

			`if (!vfp_access_check(s)) {`
			`return true;`
			`}`

			`fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;`
			`opr_sz = (1 + a->q) * 8;`
			`fpst = get_fpstatus_ptr(s, 1);`
			`tcg_gen_gvec_3_ool(tcg_ctx, vfp_reg_offset(1, a->vd),`
			`vfp_reg_offset(1, a->vn),`
			`vfp_reg_offset(1, a->rm),`
			`opr_sz, opr_sz, a->index, fn_gvec);`
			`tcg_temp_free_ptr(tcg_ctx, fpst);`
			`return true;`
			`}`
target/arm: Convert VFM[AS]L (scalar) to decodetree Convert the VFM[AS]L (scalar) insns in the 2reg-scalar-ext group to decodetree. These are the last ones in the group so we can remove all the legacy decode for the group. Note that in disas_thumb2_insn() the parts of this encoding space where the decodetree decoder returns false will correctly be directed to illegal_op by the "(insn & (1 << 28))" check so they won't fall into disas_coproc_insn() by mistake. Backports commit d27e82f7d02f35e5919bd9cbbcb157f3537069a0 from qemu 2020-05-07 13:20:33 +00:00
			`static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)`
			`{`
			`int opr_sz;`
			`TCGContext *tcg_ctx = s->uc->tcg_ctx;`

			`if (!dc_isar_feature(aa32_fhm, s)) {`
			`return false;`
			`}`

			`/* UNDEF accesses to D16-D31 if they don't exist. */`
			`if (!dc_isar_feature(aa32_simd_r32, s) &&`
			`((a->vd & 0x10) \|\| (a->q && (a->vn & 0x10)))) {`
			`return false;`
			`}`

			`if (a->vd & a->q) {`
			`return false;`
			`}`

			`if (!vfp_access_check(s)) {`
			`return true;`
			`}`

			`opr_sz = (1 + a->q) * 8;`
			`tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, a->vd),`
			`vfp_reg_offset(a->q, a->vn),`
			`vfp_reg_offset(a->q, a->rm),`
			`tcg_ctx->cpu_env, opr_sz, opr_sz,`
			`(a->index << 2) \| a->s, /* is_2 == 0 */`
			`gen_helper_gvec_fmlal_idx_a32);`
			`return true;`
			`}`
target/arm: Convert Neon load/store multiple structures to decodetree Convert the Neon "load/store multiple structures" insns to decodetree. Backports commit a27b46304352a0eced45e560e96515dbe3cc174f from qemu 2020-05-07 13:24:31 +00:00
			`static struct {`
			`int nregs;`
			`int interleave;`
			`int spacing;`
			`} const neon_ls_element_type[11] = {`
			`{1, 4, 1},`
			`{1, 4, 2},`
			`{4, 1, 1},`
			`{2, 2, 2},`
			`{1, 3, 1},`
			`{1, 3, 2},`
			`{3, 1, 1},`
			`{1, 1, 1},`
			`{1, 2, 1},`
			`{1, 2, 2},`
			`{2, 1, 1}`
			`};`

			`static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,`
			`int stride)`
			`{`
			`TCGContext *tcg_ctx = s->uc->tcg_ctx;`

			`if (rm != 15) {`
			`TCGv_i32 base;`

			`base = load_reg(s, rn);`
			`if (rm == 13) {`
			`tcg_gen_addi_i32(tcg_ctx, base, base, stride);`
			`} else {`
			`TCGv_i32 index;`
			`index = load_reg(s, rm);`
			`tcg_gen_add_i32(tcg_ctx, base, base, index);`
			`tcg_temp_free_i32(tcg_ctx, index);`
			`}`
			`store_reg(s, rn, base);`
			`}`
			`}`

			`static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)`
			`{`
			`/* Neon load/store multiple structures */`
			`int nregs, interleave, spacing, reg, n;`
			`MemOp endian = s->be_data;`
			`int mmu_idx = get_mem_index(s);`
			`int size = a->size;`
			`TCGv_i64 tmp64;`
			`TCGv_i32 addr, tmp;`
			`TCGContext *tcg_ctx = s->uc->tcg_ctx;`

			`if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {`
			`return false;`
			`}`

			`/* UNDEF accesses to D16-D31 if they don't exist */`
			`if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {`
			`return false;`
			`}`
			`if (a->itype > 10) {`
			`return false;`
			`}`
			`/* Catch UNDEF cases for bad values of align field */`
			`switch (a->itype & 0xc) {`
			`case 4:`
			`if (a->align >= 2) {`
			`return false;`
			`}`
			`break;`
			`case 8:`
			`if (a->align == 3) {`
			`return false;`
			`}`
			`break;`
			`default:`
			`break;`
			`}`
			`nregs = neon_ls_element_type[a->itype].nregs;`
			`interleave = neon_ls_element_type[a->itype].interleave;`
			`spacing = neon_ls_element_type[a->itype].spacing;`
			`if (size == 3 && (interleave \| spacing) != 1) {`
			`return false;`
			`}`

			`if (!vfp_access_check(s)) {`
			`return true;`
			`}`

			`/* For our purposes, bytes are always little-endian. */`
			`if (size == 0) {`
			`endian = MO_LE;`
			`}`
			`/*`
			`* Consecutive little-endian elements from a single register`
			`* can be promoted to a larger little-endian operation.`
			`*/`
			`if (interleave == 1 && endian == MO_LE) {`
			`size = 3;`
			`}`
			`tmp64 = tcg_temp_new_i64(tcg_ctx);`
			`addr = tcg_temp_new_i32(tcg_ctx);`
			`tmp = tcg_const_i32(tcg_ctx, 1 << size);`
			`load_reg_var(s, addr, a->rn);`
			`for (reg = 0; reg < nregs; reg++) {`
			`for (n = 0; n < 8 >> size; n++) {`
			`int xs;`
			`for (xs = 0; xs < interleave; xs++) {`
			`int tt = a->vd + reg + spacing * xs;`

			`if (a->l) {`
			`gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian \| size);`
			`neon_store_element64(s, tt, n, size, tmp64);`
			`} else {`
			`neon_load_element64(s, tmp64, tt, n, size);`
			`gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian \| size);`
			`}`
			`tcg_gen_add_i32(tcg_ctx, addr, addr, tmp);`
			`}`
			`}`
			`}`
			`tcg_temp_free_i32(tcg_ctx, addr);`
			`tcg_temp_free_i32(tcg_ctx, tmp);`
			`tcg_temp_free_i64(tcg_ctx, tmp64);`

			`gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);`
			`return true;`
			`}`
target/arm: Convert Neon 'load single structure to all lanes' to decodetree Convert the Neon "load single structure to all lanes" insns to decodetree. Backports commit 3698747c48db871d876a398592c5a23d7580ed4a from qemu 2020-05-07 13:28:59 +00:00
			/*
			 * Translate the Neon "load single structure to all lanes" insns.
			 *
			 * For each of the (a->n + 1) destination D registers, one element of
			 * (1 << size) bytes is loaded from the address taken from register
			 * a->rn and duplicated across the destination; the address then
			 * advances by one element and the register number by 'stride'.
			 * Finally gen_neon_ldst_base_update() is called with the total number
			 * of bytes transferred.
			 *
			 * @s: translation context (dereferenced, so it must be a pointer)
			 * @a: decoded instruction fields; this function reads vd, size, n, a,
			 *     t, rn and rm
			 *
			 * Returns false for the encodings rejected below (the existing
			 * comments describe these as UNDEF cases).  Returns true once the
			 * insn has been handled, which includes the case where
			 * vfp_access_check() fails and no load is emitted.
			 */
			static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
			{
			    /* Neon load single structure to all lanes */
			    int reg, stride, vec_size;
			    int vd = a->vd;
			    int size = a->size;
			    int nregs = a->n + 1;
			    TCGv_i32 addr, tmp;
			    TCGContext *tcg_ctx = s->uc->tcg_ctx;

			    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
			        return false;
			    }

			    /* UNDEF accesses to D16-D31 if they don't exist */
			    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
			        return false;
			    }

			    if (size == 3) {
			        if (nregs != 4 || a->a == 0) {
			            return false;
			        }
			        /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
			        size = 2;
			    }
			    if (nregs == 1 && a->a == 1 && size == 0) {
			        return false;
			    }
			    if (nregs == 3 && a->a == 1) {
			        return false;
			    }

			    if (!vfp_access_check(s)) {
			        return true;
			    }

			    /*
			     * VLD1 to all lanes: T bit indicates how many Dregs to write.
			     * VLD2/3/4 to all lanes: T bit indicates register stride.
			     */
			    stride = a->t ? 2 : 1;
			    vec_size = nregs == 1 ? stride * 8 : 8;

			    tmp = tcg_temp_new_i32(tcg_ctx);
			    addr = tcg_temp_new_i32(tcg_ctx);
			    load_reg_var(s, addr, a->rn);
			    for (reg = 0; reg < nregs; reg++) {
			        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
			                        s->be_data | size);
			        if ((vd & 1) && vec_size == 16) {
			            /*
			             * We cannot write 16 bytes at once because the
			             * destination is unaligned.
			             */
			            tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(vd, 0),
			                                 8, 8, tmp);
			            tcg_gen_gvec_mov(tcg_ctx, 0, neon_reg_offset(vd + 1, 0),
			                             neon_reg_offset(vd, 0), 8, 8);
			        } else {
			            tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(vd, 0),
			                                 vec_size, vec_size, tmp);
			        }
			        /* Step to the next element in memory and the next destination. */
			        tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << size);
			        vd += stride;
			    }
			    tcg_temp_free_i32(tcg_ctx, tmp);
			    tcg_temp_free_i32(tcg_ctx, addr);

			    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);

			    return true;
			}
target/arm: Convert Neon 'load/store single structure' to decodetree. Convert the Neon "load/store single structure to one lane" insns to decodetree. As this is the last set of insns in the neon load/store group, we can remove the whole disas_neon_ls_insn() function. Backports commit 123ce4e3daba26b760b472687e1fb1ad82cf1993 from qemu (2020-05-07 13:32:03 +00:00).
			/*
			 * Translate the Neon "load/store single structure to one lane" insns.
			 *
			 * For each of the (a->n + 1) D registers, starting at a->vd and
			 * stepping by a->stride, one element of (1 << a->size) bytes at
			 * element index a->reg_idx is either loaded from memory (a->l set)
			 * or stored to memory (a->l clear).  The address comes from register
			 * a->rn and advances by one element per register.  Finally
			 * gen_neon_ldst_base_update() is called with the total number of
			 * bytes transferred.
			 *
			 * @s: translation context (dereferenced, so it must be a pointer)
			 * @a: decoded instruction fields; this function reads vd, n, size,
			 *     align, stride, l, reg_idx, rn and rm
			 *
			 * Returns false for the encodings rejected below (UNDEF cases, plus
			 * the register-file overrun we choose to UNDEF).  Returns true once
			 * the insn has been handled, which includes the case where
			 * vfp_access_check() fails and no access is emitted.
			 */
			static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
			{
			    /* Neon load/store single structure to one lane */
			    int reg;
			    int nregs = a->n + 1;
			    int vd = a->vd;
			    TCGv_i32 addr, tmp;
			    TCGContext *tcg_ctx = s->uc->tcg_ctx;

			    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
			        return false;
			    }

			    /* UNDEF accesses to D16-D31 if they don't exist */
			    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
			        return false;
			    }

			    /* Catch the UNDEF cases. This is unavoidably a bit messy. */
			    switch (nregs) {
			    case 1:
			        if (((a->align & (1 << a->size)) != 0) ||
			            (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) {
			            return false;
			        }
			        break;
			    case 3:
			        if ((a->align & 1) != 0) {
			            return false;
			        }
			        /* fall through */
			    case 2:
			        if (a->size == 2 && (a->align & 2) != 0) {
			            return false;
			        }
			        break;
			    case 4:
			        if ((a->size == 2) && ((a->align & 3) == 3)) {
			            return false;
			        }
			        break;
			    default:
			        abort();
			    }
			    if ((vd + a->stride * (nregs - 1)) > 31) {
			        /*
			         * Attempts to write off the end of the register file are
			         * UNPREDICTABLE; we choose to UNDEF because otherwise we would
			         * access off the end of the array that holds the register data.
			         */
			        return false;
			    }

			    if (!vfp_access_check(s)) {
			        return true;
			    }

			    tmp = tcg_temp_new_i32(tcg_ctx);
			    addr = tcg_temp_new_i32(tcg_ctx);
			    load_reg_var(s, addr, a->rn);
			    /*
			     * TODO: if we implemented alignment exceptions, we should check
			     * addr against the alignment encoded in a->align here.
			     */
			    for (reg = 0; reg < nregs; reg++) {
			        if (a->l) {
			            gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
			                            s->be_data | a->size);
			            neon_store_element(s, vd, a->reg_idx, a->size, tmp);
			        } else { /* Store */
			            neon_load_element(s, tmp, vd, a->reg_idx, a->size);
			            gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
			                            s->be_data | a->size);
			        }
			        /* Step to the next register and the next element in memory. */
			        vd += a->stride;
			        tcg_gen_addi_i32(tcg_ctx, addr, addr, 1 << a->size);
			    }
			    tcg_temp_free_i32(tcg_ctx, addr);
			    tcg_temp_free_i32(tcg_ctx, tmp);

			    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);

			    return true;
			}