mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2024-12-25 04:55:31 +00:00
6ac2c597ab
Implement VFP fp16 support for fused multiply-add insns VFNMA, VFNMS, VFMA, VFMS. Backports 9886fe2834b064a3cf0675a4659942ed547aed42
230 lines
10 KiB
Plaintext
230 lines
10 KiB
Plaintext
# AArch32 VFP instruction descriptions (conditional insns)
|
|
#
|
|
# Copyright (c) 2019 Linaro, Ltd
|
|
#
|
|
# This library is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU Lesser General Public
|
|
# License as published by the Free Software Foundation; either
|
|
# version 2 of the License, or (at your option) any later version.
|
|
#
|
|
# This library is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
# Lesser General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Lesser General Public
|
|
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
|
|
|
#
|
|
# This file is processed by scripts/decodetree.py
|
|
#
|
|
# Encodings for the conditional VFP instructions are here:
|
|
# generally anything matching A32
|
|
# cccc 11.. .... .... .... 101. .... ....
|
|
# and T32
|
|
# 1110 110. .... .... .... 101. .... ....
|
|
# 1110 1110 .... .... .... 101. .... ....
|
|
# (but those patterns might also cover some Neon instructions,
|
|
# which do not live in this file.)
|
|
|
|
# VFP registers have an odd encoding with a four-bit field
|
|
# and a one-bit field which are assembled in different orders
|
|
# depending on whether the register is double or single precision.
|
|
# Each individual instruction function must do the checks for
|
|
# "double register selected but CPU does not have double support"
|
|
# and "double register number has bit 4 set but CPU does not
|
|
# support D16-D31" (which should UNDEF).
|
|
%vm_dp 5:1 0:4
|
|
%vm_sp 0:4 5:1
|
|
%vn_dp 7:1 16:4
|
|
%vn_sp 16:4 7:1
|
|
%vd_dp 22:1 12:4
|
|
%vd_sp 12:4 22:1
|
|
|
|
# Scalar index for the size=0 VMOV_to_gp/VMOV_from_gp patterns:
# bit 21 is the most significant bit, followed by bits [6:5].
%vmov_idx_b 21:1 5:2
|
|
# Scalar index for the size=1 VMOV_to_gp/VMOV_from_gp patterns:
# bit 21 is the most significant bit, followed by bit 6.
%vmov_idx_h 21:1 6:1
|
|
|
|
# 8-bit immediate for VMOV_imm_sp/VMOV_imm_dp: bits [19:16] form the
# high nibble and bits [3:0] the low nibble.
%vmov_imm 16:4 0:4
|
|
|
|
# Formats shared by the data-processing patterns below. They fix no bits
# themselves (all 32 positions are '.'); they only name which register
# fields are extracted and with which numbering (_sp or _dp fields).
# 3-register formats: vd, vn and vm all single (_s) or all double (_d).
@vfp_dnm_s ................................ vm=%vm_sp vn=%vn_sp vd=%vd_sp
|
|
@vfp_dnm_d ................................ vm=%vm_dp vn=%vn_dp vd=%vd_dp
|
|
|
|
# 2-register formats: the suffix letters give the numbering used for
# vd and then vm (s = single-precision field, d = double-precision field).
@vfp_dm_ss ................................ vm=%vm_sp vd=%vd_sp
|
|
@vfp_dm_dd ................................ vm=%vm_dp vd=%vd_dp
|
|
@vfp_dm_ds ................................ vm=%vm_sp vd=%vd_dp
|
|
@vfp_dm_sd ................................ vm=%vm_dp vd=%vd_sp
|
|
|
|
# VMOV scalar to general-purpose register; note that this does
|
|
# include some Neon cases.
|
|
VMOV_to_gp ---- 1110 u:1 1. 1 .... rt:4 1011 ... 1 0000 \
|
|
vn=%vn_dp size=0 index=%vmov_idx_b
|
|
VMOV_to_gp ---- 1110 u:1 0. 1 .... rt:4 1011 ..1 1 0000 \
|
|
vn=%vn_dp size=1 index=%vmov_idx_h
|
|
VMOV_to_gp ---- 1110 0 0 index:1 1 .... rt:4 1011 .00 1 0000 \
|
|
vn=%vn_dp size=2 u=0
|
|
|
|
VMOV_from_gp ---- 1110 0 1. 0 .... rt:4 1011 ... 1 0000 \
|
|
vn=%vn_dp size=0 index=%vmov_idx_b
|
|
VMOV_from_gp ---- 1110 0 0. 0 .... rt:4 1011 ..1 1 0000 \
|
|
vn=%vn_dp size=1 index=%vmov_idx_h
|
|
VMOV_from_gp ---- 1110 0 0 index:1 0 .... rt:4 1011 .00 1 0000 \
|
|
vn=%vn_dp size=2
|
|
|
|
VDUP ---- 1110 1 b:1 q:1 0 .... rt:4 1011 . 0 e:1 1 0000 \
|
|
vn=%vn_dp
|
|
|
|
VMSR_VMRS ---- 1110 111 l:1 reg:4 rt:4 1010 0001 0000
|
|
VMOV_single ---- 1110 000 l:1 .... rt:4 1010 . 001 0000 vn=%vn_sp
|
|
|
|
VMOV_64_sp ---- 1100 010 op:1 rt2:4 rt:4 1010 00.1 .... vm=%vm_sp
|
|
VMOV_64_dp ---- 1100 010 op:1 rt2:4 rt:4 1011 00.1 .... vm=%vm_dp
|
|
|
|
# Note that the half-precision variants of VLDR and VSTR are
|
|
# not part of this decodetree at all because they have bits [9:8] == 0b01
|
|
VLDR_VSTR_sp ---- 1101 u:1 .0 l:1 rn:4 .... 1010 imm:8 vd=%vd_sp
|
|
VLDR_VSTR_dp ---- 1101 u:1 .0 l:1 rn:4 .... 1011 imm:8 vd=%vd_dp
|
|
|
|
# We split the load/store multiple up into two patterns to avoid
|
|
# overlap with other insns in the "Advanced SIMD load/store and 64-bit move"
|
|
# grouping:
|
|
# P=0 U=0 W=0 is 64-bit VMOV
|
|
# P=1 W=0 is VLDR/VSTR
|
|
# P=U W=1 is UNDEF
|
|
# leaving P=0 U=1 W=x and P=1 U=0 W=1 for load/store multiple.
|
|
# These include FSTM/FLDM.
|
|
VLDM_VSTM_sp ---- 1100 1 . w:1 l:1 rn:4 .... 1010 imm:8 \
|
|
vd=%vd_sp p=0 u=1
|
|
VLDM_VSTM_dp ---- 1100 1 . w:1 l:1 rn:4 .... 1011 imm:8 \
|
|
vd=%vd_dp p=0 u=1
|
|
|
|
VLDM_VSTM_sp ---- 1101 0.1 l:1 rn:4 .... 1010 imm:8 \
|
|
vd=%vd_sp p=1 u=0 w=1
|
|
VLDM_VSTM_dp ---- 1101 0.1 l:1 rn:4 .... 1011 imm:8 \
|
|
vd=%vd_dp p=1 u=0 w=1
|
|
|
|
# 3-register VFP data-processing; bits [23,21:20,6] identify the operation.
|
|
VMLA_hp ---- 1110 0.00 .... .... 1001 .0.0 .... @vfp_dnm_s
|
|
VMLA_sp ---- 1110 0.00 .... .... 1010 .0.0 .... @vfp_dnm_s
|
|
VMLA_dp ---- 1110 0.00 .... .... 1011 .0.0 .... @vfp_dnm_d
|
|
|
|
VMLS_hp ---- 1110 0.00 .... .... 1001 .1.0 .... @vfp_dnm_s
|
|
VMLS_sp ---- 1110 0.00 .... .... 1010 .1.0 .... @vfp_dnm_s
|
|
VMLS_dp ---- 1110 0.00 .... .... 1011 .1.0 .... @vfp_dnm_d
|
|
|
|
VNMLS_hp ---- 1110 0.01 .... .... 1001 .0.0 .... @vfp_dnm_s
|
|
VNMLS_sp ---- 1110 0.01 .... .... 1010 .0.0 .... @vfp_dnm_s
|
|
VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... @vfp_dnm_d
|
|
|
|
VNMLA_hp ---- 1110 0.01 .... .... 1001 .1.0 .... @vfp_dnm_s
|
|
VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... @vfp_dnm_s
|
|
VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... @vfp_dnm_d
|
|
|
|
VMUL_hp ---- 1110 0.10 .... .... 1001 .0.0 .... @vfp_dnm_s
|
|
VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... @vfp_dnm_s
|
|
VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... @vfp_dnm_d
|
|
|
|
VNMUL_hp ---- 1110 0.10 .... .... 1001 .1.0 .... @vfp_dnm_s
|
|
VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... @vfp_dnm_s
|
|
VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... @vfp_dnm_d
|
|
|
|
VADD_hp ---- 1110 0.11 .... .... 1001 .0.0 .... @vfp_dnm_s
|
|
VADD_sp ---- 1110 0.11 .... .... 1010 .0.0 .... @vfp_dnm_s
|
|
VADD_dp ---- 1110 0.11 .... .... 1011 .0.0 .... @vfp_dnm_d
|
|
|
|
VSUB_hp ---- 1110 0.11 .... .... 1001 .1.0 .... @vfp_dnm_s
|
|
VSUB_sp ---- 1110 0.11 .... .... 1010 .1.0 .... @vfp_dnm_s
|
|
VSUB_dp ---- 1110 0.11 .... .... 1011 .1.0 .... @vfp_dnm_d
|
|
|
|
VDIV_hp ---- 1110 1.00 .... .... 1001 .0.0 .... @vfp_dnm_s
|
|
VDIV_sp ---- 1110 1.00 .... .... 1010 .0.0 .... @vfp_dnm_s
|
|
VDIV_dp ---- 1110 1.00 .... .... 1011 .0.0 .... @vfp_dnm_d
|
|
|
|
# Fused multiply-add, half-precision forms (bits [11:8] == 1001).
# Bits [23,21:20] are 1,10 for VFMA/VFMS and 1,01 for VFNMA/VFNMS;
# bit 6 is 0 for VFMA/VFNMA and 1 for VFMS/VFNMS.
# The register fields use the single-precision format @vfp_dnm_s.
VFMA_hp ---- 1110 1.10 .... .... 1001 .0. 0 .... @vfp_dnm_s
|
|
VFMS_hp ---- 1110 1.10 .... .... 1001 .1. 0 .... @vfp_dnm_s
|
|
VFNMA_hp ---- 1110 1.01 .... .... 1001 .0. 0 .... @vfp_dnm_s
|
|
VFNMS_hp ---- 1110 1.01 .... .... 1001 .1. 0 .... @vfp_dnm_s
|
|
|
|
VFMA_sp ---- 1110 1.10 .... .... 1010 .0. 0 .... @vfp_dnm_s
|
|
VFMS_sp ---- 1110 1.10 .... .... 1010 .1. 0 .... @vfp_dnm_s
|
|
VFNMA_sp ---- 1110 1.01 .... .... 1010 .0. 0 .... @vfp_dnm_s
|
|
VFNMS_sp ---- 1110 1.01 .... .... 1010 .1. 0 .... @vfp_dnm_s
|
|
|
|
VFMA_dp ---- 1110 1.10 .... .... 1011 .0.0 .... @vfp_dnm_d
|
|
VFMS_dp ---- 1110 1.10 .... .... 1011 .1.0 .... @vfp_dnm_d
|
|
VFNMA_dp ---- 1110 1.01 .... .... 1011 .0.0 .... @vfp_dnm_d
|
|
VFNMS_dp ---- 1110 1.01 .... .... 1011 .1.0 .... @vfp_dnm_d
|
|
|
|
VMOV_imm_sp ---- 1110 1.11 .... .... 1010 0000 .... \
|
|
vd=%vd_sp imm=%vmov_imm
|
|
VMOV_imm_dp ---- 1110 1.11 .... .... 1011 0000 .... \
|
|
vd=%vd_dp imm=%vmov_imm
|
|
|
|
VMOV_reg_sp ---- 1110 1.11 0000 .... 1010 01.0 .... @vfp_dm_ss
|
|
VMOV_reg_dp ---- 1110 1.11 0000 .... 1011 01.0 .... @vfp_dm_dd
|
|
|
|
VABS_sp ---- 1110 1.11 0000 .... 1010 11.0 .... @vfp_dm_ss
|
|
VABS_dp ---- 1110 1.11 0000 .... 1011 11.0 .... @vfp_dm_dd
|
|
|
|
VNEG_sp ---- 1110 1.11 0001 .... 1010 01.0 .... @vfp_dm_ss
|
|
VNEG_dp ---- 1110 1.11 0001 .... 1011 01.0 .... @vfp_dm_dd
|
|
|
|
VSQRT_sp ---- 1110 1.11 0001 .... 1010 11.0 .... @vfp_dm_ss
|
|
VSQRT_dp ---- 1110 1.11 0001 .... 1011 11.0 .... @vfp_dm_dd
|
|
|
|
VCMP_sp ---- 1110 1.11 010 z:1 .... 1010 e:1 1.0 .... \
|
|
vd=%vd_sp vm=%vm_sp
|
|
VCMP_dp ---- 1110 1.11 010 z:1 .... 1011 e:1 1.0 .... \
|
|
vd=%vd_dp vm=%vm_dp
|
|
|
|
# VCVTT and VCVTB from f16: Vd format depends on size bit; Vm is always vm_sp
|
|
VCVT_f32_f16 ---- 1110 1.11 0010 .... 1010 t:1 1.0 .... \
|
|
vd=%vd_sp vm=%vm_sp
|
|
VCVT_f64_f16 ---- 1110 1.11 0010 .... 1011 t:1 1.0 .... \
|
|
vd=%vd_dp vm=%vm_sp
|
|
|
|
# VCVTB and VCVTT to f16: Vd format is always vd_sp;
|
|
# Vm format depends on size bit
|
|
VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \
|
|
vd=%vd_sp vm=%vm_sp
|
|
VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
|
|
vd=%vd_sp vm=%vm_dp
|
|
|
|
VRINTR_sp ---- 1110 1.11 0110 .... 1010 01.0 .... @vfp_dm_ss
|
|
VRINTR_dp ---- 1110 1.11 0110 .... 1011 01.0 .... @vfp_dm_dd
|
|
|
|
VRINTZ_sp ---- 1110 1.11 0110 .... 1010 11.0 .... @vfp_dm_ss
|
|
VRINTZ_dp ---- 1110 1.11 0110 .... 1011 11.0 .... @vfp_dm_dd
|
|
|
|
VRINTX_sp ---- 1110 1.11 0111 .... 1010 01.0 .... @vfp_dm_ss
|
|
VRINTX_dp ---- 1110 1.11 0111 .... 1011 01.0 .... @vfp_dm_dd
|
|
|
|
# VCVT between single and double:
|
|
# Vm precision depends on size; Vd is its reverse
|
|
VCVT_sp ---- 1110 1.11 0111 .... 1010 11.0 .... @vfp_dm_ds
|
|
VCVT_dp ---- 1110 1.11 0111 .... 1011 11.0 .... @vfp_dm_sd
|
|
|
|
# VCVT from integer to floating point: Vm always single; Vd depends on size
|
|
VCVT_int_sp ---- 1110 1.11 1000 .... 1010 s:1 1.0 .... \
|
|
vd=%vd_sp vm=%vm_sp
|
|
VCVT_int_dp ---- 1110 1.11 1000 .... 1011 s:1 1.0 .... \
|
|
vd=%vd_dp vm=%vm_sp
|
|
|
|
# VJCVT is always dp to sp
|
|
VJCVT ---- 1110 1.11 1001 .... 1011 11.0 .... @vfp_dm_sd
|
|
|
|
# VCVT between floating-point and fixed-point. The immediate value
|
|
# is in the same format as a Vm single-precision register number.
|
|
# We assemble bits 18 (op), 16 (u) and 7 (sx) into a single opc field
|
|
# for the convenience of the trans_VCVT_fix functions.
|
|
%vcvt_fix_op 18:1 16:1 7:1
|
|
VCVT_fix_sp ---- 1110 1.11 1.1. .... 1010 .1.0 .... \
|
|
vd=%vd_sp imm=%vm_sp opc=%vcvt_fix_op
|
|
VCVT_fix_dp ---- 1110 1.11 1.1. .... 1011 .1.0 .... \
|
|
vd=%vd_dp imm=%vm_sp opc=%vcvt_fix_op
|
|
|
|
# VCVT float to integer (VCVT and VCVTR): Vd always single; Vm depends on size
|
|
VCVT_sp_int ---- 1110 1.11 110 s:1 .... 1010 rz:1 1.0 .... \
|
|
vd=%vd_sp vm=%vm_sp
|
|
VCVT_dp_int ---- 1110 1.11 110 s:1 .... 1011 rz:1 1.0 .... \
|
|
vd=%vd_sp vm=%vm_dp
|