diff --git a/qemu/aarch64.h b/qemu/aarch64.h
index cedc63f0..1344412d 100644
--- a/qemu/aarch64.h
+++ b/qemu/aarch64.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_aarch64
 #define helper_gvec_eq64 helper_gvec_eq64_aarch64
 #define helper_gvec_eqv helper_gvec_eqv_aarch64
+#define helper_gvec_fabd_s helper_gvec_fabd_s_aarch64
 #define helper_gvec_fadd_d helper_gvec_fadd_d_aarch64
 #define helper_gvec_fadd_h helper_gvec_fadd_h_aarch64
 #define helper_gvec_fadd_s helper_gvec_fadd_s_aarch64
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_aarch64
 #define helper_msa_st_w helper_msa_st_w_aarch64
 #define helper_msr_banked helper_msr_banked_aarch64
-#define helper_neon_abd_f32 helper_neon_abd_f32_aarch64
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_aarch64
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_aarch64
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_aarch64
diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h
index 6c6673d4..e7634258 100644
--- a/qemu/aarch64eb.h
+++ b/qemu/aarch64eb.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_aarch64eb
 #define helper_gvec_eq64 helper_gvec_eq64_aarch64eb
 #define helper_gvec_eqv helper_gvec_eqv_aarch64eb
+#define helper_gvec_fabd_s helper_gvec_fabd_s_aarch64eb
 #define helper_gvec_fadd_d helper_gvec_fadd_d_aarch64eb
 #define helper_gvec_fadd_h helper_gvec_fadd_h_aarch64eb
 #define helper_gvec_fadd_s helper_gvec_fadd_s_aarch64eb
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_aarch64eb
 #define helper_msa_st_w helper_msa_st_w_aarch64eb
 #define helper_msr_banked helper_msr_banked_aarch64eb
-#define helper_neon_abd_f32 helper_neon_abd_f32_aarch64eb
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_aarch64eb
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_aarch64eb
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_aarch64eb
diff --git a/qemu/arm.h b/qemu/arm.h
index 899634b7..b49c2b8d 100644
--- a/qemu/arm.h
+++ b/qemu/arm.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_arm
 #define helper_gvec_eq64 helper_gvec_eq64_arm
 #define helper_gvec_eqv helper_gvec_eqv_arm
+#define helper_gvec_fabd_s helper_gvec_fabd_s_arm
 #define helper_gvec_fadd_d helper_gvec_fadd_d_arm
 #define helper_gvec_fadd_h helper_gvec_fadd_h_arm
 #define helper_gvec_fadd_s helper_gvec_fadd_s_arm
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_arm
 #define helper_msa_st_w helper_msa_st_w_arm
 #define helper_msr_banked helper_msr_banked_arm
-#define helper_neon_abd_f32 helper_neon_abd_f32_arm
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_arm
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_arm
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_arm
diff --git a/qemu/armeb.h b/qemu/armeb.h
index cf364fbf..5c1a357e 100644
--- a/qemu/armeb.h
+++ b/qemu/armeb.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_armeb
 #define helper_gvec_eq64 helper_gvec_eq64_armeb
 #define helper_gvec_eqv helper_gvec_eqv_armeb
+#define helper_gvec_fabd_s helper_gvec_fabd_s_armeb
 #define helper_gvec_fadd_d helper_gvec_fadd_d_armeb
 #define helper_gvec_fadd_h helper_gvec_fadd_h_armeb
 #define helper_gvec_fadd_s helper_gvec_fadd_s_armeb
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_armeb
 #define helper_msa_st_w helper_msa_st_w_armeb
 #define helper_msr_banked helper_msr_banked_armeb
-#define helper_neon_abd_f32 helper_neon_abd_f32_armeb
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_armeb
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_armeb
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_armeb
diff --git a/qemu/header_gen.py b/qemu/header_gen.py
index e431ab72..a658c3d1 100644
--- a/qemu/header_gen.py
+++ b/qemu/header_gen.py
@@ -1158,6 +1158,7 @@ symbols = (
     'helper_gvec_eq32',
     'helper_gvec_eq64',
     'helper_gvec_eqv',
+    'helper_gvec_fabd_s',
     'helper_gvec_fadd_d',
     'helper_gvec_fadd_h',
     'helper_gvec_fadd_s',
@@ -1472,7 +1473,6 @@ symbols = (
     'helper_msa_st_h',
     'helper_msa_st_w',
     'helper_msr_banked',
-    'helper_neon_abd_f32',
     'helper_neon_abdl_s16',
     'helper_neon_abdl_s32',
     'helper_neon_abdl_s64',
diff --git a/qemu/m68k.h b/qemu/m68k.h
index 46a435b0..51803f3f 100644
--- a/qemu/m68k.h
+++ b/qemu/m68k.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_m68k
 #define helper_gvec_eq64 helper_gvec_eq64_m68k
 #define helper_gvec_eqv helper_gvec_eqv_m68k
+#define helper_gvec_fabd_s helper_gvec_fabd_s_m68k
 #define helper_gvec_fadd_d helper_gvec_fadd_d_m68k
 #define helper_gvec_fadd_h helper_gvec_fadd_h_m68k
 #define helper_gvec_fadd_s helper_gvec_fadd_s_m68k
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_m68k
 #define helper_msa_st_w helper_msa_st_w_m68k
 #define helper_msr_banked helper_msr_banked_m68k
-#define helper_neon_abd_f32 helper_neon_abd_f32_m68k
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_m68k
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_m68k
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_m68k
diff --git a/qemu/mips.h b/qemu/mips.h
index 78f77c0b..f20c2723 100644
--- a/qemu/mips.h
+++ b/qemu/mips.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_mips
 #define helper_gvec_eq64 helper_gvec_eq64_mips
 #define helper_gvec_eqv helper_gvec_eqv_mips
+#define helper_gvec_fabd_s helper_gvec_fabd_s_mips
 #define helper_gvec_fadd_d helper_gvec_fadd_d_mips
 #define helper_gvec_fadd_h helper_gvec_fadd_h_mips
 #define helper_gvec_fadd_s helper_gvec_fadd_s_mips
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_mips
 #define helper_msa_st_w helper_msa_st_w_mips
 #define helper_msr_banked helper_msr_banked_mips
-#define helper_neon_abd_f32 helper_neon_abd_f32_mips
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_mips
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_mips
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_mips
diff --git a/qemu/mips64.h b/qemu/mips64.h
index b8c27d62..113b65df 100644
--- a/qemu/mips64.h
+++ b/qemu/mips64.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_mips64
 #define helper_gvec_eq64 helper_gvec_eq64_mips64
 #define helper_gvec_eqv helper_gvec_eqv_mips64
+#define helper_gvec_fabd_s helper_gvec_fabd_s_mips64
 #define helper_gvec_fadd_d helper_gvec_fadd_d_mips64
 #define helper_gvec_fadd_h helper_gvec_fadd_h_mips64
 #define helper_gvec_fadd_s helper_gvec_fadd_s_mips64
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_mips64
 #define helper_msa_st_w helper_msa_st_w_mips64
 #define helper_msr_banked helper_msr_banked_mips64
-#define helper_neon_abd_f32 helper_neon_abd_f32_mips64
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_mips64
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_mips64
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_mips64
diff --git a/qemu/mips64el.h b/qemu/mips64el.h
index e6408917..deb691a1 100644
--- a/qemu/mips64el.h
+++ b/qemu/mips64el.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_mips64el
 #define helper_gvec_eq64 helper_gvec_eq64_mips64el
 #define helper_gvec_eqv helper_gvec_eqv_mips64el
+#define helper_gvec_fabd_s helper_gvec_fabd_s_mips64el
 #define helper_gvec_fadd_d helper_gvec_fadd_d_mips64el
 #define helper_gvec_fadd_h helper_gvec_fadd_h_mips64el
 #define helper_gvec_fadd_s helper_gvec_fadd_s_mips64el
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_mips64el
 #define helper_msa_st_w helper_msa_st_w_mips64el
 #define helper_msr_banked helper_msr_banked_mips64el
-#define helper_neon_abd_f32 helper_neon_abd_f32_mips64el
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_mips64el
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_mips64el
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_mips64el
diff --git a/qemu/mipsel.h b/qemu/mipsel.h
index 983a598f..d1f9b655 100644
--- a/qemu/mipsel.h
+++ b/qemu/mipsel.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_mipsel
 #define helper_gvec_eq64 helper_gvec_eq64_mipsel
 #define helper_gvec_eqv helper_gvec_eqv_mipsel
+#define helper_gvec_fabd_s helper_gvec_fabd_s_mipsel
 #define helper_gvec_fadd_d helper_gvec_fadd_d_mipsel
 #define helper_gvec_fadd_h helper_gvec_fadd_h_mipsel
 #define helper_gvec_fadd_s helper_gvec_fadd_s_mipsel
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_mipsel
 #define helper_msa_st_w helper_msa_st_w_mipsel
 #define helper_msr_banked helper_msr_banked_mipsel
-#define helper_neon_abd_f32 helper_neon_abd_f32_mipsel
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_mipsel
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_mipsel
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_mipsel
diff --git a/qemu/powerpc.h b/qemu/powerpc.h
index a44420a4..8be534e8 100644
--- a/qemu/powerpc.h
+++ b/qemu/powerpc.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_powerpc
 #define helper_gvec_eq64 helper_gvec_eq64_powerpc
 #define helper_gvec_eqv helper_gvec_eqv_powerpc
+#define helper_gvec_fabd_s helper_gvec_fabd_s_powerpc
 #define helper_gvec_fadd_d helper_gvec_fadd_d_powerpc
 #define helper_gvec_fadd_h helper_gvec_fadd_h_powerpc
 #define helper_gvec_fadd_s helper_gvec_fadd_s_powerpc
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_powerpc
 #define helper_msa_st_w helper_msa_st_w_powerpc
 #define helper_msr_banked helper_msr_banked_powerpc
-#define helper_neon_abd_f32 helper_neon_abd_f32_powerpc
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_powerpc
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_powerpc
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_powerpc
diff --git a/qemu/riscv32.h b/qemu/riscv32.h
index 35ea0161..d1844700 100644
--- a/qemu/riscv32.h
+++ b/qemu/riscv32.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_riscv32
 #define helper_gvec_eq64 helper_gvec_eq64_riscv32
 #define helper_gvec_eqv helper_gvec_eqv_riscv32
+#define helper_gvec_fabd_s helper_gvec_fabd_s_riscv32
 #define helper_gvec_fadd_d helper_gvec_fadd_d_riscv32
 #define helper_gvec_fadd_h helper_gvec_fadd_h_riscv32
 #define helper_gvec_fadd_s helper_gvec_fadd_s_riscv32
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_riscv32
 #define helper_msa_st_w helper_msa_st_w_riscv32
 #define helper_msr_banked helper_msr_banked_riscv32
-#define helper_neon_abd_f32 helper_neon_abd_f32_riscv32
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_riscv32
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_riscv32
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_riscv32
diff --git a/qemu/riscv64.h b/qemu/riscv64.h
index b9779511..1517ced3 100644
--- a/qemu/riscv64.h
+++ b/qemu/riscv64.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_riscv64
 #define helper_gvec_eq64 helper_gvec_eq64_riscv64
 #define helper_gvec_eqv helper_gvec_eqv_riscv64
+#define helper_gvec_fabd_s helper_gvec_fabd_s_riscv64
 #define helper_gvec_fadd_d helper_gvec_fadd_d_riscv64
 #define helper_gvec_fadd_h helper_gvec_fadd_h_riscv64
 #define helper_gvec_fadd_s helper_gvec_fadd_s_riscv64
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_riscv64
 #define helper_msa_st_w helper_msa_st_w_riscv64
 #define helper_msr_banked helper_msr_banked_riscv64
-#define helper_neon_abd_f32 helper_neon_abd_f32_riscv64
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_riscv64
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_riscv64
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_riscv64
diff --git a/qemu/sparc.h b/qemu/sparc.h
index 69998a55..e95e11c0 100644
--- a/qemu/sparc.h
+++ b/qemu/sparc.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_sparc
 #define helper_gvec_eq64 helper_gvec_eq64_sparc
 #define helper_gvec_eqv helper_gvec_eqv_sparc
+#define helper_gvec_fabd_s helper_gvec_fabd_s_sparc
 #define helper_gvec_fadd_d helper_gvec_fadd_d_sparc
 #define helper_gvec_fadd_h helper_gvec_fadd_h_sparc
 #define helper_gvec_fadd_s helper_gvec_fadd_s_sparc
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_sparc
 #define helper_msa_st_w helper_msa_st_w_sparc
 #define helper_msr_banked helper_msr_banked_sparc
-#define helper_neon_abd_f32 helper_neon_abd_f32_sparc
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_sparc
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_sparc
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_sparc
diff --git a/qemu/sparc64.h b/qemu/sparc64.h
index 363701e3..b9cfb059 100644
--- a/qemu/sparc64.h
+++ b/qemu/sparc64.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_sparc64
 #define helper_gvec_eq64 helper_gvec_eq64_sparc64
 #define helper_gvec_eqv helper_gvec_eqv_sparc64
+#define helper_gvec_fabd_s helper_gvec_fabd_s_sparc64
 #define helper_gvec_fadd_d helper_gvec_fadd_d_sparc64
 #define helper_gvec_fadd_h helper_gvec_fadd_h_sparc64
 #define helper_gvec_fadd_s helper_gvec_fadd_s_sparc64
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_sparc64
 #define helper_msa_st_w helper_msa_st_w_sparc64
 #define helper_msr_banked helper_msr_banked_sparc64
-#define helper_neon_abd_f32 helper_neon_abd_f32_sparc64
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_sparc64
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_sparc64
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_sparc64
diff --git a/qemu/target/arm/helper.h b/qemu/target/arm/helper.h
index 1a97d32d..15c02ef2 100644
--- a/qemu/target/arm/helper.h
+++ b/qemu/target/arm/helper.h
@@ -392,7 +392,6 @@ DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32)
 DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32)
 DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64)
 
-DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr)
 DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr)
 DEF_HELPER_3(neon_cge_f32, i32, i32, i32, ptr)
 DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr)
@@ -591,6 +590,8 @@ DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
diff --git a/qemu/target/arm/neon-dp.decode b/qemu/target/arm/neon-dp.decode
index fd32837f..0dd02f3b 100644
--- a/qemu/target/arm/neon-dp.decode
+++ b/qemu/target/arm/neon-dp.decode
@@ -45,6 +45,10 @@
 @3same_q0        .... ... . . . size:2 .... .... .... . 0 . . .... \
                  &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp q=0
 
+# For FP insns the high bit of 'size' is used as part of opcode decode
+@3same_fp        .... ... . . . . size:1 .... .... .... . q:1 . . .... \
+                 &3same vm=%vm_dp vn=%vn_dp vd=%vd_dp
+
 VHADD_S_3s       1111 001 0 0 . .. .... .... 0000 . . . 0 .... @3same
 VHADD_U_3s       1111 001 1 0 . .. .... .... 0000 . . . 0 .... @3same
 VQADD_S_3s       1111 001 0 0 . .. .... .... 0000 . . . 1 .... @3same
@@ -169,3 +173,7 @@ SHA256SU1_3s     1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \
                  vm=%vm_dp vn=%vn_dp vd=%vd_dp
 
 VQRDMLSH_3s      1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same
+
+VADD_fp_3s       1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
+VSUB_fp_3s       1111 001 0 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
+VABD_fp_3s       1111 001 1 0 . 1 . .... .... 1101 ... 0 .... @3same_fp
diff --git a/qemu/target/arm/neon_helper.c b/qemu/target/arm/neon_helper.c
index 0b3b2fd3..9cb4f246 100644
--- a/qemu/target/arm/neon_helper.c
+++ b/qemu/target/arm/neon_helper.c
@@ -1836,15 +1836,6 @@ uint64_t HELPER(neon_qneg_s64)(CPUARMState *env, uint64_t x)
     return x;
 }
 
-/* NEON Float helpers.  */
-uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp)
-{
-    float_status *fpst = fpstp;
-    float32 f0 = make_float32(a);
-    float32 f1 = make_float32(b);
-    return float32_val(float32_abs(float32_sub(f0, f1, fpst)));
-}
-
 /* Floating point comparisons produce an integer result.
  * Note that EQ doesn't signal InvalidOp for QNaNs but GE and GT do.
  * Softfloat routines return 0/1, which we convert to the 0/-1 Neon requires.
diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c
index 23f4ab3a..978d08c5 100644
--- a/qemu/target/arm/translate-a64.c
+++ b/qemu/target/arm/translate-a64.c
@@ -678,9 +678,8 @@ static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
     tcg_temp_free_i64(tcg_ctx, tmp);
 }
 
-TCGv_ptr get_fpstatus_ptr(DisasContext *s, bool is_f16)
+TCGv_ptr get_fpstatus_ptr(TCGContext *tcg_ctx, bool is_f16)
 {
-    TCGContext *tcg_ctx = s->uc->tcg_ctx;
     TCGv_ptr statusptr = tcg_temp_new_ptr(tcg_ctx);
     int offset;
 
@@ -762,7 +761,7 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
 
-    TCGv_ptr fpst = get_fpstatus_ptr(s, is_fp16);
+    TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, is_fp16);
     tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd),
                        vec_full_reg_offset(s, rn),
                        vec_full_reg_offset(s, rm), fpst,
@@ -5613,7 +5612,7 @@ static void handle_fp_compare(DisasContext *s, int size,
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
     TCGv_i64 tcg_flags = tcg_temp_new_i64(tcg_ctx);
-    TCGv_ptr fpst = get_fpstatus_ptr(s, size == MO_16);
+    TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, size == MO_16);
 
     if (size == MO_64) {
         TCGv_i64 tcg_vn, tcg_vm;
@@ -5875,7 +5874,7 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
         tcg_gen_xori_i32(tcg_ctx, tcg_res, tcg_op, 0x8000);
         break;
     case 0x3: /* FSQRT */
-        fpst = get_fpstatus_ptr(s, true);
+        fpst = get_fpstatus_ptr(tcg_ctx, true);
         gen_helper_sqrt_f16(tcg_ctx, tcg_res, tcg_op, fpst);
         break;
     case 0x8: /* FRINTN */
@@ -5885,7 +5884,7 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
     case 0xc: /* FRINTA */
     {
         TCGv_i32 tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(opcode & 7));
-        fpst = get_fpstatus_ptr(s, true);
+        fpst = get_fpstatus_ptr(tcg_ctx, true);
 
         gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst);
         gen_helper_advsimd_rinth(tcg_ctx, tcg_res, tcg_op, fpst);
@@ -5895,11 +5894,11 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
         break;
     }
     case 0xe: /* FRINTX */
-        fpst = get_fpstatus_ptr(s, true);
+        fpst = get_fpstatus_ptr(tcg_ctx, true);
         gen_helper_advsimd_rinth_exact(tcg_ctx, tcg_res, tcg_op, fpst);
         break;
     case 0xf: /* FRINTI */
-        fpst = get_fpstatus_ptr(s, true);
+        fpst = get_fpstatus_ptr(tcg_ctx, true);
         gen_helper_advsimd_rinth(tcg_ctx, tcg_res, tcg_op, fpst);
         break;
     default:
@@ -5972,7 +5971,7 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
         g_assert_not_reached();
     }
 
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     if (rmode >= 0) {
         TCGv_i32 tcg_rmode = tcg_const_i32(tcg_ctx, rmode);
         gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst);
@@ -6050,7 +6049,7 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
         g_assert_not_reached();
     }
 
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     if (rmode >= 0) {
         TCGv_i32 tcg_rmode = tcg_const_i32(tcg_ctx, rmode);
         gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst);
@@ -6087,7 +6086,7 @@ static void handle_fp_fcvt(DisasContext *s, int opcode,
             /* Single to half */
             TCGv_i32 tcg_rd = tcg_temp_new_i32(tcg_ctx);
             TCGv_i32 ahp = get_ahp_flag(s);
-            TCGv_ptr fpst = get_fpstatus_ptr(s, false);
+            TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, false);
 
             gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tcg_rd, tcg_rn, fpst, ahp);
             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
@@ -6107,7 +6106,7 @@ static void handle_fp_fcvt(DisasContext *s, int opcode,
             /* Double to single */
             gen_helper_vfp_fcvtsd(tcg_ctx, tcg_rd, tcg_rn, tcg_ctx->cpu_env);
         } else {
-            TCGv_ptr fpst = get_fpstatus_ptr(s, false);
+            TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, false);
             TCGv_i32 ahp = get_ahp_flag(s);
             /* Double to half */
             gen_helper_vfp_fcvt_f64_to_f16(tcg_ctx, tcg_rd, tcg_rn, fpst, ahp);
@@ -6123,7 +6122,7 @@ static void handle_fp_fcvt(DisasContext *s, int opcode,
     case 0x3:
     {
         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
-        TCGv_ptr tcg_fpst = get_fpstatus_ptr(s, false);
+        TCGv_ptr tcg_fpst = get_fpstatus_ptr(tcg_ctx, false);
         TCGv_i32 tcg_ahp = get_ahp_flag(s);
         tcg_gen_ext16u_i32(tcg_ctx, tcg_rn, tcg_rn);
         if (dtype == 0) {
@@ -6241,7 +6240,7 @@ static void handle_fp_2src_single(DisasContext *s, int opcode,
     TCGv_ptr fpst;
 
     tcg_res = tcg_temp_new_i32(tcg_ctx);
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     tcg_op1 = read_fp_sreg(s, rn);
     tcg_op2 = read_fp_sreg(s, rm);
 
@@ -6295,7 +6294,7 @@ static void handle_fp_2src_double(DisasContext *s, int opcode,
     TCGv_ptr fpst;
 
     tcg_res = tcg_temp_new_i64(tcg_ctx);
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     tcg_op1 = read_fp_dreg(s, rn);
     tcg_op2 = read_fp_dreg(s, rm);
 
@@ -6350,7 +6349,7 @@ static void handle_fp_2src_half(DisasContext *s, int opcode,
     TCGv_ptr fpst;
 
     tcg_res = tcg_temp_new_i32(tcg_ctx);
-    fpst = get_fpstatus_ptr(s, true);
+    fpst = get_fpstatus_ptr(tcg_ctx, true);
     tcg_op1 = read_fp_hreg(s, rn);
     tcg_op2 = read_fp_hreg(s, rm);
 
@@ -6450,7 +6449,7 @@ static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
     TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
-    TCGv_ptr fpst = get_fpstatus_ptr(s, false);
+    TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, false);
 
     tcg_op1 = read_fp_sreg(s, rn);
     tcg_op2 = read_fp_sreg(s, rm);
@@ -6489,7 +6488,7 @@ static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
     TCGv_i64 tcg_res = tcg_temp_new_i64(tcg_ctx);
-    TCGv_ptr fpst = get_fpstatus_ptr(s, false);
+    TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, false);
 
     tcg_op1 = read_fp_dreg(s, rn);
     tcg_op2 = read_fp_dreg(s, rm);
@@ -6529,7 +6528,7 @@ static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
 
     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
     TCGv_i32 tcg_res = tcg_temp_new_i32(tcg_ctx);
-    TCGv_ptr fpst = get_fpstatus_ptr(s, true);
+    TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, true);
 
     tcg_op1 = read_fp_hreg(s, rn);
     tcg_op2 = read_fp_hreg(s, rm);
@@ -6677,7 +6676,7 @@ static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
     TCGv_i32 tcg_shift, tcg_single;
     TCGv_i64 tcg_double;
 
-    tcg_fpstatus = get_fpstatus_ptr(s, type == 3);
+    tcg_fpstatus = get_fpstatus_ptr(tcg_ctx, type == 3);
 
     tcg_shift = tcg_const_i32(tcg_ctx, 64 - scale);
 
@@ -6967,7 +6966,7 @@ static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
     TCGv_i64 t = read_fp_dreg(s, rn);
-    TCGv_ptr fpstatus = get_fpstatus_ptr(s, false);
+    TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, false);
 
     gen_helper_fjcvtzs(tcg_ctx, t, t, fpstatus);
 
@@ -7577,7 +7576,7 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
          * Note that correct NaN propagation requires that we do these
          * operations in exactly the order specified by the pseudocode.
          */
-        TCGv_ptr fpst = get_fpstatus_ptr(s, size == MO_16);
+        TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, size == MO_16);
         int fpopcode = opcode | is_min << 4 | is_u << 5;
         int vmap = (1 << elements) - 1;
         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
@@ -8096,7 +8095,7 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
             return;
         }
 
-        fpst = get_fpstatus_ptr(s, size == MO_16);
+        fpst = get_fpstatus_ptr(tcg_ctx, size == MO_16);
         break;
     default:
         unallocated_encoding(s);
@@ -8615,7 +8614,7 @@ static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
                                    int fracbits, int size)
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
-    TCGv_ptr tcg_fpst = get_fpstatus_ptr(s, size == MO_16);
+    TCGv_ptr tcg_fpst = get_fpstatus_ptr(tcg_ctx, size == MO_16);
     TCGv_i32 tcg_shift = NULL;
 
     MemOp mop = size | (is_signed ? MO_SIGN : 0);
@@ -8797,7 +8796,7 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
     assert(!(is_scalar && is_q));
 
     tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(FPROUNDING_ZERO));
-    tcg_fpstatus = get_fpstatus_ptr(s, size == MO_16);
+    tcg_fpstatus = get_fpstatus_ptr(tcg_ctx, size == MO_16);
     gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_fpstatus);
     fracbits = (16 << size) - immhb;
     tcg_shift = tcg_const_i32(tcg_ctx, fracbits);
@@ -9139,7 +9138,7 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
     int pass;
-    TCGv_ptr fpst = get_fpstatus_ptr(s, false);
+    TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, false);
 
     for (pass = 0; pass < elements; pass++) {
         if (size) {
@@ -9534,7 +9533,7 @@ static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
         return;
     }
 
-    fpst = get_fpstatus_ptr(s, true);
+    fpst = get_fpstatus_ptr(tcg_ctx, true);
 
     tcg_op1 = read_fp_hreg(s, rn);
     tcg_op2 = read_fp_hreg(s, rm);
@@ -9790,7 +9789,7 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
         return;
     }
 
-    fpst = get_fpstatus_ptr(s, size == MO_16);
+    fpst = get_fpstatus_ptr(tcg_ctx, size == MO_16);
 
     if (is_double) {
         TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
@@ -9921,7 +9920,7 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
     bool is_double = (size == 3);
-    TCGv_ptr fpst = get_fpstatus_ptr(s, false);
+    TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, false);
 
     if (is_double) {
         TCGv_i64 tcg_op = tcg_temp_new_i64(tcg_ctx);
@@ -10063,7 +10062,7 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar,
             } else {
                 TCGv_i32 tcg_lo = tcg_temp_new_i32(tcg_ctx);
                 TCGv_i32 tcg_hi = tcg_temp_new_i32(tcg_ctx);
-                TCGv_ptr fpst = get_fpstatus_ptr(s, false);
+                TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, false);
                 TCGv_i32 ahp = get_ahp_flag(s);
 
                 tcg_gen_extr_i64_i32(tcg_ctx, tcg_lo, tcg_hi, tcg_op);
@@ -10327,7 +10326,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
 
     if (is_fcvt) {
         tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rmode));
-        tcg_fpstatus = get_fpstatus_ptr(s, false);
+        tcg_fpstatus = get_fpstatus_ptr(tcg_ctx, false);
         gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, tcg_fpstatus);
     } else {
         tcg_rmode = NULL;
@@ -11159,7 +11158,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
 
     /* Floating point operations need fpst */
     if (opcode >= 0x58) {
-        fpst = get_fpstatus_ptr(s, false);
+        fpst = get_fpstatus_ptr(tcg_ctx, false);
     } else {
         fpst = NULL;
     }
@@ -11776,7 +11775,7 @@ static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
         break;
     }
 
-    fpst = get_fpstatus_ptr(s, true);
+    fpst = get_fpstatus_ptr(tcg_ctx, true);
 
     if (pairwise) {
         int maxpass = is_q ? 8 : 4;
@@ -12069,7 +12068,7 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
         /* 16 -> 32 bit fp conversion */
         int srcelt = is_q ? 4 : 0;
         TCGv_i32 tcg_res[4];
-        TCGv_ptr fpst = get_fpstatus_ptr(s, false);
+        TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, false);
         TCGv_i32 ahp = get_ahp_flag(s);
 
         for (pass = 0; pass < 4; pass++) {
@@ -12546,7 +12545,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
     }
 
     if (need_fpstatus || need_rmode) {
-        tcg_fpstatus = get_fpstatus_ptr(s, false);
+        tcg_fpstatus = get_fpstatus_ptr(tcg_ctx, false);
     } else {
         tcg_fpstatus = NULL;
     }
@@ -12937,7 +12936,7 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
     }
 
     if (need_rmode || need_fpst) {
-        tcg_fpstatus = get_fpstatus_ptr(s, true);
+        tcg_fpstatus = get_fpstatus_ptr(tcg_ctx, true);
     }
 
     if (need_rmode) {
@@ -13247,7 +13246,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     }
 
     if (is_fp) {
-        fpst = get_fpstatus_ptr(s, is_fp16);
+        fpst = get_fpstatus_ptr(tcg_ctx, is_fp16);
     } else {
         fpst = NULL;
     }
diff --git a/qemu/target/arm/translate-a64.h b/qemu/target/arm/translate-a64.h
index 0e2e2612..34f9b155 100644
--- a/qemu/target/arm/translate-a64.h
+++ b/qemu/target/arm/translate-a64.h
@@ -36,7 +36,7 @@ TCGv_i64 cpu_reg_sp(DisasContext *s, int reg);
 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf);
 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf);
 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
-TCGv_ptr get_fpstatus_ptr(DisasContext *, bool);
+TCGv_ptr get_fpstatus_ptr(TCGContext *, bool);
 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                             unsigned int imms, unsigned int immr);
 bool sve_access_check(DisasContext *s);
diff --git a/qemu/target/arm/translate-neon.inc.c b/qemu/target/arm/translate-neon.inc.c
index d1c0b177..6448ffcc 100644
--- a/qemu/target/arm/translate-neon.inc.c
+++ b/qemu/target/arm/translate-neon.inc.c
@@ -63,7 +63,7 @@ static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
     }
 
     opr_sz = (1 + a->q) * 8;
-    fpst = get_fpstatus_ptr(s, 1);
+    fpst = get_fpstatus_ptr(tcg_ctx, 1);
     fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
     tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, a->vd),
                        vfp_reg_offset(1, a->vn),
@@ -101,7 +101,7 @@ static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
     }
 
     opr_sz = (1 + a->q) * 8;
-    fpst = get_fpstatus_ptr(s, 1);
+    fpst = get_fpstatus_ptr(tcg_ctx, 1);
     fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
     tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, a->vd),
                        vfp_reg_offset(1, a->vn),
@@ -208,7 +208,7 @@ static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
     fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx
                    : gen_helper_gvec_fcmlah_idx);
     opr_sz = (1 + a->q) * 8;
-    fpst = get_fpstatus_ptr(s, 1);
+    fpst = get_fpstatus_ptr(tcg_ctx, 1);
     tcg_gen_gvec_3_ptr(tcg_ctx, vfp_reg_offset(1, a->vd),
                        vfp_reg_offset(1, a->vn),
                        vfp_reg_offset(1, a->vm),
@@ -245,7 +245,7 @@ static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a)
 
     fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
     opr_sz = (1 + a->q) * 8;
-    fpst = get_fpstatus_ptr(s, 1);
+    fpst = get_fpstatus_ptr(tcg_ctx, 1);
     tcg_gen_gvec_3_ool(tcg_ctx, vfp_reg_offset(1, a->vd),
                        vfp_reg_offset(1, a->vn),
                        vfp_reg_offset(1, a->rm),
@@ -1039,3 +1039,31 @@ DO_3SAME_PAIR(VPADD, padd_u)
 
 DO_3SAME_VQDMULH(VQDMULH, qdmulh)
 DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)
+
+/*
+ * Emits gen_<INSN>_3s(), which calls FUNC via gvec with the Neon fp status,
+ * and trans_<INSN>_fp_3s(); size == 1 is fp16, which we don't implement yet.
+ */
+#define DO_3S_FP_GVEC(INSN,FUNC)                                        \
+    static void gen_##INSN##_3s(TCGContext *s, unsigned vece, uint32_t rd_ofs,         \
+                                uint32_t rn_ofs, uint32_t rm_ofs,       \
+                                uint32_t oprsz, uint32_t maxsz)         \
+    {                                                                   \
+        TCGv_ptr fpst = get_fpstatus_ptr(s, 1);                         \
+        tcg_gen_gvec_3_ptr(s, rd_ofs, rn_ofs, rm_ofs, fpst,             \
+                           oprsz, maxsz, 0, FUNC);                      \
+        tcg_temp_free_ptr(s, fpst);                                     \
+    }                                                                   \
+    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a)     \
+    {                                                                   \
+        if (a->size != 0) {                                             \
+            /* TODO fp16 support */                                     \
+            return false;                                               \
+        }                                                               \
+        return do_3same(s, a, gen_##INSN##_3s);                         \
+    }
+
+
+DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s)
+DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s)
+DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s)
diff --git a/qemu/target/arm/translate-sve.c b/qemu/target/arm/translate-sve.c
index 4d30fd1f..969231d0 100644
--- a/qemu/target/arm/translate-sve.c
+++ b/qemu/target/arm/translate-sve.c
@@ -3598,7 +3598,7 @@ static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
     if (sve_access_check(s)) {
         TCGContext *tcg_ctx = s->uc->tcg_ctx;
         unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = get_fpstatus_ptr(s, a->esz == MO_16);
+        TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
         tcg_gen_gvec_4_ptr(tcg_ctx, vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            vec_full_reg_offset(s, a->rm),
@@ -3625,7 +3625,7 @@ static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
     if (sve_access_check(s)) {
         TCGContext *tcg_ctx = s->uc->tcg_ctx;
         unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = get_fpstatus_ptr(s, a->esz == MO_16);
+        TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
         tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            vec_full_reg_offset(s, a->rm),
@@ -3658,7 +3658,7 @@ static void do_reduce(DisasContext *s, arg_rpr_esz *a,
 
     tcg_gen_addi_ptr(tcg_ctx, t_zn, tcg_ctx->cpu_env, vec_full_reg_offset(s, a->rn));
     tcg_gen_addi_ptr(tcg_ctx, t_pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, a->pg));
-    status = get_fpstatus_ptr(s, a->esz == MO_16);
+    status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
 
     fn(tcg_ctx, temp, t_zn, t_pg, status, t_desc);
     tcg_temp_free_ptr(tcg_ctx, t_zn);
@@ -3701,7 +3701,7 @@ static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
     unsigned vsz = vec_full_reg_size(s);
-    TCGv_ptr status = get_fpstatus_ptr(s, a->esz == MO_16);
+    TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
 
     tcg_gen_gvec_2_ptr(tcg_ctx, vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
@@ -3750,7 +3750,7 @@ static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
 {
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
     unsigned vsz = vec_full_reg_size(s);
-    TCGv_ptr status = get_fpstatus_ptr(s, a->esz == MO_16);
+    TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
 
     tcg_gen_gvec_3_ptr(tcg_ctx, pred_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
@@ -3803,7 +3803,7 @@ static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
     if (sve_access_check(s)) {
         TCGContext *tcg_ctx = s->uc->tcg_ctx;
         unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = get_fpstatus_ptr(s, a->esz == MO_16);
+        TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
         tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            vec_full_reg_offset(s, a->rm),
@@ -3844,7 +3844,7 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
     t_pg = tcg_temp_new_ptr(tcg_ctx);
     tcg_gen_addi_ptr(tcg_ctx, t_rm, tcg_ctx->cpu_env, vec_full_reg_offset(s, a->rm));
     tcg_gen_addi_ptr(tcg_ctx, t_pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, a->pg));
-    t_fpst = get_fpstatus_ptr(s, a->esz == MO_16);
+    t_fpst = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
     t_desc = tcg_const_i32(tcg_ctx, simd_desc(vsz, vsz, 0));
 
     fns[a->esz - 1](tcg_ctx, t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
@@ -3872,7 +3872,7 @@ static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
     if (sve_access_check(s)) {
         TCGContext *tcg_ctx = s->uc->tcg_ctx;
         unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = get_fpstatus_ptr(s, a->esz == MO_16);
+        TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
         tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            vec_full_reg_offset(s, a->rm),
@@ -3915,7 +3915,7 @@ static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
     if (sve_access_check(s)) {
         TCGContext *tcg_ctx = s->uc->tcg_ctx;
         unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = get_fpstatus_ptr(s, a->esz == MO_16);
+        TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
         tcg_gen_gvec_4_ptr(tcg_ctx, vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            vec_full_reg_offset(s, a->rm),
@@ -3968,7 +3968,7 @@ static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
     tcg_gen_addi_ptr(tcg_ctx, t_zn, tcg_ctx->cpu_env, vec_full_reg_offset(s, zn));
     tcg_gen_addi_ptr(tcg_ctx, t_pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, pg));
 
-    status = get_fpstatus_ptr(s, is_fp16);
+    status = get_fpstatus_ptr(tcg_ctx, is_fp16);
     desc = tcg_const_i32(tcg_ctx, simd_desc(vsz, vsz, 0));
     fn(tcg_ctx, t_zd, t_zn, t_pg, scalar, status, desc);
 
@@ -4034,7 +4034,7 @@ static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
     if (sve_access_check(s)) {
         TCGContext *tcg_ctx = s->uc->tcg_ctx;
         unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = get_fpstatus_ptr(s, a->esz == MO_16);
+        TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
         tcg_gen_gvec_4_ptr(tcg_ctx, pred_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            vec_full_reg_offset(s, a->rm),
@@ -4079,7 +4079,7 @@ static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
     if (sve_access_check(s)) {
         TCGContext *tcg_ctx = s->uc->tcg_ctx;
         unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = get_fpstatus_ptr(s, a->esz == MO_16);
+        TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
         tcg_gen_gvec_4_ptr(tcg_ctx, vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            vec_full_reg_offset(s, a->rm),
@@ -4100,7 +4100,7 @@ static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
     if (sve_access_check(s)) {
         TCGContext *tcg_ctx = s->uc->tcg_ctx;
         unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = get_fpstatus_ptr(s, a->esz == MO_16);
+        TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
         tcg_gen_gvec_5_ptr(tcg_ctx, vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            vec_full_reg_offset(s, a->rm),
@@ -4145,7 +4145,7 @@ static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
     if (sve_access_check(s)) {
         TCGContext *tcg_ctx = s->uc->tcg_ctx;
         unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = get_fpstatus_ptr(s, a->esz == MO_16);
+        TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
         tcg_gen_gvec_5_ptr(tcg_ctx, vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            vec_full_reg_offset(s, a->rm),
@@ -4169,7 +4169,7 @@ static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
     if (sve_access_check(s)) {
         TCGContext *tcg_ctx = s->uc->tcg_ctx;
         unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = get_fpstatus_ptr(s, a->esz == MO_16);
+        TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
         tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            vec_full_reg_offset(s, a->rm),
@@ -4191,7 +4191,7 @@ static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
     if (sve_access_check(s)) {
         TCGContext *tcg_ctx = s->uc->tcg_ctx;
         unsigned vsz = vec_full_reg_size(s);
-        TCGv_ptr status = get_fpstatus_ptr(s, is_fp16);
+        TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, is_fp16);
         tcg_gen_gvec_3_ptr(tcg_ctx, vec_full_reg_offset(s, rd),
                            vec_full_reg_offset(s, rn),
                            pred_full_reg_offset(s, pg),
@@ -4338,7 +4338,7 @@ static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
         TCGContext *tcg_ctx = s->uc->tcg_ctx;
         unsigned vsz = vec_full_reg_size(s);
         TCGv_i32 tmode = tcg_const_i32(tcg_ctx, mode);
-        TCGv_ptr status = get_fpstatus_ptr(s, a->esz == MO_16);
+        TCGv_ptr status = get_fpstatus_ptr(tcg_ctx, a->esz == MO_16);
 
         gen_helper_set_rmode(tcg_ctx, tmode, tmode, status);
 
diff --git a/qemu/target/arm/translate-vfp.inc.c b/qemu/target/arm/translate-vfp.inc.c
index bd6342cd..0b8ab0a3 100644
--- a/qemu/target/arm/translate-vfp.inc.c
+++ b/qemu/target/arm/translate-vfp.inc.c
@@ -368,7 +368,7 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
         return true;
     }
 
-    fpst = get_fpstatus_ptr(s, 0);
+    fpst = get_fpstatus_ptr(tcg_ctx, 0);
 
     tcg_rmode = tcg_const_i32(tcg_ctx, arm_rmode_to_sf(rounding));
     gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst);
@@ -432,7 +432,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
         return true;
     }
 
-    fpst = get_fpstatus_ptr(s, 0);
+    fpst = get_fpstatus_ptr(tcg_ctx, 0);
 
     tcg_shift = tcg_const_i32(tcg_ctx, 0);
 
@@ -1253,7 +1253,7 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
     f0 = tcg_temp_new_i32(tcg_ctx);
     f1 = tcg_temp_new_i32(tcg_ctx);
     fd = tcg_temp_new_i32(tcg_ctx);
-    fpst = get_fpstatus_ptr(s, 0);
+    fpst = get_fpstatus_ptr(tcg_ctx, 0);
 
     neon_load_reg32(s, f0, vn);
     neon_load_reg32(s, f1, vm);
@@ -1337,7 +1337,7 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
     f0 = tcg_temp_new_i64(tcg_ctx);
     f1 = tcg_temp_new_i64(tcg_ctx);
     fd = tcg_temp_new_i64(tcg_ctx);
-    fpst = get_fpstatus_ptr(s, 0);
+    fpst = get_fpstatus_ptr(tcg_ctx, 0);
 
     neon_load_reg64(s, f0, vn);
     neon_load_reg64(s, f1, vm);
@@ -1823,7 +1823,7 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
         /* VFNMA, VFNMS */
         gen_helper_vfp_negs(tcg_ctx, vd, vd);
     }
-    fpst = get_fpstatus_ptr(s, 0);
+    fpst = get_fpstatus_ptr(tcg_ctx, 0);
     gen_helper_vfp_muladds(tcg_ctx, vd, vn, vm, vd, fpst);
     neon_store_reg32(s, vd, a->vd);
 
@@ -1915,7 +1915,7 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
         /* VFNMA, VFNMS */
         gen_helper_vfp_negd(tcg_ctx, vd, vd);
     }
-    fpst = get_fpstatus_ptr(s, 0);
+    fpst = get_fpstatus_ptr(tcg_ctx, 0);
     gen_helper_vfp_muladdd(tcg_ctx, vd, vn, vm, vd, fpst);
     neon_store_reg64(s, vd, a->vd);
 
@@ -2204,7 +2204,7 @@ static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
         return true;
     }
 
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     ahp_mode = get_ahp_flag(s);
     tmp = tcg_temp_new_i32(tcg_ctx);
     /* The T bit tells us if we want the low or high 16 bits of Vm */
@@ -2243,7 +2243,7 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
         return true;
     }
 
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     ahp_mode = get_ahp_flag(s);
     tmp = tcg_temp_new_i32(tcg_ctx);
     /* The T bit tells us if we want the low or high 16 bits of Vm */
@@ -2273,7 +2273,7 @@ static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
         return true;
     }
 
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     ahp_mode = get_ahp_flag(s);
     tmp = tcg_temp_new_i32(tcg_ctx);
 
@@ -2311,7 +2311,7 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
         return true;
     }
 
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     ahp_mode = get_ahp_flag(s);
     tmp = tcg_temp_new_i32(tcg_ctx);
     vm = tcg_temp_new_i64(tcg_ctx);
@@ -2342,7 +2342,7 @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
 
     tmp = tcg_temp_new_i32(tcg_ctx);
     neon_load_reg32(s, tmp, a->vm);
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     gen_helper_rints(tcg_ctx, tmp, tmp, fpst);
     neon_store_reg32(s, tmp, a->vd);
     tcg_temp_free_ptr(tcg_ctx, fpst);
@@ -2375,7 +2375,7 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
 
     tmp = tcg_temp_new_i64(tcg_ctx);
     neon_load_reg64(s, tmp, a->vm);
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     gen_helper_rintd(tcg_ctx, tmp, tmp, fpst);
     neon_store_reg64(s, tmp, a->vd);
     tcg_temp_free_ptr(tcg_ctx, fpst);
@@ -2400,7 +2400,7 @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
 
     tmp = tcg_temp_new_i32(tcg_ctx);
     neon_load_reg32(s, tmp, a->vm);
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     tcg_rmode = tcg_const_i32(tcg_ctx, float_round_to_zero);
     gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst);
     gen_helper_rints(tcg_ctx, tmp, tmp, fpst);
@@ -2438,7 +2438,7 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
 
     tmp = tcg_temp_new_i64(tcg_ctx);
     neon_load_reg64(s, tmp, a->vm);
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     tcg_rmode = tcg_const_i32(tcg_ctx, float_round_to_zero);
     gen_helper_set_rmode(tcg_ctx, tcg_rmode, tcg_rmode, fpst);
     gen_helper_rintd(tcg_ctx, tmp, tmp, fpst);
@@ -2466,7 +2466,7 @@ static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
 
     tmp = tcg_temp_new_i32(tcg_ctx);
     neon_load_reg32(s, tmp, a->vm);
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     gen_helper_rints_exact(tcg_ctx, tmp, tmp, fpst);
     neon_store_reg32(s, tmp, a->vd);
     tcg_temp_free_ptr(tcg_ctx, fpst);
@@ -2499,7 +2499,7 @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
 
     tmp = tcg_temp_new_i64(tcg_ctx);
     neon_load_reg64(s, tmp, a->vm);
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     gen_helper_rintd_exact(tcg_ctx, tmp, tmp, fpst);
     neon_store_reg64(s, tmp, a->vd);
     tcg_temp_free_ptr(tcg_ctx, fpst);
@@ -2581,7 +2581,7 @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
 
     vm = tcg_temp_new_i32(tcg_ctx);
     neon_load_reg32(s, vm, a->vm);
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     if (a->s) {
         /* i32 -> f32 */
         gen_helper_vfp_sitos(tcg_ctx, vm, vm, fpst);
@@ -2618,7 +2618,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
     vm = tcg_temp_new_i32(tcg_ctx);
     vd = tcg_temp_new_i64(tcg_ctx);
     neon_load_reg32(s, vm, a->vm);
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     if (a->s) {
         /* i32 -> f64 */
         gen_helper_vfp_sitod(tcg_ctx, vd, vm, fpst);
@@ -2686,7 +2686,7 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
     vd = tcg_temp_new_i32(tcg_ctx);
     neon_load_reg32(s, vd, a->vd);
 
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     shift = tcg_const_i32(tcg_ctx, frac_bits);
 
     /* Switch on op:U:sx bits */
@@ -2752,7 +2752,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
     vd = tcg_temp_new_i64(tcg_ctx);
     neon_load_reg64(s, vd, a->vd);
 
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     shift = tcg_const_i32(tcg_ctx, frac_bits);
 
     /* Switch on op:U:sx bits */
@@ -2806,7 +2806,7 @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
         return true;
     }
 
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     vm = tcg_temp_new_i32(tcg_ctx);
     neon_load_reg32(s, vm, a->vm);
 
@@ -2849,7 +2849,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
         return true;
     }
 
-    fpst = get_fpstatus_ptr(s, false);
+    fpst = get_fpstatus_ptr(tcg_ctx, false);
     vm = tcg_temp_new_i64(tcg_ctx);
     vd = tcg_temp_new_i32(tcg_ctx);
     neon_load_reg64(s, vm, a->vm);
diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c
index fa763250..336e3ac3 100644
--- a/qemu/target/arm/translate.c
+++ b/qemu/target/arm/translate.c
@@ -1182,9 +1182,8 @@ static inline void gen_hlt(DisasContext *s, int imm)
     unallocated_encoding(s);
 }
 
-static TCGv_ptr get_fpstatus_ptr(DisasContext *s, int neon)
+static TCGv_ptr get_fpstatus_ptr(TCGContext *tcg_ctx, int neon)
 {
-    TCGContext *tcg_ctx = s->uc->tcg_ctx;
     TCGv_ptr statusptr = tcg_temp_new_ptr(tcg_ctx);
     int offset;
     if (neon) {
@@ -5572,6 +5571,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         switch (op) {
         case NEON_3R_FLOAT_ARITH:
             pairwise = (u && size < 2); /* if VPADD (float) */
+            if (!pairwise) {
+                return 1; /* handled by decodetree */
+            }
             break;
         case NEON_3R_FLOAT_MINMAX:
             pairwise = u; /* if VPMIN/VPMAX (float) */
@@ -5626,18 +5628,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         switch (op) {
         case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
         {
-            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
             switch ((u << 2) | size) {
-            case 0: /* VADD */
             case 4: /* VPADD */
                 gen_helper_vfp_adds(tcg_ctx, tmp, tmp, tmp2, fpstatus);
                 break;
-            case 2: /* VSUB */
-                gen_helper_vfp_subs(tcg_ctx, tmp, tmp, tmp2, fpstatus);
-                break;
-            case 6: /* VABD */
-                gen_helper_neon_abd_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus);
-                break;
             default:
                 abort();
             }
@@ -5646,7 +5641,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         }
         case NEON_3R_FLOAT_MULTIPLY:
         {
-            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
             gen_helper_vfp_muls(tcg_ctx, tmp, tmp, tmp2, fpstatus);
             if (!u) {
                 tcg_temp_free_i32(tcg_ctx, tmp2);
@@ -5662,7 +5657,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         }
         case NEON_3R_FLOAT_CMP:
         {
-            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
             if (!u) {
                 gen_helper_neon_ceq_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus);
             } else {
@@ -5677,7 +5672,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         }
         case NEON_3R_FLOAT_ACMP:
         {
-            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
             if (size == 0) {
                 gen_helper_neon_acge_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus);
             } else {
@@ -5688,7 +5683,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         }
         case NEON_3R_FLOAT_MINMAX:
         {
-            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
             if (size == 0) {
                 gen_helper_vfp_maxs(tcg_ctx, tmp, tmp, tmp2, fpstatus);
             } else {
@@ -5700,7 +5695,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         case NEON_3R_FLOAT_MISC:
             if (u) {
                 /* VMAXNM/VMINNM */
-                TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                 if (size == 0) {
                     gen_helper_vfp_maxnums(tcg_ctx, tmp, tmp, tmp2, fpstatus);
                 } else {
@@ -5718,7 +5713,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         case NEON_3R_VFM_VQRDMLSH:
         {
             /* VFMA, VFMS: fused multiply-add */
-            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
             TCGv_i32 tmp3 = neon_load_reg(s, rd, pass);
             if (size) {
                 /* VFMS */
@@ -6070,7 +6065,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                  * hence this 32-shift where the ARM ARM has 64-imm6.
                  */
                 shift = 32 - shift;
-                fpst = get_fpstatus_ptr(s, 1);
+                fpst = get_fpstatus_ptr(tcg_ctx, 1);
                 shiftv = tcg_const_i32(tcg_ctx, shift);
                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
                     TCGv_i32 tmpf = neon_load_reg(s, rm, pass);
@@ -6431,7 +6426,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                                 gen_helper_neon_qrdmulh_s32(tcg_ctx, tmp, tcg_ctx->cpu_env, tmp, tmp2);
                             }
                         } else if (op & 1) {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                             gen_helper_vfp_muls(tcg_ctx, tmp, tmp, tmp2, fpstatus);
                             tcg_temp_free_ptr(tcg_ctx, fpstatus);
                         } else {
@@ -6452,7 +6447,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                                 break;
                             case 1:
                             {
-                                TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                                TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                                 gen_helper_vfp_adds(tcg_ctx, tmp, tmp, tmp2, fpstatus);
                                 tcg_temp_free_ptr(tcg_ctx, fpstatus);
                                 break;
@@ -6462,7 +6457,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                                 break;
                             case 5:
                             {
-                                TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                                TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                                 gen_helper_vfp_subs(tcg_ctx, tmp, tmp2, tmp, fpstatus);
                                 tcg_temp_free_ptr(tcg_ctx, fpstatus);
                                 break;
@@ -6754,7 +6749,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                         q || (rm & 1)) {
                         return 1;
                     }
-                    fpst = get_fpstatus_ptr(s, true);
+                    fpst = get_fpstatus_ptr(tcg_ctx, true);
                     ahp = get_ahp_flag(s);
                     tmp = neon_load_reg(s, rm, 0);
                     gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp);
@@ -6784,7 +6779,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                         q || (rd & 1)) {
                         return 1;
                     }
-                    fpst = get_fpstatus_ptr(s, true);
+                    fpst = get_fpstatus_ptr(tcg_ctx, true);
                     ahp = get_ahp_flag(s);
                     tmp3 = tcg_temp_new_i32(tcg_ctx);
                     tmp = neon_load_reg(s, rm, 0);
@@ -6952,7 +6947,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                             break;
                         case NEON_2RM_VCGT0_F:
                         {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                             tmp2 = tcg_const_i32(tcg_ctx, 0);
                             gen_helper_neon_cgt_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus);
                             tcg_temp_free_i32(tcg_ctx, tmp2);
@@ -6961,7 +6956,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                         }
                         case NEON_2RM_VCGE0_F:
                         {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                             tmp2 = tcg_const_i32(tcg_ctx, 0);
                             gen_helper_neon_cge_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus);
                             tcg_temp_free_i32(tcg_ctx, tmp2);
@@ -6970,7 +6965,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                         }
                         case NEON_2RM_VCEQ0_F:
                         {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                             tmp2 = tcg_const_i32(tcg_ctx, 0);
                             gen_helper_neon_ceq_f32(tcg_ctx, tmp, tmp, tmp2, fpstatus);
                             tcg_temp_free_i32(tcg_ctx, tmp2);
@@ -6979,7 +6974,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                         }
                         case NEON_2RM_VCLE0_F:
                         {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                             tmp2 = tcg_const_i32(tcg_ctx, 0);
                             gen_helper_neon_cge_f32(tcg_ctx, tmp, tmp2, tmp, fpstatus);
                             tcg_temp_free_i32(tcg_ctx, tmp2);
@@ -6988,7 +6983,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                         }
                         case NEON_2RM_VCLT0_F:
                         {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                             tmp2 = tcg_const_i32(tcg_ctx, 0);
                             gen_helper_neon_cgt_f32(tcg_ctx, tmp, tmp2, tmp, fpstatus);
                             tcg_temp_free_i32(tcg_ctx, tmp2);
@@ -7021,7 +7016,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                         case NEON_2RM_VRINTZ:
                         {
                             TCGv_i32 tcg_rmode;
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                             int rmode;
 
                             if (op == NEON_2RM_VRINTZ) {
@@ -7042,7 +7037,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                         }
                         case NEON_2RM_VRINTX:
                         {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                             gen_helper_rints_exact(tcg_ctx, tmp, tmp, fpstatus);
                             tcg_temp_free_ptr(tcg_ctx, fpstatus);
                             break;
@@ -7057,7 +7052,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                         case NEON_2RM_VCVTMS:
                         {
                             bool is_signed = !extract32(insn, 7, 1);
-                            TCGv_ptr fpst = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpst = get_fpstatus_ptr(tcg_ctx, 1);
                             TCGv_i32 tcg_rmode, tcg_shift;
                             int rmode = fp_decode_rm[extract32(insn, 8, 2)];
 
@@ -7089,42 +7084,42 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                             break;
                         case NEON_2RM_VRECPE_F:
                         {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                             gen_helper_recpe_f32(tcg_ctx, tmp, tmp, fpstatus);
                             tcg_temp_free_ptr(tcg_ctx, fpstatus);
                             break;
                         }
                         case NEON_2RM_VRSQRTE_F:
                         {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                             gen_helper_rsqrte_f32(tcg_ctx, tmp, tmp, fpstatus);
                             tcg_temp_free_ptr(tcg_ctx, fpstatus);
                             break;
                         }
                         case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
                         {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                             gen_helper_vfp_sitos(tcg_ctx, tmp, tmp, fpstatus);
                             tcg_temp_free_ptr(tcg_ctx, fpstatus);
                             break;
                         }
                         case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
                         {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                             gen_helper_vfp_uitos(tcg_ctx, tmp, tmp, fpstatus);
                             tcg_temp_free_ptr(tcg_ctx, fpstatus);
                             break;
                         }
                         case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
                         {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                             gen_helper_vfp_tosizs(tcg_ctx, tmp, tmp, fpstatus);
                             tcg_temp_free_ptr(tcg_ctx, fpstatus);
                             break;
                         }
                         case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
                         {
-                            TCGv_ptr fpstatus = get_fpstatus_ptr(s, 1);
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(tcg_ctx, 1);
                             gen_helper_vfp_touizs(tcg_ctx, tmp, tmp, fpstatus);
                             tcg_temp_free_ptr(tcg_ctx, fpstatus);
                             break;
diff --git a/qemu/target/arm/vec_helper.c b/qemu/target/arm/vec_helper.c
index a3db18af..4b1ade24 100644
--- a/qemu/target/arm/vec_helper.c
+++ b/qemu/target/arm/vec_helper.c
@@ -692,6 +692,11 @@ static float64 float64_ftsmul(float64 op1, uint64_t op2, float_status *stat)
     return result;
 }
 
+static float32 float32_abd(float32 op1, float32 op2, float_status *stat)
+{
+    return float32_abs(float32_sub(op1, op2, stat)); /* absolute difference */
+}
+
 #define DO_3OP(NAME, FUNC, TYPE) \
 void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
 {                                                                          \
@@ -719,6 +724,8 @@ DO_3OP(gvec_ftsmul_h, float16_ftsmul, float16)
 DO_3OP(gvec_ftsmul_s, float32_ftsmul, float32)
 DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64)
 
+DO_3OP(gvec_fabd_s, float32_abd, float32)
+
 #ifdef TARGET_AARCH64
 
 DO_3OP(gvec_recps_h, helper_recpsf_f16, float16)
diff --git a/qemu/x86_64.h b/qemu/x86_64.h
index 72eac9e8..1147fdd0 100644
--- a/qemu/x86_64.h
+++ b/qemu/x86_64.h
@@ -1152,6 +1152,7 @@
 #define helper_gvec_eq32 helper_gvec_eq32_x86_64
 #define helper_gvec_eq64 helper_gvec_eq64_x86_64
 #define helper_gvec_eqv helper_gvec_eqv_x86_64
+#define helper_gvec_fabd_s helper_gvec_fabd_s_x86_64
 #define helper_gvec_fadd_d helper_gvec_fadd_d_x86_64
 #define helper_gvec_fadd_h helper_gvec_fadd_h_x86_64
 #define helper_gvec_fadd_s helper_gvec_fadd_s_x86_64
@@ -1466,7 +1467,6 @@
 #define helper_msa_st_h helper_msa_st_h_x86_64
 #define helper_msa_st_w helper_msa_st_w_x86_64
 #define helper_msr_banked helper_msr_banked_x86_64
-#define helper_neon_abd_f32 helper_neon_abd_f32_x86_64
 #define helper_neon_abdl_s16 helper_neon_abdl_s16_x86_64
 #define helper_neon_abdl_s32 helper_neon_abdl_s32_x86_64
 #define helper_neon_abdl_s64 helper_neon_abdl_s64_x86_64