mirror of
				https://github.com/yuzu-emu/unicorn.git
				synced 2025-11-04 14:14:57 +00:00 
			
		
		
		
	target/arm: Implement v8.1M low-overhead-loop instructions
v8.1M's "low-overhead-loop" extension has three instructions for looping:

 * DLS (start of a do-loop)
 * WLS (start of a while-loop)
 * LE (end of a loop)

The loop-start instructions are both simple operations to start a loop whose iteration count (if any) is in LR. The loop-end instruction handles "decrement iteration count and jump back to loop start"; it also caches the information about the branch back to the start of the loop to improve performance of the branch on subsequent iterations.

As with the branch-future instructions, the architecture permits an implementation to discard the LO_BRANCH_INFO cache at any time, and QEMU takes the IMPDEF option to never set it in the first place (equivalent to discarding it immediately), because for us a "real" implementation would be unnecessary complexity.

(This implementation only provides the simple looping constructs; the vector extension MVE (Helium) adds some extra variants to handle looping across vectors. We'll add those later when we implement MVE.)

Backports commit b7226369721896ab9ef71544e4fe95b40710e05a
This commit is contained in:
		
							parent
							
								
									be197f9857
								
							
						
					
					
						commit
						3ae5543825
					
				| 
						 | 
					@ -659,4 +659,12 @@ BL               1111 0. .......... 11.1 ............         @branch24
 | 
				
			||||||
    BF           1111 0 boff:4 10 ----- 1110 - ---------- 1    # BF
 | 
					    BF           1111 0 boff:4 10 ----- 1110 - ---------- 1    # BF
 | 
				
			||||||
    BF           1111 0 boff:4 11 ----- 1110 0 0000000000 1    # BFX, BFLX
 | 
					    BF           1111 0 boff:4 11 ----- 1110 0 0000000000 1    # BFX, BFLX
 | 
				
			||||||
  ]
 | 
					  ]
 | 
				
			||||||
 | 
					  [
 | 
				
			||||||
 | 
					    # LE and WLS immediate
 | 
				
			||||||
 | 
					    %lob_imm 1:10 11:1 !function=times_2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    DLS          1111 0 0000 100     rn:4 1110 0000 0000 0001
 | 
				
			||||||
 | 
					    WLS          1111 0 0000 100     rn:4 1100 . .......... 1 imm=%lob_imm
 | 
				
			||||||
 | 
					    LE           1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm
 | 
				
			||||||
 | 
					  ]
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2562,17 +2562,23 @@ static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
 | 
				
			||||||
    s->base.is_jmp = DISAS_NORETURN;
 | 
					    s->base.is_jmp = DISAS_NORETURN;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void gen_jmp(DisasContext *s, uint32_t dest)
 | 
					/* Jump, specifying which TB number to use if we gen_goto_tb() */
 | 
				
			||||||
 | 
					static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    if (unlikely(is_singlestepping(s))) {
 | 
					    if (unlikely(is_singlestepping(s))) {
 | 
				
			||||||
        /* An indirect jump so that we still trigger the debug exception.  */
 | 
					        /* An indirect jump so that we still trigger the debug exception.  */
 | 
				
			||||||
        gen_set_pc_im(s, dest);
 | 
					        gen_set_pc_im(s, dest);
 | 
				
			||||||
        s->base.is_jmp = DISAS_JUMP;
 | 
					        s->base.is_jmp = DISAS_JUMP;
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
        gen_goto_tb(s, 0, dest);
 | 
					        gen_goto_tb(s, tbno, dest);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void gen_jmp(DisasContext *s, uint32_t dest)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    gen_jmp_tb(s, dest, 0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static inline void gen_mulxy(DisasContext *s, TCGv_i32 t0, TCGv_i32 t1, int x, int y)
 | 
					static inline void gen_mulxy(DisasContext *s, TCGv_i32 t0, TCGv_i32 t1, int x, int y)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
 | 
					    TCGContext *tcg_ctx = s->uc->tcg_ctx;
 | 
				
			||||||
| 
						 | 
					@ -8247,6 +8253,91 @@ static bool trans_BF(DisasContext *s, arg_BF *a)
 | 
				
			||||||
    return true;
 | 
					    return true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static bool trans_DLS(DisasContext *s, arg_DLS *a)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    /* M-profile low-overhead loop start */
 | 
				
			||||||
 | 
					    TCGv_i32 tmp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (!dc_isar_feature(aa32_lob, s)) {
 | 
				
			||||||
 | 
					        return false;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (a->rn == 13 || a->rn == 15) {
 | 
				
			||||||
 | 
					        /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
 | 
				
			||||||
 | 
					        return false;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* Not a while loop, no tail predication: just set LR to the count */
 | 
				
			||||||
 | 
					    tmp = load_reg(s, a->rn);
 | 
				
			||||||
 | 
					    store_reg(s, 14, tmp);
 | 
				
			||||||
 | 
					    return true;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static bool trans_WLS(DisasContext *s, arg_WLS *a)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    /* M-profile low-overhead while-loop start */
 | 
				
			||||||
 | 
					    TCGv_i32 tmp;
 | 
				
			||||||
 | 
					    TCGLabel *nextlabel;
 | 
				
			||||||
 | 
					    TCGContext *tcg_ctx = s->uc->tcg_ctx;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (!dc_isar_feature(aa32_lob, s)) {
 | 
				
			||||||
 | 
					        return false;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (a->rn == 13 || a->rn == 15) {
 | 
				
			||||||
 | 
					        /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
 | 
				
			||||||
 | 
					        return false;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (s->condexec_mask) {
 | 
				
			||||||
 | 
					        /*
 | 
				
			||||||
 | 
					         * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
 | 
				
			||||||
 | 
					         * we choose to UNDEF, because otherwise our use of
 | 
				
			||||||
 | 
					         * gen_goto_tb(1) would clash with the use of TB exit 1
 | 
				
			||||||
 | 
					         * in the dc->condjmp condition-failed codepath in
 | 
				
			||||||
 | 
					         * arm_tr_tb_stop() and we'd get an assertion.
 | 
				
			||||||
 | 
					         */
 | 
				
			||||||
 | 
					        return false;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    nextlabel = gen_new_label(tcg_ctx);
 | 
				
			||||||
 | 
					    tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_EQ, tcg_ctx->cpu_R[a->rn], 0, nextlabel);
 | 
				
			||||||
 | 
					    tmp = load_reg(s, a->rn);
 | 
				
			||||||
 | 
					    store_reg(s, 14, tmp);
 | 
				
			||||||
 | 
					    gen_jmp_tb(s, s->base.pc_next, 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    gen_set_label(tcg_ctx, nextlabel);
 | 
				
			||||||
 | 
					    gen_jmp(s, read_pc(s) + a->imm);
 | 
				
			||||||
 | 
					    return true;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static bool trans_LE(DisasContext *s, arg_LE *a)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    /*
 | 
				
			||||||
 | 
					     * M-profile low-overhead loop end. The architecture permits an
 | 
				
			||||||
 | 
					     * implementation to discard the LO_BRANCH_INFO cache at any time,
 | 
				
			||||||
 | 
					     * and we take the IMPDEF option to never set it in the first place
 | 
				
			||||||
 | 
					     * (equivalent to always discarding it immediately), because for QEMU
 | 
				
			||||||
 | 
					     * a "real" implementation would be complicated and wouldn't execute
 | 
				
			||||||
 | 
					     * any faster.
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    TCGv_i32 tmp;
 | 
				
			||||||
 | 
					    TCGContext *tcg_ctx = s->uc->tcg_ctx;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (!dc_isar_feature(aa32_lob, s)) {
 | 
				
			||||||
 | 
					        return false;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (!a->f) {
 | 
				
			||||||
 | 
					        /* Not loop-forever. If LR <= 1 this is the last loop: do nothing. */
 | 
				
			||||||
 | 
					        arm_gen_condlabel(s);
 | 
				
			||||||
 | 
					        tcg_gen_brcondi_i32(tcg_ctx, TCG_COND_LEU, tcg_ctx->cpu_R[14], 1, s->condlabel);
 | 
				
			||||||
 | 
					        /* Decrement LR */
 | 
				
			||||||
 | 
					        tmp = load_reg(s, 14);
 | 
				
			||||||
 | 
					        tcg_gen_addi_i32(tcg_ctx, tmp, tmp, -1);
 | 
				
			||||||
 | 
					        store_reg(s, 14, tmp);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    /* Jump back to the loop start */
 | 
				
			||||||
 | 
					    gen_jmp(s, read_pc(s) - a->imm);
 | 
				
			||||||
 | 
					    return true;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
 | 
					static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
 | 
					    TCGContext *tcg_ctx = s->uc->tcg_ctx;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue