target/arm: optimize indirect branches

Speed up indirect branches by jumping to the target if it is valid.

Softmmu measurements (see later commit for user-mode results):

Note: baseline (i.e. speedup == 1x) is QEMU v2.9.0.

- Impact on Boot time

| setup | ARM debian jessie boot+shutdown time | stddev |
|--------+--------------------------------------+--------|
| v2.9.0 | 8.84 | 0.07 |
| +cross | 8.85 | 0.03 |
| +jr | 8.83 | 0.06 |

- NBench, arm-softmmu (debian jessie guest). Host: Intel i7-4790K @ 4.00GHz

1.3x +-+-------------------------------------------------------------------------------------------------------------+-+
| |
| cross #### |
1.25x +cross+jr..........................................................#++#.........................................+-+
| #### # # |
| +++# # # # |
| +++ **** # # # |
1.2x +-+...................................####............*..*..#......#..#.........................................+-+
| **** # * * # # # #### |
| * * # * * # # # # # |
1.15x +-+................................*..*..#............*..*..#......#..#.....#..#................................+-+
| * * # * * # # # # # |
| * * # #### * * # # # # # |
| * * # # # * * # # # # # #### |
1.1x +-+................................*..*..#......#..#..*..*..#......#..#.....#..#.........................#..#...+-+
| * * # # # * * # # # # # # # |
| * * # # # * * # # # # # # # |
1.05x +-+..........................####..*..*..#......#..#..*..*..#......#..#.....#..#......+++............*****..#...+-+
| ***** # * * # # # * * # ***** # # # +++ | ****### * * # |
| *+++* # * * # # # * * # *+++* # **** # *****### * * # * * # |
| *****### +++#### * * # * * # ***** # * * # * * # * * # * | *++# * * # * * # |
1x +-++-+*+++*-+#++****++#++*+-+*++#+-*++*++#-+*+++*-+#++*++*++#++*+-+*++#+-*++*++#-+*+++*-+#++*++*++#++*+-+*++#+-++-+
| * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # |
| * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # |
0.95x +-+---*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###--****###--*****###---+-+
ASSIGNMENT  BITFIELD  FOURIER  FP EMULATION  HUFFMAN  IDEA  LU DECOMPOSITION  NEURAL NET  NUMERIC SORT  STRING SORT  hmean
png: http://imgur.com/eOLmZNR

NB. 'cross' represents the previous commit.

Backports commit 8a6b28c7b5104263344508df0f4bce97f22cfcaf from qemu
This commit is contained in:
Emilio G. Cota 2018-03-02 21:16:56 -05:00 committed by Lioncash
parent 5a42602b92
commit 9aaad9ed27
No known key found for this signature in database
GPG key ID: 4E3C3CC1031BA9C7
2 changed files with 22 additions and 8 deletions

View file

@@ -1222,7 +1222,7 @@ static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
gen_set_condexec(s);
gen_set_pc_im(s, s->pc - offset);
gen_exception_internal(s, excp);
s->is_jmp = DISAS_JUMP;
s->is_jmp = DISAS_EXC;
}
static void gen_exception_insn(DisasContext *s, int offset, int excp,
@@ -1231,7 +1231,7 @@ static void gen_exception_insn(DisasContext *s, int offset, int excp,
gen_set_condexec(s);
gen_set_pc_im(s, s->pc - offset);
gen_exception(s, excp, syn, target_el);
s->is_jmp = DISAS_JUMP;
s->is_jmp = DISAS_EXC;
}
/* Force a TB lookup after an instruction that changes the CPU state. */
@@ -1239,7 +1239,7 @@ static inline void gen_lookup_tb(DisasContext *s)
{
TCGContext *tcg_ctx = s->uc->tcg_ctx;
tcg_gen_movi_i32(tcg_ctx, tcg_ctx->cpu_R[15], s->pc & ~1);
s->is_jmp = DISAS_JUMP;
s->is_jmp = DISAS_EXIT;
}
static inline void gen_hlt(DisasContext *s, int imm)
@@ -4257,7 +4257,17 @@ static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
#endif
}
static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
static void gen_goto_ptr(DisasContext *s)
{
TCGContext *tcg_ctx = s->uc->tcg_ctx;
TCGv addr = tcg_temp_new(tcg_ctx);
tcg_gen_extu_i32_tl(tcg_ctx, addr, tcg_ctx->cpu_R[15]);
tcg_gen_lookup_and_goto_ptr(tcg_ctx, addr);
tcg_temp_free(tcg_ctx, addr);
}
static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
{
TCGContext *tcg_ctx = s->uc->tcg_ctx;
@@ -4266,11 +4276,8 @@ static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
gen_set_pc_im(s, dest);
tcg_gen_exit_tb(tcg_ctx, (uintptr_t)s->tb + n);
} else {
TCGv addr = tcg_temp_new(tcg_ctx);
gen_set_pc_im(s, dest);
tcg_gen_extu_i32_tl(tcg_ctx, addr, tcg_ctx->cpu_R[15]);
tcg_gen_lookup_and_goto_ptr(tcg_ctx, addr);
tcg_temp_free(tcg_ctx, addr);
gen_goto_ptr(s);
}
}
@@ -12326,11 +12333,14 @@ tb_end:
gen_set_pc_im(dc, dc->pc);
/* fall through */
case DISAS_JUMP:
gen_goto_ptr(dc);
break;
default:
/* indicate that the hash table must be used to find the next TB */
tcg_gen_exit_tb(tcg_ctx, 0);
break;
case DISAS_TB_JUMP:
case DISAS_EXC:
/* nothing more to generate */
break;
case DISAS_WFI:

View file

@@ -138,6 +138,10 @@ static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
* custom end-of-TB code)
*/
#define DISAS_BX_EXCRET 11
/* For instructions which want an immediate exit to the main loop,
* as opposed to attempting to use lookup_and_goto_ptr.
*/
#define DISAS_EXIT 12
#ifdef TARGET_AARCH64
void a64_translate_init(struct uc_struct *uc);