target-i386: remove helper_lock()

It's been superseded by the atomic helpers. The use of the atomic helpers provides a significant performance and scalability improvement. Below is the result of running the atomic_add-test microbenchmark with: $ x86_64-linux-user/qemu-x86_64 tests/atomic_add-bench -o 5000000 -r $r -n $n , where $n is the number of threads and $r is the allowed range for the additions. The scenarios measured are: - atomic: implements x86' ADDL with the atomic_add helper (i.e. this patchset) - cmpxchg: implement x86' ADDL with a TCG loop using the cmpxchg helper - master: before this patchset Results sorted in ascending range, i.e. descending degree of contention. Y axis is Throughput in Mops/s. Tests are run on an AMD machine with 64 Opteron 6376 cores. atomic_add-bench: 5000000 ops/thread, [0,1] range 25 ++---------+----------+---------+----------+----------+----------+---++ + atomic +-E--+ + + + + + | |cmpxchg +-H--+ | 20 +Emaster +-N--+ ++ || | |++ | || | 15 +++ ++ |N| | |+| | 10 ++| ++ |+|+ | | | -+E+------ +++ ---+E+------+E+------+E+-----+E+------+E| |+E+E+- +++ +E+------+E+-- | 5 ++|+ ++ |+N+H+--- +++ | ++++N+--+H++----+++ + +++ --++H+------+H+------+H++----+H+---+--- | 0 ++---------+-----H----+---H-----+----------+----------+----------+---H+ 0 10 20 30 40 50 60 Number of threads atomic_add-bench: 5000000 ops/thread, [0,2] range 25 ++---------+----------+---------+----------+----------+----------+---++ ++atomic +-E--+ + + + + + | |cmpxchg +-H--+ | 20 ++master +-N--+ ++ |E| | |++ | ||E | 15 ++| ++ |N|| | |+|| ---+E+------+E+-----+E+------+E| 10 ++| | ---+E+------+E+-----+E+--- +++ +++ ||H+E+--+E+-- | |+++++ | | || | 5 ++|+H+-- +++ ++ |+N+ - ---+H+------+H+------ | + +N+--+H++----+H+---+--+H+----++H+--- + + +H+---+--+H| 0 ++---------+----------+---------+----------+----------+----------+---++ 0 10 20 30 40 50 60 Number of threads atomic_add-bench: 5000000 ops/thread, [0,8] range 40 ++---------+----------+---------+----------+----------+----------+---++ ++atomic +-E--+ + + + + + | 35 +cmpxchg +-H--+ ++ | master +-N--+ ---+E+------+E+------+E+-----+E+------+E| 30 ++| ---+E+-- +++ ++ | | -+E+--- | 25 ++E ---- +++ ++ |+++++ -+E+ | 20 +E+ E-- +++ ++ |H|+++ | |+| +H+------- | 15 ++H+ ---+++ +H+------ ++ |N++H+-- +++--- +H+------++| 10 ++ +++ - +++ ---+H+ +++ +H+ | | +H+-----+H+------+H+-- | 5 ++| +++ ++ ++N+N+--+N++ + + + + + | 0 ++---------+----------+---------+----------+----------+----------+---++ 0 10 20 30 40 50 60 Number of threads atomic_add-bench: 5000000 ops/thread, [0,128] range 160 ++---------+---------+----------+---------+----------+----------+---++ + atomic +-E--+ + + + + + | 140 +cmpxchg +-H--+ +++ +++ ++ | master +-N--+ E--------E------+E+------++| 120 ++ --| | +++ E+ | -- +++ +++ ++| 100 ++ - ++ | +++- +++ ++| 80 ++ -+E+ -+H+------+H+------H--------++ | ---- ---- +++ H| | ---+E+-----+E+- ---+H+ ++| 60 ++ +E+--- +++ ---+H+--- ++ | --+++ ---+H+-- | 40 ++ +E+-+H+--- ++ | +H+ | 20 +EE+ ++ +N+ + + + + + + | 0 ++N-N---N--+---------+----------+---------+----------+----------+---++ 0 10 20 30 40 50 60 Number of threads atomic_add-bench: 5000000 ops/thread, [0,1024] range 350 ++---------+---------+----------+---------+----------+----------+---++ + atomic +-E--+ + + + + + | 300 +cmpxchg +-H--+ +++ | master +-N--+ +++ || | +++ | ----E| 250 ++ | ----E---- ++ | ----E--- | ---+H| 200 ++ -+E+--- +++ ---+H+--- ++ | ---- -+H+-- | | +E+ +++ ---- +++ | 150 ++ ---+++ ---+H+- ++ | --- -+H+-- | 100 ++ ---+E+ ---- +++ ++ | +++ ---+E+-----+H+- | | -+E+------+H+-- | 50 ++ +E+ ++ +EE+ + + + + + + | 0 ++N-N---N--+---------+----------+---------+----------+----------+---++ 0 10 20 30 40 50 60 Number of threads hi-res: http://imgur.com/a/fMRmq For master I stopped measuring master after 8 threads, because there is little point in measuring the well-known performance collapse of a contended lock. Backports commit 37b995f6e7a1cb6fa378c5cd4217b9dd9e1fc98b from qemu
2026-05-06 16:13:18 +00:00 · 2018-02-27 23:41:58 -05:00 · 2018-02-27 23:41:58 -05:00 · 3dc16ebca3
parent 9d9b7dedac
commit 3dc16ebca3
3 changed files with 0 additions and 25 deletions
--- a/qemu/target-i386/helper.h
+++ b/qemu/target-i386/helper.h
@ -3,8 +3,6 @@ DEF_HELPER_4(uc_tracecode, void, i32, i32, ptr, i64)
 DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
 DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)

-DEF_HELPER_1(lock, void, env)
-DEF_HELPER_1(unlock, void, env)
 DEF_HELPER_3(write_eflags, void, env, tl, i32)
 DEF_HELPER_1(read_eflags, tl, env)
 DEF_HELPER_2(divb_AL, void, env, tl)
--- a/qemu/target-i386/mem_helper.c
+++ b/qemu/target-i386/mem_helper.c
@ -27,16 +27,6 @@

 #include "uc_priv.h"

-/* broken thread support */
-
-void helper_lock(CPUX86State *env)
-{
-}
-
-void helper_unlock(CPUX86State *env)
-{
-}
-
 void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0)
 {
    uintptr_t ra = GETPC();
--- a/qemu/target-i386/translate.c
+++ b/qemu/target-i386/translate.c
@ -5184,10 +5184,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
    s->aflag = aflag;
    s->dflag = dflag;

-    /* lock generation */
-    if (prefixes & PREFIX_LOCK)
-        gen_helper_lock(tcg_ctx, cpu_env);
-
    /* now check op code */
 reswitch:
    switch(b) {
@ -8947,9 +8943,6 @@ case 0x101:
    default:
        goto unknown_op;
    }
-    /* lock generation */
-    if (s->prefix & PREFIX_LOCK)
-        gen_helper_unlock(tcg_ctx, cpu_env);

    // Unicorn: patch the callback for the instruction size
    if (HOOK_EXISTS_BOUNDED(env->uc, UC_HOOK_CODE, pc_start)) {
@ -8975,15 +8968,9 @@ case 0x101:

    return s->pc;
 illegal_op:
-    if (s->prefix & PREFIX_LOCK)
-        gen_helper_unlock(tcg_ctx, cpu_env);
-    /* XXX: ensure that no lock was generated */
    gen_illegal_opcode(s);
    return s->pc;
 unknown_op:
-    if (s->prefix & PREFIX_LOCK)
-        gen_helper_unlock(tcg_ctx, cpu_env);
-    /* XXX: ensure that no lock was generated */
    gen_unknown_opcode(env, s);
    return s->pc;
 }