Every vCPU now uses a separate set of TBs for each set of dynamic
tracing event state values. Each set of TBs can be used by any number of
vCPUs to maximize TB reuse when vCPUs have the same tracing state.
This feature is later used by tracetool to optimize tracing of guest
code events.
The maximum number of TB sets is defined as 2^E, where E is the number
of events that have the 'vcpu' property (their state is stored in
CPUState->trace_dstate).
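
For a concrete (hedged) illustration, suppose E = 3 events carry the
'vcpu' property: each vCPU's trace_dstate is then a 3-bit mask, so at
most 2^3 = 8 TB sets can coexist. NR_VCPU_EVENTS and the program below
are illustrative only, not part of this patch:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative stand-in for E, the number of 'vcpu' events. */
    #define NR_VCPU_EVENTS 3

    int main(void)
    {
        /* Each bit holds the dynamic state of one 'vcpu' event. */
        uint32_t trace_dstate = (1u << 0) | (1u << 2); /* events 0, 2 on */
        uint32_t nr_tb_sets = 1u << NR_VCPU_EVENTS;    /* 2^E = 8 */

        printf("state %#x selects TB set %u of %u\n",
               trace_dstate, trace_dstate, nr_tb_sets);
        return 0;
    }
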
For this to work, a change to the dynamic tracing state of a vCPU
forces it to flush its virtual TB cache (which is indexed by address
only) and fall back to the physical TB cache (which now includes the
vCPU's dynamic tracing state in its hashing function).
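
A minimal sketch of that two-level flow follows; the types (Vcpu, Tb)
and helpers (tb_jmp_hash, phys_htable_lookup) are simplified stand-ins,
not the actual QEMU definitions:

    #include <stddef.h>
    #include <stdint.h>

    #define JMP_CACHE_BITS 4
    #define JMP_CACHE_SIZE (1u << JMP_CACHE_BITS)

    typedef struct Tb {
        uint64_t pc;
        uint32_t flags;
        uint32_t trace_vcpu_dstate;
    } Tb;

    typedef struct Vcpu {
        uint32_t trace_dstate;            /* enabled 'vcpu' events */
        Tb *tb_jmp_cache[JMP_CACHE_SIZE]; /* virtual cache, pc-indexed */
    } Vcpu;

    static unsigned tb_jmp_hash(uint64_t pc)
    {
        return pc & (JMP_CACHE_SIZE - 1);
    }

    /* Stand-in for the physical hash table, whose key now includes the
     * tracing state (cf. tb_hash_func6 below); empty for brevity. */
    static Tb *phys_htable_lookup(uint64_t pc, uint32_t flags,
                                  uint32_t dstate)
    {
        (void)pc; (void)flags; (void)dstate;
        return NULL;
    }

    static Tb *lookup_tb(Vcpu *cpu, uint64_t pc, uint32_t flags)
    {
        Tb *tb = cpu->tb_jmp_cache[tb_jmp_hash(pc)];

        /* The virtual cache is indexed by pc alone, so a hit must be
         * re-validated against the current tracing state. */
        if (tb && tb->pc == pc && tb->flags == flags &&
            tb->trace_vcpu_dstate == cpu->trace_dstate) {
            return tb;
        }
        /* Miss or stale state: fall back to the physical table and
         * refill the virtual cache. */
        tb = phys_htable_lookup(pc, flags, cpu->trace_dstate);
        if (tb) {
            cpu->tb_jmp_cache[tb_jmp_hash(pc)] = tb;
        }
        return tb;
    }
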
Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-id: 149915775266.6295.10060144081246467690.stgit@frigg.lan
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
CPUArchState *env;
tb_page_addr_t phys_page1;
uint32_t flags;
+ uint32_t trace_vcpu_dstate;
};
static bool tb_cmp(const void *p, const void *d)
tb->page_addr[0] == desc->phys_page1 &&
tb->cs_base == desc->cs_base &&
tb->flags == desc->flags &&
+ tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
!atomic_read(&tb->invalid)) {
/* check next page if needed */
if (tb->page_addr[1] == -1) {
desc.env = (CPUArchState *)cpu->env_ptr;
desc.cs_base = cs_base;
desc.flags = flags;
+ desc.trace_vcpu_dstate = *cpu->trace_dstate;
desc.pc = pc;
phys_pc = get_page_addr_code(desc.env, pc);
desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
- h = tb_hash_func(phys_pc, pc, flags);
+ h = tb_hash_func(phys_pc, pc, flags, *cpu->trace_dstate);
return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
}
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
- tb->flags != flags)) {
+ tb->flags != flags ||
+ tb->trace_vcpu_dstate != *cpu->trace_dstate)) {
tb = tb_htable_lookup(cpu, pc, cs_base, flags);
if (!tb) {
#include "exec/tb-hash.h"
#include "translate-all.h"
#include "qemu/bitmap.h"
+#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "exec/log.h"
#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)
+/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
+QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
+ sizeof(((TranslationBlock *)0)->trace_vcpu_dstate)
+ * BITS_PER_BYTE);
+
/*
* L1 Mapping properties
*/
/* remove the TB from the hash list */
phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
- h = tb_hash_func(phys_pc, tb->pc, tb->flags);
+ h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate);
qht_remove(&tcg_ctx.tb_ctx.htable, tb, h);
/* remove the TB from the page list */
}
/* add in the hash table */
- h = tb_hash_func(phys_pc, tb->pc, tb->flags);
+ h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate);
qht_insert(&tcg_ctx.tb_ctx.htable, tb, h);
#ifdef DEBUG_TB_CHECK
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
+ tb->trace_vcpu_dstate = *cpu->trace_dstate;
tb->invalid = false;
#ifdef CONFIG_PROFILER
#define CF_USE_ICOUNT 0x20000
#define CF_IGNORE_ICOUNT 0x40000 /* Do not generate icount code */
+ /* Per-vCPU dynamic tracing state used to generate this TB */
+ uint32_t trace_vcpu_dstate;
+
uint16_t invalid;
void *tc_ptr; /* pointer to the translated code */
* contiguous in memory.
*/
static inline
-uint32_t tb_hash_func5(uint64_t a0, uint64_t b0, uint32_t e)
+uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f)
{
uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2;
uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2;
v4 *= PRIME32_1;
h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
- h32 += 20;
+ h32 += 24;
h32 += e * PRIME32_3;
h32 = rol32(h32, 17) * PRIME32_4;
+ h32 += f * PRIME32_3;
+ h32 = rol32(h32, 17) * PRIME32_4;
+
h32 ^= h32 >> 15;
h32 *= PRIME32_2;
h32 ^= h32 >> 13;
#endif /* CONFIG_SOFTMMU */
static inline
-uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags)
+uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
+ uint32_t trace_vcpu_dstate)
{
- return tb_hash_func5(phys_pc, pc, flags);
+ return tb_hash_func6(phys_pc, pc, flags, trace_vcpu_dstate);
}
#endif
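
The change above follows the xxh32 finalization pattern: the total
input length grows from 20 to 24 bytes (hence "h32 += 24"), and the
new 32-bit lane 'f' is folded with the same tail step as 'e'. Below is
a self-contained sketch of that tail step, using the published xxHash
32-bit primes; the starting h32 and lane values are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    #define PRIME32_2 2246822519U
    #define PRIME32_3 3266489917U
    #define PRIME32_4  668265263U

    static uint32_t rol32(uint32_t x, unsigned r)
    {
        return (x << r) | (x >> (32 - r));
    }

    /* One xxh32 tail step: fold a 4-byte lane into the running hash.
     * This is the pattern repeated for the new trace_vcpu_dstate lane. */
    static uint32_t fold_lane(uint32_t h32, uint32_t lane)
    {
        h32 += lane * PRIME32_3;
        return rol32(h32, 17) * PRIME32_4;
    }

    int main(void)
    {
        uint32_t h32 = 0xdeadbeefu;   /* illustrative running value */
        uint32_t flags = 0x11, dstate = 0x5;

        h32 += 24;                    /* total input length in bytes */
        h32 = fold_lane(h32, flags);  /* lane 'e' in tb_hash_func6 */
        h32 = fold_lane(h32, dstate); /* new lane 'f' */

        /* final avalanche */
        h32 ^= h32 >> 15;
        h32 *= PRIME32_2;
        h32 ^= h32 >> 13;
        h32 *= PRIME32_3;
        h32 ^= h32 >> 16;

        printf("h32 = %#x\n", h32);
        return 0;
    }
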
if (unlikely(!(tb
&& tb->pc == addr
&& tb->cs_base == cs_base
- && tb->flags == flags))) {
+ && tb->flags == flags
+ && tb->trace_vcpu_dstate == *cpu->trace_dstate))) {
tb = tb_htable_lookup(cpu, addr, cs_base, flags);
if (!tb) {
return tcg_ctx.code_gen_epilogue;
static inline uint32_t h(unsigned long v)
{
- return tb_hash_func5(v, 0, 0);
+ return tb_hash_func6(v, 0, 0, 0);
}
/*
{
bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed,
CPU_TRACE_DSTATE_MAX_EVENTS);
+ cpu_tb_jmp_cache_clear(vcpu);
}
void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
* Set the dynamic tracing state of an event for the given vCPU.
*
* Pre-condition: trace_event_get_vcpu_state_static(ev) == true
+ *
+ * Note: Changes for execution-time events with the 'tcg' property will not be
+ * propagated until the next TB is executed (iff executing in TCG mode).
*/
void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
TraceEvent *ev, bool state);
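
As a hedged usage sketch of the API documented above (the wrapper
function is hypothetical; obtaining the TraceEvent pointer is left to
the caller, since lookup helpers vary across QEMU versions):

    /* Assumes QEMU context: qemu/osdep.h and trace/control.h. */
    #include "qemu/osdep.h"
    #include "trace/control.h"

    static void set_vcpu_event(CPUState *vcpu, TraceEvent *ev, bool on)
    {
        /* Pre-condition from the comment above. */
        g_assert(trace_event_get_vcpu_state_static(ev));

        /* For execution-time events with the 'tcg' property, the change
         * is not observed until the vCPU executes its next TB, at which
         * point it switches to the TB set matching its new state. */
        trace_event_set_vcpu_state_dynamic(vcpu, ev, on);
    }
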