cpu-kvm: Support overflows when migrating across hybrid cores

Add support for event overflows when the host thread migrates across
different types of cores on a hybrid host architecture. This patch
achieves this by simply halving the sample period for each performance
counter. Since there are two types of cores, this guarantees that an
overflow event will trigger before N events occur, where N is the
requested period (e.g., number of instructions to simulate). This
may result in many early triggers (up to log2(N)) before the requested
period is reached. However, gem5's existing bookkeeping logic already
handles this case properly: if fewer events than requested occurred,
it will set a new period (N - observed) and resume execution. This loop
will exit once N events have actually occurred.

Change-Id: Iff85237da1ae1aa25bc2045fbf9091726291fe36
This commit is contained in:
Nicholas Mosier
2024-04-23 16:33:18 +00:00
committed by Ivana Mitrovic
parent 30ea15009f
commit cf5ec880c9
5 changed files with 66 additions and 31 deletions

View File

@@ -72,6 +72,12 @@ class BaseKvmCPU(BaseCPU):
usePerfOverflow = Param.Bool(
False, "Use perf event overflow counters (EXPERIMENTAL)"
)
# Read by the BaseKvmCPU constructor and forwarded to
# PerfKvmCounter::create() when perf counters are in use.
allowHybridPerf = Param.Bool(
True,
"Enable hybrid performance counters if hybrid host architecture "
"detected. Required for accurate stats if gem5 may run on E-core on a "
"hybrid host architecture (uncommon).",
)
alwaysSyncTC = Param.Bool(
False, "Always sync thread contexts on entry/exit"
)

View File

@@ -72,6 +72,7 @@ BaseKvmCPU::BaseKvmCPU(const BaseKvmCPUParams &params)
threadContextDirty(true),
kvmStateDirty(false),
usePerf(params.usePerf),
allowHybridPerf(params.allowHybridPerf),
vcpuID(-1), vcpuFD(-1), vcpuMMapSize(0),
_kvmRun(NULL), mmioRing(NULL),
pageSize(sysconf(_SC_PAGE_SIZE)),
@@ -107,9 +108,8 @@ BaseKvmCPU::BaseKvmCPU(const BaseKvmCPUParams &params)
// If we use perf, we create new PerfKVMCounters
if (usePerf) {
hwCycles = std::unique_ptr<PerfKvmCounter>(PerfKvmCounter::create());
hwInstructions =
std::unique_ptr<PerfKvmCounter>(PerfKvmCounter::create());
hwCycles.reset(PerfKvmCounter::create(allowHybridPerf));
hwInstructions.reset(PerfKvmCounter::create(allowHybridPerf));
} else {
inform("Using KVM CPU without perf. The stats related to the number "
"of cycles and instructions executed by the KVM CPU will not "

View File

@@ -656,6 +656,12 @@ class BaseKvmCPU : public BaseCPU
/** True if using perf; False otherwise*/
bool usePerf;
/**
* Whether to permit using hybrid performance counters if hybrid host
* architecture is auto-detected.
*/
bool allowHybridPerf;
/** KVM internal ID of the vCPU */
long vcpuID;

View File

@@ -73,7 +73,7 @@ PerfKvmCounter::PerfKvmCounter()
}
PerfKvmCounter *
PerfKvmCounter::create()
PerfKvmCounter::create(bool allow_hybrid)
{
// Check if we're running on a hybrid host architecture. Linux exposes
// this via sysfs. If the directory /sys/devices/cpu exists, then we are
@@ -82,22 +82,25 @@ PerfKvmCounter::create()
// existence of /sys/devices/cpu_atom to indicate a hybrid host
// architecture.
const char *atom_path = "/sys/devices/cpu_atom";
if (DIR *atom_dir = opendir(atom_path)) {
closedir(atom_dir);
if (allow_hybrid) {
if (DIR *atom_dir = opendir(atom_path)) {
closedir(atom_dir);
// Since we're running on a hybrid architecture, use a hybrid
// performance counter. This uses two 'physical' performance
// counters to implement a 'logical' one which is the sum of the
// two.
return new HybridPerfKvmCounter();
}
// Since we're running on a hybrid architecture, use a hybrid
// performance counter. This uses two 'physical' performance counters
// to implement a 'logical' one which is the sum of the two.
return new HybridPerfKvmCounter();
} else {
if (errno != ENOENT)
warn("Unexpected error code from opendir(%s): %s\n",
atom_path, std::strerror(errno));
// We're running on a regular architecture, so use a regular
// performance counter.
return new SimplePerfKvmCounter();
}
// We're running on a regular architecture, so use a regular
// performance counter.
return new SimplePerfKvmCounter();
}
SimplePerfKvmCounter::SimplePerfKvmCounter()
@@ -178,7 +181,7 @@ SimplePerfKvmCounter::enableSignals(pid_t tid, int signal)
}
void
SimplePerfKvmCounter::attach(PerfKvmCounterConfig &config,
SimplePerfKvmCounter::attach(const PerfKvmCounterConfig &config,
pid_t tid, const PerfKvmCounter *parent)
{
assert(!attached());
@@ -288,8 +291,19 @@ SimplePerfKvmCounter::read(void *buf, size_t size) const
} while (_size);
}
/**
 * Derive the configuration for one of the two physical counters that
 * back a hybrid counter.
 *
 * The returned copy has the core-type selector OR'ed into attr.config
 * and, when the sample period is greater than one, half of the
 * requested sample period. Halving makes sure that an overflow fires
 * before the requested number of events has occurred even if execution
 * is split between the two core types.
 *
 * @param in Configuration requested for the logical counter
 * @param config_subtype Core-type bits to merge into attr.config
 * @return Adjusted copy of \p in; \p in itself is left untouched
 */
PerfKvmCounterConfig
HybridPerfKvmCounter::fixupConfig(const PerfKvmCounterConfig &in,
                                  ConfigSubtype config_subtype)
{
    PerfKvmCounterConfig adjusted = in;

    // Select which kind of core this physical counter observes.
    adjusted.attr.config = adjusted.attr.config | config_subtype;

    // Periods of 0 and 1 are passed through unchanged.
    const bool halvable = adjusted.attr.sample_period > 1;
    if (halvable) {
        adjusted.attr.sample_period = adjusted.attr.sample_period / 2;
    }

    return adjusted;
}
void
HybridPerfKvmCounter::attach(PerfKvmCounterConfig &config, pid_t tid,
HybridPerfKvmCounter::attach(const PerfKvmCounterConfig &config, pid_t tid,
const PerfKvmCounter *parent)
{
// We should only be using hybrid performance counters for hardware
@@ -305,13 +319,10 @@ HybridPerfKvmCounter::attach(PerfKvmCounterConfig &config, pid_t tid,
parent_atom_counter = &hybrid_parent.atomCounter;
}
PerfKvmCounterConfig config_core = config;
config_core.attr.config |= ConfigCore;
coreCounter.attach(config_core, tid, parent_core_counter);
PerfKvmCounterConfig config_atom = config;
config_atom.attr.config |= ConfigAtom;
atomCounter.attach(config_atom, tid, parent_atom_counter);
coreCounter.attach(fixupConfig(config, ConfigCore), tid,
parent_core_counter);
atomCounter.attach(fixupConfig(config, ConfigAtom), tid,
parent_atom_counter);
}
void
@@ -345,6 +356,8 @@ HybridPerfKvmCounter::stop()
/**
 * Update the overflow period of both underlying physical counters.
 *
 * Each physical counter is programmed with half of the requested
 * period (periods of 0 or 1 are passed through unchanged), so that an
 * overflow triggers before \p period events have occurred in total,
 * even when execution is split between the two core types.
 *
 * @param period Requested overflow period in events
 */
void
HybridPerfKvmCounter::period(uint64_t period)
{
    const uint64_t per_counter = (period > 1) ? period / 2 : period;

    coreCounter.period(per_counter);
    atomCounter.period(per_counter);
}

View File

@@ -183,7 +183,7 @@ class PerfKvmCounter
* PerfKvmCounter, depending on whether the host has a hybrid architecture
* (rare case) or not (common case).
*/
static PerfKvmCounter *create();
static PerfKvmCounter *create(bool allow_hybrid);
/**
* Attach a counter and optionally make it a member of an existing counter
@@ -196,7 +196,7 @@ class PerfKvmCounter
* @param tid Thread to sample (0 indicates current thread)
* @param parent Group leader (nullptr indicates no group leader)
*/
virtual void attach(PerfKvmCounterConfig &config,
virtual void attach(const PerfKvmCounterConfig &config,
pid_t tid, const PerfKvmCounter *parent = nullptr) = 0;
/** Detach a counter from PerfEvent. */
@@ -235,6 +235,12 @@ class PerfKvmCounter
* since it has inverted check for the return value when copying
* parameters from userspace.
*
* @note When using a hybrid perf counter, this actually sets
* the period to 1/2 of the value provided. This ensures that an
* overflow will always trigger before more than \p period events
* occur, even in the pathological case when the host execution is
* evenly split between a P-core and E-core.
*
* @param period Overflow period in events
*/
virtual void period(uint64_t period) = 0;
@@ -305,7 +311,7 @@ class SimplePerfKvmCounter final : public PerfKvmCounter
public:
~SimplePerfKvmCounter();
void attach(PerfKvmCounterConfig &config,
void attach(const PerfKvmCounterConfig &config,
pid_t tid, const PerfKvmCounter *parent) override;
void detach() override;
@@ -402,7 +408,7 @@ class HybridPerfKvmCounter : public PerfKvmCounter
HybridPerfKvmCounter() = default;
public:
void attach(PerfKvmCounterConfig &config, pid_t tid,
void attach(const PerfKvmCounterConfig &config, pid_t tid,
const PerfKvmCounter *parent) override;
void detach() override;
bool attached() const override;
@@ -417,7 +423,8 @@ class HybridPerfKvmCounter : public PerfKvmCounter
SimplePerfKvmCounter coreCounter;
SimplePerfKvmCounter atomCounter;
using Config = decltype(perf_event_attr::config);
using ConfigSubtype = decltype(perf_event_attr::config);
/** Type of the kernel's perf_event_attr::sample_period field. */
using SamplePeriod = decltype(perf_event_attr::sample_period);
/** @{ */
/**
@@ -425,10 +432,13 @@ class HybridPerfKvmCounter : public PerfKvmCounter
* Linux perf's documentation (tools/perf/Documentation/intel-hybrid.txt
* in the linux source tree).
*/
static inline constexpr Config ConfigCore = 0x4UL << 32;
static inline constexpr Config ConfigAtom = 0x8UL << 32;
static inline constexpr ConfigSubtype ConfigCore = 0x4UL << 32;
static inline constexpr ConfigSubtype ConfigAtom = 0x8UL << 32;
/** @} */
static PerfKvmCounterConfig fixupConfig(const PerfKvmCounterConfig &in,
ConfigSubtype config_subtype);
friend class PerfKvmCounter;
};