From cf5ec880c97feea2189a3ffcec3858957b2ba283 Mon Sep 17 00:00:00 2001 From: Nicholas Mosier Date: Tue, 23 Apr 2024 16:33:18 +0000 Subject: [PATCH] cpu-kvm: Support overflows when migrating across hybrid cores Add support for event overflows when the host thread migrates across different types of cores on a hybrid host architecture. This patch achieves this by simply halving the sample period for each performance counter. Since there are two types of cores, this guarantees that an overflow event will trigger before N events occur, where N is the requested period (e.g., number of instructions to simulate). This may result in many early triggers (up to log2(N)) before the requested period is reached. However, gem5's existing bookkeeping logic already handles this case properly: if fewer events than requested occurred, it will set a new period (N - observed) and resume execution. This loop will exit once N events have actually occurred. Change-Id: Iff85237da1ae1aa25bc2045fbf9091726291fe36 --- src/cpu/kvm/BaseKvmCPU.py | 6 +++++ src/cpu/kvm/base.cc | 6 ++--- src/cpu/kvm/base.hh | 6 +++++ src/cpu/kvm/perfevent.cc | 55 ++++++++++++++++++++++++--------------- src/cpu/kvm/perfevent.hh | 24 ++++++++++++----- 5 files changed, 66 insertions(+), 31 deletions(-) diff --git a/src/cpu/kvm/BaseKvmCPU.py b/src/cpu/kvm/BaseKvmCPU.py index 2c90acf3e8..a6590ac4f4 100644 --- a/src/cpu/kvm/BaseKvmCPU.py +++ b/src/cpu/kvm/BaseKvmCPU.py @@ -72,6 +72,12 @@ class BaseKvmCPU(BaseCPU): usePerfOverflow = Param.Bool( False, "Use perf event overflow counters (EXPERIMENTAL)" ) + allowHybridPerf = Param.Bool( + True, + "Enable hybrid performance counters if hybrid host architecture " + "detected. 
Required for accurate stats if gem5 may run on E-core on a " + "hybrid host architecture (uncommon).", + ) alwaysSyncTC = Param.Bool( False, "Always sync thread contexts on entry/exit" ) diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc index 8eb625c8bc..1c9764465d 100644 --- a/src/cpu/kvm/base.cc +++ b/src/cpu/kvm/base.cc @@ -72,6 +72,7 @@ BaseKvmCPU::BaseKvmCPU(const BaseKvmCPUParams &params) threadContextDirty(true), kvmStateDirty(false), usePerf(params.usePerf), + allowHybridPerf(params.allowHybridPerf), vcpuID(-1), vcpuFD(-1), vcpuMMapSize(0), _kvmRun(NULL), mmioRing(NULL), pageSize(sysconf(_SC_PAGE_SIZE)), @@ -107,9 +108,8 @@ BaseKvmCPU::BaseKvmCPU(const BaseKvmCPUParams &params) // If we use perf, we create new PerfKVMCounters if (usePerf) { - hwCycles = std::unique_ptr<PerfKvmCounter>(PerfKvmCounter::create()); - hwInstructions = - std::unique_ptr<PerfKvmCounter>(PerfKvmCounter::create()); + hwCycles.reset(PerfKvmCounter::create(allowHybridPerf)); + hwInstructions.reset(PerfKvmCounter::create(allowHybridPerf)); } else { inform("Using KVM CPU without perf. The stats related to the number " "of cycles and instructions executed by the KVM CPU will not " diff --git a/src/cpu/kvm/base.hh b/src/cpu/kvm/base.hh index 3cf70a0bef..e592c27688 100644 --- a/src/cpu/kvm/base.hh +++ b/src/cpu/kvm/base.hh @@ -656,6 +656,12 @@ class BaseKvmCPU : public BaseCPU /** True if using perf; False otherwise*/ bool usePerf; + /** + * Whether to permit using hybrid performance counters if hybrid host + * architecture is auto-detected. + */ + bool allowHybridPerf; + /** KVM internal ID of the vCPU */ long vcpuID; diff --git a/src/cpu/kvm/perfevent.cc b/src/cpu/kvm/perfevent.cc index 44d956f40f..187a0cdd76 100644 --- a/src/cpu/kvm/perfevent.cc +++ b/src/cpu/kvm/perfevent.cc @@ -73,7 +73,7 @@ PerfKvmCounter::PerfKvmCounter() } PerfKvmCounter * -PerfKvmCounter::create() +PerfKvmCounter::create(bool allow_hybrid) { // Check if we're running on a hybrid host architecture. Linux exposes // this via sysfs. 
If the directory /sys/devices/cpu exists, then we are @@ -82,22 +82,25 @@ PerfKvmCounter::create() // existence of /sys/devices/cpu_atom to indicate a hybrid host // architecture. const char *atom_path = "/sys/devices/cpu_atom"; - if (DIR *atom_dir = opendir(atom_path)) { - closedir(atom_dir); + if (allow_hybrid) { + if (DIR *atom_dir = opendir(atom_path)) { + closedir(atom_dir); + + // Since we're running on a hybrid architecture, use a hybrid + // performance counter. This uses two 'physical' performance + // counters to implement a 'logical' one which is the sum of the + // two. + return new HybridPerfKvmCounter(); + } - // Since we're running on a hybrid architecture, use a hybrid - // performance counter. This uses two 'physical' performance counters - // to implement a 'logical' one which is the sum of the two. - return new HybridPerfKvmCounter(); - } else { if (errno != ENOENT) warn("Unexpected error code from opendir(%s): %s\n", atom_path, std::strerror(errno)); - - // We're running on a regular architecture, so use a regular - // performance counter. - return new SimplePerfKvmCounter(); } + + // We're running on a regular architecture, so use a regular + // performance counter. 
+ return new SimplePerfKvmCounter(); } SimplePerfKvmCounter::SimplePerfKvmCounter() @@ -178,7 +181,7 @@ SimplePerfKvmCounter::enableSignals(pid_t tid, int signal) } void -SimplePerfKvmCounter::attach(PerfKvmCounterConfig &config, +SimplePerfKvmCounter::attach(const PerfKvmCounterConfig &config, pid_t tid, const PerfKvmCounter *parent) { assert(!attached()); @@ -288,8 +291,19 @@ SimplePerfKvmCounter::read(void *buf, size_t size) const } while (_size); } +PerfKvmCounterConfig +HybridPerfKvmCounter::fixupConfig(const PerfKvmCounterConfig &in, + ConfigSubtype config_subtype) +{ + PerfKvmCounterConfig out = in; + out.attr.config |= config_subtype; + if (out.attr.sample_period > 1) + out.attr.sample_period /= 2; + return out; +} + void -HybridPerfKvmCounter::attach(PerfKvmCounterConfig &config, pid_t tid, +HybridPerfKvmCounter::attach(const PerfKvmCounterConfig &config, pid_t tid, const PerfKvmCounter *parent) { // We should only be using hybrid performance counters for hardware @@ -305,13 +319,10 @@ HybridPerfKvmCounter::attach(PerfKvmCounterConfig &config, pid_t tid, parent_atom_counter = &hybrid_parent.atomCounter; } - PerfKvmCounterConfig config_core = config; - config_core.attr.config |= ConfigCore; - coreCounter.attach(config_core, tid, parent_core_counter); - - PerfKvmCounterConfig config_atom = config; - config_atom.attr.config |= ConfigAtom; - atomCounter.attach(config_atom, tid, parent_atom_counter); + coreCounter.attach(fixupConfig(config, ConfigCore), tid, + parent_core_counter); + atomCounter.attach(fixupConfig(config, ConfigAtom), tid, + parent_atom_counter); } void @@ -345,6 +356,8 @@ HybridPerfKvmCounter::stop() void HybridPerfKvmCounter::period(uint64_t period) { + if (period > 1) + period /= 2; coreCounter.period(period); atomCounter.period(period); } diff --git a/src/cpu/kvm/perfevent.hh b/src/cpu/kvm/perfevent.hh index 5424629074..ab75bf6874 100644 --- a/src/cpu/kvm/perfevent.hh +++ b/src/cpu/kvm/perfevent.hh @@ -183,7 +183,7 @@ class PerfKvmCounter * 
PerfKvmCounter, depending on whether the host has a hybrid architecture * (rare case) or not (common case). */ - static PerfKvmCounter *create(); + static PerfKvmCounter *create(bool allow_hybrid); /** * Attach a counter and optionally make it a member of an existing counter @@ -196,7 +196,7 @@ class PerfKvmCounter * @param tid Thread to sample (0 indicates current thread) * @param parent Group leader (nullptr indicates no group leader) */ - virtual void attach(PerfKvmCounterConfig &config, + virtual void attach(const PerfKvmCounterConfig &config, pid_t tid, const PerfKvmCounter *parent = nullptr) = 0; /** Detach a counter from PerfEvent. */ @@ -235,6 +235,12 @@ class PerfKvmCounter * since it has inverted check for the return value when copying * parameters from userspace. * + * @note When using a hybrid perf counter, this actually sets + * the period to 1/2 of the value provided. This ensures that an + * overflow will always trigger before more than \p period events + * occur, even in the pathological case when the host execution is + * evenly split between a P-core and E-core. 
+ * * @param period Overflow period in events */ virtual void period(uint64_t period) = 0; @@ -305,7 +311,7 @@ class SimplePerfKvmCounter final : public PerfKvmCounter public: ~SimplePerfKvmCounter(); - void attach(PerfKvmCounterConfig &config, + void attach(const PerfKvmCounterConfig &config, pid_t tid, const PerfKvmCounter *parent) override; void detach() override; @@ -402,7 +408,7 @@ class HybridPerfKvmCounter : public PerfKvmCounter HybridPerfKvmCounter() = default; public: - void attach(PerfKvmCounterConfig &config, pid_t tid, + void attach(const PerfKvmCounterConfig &config, pid_t tid, const PerfKvmCounter *parent) override; void detach() override; bool attached() const override; @@ -417,7 +423,8 @@ class HybridPerfKvmCounter : public PerfKvmCounter SimplePerfKvmCounter coreCounter; SimplePerfKvmCounter atomCounter; - using Config = decltype(perf_event_attr::config); + using ConfigSubtype = decltype(perf_event_attr::config); + using SamplePeriod = decltype(perf_event_attr::sample_period); /** @{ */ /** @@ -425,10 +432,13 @@ class HybridPerfKvmCounter : public PerfKvmCounter * Linux perf's documentation (tools/perf/Documentation/intel-hybrid.txt * in the linux source tree). */ - static inline constexpr Config ConfigCore = 0x4UL << 32; - static inline constexpr Config ConfigAtom = 0x8UL << 32; + static inline constexpr ConfigSubtype ConfigCore = 0x4UL << 32; + static inline constexpr ConfigSubtype ConfigAtom = 0x8UL << 32; /** @} */ + static PerfKvmCounterConfig fixupConfig(const PerfKvmCounterConfig &in, + ConfigSubtype config_subtype); + friend class PerfKvmCounter; };