Implement pseudo-channel and rank specific BW information

This commit is contained in:
2024-11-18 13:18:33 +01:00
parent e74a617273
commit e409bab47a
21 changed files with 98 additions and 52 deletions

View File

@@ -49,7 +49,6 @@ namespace DRAMSys
MemSpecDDR5::MemSpecDDR5(const Config::MemSpec& memSpec) :
MemSpec(memSpec,
memSpec.memarchitecturespec.entries.at("nbrOfChannels"),
1,
memSpec.memarchitecturespec.entries.at("nbrOfRanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBankGroups"),

View File

@@ -49,7 +49,6 @@ MemSpecHBM3::MemSpecHBM3(const Config::MemSpec& memSpec) :
MemSpec(memSpec,
memSpec.memarchitecturespec.entries.at("nbrOfChannels"),
memSpec.memarchitecturespec.entries.at("nbrOfPseudoChannels"),
memSpec.memarchitecturespec.entries.at("nbrOfPseudoChannels"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBankGroups"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks") /
@@ -140,6 +139,11 @@ bool MemSpecHBM3::hasRasAndCasBus() const
return true;
}
// Override of the virtual MemSpec::pseudoChannelMode().
// For HBM3 the answer is unconditionally true; unlike the HBM2 variant it does
// not depend on the configured rank / pseudo-channel count.
// Callers visible in this change use the flag to average the bandwidth over the
// pseudo channels and to fill the pseudo-channel-mode column of the trace database.
bool MemSpecHBM3::pseudoChannelMode() const
{
return true;
}
sc_time MemSpecHBM3::getExecutionTime(Command command, const tlm_generic_payload& payload) const
{
if (command == Command::PREPB || command == Command::PREAB)

View File

@@ -97,6 +97,7 @@ public:
[[nodiscard]] unsigned getRAAMMT() const override;
[[nodiscard]] bool hasRasAndCasBus() const override;
[[nodiscard]] bool pseudoChannelMode() const override;
[[nodiscard]] sc_core::sc_time
getExecutionTime(Command command, const tlm::tlm_generic_payload& payload) const override;

View File

@@ -48,7 +48,6 @@ namespace DRAMSys
MemSpecLPDDR5::MemSpecLPDDR5(const Config::MemSpec& memSpec) :
MemSpec(memSpec,
memSpec.memarchitecturespec.entries.at("nbrOfChannels"),
1,
memSpec.memarchitecturespec.entries.at("nbrOfRanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBankGroups"),

View File

@@ -472,18 +472,8 @@ void TlmRecorder::insertGeneralInfo(const std::string& mcConfigString,
sqlite3_bind_int(insertGeneralInfoStatement, 13, static_cast<int>(mcConfig.requestBufferSize));
sqlite3_bind_int(insertGeneralInfoStatement, 14, static_cast<int>(memSpec.getPer2BankOffset()));
const auto memoryType = memSpec.memoryType;
bool rowColumnCommandBus =
(memoryType == Config::MemoryType::HBM2) || (memoryType == Config::MemoryType::HBM3);
bool pseudoChannelMode = [this, memoryType]() -> bool
{
if (memoryType != Config::MemoryType::HBM2 && memoryType != Config::MemoryType::HBM3)
return false;
return memSpec.pseudoChannelsPerChannel != 1;
}();
bool rowColumnCommandBus = memSpec.hasRasAndCasBus();
bool pseudoChannelMode = memSpec.pseudoChannelMode();
sqlite3_bind_int(insertGeneralInfoStatement, 15, static_cast<int>(rowColumnCommandBus));
sqlite3_bind_int(insertGeneralInfoStatement, 16, static_cast<int>(pseudoChannelMode));

View File

@@ -44,7 +44,6 @@ namespace DRAMSys
MemSpec::MemSpec(const Config::MemSpec& memSpec,
unsigned numberOfChannels,
unsigned pseudoChannelsPerChannel,
unsigned ranksPerChannel,
unsigned banksPerRank,
unsigned groupsPerRank,
@@ -53,7 +52,6 @@ MemSpec::MemSpec(const Config::MemSpec& memSpec,
unsigned bankGroupsPerChannel,
unsigned devicesPerRank) :
numberOfChannels(numberOfChannels),
pseudoChannelsPerChannel(pseudoChannelsPerChannel),
ranksPerChannel(ranksPerChannel),
banksPerRank(banksPerRank),
groupsPerRank(groupsPerRank),
@@ -150,6 +148,11 @@ bool MemSpec::hasRasAndCasBus() const
return false;
}
// Default implementation of the virtual pseudo-channel-mode query: a generic
// memory spec reports that it does not operate in pseudo-channel mode.
// Memory specs that do have pseudo channels override this (HBM2 and HBM3
// declare overrides in this change).
bool MemSpec::pseudoChannelMode() const
{
return false;
}
bool MemSpec::requiresMaskedWrite(const tlm::tlm_generic_payload& payload) const
{
if (allBytesEnabled(payload))

View File

@@ -64,7 +64,6 @@ public:
virtual ~MemSpec() = default;
const unsigned numberOfChannels;
const unsigned pseudoChannelsPerChannel;
const unsigned ranksPerChannel;
const unsigned banksPerRank;
const unsigned groupsPerRank;
@@ -101,6 +100,7 @@ public:
[[nodiscard]] virtual unsigned getRAADEC() const;
[[nodiscard]] virtual bool hasRasAndCasBus() const;
[[nodiscard]] virtual bool pseudoChannelMode() const;
[[nodiscard]] virtual sc_core::sc_time
getExecutionTime(Command command, const tlm::tlm_generic_payload& payload) const = 0;
@@ -119,7 +119,6 @@ public:
protected:
MemSpec(const Config::MemSpec& memSpec,
unsigned numberOfChannels,
unsigned pseudoChannelsPerChannel,
unsigned ranksPerChannel,
unsigned banksPerRank,
unsigned groupsPerRank,

View File

@@ -49,7 +49,6 @@ namespace DRAMSys
MemSpecDDR3::MemSpecDDR3(const Config::MemSpec& memSpec) :
MemSpec(memSpec,
memSpec.memarchitecturespec.entries.at("nbrOfChannels"),
1,
memSpec.memarchitecturespec.entries.at("nbrOfRanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks"),
1,

View File

@@ -49,7 +49,6 @@ namespace DRAMSys
MemSpecDDR4::MemSpecDDR4(const Config::MemSpec& memSpec) :
MemSpec(memSpec,
memSpec.memarchitecturespec.entries.at("nbrOfChannels"),
1,
memSpec.memarchitecturespec.entries.at("nbrOfRanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBankGroups"),

View File

@@ -49,7 +49,6 @@ namespace DRAMSys
MemSpecGDDR5::MemSpecGDDR5(const Config::MemSpec& memSpec) :
MemSpec(memSpec,
memSpec.memarchitecturespec.entries.at("nbrOfChannels"),
1,
memSpec.memarchitecturespec.entries.at("nbrOfRanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBankGroups"),

View File

@@ -49,7 +49,6 @@ namespace DRAMSys
MemSpecGDDR5X::MemSpecGDDR5X(const Config::MemSpec& memSpec) :
MemSpec(memSpec,
memSpec.memarchitecturespec.entries.at("nbrOfChannels"),
1,
memSpec.memarchitecturespec.entries.at("nbrOfRanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBankGroups"),

View File

@@ -49,7 +49,6 @@ namespace DRAMSys
MemSpecGDDR6::MemSpecGDDR6(const Config::MemSpec& memSpec) :
MemSpec(memSpec,
memSpec.memarchitecturespec.entries.at("nbrOfChannels"),
1,
memSpec.memarchitecturespec.entries.at("nbrOfRanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBankGroups"),

View File

@@ -50,7 +50,6 @@ MemSpecHBM2::MemSpecHBM2(const Config::MemSpec& memSpec) :
MemSpec(memSpec,
memSpec.memarchitecturespec.entries.at("nbrOfChannels"),
memSpec.memarchitecturespec.entries.at("nbrOfPseudoChannels"),
memSpec.memarchitecturespec.entries.at("nbrOfPseudoChannels"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBankGroups"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks") /
@@ -129,6 +128,11 @@ bool MemSpecHBM2::hasRasAndCasBus() const
return true;
}
// Override of the virtual MemSpec::pseudoChannelMode().
// HBM2 counts as being in pseudo-channel mode whenever ranksPerChannel is
// anything other than one. (The HBM2 constructor feeds "nbrOfPseudoChannels"
// into the rank count, so ranks stand in for pseudo channels here.)
bool MemSpecHBM2::pseudoChannelMode() const
{
    const bool singleRank = (ranksPerChannel == 1);
    return !singleRank;
}
sc_time MemSpecHBM2::getExecutionTime(Command command, const tlm_generic_payload& payload) const
{
if (command == Command::PREPB || command == Command::PREAB)

View File

@@ -89,6 +89,7 @@ public:
[[nodiscard]] sc_core::sc_time getRefreshIntervalPB() const override;
[[nodiscard]] bool hasRasAndCasBus() const override;
[[nodiscard]] bool pseudoChannelMode() const override;
[[nodiscard]] sc_core::sc_time
getExecutionTime(Command command, const tlm::tlm_generic_payload& payload) const override;

View File

@@ -49,7 +49,6 @@ namespace DRAMSys
MemSpecLPDDR4::MemSpecLPDDR4(const Config::MemSpec& memSpec) :
MemSpec(memSpec,
memSpec.memarchitecturespec.entries.at("nbrOfChannels"),
1,
memSpec.memarchitecturespec.entries.at("nbrOfRanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks"),
1,

View File

@@ -49,7 +49,6 @@ namespace DRAMSys
MemSpecSTTMRAM::MemSpecSTTMRAM(const Config::MemSpec& memSpec) :
MemSpec(memSpec,
memSpec.memarchitecturespec.entries.at("nbrOfChannels"),
1,
memSpec.memarchitecturespec.entries.at("nbrOfRanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks"),
1,

View File

@@ -49,7 +49,6 @@ namespace DRAMSys
MemSpecWideIO::MemSpecWideIO(const Config::MemSpec& memSpec) :
MemSpec(memSpec,
memSpec.memarchitecturespec.entries.at("nbrOfChannels"),
1,
memSpec.memarchitecturespec.entries.at("nbrOfRanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks"),
1,

View File

@@ -49,7 +49,6 @@ namespace DRAMSys
MemSpecWideIO2::MemSpecWideIO2(const Config::MemSpec& memSpec) :
MemSpec(memSpec,
memSpec.memarchitecturespec.entries.at("nbrOfChannels"),
1,
memSpec.memarchitecturespec.entries.at("nbrOfRanks"),
memSpec.memarchitecturespec.entries.at("nbrOfBanks"),
1,

View File

@@ -64,6 +64,9 @@
#include "DRAMSys/controller/scheduler/SchedulerFrFcfsGrp.h"
#include "DRAMSys/controller/scheduler/SchedulerGrpFrFcfs.h"
#include "DRAMSys/controller/scheduler/SchedulerGrpFrFcfsWm.h"
#include <cstdint>
#include <numeric>
#include <string>
#ifdef DDR5_SIM
#include "DRAMSys/controller/checker/CheckerDDR5.h"
@@ -89,6 +92,7 @@ Controller::Controller(const sc_module_name& name,
config(config),
memSpec(memSpec),
addressDecoder(addressDecoder),
numberOfBeatsServed(memSpec.ranksPerChannel, 0),
minBytesPerBurst(memSpec.defaultBytesPerBurst),
maxBytesPerBurst(memSpec.maxBytesPerBurst)
{
@@ -640,8 +644,14 @@ void Controller::manageResponses()
if (nextTransInRespQueue != nullptr)
{
// Ignore ECC requests
// TODO in future, use a tagging mechanism to distinguish between normal, ECC and maybe
// masked requests
if (nextTransInRespQueue->get_extension<EccExtension>() == nullptr)
numberOfBeatsServed += ControllerExtension::getBurstLength(*nextTransInRespQueue);
{
auto rank = ControllerExtension::getRank(*nextTransInRespQueue);
numberOfBeatsServed[static_cast<std::size_t>(rank)] +=
ControllerExtension::getBurstLength(*nextTransInRespQueue);
}
if (ChildExtension::isChildTrans(*nextTransInRespQueue))
{
@@ -780,8 +790,15 @@ void Controller::end_of_simulation()
{
idleTimeCollector.end();
sc_core::sc_time activeTime = static_cast<double>(numberOfBeatsServed) / memSpec.dataRate *
memSpec.tCK / memSpec.pseudoChannelsPerChannel;
// Sum the per-rank beat counters into one channel-wide total.
// The seed must be a 64-bit zero: std::accumulate performs the summation in the
// type of its initial value, so a plain `0` would add the uint64_t counters in
// `int` and silently truncate / overflow for long simulations.
const std::uint64_t totalNumberOfBeatsServed = std::accumulate(
    numberOfBeatsServed.begin(), numberOfBeatsServed.end(), std::uint64_t{0});
sc_core::sc_time activeTime =
static_cast<double>(totalNumberOfBeatsServed) / memSpec.dataRate * memSpec.tCK;
// HBM specific, pseudo channels get averaged
if (memSpec.pseudoChannelMode())
activeTime /= memSpec.ranksPerChannel;
double bandwidth = activeTime / sc_core::sc_time_stamp();
double bandwidthWoIdle =
@@ -795,23 +812,59 @@ void Controller::end_of_simulation()
// BusWidth e.g. 8 or 64
* memSpec.bitWidth
// Number of devices that form a rank, e.g., 8 on a DDR3 DIMM
* memSpec.devicesPerRank
// HBM specific, one or two pseudo channels per channel
* memSpec.pseudoChannelsPerChannel);
* memSpec.devicesPerRank);
std::cout << name() << std::string(" Total Time: ") << sc_core::sc_time_stamp().to_string()
<< std::endl;
std::cout << name() << std::string(" AVG BW: ") << std::fixed << std::setprecision(2)
<< std::setw(6) << (bandwidth * maxBandwidth) << " Gb/s | " << std::setw(6)
<< (bandwidth * maxBandwidth / 8) << " GB/s | " << std::setw(6) << (bandwidth * 100)
<< " %" << std::endl;
std::cout << name() << std::string(" AVG BW\\IDLE: ") << std::fixed << std::setprecision(2)
<< std::setw(6) << (bandwidthWoIdle * maxBandwidth) << " Gb/s | " << std::setw(6)
// HBM specific, one or two pseudo channels per channel
if (memSpec.pseudoChannelMode())
maxBandwidth *= memSpec.ranksPerChannel;
std::cout << std::left << std::setw(24) << name() << std::string(" Total Time: ")
<< sc_core::sc_time_stamp().to_string() << std::endl;
std::cout << std::left << std::setw(24) << name() << std::string(" AVG BW: ")
<< std::fixed << std::setprecision(2) << std::setw(6) << (bandwidth * maxBandwidth)
<< " Gb/s | " << std::setw(6) << (bandwidth * maxBandwidth / 8) << " GB/s | "
<< std::setw(6) << (bandwidth * 100) << " %" << std::endl;
// Per-rank breakdown of the average bandwidth (labelled per pseudo channel when
// the memory spec reports pseudo-channel mode). Skipped for single-rank channels,
// where it would only repeat the channel-wide line printed above.
if (memSpec.ranksPerChannel > 1)
{
    // Loop-invariant values are computed once instead of on every iteration.
    // Label prefix: "pc" in pseudo-channel mode, "ra" otherwise.
    const std::string baseName = memSpec.pseudoChannelMode() ? "pc" : "ra";

    // Peak bandwidth of a single rank / pseudo channel.
    const double rankMaxBandwidth = (
        // fCK in GHz e.g. 1 [GHz] (tCK in ps):
        (1000 / memSpec.tCK.to_double())
        // DataRate e.g. 2
        * memSpec.dataRate
        // BusWidth e.g. 8 or 64
        * memSpec.bitWidth
        // Number of devices that form a rank, e.g., 8 on a DDR3 DIMM
        * memSpec.devicesPerRank);

    for (std::size_t i = 0; i < memSpec.ranksPerChannel; i++)
    {
        const std::string rankName = "." + baseName + std::to_string(i);
        const std::string componentName = name() + rankName;

        // Time the data bus was occupied by this rank: beats served, scaled by
        // the clock period and the number of beats per clock (data rate).
        const sc_core::sc_time rankActiveTime =
            numberOfBeatsServed[i] * memSpec.tCK / memSpec.dataRate;

        // Fraction of the elapsed simulation time spent transferring data.
        const double rankBandwidth = rankActiveTime / sc_core::sc_time_stamp();

        std::cout << std::left << std::setw(24) << componentName
                  << std::string(" AVG BW: ") << std::fixed << std::setprecision(2)
                  << std::setw(6) << (rankBandwidth * rankMaxBandwidth) << " Gb/s | "
                  << std::setw(6) << (rankBandwidth * rankMaxBandwidth / 8) << " GB/s | "
                  << std::setw(6) << (rankBandwidth * 100) << " %" << std::endl;
    }
}
std::cout << std::left << std::setw(24) << name() << std::string(" AVG BW\\IDLE: ")
<< std::fixed << std::setprecision(2) << std::setw(6)
<< (bandwidthWoIdle * maxBandwidth) << " Gb/s | " << std::setw(6)
<< (bandwidthWoIdle * maxBandwidth / 8) << " GB/s | " << std::setw(6)
<< (bandwidthWoIdle * 100) << " %" << std::endl;
std::cout << name() << std::string(" MAX BW: ") << std::fixed << std::setprecision(2)
<< std::setw(6) << maxBandwidth << " Gb/s | " << std::setw(6) << maxBandwidth / 8
<< " GB/s | " << std::setw(6) << 100.0 << " %" << std::endl;
std::cout << std::left << std::setw(24) << name() << std::string(" MAX BW: ")
<< std::fixed << std::setprecision(2) << std::setw(6) << maxBandwidth << " Gb/s | "
<< std::setw(6) << maxBandwidth / 8 << " GB/s | " << std::setw(6) << 100.0 << " %"
<< std::endl;
}
} // namespace DRAMSys

View File

@@ -101,7 +101,7 @@ protected:
sc_core::sc_time scMaxTime = sc_core::sc_max_time();
uint64_t numberOfBeatsServed = 0;
std::vector<uint64_t> numberOfBeatsServed;
unsigned totalNumberOfPayloads = 0;
std::function<void()> idleCallback;
ControllerVector<Rank, unsigned> ranksNumberOfPayloads;

View File

@@ -113,12 +113,14 @@ void ControllerRecordable::controllerMethod()
Controller::controllerMethod();
uint64_t windowNumberOfBeatsServed = numberOfBeatsServed - lastNumberOfBeatsServed;
lastNumberOfBeatsServed = numberOfBeatsServed;
// Sum the per-rank beat counters into one channel-wide total.
// The seed must be a 64-bit zero: std::accumulate performs the summation in the
// type of its initial value, so a plain `0` would add the uint64_t counters in
// `int` and silently truncate / overflow for long simulations.
std::uint64_t totalNumberOfBeatsServed = std::accumulate(
    numberOfBeatsServed.begin(), numberOfBeatsServed.end(), std::uint64_t{0});
// FIX: HBM pseudo-channels
// maybe better solution in the future?
windowNumberOfBeatsServed /= memSpec.pseudoChannelsPerChannel;
uint64_t windowNumberOfBeatsServed = totalNumberOfBeatsServed - lastNumberOfBeatsServed;
lastNumberOfBeatsServed = totalNumberOfBeatsServed;
// HBM specific, pseudo channels get averaged
if (memSpec.pseudoChannelMode())
windowNumberOfBeatsServed /= memSpec.ranksPerChannel;
sc_time windowActiveTime =
activeTimeMultiplier * static_cast<double>(windowNumberOfBeatsServed);