Implement stack ID for HBM3

This commit is contained in:
2025-01-13 15:36:05 +01:00
parent a82efdbb3a
commit 7a8633d36e
17 changed files with 169 additions and 26 deletions

View File

@@ -1,15 +1,18 @@
{
"addressmapping": {
"PSEUDOCHANNEL_BIT":[
29
5
],
"STACK_BIT":[
6
],
"BANKGROUP_BIT":[
27,
28
7,
8
],
"BANK_BIT": [
25,
26
9,
10
],
"BYTE_BIT": [
0,
@@ -19,18 +22,12 @@
2,
3,
4,
5,
6,
7,
8
],
"ROW_BIT": [
9,
10,
11,
12,
13,
14,
14
],
"ROW_BIT": [
15,
16,
17,
@@ -40,7 +37,13 @@
21,
22,
23,
24
24,
25,
26,
27,
28,
29,
30
]
}
}

View File

@@ -7,8 +7,12 @@
"simulationid": "hbm3-example",
"tracesetup": [
{
"clkMhz": 1000,
"name": "example.stl"
"clkMhz": 2000,
"type": "generator",
"name": "gen0",
"numRequests": 2000,
"rwRatio": 0.5,
"addressDistribution": "random"
}
]
}

View File

@@ -7,6 +7,7 @@
"nbrOfBanks": 16,
"nbrOfColumns": 128,
"nbrOfPseudoChannels": 2,
"nbrOfStacks": 2,
"nbrOfRows": 65536,
"width": 32,
"nbrOfDevices": 1,
@@ -20,6 +21,7 @@
"memtimingspec": {
"CCDL": 4,
"CCDS": 2,
"CCDR": 3,
"CKE": 8,
"DQSCK": 1,
"FAW": 16,

View File

@@ -58,6 +58,7 @@ MemSpecHBM3::MemSpecHBM3(const Config::MemSpec& memSpec) :
memSpec.memarchitecturespec.entries.at("nbrOfBankGroups") *
memSpec.memarchitecturespec.entries.at("nbrOfPseudoChannels"),
memSpec.memarchitecturespec.entries.at("nbrOfDevices")),
stacksPerChannel(memSpec.memarchitecturespec.entries.at("nbrOfStacks")),
RAAIMT(memSpec.memarchitecturespec.entries.at("RAAIMT")),
RAAMMT(memSpec.memarchitecturespec.entries.at("RAAMMT")),
RAADEC(memSpec.memarchitecturespec.entries.at("RAADEC")),
@@ -77,6 +78,7 @@ MemSpecHBM3::MemSpecHBM3(const Config::MemSpec& memSpec) :
tWR(tCK * memSpec.memtimingspec.entries.at("WR")),
tCCDL(tCK * memSpec.memtimingspec.entries.at("CCDL")),
tCCDS(tCK * memSpec.memtimingspec.entries.at("CCDS")),
tCCDR(tCK * memSpec.memtimingspec.entries.at("CCDR")),
tWTRL(tCK * memSpec.memtimingspec.entries.at("WTRL")),
tWTRS(tCK * memSpec.memtimingspec.entries.at("WTRS")),
tRTW(tCK * memSpec.memtimingspec.entries.at("RTW")),

View File

@@ -48,6 +48,8 @@ class MemSpecHBM3 final : public MemSpec
public:
explicit MemSpecHBM3(const Config::MemSpec& memSpec);
const unsigned stacksPerChannel;
const unsigned RAAIMT;
const unsigned RAAMMT;
const unsigned RAADEC;
@@ -70,7 +72,7 @@ public:
const sc_core::sc_time tWR;
const sc_core::sc_time tCCDL;
const sc_core::sc_time tCCDS;
// sc_time tCCDR; // TODO: consecutive reads to different stack IDs
const sc_core::sc_time tCCDR;
const sc_core::sc_time tWTRL;
const sc_core::sc_time tWTRS;
const sc_core::sc_time tRTW;

View File

@@ -52,6 +52,7 @@ CheckerHBM3::CheckerHBM3(const MemSpecHBM3& memSpec) : memSpec(memSpec)
nextCommandByBank.fill({BankVector<sc_time>(memSpec.banksPerChannel, SC_ZERO_TIME)});
nextCommandByBankGroup.fill({BankGroupVector<sc_time>(memSpec.bankGroupsPerChannel, SC_ZERO_TIME)});
nextCommandByRank.fill({RankVector<sc_time>(memSpec.ranksPerChannel, SC_ZERO_TIME)});
nextCommandByStack.fill({StackVector<sc_time>(memSpec.stacksPerChannel, SC_ZERO_TIME)});
last4ActivatesOnRank = RankVector<std::queue<sc_time>>(memSpec.ranksPerChannel);
tRDPDE = ((memSpec.tRL + memSpec.tPL) + (memSpec.tCK * 2));
@@ -70,6 +71,7 @@ sc_time CheckerHBM3::timeToSatisfyConstraints(Command command, const tlm_generic
Bank bank = ControllerExtension::getBank(payload);
BankGroup bankGroup = ControllerExtension::getBankGroup(payload);
Rank rank = ControllerExtension::getRank(payload);
Stack stack = ControllerExtension::getStack(payload);
sc_time earliestTimeToStart = sc_time_stamp();
@@ -78,6 +80,7 @@ sc_time CheckerHBM3::timeToSatisfyConstraints(Command command, const tlm_generic
earliestTimeToStart = std::max(earliestTimeToStart, nextCommandByBank[command][bank]);
earliestTimeToStart = std::max(earliestTimeToStart, nextCommandByBankGroup[command][bankGroup]);
earliestTimeToStart = std::max(earliestTimeToStart, nextCommandByRank[command][rank]);
earliestTimeToStart = std::max(earliestTimeToStart, nextCommandByStack[command][stack]);
if (command.isRasCommand())
{
earliestTimeToStart = std::max(earliestTimeToStart, nextCommandOnRasBus);
@@ -99,6 +102,7 @@ void CheckerHBM3::insert(Command command, const tlm_generic_payload& payload)
const Bank bank = ControllerExtension::getBank(payload);
const BankGroup bankGroup = ControllerExtension::getBankGroup(payload);
const Rank rank = ControllerExtension::getRank(payload);
const Stack stack = ControllerExtension::getStack(payload);
PRINTDEBUGMESSAGE("CheckerHBM3", "Changing state on bank " + std::to_string(static_cast<std::size_t>(bank))
@@ -198,6 +202,36 @@ void CheckerHBM3::insert(Command command, const tlm_generic_payload& payload)
earliestTimeToStart = std::max(earliestTimeToStart, constraint);
}
// Channel (RD,RD) memSpec.tCCDR [] Different(level=<ComponentLevel.Stack: 7>)
{
const sc_time constraint = currentTime + memSpec.tCCDR;
for (unsigned int i = memSpec.stacksPerChannel * static_cast<unsigned>(0); i < memSpec.stacksPerChannel * (1 + static_cast<unsigned>(0)); i++)
{
Stack currentStack{i};
if (currentStack == stack)
continue;
sc_time &earliestTimeToStart = nextCommandByStack[Command::RD][currentStack];
earliestTimeToStart = std::max(earliestTimeToStart, constraint);
}
}
// Channel (RD,RDA) memSpec.tCCDR [] Different(level=<ComponentLevel.Stack: 7>)
{
const sc_time constraint = currentTime + memSpec.tCCDR;
for (unsigned int i = memSpec.stacksPerChannel * static_cast<unsigned>(0); i < memSpec.stacksPerChannel * (1 + static_cast<unsigned>(0)); i++)
{
Stack currentStack{i};
if (currentStack == stack)
continue;
sc_time &earliestTimeToStart = nextCommandByStack[Command::RDA][currentStack];
earliestTimeToStart = std::max(earliestTimeToStart, constraint);
}
}
break;
}
@@ -542,6 +576,36 @@ void CheckerHBM3::insert(Command command, const tlm_generic_payload& payload)
earliestTimeToStart = std::max(earliestTimeToStart, constraint);
}
// Channel (RDA,RD) memSpec.tCCDR [] Different(level=<ComponentLevel.Stack: 7>)
{
const sc_time constraint = currentTime + memSpec.tCCDR;
for (unsigned int i = memSpec.stacksPerChannel * static_cast<unsigned>(0); i < memSpec.stacksPerChannel * (1 + static_cast<unsigned>(0)); i++)
{
Stack currentStack{i};
if (currentStack == stack)
continue;
sc_time &earliestTimeToStart = nextCommandByStack[Command::RD][currentStack];
earliestTimeToStart = std::max(earliestTimeToStart, constraint);
}
}
// Channel (RDA,RDA) memSpec.tCCDR [] Different(level=<ComponentLevel.Stack: 7>)
{
const sc_time constraint = currentTime + memSpec.tCCDR;
for (unsigned int i = memSpec.stacksPerChannel * static_cast<unsigned>(0); i < memSpec.stacksPerChannel * (1 + static_cast<unsigned>(0)); i++)
{
Stack currentStack{i};
if (currentStack == stack)
continue;
sc_time &earliestTimeToStart = nextCommandByStack[Command::RDA][currentStack];
earliestTimeToStart = std::max(earliestTimeToStart, constraint);
}
}
break;
}

View File

@@ -71,11 +71,14 @@ private:
using BankGroupVector = ControllerVector<BankGroup, T>;
template<typename T>
using RankVector = ControllerVector<Rank, T>;
template<typename T>
using StackVector = ControllerVector<Stack, T>;
CommandArray<BankVector<sc_core::sc_time>> nextCommandByBank;
CommandArray<BankGroupVector<sc_core::sc_time>> nextCommandByBankGroup;
CommandArray<RankVector<sc_core::sc_time>> nextCommandByRank;
CommandArray<StackVector<sc_core::sc_time>> nextCommandByStack;
RankVector<std::queue<sc_core::sc_time>> last4ActivatesOnRank;
ControllerVector<Rank, unsigned> bankwiseRefreshCounter;

View File

@@ -56,6 +56,7 @@ struct AddressMapping
std::optional<std::vector<BitEntry>> BANK_BIT;
std::optional<std::vector<BitEntry>> BANKGROUP_BIT;
std::optional<std::vector<BitEntry>> RANK_BIT;
std::optional<std::vector<BitEntry>> STACK_BIT;
std::optional<std::vector<BitEntry>> PSEUDOCHANNEL_BIT;
std::optional<std::vector<BitEntry>> CHANNEL_BIT;
};
@@ -67,6 +68,7 @@ NLOHMANN_JSONIFY_ALL_THINGS(AddressMapping,
BANK_BIT,
BANKGROUP_BIT,
RANK_BIT,
STACK_BIT,
PSEUDOCHANNEL_BIT,
CHANNEL_BIT)

View File

@@ -33,6 +33,7 @@
* Janik Schlemminger
* Robert Gernhardt
* Matthias Jung
* Derek Christ
*/
#include "dramExtensions.h"
@@ -157,6 +158,7 @@ sc_time ArbiterExtension::getTimeOfGeneration(const tlm::tlm_generic_payload& tr
ControllerExtension::ControllerExtension(uint64_t channelPayloadID,
Rank rank,
Stack stack,
BankGroup bankGroup,
Bank bank,
Row row,
@@ -164,6 +166,7 @@ ControllerExtension::ControllerExtension(uint64_t channelPayloadID,
unsigned int burstLength) :
channelPayloadID(channelPayloadID),
rank(rank),
stack(stack),
bankGroup(bankGroup),
bank(bank),
row(row),
@@ -175,6 +178,7 @@ ControllerExtension::ControllerExtension(uint64_t channelPayloadID,
void ControllerExtension::setAutoExtension(tlm::tlm_generic_payload& trans,
uint64_t channelPayloadID,
Rank rank,
Stack stack,
BankGroup bankGroup,
Bank bank,
Row row,
@@ -196,7 +200,7 @@ void ControllerExtension::setAutoExtension(tlm::tlm_generic_payload& trans,
else
{
extension = new ControllerExtension(
channelPayloadID, rank, bankGroup, bank, row, column, burstLength);
channelPayloadID, rank, stack, bankGroup, bank, row, column, burstLength);
trans.set_auto_extension(extension);
}
}
@@ -204,6 +208,7 @@ void ControllerExtension::setAutoExtension(tlm::tlm_generic_payload& trans,
void ControllerExtension::setExtension(tlm::tlm_generic_payload& trans,
uint64_t channelPayloadID,
Rank rank,
Stack stack,
BankGroup bankGroup,
Bank bank,
Row row,
@@ -211,15 +216,15 @@ void ControllerExtension::setExtension(tlm::tlm_generic_payload& trans,
unsigned int burstLength)
{
assert(trans.get_extension<ControllerExtension>() == nullptr);
auto* extension =
new ControllerExtension(channelPayloadID, rank, bankGroup, bank, row, column, burstLength);
auto* extension = new ControllerExtension(
channelPayloadID, rank, stack, bankGroup, bank, row, column, burstLength);
trans.set_extension(extension);
}
tlm_extension_base* ControllerExtension::clone() const
{
return new ControllerExtension(
channelPayloadID, rank, bankGroup, bank, row, column, burstLength);
channelPayloadID, rank, stack, bankGroup, bank, row, column, burstLength);
}
void ControllerExtension::copy_from(const tlm_extension_base& ext)
@@ -244,6 +249,11 @@ Rank ControllerExtension::getRank() const
return rank;
}
/// Returns the Stack value held by this extension instance.
Stack ControllerExtension::getStack() const
{
    return this->stack;
}
BankGroup ControllerExtension::getBankGroup() const
{
return bankGroup;
@@ -284,6 +294,11 @@ Rank ControllerExtension::getRank(const tlm::tlm_generic_payload& trans)
return trans.get_extension<ControllerExtension>()->rank;
}
/// Reads the Stack value from the ControllerExtension attached to @p trans.
/// The extension pointer is dereferenced without a null check, so the payload
/// must already carry a ControllerExtension.
Stack ControllerExtension::getStack(const tlm::tlm_generic_payload& trans)
{
    const auto* controllerExtension = trans.get_extension<ControllerExtension>();
    return controllerExtension->stack;
}
BankGroup ControllerExtension::getBankGroup(const tlm::tlm_generic_payload& trans)
{
return trans.get_extension<ControllerExtension>()->bankGroup;

View File

@@ -32,12 +32,12 @@
* Authors:
* Robert Gernhardt
* Matthias Jung
* Derek Christ
*/
#ifndef DRAMEXTENSIONS_H
#define DRAMEXTENSIONS_H
#include <iostream>
#include <vector>
#include <systemc>
@@ -49,6 +49,7 @@ namespace DRAMSys
enum class Thread : std::size_t;
enum class Channel : std::size_t;
enum class Rank : std::size_t;
enum class Stack : std::size_t;
enum class LogicalRank : std::size_t;
enum class PhysicalRank : std::size_t;
enum class DimmRank : std::size_t;
@@ -122,6 +123,7 @@ public:
static void setAutoExtension(tlm::tlm_generic_payload& trans,
uint64_t channelPayloadID,
Rank rank,
Stack stack,
BankGroup bankGroup,
Bank bank,
Row row,
@@ -131,6 +133,7 @@ public:
static void setExtension(tlm::tlm_generic_payload& trans,
uint64_t channelPayloadID,
Rank rank,
Stack stack,
BankGroup bankGroup,
Bank bank,
Row row,
@@ -143,6 +146,7 @@ public:
void copy_from(const tlm::tlm_extension_base& ext) override;
[[nodiscard]] uint64_t getChannelPayloadID() const;
[[nodiscard]] Stack getStack() const;
[[nodiscard]] Rank getRank() const;
[[nodiscard]] BankGroup getBankGroup() const;
[[nodiscard]] Bank getBank() const;
@@ -152,6 +156,7 @@ public:
static const ControllerExtension& getExtension(const tlm::tlm_generic_payload& trans);
static uint64_t getChannelPayloadID(const tlm::tlm_generic_payload& trans);
static Stack getStack(const tlm::tlm_generic_payload& trans);
static Rank getRank(const tlm::tlm_generic_payload& trans);
static BankGroup getBankGroup(const tlm::tlm_generic_payload& trans);
static Bank getBank(const tlm::tlm_generic_payload& trans);
@@ -162,6 +167,7 @@ public:
private:
ControllerExtension(uint64_t channelPayloadID,
Rank rank,
Stack stack,
BankGroup bankGroup,
Bank bank,
Row row,
@@ -169,6 +175,7 @@ private:
unsigned burstLength);
uint64_t channelPayloadID;
Rank rank;
Stack stack;
BankGroup bankGroup;
Bank bank;
Row row;

View File

@@ -86,7 +86,7 @@ void setUpDummy(tlm_generic_payload& payload,
payload.set_byte_enable_length(0);
payload.set_streaming_width(0);
ControllerExtension::setExtension(
payload, channelPayloadID, rank, bankGroup, bank, Row(0), Column(0), 0);
payload, channelPayloadID, rank, Stack(0), bankGroup, bank, Row(0), Column(0), 0);
ArbiterExtension::setExtension(payload, Thread(UINT_MAX), Channel(0), 0, SC_ZERO_TIME);
}

View File

@@ -566,6 +566,7 @@ void Controller::manageRequests(const sc_time& delay)
ControllerExtension::setAutoExtension(*transToAcquire.payload,
nextChannelPayloadIDToAppend++,
Rank(decodedAddress.rank),
Stack(decodedAddress.stack),
BankGroup(decodedAddress.bankgroup),
Bank(decodedAddress.bank),
Row(decodedAddress.row),
@@ -774,6 +775,7 @@ void Controller::createChildTranses(tlm::tlm_generic_payload& parentTrans)
ControllerExtension::setAutoExtension(*childTrans,
nextChannelPayloadIDToAppend,
Rank(decodedAddress.rank),
Stack(decodedAddress.stack),
BankGroup(decodedAddress.bankgroup),
Bank(decodedAddress.bank),
Row(decodedAddress.row),

View File

@@ -82,6 +82,11 @@ AddressDecoder::AddressDecoder(const Config::AddressMapping& addressMapping)
addMapping(*rankBits, vRankBits, vXor);
}
if (const auto& stackBits = addressMapping.STACK_BIT)
{
addMapping(*stackBits, vStackBits, vXor);
}
// HBM pseudo channels are internally modelled as ranks
if (const auto& pseudoChannelBits = addressMapping.PSEUDOCHANNEL_BIT)
{
@@ -132,6 +137,7 @@ void AddressDecoder::plausibilityCheck(const MemSpec& memSpec)
{
unsigned channels = std::lround(std::pow(2.0, vChannelBits.size()));
unsigned ranks = std::lround(std::pow(2.0, vRankBits.size()));
unsigned stacks = std::lround(std::pow(2.0, vStackBits.size()));
unsigned bankGroups = std::lround(std::pow(2.0, vBankGroupBits.size()));
unsigned banks = std::lround(std::pow(2.0, vBankBits.size()));
unsigned rows = std::lround(std::pow(2.0, vRowBits.size()));
@@ -139,13 +145,14 @@ void AddressDecoder::plausibilityCheck(const MemSpec& memSpec)
unsigned bytes = std::lround(std::pow(2.0, vByteBits.size()));
maximumAddress =
static_cast<uint64_t>(bytes) * columns * rows * banks * bankGroups * ranks * channels - 1;
static_cast<uint64_t>(bytes) * columns * rows * banks * bankGroups * stacks * ranks * channels - 1;
auto totalAddressBits = static_cast<unsigned>(std::log2(maximumAddress));
for (unsigned bitPosition = 0; bitPosition < totalAddressBits; bitPosition++)
{
if (std::count(vChannelBits.begin(), vChannelBits.end(), bitPosition) +
std::count(vRankBits.begin(), vRankBits.end(), bitPosition) +
std::count(vStackBits.begin(), vStackBits.end(), bitPosition) +
std::count(vBankGroupBits.begin(), vBankGroupBits.end(), bitPosition) +
std::count(vBankBits.begin(), vBankBits.end(), bitPosition) +
std::count(vRowBits.begin(), vRowBits.end(), bitPosition) +
@@ -222,6 +229,9 @@ DecodedAddress AddressDecoder::decodeAddress(uint64_t encAddr) const
for (unsigned it = 0; it < vRankBits.size(); it++)
decAddr.rank |= ((encAddr >> vRankBits[it]) & UINT64_C(1)) << it;
for (unsigned it = 0; it < vStackBits.size(); it++)
decAddr.stack |= ((encAddr >> vStackBits[it]) & UINT64_C(1)) << it;
for (unsigned it = 0; it < vBankGroupBits.size(); it++)
decAddr.bankgroup |= ((encAddr >> vBankGroupBits[it]) & UINT64_C(1)) << it;
@@ -290,6 +300,9 @@ uint64_t AddressDecoder::encodeAddress(DecodedAddress decodedAddress) const
for (unsigned i = 0; i < vRankBits.size(); i++)
address |= ((decodedAddress.rank >> i) & 0x1) << vRankBits[i];
for (unsigned i = 0; i < vStackBits.size(); i++)
address |= ((decodedAddress.stack >> i) & 0x1) << vStackBits[i];
for (unsigned i = 0; i < vBankGroupBits.size(); i++)
address |= ((decodedAddress.bankgroup >> i) & 0x1) << vBankGroupBits[i];
@@ -348,6 +361,22 @@ void AddressDecoder::print() const
<< std::endl;
}
for (int it = static_cast<int>(vStackBits.size() - 1); it >= 0; it--)
{
uint64_t addressBits =
(UINT64_C(1) << vStackBits[static_cast<std::vector<unsigned>::size_type>(it)]);
for (auto xorMapping : vXor)
{
if (xorMapping.at(0) == vStackBits[static_cast<std::vector<unsigned>::size_type>(it)])
{
for (auto it = xorMapping.cbegin() + 1; it != xorMapping.cend(); it++)
addressBits |= (UINT64_C(1) << *it);
}
}
std::cout << " SID " << std::setw(2) << it << ": " << std::bitset<64>(addressBits)
<< std::endl;
}
for (int it = static_cast<int>(vBankGroupBits.size() - 1); it >= 0; it--)
{
uint64_t addressBits =

View File

@@ -52,6 +52,7 @@ struct DecodedAddress
{
DecodedAddress(unsigned channel,
unsigned rank,
unsigned stack,
unsigned bankgroup,
unsigned bank,
unsigned row,
@@ -59,6 +60,7 @@ struct DecodedAddress
unsigned bytes) :
channel(channel),
rank(rank),
stack(stack),
bankgroup(bankgroup),
bank(bank),
row(row),
@@ -71,6 +73,7 @@ struct DecodedAddress
unsigned channel = 0;
unsigned rank = 0;
unsigned stack = 0;
unsigned bankgroup = 0;
unsigned bank = 0;
unsigned row = 0;
@@ -102,6 +105,7 @@ private:
std::vector<std::vector<unsigned>> vXor;
std::vector<unsigned> vChannelBits;
std::vector<unsigned> vRankBits;
std::vector<unsigned> vStackBits;
std::vector<unsigned> vBankGroupBits;
std::vector<unsigned> vBankBits;
std::vector<unsigned> vRowBits;

View File

@@ -75,6 +75,7 @@ protected:
addressMapBitVector({17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}),
addressMapBitVector({33}),
std::nullopt,
std::nullopt,
std::nullopt};
DRAMSys::Config::McConfig mcConfig{PagePolicyType::Open,

View File

@@ -81,13 +81,14 @@ TEST_F(AddressDecoderFixture, Encoding)
{
unsigned int channel = 0;
unsigned int rank = 0;
unsigned int stack = 0;
unsigned int bankgroup = 3;
unsigned int bank = 12;
unsigned int row = 29874;
unsigned int column = 170;
unsigned int byte = 0;
DRAMSys::DecodedAddress decodedAddress(channel, rank, bankgroup, bank, row, column, byte);
DRAMSys::DecodedAddress decodedAddress(channel, rank, stack, bankgroup, bank, row, column, byte);
uint64_t address = addressDecoder.encodeAddress(decodedAddress);
EXPECT_EQ(address, 0x3A59'1474);

View File

@@ -64,6 +64,7 @@
"nbrOfBanks": 16,
"nbrOfColumns": 128,
"nbrOfPseudoChannels": 2,
"nbrOfStacks": 1,
"nbrOfRows": 65536,
"width": 32,
"nbrOfDevices": 1,
@@ -77,6 +78,7 @@
"memtimingspec": {
"CCDL": 4,
"CCDS": 2,
"CCDR": 3,
"CKE": 8,
"DQSCK": 1,
"FAW": 16,