configs,tests: Add tokens to GPU VIPER tester
This patch integrates tokens into the VIPER tester by adding a GMTokenPort to the tester, having the tester acquire tokens for requests that use tokens, and checking for available tokens before issuing any requests.

Change-Id: Id317d703e4765dd5fa7de0d16f5eb595aab7096c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/35135
Maintainer: Matthew Poremba <matthew.poremba@amd.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
committed by
Matthew Poremba
parent
80221d7e1d
commit
dfc64880fe
@@ -41,6 +41,7 @@ class ProtocolTester(ClockedObject):
|
||||
cu_vector_ports = VectorRequestPort("Vector ports for GPUs")
|
||||
cu_sqc_ports = VectorRequestPort("SQC ports for GPUs")
|
||||
cu_scalar_ports = VectorRequestPort("Scalar ports for GPUs")
|
||||
cu_token_ports = VectorRequestPort("Token ports for GPU")
|
||||
|
||||
cus_per_sqc = Param.Int(4, "Number of CUs per SQC")
|
||||
cus_per_scalar = Param.Int(4, "Number of CUs per scalar cache")
|
||||
@@ -48,6 +49,11 @@ class ProtocolTester(ClockedObject):
|
||||
wavefronts_per_cu = Param.Int(1, "Number of wavefronts per CU")
|
||||
workitems_per_wavefront = Param.Int(64, "Number of workitems per wf")
|
||||
|
||||
max_cu_tokens = Param.Int(4, "Maximum number of tokens, i.e., the number"
|
||||
" of instructions that can be uncoalesced"
|
||||
" before back-pressure occurs from the"
|
||||
" coalescer.")
|
||||
|
||||
cpu_threads = VectorParam.CpuThread("All cpus")
|
||||
wavefronts = VectorParam.GpuWavefront("All wavefronts")
|
||||
|
||||
|
||||
@@ -125,11 +125,13 @@ GpuThread::scheduleDeadlockCheckEvent()
|
||||
void
|
||||
GpuThread::attachGpuThreadToPorts(ProtocolTester *_tester,
|
||||
ProtocolTester::SeqPort *_port,
|
||||
ProtocolTester::GMTokenPort *_tokenPort,
|
||||
ProtocolTester::SeqPort *_scalarPort,
|
||||
ProtocolTester::SeqPort *_sqcPort)
|
||||
{
|
||||
tester = _tester;
|
||||
port = _port;
|
||||
tokenPort = _tokenPort;
|
||||
scalarPort = _scalarPort;
|
||||
sqcPort = _sqcPort;
|
||||
|
||||
@@ -163,7 +165,8 @@ GpuThread::isNextActionReady()
|
||||
// to complete
|
||||
if (pendingLdStCount == 0 &&
|
||||
pendingFenceCount == 0 &&
|
||||
pendingAtomicCount == 0) {
|
||||
pendingAtomicCount == 0 &&
|
||||
tokenPort->haveTokens(numLanes)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -198,7 +201,8 @@ GpuThread::isNextActionReady()
|
||||
assert(pendingAtomicCount == 0);
|
||||
|
||||
// can't issue if there is a pending fence
|
||||
if (pendingFenceCount > 0) {
|
||||
if (pendingFenceCount > 0 ||
|
||||
!tokenPort->haveTokens(numLanes)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -241,6 +245,7 @@ GpuThread::issueNextAction()
|
||||
{
|
||||
switch(curAction->getType()) {
|
||||
case Episode::Action::Type::ATOMIC:
|
||||
tokenPort->acquireTokens(numLanes);
|
||||
issueAtomicOps();
|
||||
break;
|
||||
case Episode::Action::Type::ACQUIRE:
|
||||
@@ -250,9 +255,11 @@ GpuThread::issueNextAction()
|
||||
issueReleaseOp();
|
||||
break;
|
||||
case Episode::Action::Type::LOAD:
|
||||
tokenPort->acquireTokens(numLanes);
|
||||
issueLoadOps();
|
||||
break;
|
||||
case Episode::Action::Type::STORE:
|
||||
tokenPort->acquireTokens(numLanes);
|
||||
issueStoreOps();
|
||||
break;
|
||||
default:
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
#include "cpu/testers/gpu_ruby_test/episode.hh"
|
||||
#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
|
||||
#include "gpu-compute/gpu_dyn_inst.hh"
|
||||
#include "mem/token_port.hh"
|
||||
#include "sim/clocked_object.hh"
|
||||
|
||||
class GpuThread : public ClockedObject
|
||||
@@ -61,6 +62,7 @@ class GpuThread : public ClockedObject
|
||||
|
||||
void attachGpuThreadToPorts(ProtocolTester *_tester,
|
||||
ProtocolTester::SeqPort *_port,
|
||||
ProtocolTester::GMTokenPort *_tokenPort = nullptr,
|
||||
ProtocolTester::SeqPort *_sqcPort = nullptr,
|
||||
ProtocolTester::SeqPort *_scalarPort = nullptr);
|
||||
|
||||
@@ -136,6 +138,7 @@ class GpuThread : public ClockedObject
|
||||
AddressManager *addrManager;
|
||||
|
||||
ProtocolTester::SeqPort *port; // main data port (GPU-vector data)
|
||||
ProtocolTester::GMTokenPort *tokenPort;
|
||||
ProtocolTester::SeqPort *scalarPort; // nullptr for CPU
|
||||
ProtocolTester::SeqPort *sqcPort; // nullptr for CPU
|
||||
|
||||
|
||||
@@ -53,10 +53,12 @@ ProtocolTester::ProtocolTester(const Params &p)
|
||||
numVectorPorts(p.port_cu_vector_ports_connection_count),
|
||||
numSqcPorts(p.port_cu_sqc_ports_connection_count),
|
||||
numScalarPorts(p.port_cu_scalar_ports_connection_count),
|
||||
numTokenPorts(p.port_cu_token_ports_connection_count),
|
||||
numCusPerSqc(p.cus_per_sqc),
|
||||
numCusPerScalar(p.cus_per_scalar),
|
||||
numWfsPerCu(p.wavefronts_per_cu),
|
||||
numWisPerWf(p.workitems_per_wavefront),
|
||||
numCuTokens(p.max_cu_tokens),
|
||||
numAtomicLocs(p.num_atomic_locations),
|
||||
numNormalLocsPerAtomic(p.num_normal_locs_per_atomic),
|
||||
episodeLength(p.episode_length),
|
||||
@@ -107,6 +109,14 @@ ProtocolTester::ProtocolTester(const Params &p)
|
||||
idx++;
|
||||
}
|
||||
|
||||
for (int i = 0; i < numTokenPorts; ++i) {
|
||||
cuTokenPorts.push_back(new GMTokenPort(csprintf("%s-cuTokenPort%d",
|
||||
name(), i),
|
||||
this, i));
|
||||
cuTokenManagers.push_back(new TokenManager(numCuTokens));
|
||||
cuTokenPorts[i]->setTokenManager(cuTokenManagers[i]);
|
||||
}
|
||||
|
||||
// create an address manager
|
||||
addrManager = new AddressManager(numAtomicLocs,
|
||||
numNormalLocsPerAtomic);
|
||||
@@ -194,6 +204,7 @@ ProtocolTester::init()
|
||||
wfId = cu_id * numWfsPerCu + i;
|
||||
wfs[wfId]->attachGpuThreadToPorts(this,
|
||||
static_cast<SeqPort*>(cuVectorPorts[vectorPortId]),
|
||||
cuTokenPorts[vectorPortId],
|
||||
static_cast<SeqPort*>(cuSqcPorts[sqcPortId]),
|
||||
static_cast<SeqPort*>(cuScalarPorts[scalarPortId]));
|
||||
wfs[wfId]->scheduleWakeup();
|
||||
@@ -206,7 +217,8 @@ Port&
|
||||
ProtocolTester::getPort(const std::string &if_name, PortID idx)
|
||||
{
|
||||
if (if_name != "cpu_ports" && if_name != "cu_vector_ports" &&
|
||||
if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports") {
|
||||
if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports" &&
|
||||
if_name != "cu_token_ports") {
|
||||
// pass along to super class
|
||||
return ClockedObject::getPort(if_name, idx);
|
||||
} else {
|
||||
@@ -222,6 +234,10 @@ ProtocolTester::getPort(const std::string &if_name, PortID idx)
|
||||
if (idx > numSqcPorts)
|
||||
panic("ProtocolTester: unknown cu sqc port %d\n", idx);
|
||||
return *cuSqcPorts[idx];
|
||||
} else if (if_name == "cu_token_ports") {
|
||||
if (idx > numTokenPorts)
|
||||
panic("ProtocolTester: unknown cu token port %d\n", idx);
|
||||
return *cuTokenPorts[idx];
|
||||
} else {
|
||||
assert(if_name == "cu_scalar_ports");
|
||||
if (idx > numScalarPorts)
|
||||
|
||||
@@ -58,6 +58,7 @@
|
||||
#include "cpu/testers/gpu_ruby_test/address_manager.hh"
|
||||
#include "mem/packet.hh"
|
||||
#include "mem/ruby/system/RubyPort.hh"
|
||||
#include "mem/token_port.hh"
|
||||
#include "params/ProtocolTester.hh"
|
||||
|
||||
class GpuThread;
|
||||
@@ -81,6 +82,20 @@ class ProtocolTester : public ClockedObject
|
||||
{ panic("%s does not expect a retry\n", name()); }
|
||||
};
|
||||
|
||||
class GMTokenPort : public TokenRequestPort
|
||||
{
|
||||
public:
|
||||
GMTokenPort(const std::string& name, ProtocolTester *_tester,
|
||||
PortID id = InvalidPortID)
|
||||
: TokenRequestPort(name, _tester, id)
|
||||
{}
|
||||
~GMTokenPort() {}
|
||||
|
||||
protected:
|
||||
bool recvTimingResp(PacketPtr) { return false; }
|
||||
void recvReqRetry() {}
|
||||
};
|
||||
|
||||
struct SenderState : public Packet::SenderState
|
||||
{
|
||||
GpuThread* th;
|
||||
@@ -131,10 +146,12 @@ class ProtocolTester : public ClockedObject
|
||||
int numVectorPorts;
|
||||
int numSqcPorts;
|
||||
int numScalarPorts;
|
||||
int numTokenPorts;
|
||||
int numCusPerSqc;
|
||||
int numCusPerScalar;
|
||||
int numWfsPerCu;
|
||||
int numWisPerWf;
|
||||
int numCuTokens;
|
||||
// parameters controlling the address range that the tester can access
|
||||
int numAtomicLocs;
|
||||
int numNormalLocsPerAtomic;
|
||||
@@ -150,6 +167,8 @@ class ProtocolTester : public ClockedObject
|
||||
std::vector<RequestPort*> cuVectorPorts; // ports to GPU vector cache
|
||||
std::vector<RequestPort*> cuSqcPorts; // ports to GPU inst cache
|
||||
std::vector<RequestPort*> cuScalarPorts; // ports to GPU scalar cache
|
||||
std::vector<TokenManager*> cuTokenManagers;
|
||||
std::vector<GMTokenPort*> cuTokenPorts;
|
||||
// all CPU and GPU threads
|
||||
std::vector<CpuThread*> cpuThreads;
|
||||
std::vector<GpuWavefront*> wfs;
|
||||
|
||||
Reference in New Issue
Block a user