configs,tests: Add tokens to GPU VIPER tester

This patch integrates tokens into the VIPER tester by adding a
GMTokenPort to the tester, having the tester acquire tokens for
requests that use tokens, and checking for available tokens
before issuing any requests.

Change-Id: Id317d703e4765dd5fa7de0d16f5eb595aab7096c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/35135
Maintainer: Matthew Poremba <matthew.poremba@amd.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Kyle Roarty
2020-09-24 21:50:58 -05:00
committed by Matthew Poremba
parent 80221d7e1d
commit dfc64880fe
5 changed files with 54 additions and 3 deletions

View File

@@ -41,6 +41,7 @@ class ProtocolTester(ClockedObject):
cu_vector_ports = VectorRequestPort("Vector ports for GPUs")
cu_sqc_ports = VectorRequestPort("SQC ports for GPUs")
cu_scalar_ports = VectorRequestPort("Scalar ports for GPUs")
cu_token_ports = VectorRequestPort("Token ports for GPU")
cus_per_sqc = Param.Int(4, "Number of CUs per SQC")
cus_per_scalar = Param.Int(4, "Number of CUs per scalar cache")
@@ -48,6 +49,11 @@ class ProtocolTester(ClockedObject):
wavefronts_per_cu = Param.Int(1, "Number of wavefronts per CU")
workitems_per_wavefront = Param.Int(64, "Number of workitems per wf")
max_cu_tokens = Param.Int(4, "Maximum number of tokens, i.e., the number"
" of instructions that can be uncoalesced"
" before back-pressure occurs from the"
" coalescer.")
cpu_threads = VectorParam.CpuThread("All cpus")
wavefronts = VectorParam.GpuWavefront("All wavefronts")

View File

@@ -125,11 +125,13 @@ GpuThread::scheduleDeadlockCheckEvent()
void
GpuThread::attachGpuThreadToPorts(ProtocolTester *_tester,
ProtocolTester::SeqPort *_port,
ProtocolTester::GMTokenPort *_tokenPort,
ProtocolTester::SeqPort *_scalarPort,
ProtocolTester::SeqPort *_sqcPort)
{
tester = _tester;
port = _port;
tokenPort = _tokenPort;
scalarPort = _scalarPort;
sqcPort = _sqcPort;
@@ -163,7 +165,8 @@ GpuThread::isNextActionReady()
// to complete
if (pendingLdStCount == 0 &&
pendingFenceCount == 0 &&
pendingAtomicCount == 0) {
pendingAtomicCount == 0 &&
tokenPort->haveTokens(numLanes)) {
return true;
}
@@ -198,7 +201,8 @@ GpuThread::isNextActionReady()
assert(pendingAtomicCount == 0);
// can't issue if there is a pending fence
if (pendingFenceCount > 0) {
if (pendingFenceCount > 0 ||
!tokenPort->haveTokens(numLanes)) {
return false;
}
@@ -241,6 +245,7 @@ GpuThread::issueNextAction()
{
switch(curAction->getType()) {
case Episode::Action::Type::ATOMIC:
tokenPort->acquireTokens(numLanes);
issueAtomicOps();
break;
case Episode::Action::Type::ACQUIRE:
@@ -250,9 +255,11 @@ GpuThread::issueNextAction()
issueReleaseOp();
break;
case Episode::Action::Type::LOAD:
tokenPort->acquireTokens(numLanes);
issueLoadOps();
break;
case Episode::Action::Type::STORE:
tokenPort->acquireTokens(numLanes);
issueStoreOps();
break;
default:

View File

@@ -42,6 +42,7 @@
#include "cpu/testers/gpu_ruby_test/episode.hh"
#include "cpu/testers/gpu_ruby_test/protocol_tester.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "mem/token_port.hh"
#include "sim/clocked_object.hh"
class GpuThread : public ClockedObject
@@ -61,6 +62,7 @@ class GpuThread : public ClockedObject
/**
 * Attach this thread to its tester and to the ports it uses.
 *
 * @param _tester     tester this thread belongs to
 * @param _port       main data port
 * @param _tokenPort  token port; the thread calls haveTokens() and
 *                    acquireTokens() on it
 * @param _sqcPort    SQC port
 * @param _scalarPort scalar port
 *
 * NOTE(review): the out-of-line definition names the fourth parameter
 * _scalarPort and the fifth _sqcPort (the reverse of this declaration)
 * and stores them into scalarPort and sqcPort in that order, while
 * ProtocolTester::init() passes the SQC port fourth and the scalar port
 * fifth. Confirm which order is intended.
 *
 * NOTE(review): _tokenPort defaults to nullptr, but the visible uses in
 * isNextActionReady() and issueNextAction() dereference tokenPort
 * without a null check; confirm callers always supply a token port.
 */
void attachGpuThreadToPorts(ProtocolTester *_tester,
ProtocolTester::SeqPort *_port,
ProtocolTester::GMTokenPort *_tokenPort = nullptr,
ProtocolTester::SeqPort *_sqcPort = nullptr,
ProtocolTester::SeqPort *_scalarPort = nullptr);
@@ -136,6 +138,7 @@ class GpuThread : public ClockedObject
AddressManager *addrManager;
ProtocolTester::SeqPort *port; // main data port (GPU-vector data)
ProtocolTester::GMTokenPort *tokenPort;
ProtocolTester::SeqPort *scalarPort; // nullptr for CPU
ProtocolTester::SeqPort *sqcPort; // nullptr for CPU

View File

@@ -53,10 +53,12 @@ ProtocolTester::ProtocolTester(const Params &p)
numVectorPorts(p.port_cu_vector_ports_connection_count),
numSqcPorts(p.port_cu_sqc_ports_connection_count),
numScalarPorts(p.port_cu_scalar_ports_connection_count),
numTokenPorts(p.port_cu_token_ports_connection_count),
numCusPerSqc(p.cus_per_sqc),
numCusPerScalar(p.cus_per_scalar),
numWfsPerCu(p.wavefronts_per_cu),
numWisPerWf(p.workitems_per_wavefront),
numCuTokens(p.max_cu_tokens),
numAtomicLocs(p.num_atomic_locations),
numNormalLocsPerAtomic(p.num_normal_locs_per_atomic),
episodeLength(p.episode_length),
@@ -107,6 +109,14 @@ ProtocolTester::ProtocolTester(const Params &p)
idx++;
}
for (int i = 0; i < numTokenPorts; ++i) {
cuTokenPorts.push_back(new GMTokenPort(csprintf("%s-cuTokenPort%d",
name(), i),
this, i));
cuTokenManagers.push_back(new TokenManager(numCuTokens));
cuTokenPorts[i]->setTokenManager(cuTokenManagers[i]);
}
// create an address manager
addrManager = new AddressManager(numAtomicLocs,
numNormalLocsPerAtomic);
@@ -194,6 +204,7 @@ ProtocolTester::init()
wfId = cu_id * numWfsPerCu + i;
wfs[wfId]->attachGpuThreadToPorts(this,
static_cast<SeqPort*>(cuVectorPorts[vectorPortId]),
cuTokenPorts[vectorPortId],
static_cast<SeqPort*>(cuSqcPorts[sqcPortId]),
static_cast<SeqPort*>(cuScalarPorts[scalarPortId]));
wfs[wfId]->scheduleWakeup();
@@ -206,7 +217,8 @@ Port&
ProtocolTester::getPort(const std::string &if_name, PortID idx)
{
if (if_name != "cpu_ports" && if_name != "cu_vector_ports" &&
if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports") {
if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports" &&
if_name != "cu_token_ports") {
// pass along to super class
return ClockedObject::getPort(if_name, idx);
} else {
@@ -222,6 +234,10 @@ ProtocolTester::getPort(const std::string &if_name, PortID idx)
if (idx > numSqcPorts)
panic("ProtocolTester: unknown cu sqc port %d\n", idx);
return *cuSqcPorts[idx];
} else if (if_name == "cu_token_ports") {
if (idx > numTokenPorts)
panic("ProtocolTester: unknown cu token port %d\n", idx);
return *cuTokenPorts[idx];
} else {
assert(if_name == "cu_scalar_ports");
if (idx > numScalarPorts)

View File

@@ -58,6 +58,7 @@
#include "cpu/testers/gpu_ruby_test/address_manager.hh"
#include "mem/packet.hh"
#include "mem/ruby/system/RubyPort.hh"
#include "mem/token_port.hh"
#include "params/ProtocolTester.hh"
class GpuThread;
@@ -81,6 +82,20 @@ class ProtocolTester : public ClockedObject
{ panic("%s does not expect a retry\n", name()); }
};
class GMTokenPort : public TokenRequestPort
{
public:
GMTokenPort(const std::string& name, ProtocolTester *_tester,
PortID id = InvalidPortID)
: TokenRequestPort(name, _tester, id)
{}
~GMTokenPort() {}
protected:
bool recvTimingResp(PacketPtr) { return false; }
void recvReqRetry() {}
};
struct SenderState : public Packet::SenderState
{
GpuThread* th;
@@ -131,10 +146,12 @@ class ProtocolTester : public ClockedObject
int numVectorPorts;
int numSqcPorts;
int numScalarPorts;
int numTokenPorts;
int numCusPerSqc;
int numCusPerScalar;
int numWfsPerCu;
int numWisPerWf;
int numCuTokens;
// parameters controlling the address range that the tester can access
int numAtomicLocs;
int numNormalLocsPerAtomic;
@@ -150,6 +167,8 @@ class ProtocolTester : public ClockedObject
std::vector<RequestPort*> cuVectorPorts; // ports to GPU vector cache
std::vector<RequestPort*> cuSqcPorts; // ports to GPU inst cache
std::vector<RequestPort*> cuScalarPorts; // ports to GPU scalar cache
std::vector<TokenManager*> cuTokenManagers;
std::vector<GMTokenPort*> cuTokenPorts;
// all CPU and GPU threads
std::vector<CpuThread*> cpuThreads;
std::vector<GpuWavefront*> wfs;