cpu,configs: Add DMA thread to Ruby GPU tester
Add a DMA thread tester to the Ruby GPU tester to test the DMA state
machine in the protocol. This change currently creates a dummy DMA device
to pass through Ruby.py and scans for the DMA sequencers due to the
opaqueness of Ruby.py.
DMA atomics are not yet supported because no protocol implements
atomic transitions in the DMA state machine file.
Example run command:
build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py \
--test-length=1000
Change-Id: I63d83e00fd0dcbb1e34c6704d1c2d49ed4e77722
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/39936
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -39,6 +39,7 @@
|
||||
#include <random>
|
||||
|
||||
#include "cpu/testers/gpu_ruby_test/cpu_thread.hh"
|
||||
#include "cpu/testers/gpu_ruby_test/dma_thread.hh"
|
||||
#include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"
|
||||
#include "cpu/testers/gpu_ruby_test/tester_thread.hh"
|
||||
#include "debug/ProtocolTest.hh"
|
||||
@@ -50,6 +51,7 @@ ProtocolTester::ProtocolTester(const Params &p)
|
||||
: ClockedObject(p),
|
||||
_requestorId(p.system->getRequestorId(this)),
|
||||
numCpuPorts(p.port_cpu_ports_connection_count),
|
||||
numDmaPorts(p.port_dma_ports_connection_count),
|
||||
numVectorPorts(p.port_cu_vector_ports_connection_count),
|
||||
numSqcPorts(p.port_cu_sqc_ports_connection_count),
|
||||
numScalarPorts(p.port_cu_scalar_ports_connection_count),
|
||||
@@ -65,11 +67,13 @@ ProtocolTester::ProtocolTester(const Params &p)
|
||||
maxNumEpisodes(p.max_num_episodes),
|
||||
debugTester(p.debug_tester),
|
||||
cpuThreads(p.cpu_threads),
|
||||
dmaThreads(p.dma_threads),
|
||||
wfs(p.wavefronts)
|
||||
{
|
||||
int idx = 0; // global port index
|
||||
|
||||
numCpus = numCpuPorts; // 1 cpu port per CPU
|
||||
numDmas = numDmaPorts; // 1 dma port per DMA
|
||||
numCus = numVectorPorts; // 1 vector port per CU
|
||||
|
||||
// create all physical cpu's data ports
|
||||
@@ -81,6 +85,15 @@ ProtocolTester::ProtocolTester(const Params &p)
|
||||
idx++;
|
||||
}
|
||||
|
||||
// create all physical DMA data ports
|
||||
for (int i = 0; i < numDmaPorts; ++i) {
|
||||
DPRINTF(ProtocolTest, "Creating %s\n",
|
||||
csprintf("%s-dmaPort%d", name(), i));
|
||||
dmaPorts.push_back(new SeqPort(csprintf("%s-dmaPort%d", name(), i),
|
||||
this, i, idx));
|
||||
idx++;
|
||||
}
|
||||
|
||||
// create all physical gpu's data ports
|
||||
for (int i = 0; i < numVectorPorts; ++i) {
|
||||
DPRINTF(ProtocolTest, "Creating %s\n",
|
||||
@@ -144,6 +157,7 @@ ProtocolTester::ProtocolTester(const Params &p)
|
||||
std::stringstream ss;
|
||||
ss << "GPU Ruby test's configurations" << std::endl
|
||||
<< "\tNumber of CPUs: " << numCpus << std::endl
|
||||
<< "\tNumber of DMAs: " << numDmas << std::endl
|
||||
<< "\tNumber of CUs: " << numCus << std::endl
|
||||
<< "\tNumber of wavefronts per CU: " << numWfsPerCu << std::endl
|
||||
<< "\tWavefront size: " << numWisPerWf << std::endl
|
||||
@@ -164,6 +178,8 @@ ProtocolTester::~ProtocolTester()
|
||||
{
|
||||
for (int i = 0; i < cpuPorts.size(); ++i)
|
||||
delete cpuPorts[i];
|
||||
for (int i = 0; i < dmaPorts.size(); ++i)
|
||||
delete dmaPorts[i];
|
||||
for (int i = 0; i < cuVectorPorts.size(); ++i)
|
||||
delete cuVectorPorts[i];
|
||||
for (int i = 0; i < cuScalarPorts.size(); ++i)
|
||||
@@ -189,6 +205,14 @@ ProtocolTester::init()
|
||||
cpuThreads[cpu_id]->scheduleDeadlockCheckEvent();
|
||||
}
|
||||
|
||||
// connect dma threads to dma's ports
|
||||
for (int dma_id = 0; dma_id < numDmas; ++dma_id) {
|
||||
dmaThreads[dma_id]->attachTesterThreadToPorts(this,
|
||||
static_cast<SeqPort*>(dmaPorts[dma_id]));
|
||||
dmaThreads[dma_id]->scheduleWakeup();
|
||||
dmaThreads[dma_id]->scheduleDeadlockCheckEvent();
|
||||
}
|
||||
|
||||
// connect gpu wavefronts to gpu's ports
|
||||
int wfId = 0;
|
||||
int vectorPortId = 0;
|
||||
@@ -216,9 +240,9 @@ ProtocolTester::init()
|
||||
Port&
|
||||
ProtocolTester::getPort(const std::string &if_name, PortID idx)
|
||||
{
|
||||
if (if_name != "cpu_ports" && if_name != "cu_vector_ports" &&
|
||||
if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports" &&
|
||||
if_name != "cu_token_ports") {
|
||||
if (if_name != "cpu_ports" && if_name != "dma_ports" &&
|
||||
if_name != "cu_vector_ports" && if_name != "cu_sqc_ports" &&
|
||||
if_name != "cu_scalar_ports" && if_name != "cu_token_ports") {
|
||||
// pass along to super class
|
||||
return ClockedObject::getPort(if_name, idx);
|
||||
} else {
|
||||
@@ -226,6 +250,10 @@ ProtocolTester::getPort(const std::string &if_name, PortID idx)
|
||||
if (idx > numCpuPorts)
|
||||
panic("ProtocolTester: unknown cpu port %d\n", idx);
|
||||
return *cpuPorts[idx];
|
||||
} else if (if_name == "dma_ports") {
|
||||
if (idx > numDmaPorts)
|
||||
panic("ProtocolTester: unknown dma port %d\n", idx);
|
||||
return *dmaPorts[idx];
|
||||
} else if (if_name == "cu_vector_ports") {
|
||||
if (idx > numVectorPorts)
|
||||
panic("ProtocolTester: unknown cu vect port %d\n", idx);
|
||||
@@ -279,6 +307,11 @@ ProtocolTester::checkDRF(Location atomic_loc,
|
||||
if (!th->checkDRF(atomic_loc, loc, isStore))
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const TesterThread* th : dmaThreads) {
|
||||
if (!th->checkDRF(atomic_loc, loc, isStore))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -293,6 +326,10 @@ ProtocolTester::dumpErrorLog(std::stringstream& ss)
|
||||
t->printAllOutstandingReqs(ss);
|
||||
}
|
||||
|
||||
for (auto t : dmaThreads) {
|
||||
t->printAllOutstandingReqs(ss);
|
||||
}
|
||||
|
||||
for (auto t : wfs) {
|
||||
t->printAllOutstandingReqs(ss);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user