cpu,configs: Add DMA thread to Ruby GPU tester

Add a DMA thread tester to the Ruby GPU tester to test the DMA state
machine in the protocol. This currently creates a dummy DMA device to
pass through Ruby.py and scans for the DMA sequencers due to the
opaqueness of Ruby.py.

DMA atomics are not yet supported, as there is no protocol that
implements atomic transitions in the DMA state machine file.

Example run command:
build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py \
    --test-length=1000

Change-Id: I63d83e00fd0dcbb1e34c6704d1c2d49ed4e77722
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/39936
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Matthew Poremba
2021-01-27 17:13:31 -08:00
parent 2b0ab1f48e
commit 391322ff9b
11 changed files with 617 additions and 17 deletions

View File

@@ -39,6 +39,7 @@
#include <random>
#include "cpu/testers/gpu_ruby_test/cpu_thread.hh"
#include "cpu/testers/gpu_ruby_test/dma_thread.hh"
#include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"
#include "cpu/testers/gpu_ruby_test/tester_thread.hh"
#include "debug/ProtocolTest.hh"
@@ -50,6 +51,7 @@ ProtocolTester::ProtocolTester(const Params &p)
: ClockedObject(p),
_requestorId(p.system->getRequestorId(this)),
numCpuPorts(p.port_cpu_ports_connection_count),
numDmaPorts(p.port_dma_ports_connection_count),
numVectorPorts(p.port_cu_vector_ports_connection_count),
numSqcPorts(p.port_cu_sqc_ports_connection_count),
numScalarPorts(p.port_cu_scalar_ports_connection_count),
@@ -65,11 +67,13 @@ ProtocolTester::ProtocolTester(const Params &p)
maxNumEpisodes(p.max_num_episodes),
debugTester(p.debug_tester),
cpuThreads(p.cpu_threads),
dmaThreads(p.dma_threads),
wfs(p.wavefronts)
{
int idx = 0; // global port index
numCpus = numCpuPorts; // 1 cpu port per CPU
numDmas = numDmaPorts; // 1 dma port per DMA
numCus = numVectorPorts; // 1 vector port per CU
// create all physical cpu's data ports
@@ -81,6 +85,15 @@ ProtocolTester::ProtocolTester(const Params &p)
idx++;
}
// create all physical DMA data ports
for (int i = 0; i < numDmaPorts; ++i) {
DPRINTF(ProtocolTest, "Creating %s\n",
csprintf("%s-dmaPort%d", name(), i));
dmaPorts.push_back(new SeqPort(csprintf("%s-dmaPort%d", name(), i),
this, i, idx));
idx++;
}
// create all physical gpu's data ports
for (int i = 0; i < numVectorPorts; ++i) {
DPRINTF(ProtocolTest, "Creating %s\n",
@@ -144,6 +157,7 @@ ProtocolTester::ProtocolTester(const Params &p)
std::stringstream ss;
ss << "GPU Ruby test's configurations" << std::endl
<< "\tNumber of CPUs: " << numCpus << std::endl
<< "\tNumber of DMAs: " << numDmas << std::endl
<< "\tNumber of CUs: " << numCus << std::endl
<< "\tNumber of wavefronts per CU: " << numWfsPerCu << std::endl
<< "\tWavefront size: " << numWisPerWf << std::endl
@@ -164,6 +178,8 @@ ProtocolTester::~ProtocolTester()
{
for (int i = 0; i < cpuPorts.size(); ++i)
delete cpuPorts[i];
for (int i = 0; i < dmaPorts.size(); ++i)
delete dmaPorts[i];
for (int i = 0; i < cuVectorPorts.size(); ++i)
delete cuVectorPorts[i];
for (int i = 0; i < cuScalarPorts.size(); ++i)
@@ -189,6 +205,14 @@ ProtocolTester::init()
cpuThreads[cpu_id]->scheduleDeadlockCheckEvent();
}
// connect dma threads to dma's ports
for (int dma_id = 0; dma_id < numDmas; ++dma_id) {
dmaThreads[dma_id]->attachTesterThreadToPorts(this,
static_cast<SeqPort*>(dmaPorts[dma_id]));
dmaThreads[dma_id]->scheduleWakeup();
dmaThreads[dma_id]->scheduleDeadlockCheckEvent();
}
// connect gpu wavefronts to gpu's ports
int wfId = 0;
int vectorPortId = 0;
@@ -216,9 +240,9 @@ ProtocolTester::init()
Port&
ProtocolTester::getPort(const std::string &if_name, PortID idx)
{
if (if_name != "cpu_ports" && if_name != "cu_vector_ports" &&
if_name != "cu_sqc_ports" && if_name != "cu_scalar_ports" &&
if_name != "cu_token_ports") {
if (if_name != "cpu_ports" && if_name != "dma_ports" &&
if_name != "cu_vector_ports" && if_name != "cu_sqc_ports" &&
if_name != "cu_scalar_ports" && if_name != "cu_token_ports") {
// pass along to super class
return ClockedObject::getPort(if_name, idx);
} else {
@@ -226,6 +250,10 @@ ProtocolTester::getPort(const std::string &if_name, PortID idx)
if (idx > numCpuPorts)
panic("ProtocolTester: unknown cpu port %d\n", idx);
return *cpuPorts[idx];
} else if (if_name == "dma_ports") {
if (idx > numDmaPorts)
panic("ProtocolTester: unknown dma port %d\n", idx);
return *dmaPorts[idx];
} else if (if_name == "cu_vector_ports") {
if (idx > numVectorPorts)
panic("ProtocolTester: unknown cu vect port %d\n", idx);
@@ -279,6 +307,11 @@ ProtocolTester::checkDRF(Location atomic_loc,
if (!th->checkDRF(atomic_loc, loc, isStore))
return false;
}
for (const TesterThread* th : dmaThreads) {
if (!th->checkDRF(atomic_loc, loc, isStore))
return false;
}
}
return true;
@@ -293,6 +326,10 @@ ProtocolTester::dumpErrorLog(std::stringstream& ss)
t->printAllOutstandingReqs(ss);
}
for (auto t : dmaThreads) {
t->printAllOutstandingReqs(ss);
}
for (auto t : wfs) {
t->printAllOutstandingReqs(ss);
}