misc: Remove all references to GCN3
Replace instances of "GCN3" with Vega. Remove gfx801 and gfx803. Rename FIJI to Vega and Carrizo to Raven. Using misc since there is not enough room to fit all the tags. Change-Id: Ibafc939d49a69be9068107a906e878408c7a5891
This commit is contained in:
@@ -38,11 +38,11 @@ assumes tested protocols supports release consistency.
|
||||
To start using the tester quickly, you can use the following example command
|
||||
line to get running immediately:
|
||||
|
||||
build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py \
|
||||
build/VEGA_X86/gem5.opt configs/example/ruby_gpu_random_test.py \
|
||||
--test-length=1000 --system-size=medium --cache-size=small
|
||||
|
||||
An overview of the main command line options is as follows. For all options
|
||||
use `build/GCN3_X86/gem5.opt configs/example/ruby_gpu_random_test.py --help`
|
||||
use `build/VEGA_X86/gem5.opt configs/example/ruby_gpu_random_test.py --help`
|
||||
or see the configuration file.
|
||||
|
||||
* --cache-size (small, large): Use smaller sizes for testing evict, etc.
|
||||
|
||||
@@ -44,7 +44,7 @@ namespace gem5
|
||||
|
||||
/*
|
||||
* Further descriptions can be found in the "Hardware Register Values" table
|
||||
* in any of the GCN3, Vega, CDNA1, CDNA2, or RDNA ISA manuals.
|
||||
* in any of the Vega, CDNA, or RDNA ISA manuals.
|
||||
*/
|
||||
enum amdgpu_hwreg
|
||||
{
|
||||
|
||||
@@ -115,28 +115,13 @@ class HSAQueueDescriptor
|
||||
10ca0a99bbd0252f5bf6f08d1503e59f1129df4a/ROCm_Libraries/
|
||||
rocr/src/core/runtime/amd_aql_queue.cpp#L624
|
||||
*
|
||||
* GFX7 and GFX8 will allocate twice as much space for their HSA
|
||||
* queues as they actually access (using mod operations to map the
|
||||
* virtual addresses from the upper half of the queue to the same
|
||||
* virtual addresses as the lower half). Thus, we need to check if
|
||||
* the ISA is GFX8 and mod the address by half of the queue size if
|
||||
* so.
|
||||
*/
|
||||
uint64_t retAddr = 0ll;
|
||||
if ((gfxVersion == GfxVersion::gfx801) ||
|
||||
(gfxVersion == GfxVersion::gfx803)) {
|
||||
retAddr = basePointer + ((ix % (numElts/2)) * objSize());
|
||||
DPRINTF(HSAPacketProcessor, "ptr() gfx8: base: 0x%x, "
|
||||
"index: 0x%x, numElts: 0x%x, numElts/2: 0x%x, "
|
||||
"objSize: 0x%x, retAddr: 0x%x\n", basePointer, ix,
|
||||
numElts, numElts/2, objSize(), retAddr);
|
||||
} else {
|
||||
retAddr = basePointer + ((ix % numElts) * objSize());
|
||||
DPRINTF(HSAPacketProcessor, "ptr() gfx9: base: 0x%x, "
|
||||
"index: 0x%x, numElts: 0x%x, objSize: 0x%x, "
|
||||
"retAddr: 0x%x\n", basePointer, ix, numElts, objSize(),
|
||||
retAddr);
|
||||
}
|
||||
retAddr = basePointer + ((ix % numElts) * objSize());
|
||||
DPRINTF(HSAPacketProcessor, "ptr() gfx9: base: 0x%x, "
|
||||
"index: 0x%x, numElts: 0x%x, objSize: 0x%x, "
|
||||
"retAddr: 0x%x\n", basePointer, ix, numElts, objSize(),
|
||||
retAddr);
|
||||
return retAddr;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -45,7 +45,7 @@ class PrefetchType(Enum):
|
||||
|
||||
|
||||
class GfxVersion(ScopedEnum):
|
||||
vals = ["gfx801", "gfx803", "gfx900", "gfx902", "gfx908", "gfx90a"]
|
||||
vals = ["gfx900", "gfx902", "gfx908", "gfx90a"]
|
||||
|
||||
|
||||
class PoolManager(SimObject):
|
||||
@@ -320,7 +320,7 @@ class GPUComputeDriver(EmulatedDriver):
|
||||
cxx_header = "gpu-compute/gpu_compute_driver.hh"
|
||||
device = Param.GPUCommandProcessor("GPU controlled by this driver")
|
||||
isdGPU = Param.Bool(False, "Driver is for a dGPU")
|
||||
gfxVersion = Param.GfxVersion("gfx801", "ISA of gpu to model")
|
||||
gfxVersion = Param.GfxVersion("gfx902", "ISA of gpu to model")
|
||||
dGPUPoolID = Param.Int(0, "Pool ID for dGPU.")
|
||||
# Default Mtype for caches
|
||||
# -- 1 1 1 C_RW_S (Cached-ReadWrite-Shared)
|
||||
|
||||
@@ -327,13 +327,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
|
||||
*/
|
||||
|
||||
switch (gfxVersion) {
|
||||
case GfxVersion::gfx801:
|
||||
case GfxVersion::gfx803:
|
||||
args->process_apertures[i].scratch_base =
|
||||
scratchApeBase(i + 1);
|
||||
args->process_apertures[i].lds_base =
|
||||
ldsApeBase(i + 1);
|
||||
break;
|
||||
case GfxVersion::gfx900:
|
||||
case GfxVersion::gfx902:
|
||||
args->process_apertures[i].scratch_base =
|
||||
@@ -345,7 +338,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
|
||||
fatal("Invalid gfx version\n");
|
||||
}
|
||||
|
||||
// GFX8 and GFX9 set lds and scratch limits the same way
|
||||
args->process_apertures[i].scratch_limit =
|
||||
scratchApeLimit(args->process_apertures[i].scratch_base);
|
||||
|
||||
@@ -353,13 +345,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
|
||||
ldsApeLimit(args->process_apertures[i].lds_base);
|
||||
|
||||
switch (gfxVersion) {
|
||||
case GfxVersion::gfx801:
|
||||
args->process_apertures[i].gpuvm_base =
|
||||
gpuVmApeBase(i + 1);
|
||||
args->process_apertures[i].gpuvm_limit =
|
||||
gpuVmApeLimit(args->process_apertures[i].gpuvm_base);
|
||||
break;
|
||||
case GfxVersion::gfx803:
|
||||
case GfxVersion::gfx900:
|
||||
case GfxVersion::gfx902:
|
||||
// Taken from SVM_USE_BASE in Linux kernel
|
||||
@@ -383,9 +368,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
|
||||
// id composed out of a non-zero base and an offset.
|
||||
if (isdGPU) {
|
||||
switch (gfxVersion) {
|
||||
case GfxVersion::gfx803:
|
||||
args->process_apertures[i].gpu_id = 50156;
|
||||
break;
|
||||
case GfxVersion::gfx900:
|
||||
args->process_apertures[i].gpu_id = 22124;
|
||||
break;
|
||||
@@ -394,7 +376,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
|
||||
}
|
||||
} else {
|
||||
switch (gfxVersion) {
|
||||
case GfxVersion::gfx801:
|
||||
case GfxVersion::gfx902:
|
||||
args->process_apertures[i].gpu_id = 2765;
|
||||
break;
|
||||
@@ -630,11 +611,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
|
||||
(ioc_args->kfd_process_device_apertures_ptr);
|
||||
|
||||
switch (gfxVersion) {
|
||||
case GfxVersion::gfx801:
|
||||
case GfxVersion::gfx803:
|
||||
ape_args->scratch_base = scratchApeBase(i + 1);
|
||||
ape_args->lds_base = ldsApeBase(i + 1);
|
||||
break;
|
||||
case GfxVersion::gfx900:
|
||||
case GfxVersion::gfx902:
|
||||
ape_args->scratch_base = scratchApeBaseV9();
|
||||
@@ -644,18 +620,11 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
|
||||
fatal("Invalid gfx version\n");
|
||||
}
|
||||
|
||||
// GFX8 and GFX9 set lds and scratch limits the same way
|
||||
ape_args->scratch_limit =
|
||||
scratchApeLimit(ape_args->scratch_base);
|
||||
ape_args->lds_limit = ldsApeLimit(ape_args->lds_base);
|
||||
|
||||
switch (gfxVersion) {
|
||||
case GfxVersion::gfx801:
|
||||
ape_args->gpuvm_base = gpuVmApeBase(i + 1);
|
||||
ape_args->gpuvm_limit =
|
||||
gpuVmApeLimit(ape_args->gpuvm_base);
|
||||
break;
|
||||
case GfxVersion::gfx803:
|
||||
case GfxVersion::gfx900:
|
||||
case GfxVersion::gfx902:
|
||||
// Taken from SVM_USE_BASE in Linux kernel
|
||||
@@ -670,9 +639,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
|
||||
// NOTE: Must match ID populated by hsaTopology.py
|
||||
if (isdGPU) {
|
||||
switch (gfxVersion) {
|
||||
case GfxVersion::gfx803:
|
||||
ape_args->gpu_id = 50156;
|
||||
break;
|
||||
case GfxVersion::gfx900:
|
||||
ape_args->gpu_id = 22124;
|
||||
break;
|
||||
@@ -681,7 +647,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
|
||||
}
|
||||
} else {
|
||||
switch (gfxVersion) {
|
||||
case GfxVersion::gfx801:
|
||||
case GfxVersion::gfx902:
|
||||
ape_args->gpu_id = 2765;
|
||||
break;
|
||||
|
||||
@@ -86,8 +86,6 @@ class GPUComputeDriver final : public EmulatedDriver
|
||||
doorbellSize()
|
||||
{
|
||||
switch (gfxVersion) {
|
||||
case GfxVersion::gfx801:
|
||||
case GfxVersion::gfx803:
|
||||
case GfxVersion::gfx902:
|
||||
return 4;
|
||||
case GfxVersion::gfx900:
|
||||
|
||||
@@ -56,7 +56,7 @@ GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
|
||||
a_data = new uint8_t[computeUnit()->wfSize() * 8];
|
||||
x_data = new uint8_t[computeUnit()->wfSize() * 8];
|
||||
// scalar loads can read up to 16 Dwords of data (see publicly
|
||||
// available GCN3 ISA manual)
|
||||
// available Vega ISA manual)
|
||||
scalar_data = new uint8_t[16 * sizeof(uint32_t)];
|
||||
for (int i = 0; i < (16 * sizeof(uint32_t)); ++i) {
|
||||
scalar_data[i] = 0;
|
||||
|
||||
@@ -102,14 +102,9 @@ class HSAQueueEntry
|
||||
numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
|
||||
}
|
||||
|
||||
// SGPR allocation granularities:
|
||||
// - GFX8: 8
|
||||
// - GFX9: 16
|
||||
// Source: https://llvm.org/docs/.html
|
||||
if (gfx_version == GfxVersion::gfx801 ||
|
||||
gfx_version == GfxVersion::gfx803) {
|
||||
numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
|
||||
} else if (gfx_version == GfxVersion::gfx900 ||
|
||||
// SGPR allocation granularity is 16 in GFX9
|
||||
// Source: https://llvm.org/docs/AMDGPUUsage.html
|
||||
if (gfx_version == GfxVersion::gfx900 ||
|
||||
gfx_version == GfxVersion::gfx902 ||
|
||||
gfx_version == GfxVersion::gfx908 ||
|
||||
gfx_version == GfxVersion::gfx90a) {
|
||||
|
||||
@@ -626,8 +626,6 @@ void
|
||||
ScheduleStage::arbitrateVrfToLdsBus()
|
||||
{
|
||||
// Arbitrate the VRF->GM and VRF->LDS buses for Flat memory ops
|
||||
// Note: a Flat instruction in GFx8 reserves both VRF->Glb memory bus
|
||||
// and a VRF->LDS bus. In GFx9, this is not the case.
|
||||
|
||||
// iterate the GM pipelines
|
||||
for (int i = 0; i < computeUnit.numVectorGlobalMemUnits; i++) {
|
||||
|
||||
@@ -292,8 +292,8 @@ class Request : public Extensible<Request>
|
||||
|
||||
/**
|
||||
* These bits are used to set the coherence policy for the GPU and are
|
||||
* encoded in the GCN3 instructions. The GCN3 ISA defines two cache levels
|
||||
* See the AMD GCN3 ISA Architecture Manual for more details.
|
||||
* encoded in the Vega instructions. The Vega ISA defines two cache levels
|
||||
* See the AMD Vega ISA Architecture Manual for more details.
|
||||
*
|
||||
* INV_L1: L1 cache invalidation
|
||||
* FLUSH_L2: L2 cache flush
|
||||
|
||||
@@ -77,7 +77,7 @@ VIPERCoalescer::makeRequest(PacketPtr pkt)
|
||||
// AtomicOp : cache atomic
|
||||
// Flush : flush and invalidate cache
|
||||
//
|
||||
// VIPER does not expect MemSyncReq & Release since in GCN3, compute unit
|
||||
// VIPER does not expect MemSyncReq & Release since compute unit
|
||||
// does not specify an equivalent type of memory request.
|
||||
assert((pkt->cmd == MemCmd::MemSyncReq && pkt->req->isInvL1()) ||
|
||||
pkt->cmd == MemCmd::ReadReq ||
|
||||
|
||||
Reference in New Issue
Block a user