misc: Remove all references to GCN3

Replace instances of "GCN3" with Vega. Remove gfx801 and gfx803. Rename FIJI to Vega and Carrizo to Raven. Using misc since there is not enough room to fit all the tags.

Change-Id: Ibafc939d49a69be9068107a906e878408c7a5891
2024-01-17 10:45:18 -06:00
parent 57fb083f43
commit 63caa780c2
13 changed files with 22 additions and 275 deletions
--- a/src/gpu-compute/GPU.py
+++ b/src/gpu-compute/GPU.py
@@ -45,7 +45,7 @@ class PrefetchType(Enum):


 class GfxVersion(ScopedEnum):
-    vals = ["gfx801", "gfx803", "gfx900", "gfx902", "gfx908", "gfx90a"]
+    vals = ["gfx900", "gfx902", "gfx908", "gfx90a"]


 class PoolManager(SimObject):
@@ -320,7 +320,7 @@ class GPUComputeDriver(EmulatedDriver):
    cxx_header = "gpu-compute/gpu_compute_driver.hh"
    device = Param.GPUCommandProcessor("GPU controlled by this driver")
    isdGPU = Param.Bool(False, "Driver is for a dGPU")
-    gfxVersion = Param.GfxVersion("gfx801", "ISA of gpu to model")
+    gfxVersion = Param.GfxVersion("gfx902", "ISA of gpu to model")
    dGPUPoolID = Param.Int(0, "Pool ID for dGPU.")
    # Default Mtype for caches
    # --     1   1   1   C_RW_S  (Cached-ReadWrite-Shared)
--- a/src/gpu-compute/gpu_compute_driver.cc
+++ b/src/gpu-compute/gpu_compute_driver.cc
@@ -327,13 +327,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
                 */

                switch (gfxVersion) {
-                  case GfxVersion::gfx801:
-                  case GfxVersion::gfx803:
-                    args->process_apertures[i].scratch_base =
-                        scratchApeBase(i + 1);
-                    args->process_apertures[i].lds_base =
-                        ldsApeBase(i + 1);
-                    break;
                  case GfxVersion::gfx900:
                  case GfxVersion::gfx902:
                    args->process_apertures[i].scratch_base =
@@ -345,7 +338,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
                    fatal("Invalid gfx version\n");
                }

-                // GFX8 and GFX9 set lds and scratch limits the same way
                args->process_apertures[i].scratch_limit =
                    scratchApeLimit(args->process_apertures[i].scratch_base);

@@ -353,13 +345,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
                    ldsApeLimit(args->process_apertures[i].lds_base);

                switch (gfxVersion) {
-                  case GfxVersion::gfx801:
-                    args->process_apertures[i].gpuvm_base =
-                        gpuVmApeBase(i + 1);
-                    args->process_apertures[i].gpuvm_limit =
-                        gpuVmApeLimit(args->process_apertures[i].gpuvm_base);
-                    break;
-                  case GfxVersion::gfx803:
                  case GfxVersion::gfx900:
                  case GfxVersion::gfx902:
                    // Taken from SVM_USE_BASE in Linux kernel
@@ -383,9 +368,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
                // id composed out of a non-zero base and an offset.
                if (isdGPU) {
                    switch (gfxVersion) {
-                      case GfxVersion::gfx803:
-                        args->process_apertures[i].gpu_id = 50156;
-                        break;
                      case GfxVersion::gfx900:
                        args->process_apertures[i].gpu_id = 22124;
                        break;
@@ -394,7 +376,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
                    }
                } else {
                    switch (gfxVersion) {
-                      case GfxVersion::gfx801:
                      case GfxVersion::gfx902:
                        args->process_apertures[i].gpu_id = 2765;
                        break;
@@ -630,11 +611,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
                    (ioc_args->kfd_process_device_apertures_ptr);

                switch (gfxVersion) {
-                  case GfxVersion::gfx801:
-                  case GfxVersion::gfx803:
-                    ape_args->scratch_base = scratchApeBase(i + 1);
-                    ape_args->lds_base = ldsApeBase(i + 1);
-                    break;
                  case GfxVersion::gfx900:
                  case GfxVersion::gfx902:
                    ape_args->scratch_base = scratchApeBaseV9();
@@ -644,18 +620,11 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
                    fatal("Invalid gfx version\n");
                }

-                // GFX8 and GFX9 set lds and scratch limits the same way
                ape_args->scratch_limit =
                    scratchApeLimit(ape_args->scratch_base);
                ape_args->lds_limit = ldsApeLimit(ape_args->lds_base);

                switch (gfxVersion) {
-                  case GfxVersion::gfx801:
-                    ape_args->gpuvm_base = gpuVmApeBase(i + 1);
-                    ape_args->gpuvm_limit =
-                        gpuVmApeLimit(ape_args->gpuvm_base);
-                    break;
-                  case GfxVersion::gfx803:
                  case GfxVersion::gfx900:
                  case GfxVersion::gfx902:
                    // Taken from SVM_USE_BASE in Linux kernel
@@ -670,9 +639,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
                // NOTE: Must match ID populated by hsaTopology.py
                if (isdGPU) {
                    switch (gfxVersion) {
-                      case GfxVersion::gfx803:
-                        ape_args->gpu_id = 50156;
-                        break;
                      case GfxVersion::gfx900:
                        ape_args->gpu_id = 22124;
                        break;
@@ -681,7 +647,6 @@ GPUComputeDriver::ioctl(ThreadContext *tc, unsigned req, Addr ioc_buf)
                    }
                } else {
                    switch (gfxVersion) {
-                      case GfxVersion::gfx801:
                      case GfxVersion::gfx902:
                        ape_args->gpu_id = 2765;
                        break;
--- a/src/gpu-compute/gpu_compute_driver.hh
+++ b/src/gpu-compute/gpu_compute_driver.hh
@@ -86,8 +86,6 @@ class GPUComputeDriver final : public EmulatedDriver
    doorbellSize()
    {
        switch (gfxVersion) {
-          case GfxVersion::gfx801:
-          case GfxVersion::gfx803:
          case GfxVersion::gfx902:
            return 4;
          case GfxVersion::gfx900:
--- a/src/gpu-compute/gpu_dyn_inst.cc
+++ b/src/gpu-compute/gpu_dyn_inst.cc
@@ -56,7 +56,7 @@ GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
    a_data = new uint8_t[computeUnit()->wfSize() * 8];
    x_data = new uint8_t[computeUnit()->wfSize() * 8];
    // scalar loads can read up to 16 Dwords of data (see publicly
-    // available GCN3 ISA manual)
+    // available Vega ISA manual)
    scalar_data = new uint8_t[16 * sizeof(uint32_t)];
    for (int i = 0; i < (16 * sizeof(uint32_t)); ++i) {
        scalar_data[i] = 0;
--- a/src/gpu-compute/hsa_queue_entry.hh
+++ b/src/gpu-compute/hsa_queue_entry.hh
@@ -102,14 +102,9 @@ class HSAQueueEntry
            numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
        }

-        // SGPR allocation granularies:
-        // - GFX8: 8
-        // - GFX9: 16
-        // Source: https://llvm.org/docs/.html
-        if (gfx_version == GfxVersion::gfx801 ||
-                gfx_version == GfxVersion::gfx803) {
-            numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
-        } else if (gfx_version == GfxVersion::gfx900 ||
+        // SGPR allocation granularity is 16 in GFX9
+        // Source: https://llvm.org/docs/AMDGPUUsage.html
+        if (gfx_version == GfxVersion::gfx900 ||
                gfx_version == GfxVersion::gfx902 ||
                gfx_version == GfxVersion::gfx908 ||
                gfx_version == GfxVersion::gfx90a) {
--- a/src/gpu-compute/schedule_stage.cc
+++ b/src/gpu-compute/schedule_stage.cc
@@ -626,8 +626,6 @@ void
 ScheduleStage::arbitrateVrfToLdsBus()
 {
    // Arbitrate the VRF->GM and VRF->LDS buses for Flat memory ops
-    // Note: a Flat instruction in GFx8 reserves both VRF->Glb memory bus
-    // and a VRF->LDS bus. In GFx9, this is not the case.

    // iterate the GM pipelines
    for (int i = 0; i < computeUnit.numVectorGlobalMemUnits; i++) {