dev,dev-hsa,gpu-compute: Refactor dmaVirt calls

Remove the duplicate dmaVirt calls from the HSA packet processor and the GPU command processor and move them into their own class. This removes some duplicate code and allows a DmaVirtDevice to be created, which will be useful for upcoming full system GPU commits.

The DmaVirtDevice is an abstraction of the base DmaDevice but iterates using ChunkGenerator over virtual addresses. Classes which inherit from DmaVirtDevice must provide a translation function to translate from virtual address to physical address. Once translated, the physical address is passed to DmaDevice to do the work.

Change-Id: Idd59ccb4d9ba21c0b1150ee328ededf5a88d824e
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/47179
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
2021-06-23 18:50:58 -05:00
parent 5c7e1bd917
commit 897c0c11ed
8 changed files with 275 additions and 223 deletions
--- a/src/gpu-compute/gpu_command_processor.cc
+++ b/src/gpu-compute/gpu_command_processor.cc
@@ -48,7 +48,8 @@ namespace gem5
 {

 GPUCommandProcessor::GPUCommandProcessor(const Params &p)
-    : DmaDevice(p), dispatcher(*p.dispatcher), _driver(nullptr), hsaPP(p.hsapp)
+    : DmaVirtDevice(p), dispatcher(*p.dispatcher), _driver(nullptr),
+      hsaPP(p.hsapp)
 {
    assert(hsaPP);
    hsaPP->setDevice(this);
@@ -61,47 +62,6 @@ GPUCommandProcessor::hsaPacketProc()
    return *hsaPP;
 }

-void
-GPUCommandProcessor::dmaReadVirt(Addr host_addr, unsigned size,
-                                 DmaCallback *cb, void *data, Tick delay)
-{
-    dmaVirt(&DmaDevice::dmaRead, host_addr, size, cb, data, delay);
-}
-
-void
-GPUCommandProcessor::dmaWriteVirt(Addr host_addr, unsigned size,
-                                  DmaCallback *cb, void *data, Tick delay)
-{
-    dmaVirt(&DmaDevice::dmaWrite, host_addr, size, cb, data, delay);
-}
-
-void
-GPUCommandProcessor::dmaVirt(DmaFnPtr dmaFn, Addr addr, unsigned size,
-                             DmaCallback *cb, void *data, Tick delay)
-{
-    if (size == 0) {
-        if (cb)
-            schedule(cb->getChunkEvent(), curTick() + delay);
-        return;
-    }
-
-    // move the buffer data pointer with the chunks
-    uint8_t *loc_data = (uint8_t*)data;
-
-    for (ChunkGenerator gen(addr, size, PAGE_SIZE); !gen.done(); gen.next()) {
-        Addr phys;
-
-        // translate pages into their corresponding frames
-        translateOrDie(gen.addr(), phys);
-
-        Event *event = cb ? cb->getChunkEvent() : nullptr;
-
-        (this->*dmaFn)(phys, gen.size(), event, loc_data, delay);
-
-        loc_data += gen.size();
-    }
-}
-
 void
 GPUCommandProcessor::translateOrDie(Addr vaddr, Addr &paddr)
 {
@@ -233,7 +193,7 @@ GPUCommandProcessor::updateHsaSignal(Addr signal_handle, uint64_t signal_value,
    Addr event_addr = getHsaSignalEventAddr(signal_handle);
    DPRINTF(GPUCommandProc, "Triggering completion signal: %x!\n", value_addr);

-    auto cb = new CPDmaCallback<uint64_t>(function, signal_value);
+    auto cb = new DmaVirtCallback<uint64_t>(function, signal_value);

    dmaWriteVirt(value_addr, sizeof(Addr), cb, &cb->dmaBuffer, 0);

@@ -372,7 +332,7 @@ GPUCommandProcessor::signalWakeupEvent(uint32_t event_id)
 void
 GPUCommandProcessor::initABI(HSAQueueEntry *task)
 {
-    auto cb = new CPDmaCallback<uint32_t>(
+    auto cb = new DmaVirtCallback<uint32_t>(
        [ = ] (const uint32_t &readDispIdOffset)
            { ReadDispIdOffsetDmaEvent(task, readDispIdOffset); }, 0);