gpu-compute, mem-ruby, configs: Add GCN3 ISA support to GPU model
Change-Id: Ibe46970f3ba25d62ca2ade5cbc2054ad746b2254 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29912 Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com> Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
committed by
Anthony Gutierrez
parent
b0eac7857a
commit
b8da9abba7
215
src/gpu-compute/gpu_command_processor.cc
Normal file
215
src/gpu-compute/gpu_command_processor.cc
Normal file
@@ -0,0 +1,215 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* For use for simulation and test purposes only
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Anthony Gutierrez
|
||||
*/
|
||||
|
||||
#include "gpu-compute/gpu_command_processor.hh"
|
||||
|
||||
#include "debug/GPUCommandProc.hh"
|
||||
#include "debug/GPUKernelInfo.hh"
|
||||
#include "gpu-compute/dispatcher.hh"
|
||||
#include "params/GPUCommandProcessor.hh"
|
||||
|
||||
/**
 * Construct the command processor and wire it to its dispatcher.
 * The dispatcher is handed a back-pointer so the two objects can
 * communicate about task dispatch and completion.
 */
GPUCommandProcessor::GPUCommandProcessor(const Params *params)
    : HSADevice(params), dispatcher(*params->dispatcher)
{
    // Register ourselves with the dispatcher so it can reach the CP.
    dispatcher.setCommandProcessor(this);
}
|
||||
|
||||
/**
|
||||
* submitDispatchPkt() is the entry point into the CP from the HSAPP
|
||||
* and is only meant to be used with AQL kernel dispatch packets.
|
||||
* After the HSAPP receives and extracts an AQL packet, it sends
|
||||
* it to the CP, which is responsible for gathering all relevant
|
||||
* information about a task, initializing CU state, and sending
|
||||
* it to the dispatcher for WG creation and dispatch.
|
||||
*
|
||||
* First we need capture all information from the the AQL pkt and
|
||||
* the code object, then store it in an HSAQueueEntry. Once the
|
||||
* packet and code are extracted, we extract information from the
|
||||
* queue descriptor that the CP needs to perform state initialization
|
||||
* on the CU. Finally we call dispatch() to send the task to the
|
||||
* dispatcher. When the task completely finishes, we call finishPkt()
|
||||
* on the HSA packet processor in order to remove the packet from the
|
||||
* queue, and notify the runtime that the task has completed.
|
||||
*/
|
||||
void
|
||||
GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
|
||||
Addr host_pkt_addr)
|
||||
{
|
||||
static int dynamic_task_id = 0;
|
||||
_hsa_dispatch_packet_t *disp_pkt = (_hsa_dispatch_packet_t*)raw_pkt;
|
||||
|
||||
/**
|
||||
* we need to read a pointer in the application's address
|
||||
* space to pull out the kernel code descriptor.
|
||||
*/
|
||||
auto *tc = sys->threads[0];
|
||||
auto &virt_proxy = tc->getVirtProxy();
|
||||
|
||||
/**
|
||||
* The kernel_object is a pointer to the machine code, whose entry
|
||||
* point is an 'amd_kernel_code_t' type, which is included in the
|
||||
* kernel binary, and describes various aspects of the kernel. The
|
||||
* desired entry is the 'kernel_code_entry_byte_offset' field,
|
||||
* which provides the byte offset (positive or negative) from the
|
||||
* address of the amd_kernel_code_t to the start of the machine
|
||||
* instructions.
|
||||
*/
|
||||
AMDKernelCode akc;
|
||||
virt_proxy.readBlob(disp_pkt->kernel_object, (uint8_t*)&akc,
|
||||
sizeof(AMDKernelCode));
|
||||
|
||||
DPRINTF(GPUCommandProc, "GPU machine code is %lli bytes from start of the "
|
||||
"kernel object\n", akc.kernel_code_entry_byte_offset);
|
||||
|
||||
Addr machine_code_addr = (Addr)disp_pkt->kernel_object
|
||||
+ akc.kernel_code_entry_byte_offset;
|
||||
|
||||
DPRINTF(GPUCommandProc, "Machine code starts at addr: %#x\n",
|
||||
machine_code_addr);
|
||||
|
||||
Addr kern_name_addr(0);
|
||||
virt_proxy.readBlob(akc.runtime_loader_kernel_symbol + 0x10,
|
||||
(uint8_t*)&kern_name_addr, 0x8);
|
||||
|
||||
std::string kernel_name;
|
||||
virt_proxy.readString(kernel_name, kern_name_addr);
|
||||
|
||||
DPRINTF(GPUKernelInfo, "Kernel name: %s\n", kernel_name.c_str());
|
||||
|
||||
HSAQueueEntry *task = new HSAQueueEntry(kernel_name, queue_id,
|
||||
dynamic_task_id, raw_pkt, &akc, host_pkt_addr, machine_code_addr);
|
||||
|
||||
DPRINTF(GPUCommandProc, "Task ID: %i Got AQL: wg size (%dx%dx%d), "
|
||||
"grid size (%dx%dx%d) kernarg addr: %#x, completion "
|
||||
"signal addr:%#x\n", dynamic_task_id, disp_pkt->workgroup_size_x,
|
||||
disp_pkt->workgroup_size_y, disp_pkt->workgroup_size_z,
|
||||
disp_pkt->grid_size_x, disp_pkt->grid_size_y,
|
||||
disp_pkt->grid_size_z, disp_pkt->kernarg_address,
|
||||
disp_pkt->completion_signal);
|
||||
|
||||
DPRINTF(GPUCommandProc, "Extracted code object: %s (num vector regs: %d, "
|
||||
"num scalar regs: %d, code addr: %#x, kernarg size: %d, "
|
||||
"LDS size: %d)\n", kernel_name, task->numVectorRegs(),
|
||||
task->numScalarRegs(), task->codeAddr(), 0, 0);
|
||||
|
||||
initABI(task);
|
||||
++dynamic_task_id;
|
||||
}
|
||||
|
||||
/**
 * submitVendorPkt() is for accepting vendor-specific packets from
 * the HSAPP. Vendor-specific packets may be used by the runtime to
 * send commands to the HSA device that are specific to a particular
 * vendor. The vendor-specific packets should be defined by the vendor
 * in the runtime.
 *
 * @param raw_pkt raw pointer to the vendor-specific packet
 * @param queue_id ID of the HSA queue the packet was received on
 * @param host_pkt_addr address of the packet in host memory (currently
 *        unused; the packet is completed without being inspected)
 */

/**
 * TODO: For now we simply tell the HSAPP to finish the packet,
 * however a future patch will update this method to provide
 * the proper handling of any required vendor-specific packets.
 * In the version of ROCm that is currently supported (1.6)
 * the runtime will send packets that direct the CP to
 * invalidate the GPUs caches. We do this automatically on
 * each kernel launch in the CU, so this is safe for now.
 */
void
GPUCommandProcessor::submitVendorPkt(void *raw_pkt, uint32_t queue_id,
                                     Addr host_pkt_addr)
{
    // Immediately complete the packet so the runtime is not blocked.
    hsaPP->finishPkt(raw_pkt, queue_id);
}
|
||||
|
||||
/**
 * Once the CP has finished extracting all relevant information about
 * a task and has initialized the ABI state, we send a description of
 * the task to the dispatcher. The dispatcher will create and dispatch
 * WGs to the CUs.
 *
 * @param task fully-initialized queue entry describing the kernel to run
 */
void
GPUCommandProcessor::dispatchPkt(HSAQueueEntry *task)
{
    // Hand the task off; WG creation/scheduling happens in the dispatcher.
    dispatcher.dispatch(task);
}
|
||||
|
||||
/**
 * The CP is responsible for traversing all HSA-ABI-related data
 * structures from memory and initializing the ABI state.
 * Information provided by the MQD, AQL packet, and code object
 * metadata will be used to initialize register file state.
 *
 * @param task the queue entry whose ABI state is being initialized
 */
void
GPUCommandProcessor::initABI(HSAQueueEntry *task)
{
    // DMA completion event; when the read below finishes, the event's
    // callback continues ABI setup with the value it captured.
    // NOTE(review): presumably the event manages its own lifetime after
    // the DMA completes -- confirm in ReadDispIdOffsetDmaEvent.
    auto *readDispIdOffEvent = new ReadDispIdOffsetDmaEvent(*this, task);

    // Host-side read index pointer from this task's queue descriptor.
    Addr hostReadIdxPtr
        = hsaPP->getQueueDesc(task->queueId())->hostReadIndexPtr;

    // Read the dispatch-id offset, which lives one pointer-width past
    // the host read index in the queue structure.
    // NOTE(review): the sizeof(hostReadIdxPtr) offset assumes the
    // runtime's queue layout -- verify against the supported ROCm version.
    dmaReadVirt(hostReadIdxPtr + sizeof(hostReadIdxPtr),
        sizeof(readDispIdOffEvent->readDispIdOffset), readDispIdOffEvent,
        &readDispIdOffEvent->readDispIdOffset);
}
|
||||
|
||||
/** @return the system object this command processor belongs to. */
System*
GPUCommandProcessor::system()
{
    return sys;
}
|
||||
|
||||
/**
 * The command processor does not claim any memory-mapped address
 * ranges of its own, so an empty list is returned.
 */
AddrRangeList
GPUCommandProcessor::getAddrRanges() const
{
    return AddrRangeList();
}
|
||||
|
||||
/**
 * Associate this command processor with a shader.
 *
 * @param shader the shader to attach
 */
void
GPUCommandProcessor::setShader(Shader *shader)
{
    _shader = shader;
}
|
||||
|
||||
/** @return the shader attached via setShader(), if any. */
Shader*
GPUCommandProcessor::shader()
{
    return _shader;
}
|
||||
|
||||
/**
 * Factory method invoked by the params framework to instantiate
 * a GPUCommandProcessor from its configured parameters.
 */
GPUCommandProcessor*
GPUCommandProcessorParams::create()
{
    return new GPUCommandProcessor(this);
}
|
||||
Reference in New Issue
Block a user