diff --git a/src/gpu-compute/gpu_command_processor.cc b/src/gpu-compute/gpu_command_processor.cc index b6205ac133..fccc035823 100644 --- a/src/gpu-compute/gpu_command_processor.cc +++ b/src/gpu-compute/gpu_command_processor.cc @@ -100,11 +100,25 @@ GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id, machine_code_addr); Addr kern_name_addr(0); - virt_proxy.readBlob(akc.runtime_loader_kernel_symbol + 0x10, - (uint8_t*)&kern_name_addr, 0x8); - std::string kernel_name; - virt_proxy.readString(kernel_name, kern_name_addr); + + /** + * BLIT kernels don't have symbol names. BLIT kernels are built-in compute + * kernels issued by ROCm to handle DMAs for dGPUs when the SDMA + * hardware engines are unavailable or explicitly disabled. They can also + * be used to do copies that ROCm thinks would be better performed + * by the shader than the SDMA engines. They are also sometimes used on + * APUs to implement asynchronous memcopy operations from 2 pointers in + * host memory. I have no idea what BLIT stands for. + * */ + if (akc.runtime_loader_kernel_symbol) { + virt_proxy.readBlob(akc.runtime_loader_kernel_symbol + 0x10, + (uint8_t*)&kern_name_addr, 0x8); + + virt_proxy.readString(kernel_name, kern_name_addr); + } else { + kernel_name = "Blit kernel"; + } DPRINTF(GPUKernelInfo, "Kernel name: %s\n", kernel_name.c_str()); diff --git a/src/gpu-compute/hsa_queue_entry.hh b/src/gpu-compute/hsa_queue_entry.hh index a6917db3ee..5fc5e56c2e 100644 --- a/src/gpu-compute/hsa_queue_entry.hh +++ b/src/gpu-compute/hsa_queue_entry.hh @@ -88,6 +88,19 @@ class HSAQueueEntry _globalWgId(0), dispatchComplete(false) { + // Precompiled BLIT kernels actually violate the spec a bit + // and don't set many of the required akc fields. For these kernels, + // we need to rip register usage from the resource registers. 
+ // + // We can't get an exact number of registers from the resource + // registers because they round, but we can get an upper bound on it + if (!numVgprs) + numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4; + + // TODO: Granularity changes for GFX9! + if (!numSgprs) + numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8; + initialVgprState.reset(); initialSgprState.reset();