diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index bd7ef7051e..32d048e327 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -39848,7 +39848,13 @@ namespace VegaISA
         gpuDynInst->execUnitId = wf->execUnitId;
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
-        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
+
+        if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
+            gpuDynInst->computeUnit()->globalMemoryPipe.
+                issueRequest(gpuDynInst);
+        } else {
+            fatal("Unsupported scope for flat instruction.\n");
+        }
     } // execute
 
     void
@@ -39901,7 +39907,13 @@ namespace VegaISA
         gpuDynInst->execUnitId = wf->execUnitId;
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
-        gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
+
+        if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
+            gpuDynInst->computeUnit()->globalMemoryPipe.
+                issueRequest(gpuDynInst);
+        } else {
+            fatal("Unsupported scope for flat instruction.\n");
+        }
     } // execute
     void
     Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst)
diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh
index da16dbdb4b..a6dc58c0d0 100644
--- a/src/arch/amdgpu/vega/insts/op_encodings.hh
+++ b/src/arch/amdgpu/vega/insts/op_encodings.hh
@@ -800,35 +800,107 @@ namespace VegaISA
         void
         initMemRead(GPUDynInstPtr gpuDynInst)
         {
-            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
+            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
+                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
+            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+                Wavefront *wf = gpuDynInst->wavefront();
+                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                    if (gpuDynInst->exec_mask[lane]) {
+                        Addr vaddr = gpuDynInst->addr[lane];
+                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
+                            = wf->ldsChunk->read<T>(vaddr);
+                    }
+                }
+            }
         }
 
         template<int N>
         void
         initMemRead(GPUDynInstPtr gpuDynInst)
         {
-            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
+            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
+                initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
+            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+                Wavefront *wf = gpuDynInst->wavefront();
+                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                    if (gpuDynInst->exec_mask[lane]) {
+                        Addr vaddr = gpuDynInst->addr[lane];
+                        for (int i = 0; i < N; ++i) {
+                            (reinterpret_cast<VecElemU32*>(
+                                gpuDynInst->d_data))[lane * N + i]
+                                = wf->ldsChunk->read<VecElemU32>(
+                                    vaddr + i*sizeof(VecElemU32));
+                        }
+                    }
+                }
+            }
         }
 
         template<typename T>
         void
         initMemWrite(GPUDynInstPtr gpuDynInst)
         {
-            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
+            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
+                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
+            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+                Wavefront *wf = gpuDynInst->wavefront();
+                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                    if (gpuDynInst->exec_mask[lane]) {
+                        Addr vaddr = gpuDynInst->addr[lane];
+                        wf->ldsChunk->write<T>(vaddr,
+                            (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
+                    }
+                }
+            }
         }
 
         template<int N>
         void
         initMemWrite(GPUDynInstPtr gpuDynInst)
         {
-            initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
+            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
+                initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
+            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+                Wavefront *wf = gpuDynInst->wavefront();
+                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                    if (gpuDynInst->exec_mask[lane]) {
+                        Addr vaddr = gpuDynInst->addr[lane];
+                        for (int i = 0; i < N; ++i) {
+                            wf->ldsChunk->write<VecElemU32>(
+                                vaddr + i*sizeof(VecElemU32),
+                                (reinterpret_cast<VecElemU32*>(
+                                    gpuDynInst->d_data))[lane * N + i]);
+                        }
+                    }
+                }
+            }
         }
 
         template<typename T>
         void
         initAtomicAccess(GPUDynInstPtr gpuDynInst)
         {
-            initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
+            if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
+                initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
+            } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
+                Wavefront *wf = gpuDynInst->wavefront();
+                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                    if (gpuDynInst->exec_mask[lane]) {
+                        Addr vaddr = gpuDynInst->addr[lane];
+                        auto amo_op =
+                            gpuDynInst->makeAtomicOpFunctor<T>(
+                                &(reinterpret_cast<T*>(
+                                    gpuDynInst->a_data))[lane],
+                                &(reinterpret_cast<T*>(
+                                    gpuDynInst->x_data))[lane]);
+
+                        T tmp = wf->ldsChunk->read<T>(vaddr);
+                        (*amo_op)(reinterpret_cast<uint8_t*>(&tmp));
+                        wf->ldsChunk->write<T>(vaddr, tmp);
+                        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane] = tmp;
+                    }
+                }
+            }
         }
 
         void