diff --git a/src/arch/amdgpu/vega/insts/flat.cc b/src/arch/amdgpu/vega/insts/flat.cc
index 7f79025b3f..8dce8d4299 100644
--- a/src/arch/amdgpu/vega/insts/flat.cc
+++ b/src/arch/amdgpu/vega/insts/flat.cc
@@ -110,17 +110,52 @@ namespace VegaISA
     void
     Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        // No active lanes: decrement the wavefront's issued-instruction
+        // counters and return without computing addresses or issuing
+        // a request.
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decVMemInstsIssued();
+            if (isFlat()) {
+                wf->decLGKMInstsIssued();
+            }
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
+
+        issueRequestHelper(gpuDynInst);
     } // execute
 
     void
     Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
     {
+        // Instantiate the read with VecElemI8, the same element type
+        // completeAcc uses to index d_data.
+        initMemRead<VecElemI8>(gpuDynInst);
     } // initiateAcc
 
     void
     Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
     {
+        VecOperandU32 vdst(gpuDynInst, extData.VDST);
+
+        // View d_data as one byte per lane and widen each active lane's
+        // byte to VecElemI32 before storing it in the destination register.
+        // NOTE(review): assumes VecElemI8 is a signed 8-bit type so the
+        // widening sign-extends -- confirm against the operand typedefs.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                vdst[lane] = (VecElemI32)((reinterpret_cast<VecElemI8*>(
+                    gpuDynInst->d_data))[lane]);
+            }
+        }
+        vdst.write();
     } // execute
 
     // --- Inst_FLAT__FLAT_LOAD_USHORT class methods ---