arch-vega: Update FLAT memory access helpers to support LDS
This patch ports the changes from a similar patch for arch-gcn3: https://gem5-review.googlesource.com/c/public/gem5/+/48343. Vega already has a helper function to send to the correct pipe depending on the scope; however, the initMem helpers currently always assume global scope. In addition, the MUBUF WBINVL1 instructions are updated similarly to the GCN3 patch. Change-Id: I612b9198cb56e226721a90e72bba64395c84ebcd Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/55465 Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -39848,7 +39848,13 @@ namespace VegaISA
|
||||
gpuDynInst->execUnitId = wf->execUnitId;
|
||||
gpuDynInst->latency.init(gpuDynInst->computeUnit());
|
||||
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
|
||||
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
} // execute
|
||||
|
||||
void
|
||||
@@ -39901,7 +39907,13 @@ namespace VegaISA
|
||||
gpuDynInst->execUnitId = wf->execUnitId;
|
||||
gpuDynInst->latency.init(gpuDynInst->computeUnit());
|
||||
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst);
|
||||
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
} // execute
|
||||
void
|
||||
Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst)
|
||||
|
||||
@@ -800,35 +800,107 @@ namespace VegaISA
|
||||
void
|
||||
initMemRead(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
Addr vaddr = gpuDynInst->addr[lane];
|
||||
(reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
|
||||
= wf->ldsChunk->read<T>(vaddr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void
|
||||
initMemRead(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
Addr vaddr = gpuDynInst->addr[lane];
|
||||
for (int i = 0; i < N; ++i) {
|
||||
(reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * N + i]
|
||||
= wf->ldsChunk->read<VecElemU32>(
|
||||
vaddr + i*sizeof(VecElemU32));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void
|
||||
initMemWrite(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
Addr vaddr = gpuDynInst->addr[lane];
|
||||
wf->ldsChunk->write<T>(vaddr,
|
||||
(reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void
|
||||
initMemWrite(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
Addr vaddr = gpuDynInst->addr[lane];
|
||||
for (int i = 0; i < N; ++i) {
|
||||
wf->ldsChunk->write<VecElemU32>(
|
||||
vaddr + i*sizeof(VecElemU32),
|
||||
(reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * N + i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void
|
||||
initAtomicAccess(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
Addr vaddr = gpuDynInst->addr[lane];
|
||||
auto amo_op =
|
||||
gpuDynInst->makeAtomicOpFunctor<T>(
|
||||
&(reinterpret_cast<T*>(
|
||||
gpuDynInst->a_data))[lane],
|
||||
&(reinterpret_cast<T*>(
|
||||
gpuDynInst->x_data))[lane]);
|
||||
|
||||
T tmp = wf->ldsChunk->read<T>(vaddr);
|
||||
(*amo_op)(reinterpret_cast<uint8_t *>(&tmp));
|
||||
wf->ldsChunk->write<T>(vaddr, tmp);
|
||||
(reinterpret_cast<T*>(gpuDynInst->d_data))[lane] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
Reference in New Issue
Block a user