arch-gcn3: Implement LDS accesses in Flat instructions
Add support for LDS accesses by allowing Flat instructions to dispatch into the local memory pipeline if the requested address is in the group aperture. This requires implementing LDS accesses in the Flat initMemRead/Write functions, in a similar fashion to the DS functions of the same name. Because we can now potentially dispatch to the local memory pipeline, this change also adds a check to regain any tokens we requested as a flat instruction. Change-Id: Id26191f7ee43291a5e5ca5f39af06af981ec23ab Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/48343 Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Reviewed-by: Matthew Poremba <matthew.poremba@amd.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
committed by
Matt Sinclair
parent
523a92f7f0
commit
9a7fc4ff69
@@ -36314,7 +36314,7 @@ namespace Gcn3ISA
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36363,7 +36363,7 @@ namespace Gcn3ISA
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
void
|
||||
@@ -39384,8 +39384,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
} // execute
|
||||
|
||||
@@ -39448,8 +39451,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39511,8 +39517,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39603,8 +39612,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39667,8 +39679,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39731,8 +39746,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39804,8 +39822,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39889,8 +39910,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
} // execute
|
||||
|
||||
@@ -39952,8 +39976,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40015,8 +40042,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40079,8 +40109,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40151,8 +40184,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40227,8 +40263,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40294,8 +40333,11 @@ namespace Gcn3ISA
|
||||
"Flats to private aperture not tested yet\n");
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
|
||||
ConstVecOperandU32 data(gpuDynInst, extData.DATA);
|
||||
@@ -40408,8 +40450,11 @@ namespace Gcn3ISA
|
||||
"Flats to private aperture not tested yet\n");
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40492,8 +40537,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40576,8 +40624,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
void
|
||||
@@ -40834,8 +40885,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40918,8 +40972,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41044,8 +41101,11 @@ namespace Gcn3ISA
|
||||
"Flats to private aperture not tested yet\n");
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41129,8 +41189,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41215,8 +41278,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41483,8 +41549,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41570,8 +41639,11 @@ namespace Gcn3ISA
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
gpuDynInst->computeUnit()->globalMemoryPipe.
|
||||
issueRequest(gpuDynInst);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
gpuDynInst->computeUnit()->localMemoryPipe
|
||||
.issueRequest(gpuDynInst);
|
||||
} else {
|
||||
fatal("Non global flat instructions not implemented yet.\n");
|
||||
fatal("Unsupported scope for flat instruction.\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -799,35 +799,107 @@ namespace Gcn3ISA
|
||||
void
|
||||
initMemRead(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
Addr vaddr = gpuDynInst->addr[lane];
|
||||
(reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
|
||||
= wf->ldsChunk->read<T>(vaddr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void
|
||||
initMemRead(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
Addr vaddr = gpuDynInst->addr[lane];
|
||||
for (int i = 0; i < N; ++i) {
|
||||
(reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * N + i]
|
||||
= wf->ldsChunk->read<VecElemU32>(
|
||||
vaddr + i*sizeof(VecElemU32));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void
|
||||
initMemWrite(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
Addr vaddr = gpuDynInst->addr[lane];
|
||||
wf->ldsChunk->write<T>(vaddr,
|
||||
(reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int N>
|
||||
void
|
||||
initMemWrite(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
Addr vaddr = gpuDynInst->addr[lane];
|
||||
for (int i = 0; i < N; ++i) {
|
||||
wf->ldsChunk->write<VecElemU32>(
|
||||
vaddr + i*sizeof(VecElemU32),
|
||||
(reinterpret_cast<VecElemU32*>(
|
||||
gpuDynInst->d_data))[lane * N + i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void
|
||||
initAtomicAccess(GPUDynInstPtr gpuDynInst)
|
||||
{
|
||||
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
|
||||
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
|
||||
initMemReqHelper<T, 1>(gpuDynInst, MemCmd::SwapReq, true);
|
||||
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (gpuDynInst->exec_mask[lane]) {
|
||||
Addr vaddr = gpuDynInst->addr[lane];
|
||||
AtomicOpFunctor* amo_op =
|
||||
gpuDynInst->makeAtomicOpFunctor<T>(
|
||||
&(reinterpret_cast<T*>(
|
||||
gpuDynInst->a_data))[lane],
|
||||
&(reinterpret_cast<T*>(
|
||||
gpuDynInst->x_data))[lane]).get();
|
||||
|
||||
T tmp = wf->ldsChunk->read<T>(vaddr);
|
||||
(*amo_op)(reinterpret_cast<uint8_t *>(&tmp));
|
||||
wf->ldsChunk->write<T>(vaddr, tmp);
|
||||
(reinterpret_cast<T*>(gpuDynInst->d_data))[lane] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -834,7 +834,10 @@ GPUDynInst::resolveFlatSegment(const VectorMask &mask)
|
||||
if (mask[lane]) {
|
||||
// flat address calculation goes here.
|
||||
// addr[lane] = segmented address
|
||||
panic("Flat group memory operation is unimplemented!\n");
|
||||
addr[lane] = addr[lane] -
|
||||
wavefront()->computeUnit->shader->ldsApe().base;
|
||||
assert(addr[lane] <
|
||||
wavefront()->computeUnit->getLds().getAddrRange().size());
|
||||
}
|
||||
}
|
||||
wavefront()->execUnitId = wavefront()->flatLmUnitId;
|
||||
|
||||
@@ -76,6 +76,11 @@ LocalMemPipeline::exec()
|
||||
lmReturnedRequests.pop();
|
||||
w = m->wavefront();
|
||||
|
||||
if (m->isFlat() && !m->isMemSync() && !m->isEndOfKernel()
|
||||
&& m->allLanesZero()) {
|
||||
computeUnit.getTokenManager()->recvTokens(1);
|
||||
}
|
||||
|
||||
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing local mem instr %s\n",
|
||||
m->cu_id, m->simdId, m->wfSlotId, m->disassemble());
|
||||
m->completeAcc(m);
|
||||
|
||||
Reference in New Issue
Block a user