Lots of times the params() helper has been defined but not used Change-Id: Id71829aca71341d46964d8f071099342b946b62f Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com> Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/41613 Tested-by: kokoro <noreply+kokoro@google.com>
533 lines
13 KiB
C++
533 lines
13 KiB
C++
/*
|
|
* Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* For use for simulation and test purposes only
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
*
|
|
* 3. Neither the name of the copyright holder nor the names of its
|
|
* contributors may be used to endorse or promote products derived from this
|
|
* software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#ifndef __LDS_STATE_HH__
|
|
#define __LDS_STATE_HH__
|
|
|
|
#include <array>
#include <cstdint>
#include <cstring>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
|
|
|
|
#include "gpu-compute/misc.hh"
|
|
#include "mem/port.hh"
|
|
#include "params/LdsState.hh"
|
|
#include "sim/clocked_object.hh"
|
|
|
|
// Forward declaration: this header only stores and hands out ComputeUnit
// pointers, so the full class definition is not needed here.
class ComputeUnit;
|
|
|
|
/**
 * this represents a slice of the overall LDS, intended to be associated with
 * an individual workgroup. The slice is a flat, byte-addressed buffer; typed
 * accesses of any width may start at any byte offset.
 */
class LdsChunk
{
  public:
    /**
     * create a chunk backed by x_size bytes (std::vector value-initializes
     * them, so the chunk starts out zeroed)
     */
    LdsChunk(const uint32_t x_size):
        chunk(x_size)
    {
    }

    /**
     * create an empty chunk; every read returns 0 and every write is dropped
     */
    LdsChunk() {}

    /**
     * a read operation
     *
     * @param index byte offset of the first byte to read
     * @return the sizeof(T) bytes starting at index, or 0 if any of those
     *         bytes lies outside the chunk
     */
    template<class T>
    T
    read(const uint32_t index)
    {
        T value = static_cast<T>(0);

        /**
         * For reads that are (even partially) outside the bounds of the
         * LDS chunk allocated to this WG we return 0.
         */
        if (!inBounds(index, sizeof(T))) {
            return value;
        }

        // memcpy rather than a pointer cast: index need not be aligned
        // for T and the backing store is a plain byte array
        std::memcpy(&value, chunk.data() + index, sizeof(T));
        return value;
    }

    /**
     * a write operation
     *
     * @param index byte offset of the first byte to write
     * @param value the value whose sizeof(T) bytes are stored at index
     */
    template<class T>
    void
    write(const uint32_t index, const T value)
    {
        /**
         * Writes that are (even partially) outside the bounds of the LDS
         * chunk allocated to this WG are dropped.
         */
        if (!inBounds(index, sizeof(T))) {
            return;
        }

        // memcpy rather than a pointer cast, for the same reason as read()
        std::memcpy(chunk.data() + index, &value, sizeof(T));
    }

    /**
     * get the size of this chunk
     */
    std::vector<uint8_t>::size_type
    size() const
    {
        return chunk.size();
    }

  protected:
    /**
     * true if the width bytes starting at index all lie inside the chunk
     */
    bool
    inBounds(const uint32_t index,
             const std::vector<uint8_t>::size_type width) const
    {
        // index < size() is tested first so the subtraction cannot wrap
        return index < chunk.size() && width <= chunk.size() - index;
    }

    // the actual data store for this slice of the LDS
    std::vector<uint8_t> chunk;
};
|
|
|
|
// Local Data Share (LDS) State per Wavefront (contents of the LDS region
|
|
// allocated to the WorkGroup of this Wavefront)
|
|
class LdsState: public ClockedObject
|
|
{
|
|
protected:
|
|
|
|
/**
|
|
* an event to allow event-driven execution
|
|
*/
|
|
class TickEvent: public Event
|
|
{
|
|
protected:
|
|
|
|
LdsState *ldsState = nullptr;
|
|
|
|
Tick nextTick = 0;
|
|
|
|
public:
|
|
|
|
TickEvent(LdsState *_ldsState) :
|
|
ldsState(_ldsState)
|
|
{
|
|
}
|
|
|
|
virtual void
|
|
process();
|
|
|
|
void
|
|
schedule(Tick when)
|
|
{
|
|
mainEventQueue[0]->schedule(this, when);
|
|
}
|
|
|
|
void
|
|
deschedule()
|
|
{
|
|
mainEventQueue[0]->deschedule(this);
|
|
}
|
|
};
|
|
|
|
/**
|
|
* CuSidePort is the LDS Port closer to the CU side
|
|
*/
|
|
class CuSidePort: public ResponsePort
|
|
{
|
|
public:
|
|
CuSidePort(const std::string &_name, LdsState *_ownerLds) :
|
|
ResponsePort(_name, _ownerLds), ownerLds(_ownerLds)
|
|
{
|
|
}
|
|
|
|
protected:
|
|
LdsState *ownerLds;
|
|
|
|
virtual bool
|
|
recvTimingReq(PacketPtr pkt);
|
|
|
|
virtual Tick
|
|
recvAtomic(PacketPtr pkt)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
virtual void
|
|
recvFunctional(PacketPtr pkt);
|
|
|
|
virtual void
|
|
recvRangeChange()
|
|
{
|
|
}
|
|
|
|
virtual void
|
|
recvRetry();
|
|
|
|
virtual void
|
|
recvRespRetry();
|
|
|
|
virtual AddrRangeList
|
|
getAddrRanges() const
|
|
{
|
|
AddrRangeList ranges;
|
|
ranges.push_back(ownerLds->getAddrRange());
|
|
return ranges;
|
|
}
|
|
|
|
template<typename T>
|
|
void
|
|
loadData(PacketPtr packet);
|
|
|
|
template<typename T>
|
|
void
|
|
storeData(PacketPtr packet);
|
|
|
|
template<typename T>
|
|
void
|
|
atomicOperation(PacketPtr packet);
|
|
};
|
|
|
|
protected:
|
|
|
|
/**
|
|
* the lds reference counter
|
|
* The key is the workgroup ID and dispatch ID
|
|
* The value is the number of wavefronts that reference this LDS, as
|
|
* wavefronts are launched, the counter goes up for that workgroup and when
|
|
* they return it decreases, once it reaches 0 then this chunk of the LDS
|
|
* is returned to the available pool. However,it is deallocated on the 1->0
|
|
* transition, not whenever the counter is 0 as it always starts with 0
|
|
* when the workgroup asks for space
|
|
*/
|
|
std::unordered_map<uint32_t,
|
|
std::unordered_map<uint32_t, int32_t>> refCounter;
|
|
|
|
// the map that allows workgroups to access their own chunk of the LDS
|
|
std::unordered_map<uint32_t,
|
|
std::unordered_map<uint32_t, LdsChunk>> chunkMap;
|
|
|
|
// an event to allow the LDS to wake up at a specified time
|
|
TickEvent tickEvent;
|
|
|
|
// the queue of packets that are going back to the CU after a
|
|
// read/write/atomic op
|
|
// TODO need to make this have a maximum size to create flow control
|
|
std::queue<std::pair<Tick, PacketPtr>> returnQueue;
|
|
|
|
// whether or not there are pending responses
|
|
bool retryResp = false;
|
|
|
|
bool
|
|
process();
|
|
|
|
GPUDynInstPtr
|
|
getDynInstr(PacketPtr packet);
|
|
|
|
bool
|
|
processPacket(PacketPtr packet);
|
|
|
|
unsigned
|
|
countBankConflicts(PacketPtr packet, unsigned *bankAccesses);
|
|
|
|
unsigned
|
|
countBankConflicts(GPUDynInstPtr gpuDynInst,
|
|
unsigned *numBankAccesses);
|
|
|
|
public:
|
|
using Params = LdsStateParams;
|
|
|
|
LdsState(const Params ¶ms);
|
|
|
|
// prevent copy construction
|
|
LdsState(const LdsState&) = delete;
|
|
|
|
~LdsState()
|
|
{
|
|
parent = nullptr;
|
|
}
|
|
|
|
bool
|
|
isRetryResp() const
|
|
{
|
|
return retryResp;
|
|
}
|
|
|
|
void
|
|
setRetryResp(const bool value)
|
|
{
|
|
retryResp = value;
|
|
}
|
|
|
|
// prevent assignment
|
|
LdsState &
|
|
operator=(const LdsState &) = delete;
|
|
|
|
/**
|
|
* use the dynamic wave id to create or just increase the reference count
|
|
*/
|
|
int
|
|
increaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
|
|
{
|
|
int refCount = getRefCounter(dispatchId, wgId);
|
|
fatal_if(refCount < 0,
|
|
"reference count should not be below zero");
|
|
return ++refCounter[dispatchId][wgId];
|
|
}
|
|
|
|
/**
|
|
* decrease the reference count after making sure it is in the list
|
|
* give back this chunk if the ref counter has reached 0
|
|
*/
|
|
int
|
|
decreaseRefCounter(const uint32_t dispatchId, const uint32_t wgId)
|
|
{
|
|
int refCount = getRefCounter(dispatchId, wgId);
|
|
|
|
fatal_if(refCount <= 0,
|
|
"reference count should not be below zero or at zero to"
|
|
"decrement");
|
|
|
|
refCounter[dispatchId][wgId]--;
|
|
|
|
if (refCounter[dispatchId][wgId] == 0) {
|
|
releaseSpace(dispatchId, wgId);
|
|
return 0;
|
|
} else {
|
|
return refCounter[dispatchId][wgId];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* return the current reference count for this workgroup id
|
|
*/
|
|
int
|
|
getRefCounter(const uint32_t dispatchId, const uint32_t wgId) const
|
|
{
|
|
auto dispatchIter = chunkMap.find(dispatchId);
|
|
fatal_if(dispatchIter == chunkMap.end(),
|
|
"could not locate this dispatch id [%d]", dispatchId);
|
|
|
|
auto workgroup = dispatchIter->second.find(wgId);
|
|
fatal_if(workgroup == dispatchIter->second.end(),
|
|
"could not find this workgroup id within this dispatch id"
|
|
" did[%d] wgid[%d]", dispatchId, wgId);
|
|
|
|
auto refCountIter = refCounter.find(dispatchId);
|
|
if (refCountIter == refCounter.end()) {
|
|
fatal("could not locate this dispatch id [%d]", dispatchId);
|
|
} else {
|
|
auto workgroup = refCountIter->second.find(wgId);
|
|
if (workgroup == refCountIter->second.end()) {
|
|
fatal("could not find this workgroup id within this dispatch id"
|
|
" did[%d] wgid[%d]", dispatchId, wgId);
|
|
} else {
|
|
return refCounter.at(dispatchId).at(wgId);
|
|
}
|
|
}
|
|
|
|
fatal("should not reach this point");
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* assign a parent and request this amount of space be set aside
|
|
* for this wgid
|
|
*/
|
|
LdsChunk *
|
|
reserveSpace(const uint32_t dispatchId, const uint32_t wgId,
|
|
const uint32_t size)
|
|
{
|
|
if (chunkMap.find(dispatchId) != chunkMap.end()) {
|
|
panic_if(
|
|
chunkMap[dispatchId].find(wgId) != chunkMap[dispatchId].end(),
|
|
"duplicate workgroup ID asking for space in the LDS "
|
|
"did[%d] wgid[%d]", dispatchId, wgId);
|
|
}
|
|
|
|
if (bytesAllocated + size > maximumSize) {
|
|
return nullptr;
|
|
} else {
|
|
bytesAllocated += size;
|
|
|
|
auto value = chunkMap[dispatchId].emplace(wgId, LdsChunk(size));
|
|
panic_if(!value.second, "was unable to allocate a new chunkMap");
|
|
|
|
// make an entry for this workgroup
|
|
refCounter[dispatchId][wgId] = 0;
|
|
|
|
return &chunkMap[dispatchId][wgId];
|
|
}
|
|
}
|
|
|
|
/*
|
|
* return pointer to lds chunk for wgid
|
|
*/
|
|
LdsChunk *
|
|
getLdsChunk(const uint32_t dispatchId, const uint32_t wgId)
|
|
{
|
|
fatal_if(chunkMap.find(dispatchId) == chunkMap.end(),
|
|
"fetch for unknown dispatch ID did[%d]", dispatchId);
|
|
|
|
fatal_if(chunkMap[dispatchId].find(wgId) == chunkMap[dispatchId].end(),
|
|
"fetch for unknown workgroup ID wgid[%d] in dispatch ID did[%d]",
|
|
wgId, dispatchId);
|
|
|
|
return &chunkMap[dispatchId][wgId];
|
|
}
|
|
|
|
bool
|
|
returnQueuePush(std::pair<Tick, PacketPtr> thePair);
|
|
|
|
Tick
|
|
earliestReturnTime() const
|
|
{
|
|
// TODO set to max(lastCommand+1, curTick())
|
|
return returnQueue.empty() ? curTick() : returnQueue.back().first;
|
|
}
|
|
|
|
void
|
|
setParent(ComputeUnit *x_parent);
|
|
|
|
// accessors
|
|
ComputeUnit *
|
|
getParent() const
|
|
{
|
|
return parent;
|
|
}
|
|
|
|
std::string
|
|
getName()
|
|
{
|
|
return _name;
|
|
}
|
|
|
|
int
|
|
getBanks() const
|
|
{
|
|
return banks;
|
|
}
|
|
|
|
ComputeUnit *
|
|
getComputeUnit() const
|
|
{
|
|
return parent;
|
|
}
|
|
|
|
int
|
|
getBankConflictPenalty() const
|
|
{
|
|
return bankConflictPenalty;
|
|
}
|
|
|
|
/**
|
|
* get the allocated size for this workgroup
|
|
*/
|
|
std::size_t
|
|
ldsSize(const uint32_t x_wgId)
|
|
{
|
|
return chunkMap[x_wgId].size();
|
|
}
|
|
|
|
AddrRange
|
|
getAddrRange() const
|
|
{
|
|
return range;
|
|
}
|
|
|
|
Port &
|
|
getPort(const std::string &if_name, PortID idx)
|
|
{
|
|
if (if_name == "cuPort") {
|
|
// TODO need to set name dynamically at this point?
|
|
return cuPort;
|
|
} else {
|
|
fatal("cannot resolve the port name " + if_name);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* can this much space be reserved for a workgroup?
|
|
*/
|
|
bool
|
|
canReserve(uint32_t x_size) const
|
|
{
|
|
return bytesAllocated + x_size <= maximumSize;
|
|
}
|
|
|
|
private:
|
|
/**
|
|
* give back the space
|
|
*/
|
|
bool
|
|
releaseSpace(const uint32_t x_dispatchId, const uint32_t x_wgId)
|
|
{
|
|
auto dispatchIter = chunkMap.find(x_dispatchId);
|
|
|
|
if (dispatchIter == chunkMap.end()) {
|
|
fatal("dispatch id not found [%d]", x_dispatchId);
|
|
} else {
|
|
auto workgroupIter = dispatchIter->second.find(x_wgId);
|
|
if (workgroupIter == dispatchIter->second.end()) {
|
|
fatal("workgroup id [%d] not found in dispatch id [%d]",
|
|
x_wgId, x_dispatchId);
|
|
}
|
|
}
|
|
|
|
fatal_if(bytesAllocated < chunkMap[x_dispatchId][x_wgId].size(),
|
|
"releasing more space than was allocated");
|
|
|
|
bytesAllocated -= chunkMap[x_dispatchId][x_wgId].size();
|
|
chunkMap[x_dispatchId].erase(chunkMap[x_dispatchId].find(x_wgId));
|
|
return true;
|
|
}
|
|
|
|
// the port that connects this LDS to its owner CU
|
|
CuSidePort cuPort;
|
|
|
|
ComputeUnit* parent = nullptr;
|
|
|
|
std::string _name;
|
|
|
|
// the number of bytes currently reserved by all workgroups
|
|
int bytesAllocated = 0;
|
|
|
|
// the size of the LDS, the most bytes available
|
|
int maximumSize;
|
|
|
|
// Address range of this memory
|
|
AddrRange range;
|
|
|
|
// the penalty, in cycles, for each LDS bank conflict
|
|
int bankConflictPenalty = 0;
|
|
|
|
// the number of banks in the LDS underlying data store
|
|
int banks = 0;
|
|
};
|
|
|
|
#endif // __LDS_STATE_HH__
|