mem-ruby: update memory interfaces to support GPU ISA
This patch deprecates HSA-based memory request types and adds new types that can be used by real ISA instructions. Change-Id: Ie107a69d8a35e9de0853f1407392ad01a8b3e930 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28408 Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com> Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
committed by
Anthony Gutierrez
parent
18e435546c
commit
adc9de4d61
@@ -181,6 +181,10 @@ MemCmd::commandInfo[] =
|
||||
{ 0, InvalidCmd, "Deprecated_MessageResp" },
|
||||
/* MemFenceReq -- for synchronization requests */
|
||||
{SET2(IsRequest, NeedsResponse), MemFenceResp, "MemFenceReq"},
|
||||
/* MemSyncReq */
|
||||
{SET2(IsRequest, NeedsResponse), MemSyncResp, "MemSyncReq"},
|
||||
/* MemSyncResp */
|
||||
{SET1(IsResponse), InvalidCmd, "MemSyncResp"},
|
||||
/* MemFenceResp -- for synchronization responses */
|
||||
{SET1(IsResponse), InvalidCmd, "MemFenceResp"},
|
||||
/* Cache Clean Request -- Update with the latest data all existing
|
||||
|
||||
@@ -110,6 +110,8 @@ class MemCmd
|
||||
SwapResp,
|
||||
// MessageReq and MessageResp are deprecated.
|
||||
MemFenceReq = SwapResp + 3,
|
||||
MemSyncReq, // memory synchronization request (e.g., cache invalidate)
|
||||
MemSyncResp, // memory synchronization response
|
||||
MemFenceResp,
|
||||
CleanSharedReq,
|
||||
CleanSharedResp,
|
||||
|
||||
@@ -110,7 +110,7 @@ class Request
|
||||
* STRICT_ORDER flag should be set if such reordering is
|
||||
* undesirable.
|
||||
*/
|
||||
UNCACHEABLE = 0x00000400,
|
||||
UNCACHEABLE = 0x00000400,
|
||||
/**
|
||||
* The request is required to be strictly ordered by <i>CPU
|
||||
* models</i> and is non-speculative.
|
||||
@@ -216,35 +216,30 @@ class Request
|
||||
};
|
||||
/** @} */
|
||||
|
||||
typedef uint32_t MemSpaceConfigFlagsType;
|
||||
typedef ::Flags<MemSpaceConfigFlagsType> MemSpaceConfigFlags;
|
||||
typedef uint64_t CacheCoherenceFlagsType;
|
||||
typedef ::Flags<CacheCoherenceFlagsType> CacheCoherenceFlags;
|
||||
|
||||
enum : MemSpaceConfigFlagsType {
|
||||
/** Has a synchronization scope been set? */
|
||||
SCOPE_VALID = 0x00000001,
|
||||
/** Access has Wavefront scope visibility */
|
||||
WAVEFRONT_SCOPE = 0x00000002,
|
||||
/** Access has Workgroup scope visibility */
|
||||
WORKGROUP_SCOPE = 0x00000004,
|
||||
/** Access has Device (e.g., GPU) scope visibility */
|
||||
DEVICE_SCOPE = 0x00000008,
|
||||
/** Access has System (e.g., CPU + GPU) scope visibility */
|
||||
SYSTEM_SCOPE = 0x00000010,
|
||||
|
||||
/** Global Segment */
|
||||
GLOBAL_SEGMENT = 0x00000020,
|
||||
/** Group Segment */
|
||||
GROUP_SEGMENT = 0x00000040,
|
||||
/** Private Segment */
|
||||
PRIVATE_SEGMENT = 0x00000080,
|
||||
/** Kernarg Segment */
|
||||
KERNARG_SEGMENT = 0x00000100,
|
||||
/** Readonly Segment */
|
||||
READONLY_SEGMENT = 0x00000200,
|
||||
/** Spill Segment */
|
||||
SPILL_SEGMENT = 0x00000400,
|
||||
/** Arg Segment */
|
||||
ARG_SEGMENT = 0x00000800,
|
||||
/**
|
||||
* These bits are used to set the coherence policy
|
||||
* for the GPU and are encoded in the GCN3 instructions.
|
||||
* See the AMD GCN3 ISA Architecture Manual for more
|
||||
* details.
|
||||
*
|
||||
* SLC: System Level Coherent. Accesses are forced to miss in
|
||||
* the L2 cache and are coherent with system memory.
|
||||
*
|
||||
* GLC: Globally Coherent. Controls how reads and writes are
|
||||
* handled by the L1 cache. Global here refers to the
|
||||
* data being visible globally on the GPU (i.e., visible
|
||||
* to all WGs).
|
||||
*
|
||||
* For atomics, the GLC bit is used to distinguish between
|
||||
* atomic return/no-return operations.
|
||||
*/
|
||||
enum : CacheCoherenceFlagsType {
|
||||
/** user-policy flags */
|
||||
SLC_BIT = 0x00000080,
|
||||
GLC_BIT = 0x00000100,
|
||||
};
|
||||
|
||||
using LocalAccessor =
|
||||
@@ -305,8 +300,8 @@ class Request
|
||||
/** Flag structure for the request. */
|
||||
Flags _flags;
|
||||
|
||||
/** Memory space configuration flag structure for the request. */
|
||||
MemSpaceConfigFlags _memSpaceConfigFlags;
|
||||
/** Flags that control how downstream cache system maintains coherence*/
|
||||
CacheCoherenceFlags _cacheCoherenceFlags;
|
||||
|
||||
/** Private flags for field validity checking. */
|
||||
PrivateFlags privateFlags;
|
||||
@@ -394,7 +389,7 @@ class Request
|
||||
_byteEnable(other._byteEnable),
|
||||
_masterId(other._masterId),
|
||||
_flags(other._flags),
|
||||
_memSpaceConfigFlags(other._memSpaceConfigFlags),
|
||||
_cacheCoherenceFlags(other._cacheCoherenceFlags),
|
||||
privateFlags(other.privateFlags),
|
||||
_time(other._time),
|
||||
_taskId(other._taskId), _vaddr(other._vaddr),
|
||||
@@ -629,10 +624,11 @@ class Request
|
||||
}
|
||||
|
||||
void
|
||||
setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags)
|
||||
setCacheCoherenceFlags(CacheCoherenceFlags extraFlags)
|
||||
{
|
||||
// TODO: do mem_sync_op requests have valid paddr/vaddr?
|
||||
assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR));
|
||||
_memSpaceConfigFlags.set(extraFlags);
|
||||
_cacheCoherenceFlags.set(extraFlags);
|
||||
}
|
||||
|
||||
/** Accessor function for vaddr.*/
|
||||
@@ -840,82 +836,10 @@ class Request
|
||||
* Accessor functions for the memory space configuration flags and used by
|
||||
* GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that
|
||||
* these are for testing only; setting extraFlags should be done via
|
||||
* setMemSpaceConfigFlags().
|
||||
* setCacheCoherenceFlags().
|
||||
*/
|
||||
bool isScoped() const { return _memSpaceConfigFlags.isSet(SCOPE_VALID); }
|
||||
|
||||
bool
|
||||
isWavefrontScope() const
|
||||
{
|
||||
assert(isScoped());
|
||||
return _memSpaceConfigFlags.isSet(WAVEFRONT_SCOPE);
|
||||
}
|
||||
|
||||
bool
|
||||
isWorkgroupScope() const
|
||||
{
|
||||
assert(isScoped());
|
||||
return _memSpaceConfigFlags.isSet(WORKGROUP_SCOPE);
|
||||
}
|
||||
|
||||
bool
|
||||
isDeviceScope() const
|
||||
{
|
||||
assert(isScoped());
|
||||
return _memSpaceConfigFlags.isSet(DEVICE_SCOPE);
|
||||
}
|
||||
|
||||
bool
|
||||
isSystemScope() const
|
||||
{
|
||||
assert(isScoped());
|
||||
return _memSpaceConfigFlags.isSet(SYSTEM_SCOPE);
|
||||
}
|
||||
|
||||
bool
|
||||
isGlobalSegment() const
|
||||
{
|
||||
return _memSpaceConfigFlags.isSet(GLOBAL_SEGMENT) ||
|
||||
(!isGroupSegment() && !isPrivateSegment() &&
|
||||
!isKernargSegment() && !isReadonlySegment() &&
|
||||
!isSpillSegment() && !isArgSegment());
|
||||
}
|
||||
|
||||
bool
|
||||
isGroupSegment() const
|
||||
{
|
||||
return _memSpaceConfigFlags.isSet(GROUP_SEGMENT);
|
||||
}
|
||||
|
||||
bool
|
||||
isPrivateSegment() const
|
||||
{
|
||||
return _memSpaceConfigFlags.isSet(PRIVATE_SEGMENT);
|
||||
}
|
||||
|
||||
bool
|
||||
isKernargSegment() const
|
||||
{
|
||||
return _memSpaceConfigFlags.isSet(KERNARG_SEGMENT);
|
||||
}
|
||||
|
||||
bool
|
||||
isReadonlySegment() const
|
||||
{
|
||||
return _memSpaceConfigFlags.isSet(READONLY_SEGMENT);
|
||||
}
|
||||
|
||||
bool
|
||||
isSpillSegment() const
|
||||
{
|
||||
return _memSpaceConfigFlags.isSet(SPILL_SEGMENT);
|
||||
}
|
||||
|
||||
bool
|
||||
isArgSegment() const
|
||||
{
|
||||
return _memSpaceConfigFlags.isSet(ARG_SEGMENT);
|
||||
}
|
||||
bool isSLC() const { return _cacheCoherenceFlags.isSet(SLC_BIT); }
|
||||
bool isGLC() const { return _cacheCoherenceFlags.isSet(GLC_BIT); }
|
||||
|
||||
/**
|
||||
* Accessor functions to determine whether this request is part of
|
||||
|
||||
@@ -58,16 +58,11 @@ class RubyRequest : public Message
|
||||
WriteMask m_writeMask;
|
||||
DataBlock m_WTData;
|
||||
int m_wfid;
|
||||
HSAScope m_scope;
|
||||
HSASegment m_segment;
|
||||
|
||||
|
||||
RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
|
||||
uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
|
||||
PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No,
|
||||
ContextID _proc_id = 100, ContextID _core_id = 99,
|
||||
HSAScope _scope = HSAScope_UNSPECIFIED,
|
||||
HSASegment _segment = HSASegment_GLOBAL)
|
||||
ContextID _proc_id = 100, ContextID _core_id = 99)
|
||||
: Message(curTime),
|
||||
m_PhysicalAddress(_paddr),
|
||||
m_Type(_type),
|
||||
@@ -77,9 +72,7 @@ class RubyRequest : public Message
|
||||
m_Prefetch(_pb),
|
||||
data(_data),
|
||||
m_pkt(_pkt),
|
||||
m_contextId(_core_id),
|
||||
m_scope(_scope),
|
||||
m_segment(_segment)
|
||||
m_contextId(_core_id)
|
||||
{
|
||||
m_LineAddress = makeLineAddress(m_PhysicalAddress);
|
||||
}
|
||||
@@ -89,9 +82,7 @@ class RubyRequest : public Message
|
||||
RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
|
||||
unsigned _proc_id, unsigned _core_id,
|
||||
int _wm_size, std::vector<bool> & _wm_mask,
|
||||
DataBlock & _Data,
|
||||
HSAScope _scope = HSAScope_UNSPECIFIED,
|
||||
HSASegment _segment = HSASegment_GLOBAL)
|
||||
DataBlock & _Data)
|
||||
: Message(curTime),
|
||||
m_PhysicalAddress(_paddr),
|
||||
m_Type(_type),
|
||||
@@ -104,9 +95,7 @@ class RubyRequest : public Message
|
||||
m_contextId(_core_id),
|
||||
m_writeMask(_wm_size,_wm_mask),
|
||||
m_WTData(_Data),
|
||||
m_wfid(_proc_id),
|
||||
m_scope(_scope),
|
||||
m_segment(_segment)
|
||||
m_wfid(_proc_id)
|
||||
{
|
||||
m_LineAddress = makeLineAddress(m_PhysicalAddress);
|
||||
}
|
||||
@@ -117,9 +106,7 @@ class RubyRequest : public Message
|
||||
unsigned _proc_id, unsigned _core_id,
|
||||
int _wm_size, std::vector<bool> & _wm_mask,
|
||||
DataBlock & _Data,
|
||||
std::vector< std::pair<int,AtomicOpFunctor*> > _atomicOps,
|
||||
HSAScope _scope = HSAScope_UNSPECIFIED,
|
||||
HSASegment _segment = HSASegment_GLOBAL)
|
||||
std::vector< std::pair<int,AtomicOpFunctor*> > _atomicOps)
|
||||
: Message(curTime),
|
||||
m_PhysicalAddress(_paddr),
|
||||
m_Type(_type),
|
||||
@@ -132,14 +119,11 @@ class RubyRequest : public Message
|
||||
m_contextId(_core_id),
|
||||
m_writeMask(_wm_size,_wm_mask,_atomicOps),
|
||||
m_WTData(_Data),
|
||||
m_wfid(_proc_id),
|
||||
m_scope(_scope),
|
||||
m_segment(_segment)
|
||||
m_wfid(_proc_id)
|
||||
{
|
||||
m_LineAddress = makeLineAddress(m_PhysicalAddress);
|
||||
}
|
||||
|
||||
|
||||
RubyRequest(Tick curTime) : Message(curTime) {}
|
||||
MsgPtr clone() const
|
||||
{ return std::shared_ptr<Message>(new RubyRequest(*this)); }
|
||||
|
||||
Reference in New Issue
Block a user