mem-ruby: update memory interfaces to support GPU ISA

This patch deprecates HSA-based memory request types and adds new
types that can be used by real ISA instructions.

Change-Id: Ie107a69d8a35e9de0853f1407392ad01a8b3e930
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28408
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Tuan Ta
2018-05-01 11:43:16 -04:00
committed by Anthony Gutierrez
parent 18e435546c
commit adc9de4d61
4 changed files with 45 additions and 131 deletions

View File

@@ -181,6 +181,10 @@ MemCmd::commandInfo[] =
{ 0, InvalidCmd, "Deprecated_MessageResp" },
/* MemFenceReq -- for synchronization requests */
{SET2(IsRequest, NeedsResponse), MemFenceResp, "MemFenceReq"},
/* MemSyncReq */
{SET2(IsRequest, NeedsResponse), MemSyncResp, "MemSyncReq"},
/* MemSyncResp */
{SET1(IsResponse), InvalidCmd, "MemSyncResp"},
/* MemFenceResp -- for synchronization responses */
{SET1(IsResponse), InvalidCmd, "MemFenceResp"},
/* Cache Clean Request -- Update with the latest data all existing

View File

@@ -110,6 +110,8 @@ class MemCmd
SwapResp,
// MessageReq and MessageResp are deprecated.
MemFenceReq = SwapResp + 3,
MemSyncReq, // memory synchronization request (e.g., cache invalidate)
MemSyncResp, // memory synchronization response
MemFenceResp,
CleanSharedReq,
CleanSharedResp,

View File

@@ -110,7 +110,7 @@ class Request
* STRICT_ORDER flag should be set if such reordering is
* undesirable.
*/
UNCACHEABLE = 0x00000400,
UNCACHEABLE = 0x00000400,
/**
* The request is required to be strictly ordered by <i>CPU
* models</i> and is non-speculative.
@@ -216,35 +216,30 @@ class Request
};
/** @} */
typedef uint32_t MemSpaceConfigFlagsType;
typedef ::Flags<MemSpaceConfigFlagsType> MemSpaceConfigFlags;
typedef uint64_t CacheCoherenceFlagsType;
typedef ::Flags<CacheCoherenceFlagsType> CacheCoherenceFlags;
enum : MemSpaceConfigFlagsType {
/** Has a synchronization scope been set? */
SCOPE_VALID = 0x00000001,
/** Access has Wavefront scope visibility */
WAVEFRONT_SCOPE = 0x00000002,
/** Access has Workgroup scope visibility */
WORKGROUP_SCOPE = 0x00000004,
/** Access has Device (e.g., GPU) scope visibility */
DEVICE_SCOPE = 0x00000008,
/** Access has System (e.g., CPU + GPU) scope visibility */
SYSTEM_SCOPE = 0x00000010,
/** Global Segment */
GLOBAL_SEGMENT = 0x00000020,
/** Group Segment */
GROUP_SEGMENT = 0x00000040,
/** Private Segment */
PRIVATE_SEGMENT = 0x00000080,
/** Kernarg Segment */
KERNARG_SEGMENT = 0x00000100,
/** Readonly Segment */
READONLY_SEGMENT = 0x00000200,
/** Spill Segment */
SPILL_SEGMENT = 0x00000400,
/** Arg Segment */
ARG_SEGMENT = 0x00000800,
/**
* These bits are used to set the coherence policy
* for the GPU and are encoded in the GCN3 instructions.
* See the AMD GCN3 ISA Architecture Manual for more
* details.
*
* SLC: System Level Coherent. Accesses are forced to miss in
* the L2 cache and are coherent with system memory.
*
* GLC: Globally Coherent. Controls how reads and writes are
* handled by the L1 cache. Global here refers to the
* data being visible globally on the GPU (i.e., visible
* to all WGs).
*
* For atomics, the GLC bit is used to distinguish
* between atomic return/no-return operations.
*/
enum : CacheCoherenceFlagsType {
/** user-policy flags */
SLC_BIT = 0x00000080,
GLC_BIT = 0x00000100,
};
using LocalAccessor =
@@ -305,8 +300,8 @@ class Request
/** Flag structure for the request. */
Flags _flags;
/** Memory space configuration flag structure for the request. */
MemSpaceConfigFlags _memSpaceConfigFlags;
/** Flags that control how downstream cache system maintains coherence*/
CacheCoherenceFlags _cacheCoherenceFlags;
/** Private flags for field validity checking. */
PrivateFlags privateFlags;
@@ -394,7 +389,7 @@ class Request
_byteEnable(other._byteEnable),
_masterId(other._masterId),
_flags(other._flags),
_memSpaceConfigFlags(other._memSpaceConfigFlags),
_cacheCoherenceFlags(other._cacheCoherenceFlags),
privateFlags(other.privateFlags),
_time(other._time),
_taskId(other._taskId), _vaddr(other._vaddr),
@@ -629,10 +624,11 @@ class Request
}
void
setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags)
setCacheCoherenceFlags(CacheCoherenceFlags extraFlags)
{
// TODO: do mem_sync_op requests have valid paddr/vaddr?
assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR));
_memSpaceConfigFlags.set(extraFlags);
_cacheCoherenceFlags.set(extraFlags);
}
/** Accessor function for vaddr.*/
@@ -840,82 +836,10 @@ class Request
* Accessor functions for the memory space configuration flags and used by
* GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that
* these are for testing only; setting extraFlags should be done via
* setMemSpaceConfigFlags().
* setCacheCoherenceFlags().
*/
bool isScoped() const { return _memSpaceConfigFlags.isSet(SCOPE_VALID); }
/// True iff this access has Wavefront-scope visibility.
/// Precondition: a scope must have been set (asserts isScoped()).
bool
isWavefrontScope() const
{
assert(isScoped());
return _memSpaceConfigFlags.isSet(WAVEFRONT_SCOPE);
}
/// True iff this access has Workgroup-scope visibility.
/// Precondition: a scope must have been set (asserts isScoped()).
bool
isWorkgroupScope() const
{
assert(isScoped());
return _memSpaceConfigFlags.isSet(WORKGROUP_SCOPE);
}
/// True iff this access has Device-scope (e.g., whole-GPU) visibility.
/// Precondition: a scope must have been set (asserts isScoped()).
bool
isDeviceScope() const
{
assert(isScoped());
return _memSpaceConfigFlags.isSet(DEVICE_SCOPE);
}
/// True iff this access has System-scope (e.g., CPU + GPU) visibility.
/// Precondition: a scope must have been set (asserts isScoped()).
bool
isSystemScope() const
{
assert(isScoped());
return _memSpaceConfigFlags.isSet(SYSTEM_SCOPE);
}
/// True iff this access targets the Global segment. Global is the
/// default segment: this also returns true when no other segment
/// flag has been set.
bool
isGlobalSegment() const
{
return _memSpaceConfigFlags.isSet(GLOBAL_SEGMENT) ||
(!isGroupSegment() && !isPrivateSegment() &&
!isKernargSegment() && !isReadonlySegment() &&
!isSpillSegment() && !isArgSegment());
}
/// True iff this access targets the Group segment.
bool
isGroupSegment() const
{
return _memSpaceConfigFlags.isSet(GROUP_SEGMENT);
}
/// True iff this access targets the Private segment.
bool
isPrivateSegment() const
{
return _memSpaceConfigFlags.isSet(PRIVATE_SEGMENT);
}
/// True iff this access targets the Kernarg (kernel argument) segment.
bool
isKernargSegment() const
{
return _memSpaceConfigFlags.isSet(KERNARG_SEGMENT);
}
/// True iff this access targets the Readonly segment.
bool
isReadonlySegment() const
{
return _memSpaceConfigFlags.isSet(READONLY_SEGMENT);
}
/// True iff this access targets the Spill segment.
bool
isSpillSegment() const
{
return _memSpaceConfigFlags.isSet(SPILL_SEGMENT);
}
/// True iff this access targets the Arg segment.
bool
isArgSegment() const
{
return _memSpaceConfigFlags.isSet(ARG_SEGMENT);
}
/// True iff the SLC (System Level Coherent) GCN3 bit is set: the access
/// is forced to miss in the L2 cache and is coherent with system memory.
bool isSLC() const { return _cacheCoherenceFlags.isSet(SLC_BIT); }
/// True iff the GLC (Globally Coherent) GCN3 bit is set: controls how
/// the L1 handles reads/writes; for atomics it distinguishes
/// return vs. no-return operations.
bool isGLC() const { return _cacheCoherenceFlags.isSet(GLC_BIT); }
/**
* Accessor functions to determine whether this request is part of

View File

@@ -58,16 +58,11 @@ class RubyRequest : public Message
WriteMask m_writeMask;
DataBlock m_WTData;
int m_wfid;
HSAScope m_scope;
HSASegment m_segment;
RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No,
ContextID _proc_id = 100, ContextID _core_id = 99,
HSAScope _scope = HSAScope_UNSPECIFIED,
HSASegment _segment = HSASegment_GLOBAL)
ContextID _proc_id = 100, ContextID _core_id = 99)
: Message(curTime),
m_PhysicalAddress(_paddr),
m_Type(_type),
@@ -77,9 +72,7 @@ class RubyRequest : public Message
m_Prefetch(_pb),
data(_data),
m_pkt(_pkt),
m_contextId(_core_id),
m_scope(_scope),
m_segment(_segment)
m_contextId(_core_id)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
}
@@ -89,9 +82,7 @@ class RubyRequest : public Message
RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
unsigned _proc_id, unsigned _core_id,
int _wm_size, std::vector<bool> & _wm_mask,
DataBlock & _Data,
HSAScope _scope = HSAScope_UNSPECIFIED,
HSASegment _segment = HSASegment_GLOBAL)
DataBlock & _Data)
: Message(curTime),
m_PhysicalAddress(_paddr),
m_Type(_type),
@@ -104,9 +95,7 @@ class RubyRequest : public Message
m_contextId(_core_id),
m_writeMask(_wm_size,_wm_mask),
m_WTData(_Data),
m_wfid(_proc_id),
m_scope(_scope),
m_segment(_segment)
m_wfid(_proc_id)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
}
@@ -117,9 +106,7 @@ class RubyRequest : public Message
unsigned _proc_id, unsigned _core_id,
int _wm_size, std::vector<bool> & _wm_mask,
DataBlock & _Data,
std::vector< std::pair<int,AtomicOpFunctor*> > _atomicOps,
HSAScope _scope = HSAScope_UNSPECIFIED,
HSASegment _segment = HSASegment_GLOBAL)
std::vector< std::pair<int,AtomicOpFunctor*> > _atomicOps)
: Message(curTime),
m_PhysicalAddress(_paddr),
m_Type(_type),
@@ -132,14 +119,11 @@ class RubyRequest : public Message
m_contextId(_core_id),
m_writeMask(_wm_size,_wm_mask,_atomicOps),
m_WTData(_Data),
m_wfid(_proc_id),
m_scope(_scope),
m_segment(_segment)
m_wfid(_proc_id)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
}
RubyRequest(Tick curTime) : Message(curTime) {}
MsgPtr clone() const
{ return std::shared_ptr<Message>(new RubyRequest(*this)); }