diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 2d69ba2978..1c1da212d5 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -181,6 +181,10 @@ MemCmd::commandInfo[] =
{ 0, InvalidCmd, "Deprecated_MessageResp" },
/* MemFenceReq -- for synchronization requests */
{SET2(IsRequest, NeedsResponse), MemFenceResp, "MemFenceReq"},
+ /* MemSyncReq */
+ {SET2(IsRequest, NeedsResponse), MemSyncResp, "MemSyncReq"},
+ /* MemSyncResp */
+ {SET1(IsResponse), InvalidCmd, "MemSyncResp"},
/* MemFenceResp -- for synchronization responses */
{SET1(IsResponse), InvalidCmd, "MemFenceResp"},
/* Cache Clean Request -- Update with the latest data all existing
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index d390c0092c..42d286a5e5 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -110,6 +110,8 @@ class MemCmd
SwapResp,
// MessageReq and MessageResp are deprecated.
MemFenceReq = SwapResp + 3,
+ MemSyncReq, // memory synchronization request (e.g., cache invalidate)
+ MemSyncResp, // memory synchronization response
MemFenceResp,
CleanSharedReq,
CleanSharedResp,
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 01252bf668..4e0ba974cb 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -110,7 +110,7 @@ class Request
* STRICT_ORDER flag should be set if such reordering is
* undesirable.
*/
- UNCACHEABLE = 0x00000400,
+ UNCACHEABLE = 0x00000400,
/**
* The request is required to be strictly ordered by CPU
* models and is non-speculative.
@@ -216,35 +216,30 @@ class Request
};
/** @} */
- typedef uint32_t MemSpaceConfigFlagsType;
- typedef ::Flags MemSpaceConfigFlags;
+ typedef uint64_t CacheCoherenceFlagsType;
+ typedef ::Flags CacheCoherenceFlags;
- enum : MemSpaceConfigFlagsType {
- /** Has a synchronization scope been set? */
- SCOPE_VALID = 0x00000001,
- /** Access has Wavefront scope visibility */
- WAVEFRONT_SCOPE = 0x00000002,
- /** Access has Workgroup scope visibility */
- WORKGROUP_SCOPE = 0x00000004,
- /** Access has Device (e.g., GPU) scope visibility */
- DEVICE_SCOPE = 0x00000008,
- /** Access has System (e.g., CPU + GPU) scope visibility */
- SYSTEM_SCOPE = 0x00000010,
-
- /** Global Segment */
- GLOBAL_SEGMENT = 0x00000020,
- /** Group Segment */
- GROUP_SEGMENT = 0x00000040,
- /** Private Segment */
- PRIVATE_SEGMENT = 0x00000080,
- /** Kergarg Segment */
- KERNARG_SEGMENT = 0x00000100,
- /** Readonly Segment */
- READONLY_SEGMENT = 0x00000200,
- /** Spill Segment */
- SPILL_SEGMENT = 0x00000400,
- /** Arg Segment */
- ARG_SEGMENT = 0x00000800,
+ /**
+ * These bits are used to set the coherence policy
+ * for the GPU and are encoded in the GCN3 instructions.
+ * See the AMD GCN3 ISA Architecture Manual for more
+ * details.
+ *
+ * SLC: System Level Coherent. Accesses are forced to miss in
+ * the L2 cache and are coherent with system memory.
+ *
+ * GLC: Globally Coherent. Controls how reads and writes are
+ * handled by the L1 cache. Global here refers to the
+ * data being visible globally on the GPU (i.e., visible
+ * to all WGs).
+ *
+ * For atomics, the GLC bit is used to distinguish
+ * between atomic return/no-return operations.
+ */
+ enum : CacheCoherenceFlagsType {
+ /** user-policy flags */
+ SLC_BIT = 0x00000080,
+ GLC_BIT = 0x00000100,
};
using LocalAccessor =
@@ -305,8 +300,8 @@ class Request
/** Flag structure for the request. */
Flags _flags;
- /** Memory space configuraiton flag structure for the request. */
- MemSpaceConfigFlags _memSpaceConfigFlags;
+ /** Flags that control how the downstream cache system maintains coherence */
+ CacheCoherenceFlags _cacheCoherenceFlags;
/** Private flags for field validity checking. */
PrivateFlags privateFlags;
@@ -394,7 +389,7 @@ class Request
_byteEnable(other._byteEnable),
_masterId(other._masterId),
_flags(other._flags),
- _memSpaceConfigFlags(other._memSpaceConfigFlags),
+ _cacheCoherenceFlags(other._cacheCoherenceFlags),
privateFlags(other.privateFlags),
_time(other._time),
_taskId(other._taskId), _vaddr(other._vaddr),
@@ -629,10 +624,11 @@ class Request
}
void
- setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags)
+ setCacheCoherenceFlags(CacheCoherenceFlags extraFlags)
{
+ // TODO: do mem_sync_op requests have valid paddr/vaddr?
assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR));
- _memSpaceConfigFlags.set(extraFlags);
+ _cacheCoherenceFlags.set(extraFlags);
}
/** Accessor function for vaddr.*/
@@ -840,82 +836,10 @@ class Request
* Accessor functions for the memory space configuration flags and used by
* GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that
* these are for testing only; setting extraFlags should be done via
- * setMemSpaceConfigFlags().
+ * setCacheCoherenceFlags().
*/
- bool isScoped() const { return _memSpaceConfigFlags.isSet(SCOPE_VALID); }
-
- bool
- isWavefrontScope() const
- {
- assert(isScoped());
- return _memSpaceConfigFlags.isSet(WAVEFRONT_SCOPE);
- }
-
- bool
- isWorkgroupScope() const
- {
- assert(isScoped());
- return _memSpaceConfigFlags.isSet(WORKGROUP_SCOPE);
- }
-
- bool
- isDeviceScope() const
- {
- assert(isScoped());
- return _memSpaceConfigFlags.isSet(DEVICE_SCOPE);
- }
-
- bool
- isSystemScope() const
- {
- assert(isScoped());
- return _memSpaceConfigFlags.isSet(SYSTEM_SCOPE);
- }
-
- bool
- isGlobalSegment() const
- {
- return _memSpaceConfigFlags.isSet(GLOBAL_SEGMENT) ||
- (!isGroupSegment() && !isPrivateSegment() &&
- !isKernargSegment() && !isReadonlySegment() &&
- !isSpillSegment() && !isArgSegment());
- }
-
- bool
- isGroupSegment() const
- {
- return _memSpaceConfigFlags.isSet(GROUP_SEGMENT);
- }
-
- bool
- isPrivateSegment() const
- {
- return _memSpaceConfigFlags.isSet(PRIVATE_SEGMENT);
- }
-
- bool
- isKernargSegment() const
- {
- return _memSpaceConfigFlags.isSet(KERNARG_SEGMENT);
- }
-
- bool
- isReadonlySegment() const
- {
- return _memSpaceConfigFlags.isSet(READONLY_SEGMENT);
- }
-
- bool
- isSpillSegment() const
- {
- return _memSpaceConfigFlags.isSet(SPILL_SEGMENT);
- }
-
- bool
- isArgSegment() const
- {
- return _memSpaceConfigFlags.isSet(ARG_SEGMENT);
- }
+ bool isSLC() const { return _cacheCoherenceFlags.isSet(SLC_BIT); }
+ bool isGLC() const { return _cacheCoherenceFlags.isSet(GLC_BIT); }
/**
* Accessor functions to determine whether this request is part of
diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh
index f6b25bf9a0..68b11f55dc 100644
--- a/src/mem/ruby/slicc_interface/RubyRequest.hh
+++ b/src/mem/ruby/slicc_interface/RubyRequest.hh
@@ -58,16 +58,11 @@ class RubyRequest : public Message
WriteMask m_writeMask;
DataBlock m_WTData;
int m_wfid;
- HSAScope m_scope;
- HSASegment m_segment;
-
RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No,
- ContextID _proc_id = 100, ContextID _core_id = 99,
- HSAScope _scope = HSAScope_UNSPECIFIED,
- HSASegment _segment = HSASegment_GLOBAL)
+ ContextID _proc_id = 100, ContextID _core_id = 99)
: Message(curTime),
m_PhysicalAddress(_paddr),
m_Type(_type),
@@ -77,9 +72,7 @@ class RubyRequest : public Message
m_Prefetch(_pb),
data(_data),
m_pkt(_pkt),
- m_contextId(_core_id),
- m_scope(_scope),
- m_segment(_segment)
+ m_contextId(_core_id)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
}
@@ -89,9 +82,7 @@ class RubyRequest : public Message
RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
unsigned _proc_id, unsigned _core_id,
int _wm_size, std::vector & _wm_mask,
- DataBlock & _Data,
- HSAScope _scope = HSAScope_UNSPECIFIED,
- HSASegment _segment = HSASegment_GLOBAL)
+ DataBlock & _Data)
: Message(curTime),
m_PhysicalAddress(_paddr),
m_Type(_type),
@@ -104,9 +95,7 @@ class RubyRequest : public Message
m_contextId(_core_id),
m_writeMask(_wm_size,_wm_mask),
m_WTData(_Data),
- m_wfid(_proc_id),
- m_scope(_scope),
- m_segment(_segment)
+ m_wfid(_proc_id)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
}
@@ -117,9 +106,7 @@ class RubyRequest : public Message
unsigned _proc_id, unsigned _core_id,
int _wm_size, std::vector & _wm_mask,
DataBlock & _Data,
- std::vector< std::pair > _atomicOps,
- HSAScope _scope = HSAScope_UNSPECIFIED,
- HSASegment _segment = HSASegment_GLOBAL)
+ std::vector< std::pair > _atomicOps)
: Message(curTime),
m_PhysicalAddress(_paddr),
m_Type(_type),
@@ -132,14 +119,11 @@ class RubyRequest : public Message
m_contextId(_core_id),
m_writeMask(_wm_size,_wm_mask,_atomicOps),
m_WTData(_Data),
- m_wfid(_proc_id),
- m_scope(_scope),
- m_segment(_segment)
+ m_wfid(_proc_id)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
}
-
RubyRequest(Tick curTime) : Message(curTime) {}
MsgPtr clone() const
{ return std::shared_ptr(new RubyRequest(*this)); }