mem-ruby: update memory interfaces to support GPU ISA

This patch deprecates HSA-based memory request types and adds new
types that can be used by real ISA instructions.

Change-Id: Ie107a69d8a35e9de0853f1407392ad01a8b3e930
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28408
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Tuan Ta
2018-05-01 11:43:16 -04:00
committed by Anthony Gutierrez
parent 18e435546c
commit adc9de4d61
4 changed files with 45 additions and 131 deletions

View File

@@ -181,6 +181,10 @@ MemCmd::commandInfo[] =
{ 0, InvalidCmd, "Deprecated_MessageResp" },
/* MemFenceReq -- for synchronization requests */
{SET2(IsRequest, NeedsResponse), MemFenceResp, "MemFenceReq"},
/* MemSyncReq */
{SET2(IsRequest, NeedsResponse), MemSyncResp, "MemSyncReq"},
/* MemSyncResp */
{SET1(IsResponse), InvalidCmd, "MemSyncResp"},
/* MemFenceResp -- for synchronization responses */
{SET1(IsResponse), InvalidCmd, "MemFenceResp"},
/* Cache Clean Request -- Update with the latest data all existing

View File

@@ -110,6 +110,8 @@ class MemCmd
SwapResp,
// MessageReq and MessageResp are deprecated.
MemFenceReq = SwapResp + 3,
MemSyncReq, // memory synchronization request (e.g., cache invalidate)
MemSyncResp, // memory synchronization response
MemFenceResp,
CleanSharedReq,
CleanSharedResp,

View File

@@ -110,7 +110,7 @@ class Request
* STRICT_ORDER flag should be set if such reordering is
* undesirable.
*/
UNCACHEABLE = 0x00000400,
UNCACHEABLE = 0x00000400,
/**
* The request is required to be strictly ordered by <i>CPU
* models</i> and is non-speculative.
@@ -216,35 +216,30 @@ class Request
};
/** @} */
typedef uint32_t MemSpaceConfigFlagsType;
typedef ::Flags<MemSpaceConfigFlagsType> MemSpaceConfigFlags;
typedef uint64_t CacheCoherenceFlagsType;
typedef ::Flags<CacheCoherenceFlagsType> CacheCoherenceFlags;
enum : MemSpaceConfigFlagsType {
/** Has a synchronization scope been set? */
SCOPE_VALID = 0x00000001,
/** Access has Wavefront scope visibility */
WAVEFRONT_SCOPE = 0x00000002,
/** Access has Workgroup scope visibility */
WORKGROUP_SCOPE = 0x00000004,
/** Access has Device (e.g., GPU) scope visibility */
DEVICE_SCOPE = 0x00000008,
/** Access has System (e.g., CPU + GPU) scope visibility */
SYSTEM_SCOPE = 0x00000010,
/** Global Segment */
GLOBAL_SEGMENT = 0x00000020,
/** Group Segment */
GROUP_SEGMENT = 0x00000040,
/** Private Segment */
PRIVATE_SEGMENT = 0x00000080,
/** Kernarg Segment */
KERNARG_SEGMENT = 0x00000100,
/** Readonly Segment */
READONLY_SEGMENT = 0x00000200,
/** Spill Segment */
SPILL_SEGMENT = 0x00000400,
/** Arg Segment */
ARG_SEGMENT = 0x00000800,
/**
* These bits are used to set the coherence policy
* for the GPU and are encoded in the GCN3 instructions.
* See the AMD GCN3 ISA Architecture Manual for more
* details.
*
* SLC: System Level Coherent. Accesses are forced to miss in
* the L2 cache and are coherent with system memory.
*
* GLC: Globally Coherent. Controls how reads and writes are
* handled by the L1 cache. Global here refers to the
* data being visible globally on the GPU (i.e., visible
* to all WGs).
*
* For atomics, the GLC bit is used to distinguish
* between atomic return/no-return operations.
*/
enum : CacheCoherenceFlagsType {
/** user-policy flags */
SLC_BIT = 0x00000080,
GLC_BIT = 0x00000100,
};
using LocalAccessor =
@@ -305,8 +300,8 @@ class Request
/** Flag structure for the request. */
Flags _flags;
/** Memory space configuration flag structure for the request. */
MemSpaceConfigFlags _memSpaceConfigFlags;
/** Flags that control how downstream cache system maintains coherence*/
CacheCoherenceFlags _cacheCoherenceFlags;
/** Private flags for field validity checking. */
PrivateFlags privateFlags;
@@ -394,7 +389,7 @@ class Request
_byteEnable(other._byteEnable),
_masterId(other._masterId),
_flags(other._flags),
_memSpaceConfigFlags(other._memSpaceConfigFlags),
_cacheCoherenceFlags(other._cacheCoherenceFlags),
privateFlags(other.privateFlags),
_time(other._time),
_taskId(other._taskId), _vaddr(other._vaddr),
@@ -629,10 +624,11 @@ class Request
}
void
setMemSpaceConfigFlags(MemSpaceConfigFlags extraFlags)
setCacheCoherenceFlags(CacheCoherenceFlags extraFlags)
{
// TODO: do mem_sync_op requests have valid paddr/vaddr?
assert(privateFlags.isSet(VALID_PADDR | VALID_VADDR));
_memSpaceConfigFlags.set(extraFlags);
_cacheCoherenceFlags.set(extraFlags);
}
/** Accessor function for vaddr.*/
@@ -840,82 +836,10 @@ class Request
* Accessor functions for the memory space configuration flags and used by
* GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that
* these are for testing only; setting extraFlags should be done via
* setMemSpaceConfigFlags().
* setCacheCoherenceFlags().
*/
bool isScoped() const { return _memSpaceConfigFlags.isSet(SCOPE_VALID); }
/// True iff this access has Wavefront-scope visibility.
/// Precondition: a scope must have been set (asserts isScoped()).
bool
isWavefrontScope() const
{
assert(isScoped());
return _memSpaceConfigFlags.isSet(WAVEFRONT_SCOPE);
}
/// True iff this access has Workgroup-scope visibility.
/// Precondition: a scope must have been set (asserts isScoped()).
bool
isWorkgroupScope() const
{
assert(isScoped());
return _memSpaceConfigFlags.isSet(WORKGROUP_SCOPE);
}
/// True iff this access has Device-scope (e.g., whole-GPU) visibility.
/// Precondition: a scope must have been set (asserts isScoped()).
bool
isDeviceScope() const
{
assert(isScoped());
return _memSpaceConfigFlags.isSet(DEVICE_SCOPE);
}
/// True iff this access has System-scope (e.g., CPU + GPU) visibility.
/// Precondition: a scope must have been set (asserts isScoped()).
bool
isSystemScope() const
{
assert(isScoped());
return _memSpaceConfigFlags.isSet(SYSTEM_SCOPE);
}
/// True iff this access targets the Global segment. Global is the
/// default segment: this also returns true when no other segment
/// flag has been set.
bool
isGlobalSegment() const
{
return _memSpaceConfigFlags.isSet(GLOBAL_SEGMENT) ||
(!isGroupSegment() && !isPrivateSegment() &&
!isKernargSegment() && !isReadonlySegment() &&
!isSpillSegment() && !isArgSegment());
}
/// True iff this access targets the Group segment.
bool
isGroupSegment() const
{
return _memSpaceConfigFlags.isSet(GROUP_SEGMENT);
}
/// True iff this access targets the Private segment.
bool
isPrivateSegment() const
{
return _memSpaceConfigFlags.isSet(PRIVATE_SEGMENT);
}
/// True iff this access targets the Kernarg (kernel argument) segment.
bool
isKernargSegment() const
{
return _memSpaceConfigFlags.isSet(KERNARG_SEGMENT);
}
/// True iff this access targets the Readonly segment.
bool
isReadonlySegment() const
{
return _memSpaceConfigFlags.isSet(READONLY_SEGMENT);
}
/// True iff this access targets the Spill segment.
bool
isSpillSegment() const
{
return _memSpaceConfigFlags.isSet(SPILL_SEGMENT);
}
/// True iff this access targets the Arg segment.
bool
isArgSegment() const
{
return _memSpaceConfigFlags.isSet(ARG_SEGMENT);
}
/// True iff the SLC (System Level Coherent) GCN3 bit is set: the access
/// is forced to miss in the L2 cache and is coherent with system memory.
bool isSLC() const { return _cacheCoherenceFlags.isSet(SLC_BIT); }
/// True iff the GLC (Globally Coherent) GCN3 bit is set: controls how
/// the L1 handles reads/writes; for atomics it distinguishes
/// return vs. no-return operations.
bool isGLC() const { return _cacheCoherenceFlags.isSet(GLC_BIT); }
/**
* Accessor functions to determine whether this request is part of

View File

@@ -58,16 +58,11 @@ class RubyRequest : public Message
WriteMask m_writeMask;
DataBlock m_WTData;
int m_wfid;
HSAScope m_scope;
HSASegment m_segment;
RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No,
ContextID _proc_id = 100, ContextID _core_id = 99,
HSAScope _scope = HSAScope_UNSPECIFIED,
HSASegment _segment = HSASegment_GLOBAL)
ContextID _proc_id = 100, ContextID _core_id = 99)
: Message(curTime),
m_PhysicalAddress(_paddr),
m_Type(_type),
@@ -77,9 +72,7 @@ class RubyRequest : public Message
m_Prefetch(_pb),
data(_data),
m_pkt(_pkt),
m_contextId(_core_id),
m_scope(_scope),
m_segment(_segment)
m_contextId(_core_id)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
}
@@ -89,9 +82,7 @@ class RubyRequest : public Message
RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb,
unsigned _proc_id, unsigned _core_id,
int _wm_size, std::vector<bool> & _wm_mask,
DataBlock & _Data,
HSAScope _scope = HSAScope_UNSPECIFIED,
HSASegment _segment = HSASegment_GLOBAL)
DataBlock & _Data)
: Message(curTime),
m_PhysicalAddress(_paddr),
m_Type(_type),
@@ -104,9 +95,7 @@ class RubyRequest : public Message
m_contextId(_core_id),
m_writeMask(_wm_size,_wm_mask),
m_WTData(_Data),
m_wfid(_proc_id),
m_scope(_scope),
m_segment(_segment)
m_wfid(_proc_id)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
}
@@ -117,9 +106,7 @@ class RubyRequest : public Message
unsigned _proc_id, unsigned _core_id,
int _wm_size, std::vector<bool> & _wm_mask,
DataBlock & _Data,
std::vector< std::pair<int,AtomicOpFunctor*> > _atomicOps,
HSAScope _scope = HSAScope_UNSPECIFIED,
HSASegment _segment = HSASegment_GLOBAL)
std::vector< std::pair<int,AtomicOpFunctor*> > _atomicOps)
: Message(curTime),
m_PhysicalAddress(_paddr),
m_Type(_type),
@@ -132,14 +119,11 @@ class RubyRequest : public Message
m_contextId(_core_id),
m_writeMask(_wm_size,_wm_mask,_atomicOps),
m_WTData(_Data),
m_wfid(_proc_id),
m_scope(_scope),
m_segment(_segment)
m_wfid(_proc_id)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
}
RubyRequest(Tick curTime) : Message(curTime) {}
MsgPtr clone() const
{ return std::shared_ptr<Message>(new RubyRequest(*this)); }