gpu-compute, mem-ruby, configs: Add GCN3 ISA support to GPU model

Change-Id: Ibe46970f3ba25d62ca2ade5cbc2054ad746b2254
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29912
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Tony Gutierrez
2018-05-01 16:59:35 -04:00
committed by Anthony Gutierrez
parent b0eac7857a
commit b8da9abba7
86 changed files with 10299 additions and 3734 deletions

View File

@@ -36,7 +36,6 @@
#include <string>
#include <utility>
#include <vector>
#include "arch/gpu_decoder.hh"
#include "base/statistics.hh"
@@ -58,9 +57,170 @@ class FetchUnit
// begin a fetch for the given wavefront (fetch-stage entry point)
void initiateFetch(Wavefront *wavefront);
// issue the fetch packet for the wavefront — presumably called once
// the fetch address is ready; TODO confirm against the .cc file
void fetch(PacketPtr pkt, Wavefront *wavefront);
// handle a returning fetch response packet
void processFetchReturn(PacketPtr pkt);
// discard buffered fetch data for the wave occupying the given slot
void flushBuf(int wfSlotId);
// NOTE(review): appears to be a counter shared across all fetch unit
// instances; confirm where it is assigned/incremented
static uint32_t globalFetchUnitID;
private:
/**
 * Fetch buffer descriptor. Holds buffered cache line data for one
 * wavefront's instruction stream, plus the bookkeeping needed to
 * reserve, fill, decode, and release those lines.
 */
class FetchBufDesc
{
  public:
    FetchBufDesc() : bufStart(nullptr), bufEnd(nullptr),
        readPtr(nullptr), fetchDepth(0), maxIbSize(0), maxFbSize(0),
        cacheLineSize(0), restartFromBranch(false), wavefront(nullptr),
        _decoder(nullptr)
    {
    }

    ~FetchBufDesc()
    {
        // delete[] on nullptr is a no-op, so this is safe even if
        // allocateBuf() was never called.
        delete[] bufStart;
    }

    /**
     * allocate the fetch buffer space, and set the fetch depth
     * (number of lines that may be buffered), fetch size
     * (cache line size), and parent WF for this fetch buffer.
     */
    void allocateBuf(int fetch_depth, int cache_line_size, Wavefront *wf);

    // lines holding valid data plus lines with a fetch outstanding
    int
    bufferedAndReservedLines() const
    {
        return bufferedLines() + reservedLines();
    }

    // number of lines currently holding valid fetch data
    int bufferedLines() const { return bufferedPCs.size(); }
    // number of valid bytes currently buffered
    int bufferedBytes() const { return bufferedLines() * cacheLineSize; }
    // number of lines reserved for outstanding fetches
    int reservedLines() const { return reservedPCs.size(); }
    // true if at least one free line remains in this buffer
    bool hasFreeSpace() const { return !freeList.empty(); }
    void flushBuf();
    Addr nextFetchAddr();

    /**
     * reserve an entry in the fetch buffer for PC = vaddr.
     */
    void reserveBuf(Addr vaddr);

    /**
     * return a pointer to the raw fetch buffer data.
     * this allows the fetch pkt to use this data directly
     * to avoid unnecessary memcpy and malloc/new.
     */
    uint8_t*
    reservedBuf(Addr vaddr) const
    {
        auto reserved_pc = reservedPCs.find(vaddr);
        assert(reserved_pc != reservedPCs.end());
        // NOTE(review): begin() of a std::map is the *lowest* reserved
        // vaddr, not necessarily the oldest reservation — this assert
        // therefore requires fills to arrive lowest-address first;
        // confirm that assumption holds in the fill path.
        assert(reserved_pc == reservedPCs.begin());

        return reserved_pc->second;
    }

    void fetchDone(Addr vaddr);

    /**
     * checks if the buffer contains valid data. this essentially
     * tells fetch when there is data remaining that needs to be
     * decoded into the WF's IB.
     */
    bool hasFetchDataToProcess() const;

    /**
     * each time the fetch stage is ticked, we check if there
     * are any data in the fetch buffer that may be decoded and
     * sent to the IB. because we are modeling the fetch buffer
     * as a circular buffer, it is possible that an instruction
     * can straddle the end/beginning of the fetch buffer, so
     * decodeSplitInsts() handles that case.
     */
    void decodeInsts();

    /**
     * checks if the wavefront can release any of its fetch
     * buffer entries. this will occur when the WF's PC goes
     * beyond any of the currently buffered cache lines.
     */
    void checkWaveReleaseBuf();

    // set the decoder this fetch buffer uses
    void
    decoder(TheGpuISA::Decoder *dec)
    {
        _decoder = dec;
    }

    /**
     * returns true if a line for the given PC is already present in
     * this buffer — either holding valid data (bufferedPCs) or with
     * a fetch outstanding (reservedPCs).
     *
     * Fix: the original used &&, requiring the PC to be in both maps
     * at once. Reserved entries are lines still waiting for data and
     * buffered entries are completed lines, so requiring membership
     * in both made the predicate effectively always false; membership
     * in either map means the line need not be fetched again.
     */
    bool
    pcBuffered(Addr pc) const
    {
        bool buffered = bufferedPCs.find(pc) != bufferedPCs.end()
            || reservedPCs.find(pc) != reservedPCs.end();

        return buffered;
    }

    /**
     * calculates the number of fetched bytes that have yet
     * to be decoded.
     */
    int fetchBytesRemaining() const;

  private:
    void decodeSplitInst();

    /**
     * check if the next instruction to be processed out of
     * the fetch buffer is split across the end/beginning of
     * the fetch buffer.
     */
    bool splitDecode() const;

    /**
     * the set of PCs (fetch addresses) that are currently
     * buffered. bufferedPCs are valid, reservedPCs are
     * waiting for their buffers to be filled with valid
     * fetch data.
     */
    std::map<Addr, uint8_t*> bufferedPCs;
    std::map<Addr, uint8_t*> reservedPCs;

    /**
     * represents the fetch buffer free list. holds buffer space
     * that is currently free. each pointer in this array must
     * have enough space to hold a cache line. in reality we
     * have one actual fetch buffer: 'bufStart', these pointers
     * point to addresses within bufStart that are aligned to the
     * cache line size.
     */
    std::deque<uint8_t*> freeList;

    /**
     * raw instruction buffer. holds cache line data associated with
     * the set of PCs (fetch addresses) that are buffered here.
     * owned by this object and released in the destructor.
     * NOTE(review): ownership is a raw pointer with default copy
     * semantics, so copying a live FetchBufDesc would double-delete;
     * these descriptors live in std::vector<FetchUnit::fetchBuf>, so
     * allocateBuf() must only run once the vector has reached its
     * final size — consider rule-of-five treatment.
     */
    uint8_t *bufStart;
    uint8_t *bufEnd;

    /**
     * pointer that points to the next chunk of inst data to be
     * decoded.
     */
    uint8_t *readPtr;

    // how many lines the fetch unit may buffer
    int fetchDepth;
    // maximum size (in number of insts) of the WF's IB
    int maxIbSize;
    // maximum size (in bytes) of this fetch buffer
    int maxFbSize;
    int cacheLineSize;
    int cacheLineBits;
    bool restartFromBranch;
    // wavefront whose IB is serviced by this fetch buffer
    Wavefront *wavefront;
    TheGpuISA::Decoder *_decoder;
};
// true when running the detailed timing model rather than the
// functional one — TODO confirm against the constructor in the .cc
bool timingSim;
// compute unit that owns this fetch unit
ComputeUnit *computeUnit;
// decoder held by this fetch unit; each FetchBufDesc also keeps a
// decoder pointer, installed via FetchBufDesc::decoder()
TheGpuISA::Decoder decoder;
@@ -82,6 +242,15 @@ class FetchUnit
// Pointer to list of waves dispatched on to this SIMD unit
std::vector<Wavefront*> *waveList;
// holds the fetch buffers. each wave has 1 entry.
// NOTE(review): FetchBufDesc owns raw buffer memory (deleted in its
// dtor) but is copyable; reallocating/copying this vector after
// FetchBufDesc::allocateBuf() would double-free — verify sizing order
std::vector<FetchBufDesc> fetchBuf;
/**
 * number of cache lines we can fetch and buffer.
 * this includes the currently fetched line (i.e., the
 * line that corresponds to the WF's current PC), as
 * well as any lines that may be prefetched.
 */
int fetchDepth;
};
#endif // __FETCH_UNIT_HH__