// Repository-viewer metadata captured together with this file (not part of
// the original source):
//   Change-Id: I545ff03041e8fe66dc489c6aa95c009e54df0970
//   Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/38995
//   Reviewed-by: Gabe Black <gabe.black@gmail.com>
//   Maintainer: Gabe Black <gabe.black@gmail.com>
//   Tested-by: kokoro <noreply+kokoro@google.com>
//   273 lines, 8.8 KiB, C++
/*
 * Copyright (c) 2014-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __FETCH_UNIT_HH__
#define __FETCH_UNIT_HH__

#include <cassert>
#include <cstdint>
#include <deque>
#include <map>
#include <utility>
#include <vector>

#include "arch/gpu_decoder.hh"
#include "base/types.hh"
#include "config/the_gpu_isa.hh"
#include "gpu-compute/scheduler.hh"
#include "mem/packet.hh"

// Forward declarations: this header only refers to these types through
// pointers and references, so their full definitions are not needed here.
class ComputeUnit;
class Wavefront;

class FetchUnit
|
|
{
|
|
public:
|
|
FetchUnit(const ComputeUnitParams &p, ComputeUnit &cu);
|
|
~FetchUnit();
|
|
void init();
|
|
void exec();
|
|
void bindWaveList(std::vector<Wavefront*> *list);
|
|
void initiateFetch(Wavefront *wavefront);
|
|
void fetch(PacketPtr pkt, Wavefront *wavefront);
|
|
void processFetchReturn(PacketPtr pkt);
|
|
void flushBuf(int wfSlotId);
|
|
static uint32_t globalFetchUnitID;
|
|
|
|
private:
|
|
/**
|
|
* fetch buffer descriptor. holds buffered
|
|
* instruction data in the fetch unit.
|
|
*/
|
|
class FetchBufDesc
|
|
{
|
|
public:
|
|
FetchBufDesc() : bufStart(nullptr), bufEnd(nullptr),
|
|
readPtr(nullptr), fetchDepth(0), maxIbSize(0), maxFbSize(0),
|
|
cacheLineSize(0), restartFromBranch(false), wavefront(nullptr),
|
|
_decoder(nullptr)
|
|
{
|
|
}
|
|
|
|
~FetchBufDesc()
|
|
{
|
|
delete[] bufStart;
|
|
}
|
|
|
|
/**
|
|
* allocate the fetch buffer space, and set the fetch depth
|
|
* (number of lines that may be buffered), fetch size
|
|
* (cache line size), and parent WF for this fetch buffer.
|
|
*/
|
|
void allocateBuf(int fetch_depth, int cache_line_size, Wavefront *wf);
|
|
|
|
int
|
|
bufferedAndReservedLines() const
|
|
{
|
|
return bufferedLines() + reservedLines();
|
|
}
|
|
|
|
int bufferedLines() const { return bufferedPCs.size(); }
|
|
int bufferedBytes() const { return bufferedLines() * cacheLineSize; }
|
|
int reservedLines() const { return reservedPCs.size(); }
|
|
bool hasFreeSpace() const { return !freeList.empty(); }
|
|
void flushBuf();
|
|
Addr nextFetchAddr();
|
|
|
|
/**
|
|
* reserve an entry in the fetch buffer for PC = vaddr,
|
|
*/
|
|
void reserveBuf(Addr vaddr);
|
|
|
|
/**
|
|
* return a pointer to the raw fetch buffer data.
|
|
* this allows the fetch pkt to use this data directly
|
|
* to avoid unnecessary memcpy and malloc/new.
|
|
*/
|
|
uint8_t*
|
|
reservedBuf(Addr vaddr) const
|
|
{
|
|
auto reserved_pc = reservedPCs.find(vaddr);
|
|
assert(reserved_pc != reservedPCs.end());
|
|
assert(reserved_pc == reservedPCs.begin());
|
|
|
|
return reserved_pc->second;
|
|
}
|
|
|
|
/**
|
|
* returns true if there is an entry reserved for this address,
|
|
* and false otherwise
|
|
*/
|
|
bool
|
|
isReserved(Addr vaddr) const
|
|
{
|
|
auto reserved_pc = reservedPCs.find(vaddr);
|
|
bool is_reserved = (reserved_pc != reservedPCs.end());
|
|
return is_reserved;
|
|
}
|
|
|
|
void fetchDone(Addr vaddr);
|
|
|
|
/**
|
|
* checks if the buffer contains valid data. this essentially
|
|
* tells fetch when there is data remaining that needs to be
|
|
* decoded into the WF's IB.
|
|
*/
|
|
bool hasFetchDataToProcess() const;
|
|
|
|
/**
|
|
* each time the fetch stage is ticked, we check if there
|
|
* are any data in the fetch buffer that may be decoded and
|
|
* sent to the IB. because we are modeling the fetch buffer
|
|
* as a circular buffer, it is possible that an instruction
|
|
* can straddle the end/beginning of the fetch buffer, so
|
|
* decodeSplitInsts() handles that case.
|
|
*/
|
|
void decodeInsts();
|
|
|
|
/**
|
|
* checks if the wavefront can release any of its fetch
|
|
* buffer entries. this will occur when the WF's PC goes
|
|
* beyond any of the currently buffered cache lines.
|
|
*/
|
|
void checkWaveReleaseBuf();
|
|
|
|
void
|
|
decoder(TheGpuISA::Decoder *dec)
|
|
{
|
|
_decoder = dec;
|
|
}
|
|
|
|
bool
|
|
pcBuffered(Addr pc) const
|
|
{
|
|
bool buffered = bufferedPCs.find(pc) != bufferedPCs.end()
|
|
&& reservedPCs.find(pc) != reservedPCs.end();
|
|
|
|
return buffered;
|
|
}
|
|
|
|
/**
|
|
* calculates the number of fetched bytes that have yet
|
|
* to be decoded.
|
|
*/
|
|
int fetchBytesRemaining() const;
|
|
|
|
private:
|
|
void decodeSplitInst();
|
|
|
|
/**
|
|
* check if the next instruction to be processed out of
|
|
* the fetch buffer is split across the end/beginning of
|
|
* the fetch buffer.
|
|
*/
|
|
bool splitDecode() const;
|
|
|
|
/**
|
|
* the set of PCs (fetch addresses) that are currently
|
|
* buffered. bufferedPCs are valid, reservedPCs are
|
|
* waiting for their buffers to be filled with valid
|
|
* fetch data.
|
|
*/
|
|
std::map<Addr, uint8_t*> bufferedPCs;
|
|
std::map<Addr, uint8_t*> reservedPCs;
|
|
|
|
/**
|
|
* represents the fetch buffer free list. holds buffer space
|
|
* that is currently free. each pointer in this array must
|
|
* have enough space to hold a cache line. in reality we
|
|
* have one actual fetch buffer: 'bufStart', these pointers
|
|
* point to addresses within bufStart that are aligned to the
|
|
* cache line size.
|
|
*/
|
|
std::deque<uint8_t*> freeList;
|
|
|
|
/**
|
|
* raw instruction buffer. holds cache line data associated with
|
|
* the set of PCs (fetch addresses) that are buffered here.
|
|
*/
|
|
uint8_t *bufStart;
|
|
uint8_t *bufEnd;
|
|
/**
|
|
* pointer that points to the next chunk of inst data to be
|
|
* decoded.
|
|
*/
|
|
uint8_t *readPtr;
|
|
// how many lines the fetch unit may buffer
|
|
int fetchDepth;
|
|
// maximum size (in number of insts) of the WF's IB
|
|
int maxIbSize;
|
|
// maximum size (in bytes) of this fetch buffer
|
|
int maxFbSize;
|
|
int cacheLineSize;
|
|
int cacheLineBits;
|
|
bool restartFromBranch;
|
|
// wavefront whose IB is serviced by this fetch buffer
|
|
Wavefront *wavefront;
|
|
TheGpuISA::Decoder *_decoder;
|
|
};
|
|
|
|
bool timingSim;
|
|
ComputeUnit &computeUnit;
|
|
TheGpuISA::Decoder decoder;
|
|
|
|
// Fetch scheduler; Selects one wave from
|
|
// the fetch queue for instruction fetching.
|
|
// The selection is made according to
|
|
// a scheduling policy
|
|
Scheduler fetchScheduler;
|
|
|
|
// Stores the list of waves that are
|
|
// ready to be fetched this cycle
|
|
std::vector<Wavefront*> fetchQueue;
|
|
|
|
// Stores the fetch status of all waves dispatched to this SIMD.
|
|
// TRUE implies the wave is ready to fetch and is already
|
|
// moved to fetchQueue
|
|
std::vector<std::pair<Wavefront*, bool>> fetchStatusQueue;
|
|
|
|
// Pointer to list of waves dispatched on to this SIMD unit
|
|
std::vector<Wavefront*> *waveList;
|
|
// holds the fetch buffers. each wave has 1 entry.
|
|
std::vector<FetchBufDesc> fetchBuf;
|
|
/**
|
|
* number of cache lines we can fetch and buffer.
|
|
* this includes the currently fetched line (i.e., the
|
|
* line that corresponds to the WF's current PC), as
|
|
* well as any lines that may be prefetched.
|
|
*/
|
|
int fetchDepth;
|
|
};

#endif // __FETCH_UNIT_HH__