The create() method on Params structs usually instantiates SimObjects using a constructor which takes the Params struct as a parameter somehow. There has been a lot of needless variation in how that was done, making it annoying to pass Params down to base classes. Some of the different forms were: `const Params &`, `Params &`, `Params *`, `const Params *`, and `Params const*`. This change goes through and fixes up every constructor and every create() method to use the `const Params &` form. We use a reference because the Params struct should never be null. We use const because neither the create method nor the consuming object should modify the record of the parameters as they came in from the config. That would make consuming them not idempotent, and make it impossible to tell what the actual simulation configuration was since it would change from any user visible form (config script, config.ini, dot pdf output). Change-Id: I77453cba52fdcfd5f4eec92dfb0bddb5a9945f31 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/35938 Reviewed-by: Gabe Black <gabeblack@google.com> Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br> Maintainer: Gabe Black <gabeblack@google.com> Tested-by: kokoro <noreply+kokoro@google.com>
363 lines
12 KiB
C++
363 lines
12 KiB
C++
/*
|
|
* Copyright (c) 2011-2017 Advanced Micro Devices, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* For use for simulation and test purposes only
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
*
|
|
* 3. Neither the name of the copyright holder nor the names of its
|
|
* contributors may be used to endorse or promote products derived from this
|
|
* software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#ifndef __GPU_COMPUTE_WAVEFRONT_HH__
|
|
#define __GPU_COMPUTE_WAVEFRONT_HH__
|
|
|
|
#include <cassert>
|
|
#include <deque>
|
|
#include <list>
|
|
#include <memory>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
#include "arch/gpu_isa.hh"
|
|
#include "base/logging.hh"
|
|
#include "base/types.hh"
|
|
#include "config/the_gpu_isa.hh"
|
|
#include "gpu-compute/compute_unit.hh"
|
|
#include "gpu-compute/dispatcher.hh"
|
|
#include "gpu-compute/gpu_dyn_inst.hh"
|
|
#include "gpu-compute/hsa_queue_entry.hh"
|
|
#include "gpu-compute/lds_state.hh"
|
|
#include "gpu-compute/misc.hh"
|
|
#include "params/Wavefront.hh"
|
|
#include "sim/sim_object.hh"
|
|
|
|
class Wavefront : public SimObject
|
|
{
|
|
public:
|
|
enum status_e {
|
|
// wavefront is stalled
|
|
S_STOPPED,
|
|
// wavefront is returning from a kernel
|
|
S_RETURNING,
|
|
// wavefront is running normally
|
|
S_RUNNING,
|
|
// wavefront is stalled
|
|
S_STALLED,
|
|
/**
|
|
* wavefront has unsatisfied wait counts
|
|
*
|
|
* while in this state the WF will only execute if
|
|
* the oldest instruction is the waitcnt. while in
|
|
* S_WAITCNT, the wavefront will not be ready until
|
|
* all of its waitcnts have been satisfied. the
|
|
* scoreboard ready() function will check the status
|
|
* of the waitcnts whenever the WF is in S_WAITCNT,
|
|
* and once they are satisfied, it will resume normal
|
|
* operation.
|
|
*/
|
|
S_WAITCNT,
|
|
/**
|
|
* WF is stalled at a barrier.
|
|
*/
|
|
S_BARRIER
|
|
};
|
|
|
|
// HW slot id where the WF is mapped to inside a SIMD unit
|
|
const int wfSlotId;
|
|
int kernId;
|
|
// SIMD unit where the WV has been scheduled
|
|
const int simdId;
|
|
// id of the execution unit (or pipeline) where the oldest instruction
|
|
// of the WF is scheduled
|
|
int execUnitId;
|
|
int flatLmUnitId;
|
|
int flatGmUnitId;
|
|
// pointer to parent CU
|
|
ComputeUnit *computeUnit;
|
|
int maxIbSize;
|
|
|
|
std::deque<GPUDynInstPtr> instructionBuffer;
|
|
|
|
bool pendingFetch;
|
|
bool dropFetch;
|
|
// last tick during which all WFs in the CU are not idle
|
|
Tick lastNonIdleTick;
|
|
|
|
// Execution unit resource ID's associated with this WF
|
|
// These are static mappings set at WF slot construction and
|
|
// based off of the simdId and wfSlotId.
|
|
|
|
// Index to scalarALUs resource vector in CU
|
|
int scalarAlu;
|
|
|
|
// Indices into readyList/dispatchList of resources used by this
|
|
// wavefront
|
|
int scalarAluGlobalIdx;
|
|
int globalMem;
|
|
int localMem;
|
|
int scalarMem;
|
|
|
|
// number of VGPRs required by WF
|
|
uint32_t maxVgprs;
|
|
// number of SGPRs required by WF
|
|
uint32_t maxSgprs;
|
|
void freeResources();
|
|
GPUDynInstPtr nextInstr();
|
|
void setStatus(status_e newStatus);
|
|
status_e getStatus() { return status; }
|
|
void resizeRegFiles(int num_vregs, int num_sregs);
|
|
bool isGmInstruction(GPUDynInstPtr ii);
|
|
bool isLmInstruction(GPUDynInstPtr ii);
|
|
bool isOldestInstWaitcnt();
|
|
bool isOldestInstGMem();
|
|
bool isOldestInstLMem();
|
|
bool isOldestInstPrivMem();
|
|
bool isOldestInstFlatMem();
|
|
bool isOldestInstVectorALU();
|
|
bool isOldestInstScalarALU();
|
|
bool isOldestInstScalarMem();
|
|
bool isOldestInstBarrier();
|
|
|
|
// used for passing spill address to DDInstGPU
|
|
std::vector<Addr> lastAddr;
|
|
std::vector<uint32_t> workItemId[3];
|
|
std::vector<uint32_t> workItemFlatId;
|
|
/* kernel launch parameters */
|
|
uint32_t workGroupId[3];
|
|
uint32_t workGroupSz[3];
|
|
uint32_t gridSz[3];
|
|
uint32_t wgId;
|
|
uint32_t wgSz;
|
|
/* the actual WG size can differ than the maximum size */
|
|
uint32_t actualWgSz[3];
|
|
uint32_t actualWgSzTotal;
|
|
void computeActualWgSz(HSAQueueEntry *task);
|
|
// wavefront id within a workgroup
|
|
uint32_t wfId;
|
|
uint32_t maxDynWaveId;
|
|
uint32_t dispatchId;
|
|
// vector and scalar memory requests pending in memory system
|
|
int outstandingReqs;
|
|
// outstanding global memory write requests
|
|
int outstandingReqsWrGm;
|
|
// outstanding local memory write requests
|
|
int outstandingReqsWrLm;
|
|
// outstanding global memory read requests
|
|
int outstandingReqsRdGm;
|
|
// outstanding local memory read requests
|
|
int outstandingReqsRdLm;
|
|
// outstanding scalar memory read requests
|
|
int scalarOutstandingReqsRdGm;
|
|
// outstanding scalar memory write requests
|
|
int scalarOutstandingReqsWrGm;
|
|
int rdLmReqsInPipe;
|
|
int rdGmReqsInPipe;
|
|
int wrLmReqsInPipe;
|
|
int wrGmReqsInPipe;
|
|
int scalarRdGmReqsInPipe;
|
|
int scalarWrGmReqsInPipe;
|
|
|
|
int memTraceBusy;
|
|
uint64_t lastTrace;
|
|
// number of virtual vector registers reserved by WF
|
|
int reservedVectorRegs;
|
|
// number of virtual scalar registers reserved by WF
|
|
int reservedScalarRegs;
|
|
// Index into the Vector Register File's namespace where the WF's registers
|
|
// will live while the WF is executed
|
|
uint32_t startVgprIndex;
|
|
// Index into the Scalar Register File's namespace where the WF's registers
|
|
// will live while the WF is executed
|
|
uint32_t startSgprIndex;
|
|
|
|
// Old value of destination gpr (for trace)
|
|
std::vector<uint32_t> oldVgpr;
|
|
// Id of destination gpr (for trace)
|
|
uint32_t oldVgprId;
|
|
// Tick count of last old_vgpr copy
|
|
uint64_t oldVgprTcnt;
|
|
|
|
// Old value of destination gpr (for trace)
|
|
std::vector<uint64_t> oldDgpr;
|
|
// Id of destination gpr (for trace)
|
|
uint32_t oldDgprId;
|
|
// Tick count of last old_vgpr copy
|
|
uint64_t oldDgprTcnt;
|
|
|
|
// Execution mask at wavefront start
|
|
VectorMask initMask;
|
|
|
|
// a pointer to the fraction of the LDS allocated
|
|
// to this workgroup (thus this wavefront)
|
|
LdsChunk *ldsChunk;
|
|
|
|
// unique WF id over all WFs executed across all CUs
|
|
uint64_t wfDynId;
|
|
|
|
// Wavefront slot stats
|
|
|
|
// Number of instructions executed by this wavefront slot across all
|
|
// dynamic wavefronts
|
|
Stats::Scalar numInstrExecuted;
|
|
|
|
// Number of cycles this WF spends in SCH stage
|
|
Stats::Scalar schCycles;
|
|
|
|
// Number of stall cycles encounterd by this WF in SCH stage
|
|
Stats::Scalar schStalls;
|
|
|
|
// The following stats sum to the value of schStalls, and record, per
|
|
// WF slot, what the cause of each stall was at a coarse granularity.
|
|
|
|
// Cycles WF is selected by scheduler, but RFs cannot support instruction
|
|
Stats::Scalar schRfAccessStalls;
|
|
// Cycles spent waiting for execution resources
|
|
Stats::Scalar schResourceStalls;
|
|
// cycles spent waiting for RF reads to complete in SCH stage
|
|
Stats::Scalar schOpdNrdyStalls;
|
|
// LDS arbitration stall cycles. WF attempts to execute LM instruction,
|
|
// but another wave is executing FLAT, which requires LM and GM and forces
|
|
// this WF to stall.
|
|
Stats::Scalar schLdsArbStalls;
|
|
|
|
// number of times an instruction of a WF is blocked from being issued
|
|
// due to WAR and WAW dependencies
|
|
Stats::Scalar numTimesBlockedDueWAXDependencies;
|
|
// number of times an instruction of a WF is blocked from being issued
|
|
// due to WAR and WAW dependencies
|
|
Stats::Scalar numTimesBlockedDueRAWDependencies;
|
|
|
|
// dyn inst id (per SIMD) of last instruction exec from this wave
|
|
uint64_t lastInstExec;
|
|
|
|
// Distribution to track the distance between producer and consumer
|
|
// for vector register values
|
|
Stats::Distribution vecRawDistance;
|
|
// Map to track the dyn instruction id of each vector register value
|
|
// produced, indexed by physical vector register ID
|
|
std::unordered_map<int,uint64_t> rawDist;
|
|
|
|
// Distribution to track the number of times every vector register
|
|
// value produced is consumed.
|
|
Stats::Distribution readsPerWrite;
|
|
// Counts the number of reads performed to each physical register
|
|
// - counts are reset to 0 for each dynamic wavefront launched
|
|
std::vector<int> vecReads;
|
|
|
|
void initRegState(HSAQueueEntry *task, int wgSizeInWorkItems);
|
|
|
|
// context for save/restore
|
|
uint8_t *context;
|
|
|
|
typedef WavefrontParams Params;
|
|
Wavefront(const Params &p);
|
|
~Wavefront();
|
|
virtual void init();
|
|
|
|
void
|
|
setParent(ComputeUnit *cu)
|
|
{
|
|
computeUnit = cu;
|
|
}
|
|
|
|
void validateRequestCounters();
|
|
void start(uint64_t _wfDynId, uint64_t _base_ptr);
|
|
void exec();
|
|
// called by SCH stage to reserve
|
|
std::vector<int> reserveResources();
|
|
bool stopFetch();
|
|
void regStats();
|
|
|
|
Addr pc() const;
|
|
void pc(Addr new_pc);
|
|
|
|
VectorMask& execMask();
|
|
bool execMask(int lane) const;
|
|
|
|
|
|
void discardFetch();
|
|
|
|
bool waitCntsSatisfied();
|
|
void setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt);
|
|
void clearWaitCnts();
|
|
|
|
void incVMemInstsIssued();
|
|
void incExpInstsIssued();
|
|
void incLGKMInstsIssued();
|
|
void decVMemInstsIssued();
|
|
void decExpInstsIssued();
|
|
void decLGKMInstsIssued();
|
|
|
|
/** Freeing VRF space */
|
|
void freeRegisterFile();
|
|
|
|
TheGpuISA::GPUISA&
|
|
gpuISA()
|
|
{
|
|
return _gpuISA;
|
|
}
|
|
|
|
void barrierId(int bar_id);
|
|
int barrierId() const;
|
|
bool hasBarrier() const;
|
|
void releaseBarrier();
|
|
|
|
private:
|
|
TheGpuISA::GPUISA _gpuISA;
|
|
|
|
void reserveGmResource(GPUDynInstPtr ii);
|
|
void reserveLmResource(GPUDynInstPtr ii);
|
|
|
|
/**
|
|
* the following are used for waitcnt instructions
|
|
* vmWaitCnt: once set, we wait for the oustanding
|
|
* number of vector mem instructions to be
|
|
* at, or below vmWaitCnt.
|
|
*
|
|
* expWaitCnt: once set, we wait for the outstanding
|
|
* number outstanding VM writes or EXP
|
|
* insts to be at, or below expWaitCnt.
|
|
*
|
|
* lgkmWaitCnt: once set, we wait for the oustanding
|
|
* number of LDS, GDS, scalar memory,
|
|
* and message instructions to be at, or
|
|
* below lgkmCount. we currently do not
|
|
* support GDS/message ops.
|
|
*/
|
|
int vmWaitCnt;
|
|
int expWaitCnt;
|
|
int lgkmWaitCnt;
|
|
int vmemInstsIssued;
|
|
int expInstsIssued;
|
|
int lgkmInstsIssued;
|
|
status_e status;
|
|
Addr _pc;
|
|
VectorMask _execMask;
|
|
int barId;
|
|
};
|
|
|
|
#endif // __GPU_COMPUTE_WAVEFRONT_HH__
|