As part of recent decisions regarding namespace naming conventions, all namespaces will be changed to snake case. ::Stats became ::statistics. "statistics" was chosen over "stats" to avoid generating conflicts with the already existing variables (there are way too many "stats" in the codebase), which would make this patch even more disturbing for the users. Change-Id: If877b12d7dac356f86e3b3d941bf7558a4fd8719 Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br> Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/45421 Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Maintainer: Jason Lowe-Power <power.jg@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
319 lines
9.3 KiB
C++
319 lines
9.3 KiB
C++
/*
|
|
* Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* For use for simulation and test purposes only
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
*
|
|
* 3. Neither the name of the copyright holder nor the names of its
|
|
* contributors may be used to endorse or promote products derived from this
|
|
* software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#ifndef __SHADER_HH__
|
|
#define __SHADER_HH__
|
|
|
|
#include <functional>
|
|
#include <string>
|
|
|
|
#include "base/statistics.hh"
|
|
#include "base/stats/group.hh"
|
|
#include "base/types.hh"
|
|
#include "cpu/simple/atomic.hh"
|
|
#include "cpu/simple/timing.hh"
|
|
#include "cpu/simple_thread.hh"
|
|
#include "cpu/thread_context.hh"
|
|
#include "cpu/thread_state.hh"
|
|
#include "gpu-compute/compute_unit.hh"
|
|
#include "gpu-compute/gpu_dyn_inst.hh"
|
|
#include "gpu-compute/gpu_tlb.hh"
|
|
#include "gpu-compute/hsa_queue_entry.hh"
|
|
#include "gpu-compute/lds_state.hh"
|
|
#include "mem/page_table.hh"
|
|
#include "mem/port.hh"
|
|
#include "mem/request.hh"
|
|
#include "params/Shader.hh"
|
|
#include "sim/faults.hh"
|
|
#include "sim/process.hh"
|
|
#include "sim/sim_object.hh"
|
|
|
|
class BaseTLB;
|
|
class GPUCommandProcessor;
|
|
class GPUDispatcher;
|
|
|
|
// LDS size constant: 64 * 1024 == 65536.
// NOTE(review): presumably bytes of LDS storage (see
// gpu-compute/lds_state.hh) -- confirm units against the users.
static constexpr int LDS_SIZE = 64 * 1024;
|
|
|
|
// aperture (APE) registers define the base/limit
|
|
// pair for the ATC mapped memory space. currently
|
|
// the only APEs we consider are for GPUVM/LDS/scratch.
|
|
// the APEs are registered with unique values based
|
|
// on a per-device basis
|
|
struct ApertureRegister
|
|
{
|
|
Addr base;
|
|
Addr limit;
|
|
};
|
|
|
|
// Class Shader: This describes a single shader instance. Most
|
|
// configurations will only have a single shader.
|
|
|
|
class Shader : public ClockedObject
|
|
{
|
|
private:
|
|
ApertureRegister _gpuVmApe;
|
|
ApertureRegister _ldsApe;
|
|
ApertureRegister _scratchApe;
|
|
Addr shHiddenPrivateBaseVmid;
|
|
|
|
// Number of active Cus attached to this shader
|
|
int _activeCus;
|
|
|
|
// Last tick that all CUs attached to this shader were inactive
|
|
Tick _lastInactiveTick;
|
|
|
|
public:
|
|
typedef ShaderParams Params;
|
|
enum hsail_mode_e {SIMT,VECTOR_SCALAR};
|
|
|
|
GPUDispatcher &dispatcher();
|
|
void sampleLoad(const Tick accessTime);
|
|
void sampleStore(const Tick accessTime);
|
|
void sampleInstRoundTrip(std::vector<Tick> roundTripTime);
|
|
void sampleLineRoundTrip(const std::map<Addr,
|
|
std::vector<Tick>> &roundTripTime);
|
|
|
|
SimpleThread *cpuThread;
|
|
ThreadContext *gpuTc;
|
|
BaseCPU *cpuPointer;
|
|
|
|
const ApertureRegister&
|
|
gpuVmApe() const
|
|
{
|
|
return _gpuVmApe;
|
|
}
|
|
|
|
const ApertureRegister&
|
|
ldsApe() const
|
|
{
|
|
return _ldsApe;
|
|
}
|
|
|
|
const ApertureRegister&
|
|
scratchApe() const
|
|
{
|
|
return _scratchApe;
|
|
}
|
|
|
|
bool
|
|
isGpuVmApe(Addr addr) const
|
|
{
|
|
bool is_gpu_vm = addr >= _gpuVmApe.base && addr <= _gpuVmApe.limit;
|
|
|
|
return is_gpu_vm;
|
|
}
|
|
|
|
bool
|
|
isLdsApe(Addr addr) const
|
|
{
|
|
bool is_lds = addr >= _ldsApe.base && addr <= _ldsApe.limit;
|
|
|
|
return is_lds;
|
|
}
|
|
|
|
bool
|
|
isScratchApe(Addr addr) const
|
|
{
|
|
bool is_scratch
|
|
= addr >= _scratchApe.base && addr <= _scratchApe.limit;
|
|
|
|
return is_scratch;
|
|
}
|
|
|
|
Addr
|
|
getScratchBase()
|
|
{
|
|
return _scratchApe.base;
|
|
}
|
|
|
|
Addr
|
|
getHiddenPrivateBase()
|
|
{
|
|
return shHiddenPrivateBaseVmid;
|
|
}
|
|
|
|
void
|
|
initShHiddenPrivateBase(Addr queueBase, uint32_t offset)
|
|
{
|
|
Addr sh_hidden_base_new = queueBase - offset;
|
|
|
|
// We are initializing sh_hidden_private_base_vmid from the
|
|
// amd queue descriptor from the first queue.
|
|
// The sh_hidden_private_base_vmid is supposed to be same for
|
|
// all the queues from the same process
|
|
if (shHiddenPrivateBaseVmid != sh_hidden_base_new) {
|
|
// Do not panic if shHiddenPrivateBaseVmid == 0,
|
|
// that is if it is uninitialized. Panic only
|
|
// if the value is initilized and we get
|
|
// a differnt base later.
|
|
panic_if(shHiddenPrivateBaseVmid != 0,
|
|
"Currently we support only single process\n");
|
|
}
|
|
shHiddenPrivateBaseVmid = sh_hidden_base_new;
|
|
}
|
|
|
|
EventFunctionWrapper tickEvent;
|
|
|
|
// is this simulation going to be timing mode in the memory?
|
|
bool timingSim;
|
|
hsail_mode_e hsail_mode;
|
|
|
|
// If set, issue acq packet @ kernel launch
|
|
int impl_kern_launch_acq;
|
|
// If set, issue rel packet @ kernel end
|
|
int impl_kern_end_rel;
|
|
// If set, fetch returns may be coissued with instructions
|
|
int coissue_return;
|
|
// If set, always dump all 64 gprs to trace
|
|
int trace_vgpr_all;
|
|
// Number of cu units in the shader
|
|
int n_cu;
|
|
// Number of wavefront slots per SIMD per CU
|
|
int n_wf;
|
|
|
|
// The size of global memory
|
|
int globalMemSize;
|
|
|
|
// Tracks CU that rr dispatcher should attempt scheduling
|
|
int nextSchedCu;
|
|
|
|
// Size of scheduled add queue
|
|
uint32_t sa_n;
|
|
|
|
// Pointer to value to be increments
|
|
std::vector<int*> sa_val;
|
|
// When to do the increment
|
|
std::vector<uint64_t> sa_when;
|
|
// Amount to increment by
|
|
std::vector<int32_t> sa_x;
|
|
|
|
// List of Compute Units (CU's)
|
|
std::vector<ComputeUnit*> cuList;
|
|
|
|
GPUCommandProcessor &gpuCmdProc;
|
|
GPUDispatcher &_dispatcher;
|
|
|
|
int64_t max_valu_insts;
|
|
int64_t total_valu_insts;
|
|
|
|
Shader(const Params &p);
|
|
~Shader();
|
|
virtual void init();
|
|
|
|
// Run shader scheduled adds
|
|
void execScheduledAdds();
|
|
|
|
// Schedule a 32-bit value to be incremented some time in the future
|
|
void ScheduleAdd(int *val, Tick when, int x);
|
|
bool processTimingPacket(PacketPtr pkt);
|
|
|
|
void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
|
|
MemCmd cmd, bool suppress_func_errors);
|
|
|
|
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
|
|
|
|
void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
|
|
bool suppress_func_errors);
|
|
|
|
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
|
|
|
|
void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
|
|
bool suppress_func_errors);
|
|
|
|
void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data,
|
|
bool suppress_func_errors, int cu_id);
|
|
|
|
void
|
|
registerCU(int cu_id, ComputeUnit *compute_unit)
|
|
{
|
|
cuList[cu_id] = compute_unit;
|
|
}
|
|
|
|
void prepareInvalidate(HSAQueueEntry *task);
|
|
void prepareFlush(GPUDynInstPtr gpuDynInst);
|
|
|
|
bool dispatchWorkgroups(HSAQueueEntry *task);
|
|
Addr mmap(int length);
|
|
void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode);
|
|
void updateContext(int cid);
|
|
void notifyCuSleep();
|
|
|
|
void
|
|
incVectorInstSrcOperand(int num_operands)
|
|
{
|
|
stats.vectorInstSrcOperand[num_operands]++;
|
|
}
|
|
|
|
void
|
|
incVectorInstDstOperand(int num_operands)
|
|
{
|
|
stats.vectorInstDstOperand[num_operands]++;
|
|
}
|
|
|
|
protected:
|
|
struct ShaderStats : public statistics::Group
|
|
{
|
|
ShaderStats(statistics::Group *parent, int wf_size);
|
|
|
|
// some stats for measuring latency
|
|
statistics::Distribution allLatencyDist;
|
|
statistics::Distribution loadLatencyDist;
|
|
statistics::Distribution storeLatencyDist;
|
|
|
|
// average ticks from vmem inst initiateAcc to coalescer issue,
|
|
statistics::Distribution initToCoalesceLatency;
|
|
|
|
// average ticks from coalescer issue to coalescer hit callback,
|
|
statistics::Distribution rubyNetworkLatency;
|
|
|
|
// average ticks from coalescer hit callback to GM pipe enqueue,
|
|
statistics::Distribution gmEnqueueLatency;
|
|
|
|
// average ticks spent in GM pipe's ordered resp buffer.
|
|
statistics::Distribution gmToCompleteLatency;
|
|
|
|
// average number of cache blocks requested by vmem inst
|
|
statistics::Distribution coalsrLineAddresses;
|
|
|
|
// average ticks for cache blocks to main memory for the Nth
|
|
// cache block generated by a vmem inst.
|
|
statistics::Distribution *cacheBlockRoundTrip;
|
|
|
|
statistics::Scalar shaderActiveTicks;
|
|
statistics::Vector vectorInstSrcOperand;
|
|
statistics::Vector vectorInstDstOperand;
|
|
} stats;
|
|
};
|
|
|
|
#endif // __SHADER_HH__
|