Files
gem5/src/gpu-compute/gpu_dyn_inst.cc
Matthew Poremba 9313294efe misc: Remove AMD license addition
Remove the line "For use for simulation and test purposes only" in files
were AMD is the only copyright holder listed in the header. This happens
to be the case for all files where this line exists, removing it
completely from gem5.

Change-Id: I623f266b002f564301b28774f49081099cfc60fd
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/53943
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
2021-12-11 04:00:56 +00:00

1014 lines
24 KiB
C++

/*
* Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "gpu-compute/gpu_dyn_inst.hh"
#include "debug/GPUInst.hh"
#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_static_inst.hh"
#include "gpu-compute/scalar_register_file.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"
namespace gem5
{
GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
GPUStaticInst *static_inst, InstSeqNum instSeqNum)
: GPUExecContext(_cu, _wf), scalarAddr(0), addr(computeUnit()->wfSize(),
(Addr)0), numScalarReqs(0), isSaveRestore(false),
_staticInst(static_inst), _seqNum(instSeqNum),
maxSrcVecRegOpSize(-1), maxSrcScalarRegOpSize(-1)
{
_staticInst->initOperandInfo();
statusVector.assign(TheGpuISA::NumVecElemPerVecReg, 0);
tlbHitLevel.assign(computeUnit()->wfSize(), -1);
// vector instructions can have up to 4 source/destination operands
d_data = new uint8_t[computeUnit()->wfSize() * 4 * sizeof(double)];
a_data = new uint8_t[computeUnit()->wfSize() * 8];
x_data = new uint8_t[computeUnit()->wfSize() * 8];
// scalar loads can read up to 16 Dwords of data (see publicly
// available GCN3 ISA manual)
scalar_data = new uint8_t[16 * sizeof(uint32_t)];
for (int i = 0; i < (16 * sizeof(uint32_t)); ++i) {
scalar_data[i] = 0;
}
for (int i = 0; i < (computeUnit()->wfSize() * 8); ++i) {
a_data[i] = 0;
x_data[i] = 0;
}
for (int i = 0; i < (computeUnit()->wfSize() * 4 * sizeof(double)); ++i) {
d_data[i] = 0;
}
time = 0;
cu_id = _cu->cu_id;
if (_wf) {
simdId = _wf->simdId;
wfDynId = _wf->wfDynId;
kern_id = _wf->kernId;
wg_id = _wf->wgId;
wfSlotId = _wf->wfSlotId;
} else {
simdId = -1;
wfDynId = -1;
kern_id = -1;
wg_id = -1;
wfSlotId = -1;
}
DPRINTF(GPUInst, "%s: generating operand info for %d operands\n",
disassemble(), getNumOperands());
_staticInst->initDynOperandInfo(wavefront(), computeUnit());
}
GPUDynInst::~GPUDynInst()
{
delete[] d_data;
delete[] a_data;
delete[] x_data;
delete[] scalar_data;
delete _staticInst;
}
void
GPUDynInst::execute(GPUDynInstPtr gpuDynInst)
{
_staticInst->execute(gpuDynInst);
}
const std::vector<OperandInfo>&
GPUDynInst::srcVecRegOperands() const
{
return _staticInst->srcVecRegOperands();
}
const std::vector<OperandInfo>&
GPUDynInst::dstVecRegOperands() const
{
return _staticInst->dstVecRegOperands();
}
const std::vector<OperandInfo>&
GPUDynInst::srcScalarRegOperands() const
{
return _staticInst->srcScalarRegOperands();
}
const std::vector<OperandInfo>&
GPUDynInst::dstScalarRegOperands() const
{
return _staticInst->dstScalarRegOperands();
}
int
GPUDynInst::numSrcRegOperands()
{
return _staticInst->numSrcRegOperands();
}
int
GPUDynInst::numDstRegOperands()
{
return _staticInst->numDstRegOperands();
}
int
GPUDynInst::numSrcVecRegOperands() const
{
return _staticInst->numSrcVecOperands();
}
int
GPUDynInst::numDstVecRegOperands() const
{
return _staticInst->numDstVecOperands();
}
int
GPUDynInst::maxSrcVecRegOperandSize()
{
if (maxSrcVecRegOpSize != -1)
return maxSrcVecRegOpSize;
maxSrcVecRegOpSize = 0;
for (const auto& srcVecOp : srcVecRegOperands())
if (srcVecOp.sizeInDWords() > maxSrcVecRegOpSize)
maxSrcVecRegOpSize = srcVecOp.sizeInDWords();
return maxSrcVecRegOpSize;
}
int
GPUDynInst::numSrcVecDWords()
{
return _staticInst->numSrcVecDWords();
}
int
GPUDynInst::numDstVecDWords()
{
return _staticInst->numDstVecDWords();
}
int
GPUDynInst::numSrcScalarRegOperands() const
{
return _staticInst->numSrcScalarOperands();
}
int
GPUDynInst::numDstScalarRegOperands() const
{
return _staticInst->numDstScalarOperands();
}
int
GPUDynInst::maxSrcScalarRegOperandSize()
{
if (maxSrcScalarRegOpSize != -1)
return maxSrcScalarRegOpSize;
maxSrcScalarRegOpSize = 0;
for (const auto& srcScOp : srcScalarRegOperands())
if (srcScOp.sizeInDWords() > maxSrcScalarRegOpSize)
maxSrcScalarRegOpSize = srcScOp.sizeInDWords();
return maxSrcScalarRegOpSize;
}
int
GPUDynInst::numSrcScalarDWords()
{
return _staticInst->numSrcScalarDWords();
}
int
GPUDynInst::numDstScalarDWords()
{
return _staticInst->numDstScalarDWords();
}
int
GPUDynInst::maxOperandSize()
{
return _staticInst->maxOperandSize();
}
int
GPUDynInst::getNumOperands() const
{
return _staticInst->getNumOperands();
}
bool
GPUDynInst::hasSourceVgpr() const
{
return !srcVecRegOperands().empty();
}
bool
GPUDynInst::hasDestinationVgpr() const
{
return !dstVecRegOperands().empty();
}
bool
GPUDynInst::hasSourceSgpr() const
{
return !srcScalarRegOperands().empty();
}
bool
GPUDynInst::hasDestinationSgpr() const
{
return !dstScalarRegOperands().empty();
}
bool
GPUDynInst::isOpcode(const std::string& opcodeStr,
const std::string& extStr) const
{
return _staticInst->opcode().find(opcodeStr) != std::string::npos &&
_staticInst->opcode().find(extStr) != std::string::npos;
}
bool
GPUDynInst::isOpcode(const std::string& opcodeStr) const
{
return _staticInst->opcode().find(opcodeStr) != std::string::npos;
}
const std::string&
GPUDynInst::disassemble() const
{
return _staticInst->disassemble();
}
InstSeqNum
GPUDynInst::seqNum() const
{
return _seqNum;
}
Addr
GPUDynInst::pc()
{
return wavefront()->pc();
}
void
GPUDynInst::pc(Addr _pc)
{
wavefront()->pc(_pc);
}
enums::StorageClassType
GPUDynInst::executedAs()
{
return _staticInst->executed_as;
}
// Process a memory instruction and (if necessary) submit timing request
void
GPUDynInst::initiateAcc(GPUDynInstPtr gpuDynInst)
{
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector=%#x\n",
cu->cu_id, simdId, wfSlotId, exec_mask);
_staticInst->initiateAcc(gpuDynInst);
}
void
GPUDynInst::completeAcc(GPUDynInstPtr gpuDynInst)
{
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: mempacket status bitvector="
"%#x\n complete",
cu->cu_id, simdId, wfSlotId, exec_mask);
_staticInst->completeAcc(gpuDynInst);
}
/**
* accessor methods for the attributes of
* the underlying GPU static instruction
*/
bool
GPUDynInst::isALU() const
{
return _staticInst->isALU();
}
bool
GPUDynInst::isBranch() const
{
return _staticInst->isBranch();
}
bool
GPUDynInst::isCondBranch() const
{
return _staticInst->isCondBranch();
}
bool
GPUDynInst::isNop() const
{
return _staticInst->isNop();
}
bool
GPUDynInst::isEndOfKernel() const
{
return _staticInst->isEndOfKernel();
}
bool
GPUDynInst::isKernelLaunch() const
{
return _staticInst->isKernelLaunch();
}
bool
GPUDynInst::isSDWAInst() const
{
return _staticInst->isSDWAInst();
}
bool
GPUDynInst::isDPPInst() const
{
return _staticInst->isDPPInst();
}
bool
GPUDynInst::isReturn() const
{
return _staticInst->isReturn();
}
bool
GPUDynInst::isUnconditionalJump() const
{
return _staticInst->isUnconditionalJump();
}
bool
GPUDynInst::isSpecialOp() const
{
return _staticInst->isSpecialOp();
}
bool
GPUDynInst::isWaitcnt() const
{
return _staticInst->isWaitcnt();
}
bool
GPUDynInst::isSleep() const
{
return _staticInst->isSleep();
}
bool
GPUDynInst::isBarrier() const
{
return _staticInst->isBarrier();
}
bool
GPUDynInst::isMemSync() const
{
return _staticInst->isMemSync();
}
bool
GPUDynInst::isMemRef() const
{
return _staticInst->isMemRef();
}
bool
GPUDynInst::isFlat() const
{
return _staticInst->isFlat();
}
bool
GPUDynInst::isFlatGlobal() const
{
return _staticInst->isFlatGlobal();
}
bool
GPUDynInst::isLoad() const
{
return _staticInst->isLoad();
}
bool
GPUDynInst::isStore() const
{
return _staticInst->isStore();
}
bool
GPUDynInst::isAtomic() const
{
return _staticInst->isAtomic();
}
bool
GPUDynInst::isAtomicNoRet() const
{
return _staticInst->isAtomicNoRet();
}
bool
GPUDynInst::isAtomicRet() const
{
return _staticInst->isAtomicRet();
}
bool
GPUDynInst::isVector() const
{
return !_staticInst->isScalar();
}
bool
GPUDynInst::isScalar() const
{
return _staticInst->isScalar();
}
bool
GPUDynInst::readsSCC() const
{
return _staticInst->readsSCC();
}
bool
GPUDynInst::writesSCC() const
{
return _staticInst->writesSCC();
}
bool
GPUDynInst::readsVCC() const
{
for (const auto& srcOp : _staticInst->srcOperands())
if (srcOp.isVcc())
return true;
return _staticInst->readsVCC();
}
bool
GPUDynInst::writesVCC() const
{
for (const auto& dstOp : _staticInst->dstOperands())
if (dstOp.isVcc())
return true;
return _staticInst->writesVCC();
}
bool
GPUDynInst::readsMode() const
{
return _staticInst->readsMode();
}
bool
GPUDynInst::writesMode() const
{
return _staticInst->writesMode();
}
bool
GPUDynInst::readsExec() const
{
return _staticInst->readsEXEC();
}
bool
GPUDynInst::writesExec() const
{
return _staticInst->writesEXEC();
}
bool
GPUDynInst::ignoreExec() const
{
return _staticInst->ignoreExec();
}
bool
GPUDynInst::writesExecMask() const
{
for (const auto& dstOp : _staticInst->dstOperands())
if (dstOp.isExec())
return true;
return _staticInst->writesEXEC();
}
bool
GPUDynInst::readsExecMask() const
{
for (const auto& srcOp : _staticInst->srcOperands())
if (srcOp.isExec())
return true;
return _staticInst->readsEXEC();
}
bool
GPUDynInst::writesFlatScratch() const
{
for (const auto& dstScalarOp : dstScalarRegOperands())
if (dstScalarOp.isFlatScratch())
return true;
return false;
}
bool
GPUDynInst::readsFlatScratch() const
{
for (const auto& srcScalarOp : srcScalarRegOperands())
if (srcScalarOp.isFlatScratch())
return true;
return false;
}
bool
GPUDynInst::isAtomicAnd() const
{
return _staticInst->isAtomicAnd();
}
bool
GPUDynInst::isAtomicOr() const
{
return _staticInst->isAtomicOr();
}
bool
GPUDynInst::isAtomicXor() const
{
return _staticInst->isAtomicXor();
}
bool
GPUDynInst::isAtomicCAS() const
{
return _staticInst->isAtomicCAS();
}
bool GPUDynInst::isAtomicExch() const
{
return _staticInst->isAtomicExch();
}
bool
GPUDynInst::isAtomicAdd() const
{
return _staticInst->isAtomicAdd();
}
bool
GPUDynInst::isAtomicSub() const
{
return _staticInst->isAtomicSub();
}
bool
GPUDynInst::isAtomicInc() const
{
return _staticInst->isAtomicInc();
}
bool
GPUDynInst::isAtomicDec() const
{
return _staticInst->isAtomicDec();
}
bool
GPUDynInst::isAtomicMax() const
{
return _staticInst->isAtomicMax();
}
bool
GPUDynInst::isAtomicMin() const
{
return _staticInst->isAtomicMin();
}
bool
GPUDynInst::isArgLoad() const
{
return _staticInst->isArgLoad();
}
bool
GPUDynInst::isGlobalMem() const
{
return _staticInst->isGlobalMem();
}
bool
GPUDynInst::isLocalMem() const
{
return _staticInst->isLocalMem();
}
bool
GPUDynInst::isArgSeg() const
{
return _staticInst->isArgSeg();
}
bool
GPUDynInst::isGlobalSeg() const
{
return _staticInst->isGlobalSeg();
}
bool
GPUDynInst::isGroupSeg() const
{
return _staticInst->isGroupSeg();
}
bool
GPUDynInst::isKernArgSeg() const
{
return _staticInst->isKernArgSeg();
}
bool
GPUDynInst::isPrivateSeg() const
{
return _staticInst->isPrivateSeg();
}
bool
GPUDynInst::isReadOnlySeg() const
{
return _staticInst->isReadOnlySeg();
}
bool
GPUDynInst::isSpillSeg() const
{
return _staticInst->isSpillSeg();
}
bool
GPUDynInst::isGloballyCoherent() const
{
return _staticInst->isGloballyCoherent();
}
bool
GPUDynInst::isSystemCoherent() const
{
return _staticInst->isSystemCoherent();
}
bool
GPUDynInst::isF16() const
{
return _staticInst->isF16();
}
bool
GPUDynInst::isF32() const
{
return _staticInst->isF32();
}
bool
GPUDynInst::isF64() const
{
return _staticInst->isF64();
}
bool
GPUDynInst::isFMA() const
{
return _staticInst->isFMA();
}
bool
GPUDynInst::isMAC() const
{
return _staticInst->isMAC();
}
bool
GPUDynInst::isMAD() const
{
return _staticInst->isMAD();
}
void
GPUDynInst::doApertureCheck(const VectorMask &mask)
{
assert(mask.any());
// find the segment of the first active address, after
// that we check that all other active addresses also
// fall within the same APE
for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
if (mask[lane]) {
if (computeUnit()->shader->isLdsApe(addr[lane])) {
// group segment
staticInstruction()->executed_as = enums::SC_GROUP;
break;
} else if (computeUnit()->shader->isScratchApe(addr[lane])) {
// private segment
staticInstruction()->executed_as = enums::SC_PRIVATE;
break;
} else if (computeUnit()->shader->isGpuVmApe(addr[lane])) {
// we won't support GPUVM
fatal("flat access is in GPUVM APE\n");
} else if (bits(addr[lane], 63, 47) != 0x1FFFF &&
bits(addr[lane], 63, 47)) {
// we are in the "hole", this is a memory violation
fatal("flat access at addr %#x has a memory violation\n",
addr[lane]);
} else {
// global memory segment
staticInstruction()->executed_as = enums::SC_GLOBAL;
break;
}
}
}
// we should have found the segment
assert(executedAs() != enums::SC_NONE);
// flat accesses should not straddle multiple APEs so we
// must check that all addresses fall within the same APE
if (executedAs() == enums::SC_GROUP) {
for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
if (mask[lane]) {
// if the first valid addr we found above was LDS,
// all the rest should be
assert(computeUnit()->shader->isLdsApe(addr[lane]));
}
}
} else if (executedAs() == enums::SC_PRIVATE) {
for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
if (mask[lane]) {
// if the first valid addr we found above was private,
// all the rest should be
assert(computeUnit()->shader->isScratchApe(addr[lane]));
}
}
} else {
for (int lane = 0; lane < computeUnit()->wfSize(); ++lane) {
if (mask[lane]) {
// if the first valid addr we found above was global,
// all the rest should be. because we don't have an
// explicit range of the global segment, we just make
// sure that the address fall in no other APE and that
// it is not a memory violation
assert(!computeUnit()->shader->isLdsApe(addr[lane]));
assert(!computeUnit()->shader->isScratchApe(addr[lane]));
assert(!computeUnit()->shader->isGpuVmApe(addr[lane]));
assert(!(bits(addr[lane], 63, 47) != 0x1FFFF
&& bits(addr[lane], 63, 47)));
}
}
}
}
void
GPUDynInst::resolveFlatSegment(const VectorMask &mask)
{
doApertureCheck(mask);
// Now that we know the aperature, do the following:
// 1. Transform the flat address to its segmented equivalent.
// 2. Set the execUnitId based an the aperture check.
// 3. Decrement any extra resources that were reserved. Other
// resources are released as normal, below.
if (executedAs() == enums::SC_GLOBAL) {
// no transormation for global segment
wavefront()->execUnitId = wavefront()->flatGmUnitId;
if (isLoad()) {
wavefront()->rdLmReqsInPipe--;
} else if (isStore()) {
wavefront()->wrLmReqsInPipe--;
} else if (isAtomic() || isMemSync()) {
wavefront()->wrLmReqsInPipe--;
wavefront()->rdLmReqsInPipe--;
} else {
panic("Invalid memory operation!\n");
}
} else if (executedAs() == enums::SC_GROUP) {
for (int lane = 0; lane < wavefront()->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
// flat address calculation goes here.
// addr[lane] = segmented address
addr[lane] = addr[lane] -
wavefront()->computeUnit->shader->ldsApe().base;
assert(addr[lane] <
wavefront()->computeUnit->getLds().getAddrRange().size());
}
}
wavefront()->execUnitId = wavefront()->flatLmUnitId;
wavefront()->decVMemInstsIssued();
if (isLoad()) {
wavefront()->rdGmReqsInPipe--;
} else if (isStore()) {
wavefront()->wrGmReqsInPipe--;
} else if (isAtomic() || isMemSync()) {
wavefront()->rdGmReqsInPipe--;
wavefront()->wrGmReqsInPipe--;
} else {
panic("Invalid memory operation!\n");
}
} else if (executedAs() == enums::SC_PRIVATE) {
/**
* Flat instructions may resolve to the private segment (scratch),
* which is backed by main memory and provides per-lane scratch
* memory. Flat addressing uses apertures - registers that specify
* the address range in the VA space where LDS/private memory is
* mapped. The value of which is set by the kernel mode driver.
* These apertures use addresses that are not used by x86 CPUs.
* When the address of a Flat operation falls into one of the
* apertures, the Flat operation is redirected to either LDS or
* to the private memory segment.
*
* For private memory the SW runtime will allocate some space in
* the VA space for each AQL queue. The base address of which is
* stored in scalar registers per the AMD GPU ABI. The amd_queue_t
* scratch_backing_memory_location provides the base address in
* memory for the queue's private segment. Various other fields
* loaded into register state during kernel launch specify per-WF
* and per-work-item offsets so that individual lanes may access
* their private segment allocation.
*
* For more details about flat addressing see:
* http://rocm-documentation.readthedocs.io/en/latest/
* ROCm_Compiler_SDK/ROCm-Native-ISA.html#flat-scratch
*
* https://github.com/ROCm-Developer-Tools/
* ROCm-ComputeABI-Doc/blob/master/AMDGPU-ABI.md
* #flat-addressing
*/
uint32_t numSgprs = wavefront()->maxSgprs;
uint32_t physSgprIdx =
wavefront()->computeUnit->registerManager->mapSgpr(wavefront(),
numSgprs - 3);
uint32_t offset =
wavefront()->computeUnit->srf[simdId]->read(physSgprIdx);
physSgprIdx =
wavefront()->computeUnit->registerManager->mapSgpr(wavefront(),
numSgprs - 4);
uint32_t size =
wavefront()->computeUnit->srf[simdId]->read(physSgprIdx);
for (int lane = 0; lane < wavefront()->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
addr[lane] = addr[lane] + lane * size + offset +
wavefront()->computeUnit->shader->getHiddenPrivateBase() -
wavefront()->computeUnit->shader->getScratchBase();
}
}
wavefront()->execUnitId = wavefront()->flatLmUnitId;
wavefront()->decLGKMInstsIssued();
if (isLoad()) {
wavefront()->rdGmReqsInPipe--;
} else if (isStore()) {
wavefront()->wrGmReqsInPipe--;
} else if (isAtomic() || isMemSync()) {
wavefront()->rdGmReqsInPipe--;
wavefront()->wrGmReqsInPipe--;
} else {
panic("Invalid memory operation!\n");
}
} else {
for (int lane = 0; lane < wavefront()->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
panic("flat addr %#llx maps to bad segment %d\n",
addr[lane], executedAs());
}
}
}
}
TheGpuISA::ScalarRegU32
GPUDynInst::srcLiteral() const
{
return _staticInst->srcLiteral();
}
void
GPUDynInst::updateStats()
{
if (_staticInst->isLocalMem()) {
// access to LDS (shared) memory
cu->stats.dynamicLMemInstrCnt++;
} else if (_staticInst->isFlat()) {
cu->stats.dynamicFlatMemInstrCnt++;
} else {
// access to global memory
// update PageDivergence histogram
int number_pages_touched = cu->pagesTouched.size();
assert(number_pages_touched);
cu->stats.pageDivergenceDist.sample(number_pages_touched);
std::pair<ComputeUnit::pageDataStruct::iterator, bool> ret;
for (auto it : cu->pagesTouched) {
// see if this page has been touched before. if not, this also
// inserts the page into the table.
ret = cu->pageAccesses
.insert(ComputeUnit::pageDataStruct::value_type(it.first,
std::make_pair(1, it.second)));
// if yes, then update the stats
if (!ret.second) {
ret.first->second.first++;
ret.first->second.second += it.second;
}
}
cu->pagesTouched.clear();
// total number of memory instructions (dynamic)
// Atomics are counted as a single memory instruction.
// this is # memory instructions per wavefronts, not per workitem
cu->stats.dynamicGMemInstrCnt++;
}
}
void
GPUDynInst::profileRoundTripTime(Tick currentTime, int hopId)
{
// Only take the first measurement in the case of coalescing
if (roundTripTime.size() > hopId)
return;
roundTripTime.push_back(currentTime);
}
void
GPUDynInst::profileLineAddressTime(Addr addr, Tick currentTime, int hopId)
{
if (lineAddressTime.count(addr)) {
if (lineAddressTime[addr].size() > hopId) {
return;
}
lineAddressTime[addr].push_back(currentTime);
} else if (hopId == 0) {
auto addressTimeVec = std::vector<Tick> { currentTime };
lineAddressTime.insert(std::make_pair(addr, addressTimeVec));
}
}
} // namespace gem5