gem5/src/mem/ruby/system/VIPERCoalescer.cc

/*
* Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "mem/ruby/system/VIPERCoalescer.hh"
#include "base/logging.hh"
#include "base/str.hh"
#include "cpu/testers/rubytest/RubyTester.hh"
#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "debug/ProtocolTrace.hh"
#include "mem/packet.hh"
#include "mem/ruby/common/SubBlock.hh"
#include "mem/ruby/network/MessageBuffer.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/structures/CacheMemory.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "params/VIPERCoalescer.hh"
namespace gem5
{
namespace ruby
{
VIPERCoalescer::VIPERCoalescer(const Params &p)
: GPUCoalescer(p),
m_cache_inv_pkt(nullptr),
m_num_pending_invs(0)
{
}
VIPERCoalescer::~VIPERCoalescer()
{
}
// Places an uncoalesced packet in the uncoalescedTable. If the packet is a
// special type (MemFence, scoping, etc.), it is issued immediately.
RequestStatus
VIPERCoalescer::makeRequest(PacketPtr pkt)
{
    // VIPER only supports the following memory request types:
    //    MemSyncReq & INV_L1 : TCP cache invalidation
    //    ReadReq             : cache read
    //    WriteReq            : cache write
    //    AtomicOp            : cache atomic
    //
    // VIPER does not expect MemSyncReq & Release since, in GCN3, the
    // compute unit does not issue an equivalent type of memory request.
assert((pkt->cmd == MemCmd::MemSyncReq && pkt->req->isInvL1()) ||
pkt->cmd == MemCmd::ReadReq ||
pkt->cmd == MemCmd::WriteReq ||
pkt->isAtomicOp());
    if (pkt->req->isInvL1() && m_cache_inv_pkt) {
        // In the VIPER protocol, the coalescer cannot handle two or
        // more cache invalidation requests at a time. Cache invalidation
        // requests must be serialized to ensure that all stale data in
        // the TCP is invalidated correctly. If there is already a pending
        // cache invalidation request, this request must be retried later.
        return RequestStatus_Aliased;
    }
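    // Hand the packet to the base GPUCoalescer, which places it in the
    // uncoalescedTable.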
GPUCoalescer::makeRequest(pkt);
    if (pkt->req->isInvL1()) {
        // In the VIPER protocol, a compute unit sends a MemSyncReq with
        // the INV_L1 flag to invalidate the TCP. Upon receiving a request
        // of this type, VIPERCoalescer starts a cache walk to invalidate
        // all valid entries in the TCP. The request is completed once all
        // entries are invalidated.
        assert(!m_cache_inv_pkt);
        m_cache_inv_pkt = pkt;
        invTCP();
    }
return RequestStatus_Issued;
}
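// Convert a coalesced request into a single RubyRequest and enqueue it on
// the controller's mandatory queue. The access mask, write data, and any
// atomic operations from all coalesced packets are merged into one
// line-sized request.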
void
VIPERCoalescer::issueRequest(CoalescedRequest* crequest)
{
PacketPtr pkt = crequest->getFirstPkt();
int proc_id = -1;
if (pkt != NULL && pkt->req->hasContextId()) {
proc_id = pkt->req->contextId();
}
    // If valid, copy the PC to the Ruby request.
Addr pc = 0;
if (pkt->req->hasPC()) {
pc = pkt->req->getPC();
}
Addr line_addr = makeLineAddress(pkt->getAddr());
    // Create a WriteMask that records written bytes and atomic operations.
    // This enables partial writes and partial reads of those writes.
DataBlock dataBlock;
dataBlock.clear();
uint32_t blockSize = RubySystem::getBlockSizeBytes();
    std::vector<bool> accessMask(blockSize, false);
    std::vector<std::pair<int, AtomicOpFunctor*>> atomicOps;
uint32_t tableSize = crequest->getPackets().size();
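    // Merge every coalesced packet into the line-sized request: mark the
    // bytes it touches in the access mask, and record write data or atomic
    // operations at their offsets within the line.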
for (int i = 0; i < tableSize; i++) {
PacketPtr tmpPkt = crequest->getPackets()[i];
uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
uint32_t tmpSize = tmpPkt->getSize();
if (tmpPkt->isAtomicOp()) {
std::pair<int,AtomicOpFunctor *> tmpAtomicOp(tmpOffset,
tmpPkt->getAtomicOp());
atomicOps.push_back(tmpAtomicOp);
} else if (tmpPkt->isWrite()) {
dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
tmpOffset, tmpSize);
}
for (int j = 0; j < tmpSize; j++) {
accessMask[tmpOffset + j] = true;
}
}
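    // Atomic requests use the RubyRequest constructor that carries the
    // atomic operations; all other requests use the variant without them.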
std::shared_ptr<RubyRequest> msg;
if (pkt->isAtomicOp()) {
msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
pkt->getSize(), pc, crequest->getRubyType(),
RubyAccessMode_Supervisor, pkt,
PrefetchBit_No, proc_id, 100,
blockSize, accessMask,
dataBlock, atomicOps, crequest->getSeqNum());
} else {
msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
pkt->getSize(), pc, crequest->getRubyType(),
RubyAccessMode_Supervisor, pkt,
PrefetchBit_No, proc_id, 100,
blockSize, accessMask,
dataBlock, crequest->getSeqNum());
}
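    // Writes receive a separate write-complete response from downstream
    // caches, so pre-build the packets returned by writeCompleteCallback.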
if (pkt->cmd == MemCmd::WriteReq) {
makeWriteCompletePkts(crequest);
}
DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
curTick(), m_version, "Coal", "Begin", "", "",
printAddress(msg->getPhysicalAddress()),
RubyRequestType_to_string(crequest->getRubyType()));
fatal_if(crequest->getRubyType() == RubyRequestType_IFETCH,
"there should not be any I-Fetch requests in the GPU Coalescer");
if (!deadlockCheckEvent.scheduled()) {
schedule(deadlockCheckEvent,
m_deadlock_threshold * clockPeriod() +
curTick());
}
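    // Enqueue the request on the controller's mandatory queue after the
    // queue's latency for this request type.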
assert(m_mandatory_q_ptr);
Tick latency = cyclesToTicks(
m_controller->mandatoryQueueLatency(crequest->getRubyType()));
m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
}
void
VIPERCoalescer::makeWriteCompletePkts(CoalescedRequest* crequest)
{
    // In the VIPER protocol, for each write request, downstream caches
    // return two responses: writeCallback and writeCompleteCallback.
    // We need to prepare a writeCompletePkt for each write request so
    // that when writeCompleteCallback is called, we can respond to the
    // requesting wavefront right away.
    // writeCompletePkt inherits the request and senderState of the
    // original write request packet so that we can find the original
    // requestor later. This assumes that the request and senderState
    // are not deleted before writeCompleteCallback is called.
auto key = crequest->getSeqNum();
std::vector<PacketPtr>& req_pkts = crequest->getPackets();
for (auto pkt : req_pkts) {
DPRINTF(GPUCoalescer, "makeWriteCompletePkts: instSeqNum %d\n",
key);
assert(pkt->cmd == MemCmd::WriteReq);
PacketPtr writeCompletePkt = new Packet(pkt->req,
MemCmd::WriteCompleteResp);
writeCompletePkt->setAddr(pkt->getAddr());
writeCompletePkt->senderState = pkt->senderState;
m_writeCompletePktMap[key].push_back(writeCompletePkt);
}
}
void
VIPERCoalescer::writeCompleteCallback(Addr addr, uint64_t instSeqNum)
{
DPRINTF(GPUCoalescer, "writeCompleteCallback: instSeqNum %d addr 0x%x\n",
instSeqNum, addr);
auto key = instSeqNum;
assert(m_writeCompletePktMap.count(key) == 1 &&
!m_writeCompletePktMap[key].empty());
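    // Erase-remove idiom: for each pre-built write-complete packet whose
    // line address matches, restore the original sender state, hand the
    // packet back to the requesting port, and drop it from the pending
    // list.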
m_writeCompletePktMap[key].erase(
std::remove_if(
m_writeCompletePktMap[key].begin(),
m_writeCompletePktMap[key].end(),
[addr](PacketPtr writeCompletePkt) -> bool {
if (makeLineAddress(writeCompletePkt->getAddr()) == addr) {
RubyPort::SenderState *ss =
safe_cast<RubyPort::SenderState *>
(writeCompletePkt->senderState);
MemResponsePort *port = ss->port;
assert(port != NULL);
writeCompletePkt->senderState = ss->predecessor;
delete ss;
port->hitCallback(writeCompletePkt);
return true;
}
return false;
}
),
m_writeCompletePktMap[key].end()
);
trySendRetries();
if (m_writeCompletePktMap[key].empty())
m_writeCompletePktMap.erase(key);
}
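// Called once per invalidated cache block. When the last pending
// invalidation completes, respond to the original MemSyncReq packet.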
void
VIPERCoalescer::invTCPCallback(Addr addr)
{
assert(m_cache_inv_pkt && m_num_pending_invs > 0);
m_num_pending_invs--;
if (m_num_pending_invs == 0) {
std::vector<PacketPtr> pkt_list { m_cache_inv_pkt };
m_cache_inv_pkt = nullptr;
completeHitCallback(pkt_list);
}
}
/**
 * Invalidate the TCP (the GPU L1 data cache). Walk the cache and issue
 * one REPLACEMENT request per block, counting them in m_num_pending_invs;
 * the pending MemSyncReq completes in invTCPCallback once the count
 * reaches zero.
 */
void
VIPERCoalescer::invTCP()
{
int size = m_dataCache_ptr->getNumBlocks();
DPRINTF(GPUCoalescer,
"There are %d Invalidations outstanding before Cache Walk\n",
m_num_pending_invs);
// Walk the cache
for (int i = 0; i < size; i++) {
Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict read-only data.
RubyRequestType request_type = RubyRequestType_REPLACEMENT;
std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
clockEdge(), addr, 0, 0,
request_type, RubyAccessMode_Supervisor,
nullptr);
DPRINTF(GPUCoalescer, "Evicting addr 0x%x\n", addr);
assert(m_mandatory_q_ptr != NULL);
Tick latency = cyclesToTicks(
m_controller->mandatoryQueueLatency(request_type));
m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
m_num_pending_invs++;
}
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding after Cache Walk\n",
            m_num_pending_invs);
}
} // namespace ruby
} // namespace gem5