/* * Copyright (c) 2021 Advanced Micro Devices, Inc. * All rights reserved. * * For use for simulation and test purposes only * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "cpu/testers/gpu_ruby_test/dma_thread.hh" #include "debug/ProtocolTest.hh" namespace gem5 { DmaThread::DmaThread(const Params& _params) : TesterThread(_params) { threadName = "DmaThread(Thread ID " + std::to_string(threadId) + ")"; threadEvent.setDesc("DmaThread tick"); assert(numLanes == 1); } DmaThread::~DmaThread() { } void DmaThread::issueLoadOps() { assert(curAction); assert(curAction->getType() == Episode::Action::Type::LOAD); // we should not have any outstanding fence or atomic op at this point assert(pendingFenceCount == 0); assert(pendingAtomicCount == 0); // DMA thread is a scalar thread so always set lane to zero. This allows // us to reuse the API for GPU threads rather than have a specific API // for scalar tester threads int lane = 0; Location location = curAction->getLocation(lane); assert(location >= AddressManager::INVALID_LOCATION); if (location >= 0) { Addr address = addrManager->getAddress(location); DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n", this->getName(), curEpisode->getEpisodeId(), ruby::printAddress(address)); int load_size = sizeof(Value); // for now, assert address is 4-byte aligned assert(address % load_size == 0); auto req = std::make_shared(address, load_size, 0, tester->requestorId(), 0, threadId, nullptr); req->setPaddr(address); req->setReqInstSeqNum(tester->getActionSeqNum()); PacketPtr pkt = new Packet(req, MemCmd::ReadReq); uint8_t* data = new uint8_t[load_size]; pkt->dataDynamic(data); pkt->senderState = new ProtocolTester::SenderState(this); if (!port->sendTimingReq(pkt)) { panic("Not expected failed sendTimingReq\n"); } // insert an outstanding load addOutstandingReqs(outstandingLoads, address, lane, location); // increment the number of outstanding ld_st requests pendingLdStCount++; } } void DmaThread::issueStoreOps() { assert(curAction); assert(curAction->getType() == Episode::Action::Type::STORE); // we should not have any outstanding fence or atomic op at this point assert(pendingFenceCount == 0); assert(pendingAtomicCount == 0); // DMA thread is a scalar thread so always set lane to zero. This allows // us to reuse the API for GPU threads rather than have a specific API // for scalar tester threads int lane = 0; Location location = curAction->getLocation(lane); assert(location >= AddressManager::INVALID_LOCATION); if (location >= 0) { // prepare the next value to store Value new_value = addrManager->getLoggedValue(location) + 1; Addr address = addrManager->getAddress(location); // must be aligned with store size assert(address % sizeof(Value) == 0); DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - " "Value %d\n", this->getName(), curEpisode->getEpisodeId(), ruby::printAddress(address), new_value); auto req = std::make_shared(address, sizeof(Value), 0, tester->requestorId(), 0, threadId, nullptr); req->setPaddr(address); req->setReqInstSeqNum(tester->getActionSeqNum()); PacketPtr pkt = new Packet(req, MemCmd::WriteReq); uint8_t *writeData = new uint8_t[sizeof(Value)]; for (int j = 0; j < sizeof(Value); ++j) { writeData[j] = ((uint8_t*)&new_value)[j]; } pkt->dataDynamic(writeData); pkt->senderState = new ProtocolTester::SenderState(this); if (!port->sendTimingReq(pkt)) { panic("Not expecting a failed sendTimingReq\n"); } // add an outstanding store addOutstandingReqs(outstandingStores, address, lane, location, new_value); // increment the number of outstanding ld_st requests pendingLdStCount++; } } void DmaThread::issueAtomicOps() { DPRINTF(ProtocolTest, "Issuing Atomic Op ...\n"); assert(curAction); assert(curAction->getType() == Episode::Action::Type::ATOMIC); // we should not have any outstanding ops at this point assert(pendingFenceCount == 0); assert(pendingLdStCount == 0); assert(pendingAtomicCount == 0); // no-op: No DMA protocol exists with Atomics } void DmaThread::issueAcquireOp() { DPRINTF(ProtocolTest, "Issuing Acquire Op ...\n"); assert(curAction); assert(curAction->getType() == Episode::Action::Type::ACQUIRE); // we should not have any outstanding ops at this point assert(pendingFenceCount == 0); assert(pendingLdStCount == 0); assert(pendingAtomicCount == 0); // no-op: Acquire does not apply to DMA threads } void DmaThread::issueReleaseOp() { DPRINTF(ProtocolTest, "Issuing Release Op ...\n"); assert(curAction); assert(curAction->getType() == Episode::Action::Type::RELEASE); // we should not have any outstanding ops at this point assert(pendingFenceCount == 0); assert(pendingLdStCount == 0); assert(pendingAtomicCount == 0); // no-op: Release does not apply to DMA threads } void DmaThread::hitCallback(PacketPtr pkt) { assert(pkt); MemCmd resp_cmd = pkt->cmd; Addr addr = pkt->getAddr(); DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s -" " Addr %s\n", this->getName(), curEpisode->getEpisodeId(), resp_cmd.toString(), ruby::printAddress(addr)); if (resp_cmd == MemCmd::SwapResp) { // response to a pending atomic assert(pendingAtomicCount > 0); assert(pendingLdStCount == 0); assert(outstandingAtomics.count(addr) > 0); // get return data Value value = *(pkt->getPtr()); // validate atomic op return OutstandingReq req = popOutstandingReq(outstandingAtomics, addr); assert(req.lane == 0); validateAtomicResp(req.origLoc, req.lane, value); // update log table addrManager->updateLogTable(req.origLoc, threadId, curEpisode->getEpisodeId(), value, curTick(), 0); // this Atomic is done pendingAtomicCount--; } else if (resp_cmd == MemCmd::ReadResp) { // response to a pending read assert(pendingLdStCount > 0); assert(pendingAtomicCount == 0); assert(outstandingLoads.count(addr) > 0); // get return data Value value = *(pkt->getPtr()); OutstandingReq req = popOutstandingReq(outstandingLoads, addr); assert(req.lane == 0); validateLoadResp(req.origLoc, req.lane, value); // this Read is done pendingLdStCount--; } else if (resp_cmd == MemCmd::WriteResp) { // response to a pending write assert(pendingLdStCount > 0); assert(pendingAtomicCount == 0); // no need to validate Write response // just pop it from the outstanding req table so that subsequent // requests dependent on this write can proceed // note that unlike GpuWavefront we do decrement pendingLdStCount here // since the write is guaranteed to be completed in downstream memory. assert(outstandingStores.count(addr) > 0); OutstandingReq req = popOutstandingReq(outstandingStores, addr); assert(req.storedValue != AddressManager::INVALID_VALUE); // update log table addrManager->updateLogTable(req.origLoc, threadId, curEpisode->getEpisodeId(), req.storedValue, curTick(), 0); // the Write is now done pendingLdStCount--; } else { panic("UnsupportedMemCmd response type: %s", resp_cmd.toString().c_str()); } delete pkt->senderState; delete pkt; // record the last active cycle to check for deadlock lastActiveCycle = curCycle(); // we may be able to issue an action. Let's check if (!threadEvent.scheduled()) { scheduleWakeup(); } } } // namespace gem5