/*
 * Copyright (c) 2017-2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "cpu/testers/gpu_ruby_test/gpu_wavefront.hh"

#include <cstring>

#include "debug/ProtocolTest.hh"

namespace gem5
{

/**
 * Construct a wavefront tester thread.
 *
 * The base TesterThread is initialized from the same parameters, the
 * compute unit id is taken from p.cu_id, and both the thread name and the
 * description of the thread event are set.
 *
 * @param p construction parameters
 */
GpuWavefront::GpuWavefront(const Params &p)
      : TesterThread(p), cuId(p.cu_id)
{
    // render both ids up front so the name is assembled in a single step
    const std::string tester_id_str = std::to_string(threadId);
    const std::string cu_id_str = std::to_string(cuId);

    threadName = "GpuWavefront(TesterThread ID = " + tester_id_str +
                 ", CU ID = " + cu_id_str + ")";
    threadEvent.setDesc("GpuWavefront tick");
}

// The destructor performs no work of its own.
GpuWavefront::~GpuWavefront() = default;

/**
 * Issue the read requests of the current LOAD action.
 *
 * One ReadReq packet of sizeof(Value) bytes is sent for every lane whose
 * location is non-negative; other lanes are skipped. Each request sent
 * bumps pendingLdStCount and is recorded in outstandingLoads. A rejected
 * sendTimingReq is treated as fatal.
 */
void
GpuWavefront::issueLoadOps()
{
    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::LOAD);
    // no fence or atomic may still be in flight when loads are issued
    assert(pendingFenceCount == 0);
    assert(pendingAtomicCount == 0);

    // every load reads exactly one Value
    const int load_size = sizeof(Value);

    for (int lane = 0; lane < numLanes; ++lane) {
        Location location = curAction->getLocation(lane);
        assert(location >= AddressManager::INVALID_LOCATION);

        // a negative location means this lane has nothing to load
        if (location < 0) {
            continue;
        }

        Addr address = addrManager->getAddress(location);
        DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n",
                this->getName(), curEpisode->getEpisodeId(),
                ruby::printAddress(address));

        // the address must be aligned to the access size
        assert(address % load_size == 0);

        auto req = std::make_shared<Request>(address, load_size,
                                             0, tester->requestorId(),
                                             0, threadId, nullptr);
        req->setPaddr(address);
        req->setReqInstSeqNum(tester->getActionSeqNum());
        // give the protocol a chance to add its own flags
        setExtraRequestFlags(req);

        // build the read packet with a buffer for the returned data
        PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
        uint8_t *read_buf = new uint8_t[load_size];
        pkt->dataDynamic(read_buf);
        pkt->senderState = new ProtocolTester::SenderState(this);

        // one more ld_st request is now outstanding
        pendingLdStCount++;

        const bool accepted = port->sendTimingReq(pkt);
        if (!accepted) {
            panic("Not expected failed sendTimingReq\n");
        }

        // remember the load so that its response can be matched later
        addOutstandingReqs(outstandingLoads, address, lane, location);
    }
}

/**
 * Issue the write requests of the current STORE action.
 *
 * For every lane whose location is non-negative, the value to store is the
 * logged value of that location plus one. A WriteReq packet of
 * sizeof(Value) bytes carrying that value is sent, pendingLdStCount is
 * incremented, and the store is recorded in outstandingStores together
 * with the value written. A rejected sendTimingReq is treated as fatal.
 */
void
GpuWavefront::issueStoreOps()
{
    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::STORE);
    // we should not have any outstanding fence or atomic op at this point
    assert(pendingFenceCount == 0);
    assert(pendingAtomicCount == 0);

    for (int lane = 0; lane < numLanes; ++lane) {
        Location location = curAction->getLocation(lane);
        assert(location >= AddressManager::INVALID_LOCATION);

        // Make a request if we do not get an INVALID_LOCATION for this lane.
        if (location >= 0) {
            // prepare the next value to store
            Value new_value = addrManager->getLoggedValue(location) + 1;

            Addr address = addrManager->getAddress(location);
            // must be aligned with store size
            assert(address % sizeof(Value) == 0);

            DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - "
                    "Value %d\n", this->getName(),
                    curEpisode->getEpisodeId(), ruby::printAddress(address),
                    new_value);

            auto req = std::make_shared<Request>(address, sizeof(Value),
                                                 0, tester->requestorId(), 0,
                                                 threadId, nullptr);
            req->setPaddr(address);
            req->setReqInstSeqNum(tester->getActionSeqNum());
            // set protocol-specific flags
            setExtraRequestFlags(req);

            PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
            // copy the raw bytes of the value into the buffer handed to the
            // packet; memcpy avoids the signed/unsigned index comparison and
            // the C-style pointer cast of a hand-written byte loop
            uint8_t *writeData = new uint8_t[sizeof(Value)];
            std::memcpy(writeData, &new_value, sizeof(Value));
            pkt->dataDynamic(writeData);
            pkt->senderState = new ProtocolTester::SenderState(this);

            // increment the number of outstanding ld_st requests
            pendingLdStCount++;

            if (!port->sendTimingReq(pkt)) {
                panic("Not expecting a failed sendTimingReq\n");
            }

            // add an outstanding store
            addOutstandingReqs(outstandingStores, address, lane, location,
                               new_value);
        }
    }
}

/**
 * Issue the atomic requests of the current ATOMIC action.
 *
 * Every lane must have a non-negative location (asserted). For each lane a
 * SwapReq packet of sizeof(Value) bytes is sent whose request carries the
 * ATOMIC_RETURN_OP flag and an AtomicOpInc<Value> functor. Each request
 * bumps pendingAtomicCount and is recorded in outstandingAtomics. A
 * rejected sendTimingReq is treated as fatal.
 */
void
GpuWavefront::issueAtomicOps()
{
    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::ATOMIC);
    // we should not have any outstanding ops at this point
    assert(pendingFenceCount == 0);
    assert(pendingLdStCount == 0);
    assert(pendingAtomicCount == 0);

    // we use atomic_inc in the tester
    Request::Flags flags = Request::ATOMIC_RETURN_OP;

    for (int lane = 0; lane < numLanes; ++lane) {
        Location location = curAction->getLocation(lane);
        assert(location >= 0);

        Addr address = addrManager->getAddress(location);

        DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n",
                this->getName(), curEpisode->getEpisodeId(),
                ruby::printAddress(address));

        // must be aligned with store size
        assert(address % sizeof(Value) == 0);
        AtomicOpFunctor *amo_op = new AtomicOpInc<Value>();
        auto req = std::make_shared<Request>(address, sizeof(Value),
                                             flags, tester->requestorId(),
                                             0, threadId,
                                             AtomicOpFunctorPtr(amo_op));
        req->setPaddr(address);
        req->setReqInstSeqNum(tester->getActionSeqNum());
        // set protocol-specific flags
        setExtraRequestFlags(req);

        PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
        uint8_t* data = new uint8_t[sizeof(Value)];
        pkt->dataDynamic(data);
        pkt->senderState = new ProtocolTester::SenderState(this);

        // increment the number of outstanding atomic ops before sending,
        // as the load, store and acquire paths do for their own counters;
        // hitCallback asserts this counter is positive on a SwapResp
        pendingAtomicCount++;

        if (!port->sendTimingReq(pkt)) {
            panic("Not expecting failed sendTimingReq\n");
        }

        // add an outstanding atomic
        addOutstandingReqs(outstandingAtomics, address, lane, location);
    }
}

void
GpuWavefront::issueAcquireOp()
{
    DPRINTF(ProtocolTest, "%s Episode %d: Issuing Acquire\n", this->getName(),
            curEpisode->getEpisodeId());

    assert(curAction);
    assert(curAction->getType() == Episode::Action::Type::ACQUIRE);
    // we should not have any outstanding ops at this point
    assert(pendingFenceCount == 0);
    assert(pendingLdStCount == 0);
    assert(pendingAtomicCount == 0);

    auto acq_req = std::make_shared<Request>(0, 0, 0,
                                             tester->requestorId(), 0,
                                             threadId, nullptr);
    acq_req->setPaddr(0);
    acq_req->setReqInstSeqNum(tester->getActionSeqNum());
    acq_req->setCacheCoherenceFlags(Request::INV_L1);
    // set protocol-specific flags
    setExtraRequestFlags(acq_req);

    PacketPtr pkt = new Packet(acq_req, MemCmd::MemSyncReq);
    pkt->senderState = new ProtocolTester::SenderState(this);

    // increment the number of outstanding fence requests
    pendingFenceCount++;

    if (!port->sendTimingReq(pkt)) {
        panic("Not expecting failed sendTimingReq\n");
    }
}

/**
 * Handle a release action.
 *
 * No request is sent: a release fence only has to wait for earlier stores,
 * and in this tester all earlier loads and stores have already finished
 * before the release is issued. The method just makes sure the thread is
 * woken up so that it can try to issue its next action.
 */
void
GpuWavefront::issueReleaseOp()
{
    DPRINTF(ProtocolTest, "%s Episode %d: Issuing Release\n", this->getName(),
            curEpisode->getEpisodeId());

    // a wakeup is already pending, so there is nothing more to do
    if (threadEvent.scheduled()) {
        return;
    }

    // otherwise schedule one so that the next action can be considered
    scheduleWakeup();
}

/**
 * Process a response packet delivered to this wavefront.
 *
 * Depending on the response command, the matching pending counter is
 * decremented, the matching outstanding request is popped, returned data
 * is validated and the log table is updated. The packet is always deleted.
 * Its sender state is deleted as well, except for a WriteResp, which is
 * later followed by a WriteCompleteResp. Finally the last active cycle is
 * recorded and a wakeup is scheduled if none is pending. An unsupported
 * response command is treated as fatal.
 *
 * @param pkt the response packet; must not be null
 */
void
GpuWavefront::hitCallback(PacketPtr pkt)
{
    assert(pkt);
    MemCmd cmd = pkt->cmd;

    // a write completion ack is handled without looking at its address
    Addr resp_addr = 0;
    if (!(cmd == MemCmd::WriteCompleteResp)) {
        resp_addr = pkt->getAddr();
    }

    DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - "
                    "Addr %s\n", this->getName(),
                    curEpisode->getEpisodeId(), cmd.toString(),
                    ruby::printAddress(resp_addr));

    // set to false when a later response still belongs to this transaction
    bool transaction_finished = true;

    if (cmd == MemCmd::MemSyncResp) {
        // a fence came back; there is no data to validate
        assert(pendingFenceCount > 0);
        assert(pendingLdStCount == 0);
        assert(pendingAtomicCount == 0);
        pendingFenceCount--;
    } else if (cmd == MemCmd::ReadResp) {
        // a load came back
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);
        assert(outstandingLoads.count(resp_addr) > 0);

        // check the returned data against the matching outstanding load
        Value loaded = *(pkt->getPtr<Value>());
        OutstandingReq load_req = popOutstandingReq(outstandingLoads,
                                                    resp_addr);
        validateLoadResp(load_req.origLoc, load_req.lane, loaded);

        // the load is complete
        pendingLdStCount--;
    } else if (cmd == MemCmd::WriteResp) {
        // a store came back
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);

        // A write response needs no validation. The store is removed from
        // the outstanding table so that later requests depending on it can
        // proceed, but pendingLdStCount is left untouched: the write has
        // not completed in downstream memory yet, and the counter is only
        // decremented when the write completion ack arrives.
        assert(outstandingStores.count(resp_addr) > 0);
        OutstandingReq store_req = popOutstandingReq(outstandingStores,
                                                     resp_addr);
        assert(store_req.storedValue != AddressManager::INVALID_VALUE);

        // log the value that was written
        addrManager->updateLogTable(store_req.origLoc, threadId,
                                    curEpisode->getEpisodeId(),
                                    store_req.storedValue,
                                    curTick(),
                                    cuId);

        // still waiting for the write completion ack
        transaction_finished = false;
    } else if (cmd == MemCmd::SwapResp) {
        // an atomic came back
        assert(pendingAtomicCount > 0);
        assert(pendingLdStCount == 0);
        assert(outstandingAtomics.count(resp_addr) > 0);

        // value returned by the atomic operation
        Value returned = *(pkt->getPtr<Value>());

        // check it against the matching outstanding atomic
        OutstandingReq atomic_req = popOutstandingReq(outstandingAtomics,
                                                      resp_addr);
        validateAtomicResp(atomic_req.origLoc, atomic_req.lane, returned);

        // log the returned value
        addrManager->updateLogTable(atomic_req.origLoc, threadId,
                                    curEpisode->getEpisodeId(), returned,
                                    curTick(),
                                    cuId);

        // the atomic is complete
        pendingAtomicCount--;
    } else if (cmd == MemCmd::WriteCompleteResp) {
        // the completion ack of an earlier store
        assert(pendingLdStCount > 0);
        assert(pendingAtomicCount == 0);

        // the store is complete
        pendingLdStCount--;
    } else {
        panic("Unsupported MemCmd response type");
    }

    // the sender state is only released once the transaction has finished
    if (transaction_finished) {
        delete pkt->senderState;
    }

    delete pkt;

    // remember when this thread last made progress, for deadlock checking
    lastActiveCycle = curCycle();

    // the response may have unblocked the next action
    if (!threadEvent.scheduled()) {
        scheduleWakeup();
    }
}

/**
 * Hook called on every request built by the issue methods, right before
 * the packet is created, to set protocol-specific flags.
 *
 * This implementation leaves the request untouched.
 * NOTE(review): presumably meant to be overridden by protocol-specific
 * wavefront classes -- confirm against the class declaration.
 *
 * @param req the request about to be sent
 */
void
GpuWavefront::setExtraRequestFlags(RequestPtr req)
{
    // No extra request flag is set
}

} // namespace gem5