/*
 * Copyright (c) 2010-2011, 2021 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2004-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "cpu/o3/dyn_inst.hh"

#include <algorithm>
#include <ios>
#include <sstream>

#include "base/intmath.hh"
#include "debug/DynInst.hh"
#include "debug/IQ.hh"
#include "debug/O3PipeView.hh"

namespace gem5
{

namespace o3
{

/**
 * Primary constructor. Adopts the array pointers that were prepared in
 * "arrays", resets the status and flag bitsets, and (in builds without
 * NDEBUG) accounts for the new instruction in the CPU's live-instruction
 * counter.
 *
 * @param arrays Pointers to, and sizes of, the register index arrays.
 * @param static_inst The static instruction this dynamic instance wraps.
 * @param _macroop The macro-op stored alongside the static instruction.
 * @param seq_num Sequence number assigned to this instruction.
 * @param _cpu The CPU this instruction belongs to.
 */
DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &static_inst,
        const StaticInstPtr &_macroop, InstSeqNum seq_num, CPU *_cpu)
    : seqNum(seq_num), staticInst(static_inst), cpu(_cpu),
      _numSrcs(arrays.numSrcs), _numDests(arrays.numDests),
      _flatDestIdx(arrays.flatDestIdx), _destIdx(arrays.destIdx),
      _prevDestIdx(arrays.prevDestIdx), _srcIdx(arrays.srcIdx),
      _readySrcIdx(arrays.readySrcIdx), macroop(_macroop)
{
    // The ready-source array holds (numSrcs + 7) / 8 bytes; zero them all.
    const auto ready_bytes = (numSrcs() + 7) / 8;
    std::fill_n(_readySrcIdx, ready_bytes, 0);

    status.reset();

    // Start from a clean set of flags, then turn on the defaults.
    instFlags.reset();
    instFlags[RecordResult] = true;
    instFlags[Predicate] = true;
    instFlags[MemAccPredicate] = true;

#ifndef NDEBUG
    // Count this instruction and trip an assertion once more than 1500
    // instructions are alive at the same time.
    if (++cpu->instcount > 1500) {
#ifdef GEM5_DEBUG
        cpu->dumpInsts();
        dumpSNList();
#endif
        assert(cpu->instcount <= 1500);
    }

    DPRINTF(DynInst,
        "DynInst: [sn:%lli] Instruction created. Instcount for %s = %i\n",
        seqNum, cpu->name(), cpu->instcount);
#endif

#ifdef GEM5_DEBUG
    // Remember the sequence number so dumpSNList() can report it.
    cpu->snList.insert(seqNum);
#endif

}

/**
 * Constructor that additionally records the instruction's PC and its
 * predicted PC. Everything else is handled by the primary constructor.
 *
 * @param _pc PC state stored into "pc".
 * @param pred_pc PC state stored into "predPC".
 */
DynInst::DynInst(const Arrays &arrays,
        const StaticInstPtr &static_inst,
        const StaticInstPtr &_macroop,
        const PCStateBase &_pc,
        const PCStateBase &pred_pc,
        InstSeqNum seq_num,
        CPU *_cpu)
    : DynInst(arrays, static_inst, _macroop, seq_num, _cpu)
{
    set(pc, _pc);
    set(predPC, pred_pc);
}

/**
 * Constructor without a sequence number or CPU: delegates to the primary
 * constructor with a sequence number of 0 and a null CPU pointer.
 *
 * NOTE(review): the primary constructor dereferences "cpu" whenever NDEBUG
 * is not defined, so this overload looks unsafe in such builds -- confirm
 * whether it is ever used there.
 */
DynInst::DynInst(const Arrays &arrays,
        const StaticInstPtr &_staticInst,
        const StaticInstPtr &_macroop)
    : DynInst(arrays, _staticInst, _macroop, 0, nullptr)
{
}

/*
 * This custom "new" operator uses the default "new" operator to allocate space
 * for a DynInst, but also pads out the number of bytes to make room for some
 * extra structures the DynInst needs. We save time and improve performance by
 * only going to the heap once to get space for all these structures.
 *
 * When a DynInst is allocated with new, the compiler will call this "new"
 * operator with "count" set to the number of bytes it needs to store the
 * DynInst. We ultimately call into the default new operator to get those
 * bytes, but before we do, we pad out "count" so that there will be extra
 * space for some structures the DynInst needs. We take into account both the
 * absolute size of these structures, and also what alignment they need.
 *
 * Once we've gotten a buffer large enough to hold the DynInst itself and these
 * extra structures, we construct the extra bits using placement new. This
 * constructs the structures in place in the space we created for them.
 *
 * Next, we return the buffer as the result of our operator. The compiler takes
 * that buffer and constructs the DynInst in the beginning of it using the
 * DynInst constructor.
 *
 * To avoid having to calculate where these extra structures are twice, once
 * when making room for them and initializing them, and then once again in the
 * DynInst constructor, we also pass in a structure called "arrays" which holds
 * pointers to them. The fields of "arrays" are initialized in this operator,
 * and are then consumed in the DynInst constructor.
 */
void *
DynInst::operator new(size_t count, Arrays &arrays)
{
    // Convenience variables for brevity.
    const auto num_dests = arrays.numDests;
    const auto num_srcs = arrays.numSrcs;

    // The ready-source array is sized in bytes as (num_srcs + 7) / 8. The
    // same count has to be used when reserving the space and when
    // constructing the array, so compute it once.
    const auto num_ready_bytes = (num_srcs + 7) / 8;

    // Figure out where everything will go. All values below are offsets
    // from the start of the buffer, with the DynInst itself at offset zero.
    uintptr_t inst = 0;
    size_t inst_size = count;

    uintptr_t flat_dest_idx = roundUp(inst + inst_size, alignof(RegId));
    size_t flat_dest_idx_size = sizeof(*arrays.flatDestIdx) * num_dests;

    uintptr_t dest_idx =
        roundUp(flat_dest_idx + flat_dest_idx_size, alignof(PhysRegIdPtr));
    size_t dest_idx_size = sizeof(*arrays.destIdx) * num_dests;

    uintptr_t prev_dest_idx =
        roundUp(dest_idx + dest_idx_size, alignof(PhysRegIdPtr));
    size_t prev_dest_idx_size = sizeof(*arrays.prevDestIdx) * num_dests;

    uintptr_t src_idx =
        roundUp(prev_dest_idx + prev_dest_idx_size, alignof(PhysRegIdPtr));
    size_t src_idx_size = sizeof(*arrays.srcIdx) * num_srcs;

    uintptr_t ready_src_idx =
        roundUp(src_idx + src_idx_size, alignof(uint8_t));
    size_t ready_src_idx_size =
        sizeof(*arrays.readySrcIdx) * num_ready_bytes;

    // Figure out how much space we need in total.
    size_t total_size = ready_src_idx + ready_src_idx_size;

    // Actually allocate it.
    uint8_t *buf = (uint8_t *)::operator new(total_size);

    // Fill in "arrays" with pointers to all the arrays.
    arrays.flatDestIdx = (RegId *)(buf + flat_dest_idx);
    arrays.destIdx = (PhysRegIdPtr *)(buf + dest_idx);
    arrays.prevDestIdx = (PhysRegIdPtr *)(buf + prev_dest_idx);
    arrays.srcIdx = (PhysRegIdPtr *)(buf + src_idx);
    arrays.readySrcIdx = (uint8_t *)(buf + ready_src_idx);

    // Initialize all the extra components.
    new (arrays.flatDestIdx) RegId[num_dests];
    new (arrays.destIdx) PhysRegIdPtr[num_dests];
    new (arrays.prevDestIdx) PhysRegIdPtr[num_dests];
    new (arrays.srcIdx) PhysRegIdPtr[num_srcs];
    // Construct only as many bytes as were reserved above; building
    // num_srcs elements here would declare an array that extends past the
    // end of the allocation.
    new (arrays.readySrcIdx) uint8_t[num_ready_bytes];

    return buf;
}

// The custom "new" operator allocates more bytes than sizeof(DynInst), which
// makes AddressSanitizer report a new-delete-type-mismatch. Providing a
// matching custom "delete" is enough to silence that false positive.
void
DynInst::operator delete(void *ptr)
{
    // Hand the whole buffer back to the global deallocation function.
    ::operator delete(ptr);
}

DynInst::~DynInst()
{
    /*
     * The register index arrays live in the same buffer as this DynInst.
     * Their elements are destroyed by hand here; their memory is not freed
     * separately because it is released together with the DynInst's buffer.
     */
    for (int dest = 0; dest < _numDests; ++dest) {
        _flatDestIdx[dest].~RegId();
        _destIdx[dest].~PhysRegIdPtr();
        _prevDestIdx[dest].~PhysRegIdPtr();
    }

    for (int src = 0; src < _numSrcs; ++src)
        _srcIdx[src].~PhysRegIdPtr();

    for (int byte = 0; byte < ((_numSrcs + 7) / 8); ++byte)
        _readySrcIdx[byte].~uint8_t();

#if TRACING_ON
    // fetchTick can be -1 if the instruction was fetched outside the trace
    // window; nothing is printed in that case.
    const Tick fetch = fetchTick;
    if (debug::O3PipeView && fetch != -1) {
        // Each stage tick is added to the fetch tick; a stage tick of -1 is
        // reported as 0 instead.
        const Tick decode_at = (decodeTick == -1) ? 0 : fetch + decodeTick;
        const Tick rename_at = (renameTick == -1) ? 0 : fetch + renameTick;
        const Tick dispatch_at =
            (dispatchTick == -1) ? 0 : fetch + dispatchTick;
        const Tick issue_at = (issueTick == -1) ? 0 : fetch + issueTick;
        const Tick complete_at =
            (completeTick == -1) ? 0 : fetch + completeTick;
        const Tick commit_at = (commitTick == -1) ? 0 : fetch + commitTick;
        const Tick store_at = (storeTick == -1) ? 0 : fetch + storeTick;

        // Print info needed by the pipeline activity viewer.
        DPRINTFR(O3PipeView, "O3PipeView:fetch:%llu:0x%08llx:%d:%llu:%s\n",
                 fetch,
                 pcState().instAddr(),
                 pcState().microPC(),
                 seqNum,
                 staticInst->disassemble(pcState().instAddr()));
        DPRINTFR(O3PipeView, "O3PipeView:decode:%llu\n", decode_at);
        DPRINTFR(O3PipeView, "O3PipeView:rename:%llu\n", rename_at);
        DPRINTFR(O3PipeView, "O3PipeView:dispatch:%llu\n", dispatch_at);
        DPRINTFR(O3PipeView, "O3PipeView:issue:%llu\n", issue_at);
        DPRINTFR(O3PipeView, "O3PipeView:complete:%llu\n", complete_at);
        DPRINTFR(O3PipeView, "O3PipeView:retire:%llu:store:%llu\n",
                 commit_at, store_at);
    }
#endif

    // Release the data owned directly by this instruction.
    delete [] memData;
    delete traceData;
    fault = NoFault;

#ifndef NDEBUG
    // Undo the live-instruction accounting done by the constructor.
    --cpu->instcount;

    DPRINTF(DynInst,
        "DynInst: [sn:%lli] Instruction destroyed. Instcount for %s = %i\n",
        seqNum, cpu->name(), cpu->instcount);
#endif
#ifdef GEM5_DEBUG
    cpu->snList.erase(seqNum);
#endif
}


#ifdef GEM5_DEBUG
/**
 * Print every sequence number still present in the CPU's snList, one per
 * line, each prefixed with a running index.
 */
void
DynInst::dumpSNList()
{
    int count = 0;
    for (const auto &seq : cpu->snList) {
        cprintf("%i: [sn:%lli] not destroyed\n", count, seq);
        ++count;
    }
}
#endif

/**
 * Print the thread number, the instruction address and the disassembly of
 * this instruction to standard output.
 */
void
DynInst::dump()
{
    const auto inst_addr = pc->instAddr();

    // Print the address in hexadecimal. The previous "%#08d" produced a
    // decimal address, with an alternate-form flag that has no meaning for
    // decimal output.
    cprintf("T%d : 0x%08x `", threadNumber, inst_addr);
    std::cout << staticInst->disassemble(inst_addr);
    cprintf("'\n");
}

/**
 * Format the thread number, the instruction address and the disassembly of
 * this instruction into a string.
 *
 * @param outstring Overwritten with the formatted description.
 */
void
DynInst::dump(std::string &outstring)
{
    const auto inst_addr = pc->instAddr();

    std::ostringstream s;
    // The address follows a literal "0x", so it must be streamed in
    // hexadecimal; the stream's default decimal formatting would make the
    // output misleading. Switch back to decimal afterwards.
    s << "T" << threadNumber << " : 0x" << std::hex << inst_addr << std::dec
      << " " << staticInst->disassemble(inst_addr);

    outstring = s.str();
}

/**
 * Count one more source register as ready and mark the instruction as able
 * to issue once the ready count equals the number of source registers.
 */
void
DynInst::markSrcRegReady()
{
    // Log the count as it will be after this call.
    DPRINTF(IQ, "[sn:%lli] has %d ready out of %d sources. RTI %d)\n",
            seqNum, readyRegs+1, numSrcRegs(), readyToIssue());

    ++readyRegs;
    if (readyRegs != numSrcRegs())
        return;

    setCanIssue();
}

/**
 * Flag one specific source register as ready and update the ready count.
 *
 * @param src_idx Index of the source register that became ready.
 */
void
DynInst::markSrcRegReady(RegIndex src_idx)
{
    // Record readiness for this source first, then do the common counting.
    readySrcIdx(src_idx, true);
    markSrcRegReady();
}


void
DynInst::setSquashed()
{
    status.set(Squashed);

    if (!isPinnedRegsRenamed() || isPinnedRegsSquashDone())
        return;

    // This inst has been renamed already so it may go through rename
    // again (e.g. if the squash is due to memory access order violation).
    // Reset the write counters for all pinned destination register to ensure
    // that they are in a consistent state for a possible re-rename. This also
    // ensures that dest regs will be pinned to the same phys register if
    // re-rename happens.
    for (int idx = 0; idx < numDestRegs(); idx++) {
        PhysRegIdPtr phys_dest_reg = renamedDestIdx(idx);
        if (phys_dest_reg->isPinned()) {
            phys_dest_reg->incrNumPinnedWrites();
            if (isPinnedRegsWritten())
                phys_dest_reg->incrNumPinnedWritesToComplete();
        }
    }
    setPinnedRegsSquashDone();
}

/**
 * Run the static instruction's execute() on this dynamic instruction.
 *
 * @return The fault produced by the execution, also stored in "fault".
 */
Fault
DynInst::execute()
{
    // @todo: Pretty convoluted way to avoid squashing from happening
    // when using the TC during an instruction's execution
    // (specifically for instructions that have side-effects that use
    // the TC).  Fix this.
    const bool saved_no_squash = thread->noSquashFromTC;
    thread->noSquashFromTC = true;

    fault = staticInst->execute(this, traceData);

    // Put the thread's setting back the way it was found.
    thread->noSquashFromTC = saved_no_squash;
    return fault;
}

/**
 * Run the static instruction's initiateAcc() on this dynamic instruction.
 *
 * @return The fault produced by the call, also stored in "fault".
 */
Fault
DynInst::initiateAcc()
{
    // @todo: Pretty convoluted way to avoid squashing from happening
    // when using the TC during an instruction's execution
    // (specifically for instructions that have side-effects that use
    // the TC).  Fix this.
    const bool saved_no_squash = thread->noSquashFromTC;
    thread->noSquashFromTC = true;

    fault = staticInst->initiateAcc(this, traceData);

    // Put the thread's setting back the way it was found.
    thread->noSquashFromTC = saved_no_squash;
    return fault;
}

/**
 * Run the static instruction's completeAcc() with the given packet.
 *
 * @param pkt Packet passed through to the static instruction.
 * @return The fault produced by the call, also stored in "fault".
 */
Fault
DynInst::completeAcc(PacketPtr pkt)
{
    // @todo: Pretty convoluted way to avoid squashing from happening
    // when using the TC during an instruction's execution
    // (specifically for instructions that have side-effects that use
    // the TC).  Fix this.
    const bool saved_no_squash = thread->noSquashFromTC;
    thread->noSquashFromTC = true;

    // When a checker is attached, copy the packet request's extra data
    // into the request kept for verification of store conditionals.
    if (cpu->checker && isStoreConditional())
        reqToVerify->setExtraData(pkt->req->getExtraData());

    fault = staticInst->completeAcc(pkt, this, traceData);

    // Put the thread's setting back the way it was found.
    thread->noSquashFromTC = saved_no_squash;
    return fault;
}

/**
 * Forward a fault to the CPU, together with this instruction's thread
 * number and static instruction.
 *
 * @param fault The fault to hand to the CPU.
 */
void
DynInst::trap(const Fault &fault)
{
    cpu->trap(fault, threadNumber, staticInst);
}

/**
 * Push a load request for this instruction to the CPU.
 *
 * @param addr Address of the access.
 * @param size Size of the access; must equal byte_enable.size().
 * @param flags Request flags.
 * @param byte_enable Enable mask passed along with the request.
 * @return The fault returned by the CPU's pushRequest().
 */
Fault
DynInst::initiateMemRead(Addr addr, unsigned size, Request::Flags flags,
                               const std::vector<bool> &byte_enable)
{
    assert(byte_enable.size() == size);

    DynInstPtr::PtrType self = dynamic_cast<DynInstPtr::PtrType>(this);
    const bool is_load = true;

    // A load carries no data, result pointer or atomic op.
    return cpu->pushRequest(self, is_load, nullptr, size, addr, flags,
                            nullptr, nullptr, byte_enable);
}

/**
 * Push a memory management command to the CPU. It is issued as a load of
 * fixed size 8 to address 0 with every byte enabled.
 *
 * @param flags Request flags.
 * @return The fault returned by the CPU's pushRequest().
 */
Fault
DynInst::initiateMemMgmtCmd(Request::Flags flags)
{
    const unsigned int size = 8;

    DynInstPtr::PtrType self = dynamic_cast<DynInstPtr::PtrType>(this);
    const bool is_load = true;

    return cpu->pushRequest(self, is_load, nullptr, size, 0x0ul, flags,
                            nullptr, nullptr,
                            std::vector<bool>(size, true));
}

/**
 * Push a store request for this instruction to the CPU.
 *
 * @param data Data pointer passed along with the request.
 * @param size Size of the access; must equal byte_enable.size().
 * @param addr Address of the access.
 * @param flags Request flags.
 * @param res Result pointer passed along with the request.
 * @param byte_enable Enable mask passed along with the request.
 * @return The fault returned by the CPU's pushRequest().
 */
Fault
DynInst::writeMem(uint8_t *data, unsigned size, Addr addr,
                        Request::Flags flags, uint64_t *res,
                        const std::vector<bool> &byte_enable)
{
    assert(byte_enable.size() == size);

    DynInstPtr::PtrType self = dynamic_cast<DynInstPtr::PtrType>(this);
    const bool is_load = false;

    // A store carries no atomic op.
    return cpu->pushRequest(self, is_load, data, size, addr, flags, res,
                            nullptr, byte_enable);
}

/**
 * Push an atomic memory operation request to the CPU.
 *
 * @param addr Address of the access.
 * @param size Size of the access; every byte is enabled.
 * @param flags Request flags.
 * @param amo_op Atomic operation functor, moved into the request.
 * @return The fault returned by the CPU's pushRequest().
 */
Fault
DynInst::initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
                              AtomicOpFunctorPtr amo_op)
{
    // atomic memory instructions do not have data to be written to memory
    // yet since the atomic operations will be executed directly in
    // cache/memory. Therefore, its `data` field is nullptr.
    // Atomic memory requests need to carry their `amo_op` fields to
    // cache/memory.
    DynInstPtr::PtrType self = dynamic_cast<DynInstPtr::PtrType>(this);
    const bool is_load = false;

    return cpu->pushRequest(self, is_load, nullptr, size, addr, flags,
                            nullptr, std::move(amo_op),
                            std::vector<bool>(size, true));
}

} // namespace o3
} // namespace gem5