/*
 * Copyright (c) 2010-2011, 2021 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2004-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "cpu/o3/dyn_inst.hh"

#include <algorithm>

#include "base/intmath.hh"
#include "debug/DynInst.hh"
#include "debug/IQ.hh"
#include "debug/O3PipeView.hh"

namespace gem5
{

namespace o3
{

/**
 * Primary constructor; the other constructors delegate to this one.
 *
 * Copies the array pointers and counts out of "arrays" (which the custom
 * operator new below has already filled in), clears the ready-source
 * bitmap, resets the status and instruction flags, and then sets the
 * RecordResult, Predicate and MemAccPredicate flags.
 *
 * In builds without NDEBUG it also bumps the per-CPU live instruction
 * counter and asserts that no more than 1500 instructions are alive.
 *
 * @param arrays      Pointers to, and sizes of, the trailing arrays.
 * @param static_inst The static instruction this dynamic instance wraps.
 * @param _macroop    The macro-op stored in the "macroop" member.
 * @param seq_num     Sequence number assigned to this instruction.
 * @param _cpu        The CPU stored in the "cpu" member.
 */
DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &static_inst,
        const StaticInstPtr &_macroop, InstSeqNum seq_num, CPU *_cpu)
    : seqNum(seq_num), staticInst(static_inst), cpu(_cpu),
    _numSrcs(arrays.numSrcs), _numDests(arrays.numDests),
    _flatDestIdx(arrays.flatDestIdx), _destIdx(arrays.destIdx),
    _prevDestIdx(arrays.prevDestIdx), _srcIdx(arrays.srcIdx),
    _readySrcIdx(arrays.readySrcIdx), macroop(_macroop)
{
    // One ready bit per source register, packed eight to a byte.
    std::fill(_readySrcIdx, _readySrcIdx + (numSrcs() + 7) / 8, 0);

    status.reset();

    instFlags.reset();
    instFlags[RecordResult] = true;
    instFlags[Predicate] = true;
    instFlags[MemAccPredicate] = true;

#ifndef NDEBUG
    ++cpu->instcount;

    if (cpu->instcount > 1500) {
#ifdef GEM5_DEBUG
        // Dump what is still alive before the assertion below fires.
        cpu->dumpInsts();
        dumpSNList();
#endif
        assert(cpu->instcount <= 1500);
    }
    DPRINTF(DynInst,
        "DynInst: [sn:%lli] Instruction created. Instcount for %s = %i\n",
        seqNum, cpu->name(), cpu->instcount);
#endif

#ifdef GEM5_DEBUG
    cpu->snList.insert(seqNum);
#endif
}

/**
 * Construct an instruction and additionally record its PC and its
 * predicted PC.
 *
 * @param _pc     Value copied into the "pc" member.
 * @param pred_pc Value copied into the "predPC" member.
 */
DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &static_inst,
        const StaticInstPtr &_macroop, const PCStateBase &_pc,
        const PCStateBase &pred_pc, InstSeqNum seq_num, CPU *_cpu)
    : DynInst(arrays, static_inst, _macroop, seq_num, _cpu)
{
    set(pc, _pc);
    set(predPC, pred_pc);
}

/**
 * Construct an instruction with sequence number 0 and no CPU.
 *
 * NOTE(review): this passes a null CPU pointer to the primary constructor,
 * which dereferences "cpu" whenever NDEBUG is not defined (and the
 * destructor does the same). Confirm this overload is only used where that
 * is safe.
 */
DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &_staticInst,
        const StaticInstPtr &_macroop)
    : DynInst(arrays, _staticInst, _macroop, 0, nullptr)
{}

/*
 * This custom "new" operator uses the default "new" operator to allocate space
 * for a DynInst, but also pads out the number of bytes to make room for some
 * extra structures the DynInst needs. We save time and improve performance by
 * only going to the heap once to get space for all these structures.
 *
 * When a DynInst is allocated with new, the compiler will call this "new"
 * operator with "count" set to the number of bytes it needs to store the
 * DynInst. We ultimately call into the default new operator to get those
 * bytes, but before we do, we pad out "count" so that there will be extra
 * space for some structures the DynInst needs. We take into account both the
 * absolute size of these structures, and also what alignment they need.
 *
 * Once we've gotten a buffer large enough to hold the DynInst itself and these
 * extra structures, we construct the extra bits using placement new. This
 * constructs the structures in place in the space we created for them.
 *
 * Next, we return the buffer as the result of our operator. The compiler takes
 * that buffer and constructs the DynInst in the beginning of it using the
 * DynInst constructor.
 *
 * To avoid having to calculate where these extra structures are twice, once
 * when making room for them and initializing them, and then once again in the
 * DynInst constructor, we also pass in a structure called "arrays" which holds
 * pointers to them. The fields of "arrays" are initialized in this operator,
 * and are then consumed in the DynInst constructor.
 *
 * On entry only arrays.numSrcs and arrays.numDests are read; the pointer
 * members of "arrays" are overwritten.
 */
void *
DynInst::operator new(size_t count, Arrays &arrays)
{
    // Convenience variables for brevity.
    const auto num_dests = arrays.numDests;
    const auto num_srcs = arrays.numSrcs;
    // The ready bitmap holds one bit per source, rounded up to whole bytes.
    const auto num_ready_bytes = (num_srcs + 7) / 8;

    // Figure out where everything will go. All positions are offsets from
    // the start of the buffer, which is why "inst" starts at zero.
    uintptr_t inst = 0;
    size_t inst_size = count;

    uintptr_t flat_dest_idx = roundUp(inst + inst_size, alignof(RegId));
    size_t flat_dest_idx_size = sizeof(*arrays.flatDestIdx) * num_dests;

    uintptr_t dest_idx =
        roundUp(flat_dest_idx + flat_dest_idx_size, alignof(PhysRegIdPtr));
    size_t dest_idx_size = sizeof(*arrays.destIdx) * num_dests;

    uintptr_t prev_dest_idx =
        roundUp(dest_idx + dest_idx_size, alignof(PhysRegIdPtr));
    size_t prev_dest_idx_size = sizeof(*arrays.prevDestIdx) * num_dests;

    uintptr_t src_idx =
        roundUp(prev_dest_idx + prev_dest_idx_size, alignof(PhysRegIdPtr));
    size_t src_idx_size = sizeof(*arrays.srcIdx) * num_srcs;

    uintptr_t ready_src_idx =
        roundUp(src_idx + src_idx_size, alignof(uint8_t));
    size_t ready_src_idx_size =
        sizeof(*arrays.readySrcIdx) * num_ready_bytes;

    // Figure out how much space we need in total.
    size_t total_size = ready_src_idx + ready_src_idx_size;

    // Actually allocate it.
    uint8_t *buf = static_cast<uint8_t *>(::operator new(total_size));

    // Fill in "arrays" with pointers to all the arrays.
    arrays.flatDestIdx = reinterpret_cast<RegId *>(buf + flat_dest_idx);
    arrays.destIdx = reinterpret_cast<PhysRegIdPtr *>(buf + dest_idx);
    arrays.prevDestIdx =
        reinterpret_cast<PhysRegIdPtr *>(buf + prev_dest_idx);
    arrays.srcIdx = reinterpret_cast<PhysRegIdPtr *>(buf + src_idx);
    arrays.readySrcIdx = buf + ready_src_idx;

    // Initialize all the extra components.
    new (arrays.flatDestIdx) RegId[num_dests];
    new (arrays.destIdx) PhysRegIdPtr[num_dests];
    new (arrays.prevDestIdx) PhysRegIdPtr[num_dests];
    new (arrays.srcIdx) PhysRegIdPtr[num_srcs];
    // Construct exactly as many bytes as were reserved above; the bitmap is
    // (num_srcs + 7) / 8 bytes long, not num_srcs bytes.
    new (arrays.readySrcIdx) uint8_t[num_ready_bytes];

    return buf;
}

// Because of the custom "new" operator that allocates more bytes than the
// size of the DynInst object, AddressSanitizer throw new-delete-type-mismatch.
// Adding a custom delete function is enough to shut down this false positive
void
DynInst::operator delete(void *ptr)
{
    ::operator delete(ptr);
}

/**
 * Destroy the trailing arrays, optionally emit the O3PipeView trace record
 * for this instruction, release memData and traceData, and undo the debug
 * bookkeeping done by the constructor.
 */
DynInst::~DynInst()
{
    /*
     * The buffer this DynInst occupies also holds some of the structures it
     * points to. We need to call their destructors manually to make sure that
     * they're cleaned up appropriately, but we don't need to free their memory
     * explicitly since that's part of the DynInst's buffer and is already
     * going to be freed as part of deleting the DynInst.
     */
    for (size_t i = 0; i < _numDests; i++) {
        _flatDestIdx[i].~RegId();
        _destIdx[i].~PhysRegIdPtr();
        _prevDestIdx[i].~PhysRegIdPtr();
    }

    for (size_t i = 0; i < _numSrcs; i++)
        _srcIdx[i].~PhysRegIdPtr();

    for (size_t i = 0; i < ((_numSrcs + 7) / 8); i++)
        _readySrcIdx[i].~uint8_t();

#if TRACING_ON
    if (debug::O3PipeView) {
        Tick fetch = fetchTick;
        // fetchTick can be -1 if the instruction fetched outside the trace
        // window.
        if (fetch != -1) {
            Tick val;
            // Print info needed by the pipeline activity viewer. Every
            // later stage tick is added to the fetch tick; a stage whose
            // tick is -1 is reported as 0.
            DPRINTFR(O3PipeView, "O3PipeView:fetch:%llu:0x%08llx:%d:%llu:%s\n",
                     fetch,
                     pcState().instAddr(),
                     pcState().microPC(),
                     seqNum,
                     staticInst->disassemble(pcState().instAddr()));

            val = (decodeTick == -1) ? 0 : fetch + decodeTick;
            DPRINTFR(O3PipeView, "O3PipeView:decode:%llu\n", val);
            val = (renameTick == -1) ? 0 : fetch + renameTick;
            DPRINTFR(O3PipeView, "O3PipeView:rename:%llu\n", val);
            val = (dispatchTick == -1) ? 0 : fetch + dispatchTick;
            DPRINTFR(O3PipeView, "O3PipeView:dispatch:%llu\n", val);
            val = (issueTick == -1) ? 0 : fetch + issueTick;
            DPRINTFR(O3PipeView, "O3PipeView:issue:%llu\n", val);
            val = (completeTick == -1) ? 0 : fetch + completeTick;
            DPRINTFR(O3PipeView, "O3PipeView:complete:%llu\n", val);
            val = (commitTick == -1) ? 0 : fetch + commitTick;

            Tick valS = (storeTick == -1) ? 0 : fetch + storeTick;
            DPRINTFR(O3PipeView, "O3PipeView:retire:%llu:store:%llu\n",
                     val, valS);
        }
    }
#endif

    delete [] memData;
    delete traceData;
    fault = NoFault;

#ifndef NDEBUG
    --cpu->instcount;

    DPRINTF(DynInst,
        "DynInst: [sn:%lli] Instruction destroyed. Instcount for %s = %i\n",
        seqNum, cpu->name(), cpu->instcount);
#endif
#ifdef GEM5_DEBUG
    cpu->snList.erase(seqNum);
#endif
}


#ifdef GEM5_DEBUG
/**
 * Print every sequence number still present in the CPU's snList, i.e. every
 * instruction that was inserted by a constructor and not yet erased by a
 * destructor.
 */
void
DynInst::dumpSNList()
{
    int count = 0;
    for (const auto &sn : cpu->snList) {
        cprintf("%i: [sn:%lli] not destroyed\n", count, sn);
        count++;
    }
}
#endif

/**
 * Print the thread number, instruction address and disassembly of this
 * instruction to standard output.
 */
void
DynInst::dump()
{
    cprintf("T%d : %#08d `", threadNumber, pc->instAddr());
    std::cout << staticInst->disassemble(pc->instAddr());
    cprintf("'\n");
}

/**
 * Write the thread number, instruction address and disassembly of this
 * instruction into a string.
 *
 * @param outstring Overwritten with the formatted description.
 */
void
DynInst::dump(std::string &outstring)
{
    std::ostringstream s;
    // The address follows a literal "0x", so it has to be streamed in hex.
    s << "T" << threadNumber << " : 0x" << std::hex << pc->instAddr()
      << std::dec << " " << staticInst->disassemble(pc->instAddr());

    outstring = s.str();
}

/**
 * Count one more source register as ready and mark the instruction as able
 * to issue once the count reaches the number of source registers.
 */
void
DynInst::markSrcRegReady()
{
    DPRINTF(IQ, "[sn:%lli] has %d ready out of %d sources. RTI %d)\n",
            seqNum, readyRegs+1, numSrcRegs(), readyToIssue());
    if (++readyRegs == numSrcRegs()) {
        setCanIssue();
    }
}

/**
 * Flag a specific source register as ready, then update the ready count.
 *
 * @param src_idx Index of the source register that became ready.
 */
void
DynInst::markSrcRegReady(RegIndex src_idx)
{
    readySrcIdx(src_idx, true);
    markSrcRegReady();
}

/**
 * Set the Squashed status bit and, the first time this happens after the
 * pinned registers were renamed, adjust the pinned-write counters of the
 * destination physical registers.
 */
void
DynInst::setSquashed()
{
    status.set(Squashed);

    // Nothing to fix up if the pinned registers were never renamed, or if
    // the fix-up below has already been done for this instruction.
    if (!isPinnedRegsRenamed() || isPinnedRegsSquashDone())
        return;

    // This inst has been renamed already so it may go through rename
    // again (e.g. if the squash is due to memory access order violation).
    // Reset the write counters for all pinned destination register to ensure
    // that they are in a consistent state for a possible re-rename. This also
    // ensures that dest regs will be pinned to the same phys register if
    // re-rename happens.
    for (int idx = 0; idx < numDestRegs(); idx++) {
        PhysRegIdPtr phys_dest_reg = renamedDestIdx(idx);
        if (phys_dest_reg->isPinned()) {
            phys_dest_reg->incrNumPinnedWrites();
            if (isPinnedRegsWritten())
                phys_dest_reg->incrNumPinnedWritesToComplete();
        }
    }
    setPinnedRegsSquashDone();
}

/**
 * Execute the static instruction with this dynamic instruction as its
 * execution context.
 *
 * @return The fault produced by the static instruction, which is also
 *         stored in the "fault" member.
 */
Fault
DynInst::execute()
{
    // @todo: Pretty convoluted way to avoid squashing from happening
    // when using the TC during an instruction's execution
    // (specifically for instructions that have side-effects that use
    // the TC).  Fix this.
    bool no_squash_from_TC = thread->noSquashFromTC;
    thread->noSquashFromTC = true;

    fault = staticInst->execute(this, traceData);

    // Put the flag back to whatever it was before the call.
    thread->noSquashFromTC = no_squash_from_TC;

    return fault;
}

/**
 * Call the static instruction's initiateAcc() with this dynamic
 * instruction as its execution context.
 *
 * @return The fault produced by the static instruction, which is also
 *         stored in the "fault" member.
 */
Fault
DynInst::initiateAcc()
{
    // @todo: Pretty convoluted way to avoid squashing from happening
    // when using the TC during an instruction's execution
    // (specifically for instructions that have side-effects that use
    // the TC).  Fix this.
    bool no_squash_from_TC = thread->noSquashFromTC;
    thread->noSquashFromTC = true;

    fault = staticInst->initiateAcc(this, traceData);

    // Put the flag back to whatever it was before the call.
    thread->noSquashFromTC = no_squash_from_TC;

    return fault;
}

/**
 * Call the static instruction's completeAcc() with the given packet.
 *
 * @param pkt The packet handed on to the static instruction.
 * @return The fault produced by the static instruction, which is also
 *         stored in the "fault" member.
 */
Fault
DynInst::completeAcc(PacketPtr pkt)
{
    // @todo: Pretty convoluted way to avoid squashing from happening
    // when using the TC during an instruction's execution
    // (specifically for instructions that have side-effects that use
    // the TC).  Fix this.
    bool no_squash_from_TC = thread->noSquashFromTC;
    thread->noSquashFromTC = true;

    // With a checker attached, copy the store-conditional's extra data from
    // the packet's request into the request kept for verification.
    if (cpu->checker) {
        if (isStoreConditional()) {
            reqToVerify->setExtraData(pkt->req->getExtraData());
        }
    }

    fault = staticInst->completeAcc(pkt, this, traceData);

    // Put the flag back to whatever it was before the call.
    thread->noSquashFromTC = no_squash_from_TC;

    return fault;
}

/**
 * Forward a fault to the CPU's trap() for this instruction's thread.
 *
 * @param fault The fault to hand to the CPU.
 */
void
DynInst::trap(const Fault &fault)
{
    cpu->trap(fault, threadNumber, staticInst);
}

/**
 * Push a load request for this instruction to the CPU.
 *
 * @param addr        Address passed to the request.
 * @param size        Access size; must equal byte_enable.size().
 * @param flags       Request flags.
 * @param byte_enable One entry per byte of the access.
 * @return The fault returned by the CPU's pushRequest().
 */
Fault
DynInst::initiateMemRead(Addr addr, unsigned size, Request::Flags flags,
                         const std::vector<bool> &byte_enable)
{
    assert(byte_enable.size() == size);
    return cpu->pushRequest(
        dynamic_cast<DynInstPtr::PtrType>(this),
        /* ld */ true, nullptr, size, addr, flags, nullptr, nullptr,
        byte_enable);
}

/**
 * Push a memory management command to the CPU. It is issued through the
 * load path as an 8-byte request to address 0 with every byte enabled.
 *
 * @param flags Request flags.
 * @return The fault returned by the CPU's pushRequest().
 */
Fault
DynInst::initiateMemMgmtCmd(Request::Flags flags)
{
    const unsigned int size = 8;
    return cpu->pushRequest(
            dynamic_cast<DynInstPtr::PtrType>(this),
            /* ld */ true, nullptr, size, 0x0ul, flags, nullptr, nullptr,
            std::vector<bool>(size, true));
}

/**
 * Push a store request for this instruction to the CPU.
 *
 * @param data        Data pointer passed to the request.
 * @param size        Access size; must equal byte_enable.size().
 * @param addr        Address passed to the request.
 * @param flags       Request flags.
 * @param res         Result pointer passed to the request.
 * @param byte_enable One entry per byte of the access.
 * @return The fault returned by the CPU's pushRequest().
 */
Fault
DynInst::writeMem(uint8_t *data, unsigned size, Addr addr,
                  Request::Flags flags, uint64_t *res,
                  const std::vector<bool> &byte_enable)
{
    assert(byte_enable.size() == size);
    return cpu->pushRequest(
        dynamic_cast<DynInstPtr::PtrType>(this),
        /* st */ false, data, size, addr, flags, res, nullptr,
        byte_enable);
}

/**
 * Push an atomic memory operation for this instruction to the CPU, with
 * every byte enabled.
 *
 * @param addr   Address passed to the request.
 * @param size   Access size.
 * @param flags  Request flags.
 * @param amo_op The atomic operation functor; ownership is moved into the
 *               request.
 * @return The fault returned by the CPU's pushRequest().
 */
Fault
DynInst::initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
                        AtomicOpFunctorPtr amo_op)
{
    // atomic memory instructions do not have data to be written to memory yet
    // since the atomic operations will be executed directly in cache/memory.
    // Therefore, its `data` field is nullptr.
    // Atomic memory requests need to carry their `amo_op` fields to cache/
    // memory
    return cpu->pushRequest(
            dynamic_cast<DynInstPtr::PtrType>(this),
            /* atomic */ false, nullptr, size, addr, flags, nullptr,
            std::move(amo_op), std::vector<bool>(size, true));
}

} // namespace o3
} // namespace gem5